def pairOpt(workloads):
    # Greedy pairwise-swap optimizer: predict per-slot power with the per-machine
    # models, then repeatedly try to swap the highest- and lowest-power apps and
    # keep a swap whenever it lowers the total predicted power.
    # Relies on module-level state: machines, params, nTrain, pool, trainModel,
    # stdWorkloads, _pair_predict, oneRun, libdata, model_xgb, pd, plt.
    nStep = 1000
    nTry = 10
    dt = 1
    # machines = range(1, int(len(workloads)/2) + 1)

    # Flat index <-> (machine, slot) mapping; each machine hosts two slots
    # (ni in {0, 1}), so the round trip is valid when len(machines) >= 2.
    idx = lambda hi, ni: ni + hi * len(machines)
    ridx = lambda x: (x // len(machines), x % len(machines))

    models = {}
    # param = {
    #     'loss': 'ls',
    #     'learning_rate': .1,
    #     'alpha': 0.9,
    #     'max_depth': 4,
    #     'n_estimators': 100,
    #     'm': 10000,
    # }
    perflog = {
        'realized': [],
        'predicted': [],
    }

    # Train one XGBoost model per machine in parallel.
    totrain = []
    for hi in machines:
        model = model_xgb.XGBoost()
        model.init(**params)
        totrain.append((model, hi, nTrain, dt, libdata.apps))
    lmodels = pool.map(trainModel, totrain)
    for i in range(len(lmodels)):
        models[i + 1] = lmodels[i]

    cache = {}
    print(workloads)
    stdwl = list(stdWorkloads(workloads))
    print(stdwl)

    # Initial placement: assign workload pairs to machines in order and predict
    # the power of each slot.
    table = []
    for i in range(len(stdwl)):
        hi = i + 1
        app0, app1 = stdwl[i]
        p = _pair_predict(models[hi], hi, app0, app1, cache=cache, dt=dt)
        table.append({'power': p['power_0'], 'app': app0})
        table.append({'power': p['power_1'], 'app': app1})
    tableidx = [idx(hi, ni) for hi in machines for ni in range(2)]
    table = pd.DataFrame(table, index=tableidx)

    while nStep > 0:
        nStep -= 1
        # Log the realized (measured) and predicted total package power of the
        # current placement.
        perflog['realized'].append(
            oneRun([
                table['app'].loc[idx(hi, ni)]
                for hi in machines for ni in range(2)
            ])['pkgpwr'])
        perflog['predicted'].append(table['power'].sum())

        tsorted = table.sort_values(by='power', ascending=False)
        # print(tsorted)

        # Try to swap one of the nTry highest-power slots with one of the nTry
        # lowest-power slots; accept the first swap that reduces predicted power.
        ok = False
        for i, j in [(k, len(tsorted) - l - 1)
                     for k in range(nTry) for l in range(nTry)]:
            h0, n0 = ridx(tsorted.index[i])
            h1, n1 = ridx(tsorted.index[j])
            if tsorted.loc[idx(h0, n0), 'app'] == tsorted.loc[idx(h1, n1), 'app']:
                continue
            # print(tsorted.columns.values)
            pbefore = tsorted['power'].loc[[
                idx(h0, n0), idx(h0, 1 - n0),
                idx(h1, n1), idx(h1, 1 - n1)
            ]].sum()

            a00, a01 = tsorted.loc[idx(h0, 0), 'app'], tsorted.loc[idx(h0, 1), 'app']
            a10, a11 = tsorted.loc[idx(h1, 0), 'app'], tsorted.loc[idx(h1, 1), 'app']
            a0before = [a00, a01]
            a1before = [a10, a11]
            a0after = a0before[:]
            a1after = a1before[:]
            a0after[n0] = a1before[n1]
            a1after[n1] = a0before[n0]
            if h0 == h1:
                a0after[n1] = a1after[n1]
                a1after[n0] = a0after[n0]
            # print(a0before, a0after, a1before, a1after)

            # _pair_predict returns a dict (see the initial placement above),
            # so index the two power keys explicitly instead of tuple-unpacking.
            p0 = _pair_predict(models[h0], h0, a0after[0], a0after[1],
                               cache=cache, dt=dt)
            p1 = _pair_predict(models[h1], h1, a1after[0], a1after[1],
                               cache=cache, dt=dt)
            pa0, pa1 = p0['power_0'], p0['power_1']
            pb0, pb1 = p1['power_0'], p1['power_1']

            if pa0 + pa1 + pb0 + pb1 < pbefore:
                # DataFrame.set_value was removed from pandas; use .at instead.
                tsorted.at[idx(h0, 0), 'app'] = a0after[0]
                tsorted.at[idx(h0, 1), 'app'] = a0after[1]
                tsorted.at[idx(h1, 0), 'app'] = a1after[0]
                tsorted.at[idx(h1, 1), 'app'] = a1after[1]
                tsorted.at[idx(h0, 0), 'power'] = pa0
                tsorted.at[idx(h0, 1), 'power'] = pa1
                tsorted.at[idx(h1, 0), 'power'] = pb0
                tsorted.at[idx(h1, 1), 'power'] = pb1
                ok = True
                break
        if not ok:
            break
        table = tsorted

    # Plot realized vs. predicted power over the optimization steps.
    perflog = pd.DataFrame(perflog)
    fig, ax = plt.subplots()
    ax.plot(perflog.index, perflog['realized'], 'r')
    ax.plot(perflog.index, perflog['predicted'], 'b')
    fig.suptitle(','.join(workloads))
    # fig.savefig(optpdf, format='pdf')

    return [
        table['app'].loc[idx(hi, ni)]
        for hi in machines for ni in range(2)
    ]
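
# Illustrative usage sketch (not part of the original flow): pairOpt expects a
# flat workload list with two apps per machine and returns the optimized
# placement as a flat list ordered by (machine, slot), i.e. by idx(hi, ni).
# The app names below are hypothetical placeholders; real names come from
# libdata.apps, and the module-level machines/params/nTrain/pool must already
# be set up as elsewhere in this file.
def _pairopt_demo():
    workloads = ['appA', 'appB', 'appC', 'appD']  # hypothetical: 2 machines x 2 slots
    placement = pairOpt(workloads)
    for hi in machines:
        # Each machine's two co-located apps sit at consecutive positions.
        print('machine %d: %s' % (hi, placement[(hi - 1) * 2:(hi - 1) * 2 + 2]))
    return placement
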
allres = []
pred_time = 0
train_time = 0
for eval_times in range(10):
    print("start: %s\t%d" % (str(datetime.datetime.now()), eval_times))
    pool = mp.Pool(mp.cpu_count())
    totrain = []

    # Train on the NPB apps and validate on everything else in libdata.apps.
    # apps_train = rand.sample(libdata.apps, 9)
    apps_train = apps_npb
    apps_validation = list(set(libdata.apps) - set(apps_train))
    print("validation apps: ", apps_validation)

    train_start = time.time()
    for hi in machines:
        # Pick the model class for this run according to ml_method.
        if ml_method == 'xgb':
            model = model_xgb.XGBoost()
        elif ml_method == 'lr':
            model = model_lr.LR()
        elif ml_method == 'svr':
            model = model_svr.SVM()
        elif ml_method == 'gp':
            model = model_gp.GPR()
        elif ml_method == 'mlp':
            model = model_mlp.MLP()
        else:
            raise ValueError('unknown ml_method: %s' % ml_method)
        model.init(**params)
        totrain.append((model, hi, nTrain, dt, apps_train))

    # Train one model per machine in parallel and accumulate the training time.
    lmodels = pool.map(trainModel, totrain)
    print("finish: %s\t%d" % (str(datetime.datetime.now()), eval_times))
    for i in range(len(lmodels)):
        models[i + 1] = lmodels[i]
    train_time += (time.time() - train_start)