def extractMachineData():
    # Scan every task_usage file in the trace and bucket rows by machine ID.
    datafiles = fileutils.getFilelist("D:/googleClusterData/clusterdata-2011-1/task_usage")
    machineUsage = {}
    startAt = 0
    for machine in machines:
        machineUsage[machine] = []
    for datafile in datafiles[startAt:]:
        print datafile
        for row in fileutils.getCsvRows(datafile):
            curMachine = row[4]  # machine ID column of the task_usage trace
            if curMachine in machines:
                machineUsage[curMachine].append(row)
        # Flush this file's rows to the per-machine CSVs: create them on the
        # first file, append ('ab') on every file after that.
        for machine in machineUsage.keys():
            if startAt == 0:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv", machineUsage[machine])
            else:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv", machineUsage[machine], mode='ab')
        startAt += 1
        machineUsage.clear()
        for machine in machines:
            machineUsage[machine] = []
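# These scripts lean on a small `fileutils` helper module that is not shown.
# Below is a minimal sketch matching how it is called (getFilelist,
# getCsvRows, writeCSV); the implementations are assumptions inferred from
# usage, not the original module.
import csv
import glob

def getFilelist(directory):
    # All files in a directory, with forward-slashed paths.
    return [p.replace('\\', '/') for p in glob.glob(directory + '/*')]

def getCsvRows(path):
    # Yield the rows of a CSV file as lists of strings.
    with open(path, 'rb') as fh:
        for row in csv.reader(fh):
            yield row

def writeCSV(path, rows, mode='wb', header=None):
    # Write (or append, with mode='ab') rows to a CSV file.
    with open(path, mode) as fh:
        writer = csv.writer(fh)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)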
    # Normalise the series to [0, 1]; predictions are floored at a low
    # percentile rather than at zero.
    data = data / np.max(data)
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    # Fit Holt-Winters once on the first window, then slide forward in
    # step-sized increments, updating the model instead of refitting.
    y = data[0:input_window].tolist()
    model = HW_model.HW_model(y, minimum, 'additive')
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window].tolist()
            model.update(y)
        y_pred = model.predict(fc=predic_window)
        result.append(y_pred)
    f = filename.split('/')[-1]
    fileutils.writeCSV("D:/Wikipage data/pageviews_hw/" + f, np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files = fileutils.getFilelist("D:/Wikipage data/pageviews")
    # performsSlidingWindowForecast(files[1])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
def main():
    global METHOD
    global TYPE
    global OUTPUT
    global INPUT
    # Launch main menu
    # main_menu()
    print "Comparing Forecasting methods:\n"
    print "Please complete the following:"
    print "Enter the type of data used in the evaluation:"
    TYPE = raw_input(" >> ")
    print "The base INPUT directory is:", INPUT + TYPE
    print "and OUTPUT directory is:", OUTPUT + TYPE
    print "Would you like to change it? y or n"
    ch = raw_input(" >> ").lower()
    if ch == 'y':
        print "Enter the base path for the INPUT directory (without the type):"
        INPUT = raw_input(" >> ")
        if not os.path.isdir(INPUT):
            print "Error: Please try again; INPUT directory:"
            INPUT = raw_input(" >> ")
        print "And enter the base path for the evaluation OUTPUT directory (without the type):"
        OUTPUT = raw_input(" >> ")
        if not os.path.isdir(OUTPUT):
            print "Error: Please try again; evaluation OUTPUT directory:"
            OUTPUT = raw_input(" >> ")
    #########
    print "Please choose a method to evaluate:"
    print "1. Holt-Winters"
    print "2. Auto-regression"
    print "3. 1st Markov chain"
    print "4. 2nd Markov chain"
    print "5. PRESS"
    print "6. Agile"
    print "7. FFNN Model"
    print "8. RNN Model"
    print "9. Entwine Model"
    print "10. Moving Average"
    print "11. Average combo4 Model"
    print "12. FFNN combo4 Model"
    print "13. Weighted Average model"
    print "0. Quit"
    choice = raw_input(" >> ")
    ch = choice.lower()
    if ch == '':
        menu_actions['main_menu']()
    elif ch == '0':
        exit()
    else:
        METHOD = methods_dict[ch]
        pool = ThreadPool(4)
        files = fileutils.getFilelist(INPUT + TYPE)
        params = []
        if METHOD == 'fnn':
            # Memory traces reuse the networks tuned on the matching CPU traces.
            if TYPE.startswith("memory"):
                hyperpath = "../data/" + TYPE.replace("memory", "cpu") + "_networks/hyperparams.csv"
            else:
                hyperpath = "../data/" + TYPE + "_networks/hyperparams.csv"
            hyperparams = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparams:
                if TYPE.startswith("memory"):
                    params.append([INPUT + TYPE + '/' + curRow[0].replace("cpu", "memory").strip("'") + ".csv",
                                   METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                else:
                    params.append([INPUT + TYPE + '/' + curRow[0].strip("'") + ".csv",
                                   METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
        elif METHOD == 'rnn':
            if TYPE.startswith("memory"):
                hyperpath = "../data/" + TYPE.replace("memory", "cpu") + "_rnn_networks/hyperparams.csv"
            else:
                hyperpath = "../data/" + TYPE + "_rnn_networks/hyperparams.csv"
            hyperparams = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparams:
                if TYPE.startswith("memory"):
                    params.append([INPUT + TYPE + '/' + curRow[0].replace("cpu", "memory").strip("'") + ".csv",
                                   METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                else:
                    params.append([INPUT + TYPE + '/' + curRow[0].strip("'") + ".csv",
                                   METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
        elif METHOD == 'entwine':
            hyperpath = "../data/entwine_networks/hyperparams.csv"
            hyperparams = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparams:
                params.append([INPUT + TYPE + '/' + curRow[0].strip("'") + ".csv",
                               METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
        else:
            for f in files:
                params.append([f, METHOD, TYPE, OUTPUT, INPUT])
        if METHOD in ('avg4', 'combo4', 'wa'):
            # ensembleModel(params[0])
            pool.map(ensembleModel, params)
            pool.close()
            pool.join()
        else:
            # print "skip"
            # performsSlidingWindowForecast(params[0])
            pool.map(performsSlidingWindowForecast, params)
            pool.close()
            pool.join()
        pool = ThreadPool(4)
        results = pool.map(performEvaluations, params)
        pool.close()
        pool.join()
        fileutils.writeCSV(OUTPUT + "results/" + TYPE + "_" + METHOD + ".csv", results)
        print METHOD + " " + TYPE + " complete"
        exit()
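# main() dispatches through a methods_dict (and a menu_actions table) defined
# elsewhere. From the branches above, '7' -> 'fnn', '8' -> 'rnn',
# '9' -> 'entwine', '11' -> 'avg4', '12' -> 'combo4' and '13' -> 'wa' are
# confirmed by the code; the remaining keys in this sketch are hypothetical
# labels for the other menu entries.
methods_dict = {
    '1': 'hw',        # Holt-Winters (assumed key)
    '2': 'ar',        # auto-regression (assumed key)
    '3': 'markov1',   # 1st-order Markov chain (assumed key)
    '4': 'markov2',   # 2nd-order Markov chain (assumed key)
    '5': 'press',     # PRESS (assumed key)
    '6': 'agile',     # Agile (assumed key)
    '7': 'fnn',       # FFNN model (used in main())
    '8': 'rnn',       # RNN model (used in main())
    '9': 'entwine',   # entwine model (used in main())
    '10': 'ma',       # moving average (assumed key)
    '11': 'avg4',     # average combo4 (used in main())
    '12': 'combo4',   # FFNN combo4 (used in main())
    '13': 'wa',       # weighted average (used in main())
}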
    result = []
    maximum = np.max(data)  # renamed from `max`, which shadowed the built-in
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Press_model.Press_model(y, maximum=maximum)
            model.fit()
        else:
            # y = data[strIndex:strIndex+input_window,1]
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum  # clamp negative forecasts to the floor
        result.append(y_pred)
    # PRESS can return fewer than predic_window values; zero-pad each forecast
    # into a rectangular array before writing.
    res = np.zeros((len(result), predic_window))
    for i in range(len(result)):
        res[i, :len(result[i])] = result[i]
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/" + TYPE + "_press/" + f, np.atleast_2d(res))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files = fileutils.getFilelist("D:/data/" + TYPE)
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:, 1]).ravel()
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    maximum = np.max(data)  # renamed from `max`, which shadowed the built-in
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Markov_model.Markov_model(y, maximum=maximum, order=order_)
            model.fit()
        else:
            # y = data[strIndex:strIndex+input_window,1]
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum
        result.append(y_pred.ravel())
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_markov" + str(order_) + "/" + f, np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files = fileutils.getFilelist("D:/data/cpu")
    # Currently runs a single file; switch to pool.map for the full set.
    performsSlidingWindowForecast(files[0])
    # pool.map(performsSlidingWindowForecast, files)
    # pool.close()
    # pool.join()
    n.addConnection(FullConnection(n['in'], n['hidden'], name='in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='hidden_to_out'))
    # Elman-style recurrence: the hidden state is fed back through a context layer.
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    rnet = n
    rnet.sortModules()
    trainer = BackpropTrainer(n, trainds, learningrate=eta, weightdecay=lmda,
                              momentum=0.1, shuffle=False)
    trainer.trainEpochs(epochs)
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n


if __name__ == '__main__':
    files = fileutils.getFilelist("D:/Wikipage data/network")
    # target_files = ['cpu_1095481', 'cpu_1303745', 'cpu_1335782', 'cpu_1338948', 'cpu_1442486585', 'cpu_155313295', 'cpu_1664088958', 'cpu_317488701', 'cpu_317499484', 'cpu_3858945898', 'cpu_4304743890', 'cpu_4820238819', 'cpu_5796442', 'cpu_660404', 'cpu_711355', 'cpu_717319', 'cpu_904514', 'cpu_905062', 'cpu_907812']
    for machine in files[30:]:  # for machine in target_files[16:19]:
        machine = machine.replace('.csv', '').split('/')[-1]
        # machine = 'cpu_1095481'
        print(machine)
        # data = np.genfromtxt("d:/data/cpu5/" + machine + ".csv", skip_header=1, delimiter=',', usecols=(1))
        data = np.nan_to_num(np.genfromtxt("D:/Wikipage data/network/" + machine)).ravel()
        data = data / np.max(data)
        miniters = 100
        maxiters = 1000
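# The module declarations this fragment relies on are not shown. Below is a
# minimal sketch of the construction, assuming PyBrain's RecurrentNetwork with
# linear in/out layers and sigmoid hidden/context layers; the layer types and
# sizes are assumptions, only the names 'in', 'hidden', 'context' and 'out'
# come from the code above. A context->hidden connection is presumed, since
# without it the recurrent hidden->context link would be a dead end.
from pybrain.structure import (RecurrentNetwork, LinearLayer, SigmoidLayer,
                               FullConnection)

def buildContextNet(inDim, hiddenDim, outDim):
    n = RecurrentNetwork()
    n.addInputModule(LinearLayer(inDim, name='in'))
    n.addModule(SigmoidLayer(hiddenDim, name='hidden'))
    n.addModule(SigmoidLayer(hiddenDim, name='context'))  # previous hidden state
    n.addOutputModule(LinearLayer(outDim, name='out'))
    n.addConnection(FullConnection(n['context'], n['hidden'], name='context_to_hidden'))
    return n  # caller adds the remaining connections and calls sortModules()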
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:, 1]).ravel()
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = AR_model.AR_model(y, order=30)
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window]
            model.update(y)
        y_pred = model.predict(predic_window)
        # Column 0 holds the point forecast; clamp negatives to the floor.
        y_pred[y_pred[:, 0] < 0, 0] = minimum
        result.append(y_pred[:, 0])
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/" + TYPE + "_ar/" + f, np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files = fileutils.getFilelist("D:/data/" + TYPE)
    # print files
    # performsSlidingWindowForecast(files[0])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Wavelet_model(y)
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window]
            model.update(y)
        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum
        result.append(y_pred)
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/" + TYPE + "_agile/" + f, np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    root = "D:/data/" + TYPE
    files = fileutils.getFilelist(root)
    # performsSlidingWindowForecast(files[1])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if time >= strTime and time < endTime:
            row_f = np.float_(row[1])
            aggregatedData[x, 1] += row_f
        else:
            # Advance to the next 5-minute bucket (trace timestamps are in
            # microseconds, so 300e6 us = 5 min). Note that the sample which
            # triggers the advance is not accumulated into the new bucket.
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime
    # Fill any remaining buckets with their start times so every output file
    # has the same number of rows.
    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1
    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv, aggregatedData,
                       header=('Time', resource.capitalize()))


if __name__ == '__main__':
    # ids = np.genfromtxt('d:/data/machineIDs.csv', dtype=str)
    # machines = machines4[100:200]
    # extractMachineData()
    for f in fileutils.getFilelist("D:/data/perMachine5"):
        print f
        readAndAggregate(f, "d:/data/cpu5", 'cpu')
        readAndAggregate(f, "d:/data/memory5", 'memory')
    n.addConnection(FullConnection(n['in'], n['hidden'], name='in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='hidden_to_out'))
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    rnet = n
    rnet.sortModules()
    trainer = BackpropTrainer(n, trainds, learningrate=eta, weightdecay=lmda,
                              momentum=0.1, shuffle=False)
    trainer.trainEpochs(epochs)
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n


if __name__ == '__main__':
    files = fileutils.getFilelist("../data/cpuRate")
    # target_files = ['cpu_1095481', 'cpu_1303745', 'cpu_1335782', 'cpu_1338948', 'cpu_1442486585', 'cpu_155313295', 'cpu_1664088958', 'cpu_317488701', 'cpu_317499484', 'cpu_3858945898', 'cpu_4304743890', 'cpu_4820238819', 'cpu_5796442', 'cpu_660404', 'cpu_711355', 'cpu_717319', 'cpu_904514', 'cpu_905062', 'cpu_907812']
    for machine in files[98:100]:  # for machine in target_files[16:19]:
        # replace() rather than strip('.csv'): strip removes any of the
        # characters '.', 'c', 's', 'v' from both ends of the string, so it
        # would also eat the leading 'c' of 'cpu_...'.
        machine = machine.replace('.csv', '').split('/')[-1]
        # machine = 'cpu_1095481'
        print(machine)
        data = np.genfromtxt("../data/cpuRate/" + machine + ".csv", skip_header=1,
                             delimiter=',', usecols=(1))
        miniters = 100
        maxiters = 1000
        TRAIN = 1000
        VALID = 100
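# A sketch of how the returned (validerr, varscore, n) triple could populate
# the hyperparams.csv files that the evaluation menu reads (it consumes
# columns 0, 3 and 4 of each row). Everything here is hypothetical: the
# trainNetwork() wrapper name, the candidate grids, and the exact row layout
# are assumptions, not the project's confirmed format.
rows = []
for eta in (0.01, 0.05, 0.1):          # hypothetical learning-rate grid
    for lmda in (0.0, 1e-4, 1e-3):     # hypothetical weight-decay grid
        validerr, varscore, net = trainNetwork(data, eta, lmda)  # hypothetical wrapper
        rows.append([machine, validerr, varscore, eta, lmda])
# fileutils.writeCSV("../data/cpuRate_networks/hyperparams.csv", rows)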