Example #1
def extractMachineData():
    datafiles = fileutils.getFilelist(
        "D:/googleClusterData/clusterdata-2011-1/task_usage")
    machineUsage = {}

    startAt = 0

    for machine in machines:
        machineUsage[machine] = []

    for datafile in datafiles[startAt:]:
        print datafile
        for row in fileutils.getCsvRows(datafile):
            curMachine = row[4]
            if curMachine in machines:
                machineUsage[curMachine].append(row)

        for machine in machineUsage.keys():
            if startAt == 0:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine])
            else:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine],
                                   mode='ab')

        startAt += 1
        machineUsage.clear()
        for machine in machines:
            machineUsage[machine] = []
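All of the examples lean on a project-local fileutils module that the excerpts never show. A minimal sketch of the three helpers they call, with signatures inferred purely from usage (the real module may well differ):

import csv
import glob


def getFilelist(directory):
    # Every file in a directory, with forward slashes so callers
    # can split paths on '/'.
    return sorted(glob.glob(directory + '/*'))


def getCsvRows(path):
    # Yield each row of a CSV file as a list of strings.
    with open(path, 'rb') as f:   # binary mode, matching the Python 2 csv idiom
        for row in csv.reader(f):
            yield row


def writeCSV(path, rows, mode='wb', header=None):
    # Write (or, with mode='ab', append) rows to a CSV file.
    with open(path, mode) as f:
        writer = csv.writer(f)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)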
Example #2
def extractMachineData():
    datafiles = fileutils.getFilelist("D:/googleClusterData/clusterdata-2011-1/task_usage")
    machineUsage = {}
    
    startAt = 0
    
    for machine in machines:
        machineUsage[machine] = []
    
    for datafile in datafiles[startAt:]:
        print datafile
        for row in fileutils.getCsvRows(datafile):
            curMachine = row[4]
            if curMachine in machines:
                machineUsage[curMachine].append(row)
    
        for machine in machineUsage.keys():
            if startAt == 0:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine])
            else:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine],
                                   mode='ab')

        startAt += 1
        machineUsage.clear()
        for machine in machines:
            machineUsage[machine] = []
Example #3
    trainer = BackpropTrainer(n,
                              trainds,
                              learningrate=eta,
                              weightdecay=lmda,
                              momentum=0.1,
                              shuffle=False)
    trainer.trainEpochs(epochs)
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n


if __name__ == '__main__':

    files = fileutils.getFilelist("../data/cpuRate")
    #     target_files = ['cpu_1095481','cpu_1303745','cpu_1335782','cpu_1338948','cpu_1442486585','cpu_155313295','cpu_1664088958','cpu_317488701','cpu_317499484','cpu_3858945898','cpu_4304743890','cpu_4820238819','cpu_5796442','cpu_660404','cpu_711355','cpu_717319','cpu_904514','cpu_905062','cpu_907812']

    for machine in files[98:100]:
        #     for machine in target_files[16:19]:

        # replace, not strip: strip('.csv') removes any of those
        # characters from both ends, not the literal '.csv' suffix
        machine = machine.replace('.csv', '').split('/')[-1]
        #     machine = 'cpu_1095481'
        print(machine)

        data = np.genfromtxt("../data/cpuRate/" + machine + ".csv",
                             skip_header=1,
                             delimiter=',',
                             usecols=(1))

        miniters = 100
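Examples #3, #9, and #15 train a PyBrain network; the imports the snippets assume (but never show) would look roughly like this. Note that eval here is a project-local metrics module, not the built-in:

import numpy as np
from pybrain.structure import FullConnection
from pybrain.supervised.trainers import BackpropTrainer
from sklearn.metrics import explained_variance_score

import eval       # project-local module providing calc_RMSE (shadows the built-in eval)
import fileutils  # project-local helpers, sketched under Example #1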
Example #4
    data = data / np.max(data)
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    y = data[0:input_window].tolist()
    model = HW_model.HW_model(y, minimum, 'additive')
    
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window].tolist()
            model.update(y)

        y_pred = model.predict(fc=predic_window)
        result.append(y_pred)

    f = filename.split('/')[-1]
    fileutils.writeCSV("D:/Wikipage data/pageviews_hw/" + f,
                       np.atleast_2d(result))
    print filename, "complete!"

if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files =  fileutils.getFilelist("D:/Wikipage data/pageviews")
    
#     performsSlidingWindowForecast(files[1])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
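The __main__ blocks fan the per-file work out with pool.map. ThreadPool is the thread-backed pool from the standard library's multiprocessing package; a self-contained sketch of the pattern, with a stand-in worker in place of performsSlidingWindowForecast:

from multiprocessing.pool import ThreadPool

def work(path):
    # stand-in for performsSlidingWindowForecast
    return len(path)

pool = ThreadPool(4)  # four worker threads (not processes)
results = pool.map(work, ['a.csv', 'b.csv', 'c.csv'])
pool.close()          # no more tasks will be submitted
pool.join()           # block until all workers finish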
Example #5
def main():
    global METHOD
    global TYPE
    global OUTPUT
    global INPUT
    # Launch main menu
#     main_menu()
    print "Comparing Forecasting methods:\n"
    print "Please complete the following:"  
    print "Enter the type of data used in the evaluation:"
    TYPE = raw_input(" >>  ")
    
    print "The base INPUT directory is:", INPUT+TYPE
    print "and OUTPUT directory is:", OUTPUT+TYPE
    print "Would you like to change it? y or n" 
    ch = raw_input(" >>  ").lower()
    if ch == 'y':
        print "Enter the base path for the INPUT directory (without the type):"
        INPUT = raw_input(" >>  ")
        if not os.path.isdir(INPUT):
            print "Error: Please try again; INPUT directory:"
            INPUT = raw_input(" >>  ")
            
        print "And enter the base path for evaluation OUTPUT directory (without the type):"
        OUTPUT = raw_input(" >>  ")
        if not os.path.isdir(OUTPUT):
            print "Error: Please try again; evaluation OUTPUT directory:"
            OUTPUT = raw_input(" >>  ")
        
#########
    print "Please choose a method to evaluate:"
    print "1. Holt-Winters"
    print "2. Auto-regression"
    print "3. 1st Markov chain"
    print "4. 2nd Markov chain"
    print "5. PRESS"
    print "6. Agile"
    print "7. FFNN Model"
    print "8. RNN Model"
    print "9. Entwine Model"
    print "10. Moving Average"
    print "11. Average combo4 Model"
    print "12. FFNN combo4 Model"
    print "13. Weighted Average model"
    print "0. Quit"
    choice = raw_input(" >>  ")
    ch = choice.lower()
    
    if ch == '':
        menu_actions['main_menu']()
    elif ch == '0':
        exit()
    else:
        METHOD = methods_dict[ch]

        pool = ThreadPool(4)
        files = fileutils.getFilelist(INPUT + TYPE)

        params = []
        if METHOD == 'fnn':
            if TYPE.startswith("memory"):
                hyperpath = ("../data/" + TYPE.replace("memory", "cpu")
                             + "_networks/hyperparams.csv")
            else:
                hyperpath = "../data/" + TYPE + "_networks/hyperparams.csv"
            hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                if TYPE.startswith("memory"):
                    name = curRow[0].replace("cpu", "memory").strip("'")
                else:
                    name = curRow[0].strip("'")
                params.append([INPUT + TYPE + '/' + name + ".csv",
                               METHOD, TYPE, OUTPUT, INPUT,
                               curRow[3], curRow[4]])
                    
        elif METHOD == 'rnn':
            if TYPE.startswith("memory"):
                hyperpath = ("../data/" + TYPE.replace("memory", "cpu")
                             + "_rnn_networks/hyperparams.csv")
            else:
                hyperpath = "../data/" + TYPE + "_rnn_networks/hyperparams.csv"
            hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                if TYPE.startswith("memory"):
                    name = curRow[0].replace("cpu", "memory").strip("'")
                else:
                    name = curRow[0].strip("'")
                params.append([INPUT + TYPE + '/' + name + ".csv",
                               METHOD, TYPE, OUTPUT, INPUT,
                               curRow[3], curRow[4]])
                    
        elif METHOD == 'entwine':
            hyperpath = "../data/entwine_networks/hyperparams.csv"
            hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                params.append([INPUT + TYPE + '/' + curRow[0].strip("'") + ".csv",
                               METHOD, TYPE, OUTPUT, INPUT,
                               curRow[3], curRow[4]])
                
        else:
            for f in files:
                params.append([f, METHOD, TYPE, OUTPUT, INPUT])
        
        if METHOD in ('avg4', 'combo4', 'wa'):
#             ensembleModel(params[0])
            pool.map(ensembleModel, params)
            pool.close()
            pool.join()
        else:
#             print "skip"
#             performsSlidingWindowForecast(params[0])
            pool.map(performsSlidingWindowForecast, params)
            pool.close()
            pool.join()
                  
        pool = ThreadPool(4)
        results = pool.map(performEvaluations, params)
        pool.close()
        pool.join()
        fileutils.writeCSV(OUTPUT + "results/" + TYPE + "_" + METHOD + ".csv", results)
        print METHOD + " " + TYPE + " complete"
          
        exit()
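menu_actions and methods_dict come from setup code outside this excerpt. Only the keys tested by the dispatch above ('fnn', 'rnn', 'entwine', 'avg4', 'combo4', 'wa') are confirmed by the snippet; a purely hypothetical reconstruction consistent with the menu, with every other value a guess:

# Hypothetical reconstruction -- only 'fnn', 'rnn', 'entwine',
# 'avg4', 'combo4', and 'wa' appear in the dispatch code above;
# the remaining values are placeholders.
methods_dict = {
    '1': 'hw',        # Holt-Winters (guess)
    '2': 'ar',        # Auto-regression (guess)
    '3': 'markov1',   # 1st-order Markov chain (guess)
    '4': 'markov2',   # 2nd-order Markov chain (guess)
    '5': 'press',     # PRESS (guess)
    '6': 'agile',     # Agile (guess)
    '7': 'fnn',       # FFNN Model (used above)
    '8': 'rnn',       # RNN Model (used above)
    '9': 'entwine',   # Entwine Model (used above)
    '10': 'ma',       # Moving Average (guess)
    '11': 'avg4',     # Average combo4 Model (used above)
    '12': 'combo4',   # FFNN combo4 Model (used above)
    '13': 'wa',       # Weighted Average model (used above)
}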
Example #6
    result = []
    data_max = np.max(data)  # renamed so it does not shadow the built-in max
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Press_model.Press_model(y, maximum=data_max)
            model.fit()
        else:
#             y = data[strIndex:strIndex+input_window,1]
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
        y_pred = model.predict(predic_window)
        y_pred[y_pred<0] = minimum
        result.append(y_pred)
    res = np.zeros((len(result),predic_window))
    
    for i in range(len(result)):
        res[i,:len(result[i])] = result[i] 
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/"+TYPE+"_press/"+f, np.atleast_2d(res))
    print filename, "complete!"

if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files =  fileutils.getFilelist("D:/data/"+TYPE)
    
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
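The sliding-window snippets (Examples #4, #6, #7, #10-#12, #14) share several module-level names the excerpts never define: TYPE, input_window, predic_window, step, and minpercentile. The values below are illustrative assumptions only, to make the roles concrete:

# Assumed configuration -- none of these values appear in the
# excerpts; they only illustrate the roles the names play.
TYPE = 'cpu'          # resource name, also used in output paths
input_window = 100    # history length fed to each model
predic_window = 10    # number of steps forecast per window
step = 10             # stride of the sliding window
minpercentile = 5     # floor used to clamp negative predictions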
Example #7
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    data_max = np.max(data)  # renamed so it does not shadow the built-in max
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Markov_model.Markov_model(y, maximum=data_max, order=order_)
            model.fit()
        else:
#             y = data[strIndex:strIndex+input_window,1]
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
              
        y_pred = model.predict(predic_window)
        y_pred[y_pred<0] = minimum
        result.append(y_pred.ravel())
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_markov"+str(order_)+"/"+f, np.atleast_2d(result))
    print filename, "complete!"

if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files =  fileutils.getFilelist("D:/data/cpu")
    performsSlidingWindowForecast(files[0])
#     pool.map(performsSlidingWindowForecast, files)
#     pool.close()
#     pool.join()
Example #8
        if time >= strTime and time < endTime:
            row_f = np.float_(row[1])
            aggregatedData[x, 1] += row_f

        else:
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime

    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1

    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv,
                       aggregatedData,
                       header=('Time', resource.capitalize()))


if __name__ == '__main__':
    #     ids = np.genfromtxt('d:/data/machineIDs.csv',dtype=str)
    #     machines = machines4[100:200]
    #     extractMachineData()

    for f in fileutils.getFilelist("D:/data/perMachine5"):
        print f
        readAndAggregate(f, "d:/data/cpu5", 'cpu')
        readAndAggregate(f, "d:/data/memory5", 'memory')
Example #9
    n.addConnection(FullConnection(n['in'], n['hidden'], name='in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='hidden_to_out'))
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    n.sortModules()

    trainer = BackpropTrainer(n,
                              trainds,
                              learningrate=eta,
                              weightdecay=lmda,
                              momentum=0.1,
                              shuffle=False)
    trainer.trainEpochs(epochs)
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n

if __name__ == '__main__':
    
    files =  fileutils.getFilelist("D:/Wikipage data/network")
#     target_files = ['cpu_1095481','cpu_1303745','cpu_1335782','cpu_1338948','cpu_1442486585','cpu_155313295','cpu_1664088958','cpu_317488701','cpu_317499484','cpu_3858945898','cpu_4304743890','cpu_4820238819','cpu_5796442','cpu_660404','cpu_711355','cpu_717319','cpu_904514','cpu_905062','cpu_907812']

    for machine in files[30:]:
#     for machine in target_files[16:19]:

        machine = machine.replace('.csv', '').split('/')[-1]
    #     machine = 'cpu_1095481'
        print(machine)
        
#         data = np.genfromtxt("d:/data/cpu5/"+machine+".csv",skip_header=1, delimiter=',',usecols=(1))
        data = np.nan_to_num(np.genfromtxt("D:/Wikipage data/network/"+machine)).ravel()
        data = data/np.max(data)
        
        miniters=100
        maxiters=1000 
Example #10
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()

    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = AR_model.AR_model(y, order=30)
            model.fit()
        else:
            y = data[strIndex:strIndex+input_window]
            model.update(y)
              
        y_pred = model.predict(predic_window)
        y_pred[y_pred[:,0]<0,0] = minimum
        result.append(y_pred[:,0])
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/"+TYPE+"_ar/"+f, np.atleast_2d(result))
    print filename, "complete!"

if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files =  fileutils.getFilelist("D:/data/"+TYPE)
#     print files
#     performsSlidingWindowForecast(files[0])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
Example #11
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Wavelet_model(y)
            model.fit()
        else:
            y = data[strIndex : strIndex + input_window]
            model.update(y)

        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum
        result.append(y_pred)
    f = filename.split("/")[-1]
    fileutils.writeCSV("d:/data/" + TYPE + "_agile/" + f, np.atleast_2d(result))
    print filename, "complete!"


if __name__ == "__main__":
    aggregatedRmse = None
    pool = ThreadPool(4)
    root = "D:/data/" + TYPE
    files = fileutils.getFilelist(root)

    #     performsSlidingWindowForecast(files[1])

    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
Example #12
    N = len(data)
    result = []
    print filename, "started..."
    y = data[0:input_window].tolist()
    model = HW_model.HW_model(y, minimum, 'additive')

    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window].tolist()
            model.update(y)

        y_pred = model.predict(fc=predic_window)
        result.append(y_pred)

    f = filename.split('/')[-1]
    fileutils.writeCSV("D:/Wikipage data/pageviews_hw/" + f,
                       np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    files = fileutils.getFilelist("D:/Wikipage data/pageviews")

    #     performsSlidingWindowForecast(files[1])
    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
Example #13
     
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if time >= strTime and time < endTime:
            row_f = np.float_(row[1])
            aggregatedData[x, 1] += row_f

        else:
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime

    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1

    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv,
                       aggregatedData,
                       header=('Time', resource.capitalize()))

if __name__ == '__main__':
#     ids = np.genfromtxt('d:/data/machineIDs.csv',dtype=str) 
#     machines = machines4[100:200]
#     extractMachineData()

    for f in fileutils.getFilelist("D:/data/perMachine5"):
        print f
        readAndAggregate(f, "d:/data/cpu5", 'cpu')
        readAndAggregate(f, "d:/data/memory5", 'memory')
Example #14
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Wavelet_model(y)
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window]
            model.update(y)

        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum
        result.append(y_pred)
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/" + TYPE + "_agile/" + f,
                       np.atleast_2d(result))
    print filename, "complete!"


if __name__ == '__main__':
    aggregatedRmse = None
    pool = ThreadPool(4)
    root = "D:/data/" + TYPE
    files = fileutils.getFilelist(root)

    #     performsSlidingWindowForecast(files[1])

    pool.map(performsSlidingWindowForecast, files)
    pool.close()
    pool.join()
Example #15
    n.addConnection(FullConnection(n['in'], n['hidden'], name='in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='hidden_to_out'))
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    n.sortModules()

    trainer = BackpropTrainer(n,
                              trainds,
                              learningrate=eta,
                              weightdecay=lmda,
                              momentum=0.1,
                              shuffle=False)
    trainer.trainEpochs(epochs)
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n

if __name__ == '__main__':
    
    files =  fileutils.getFilelist("../data/cpuRate")
#     target_files = ['cpu_1095481','cpu_1303745','cpu_1335782','cpu_1338948','cpu_1442486585','cpu_155313295','cpu_1664088958','cpu_317488701','cpu_317499484','cpu_3858945898','cpu_4304743890','cpu_4820238819','cpu_5796442','cpu_660404','cpu_711355','cpu_717319','cpu_904514','cpu_905062','cpu_907812']

    for machine in files[98:100]:
#     for machine in target_files[16:19]:

        # replace, not strip: strip('.csv') removes any of those
        # characters from both ends, not the literal '.csv' suffix
        machine = machine.replace('.csv', '').split('/')[-1]
    #     machine = 'cpu_1095481'
        print(machine)

        data = np.genfromtxt("../data/cpuRate/" + machine + ".csv",
                             skip_header=1,
                             delimiter=',',
                             usecols=(1))
        
        miniters = 100
        maxiters = 1000
        TRAIN = 1000
        VALID = 100
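Example #15 cuts off before trainds and validds are built. With PyBrain they would typically be SupervisedDataSet objects filled from sliding windows; a sketch under that assumption (make_dataset is a hypothetical helper, not from the source):

from pybrain.datasets import SupervisedDataSet

def make_dataset(series, input_window, predic_window):
    # One sample per window: input_window inputs, predic_window targets.
    ds = SupervisedDataSet(input_window, predic_window)
    for i in range(len(series) - input_window - predic_window):
        ds.addSample(series[i:i + input_window],
                     series[i + input_window:i + input_window + predic_window])
    return ds

# e.g. trainds = make_dataset(data[:TRAIN], input_window, predic_window)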