Code example #1
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    data = np.genfromtxt(filename)
    data = data/np.max(data)
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    y = data[0:input_window].tolist()
    model = HW_model.HW_model(y, minimum, 'additive')
    
    for strIndex in range(0,N-input_window - predic_window, step):
        if strIndex == 0:
            model.fit()
        else:
            y = data[strIndex:strIndex+input_window].tolist()
            model.update(y)
              
        y_pred = model.predict(fc=predic_window)
        result.append(y_pred)
    
    f = filename.split('/')[-1]
    fileutils.writeCSV("D:/Wikipage data/pageviews_hw/"+f, np.atleast_2d(result))
    print filename, "complete!"
Code example #2
def performsSlidingWindowForecast(filename_eta_lmda, minpercentile=5, step=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    filename, curEta, curLmda = filename_eta_lmda
    data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1))
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    
    print filename, "started..."
    curMachine = filename.split('/')[-1]
    model = Fnn_model.Fnn_model(data=data, machineID=curMachine,
                                netPath='../data/cpu2_networks/' + curMachine.replace(".csv", ".xml"),
                                eta=curEta, lmda=curLmda)
    model.fit()
    
    pred = []
    for _ in range(input_window, N - predic_window, predic_window):
        fc = np.array(model.predict(predic_window)).flatten()
        fc[fc < 0] = minimum  # clamp negative forecasts to the low percentile
        pred.append(fc)
        model.update()
    
    pred = np.array(pred)
        
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu2_fnn/"+f, np.atleast_2d(pred))
    print filename, "complete!"
Code example #3
def performsSlidingWindowForecast(filename,
                                  minpercentile=5,
                                  step=30,
                                  input_window=3000,
                                  predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1))
    minimum = np.percentile(data[:, 1], minpercentile)
    N = len(data[:, 1])
    result = []
    print filename, "started..."
    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window, 1]
            model = Norm_model.Norm_model(y)
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window, 1]
            model.update(y)

        y_pred = model.predict(predic_window)
        y_pred[y_pred < 0] = minimum
        result.append(y_pred)
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_norm_forecasts/" + f,
                       np.atleast_2d(result))
    print filename, "complete!"
Code example #4
def extractMachineData():
    datafiles = fileutils.getFilelist("D:/googleClusterData/clusterdata-2011-1/task_usage")
    machineUsage = {}
    
    startAt = 0
    
    for machine in machines:
        machineUsage[machine] = []
    
    for datafile in datafiles[startAt:]:
        print datafile
        for row in fileutils.getCsvRows(datafile):
            curMachine = row[4]
            if curMachine in machines:
                machineUsage[curMachine].append(row)
    
        for machine in machineUsage.keys():
            if startAt == 0:
                fileutils.writeCSV("d:/data/perMachine5/"+machine+".csv", machineUsage[machine])
            else:
                fileutils.writeCSV("d:/data/perMachine5/"+machine+".csv", machineUsage[machine], mode='ab')
                 
        startAt += 1         
        machineUsage.clear()
        for machine in machines:
            machineUsage[machine] = []
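
None of these examples show the project-local fileutils helper they all rely on. A minimal sketch consistent with the calls used on this page (getFilelist, getCsvRows, and writeCSV with an optional append mode and header) is below; the real module may well differ.

# Hypothetical minimal fileutils, inferred from the calls in these examples.
import csv
import os

def getFilelist(root):
    '''Full paths of the files directly under root.'''
    return [os.path.join(root, f) for f in os.listdir(root)
            if os.path.isfile(os.path.join(root, f))]

def getCsvRows(path):
    '''Yield the rows of a CSV file as lists of strings.'''
    with open(path, 'rb') as fh:  # Python 2: csv expects binary mode
        for row in csv.reader(fh):
            yield row

def writeCSV(path, rows, mode='wb', header=None):
    '''Write an iterable of rows to path; mode='ab' appends.'''
    with open(path, mode) as fh:
        writer = csv.writer(fh)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)
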
Code example #5
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
#Wikidata
#     data = np.genfromtxt(filename)
#     data = data/np.max(data)
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()

    minimum = np.percentile(data,minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    for strIndex in range(0,N-input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = AR_model.AR_model(y, order=30)
            model.fit()
        else:
            y = data[strIndex:strIndex+input_window]
            model.update(y)
              
        y_pred = model.predict(predic_window)
        y_pred[y_pred[:,0]<0,0] = minimum
        result.append(y_pred[:,0])
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/"+TYPE+"_ar/"+f, np.atleast_2d(result))
    print filename, "complete!"
Code example #6
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
#     Wikidata
#     data = np.genfromtxt(filename)
#     data = data/np.max(data)

    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    result = []
    maximum = np.max(data)  # avoid shadowing the builtin max
    print filename, "started..."
    for strIndex in range(0,N-input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Press_model.Press_model(y, maximum=maximum)
            model.fit()
        else:
            # PRESS updates on only the newest `step` samples,
            # not the full input window
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
        y_pred = model.predict(predic_window)
        y_pred[y_pred<0] = minimum
        result.append(y_pred)
    # PRESS may return fewer than predic_window values; zero-pad each row
    res = np.zeros((len(result), predic_window))
    
    for i in range(len(result)):
        res[i,:len(result[i])] = result[i] 
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/"+TYPE+"_press/"+f, np.atleast_2d(res))
    print filename, "complete!"
Code example #7
def performsSlidingWindowForecast(filename_eta_lmda, minpercentile=5, step=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    filename, curEta, curLmda = filename_eta_lmda
    data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1))
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    
    print filename, "started..."
    curMachine = filename.split('/')[-1][:-4]
#     curMachine = "cpu" + filename.split('/')[-1].strip('.csv')
    
    model = Rnn_model.Rnn_model(data=data, machineID=curMachine, eta=curEta, lmda=curLmda)
    model.fit()
    
    pred = []
    lastFc = None
    for _ in range(input_window, N - predic_window, predic_window):
        fc = model.predict(predic_window)
        # Stitch consecutive windows: carry the last value of the previous
        # forecast into the first slot of this one
        if lastFc is not None:
            fc[0] = lastFc
        lastFc = fc[-1]
        fc[fc<0] = minimum
        pred.append(fc)
        model.update()
    
    pred = np.array(pred).ravel()    
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_rnn_forecasts/"+f, np.atleast_2d(pred))
    print filename, "complete!"
Code example #8
def performsSlidingWindowForecast(filename,
                                  minpercentile=5,
                                  step=30,
                                  input_window=3000,
                                  predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    data = np.genfromtxt(filename)
    data = data / np.max(data)
    minimum = np.percentile(data, minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    y = data[0:input_window].tolist()
    model = HW_model.HW_model(y, minimum, 'additive')

    for strIndex in range(0, N - input_window - predic_window, step):
        if strIndex == 0:
            model.fit()
        else:
            y = data[strIndex:strIndex + input_window].tolist()
            model.update(y)

        y_pred = model.predict(fc=predic_window)
        result.append(y_pred)

    f = filename.split('/')[-1]
    fileutils.writeCSV("D:/Wikipage data/pageviews_hw/" + f,
                       np.atleast_2d(result))
    print filename, "complete!"
Code example #9
File: dataplot.py  Project: agravier/ForecastingCloud
def extractMachineData():
    datafiles = fileutils.getFilelist(
        "D:/googleClusterData/clusterdata-2011-1/task_usage")
    machineUsage = {}

    startAt = 0

    for machine in machines:
        machineUsage[machine] = []

    for datafile in datafiles[startAt:]:
        print datafile
        for row in fileutils.getCsvRows(datafile):
            curMachine = row[4]
            if curMachine in machines:
                machineUsage[curMachine].append(row)

        for machine in machineUsage.keys():
            if startAt == 0:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine])
            else:
                fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv",
                                   machineUsage[machine],
                                   mode='ab')

        startAt += 1
        machineUsage.clear()
        for machine in machines:
            machineUsage[machine] = []
Code example #10
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30, order_=1):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
#     data = np.genfromtxt(filename)
#     data = data/np.max(data)
    data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    result = []
    maximum = np.max(data)  # avoid shadowing the builtin max
    print filename, "started..."
    for strIndex in range(0,N-input_window - predic_window, step):
        if strIndex == 0:
            y = data[:input_window]
            model = Markov_model.Markov_model(y, maximum=maximum, order=order_)
            model.fit()
        else:
            # update with only the newest `step` samples, not the full window
            y = data[input_window + strIndex - step:input_window + strIndex]
            model.update(y)
              
        y_pred = model.predict(predic_window)
        y_pred[y_pred<0] = minimum
        result.append(y_pred.ravel())
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_markov"+str(order_)+"/"+f, np.atleast_2d(result))
    print filename, "complete!"
Code example #11
File: dataplot.py  Project: agravier/ForecastingCloud
def readAndAggregate(filename, outputDir, resource='cpu'):
    column = 1  # start time
    if resource == 'cpu':
        column = 5
    elif resource == 'memory':
        column = 7
    elif resource == 'diskIO':
        column = 11

    resourcePerTask = np.genfromtxt(filename,
                                    delimiter=',',
                                    usecols=(0, column),
                                    filling_values='0')

    fileCsv = filename.split('/')[-1]

    strTime = 600e6
    endTime = strTime + 300e6
    globalEndTime = 2506200000000

    numberOfRows = int(globalEndTime / 300e6)  # number of 5-minute buckets
    aggregatedData = np.zeros([numberOfRows, 2], dtype=float)
    x = 0

    aggregatedData[x, 0] = strTime

    # Assumes rows are sorted by time; a row that jumps more than one
    # bucket ahead is dropped rather than re-binned
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if (time >= strTime and time < endTime):
            row_f = np.float_(row[1])
            aggregatedData[x, 1] += row_f

        else:
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime

    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1

    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv,
                       aggregatedData,
                       header=('Time', resource.capitalize()))
Code example #12
def readAndAggregate(filename, outputDir, resource='cpu'):
    column = 1  # start time
    if resource == 'cpu':
        column = 5
    elif resource == 'memory':
        column = 7
    elif resource == 'diskIO':
        column = 11
    
        
    resourcePerTask = np.genfromtxt(filename, delimiter=',', usecols=(0, column), filling_values='0')
    
    fileCsv = filename.split('/')[-1]
    
    strTime = 600e6
    endTime = strTime + 300e6
    globalEndTime = 2506200000000
    
    numberOfRows = int(globalEndTime/300e6)  # number of 5-minute buckets
    aggregatedData = np.zeros([numberOfRows, 2], dtype=float)
    x = 0
    
    aggregatedData[x,0] = strTime
     
    # Assumes rows are sorted by time; a row that jumps more than one
    # bucket ahead is dropped rather than re-binned
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if (time>=strTime and time<endTime):
            row_f = np.float_(row[1])
            aggregatedData[x,1] += row_f
             
        else:
            strTime = endTime
            endTime += 300e6
            x +=1
            aggregatedData[x,0] = strTime
             
    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x,0] = strTime
        x +=1
        
    fileutils.writeCSV(outputDir+'/' + resource + '_' +fileCsv, aggregatedData, header=('Time',resource.capitalize()))
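
The per-row bucketing above walks one 5-minute bucket at a time and assumes sorted, gap-free input. A vectorized alternative with np.bincount is sketched below; aggregate_5min and its defaults are hypothetical, and it bins from time zero rather than from the 600e6-microsecond offset used above.

import numpy as np

def aggregate_5min(times_us, values, global_end_us=2506200000000, bucket_us=300e6):
    '''Sum values into fixed 300e6-microsecond (5-minute) buckets.
    Unlike the loop above, rows may be unsorted and buckets may be empty.'''
    n_buckets = int(global_end_us / bucket_us)
    idx = (np.asarray(times_us, dtype=float) // bucket_us).astype(int)
    keep = (idx >= 0) & (idx < n_buckets)  # discard out-of-range rows
    sums = np.bincount(idx[keep],
                       weights=np.asarray(values, dtype=float)[keep],
                       minlength=n_buckets)
    starts = np.arange(n_buckets) * bucket_us  # bucket start times
    return np.column_stack([starts, sums])
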
Code example #13
def performsSlidingWindowForecast(filename_eta_lmda,
                                  minpercentile=5,
                                  step=30,
                                  input_window=3000,
                                  predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    '''
    filename, curEta, curLmda = filename_eta_lmda
    data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1))
    minimum = np.percentile(data, minpercentile)
    N = len(data)

    print filename, "started..."
    curMachine = filename.split('/')[-1][:-4]
    #     curMachine = "cpu" + filename.split('/')[-1].strip('.csv')

    model = Rnn_model.Rnn_model(data=data,
                                machineID=curMachine,
                                eta=curEta,
                                lmda=curLmda)
    model.fit()

    pred = []
    lastFc = None
    for _ in range(input_window, N - predic_window, predic_window):
        fc = model.predict(predic_window)
        # Stitch consecutive windows: carry the last value of the previous
        # forecast into the first slot of this one
        if lastFc is not None:
            fc[0] = lastFc
        lastFc = fc[-1]
        fc[fc < 0] = minimum
        pred.append(fc)
        model.update()

    pred = np.array(pred).ravel()
    f = filename.split('/')[-1]
    fileutils.writeCSV("d:/data/cpu_rnn_forecasts/" + f, np.atleast_2d(pred))
    print filename, "complete!"
Code example #14
#     truevals = np.divide(truevals, np.max(truevals))
    
    threshold =  np.percentile(truevals, overload_percentile)
    
    cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts))
    for val in eval.calc_upper_lower_acc(truevals[train_window:], forecasts):
        cur_results.append(val) 
    for val in eval.calc_persample_accuracy(truevals[train_window:], forecasts, threshold):
        cur_results.append(val)
    for val in eval.calc_overload_states_acc(truevals[train_window:], forecasts, threshold):
        cur_results.append(val)
        
    return cur_results

if __name__ == '__main__':
    files = []
    
    root = "d:/data/"+TYPE+"/"
    for _, _, fs in os.walk(root):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)          
    pool = ThreadPool(4)
    
#     performEvaluations(files[0])
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
     
    fileutils.writeCSV("d:/data/results/"+TYPE+"_"+METHOD+".csv", results)
    print METHOD+" "+ TYPE + " complete"
Code example #15
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    cur_results = []
    forecasts = np.genfromtxt("d:/data/cpu_norm_forecasts/"+ filename,delimiter=',',usecols=range(0,steps)).ravel()
    truevals = np.genfromtxt("d:/data/cpuRate/"+filename, delimiter=',',skip_header=1)[train_window:train_window+len(forecasts),1]
    
    threshold =  np.percentile(truevals, overload_percentile)
    
    cur_results.append(eval.calc_RMSE(truevals, forecasts))
    for val in eval.calc_upper_lower_acc(truevals, forecasts):
        cur_results.append(val) 
    for val in eval.calc_persample_accuracy(truevals, forecasts, threshold):
        cur_results.append(val)
    for val in eval.calc_overload_states_acc(truevals, forecasts, threshold):
        cur_results.append(val)
        
    return cur_results

if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/cpuRate/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    
    fileutils.writeCSV("d:/data/results/normaldist.csv", results)
Code example #16
File: UI.py  Project: Manrich121/ForecastingCloud
def performsSlidingWindowForecast(params, minpercentile=5, training_window=30, input_window=3000, predic_window=30):
    '''
    Input window = 250 hours = 250*12 = 3000 samples
    Look-ahead window = 60 samples = 5 hours = 300 min / 5 min
    (predic_window defaults to 30 samples = 2.5 hours here)
    
    '''
    filename, METHOD, TYPE, OUTPUT = params[0:4]
#Wikidata
#     data = np.genfromtxt(filename)
#     data = data/np.max(data)
    lastFc = None
    if TYPE in ('pageviews', 'network'):
        data = np.nan_to_num(np.genfromtxt(filename.replace(".csv",""))).ravel()
        data = data/np.max(data)
    else:
        data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel()
    minimum = np.percentile(data,minpercentile)
    N = len(data)
    result = []
    print filename, "started..."
    for strIndex in range(0,N-input_window - predic_window, predic_window):
        if strIndex == 0:
            y = data[:input_window]
            if METHOD == 'ar':
                model = AR_model.AR_model(y, order=training_window)
            elif METHOD == 'ma':
                model = MA_model.MA_model(y,order=training_window)
            elif METHOD == 'hw':
                model = HW_model.HW_model(y, minimum, 'additive')
            elif METHOD == 'markov1':
                model = Markov_model.Markov_model(y, order=1)
            elif METHOD == 'markov2':
                model = Markov_model.Markov_model(y, order=2)
            elif METHOD == 'press':
                model = Press_model.Press_model(y)
            elif METHOD == 'agile':
                model = Wavelet_model.Wavelet_model(y)
            elif METHOD == 'fnn':
                filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7]
                curMachine = filename.split('/')[-1].replace(".csv",".xml")
                if TYPE.startswith("memory"):
                    curMachine = curMachine.replace("memory", "cpu")
                    model = Fnn_model.Fnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE.replace("memory", "cpu")+"_networks/"+curMachine, eta=curEta, lmda=curLmda)
                else:
                    model = Fnn_model.Fnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE+"_networks/"+curMachine, eta=curEta, lmda=curLmda)
            elif METHOD == 'rnn':
                filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7]
                curMachine = filename.split('/')[-1]
                if TYPE.startswith("memory"):
                    curMachine = curMachine.replace("memory", "cpu")
                    model = Rnn_model.Rnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE.replace("memory", "cpu")+"_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda)
                else:
                    model = Rnn_model.Rnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE+"_rnn_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda)
            elif METHOD == 'entwine':
                filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7]
                curMachine = filename.split('/')[-1]
                data2 = np.nan_to_num(np.genfromtxt(filename.replace("cpu", "memory"), delimiter=',', skip_header=1)[:,1]).ravel()
                
                model = Entwine_model.Entwine_model([data, data2], machineID = curMachine, netPath="../data/entwine_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda)
                
            model.fit()
        else:
            if METHOD == 'press':
                # PRESS updates on only the newest predic_window samples
                y = data[input_window + strIndex - predic_window:input_window + strIndex]
            else:
                y = data[strIndex:strIndex+input_window]
            model.update(y)
        
        p = model.predict(predic_window)
        y_pred = np.atleast_2d(p)
        y_pred = np.reshape(y_pred, (predic_window,1)) 
        
        if METHOD == 'rnn':
            # Stitch consecutive RNN forecasts across windows
            if lastFc is not None:
                y_pred[0,0] = lastFc
            lastFc = y_pred[-1,0]

        y_pred[y_pred[:,0]<0,0] = minimum
        
        result.append(y_pred[:,0])
    f = filename.split('/')[-1]
    fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+f, np.atleast_2d(result))
    print filename, "complete!"
Code example #17
File: UI.py  Project: Manrich121/ForecastingCloud
def main():
    global METHOD
    global TYPE
    global OUTPUT
    global INPUT
    # Launch main menu
#     main_menu()
    print "Comparing Forecasting methods:\n"
    print "Please complete the following:"  
    print "Enter the type of data used in the evaluation:"
    TYPE = raw_input(" >>  ")
    
    print "The base INPUT directory is:", INPUT+TYPE
    print "and OUTPUT directory is:", OUTPUT+TYPE
    print "Would you like to change it? y or n" 
    ch = raw_input(" >>  ").lower()
    if ch == 'y':
        print "Enter the base path for the INPUT directory (without the type):"
        INPUT = raw_input(" >>  ")
        if not os.path.isdir(INPUT):
            print "Error: Please try again; INPUT directory:"
            INPUT = raw_input(" >>  ")
            
        print "And enter the base path for evaluation OUTPUT directory (without the type):"
        OUTPUT = raw_input(" >>  ")
        if not os.path.isdir(OUTPUT):
            print "Error: Please try again; evaluation OUTPUT directory:"
            OUTPUT = raw_input(" >>  ")
        
#########
    print "Please choose a method to evaluate:"
    print "1. Holt-Winters"
    print "2. Auto-regression"
    print "3. 1st Markov chain"
    print "4. 2nd Markov chain"
    print "5. PRESS"
    print "6. Agile"
    print "7. FFNN Model"
    print "8. RNN Model"
    print "9. Entwine Model"
    print "10. Moving Average"
    print "11. Average combo4 Model"
    print "12. FFNN combo4 Model"
    print "13. Weighted Average model"
    print "0. Quit"
    choice = raw_input(" >>  ")
    ch = choice.lower()
    
    if ch == '':
        menu_actions['main_menu']()
    elif ch == '0':
        exit()
    else:
        METHOD = methods_dict[ch]

        pool = ThreadPool(4)
        files =  fileutils.getFilelist(INPUT+TYPE)
                
        params = []  
        if METHOD =='fnn':
            if TYPE.startswith("memory"):
                hyperpath = "../data/"+TYPE.replace("memory", "cpu")+"_networks/hyperparams.csv"
            else:
                hyperpath = "../data/"+TYPE+"_networks/hyperparams.csv"
            hyperparms =  np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                if TYPE.startswith("memory"):
                    params.append([INPUT+TYPE+'/'+curRow[0].replace("cpu", "memory").strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                else:
                    params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                    
        elif METHOD =='rnn':
            
            if TYPE.startswith("memory"):
                hyperpath = "../data/"+TYPE.replace("memory", "cpu")+"_rnn_networks/hyperparams.csv"
            else:
                hyperpath = "../data/"+TYPE+"_rnn_networks/hyperparams.csv"
            hyperparms =  np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                if TYPE.startswith("memory"):
                    params.append([INPUT+TYPE+'/'+curRow[0].replace("cpu", "memory").strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                else:
                    params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                    
        elif METHOD == 'entwine':
            hyperpath = "../data/entwine_networks/hyperparams.csv"
            hyperparms =  np.genfromtxt(hyperpath, delimiter=',', dtype=None)
            for curRow in hyperparms:
                params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]])
                
        else:
            for f in files:
                params.append([f, METHOD, TYPE, OUTPUT, INPUT])
        
        if METHOD == 'avg4' or METHOD == 'combo4' or METHOD == 'wa':
#             ensembleModel(params[0])
            pool.map(ensembleModel,params)
            pool.close()
            pool.join()
        else:
#             print "skip"
#             performsSlidingWindowForecast(params[0])
            pool.map(performsSlidingWindowForecast, params)
            pool.close()
            pool.join()
                  
        pool = ThreadPool(4)
        results = pool.map(performEvaluations, params)
        pool.close()
        pool.join()
        fileutils.writeCSV(OUTPUT+"results/"+TYPE+"_"+METHOD+".csv", results)
        print METHOD+" "+ TYPE + " complete"
          
        exit()
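
main() relies on module-level methods_dict and menu_actions defined elsewhere in UI.py. A plausible reconstruction of methods_dict, inferred from the menu above and the METHOD strings tested in performsSlidingWindowForecast and ensembleModel (the real dictionary may differ):

# Hypothetical reconstruction of UI.py's methods_dict.
methods_dict = {
    '1': 'hw',       # Holt-Winters
    '2': 'ar',       # auto-regression
    '3': 'markov1',  # 1st-order Markov chain
    '4': 'markov2',  # 2nd-order Markov chain
    '5': 'press',    # PRESS
    '6': 'agile',    # Agile (wavelet model)
    '7': 'fnn',      # feed-forward NN
    '8': 'rnn',      # recurrent NN
    '9': 'entwine',  # entwined CPU+memory NN
    '10': 'ma',      # moving average
    '11': 'avg4',    # average of four base models
    '12': 'combo4',  # FFNN combination of four base models
    '13': 'wa',      # weighted average
}
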
Code example #18
File: UI.py  Project: Manrich121/ForecastingCloud
def ensembleModel(params, types=['ma','ar','fnn','agile'], step=30, input_window=3000):
    input_size = len(types)
    filename, METHOD, TYPE, OUTPUT = params[0:4]
    filename = filename.split('/')[-1]
    
    combine_model = np.genfromtxt(OUTPUT+TYPE+"_"+types[0]+"/"+filename, delimiter=',', usecols=range(0,30)).ravel()
    truevals = np.nan_to_num(np.genfromtxt(OUTPUT+TYPE+"/"+filename, delimiter=',',skip_header=1, usecols=(1))[:input_window+len(combine_model)])
        
    for t in types[1:]:
        forecasts = np.genfromtxt(OUTPUT+TYPE+"_"+t+"/"+filename, delimiter=',', usecols=range(0,30)).ravel()
        combine_model = np.vstack((combine_model, forecasts))
        
    average_fc = np.average(combine_model, axis=0)
    
    if METHOD == 'avg4':
        # 178 forecast windows of 30 steps each (hard-coded for this dataset)
        fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, np.atleast_2d(average_fc).reshape([178,30]))
        print filename, "complete"
        return
    if METHOD == 'combo4' or METHOD == 'wa':
        training = SupervisedDataSet(input_size, 1)
        for i in range(step):
            
            training.appendLinked([combine_model[t][i] for t in range(input_size)], truevals[i+input_window])
            
        besterr = eval.calc_RMSE(truevals[input_window:input_window+step], average_fc[:step])
        bestNet = None
        
        for i in range(50):
            if METHOD == 'wa':
                net = buildNetwork(input_size, 1, hiddenclass=LinearLayer, bias=False)
            else:
                net = buildNetwork(input_size, 2, 1, hiddenclass=LinearLayer, bias=False)
            trainer = BackpropTrainer(net, training, learningrate=0.001, shuffle=False)
            trainer.trainEpochs(100)
    
            err = eval.calc_RMSE(truevals[input_window:input_window+step], net.activateOnDataset(training))
            if err < besterr:
                # keep the first trained net that beats the plain average
                bestNet = net
                break
            
        combo_fc = average_fc[0:step].tolist()
        if bestNet is None:
            combo_fc = average_fc
        else:
            for i in range(step, len(combine_model[0]), step):
                training = SupervisedDataSet(input_size, 1)
                for j in range(i,i+step):
                    combo_fc.append(bestNet.activate([combine_model[t][j] for t in range(input_size)])[0])
                    training.appendLinked([combine_model[t][j] for t in range(input_size)], truevals[j+input_window])
                trainer = BackpropTrainer(bestNet, training, learningrate=0.01, shuffle=False)
                trainer.trainEpochs(2)
                
        result  = np.atleast_2d(combo_fc).reshape([178,30])
        minimum = np.percentile(truevals,5)
        result[0,result[0,:] < minimum] = minimum 
        
        fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, result)
        print filename, "complete"
Code example #19
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    cur_results = []
    forecasts = np.genfromtxt("d:/data/diskio_ar_forecasts/"+ filename,delimiter=',',usecols=range(0,steps)).ravel()
    truevals = np.genfromtxt("d:/data/diskio/"+filename, delimiter=',',skip_header=1)[train_window:train_window+len(forecasts),1]
    
    threshold =  np.percentile(truevals, overload_percentile)
    
    cur_results.append(eval.calc_RMSE(truevals, forecasts))
    for val in eval.calc_upper_lower_acc(truevals, forecasts):
        cur_results.append(val) 
    for val in eval.calc_persample_accuracy(truevals, forecasts, threshold):
        cur_results.append(val)
    for val in eval.calc_overload_states_acc(truevals, forecasts, threshold):
        cur_results.append(val)
        
    return cur_results

if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/diskio/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    
    fileutils.writeCSV("d:/data/results/diskio_autoregressive.csv", results)
Code example #20
                      usecols=range(0, 30))).ravel()  #
    #     truevals = np.genfromtxt("d:/Wikipage data/"+TYPE+"/"+filename, delimiter=',',skip_header=1)[:train_window+len(forecasts),1]

    truevals = np.genfromtxt("d:/Wikipage data/" + TYPE + "/" +
                             filename)[:train_window + len(forecasts)]
    truevals = truevals / np.max(truevals)

    cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts))
    for val in eval.calc_upper_lower_acc(truevals[train_window:], forecasts):
        cur_results.append(val)

    return cur_results


if __name__ == '__main__':
    files = []
    pool = ThreadPool(4)

    root = "d:/Wikipage data/" + TYPE + "/"
    for _, _, fs in os.walk(root):
        for f in fs:
            files.append(f)

#     performEvaluations(files[0])
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()

    fileutils.writeCSV(
        "d:/Wikipage data/results/" + TYPE + "_" + METHOD + ".csv", results)
    print METHOD + " " + TYPE + " complete"
Code example #21
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    cur_results = []
    forecasts = np.genfromtxt("d:/data/diskio_hw_forecasts/"+ filename,delimiter=',',usecols=range(0,steps)).ravel()
    truevals = np.genfromtxt("d:/data/diskio/"+filename, delimiter=',',skip_header=1)[train_window:train_window+len(forecasts),1]
    
    threshold =  np.percentile(truevals, overload_percentile)
    
    cur_results.append(eval.calc_RMSE(truevals, forecasts))
    for val in eval.calc_upper_lower_acc(truevals, forecasts):
        cur_results.append(val) 
    for val in eval.calc_persample_accuracy(truevals, forecasts, threshold):
        cur_results.append(val)
    for val in eval.calc_overload_states_acc(truevals, forecasts, threshold):
        cur_results.append(val)
        
    return cur_results

if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/diskio/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    
    fileutils.writeCSV("d:/data/results/diskio_holtwinters.csv", results)
Code example #22
                             delimiter=',',
                             skip_header=1)[train_window:train_window +
                                            len(forecasts), 1]

    threshold = np.percentile(truevals, overload_percentile)

    cur_results.append(eval.calc_RMSE(truevals, forecasts))
    for val in eval.calc_upper_lower_acc(truevals, forecasts):
        cur_results.append(val)
    for val in eval.calc_persample_accuracy(truevals, forecasts, threshold):
        cur_results.append(val)
    for val in eval.calc_overload_states_acc(truevals, forecasts, threshold):
        cur_results.append(val)

    return cur_results


if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/cpuRate/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)

    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()

    fileutils.writeCSV("d:/data/results/normaldist.csv", results)