def suppVectorRegress():
    """Forecast the load with several SVR kernels and plot the results.

    The series from ``constructData`` is split so that the final 89
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended; each kernel's predictions
    are re-trended and exponentiated before the NRMSE is printed. All
    forecasts are finally plotted together with the actual test values.
    """
    kernels = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()
    features = data[0]
    targets = data[1]

    # Everything before the cutoff trains the models; the final 89
    # observations are held out for testing.
    cutoff = len(features) - 89
    xTrain, yTrain = features[0:cutoff], targets[0:cutoff]
    xTest, yTest = features[cutoff:], targets[cutoff:]

    # Fill in missing values denoted by zeroes as an average of both
    # neighbors (return value is not captured, so this relies on the helper
    # updating the lists it is given)
    for block in (xTrain, xTest):
        statistics.estimateMissing(block, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(value) for value in row] for row in xTrain]
    xTest = [[math.log(value) for value in row] for row in xTest]
    yTrain = [math.log(value) for value in yTrain]

    # Detrend the time series
    indices = np.arange(len(targets))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    for kernel, label in zip(kernels, names):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernel)
        # Add the trend back into the predictions
        retrended = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        retrended = [math.exp(value) for value in retrended]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, retrended)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + label + "...")
        preds.append(retrended)

    # The actual observations are plotted as the final series
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def suppVectorRegress():
    """Forecast the load with several SVR kernels and plot the results.

    The series from ``constructData`` is split so that the final 364
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended; each kernel's predictions
    are re-trended and exponentiated before the NRMSE is printed. All
    forecasts are finally plotted together with the actual test values.
    """
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    # ``data`` is indexed as data[0] (features) and data[1] (targets), so the
    # number of observations is len(data[0]); len(data) only counts the
    # container's entries and produced a shifted hold-out window.
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    for kernel, name in zip(kernelList, names):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernel)
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + name + "...")
        preds.append(trendedPred)

    # The actual observations are plotted as the final series
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def plotOrigVsDetrend():
    """Plot the load series next to its detrended counterpart.

    Both series leave out the final 365 target observations.
    """
    data = constructData()
    # A second, independent load supplies the series that is plotted as the
    # original (presumably so detrending cannot touch it -- TODO confirm).
    data1 = constructData()
    span = len(data[1]) - 365

    # Original time series
    origY = data1[1][0:span]

    # Detrended time series
    indices = np.arange(span)
    detrendResult = statistics.detrend(indices, data[1][0:span])
    detrendY = detrendResult[0]

    visualizer.comparisonPlot(
        2009, 1, 1, origY, detrendY, "Original", "Detrended",
        plotName="Aggregate Electric Load : Original & Detrended",
        yAxisName="Kilowatts")
def plotOrigVsDetrend():
    """Compare the load series with its detrended version in one plot.

    The last 365 target observations are excluded from both series.
    """
    data = constructData()
    # Loaded a second time; this copy provides the series shown as the
    # original.
    data1 = constructData()
    keep = len(data[1]) - 365

    # Original time series
    origY = data1[1][0:keep]

    # Detrended time series (first element of the detrend result)
    indices = np.arange(keep)
    toDetrend = data[1][0:keep]
    detrendY = statistics.detrend(indices, toDetrend)[0]

    plotArgs = (2009, 1, 1, origY, detrendY, "Original", "Detrended")
    visualizer.comparisonPlot(
        *plotArgs,
        plotName="Aggregate Electric Load : Original & Detrended",
        yAxisName="Kilowatts")
def gaussianProcesses():
    """Forecast the load with several GPR correlation models and plot them.

    The series from ``constructData`` is split so that the final 364
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended; each model's predictions
    are re-trended and exponentiated before the NRMSE is printed. All
    forecasts are finally plotted together with the actual test values.
    """
    corrMods = [
        'cubic', 'squared_exponential', 'absolute_exponential', 'linear'
    ]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    # ``data`` is indexed as data[0] (features) and data[1] (targets), so the
    # number of observations is len(data[0]); len(data) only counts the
    # container's entries and produced a shifted hold-out window.
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    for corrMod in corrMods:
        # Use GPR to predict test observations based upon training observations
        pred = gaussProcPred(xTrain, yTrain, xTest, corrMod)
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using covariance function " + corrMod + "...")
        preds.append(trendedPred)

    # The list of model names doubles as the plot legend
    corrMods.append("actual")

    # The actual values are re-read from a fresh load, as in the original
    # code (presumably to guard against in-place changes made by the
    # helpers above -- TODO confirm whether this reload is still needed).
    data = constructData()
    cutoff = len(data[0]) - 364
    yTest = data[1][cutoff:]
    preds.append(yTest)

    visualizer.comparisonPlot(
        2014, 1, 1, preds, corrMods,
        plotName="Gaussian Process Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def predict(request):
    """Run the selected models on an uploaded data file and render the results.

    Reads the uploaded ``file`` together with the ``period`` and ``model``
    POST fields, replaces any previously stored copy of the upload, runs
    every selected model, plots the predictions and execution times, and
    renders ``output.html`` with one summary entry per model.

    Args:
        request: The incoming HTTP request carrying the upload and form fields.

    Returns:
        The response produced by ``render`` for ``output.html``.
    """
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    upload = request.FILES.get("file")
    print("%s %s" % (request.POST, request.FILES))

    # NOTE(review): a missing "period" field makes int("default") raise
    # ValueError, a missing "model" field yields the string "default", and a
    # missing upload leaves ``upload`` as None; all three currently fail
    # instead of producing a friendly error response.
    period = int(request.POST.get("period", "default"))
    model = request.POST.getlist("model", "default")

    # Replace any earlier upload stored under the same name
    stored_path = os.path.join(BASE_DIR, "media", "uploadedfile",
                               os.path.basename(upload.name))
    print(stored_path)
    if os.path.isfile(stored_path):
        print("file exist")
        os.remove(stored_path)
        print("old file removed")
    else:
        print("file not present")
    newdoc = Document(docfile=request.FILES['file'])
    newdoc.save()
    print("file saved successfully")

    # Fetch data. ``fetchtime`` stays 0.0 when only the neural model is
    # selected; it used to be undefined in that case, which raised a
    # NameError as soon as the neural timing was computed.
    fetchtime = 0.0
    if len(model) == 1 and model[0] == "Neural":
        print("Only Neural Selected")
    else:
        print("Fetching Dataset")
        fetch_start = time.time()
        xTrain, yTrain, xTest, yTest = fetch_data(upload, period)
        fetchtime = time.time() - fetch_start
        print("Time to fetch" + str(fetchtime))

    models = {
        "Neural": neuralNetwork,
        "Linear Regression": reg_linear,
        "SVM": reg_svm,
        "Lasso-Lars Regression": reg_lassolars,
        "Theilsen Regression": reg_theilsen,
        "ARD": reg_ard,
    }
    print(period)

    obj = []      # per-model summary rows handed to the template
    preds = []    # prediction series for the comparison plot
    names = []    # legend for the comparison plot (gets "Actual" appended)
    name2 = []    # model names for the execution-time plot
    times = []
    for name in model:
        print(name)
        run = models[name]
        strt_time = time.time()
        if name == "Neural":
            # The neural model is handed the upload rather than the fetched
            # arrays; the dataset fetch time is subtracted from its timing.
            mse, accu, pred, act = run(upload, period)
            elapsed = time.time() - strt_time - fetchtime
        else:
            mse, accu, pred, act = run(xTrain, yTrain, xTest, yTest)
            elapsed = time.time() - strt_time
        times.append(elapsed)
        obj.append({'model': name, 'mse': mse, 'accu': accu, 'time': elapsed})
        print("MSE: " + str(mse) + " " + "ACC :" + str(accu) + "\n")
        preds.append(pred)
        names.append(name)
        name2.append(name)

    # NOTE(review): ``act`` is only bound once a model has run, so an empty
    # selection still fails here.
    preds.append(act)
    names.append("Actual")
    print(obj)
    print(times)

    imgurl = BASE_DIR + "\\media\\images\\output1.png"
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Comparison of Models based on the Predicted Load",
        yAxisName="Predicted Kilowatts")
    visualizer.exectimeplot(times, name2)
    return render(request, "output.html", {'obj': obj, 'imgurl': imgurl})
def neuralNetwork(file, test_perc):
    """Train PCA-reduced neural networks on an uploaded workbook.

    Rows are read from the first sheet of the workbook stored under
    ``media/uploadedfile``. The final 89 rows form the test set. Features and
    training targets are log-scaled, the training targets are detrended, and
    for each (PCA dimension, hidden neuron count) configuration the
    predictions are re-trended and exponentiated before the NRMSE is printed.

    Args:
        file: The uploaded file; its string form is used as the file name.
        test_perc: Currently unused -- the test set size is fixed at 89 rows.

    Returns:
        A tuple ``(err, trendedPred)`` with the NRMSE and the predictions of
        the last configuration evaluated.
    """
    xData = []
    yData = []
    print("hello")
    print(file)
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Built with os.path.join instead of a literal containing unescaped
    # backslashes ("\u" is an invalid escape in Python 3 string literals).
    workbook_path = os.path.join(BASE_DIR, "media", "uploadedfile",
                                 "%s" % (file,))
    book = xlrd.open_workbook(workbook_path)
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows - 1):
        cells = sheet.row(rx)[1:50]  # including temps
        nextTotal = sheet.row(rx + 1)[49]  # total of next day
        # The last cell of the slice is left out of the feature vector
        xData.append([cell.value for cell in cells[:-1]])
        yData.append(nextTotal.value)

    # The final 89 rows are held out for testing
    cutoff = len(xData) - 89
    print(cutoff)
    xTrain = xData[0:cutoff]
    yTrain = yData[0:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    print(yTest)

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(xData))
    print('ho')
    print(indices)
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    for nComponents, hidden in zip(dimensions, neurons):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=nComponents)
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, hidden)
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")

    # The comparison plot that used to follow the return statement was
    # unreachable and has been dropped together with the lists that only
    # fed it; the returned values are unchanged.
    return err, trendedPred
def clustering():
    """Forecast the load from cluster assignments and plot the results.

    The series from ``constructData`` is split so that the final 364
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended. Agglomerative and k-means
    clusterings with 7 and 365 clusters are computed on the training
    features; for each one the test predictions are re-trended and
    exponentiated before the NRMSE is printed. All forecasts are finally
    plotted together with the actual test values.
    """
    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    # ``data`` is indexed as data[0] (features) and data[1] (targets), so the
    # number of observations is len(data[0]); len(data) only counts the
    # container's entries and produced a shifted hold-out window.
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    # Compute centroids and labels of data
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)
    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)

    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]
    algNames = ["agglomerative(7)", "agglomerative(365)",
                "k-means(7)", "k-means(365)"]

    preds = []
    for centroids, labels, algName in zip(c, l, algNames):
        # Separate the training samples into cluster sets, one per centroid
        clusterSets = [[] for _ in range(len(centroids))]
        for i in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[i]].append((xTrain[i], yTrain[i]))

        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using algorithm " + algName + "...")

    # The actual observations are plotted as the final series
    algNames.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork():
    """Forecast the load with PCA-reduced neural networks and plot the results.

    The series from ``constructData`` is split so that the final 89
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended. For every
    (PCA dimension, hidden neuron count) configuration the predictions are
    re-trended and exponentiated before the NRMSE is printed, and all
    forecasts are plotted together with the actual test values.
    """
    # Retrieve time series data & apply preprocessing
    data = constructData()
    print(len(data))
    features = data[0]
    targets = data[1]

    # Hold out the final 89 observations for testing
    cutoff = len(features) - 89
    xTrain = features[0:cutoff]
    print(xTrain)
    yTrain = targets[0:cutoff]
    xTest = features[cutoff:]
    yTest = targets[cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    for block in (xTrain, xTest):
        statistics.estimateMissing(block, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(value) for value in row] for row in xTrain]
    xTest = [[math.log(value) for value in row] for row in xTest]
    yTrain = [math.log(value) for value in yTrain]

    # Detrend the time series
    indices = np.arange(len(targets))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    # Legend entries describing each configuration
    names = ["d=" + str(dim) + ",h=" + str(hidden)
             for dim, hidden in zip(dimensions, neurons)]

    preds = []
    for dim, hidden in zip(dimensions, neurons):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dim)
        pca.fit(xTrain)
        reducedTrain = pca.transform(xTrain)
        reducedTest = pca.transform(xTest)
        pred = fit_predict(reducedTrain, yTrain, reducedTest, 40, hidden)
        # Add the trend back into the predictions
        retrended = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        retrended = [math.exp(value) for value in retrended]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, retrended)
        # Append computed predictions to list for classifier predictions
        preds.append(retrended)
        print("The NRMSE for the neural network is " + str(err) + "...")

    # The actual observations are plotted as the final series
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def clustering():
    """Forecast the load from cluster assignments and plot the results.

    The series from ``constructData`` is split so that the final 364
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended. Agglomerative and k-means
    clusterings with 7 and 365 clusters are computed on the training
    features; for each one the test predictions are re-trended and
    exponentiated before the NRMSE is printed. All forecasts are finally
    plotted together with the actual test values.
    """
    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    # ``data`` is indexed as data[0] (features) and data[1] (targets), so the
    # number of observations is len(data[0]); len(data) only counts the
    # container's entries and produced a shifted hold-out window.
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    # Compute centroids and labels of data
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)
    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)

    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]
    algNames = [
        "agglomerative(7)", "agglomerative(365)", "k-means(7)", "k-means(365)"
    ]

    preds = []
    for t, algName in enumerate(algNames):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]

        # Separate the training samples into cluster sets, one per centroid
        clusterSets = [[] for _ in range(len(centroids))]
        for i in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[i]].append((xTrain[i], yTrain[i]))

        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using algorithm " + algName + "...")

    # The actual observations are plotted as the final series
    algNames.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork():
    """Forecast from ``data/data_with_9_variable.xlsx`` with neural networks.

    Rows are read from the first sheet of the workbook. The final 30 rows
    form the test set and up to 690 rows before them form the training set.
    Features and training targets are log-scaled and the training targets
    detrended. For every (PCA dimension, hidden neuron count) configuration
    the predictions are re-trended and exponentiated before the NRMSE and a
    MAPE-based accuracy are printed, and all forecasts are plotted together
    with the actual test values.
    """
    xData = []
    yData = []
    book = xlrd.open_workbook("data/data_with_9_variable.xlsx")
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        cells = sheet.row(rx)
        # Columns 1..11 are the features (including temps); column 12 is
        # the target value
        xData.append([cell.value for cell in cells[1:12]])
        yData.append(cells[12].value)
    print(xData)
    print(yData)

    # Train on the window that starts 720 rows from the end and stops 30 rows
    # from the end. The start is clamped at 0 so that a workbook with fewer
    # than 720 rows uses every available row instead of a negative slice
    # start silently selecting the wrong window.
    cu = max(0, len(xData) - 720)
    cutoff = len(xData) - 30
    print(cutoff)
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    print(yTest)

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(xData))
    print('ho')
    print(indices)
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    # Legend entries describing each configuration
    names = ["d=" + str(dim) + ",h=" + str(hidden)
             for dim, hidden in zip(dimensions, neurons)]

    preds = []
    for dim, hidden in zip(dimensions, neurons):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dim)
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, hidden)
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE and the MAPE
        err = statistics.normRmse(yTest, trendedPred)
        err2 = statistics.mape(yTest, trendedPred)
        # Append computed predictions to list for classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
        print("The %Accuracy for the neural network is " +
              str((1 - err2) * 100) + "...\n")

    # The actual observations are plotted as the final series
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork():
    """Forecast the load with PCA-reduced neural networks and plot the results.

    The series from ``constructData`` is split so that the final 364
    observations form the test set. Features and training targets are
    log-scaled and the training targets detrended. For every
    (PCA dimension, hidden neuron count) configuration the predictions are
    re-trended and exponentiated before the NRMSE is printed, and all
    forecasts are plotted together with the actual test values.
    """
    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value.
    # ``data`` is indexed as data[0] (features) and data[1] (targets), so the
    # number of observations is len(data[0]); len(data) only counts the
    # container's entries and produced a shifted hold-out window.
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    yTrain, slope, intercept = statistics.detrend(trainIndices, yTrain)

    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    # Legend entries describing each configuration
    names = ["d=" + str(dim) + ",h=" + str(hidden)
             for dim, hidden in zip(dimensions, neurons)]

    preds = []
    for dim, hidden in zip(dimensions, neurons):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dim)
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, hidden)
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list for classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")

    # The actual observations are plotted as the final series
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")