def loop(weights, training, score, gradient, learningrate, reducelr,
         gdfunction, maxratio):
    '''A gradient descent loop for either batch or stochastic_pass.

    weights      -- initial weight vector
    training     -- list of DataPoint instances to train on
    score        -- function [weights features --> score]
    gradient     -- function [score feature label --> gradient]
    learningrate -- learning rate parameter (lambda)
    reducelr     -- function [number --> number] to reduce the learning rate
    gdfunction   -- function with a signature like GradientDescent.batch
    maxratio     -- maximum newerror:olderror ratio before stopping
    '''
    INDENT = ' '
    print INDENT * 2 + 'Initial learning rate:', learningrate
    # initialize the error
    error = stats.rmse(
        [score(weights, dp.features) for dp in training],
        [dp.label for dp in training])
    print INDENT * 2 + 'Initial Training RMSE:', error
    # loop
    while True:
        # calculate new weights & error
        newweights = gdfunction(weights, training, score, gradient,
                                learningrate)
        try:
            newerror = stats.rmse(
                [score(newweights, dp.features) for dp in training],
                [dp.label for dp in training])
        except OverflowError:
            newerror = error + 1
            print INDENT * 3 + 'Training RMSE: Overflow'
        else:
            if newerror <= error:
                print INDENT * 2 + 'Training RMSE: v', newerror
            else:
                print INDENT * 3 + 'Training RMSE: ^', newerror
        # figure out what to do next
        if newerror <= error:
            ratio = newerror / error
            # error went down; accept error and weights
            error = newerror
            weights = newweights
            # do we stop?
            if ratio > maxratio:
                print INDENT * 2 + 'Finished learning; error ratio:',
                print ratio, '>', maxratio
                break
        else:
            # error went up; retry with a smaller lambda
            learningrate = reducelr(learningrate)
            print INDENT * 3 + 'Retrying with learning rate:', learningrate
    # done
    return weights
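
# For reference, a minimal sketch (not part of this module) of a batch
# update function matching the gdfunction signature loop() expects.
# 'batch_sketch' and its plain-list vector arithmetic are assumptions for
# illustration, not the actual GradientDescent.batch implementation.
def batch_sketch(weights, training, score, gradient, learningrate):
    '''One full pass over training; returns the updated weight vector.'''
    total = [0.0] * len(weights)
    for dp in training:
        # gradient follows the documented [score feature label] signature
        g = gradient(score(weights, dp.features), dp.features, dp.label)
        total = [t + gi for t, gi in zip(total, g)]
    # descend: step against the accumulated gradient
    return [w - learningrate * t for w, t in zip(weights, total)]
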
import numpy as np
import matplotlib.patches as mpatches


def comparing_stat_patch_list(pred, y):
    '''Build legend patches comparing the summary statistics of the
    predictions (red) against the reference series y (blue), plus the
    RMSE between the two.'''
    patch1 = mpatches.Patch(color='red',
                            label='mean:' + ('%03.6f' % np.mean(pred)))
    patch2 = mpatches.Patch(color='red',
                            label='std:' + ('%03.6f' % np.std(pred)))
    patch3 = mpatches.Patch(color='red',
                            label='skewness:' + ('%03.3f' % stats.skewness(pred)))
    patch4 = mpatches.Patch(color='red',
                            label='kurtosis:' + ('%03.3f' % stats.kurtosis(pred)))
    patch5 = mpatches.Patch(color='blue',
                            label='mean:' + ('%03.6f' % np.mean(y)))
    patch6 = mpatches.Patch(color='blue',
                            label='std:' + ('%03.6f' % np.std(y)))
    patch7 = mpatches.Patch(color='blue',
                            label='skewness:' + ('%03.3f' % stats.skewness(y)))
    patch8 = mpatches.Patch(color='blue',
                            label='kurtosis:' + ('%03.3f' % stats.kurtosis(y)))
    #patch9 = mpatches.Patch(color='black',
    #                        label='MAPE:' + ('%03.3f' % stats.mape(pred, y)))
    patch10 = mpatches.Patch(color='black',
                             label='RMSE:' + ('%03.6f' % stats.rmse(pred, y)))
    #plt.text(.25, .5, str(np.mean(pred)))
    return [patch5, patch6, patch7, patch8,
            patch1, patch2, patch3, patch4, patch10]
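
# A hypothetical usage sketch: the patches carry their statistics as labels,
# so passing them as legend handles annotates a red-vs-blue comparison plot.
# 'plot_comparison_sketch' is an illustrative name, not part of this module;
# pred and y are assumed to be equal-length numeric arrays.
def plot_comparison_sketch(pred, y):
    import matplotlib.pyplot as plt
    plt.hist(y, bins=50, color='blue', alpha=0.5)
    plt.hist(pred, bins=50, color='red', alpha=0.5)
    plt.legend(handles=comparing_stat_patch_list(pred, y))
    plt.show()
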
def main2(gdname, gdfunction, training, regression, learningrate):
    '''Perform gradient descent with a learner and analyze the results.'''
    # learning rate: either an (initial rate, reducer function) pair for a
    # dynamic schedule, or a plain number for a fixed rate
    try:
        initiallr, reducelr = learningrate
        suffix = 'dynamic' + str(initiallr)
    except TypeError:
        initiallr = learningrate
        reducelr = lambda x: x
        suffix = initiallr
    # learn
    weights = GradientDescent.loop(
        len(training[0].features) * [0.0], training, regression.model,
        regression.gradient, initiallr, reducelr, gdfunction, 0.99)
    # test (testing and INDENT are module-level globals)
    terror = stats.rmse(
        [regression.model(weights, dp.features) for dp in testing],
        [dp.label for dp in testing])
    print INDENT * 2 + 'Testing RMSE:', terror
    ## produce a result set
    results = [resultset.DataResult(dp.label,
                                    regression.model(weights, dp.features))
               for dp in testing]
    ## ## find a good operating point
    ## op = resultset.minerrop(results)
    ## print INDENT * 2 + 'Operating Point:', op
    ## ## assign predictions
    ## results = resultset.applyop(op, results)
    ## output roc data
    roc = resultset.rocdata(results)
    auc = resultset.auc(roc)
    with open('{}-{}_lambda={}_auc={}'.format(regression.__name__, gdname,
                                              suffix, auc).lower(),
              mode='wb') as fd:
        for fpr, tpr in roc:
            fd.write('{}, {}\n'.format(fpr, tpr))
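
# Hedged example of how main2 might be driven, assuming a GradientDescent
# module exposing batch/stochastic_pass and a regression module exposing
# model() and gradient(); the module name 'Linear' and the rate values are
# illustrative only:
#
#   main2('batch', GradientDescent.batch, training, Linear, 0.1)
#   main2('batch', GradientDescent.batch, training, Linear,
#         (1.0, lambda lr: lr / 2.0))   # dynamic: halve lambda on overshoot
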
# prediction
confidence, prediction = sorted(zip(classifierResult[1],
                                    classifierResult["actualValues"]),
                                reverse=True)[0]
#print json.dumps(REZ)

# hamming distances
# print("---")
# keys = list(hamming.keys())
# keys.sort()
# bits = hamming[keys[100]]
# prev = None
# for i in range(len(keys)):
#     if prev is not None:
#         print "%s:\tprev=%s\tvalue=%s" % (i,
#             numpy.count_nonzero(prev != hamming[keys[i]]),
#             numpy.count_nonzero(bits != hamming[keys[i]]))
#     prev = hamming[keys[i]]

# error stats
errorNll = nll(consumptions, predictions)
errorMae = mae(consumptions, predictions)
errorMape = mape(consumptions, predictions)
errorRmse = rmse(consumptions, predictions)
print("---")
print("mape: %s" % errorMape)
print("mae: %s" % errorMae)
print("rmse: %s" % errorRmse)
print("nll_1000: %s" % errorNll)
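
# Minimal reference sketches of two of the metrics used above, assuming
# equal-length numeric sequences; the module's own rmse/mape may differ in
# edge-case handling, and the *_sketch names are illustrative only.
import math

def rmse_sketch(actual, predicted):
    '''Root-mean-square error.'''
    n = float(len(actual))
    return math.sqrt(sum((a - p) ** 2 for a, p in zip(actual, predicted)) / n)

def mape_sketch(actual, predicted):
    '''Mean absolute percentage error; assumes no zero actual values.'''
    n = float(len(actual))
    return 100.0 * sum(abs((a - p) / a) for a, p in zip(actual, predicted)) / n
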
def _error_row(truth, pred):
    '''The five error statistics recorded for each (sensor, formula) pair.'''
    return [stats.mae(truth, pred),
            stats.rmse(truth, pred),
            stats.mape(truth, pred),
            stats.coeff_deter(truth, pred),
            stats.pearson(truth, pred)]


def alphasense_compute(dataFrame, t_incl=False, h_incl=False):
    '''Evaluate the four Alphasense correction formulas for every sensor.

    dataFrame is a 2-D array: column 1 holds the NO2 reference, column 2
    the O3 reference, and the remaining columns hold per-sensor electrode
    readings (optionally preceded by temperature and/or humidity columns).
    Returns (err_no2, err_o3), each indexed [sensor, formula, statistic].
    '''
    # lambdas for columns
    col_skip = 3
    if t_incl and h_incl:
        col_temp = (lambda i: (col_skip + 6 * i))
        col_hum = (lambda i: (col_skip + 6 * i + 1))
        col_no2 = (lambda i: range((col_skip + 6 * i + 2), (col_skip + 6 * i + 4)))
        col_ox = (lambda i: range((col_skip + 6 * i + 4), (col_skip + 6 * i + 6)))
    elif h_incl:
        col_hum = (lambda i: (col_skip + 5 * i))
        col_no2 = (lambda i: range((col_skip + 5 * i + 1), (col_skip + 5 * i + 3)))
        col_ox = (lambda i: range((col_skip + 5 * i + 3), (col_skip + 5 * i + 5)))
    elif t_incl:
        col_temp = (lambda i: (col_skip + 5 * i))
        col_no2 = (lambda i: range((col_skip + 5 * i + 1), (col_skip + 5 * i + 3)))
        col_ox = (lambda i: range((col_skip + 5 * i + 3), (col_skip + 5 * i + 5)))
    else:
        col_no2 = (lambda i: range((col_skip + 4 * i), (col_skip + 4 * i + 2)))
        col_ox = (lambda i: range((col_skip + 4 * i + 2), (col_skip + 4 * i + 4)))
    #dataFrame = dataFrame.values
    err_no2 = np.zeros([len(NO2_WE_0T), 4, 5])
    err_o3 = np.zeros([len(NO2_WE_0T), 4, 5])
    # iterate over sensors
    for i in xrange(np.size(dataFrame, 1)):
        if col_ox(i)[-1] >= np.size(dataFrame, 1):
            break
        nx = dataFrame[:, col_no2(i)]
        ox = dataFrame[:, col_ox(i)]
        # formula 1
        pred_no2 = formula1(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NTn, SENSITIVITY_NO2[i])
        err_no2[i, 0] = _error_row(dataFrame[:, 1], pred_no2)
        # the OX electrode responds to both O3 and NO2, hence the subtraction
        pred = formula1(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        NTo, SENSITIVITY_O3[i])
        err_o3[i, 0] = _error_row(dataFrame[:, 2], pred - pred_no2)
        # formula 2
        pred_no2 = formula2(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], KTn,
                            SENSITIVITY_NO2[i])
        err_no2[i, 1] = _error_row(dataFrame[:, 1], pred_no2)
        pred = formula2(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], KTo, SENSITIVITY_O3[i])
        err_o3[i, 1] = _error_row(dataFrame[:, 2], pred - pred_no2)
        # formula 3
        pred_no2 = formula3(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], K_Tn,
                            SENSITIVITY_NO2[i])
        err_no2[i, 2] = _error_row(dataFrame[:, 1], pred_no2)
        pred = formula3(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], K_To, SENSITIVITY_O3[i])
        err_o3[i, 2] = _error_row(dataFrame[:, 2], pred - pred_no2)
        # formula 4
        pred_no2 = formula4(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], K__Tn,
                            SENSITIVITY_NO2[i])
        err_no2[i, 3] = _error_row(dataFrame[:, 1], pred_no2)
        pred = formula4(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], K__To, SENSITIVITY_O3[i])
        err_o3[i, 3] = _error_row(dataFrame[:, 2], pred - pred_no2)
    #np.savetxt("alpha-no2-err1.csv", err_no2[0].T, fmt='%0.4g', delimiter=',')
    #np.savetxt("alpha-no2-err2.csv", err_no2[1].T, fmt='%0.4g', delimiter=',')
    #np.savetxt("alpha-o3-err1.csv", err_o3[0].T, fmt='%0.4g', delimiter=',')
    #np.savetxt("alpha-o3-err2.csv", err_o3[1].T, fmt='%0.4g', delimiter=',')
    return err_no2, err_o3
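
# A hedged sketch of what formula1 is assumed to compute, following the
# general Alphasense electrode-correction pattern: zero-offset the working
# (WE) and auxiliary (AE) electrode signals, scale AE by the temperature
# factor nT, and divide by sensitivity to convert to concentration units.
# This illustrates the expected shape only; it is not the module's code.
def formula1_sketch(we, ae, we_0e, ae_0e, nT, sensitivity):
    return ((we - we_0e) - nT * (ae - ae_0e)) / sensitivity
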