def plot_prediction(prediction, result_dir, y, train_y_max, train_y_min, step_size=1000):
    """Denormalise a prediction, record its error metrics and plot it.

    ``prediction`` and ``y`` are mapped back to the original scale with
    ``value * (train_y_max - train_y_min) + train_y_min``. For RMSE, MAPE and
    R2 an empty marker file is created in ``result_dir``; the metric value is
    carried in the file *name*, nothing is written to the file. One plot is
    requested for the whole series, then one per complete batch of
    ``step_size`` samples.

    Args:
        prediction: Normalised model output. Must support arithmetic, slicing
            and ``.size`` (presumably a NumPy array -- confirm with callers).
        result_dir: Existing directory that receives marker files and plots.
        y: Normalised target values, same layout as ``prediction``.
        train_y_max: Maximum used when the data was normalised.
        train_y_min: Minimum used when the data was normalised.
        step_size: Number of samples per batch plot. The default of 1000
            matches the previously hard-coded value (1000 hours, roughly
            40 days).
    """
    # Denormalise data.
    scale = train_y_max - train_y_min
    prediction_denormalised = prediction * scale + train_y_min
    y_denormalised = y * scale + train_y_min

    # Each metric is recorded as an empty file named after its value.
    rmse = util.rmse(prediction_denormalised, y_denormalised)
    with open('{0}/rmse_{1}'.format(result_dir, rmse), "w+"):
        pass

    mape = util.mean_absolute_percentage_error(prediction_denormalised, y_denormalised)
    with open('{0}/mape_{1}'.format(result_dir, mape), "w+"):
        pass

    r2 = util.r2(prediction_denormalised, y_denormalised)
    with open('{0}/r2_{1}'.format(result_dir, r2), "w+"):
        pass

    plt_file = '{0}/plot_unordered_{1}.png'.format(result_dir, mape)
    util.plot_results_unordered(prediction_denormalised, y_denormalised, plt_file)

    # Output in batches of `step_size` samples. The "+ 1" makes the stop bound
    # inclusive of the last *complete* batch; without it that batch was
    # silently skipped whenever the series length was an exact multiple of
    # step_size. A trailing partial batch is still not plotted.
    last_start_exclusive = prediction_denormalised.size - step_size + 1
    for x in range(0, last_start_exclusive, step_size):
        batch_prediction = prediction_denormalised[x:x + step_size]
        batch_y = y_denormalised[x:x + step_size]
        mape_batch = util.mean_absolute_percentage_error(batch_prediction, batch_y)
        plt_file = '{0}/plot_unordered_{1}_{2}.png'.format(result_dir, mape_batch, x)
        util.plot_results_unordered(batch_prediction, batch_y, plt_file)
def plot_prediction_compare_with_mike11(prediction, result_dir, y, train_y_max, train_y_min):
    """Denormalise a prediction, record its metrics and plot the comparison.

    ``prediction`` and ``y`` are mapped back to the original scale with
    ``value * (train_y_max - train_y_min) + train_y_min``. For RMSE, MAPE and
    R2 an empty marker file named ``<metric>_compare_<value>`` is created in
    ``result_dir``; the value lives in the file name only. Two plot helpers
    are then called for the whole series.

    Args:
        prediction: Normalised model output (must support arithmetic).
        result_dir: Existing directory that receives marker files and plots.
        y: Normalised target values, same layout as ``prediction``.
        train_y_max: Maximum used when the data was normalised.
        train_y_min: Minimum used when the data was normalised.
    """
    # Denormalise data.
    scale = train_y_max - train_y_min
    prediction_denormalised = prediction * scale + train_y_min
    y_denormalised = y * scale + train_y_min

    # Each metric is recorded as an empty file named after its value.
    rmse = util.rmse(prediction_denormalised, y_denormalised)
    with open('{0}/rmse_compare_{1}'.format(result_dir, rmse), "w+"):
        pass

    mape = util.mean_absolute_percentage_error(prediction_denormalised, y_denormalised)
    with open('{0}/mape_compare_{1}'.format(result_dir, mape), "w+"):
        pass

    r2 = util.r2(prediction_denormalised, y_denormalised)
    with open('{0}/r2_compare_{1}'.format(result_dir, r2), "w+"):
        pass

    plt_file = '{0}/plot_compare_{1}.png'.format(result_dir, mape)
    util.plot_results_unordered_compare_with_mike11(prediction_denormalised, y_denormalised, plt_file)

    # The MAPE used for the second file name was previously recomputed from
    # exactly the same inputs; the value computed above is reused instead
    # (assumes the util metric is deterministic).
    # NOTE(review): both plot helpers receive the same path. If each of them
    # saves to `plt_file`, this call overwrites the comparison plot written
    # just above -- confirm whether a distinct file name was intended.
    util.plot_results_unordered(prediction_denormalised, y_denormalised, plt_file)
def baselineTest(sampleSet, trueSet):
    """Report the error of a lookup-table baseline.

    Each crime in ``sampleSet`` is predicted by looking it up in
    ``currentCrimeSentenceModes.csv`` (read from the current working
    directory; every row must have exactly two columns: key, value) and
    converting the value to ``int``. The predictions are compared with
    ``trueSet`` and the results are printed.

    Args:
        sampleSet: Iterable of crime keys; every key must be present in the
            CSV file, otherwise ``KeyError`` is raised.
        trueSet: True values, passed to
            ``util.mean_absolute_percentage_error`` together with the
            predictions.
    """
    # Use a context manager so the CSV handle is closed deterministically
    # (it was previously left open for the garbage collector).
    with open("currentCrimeSentenceModes.csv") as modesFile:
        currentSentenceModesMap = {key: val for key, val in csv.reader(modesFile)}

    predictedSet = [int(currentSentenceModesMap[crime]) for crime in sampleSet]

    # `percentErrors` is handed to the util function as a list argument and
    # read afterwards -- presumably it is filled with per-sample errors.
    percentErrors = []

    # Single-argument print form: identical output under Python 2's print
    # statement semantics and also valid under Python 3.
    print("Baseline Test")
    meanError = util.mean_absolute_percentage_error(trueSet, predictedSet, percentErrors)
    print("Mean absolute test error: %s" % (meanError,))
    print("Standard deviation: %s" % (np.std(np.array(percentErrors)),))
if __name__ == '__main__':
    # Script entry point: combine the low- and high-volatility predictions
    # for one ticker and report error/accuracy figures via util.
    ticker = 'SPY'

    # Last 1500 rows of the formatted data, re-indexed from 0.
    raw_data = pd.read_csv(stock_io.format_data.format(ticker), header=0)
    data = raw_data.tail(1500).reset_index(drop=True)

    low_vol_prediction = pd.read_csv(stock_io.file_pred_low.format(ticker), header=None)
    high_vol_prediction = pd.read_csv(stock_io.file_pred_high.format(ticker), header=None)

    # The final prediction is the element-wise sum of column 0 of both files.
    low_series = pd.Series(low_vol_prediction[0])
    high_series = pd.Series(high_vol_prediction[0])
    final_prediction = low_series + high_series

    # The prediction is scored against the last 252 closing prices.
    actual_close = data['close'].tail(252)
    mse = mean_squared_error(final_prediction.values, actual_close.values)
    rmse = mse ** 0.5
    mape = util.mean_absolute_percentage_error(
        actual_close.reset_index(drop=True), final_prediction)

    accuracy_act, accuracy_pred = util.get_sim_accuracy(data, final_prediction)
    util.print_results(accuracy_act, accuracy_pred, mse, rmse, mape)

    # Disabled code kept from the original (the snippet is cut off here):
    #
    # simulation[ma] = {'low_vol': {'prediction': low_vol_prediction, 'mse': low_vol_mse,
    #                               'rmse': low_vol_rmse, 'mape': low_vol_mape},
    #                   'high_vol': {'prediction': high_vol_prediction, 'mse': high_vol_mse,
    #                                'rmse': high_vol_rmse},
    #                   'final': {'prediction': final_prediction.values.tolist(), 'mse': mse,
def main():
    """Train and evaluate three sentence-length models on the inmate data.

    Steps:
      1. Build one feature vector and one sentence length (in days, release
         date minus incarceration date) per usable inmate record.
      2. Fit an ``SGDRegressor``, an ``SVR`` and a ``BernoulliNB`` on the
         first 10000 samples.
      3. Evaluate each model on samples 10001..20000 and print the mean
         absolute percentage error and the standard deviation of the
         per-sample errors.

    Records without ``'IncarcerationDate'`` or with a non-positive duration
    are skipped. At least 20001 usable records are required, otherwise the
    evaluation loops raise ``IndexError``.
    """
    inmatesMap = mapCreator()
    featureVector = createFeatureVector()

    allInmateCrimes = []
    allInmateCrimesYValues = []
    allInmates = []
    allInmateYValues = []
    for inmate in inmatesMap:
        record = inmatesMap[inmate]
        if 'IncarcerationDate' not in record:
            continue
        if record['PrisonReleaseDate'] == '':
            # Empty release date: substitute incarceration + 36525 days
            # (about 100 years) as a placeholder.
            record['PrisonReleaseDate'] = record['IncarcerationDate'] + datetime.timedelta(days=36525)

        # Computed once; previously the same difference was evaluated twice.
        sentenceLength = (record["PrisonReleaseDate"] - record["IncarcerationDate"]).days
        if sentenceLength <= 0:
            continue

        currentPerson = extractFeatures(record, featureVector)
        if 'CURRENT_OFFENSES' in record:
            # Per-offence lists; only consumed by the disabled baselineTest
            # call further down.
            for offense in record['CURRENT_OFFENSES']:
                crimeDescription = "CURRENT_" + offense["adjudicationcharge_descr"]
                allInmateCrimes.append(crimeDescription)
                allInmateCrimesYValues.append(sentenceLength)
        allInmates.append(currentPerson)
        # allInmateYValues.append(inmatesMap[inmate]["prisonterm"])
        allInmateYValues.append(sentenceLength)

    # Training split: first 10000 samples.
    X = allInmates[:10000]
    y = allInmateYValues[:10000]
    # NOTE(review): evaluation starts at index 10001, so sample 10000 is in
    # neither split -- kept as in the original to leave reported numbers
    # unchanged; confirm whether that gap is intended.
    testIndices = range(10001, 20001)

    # --- SGD regression ---------------------------------------------------
    sgd = SGDRegressor(loss='epsilon_insensitive', fit_intercept=True, learning_rate='constant', n_iter=4, penalty='none', epsilon=0)
    sgd.fit(X, y)
    sgdPredictedSetY = []
    sgdTrueSetY = []
    for i in testIndices:
        sgdTrueSetY.append(allInmateYValues[i])
        sgdPredictedSetY.append(sgd.predict(allInmates[i]))
    # `percentErrors` is passed to the util function as a list and read
    # afterwards -- presumably it is filled with per-sample errors.
    percentErrors = []
    sgdError = util.mean_absolute_percentage_error(sgdTrueSetY, sgdPredictedSetY, percentErrors)
    # Single-argument print form: same output under Python 2, valid on 3.
    print("SGD Mean absolute test error: %s" % (sgdError,))
    print("SGD Standard deviation: %s" % (np.std(np.array(percentErrors)),))

    # --- Support vector regression ------------------------------------------
    svr = svm.SVR()
    svr.fit(X, y)
    svrPredictedSetY = []
    svrTrueSetY = []
    for i in testIndices:
        trueValue = allInmateYValues[i]
        # Predict once per sample; the original called svr.predict three
        # times on the same input.
        predictedValue = svr.predict(allInmates[i])
        print("true value: %s" % (trueValue,))
        print("predicted value: %s" % (predictedValue,))
        print("Difference in true and predicted values: %s" % (trueValue - predictedValue,))
        svrTrueSetY.append(trueValue)
        svrPredictedSetY.append(predictedValue)
    percentErrors = []
    svrError = util.mean_absolute_percentage_error(svrTrueSetY, svrPredictedSetY, percentErrors)
    print("SVR Mean absolute test error: %s" % (svrError,))
    print("SVR Standard deviation: %s" % (np.std(np.array(percentErrors)),))

    # baselineTest(allInmateCrimes[:10000], allInmateCrimesYValues[:10000])

    # --- Bernoulli naive Bayes ------------------------------------------------
    nbAllInmates = nbTestTransform(allInmates)
    nbAllInmateYValues = nbRound(allInmateYValues)
    # These are the *training* samples (the original called them "test").
    nbTrainSet = [nbAllInmates[i] for i in range(0, 10000)]
    nbTrainSetY = [nbAllInmateYValues[i] for i in range(0, 10000)]
    nb = BernoulliNB()
    nb.fit(np.array(nbTrainSet), np.array(nbTrainSetY))
    nbTrueSentenceLength = []
    nbTestSentenceLength = []
    for i in testIndices:
        nbTrueSentenceLength.append(nbAllInmateYValues[i] * 10.0)
        # Bug fix: the factor 10.0 was applied to the feature vector inside
        # predict() instead of to the predicted label, so predictions were
        # compared against true values on a different scale.
        nbTestSentenceLength.append(nb.predict(nbAllInmates[i]) * 10.0)
    # print nbTrueSentenceLength
    # print nbTestSentenceLength
    percentErrors = []
    nbError = util.mean_absolute_percentage_error(nbTrueSentenceLength, nbTestSentenceLength, percentErrors)
    print("Naive Bayes Mean absolute test error: %s" % (nbError,))
    print("Naive Bayes standard deviation: %s" % (np.std(np.array(percentErrors)),))
def forecasting(config_main, config_pv):
    """Run the forecasting pipeline: data, model, plots and error report.

    Imports the data, splits it into train/validation/test parts, builds the
    input/output sets, then either loads a stored model
    (``config_pv.LOAD_MODEL``) or builds a new one. Predictions for all three
    parts are plotted and MSE, MAE and MAPE are printed for each part.

    Args:
        config_main: Main configuration object; its ``TIMESTAMPS`` attribute
            is overwritten with the imported timestamps.
        config_pv: Forecast configuration; ``LOOK_BACK``, ``OUTPUT_SIZE``,
            ``LOAD_MODEL`` and ``TIME_PER_DAY`` are read here.
    """
    df, timestamps = dataImport(config_main, config_pv)
    config_main.TIMESTAMPS = timestamps

    df_train, df_validation, df_test, scaler = getParts(df, config_main, config_pv)

    # here we have numpy array
    trainX, trainY = buildSet(
        np.array(df_train), config_pv.LOOK_BACK, config_pv.OUTPUT_SIZE)
    validationX, validationY = buildSet(
        np.array(df_validation), config_pv.LOOK_BACK, config_pv.OUTPUT_SIZE)
    testX, testY = buildSet(
        np.array(df_test), config_pv.LOOK_BACK, config_pv.OUTPUT_SIZE)

    # plotInputDay(timestamps, trainY[:, 0], config_pv)

    # A loaded model has no training history to plot.
    if not config_pv.LOAD_MODEL:
        model, history = buildModelPv(trainX, trainY, validationX, validationY, config_pv)
    else:
        model = loadModel(config_pv)
        history = None

    evalModel(model, testX, testY)

    # plotting
    trainPrediction = model.predict(trainX)
    testPrediction = model.predict(testX)
    valPrediction = model.predict(validationX)

    if history is not None:
        plotHistory(config_pv, history)

    plotPrediction(
        trainY, trainPrediction,
        testY, validationY,
        valPrediction, testPrediction,
        timestamps, config_pv,
    )

    # Single-sample plots: sample 24 of each part, with a timestamp window of
    # TIME_PER_DAY entries offset by the sizes of the preceding parts.
    sample = 24
    day = config_pv.TIME_PER_DAY
    trainStart = sample
    valStart = len(trainX) + sample
    testStart = len(trainX) + len(validationX) + sample

    plotPredictionPart(
        config_pv,
        trainY[sample],
        trainPrediction[sample],
        "1st day of train set",
        timestamps[trainStart:trainStart + day],
        "train",
    )
    plotPredictionPart(
        config_pv,
        validationY[sample],
        valPrediction[sample],
        "3rd day of validation set",
        timestamps[valStart:valStart + day],
        "validation",
    )
    plotPredictionPart(
        config_pv,
        testY[sample],
        testPrediction[sample],
        "1st day of test set",
        timestamps[testStart:testStart + day],
        "test",
    )

    # plotPredictionPartMult(
    #     config_pv,
    #     testY[0],
    #     testPrediction,
    #     "1st day of test set",
    #     timestamps[len(trainX) + len(validationX): len(trainX) + len(validationX) + config_pv.TIME_PER_DAY],
    #     "test"
    # )

    plotEcart(
        trainY, trainPrediction,
        validationY, valPrediction,
        testY, testPrediction,
        timestamps, config_pv,
    )

    # printing error
    # One row per data part: (line prefix, true values, predicted values).
    parts = (
        ("training\t", trainY, trainPrediction),
        ("validation\t\t", validationY, valPrediction),
        ("testing\t\t", testY, testPrediction),
    )
    # One row per metric: (name, function, text appended after the value).
    metrics = (
        ("MSE", mean_squared_error, ""),
        ("MAE", mean_absolute_error, ""),
        ("MAPE", mean_absolute_percentage_error, " %"),
    )
    for metricName, metric, suffix in metrics:
        for prefix, truth, predicted in parts:
            value = metric(np.array(truth), np.array(predicted))
            print("{}{} :\t{}{}".format(prefix, metricName, value, suffix))