Example No. 1
def plot_prediction(prediction, result_dir, y, train_y_max, train_y_min):
    # denormalise data
    prediction_denormalised = prediction * (train_y_max -
                                            train_y_min) + train_y_min
    y_denormalised = y * (train_y_max - train_y_min) + train_y_min

    rmse = util.rmse(prediction_denormalised, y_denormalised)
    # record each metric as an empty marker file whose name carries the value
    open('{0}/rmse_{1}'.format(result_dir, rmse), "w+").close()

    mape = util.mean_absolute_percentage_error(prediction_denormalised,
                                               y_denormalised)
    open('{0}/mape_{1}'.format(result_dir, mape), "w+").close()

    r2 = util.r2(prediction_denormalised, y_denormalised)
    open('{0}/r2_{1}'.format(result_dir, r2), "w+").close()

    plt_file = '{0}/plot_unordered_{1}.png'.format(result_dir, mape)
    util.plot_results_unordered(prediction_denormalised, y_denormalised,
                                plt_file)

    # plot in batches of 1000 hours (roughly 40 days)
    step_size = 1000
    for x in range(0, prediction_denormalised.size - step_size, step_size):
        mape_batch = util.mean_absolute_percentage_error(
            prediction_denormalised[x:x + step_size],
            y_denormalised[x:x + step_size])
        plt_file = '{0}/plot_unordered_{1}_{2}.png'.format(
            result_dir, mape_batch, x)
        util.plot_results_unordered(prediction_denormalised[x:x + step_size],
                                    y_denormalised[x:x + step_size], plt_file)
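The util module these examples import is project-local and its source is not shown. A minimal sketch consistent with how the examples call it might look as follows; the signatures, the percent scaling, and the optional percentErrors out-parameter are inferred from usage, not confirmed (Examples 1 and 2 pass (prediction, actual) while Example 3 passes (actual, prediction), so treat the argument order as an assumption):

import numpy as np

def rmse(prediction, actual):
    # root-mean-square error
    prediction, actual = np.asarray(prediction, dtype=float), np.asarray(actual, dtype=float)
    return float(np.sqrt(np.mean((prediction - actual) ** 2)))

def r2(prediction, actual):
    # coefficient of determination
    prediction, actual = np.asarray(prediction, dtype=float), np.asarray(actual, dtype=float)
    ss_res = np.sum((actual - prediction) ** 2)
    ss_tot = np.sum((actual - np.mean(actual)) ** 2)
    return float(1.0 - ss_res / ss_tot)

def mean_absolute_percentage_error(y_true, y_pred, percent_errors=None):
    # MAPE in percent; optionally collects per-sample errors in percent_errors
    y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    errors = np.abs((y_true - y_pred) / y_true) * 100.0
    if percent_errors is not None:
        percent_errors.extend(errors.tolist())
    return float(np.mean(errors))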
Example No. 2
def plot_prediction_compare_with_mike11(prediction, result_dir, y, train_y_max,
                                        train_y_min):
    # denormalise data
    prediction_denormalised = prediction * (train_y_max -
                                            train_y_min) + train_y_min
    y_denormalised = y * (train_y_max - train_y_min) + train_y_min

    rmse = util.rmse(prediction_denormalised, y_denormalised)
    # record each metric as an empty marker file whose name carries the value
    open('{0}/rmse_compare_{1}'.format(result_dir, rmse), "w+").close()

    mape = util.mean_absolute_percentage_error(prediction_denormalised,
                                               y_denormalised)
    open('{0}/mape_compare_{1}'.format(result_dir, mape), "w+").close()

    r2 = util.r2(prediction_denormalised, y_denormalised)
    open('{0}/r2_compare_{1}'.format(result_dir, r2), "w+").close()

    plt_file = '{0}/plot_compare_{1}.png'.format(result_dir, mape)
    util.plot_results_unordered_compare_with_mike11(prediction_denormalised,
                                                    y_denormalised, plt_file)

    # reuse the full-series MAPE, but write to a distinct file name so this
    # plot does not overwrite the comparison plot above
    plt_file = '{0}/plot_compare_unordered_{1}.png'.format(result_dir, mape)
    util.plot_results_unordered(prediction_denormalised, y_denormalised,
                                plt_file)
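Both examples invert the same min-max normalisation, y = y_norm * (y_max - y_min) + y_min. A quick self-contained sanity check of that formula (toy values, not project data):

import numpy as np

train_y_min, train_y_max = 2.0, 10.0
y_norm = np.array([0.0, 0.5, 1.0])
y_denormalised = y_norm * (train_y_max - train_y_min) + train_y_min
print(y_denormalised)  # [ 2.  6. 10.]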
Example No. 3
def baselineTest(sampleSet, trueSet):
    # map each crime description to the modal (most common) current sentence length
    currentSentenceModesMap = {}
    with open("currentCrimeSentenceModes.csv") as f:
        for key, val in csv.reader(f):
            currentSentenceModesMap[key] = val

    predictedSet = []
    for crime in sampleSet:
        predictedSentenceLength = int(currentSentenceModesMap[crime])
        predictedSet.append(predictedSentenceLength)

    percentErrors = []
    print("Baseline Test")
    print("Mean absolute test error:", util.mean_absolute_percentage_error(trueSet, predictedSet, percentErrors))
    print("Standard deviation:", np.std(np.array(percentErrors)))
Example No. 4

if __name__ == '__main__':

    ticker = 'SPY'

    data = pd.read_csv(stock_io.format_data.format(ticker), header=0).tail(1500).reset_index(drop=True)
    
    low_vol_prediction = pd.read_csv(stock_io.file_pred_low.format(ticker), header=None)
    high_vol_prediction = pd.read_csv(stock_io.file_pred_high.format(ticker), header=None)
    
    # the final forecast is the sum of the low- and high-volatility components
    final_prediction = pd.Series(low_vol_prediction[0]) + pd.Series(high_vol_prediction[0])

    # evaluate against the last 252 trading days (about one year)
    mse = mean_squared_error(data['close'].tail(252).values, final_prediction.values)
    rmse = mse ** 0.5
    mape = util.mean_absolute_percentage_error(data['close'].tail(252).reset_index(drop=True), final_prediction)
    
    accuracy_act, accuracy_pred = util.get_sim_accuracy(data, final_prediction)
    
    util.print_results(accuracy_act, accuracy_pred,
                       mse, rmse, mape)
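If the project-local util.mean_absolute_percentage_error is unavailable, scikit-learn (0.24 and later) ships an equivalent metric; note that it returns a fraction rather than a percentage, so scale by 100 to match the examples here:

import numpy as np
from sklearn.metrics import mean_absolute_percentage_error as sk_mape

actual = np.array([100.0, 200.0, 300.0])     # toy values
predicted = np.array([110.0, 190.0, 315.0])
print(100.0 * sk_mape(actual, predicted))    # ~6.67 (sklearn returns 0.0667)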
    
    


Example No. 5
def main():
    inmatesMap = mapCreator()
    featureVector = createFeatureVector()

    allInmateCrimes = []
    allInmateCrimesYValues = []
    allInmates = []
    allInmateYValues = []
    for inmate in inmatesMap:
        if 'IncarcerationDate' not in inmatesMap[inmate]:
            continue
        if inmatesMap[inmate]['PrisonReleaseDate'] == '':
            # no release date recorded: substitute a far-future sentinel (36525 days = 100 years)
            inmatesMap[inmate]['PrisonReleaseDate'] = inmatesMap[inmate]['IncarcerationDate'] + datetime.timedelta(days=36525)
        if (inmatesMap[inmate]["PrisonReleaseDate"] - inmatesMap[inmate]["IncarcerationDate"]).days <= 0:
            continue

        currentPerson = extractFeatures(inmatesMap[inmate], featureVector)

        sentenceLength = (inmatesMap[inmate]["PrisonReleaseDate"] - inmatesMap[inmate]["IncarcerationDate"]).days
        if 'CURRENT_OFFENSES' in inmatesMap[inmate]:
            for offense in inmatesMap[inmate]['CURRENT_OFFENSES']:
                crimeDescription = "CURRENT_" + offense["adjudicationcharge_descr"]
                allInmateCrimes.append(crimeDescription)
                allInmateCrimesYValues.append(sentenceLength)

        allInmates.append(currentPerson)
        # allInmateYValues.append(inmatesMap[inmate]["prisonterm"])
        allInmateYValues.append(sentenceLength)

    # train on the first 10,000 inmates
    X = allInmates[:10000]
    y = allInmateYValues[:10000]

    # epsilon-insensitive loss with epsilon=0 reduces to mean absolute error
    sgd = SGDRegressor(loss='epsilon_insensitive', fit_intercept=True, learning_rate='constant', max_iter=4, penalty=None, epsilon=0)
    sgd.fit(X, y)
    sgdPredictedSetY = []
    sgdTrueSetY = []
    # evaluate on the next 10,000 inmates
    for i in range(10000, 20000):
        sgdTrueSetY.append(allInmateYValues[i])
        sgdPredictedSetY.append(sgd.predict([allInmates[i]])[0])
    percentErrors = []
    print("SGD Mean absolute test error:", util.mean_absolute_percentage_error(sgdTrueSetY, sgdPredictedSetY, percentErrors))
    print("SGD Standard deviation:", np.std(np.array(percentErrors)))


    svr = svm.SVR()
    svr.fit(X, y)
    svrPredictedSetY = []
    svrTrueSetY = []
    for i in range(10000, 20000):
        predicted = svr.predict([allInmates[i]])[0]
        print("true value:", allInmateYValues[i])
        print("predicted value:", predicted)
        print("Difference in true and predicted values:", allInmateYValues[i] - predicted)
        svrTrueSetY.append(allInmateYValues[i])
        svrPredictedSetY.append(predicted)
    percentErrors = []
    print("SVR Mean absolute test error:", util.mean_absolute_percentage_error(svrTrueSetY, svrPredictedSetY, percentErrors))
    print("SVR Standard deviation:", np.std(np.array(percentErrors)))


    # baselineTest(allInmateCrimes[:10000], allInmateCrimesYValues[:10000])

    nbAllInmates = nbTestTransform(allInmates)
    nbAllInmateYValues = nbRound(allInmateYValues)
    nbTrainSet = [nbAllInmates[i] for i in range(0, 10000)]
    nbTrainSetY = [nbAllInmateYValues[i] for i in range(0, 10000)]
    nb = BernoulliNB()
    nb.fit(np.array(nbTrainSet), np.array(nbTrainSetY))
    nbTrueSentenceLength = []
    nbTestSentenceLength = []
    for i in range(10000, 20000):
        # nbRound scaled the targets down, so scale both truth and prediction back up
        nbTrueSentenceLength.append(nbAllInmateYValues[i] * 10.0)
        nbTestSentenceLength.append(nb.predict([nbAllInmates[i]])[0] * 10.0)
    # print(nbTrueSentenceLength)
    # print(nbTestSentenceLength)
    percentErrors = []
    print("Naive Bayes Mean absolute test error:", util.mean_absolute_percentage_error(nbTrueSentenceLength, nbTestSentenceLength, percentErrors))
    print("Naive Bayes standard deviation:", np.std(np.array(percentErrors)))
Example No. 6
def forecasting(config_main, config_pv):
    df, timestamps = dataImport(config_main, config_pv)

    config_main.TIMESTAMPS = timestamps

    df_train, df_validation, df_test, scaler = getParts(
        df, config_main, config_pv)

    # from here on the data are plain NumPy arrays
    trainX, trainY = buildSet(np.array(df_train), config_pv.LOOK_BACK,
                              config_pv.OUTPUT_SIZE)
    validationX, validationY = buildSet(np.array(df_validation),
                                        config_pv.LOOK_BACK,
                                        config_pv.OUTPUT_SIZE)
    testX, testY = buildSet(np.array(df_test), config_pv.LOOK_BACK,
                            config_pv.OUTPUT_SIZE)

    # plotInputDay(timestamps, trainY[:, 0], config_pv)

    if config_pv.LOAD_MODEL:
        model = loadModel(config_pv)
        history = None
    else:
        model, history = buildModelPv(trainX, trainY, validationX, validationY,
                                      config_pv)

    evalModel(model, testX, testY)

    # plotting
    trainPrediction = model.predict(trainX)
    testPrediction = model.predict(testX)
    valPrediction = model.predict(validationX)

    if history is not None:
        plotHistory(config_pv, history)

    plotPrediction(
        trainY,
        trainPrediction,
        testY,
        validationY,
        valPrediction,
        testPrediction,
        timestamps,
        config_pv,
    )
    plotPredictionPart(
        config_pv,
        trainY[24],
        trainPrediction[24],
        "1st day of train set",
        timestamps[24:config_pv.TIME_PER_DAY + 24],
        "train",
    )
    plotPredictionPart(
        config_pv,
        validationY[24],
        valPrediction[24],
        "3rd day of validation set",
        timestamps[len(trainX) + 24:len(trainX) + 24 + config_pv.TIME_PER_DAY],
        "validation",
    )
    plotPredictionPart(
        config_pv,
        testY[24],
        testPrediction[24],
        "1st day of test set",
        timestamps[len(trainX) + len(validationX) + 24:len(trainX) + 24 +
                   len(validationX) + config_pv.TIME_PER_DAY],
        "test",
    )
    # plotPredictionPartMult(
    #     config_pv,
    #     testY[0],
    #     testPrediction,
    #     "1st day of test set",
    #     timestamps[len(trainX) + len(validationX): len(trainX) + len(validationX) + config_pv.TIME_PER_DAY],
    #     "test"
    # )

    plotEcart(
        trainY,
        trainPrediction,
        validationY,
        valPrediction,
        testY,
        testPrediction,
        timestamps,
        config_pv,
    )
    # print error metrics for each split
    print("training\tMSE :\t{}".format(
        mean_squared_error(np.array(trainY), np.array(trainPrediction))))
    print("validation\tMSE :\t{}".format(
        mean_squared_error(np.array(validationY), np.array(valPrediction))))
    print("testing\t\tMSE :\t{}".format(
        mean_squared_error(np.array(testY), np.array(testPrediction))))

    print("training\tMAE :\t{}".format(
        mean_absolute_error(np.array(trainY), np.array(trainPrediction))))
    print("validation\tMAE :\t{}".format(
        mean_absolute_error(np.array(validationY), np.array(valPrediction))))
    print("testing\t\tMAE :\t{}".format(
        mean_absolute_error(np.array(testY), np.array(testPrediction))))

    print("training\tMAPE :\t{} %".format(
        mean_absolute_percentage_error(np.array(trainY), np.array(trainPrediction))))
    print("validation\tMAPE :\t{} %".format(
        mean_absolute_percentage_error(np.array(validationY), np.array(valPrediction))))
    print("testing\t\tMAPE :\t{} %".format(
        mean_absolute_percentage_error(np.array(testY), np.array(testPrediction))))
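buildSet is also project-local. A common look-back windowing construction consistent with its call signature, shown here under the assumption of a univariate series (the real implementation may instead slice a target column from a multivariate frame), is:

import numpy as np

def buildSet(series, look_back, output_size):
    # each sample pairs look_back consecutive steps of input with the
    # output_size steps that immediately follow as the target
    X, Y = [], []
    for i in range(len(series) - look_back - output_size + 1):
        X.append(series[i:i + look_back])
        Y.append(series[i + look_back:i + look_back + output_size])
    return np.array(X), np.array(Y)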