Ejemplo n.º 1
0
def plot_daily_data(unit_number):
    """Plot hourly entries against the hour for one unit, one line per date.

    The data is loaded with ``load_data`` and passed through the datetime,
    datetime-string, weekday and is-weekend enrichment helpers in that order.
    The unit is chosen indirectly: ``unit_number`` is a positional row index
    into the enriched frame, and the ``UNIT`` value found on that row selects
    every row belonging to the same unit. Only rows whose ``is_weekend``
    value equals 0 are plotted.

    Args:
        unit_number: Positional row index (as used by ``.iloc``) whose
            ``UNIT`` value identifies the unit to plot.

    Returns:
        The object bound to ``plt`` (presumably ``matplotlib.pyplot`` --
        confirm against the file's imports), after ``plt.show()`` was called.
    """
    # Run every enrichment step in sequence on the freshly loaded data.
    frame = load_data()
    for enrich in (add_datetime, add_datetime_str, add_weeksday, add_is_weekend):
        frame = enrich(frame)

    # Keep the rows of the unit found at the requested row position, then
    # drop everything flagged as weekend.
    same_unit = frame['UNIT'] == frame.iloc[unit_number]['UNIT']
    unit_rows = frame[same_unit]
    weekday_rows = unit_rows[unit_rows['is_weekend'] == 0]
    per_date = weekday_rows.groupby('DATEn')

    figure, axes = plt.subplots()
    axes.margins(0.05)  # 5% padding around the autoscaled data
    for date_label, date_rows in per_date:
        # One line per date; matplotlib cycles the color for each call.
        axes.plot(
            date_rows.Hour,
            date_rows.ENTRIESn_hourly,
            marker='o',
            linestyle='-',
            ms=12,
            label=date_label,
        )

    plt.xlabel("Time(hour)", fontsize=20)
    plt.ylabel("Number of Entries", fontsize=20)
    plt.title("Entries Versus Time In One Day", fontsize=20)
    # A fixed-position note stands in for a legend.
    plt.text(2, 20000, "One color for a day", fontsize=16)
    plt.show()
    return plt
Ejemplo n.º 2
0
def run_for(n):
    """Score three entry-prediction models over ``n`` train/test splits.

    Setup (once): the data is loaded and enriched through ``linear_models``
    helpers, then the prepared frame and the three feature lists are written
    to files in the parent directory (``../``).

    Each iteration re-reads the prepared CSV, splits it with
    ``train_test_split`` (10% held out as test data), writes both parts to
    CSV and then evaluates, in this order:

    1. a linear model with a single time interval ``(0, 23.999)``;
    2. a "two level" model with twelve intervals (six four-hour windows for
       ``linear_models.works_day`` followed by the same six for
       ``linear_models.weekend``), with ``'is_weekend'`` removed from the
       feature list;
    3. the non-parametric model with ``k = 10`` and ``delta = 2``.

    NOTE(review): every intermediate result is still round-tripped through
    the same CSV files as before. A frame written with its index and read
    back gains an extra column, and the helpers may rely on that layout, so
    do not collapse these round-trips without checking the helpers.

    Args:
        n: Number of splits to run.

    Returns:
        A ``pandas.DataFrame`` with one row per split, labelled
        ``'Split 1'`` .. ``'Split n'``, holding the sum-of-squares and
        R-squared values of the three models.

    Raises:
        ImportError: If ``train_test_split`` cannot be imported from either
            scikit-learn location.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
    # the current location and fall back for old installs. Resolved first so
    # a missing dependency fails before any file is written.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    split_labels = []
    sse_linear = []
    sse_two_level = []
    sse_nonparametric = []
    r2_linear = []
    r2_two_level = []
    r2_nonparametric = []

    # Build the enriched frame once; the loop below only reads it from disk.
    dataframe = linear_models.load_data()
    new_dataframe = linear_models.create_new_dateframe_0(dataframe)
    new_dataframe = linear_models.add_datetime_str(new_dataframe)
    new_dataframe = linear_models.add_dec_time(new_dataframe)
    new_dataframe = linear_models.add_weeksday(new_dataframe)
    new_dataframe = linear_models.add_is_weekend(new_dataframe)
    new_dataframe, dummy_unit_list = linear_models.add_dummy_unit(new_dataframe)

    features_list = ['rain', 'meantempi', 'dec_time', 'meanwindspdi', 'precipi', 'is_weekend']
    features_list.extend(dummy_unit_list)
    numeric_features_list_with_weight = [
        ('meantempi', 1),
        ('dec_time', 2),
        ('meanwindspdi', 1),
        ('precipi', 1),
    ]
    categorical_features = ['unit', 'rain', 'is_weekend']

    linear_models.write_features_to_file(features_list, '../features_list')
    nonparametric_model.write_features_with_weight_to_file(
        numeric_features_list_with_weight, '../numeric_features_list_weight')
    nonparametric_model.write_features_to_file(categorical_features, '../categorical_features')
    new_dataframe.to_csv('../new_weather_turnstile.csv')

    test_proportion = 0.1
    delta = 2
    k = 10
    four_hour_windows = ((0, 4), (4, 8), (8, 12), (12, 16), (16, 20), (20, 23.999))

    for i in range(n):
        split_labels.append('Split' + ' ' + str(i + 1))
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Testing " + str(i + 1) + " begin!")
        print("Data spliting...")
        dataframe = pandas.read_csv('../new_weather_turnstile.csv')
        train_part, test_part = train_test_split(dataframe, test_size=test_proportion)
        train_part.to_csv('../train_new_weather_turnstile.csv')
        test_part.to_csv('../test_new_weather_turnstile.csv')

        print("Test simple linear model...")
        time_intervals = [(0, 23.999, linear_models.weeks_day)]
        features_list = linear_models.read_features_from_file('../features_list')
        dataframe = pandas.read_csv('../train_new_weather_turnstile.csv')
        coeff_matrix = linear_models.create_coeff_matrix(features_list, time_intervals, dataframe)
        coeff_matrix.to_csv('../coeff_matrix.csv')
        # Features and coefficients are reloaded from disk before predicting,
        # exactly as the original flow did.
        features_list = linear_models.read_features_from_file('../features_list')
        coeff_matrix = pandas.read_csv('../coeff_matrix.csv')
        test_dataframe = pandas.read_csv('../test_new_weather_turnstile.csv')
        predictions = linear_models.make_predictions_dataframe(
            test_dataframe, time_intervals, features_list, coeff_matrix)
        sse_linear.append(
            linear_models.compute_sum_square(test_dataframe['hourly_entries'], predictions))
        r2_linear.append(
            linear_models.compute_r_squared(test_dataframe['hourly_entries'], predictions))

        print("Test level two regression model...")
        # Six work-day windows first, then the same six for the weekend.
        time_intervals = [
            (start, end, day_kind)
            for day_kind in (linear_models.works_day, linear_models.weekend)
            for start, end in four_hour_windows
        ]
        features_list = linear_models.read_features_from_file('../features_list')
        features_list.remove('is_weekend')
        dataframe = pandas.read_csv('../train_new_weather_turnstile.csv')
        coeff_matrix = linear_models.create_coeff_matrix(features_list, time_intervals, dataframe)
        coeff_matrix.to_csv('../coeff_matrix.csv')
        features_list = linear_models.read_features_from_file('../features_list')
        features_list.remove('is_weekend')
        coeff_matrix = pandas.read_csv('../coeff_matrix.csv')
        test_dataframe = pandas.read_csv('../test_new_weather_turnstile.csv')
        predictions = linear_models.make_predictions_dataframe(
            test_dataframe, time_intervals, features_list, coeff_matrix)
        sse_two_level.append(
            linear_models.compute_sum_square(test_dataframe['hourly_entries'], predictions))
        r2_two_level.append(
            linear_models.compute_r_squared(test_dataframe['hourly_entries'], predictions))

        print("Test non-parametric model...")
        numeric_features_list_with_weight = (
            nonparametric_model.read_features_with_weight_from_file(
                '../numeric_features_list_weight'))
        categorical_features = nonparametric_model.read_features_from_file(
            '../categorical_features')
        dataframe = pandas.read_csv('../train_new_weather_turnstile.csv')
        test_dataframe = pandas.read_csv('../test_new_weather_turnstile.csv')
        predictions = nonparametric_model.prediction_dataframe(
            test_dataframe, dataframe, categorical_features,
            numeric_features_list_with_weight, k, delta)
        sse_nonparametric.append(
            nonparametric_model.compute_sum_square(test_dataframe['hourly_entries'], predictions))
        r2_nonparametric.append(
            nonparametric_model.compute_r_squared(test_dataframe['hourly_entries'], predictions))

    # One column per metric/model pair, one row per split label.
    result = pandas.DataFrame({
        'SSE linear': pandas.Series(sse_linear, split_labels),
        'SSE two level': pandas.Series(sse_two_level, split_labels),
        'SSE nonparametric': pandas.Series(sse_nonparametric, split_labels),
        'R2 linear': pandas.Series(r2_linear, split_labels),
        'R2 two level': pandas.Series(r2_two_level, split_labels),
        'R2 nonparametric': pandas.Series(r2_nonparametric, split_labels),
    })
    return result