Example no. 1
import datetime

import numpy as np


def deserializer(preds, data, train_size=0.9, train_phase=False):
    '''
    Arguments:
    preds : Predictions to be converted back to their original values
    data : The full historic series; it is needed because the normalization was based on the full history
    train_size : Share of the data used for training; only used when train_phase is True
    train_phase : Set to True when a train-test split was made, so that the cut point (row) is computed from train_size; otherwise the cut point is 0

    Returns:
    A list with the dates, the original true values and the deserialized predictions (train phase),
    or the shifted dates and the true values (prediction phase)
    '''
    # price_matrix_creator is assumed to be defined elsewhere in the project
    price_matrix = np.array(price_matrix_creator(data))
    if train_phase:
        row = int(round(train_size * len(price_matrix)))
    else:
        row = 0
    # Each window is 30 observations long, so the first usable date is offset by 29
    date = data.index[row + 29:]
    date = np.reshape(date, (date.shape[0]))
    X_test = price_matrix[row:, :-1]
    y_test = price_matrix[row:, -1]
    preds_original = []
    preds = np.reshape(preds, (preds.shape[0]))
    for index in range(0, len(preds)):
        # Undo the normalization: predictions were trained on (value / window_first - 1),
        # so multiply (pred + 1) by the first value of the corresponding window
        pred = (preds[index] + 1) * X_test[index][0]
        preds_original.append(pred)
    preds_original = np.array(preds_original)
    if train_phase:
        return [date, y_test, preds_original]
    else:
        return [date + datetime.timedelta(days=1), y_test]
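A minimal usage sketch for deserializer, assuming the historic price series ser, the price_matrix_creator helper, the test matrix X_test and a trained model with a predict() method already exist in the project (none of them are defined in this excerpt):

import matplotlib.pyplot as plt

preds = model.predict(X_test)  # normalized model output on the test windows (assumed names)
date, y_test, preds_original = deserializer(preds, ser, train_size=0.9, train_phase=True)

plt.plot(date, y_test, label='actual')
plt.plot(date, preds_original, label='predicted')
plt.legend()
plt.show()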
Example no. 2
import datetime
import random

import pandas as pd
import matplotlib.pyplot as plt


def create_dummy_sets(start_date, coin_name):

    df_dummy_btc = {'date': [], 'price': []}

    end_date = datetime.date(year=2017, month=6, day=30)
    d = start_date
    delta = datetime.timedelta(days=1)
    price = 0.01

    # Each dummy coin trends differently during the change periods below
    if coin_name == 'btc':
        factor = 2
    elif coin_name == 'uppy':
        factor = 3
    elif coin_name == 'downy':
        factor = -2
    else:
        raise ValueError('unknown coin_name: {}'.format(coin_name))

    change_periods = [{
        'start': datetime.date(year=2015, month=2, day=20),
        'end': datetime.date(year=2015, month=4, day=20)
    }, {
        'start': datetime.date(year=2016, month=2, day=20),
        'end': datetime.date(year=2016, month=4, day=20)
    }, {
        'start': datetime.date(year=2017, month=4, day=20),
        'end': datetime.date(year=2017, month=5, day=20)
    }, {
        'start': datetime.date(year=2017, month=6, day=20),
        'end': datetime.date(year=2017, month=7, day=20)
    }]

    while d <= end_date:
        df_dummy_btc['date'].append(d)
        df_dummy_btc['price'].append(price)

        # Base daily change, plus an extra push (up or down) inside a change period
        price_delta = 0.5 + random.uniform(-1, 1)
        is_change_period = False
        for n in change_periods:
            if n['start'] <= d <= n['end']:
                is_change_period = True

        if is_change_period:
            price_delta += factor * random.random()

        price += price_delta
        d += delta

    df = pd.DataFrame(df_dummy_btc)

    df.to_csv('dummy_{}.csv'.format(coin_name), index=False)

    plt.plot(df['date'], df['price'], label=coin_name)
    plt.xlabel('date')
    plt.ylabel('price')
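A short usage sketch, reusing the imports above: the three coin names match the factor branches in the function, while the start date is an arbitrary choice for illustration.

start = datetime.date(year=2015, month=1, day=1)  # illustrative start date
for coin in ['btc', 'uppy', 'downy']:
    create_dummy_sets(start, coin)  # writes dummy_<coin>.csv and plots the series
plt.legend()
plt.show()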
    df1 = df1[booleanos]
elif pair[0] == 2 and pair[1] == 0:
    booleanos = crearBooleanos(dates_out_2_0)
    df1 = df1[booleanos]

minimum_date = min(df1.date)
maximum_date = max(df1.date)
date_aux = minimum_date
# Fill the holes in the 20-minute grid with the mean traffic volume
# observed at the same time of day
while date_aux != maximum_date:
    if not (date_aux == df1['date']).any():
        valores_trafficvolume = []
        for row in df1.values:
            if row[0].time() == date_aux.time():
                valores_trafficvolume.append(row[1])
        df1.loc[len(df1)] = [date_aux, np.mean(valores_trafficvolume)]
    date_aux += datetime.timedelta(minutes=20)
df1 = df1.sort_index()

for day in days:
    df1_aux = df1.copy()
    count += 1
    print("CUENTA : ", count)
    minimum_date = maximum_date
    maximum_date = datetime.datetime(2016, 10, int(day[8:10]), 6, 0, 0)
    date_aux = minimum_date
    while date_aux != maximum_date:
        if not (date_aux == df1_aux['date']).any():
            valores_trafficvolume = []
            for row in df1_aux.values:
                if row[0].time() == date_aux.time():
                    valores_trafficvolume.append(row[1])
            # The append belongs inside the if, otherwise existing rows get duplicated
            df1_aux.loc[len(df1_aux)] = [date_aux, np.mean(valores_trafficvolume)]
        date_aux += datetime.timedelta(minutes=20)
where intersection_id = 'A' AND tollgate_id = 2
order by time_window;""")
rows = cur.fetchall()
df = pd.DataFrame.from_records(rows, columns=['date', 'avg_travel_time'])
# df = df[(df.avg_travel_time > 50) & (df.avg_travel_time < 150)]
minimum_date = min(df.date)
maximum_date = max(df.date)
date_aux = minimum_date
# Fill missing 20-minute slots with the mean travel time observed at the same time of day;
# the increment must stay outside the if, otherwise the loop never advances past existing slots
while date_aux != maximum_date:
    if not (date_aux == df['date']).any():
        valores_avg_travel = []
        for row in df.values:
            if row[0].time() == date_aux.time():
                valores_avg_travel.append(row[1])
        df.loc[len(df)] = [date_aux, np.mean(valores_avg_travel)]
    date_aux += datetime.timedelta(minutes=20)
df = df.sort_index()
indexes = list(range(len(df)))
print(df.shape)
dates = pd.DatetimeIndex(df['date'].values)
plt.plot(indexes, df['avg_travel_time'].values)
plt.xticks(indexes, dates.time, rotation="vertical")
# plt.show()
serie = pd.Series(df['avg_travel_time'].values, index=df['date'])
autocorrelation_plot(serie)
# plt.show()
model = ARIMA(serie, order=(3, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
cur.execute(query)
rows = cur.fetchall()
df1 = pd.DataFrame.from_records(rows, columns=['date', 'avg_travel_time'])
df1 = df1[(df1.avg_travel_time > 50) & (df1.avg_travel_time < 150)]
df1 = df1.reset_index(drop=True)
minimum_date = min(df1.date)
maximum_date = max(df1.date)
date_aux = minimum_date
# Fill missing 20-minute slots with the mean travel time observed at the same time of day
while date_aux != maximum_date:
    if not (date_aux == df1['date']).any():
        valores_avg_travel = []
        for row in df1.values:
            if row[0].time() == date_aux.time():
                valores_avg_travel.append(row[1])
        df1.loc[len(df1)] = [date_aux, np.mean(valores_avg_travel)]
    date_aux += datetime.timedelta(minutes=20)
df1 = df1.sort_index()

for day in days:
    df1_aux = df1.copy()
    count += 1
    print("CUENTA : ", count)
    minimum_date = maximum_date
    maximum_date = datetime.datetime(2016, 10, int(day[8:10]), 6, 0, 0)
    date_aux = minimum_date
    while date_aux != maximum_date:
        if not (date_aux == df1_aux['date']).any():
            valores_avg_travel = []
            for row in df1_aux.values:
                if row[0].time() == date_aux.time():
                    valores_avg_travel.append(row[1])
            # Keep the append inside the if so existing rows are not duplicated
            df1_aux.loc[len(df1_aux)] = [date_aux, np.mean(valores_avg_travel)]
        date_aux += datetime.timedelta(minutes=20)
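The three fragments above all repeat the same gap-filling step: walk the 20-minute grid between the first and last timestamp and, for every slot with no row, insert the mean of the values observed at the same time of day. The sketch below is a self-contained restatement of that pattern; the helper name fill_20min_gaps and the value_col parameter are illustrative and do not appear in the original scripts.

import datetime

import numpy as np


def fill_20min_gaps(df, value_col='value'):
    # Assumes df has exactly the columns ['date', value_col], with 'date' on a
    # 20-minute grid that may contain holes.
    date_aux = df['date'].min()
    maximum_date = df['date'].max()
    while date_aux != maximum_date:
        if not (date_aux == df['date']).any():
            # Mean of all observations sharing the same time of day
            same_time = [v for d, v in zip(df['date'], df[value_col])
                         if d.time() == date_aux.time()]
            df.loc[len(df)] = [date_aux, np.mean(same_time)]
        date_aux += datetime.timedelta(minutes=20)
    return df.sort_values('date').reset_index(drop=True)


# e.g. df1 = fill_20min_gaps(df1, value_col='avg_travel_time')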