Code example #1
def test_single_model(X_test, y_test, model_path, is_draw):
    #######
    # test
    #######
    print('testing ...')
    parts = model_path.strip().split('_')
    PowerLSTM_config = {
        'NAME': 'PowerLSTM',
        'LSTM_1_DIM': int(parts[parts.index('lstm1') + 1]),
        'LSTM_2_DIM': int(parts[parts.index('lstm2') + 1]),
        'DENSE_DIM': int(parts[parts.index('dense') + 1]),
        'DROP_RATE': float(parts[parts.index('drop') + 1]),
        'LR': float(parts[parts.index('lr') + 1]),
        'SS': parts[parts.index('ss') + 1],
        'Freq': parts[parts.index('freq') + 1]
    }
    model = build_model(PowerLSTM_config, X_test.shape[-1])
    model.load_weights(model_path)
    y_pred = model.predict(X_test)[:, 0]
    y_pred = np.exp(y_pred) - 1

    # metric
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('MSE:', mse)
    print('MAPE:', mape)
    print('R2:', r2)

    if is_draw:
        pyplot.plot(y_test)
        pyplot.plot(y_pred, color='red')
        pyplot.show()

    return mse, mape
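Every snippet in this collection relies on a project-level mean_absolute_percentage_error helper that is never shown. A minimal sketch, assuming only what the call sites imply (y_true first, y_pred second, a fractional result such as 0.0951); the eps floor is my addition to avoid division by zero, and the real helper may differ:

import numpy as np

def mean_absolute_percentage_error(y_true, y_pred, eps=1e-8):
    # Hypothetical helper: mean of |(y_true - y_pred) / y_true|, returned as a fraction.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # eps guards against zero targets; the project's actual implementation is not shown.
    return np.mean(np.abs((y_true - y_pred) / np.maximum(np.abs(y_true), eps)))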
Code example #2
def draw_comparison():

    gbt_filename = 'apt29_summer_mse0.0001_mape0.0951.pkl'
    svm_filename = 'apt29_summer_mse0.0001_mape0.1039.pkl'
    lstm_filename = 'apt29_summer_mse0.00006923_mape0.0894_r0.5248_ly1_189_ly2_169.pkl'

    # gbt_filename = 'apt69_spring_mse0.0183_mape0.0878.pkl'
    # svm_filename = 'apt69_spring_mse0.0148_mape0.0783.pkl'
    # lstm_filename = 'apt69_spring_mse0.01271775_mape0.0711_r0.7201_ly1_183_ly2_175.pkl'

    apt = int(gbt_filename.split('_')[0][3:])
    ss = gbt_filename.split('_')[1].title()

    gbt_res = cPickle.load(open(GBT_RES_DIR % '1h' + gbt_filename, 'rb'))
    svm_res = cPickle.load(open(SVM_RES_DIR % '1h' + svm_filename, 'rb'))
    lstm_res = cPickle.load(open(LSTM_RES_DIR % '1h' + lstm_filename, 'rb'))
    y_test, y_pred_gbt, y_pred_svm, y_pred_lstm = \
        gbt_res['y_test'], gbt_res['y_pred'], svm_res['y_pred'], lstm_res['y_pred']

    gbt_mse = mean_squared_error(y_test, y_pred_gbt)
    gbt_mape = mean_absolute_percentage_error(y_test, y_pred_gbt)
    svm_mse = mean_squared_error(y_test, y_pred_svm)
    svm_mape = mean_absolute_percentage_error(y_test, y_pred_svm)
    lstm_mse = mean_squared_error(y_test, y_pred_lstm)
    lstm_mape = mean_absolute_percentage_error(y_test, y_pred_lstm)

    plt.plot(y_test, color='black', label='Original', linestyle='--')
    plt.plot(y_pred_gbt, color='green', label='GBT')
    plt.plot(y_pred_svm, color='blue', label='SVR')
    plt.plot(y_pred_lstm, color='red', label='PowerLSTM')
    # plt.title('Apartment %d (%s)\nGBT mse:%s, mape:%s\nSVM mse:%s, mape:%s\nLSTM mse:%s, mape:%s'
    #           % (apt, ss, gbt_mse, gbt_mape, svm_mse, svm_mape, lstm_mse, lstm_mape),
    #           fontsize=16)
    print 'gbt_mse:', gbt_mse
    print 'svm_mse:', svm_mse
    print 'lstm_mse:', lstm_mse

    print 'gbt_mape:', gbt_mape
    print 'svm_mape:', svm_mape
    print 'lstm_mape:', lstm_mape

    plt.title('Apartment %d (%s)' % (apt, ss), fontsize=16)
    plt.ylabel('Energy consumption', fontsize=14)
    plt.xlabel('Time (hour)', fontsize=14)
    plt.legend()
    plt.savefig(FIG_DIR + 'season_apt%s_%s' % (apt, ss), dpi=300, bbox_inches='tight')
    plt.show()
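GBT_RES_DIR, SVM_RES_DIR, LSTM_RES_DIR and FIG_DIR come from the project's configuration and are not shown. Judging from the GBT_RES_DIR % '1h' substitution, they are directory templates with a frequency placeholder; a hypothetical sketch:

# Hypothetical config values; the real paths live in the PowerNet config module.
GBT_RES_DIR = 'results/gbt_%s/'    # '%s' is filled with the frequency, e.g. '1h'
SVM_RES_DIR = 'results/svm_%s/'
LSTM_RES_DIR = 'results/lstm_%s/'
FIG_DIR = 'figures/'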
Code example #3
def draw_pred():
    freq = '1h'
    month = 'aug'

    res_t = pickle.load(open(
        GBT_RES_DIR % freq +
        'fp_all_truth_%s_mse119.2586_mape0.1352.pkl' % month, 'rb'),
                        encoding='latin1')
    res_e = pickle.load(open(
        GBT_RES_DIR % freq +
        'fp_all_estimate_%s_mse268.7807_mape0.2026.pkl' % month, 'rb'),
                        encoding='latin1')

    y_test = res_t['y_test']
    y_pred_t = res_t['y_pred']
    y_pred_e = res_e['y_pred']

    mse_t = mean_squared_error(y_test, y_pred_t)
    mape_t = mean_absolute_percentage_error(y_test, y_pred_t)

    mse_e = mean_squared_error(y_test, y_pred_e)
    mape_e = mean_absolute_percentage_error(y_test, y_pred_e)

    print('mse:', mse_t, mse_e)
    print('mape:', mape_t, mape_e)

    plt.figure(figsize=(5, 4))
    plt.plot(y_test, color='black', linestyle='--', label='Original')
    plt.plot(y_pred_t,
             color='blue',
             linestyle='-',
             label='Prediction (Use ground-truth)')
    plt.plot(y_pred_e,
             color='red',
             linestyle='-',
             label='Prediction (Use estimation)')
    plt.legend(fontsize=8)
    plt.title('All apartments (SUM, August)', fontsize=14)
    plt.xlabel('Time (Hour)', fontsize=14)
    plt.ylabel('Energy consumption', fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.savefig(FIG_DIR + 'fp_pred_%s_%s' % (freq, month),
                dpi=300,
                bbox_inches='tight')
    plt.show()
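The encoding='latin1' argument matters here: the result pickles were written by the Python 2 (cPickle) snippets in this collection, and Python 3 cannot load Python 2 pickles containing NumPy arrays with its default ASCII encoding. A minimal load sketch, reusing the hypothetical GBT_RES_DIR template above:

import pickle

GBT_RES_DIR = 'results/gbt_%s/'    # assumed template, as above
with open(GBT_RES_DIR % '1h' + 'fp_all_truth_aug_mse119.2586_mape0.1352.pkl', 'rb') as fh:
    res = pickle.load(fh, encoding='latin1')   # 'latin1' decodes the Python 2 byte strings
print(res.keys())                              # expected: dict_keys(['y_test', 'y_pred'])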
Code example #4
    def fit_best_params(self, y_series, search_params: list) -> None:
        '''
        Grid-search SARIMA parameters: fit a model for each candidate set,
        evaluate it on a validation set, then retrain the model with the
        best parameters found.

        Args:
            y_series (np.array): Series used as the validation set.
            search_params (list): Candidate parameter sets to fit and
                evaluate, of the form
                [((p, d, q), (s, p, d, q)),
                 ((p, d, q), (s, p, d, q)), ...]
        '''
        
        results_preds = []
        for param in tqdm(search_params, desc="Finding best parameters"):
            # Fit a model with the candidate params
            self.set_params({'pdq':param[0], 'spdq':param[1]})
            
            try:
                self.fit()
                predictions = self.model.get_forecast(steps=y_series.size)
                mape_result = mean_absolute_percentage_error(y_series, predictions.predicted_mean)

            except Exception:
                self.model.aic = np.nan
                mape_result = np.nan

            # Store the configuration and results, then append them to results_preds.
            # AIC (Akaike information criterion) estimates the relative quality of the
            # fitted models; both AIC and MAPE are recorded so either can drive selection.
            row = {'pdq': param[0],
                   'spdq': param[1],
                   'Aic': self.model.aic,
                   'Mape': mape_result}

            results_preds.append(row)
            
        self.search_results = pd.DataFrame(results_preds).dropna()
        
        # Select the row with the smallest AIC
        best_params = self.search_results.sort_values(by='Aic').iloc[0]
        
        # Fit a model with the best params    
        self.set_params({'pdq':best_params['pdq'],
                         'spdq':best_params['spdq']})
        self.set_xseries(np.concatenate((self.x_series, y_series)))
        self.fit()
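fit_best_params expects search_params as a list of ((p, d, q), (s, p, d, q)) tuples. A sketch of how such a grid might be built with itertools.product; the ranges and the 12-step season are illustrative assumptions, not the project's actual search space:

import itertools

orders = list(itertools.product(range(2), range(2), range(2)))      # (p, d, q) candidates
seasonal_orders = [(12, p, d, q) for (p, d, q) in orders]            # (s, p, d, q), 12-step season assumed
search_params = list(itertools.product(orders, seasonal_orders))
# model.fit_best_params(y_val, search_params)                        # hypothetical usage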
Code example #5
def show_results(models, fits):
    best_params = [model.best_params_ for model in models]

    scores = []
    for target in TARGET_COLUMNS:
        score = mean_absolute_percentage_error(fits[target],
                                               fits[f"{target}_fitted"])
        scores.append(score)

    print("\n==================== Train results ====================")
    print(f"scores: {scores}")
    print(f"total score: {np.mean(scores)}")
    for params in best_params:
        print(params)
    print("========================= End =========================")
Code example #6
File: aggregation.py  Project: nuaaxc/PowerNet
def draw(freq):
    mse_all = []
    mape_all = []
    apts = []

    for filename in [
            'agg_seed_90_apt_2_summer_mse0.0552_mape0.4141.pkl',
            'agg_seed_90_apt_4_summer_mse0.0139_mape0.3552.pkl',
            'agg_seed_90_apt_8_summer_mse0.0295_mape0.3200.pkl',
            'agg_seed_90_apt_16_summer_mse0.0134_mape0.2330.pkl',
            'agg_seed_90_apt_32_summer_mse0.0087_mape0.1598.pkl',
            'agg_seed_90_apt_64_summer_mse0.0071_mape0.1076.pkl',
            'agg_seed_90_apt_114_summer_mse0.0038_mape0.0935.pkl'
    ]:

        apt = filename.split('_')[4]
        # ss = filename.split('_')[1].title()

        res = cPickle.load(open(GBT_RES_DIR % freq + filename, 'rb'))

        y_test = res['y_test']
        y_pred = res['y_pred']

        mse = mean_squared_error(y_test, y_pred)
        mape = mean_absolute_percentage_error(y_test, y_pred)

        mse_all.append(mse)
        mape_all.append(mape)
        apts.append(apt)

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    ax1.plot(mse_all)
    # ax1.set_title('Apartment %s' % apt)
    ax1.set_xlabel('Granularity')
    ax1.set_xticks(range(len(apts)))
    ax1.set_xticklabels(apts)
    ax1.set_ylabel('MSE')

    ax2.plot(mape_all)
    # ax2.set_title('Apartment %s' % apt)
    ax2.set_xlabel('Granularity')
    ax2.set_xticks(range(len(apts)))
    ax2.set_xticklabels(apts)
    ax2.set_ylabel('MAPE')

    f.suptitle('Aggregation Performance', fontsize=16)
    plt.savefig(FIG_DIR + 'agg', dpi=300, bbox_inches='tight')

    plt.show()
Code example #7
File: arima.py  Project: nuaaxc/PowerNet
def train_predict():
    train, test = load_data_arima('/Users/kevin/PycharmProjects/TimeSeries/data/Apt1_2016.csv')
    print len(train), len(test)
    model = ARIMA(train, order=(5, 1, 2))
    model_fit = model.fit(disp=0)
    predictions = model_fit.forecast(steps=len(test))[0]
    # predictions = np.roll(predictions, -1)
    test = test.values
    error = mean_squared_error(test, predictions)
    print 'MSE:', error
    print 'MAPE:', mean_absolute_percentage_error(test, predictions)

    # plot
    pyplot.plot(test)
    pyplot.plot(predictions, color='red')
    pyplot.show()
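The snippet above targets Python 2 and the legacy statsmodels ARIMA interface (fit(disp=0), forecast() returning a tuple). A hedged sketch of the same workflow against the current statsmodels.tsa.arima.model.ARIMA API, assuming load_data_arima still returns a (train, test) pair:

import numpy as np
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

def train_predict_modern(train, test):
    # Same (5, 1, 2) order as above; the modern fit() takes no disp argument.
    model_fit = ARIMA(train, order=(5, 1, 2)).fit()
    predictions = np.asarray(model_fit.forecast(steps=len(test)))
    test = np.asarray(test)
    print('MSE:', mean_squared_error(test, predictions))
    print('MAPE:', mean_absolute_percentage_error(test, predictions))

    pyplot.plot(test)
    pyplot.plot(predictions, color='red')
    pyplot.show()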
Code example #8
def test(X_test, y_test, model, model_path, is_draw):
    print('testing ...')
    model.load_weights(model_path)
    y_pred = model.predict(X_test)[:, 0]
    y_pred = np.exp(y_pred) - 1

    # metric
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    if is_draw:
        pyplot.plot(y_test)
        pyplot.plot(y_pred, color='red')
        pyplot.show()

    return mse, mape, r2
Code example #9
def test(X_test_energy, X_test_weather, y_test, model, model_path, is_draw):
    model.load_weights(model_path)

    print('Testing ...')
    y_pred = teaching(model, X_test_energy, X_test_weather)
    # y_pred = teaching_no(model, X_test_energy, X_test_weather)

    print('Evaluating ...')
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print('MSE:', mse)
    print('MAPE:', mape)
    print('R2:', r2)

    if is_draw:
        pyplot.plot(y_test)
        pyplot.plot(y_pred, color='green')
        pyplot.show()

    return mse, mape, r2
Code example #10
File: lstm.py  Project: PaschalisSk/stock-prediction
def main(_run,
         stock_file,
         days_back,
         days_forward,
         max_epochs,
         early_stopping_threshold,
         num_neurons,
         num_hidden_layers,
         seed,
         learning_rate,
         batch_size,
         activation,
         optimizer,
         kernel_init,
         regularization,
         loss,
         timesteps,
         use_sent_and_trends=False):
    # Read the stocks csv into a dataframe
    stock = data.Stocks(stock_file)
    stock.calc_patel_TI(days_back)
    if use_sent_and_trends:
        # If we have a sentiment file add it to the stock df
        sentiments = pd.read_csv(
            '../data/nytarticles/microsoft.2013-12-31.2018-12-31.imputed.sent.csv',
            index_col='date')
        trends = pd.read_csv(
            '../data/trends/msft.2013-12-31.2018-12-31.fixed.dates.csv',
            index_col='date')
        sent_trends = pd.merge(sentiments,
                               trends,
                               how='left',
                               left_index=True,
                               right_index=True)
        sent_trends[
            'sent_trends'] = sent_trends['sentiment'] * sent_trends['msft']
        import numpy as np
        sent_trends['randNumCol'] = np.random.randint(1, 100,
                                                      sent_trends.shape[0])
        stock.df = pd.merge(stock.df,
                            sent_trends,
                            how='left',
                            left_index=True,
                            right_index=True)
        stock.df.drop(['sentiment', 'msft', 'sent_trends'],
                      axis='columns',
                      inplace=True)

    stock.shift(days_forward)

    # Create the model
    model = K.Sequential()

    # Create the kernel initializer with the seed
    if kernel_init == 'glorot_uniform':
        kernel_initializer = K.initializers.glorot_uniform(seed)
    else:
        raise NotImplementedError

    # Add the layers
    return_sequences = True
    if num_hidden_layers == 1:
        return_sequences = False
    data_dim = stock.raw_values()['X'].shape[1]
    model.add(
        K.layers.LSTM(num_neurons,
                      input_shape=(timesteps, data_dim),
                      activation=activation,
                      return_sequences=return_sequences,
                      kernel_initializer=kernel_initializer))

    for i in range(num_hidden_layers - 1):
        # If not in the last layer return sequences
        if i != num_hidden_layers - 2:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              return_sequences=True,
                              kernel_initializer=kernel_initializer))
        else:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              kernel_initializer=kernel_initializer))

    # Add output layer
    model.add(
        K.layers.Dense(1,
                       activation='linear',
                       kernel_initializer=kernel_initializer))

    # Define Root Mean Squared Relative Error metric
    def root_mean_squared_relative_error(y_true, y_pred):
        squared_relative_error = K.backend.square(
            (y_true - y_pred) /
            K.backend.clip(K.backend.abs(y_true), K.backend.epsilon(), None))
        mean_squared_relative_error = K.backend.mean(squared_relative_error,
                                                     axis=-1)
        return K.backend.sqrt(mean_squared_relative_error)

    # Define Direction Accuracy metric
    def direction_accuracy(y_true, y_pred):
        # sign returns either -1 (if <0), 0 (if ==0), or 1 (if >0)
        true_signs = K.backend.sign(y_true[days_forward:] -
                                    y_true[:-days_forward])
        pred_signs = K.backend.sign(y_pred[days_forward:] -
                                    y_true[:-days_forward])

        equal_signs = K.backend.equal(true_signs, pred_signs)
        return K.backend.mean(equal_signs, axis=-1)

    # Create the optimizer
    if optimizer == 'adagrad':
        optimizer = K.optimizers.Adagrad(learning_rate)
    elif optimizer == 'adam':
        optimizer = K.optimizers.Adam(learning_rate)
    else:
        raise NotImplementedError

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[
                      'mean_absolute_percentage_error', 'mean_absolute_error',
                      root_mean_squared_relative_error, 'mean_squared_error',
                      direction_accuracy
                  ])

    # Create the logging callback
    # The metrics are logged in the run's metrics and at heartbeat events
    # every 10 secs they get written to mongodb
    def on_epoch_end_metrics_log(epoch, logs):
        for metric_name, metric_value in logs.items():
            # The validation set keys have val_ prepended to the metric,
            # add train_ to the training set keys
            if 'val' not in metric_name:
                metric_name = 'train_' + metric_name

            _run.log_scalar(metric_name, metric_value, epoch)

    metrics_log_callback = K.callbacks.LambdaCallback(
        on_epoch_end=on_epoch_end_metrics_log)

    callbacks_list = [
        K.callbacks.EarlyStopping(monitor='val_loss',
                                  patience=early_stopping_threshold),
        K.callbacks.ModelCheckpoint(filepath='../models/best_model.h5',
                                    monitor='val_loss',
                                    save_best_only=True), metrics_log_callback
    ]

    model.fit(stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['X'],
              stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['y'],
              epochs=max_epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=callbacks_list,
              validation_data=(stock.raw_values_lstm_wrapper(
                  dataset='val', norm=True, timesteps=timesteps)['X'],
                               stock.raw_values_lstm_wrapper(
                                   dataset='val',
                                   norm=True,
                                   timesteps=timesteps)['y']))

    # Calculate metrics for normalized values
    test_norm_metrics = model.evaluate(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'],
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['y'],
        verbose=0)

    # Log the metrics from the normalized values
    for metric in zip(model.metrics_names, test_norm_metrics):
        _run.log_scalar('test_norm_' + metric[0], metric[1])

    # Now calculate and save the unnormalised metrics
    # Predict returns normalised values
    y_pred_norm = model.predict(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'])
    # Scale the output back to the actual stock price
    y_pred = stock.denorm_predictions(y_pred_norm)

    # Calculate the unnormalized metrics
    y_true = stock.raw_values_lstm_wrapper(dataset='test',
                                           timesteps=timesteps)['y']

    # df1 = pd.DataFrame({'date': stock.df.index.values[-y_pred.shape[0]:], 'y_pred': y_pred.flatten(), 'y_true': y_true.flatten()})
    # df1.set_index('date', inplace=True)
    # df1.to_csv('plot_data_lstm.csv')
    test_metrics = {
        'test_loss':
        metrics.mean_squared_error(y_true, y_pred),
        'test_mean_absolute_percentage_error':
        metrics.mean_absolute_percentage_error(y_true, y_pred),
        'test_mean_absolute_error':
        metrics.mean_absolute_error(y_true, y_pred),
        'test_root_mean_squared_relative_error':
        metrics.root_mean_squared_relative_error(y_true, y_pred),
        'test_mean_squared_error':
        metrics.mean_squared_error(y_true, y_pred),
        'test_direction_accuracy':
        metrics.direction_accuracy(y_true, y_pred, days_forward)
    }

    # Save the metrics
    for metric_name, metric_value in test_metrics.items():
        _run.log_scalar(metric_name, metric_value)
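The test_metrics block above calls a project metrics module (metrics.root_mean_squared_relative_error, metrics.direction_accuracy) that is not included in this snippet. A minimal NumPy sketch consistent with the Keras-backend definitions earlier in the function; the real module may differ:

import numpy as np

def root_mean_squared_relative_error(y_true, y_pred, eps=1e-7):
    # Mirrors the Keras version above: sqrt(mean(((y_true - y_pred) / |y_true|)^2)).
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    rel = (y_true - y_pred) / np.clip(np.abs(y_true), eps, None)
    return np.sqrt(np.mean(np.square(rel)))

def direction_accuracy(y_true, y_pred, days_forward=1):
    # Fraction of steps where the predicted move over days_forward has the same sign
    # as the true move, both measured against the earlier true value (as in the Keras metric).
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    true_signs = np.sign(y_true[days_forward:] - y_true[:-days_forward])
    pred_signs = np.sign(y_pred[days_forward:] - y_true[:-days_forward])
    return np.mean(true_signs == pred_signs)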
Code example #11
def draw_mape():
    """
    draw mape for both ground-truth & estimate in the same figure.
    """
    month = 'jul'
    freq = '1h'
    # for 'fp_all_truth_'
    mape_all_truth = []
    res = pickle.load(open(
        GBT_RES_DIR % freq +
        'fp_all_truth_%s_mse119.2586_mape0.1352.pkl' % month, 'rb'),
                      encoding='latin1')
    # res = pickle.load(open(GBT_RES_DIR % freq + 'fp_all_truth_%s_mse46.8571_mape0.1122.pkl' % month, 'rb'),
    #                   encoding='latin1')

    y_test = res['y_test']
    y_pred = res['y_pred']
    length = len(y_test)
    for i in range(length - 1):
        y_t = y_test[:i + 1]
        y_p = y_pred[:i + 1]
        # y_t = y_test[i:i + 1]
        # y_p = y_pred[i:i + 1]
        mape_all_truth.append(mean_absolute_percentage_error(y_t, y_p))
    mape_all_truth = np.array(mape_all_truth)

    # for 'fp_all_estimate_'
    mape_all_estimate = []
    res = pickle.load(open(
        GBT_RES_DIR % freq +
        'fp_all_estimate_%s_mse268.7807_mape0.2026.pkl' % month, 'rb'),
                      encoding='latin1')
    # res = pickle.load(open(GBT_RES_DIR % freq + 'fp_all_estimate_%s_mse76.9154_mape0.1367.pkl' % month, 'rb'),
    #                   encoding='latin1')

    y_test = res['y_test']
    y_pred = res['y_pred']
    length = len(y_test)
    for i in range(length - 1):
        y_t = y_test[:i + 1]
        y_p = y_pred[:i + 1]
        # y_t = y_test[i:i + 1]
        # y_p = y_pred[i:i + 1]
        mape_all_estimate.append(mean_absolute_percentage_error(y_t, y_p))
    mape_all_estimate = np.array(mape_all_estimate)

    pprint(mape_all_truth)
    pprint(mape_all_estimate)

    plt.figure(figsize=(5, 4))
    plt.plot(mape_all_truth, c='blue', label='Prediction (Use ground-truth)')
    plt.plot(mape_all_estimate, c='red', label='Prediction (Use estimation)')
    plt.xlabel('Time (hour)', fontsize=14)
    plt.ylabel('MAPE', fontsize=16)
    plt.title('All apartments (SUM, August)', fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig(FIG_DIR + 'fp_all_mape_%s_%s' % (month, freq),
                dpi=300,
                bbox_inches='tight')

    plt.show()
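The two loops above compute an expanding-window MAPE: the error over y[:i + 1] as the window grows one hour at a time. A hedged vectorised equivalent, assuming the project helper is a plain fractional MAPE:

import numpy as np

def expanding_mape(y_test, y_pred, eps=1e-8):
    # MAPE over each prefix y_test[:i + 1], y_pred[:i + 1] for i = 0 .. len(y_test) - 2.
    y_test = np.asarray(y_test, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    rel_err = np.abs((y_test - y_pred) / np.maximum(np.abs(y_test), eps))
    return (np.cumsum(rel_err) / np.arange(1, len(rel_err) + 1))[:-1]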
Code example #12
def draw_mse_mape(how_prefix):

    mse_all, mape_all = [], []
    how, apt, ss, freq = None, None, None, None
    for filename in os.listdir(GBT_RES_DIR % '1h'):
        if filename.startswith(how_prefix):
            ss = filename.split('_')[3].title()
            if ss != 'Aug':
                continue
            apt = filename.split('_')[1]
            how = filename.split('_')[2]
            how = 'estimation' if how == 'estimate' else 'ground-truth'
            freq = '1h'

            res = pickle.load(open(GBT_RES_DIR % freq + filename, 'rb'),
                              encoding='latin1')

            y_test = res['y_test']
            y_pred = res['y_pred']

            mse_a, mape_a = [], []
            length = len(y_test)
            # length = int(len(y_test) / 2)
            print(ss)
            for i in range(length - 1):
                y_t = y_test[:i + 1]
                y_p = y_pred[:i + 1]
                # y_t = y_test[i:i + 1]
                # y_p = y_pred[i:i + 1]
                mse = mean_squared_error(y_t, y_p)
                mape = mean_absolute_percentage_error(y_t, y_p)
                mse_a.append(mse)
                mape_a.append(mape)
                print('\t%s\t%s\t%s' % (i + 1, mse, mape))
            mse_all = mse_a
            mape_all = mape_a

    mse_all = np.array(mse_all)
    mape_all = np.array(mape_all)

    # mse_all = np.mean(mse_all, axis=0)
    # mape_all = np.mean(mape_all, axis=0)

    print(mse_all)
    print(mape_all)

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    ax1.plot(mse_all)
    # ax1.set_title('Apartment %s' % apt)
    ax1.set_xlabel('Time (hour)')
    ax1.set_ylabel('MSE')

    ax2.plot(mape_all)
    # ax2.set_title('Apartment %s' % apt)
    ax2.set_xlabel('Time (hour)')
    ax2.set_ylabel('MAPE')

    f.suptitle('All apartments (SUM, Use %s)' % how, fontsize=16)
    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=0.3,
                        hspace=None)

    # plt.savefig(FIG_DIR + 'fp_all_mse_mape_%s_%s_%s' % (how, apt, freq), dpi=300, bbox_inches='tight')

    plt.show()
Code example #13
def do_model(all_data):
    _steps, tts_factor, num_epochs = get_steps_extra()
    # features = all_data[:-_steps]
    # labels = all_data[_steps:, 4:]
    # tts = train_test_split(features, labels, test_size=0.4)
    # X_train = tts[0]
    # X_test = tts[1]
    # Y_train = tts[2].astype(np.float64)
    # Y_test = tts[3].astype(np.float64)
    split_pos = int(len(all_data) * tts_factor)
    train_data, test_data = all_data[:split_pos], all_data[split_pos:]
    dataX, dataY, fields = create_dataset(test_data, 1, _steps)

    optimiser = {{choice(['adam', 'rmsprop'])}}
    hidden_neurons = int({{quniform(16, 256, 4)}})
    loss_function = 'mse'
    batch_size = int({{quniform(1, 10, 1)}})
    dropout = {{uniform(0, 0.5)}}
    dropout_dense = {{uniform(0, 0.5)}}
    hidden_inner_factor = {{uniform(0.1, 1.9)}}
    inner_hidden_neurons = int(hidden_inner_factor * hidden_neurons)
    dropout_inner = {{uniform(0, 0.5)}}

    dataX = fit_to_batch(dataX, batch_size)
    dataY = fit_to_batch(dataY, batch_size)

    extra_layer = {{choice([True, False])}}
    if not extra_layer:
        dropout_inner = 0

    # X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    # X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    # print("X train shape:\t", X_train.shape)
    # print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    print("Steps:\t", _steps)
    print("Extra layer:\t", extra_layer)
    print("Batch size:\t", batch_size)

    # in_neurons = X_train.shape[2]

    out_neurons = 1

    model = Sequential()
    best_weight = BestWeight()
    model.add(
        LSTM(units=hidden_neurons,
             batch_input_shape=(batch_size, 1, fields),
             return_sequences=extra_layer,
             stateful=True,
             dropout=dropout))
    model.add(Activation('relu'))

    if extra_layer:
        dense_input = inner_hidden_neurons
        model.add(
            LSTM(
                units=dense_input,
                # input_shape=hidden_neurons,
                stateful=True,
                return_sequences=False,
                dropout=dropout_inner))
        model.add(Activation('relu'))

    model.add(Dense(units=out_neurons, activation='relu'))
    model.add(Dropout(dropout_dense))
    model.compile(loss=loss_function, optimizer=optimiser)

    history = model.fit(dataX,
                        dataY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        validation_split=0.3,
                        shuffle=False,
                        callbacks=[best_weight])

    model.set_weights(best_weight.get_best())
    X_test, Y_test, _fields = create_dataset(test_data, 1, _steps)
    X_test, Y_test = fit_to_batch(X_test, batch_size), fit_to_batch(
        Y_test, batch_size)
    predicted = model.predict(X_test, batch_size=batch_size) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer', extra_layer),
        ('extra_layer_dropout', dropout_inner),
        ('dropout_dense', dropout_dense),
        ('extra_layer_neurons', inner_hidden_neurons),
        ('loss function', loss_function)
        # 'history': history.history
    ])
    print(metrics)
    return {'loss': -rmse_val, 'status': STATUS_OK, 'metrics': metrics}
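The double-brace expressions ({{choice(...)}}, {{uniform(...)}}, {{quniform(...)}}) are hyperas templates, so do_model is meant to be passed to hyperas rather than called directly. A hedged sketch of the driver, assuming a data() function that returns the single all_data array do_model expects (load_all_data is a hypothetical loader):

from hyperas import optim
from hyperas.distributions import choice, uniform, quniform   # referenced by the template above
from hyperopt import Trials, tpe

def data():
    return load_all_data()   # hypothetical; the real project builds all_data elsewhere

best_run, best_model = optim.minimize(model=do_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=50,
                                      trials=Trials())
print(best_run)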
Code example #14
File: main.py  Project: satadru5/traffic-prediction-1
def do_model(all_data):
    _steps = steps
    print("steps:", _steps)
    features = all_data[:-_steps]
    labels = all_data[_steps:, 4:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)
    optimiser = 'adam'
    hidden_neurons = {{choice([256, 300, 332])}} #tested already on : 128, 196, 212, 230, 244,
    loss_function = 'mse'
    batch_size = {{choice([96, 105, 128])}} # already did 148, 156, 164, 196
    dropout = {{uniform(0, 0.1)}}
    hidden_inner_factor = {{uniform(0.1, 1.1)}}
    inner_hidden_neurons = int(hidden_inner_factor * hidden_neurons)
    dropout_inner = {{uniform(0,1)}}

    extra_layer = {{choice([True, False])}}
    if not extra_layer:
        dropout_inner = 0

    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    # print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    print("Extra layer:\t", extra_layer)
    in_neurons = X_train.shape[2]

    out_neurons = 1


    model = Sequential()
    gpu_cpu = 'gpu'
    best_weight = BestWeight()
    dense_input = hidden_neurons
    model.add(LSTM(output_dim=hidden_neurons, input_dim=X_test.shape[2], return_sequences=extra_layer, init='uniform',
                   consume_less=gpu_cpu))
    model.add(Dropout(dropout))

    if extra_layer:
        dense_input = inner_hidden_neurons
        model.add(LSTM(output_dim=dense_input, input_dim=hidden_neurons, return_sequences=False, consume_less=gpu_cpu))
        model.add(Dropout(dropout_inner))
        model.add(Activation('relu'))

    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)

    history = model.fit(
        X_train, Y_train,
        verbose=0,
        batch_size=batch_size,
        nb_epoch=30,
        validation_split=0.3,
        shuffle=False,
        callbacks=[best_weight]
    )

    model.set_weights(best_weight.get_best())

    predicted = model.predict(X_test) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer', extra_layer),
        ('extra_layer_dropout', dropout_inner),
        ('extra_layer_neurons', inner_hidden_neurons),
        ('loss function', loss_function)
        # 'history': history.history
    ])
    # print(metrics)
    return {'loss': -rmse_val, 'status': STATUS_OK, 'metrics': metrics}
Code example #15
def do_model(all_data, steps, run_model=True):
    _steps = steps
    print("steps:", _steps)
    scaler = MinMaxScaler()
    all_data = scaler.fit_transform(all_data)
    if not run_model:
        return None, None, scaler
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)

    optimiser = 'adam'
    hidden_neurons = 200
    loss_function = 'mse'
    batch_size = 105
    dropout = 0.056
    inner_hidden_neurons = 269
    dropout_inner = 0.22

    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    in_neurons = X_train.shape[2]

    out_neurons = 1

    model = Sequential()
    gpu_cpu = 'cpu'
    best_weight = BestWeight()
    model.add(
        LSTM(output_dim=hidden_neurons,
             input_dim=in_neurons,
             return_sequences=True,
             init='uniform',
             consume_less=gpu_cpu))
    model.add(Dropout(dropout))

    dense_input = inner_hidden_neurons
    model.add(
        LSTM(output_dim=dense_input,
             input_dim=hidden_neurons,
             return_sequences=False,
             consume_less=gpu_cpu))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))

    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))

    model.compile(loss=loss_function, optimizer=optimiser)

    history = model.fit(X_train,
                        Y_train,
                        verbose=0,
                        batch_size=batch_size,
                        nb_epoch=30,
                        validation_split=0.3,
                        shuffle=False,
                        callbacks=[best_weight])

    model.set_weights(best_weight.get_best())
    predicted = model.predict(X_test) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        # ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        # ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        # ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        # ('batch_size', batch_size),
        # ('optimiser', optimiser),
        # ('dropout', dropout),
        # ('extra_layer_dropout', dropout_inner),
        # ('extra_layer_neurons', inner_hidden_neurons),
        # ('loss function', loss_function)
        # 'history': history.history
    ])

    return metrics, model, scaler
Code example #16
File: gbt.py  Project: nuaaxc/PowerNet
def run_model_recursive(apt_fname, tr_te_split, res_file_pref, freq,
                        is_feat_select, is_draw):
    ############
    # load data
    ############
    print('========================================' * 2)
    print(apt_fname, res_file_pref)
    print(tr_te_split)

    df = load_data(apt_fname, freq)
    train = df[tr_te_split['trb']:tr_te_split['tre']]
    test = df[tr_te_split['teb']:tr_te_split['tee']]
    print(test)
    print('train/test:', train.shape, test.shape)

    feat = list(train.columns.values)
    feat.remove('energy')
    feat.remove('raw_energy')
    print('features (%d):' % len(feat), feat)
    print('index of energy-1:', feat.index('energy-1'))

    X_train = train[feat].as_matrix()
    y_train = train['energy'].as_matrix()
    X_test = test[feat].as_matrix()
    y_test = test['raw_energy'].as_matrix()

    print('train/test (after converting to matrix):', X_train.shape,
          X_test.shape)

    ####################
    # feature selection
    ####################
    if is_feat_select:
        print('feature selection ...')
        selected = feature_selection(X_train, y_train, 12)
        print(len(selected))
        print('selected features (%d):' % sum(selected),
              [feat[i] for i in range(len(selected)) if selected[i]])
        X_train = X_train[:, selected]
        X_test = X_test[:, selected]
        print('train/test (after feature selection):', X_train.shape,
              X_test.shape)
        res_file_pref += '_feature'

    ########
    # train
    ########
    print('training ...')
    parameters = {
        'n_estimators': (50, 100, 150, 200, 250, 300, 350, 400, 450, 500),
        'max_depth': [1, 2, 3],
        'learning_rate': [0.001, 0.01, 0.1],
        'random_state': [42],
        'loss': ['ls']
    }

    # parameters = {'n_estimators': (50,),
    #               'max_depth': [1],
    #               'learning_rate': [0.001],
    #               'random_state': [42],
    #               'loss': ['ls']}

    clf = GridSearchCV(GradientBoostingRegressor(),
                       param_grid=parameters,
                       cv=TimeSeriesSplit(n_splits=3),
                       scoring='neg_mean_squared_error')
    clf.fit(X_train, y_train)
    print(clf.best_params_)

    #######
    # test
    #######
    print('testing (recursive) ...')
    y_pred = []
    for i in range(len(X_test)):
        # print '-------' * 10
        # print 'i:', i
        # print 'y_pred:', y_pred
        # print 'feat:', X_test[i][18:]
        # print 'range(i):', range(i)
        # Overwrite the lag features (energy-1, energy-2, ...) with the log-transformed
        # recent predictions, so later steps are forecast recursively.
        for j in range(min(i, 49)):
            X_test[i][j + feat.index('energy-1')] = np.log(y_pred[-j - 1] + 1)
        # print 'feat:', X_test[i][18:]
        y_p = clf.predict([X_test[i]])[0]
        y_p = np.exp(y_p) - 1
        # print 'y_p:', y_p, np.log(y_p+1)
        y_pred.append(y_p)

    #############
    # evaluation
    #############
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    print('MSE:', mse)
    print('MAPE:', mape)
    print('save result to file ...')
    pickle.dump({
        'y_test': y_test,
        'y_pred': y_pred
    }, open(res_file_pref + '_mse%.4f_mape%.4f.pkl' % (mse, mape), 'wb'))
    print('saved.')

    if is_draw:
        pyplot.plot(y_test)
        pyplot.plot(y_pred, color='red')
        pyplot.show()
Code example #17
File: svm.py  Project: nuaaxc/PowerNet
def run_model(apt_fname,
              tr_te_split,
              res_file_pref,
              freq,
              is_feat_select,
              is_draw):
    ############
    # load data
    ############
    print('========================================' * 2)
    # print(apt_fname, res_file_pref)
    print(tr_te_split)

    df = load_data(apt_fname, freq)

    train = df[tr_te_split['trb']: tr_te_split['tre']]
    test = df[tr_te_split['teb']: tr_te_split['tee']]
    # print(test)
    # print('train/test:', train.shape, test.shape)

    feat = list(train.columns.values)
    feat.remove('energy')
    feat.remove('raw_energy')
    # print('raw features (%d):' % len(feat), feat)

    X_train = train[feat].as_matrix()
    y_train = train['energy'].as_matrix()
    X_test = test[feat].as_matrix()
    y_test = test['raw_energy'].as_matrix()

    # print('train/test (after converting to matrix):', X_train.shape, X_test.shape)

    ####################
    # feature selection
    ####################
    if is_feat_select:
        print('feature selection ...')
        selected = feature_selection(X_train, y_train, 12)
        print(len(selected))
        print('selected features (%d):' % sum(selected), [feat[i] for i in range(len(selected)) if selected[i]])
        X_train = X_train[:, selected]
        X_test = X_test[:, selected]
        print('train/test (after feature selection):', X_train.shape, X_test.shape)
        res_file_pref += '_feature'

    ########
    # train
    ########
    print('training ...')
    parameters = {'C': (0.001, 0.01, 0.1, 1),
                  'kernel': ['rbf', 'linear', 'poly', 'sigmoid']
                  }

    clf = GridSearchCV(svm.SVR(),
                       param_grid=parameters,
                       cv=TimeSeriesSplit(n_splits=3),
                       scoring='neg_mean_squared_error')
    clf.fit(X_train, y_train)
    print(clf.best_params_)

    #######
    # test
    #######
    print('testing ...')

    y_pred = clf.predict(X_test)

    # y_pred = np.exp(np.cumsum(np.concatenate(([np.log(y_test[0])], y_pred))))
    y_pred = np.exp(y_pred) - 1

    #############
    # evaluation
    #############
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    print('MSE:', mse)
    print('MAPE:', mape)
    print('save result to file ...')
    pickle.dump(
        {'y_test': y_test, 'y_pred': y_pred},
        open(res_file_pref + '_mse%.4f_mape%.4f.pkl' % (mse, mape), 'wb'))
    print('saved.')

    if is_draw:
        pyplot.plot(y_test)
        pyplot.plot(y_pred, color='red')
        pyplot.show()
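One portability note on the PowerNet snippets (Code examples #16 and #17): DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0, so under a current pandas the extraction lines need .to_numpy() (or .values) instead, e.g.:

# Equivalent feature/target extraction under pandas >= 1.0.
X_train = train[feat].to_numpy()
y_train = train['energy'].to_numpy()
X_test = test[feat].to_numpy()
y_test = test['raw_energy'].to_numpy()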