Example #1
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

# Note: StandardScaler here is the project's own helper taking an explicit
# mean/std (e.g. from lib/utils in the DCRNN codebase), not sklearn's.


def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    scaler = StandardScaler(mean=df_train.values.mean(),
                            std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)
    # Do forecasting: slide an n_lags-wide input window over the series so that
    # every horizon in n_forwards has a forecast landing inside the test period.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = var_result.forecast(
            scaler.transform(df.values[input_ind:input_ind + n_lags]),
            max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            # Position of this window's forecast within the test period.
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]),
                                  index=df_test.index,
                                  columns=df_test.columns)
        df_predicts.append(df_predict)

    # Save the last horizon's predictions (df_predicts[-1] rather than relying
    # on the loop variable leaking out of the loop above).
    df_predicts[-1].to_csv("./df_predict.csv", sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
Example #2

def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using Vector Auto-Regressive Model.
    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples reserved for the test set.
    :return: [list of predictions at the different horizons], df_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)
    # Do forecasting.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = var_result.forecast(scaler.transform(df.values[input_ind: input_ind + n_lags]), max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]), index=df_test.index, columns=df_test.columns)
        df_predicts.append(df_predict)
    return df_predicts, df_test
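A quick usage sketch for var_predict. The sensor names, sampling frequency, and random-walk data below are made up for illustration, and the mean/std StandardScaler helper must be in scope (a sketch of it appears after Example #5):

import numpy as np
import pandas as pd

# Hypothetical two-sensor series sampled every 5 minutes.
index = pd.date_range('2024-01-01', periods=500, freq='5min')
df = pd.DataFrame(np.random.randn(500, 2).cumsum(axis=0),
                  index=index, columns=['sensor_0', 'sensor_1'])

df_predicts, df_test = var_predict(df, n_forwards=(1, 3), n_lags=4,
                                   test_ratio=0.2)
# df_predicts[0] holds the 1-step-ahead forecasts, df_predicts[1] the
# 3-step-ahead ones, both aligned with df_test's index and columns.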
Example #3

    # Method of a DCRNN-style supervisor class; self.clusters, self.horizon,
    # self._kwargs, etc. are attributes of that class.
    def evaluate(self, sess, **kwargs):

        y_preds_all = []
        # Evaluate only the first 32 clusters.
        sclusters = self.clusters[0:32]
        for cluster in sclusters:

            node_count, adj_mx = self.cluster_data(cluster)
            adj_mx = utils.calculate_random_walk_matrix(adj_mx).T
            adj_mx = self._build_sparse_matrix(adj_mx)
            global_step = sess.run(tf.train.get_or_create_global_step())
            scaler_path = self._kwargs['data'].get(
                'dataset_dir') + '/scaler.npy'
            scaler_data_ = np.load(scaler_path)
            # scaler.npy holds the training mean and standard deviation.
            mean, std = scaler_data_[0], scaler_data_[1]
            scaler = StandardScaler(mean=mean, std=std)

            # change val to test before run
            test_data_path = self._kwargs['data'].get(
                'dataset_dir') + '/test_' + str(cluster) + '.tfrecords'
            test_dataset = tf.data.TFRecordDataset([test_data_path])
            test_dataset = test_dataset.map(self._parse_record_fn)
            test_dataset = test_dataset.make_one_shot_iterator()
            test_next_element = test_dataset.get_next()

            test_results = self.run_epoch_generator(sess,
                                                    self._test_model,
                                                    test_next_element,
                                                    adj_mx,
                                                    return_output=True,
                                                    training=False)
            test_loss, y_preds = test_results['loss'], test_results['outputs']
            utils.add_simple_summary(self._writer, ['loss/test_loss'],
                                     [test_loss],
                                     global_step=global_step)

            y_preds = np.concatenate(y_preds, axis=0)
            y_preds = scaler.inverse_transform(y_preds[:, self.horizon - 1, :,
                                                       0])
            y_preds = y_preds[:, 0:node_count]

            y_preds_all.append(y_preds)

        y_preds_all = np.concatenate(y_preds_all, axis=1)
        return y_preds_all
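For context, utils.calculate_random_walk_matrix used above computes the row-normalized random-walk transition matrix D^-1 W of the adjacency matrix. A minimal sketch of that computation; the sparse output format is an assumption:

import numpy as np
import scipy.sparse as sp

def calculate_random_walk_matrix(adj_mx):
    # D^-1 * W: divide each row of the (weighted) adjacency matrix W by its
    # out-degree; zero-degree rows stay all-zero.
    adj_mx = sp.coo_matrix(adj_mx)
    d = np.array(adj_mx.sum(axis=1)).flatten()
    d_inv = np.zeros_like(d, dtype=float)
    np.divide(1.0, d, out=d_inv, where=d > 0)
    return sp.diags(d_inv).dot(adj_mx).tocoo()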
Example #4
    def test_reverse_transform_df(self):
        df = pd.DataFrame([[0., -1.], [-1., -0.5], [1., 0.]])
        expected_result = np.array([[35., 0.], [0., 17.5], [70., 35.]])
        scaler = StandardScaler(mean=35., std=35.)
        result = scaler.inverse_transform(df)
        self.assertTrue(np.array_equal(expected_result, result.values))
Example #5
    def test_reverse_transform(self):
        data = np.array([[0., -1.], [-1., -0.5], [1., 0.]])
        expected_result = np.array([[35., 0.], [0., 17.5], [70., 35.]])
        scaler = StandardScaler(mean=35., std=35.)
        result = scaler.inverse_transform(data)
        self.assertTrue(np.array_equal(expected_result, result))
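The StandardScaler exercised by these tests (and used throughout the examples) is not sklearn's; it is a small z-score helper with an explicit mean and std. A minimal sketch consistent with the expected values above:

class StandardScaler:
    """Standardize data with a fixed mean and standard deviation."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        # E.g. with mean=35, std=35: -1 -> 0, -0.5 -> 17.5, 1 -> 70,
        # matching the expected results in the tests above.
        return (data * self.std) + self.mean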
Example #6

import argparse
import glob
import os

import pandas as pd
import tensorflow as tf
import yaml

# Plus the project's own helpers: StandardScaler, load_graph_data,
# train_val_test_split, generate_seq2seq_data, DCRNNSupervisor.


def main(args):
    with open(args.config_filename) as f:
        supervisor_config = yaml.load(f, Loader=yaml.FullLoader)

        graph_pkl_filename = supervisor_config['data'].get(
            'graph_pkl_filename')
        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(
            graph_pkl_filename)
        supervisor_config['model']['num_nodes'] = num_nodes = len(sensor_ids)

        # Data preprocessing
        traffic_df_filename = supervisor_config['data']['hdf_filename']
        df_data = pd.read_hdf(traffic_df_filename)
        #df_data = df_data.iloc[int(df_data.shape[0]/3):,:]
        validation_ratio = supervisor_config.get('data').get(
            'validation_ratio')
        test_ratio = supervisor_config.get('data').get('test_ratio')
        df_train, df_val, df_test = train_val_test_split(
            df_data, val_ratio=validation_ratio, test_ratio=test_ratio)

        batch_size = supervisor_config.get('data').get('batch_size')
        val_batch_size = supervisor_config.get('data').get('val_batch_size')
        test_batch_size = supervisor_config.get('data').get('test_batch_size')
        horizon = supervisor_config.get('model').get('horizon')
        seq_len = supervisor_config.get('model').get('seq_len')
        scaler = StandardScaler(mean=df_train.values.mean(),
                                std=df_train.values.std())

        data_train = generate_seq2seq_data(df_train, batch_size, seq_len,
                                           horizon, num_nodes, 'train', scaler)
        data_val = generate_seq2seq_data(df_val, val_batch_size, seq_len,
                                         horizon, num_nodes, 'val', scaler)
        data_train.update(data_val)
        #data_train['scaler'] = scaler

        data_test = generate_seq2seq_data(df_test, test_batch_size, seq_len,
                                          horizon, num_nodes, 'test', scaler)
        #data_test['scaler'] = scaler

        tf_config = tf.ConfigProto()
        if args.use_cpu_only:
            tf_config = tf.ConfigProto(device_count={'GPU': 0})
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            supervisor = DCRNNSupervisor(adj_mx, data_train, supervisor_config)

            data_tag = supervisor_config.get('data').get('dataset_dir')
            folder = data_tag + '/model/'
            if not os.path.exists(folder):
                os.makedirs(folder)
            # Train
            supervisor.train(sess=sess)

            # Test
            yaml_files = glob.glob('%s/model/*/*.yaml' % data_tag,
                                   recursive=True)
            yaml_files.sort(key=os.path.getmtime)
            config_filename = yaml_files[-1]  # most recently written config

            with open(config_filename) as f:
                config = yaml.load(f, Loader=yaml.FullLoader)
            # Load model and evaluate
            supervisor.load(sess, config['train']['model_filename'])
            y_preds = supervisor.evaluate(sess, data_test)

            n_test_samples = data_test['y_test'].shape[0]
            folder = data_tag + '/results/'
            if not os.path.exists(folder):
                os.makedirs(folder)
            for horizon_i in range(data_test['y_test'].shape[1]):
                y_pred = scaler.inverse_transform(y_preds[:, horizon_i, :, 0])
                eval_dfs = df_test[seq_len + horizon_i:seq_len + horizon_i +
                                   n_test_samples]
                df = pd.DataFrame(y_pred,
                                  index=eval_dfs.index,
                                  columns=eval_dfs.columns)
                #df = pd.DataFrame(y_pred, columns=df_test.columns)
                filename = os.path.join(
                    '%s/results/' % data_tag,
                    'dcrnn_speed_prediction_%s.h5' % str(horizon_i + 1))
                df.to_hdf(filename, 'results')

            print(
                'Predictions saved as %s/results/dcrnn_speed_prediction_[1-12].h5...'
                % data_tag)
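main expects an args object with config_filename and use_cpu_only attributes; a minimal sketch of the matching command-line entry point (the defaults are illustrative, not from the source):

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_filename', type=str, default=None,
                        help='YAML configuration file for the supervisor.')
    parser.add_argument('--use_cpu_only', action='store_true',
                        help='Run TensorFlow on CPU only.')
    main(parser.parse_args())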