Beispiel #1
0
def predict(config):
    """Run single-sequence prediction over a dataset and report per-date MSE.

    Loads ``config.predict_datafile`` when set (falling back to
    ``config.datafile``), steps the model over every batch with batch size 1,
    prints each prediction, and — when ``config.mse_outfile`` is set — writes
    the mean MSE per date plus a grand total to that file.  When no outfile is
    configured the process exits (original behavior preserved).
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # Predictions are made one sequence at a time.
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of per-batch MSE values observed for that date
        perfs = dict()

        for _ in range(batches.num_batches):
            batch = batches.next_batch()

            (mse, preds) = model.step(session, batch)

            # A NaN MSE means the batch had no usable target; skip scoring it.
            if not math.isnan(mse):
                date = batch_to_date(batch)
                perfs.setdefault(date, []).append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is not None:
            # The context manager closes the file; the original's trailing
            # no-op `f.closed` statement was removed.
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % total_mean, file=f)
        else:
            exit()
Beispiel #2
0
def predict_pie(config):
    """Run PIE (prediction-interval estimation) inference over a dataset.

    Doesn't use print options. Only outputs dataframes: per-(date, key)
    targets plus lower/upper prediction-interval bounds, pickled into
    ``config.df_dirname`` when that option is set.
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # Predictions are made one sequence at a time.
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # DataFrames accumulated across batches: targets, lower bounds, upper
    # bounds (same order as save_names below).
    df_list = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        for i in range(batches.num_batches):
            batch = batches.next_batch()

            (mpiw, _, _, preds_lb,
             preds_ub) = model.step(session,
                                    batch,
                                    keep_prob=config.keep_prob_pred,
                                    uq=config.UQ,
                                    UQ_model_type='PIE')

            date = batch_to_date(batch)
            key = batch_to_key(batch)

            # Dummy values keep the call consistent with the shared printing
            # helper; MSE is not evaluated in the PIE case.
            mse_dummy = mse_var_dummy = 0.0

            # Print every 10000 iterations so progress can be monitored.
            if i % 10000 == 0:
                pretty_print_predictions(batches, batch, preds_lb, preds_ub,
                                         mse_dummy, mse_var_dummy)

            # Accumulate values only when an output directory was requested.
            if config.df_dirname is not None:
                values_list = [
                    get_value(batches, batch, 'target'),
                    get_value(batches, batch, 'output_lb', preds_lb),
                    get_value(batches, batch, 'output_ub', preds_ub),
                ]
                for j in range(len(df_list)):
                    df_list[j] = update_df(df_list[j], date, key,
                                           values_list[j])

        # BUG FIX: saving previously ran unconditionally, so
        # os.path.isdir(None) raised TypeError whenever config.df_dirname
        # was unset even though the accumulation above was guarded.
        if config.df_dirname is not None:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-lb-df.pkl',
                          'output-ub-df.pkl']
            for j in range(len(df_list)):
                df_list[j].to_pickle(os.path.join(config.df_dirname,
                                                  save_names[j]))
    return
Beispiel #3
0
def predict(config):
    """Step the model over every batch and collect per-date MSE statistics.

    Predictions run with batch size 1 and dropout keep-probability
    ``config.keep_prob_pred``.  Optionally pickles target/output/MSE
    DataFrames into ``config.df_dirname`` and writes per-date mean MSE to
    ``config.mse_outfile``; exits the process when no MSE outfile is given
    (original behavior preserved).
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # Predictions are made one sequence at a time.
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # DataFrames accumulated across batches: targets, model outputs, MSEs
    # (same order as save_names below).
    df_list = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of per-batch MSE values observed for that date
        perfs = dict()

        for _ in range(batches.num_batches):
            batch = batches.next_batch()

            (mse, preds) = model.step(session,
                                      batch,
                                      keep_prob=config.keep_prob_pred)

            # A NaN MSE means the batch had no usable target; skip scoring it.
            if not math.isnan(mse):
                date = batch_to_date(batch)
                key = batch_to_key(batch)
                perfs.setdefault(date, []).append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            elif config.print_preds:
                print_predictions(config, batches, batch, preds, mse)

            # Accumulate DataFrames only for scored batches (date/key are
            # only defined when mse is not NaN) and only when an output
            # directory was requested.
            if config.df_dirname is not None and not math.isnan(mse):
                values_list = [
                    get_value(batches, batch, 'target'),
                    get_value(batches, batch, 'output', preds),
                    mse,
                ]
                for j in range(len(df_list)):
                    df_list[j] = update_df(df_list[j], date, key,
                                           values_list[j])

        # Save the DataFrames.
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-df.pkl', 'mse-df.pkl']
            for j in range(len(df_list)):
                df_list[j].to_pickle(
                    os.path.join(config.df_dirname, save_names[j]))

        # Per-date MSE summary.  The context manager closes the file; the
        # original's trailing no-op `f.closed` statement was removed.
        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % total_mean, file=f)
        else:
            exit()
Beispiel #4
0
def predict(config):
    """Predict a single target field and dump raw outputs/targets to CSV.

    Steps the model over every batch with batch size 1, tracks per-date MSE,
    and — when ``config.pretty_print_preds`` is set — records the raw
    (de-normalized) output and target for ``config.target_field`` at
    quarter-end dates into a DataFrame written to
    ``datasets/<config.output_file>``.  Also writes per-date mean MSE to
    ``config.mse_outfile`` when set; otherwise exits the process
    (original behavior preserved).
    """
    # Order must match the model's output vector: the position of
    # config.target_field in this list selects the reported column.
    target_list = [
        'saleq_ttm', 'cogsq_ttm', 'xsgaq_ttm', 'oiadpq_ttm', 'mkvaltq_ttm',
        'niq_ttm', 'ibq_ttm', 'cheq_mrq', 'rectq_mrq', 'invtq_mrq', 'acoq_mrq',
        'ppentq_mrq', 'aoq_mrq', 'dlcq_mrq', 'apq_mrq', 'txpq_mrq', 'lcoq_mrq',
        'ltq_mrq'
    ]
    # Simplified from int(np.argwhere(...).mean()); identical result for a
    # valid field, and both forms raise ValueError for an unknown one.
    # (The unused aux_list of momentum fields was removed.)
    index = target_list.index(config.target_field)

    df = pd.DataFrame(columns=[
        'date', 'gvkey', 'mse', 'normalizer', config.target_field +
        "_output", config.target_field + "_target"
    ])
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = os.path.join(config.data_dir, datafile)

    ind = 0  # next row index to write into df

    # Predictions are made one sequence at a time.
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Configure numpy's global print formatting once (previously re-set on
    # every recorded batch inside the loop).
    np.set_printoptions(suppress=True, precision=3)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of per-batch MSE values observed for that date
        perfs = dict()

        for _ in range(batches.num_batches):
            batch = batches.next_batch()

            (mse, preds) = model.step(session, batch)

            # A NaN MSE means the batch had no usable target; skip scoring it.
            if not math.isnan(mse):
                date = batch_to_date(batch)
                perfs.setdefault(date, []).append(mse)

            if config.pretty_print_preds:
                key = batch_to_key(batch)
                date = batch_to_date(batch)
                # Only record quarter-end months (Mar/Jun/Sep/Dec).
                if int(date % 100) in [3, 6, 9, 12]:
                    print("GVKEY: " + str(key) + ", Date: " + str(date))
                    L = batch.seq_lengths[0]
                    targets = batch.targets[L - 1][0]
                    outputs = preds[0]
                    normalizer = batch.normalizers[0]

                    df.loc[ind] = [
                        date, key, mse, normalizer,
                        batches.get_raw_outputs(batch, 0, outputs)[index],
                        batches.get_raw_outputs(batch, 0, targets)[index]
                    ]
                    ind += 1
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is not None:
            # The context manager closes the file; the original's trailing
            # no-op `f.closed` statement was removed.
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % total_mean, file=f)

        # The CSV is written in both branches of the original; merged here.
        df.to_csv('datasets/' + config.output_file, index=False)
        if config.mse_outfile is None:
            exit()