def predict(config):
    """Run the model over a dataset one batch at a time and report MSE.

    Loads the data file named by ``config.predict_datafile`` (falling back to
    ``config.datafile``), forces ``batch_size`` to 1 so each batch is a single
    sequence, and steps a restored model over every batch.  Per-date MSE means
    are written to ``config.mse_outfile`` when that option is set; otherwise
    the process exits.

    NOTE(review): this module defines ``predict`` more than once; a later
    definition shadows this one at import time — confirm which is intended.
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile
    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)
    # One sequence per batch so each step yields a single prediction.
    config.batch_size = 1
    batches = BatchGenerator(path, config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        model = model_utils.get_model(session, config, verbose=True)
        perfs = dict()  # date -> list of per-batch MSE values
        for i in range(batches.num_batches):
            batch = batches.next_batch()
            (mse, preds) = model.step(session, batch)
            # Skip batches with no valid targets (model reports NaN MSE).
            if not math.isnan(mse):
                date = batch_to_date(batch)
                if date not in perfs:
                    perfs[date] = list()
                perfs[date].append(mse)
                if config.pretty_print_preds:
                    pretty_print_predictions(batches, batch, preds, mse)
                else:
                    print_predictions(batches, batch, preds)
        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
        else:
            exit()
def predict_pie(config):
    """Run prediction-interval (PIE) inference and dump results as DataFrames.

    Doesn't use print options.  Only outputs DataFrames: one each for targets,
    lower bounds, and upper bounds, pickled into ``config.df_dirname`` when
    that option is set.  MSE is not evaluated in the PIE case; dummy zeros are
    passed to the shared pretty-printer for signature consistency.
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile
    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)
    # One sequence per batch so each step yields a single interval.
    config.batch_size = 1
    batches = BatchGenerator(path, config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    # Initialize DataFrames: targets, lower bounds, upper bounds.
    df_target = pd.DataFrame()
    df_output_lb = pd.DataFrame()
    df_output_ub = pd.DataFrame()
    df_list = [df_target, df_output_lb, df_output_ub]
    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        model = model_utils.get_model(session, config, verbose=True)
        for i in range(batches.num_batches):
            batch = batches.next_batch()
            (mpiw, _, _, preds_lb, preds_ub) = model.step(
                session, batch, keep_prob=config.keep_prob_pred,
                uq=config.UQ, UQ_model_type='PIE')
            date = batch_to_date(batch)
            key = batch_to_key(batch)
            # Dummy input to be consistent with the rest of the predictions
            # printing options.  MSE = 0.0; it is not evaluated in PIE case.
            mse_dummy = mse_var_dummy = 0.0
            # Print every n iterations to check the progress for monitoring.
            if i % 10000 == 0:
                pretty_print_predictions(batches, batch, preds_lb, preds_ub,
                                         mse_dummy, mse_var_dummy)
            # Get values and update DataFrames if df_dirname is provided.
            if config.df_dirname is not None:
                target_val = get_value(batches, batch, 'target')
                output_lb_val = get_value(batches, batch, 'output_lb',
                                          preds_lb)
                output_ub_val = get_value(batches, batch, 'output_ub',
                                          preds_ub)
                values_list = [target_val, output_lb_val, output_ub_val]
                # Invariant check hoisted out of the element loop.
                assert len(df_list) == len(values_list)
                for j in range(len(df_list)):
                    df_list[j] = update_df(df_list[j], date, key,
                                           values_list[j])
    # Save the DataFrames.  Guarded: without this check, an unset df_dirname
    # would crash os.path.isdir(None) with a TypeError.
    if config.df_dirname is not None:
        if not os.path.isdir(config.df_dirname):
            os.makedirs(config.df_dirname)
        save_names = ['target-df.pkl', 'output-lb-df.pkl', 'output-ub-df.pkl']
        assert len(df_list) == len(save_names)
        for j in range(len(df_list)):
            df_list[j].to_pickle(
                os.path.join(config.df_dirname, save_names[j]))
    return
def predict(config):
    """Run prediction with dropout kept at inference and optionally dump
    per-batch results as DataFrames.

    Like the basic predict path, but passes ``keep_prob=config.keep_prob_pred``
    to ``model.step`` and, when ``config.df_dirname`` is set, accumulates
    target / output / MSE DataFrames and pickles them there.  Per-date MSE
    means go to ``config.mse_outfile`` when set; otherwise the process exits.

    NOTE(review): this module defines ``predict`` more than once; the last
    definition wins at import time — confirm which is intended.
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile
    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)
    # One sequence per batch so each step yields a single prediction.
    config.batch_size = 1
    batches = BatchGenerator(path, config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    # Initialize DataFrames: targets, model outputs, per-batch MSE.
    df_target = pd.DataFrame()
    df_output = pd.DataFrame()
    df_mse = pd.DataFrame()
    df_list = [df_target, df_output, df_mse]
    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        model = model_utils.get_model(session, config, verbose=True)
        perfs = dict()  # date -> list of per-batch MSE values
        for i in range(batches.num_batches):
            batch = batches.next_batch()
            (mse, preds) = model.step(session, batch,
                                      keep_prob=config.keep_prob_pred)
            # Skip batches with no valid targets (model reports NaN MSE).
            if not math.isnan(mse):
                date = batch_to_date(batch)
                key = batch_to_key(batch)
                if date not in perfs:
                    perfs[date] = list()
                perfs[date].append(mse)
                if config.pretty_print_preds:
                    pretty_print_predictions(batches, batch, preds, mse)
                elif config.print_preds:
                    print_predictions(config, batches, batch, preds, mse)
                # Get values and update DataFrames if df_dirname is provided
                # (the original re-checked isnan here; it is already known
                # to be non-NaN in this branch).
                if config.df_dirname is not None:
                    target_val = get_value(batches, batch, 'target')
                    output_val = get_value(batches, batch, 'output', preds)
                    mse_val = mse
                    values_list = [target_val, output_val, mse_val]
                    # Invariant check hoisted out of the element loop.
                    assert len(df_list) == len(values_list)
                    for j in range(len(df_list)):
                        df_list[j] = update_df(df_list[j], date, key,
                                               values_list[j])
        # Save the DataFrames.
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-df.pkl', 'mse-df.pkl']
            assert len(df_list) == len(save_names)
            for j in range(len(df_list)):
                df_list[j].to_pickle(
                    os.path.join(config.df_dirname, save_names[j]))
        # MSE Outfile
        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
        else:
            exit()
def predict(config):
    """Run prediction and export one target field's raw outputs to CSV.

    Steps the model over the dataset and, when ``config.pretty_print_preds``
    is on, collects (date, gvkey, mse, normalizer, output, target) rows for
    quarter-end months into a DataFrame, which is written to
    ``datasets/<config.output_file>``.  Per-date MSE means also go to
    ``config.mse_outfile`` when set.
    """
    # Ordered list of model output fields; the position of
    # config.target_field in this list selects which raw output to export.
    target_list = [
        'saleq_ttm', 'cogsq_ttm', 'xsgaq_ttm', 'oiadpq_ttm', 'mkvaltq_ttm',
        'niq_ttm', 'ibq_ttm', 'cheq_mrq', 'rectq_mrq', 'invtq_mrq',
        'acoq_mrq', 'ppentq_mrq', 'aoq_mrq', 'dlcq_mrq', 'apq_mrq',
        'txpq_mrq', 'lcoq_mrq', 'ltq_mrq'
    ]
    aux_list = ['mom1m', 'mom3m', 'mom6m', 'mom9m']
    df = pd.DataFrame(columns=[
        'date', 'gvkey', 'mse', 'normalizer',
        config.target_field + "_output", config.target_field + "_target"
    ])
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile
    print("Loading data from %s ..." % datafile)
    path = os.path.join(config.data_dir, datafile)
    ind = 0  # next row index in df
    # One sequence per batch so each step yields a single prediction.
    config.batch_size = 1
    batches = BatchGenerator(path, config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    # Index of the requested target field within target_list.
    index = int(
        np.argwhere(np.array(target_list) == config.target_field).mean())
    # Process-global print options; hoisted out of the batch loop since they
    # never change between iterations.
    np.set_printoptions(suppress=True)
    np.set_printoptions(precision=3)
    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        model = model_utils.get_model(session, config, verbose=True)
        perfs = dict()  # date -> list of per-batch MSE values
        for i in range(batches.num_batches):
            batch = batches.next_batch()
            (mse, preds) = model.step(session, batch)
            # Skip batches with no valid targets (model reports NaN MSE).
            if not math.isnan(mse):
                date = batch_to_date(batch)
                if date not in perfs:
                    perfs[date] = list()
                perfs[date].append(mse)
                if config.pretty_print_preds:
                    key = batch_to_key(batch)
                    # Only export quarter-end months (Mar/Jun/Sep/Dec).
                    if int(date % 100) in [3, 6, 9, 12]:
                        print("GVKEY: " + str(key) + ", Date: " + str(date))
                        L = batch.seq_lengths[0]
                        targets = batch.targets[L - 1][0]
                        outputs = preds[0]
                        normalizer = batch.normalizers[0]
                        df.loc[ind] = [
                            date, key, mse, normalizer,
                            batches.get_raw_outputs(batch, 0, outputs)[index],
                            batches.get_raw_outputs(batch, 0, targets)[index]
                        ]
                        ind += 1
                else:
                    print_predictions(batches, batch, preds)
        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
                df.to_csv('datasets/' + config.output_file, index=False)
        else:
            df.to_csv('datasets/' + config.output_file, index=False)
            exit()