Python BatchGenerator.next_batchの例、batch_generator.BatchGenerator.next_batch Pythonの例

コード例 #1

0

ファイルを表示

ファイル: classify_data.py プロジェクト: rpatil524/dnn-quant

def main(_):
  """Score each data point of the test datafile and print one line per batch.

  Extra flags are registered through ``configs`` and then read back from
  ``configs.get_configs()``. When ``test_datafile`` is not given it falls
  back to ``datafile``. For every batch the line
  ``key date score target seq_len`` is printed to stdout, unless the date
  lies outside the [print_start, print_end] window. The printed output can
  be joined with the input file using the unix ``paste`` command, e.g.:

    $ classify_data.py --config=train.conf --test_datafile=test.dat > output.dat
    $ paste -d ' ' test.dat output.dat > input_and_output.dat
  """
  string_flags = (
      ('test_datafile', None, 'file with test data'),
      ('time_field', 'date', 'fields used for dates/time'),
      ('print_start', '190001', 'only print data on or after'),
      ('print_end', '999912', 'only print data on or before'),
  )
  for flag_name, flag_default, flag_help in string_flags:
    configs.DEFINE_string(flag_name, flag_default, flag_help)
  configs.DEFINE_integer('num_batches', None, 'num_batches overrride')

  config = configs.get_configs()
  if config.test_datafile is None:
    config.test_datafile = config.datafile

  data_path = model_utils.get_data_path(config.data_dir, config.test_datafile)

  # One data point per batch so each printed line maps to one input row.
  dataset = BatchGenerator(data_path, config,
                           batch_size=1,
                           num_unrollings=config.num_unrollings)

  # --num_batches, when supplied, overrides the size reported by the dataset.
  num_data_points = dataset.num_batches
  if config.num_batches is not None:
    num_data_points = config.num_batches

  tf_config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=False)

  with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
    model = model_utils.get_trained_model(session, config, verbose=False)

    for _batch_idx in range(num_data_points):
      batch = dataset.next_batch()
      preds = model.step(session, batch)
      seq_len = get_seq_length(batch)
      last_step = seq_len - 1
      key, date = get_key_and_date(batch, last_step)

      inside_window = (date >= config.print_start and
                       date <= config.print_end)
      if not inside_window:
        continue

      score = get_score(config, preds, last_step)
      target = get_target(config, batch, last_step)
      print("%s %s %.6f %.6f %d" % (key, date, score, target, seq_len))

コード例 #2

0

ファイルを表示

ファイル: train.py プロジェクト: bartlomiejszozda/handwrittenPairsGenerator

def main():
    """Train the graph from ``create_graph``, optionally resuming a prior run.

    Settings come from the module-level ``args``. When ``args.restore`` is
    set, the latest checkpoint under ``<restore>/models`` is restored and
    training continues at the epoch after the one encoded in the checkpoint
    file name. Otherwise variables are initialized and a new experiment
    path is requested. A checkpoint is saved after every epoch and a
    summary is written for every batch.
    """
    resume_dir = args.restore
    print(resume_dir)
    seq_len = args.seq_len
    batch_size = args.batch_size
    total_epochs = args.epochs
    steps_per_epoch = 1000

    generator = BatchGenerator(batch_size, seq_len)
    graph, tensors = create_graph(generator.num_letters,
                                  batch_size,
                                  num_units=args.units,
                                  lstm_layers=args.lstm_layers,
                                  window_mixtures=args.window_mixtures,
                                  output_mixtures=args.output_mixtures)

    with tf.Session(graph=graph) as sess:
        saver = tf.train.Saver(max_to_keep=2)
        if not resume_dir:
            sess.run(tf.global_variables_initializer())
            run_dir = next_experiment_path()
            first_epoch = 0
        else:
            checkpoint = tf.train.latest_checkpoint(
                os.path.join(resume_dir, 'models'))
            run_dir = resume_dir
            # The checkpoint name ends in "-<epoch>"; resume with the next one.
            first_epoch = int(checkpoint.rsplit('-', 1)[-1]) + 1
            saver.restore(sess, checkpoint)

        writer = tf.summary.FileWriter(run_dir, graph=graph, flush_secs=10)
        writer.add_session_log(
            tf.SessionLog(status=tf.SessionLog.START),
            global_step=first_epoch * steps_per_epoch)

        fetches = [tensors.loss, tensors.summary, tensors.train_step]
        for epoch in range(first_epoch, total_epochs):
            print('\nEpoch {}'.format(epoch))
            for step in range(1, steps_per_epoch + 1):
                coords, seq, reset, needed = generator.next_batch()
                if needed:
                    sess.run(tensors.reset_states,
                             feed_dict={tensors.reset: reset})
                feed = {tensors.coordinates: coords, tensors.sequence: seq}
                loss_value, summary, _ = sess.run(fetches, feed_dict=feed)
                writer.add_summary(
                    summary, global_step=epoch * steps_per_epoch + step)
                progress = '\r[{:5d}/{:5d}] loss = {}'.format(
                    step, steps_per_epoch, loss_value)
                print(progress, end='')

            checkpoint_prefix = os.path.join(run_dir, 'models', 'model')
            saver.save(sess, checkpoint_prefix, global_step=epoch)

コード例 #3

0

ファイルを表示

def predict(config):
    """Run the model over every batch and print/record its predictions.

    The data file is ``config.predict_datafile`` when set, otherwise
    ``config.datafile``. ``config.batch_size`` is forced to 1. Non-NaN MSE
    values are grouped by batch date. If ``config.mse_outfile`` is set, the
    per-date mean MSE and the overall mean are written there; otherwise the
    process exits via ``exit()``.
    """
    datafile = config.datafile
    predict_override = config.predict_datafile
    if predict_override is not None:
        datafile = predict_override

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of MSE values observed for that date
        perfs = {}

        for _ in range(batches.num_batches):
            batch = batches.next_batch()
            mse, preds = model.step(session, batch)

            if not math.isnan(mse):
                perfs.setdefault(batch_to_date(batch), []).append(mse)

            if config.pretty_print_preds is True:
                pretty_print_predictions(batches, batch, preds, mse)
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is None:
            exit()
        else:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    date_mses = perfs[date]
                    line = "%s %.6f %d" % (date, np.mean(date_mses),
                                           len(date_mses))
                    print(line, file=f)
                every_mse = [x for v in perfs.values() for x in v]
                print("Total %.6f" % np.mean(every_mse), file=f)

コード例 #4

0

ファイルを表示

ファイル: train_model.py プロジェクト: mantripragada-manogna/Data-Science-Projects

def main():
    """Train the graph from ``create_graph`` for a fixed number of epochs.

    Runs 30 epochs of 1000 batches each with batch size 64 and sequence
    length 256. A summary is written for every batch and a checkpoint is
    saved under ``<model_path>/models`` after every epoch.
    """
    seq_len = 256
    batch_size = 64
    epochs = 30
    steps_per_epoch = 1000

    generator = BatchGenerator(batch_size, seq_len)
    graph, tensors = create_graph(generator.num_letters, batch_size)

    with tf.Session(graph=graph) as sess:
        saver = tf.train.Saver(max_to_keep=2)
        sess.run(tf.global_variables_initializer())
        model_path = get_model_path()

        writer = tf.summary.FileWriter(model_path, graph=graph, flush_secs=10)
        writer.add_session_log(
            tf.SessionLog(status=tf.SessionLog.START), global_step=0)

        fetches = [tensors.loss, tensors.summary, tensors.train_step]
        for epoch in range(epochs):
            now = datetime.datetime.now().time()
            print('\n{} : Epoch {}'.format(now, epoch))
            for step in range(1, steps_per_epoch + 1):
                coordinates, labels, reset, to_reset = generator.next_batch()
                if to_reset:
                    sess.run(tensors.reset_states,
                             feed_dict={tensors.reset: reset})
                feed = {
                    tensors.coordinates: coordinates,
                    tensors.sequence: labels,
                }
                loss, summary, _ = sess.run(fetches, feed_dict=feed)
                writer.add_summary(
                    summary, global_step=epoch * steps_per_epoch + step)
                progress = '\r[{:5d}/{:5d}] loss = {}'.format(
                    step, steps_per_epoch, loss)
                print(progress, end='')

            checkpoint_prefix = os.path.join(model_path, 'models', 'model')
            saver.save(sess, checkpoint_prefix, global_step=epoch)

コード例 #5

0

ファイルを表示

ファイル: batch_testor.py プロジェクト: euclidjda/dnn-quant

def _print_batch(batch):
    """Print the diagnostic fields of one batch, each under its own header."""
    print("-----------------------------------------------------")
    sections = (
        ("----Atributes: ", "attribs"),
        ("----Sequence Lengths: ", "seq_lengths"),
        ("----Train Weights: ", "train_mask"),
        ("----Valid Weights: ", "valid_mask"),
        ("----Targets: ", "targets"),
    )
    for header, field in sections:
        print(header)
        print(getattr(batch, field))


# Register the extra flag before the configuration is parsed.
configs.DEFINE_float("rnn_loss_weight",
                     None,
                     "How much moret to weight kth example")
config = configs.get_configs()

# Fall back to the generic datafile when no training file is given.
if config.train_datafile is None:
    config.train_datafile = config.datafile

train_path = get_data_path(config.data_dir, config.train_datafile)

print("Loading batched data ...")

batches = BatchGenerator(train_path,
                         config,
                         config.batch_size,
                         config.num_unrollings,
                         validation_size=config.validation_size,
                         randomly_sample=True)

# Dump the first ten batches for visual inspection.
for i in range(10):
    b = batches.next_batch()
    _print_batch(b)

コード例 #6

0

ファイルを表示

def main(_):
    """Classify the test datafile, write predictions, then summarize errors.

    Extra flags are registered through ``configs`` and read back from
    ``configs.get_configs()``. When ``test_datafile`` is not given it falls
    back to ``datafile``. Batches shorter than ``config.num_unrollings`` are
    skipped. For the last step of every remaining batch whose date lies in
    [print_start, print_end], one line
    ``key date (1-prob) prob target step`` is written to ``config.output``.
    Per-date and overall ('ALL') error/confusion records are collected and
    passed to ``print_summary_stats``. The input file and the output can be
    joined with the unix ``paste`` command, e.g.:

      $ classify_data.py --config=train.conf --test_datafile=test.dat --output=output.dat
      $ paste -d ' ' test.dat output.dat > input_and_output.dat
    """
    configs.DEFINE_string('test_datafile', None, 'file with test data')
    configs.DEFINE_string('output', 'preds.dat', 'file for predictions')
    configs.DEFINE_string('time_field', 'date', 'fields used for dates/time')
    configs.DEFINE_string('print_start', '190001',
                          'only print data on or after')
    configs.DEFINE_string('print_end', '210012',
                          'only print data on or before')
    configs.DEFINE_integer('min_test_k', 1, 'minimum seq length classified')
    configs.DEFINE_integer('num_batches', None, 'num_batches overrride')

    config = configs.get_configs()
    if config.test_datafile is None:
        config.test_datafile = config.datafile

    data_path = model_utils.get_data_path(config.data_dir,
                                          config.test_datafile)
    print("Loading data %s" % data_path)

    # One data point per batch so each output line maps to one input row.
    dataset = BatchGenerator(data_path,
                             config,
                             batch_size=1,
                             num_unrollings=config.num_unrollings)

    # --num_batches, when supplied, overrides the size reported by the dataset.
    num_data_points = dataset.num_batches
    if config.num_batches is not None:
        num_data_points = config.num_batches
    print("num_batches = ", num_data_points)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        print("Loading model.")
        model = model_utils.get_trained_model(session, config)

        # 'ALL' aggregates every record; other keys are individual dates.
        stats = {'ALL': []}

        with open(config.output, "w") as outfile:
            for _batch_idx in range(num_data_points):
                batch = dataset.next_batch()
                preds = model.step(session, batch)
                seq_len = get_seq_length(batch)

                if seq_len < config.num_unrollings:
                    continue

                # Only the final step of the sequence is classified.
                for step in range(seq_len - 1, seq_len):
                    key, date = get_key_and_date(batch, step)
                    if date < config.print_start or date > config.print_end:
                        continue
                    prob = get_pos_prob(config, preds, step)
                    target = get_target(batch, step)
                    record = (key, date, 1.0 - prob, prob, target, step + 1)
                    outfile.write("%s %s %.4f %.4f %d %d\n" % record)

                    pred = 1.0 if prob >= 0.5 else 0.0
                    curstat = {
                        'error': 0.0 if (pred == target) else 1.0,
                        'tpos': 1.0 if (pred == 1 and target == 1) else 0.0,
                        'tneg': 1.0 if (pred == 0 and target == 0) else 0.0,
                        'fpos': 1.0 if (pred == 1 and target == 0) else 0.0,
                        'fneg': 1.0 if (pred == 0 and target == 1) else 0.0,
                    }
                    stats.setdefault(date, []).append(curstat)
                    stats['ALL'].append(curstat)

        print_summary_stats(stats)

コード例 #7

0

ファイルを表示

def main():
    """Dump the scaled input features of every batch and summarize them.

    With ``config.batch_size`` forced to 1, each step of every batch
    contributes one row holding the inputs standardized with the
    'StandardScaler' parameters reported by the batch generator. The rows,
    prefixed by gvkey/date/step columns, are written space-separated to
    ``config.mse_outfile``. Afterwards the mean and standard deviation of
    each feature are printed, followed by its (up to) five smallest and
    five largest rows.
    """
    config = deep_quant.get_configs()
    train_path = utils.data_utils.get_data_path(config.data_dir,
                                                config.datafile)

    print("Loading training data ...")

    config.batch_size = 1
    batches = BatchGenerator(train_path, config)

    params = batches.get_scaling_params('StandardScaler')
    center = params['center']
    scale = params['scale']

    print(scale)
    print(center)

    col_names = batches.feature_names

    gvkeys = []
    dates = []
    steps = []
    # Rows are collected in a list and turned into a DataFrame once at the
    # end; appending with ``df.loc[n] = x`` copies the frame on every row.
    rows = []

    print("Num batches sampled: %d" % batches.num_batches)
    for j in range(batches.num_batches):
        b = batches.next_batch()
        seq_len = b.seq_lengths[0]
        # Key and date are read from the last step of the sequence for
        # every row of this batch.
        idx = seq_len - 1
        for i in range(seq_len):
            gvkeys.append(b.attribs[idx][0][0])
            dates.append(b.attribs[idx][0][1])
            steps.append(i)
            # assumes b.inputs[i][0] is a 1-D vector with one entry per
            # feature name -- TODO confirm against BatchGenerator
            rows.append((b.inputs[i][0] - center) / scale)
        if (j % 1000) == 0:
            # Progress marker every 1000 batches.
            print(".", end='')
            sys.stdout.flush()
    print()

    features = pd.DataFrame(rows, columns=col_names)
    ids = pd.DataFrame({'gvkey': gvkeys, 'date': dates, 'step': steps})
    df = pd.concat([ids, features], axis=1)

    # write to outfile
    df.to_csv(config.mse_outfile, sep=' ', float_format="%.4f")

    # print feature characteristics
    for feature in col_names:
        mean = np.mean(df[feature])
        std = np.std(df[feature])
        print("%s %.4f %.4f" % (feature, mean, std))

    print('--------------------------------')

    # print min and max values; never index past the end of a short frame
    n_extremes = min(5, len(df))
    for feature in col_names:
        print("%s:" % feature)
        ascending = df.sort_values(feature)
        descending = df.sort_values(feature, ascending=False)
        for i in range(n_extremes):
            min_el = ascending.iloc[i, :]
            max_el = descending.iloc[i, :]
            print("%s %s %s %s" %
                  (min_el['gvkey'], min_el['date'], min_el['step'],
                   min_el[feature]), end=' ')
            print("%s %s %s %s" %
                  (max_el['gvkey'], max_el['date'], max_el['step'],
                   max_el[feature]))
        print('--------------------------------')

コード例 #8

0

ファイルを表示

ファイル: batch_testor.py プロジェクト: rpatil524/dnn-quant

                     "How much moret to weight kth example")
def _dump_batch(batch):
    """Print the diagnostic fields of one batch, each under its own header."""
    print("-----------------------------------------------------")
    sections = (
        ("----Atributes: ", "attribs"),
        ("----Sequence Lengths: ", "seq_lengths"),
        ("----Train Weights: ", "train_mask"),
        ("----Valid Weights: ", "valid_mask"),
        ("----Targets: ", "targets"),
    )
    for header, field in sections:
        print(header)
        print(getattr(batch, field))


config = configs.get_configs()

# Fall back to the generic datafile when no training file is given.
if config.train_datafile is None:
    config.train_datafile = config.datafile

train_path = get_data_path(config.data_dir, config.train_datafile)

print("Loading batched data ...")

batches = BatchGenerator(train_path, config,
                         config.batch_size, config.num_unrollings,
                         validation_size=config.validation_size,
                         randomly_sample=True)

# Dump the first ten batches for visual inspection.
for i in range(10):
    b = batches.next_batch()
    _dump_batch(b)

コード例 #9

0

ファイルを表示

ファイル: predict.py プロジェクト: xiaosixugithub/deep-quant

def predict(config):
    """Run the model over every batch; print, pickle and summarize results.

    The data file is ``config.predict_datafile`` when set, otherwise
    ``config.datafile``. ``config.batch_size`` is forced to 1. Non-NaN MSE
    values are grouped by batch date. Depending on the config, predictions
    are pretty-printed or printed. When ``config.df_dirname`` is set,
    target/output/mse frames are accumulated and pickled into that
    directory. If ``config.mse_outfile`` is set, per-date and total mean MSE
    are written there; otherwise the process exits via ``exit()``.
    """
    datafile = config.datafile
    predict_override = config.predict_datafile
    if predict_override is not None:
        datafile = predict_override

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Accumulator frames, in the order: target, output, mse.
    frames = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of MSE values observed for that date
        perfs = {}

        for _ in range(batches.num_batches):
            batch = batches.next_batch()

            mse, preds = model.step(session,
                                    batch,
                                    keep_prob=config.keep_prob_pred)
            mse_is_valid = not math.isnan(mse)

            if mse_is_valid:
                date = batch_to_date(batch)
                key = batch_to_key(batch)
                perfs.setdefault(date, []).append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            elif config.print_preds:
                print_predictions(config, batches, batch, preds, mse)

            # Frames are only updated when an output directory is configured
            # and the batch produced a usable MSE.
            if config.df_dirname is not None and mse_is_valid:
                values = [
                    get_value(batches, batch, 'target'),
                    get_value(batches, batch, 'output', preds),
                    mse,
                ]
                assert len(frames) == len(values)
                for slot, value in enumerate(values):
                    frames[slot] = update_df(frames[slot], date, key, value)

        # Save the DataFrames
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-df.pkl', 'mse-df.pkl']
            assert len(frames) == len(save_names)
            for frame, save_name in zip(frames, save_names):
                frame.to_pickle(os.path.join(config.df_dirname, save_name))

        # MSE Outfile
        if config.mse_outfile is None:
            exit()
        else:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    date_mses = perfs[date]
                    line = "%s %.6f %d" % (date, np.mean(date_mses),
                                           len(date_mses))
                    print(line, file=f)
                every_mse = [x for v in perfs.values() for x in v]
                print("Total %.6f" % np.mean(every_mse), file=f)

コード例 #10

0

ファイルを表示

def predict_pie(config):
    """Run the model in 'PIE' mode and pickle target/lower/upper frames.

    The print options of the config are not used; only DataFrames are
    produced. The data file is ``config.predict_datafile`` when set,
    otherwise ``config.datafile``, and ``config.batch_size`` is forced to 1.
    Every 10000th batch is pretty-printed as a progress check.

    When ``config.df_dirname`` is set, the target, lower-bound and
    upper-bound values of every batch are accumulated and pickled into that
    directory (created if missing). When it is unset nothing is saved; the
    save step is skipped instead of failing on a ``None`` path.
    """
    datafile = config.datafile

    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Accumulator frames, in the order: target, lower bound, upper bound.
    df_list = [pd.DataFrame(), pd.DataFrame(), pd.DataFrame()]

    # MSE is not evaluated in the PIE case; zeros are passed only to satisfy
    # the signature of the printing helper.
    mse_dummy = mse_var_dummy = 0.0

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        for i in range(batches.num_batches):
            batch = batches.next_batch()

            (_mpiw, _, _, preds_lb,
             preds_ub) = model.step(session,
                                    batch,
                                    keep_prob=config.keep_prob_pred,
                                    uq=config.UQ,
                                    UQ_model_type='PIE')

            date = batch_to_date(batch)
            key = batch_to_key(batch)

            # Print every 10000 iterations to monitor progress.
            if i % 10000 == 0:
                pretty_print_predictions(batches, batch, preds_lb, preds_ub,
                                         mse_dummy, mse_var_dummy)

            # Collect values only if an output directory is configured.
            if config.df_dirname is not None:
                values_list = [
                    get_value(batches, batch, 'target'),
                    get_value(batches, batch, 'output_lb', preds_lb),
                    get_value(batches, batch, 'output_ub', preds_ub),
                ]
                assert len(df_list) == len(values_list)
                for j, value in enumerate(values_list):
                    df_list[j] = update_df(df_list[j], date, key, value)

        # Save the DataFrames. Guarded because os.path.isdir(None) raises
        # TypeError when no output directory was configured.
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = [
                'target-df.pkl', 'output-lb-df.pkl', 'output-ub-df.pkl'
            ]
            assert len(df_list) == len(save_names)
            for frame, save_name in zip(df_list, save_names):
                frame.to_pickle(os.path.join(config.df_dirname, save_name))

コード例 #11

0

ファイルを表示

ファイル: action_and_id.py プロジェクト: lbianculli/starcraft_agents

# Session that runs both the training and the evaluation ops.
sess = tf.Session(config=config)

# Local variables are initialized as well; the accuracy op relies on them.
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
sess.run(init_op)

saver = tf.train.Saver()  # for saving and restoring the model
writer = tf.summary.FileWriter('./action_and_id_log', sess.graph)
merge_op = tf.summary.merge_all()  # single op that evaluates all summaries

bg = BatchGenerator()
print("Beginning training session")
for step in range(250):
    m, s, u, a = bg.next_batch(get_action_id_only=True)
    # The same feed is used for the training step and the accuracy check.
    feed = {minimap: m, screen: s, info: u, action_output: a}

    _, loss_, result = sess.run([train_op, loss, merge_op], feed)
    writer.add_summary(result, step)

    # Evaluate accuracy on the current batch every 50 steps.
    if step % 50 == 0:
        accuracy_ = sess.run([accuracy], feed)

コード例 #12

0

ファイルを表示

ファイル: embed_bins.py プロジェクト: Wook0129/semantic_binning

    def learn_bin_embeddings(self, dummy_coded_data, var_dict, embedding_dim,
                             lr, n_epoch, weight_decay, batch_size, verbose):
        """Train a ``BinEmbedding`` on GPU and store one vector per column.

        Args:
            dummy_coded_data: table whose ``len``, ``shape[1]`` and
                ``columns`` are read; presumably a dummy-coded pandas
                DataFrame -- verify against caller.
            var_dict: dict with key 'numerical_vars' and optionally
                'categorical_vars'; their lengths give the variable count.
            embedding_dim: dimension passed to ``BinEmbedding``.
            lr: learning rate for the Adagrad optimizer.
            n_epoch: number of epochs to train.
            weight_decay: accepted for interface compatibility; not used in
                this method body.
            batch_size: upper bound on the batch size; the effective size is
                capped at a tenth of the instances but never below 1.
            verbose: when true, print epoch number and loss after each epoch.

        Side effects:
            Sets ``self.be`` to the trained model and
            ``self.embedding_by_column`` to a dict mapping each column of
            ``dummy_coded_data`` to its embedding row.
        """
        n_variables = len(var_dict['numerical_vars'])
        if 'categorical_vars' in var_dict:
            n_variables += len(var_dict['categorical_vars'])
        inputs, targets = self._generate_instances(dummy_coded_data,
                                                   n_variables)

        n_instances = len(dummy_coded_data)
        n_dummy_cols = dummy_coded_data.shape[1]
        # Clamp to at least 1: with fewer than 10 instances the cap would be
        # 0 and the division below would raise ZeroDivisionError.
        batch_size = max(1, min(int(n_instances / 10), batch_size))
        n_iter_per_epoch = int(
            np.ceil(n_instances * (n_variables - 1) / batch_size))
        batch_gen = BatchGenerator(inputs, targets, batch_size)

        # Fixed seeds for reproducible initialization.
        torch.cuda.random.manual_seed_all(42)
        torch.manual_seed(42)

        self.be = BinEmbedding(n_dummy_cols, embedding_dim).cuda()

        loss_ftn = nn.CrossEntropyLoss()

        opt = torch.optim.Adagrad(self.be.parameters(), lr=lr, lr_decay=0.001)

        for it in range(n_iter_per_epoch * n_epoch):

            input_batch, target_batch = batch_gen.next_batch()

            opt.zero_grad()

            input_batch = Variable(torch.LongTensor(input_batch)).cuda()
            target_batch = Variable(torch.LongTensor(target_batch)).cuda()

            out = self.be(input_batch)
            loss = loss_ftn(out, target_batch)

            loss.backward()
            opt.step()

            # Normalize embedding vectors: divide each row by its L2 norm.
            embedding_norm = torch.norm(self.be.embedding.weight, p=2,
                                        dim=1).data
            embedding_norm = embedding_norm.view(-1, 1).expand_as(
                self.be.embedding.weight)
            self.be.embedding.weight.data = self.be.embedding.weight.data.div(
                embedding_norm)

            end_of_epoch = (it + 1) % n_iter_per_epoch == 0
            if end_of_epoch and verbose:
                # ``loss.data[0]`` fails on 0-dim tensors in current PyTorch;
                # fall back to it only where ``item()`` does not exist.
                if hasattr(loss, 'item'):
                    loss_value = loss.item()
                else:
                    loss_value = loss.data[0]
                print('>>> Epoch = {}'.format(
                    int((it + 1) / n_iter_per_epoch)))
                print('Loss = {}'.format(loss_value))

        embedding_weights = self.be.state_dict()['embedding.weight'].cpu(
        ).numpy()
        self.embedding_by_column = dict(
            zip(list(dummy_coded_data.columns), embedding_weights))

コード例 #13

0

ファイルを表示

ファイル: predict.py プロジェクト: my2582/predicting-per

def predict(config):
    """Run the model over every batch and export predictions for one target.

    The data file is ``config.predict_datafile`` when set, otherwise
    ``config.datafile``. ``config.batch_size`` is forced to 1. Non-NaN MSE
    values are grouped by batch date. When ``config.pretty_print_preds`` is
    True, batches whose date ends in month 3, 6, 9 or 12 add one row (date,
    gvkey, mse, normalizer, raw output, raw target) for
    ``config.target_field``; otherwise predictions are printed. The rows are
    always written to ``datasets/<config.output_file>``. If
    ``config.mse_outfile`` is set, per-date and total mean MSE are written
    there first; otherwise the process exits via ``exit()`` after the CSV is
    saved.
    """
    target_list = [
        'saleq_ttm', 'cogsq_ttm', 'xsgaq_ttm', 'oiadpq_ttm', 'mkvaltq_ttm',
        'niq_ttm', 'ibq_ttm', 'cheq_mrq', 'rectq_mrq', 'invtq_mrq', 'acoq_mrq',
        'ppentq_mrq', 'aoq_mrq', 'dlcq_mrq', 'apq_mrq', 'txpq_mrq', 'lcoq_mrq',
        'ltq_mrq'
    ]
    output_columns = [
        'date', 'gvkey', 'mse', 'normalizer',
        config.target_field + "_output",
        config.target_field + "_target",
    ]
    df = pd.DataFrame(columns=output_columns)

    datafile = config.datafile
    predict_override = config.predict_datafile
    if predict_override is not None:
        datafile = predict_override

    print("Loading data from %s ..." % datafile)
    path = os.path.join(config.data_dir, datafile)

    next_row = 0

    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Position of the requested target field within target_list.
    matches = np.argwhere(np.array(target_list) == config.target_field)
    index = int(matches.mean())

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # date -> list of MSE values observed for that date
        perfs = {}

        for _ in range(batches.num_batches):
            batch = batches.next_batch()
            mse, preds = model.step(session, batch)

            if not math.isnan(mse):
                perfs.setdefault(batch_to_date(batch), []).append(mse)

            if config.pretty_print_preds is True:
                key = batch_to_key(batch)
                date = batch_to_date(batch)
                if int(date % 100) in (3, 6, 9, 12):
                    print("GVKEY: " + str(key) + ", Date: " + str(date))
                    last_step = batch.seq_lengths[0] - 1
                    targets = batch.targets[last_step][0]
                    outputs = preds[0]
                    normalizer = batch.normalizers[0]

                    np.set_printoptions(suppress=True, precision=3)

                    raw_output = batches.get_raw_outputs(batch, 0,
                                                         outputs)[index]
                    raw_target = batches.get_raw_outputs(batch, 0,
                                                         targets)[index]
                    df.loc[next_row] = [
                        date, key, mse, normalizer, raw_output, raw_target
                    ]
                    next_row += 1
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    date_mses = perfs[date]
                    line = "%s %.6f %d" % (date, np.mean(date_mses),
                                           len(date_mses))
                    print(line, file=f)
                every_mse = [x for v in perfs.values() for x in v]
                print("Total %.6f" % np.mean(every_mse), file=f)

        # The CSV is written on both paths; only the no-outfile path exits.
        df.to_csv('datasets/' + config.output_file, index=False)
        if config.mse_outfile is None:
            exit()