Example #1
def get_search_configs():
    """
    Defines the configurations for hyperparameter search
    """
    configurations.DEFINE_string("template", None,
                                 "Template file for hyper-param search")
    configurations.DEFINE_string(
        "search_algorithm", "genetic",
        "Algorithm for hyper-param optimization. Select from 'genetic', 'grid_search'"
    )
    configurations.DEFINE_integer(
        "generations", 100, "Number of generations for genetic algorithm")
    configurations.DEFINE_integer("pop_size", 20,
                                  "Population size for genetic algorithm")
    configurations.DEFINE_integer("num_survivors", 10,
                                  "Number of survivors for genetic algorithm")
    configurations.DEFINE_integer(
        "num_threads", 4,
        "Number of parallel threads (number of parallel executions)")
    configurations.DEFINE_integer(
        "num_gpu", 1, "Number of GPUs on the machine; use 0 if there are none")
    configurations.DEFINE_integer("sleep_time", 1, "Sleep time")
    configurations.DEFINE_float("mutate_rate", 0.2,
                                "Mutation rate for genetic algorithm")
    configurations.DEFINE_string(
        "init_pop", None,
        "Specify starting population. Path to the pickle file")

    c = configurations.ConfigValues()

    return c
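The configurations module itself does not appear in these examples; only the call shapes (DEFINE_<type>(name, default, help) followed by ConfigValues()) are visible. Below is a minimal sketch of how such a module could be built on argparse; everything beyond those call shapes is an assumption, not the actual implementation.

import argparse

_parser = argparse.ArgumentParser()

def DEFINE_string(name, default, help_str):
    _parser.add_argument('--' + name, type=str, default=default, help=help_str)

def DEFINE_integer(name, default, help_str):
    _parser.add_argument('--' + name, type=int, default=default, help=help_str)

def DEFINE_float(name, default, help_str):
    _parser.add_argument('--' + name, type=float, default=default, help=help_str)

def DEFINE_boolean(name, default, help_str):
    # Accepts --flag=True/False style values on the command line.
    _parser.add_argument('--' + name, default=default, help=help_str,
                         type=lambda s: s.lower() in ('true', '1'))

def ConfigValues():
    # Returns a namespace whose attributes are the defined flags,
    # e.g. c.generations or c.pop_size in the example above.
    return _parser.parse_args()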
Example #2
def main(_):
  """
  The model specified by the command line arg --model_dir is applied to every
  data point in --test_datafile and the model output is written to standard
  output. The unix command 'paste' can be used to stitch the input file and
  output together, e.g.,
  $ classifiy_data.py --config=train.conf --test_datafile=test.dat > output.dat
  $ paste -d ' ' test.dat output.dat > input_and_output.dat
  """
  configs.DEFINE_string('test_datafile', None, 'file with test data')
  configs.DEFINE_string('time_field', 'date', 'field used for dates/times')
  configs.DEFINE_string('print_start', '190001', 'only print data on or after')
  configs.DEFINE_string('print_end', '999912', 'only print data on or before')
  configs.DEFINE_integer('num_batches', None, 'num_batches override')

  config = configs.get_configs()

  if config.test_datafile is None:
    config.test_datafile = config.datafile

  batch_size = 1
  data_path = model_utils.get_data_path(config.data_dir, config.test_datafile)

  # print("Loading data %s"%data_path)

  dataset = BatchGenerator(data_path, config,
                           batch_size=batch_size,
                           num_unrollings=config.num_unrollings)

  num_data_points = dataset.num_batches
  if config.num_batches is not None:
    num_data_points = config.num_batches

  #print("num_batches = ", num_data_points)

  tf_config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=False)

  with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

    #print("Loading model.")

    model = model_utils.get_trained_model(session, config, verbose=False)

    for i in range(num_data_points):

      batch = dataset.next_batch()
      preds = model.step(session, batch)
      seq_len = get_seq_length(batch)
      key, date = get_key_and_date(batch, seq_len-1)

      if (date < config.print_start or date > config.print_end):
        continue

      score  = get_score(config, preds, seq_len-1)
      target = get_target(config, batch, seq_len-1)

      print("%s %s %.6f %.6f %d" % (key, date, score, target, seq_len))
def get_configs():
    """
    Defines the configurations for hyperparameter search
    """
    configurations.DEFINE_string("configs_fname",None,"CSV containing all the configs to run")
    configurations.DEFINE_boolean("predict",True,"Run predictions after training")
    configurations.DEFINE_integer("num_threads",4,"NUmber of parallel threads (Number of parallel executions)")
    configurations.DEFINE_integer("num_gpu",1,"Number of GPU on the machine, Use 0 if there are None")
    configurations.DEFINE_integer("sleep_time",1,"Sleep time")
    configurations.DEFINE_integer("start_date",None,"First date for prediction on as YYYYMM")
    configurations.DEFINE_integer("end_date",None,"Last date for prediction on as YYYYMM")

    c = configurations.ConfigValues()

    return c
Example #4
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("datasource", 'big_datafile',
                          "The source of the data.")
    configs.DEFINE_string("tkrlist", "big_tkrlist.csv",
                          "The list of filters to use.")
    configs.DEFINE_string("datafile", 'big_datafile.dat', "a datafile name.")
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("default_gpu", '',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("feature_fields", '',
                          "shared input and target field names")
    configs.DEFINE_string("aux_input_fields", None,
                          "non-target, input only fields")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", '', "Model directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None,
                           "Max number of unrolling steps")
    # num_unrollings is being deprecated in favor of max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    configs.DEFINE_integer("num_layers", 1, "Numer of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False,
                           "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False,
                           "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for traking a cahce")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # This maps it to the dictionary {param1: value1, param2: value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
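As an illustration of the optimizer_params handling above (not code from the examples): a flag value such as 'momentum=0.9,use_nesterov=1' becomes a keyword dictionary with float values, and learning_rate is deliberately kept out of it because it is a separate flag.

s = 'momentum=0.9,use_nesterov=1'
params = {k: float(v) for k, v in (p.split('=') for p in s.split(','))}
assert params == {'momentum': 0.9, 'use_nesterov': 1.0}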
Example #5
def get_configs():
    configs.DEFINE_string("name", 'trial1', "")
    configs.DEFINE_string("datafile", 'Group2-Dataset.csv', "")
    configs.DEFINE_string("predict_datafile", None, "")
    configs.DEFINE_string("mse_outfile", None, "")
    configs.DEFINE_string("scalesfile", None, "")
    configs.DEFINE_string("default_gpu", '/gpu:0', "")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "")
    configs.DEFINE_string("active_field", 'active', "")
    configs.DEFINE_string("date_field", 'date', "")
    configs.DEFINE_string("key_field", 'gvkey', "")
    configs.DEFINE_string("target_field", 'mkvaltq_ttm', "")
    configs.DEFINE_string("scale_field", 'mrkcap', "")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq', "")
    configs.DEFINE_string("aux_fields", 'mom3m-mom9m', "")
    configs.DEFINE_string("dont_scale", None, "")
    configs.DEFINE_string("data_dir", 'datasets', "")
    configs.DEFINE_string("model_dir", 'chkpts-wrds-rnn', "")
    configs.DEFINE_string("rnn_cell", 'lstm', "")
    configs.DEFINE_string("activation_fn", 'relu', "")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5, "")
    configs.DEFINE_integer("max_unrollings", 5, "")
    configs.DEFINE_integer("min_years", None, "")
    configs.DEFINE_integer("max_years", None, "")
    configs.DEFINE_integer("pls_years", None, "")

    configs.DEFINE_integer("num_unrollings", 5, "")
    configs.DEFINE_integer("stride", 12, "")
    configs.DEFINE_integer("forecast_n", 3, "")
    configs.DEFINE_integer("batch_size", 128, "")
    configs.DEFINE_integer("num_layers", 5, "")
    configs.DEFINE_integer("num_hidden", 128, "")
    configs.DEFINE_float("training_noise", None, "")
    configs.DEFINE_float("init_scale", 0.01, "")
    configs.DEFINE_float("max_grad_norm", 10.0, "")
    configs.DEFINE_integer("start_date", None, "")
    configs.DEFINE_integer("end_date", None, "")
    configs.DEFINE_integer("split_date", None, "")
    configs.DEFINE_float("keep_prob", 0.75, "")
    configs.DEFINE_boolean("train", False, "")
    configs.DEFINE_boolean("require_targets", False, "")
    configs.DEFINE_boolean("input_dropout", False, "")
    configs.DEFINE_boolean("hidden_dropout", False, "")
    configs.DEFINE_boolean("rnn_dropout", True, "")
    configs.DEFINE_boolean("skip_connections", False, "")
    configs.DEFINE_boolean("direct_connections", False, "")
    configs.DEFINE_boolean("use_cache", True, "")
    configs.DEFINE_boolean("pretty_print_preds", True, "")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean("backfill", False, "")
    configs.DEFINE_boolean("log_squasher", True, "")
    configs.DEFINE_boolean("ts_smoother", False, "")
    configs.DEFINE_string("data_scaler", 'RobustScaler', '')
    configs.DEFINE_string("optimizer", 'AdadeltaOptimizer', '')
    configs.DEFINE_string("optimizer_params", None, '')
    configs.DEFINE_float("learning_rate", 0.6, "")
    configs.DEFINE_float("lr_decay", 0.95, "")
    configs.DEFINE_float("validation_size", 0.3, "")
    configs.DEFINE_float("train_until", 0.0, "")
    configs.DEFINE_float("passes", 0.2, "")
    configs.DEFINE_float("target_lambda", 0.8, "")
    configs.DEFINE_float("rnn_lambda", 0.2, "")
    configs.DEFINE_float("l2_alpha", 0.0, "")
    configs.DEFINE_integer("max_epoch", 1000, "")
    configs.DEFINE_integer("early_stop", 10, "")
    configs.DEFINE_integer("seed", 100, "")
    configs.DEFINE_integer("cache_id", 100, "")
    configs.DEFINE_string("output_file", "mkvaltq_2016.csv", "")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # This maps it to the dictionary {param1: value1, param2: value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
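A worked example of the years-to-unrollings conversion above, assuming monthly rows so that stride=12 means one step per year:

stride, min_years, pls_years = 12, 3, 2
steps_per_year = 12 // stride                               # 1
min_unrollings = min_years * steps_per_year                 # 3
max_unrollings = (min_years + pls_years) * steps_per_year   # 5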
Example #6
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'hpo-test', "A name for the config.")
    configs.DEFINE_string("datafile", 'source-ml-data-v8-100M.dat',
                          "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_list_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_list_string("activation_fn", 'relu',
                               "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_list_integer("min_unrollings", 5,
                                "Min number of unrolling steps")
    configs.DEFINE_list_integer("max_unrollings", 5,
                                "Max number of unrolling steps")
    configs.DEFINE_list_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_list_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_list_integer("stride", 12,
                                "How many steps to skip per unrolling")

    configs.DEFINE_list_integer("batch_size", 256, "Size of each batch")
    configs.DEFINE_list_integer("num_layers", 2, "Numer of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_list_integer("num_hidden", 64,
                                "Number of hidden layer units")
    configs.DEFINE_list_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_list_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199812,
                           "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_list_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_list_float("recurrent_dropout", 0.3,
                              "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_list_string(
        "data_scaler", 'RobustScaler',
        'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_list_string("optimizer", 'Adadelta',
                               'Any tensorflow optimizer in tf.train')
    configs.DEFINE_list_float("learning_rate", 0.6,
                              "The initial starting learning rate")
    configs.DEFINE_list_float("lr_decay", 0.96, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_list_float(
        "target_lambda", 0.5,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_list_float(
        "rnn_lambda", 0.7,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 35, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 15, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_list_float("l2_alpha", 0.0,
                              "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_list_boolean("huber_loss", False,
                                "Use huber loss instead of mse")
    configs.DEFINE_list_float("huber_delta", 1.0, "delta for huber loss")
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to me made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_string('preds_fname', 'preds.dat',
                          'Name of the prediction file')
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")

    # HPO related params
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_string(
        "search_algorithm", "genetic",
        "Algorithm for hyper-param optimization. Select from 'genetic', 'grid_search', 'doe'"
    )
    configs.DEFINE_integer("generations", 5,
                           "Number of generations for genetic algorithm")
    configs.DEFINE_integer("pop_size", 16,
                           "Population size for genetic algorithm")
    configs.DEFINE_integer(
        "num_gpu", 1, "Number of GPUs on the machine; use 0 if there are none")
    configs.DEFINE_float("mutate_rate", 0.2,
                         "Mutation rate for genetic algorithm")
    configs.DEFINE_string("objective", 'mse', "Select between mse or uq_loss")
    configs.DEFINE_string("init_pop", None,
                          "Initial population to begin hyper param search")
    configs.DEFINE_boolean("save_latest_pop", False,
                           "Save the latest population")
    configs.DEFINE_string('doe_file', None, 'Design of experiments csv file')
    configs.DEFINE_integer("decay_steps", 100000,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", None, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01,
                         "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', None,
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', None,
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')

    c = configs.ConfigValues()

    c.data_dir = os.path.join(_data_dir_path, c.data_dir)
    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split(',')
    ]

    return c
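Note that this example splits forecast_steps_weights on commas, while Example #11 below splits the same flag on '-'; the parsing is otherwise identical. For illustration (not repo code):

s = '1.0,0.5,0.25'
weights = [float(x) for x in s.split(',')]
assert weights == [1.0, 0.5, 0.25]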
Example #7
import os

import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression

import model_utils
from model_utils import get_tabular_data
import configs

from tensorflow.python.platform import gfile
from batch_generator import BatchGenerator
"""
Entry point and main loop for train_net.py. Uses command line arguments to get
model and training specification (see config.py).
"""
configs.DEFINE_string("train_datafile", None, "Training file")
configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
configs.DEFINE_float("initial_learning_rate", 1.0, "Initial learning rate")
configs.DEFINE_float("validation_size", 0.0, "Size of validation set as %")
configs.DEFINE_integer("passes", 1, "Passes through day per epoch")
configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
configs.DEFINE_integer("early_stop", None, "Early stop parameter")
configs.DEFINE_integer("seed", None, "Seed for deterministic training")

config = configs.get_configs()

datafile = config.train_datafile if config.train_datafile else config.datafile

train_path = model_utils.get_data_path(config.data_dir, datafile)

cache_path = os.path.splitext(train_path)[0] + '.cache'
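The cache path above is simply the training file with its extension swapped for '.cache'; a quick illustration:

import os
assert os.path.splitext('datasets/train.dat')[0] + '.cache' == 'datasets/train.cache'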
Example #8
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'none', "A name for the config.")
    configs.DEFINE_string("datafile", 'open_dataset.dat', "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string(
        "mse_var_outfile", None,
        "A file to write mse_var values during predict phase.")
    configs.DEFINE_string("default_gpu", '',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", '',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", None, "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", 'chkpts',
                          "Model (checkpoint) directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None,
                           "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    # num_unrollings is being deprecated, replaced with max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    configs.DEFINE_integer("num_layers", 1, "Numer of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("training_noise", None,
                         "Level of training noise as multiple of 1-stdev")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_boolean("require_targets", False,
                           "Require target values for test predictions")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False,
                           "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False,
                           "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Have a linear fully connected weight skip hidden units in MLP")
    configs.DEFINE_boolean(
        "direct_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean(
        "print_preds", False,
        "Print predictions with just date, gvkey and output values")
    configs.DEFINE_string(
        "df_dirname", None,
        "Saves dataframes for target, output, variance/variance, mse and mse_var in df_dirname"
    )
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean(
        "backfill", False,
        "Backfill seq history to max_unrollings with data in first time step")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_boolean("ts_smoother", False,
                           "Use smoother on data time series during training")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("train_until", 0.0,
                         "Train until validation MSE is less than this value")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for traking a cahce")
    configs.DEFINE_float("keep_prob_pred", 1.0,
                         "Keep Prob for dropout during prediction")
    configs.DEFINE_boolean(
        "print_normalized_outputs", False,
        "Print normalized outputs. Doesn't apply to pretty print")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_string("UQ_model_type", 'MVE', "Select between MVE or PIE")
    configs.DEFINE_float(
        "noise_lambda", 1.0,
        "Weight decay for noise in the loss function. Refer to DeepBayesUQ Model"
    )
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("picp_lambda", 1.0,
                         "Contribution of PICP loss term for HQPI UQ model")
    configs.DEFINE_float(
        "smoothing_pi_check", 100,
        "Smoothing parameter for calculation of PI check in HQPI UQ model")
    configs.DEFINE_float(
        "confidence_alpha", 0.1,
        "Alpha used for calculating confidence level (= 1 - alpha)")
    configs.DEFINE_boolean("huber_loss", False,
                           "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # This maps it to the dictionary {param1: value1, param2: value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
Example #9
def main(_):
    """
  The model specified command line arg --model_dir is applied to every data
  point in --test_datafile and the model output is sent to --output. The unix
  command 'paste' can be used to stich the input file and output together.
  e.g.,
  $ classifiy_data.py --config=train.conf --test_datafile=test.dat --output=output.dat
  $ paste -d ' ' test.dat output.dat > input_and_output.dat
  """
    configs.DEFINE_string('test_datafile', None, 'file with test data')
    configs.DEFINE_string('output', 'preds.dat', 'file for predictions')
    configs.DEFINE_string('time_field', 'date', 'field used for dates/times')
    configs.DEFINE_string('print_start', '190001',
                          'only print data on or after')
    configs.DEFINE_string('print_end', '210012',
                          'only print data on or before')
    configs.DEFINE_integer('min_test_k', 1, 'minimum seq length classified')
    configs.DEFINE_integer('num_batches', None, 'num_batches override')

    config = configs.get_configs()

    if config.test_datafile is None:
        config.test_datafile = config.datafile

    batch_size = 1
    data_path = model_utils.get_data_path(config.data_dir,
                                          config.test_datafile)

    print("Loading data %s" % data_path)

    dataset = BatchGenerator(data_path,
                             config,
                             batch_size=batch_size,
                             num_unrollings=config.num_unrollings)

    num_data_points = dataset.num_batches
    if config.num_batches is not None:
        num_data_points = config.num_batches

    print("num_batches = ", num_data_points)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        print("Loading model.")

        model = model_utils.get_trained_model(session, config)

        stats = dict()
        key = 'ALL'
        stats[key] = list()

        with open(config.output, "w") as outfile:

            for i in range(num_data_points):

                batch = dataset.next_batch()
                preds = model.step(session, batch)
                seq_len = get_seq_length(batch)
                start = seq_len - 1

                if seq_len < config.num_unrollings:
                    continue
                #if config.nn_type != 'rnn' and seq_len < config.num_unrollings:
                #  continue
                #elif config.nn_type == 'rnn' and classify_entire_seq(batch):
                #  start = config.min_test_k - 1

                for i in range(start, seq_len):
                    key, date = get_key_and_date(batch, i)
                    if (date < config.print_start or date > config.print_end):
                        continue
                    prob = get_pos_prob(config, preds, i)
                    target = get_target(batch, i)
                    outfile.write("%s %s "
                                  "%.4f %.4f %d %d\n" %
                                  (key, date, 1.0 - prob, prob, target, i + 1))
                    pred = +1.0 if prob >= 0.5 else 0.0
                    error = 0.0 if (pred == target) else 1.0
                    tpos = 1.0 if (pred == 1 and target == 1) else 0.0
                    tneg = 1.0 if (pred == 0 and target == 0) else 0.0
                    fpos = 1.0 if (pred == 1 and target == 0) else 0.0
                    fneg = 1.0 if (pred == 0 and target == 1) else 0.0
                    # print("pred=%.2f target=%.2f tp=%d tn=%d fp=%d fn=%d"%(pred,target,tp,tn,fp,fn))
                    curstat = {
                        'error': error,
                        'tpos': tpos,
                        'tneg': tneg,
                        'fpos': fpos,
                        'fneg': fneg
                    }
                    if date not in stats:
                        stats[date] = list()
                    stats[date].append(curstat)
                    stats['ALL'].append(curstat)

        print_summary_stats(stats)
Example #10
0
def main(_):
    """
  """
    configs.DEFINE_string('test_datafile', None, 'file with test data')
    configs.DEFINE_string('output', 'preds.dat', 'file for predictions')
    configs.DEFINE_string('time_field', 'date', 'fields used for dates/time')
    configs.DEFINE_string('print_start', '190001',
                          'only print data on or after')
    configs.DEFINE_string('print_end', '210012',
                          'only print data on or before')
    configs.DEFINE_string('factor_name', None,
                          'Name of factor if nn_type=factor')
    configs.DEFINE_integer('min_test_k', 1, 'minimum seq length classified')
    configs.DEFINE_integer('num_batches', None, 'num_batches override')

    config = configs.get_configs()

    factor_name = config.factor_name
    assert (factor_name is not None)

    if config.test_datafile is None:
        config.test_datafile = config.datafile
    batch_size = 1
    num_unrollings = config.num_unrollings
    data_path = model_utils.get_data_path(config.data_dir,
                                          config.test_datafile)
    filename = data_path

    print("Loading data %s" % data_path)
    if not os.path.isfile(filename):
        raise RuntimeError("The data file %s does not exist" % filename)
    data = pd.read_csv(filename,
                       sep=' ',
                       dtype={
                           config.key_field: str,
                           'date': str
                       })
    if config.end_date is not None:
        data = data.drop(data[data['date'] > str(config.end_date)].index)

    num_data_points = len(data)

    params = dict()

    print("num data points = ", num_data_points)

    stats = dict()
    key = 'ALL'
    stats[key] = list()

    with open(config.output, "w") as outfile:

        last_key = ''
        seq_len = 0

        for i in range(num_data_points):
            key = get_value(data, config.key_field, i)
            date = get_value(data, 'date', i)
            seq_len = seq_len + 1 if key == last_key else 1
            last_key = key
            if (str(date) < config.print_start
                    or str(date) > config.print_end):
                continue
            if seq_len < config.min_test_k:
                continue
            prob = get_value(data, factor_name, i)
            out = get_value(data, config.target_field, i)
            target = (out + 1.0) / 2.0
            k = min(seq_len, config.num_unrollings)
            outfile.write("%s %s "
                          "%.4f %.4f %d %d\n" %
                          (key, date, 1.0 - prob, prob, target, k))
            pred = +1.0 if prob >= 0.5 else 0.0
            error = 0.0 if (pred == target) else 1.0
            tpos = 1.0 if (pred == 1 and target == 1) else 0.0
            tneg = 1.0 if (pred == 0 and target == 0) else 0.0
            fpos = 1.0 if (pred == 1 and target == 0) else 0.0
            fneg = 1.0 if (pred == 0 and target == 1) else 0.0
            # print("pred=%.2f target=%.2f tp=%d tn=%d fp=%d fn=%d"%(pred,target,tp,tn,fp,fn))
            rec = {
                'error': error,
                'tpos': tpos,
                'tneg': tneg,
                'fpos': fpos,
                'fneg': fneg
            }
            if date not in stats:
                stats[date] = list()
            stats[date].append(rec)
            stats['ALL'].append(rec)

    print_summary_stats(stats)
Example #11
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'test', "A name for the config.")
    configs.DEFINE_string("datafile", None, "a datafile name.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale_fields", None,
                          "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", 5,
                           "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("batch_size", 256, "Size of each batch")
    configs.DEFINE_integer("num_layers", 2, "Numer of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("num_hidden", 64, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199912,
                           "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_float("recurrent_dropout", 0.0,
                         "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_string("data_scaler", 'RobustScaler',
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'Adadelta',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 1.0,
                         "Learning rate decay for exponential decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.7,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 1, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 1, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_boolean("huber_loss", False,
                           "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to me made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")
    configs.DEFINE_integer("num_gpu", 1, "NUmber of GPUs")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_integer("decay_steps", 1500,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", 3, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01,
                         "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', '4000-5500-5500',
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', '0.5-0.1-0.05-0.1',
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')
    configs.DEFINE_string('preds_fname', 'preds.dat',
                          'Name of the prediction file')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean("cdrs_inference", False,
                           'If the execution is for inference on CDRS data')
    configs.DEFINE_string('cdrs_src_fname', 'cdrs-src.dat',
                          'Filename of the CDRS source file')
    configs.DEFINE_string('cdrs_ml_fname', 'cdrs-ml-data.dat',
                          'Filename of the CDRS ML data file')
    configs.DEFINE_string('model_ranking_fname', './model-ranking.dat',
                          'Model Ranking File Name')
    configs.DEFINE_string('model_ranking_factor', 'pred_var_entval',
                          'Model ranking factor')
    configs.DEFINE_string("cdrs_inference_date", None,
                          "CDRS Inference date. Format: '%Y-%m-%d' ")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split('-')
    ]
    c.piecewise_lr_boundaries = [
        float(x) for x in c.piecewise_lr_boundaries.split('-')
    ]
    c.piecewise_lr_values = [
        float(x) for x in c.piecewise_lr_values.split('-')
    ]

    return c
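A note on the piecewise learning-rate flags parsed above: if lr_schedule maps to tf.keras.optimizers.schedules.PiecewiseConstantDecay (an assumption; the examples show only the flag parsing), the values list must be exactly one element longer than the boundaries list, which the defaults satisfy:

boundaries = [float(x) for x in '4000-5500-5500'.split('-')]   # 3 boundaries
values = [float(x) for x in '0.5-0.1-0.05-0.1'.split('-')]     # 4 values
assert len(values) == len(boundaries) + 1
# schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)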
Example #12
def main(_):
    """
  Entry point and main loop for train_net.py. Uses command line arguments to get
  model and training specification (see config.py).
  """
    configs.DEFINE_string("train_datafile", None, "Training file")
    configs.DEFINE_string("optimizer", 'gd',
                          'Optimizer to use gd, adam, adagrad, momentum')
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("initial_learning_rate", 1.0, "Initial learning rate")
    configs.DEFINE_float("validation_size", 0.0, "Size of validation set as %")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("rnn_loss_weight", None,
                         "How much moret to weight kth example")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")

    config = configs.get_configs()

    if config.train_datafile is None:
        config.train_datafile = config.datafile

    train_path = model_utils.get_data_path(config.data_dir,
                                           config.train_datafile)

    print("Loading training data ...")

    train_data = BatchGenerator(train_path,
                                config,
                                config.batch_size,
                                config.num_unrollings,
                                validation_size=config.validation_size,
                                randomly_sample=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")

        model = model_utils.get_training_model(session, config, verbose=True)

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()
        # This sets the initial learning rate tensor
        lr = model.assign_lr(session, config.initial_learning_rate)

        for i in range(config.max_epoch):

            trc, tre, vdc, vde = run_epoch(session,
                                           model,
                                           train_data,
                                           keep_prob=config.keep_prob,
                                           passes=config.passes,
                                           verbose=True)

            # Cap reported losses so extreme values don't distort the log line
            trc = 999.0 if trc > 999.0 else trc
            vdc = 999.0 if vdc > 999.0 else vdc

            print(('Epoch: %d loss: %.6f %.6f'
                   ' error: %.6f %.6f Learning rate: %.4f') %
                  (i + 1, trc, vdc, tre, vde, lr))
            sys.stdout.flush()

            train_history.append(trc)
            valid_history.append(vdc)

            # update learning rate
            if config.optimizer == 'gd' or config.optimizer == 'momentum':
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            chkpt_file_prefix = "training.ckpt"
            if model_utils.stop_training(config, valid_history,
                                         chkpt_file_prefix):
                print("Training stopped.")
                quit()
            else:
                checkpoint_path = os.path.join(config.model_dir,
                                               chkpt_file_prefix)
                tf.train.Saver().save(session, checkpoint_path, global_step=i)