def get_configs():
    """
    Defines the configurations for hyperparameter search.
    """
    configurations.DEFINE_string("configs_fname",None,"CSV containing all the configs to run")
    configurations.DEFINE_boolean("predict",True,"Run predictions after training")
    configurations.DEFINE_integer("num_threads",4,"NUmber of parallel threads (Number of parallel executions)")
    configurations.DEFINE_integer("num_gpu",1,"Number of GPU on the machine, Use 0 if there are None")
    configurations.DEFINE_integer("sleep_time",1,"Sleep time")
    configurations.DEFINE_integer("start_date",None,"First date for prediction on as YYYYMM")
    configurations.DEFINE_integer("end_date",None,"Last date for prediction on as YYYYMM")

    c = configurations.ConfigValues()

    return c
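
The configurations/configs module that these examples call into is not shown on this page. As a rough sketch only, the DEFINE_* / ConfigValues() surface could be backed by a thin argparse wrapper along the lines below; the names and behavior are assumptions, not the project's actual implementation, and the list-valued variants (DEFINE_list_*) used in some later examples are omitted.

# Hypothetical sketch of a DEFINE_*/ConfigValues-style registry backed by argparse.
# The real module is not shown in these examples; this is an assumption, not its source.
import argparse

_parser = argparse.ArgumentParser()

def _str2bool(s):
    return str(s).lower() in ('1', 'true', 'yes')

def DEFINE_string(name, default, help_str):
    _parser.add_argument('--' + name, type=str, default=default, help=help_str)

def DEFINE_integer(name, default, help_str):
    _parser.add_argument('--' + name, type=int, default=default, help=help_str)

def DEFINE_float(name, default, help_str):
    _parser.add_argument('--' + name, type=float, default=default, help=help_str)

def DEFINE_boolean(name, default, help_str):
    _parser.add_argument('--' + name, type=_str2bool, default=default, help=help_str)

def ConfigValues():
    # parse_known_args so unrelated command-line flags do not raise
    args, _ = _parser.parse_known_args()
    return args  # an argparse.Namespace supports both c.attr reads and assignments
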
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("datasource", 'big_datafile',
                          "The source of the data.")
    configs.DEFINE_string("tkrlist", "big_tkrlist.csv",
                          "The list of filters to use.")
    configs.DEFINE_string("datafile", 'big_datafile.dat', "a datafile name.")
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("default_gpu", '',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("feature_fields", '',
                          "shared input and target field names")
    configs.DEFINE_string("aux_input_fields", None,
                          "non-target, input only fields")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", '', "Model directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None,
                           "Max number of unrolling steps")
    # num_unrollings is being deprecated in favor of max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    configs.DEFINE_integer("num_layers", 1, "Numer of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False,
                           "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False,
                           "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for traking a cahce")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to a dictionary { param1: value1, param2: value2, ... }
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
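
As a hypothetical illustration of the optimizer_params mapping above (the flag value here is made up), a comma-separated string becomes a float-valued dict; the assert in the code forbids smuggling learning_rate in this way, since it already has its own flag.

# Hypothetical illustration of the optimizer_params mapping performed above.
raw = "momentum=0.9,use_nesterov=1"
params = {k: float(v) for k, v in (p.split('=') for p in raw.split(','))}
assert params == {'momentum': 0.9, 'use_nesterov': 1.0}
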
Example #3
def get_configs():
    configs.DEFINE_string("name", 'trial1', "")
    configs.DEFINE_string("datafile", 'Group2-Dataset.csv', "")
    configs.DEFINE_string("predict_datafile", None, "")
    configs.DEFINE_string("mse_outfile", None, "")
    configs.DEFINE_string("scalesfile", None, "")
    configs.DEFINE_string("default_gpu", '/gpu:0', "")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "")
    configs.DEFINE_string("active_field", 'active', "")
    configs.DEFINE_string("date_field", 'date', "")
    configs.DEFINE_string("key_field", 'gvkey', "")
    configs.DEFINE_string("target_field", 'mkvaltq_ttm', "")
    configs.DEFINE_string("scale_field", 'mrkcap', "")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq', "")
    configs.DEFINE_string("aux_fields", 'mom3m-mom9m', "")
    configs.DEFINE_string("dont_scale", None, "")
    configs.DEFINE_string("data_dir", 'datasets', "")
    configs.DEFINE_string("model_dir", 'chkpts-wrds-rnn', "")
    configs.DEFINE_string("rnn_cell", 'lstm', "")
    configs.DEFINE_string("activation_fn", 'relu', "")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5, "")
    configs.DEFINE_integer("max_unrollings", 5, "")
    configs.DEFINE_integer("min_years", None, "")
    configs.DEFINE_integer("max_years", None, "")
    configs.DEFINE_integer("pls_years", None, "")

    configs.DEFINE_integer("num_unrollings", 5, "")
    configs.DEFINE_integer("stride", 12, "")
    configs.DEFINE_integer("forecast_n", 3, "")
    configs.DEFINE_integer("batch_size", 128, "")
    configs.DEFINE_integer("num_layers", 5, "")
    configs.DEFINE_integer("num_hidden", 128, "")
    configs.DEFINE_float("training_noise", None, "")
    configs.DEFINE_float("init_scale", 0.01, "")
    configs.DEFINE_float("max_grad_norm", 10.0, "")
    configs.DEFINE_integer("start_date", None, "")
    configs.DEFINE_integer("end_date", None, "")
    configs.DEFINE_integer("split_date", None, "")
    configs.DEFINE_float("keep_prob", 0.75, "")
    configs.DEFINE_boolean("train", False, "")
    configs.DEFINE_boolean("require_targets", False, "")
    configs.DEFINE_boolean("input_dropout", False, "")
    configs.DEFINE_boolean("hidden_dropout", False, "")
    configs.DEFINE_boolean("rnn_dropout", True, "")
    configs.DEFINE_boolean("skip_connections", False, "")
    configs.DEFINE_boolean("direct_connections", False, "")
    configs.DEFINE_boolean("use_cache", True, "")
    configs.DEFINE_boolean("pretty_print_preds", True, "")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean("backfill", False, "")
    configs.DEFINE_boolean("log_squasher", True, "")
    configs.DEFINE_boolean("ts_smoother", False, "")
    configs.DEFINE_string("data_scaler", 'RobustScaler', '')
    configs.DEFINE_string("optimizer", 'AdadeltaOptimizer', '')
    configs.DEFINE_string("optimizer_params", None, '')
    configs.DEFINE_float("learning_rate", 0.6, "")
    configs.DEFINE_float("lr_decay", 0.95, "")
    configs.DEFINE_float("validation_size", 0.3, "")
    configs.DEFINE_float("train_until", 0.0, "")
    configs.DEFINE_float("passes", 0.2, "")
    configs.DEFINE_float("target_lambda", 0.8, "")
    configs.DEFINE_float("rnn_lambda", 0.2, "")
    configs.DEFINE_float("l2_alpha", 0.0, "")
    configs.DEFINE_integer("max_epoch", 1000, "")
    configs.DEFINE_integer("early_stop", 10, "")
    configs.DEFINE_integer("seed", 100, "")
    configs.DEFINE_integer("cache_id", 100, "")
    configs.DEFINE_string("output_file", "mkvaltq_2016.csv", "")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to a dictionary { param1: value1, param2: value2, ... }
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
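
A worked, hypothetical example of the min_years/max_years conversion above: with monthly data and stride 12, one unrolling step covers one year, so year counts translate directly into step counts.

# Hypothetical worked example of the years-to-unrollings conversion above.
stride = 12                                                # one step per year on monthly data
min_years, pls_years = 3, 2
min_unrollings = min_years * (12 // stride)                # 3 steps
max_unrollings = (min_years + pls_years) * (12 // stride)  # 5 steps
assert (min_unrollings, max_unrollings) == (3, 5)
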
Example #4
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'hpo-test', "A name for the config.")
    configs.DEFINE_string("datafile", 'source-ml-data-v8-100M.dat',
                          "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_list_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_list_string("activation_fn", 'relu',
                               "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_list_integer("min_unrollings", 5,
                                "Min number of unrolling steps")
    configs.DEFINE_list_integer("max_unrollings", 5,
                                "Max number of unrolling steps")
    configs.DEFINE_list_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_list_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_list_integer("stride", 12,
                                "How many steps to skip per unrolling")

    configs.DEFINE_list_integer("batch_size", 256, "Size of each batch")
    configs.DEFINE_list_integer("num_layers", 2, "Numer of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_list_integer("num_hidden", 64,
                                "Number of hidden layer units")
    configs.DEFINE_list_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_list_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199812,
                           "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_list_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_list_float("recurrent_dropout", 0.3,
                              "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_list_string(
        "data_scaler", 'RobustScaler',
        'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_list_string("optimizer", 'Adadelta',
                               'Any tensorflow optimizer in tf.train')
    configs.DEFINE_list_float("learning_rate", 0.6,
                              "The initial starting learning rate")
    configs.DEFINE_list_float("lr_decay", 0.96, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_list_float(
        "target_lambda", 0.5,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_list_float(
        "rnn_lambda", 0.7,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 35, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 15, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_list_float("l2_alpha", 0.0,
                              "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_list_boolean("huber_loss", False,
                                "Use huber loss instead of mse")
    configs.DEFINE_list_float("huber_delta", 1.0, "delta for huber loss")
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to me made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_string('preds_fname', 'preds.dat',
                          'Name of the prediction file')
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")

    # HPO related params
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_string(
        "search_algorithm", "genetic",
        "Algorithm for hyper-param optimization. Select from 'genetic', 'grid_search', 'doe' "
    )
    configs.DEFINE_integer("generations", 5,
                           "Number of generations for genetic algorithm")
    configs.DEFINE_integer("pop_size", 16,
                           "Population size for genetic algorithm")
    configs.DEFINE_integer(
        "num_gpu", 1, "Number of GPUs on the machine; use 0 if there are none")
    configs.DEFINE_float("mutate_rate", 0.2,
                         "Mutation rate for genetic algorithm")
    configs.DEFINE_string("objective", 'mse', "Select between mse or uq_loss")
    configs.DEFINE_string("init_pop", None,
                          "Initial population to begin hyper param search")
    configs.DEFINE_boolean("save_latest_pop", False,
                           "Save the latest population")
    configs.DEFINE_string('doe_file', None, 'Design of experiments csv file')
    configs.DEFINE_integer("decay_steps", 100000,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", None, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01,
                         "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', None,
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', None,
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')

    c = configs.ConfigValues()

    c.data_dir = os.path.join(_data_dir_path, c.data_dir)
    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split(',')
    ]

    return c
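
A small, hypothetical illustration of the forecast_steps_weights parsing above: in this variant the weights are comma-separated (the last example on this page splits on '-' instead), and the number of weights should presumably match forecast_steps.

# Hypothetical illustration of the comma-separated forecast_steps_weights parsing above.
weights = [float(x) for x in '1.0,0.5,0.25'.split(',')]
assert weights == [1.0, 0.5, 0.25]
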
Example #5
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'none', "A name for the config.")
    configs.DEFINE_string("datafile", 'open_dataset.dat', "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string(
        "mse_var_outfile", None,
        "A file to write mse_var values during predict phase.")
    configs.DEFINE_string("default_gpu", '',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", '',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", None, "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", 'chkpts',
                          "Model (checkpoint) directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None,
                           "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    # num_unrollings is being depricated, replaced with max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    configs.DEFINE_integer("num_layers", 1, "Numer of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("training_noise", None,
                         "Level of training noise as multiple of 1-stdev")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_boolean("require_targets", False,
                           "Require target values for test predictions")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False,
                           "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False,
                           "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Add a linear fully-connected connection that skips the hidden units in the MLP")
    configs.DEFINE_boolean(
        "direct_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean(
        "print_preds", False,
        "Print predictions with just date, gvkey and output values")
    configs.DEFINE_string(
        "df_dirname", None,
        "Saves dataframes for target, output, variance/variance, mse and mse_var in df_dirname"
    )
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean(
        "backfill", False,
        "Backfill seq history to max_unrollings with data in first time step")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_boolean("ts_smoother", False,
                           "Use smoother on data time series during training")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("train_until", 0.0,
                         "Train until validation MSE is less than this value")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for traking a cahce")
    configs.DEFINE_float("keep_prob_pred", 1.0,
                         "Keep Prob for dropout during prediction")
    configs.DEFINE_boolean(
        "print_normalized_outputs", False,
        "Print normalized outputs. Doesn't apply to pretty print")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_string("UQ_model_type", 'MVE', "Select between MVE or PIE")
    configs.DEFINE_float(
        "noise_lambda", 1.0,
        "Weight decay for noise in the loss function. Refer to DeepBayesUQ Model"
    )
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("picp_lambda", 1.0,
                         "Contribution of PICP loss term for HQPI UQ model")
    configs.DEFINE_float(
        "smoothing_pi_check", 100,
        "Smoothing parameter for calculation of PI check in HQPI UQ model")
    configs.DEFINE_float(
        "confidence_alpha", 0.1,
        "Alpha used for calculating confidence level (= 1 - alpha)")
    configs.DEFINE_boolean("huber_loss", False,
                           "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to a dictionary { param1: value1, param2: value2, ... }
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        args_list = [p.split('=') for p in c.optimizer_params.split(',')]
        params = dict()
        for p in args_list:
            params[p[0]] = float(p[1])
        c.optimizer_params = params
        assert ('learning_rate' not in c.optimizer_params)

    return c
Example #6
def get_configs():
    """
    Defines all configuration params passable to command line.
    """
    configs.DEFINE_string("name", 'test', "A name for the config.")
    configs.DEFINE_string("datafile", None, "a datafile name.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey',
                          "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap',
                          "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale_fields", None,
                          "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5,
                           "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", 5,
                           "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_integer("stride", 12,
                           "How many steps to skip per unrolling")
    configs.DEFINE_integer("batch_size", 256, "Size of each batch")
    configs.DEFINE_integer("num_layers", 2, "Numer of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("num_hidden", 64, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501,
                           "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199912,
                           "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True,
                           "Train model otherwise inference only")
    configs.DEFINE_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_float("recurrent_dropout", 0.0,
                         "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_string("data_scaler", 'RobustScaler',
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'Adadelta',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_float("learning_rate", 0.6,
                         "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 1.0,
                         "Learning rate decay for exponential decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.7,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 1, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 1, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_boolean("huber_loss", False,
                           "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to me made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")
    configs.DEFINE_integer("num_gpu", 1, "NUmber of GPUs")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_integer("decay_steps", 1500,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", 3, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01,
                         "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', '4000-5500-5500',
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', '0.5-0.1-0.05-0.1',
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')
    configs.DEFINE_string('preds_fname', 'preds.dat',
                          'Name of the prediction file')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean("cdrs_inference", False,
                           'If the execution is for inference on CDRS data')
    configs.DEFINE_string('cdrs_src_fname', 'cdrs-src.dat',
                          'Filename of the CDRS source file')
    configs.DEFINE_string('cdrs_ml_fname', 'cdrs-ml-data.dat',
                          'Filename of the CDRS ML data file')
    configs.DEFINE_string('model_ranking_fname', './model-ranking.dat',
                          'Model Ranking File Name')
    configs.DEFINE_string('model_ranking_factor', 'pred_var_entval',
                          'Model ranking factor')
    configs.DEFINE_string("cdrs_inference_date", None,
                          "CDRS Inference date. Format: '%Y-%m-%d' ")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings

    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = (c.max_years) * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split('-')
    ]
    c.piecewise_lr_boundaries = [
        float(x) for x in c.piecewise_lr_boundaries.split('-')
    ]
    c.piecewise_lr_values = [
        float(x) for x in c.piecewise_lr_values.split('-')
    ]

    return c
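
The '-'-delimited schedule strings are parsed into parallel float lists above. Assuming the downstream code hands them to a Keras-style PiecewiseConstantDecay schedule (not shown here, so an assumption), the values list needs one more entry than the boundaries list, as in this hypothetical illustration using the defaults:

# Hypothetical illustration of the piecewise learning-rate parsing above, using the default strings.
boundaries = [float(x) for x in '4000-5500-5500'.split('-')]  # [4000.0, 5500.0, 5500.0]
values = [float(x) for x in '0.5-0.1-0.05-0.1'.split('-')]    # one more value than boundaries
assert len(values) == len(boundaries) + 1
# If a Keras schedule is used downstream (assumption), these could map to e.g.:
# tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)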