def get_search_configs():
    """Define the command-line flags for hyper-parameter search.

    Registers all search-related flags on the module-level ``configurations``
    registry and returns the parsed values.

    Returns:
        configurations.ConfigValues: the parsed flag values.
    """
    configurations.DEFINE_string("template", None,
                                 "Template file for hyper-param search")
    configurations.DEFINE_string(
        "search_algorithm", "genetic",
        "Algorithm for hyper-param optimization. Select from 'genetic', 'grid_search'"
    )
    configurations.DEFINE_integer("generations", 100,
                                  "Number of generations for genetic algorithm")
    configurations.DEFINE_integer("pop_size", 20,
                                  "Population size for genetic algorithm")
    configurations.DEFINE_integer("num_survivors", 10,
                                  "Number of survivors for genetic algorithm")
    # Help-text typo fixed: "NUmber" -> "Number".
    configurations.DEFINE_integer(
        "num_threads", 4,
        "Number of parallel threads (Number of parallel executions)")
    configurations.DEFINE_integer(
        "num_gpu", 1, "Number of GPU on the machine, Use 0 if there are None")
    configurations.DEFINE_integer("sleep_time", 1, "Sleep time")
    configurations.DEFINE_float("mutate_rate", 0.2,
                                "Mutation rate for genetic algorithm")
    configurations.DEFINE_string(
        "init_pop", None, "Specify starting population. Path to the pickle file")

    c = configurations.ConfigValues()
    return c
def get_configs():
    """Define the command-line flags for batch config runs.

    Registers flags on the module-level ``configurations`` registry and
    returns the parsed values.

    Returns:
        configurations.ConfigValues: the parsed flag values.
    """
    configurations.DEFINE_string("configs_fname", None,
                                 "CSV containing all the configs to run")
    configurations.DEFINE_boolean("predict", True,
                                  "Run predictions after training")
    # Help-text typo fixed: "NUmber" -> "Number".
    configurations.DEFINE_integer(
        "num_threads", 4,
        "Number of parallel threads (Number of parallel executions)")
    configurations.DEFINE_integer(
        "num_gpu", 1, "Number of GPU on the machine, Use 0 if there are None")
    configurations.DEFINE_integer("sleep_time", 1, "Sleep time")
    configurations.DEFINE_integer("start_date", None,
                                  "First date for prediction on as YYYYMM")
    configurations.DEFINE_integer("end_date", None,
                                  "Last date for prediction on as YYYYMM")

    c = configurations.ConfigValues()
    return c
def get_configs():
    """Define all configuration params passable to command line.

    Registers every flag on the module-level ``configs`` registry, parses
    them, and post-processes derived values:

    * ``min_unrollings``/``max_unrollings`` default to ``num_unrollings``.
    * ``optimizer_params`` is converted from a ``"k1=v1,k2=v2"`` string to a
      ``{k1: float(v1), k2: float(v2)}`` dict.

    Returns:
        configs.ConfigValues: the parsed and post-processed flag values.

    Raises:
        ValueError: if ``learning_rate`` is supplied inside
            ``optimizer_params`` (it must be set via ``--learning_rate``).
    """
    configs.DEFINE_string("datasource", 'big_datafile', "The source of the data.")
    configs.DEFINE_string("tkrlist", "big_tkrlist.csv", "The list of filters to use.")
    configs.DEFINE_string("datafile", 'big_datafile.dat', "a datafile name.")
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("default_gpu", '', "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("key_field", 'gvkey', "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap', "Feature to scale inputs by")
    configs.DEFINE_string("feature_fields", '', "shared input and target field names")
    configs.DEFINE_string("aux_input_fields", None, "non-target, input only fields")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", '', "Model directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None, "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None, "Max number of unrolling steps")
    # num_unrollings is being deprecated by max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12, "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    # Help-text typo fixed: "Numer" -> "Number".
    configs.DEFINE_integer("num_layers", 1, "Number of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None, "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True, "Train model otherwise inference only")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False, "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False, "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6, "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    # Help-text typos fixed: "traking a cahce" -> "tracking a cache".
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for tracking a cache")

    c = configs.ConfigValues()

    # num_unrollings is the deprecated way to set the unrolling bounds.
    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings
    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to dictionary { param1 : value1, param2 : value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        params = dict()
        for pair in c.optimizer_params.split(','):
            parts = pair.split('=')
            params[parts[0]] = float(parts[1])
        c.optimizer_params = params

    # The learning rate must be supplied via --learning_rate. Use an explicit
    # check instead of assert so the guard survives `python -O`.
    if 'learning_rate' in c.optimizer_params:
        raise ValueError(
            "learning_rate must be set with --learning_rate, "
            "not inside optimizer_params")

    return c
def get_configs():
    """Define all configuration flags for this experiment preset.

    Registers every flag on the module-level ``configs`` registry, parses
    them, and post-processes derived values:

    * ``min_unrollings``/``max_unrollings`` default to ``num_unrollings``.
    * When ``min_years`` is given, the unrolling bounds are derived from
      years via ``years * (12 // stride)``, optionally using ``max_years``
      or ``pls_years`` for the upper bound.
    * ``optimizer_params`` is converted from ``"k1=v1,k2=v2"`` to a dict
      of floats.

    Returns:
        configs.ConfigValues: the parsed and post-processed flag values.

    Raises:
        ValueError: if ``learning_rate`` is supplied inside
            ``optimizer_params`` (it must be set via ``--learning_rate``).
    """
    configs.DEFINE_string("name", 'trial1', "")
    configs.DEFINE_string("datafile", 'Group2-Dataset.csv', "")
    configs.DEFINE_string("predict_datafile", None, "")
    configs.DEFINE_string("mse_outfile", None, "")
    configs.DEFINE_string("scalesfile", None, "")
    configs.DEFINE_string("default_gpu", '/gpu:0', "")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "")
    configs.DEFINE_string("active_field", 'active', "")
    configs.DEFINE_string("date_field", 'date', "")
    configs.DEFINE_string("key_field", 'gvkey', "")
    configs.DEFINE_string("target_field", 'mkvaltq_ttm', "")
    configs.DEFINE_string("scale_field", 'mrkcap', "")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq', "")
    configs.DEFINE_string("aux_fields", 'mom3m-mom9m', "")
    configs.DEFINE_string("dont_scale", None, "")
    configs.DEFINE_string("data_dir", 'datasets', "")
    configs.DEFINE_string("model_dir", 'chkpts-wrds-rnn', "")
    configs.DEFINE_string("rnn_cell", 'lstm', "")
    configs.DEFINE_string("activation_fn", 'relu', "")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5, "")
    configs.DEFINE_integer("max_unrollings", 5, "")
    configs.DEFINE_integer("min_years", None, "")
    configs.DEFINE_integer("max_years", None, "")
    configs.DEFINE_integer("pls_years", None, "")
    configs.DEFINE_integer("num_unrollings", 5, "")
    configs.DEFINE_integer("stride", 12, "")
    configs.DEFINE_integer("forecast_n", 3, "")
    configs.DEFINE_integer("batch_size", 128, "")
    configs.DEFINE_integer("num_layers", 5, "")
    configs.DEFINE_integer("num_hidden", 128, "")
    configs.DEFINE_float("training_noise", None, "")
    configs.DEFINE_float("init_scale", 0.01, "")
    configs.DEFINE_float("max_grad_norm", 10.0, "")
    configs.DEFINE_integer("start_date", None, "")
    configs.DEFINE_integer("end_date", None, "")
    configs.DEFINE_integer("split_date", None, "")
    configs.DEFINE_float("keep_prob", 0.75, "")
    configs.DEFINE_boolean("train", False, "")
    configs.DEFINE_boolean("require_targets", False, "")
    configs.DEFINE_boolean("input_dropout", False, "")
    configs.DEFINE_boolean("hidden_dropout", False, "")
    configs.DEFINE_boolean("rnn_dropout", True, "")
    configs.DEFINE_boolean("skip_connections", False, "")
    configs.DEFINE_boolean("direct_connections", False, "")
    configs.DEFINE_boolean("use_cache", True, "")
    configs.DEFINE_boolean("pretty_print_preds", True, "")
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean("backfill", False, "")
    configs.DEFINE_boolean("log_squasher", True, "")
    configs.DEFINE_boolean("ts_smoother", False, "")
    configs.DEFINE_string("data_scaler", 'RobustScaler', '')
    configs.DEFINE_string("optimizer", 'AdadeltaOptimizer', '')
    configs.DEFINE_string("optimizer_params", None, '')
    configs.DEFINE_float("learning_rate", 0.6, "")
    configs.DEFINE_float("lr_decay", 0.95, "")
    configs.DEFINE_float("validation_size", 0.3, "")
    configs.DEFINE_float("train_until", 0.0, "")
    configs.DEFINE_float("passes", 0.2, "")
    configs.DEFINE_float("target_lambda", 0.8, "")
    configs.DEFINE_float("rnn_lambda", 0.2, "")
    configs.DEFINE_float("l2_alpha", 0.0, "")
    configs.DEFINE_integer("max_epoch", 1000, "")
    configs.DEFINE_integer("early_stop", 10, "")
    configs.DEFINE_integer("seed", 100, "")
    configs.DEFINE_integer("cache_id", 100, "")
    configs.DEFINE_string("output_file", "mkvaltq_2016.csv", "")

    c = configs.ConfigValues()

    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings
    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # The max_years/pls_years alternatives only apply when min_years is set:
    # the pls_years branch reads c.min_years, which would be None otherwise.
    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = c.max_years * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to dictionary { param1 : value1, param2 : value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        params = dict()
        for pair in c.optimizer_params.split(','):
            parts = pair.split('=')
            params[parts[0]] = float(parts[1])
        c.optimizer_params = params

    # The learning rate must be supplied via --learning_rate. Use an explicit
    # check instead of assert so the guard survives `python -O`.
    if 'learning_rate' in c.optimizer_params:
        raise ValueError(
            "learning_rate must be set with --learning_rate, "
            "not inside optimizer_params")

    return c
def get_configs():
    """Define all configuration params passable to command line.

    Registers every flag (including the ``DEFINE_list_*`` variants used for
    hyper-parameter search spaces) on the module-level ``configs`` registry,
    parses them, and post-processes:

    * ``data_dir`` is resolved relative to the module-level ``_data_dir_path``.
    * ``forecast_steps_weights`` is converted from a comma-separated string
      to a list of floats.

    Returns:
        configs.ConfigValues: the parsed and post-processed flag values.
    """
    configs.DEFINE_string("name", 'hpo-test', "A name for the config.")
    configs.DEFINE_string("datafile", 'source-ml-data-v8-100M.dat', "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey', "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap', "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_list_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_list_string("activation_fn", 'relu',
                               "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_list_integer("min_unrollings", 5, "Min number of unrolling steps")
    configs.DEFINE_list_integer("max_unrollings", 5, "Max number of unrolling steps")
    configs.DEFINE_list_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_list_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_list_integer("stride", 12, "How many steps to skip per unrolling")
    configs.DEFINE_list_integer("batch_size", 256, "Size of each batch")
    # Help-text typo fixed: "Numer" -> "Number".
    configs.DEFINE_list_integer("num_layers", 2, "Number of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_list_integer("num_hidden", 64, "Number of hidden layer units")
    configs.DEFINE_list_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_list_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501, "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199812, "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True, "Train model otherwise inference only")
    configs.DEFINE_list_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_list_float("recurrent_dropout", 0.3,
                              "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_list_string(
        "data_scaler", 'RobustScaler',
        'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_list_string("optimizer", 'Adadelta',
                               'Any tensorflow optimizer in tf.train')
    configs.DEFINE_list_float("learning_rate", 0.6,
                              "The initial starting learning rate")
    configs.DEFINE_list_float("lr_decay", 0.96, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_list_float(
        "target_lambda", 0.5,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_list_float(
        "rnn_lambda", 0.7,
        "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 35, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 15, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_list_float("l2_alpha", 0.0,
                              "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_list_boolean("huber_loss", False, "Use huber loss instead of mse")
    configs.DEFINE_list_float("huber_delta", 1.0, "delta for huber loss")
    # Help-text typo fixed: "me made" -> "be made".
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to be made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_string('preds_fname', 'preds.dat', 'Name of the prediction file')
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")

    # HPO related params
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_string(
        "search_algorithm", "genetic",
        "Algorithm for hyper-param optimization. Select from 'genetic', 'grid_search', 'doe' "
    )
    configs.DEFINE_integer("generations", 5,
                           "Number of generations for genetic algorithm")
    configs.DEFINE_integer("pop_size", 16, "Population size for genetic algorithm")
    configs.DEFINE_integer(
        "num_gpu", 1, "Number of GPU on the machine, Use 0 if there are None")
    configs.DEFINE_float("mutate_rate", 0.2, "Mutation rate for genetic algorithm")
    configs.DEFINE_string("objective", 'mse', "Select between mse or uq_loss")
    configs.DEFINE_string("init_pop", None,
                          "Initial population to begin hyper param search")
    configs.DEFINE_boolean("save_latest_pop", False, "Save the latest population")
    configs.DEFINE_string('doe_file', None, 'Design of experiments csv file')
    configs.DEFINE_integer("decay_steps", 100000,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", None, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01, "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', None,
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', None,
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')

    c = configs.ConfigValues()

    # Resolve the data directory against the module-level base path.
    c.data_dir = os.path.join(_data_dir_path, c.data_dir)

    # "w1,w2,..." -> [w1, w2, ...] as floats.
    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split(',')
    ]

    return c
def get_configs():
    """Define all configuration params passable to command line.

    Registers every flag on the module-level ``configs`` registry, parses
    them, and post-processes derived values:

    * ``min_unrollings``/``max_unrollings`` default to ``num_unrollings``.
    * When ``min_years`` is given, the unrolling bounds are derived from
      years via ``years * (12 // stride)``, optionally using ``max_years``
      or ``pls_years`` for the upper bound.
    * ``optimizer_params`` is converted from ``"k1=v1,k2=v2"`` to a dict
      of floats.

    Returns:
        configs.ConfigValues: the parsed and post-processed flag values.

    Raises:
        ValueError: if ``learning_rate`` is supplied inside
            ``optimizer_params`` (it must be set via ``--learning_rate``).
    """
    configs.DEFINE_string("name", 'none', "A name for the config.")
    configs.DEFINE_string("datafile", 'open_dataset.dat', "a datafile name.")
    configs.DEFINE_string(
        "predict_datafile", None,
        "If predict_datafile is not None, use it instead of datafile for predictions"
    )
    configs.DEFINE_string("mse_outfile", None,
                          "A file to write mse values during predict phase.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string(
        "mse_var_outfile", None,
        "A file to write mse_var values during predict phase.")
    configs.DEFINE_string("default_gpu", '', "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'DeepRnnModel', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey', "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap', "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", '',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", None, "non-target, input only fields")
    configs.DEFINE_string("dont_scale", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", '', "The data directory")
    configs.DEFINE_string("model_dir", 'chkpts', "Model (checkpoint) directory")
    configs.DEFINE_string("rnn_cell", 'gru', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", None, "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", None, "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    # num_unrollings is being deprecated, replaced with max_unrollings
    configs.DEFINE_integer("num_unrollings", 4, "Number of unrolling steps")
    configs.DEFINE_integer("stride", 12, "How many steps to skip per unrolling")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("batch_size", 1, "Size of each batch")
    # Help-text typo fixed: "Numer" -> "Number".
    configs.DEFINE_integer("num_layers", 1, "Number of RNN layers")
    configs.DEFINE_integer("num_hidden", 10, "Number of hidden layer units")
    configs.DEFINE_float("training_noise", None,
                         "Level of training noise as multiple of 1-stdev")
    configs.DEFINE_float("init_scale", 0.1, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 10.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", None, "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", None, "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_float("keep_prob", 1.0, "Keep probability for dropout")
    configs.DEFINE_boolean("train", True, "Train model otherwise inference only")
    configs.DEFINE_boolean("require_targets", False,
                           "Require target values for test predictions")
    configs.DEFINE_boolean("input_dropout", False, "Do dropout on input layer")
    configs.DEFINE_boolean("hidden_dropout", False, "Do dropout on hidden layers")
    configs.DEFINE_boolean("rnn_dropout", False,
                           "Do dropout on recurrent connections")
    configs.DEFINE_boolean(
        "skip_connections", False,
        "Have a linear fully connected weight skip hidden units in MLP")
    configs.DEFINE_boolean(
        "direct_connections", False,
        "Have direct connections between input and output in MLP")
    configs.DEFINE_boolean(
        "use_cache", True,
        "Load data for logreg from cache (vs processing from batch generator)")
    configs.DEFINE_boolean(
        "pretty_print_preds", False,
        "Print predictions in tabular format with inputs, targets, and keys")
    configs.DEFINE_boolean(
        "print_preds", False,
        "Print predictions with just date, gvkey and output values")
    configs.DEFINE_string(
        "df_dirname", None,
        "Saves dataframes for target, output, variance/variance, mse and mse_var in df_dirname"
    )
    configs.DEFINE_boolean("scale_targets", True, "")
    configs.DEFINE_boolean(
        "backfill", False,
        "Backfill seq history to max_unrollings with data in first time step")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_boolean("ts_smoother", False,
                           "Use smoother on data time series during training")
    configs.DEFINE_string("data_scaler", None,
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'GradientDescentOptimizer',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_string("optimizer_params", None,
                          'Additional optimizer params such as momentum')
    configs.DEFINE_float("learning_rate", 0.6, "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 0.9, "Learning rate decay")
    configs.DEFINE_float("validation_size", 0.0,
                         "Size of validation set as %, ie. .3 = 30% of data")
    configs.DEFINE_float("train_until", 0.0,
                         "Train until validation MSE is less than this value")
    configs.DEFINE_float("passes", 1.0, "Passes through day per epoch")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 0, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", None, "Early stop parameter")
    configs.DEFINE_integer("seed", None, "Seed for deterministic training")
    # Help-text typos fixed: "traking a cahce" -> "tracking a cache".
    configs.DEFINE_integer("cache_id", None,
                           "A unique experiment key for tracking a cache")
    configs.DEFINE_float("keep_prob_pred", 1.0,
                         "Keep Prob for dropout during prediction")
    configs.DEFINE_boolean(
        "print_normalized_outputs", False,
        "Print normalized outputs. Doesn't apply to pretty print")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_string("UQ_model_type", 'MVE', "Select between MVE or PIE")
    configs.DEFINE_float(
        "noise_lambda", 1.0,
        "Weight decay for noise in the loss function. Refer to DeepBayesUQ Model"
    )
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("picp_lambda", 1.0,
                         "Contribution of PICP loss term for HQPI UQ model")
    configs.DEFINE_float(
        "smoothing_pi_check", 100,
        "Smoothing parameter for calculation of PI check in HQPI UQ model")
    configs.DEFINE_float(
        "confidence_alpha", 0.1,
        "Alpha used for calculating confidence level (= 1 - alpha)")
    configs.DEFINE_boolean("huber_loss", False, "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")

    c = configs.ConfigValues()

    # num_unrollings is the deprecated way to set the unrolling bounds.
    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings
    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # The max_years/pls_years alternatives only apply when min_years is set:
    # the pls_years branch reads c.min_years, which would be None otherwise.
    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = c.max_years * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # optimizer_params is a string of the form "param1=value1,param2=value2,..."
    # this maps it to dictionary { param1 : value1, param2 : value2, ...}
    if c.optimizer_params is None:
        c.optimizer_params = dict()
    else:
        params = dict()
        for pair in c.optimizer_params.split(','):
            parts = pair.split('=')
            params[parts[0]] = float(parts[1])
        c.optimizer_params = params

    # The learning rate must be supplied via --learning_rate. Use an explicit
    # check instead of assert so the guard survives `python -O`.
    if 'learning_rate' in c.optimizer_params:
        raise ValueError(
            "learning_rate must be set with --learning_rate, "
            "not inside optimizer_params")

    return c
def get_configs():
    """Define all configuration params passable to command line.

    Registers every flag on the module-level ``configs`` registry, parses
    them, and post-processes derived values:

    * When ``min_years`` is given, the unrolling bounds are derived from
      years via ``years * (12 // stride)``, optionally using ``max_years``
      or ``pls_years`` for the upper bound.
    * ``forecast_steps_weights``, ``piecewise_lr_boundaries`` and
      ``piecewise_lr_values`` are converted from '-'-separated strings to
      lists of floats.

    Returns:
        configs.ConfigValues: the parsed and post-processed flag values.
    """
    configs.DEFINE_string("name", 'test', "A name for the config.")
    configs.DEFINE_string("datafile", None, "a datafile name.")
    configs.DEFINE_string("scalesfile", None,
                          "Optional file for storing scaling params")
    configs.DEFINE_string("default_gpu", '/gpu:0',
                          "The default GPU to use e.g., /gpu:0")
    configs.DEFINE_string("nn_type", 'RNNPointEstimate', "Model type")
    configs.DEFINE_string("active_field", 'active',
                          "Key column name header for active indicator")
    configs.DEFINE_string("date_field", 'date', "Name of data column.")
    configs.DEFINE_string("key_field", 'gvkey', "Key column name header in datafile")
    configs.DEFINE_string("target_field", 'oiadpq_ttm',
                          "Target column name header in datafile")
    configs.DEFINE_string("scale_field", 'mrkcap', "Feature to scale inputs by")
    configs.DEFINE_string("financial_fields", 'saleq_ttm-ltq_mrq',
                          "Shared input and target field names")
    configs.DEFINE_string("aux_fields", 'rel_mom1m-rel_mom9m',
                          "non-target, input only fields")
    configs.DEFINE_string("dont_scale_fields", None, "Names of fields to not scale")
    configs.DEFINE_string("data_dir", 'datasets', "The data directory")
    configs.DEFINE_string("model_dir", 'test-model', "Model directory")
    configs.DEFINE_string("experiments_dir", './', "Experiments directory")
    configs.DEFINE_string("rnn_cell", 'lstm', "lstm or gru")
    configs.DEFINE_string("activation_fn", 'relu',
                          "MLP activation function in tf.nn.*")
    configs.DEFINE_integer("num_inputs", -1, "")
    configs.DEFINE_integer("num_outputs", -1, "")
    configs.DEFINE_integer("target_idx", None, "")
    configs.DEFINE_integer("min_unrollings", 5, "Min number of unrolling steps")
    configs.DEFINE_integer("max_unrollings", 5, "Max number of unrolling steps")
    configs.DEFINE_integer("min_years", None, "Alt to min_unrollings")
    configs.DEFINE_integer("max_years", None, "Alt to max_unrollings")
    configs.DEFINE_integer("pls_years", None,
                           "Alt to max_years. max_years = min_year+pls_years")
    configs.DEFINE_integer("stride", 12, "How many steps to skip per unrolling")
    configs.DEFINE_integer("batch_size", 256, "Size of each batch")
    # Help-text typo fixed: "Numer" -> "Number".
    configs.DEFINE_integer("num_layers", 2, "Number of RNN layers")
    configs.DEFINE_integer("forecast_n", 12,
                           "How many steps to forecast into the future")
    configs.DEFINE_integer("num_hidden", 64, "Number of hidden layer units")
    configs.DEFINE_float("init_scale", 1.0, "Initial scale for weights")
    configs.DEFINE_float("max_grad_norm", 50.0, "Gradient clipping")
    configs.DEFINE_integer("start_date", 197501, "First date to train on as YYYYMM")
    configs.DEFINE_integer("end_date", 199912, "Last date to train on as YYYYMM")
    configs.DEFINE_integer("split_date", None, "Date to split train/test on.")
    configs.DEFINE_boolean("train", True, "Train model otherwise inference only")
    configs.DEFINE_float("dropout", 0.0, "Dropout rate for hidden layers")
    configs.DEFINE_float("recurrent_dropout", 0.0,
                         "Dropout rate for recurrent connections")
    configs.DEFINE_boolean(
        "log_squasher", True,
        "Squash large normalized inputs with natural log function")
    configs.DEFINE_string("data_scaler", 'RobustScaler',
                          'sklearn scaling algorithm or None if no scaling')
    configs.DEFINE_string("optimizer", 'Adadelta',
                          'Any tensorflow optimizer in tf.train')
    configs.DEFINE_float("learning_rate", 0.6, "The initial starting learning rate")
    configs.DEFINE_float("lr_decay", 1.0,
                         "Learning rate decay for exponential decay")
    configs.DEFINE_float("validation_size", 0.3,
                         "Size of validation set as %, ie. 0.3 = 30% of data")
    configs.DEFINE_float("target_lambda", 0.5,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_float("rnn_lambda", 0.7,
                         "How much to weight last step vs. all steps in loss")
    configs.DEFINE_integer("max_epoch", 1, "Stop after max_epochs")
    configs.DEFINE_integer("early_stop", 1, "Early stop parameter")
    configs.DEFINE_integer("seed", 521, "Seed for deterministic training")
    configs.DEFINE_boolean("UQ", False, "Uncertainty Quantification Mode")
    configs.DEFINE_float("l2_alpha", 0.0,
                         "L2 regularization for weight parameters.")
    configs.DEFINE_float("recurrent_l2_alpha", 0.0,
                         "L2 regularization for recurrent weight parameters.")
    configs.DEFINE_boolean("huber_loss", False, "Use huber loss instead of mse")
    configs.DEFINE_float("huber_delta", 1.0, "delta for huber loss")
    # Help-text typo fixed: "me made" -> "be made".
    configs.DEFINE_integer("forecast_steps", 1,
                           "How many future predictions need to be made")
    configs.DEFINE_string('forecast_steps_weights', '1.0',
                          'weights for the forecast steps')
    configs.DEFINE_integer(
        "logging_interval", 100,
        "Number of batches for logging interval during training")
    configs.DEFINE_boolean("write_inp_to_out_file", True,
                           "Write input sequence to the output files")
    configs.DEFINE_string(
        "training_type", 'fixed_dates',
        'Choose between "fixed_dates" and "iterative" training')
    configs.DEFINE_integer("NPE", 1, "Number of Parallel Executions")
    configs.DEFINE_integer("num_procs", 1,
                           "Total number of training/prediction processes")
    # Help-text typo fixed: "NUmber" -> "Number".
    configs.DEFINE_integer("num_gpu", 1, "Number of GPUs")
    configs.DEFINE_boolean('load_saved_weights', False,
                           'Load weights saved in the checkpoint directory')
    configs.DEFINE_integer(
        "epoch_logging_interval", 1,
        "Number of batches for logging interval during training")
    configs.DEFINE_integer("decay_steps", 1500,
                           "Number of training steps between decay steps")
    configs.DEFINE_string("initializer", 'GlorotUniform',
                          'variable initializers available in Keras')
    configs.DEFINE_boolean(
        "use_custom_init", True,
        'Use RandomUniform initializer with init_scale values')
    configs.DEFINE_boolean(
        "aux_masking", False,
        'Mask aux features of all time steps except the last one with 0')
    configs.DEFINE_integer("max_norm", 3, "Max Norm for kernel constraint")
    configs.DEFINE_float("sgd_momentum", 0.0, "momentum for SGD optimizer")
    configs.DEFINE_float("end_learning_rate", 0.01, "end lr for polynomial decay")
    configs.DEFINE_float(
        'decay_power', 0.5,
        'power to decay the learning rate with for polynomial decay')
    configs.DEFINE_string('piecewise_lr_boundaries', '4000-5500-5500',
                          'boundaries for piecewise constant lr')
    configs.DEFINE_string('piecewise_lr_values', '0.5-0.1-0.05-0.1',
                          'values for piecewise constant lr')
    configs.DEFINE_string('lr_schedule', 'ExponentialDecay',
                          'Learning rate scheduler')
    configs.DEFINE_string('preds_fname', 'preds.dat',
                          'Name of the prediction file')
    configs.DEFINE_integer("member_id", 1, "Id of member in a population")
    configs.DEFINE_boolean("cdrs_inference", False,
                           'If the execution is for inference on CDRS data')
    configs.DEFINE_string('cdrs_src_fname', 'cdrs-src.dat',
                          'Filename of the CDRS source file')
    configs.DEFINE_string('cdrs_ml_fname', 'cdrs-ml-data.dat',
                          'Filename of the CDRS ML data file')
    configs.DEFINE_string('model_ranking_fname', './model-ranking.dat',
                          'Model Ranking File Name')
    configs.DEFINE_string('model_ranking_factor', 'pred_var_entval',
                          'Model ranking factor')
    configs.DEFINE_string("cdrs_inference_date", None,
                          "CDRS Inference date. Format: '%Y-%m-%d' ")

    c = configs.ConfigValues()

    # NOTE(review): no "num_unrollings" flag is defined above, so these two
    # branches would raise if ever taken; with the defaults (min/max_unrollings
    # = 5) they are unreachable. Kept as-is — confirm before removing.
    if c.min_unrollings is None:
        c.min_unrollings = c.num_unrollings
    if c.max_unrollings is None:
        c.max_unrollings = c.num_unrollings

    # The max_years/pls_years alternatives only apply when min_years is set:
    # the pls_years branch reads c.min_years, which would be None otherwise.
    if c.min_years is not None:
        c.min_unrollings = c.min_years * (12 // c.stride)
        if c.max_years is not None:
            c.max_unrollings = c.max_years * (12 // c.stride)
        elif c.pls_years is None:
            c.max_unrollings = c.min_unrollings
        else:
            c.max_unrollings = (c.min_years + c.pls_years) * (12 // c.stride)

    # "v1-v2-..." -> [v1, v2, ...] as floats for the schedule-style flags.
    c.forecast_steps_weights = [
        float(x) for x in c.forecast_steps_weights.split('-')
    ]
    c.piecewise_lr_boundaries = [
        float(x) for x in c.piecewise_lr_boundaries.split('-')
    ]
    c.piecewise_lr_values = [
        float(x) for x in c.piecewise_lr_values.split('-')
    ]

    return c