import os

from test_tube import HyperOptArgumentParser


def parse_args():
    def path(p):
        # expand "~" and normalize to an absolute path
        return os.path.abspath(os.path.expanduser(p))

    parser = HyperOptArgumentParser()
    parser.add_argument("--dataset", type=str, default="trec_web.1-200.asrc")
    parser.add_argument("--asrc-path", type=path, default=None)
    parser.add_argument("--log-path", type=path, default="robustness_log")
    parser.add_argument("--test", action='store_true')
    parser.add_argument("--fp16", action='store_true')
    parser.add_argument("--gpu-num", type=int, default=1)
    parser.add_argument("--model", type=str, choices=["bert", "mp", "conv_knrm", "all"], default="all")
    parser.add_argument("--exp", type=str, default="weight_decay", choices=["dropout", "weight_decay"])
    parser.add_argument("--saved-preprocessor", type=path, default="preprocessor")
    parser.opt_list("--weight-decay", type=float, tunable=True,
                    options=[0.0001, 0.001, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1,
                             0.12, 0.14, 0.16, 0.18, 0.20])
    args = parser.parse_args()
    return args

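# Hedged usage sketch (not from the original repo): each sampled trial arrives as
# a full Namespace with one value drawn from the tunable --weight-decay options;
# trials() is test_tube's API for generating hyperparameter combinations.
if __name__ == "__main__":
    args = parse_args()
    for trial in args.trials(5):       # 5 sampled hyperparameter sets
        print(trial.weight_decay)      # one value from the options above
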
def add_model_specific_args(
    parser: HyperOptArgumentParser,
) -> HyperOptArgumentParser:
    parser = super(TransformerTagger, TransformerTagger).add_model_specific_args(parser)
    # Parameters for the Encoder model
    parser.add_argument(
        "--encoder_model",
        default="RoBERTa",
        type=str,
        help="Encoder model to be used.",
        choices=["BERT", "RoBERTa", "XLM-RoBERTa"],
    )
    parser.add_argument(
        "--pretrained_model",
        default="roberta.base",
        type=str,
        help=("Encoder pretrained model to be used "
              "(e.g. roberta.base or roberta.large)."),
    )
    parser.add_argument(
        "--encoder_learning_rate",
        default=1e-05,
        type=float,
        help="Encoder specific learning rate.",
    )
    parser.opt_list(
        "--dropout",
        default=0.1,
        type=float,
        help="Dropout to be applied in the feed-forward net on top.",
        tunable=True,
        options=[0.1, 0.2, 0.3, 0.4, 0.5],
    )
    parser.add_argument(
        "--layer",
        default="-1",
        type=str,
        help=("Encoder model layer to be used. The last one is the default. "
              "If 'mix', all the encoder layers will be combined with layer-wise attention."),
    )
    parser.opt_list(
        "--scalar_mix_dropout",
        default=0.0,
        type=float,
        tunable=False,
        options=[0.0, 0.05, 0.1, 0.15, 0.2],
        help=("The amount of layer-wise dropout when using the scalar_mix option for "
              "layer pooling. Only applicable if the 'layer' parameter is set to 'mix'."),
    )
    parser.add_argument(
        "--concat_tokens",
        default=False,
        help="Apply concatenation of consecutive words to feed to the linear projection.",
        action="store_true",
    )
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(parents=[parent_parser], add_help=False)

    # want to optimize this parameter
    # parser.opt_list('--batch_size', type=int, default=16, options=[16, 10, 8], tunable=False)
    parser.opt_list('--learning_rate', type=float, default=1e-3,
                    options=[1e-3, 1e-4, 1e-5], tunable=False)
    parser.opt_list('--unfreeze_epoch_no', type=int, default=0,
                    options=[0, 20], tunable=True)
    # parser.opt_list('--loss_fn', type=str, default='mse', options=['mse', 'bce'], tunable=True)
    parser.add_argument('--batch_size', type=int, default=16)

    # fixed arguments
    parser.add_argument('--link', type=str, default='/scratch/ab8690/DLSP20Dataset/data')
    # parser.add_argument('--pretrained_path', type=str,
    #                     default='/scratch/ab8690/logs/dd_pretrain_ae/lightning_logs/version_9234267/checkpoints/epoch=42.ckpt')
    parser.add_argument(
        '--pretrained_path', type=str,
        default='/scratch/ab8690/logs/space_bb_pretrain/lightning_logs/version_9604234/checkpoints/epoch=23.ckpt')
    parser.add_argument('--output_img_freq', type=int, default=500)
    return parser

def add_model_specific_args(parent_parser, root_dir):  # pragma: no cover
    """
    Parameters you define here will be available to your model through self.hparams
    :param parent_parser:
    :param root_dir:
    :return:
    """
    parser = HyperOptArgumentParser(
        strategy=parent_parser.strategy, parents=[parent_parser]
    )
    parser.add_argument("--beta", default=1, type=float)
    parser.add_argument("--bottleneck_size", default=10, type=int)
    parser.add_argument("--group_distance_scaling", default=1, type=float)
    parser.add_argument("--audio_size", default=80, type=int)
    parser.add_argument("--data_dim", default=140, type=int)
    parser.add_argument("--data_files", default="datafiles.json", type=str)
    parser.add_argument("--data_dir", default="/data_dir", type=str)
    parser.opt_list(
        "--batch_size",
        default=256 * 4,
        type=int,
        options=[32, 64, 128, 256],
        tunable=False,
        help="batch size will be divided over all gpus being used across all nodes",
    )
    parser.opt_list(
        "--learning_rate",
        default=0.001 * 8,
        type=float,
        options=[0.0001, 0.0005, 0.001],
        tunable=True,
    )
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # dataset params
    parser.add_argument('--data_dir', default=None)
    parser.add_argument('--dataset', default=None)
    parser.add_argument('--n_sample', default=-1, type=float)
    parser.add_argument('--n_aug', default=-1, type=int)
    parser.add_argument('--seed', default=-1, type=int)
    parser.add_argument('--max_length', default=128, type=int)

    # training specific
    parser.add_argument('--max_epochs', default=5, type=int)
    parser.opt_list('--learning_rate', default=1e-3, type=float,
                    options=[1e-3, 5e-3, 1e-4], tunable=True)
    parser.opt_list('--batch_size', default=32, type=int,
                    options=[16, 32, 64], tunable=True)

    # model
    parser.add_argument('--pretrained_model', default='bert-base-uncased', type=str)
    parser.add_argument('--dropout', default=0.1, type=float)
    return parser

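# Hedged composition sketch (not from the original repo): hooks like the one
# above extend a root parser, so trainer flags and model flags end up in a
# single namespace.
root_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)
root_parser.add_argument('--gpus', default=0, type=int)
parser = add_model_specific_args(root_parser)
hparams = parser.parse_args()
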
def add_model_specific_args(parent_parser, root_dir):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # network params
    parser.add_argument('--h_dim', default=32, type=int)
    parser.add_argument('--truncation', default=64, type=int)
    parser.add_argument('--truncation_offset', default=1, type=int)
    parser.add_argument('--ratio', default=None, type=float, help="ratio for early prediction")

    # training params (opt)
    parser.opt_list('--threshold', default=0.5, type=float,
                    options=[0.3, 0.4, 0.5, 0.6], tunable=False)
    parser.opt_list('--learning_rate', default=5e-5, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    parser.opt_list('--batch_size', default=8, type=int)
    return parser

def add_model_specific_args(parent_parser, root_dir):  # pragma: no cover
    """
    Parameters you define here will be available to your model through self.hparams
    :param parent_parser:
    :param root_dir:
    :return:
    """
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # param overwrites
    # parser.set_defaults(gradient_clip_val=5.0)

    # network params
    parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
    parser.add_argument('--in_features', default=28 * 28, type=int)
    parser.add_argument('--out_features', default=10, type=int)
    # use 500 for CPU, 50000 for GPU to see speed difference
    parser.add_argument('--hidden_dim', default=50000, type=int)

    # data
    parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str)

    # training params (opt)
    parser.opt_list('--learning_rate', default=0.001 * 8, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    # if using 2 nodes with 4 gpus each, a batch size of 256 here
    # becomes 256 / (2*4) = 32 per gpu
    parser.opt_list('--batch_size', default=256 * 8, type=int,
                    options=[32, 64, 128, 256], tunable=False,
                    help='batch size will be divided over all GPUs being used across all nodes')
    return parser

from test_tube import HyperOptArgumentParser


def main():
    parser = HyperOptArgumentParser(
        description='Train a PyTorch Lightning model on the Yeast dataset',
        strategy='random_search'
    )
    parser.opt_list('--nb_layers', default=2, type=int, tunable=False,
                    options=[2, 4, 8])
    parser.opt_range('--layer_size', default=20, type=int, tunable=False,
                     low=10, high=200, nb_samples=10,
                     help="size of the hidden layer")
    parser.add_argument('--model', default="model.ptl", help="path to save the model")
    parser.add_argument('--train', default="yeast_train.svm", help="path to the training data")
    parser.add_argument('--val', default="yeast_test.svm", help="path to the validation data")
    hparams = parser.parse_args()

    # run 20 random-search trials over 8 CPU workers
    hparams.optimize_parallel_cpu(train_main, nb_trials=20, nb_workers=8)

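# Hedged sketch of the callback optimize_parallel_cpu expects; `train_main` is
# not shown in the snippet above. This assumes test_tube invokes it once per
# sampled trial with that trial's hyperparameter namespace (check the installed
# test_tube version for the exact signature).
def train_main(hparams):
    # hparams.nb_layers / hparams.layer_size hold this trial's sampled values
    print(hparams.nb_layers, hparams.layer_size)
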
def add_optim_specific_args(
    parser: HyperOptArgumentParser,
) -> HyperOptArgumentParser:
    """
    Parses optimizer-specific arguments and adds them to the Namespace.
    :param parser:
    """
    parser.opt_list(
        "--learning_rate",
        default=5e-5,
        type=float,
        tunable=True,
        options=[1e-05, 3e-05, 5e-05, 8e-05, 1e-04],
        help="Optimizer learning rate.",
    )
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # dataset params
    parser.add_argument('--data_dir', default=None)
    parser.add_argument('--word_vec_dir', default=None)
    parser.add_argument('--dataset', default=None)
    parser.add_argument('--n_sample', default=-1, type=float)
    parser.add_argument('--n_aug', default=-1, type=int)
    parser.add_argument('--pwe', default=0, type=int)  # pretrained word embedding

    # model params
    parser.add_argument('--embed_dim', default=200, type=int)
    parser.opt_list('--hidden_dim', default=64, type=int,
                    options=[64, 128, 256], tunable=True)
    parser.add_argument('--n_layers', default=1, type=int)
    parser.opt_list('--dropout', default=0.1, type=float,
                    options=[0.1, 0.2, 0.3, 0.5], tunable=True)

    # training params
    parser.add_argument('--max_epochs', default=5, type=int)
    parser.opt_list('--learning_rate', default=4.0, type=float,
                    options=[0.05, 0.5, 5], tunable=True)
    parser.opt_list('--scheduler_gamma', default=0.9, type=float,
                    options=[0.9, 0.5, 0.1], tunable=True)
    parser.opt_list('--batch_size', default=32, type=int,
                    options=[16, 32, 64], tunable=True)
    parser.add_argument('--seed', default=-1, type=int)
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(parents=[parent_parser], add_help=False)

    # want to optimize this parameter
    # parser.opt_list('--batch_size', type=int, default=16, options=[16, 10, 8], tunable=False)
    parser.opt_list('--learning_rate', type=float, default=0.001,
                    options=[1e-3, 1e-4, 1e-5], tunable=True)
    parser.add_argument('--batch_size', type=int, default=10)

    # fixed arguments
    parser.add_argument('--output_img_freq', type=int, default=100)
    parser.add_argument('--unfreeze_epoch_no', type=int, default=0)
    parser.add_argument('--mse_loss', default=False, action='store_true')
    return parser

def add_model_specific_args(parent_parser, root_dir):
    """
    Specify the hyperparams for this LightningModule
    """
    # MODEL specific
    parser = HyperOptArgumentParser(strategy='grid_search', parents=[parent_parser])
    parser.add_argument('--learning_rate', default=0.01, type=float)
    parser.add_argument('--batch_size', default=128, type=int)
    # parser.add_argument('--split_seed', default=123, type=int)
    parser.opt_list('--split_seed', default=1, type=int, tunable=True,
                    options=list(1000 + np.arange(20).astype(np.int32)))
                    # options=list(1000 + np.arange(2).astype(np.int32)))
    parser.add_argument('--data_root', default=Path(root_dir) / 'data', type=Path)
    parser.add_argument('--monitor_preds', action='store_true', default=False,
                        help='export histograms of preds')
    parser.add_argument('--trn_subsample_pct', default=1.0, type=float,
                        help='subsample percentage of training data')
    parser.opt_list('--trn_nb', default=0, type=int,
                    help='number of training samples (0 = take all samples)',
                    tunable=True, options=[2000, 1500, 1000, 500])
                    # options=[750, 500])
    # parser.add_argument('--trn_nb', default=0, type=int,
    #                     help='number of training samples (0 = take all samples)')

    # training specific (for this model)
    parser.add_argument('--max_nb_epochs', default=200, type=int)
    return parser

def add_model_specific_args(parent_parser, root_dir):
    """
    Parameters you define here will be available to your model through self.hparams
    :param parent_parser:
    :param root_dir:
    :return:
    """
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # param overwrites
    # parser.set_defaults(gradient_clip=5.0)

    # network params
    parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
    parser.add_argument('--in_features', default=28 * 28, type=int)
    parser.add_argument('--out_features', default=10, type=int)
    # use 500 for CPU, 50000 for GPU to see speed difference
    parser.add_argument('--hidden_dim', default=50000, type=int)

    # data
    parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str)

    # training params (opt)
    parser.opt_list('--learning_rate', default=0.001, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
    parser.opt_list('--batch_size', default=256, type=int,
                    options=[32, 64, 128, 256], tunable=False)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(parents=[parent_parser], add_help=False)
    parser.opt_list('--hidden_dim', type=int, default=256, options=[256, 128], tunable=False,
                    help='intermediate layers dimension before embedding for default encoder/decoder')
    parser.opt_list('--latent_dim', type=int, default=128, options=[64, 128], tunable=True,
                    help='dimension of latent variables z')
    parser.opt_list('--learning_rate', type=float, default=0.001,
                    options=[1e-3, 1e-4, 1e-5], tunable=True)
    parser.opt_list('--batch_size', type=int, default=16, options=[16], tunable=False)

    # fixed parameters
    parser.add_argument('--input_width', type=int, default=306 * 6, help='input image width')
    parser.add_argument('--input_height', type=int, default=256)
    parser.add_argument('--output_width', type=int, default=306)
    parser.add_argument('--output_height', type=int, default=256)
    parser.add_argument('--in_channels', type=int, default=3)
    parser.add_argument('--link', type=str, default='/scratch/ab8690/DLSP20Dataset/data')
    # parser.add_argument('--link', type=str, default='/Users/annika/Developer/driving-dirty/data')
    parser.add_argument('--output_img_freq', type=int, default=500)
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # param overwrites
    # parser.set_defaults(gradient_clip=5.0)

    # network params
    parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
    parser.add_argument('--in_features', default=28 * 28, type=int)
    parser.add_argument('--hidden_dim', default=500, type=int)
    parser.add_argument('--out_features', default=10, type=int)

    # data
    parser.add_argument(
        '--data_root',
        default='/Users/williamfalcon/Developer/personal/research_lib/research_proj/datasets/mnist',
        type=str)

    # training params (opt)
    parser.opt_list('--learning_rate', default=0.001, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
    parser.opt_list('--batch_size', default=256, type=int,
                    options=[32, 64, 128, 256], tunable=False)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    return parser

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(parents=[parent_parser], add_help=False)

    # want to optimize this parameter
    # parser.opt_list('--batch_size', type=int, default=16, options=[16, 10, 8], tunable=False)
    parser.opt_list('--learning_rate', type=float, default=0.001,
                    options=[1e-3, 1e-4, 1e-5], tunable=True)
    parser.opt_list('--batch_size', type=int, default=6, options=[4, 6], tunable=False)
    parser.opt_list('--unfreeze_epoch_no', type=int, default=10, options=[0, 10], tunable=True)
    # parser.add_argument('--batch_size', type=int, default=4)

    # fixed arguments
    parser.add_argument('--link', type=str, default='/scratch/ab8690/DLSP20Dataset/data')
    parser.add_argument(
        '--pretrained_path', type=str,
        default='/scratch/ab8690/logs/space_bb_pretrain/lightning_logs/version_9604234/checkpoints/epoch=23.ckpt')
    parser.add_argument('--output_img_freq', type=int, default=100)
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--mse_loss', default=False, action='store_true')
    return parser

def add_model_specific_args(parent_parser, root_dir):  # pragma: no cover
    """
    Parameters you define here will be available to your model through self.hparams
    """
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # param overwrites
    # parser.set_defaults(gradient_clip=5.0)

    # network params
    parser.opt_list('--local', default=3, options=[3, 5, 7], type=int, tunable=True)
    parser.opt_list('--n_kernels', default=32, options=[32, 50, 100], type=int, tunable=True)
    parser.add_argument('-w_kernel', type=int, default=1)
    parser.opt_list('--d_model', type=int, default=512, options=[512], tunable=False)
    parser.opt_list('--d_inner', type=int, default=2048, options=[2048], tunable=False)
    parser.opt_list('--d_k', type=int, default=64, options=[64], tunable=False)
    parser.opt_list('--d_v', type=int, default=64, options=[64], tunable=False)
    parser.opt_list('--n_head', type=int, default=8, options=[8], tunable=False)
    parser.opt_list('--n_layers', type=int, default=6, options=[6], tunable=False)
    parser.opt_list('--drop_prob', type=float, default=0.1,
                    options=[0.1, 0.2, 0.5], tunable=False)

    # arguments from dataset
    parser.add_argument('--data_name', type=str)
    parser.add_argument('--data_dir', default=r'.\data', type=str)
    parser.add_argument('--n_multiv', type=int)
    parser.opt_list('--window', default=64, type=int, options=[32, 64, 128], tunable=True)
    parser.opt_list('--horizon', default=3, type=int, options=[3, 6, 12, 24], tunable=True)

    # training params (opt)
    parser.opt_list('--learning_rate', default=0.005, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005, 0.008], tunable=True)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    parser.opt_list('--criterion', default='mse_loss', type=str,
                    options=['l1_loss', 'mse_loss'], tunable=False)
    # if using 2 nodes with 4 gpus each, a batch size of 256 here becomes 256 / (2*4) = 32 per gpu
    parser.opt_list('--batch_size', default=16, type=int,
                    options=[16, 32, 64, 128, 256], tunable=False,
                    help='batch size will be divided over all the gpus being used across all nodes')
    return parser

import tensorflow as tf
from test_tube import Experiment, HyperOptArgumentParser


def train(hparams):
    # assumed setup for `exp`: a test_tube Experiment tracking this trial's hparams
    exp = Experiment(name=hparams.test_tube_exp_name, save_dir=hparams.log_path)
    exp.argparse(hparams)

    # define tensorflow graph
    x = tf.placeholder(dtype=tf.int32, name='x')
    y = tf.placeholder(dtype=tf.int32, name='y')
    out = x * y

    sess = tf.Session()

    # Run the tf op
    for train_step in range(0, 100):
        output = sess.run(out, feed_dict={x: hparams.x_val, y: hparams.y_val})
        exp.log({'fake_err': output})

    # save exp when we're done
    exp.save()


# set up our argparser and make the y_val tunable
parser = HyperOptArgumentParser(strategy='random_search')
parser.add_argument('--test_tube_exp_name', default='my_test')
parser.add_argument('--log_path', default='/Users/waf/Desktop/test')
parser.opt_list('--y_val', default=12, options=[1, 2, 3, 4], tunable=True)
parser.opt_list('--x_val', default=12, options=[20, 12, 30, 45], tunable=True)
hyperparams = parser.parse_args()

# optimize on 4 gpus at the same time
# each gpu will get 1 experiment with a set of hyperparams
hyperparams.optimize_parallel_gpu(train, gpu_ids=['1', '0', '3', '2'],
                                  nb_trials=4, nb_workers=4)

def add_model_specific_args(parent_parser, root_dir):  # pragma: no cover
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])
    parser.set_defaults(device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

    # network params
    parser.opt_list('--gcn_mid_dim', default=256, type=int, options=[128, 256, 512, 1024], tunable=True)
    parser.opt_list('--gcn_output_dim', default=256, type=int, options=[128, 256, 512, 1024], tunable=True)
    parser.opt_list('--txtcnn_drop_prob', default=0.0, options=[0.0, 0.1, 0.2], type=float, tunable=True)
    parser.opt_list('--gcn_drop_prob', default=0.5, options=[0.2, 0.5], type=float, tunable=True)
    parser.opt_list('--warploss_margin', default=0.4, type=float, tunable=True)
    parser.opt_list('--freeze_embeddings', default=True, options=[True, False],
                    type=lambda x: (str(x).lower() == 'true'), tunable=True)
    parser.opt_list('--txtcnn_pfilter_num1', default=64, options=[16, 32, 64, 128], type=int, tunable=True)
    parser.opt_list('--txtcnn_pfilter_num2', default=64, options=[16, 32, 64, 128], type=int, tunable=True)
    parser.opt_list('--txtcnn_pfilter_num3', default=64, options=[16, 32, 64, 128], type=int, tunable=True)
    parser.opt_list('--txtcnn_pfilter_num4', default=64, options=[16, 32, 64, 128], type=int, tunable=True)
    parser.opt_list('--txtcnn_rfilter_num1', default=64, options=[16, 32, 64, 128], type=int, tunable=True)
    parser.opt_list('--txtcnn_rfilter_num2', default=32, options=[16, 32, 64, 128], type=int, tunable=True)

    # data
    parser.add_argument('--data_root', default=os.path.join(root_dir, 'data'), type=str)
    parser.add_argument('--top_t', default=6, type=int)
    parser.add_argument('--total_onehop', default=20, type=int)
    parser.add_argument('--total', default=50, type=int)
    parser.add_argument('--shuffle', default=True, type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--train_div', default=1.0, type=float)

    # training params (opt)
    parser.opt_list('--batch_size', default=64, options=[32, 64, 128, 256], type=int, tunable=False)
    parser.opt_list('--max_nb_epochs', default=8, options=[256, 512, 1024], type=int, tunable=False)
    parser.opt_list('--learning_rate', default=0.0005, options=[0.0001, 0.0005, 0.001], type=float, tunable=True)
    parser.opt_list('--weight_decay', default=0.001, options=[0.0001, 0.0005, 0.001], type=float, tunable=True)
    parser.add_argument('--model_save_path', default=os.path.join(root_dir, 'experiment'), type=str)
    return parser

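# Hedged aside (illustration only): the lambda used above for --freeze_embeddings
# and --shuffle parses booleans from the CLI; plain type=bool would treat any
# non-empty string, including 'false', as True.
str2bool = lambda x: (str(x).lower() == 'true')
assert str2bool('True') is True
assert str2bool('false') is False
assert bool('false') is True  # the pitfall the lambda avoids
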
                     nb_samples=100, low=.0001, high=.001)
    parser.opt_range('--l2lam_init', action='store', type=float, dest='l2lam_init',
                     default=.001, tunable=False, low=.0001, high=100,
                     help='initial l2 regularization')
    parser.opt_list('--solver', action='store', dest='solver', type=str, tunable=False,
                    options=['sgd', 'adam'], help='optimizer/solver ("adam", "sgd")',
                    default="sgd")
    parser.opt_range('--cg_max_iter', action='store', dest='cg_max_iter', type=int,
                     tunable=False, low=1, high=20,
                     help='max number of conjgrad iterations', default=10)
    parser.opt_range('--batch_size', action='store', dest='batch_size',

def add_model_specific_args(parent_parser):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser], add_help=False)
    parser.opt_list('--q_drop', default=0.35, options=[0.25, 0.3, 0.35, 0.15, 0.4, 0.1], type=float, tunable=False)
    parser.add_argument('--q_hid_dim', default=1024, type=int)
    parser.add_argument('--padding_idx', type=int, default=0)
    parser.add_argument('--net_init', default='k_u')
    parser.opt_list('--iter_num', default=3, options=[1, 2, 3, 4, 5], type=int)
    parser.add_argument('--g_init_method', default='full', type=str)

    # stem params
    parser.add_argument('--stem_in_dim', default=2048, type=int)
    parser.add_argument('--stem_out_dim', default=1024, type=int)
    parser.opt_list('--stem_norm', default='custom', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--stem_orders', default='dlna', options=['dlna', 'dnla'], type=str, tunable=False)
    parser.opt_list('--stem_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--stem_method', default='linear', options=['linear', 'film', 'double_linear'], type=str)
    parser.add_argument('--stem_use_act', action='store_true')

    # edge params (e_*)
    parser.opt_list('--e_f_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--e_f_orders', default='dlna', options=['dlna', 'dnla'], type=str, tunable=False)
    parser.opt_list('--e_f_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.add_argument('--e_dim', default=512, type=int)
    parser.opt_list('--e_f_method', default='mul_film', options=['mul_film', 'cat_film'], type=str)
    parser.add_argument('--e_f_use_nGeo', action='store_true', default=True)
    parser.opt_list('--e_w_method', default='linear_softmax_8', options=['linear_softmax_10', 'linear_softmax_12'], type=str)
    parser.opt_list('--e_w_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--e_w_orders', default='dlna', options=['dlna', 'dnla'], type=str, tunable=False)
    parser.opt_list('--e_w_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--e_p_method', default='linear', options=['linear', 'share'], type=str)
    parser.opt_list('--e_p_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--e_p_orders', default='dlna', options=['dlna', 'dnla'], type=str, tunable=False)
    parser.opt_list('--e_p_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--e_p_act', default='relu', options=['relu', 'swish', 'elu'], type=str, tunable=False)

    # node params (n_*)
    parser.opt_list('--n_f_method', default='film', options=['linear', 'film'], type=str)
    parser.opt_list('--n_f_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--n_geo_method', default='cat', options=['cat', 'sum', 'linear_cat'], type=str)
    parser.add_argument('--n_geo_reuse', action='store_true', default=True)
    parser.add_argument('--n_geo_dim', default=64, type=int)
    parser.add_argument('--n_geo_out_dim', default=64, type=int)
    parser.opt_list('--n_geo_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--n_geo_orders', default='lna', options=['lna', 'nla'], type=str, tunable=False)
    parser.opt_list('--e_geo_method', default='linear', options=['linear', 'cat'], type=str)
    parser.add_argument('--e_geo_dim', default=128, type=int)
    parser.add_argument('--e_geo_out_dim', default=128, type=int)
    parser.add_argument('--e_geo_reuse', action='store_true', default=True)
    parser.add_argument('--e_geo_aug', action='store_true')
    parser.opt_list('--e_geo_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--e_geo_orders', default='lna', options=['lna', 'nla'], type=str, tunable=False)

    # classifier params (cls_*)
    parser.opt_list('--cls_method', default='linear', options=['linear'], type=str)
    parser.opt_list('--cls_norm', default='weight', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--cls_orders', default='dlna', options=['dlna', 'dnla'], type=str, tunable=False)
    parser.opt_list('--cls_drop', default=0.35, options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--cls_act', default='relu', options=['relu', 'swish'], type=str, tunable=False)
    parser.opt_list('--f_c_norm', type=str, default='weight', options=['weight', 'custom', 'layer'])
    parser.opt_list('--f_c_drop', default=0., options=[0.3, 0.35, 0.4], type=float)
    parser.opt_list('--f_c_orders', default='dln', options=['dln', 'dnl'], type=str, tunable=False)
    parser.opt_list('--f_x_norm', default='layer', options=['custom', 'layer', 'weight'], type=str, tunable=False)
    parser.opt_list('--f_x_orders', default='dln', options=['dln', 'dnl'], type=str, tunable=False)
    parser.add_argument('--f_x_norm_affine', action='store_true')
    parser.opt_list('--f_act', default='relu', options=['relu', 'swish', 'elu'], type=str, tunable=False)
    parser.opt_list('--pool_method', default='mix', options=['mix', 'mean', 'max'], type=str)

    # training params
    parser.opt_list('--lr', default=2.5e-4, type=float, options=[2.5e-4, 3.5e-4], tunable=False)
    parser.opt_list('--sched_factor', default=0.5, type=float, options=[0.1, 0.8, 0.6, 0.3], tunable=False)
    parser.opt_list('--optimizer', type=str, default='adam', options=['adam', 'sgd'])
    parser.opt_list('--sched', type=str, default='plat', options=['plat', 'cyclic', 'sgdr'])
    parser.opt_list('--dataset', default='vqa2_cp', type=str, options=['vqa2_cp', 'gqa_lcgn'])
    parser.add_argument('--epochs', default=12, type=int)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--num_workers', default=5, type=int)
    parser.add_argument('--grad_clip', default=0., type=float)
    parser.add_argument('--weight_decay', default=0., type=float)
    parser.add_argument('--use_warmup', action='store_true')
    return parser

def add_model_specific_args(parent_parser, root_dir):
    """
    Parameters defined here will be available to the model through self.hparams
    """
    # fmt: off
    parser = HyperOptArgumentParser(parents=[parent_parser])
    parser.opt_list('--bs', default=64, type=int, options=[32, 128, 256], tunable=True,
                    help='mini-batch size (default: 64); this is the total batch size of all GPUs '
                         'on the current node when using Data Parallel or Distributed Data Parallel')
    parser.opt_list('--projection_size', default=256, type=int, options=[32, 128, 512], tunable=True,
                    help='sentence embedding size and hidden size for the second transformer')
    parser.opt_list('--n_layers', default=1, type=int, options=[2, 4, 6], tunable=True,
                    help='number of encoder layers for the second transformer')
    parser.opt_list('--frozen_epochs', default=2, type=int, options=[3, 6, 9], tunable=True,
                    help='number of epochs the pretrained DistilBert is frozen')
    parser.opt_range('--lr', default=2.0e-5, type=float, tunable=True,
                     low=1.0e-5, high=5.0e-4, nb_samples=5,
                     help='initial learning rate')
    parser.opt_list('--layerwise_decay', default=0.95, type=float, options=[0.3, 0.6, 0.8], tunable=True,
                    help='layerwise decay factor for the learning rate of the pretrained DistilBert')
    parser.opt_list('--max_seq_len', default=32, type=int, options=[16, 64], tunable=False,
                    help='maximum number of input tokens for the DistilBert model')
    parser.opt_list('--dropout', default=0.1, type=float, options=[0.1, 0.2], tunable=False)
    parser.add_argument('--train_file', default=os.path.join(root_dir, 'data/clean_train.txt'), type=str)
    parser.add_argument('--val_file', default=os.path.join(root_dir, 'data/clean_val.txt'), type=str)
    parser.add_argument('--test_file', default=os.path.join(root_dir, 'data/clean_test.txt'), type=str)
    parser.add_argument('--epochs', default=3, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--seed', type=int, default=None,
                        help='seed for initializing training')
    # fmt: on
    return parser

def add_model_specific_args(
    cls, parser: HyperOptArgumentParser
) -> HyperOptArgumentParser:
    """
    Parser for Estimator specific arguments/hyperparameters.
    :param parser: HyperOptArgumentParser obj

    Returns:
        - updated parser
    """
    parser.add_argument(
        "--encoder_learning_rate",
        default=1e-05,
        type=float,
        help="Encoder specific learning rate.",
    )
    parser.add_argument(
        "--learning_rate",
        default=3e-05,
        type=float,
        help="Classification head learning rate.",
    )
    parser.opt_list(
        "--nr_frozen_epochs",
        default=1,
        type=int,
        help="Number of epochs we want to keep the encoder model frozen.",
        tunable=True,
        options=[0, 1, 2, 3, 4, 5],
    )
    # Data Args:
    parser.add_argument(
        "--label_set",
        default="Social sciences and society;Sports and recreation;Natural sciences;Language and literature;Geography and places;Music;Media and drama;Art and architecture;Warfare;Engineering and technology;Video games;Philosophy and religion;Agriculture, food and drink;History;Mathematics;Miscellaneous",
        type=str,
        help="Classification labels set.",
    )
    parser.add_argument(
        "--train_csv",
        default="data/train_data.csv",
        type=str,
        help="Path to the file containing the train data.",
    )
    parser.add_argument(
        "--dev_csv",
        default="data/valid_data.csv",
        type=str,
        help="Path to the file containing the dev data.",
    )
    parser.add_argument(
        "--test_csv",
        default="data/valid_data.csv",
        type=str,
        help="Path to the file containing the test data.",
    )
    parser.add_argument(
        "--loader_workers",
        default=8,
        type=int,
        help=("How many subprocesses to use for data loading. "
              "0 means that the data will be loaded in the main process."),
    )
    return parser

def get_args(command=None):
    # parser = argparse.ArgumentParser()
    parser = HyperOptArgumentParser(strategy="random_search", add_help=False)

    # Analysis args
    # parser.add_argument("--bert_model", default='bert-base-uncased', type=str, help="bert model name")
    parser.add_argument("--bert_model", default="distilbert-base-uncased", type=str,
                        help="bert model name")
    parser.add_argument("--emb_file", default="~/checkpoint/bert_vectors/", type=str,
                        help="location of embedding file")
    parser.add_argument("--data_loc", default="~/checkpoint/dialog_metric/convai2_data/", type=str,
                        help="location of data dump")
    parser.add_argument("--data_name", default="convai2", type=str, help="convai2/cornell_movie")
    parser.add_argument("--tok_file", default="na", type=str, help="tokens and word dict file")
    parser.add_argument("--pca_file", default="na", type=str, help="pca saved weights file")
    parser.opt_list("--learn_down", default=False, action="store_true",
                    options=[True, False], tunable=False)
    parser.opt_list("--fix_down", default=False, action="store_true",
                    options=[True, False], tunable=False)
    parser.add_argument("--trained_bert_suffix", default="ep_10_lm", type=str,
                        help="folder to look for trained bert")
    parser.add_argument("--tc", default=False, action="store_true")
    parser.opt_list("--downsample", default=True, action="store_true",
                    options=[True, False], tunable=False)
    parser.opt_list("--down_dim", type=int, default=300, options=[100, 300, 400], tunable=False)
    parser.add_argument("--load_fine_tuned", default=True, action="store_true")
    # parser.add_argument("--fine_tune_model", default="~/checkpoint/dialog_metric/cleaned/bert_lm", type=str)
    parser.add_argument("--fine_tune_model",
                        default="~/checkpoint/dialog_metric/cleaned/distilbert_lm", type=str)

    # Experiment ID
    parser.add_argument("--id", default="ruber_bs", type=str)

    # Model training args
    parser.add_argument("--device", default="cuda", type=str, help="cuda/cpu")
    parser.add_argument("--model", default="models.TransitionPredictorMaxPoolLearnedDownsample",
                        type=str, help="full model name path")
    parser.opt_list("--optim", default="adam,lr=0.0001", type=str, help="optimizer",
                    options=["adam,lr=0.001", "adam,lr=0.01", "adam,lr=0.0001"], tunable=False)
    parser.add_argument("--epochs", default=10, type=int, help="number of epochs")
    parser.add_argument("--margin", default=0.5, type=float, help="margin")
    parser.add_argument("--train_mode", default="ref_score", type=str,
                        help="ref_score/cont_score/all/nce")
    parser.add_argument("--num_nce", type=int, default=5, help="number of nce samples per scheme")
    parser.add_argument("--model_save_dir", default="~/checkpoint/dialog_metric/", type=str,
                        help="model save dir")
    parser.add_argument("--model_load_path", default="~/checkpoint/dialog_metric/", type=str,
                        help="if there is a need of different load path")
    parser.add_argument("--batch_size", default=64, type=int, help="batch size")
    parser.add_argument("--load_model", default=False, action="store_true",
                        help="load model from previous checkpoint")
    parser.add_argument("--logger_dir", default="./", type=str,
                        help="log directory (must be created)")
    parser.add_argument("--log_interval", default=100, type=int, help="log interval")
    parser.add_argument("--watch_model", default=False, action="store_true",
                        help="wandb watch model")
    parser.add_argument("--vector_mode", default=True, action="store_true",
                        help="if false, train with word representations")
    parser.add_argument("--remote_logging", default=False, action="store_true",
                        help="wandb remote logging on or off")
    parser.add_argument("--wandb_project", default="dialog-metric", type=str)
    parser.add_argument("--bidirectional", default=False, action="store_true")
    parser.add_argument("--dataloader_threads", default=8, type=int)
    parser.add_argument("--exp_data_folder", default="na", help="exp data folder")
    parser.add_argument("--num_workers", default=4, type=int, help="dataloader num workers")
    parser.opt_list("--clip", default=0.5, type=float, help="gradient clipping",
                    options=[0.0, 0.5, 1.0], tunable=False)
    parser.opt_list("--dropout", default=0.2, type=float, help="dropout probability",
                    options=[0.0, 0.2], tunable=False)
    parser.opt_list("--decoder_hidden", default=200, type=int, help="decoder hidden values",
                    options=[100, 200, 500, 700], tunable=False)
    parser.add_argument("--gpus", type=str, default="-1", help="how many gpus to use in the node")
    parser.add_argument("--debug", default=False, action="store_true",
                        help="if true, set debug modes")

    ## Evaluation args
    parser.add_argument("--corrupt_type", default="rand_utt", type=str,
                        help="all/word_drop/word_order/word_repeat/rand_utt/model_false/rand_back/"
                             "only_semantics/only_syntax/context_corrupt")
    parser.add_argument("--corrupt_context_type", default="rand", type=str,
                        help="rand/drop/shuffle/model_true/model_false/progress/none")
    parser.add_argument("--drop_per", default=0.50, type=float, help="drop percentage")
    parser.add_argument("--eval_val", default=False, action="store_true",
                        help="only eval val set")
    parser.add_argument("--model_response_pre", default="na", type=str,
                        help="model response file prefix")
    parser.add_argument("--load_model_responses", default=True, action="store_true",
                        help="load model responses")
    parser.add_argument("--corrupt_model_names", default="seq2seq", type=str,
                        help="comma separated models")
    parser.add_argument("--restore_version", default=-1, type=int,
                        help="if > -1, restore training from the given version")

    # Baseline args
    parser.add_argument("--train_baseline", default="na", help="ruber/bilstm", type=str)

    ## RUBER
    parser.add_argument("--word2vec_context_size", default=3, type=int,
                        help="context size for word2vec training")
    parser.add_argument("--word2vec_embedding_dim", default=300, type=int, help="embedding dim")
    parser.add_argument("--word2vec_epochs", default=100, type=int,
                        help="word2vec training epochs")
    parser.add_argument("--word2vec_out", default="~/checkpoint/dialog_metric/ruber/w2v.pt",
                        type=str, help="word2vec output location")
    parser.add_argument("--word2vec_lr", default=0.001, type=float, help="word2vec lr")
    parser.add_argument("--word2vec_batchsize", default=512, type=int)
    parser.add_argument("--ruber_ref_pooling_type", default="max_min", type=str, help="max_min/avg")
    parser.add_argument("--ruber_unref_pooling_type", default="max", type=str, help="max/mean")
    parser.add_argument("--ruber_load_emb", action="store_true", help="load trained word2vec")
    parser.add_argument("--ruber_lstm_dim", default=300, type=int,
                        help="dimensions of ruber encoder")
    parser.add_argument("--ruber_mlp_dim", default=200, type=int,
                        help="dimensions of ruber encoder")
    parser.add_argument("--ruber_dropout", default=0.1, type=float, help="ruber dropout")
    parser.add_argument("--num_words", default=-1, type=int)

    ## Data collection args
    parser.add_argument("--agent", type=str, default="kvmemnn", help="repeat/ir/seq2seq")
    parser.add_argument("--mode", type=str, default="train", help="train/test/valid")
    parser.add_argument("--models", type=str, default="seq2seq,repeat",
                        help="comma separated model values")
    parser.add_argument("--response_file", type=str,
                        default="~/Projects/online_dialog_eval/elisa_data/")
    parser.add_argument("--mf", type=str,
                        default="/checkpoint/parlai/zoo/convai2/seq2seq_naacl2019_abibaseline/model",
                        help="only for special cases")
    parser.add_argument("--only_data", action="store_true", default=False,
                        help="only extract and store dialog data")

    ## SLURM args
    parser.add_argument("--slurm_log_path", type=str, default="~/checkpoint/dialog_metrics/ckpt/",
                        help="slurm log path")
    parser.add_argument("--per_experiment_nb_gpus", type=int, default=1, help="number of gpus")
    parser.add_argument("--per_experiment_nb_cpus", type=int, default=16, help="number of cpus")
    parser.add_argument("--nb_gpu_nodes", type=int, default=1, help="number of gpu nodes")
    parser.add_argument("--job_time", type=str, default="23:59:00", help="time")
    parser.add_argument("--gpu_type", type=str, default="volta", help="gpu type")
    parser.add_argument("--gpu_partition", type=str, default="learnfair", help="gpu partition")
    parser.add_argument("--nb_hopt_trials", type=int, default=1,
                        help="how many grid search trials to run")
    parser.add_argument("--train_per_check", type=float, default=1.0)
    parser.add_argument("--val_per_check", type=float, default=1.0)
    parser.add_argument("--test_per_check", type=float, default=1.0)
    parser.add_argument("--use_cluster", action="store_true", default=False,
                        help="activate cluster mode")

    ## Inference args
    parser.add_argument("--model_name", type=str, default="na", help="model name")
    parser.add_argument("--model_version", type=str, default="version_0", help="model version")
    parser.add_argument("--use_ddp", action="store_true", default=False)
    parser.add_argument("--human_eval", action="store_true", default=False)
    parser.add_argument("--human_eval_file", type=str,
                        default="~/checkpoint/dialog_metric/controllable_dialogs.csv")
    parser.add_argument("--results_file", type=str, default="test_results.jsonl")

    ## Corruption args
    parser.add_argument("--corrupt_pre", type=str,
                        default="~/checkpoint/dialog_metric/convai2_data/convai2_test_")
    parser.add_argument("--corrupt_ne", type=int, default=1)
    parser.add_argument("--test_suffix", type=str, default="true_response")
    parser.add_argument("--test_column", type=str, default="true_response")

    if command:
        return parser.parse_args(command.split(" "))
    else:
        return parser.parse_args()

def add_model_specific_args(
        cls, parser: HyperOptArgumentParser) -> HyperOptArgumentParser:
    """
    Parser for Estimator specific arguments/hyperparameters.
    :param parser: HyperOptArgumentParser obj

    Returns:
        - updated parser
    """
    parser.add_argument(
        "--encoder_model",
        default="bert-base-uncased",
        type=str,
        help="Encoder model to be used.",
    )
    parser.add_argument(
        "--encoder_learning_rate",
        default=1e-05,
        type=float,
        help="Encoder specific learning rate.",
    )
    parser.add_argument(
        "--learning_rate",
        default=3e-05,
        type=float,
        help="Classification head learning rate.",
    )
    parser.opt_list(
        "--nr_frozen_epochs",
        default=1,
        type=int,
        help="Number of epochs we want to keep the encoder model frozen.",
        tunable=True,
        options=[0, 1, 2, 3, 4, 5],
    )
    # Data Args:
    parser.add_argument(
        "--train_csv",
        default="data/imdb_reviews_train.csv",
        type=str,
        help="Path to the file containing the train data.",
    )
    parser.add_argument(
        "--dev_csv",
        default="data/imdb_reviews_test.csv",
        type=str,
        help="Path to the file containing the dev data.",
    )
    parser.add_argument(
        "--test_csv",
        default="data/imdb_reviews_test.csv",
        type=str,
        help="Path to the file containing the test data.",
    )
    parser.add_argument(
        "--loader_workers",
        default=8,
        type=int,
        help=("How many subprocesses to use for data loading. "
              "0 means that the data will be loaded in the main process."),
    )
    return parser

def argParser():
    """
    Creates a parser object which parses all the flags from the command line.
    Access the parsed command line values through the args object this returns.

    Usage:
        The first field is the flag name.
        dest=NAME is the name used to reference the parameter (args.NAME).
        default is the default value of the parameter.

    Example:
        > python run.py --batch-size 100
        args.batch_size  <-- 100
    """
    # parser = argparse.ArgumentParser()
    parser = HyperOptArgumentParser(strategy='random_search')

    # trainer arguments
    parser.add_argument("--gpu", dest="gpu", default='0', help="GPU number")
    parser.add_argument("--mode", dest="mode", default='train',
                        help="Mode is one of 'train', 'test'")
    parser.add_argument("--encode", dest="encode", default=0, type=int,
                        help="encode is 0 or 1, default 0")
    parser.add_argument("--ntrials", dest="ntrials", default=20, type=int,
                        help="Number of trials to run for hyperparameter tuning")

    # model-specific arguments
    # (non-tunable)
    parser.add_argument("--model", dest="model", default="baseline_lstm",
                        help="Name of model to use")
    parser.add_argument("--epochs", dest="epochs", type=int, default=10,
                        help="Number of epochs to train for")
    parser.add_argument("--patience", dest="patience", type=int, default=10,
                        help="Learning rate decay scheduler patience, number of epochs")

    # (tunable arguments)
    parser.opt_list("--batch-size", dest="batch_size", type=int, default=100,
                    help="Size of the minibatch", tunable=False,
                    options=[32, 64, 128, 256])
    parser.opt_range("--learning-rate", dest="learning_rate", type=float, default=1e-3,
                     help="Learning rate for training", tunable=True,
                     low=1e-3, high=1e-1, nb_samples=4)
    parser.opt_list("--hidden-size", dest="hidden_size", type=int, default=100,
                    help="Dimension of hidden layers", tunable=False,
                    options=[32, 64, 128, 256])
    parser.opt_list('--optimizer', dest="optimizer", type=str, default='SGD',
                    help='Optimizer to use (default: SGD)', tunable=False,
                    options=['SGD', 'Adam'])
    parser.opt_range('--weight-decay', dest="weight_decay", type=float, default=1e-5,
                     help='Weight decay for L2 regularization.', tunable=True,
                     low=1e-6, high=1e-1, nb_samples=10)
    parser.opt_list('--frame-freq', dest="frame_freq", type=int, default=5,
                    help='Frequency for sub-sampling frames from a video', tunable=True,
                    options=[10, 30, 60, 75, 100])

    # (tcn-only arguments)
    parser.opt_list('--dropout', dest="dropout", type=float, default=0.05,
                    help='Dropout applied to layers (default: 0.05)', tunable=True,
                    options=[0.05, 0.1, 0.3, 0.5, 0.7])
    parser.opt_list('--levels', dest="levels", type=int, default=8,
                    help='# of levels for TCN (default: 8)', tunable=True,
                    options=[6, 8, 10, 12])

    # LSTM only arguments
    parser.opt_list('--num_layers', dest="num_layers", type=int, default=1,
                    help='# of layers in LSTM (default: 1)', tunable=True,
                    options=[1, 2, 3, 4, 5])

    # program arguments (dataset and logger paths)
    parser.add_argument("--raw_data_path", dest="raw_data_path",
                        default="/mnt/disks/disk1/raw", help="Path to raw dataset")
    parser.add_argument('--proc_data_path', dest="proc_data_path",
                        default="/mnt/disks/disk1/processed", help="Path to processed dataset")
    parser.add_argument("--log", dest="log", default='',
                        help="Unique log directory name under log/. If the name is empty, do not store logs")
    parser.add_argument("--checkpoint", dest="checkpoint", type=str, default="",
                        help="Path to the .pth checkpoint file. Used to continue training from checkpoint")

    # create argparser
    args = parser.parse_args()
    return args

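# Hedged usage sketch (not in the original file): sample --ntrials random-search
# combinations from the tunable flags above; trials() is test_tube's API for this.
args = argParser()
for trial in args.trials(args.ntrials):
    print(trial.learning_rate, trial.weight_decay)
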
def add_model_specific_args(parent_parser, root_dir):
    parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                    parents=[parent_parser])

    # network params
    parser.add_argument('--h_dim', default=32, type=int)
    parser.add_argument('--only_rnn', default=False, action='store_true')
    parser.add_argument('--has_value', default=False, action='store_true')
    parser.add_argument('--is_multi', default=True, action='store_true')
    parser.add_argument('--truncation', default=128, type=int)
    parser.add_argument('--truncation_offset', default=1, type=int)
    parser.add_argument('--duration', default=90, type=int)
    parser.add_argument('--ratio', default=None, type=float, help="ratio for early prediction")

    # training params (opt)
    parser.opt_list('--heads', default=1, type=int, options=[2, 4, 5], tunable=False)
    parser.opt_list('--n_layers', default=2, type=int, options=[2, 4, 5], tunable=False)
    parser.opt_list('--threshold', default=0.5, type=float,
                    options=[0.3, 0.4, 0.5, 0.6], tunable=False)
    parser.opt_list('--learning_rate', default=5e-5, type=float,
                    options=[0.0001, 0.0005, 0.001, 0.005], tunable=False)
    parser.opt_list('--optimizer_name', default='adam', type=str,
                    options=['adam'], tunable=False)
    parser.opt_list('--batch_size', default=2, type=int)
    return parser

def add_model_specific_args(
    cls, parser: HyperOptArgumentParser
) -> HyperOptArgumentParser:
    """
    Parser for Estimator specific arguments/hyperparameters.
    :param parser: HyperOptArgumentParser obj

    Returns:
        - updated parser
    """
    parser.add_argument(
        "--encoder_learning_rate",
        default=1e-05,
        type=float,
        help="Encoder specific learning rate.",
    )
    parser.add_argument(
        "--learning_rate",
        default=3e-05,
        type=float,
        help="Classification head learning rate.",
    )
    parser.add_argument(
        "--class_weights",
        default="ignore",
        type=str,
        help="Weights for each of the classes we want to tag.",
    )
    parser.opt_list(
        "--nr_frozen_epochs",
        default=sys.maxsize,
        type=int,
        help="Number of epochs we want to keep the encoder model frozen.",
        tunable=True,
        options=[0, 1, 2, 3, 4, 5],
    )
    parser.add_argument(
        "--warmup_steps",
        default=200,
        type=int,
        help="Scheduler warmup steps.",
    )
    parser.opt_list(
        "--dropout",
        default=0.1,
        type=float,
        help="Dropout to be applied to the BERT embeddings.",
        tunable=True,
        options=[0.1, 0.2, 0.3, 0.4, 0.5],
    )
    # Data Args:
    parser.add_argument(
        "--train_csv",
        default="data/imdb_reviews_train.csv",
        type=str,
        help="Path to the file containing the train data.",
    )
    parser.add_argument(
        "--dev_csv",
        default="data/imdb_reviews_test.csv",
        type=str,
        help="Path to the file containing the dev data.",
    )
    parser.add_argument(
        "--test_csv",
        default="data/imdb_reviews_test.csv",
        type=str,
        help="Path to the file containing the test data.",
    )
    parser.add_argument(
        "--loader_workers",
        default=8,
        type=int,
        help=("How many subprocesses to use for data loading. "
              "0 means that the data will be loaded in the main process."),
    )
    return parser

from typing import Any

from test_tube import Experiment, HyperOptArgumentParser
from torch.autograd import Variable
from torch.nn import Linear
from torch.optim import Adam

from utils import ReplayBuffer, Step, np_to_var

# In paper, ant was dropped into 9 envs, but its S,A were same. No transfer
# learning yet.
exp = Experiment("meta learning shared hierarchies", save_dir="logs")

parser = HyperOptArgumentParser(strategy="random_search")
parser.opt_list(
    "--batch_size",
    default=128,
    type=int,
    tunable=True,
    options=[2**n for n in range(5, 10)],
)
args = parser.parse_args()
args.max_steps = 1000
args.subpolicy_duration = 200
args.num_policies = 10
args.max_buffer_size = 1_000_000
args.env_names = ["Ant-v2"]
exp.argparse(args)

State = Any

import numpy as np

# ]
# parser.opt_list(
#     "--augmentation_transforms",
#     default="none",
#     type=str,
#     tunable=True,
#     options=augmentation_transforms,
# )

# parser.add_argument("--lr", default=1.3e-4, type=float)
parser.opt_list(
    "--lr",
    default=10e-5,
    type=float,
    help="the learning rate",
    tunable=True,
    options=np.power(10.0, -np.linspace(3.0, 7.0, num=10)),
)
# parser.add_argument("--weight_decay", default=1.3e-9, type=float)
parser.opt_list(
    "--weight_decay",
    default=0,
    type=float,
    help="the weight decay",
    tunable=True,
    options=np.power(10.0, -np.linspace(4.0, 10.0, num=10)),
)
hparams = parser.parse_args()

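# Hedged illustration (not from the original script): the np.power/np.linspace
# idiom above produces log-uniformly spaced options, a common way to sweep
# learning rates across several orders of magnitude.
lrs = np.power(10.0, -np.linspace(3.0, 7.0, num=10))
print(lrs[0], lrs[-1])  # 0.001 ... 1e-07
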