# NOTE(review): extraction collapsed this multi-line script fragment onto one
# physical line; it is preserved byte-for-byte below. What it does:
#   1. attaches a file handler ('<exp_path>/log_train.txt', mode='w') to
#      `logger`, plus a stdout handler unless opt.noStdout is set, then logs
#      the options, the experiment path, and the current local time;
#   2. when opt.deviceId >= 0, imports utils.gpu_selection and auto-selects a
#      GPU (deviceId > 0 pins GPU deviceId-1; deviceId == 0 lets the selector
#      choose), calls torch.cuda.set_device, and sets opt.device to 'cuda';
#      otherwise falls back to opt.device = 'cpu';
#   3. seeds `random` and torch with opt.random_seed.
# The trailing `if torch.cuda.is_available():` is truncated here — its body
# (presumably a torch.cuda.manual_seed call, mirroring parse_arguments below
# — TODO confirm) lies outside this chunk. This whole section duplicates the
# device/seed logic inside parse_arguments(); consider deduplicating.
fileHandler = logging.FileHandler('%s/log_train.txt' % (exp_path), mode='w') fileHandler.setFormatter(logFormatter) logger.addHandler(fileHandler) if not opt.noStdout: consoleHandler = logging.StreamHandler(sys.stdout) consoleHandler.setFormatter(logFormatter) logger.addHandler(consoleHandler) logger.info(opt) logger.info("Experiment path: %s" % (exp_path)) logger.info(time.asctime(time.localtime(time.time()))) if opt.deviceId >= 0: import utils.gpu_selection as gpu_selection if opt.deviceId > 0: opt.deviceId, gpu_name, valid_gpus = gpu_selection.auto_select_gpu( assigned_gpu_id=opt.deviceId - 1) elif opt.deviceId == 0: opt.deviceId, gpu_name, valid_gpus = gpu_selection.auto_select_gpu() logger.info("Valid GPU list: %s ; GPU %d (%s) is auto selected." % (valid_gpus, opt.deviceId, gpu_name)) torch.cuda.set_device(opt.deviceId) opt.device = torch.device( "cuda" ) # is equivalent to torch.device('cuda:X') where X is the result of torch.cuda.current_device() else: logger.info("CPU is used.") opt.device = torch.device("cpu") random.seed(opt.random_seed) torch.manual_seed(opt.random_seed) if torch.cuda.is_available():
def parse_arguments():
    """Build the CLI parser, parse sys.argv, and derive final options.

    Returns:
        argparse.Namespace with all CLI options plus three derived fields:
        ``device``   -- torch.device: 'cuda' after GPU auto-selection when
                        ``deviceId >= 0``, else 'cpu';
        ``d_model``  -- transformer model width, defined equal to ``emb_size``;
        ``ontology`` -- JSON parsed from ``ontology_path``, or None.

    Side effects: seeds ``random``, ``np.random`` and torch (CUDA too, when
    available) with ``random_seed``; may call ``torch.cuda.set_device``.
    """
    parser = argparse.ArgumentParser()

    ######################### model structure #########################
    parser.add_argument('--emb_size', type=int, default=256,
                        help='word embedding dimension')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden layer dimension')
    parser.add_argument('--max_seq_len', type=int, default=None,
                        help='max sequence length')
    parser.add_argument('--n_layers', type=int, default=6,
                        help='#transformer layers')
    parser.add_argument('--n_head', type=int, default=4,
                        help='#attention heads')
    parser.add_argument('--d_k', type=int, default=64,
                        help='dimension of k in attention')
    parser.add_argument('--d_v', type=int, default=64,
                        help='dimension of v in attention')
    parser.add_argument('--score_util', default='pp',
                        choices=['none', 'np', 'pp', 'mul'],
                        help='how to utilize scores in Transformer & BERT: '
                             'np-naiveplus; pp-paramplus')
    parser.add_argument('--sent_repr', default='bin_sa_cls',
                        choices=['cls', 'maxpool', 'attn', 'bin_lstm',
                                 'bin_sa', 'bin_sa_cls', 'tok_sa_cls'],
                        help='sentence level representation')
    parser.add_argument('--cls_type', default='stc',
                        choices=['nc', 'tf_hd', 'stc'],
                        help='classifier type')

    ######################### data & vocab #########################
    parser.add_argument('--dataset', required=True, help='<domain>')
    parser.add_argument('--dataroot', required=True, help='path to dataset')
    parser.add_argument('--train_file', default='train',
                        help='base file name of train dataset')
    parser.add_argument('--valid_file', default='valid',
                        help='base file name of valid dataset')
    parser.add_argument('--test_file', default='test',
                        help='base file name of test dataset')
    parser.add_argument('--ontology_path', default=None, help='ontology')

    ######################## pretrained model (BERT) ########################
    parser.add_argument('--bert_model_name', default='bert-base-uncased',
                        choices=['bert-base-uncased', 'bert-base-cased',
                                 'bert-large-uncased', 'bert-large-cased'])
    parser.add_argument('--fix_bert_model', action='store_true')

    ######################### training & testing options #########################
    # FIX(review): stray leading space removed from the original help text.
    parser.add_argument('--testing', action='store_true',
                        help='test your model (default is training && testing)')
    parser.add_argument('--deviceId', type=int, default=-1,
                        help='train model on ith gpu. -1:cpu, 0:auto_select')
    parser.add_argument('--random_seed', type=int, default=999,
                        help='initial random seed')
    parser.add_argument('--l2', type=float, default=0, help='weight decay')
    parser.add_argument('--dropout', type=float, default=0.,
                        help='dropout rate at each non-recurrent layer')
    parser.add_argument('--bert_dropout', type=float, default=0.1,
                        help='dropout rate for BERT')
    parser.add_argument('--batchSize', type=int, default=32,
                        help='training batch size')
    parser.add_argument('--max_norm', type=float, default=5.0,
                        help='threshold of gradient clipping (2-norm)')
    parser.add_argument('--max_epoch', type=int, default=50,
                        help='max number of epochs to train')
    parser.add_argument('--experiment', default='exp',
                        help='experiment directories for storing models and logs')
    parser.add_argument('--optim_choice', default='bertadam',
                        choices=['adam', 'adamw', 'bertadam'],
                        help='optimizer choice')
    parser.add_argument('--lr', default=5e-4, type=float, help='learning rate')
    parser.add_argument('--bert_lr', default=1e-5, type=float,
                        help='learning rate for bert')
    # FIX(review): 'propotion' typo in the original help text corrected.
    parser.add_argument('--warmup_proportion', type=float, default=0.1,
                        help='warmup proportion')
    parser.add_argument('--init_type', default='uf',
                        choices=['uf', 'xuf', 'normal'], help='init type')
    parser.add_argument('--init_range', type=float, default=0.2,
                        help='init range, for naive uniform')

    ######################## system act #########################
    parser.add_argument('--with_system_act', action='store_true',
                        help='whether to include the last system act')

    opt = parser.parse_args()

    ######################### option verification & adjustment #########################
    # device definition
    if opt.deviceId >= 0:
        # deviceId > 0 pins GPU (deviceId - 1); deviceId == 0 lets the
        # selector choose. auto_select_gpu is presumably imported from
        # utils.gpu_selection at the top of this file -- confirm.
        if opt.deviceId > 0:
            opt.deviceId, gpu_name, valid_gpus = auto_select_gpu(
                assigned_gpu_id=opt.deviceId - 1)
        elif opt.deviceId == 0:
            opt.deviceId, gpu_name, valid_gpus = auto_select_gpu()
        print('Valid GPU list: %s ; GPU %d (%s) is auto selected.' %
              (valid_gpus, opt.deviceId, gpu_name))
        torch.cuda.set_device(opt.deviceId)
        opt.device = torch.device('cuda')
    else:
        print('CPU is used.')
        opt.device = torch.device('cpu')

    # random seed set
    random.seed(opt.random_seed)
    np.random.seed(opt.random_seed)
    torch.manual_seed(opt.random_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(opt.random_seed)

    # d_model: just equals embedding size
    opt.d_model = opt.emb_size

    # ontology
    # FIX(review): the original used a bare json.load(open(...)), leaking
    # the file descriptor; use a context manager so it is always closed.
    if opt.ontology_path is None:
        opt.ontology = None
    else:
        with open(opt.ontology_path) as f:
            opt.ontology = json.load(f)

    return opt