# ---- Logging setup ----
# File handler: overwrite ('w' mode) <exp_path>/log_train.txt on every run.
fileHandler = logging.FileHandler('%s/log_train.txt' % (exp_path),
                                      mode='w')
fileHandler.setFormatter(logFormatter)
logger.addHandler(fileHandler)
# Also mirror log records to stdout unless opt.noStdout is set.
if not opt.noStdout:
    consoleHandler = logging.StreamHandler(sys.stdout)
    consoleHandler.setFormatter(logFormatter)
    logger.addHandler(consoleHandler)
# Record the full option namespace, the experiment path, and the start
# time at the head of the log so the run can be reproduced later.
logger.info(opt)
logger.info("Experiment path: %s" % (exp_path))
logger.info(time.asctime(time.localtime(time.time())))

if opt.deviceId >= 0:
    # GPU run: resolve the real device id through the project's GPU
    # selection helper (fully automatic when deviceId == 0, otherwise
    # pinned to the user-supplied id, which is 1-based on the CLI).
    import utils.gpu_selection as gpu_selection
    if opt.deviceId == 0:
        opt.deviceId, gpu_name, valid_gpus = gpu_selection.auto_select_gpu()
    else:
        opt.deviceId, gpu_name, valid_gpus = gpu_selection.auto_select_gpu(
            assigned_gpu_id=opt.deviceId - 1)
    logger.info("Valid GPU list: %s ; GPU %d (%s) is auto selected." %
                (valid_gpus, opt.deviceId, gpu_name))
    torch.cuda.set_device(opt.deviceId)
    # Plain "cuda" resolves to the device chosen via set_device() above.
    opt.device = torch.device("cuda")
else:
    logger.info("CPU is used.")
    opt.device = torch.device("cpu")

# Seed every RNG source for reproducibility (Python, NumPy, PyTorch CPU
# and — when a GPU is available — the current CUDA device). Mirrors the
# seeding done inside parse_arguments().
# NOTE(review): the original text was truncated by extraction junk after
# `if torch.cuda.is_available():`; the body is restored here.
random.seed(opt.random_seed)
np.random.seed(opt.random_seed)
torch.manual_seed(opt.random_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(opt.random_seed)
def parse_arguments():
    """Parse command-line options and return the adjusted namespace.

    Beyond raw argument parsing this also:
      * selects the computation device (auto GPU selection when
        ``--deviceId >= 0``, CPU otherwise) and stores it in ``opt.device``;
      * seeds the ``random``, NumPy and PyTorch RNGs with ``--random_seed``;
      * derives ``opt.d_model`` from ``opt.emb_size``;
      * loads the ontology JSON into ``opt.ontology`` when
        ``--ontology_path`` is given (``None`` otherwise).

    Returns:
        argparse.Namespace: the fully populated option object.
    """
    parser = argparse.ArgumentParser()

    ######################### model structure #########################
    parser.add_argument('--emb_size',
                        type=int,
                        default=256,
                        help='word embedding dimension')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden layer dimension')
    parser.add_argument('--max_seq_len',
                        type=int,
                        default=None,
                        help='max sequence length')
    parser.add_argument('--n_layers',
                        type=int,
                        default=6,
                        help='#transformer layers')
    parser.add_argument('--n_head',
                        type=int,
                        default=4,
                        help='#attention heads')
    parser.add_argument('--d_k',
                        type=int,
                        default=64,
                        help='dimension of k in attention')
    parser.add_argument('--d_v',
                        type=int,
                        default=64,
                        help='dimension of v in attention')
    parser.add_argument(
        '--score_util',
        default='pp',
        choices=['none', 'np', 'pp', 'mul'],
        help=
        'how to utilize scores in Transformer & BERT: np-naiveplus; pp-paramplus'
    )
    parser.add_argument('--sent_repr',
                        default='bin_sa_cls',
                        choices=[
                            'cls', 'maxpool', 'attn', 'bin_lstm', 'bin_sa',
                            'bin_sa_cls', 'tok_sa_cls'
                        ],
                        help='sentence level representation')
    parser.add_argument('--cls_type',
                        default='stc',
                        choices=['nc', 'tf_hd', 'stc'],
                        help='classifier type')

    ######################### data & vocab #########################
    parser.add_argument('--dataset', required=True, help='<domain>')
    parser.add_argument('--dataroot', required=True, help='path to dataset')
    parser.add_argument('--train_file',
                        default='train',
                        help='base file name of train dataset')
    parser.add_argument('--valid_file',
                        default='valid',
                        help='base file name of valid dataset')
    parser.add_argument('--test_file',
                        default='test',
                        help='base file name of test dataset')
    parser.add_argument('--ontology_path', default=None, help='ontology')

    ######################## pretrained model (BERT) ########################
    parser.add_argument('--bert_model_name',
                        default='bert-base-uncased',
                        choices=[
                            'bert-base-uncased', 'bert-base-cased',
                            'bert-large-uncased', 'bert-large-cased'
                        ])
    parser.add_argument('--fix_bert_model', action='store_true')

    ######################### training & testing options #########################
    parser.add_argument(
        '--testing',
        action='store_true',
        help=' test your model (default is training && testing)')
    parser.add_argument('--deviceId',
                        type=int,
                        default=-1,
                        help='train model on ith gpu. -1:cpu, 0:auto_select')
    parser.add_argument('--random_seed',
                        type=int,
                        default=999,
                        help='initial random seed')
    parser.add_argument('--l2', type=float, default=0, help='weight decay')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.,
                        help='dropout rate at each non-recurrent layer')
    parser.add_argument('--bert_dropout',
                        type=float,
                        default=0.1,
                        help='dropout rate for BERT')
    parser.add_argument('--batchSize',
                        type=int,
                        default=32,
                        help='training batch size')
    parser.add_argument('--max_norm',
                        type=float,
                        default=5.0,
                        help='threshold of gradient clipping (2-norm)')
    parser.add_argument('--max_epoch',
                        type=int,
                        default=50,
                        help='max number of epochs to train')
    parser.add_argument(
        '--experiment',
        default='exp',
        help='experiment directories for storing models and logs')
    parser.add_argument('--optim_choice',
                        default='bertadam',
                        choices=['adam', 'adamw', 'bertadam'],
                        help='optimizer choice')
    parser.add_argument('--lr', default=5e-4, type=float, help='learning rate')
    parser.add_argument('--bert_lr',
                        default=1e-5,
                        type=float,
                        help='learning rate for bert')
    parser.add_argument('--warmup_proportion',
                        type=float,
                        default=0.1,
                        help='warmup proportion')
    parser.add_argument('--init_type',
                        default='uf',
                        choices=['uf', 'xuf', 'normal'],
                        help='init type')
    parser.add_argument('--init_range',
                        type=float,
                        default=0.2,
                        help='init range, for naive uniform')

    ######################## system act #########################
    parser.add_argument('--with_system_act',
                        action='store_true',
                        help='whether to include the last system act')

    opt = parser.parse_args()

    ######################### option verification & adjustment #########################
    # device definition: deviceId > 0 pins GPU (CLI id is 1-based),
    # deviceId == 0 lets the helper pick, negative means CPU.
    if opt.deviceId >= 0:
        if opt.deviceId > 0:
            opt.deviceId, gpu_name, valid_gpus = auto_select_gpu(
                assigned_gpu_id=opt.deviceId - 1)
        elif opt.deviceId == 0:
            opt.deviceId, gpu_name, valid_gpus = auto_select_gpu()
        print('Valid GPU list: %s ; GPU %d (%s) is auto selected.' %
              (valid_gpus, opt.deviceId, gpu_name))
        torch.cuda.set_device(opt.deviceId)
        opt.device = torch.device('cuda')
    else:
        print('CPU is used.')
        opt.device = torch.device('cpu')

    # random seed set (Python, NumPy, PyTorch CPU and, if present, CUDA)
    random.seed(opt.random_seed)
    np.random.seed(opt.random_seed)
    torch.manual_seed(opt.random_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(opt.random_seed)

    # d_model: just equals embedding size
    opt.d_model = opt.emb_size

    # ontology: load the JSON spec if a path was supplied; a context
    # manager ensures the file handle is closed (the original
    # json.load(open(...)) leaked it).
    if opt.ontology_path is None:
        opt.ontology = None
    else:
        with open(opt.ontology_path) as f:
            opt.ontology = json.load(f)

    return opt