# Example no. 1
def setup_interact_args():
    """
    Sets up the command line arguments for interactive decoding.

    Returns:
        argparse.Namespace: Parsed arguments, including the options
        registered by ``setup_data_args`` and ``setup_model_args``.
    """
    def _str2bool(value):
        # argparse's `type=bool` is broken: bool('False') is True because
        # any non-empty string is truthy. Parse the common spellings
        # explicitly so `--cuda false` actually disables CUDA.
        if isinstance(value, bool):
            return value
        if value.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if value.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser()
    group = parser.add_argument_group('interact')
    group.add_argument('--method',
                       type=str,
                       default='nucleus',
                       help='Decoding method to use.')
    group.add_argument('--cuda',
                       type=_str2bool,
                       default=torch.cuda.is_available(),
                       help='Device for training.')
    group.add_argument('--max_len',
                       type=int,
                       default=100,
                       help='Maximum length of the decoded sequence.')
    group.add_argument('--top_p',
                       type=float,
                       default=0.9,
                       help='Top-p parameter for nucleus sampling.')
    group.add_argument('--top_k',
                       type=int,
                       default=100,
                       help='Top-k parameter for topk sampling.')

    setup_data_args(parser)
    setup_model_args(parser)

    return parser.parse_args()
# Example no. 2
def setup_train_args():
    """
    Sets up the command line arguments for training.

    Returns:
        argparse.Namespace: Parsed arguments, including the options
        registered by ``setup_data_args`` and ``setup_model_args``.
    """
    def _str2bool(value):
        # argparse's `type=bool` is broken: bool('False') is True because
        # any non-empty string is truthy. Parse the common spellings
        # explicitly so `--cuda false` / `--mixed false` work as expected.
        if isinstance(value, bool):
            return value
        if value.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if value.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser()
    group = parser.add_argument_group('train')
    group.add_argument('--max_epochs',
                       type=int,
                       default=15,
                       help='Maximum number of epochs for training.')
    group.add_argument('--cuda',
                       type=_str2bool,
                       default=torch.cuda.is_available(),
                       help='Device for training.')
    # TODO XLNet produces NaN with apex
    group.add_argument('--mixed',
                       type=_str2bool,
                       default=True,
                       help='Use mixed precision training.')
    group.add_argument('--learning_rate',
                       type=float,
                       default=1e-4,
                       help='Learning rate for the model.')
    group.add_argument('--batch_size',
                       type=int,
                       default=64,
                       help='Batch size during training.')
    group.add_argument('--patience',
                       type=int,
                       default=5,
                       help='Number of patience epochs before termination.')
    group.add_argument('--grad_accum_steps',
                       type=int,
                       default=2,
                       help='Number of steps for grad accum.')
    group.add_argument('--eval_every_step',
                       type=int,
                       default=3000,
                       help='Evaluation frequency in steps.')
    group.add_argument('--local_rank',
                       type=int,
                       default=-1,
                       help='Local rank for the script.')

    setup_data_args(parser)
    setup_model_args(parser)

    return parser.parse_args()
# Example no. 3
def setup_eval_args():
    """
    Sets up the command line arguments for evaluation.

    Returns:
        argparse.Namespace: Parsed arguments, including the options
        registered by ``setup_data_args`` and ``setup_model_args``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_file',
                        type=str,
                        default=None,
                        help='Path to the file of the model.')
    parser.add_argument('--ckpt_name',
                        type=str,
                        default='last',
                        choices=['last', 'best'],
                        help='Name of the checkpoint to load.')
    parser.add_argument('--decoding',
                        type=str,
                        default='topk',
                        choices=list(METHODS),
                        help='Decoding method to use.')
    # NOTE: `default=torch.cuda.is_available()` was a bug here — it made
    # `no_cuda` default to True on any GPU machine, so CUDA was never used.
    # A `store_true` flag must default to False (which is what argparse
    # does implicitly); the user opts out of CUDA by passing --no_cuda.
    parser.add_argument('--no_cuda',
                        action='store_true',
                        help='Device for training.')
    parser.add_argument('--top_p',
                        type=float,
                        default=0.9,
                        help='Top-p parameter for nucleus sampling.')
    parser.add_argument('--top_k',
                        type=int,
                        default=100,
                        help='Top-k parameter for topk sampling.')
    parser.add_argument('--min_len',
                        type=int,
                        default=0,
                        help='Minimum length of the response sentence.')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for interactive mode.')

    setup_data_args(parser)
    setup_model_args(parser)

    return parser.parse_args()
# Example no. 4
def setup_train_args():
    """
    Sets up the command line arguments for training.

    Returns:
        argparse.Namespace: Parsed arguments, including the options
        registered by ``setup_data_args`` and ``setup_model_args``.
    """
    parser = argparse.ArgumentParser()

    # Each entry is (flag, keyword arguments); registration order below
    # matches the order the options appear in the --help output.
    option_specs = [
        ('--config', dict(type=str, default=None,
                          help='Path of the config file.')),
        ('--max_epochs', dict(type=int, default=25,
                              help='Maximum number of epochs for training.')),
        ('--no_cuda', dict(action='store_true',
                           help='Device for training.')),
        # TODO XLNet produces NaN with apex
        ('--fp16', dict(action='store_true',
                        help='Use fp16 precision training.')),
        ('--lr', dict(type=float, default=1e-5,
                      help='Learning rate for the model.')),
        ('--batch_size', dict(type=int, default=64,
                              help='Batch size during training.')),
        ('--patience', dict(type=int, default=5,
                            help='Number of patience epochs before '
                                 'termination.')),
        ('--schedule', dict(type=str, default='noam',
                            choices=['noam', 'noamwd'],
                            help='Type of learning rate scheduling.')),
        ('--warmup_steps', dict(type=int, default=16000,
                                help='Number of warmup steps.')),
        ('--total_steps', dict(type=int, default=1000000,
                               help='Number of optimization steps.')),
        ('--grad_accum_steps', dict(type=int, default=2,
                                    help='Number of steps for grad accum.')),
        ('--local_rank', dict(type=int, default=-1,
                              help='Local rank for the script.')),
        ('--notebook', dict(action='store_true',
                            help='Set true if you are using IPython '
                                 'notebook.')),
        ('--clip_grad', dict(type=float, default=None,
                             help='Gradient clipping norm value.')),
        ('--seed', dict(type=int, default=None,
                        help='Random seed for training.')),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)

    setup_data_args(parser)
    setup_model_args(parser)

    return parser.parse_args()