# Standard-library and torch imports assumed by the snippets below; the
# project-specific helpers (get_train_args, init_train_env, load_examples,
# train, the train_* iteration functions, RNNTracer, etc.) come from the
# surrounding training package and are not shown here.
import datetime
import logging
import os

import torch

# Module-level logger used by all snippets (assumed).
logger = logging.getLogger(__name__)


def main():
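    # Example 1: train the TBERT 'siamese2' variant (presumably a twin-encoder
    # architecture). Examples are loaded per split from args.data_dir, with
    # args.overwrite presumably controlling regeneration of cached features;
    # training steps with the negative-sampling iteration method.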
    args = get_train_args()
    model = init_train_env(args, tbert_type='siamese2')
    valid_examples = load_examples(args.data_dir, data_type="valid", model=model, num_limit=args.valid_num,
                                   overwrite=args.overwrite)
    train_examples = load_examples(args.data_dir, data_type="train", model=model, num_limit=args.train_num,
                                   overwrite=args.overwrite)
    train(args, train_examples, valid_examples, model, train_iter_method=train_with_neg_sampling)
    logger.info("Training finished")
Example #2
def main():
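    # Example 2: same 'siamese2' setup, but the train/valid examples are read
    # from explicit "train" and "valid" subdirectories of args.data_dir rather
    # than selected via a data_type argument.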
    args = get_train_args()
    model = init_train_env(args, tbert_type='siamese2')
    train_dir = os.path.join(args.data_dir, "train")
    valid_dir = os.path.join(args.data_dir, "valid")
    train_examples = load_examples(train_dir, model=model, num_limit=args.train_num)
    valid_examples = load_examples(valid_dir, model=model, num_limit=args.valid_num)
    train(args, train_examples, valid_examples, model, train_with_neg_sampling)
    logger.info("Training finished")
Example #3
def main():
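    # Example 3: train the 'single' TBERT variant (presumably one shared
    # encoder), stepping with train_single_iteration instead of negative
    # sampling.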
    args = get_train_args()
    model = init_train_env(args, tbert_type='single')
    valid_examples = load_examples(args.data_dir,
                                   data_type="valid",
                                   model=model,
                                   num_limit=args.valid_num,
                                   overwrite=args.overwrite)
    train_examples = load_examples(args.data_dir,
                                   data_type="train",
                                   model=model,
                                   num_limit=args.train_num,
                                   overwrite=args.overwrite)
    train(args, train_examples, valid_examples, model, train_single_iteration)
    logger.info("Training finished")
Example #4
def main():
    args = get_rnn_train_args()
    if args.is_no_padding:
        args.gradient_accumulation_steps = 1
        args.per_gpu_eval_batch_size = 1
        args.per_gpu_train_batch_size = 1
        args.logging_steps = args.logging_steps * 10

    args.exp_name = "{}_{}".format(
        args.exp_name,
        datetime.datetime.now().strftime("%m-%d-%H-%M-%S"))
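    # The timestamp suffix above keeps repeated runs under distinct experiment names.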
    # embd_info = create_emb_layer("./we/glove.6B.300d.txt")
    embd_info = load_embd_from_file(args.embd_file_path)

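    # Device setup, following the common single-node vs. distributed pattern:
    # in distributed mode each process drives exactly one GPU, so n_gpu is 1
    # per process.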
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()
    model = RNNTracer(hidden_dim=args.hidden_dim,
                      embd_info=embd_info,
                      embd_trainable=args.is_embd_trainable,
                      max_seq_len=args.max_seq_len,
                      is_no_padding=args.is_no_padding,
                      rnn_type=args.rnn_type)
    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()
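    # Move the model to the selected device and record the device on the model,
    # presumably so downstream code can place input tensors accordingly.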
    model.to(args.device)
    model.device = args.device
    if args.fp16:
        try:
            import apex
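            # Register torch.einsum with apex AMP so it is run in half precision.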
            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    train_dir = os.path.join(args.data_dir, "train")
    valid_dir = os.path.join(args.data_dir, "valid")
    train_examples = load_examples_for_rnn(train_dir,
                                           model=model,
                                           num_limit=args.train_num)
    valid_examples = load_examples_for_rnn(valid_dir,
                                           model=model,
                                           num_limit=args.valid_num)
    logger.info("Training started")
    train(args, train_examples, valid_examples, model, train_rnn_iter)
    logger.info("Training finished")