예제 #1
0
    # Set the random seed for reproducible experiments
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))
    logging.info('device: {}'.format(args.device))
    logging.info('Hyper params:%r' % args.__dict__)

    # Create the input data pipeline
    logging.info('Loading the datasets...')
    bl = BatchLoader(args)
    ## Load train and dev data
    train_data = bl.load_data('train.json')
    dev_data = bl.load_data('dev.json')
    ## Train data
    ner_train_data, re_train_data = bl.build_data(train_data, is_train=True)
    train_bls = bl.batch_loader(ner_train_data,
                                re_train_data,
                                args.ner_max_len,
                                args.re_max_len,
                                args.batch_size,
                                is_train=True)
    num_batchs_per_task = [len(train_bl) for train_bl in train_bls]
    logging.info(
        'num of batch per task for train: {}'.format(num_batchs_per_task))
    train_task_ids = sum([[i] * num_batchs_per_task[i]
                          for i in range(len(num_batchs_per_task))], [])
    shuffle(train_task_ids)
    ## Dev data
    ner_dev_data, _ = bl.build_data(dev_data, is_train=False)
    dev_bl = bl.batch_loader(ner_dev_data,