Beispiel #1
0
# Evaluate a previously trained BERT relation classifier on the test file.
# NOTE(review): `args`, `root_path` and `rel2id` are defined outside this
# snippet.

# Use the first GPU when one is present, otherwise fall back to the CPU so
# the script does not crash on CUDA-less machines.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Define the sentence encoder
sentence_encoder = opennre.encoder.BERTEncoder(
    max_length=args.max_seq_len,
    pretrain_path=os.path.join(root_path, 'pretrain/bert-base-uncased'),
    mask_entity=False)

# Define the model
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)
model.to(device)

# Define the whole training framework.
# The test file is passed for all three splits: train_model() is never
# called in this snippet, only framework.test_loader is evaluated below.
framework = opennre.framework.SentenceRE(train_path=args.test_path,
                                         val_path=args.test_path,
                                         test_path=args.test_path,
                                         model=model,
                                         ckpt=args.model_path,
                                         batch_size=args.batch_size,
                                         max_epoch=1,
                                         lr=2e-5,
                                         opt='adamw')

# Remap the stored tensors onto the selected device so that a checkpoint
# saved on a GPU can still be loaded when no GPU is available.
checkpoint = torch.load(args.model_path, map_location=device)
framework.load_state_dict(checkpoint['state_dict'])
result = framework.eval_model(framework.test_loader)

# Print the result
print('Accuracy on test set: {}'.format(result['acc']))
print('Micro Precision: {}'.format(result['micro_p']))
print('Micro Recall: {}'.format(result['micro_r']))
print('Micro F1: {}'.format(result['micro_f1']))
# Train a BERT entity-pooling relation classifier on Wiki80, then reload the
# checkpoint stored at `ckpt` and report its accuracy.
# NOTE(review): `root_path`, `rel2id` and `ckpt` are defined outside this
# snippet.

# Resolve every on-disk location up front.
bert_path = os.path.join(root_path, 'pretrain/bert-base-uncased')
wiki80_train_path = os.path.join(root_path,
                                 'benchmark/wiki80/wiki80_train.txt')
wiki80_val_path = os.path.join(root_path, 'benchmark/wiki80/wiki80_val.txt')

# Sentence encoder and classification head
sentence_encoder = opennre.encoder.BERTEntityEncoder(max_length=80,
                                                     pretrain_path=bert_path)
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Training framework; the validation file is also used as the test split.
framework_options = dict(
    train_path=wiki80_train_path,
    val_path=wiki80_val_path,
    test_path=wiki80_val_path,
    model=model,
    ckpt=ckpt,
    batch_size=64,  # Modify the batch size w.r.t. your device
    max_epoch=10,
    lr=2e-5,
    opt='adamw',
)
framework = opennre.framework.SentenceRE(**framework_options)

# Train
framework.train_model()

# Evaluate the weights saved at `ckpt`
saved = torch.load(ckpt)
framework.load_state_dict(saved['state_dict'])
result = framework.eval_model(framework.test_loader)

# Report
accuracy = result['acc']
print('Accuracy on test set: {}'.format(accuracy))
Beispiel #3
0
def _load_rel2id(args, root_path):
    """Return the relation-name -> id mapping parsed from a JSON file.

    ``args.rel2id_file`` is used when it exists. Otherwise the loader falls
    back to ``<root_path>/benchmark/<dataset>/finre_rel2id.json``, the
    location this function used to hard-code, and logs a warning.
    """
    rel2id_path = args.rel2id_file
    if not os.path.exists(rel2id_path):
        legacy_path = os.path.join(root_path, 'benchmark', args.dataset,
                                   'finre_rel2id.json')
        logging.warning('rel2id file %s does not exist! Use %s instead',
                        rel2id_path, legacy_path)
        rel2id_path = legacy_path
    # JSON is UTF-8 by specification; be explicit so the platform default
    # encoding cannot corrupt non-ASCII relation names.
    with open(rel2id_path, encoding='utf-8') as rel2id_fp:
        return json.load(rel2id_fp)


def train(args):
    """Train (unless ``args.only_test``) and evaluate a BERT relation model.

    Args:
        args: Namespace-like object. Attributes read: ``data_dir``,
            ``model_dir``, ``ckpt``, ``dataset``, ``pretrain_path``,
            ``pooler``, ``max_length``, ``mask_entity``, ``batch_size``,
            ``max_epoch``, ``lr`` and ``only_test``. When ``dataset`` is
            ``'none'``, ``train_file``, ``val_file``, ``test_file`` and
            ``rel2id_file`` must point at existing files; otherwise they
            are overwritten with paths below
            ``<data_dir>/benchmark/<dataset>/`` and ``metric`` is set.
            ``ckpt`` is filled in when empty.

    Raises:
        Exception: ``dataset`` is ``'none'`` and one of the four data files
            does not exist.
        NotImplementedError: ``pooler`` is neither ``'entity'`` nor
            ``'cls'``.
    """
    # Some basic settings
    root_path = args.data_dir
    sys.path.append(root_path)
    if not args.ckpt:
        args.ckpt = '{}_{}_{}'.format(args.dataset,
                                      args.pretrain_path.split('/')[-1],
                                      args.pooler)
    ckpt = os.path.join(args.model_dir, 'ckpt/{}.pth.tar'.format(args.ckpt))
    # Create the directory the checkpoint is actually written to (it lives
    # under model_dir, not necessarily under the current working directory).
    ckpt_dir = os.path.dirname(ckpt)
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)

    if args.dataset != 'none':
        # Best effort: the data may already be on disk, so a failed download
        # is reported but does not abort the run.
        try:
            opennre.download(args.dataset, root_path=root_path)
        except Exception as err:
            logging.warning('Could not download dataset %s: %s',
                            args.dataset, err)
        dataset_dir = os.path.join(root_path, 'benchmark', args.dataset)
        args.train_file = os.path.join(dataset_dir,
                                       '{}_train.txt'.format(args.dataset))
        args.val_file = os.path.join(dataset_dir,
                                     '{}_val.txt'.format(args.dataset))
        args.test_file = os.path.join(dataset_dir,
                                      '{}_test.txt'.format(args.dataset))
        if not os.path.exists(args.test_file):
            logging.warning(
                "Test file {} does not exist! Use val file instead".format(
                    args.test_file))
            args.test_file = args.val_file
        args.rel2id_file = os.path.join(dataset_dir,
                                        '{}_rel2id.json'.format(args.dataset))
        if args.dataset == 'wiki80':
            args.metric = 'acc'
        else:
            args.metric = 'micro_f1'
    else:
        required_files = (args.train_file, args.val_file, args.test_file,
                          args.rel2id_file)
        if not all(os.path.exists(path) for path in required_files):
            raise Exception(
                '--train_file, --val_file, --test_file and --rel2id_file are not specified or files do not exist. Or specify --dataset'
            )

    logging.info('Arguments:')
    for arg in vars(args):
        logging.info('    {}: {}'.format(arg, getattr(args, arg)))

    rel2id = _load_rel2id(args, root_path)

    # Define the sentence encoder
    if args.pooler == 'entity':
        sentence_encoder = opennre.encoder.BERTEntityEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    elif args.pooler == 'cls':
        sentence_encoder = opennre.encoder.BERTEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    else:
        raise NotImplementedError

    # Define the model
    model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

    # Define the whole training framework
    framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                             val_path=args.val_file,
                                             test_path=args.test_file,
                                             model=model,
                                             ckpt=ckpt,
                                             batch_size=args.batch_size,
                                             max_epoch=args.max_epoch,
                                             lr=args.lr,
                                             opt='adamw')

    # Train the model, selecting the checkpoint by the metric chosen above.
    # 'micro_f1' used to be hard-coded here, which ignored args.metric; it
    # remains the fallback when the caller did not provide a metric.
    if not args.only_test:
        framework.train_model(getattr(args, 'metric', 'micro_f1'))

    # Test. Deserialize onto the CPU so a checkpoint saved on a GPU can be
    # read on any host; load_state_dict copies the values into the
    # framework's existing parameters.
    checkpoint = torch.load(ckpt, map_location='cpu')
    framework.load_state_dict(checkpoint['state_dict'])
    result = framework.eval_model(framework.test_loader)

    # Print the result
    logging.info('Test set results:')
    logging.info('Accuracy: {}'.format(result['acc']))
    logging.info('Micro precision: {}'.format(result['micro_p']))
    logging.info('Micro recall: {}'.format(result['micro_r']))
    logging.info('Micro F1: {}'.format(result['micro_f1']))