def download_dataset(args):
    if args.dataset is not None:
        if args.dataset in ['wiki80', 'semeval']:
            opennre.download(args.dataset, root_path=args.root_path)
        elif args.dataset == 'tacred':
            logging.warning(
                'TACRED is released via the Linguistic Data Consortium (LDC). '
                'Please download it from https://catalog.ldc.upenn.edu/LDC2018T24'
            )
        else:
            raise Exception(
                'For sentence-level RE, Dataset must be one of [`wiki80`, `tacred`, `semeval`].'
            )
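# Usage sketch (an assumption, not part of the original script): download_dataset()
# only reads args.dataset and args.root_path, so a plain argparse.Namespace is enough
# to drive it by hand. The field values below are illustrative.
def _demo_download_dataset():
    import argparse
    demo_args = argparse.Namespace(dataset='wiki80', root_path='.')  # illustrative values
    download_dataset(demo_args)  # fetches benchmark/wiki80 under root_path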
import sys, json
import torch
import os
import numpy as np
import opennre
from opennre import encoder, model, framework

# Some basic settings
root_path = '.'
sys.path.append(root_path)
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/wiki80_bertentity_softmax.pth.tar'

# Check data
opennre.download('wiki80', root_path=root_path)
opennre.download('bert_base_uncased', root_path=root_path)
rel2id = json.load(
    open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))

# Define the sentence encoder
sentence_encoder = opennre.encoder.BERTEntityEncoder(
    max_length=80,
    pretrain_path=os.path.join(root_path, 'pretrain/bert-base-uncased'))

# Define the model
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Define the whole training framework
framework = opennre.framework.SentenceRE(
    train_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_train.txt'),
                    help='Weight decay')
parser.add_argument('--max_epoch', default=100, type=int,
                    help='Max number of training epochs')
args = parser.parse_args()

# Some basic settings
root_path = '.'
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/{}.pth.tar'.format(args.ckpt)

# Check data
opennre.download('nyt10', root_path=root_path)
opennre.download('glove', root_path=root_path)
rel2id = json.load(
    open(os.path.join(root_path, 'benchmark/nyt10/nyt10_rel2id.json')))
wordi2d = json.load(
    open(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_word2id.json')))
word2vec = np.load(
    os.path.join(root_path, 'pretrain/glove/glove.6B.50d_mat.npy'))

# Define the sentence encoder
sentence_encoder = opennre.encoder.PCNNEncoder(token2id=wordi2d,
                                               max_length=120,
                                               word_size=50,
                                               position_size=5,
                                               hidden_size=230,
                                               blank_padding=True,
                    type=int,
                    help='Max number of training epochs')
args = parser.parse_args()

# Some basic settings
root_path = '.'
sys.path.append(root_path)
if not os.path.exists('./ckpt'):
    os.mkdir('./ckpt')
if len(args.ckpt) == 0:
    args.ckpt = '{}_{}'.format(args.dataset, 'cnn')
ckpt = 'ckpt/{}.pth.tar'.format(args.ckpt)

if args.dataset != 'none':
    opennre.download(args.dataset, root_path=root_path)
    args.train_file = os.path.join(root_path, 'benchmark', args.dataset,
                                   '{}_train.txt'.format(args.dataset))
    args.val_file = os.path.join(root_path, 'benchmark', args.dataset,
                                 '{}_val.txt'.format(args.dataset))
    args.test_file = os.path.join(root_path, 'benchmark', args.dataset,
                                  '{}_test.txt'.format(args.dataset))
    args.rel2id_file = os.path.join(root_path, 'benchmark', args.dataset,
                                    '{}_rel2id.json'.format(args.dataset))
    if args.dataset == 'wiki80':
        args.metric = 'acc'
    else:
        args.metric = 'micro_f1'
else:
    if not (os.path.exists(args.train_file) and os.path.exists(args.val_file)
            and os.path.exists(args.test_file)
def download_pretrain(args):
    if 'bert' in args.encoder:
        opennre.download('bert_base_uncased', root_path=args.root_path)
    elif 'cnn' in args.encoder:
        opennre.download('glove', root_path=args.root_path)
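# Usage sketch (an assumption, not part of the original script): download_pretrain()
# dispatches on the encoder name, so any 'bert*' encoder pulls the bert_base_uncased
# weights and any 'cnn*' encoder pulls the GloVe vectors. The Namespace fields mirror
# the attributes it reads; the values are illustrative.
def _demo_download_pretrain():
    import argparse
    for enc in ('bert', 'cnn'):
        demo_args = argparse.Namespace(encoder=enc, root_path='.')  # illustrative values
        download_pretrain(demo_args)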
import sys, json
import torch
import os
import numpy as np
import opennre
from opennre import encoder, model, framework

# Some basic settings
root_path = '.'
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/wiki80_cnn_softmax.pth.tar'

# Check data
opennre.download('wiki80', root_path=root_path)
opennre.download('glove', root_path=root_path)
rel2id = json.load(open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))
wordi2d = json.load(open(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_word2id.json')))
word2vec = np.load(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_mat.npy'))

# Define the sentence encoder
sentence_encoder = opennre.encoder.CNNEncoder(
    token2id=wordi2d,
    max_length=40,
    word_size=50,
    position_size=5,
    hidden_size=230,
    blank_padding=True,
    kernel_size=3,
    padding_size=1,
    word2vec=word2vec,
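# Quick-inference sketch (the training excerpt above is truncated, so this is an
# addition rather than part of the original script): the checkpoint name matches one
# of OpenNRE's released models, so opennre.get_model() can download that pretrained
# wiki80_cnn_softmax checkpoint (not the one trained locally) and run sentence-level
# inference. The sentence is illustrative; 'pos' values are character offsets of the
# head and tail mentions in the text.
def _demo_infer_wiki80_cnn():
    infer_model = opennre.get_model('wiki80_cnn_softmax')
    result = infer_model.infer({
        'text': 'Bill Gates founded Microsoft in 1975.',
        'h': {'pos': (0, 10)},    # head mention: 'Bill Gates'
        't': {'pos': (19, 28)},   # tail mention: 'Microsoft'
    })
    print(result)  # (predicted relation name, confidence score)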
def train(args):
    # Some basic settings
    # root_path = '.'
    root_path = args.data_dir
    sys.path.append(root_path)
    if not os.path.exists('ckpt'):
        os.mkdir('ckpt')
    if len(args.ckpt) == 0:
        args.ckpt = '{}_{}_{}'.format(args.dataset,
                                      args.pretrain_path.split('/')[-1],
                                      args.pooler)
    # NOTE: the checkpoint itself is written under args.model_dir; when model_dir
    # is not '.', make sure <model_dir>/ckpt exists.
    ckpt = os.path.join(args.model_dir, 'ckpt/{}.pth.tar'.format(args.ckpt))

    if args.dataset != 'none':
        try:
            opennre.download(args.dataset, root_path=root_path)
        except Exception:
            pass
        args.train_file = os.path.join(root_path, 'benchmark', args.dataset,
                                       '{}_train.txt'.format(args.dataset))
        args.val_file = os.path.join(root_path, 'benchmark', args.dataset,
                                     '{}_val.txt'.format(args.dataset))
        args.test_file = os.path.join(root_path, 'benchmark', args.dataset,
                                      '{}_test.txt'.format(args.dataset))
        if not os.path.exists(args.test_file):
            logging.warning(
                'Test file {} does not exist! Use val file instead'.format(
                    args.test_file))
            args.test_file = args.val_file
        args.rel2id_file = os.path.join(root_path, 'benchmark', args.dataset,
                                        '{}_rel2id.json'.format(args.dataset))
        if args.dataset == 'wiki80':
            args.metric = 'acc'
        else:
            args.metric = 'micro_f1'
    else:
        if not (os.path.exists(args.train_file) and os.path.exists(args.val_file)
                and os.path.exists(args.test_file)
                and os.path.exists(args.rel2id_file)):
            raise Exception(
                '--train_file, --val_file, --test_file and --rel2id_file are not '
                'specified or files do not exist. Or specify --dataset')

    logging.info('Arguments:')
    for arg in vars(args):
        logging.info('    {}: {}'.format(arg, getattr(args, arg)))

    # NOTE: rel2id is hard-coded to the FinRE mapping here; the generic
    # args.rel2id_file variant is kept below for reference.
    # rel2id = json.load(open(args.rel2id_file))
    rel2id = json.load(
        open(
            os.path.join(root_path, 'benchmark', args.dataset,
                         'finre_rel2id.json')))

    # Define the sentence encoder
    if args.pooler == 'entity':
        sentence_encoder = opennre.encoder.BERTEntityEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    elif args.pooler == 'cls':
        sentence_encoder = opennre.encoder.BERTEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    else:
        raise NotImplementedError

    # Define the model
    model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

    # Define the whole training framework
    framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                             val_path=args.val_file,
                                             test_path=args.test_file,
                                             model=model,
                                             ckpt=ckpt,
                                             batch_size=args.batch_size,
                                             max_epoch=args.max_epoch,
                                             lr=args.lr,
                                             opt='adamw')

    # Train the model
    if not args.only_test:
        framework.train_model('micro_f1')

    # Test
    framework.load_state_dict(torch.load(ckpt)['state_dict'])
    result = framework.eval_model(framework.test_loader)

    # Print the result
    logging.info('Test set results:')
    logging.info('Accuracy: {}'.format(result['acc']))
    logging.info('Micro precision: {}'.format(result['micro_p']))
    logging.info('Micro recall: {}'.format(result['micro_r']))
    logging.info('Micro F1: {}'.format(result['micro_f1']))
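# Entry-point sketch (an assumption, not shown in this excerpt): train() reads a number
# of attributes from args, so a matching argparse parser would look roughly like the one
# below. Flag names mirror the attributes used above; the default values are illustrative.
def _build_arg_parser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='.', help='Root path holding benchmark/ and pretrain/')
    parser.add_argument('--model_dir', default='.', help='Where ckpt/ is written')
    parser.add_argument('--ckpt', default='', help='Checkpoint name (auto-generated if empty)')
    parser.add_argument('--dataset', default='none', help='Dataset name, or "none" to use explicit file paths')
    parser.add_argument('--train_file', default='', help='Training file (used when --dataset none)')
    parser.add_argument('--val_file', default='', help='Validation file (used when --dataset none)')
    parser.add_argument('--test_file', default='', help='Test file (used when --dataset none)')
    parser.add_argument('--rel2id_file', default='', help='Relation-to-id mapping (used when --dataset none)')
    parser.add_argument('--pretrain_path', default='bert-base-uncased', help='BERT weights to load')
    parser.add_argument('--pooler', default='entity', choices=['entity', 'cls'], help='Sentence representation')
    parser.add_argument('--mask_entity', action='store_true', help='Mask entity mentions')
    parser.add_argument('--only_test', action='store_true', help='Skip training and only evaluate')
    parser.add_argument('--max_length', default=128, type=int, help='Max sequence length')
    parser.add_argument('--batch_size', default=64, type=int, help='Batch size')
    parser.add_argument('--max_epoch', default=3, type=int, help='Max number of training epochs')
    parser.add_argument('--lr', default=2e-5, type=float, help='Learning rate')
    return parser

# Typical invocation (illustrative):
#     train(_build_arg_parser().parse_args())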