def __init__(self, opt):
    """Prepare an inference session: tokenizer, embeddings and a trained model.

    Loads (or builds and caches) the dataset tokenizer, builds the embedding
    matrix, instantiates the model class from ``opt`` and restores its trained
    weights, then switches to evaluation mode with gradients disabled.

    Args:
        opt: options namespace; this method reads ``dataset``, ``embed_dim``,
            ``model_class``, ``model_name``, ``state_dict_path`` and ``device``.
    """
    self.opt = opt
    # train/test file locations for each supported sentence-classification dataset
    fname = {
        'cr': {
            'train': './datasets/cr/train.csv',
            'test': './datasets/cr/dev.csv'
        },
        'mr': {
            'train': './datasets/mr/train.csv',
            'test': './datasets/mr/dev.csv'
        },
        'mpqa': {
            'train': './datasets/mpqa/train.csv',
            'test': './datasets/mpqa/dev.csv'
        },
        'subj': {
            'train': './datasets/subj/train.csv',
            'test': './datasets/subj/dev.csv'
        },
        'sst2': {
            'train': './datasets/sst2/train.csv',
            'test': './datasets/sst2/test.csv'
        },
        'TREC': {
            'train': './datasets/TREC/train.csv',
            'test': './datasets/TREC/test.csv'
        },
    }
    if os.path.exists(opt.dataset + '_word2idx.pkl'):
        print("loading {0} tokenizer...".format(opt.dataset))
        with open(opt.dataset + '_word2idx.pkl', 'rb') as f:
            # NOTE: pickle is only safe because this cache file is produced
            # locally by the else-branch below; never load untrusted pickles.
            word2idx = pickle.load(f)
            self.tokenizer = Tokenizer(word2idx=word2idx)
    else:
        print("reading {0} dataset...".format(opt.dataset))
        text = ABSADatesetReader.__read_text__(
            [fname[opt.dataset]['train'], fname[opt.dataset]['test']])
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_text(text)
        # cache the vocabulary so later runs skip re-reading the dataset
        with open(opt.dataset + '_word2idx.pkl', 'wb') as f:
            pickle.dump(self.tokenizer.word2idx, f)
    embedding_matrix = build_embedding_matrix(self.tokenizer.word2idx,
                                              opt.embed_dim, opt.dataset)
    self.model = opt.model_class(embedding_matrix, opt).to(opt.device)
    print('loading model {0} ...'.format(opt.model_name))
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine
    self.model.load_state_dict(
        torch.load(opt.state_dict_path, map_location=opt.device))
    # switch model to evaluation mode; inference needs no gradients
    self.model.eval()
    torch.autograd.set_grad_enabled(False)
def __init__(self, opt):
    """Prepare an inference session: tokenizer, embeddings and a trained model.

    Loads (or builds and caches) the dataset tokenizer, builds the embedding
    matrix, instantiates the model class from ``opt`` and restores its trained
    weights, then switches to evaluation mode with gradients disabled.

    Args:
        opt: options namespace; this method reads ``dataset``, ``embed_dim``,
            ``model_class``, ``model_name``, ``state_dict_path`` and ``device``.
    """
    self.opt = opt
    # train/test file locations for each supported ABSA dataset
    fname = {
        'twitter': {
            'train': './datasets/acl-14-short-data/train.raw',
            'test': './datasets/acl-14-short-data/test.raw'
        },
        'rest14': {
            'train': './datasets/semeval14/restaurant_train.raw',
            'test': './datasets/semeval14/restaurant_test.raw'
        },
        'lap14': {
            'train': './datasets/semeval14/laptop_train.raw',
            'test': './datasets/semeval14/laptop_test.raw'
        },
        'rest15': {
            'train': './datasets/semeval15/restaurant_train.raw',
            'test': './datasets/semeval15/restaurant_test.raw'
        },
        'rest16': {
            'train': './datasets/semeval16/restaurant_train.raw',
            'test': './datasets/semeval16/restaurant_test.raw'
        },
    }
    if os.path.exists(opt.dataset + '_word2idx.pkl'):
        print("loading {0} tokenizer...".format(opt.dataset))
        with open(opt.dataset + '_word2idx.pkl', 'rb') as f:
            # NOTE: pickle is only safe because this cache file is produced
            # locally by the else-branch below; never load untrusted pickles.
            word2idx = pickle.load(f)
            self.tokenizer = Tokenizer(word2idx=word2idx)
    else:
        print("reading {0} dataset...".format(opt.dataset))
        text = ABSADatesetReader.__read_text__(
            [fname[opt.dataset]['train'], fname[opt.dataset]['test']])
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_text(text)
        # cache the vocabulary so later runs skip re-reading the dataset
        with open(opt.dataset + '_word2idx.pkl', 'wb') as f:
            pickle.dump(self.tokenizer.word2idx, f)
    embedding_matrix = build_embedding_matrix(self.tokenizer.word2idx,
                                              opt.embed_dim, opt.dataset)
    self.model = opt.model_class(embedding_matrix, opt).to(opt.device)
    print('loading model {0} ...'.format(opt.model_name))
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine
    self.model.load_state_dict(
        torch.load(opt.state_dict_path, map_location=opt.device))
    # switch model to evaluation mode; inference needs no gradients
    self.model.eval()
    torch.autograd.set_grad_enabled(False)