def import_models(dataset):
    """Load every saved CNN checkpoint for *dataset* as an eval-mode model.

    Scans ``checkpoints/cnn_<dataset>_*``, rebuilds a :class:`TextCNN` with
    the embedding type encoded in each checkpoint's filename, and restores
    its weights.

    :param dataset: dataset identifier used in checkpoint and cache names
    :return: dict mapping checkpoint filename -> loaded ``TextCNN`` in eval mode
    """
    models = {}
    # The training matrix and vocabulary depend only on the dataset, not on
    # the individual checkpoint — load them once instead of once per file.
    # (The labels returned alongside X_train are not needed here.)
    X_train, _ = load('{}_train'.format(dataset))
    vocab = load('{}_vocab'.format(dataset)).vocab
    embedding_dims = 300
    for f in glob.glob('checkpoints/cnn_{}_*'.format(dataset)):
        fname = os.path.split(f)[1]
        # Embedding type (e.g. static / non-static) is encoded in the filename.
        embedding_type = get_embedding_type(fname)
        model = TextCNN(dataset=dataset,
                        input_size=X_train.shape[1],
                        vocab_size=len(vocab) + 1,
                        embedding_dims=embedding_dims,
                        embedding_type=embedding_type)
        model.load_state_dict(torch.load(f))
        model.eval()
        models[fname] = model
    return models
def main():
    """Evaluate the selected model (``--model textcnn|lstm``) on the test set.

    Loads the test split, restores the requested saved model, runs inference,
    and reports F1 score and distribution correlation.

    :raises SystemExit: with a usage message if ``--model`` is unrecognized
    """
    test_set = SinaDataset(path.join(args.source, 'test.json'), input_dim)
    # drop_last keeps every batch at exactly args.bs, which test() relies on.
    test_loader = DataLoader(test_set, batch_size=args.bs, shuffle=False,
                             drop_last=True)
    if args.model == 'textcnn':
        model = TextCNN(input_dim, 200)
        model.load_state_dict(torch.load('./saved_models/textcnn.pkl'))
    elif args.model == 'lstm':
        model = MyLSTM(input_dim, hidden_dim=8)
        model.load_state_dict(torch.load('./saved_models/lstm.pkl'))
    else:
        # Bug fix: this error path previously printed the message and called
        # exit(0), signalling success to the shell. SystemExit with a string
        # writes the message to stderr and exits with status 1.
        raise SystemExit('"--model" argument only accepts "textcnn" or "lstm"')
    model = model.to(device)
    pred, ans, pred_dists, true_dists = test(model, test_loader, device, args.bs)
    calc_f1_score(pred, ans)
    calc_coef(pred_dists, true_dists)
class Classify:
    """Single-sentence text classifier backed by a trained TextCNN checkpoint.

    Loads the vocabulary, label list, and model weights from ``./ckpts`` and
    exposes :meth:`predict` for one-off inference.
    """

    def __init__(self, features='word', device='gpu'):
        """
        :param features: 'word' tokenizes with jieba; anything else splits
            the sentence into individual characters.
        :param device: 'gpu' to use CUDA when available, otherwise CPU.
        """
        self.features = features
        self.sentence_length = TextCNNConfig.sequence_length
        self.device = device
        self.__device()
        self.load_vocab()
        self.__load_model()

    def __device(self):
        """Resolve the 'gpu'/'cpu' request to a concrete torch.device."""
        if torch.cuda.is_available() and self.device == 'gpu':
            self.device = torch.device('cuda')
        else:
            # Consistency fix: this branch previously stored the bare string
            # 'cpu' while the branch above stored a torch.device object.
            self.device = torch.device('cpu')

    def __load_model(self):
        """Restore the trained TextCNN weights and switch to inference mode."""
        self.model = TextCNN(TextCNNConfig)
        self.model.load_state_dict(torch.load("./ckpts/cnn_model.pth"))
        self.model.to(self.device)
        self.model.eval()

    def load_vocab(self):
        """Build the token->id vocab and id->label maps from ./ckpts files."""
        with open('./ckpts/vocab.txt', 'r', encoding='utf-8') as f:
            tokens = f.read().strip().split('\n')
        self.vocab = {token: idx for idx, token in enumerate(tokens)}
        with open('./ckpts/target.txt', 'r', encoding='utf-8') as f:
            labels = f.read().strip().split('\n')
        # Inverse direction of self.vocab: class index -> label string.
        self.target = {idx: label for idx, label in enumerate(labels)}

    def cut_words(self, sentence: str) -> list:
        """Tokenize one sentence: jieba segmentation or per-character split."""
        if self.features == 'word':
            return jieba.lcut(sentence)
        return list(sentence)

    def sentence_cut(self, sentence):
        """Convert one sentence to a fixed-length list of token ids.

        Truncates to ``self.sentence_length`` tokens, or right-pads with
        ``<PAD>``; tokens missing from the vocabulary map to ``<UNK>``.
        """
        words = self.cut_words(sentence)
        if len(words) >= self.sentence_length:
            words = words[:self.sentence_length]
        else:
            words = words + ["<PAD>"] * (self.sentence_length - len(words))
        # Single dict lookup per token instead of `in` followed by indexing.
        unk_id = self.vocab["<UNK>"]
        return [self.vocab.get(w, unk_id) for w in words]

    def predict(self, content):
        """Classify one sentence and return its predicted label string."""
        with torch.no_grad():
            content_id = [self.sentence_cut(content)]
            start_time = time.time()
            batch = torch.LongTensor(content_id).to(self.device)
            outputs = self.model(batch)
            # Fix: torch.max takes `dim`, not numpy's `axis`; the max value
            # itself is not needed, only the argmax index.
            _, max_index = torch.max(outputs, dim=1)
            predicted = max_index.cpu().numpy()
            print(time.time() - start_time)
            return self.target[predicted[0]]