def rank(self, args, question, generated_queries):
    """Score candidate queries for *question* with a pretrained TreeLSTM ranker.

    Loads the saved SimilarityTreeLSTM checkpoint, converts the candidates into
    the on-disk dataset format the model expects, dependency-parses both sides
    with the Stanford parser, and returns the per-candidate predictions.

    Args:
        args: namespace with model/training hyperparameters and paths
            (save, expname, data, mem_dim, hidden_dim, num_classes,
            input_dim, sparse, lr, wd).
        question: the natural-language question string.
        generated_queries: candidate queries; each item is a dict whose
            "where" entry holds the triple patterns to rank.

    Returns:
        The model's prediction tensor/list from Trainer.test, or [] when
        there are no candidates.
    """
    # Nothing to rank — skip the expensive model load entirely.
    if len(generated_queries) == 0:
        return []
    # Load the model
    checkpoint_filename = '%s.pt' % os.path.join(args.save, args.expname)
    dataset_vocab_file = os.path.join(args.data, 'dataset.vocab')
    # metrics = Metrics(args.num_classes)
    vocab = Vocab(filename=dataset_vocab_file, data=[
        Constants.PAD_WORD, Constants.UNK_WORD,
        Constants.BOS_WORD, Constants.EOS_WORD
    ])
    similarity = DASimilarity(args.mem_dim, args.hidden_dim, args.num_classes)
    model = SimilarityTreeLSTM(vocab.size(), args.input_dim, args.mem_dim,
                               similarity, args.sparse)
    # Criterion/optimizer are only needed because Trainer's constructor
    # requires them — no training happens here, only trainer.test().
    criterion = nn.KLDivLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=args.lr,
                              weight_decay=args.wd)
    # Copy pretrained word embeddings into the model if they exist on disk.
    emb_file = os.path.join(args.data, 'dataset_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
        model.emb.weight.data.copy_(emb)
    # map_location forces CPU deserialization so a GPU-trained checkpoint
    # loads on a CPU-only machine.
    checkpoint = torch.load(checkpoint_filename,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['model'])
    trainer = Trainer(args, model, criterion, optimizer)
    # Prepare the dataset: one synthetic record pairing the question with
    # every candidate query (triples joined with " ." into one string).
    json_data = [{
        "id": "test",
        "question": question,
        "generated_queries": [{
            "query": " .".join(query["where"]),
            "correct": False
        } for query in generated_queries]
    }]
    output_dir = "./output/tmp"
    preprocess_lcquad.save_split(
        output_dir, *preprocess_lcquad.split(json_data, self.parser))
    # Classpath for the Java-based Stanford dependency parser.
    lib_dir = './learning/treelstm/lib/'
    classpath = ':'.join([
        lib_dir,
        os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'),
        os.path.join(lib_dir,
                     'stanford-parser/stanford-parser-3.5.1-models.jar')
    ])
    preprocess_lcquad.parse(output_dir, cp=classpath)
    test_dataset = QGDataset(output_dir, vocab, args.num_classes)
    test_loss, test_pred = trainer.test(test_dataset)
    return test_pred
def rank(self, args, question, generated_queries):
    """Score candidate queries for *question* with a pretrained TreeLSTM ranker.

    Like the plain variant, but caches the question's dependency parse in
    ``self.dep_tree_cache`` (persisted to ``self.dep_tree_cache_file_path``)
    so the slow Stanford parse of the question runs once per question.

    Args:
        args: namespace with model/training hyperparameters and paths
            (save, expname, data, mem_dim, hidden_dim, num_classes,
            input_dim, sparse, lr, wd).
        question: the natural-language question string.
        generated_queries: candidate queries; each item is a dict whose
            "where" entry is used as the query text.

    Returns:
        The model's prediction tensor/list from Trainer.test, or [] when
        there are no candidates.
    """
    # Nothing to rank — skip the expensive model load entirely.
    if len(generated_queries) == 0:
        return []
    # FIX: removed the dead `if 2 > 1:` guard (a leftover from a
    # commented-out `try:`) and dedented the body — it was always true.
    # Load the model
    checkpoint_filename = '%s.pt' % os.path.join(args.save, args.expname)
    dataset_vocab_file = os.path.join(args.data, 'dataset.vocab')
    vocab = Vocab(filename=dataset_vocab_file, data=[
        Constants.PAD_WORD, Constants.UNK_WORD,
        Constants.BOS_WORD, Constants.EOS_WORD
    ])
    similarity = DASimilarity(args.mem_dim, args.hidden_dim, args.num_classes)
    model = SimilarityTreeLSTM(vocab.size(), args.input_dim, args.mem_dim,
                               similarity, args.sparse)
    # Criterion/optimizer exist only to satisfy Trainer's constructor;
    # no training happens here, only trainer.test().
    criterion = nn.KLDivLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=args.lr,
                              weight_decay=args.wd)
    # Copy pretrained word embeddings into the model if present on disk.
    emb_file = os.path.join(args.data, 'dataset_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
        model.emb.weight.data.copy_(emb)
    # map_location forces CPU deserialization so a GPU-trained checkpoint
    # loads on a CPU-only machine.
    checkpoint = torch.load(checkpoint_filename,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['model'])
    trainer = Trainer(args, model, criterion, optimizer)
    # Prepare the dataset: one synthetic record pairing the question with
    # every candidate query. NOTE(review): this variant passes
    # query["where"] through unjoined, unlike the sibling versions that
    # do " .".join(...) — preserved as-is; confirm which is intended.
    json_data = [{
        "id": "test",
        "question": question,
        "generated_queries": [{
            "query": query["where"],
            "correct": False
        } for query in generated_queries]
    }]
    output_dir = "./output/tmp"
    preprocess_lcquad.save_split(
        output_dir, *preprocess_lcquad.split(json_data, self.parser))
    # Classpath for the Java-based Stanford dependency parser.
    lib_dir = './learning/treelstm/lib/'
    classpath = ':'.join([
        lib_dir,
        os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'),
        os.path.join(lib_dir,
                     'stanford-parser/stanford-parser-3.5.1-models.jar')
    ])
    if question in self.dep_tree_cache:
        # Cache hit: parse only the queries (dep_parse=False skips the
        # question's dependency parse), then write the cached question
        # tokens/parents once per candidate so file line counts line up.
        preprocess_lcquad.parse(output_dir, cp=classpath, dep_parse=False)
        cache_item = self.dep_tree_cache[question]
        with open(os.path.join(output_dir, 'a.parents'), 'w') as f_parent, \
                open(os.path.join(output_dir, 'a.toks'), 'w') as f_token:
            for i in range(len(generated_queries)):
                f_token.write(cache_item[0])
                f_parent.write(cache_item[1])
    else:
        # Cache miss: full parse, then remember the question's first
        # tokens/parents line and persist the cache to disk.
        preprocess_lcquad.parse(output_dir, cp=classpath)
        with open(os.path.join(output_dir, 'a.parents')) as f:
            parents = f.readline()
        with open(os.path.join(output_dir, 'a.toks')) as f:
            tokens = f.readline()
        self.dep_tree_cache[question] = [tokens, parents]
        with open(self.dep_tree_cache_file_path, 'w') as f:
            ujson.dump(self.dep_tree_cache, f)
    test_dataset = QGDataset(output_dir, vocab, args.num_classes)
    test_loss, test_pred = trainer.test(test_dataset)
    return test_pred
def rank(self, args, question, generated_queries):
    """Score candidate queries for *question* with a pretrained TreeLSTM ranker.

    Caches the question's dependency parse in ``self.dep_tree_cache``
    (persisted to ``self.dep_tree_cache_file_path``) so the slow Stanford
    parse of the question runs once per question.

    Args:
        args: namespace with model/training hyperparameters and paths
            (save, expname, data, mem_dim, hidden_dim, num_classes,
            input_dim, sparse, lr, wd).
        question: the natural-language question string.
        generated_queries: candidate queries; each item is a dict whose
            "where" entry holds the triple patterns to rank.

    Returns:
        The model's prediction tensor/list from Trainer.test, or [] when
        there are no candidates.
    """
    # FIXES: removed leftover debug print() calls, removed the always-true
    # `if 2 > 1:` guard (residue of a commented-out `try:`), and replaced
    # the machine-specific absolute lib_dir
    # ('/home/mateus/TCC/SQG/learning/treelstm/lib/') with the relative
    # path used by the sibling versions of this method.
    if len(generated_queries) == 0:
        return []
    # Load the model
    checkpoint_filename = '%s.pt' % os.path.join(args.save, args.expname)
    dataset_vocab_file = os.path.join(args.data, 'dataset.vocab')
    vocab = Vocab(filename=dataset_vocab_file, data=[
        Constants.PAD_WORD, Constants.UNK_WORD,
        Constants.BOS_WORD, Constants.EOS_WORD
    ])
    similarity = DASimilarity(args.mem_dim, args.hidden_dim, args.num_classes)
    model = SimilarityTreeLSTM(vocab.size(), args.input_dim, args.mem_dim,
                               similarity, args.sparse)
    # Criterion/optimizer exist only to satisfy Trainer's constructor;
    # no training happens here, only trainer.test().
    criterion = nn.KLDivLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=args.lr,
                              weight_decay=args.wd)
    # Copy pretrained word embeddings into the model if present on disk.
    emb_file = os.path.join(args.data, 'dataset_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
        model.emb.weight.data.copy_(emb)
    # map_location forces CPU deserialization so a GPU-trained checkpoint
    # loads on a CPU-only machine.
    checkpoint = torch.load(checkpoint_filename,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['model'])
    trainer = Trainer(args, model, criterion, optimizer)
    # Prepare the dataset: one synthetic record pairing the question with
    # every candidate query (entities are replaced by #ent placeholders by
    # the split step, which generates a.txt and b.txt).
    json_data = [{
        "id": "test",
        "question": question,
        "generated_queries": [{
            "query": " .".join(query["where"]),
            "correct": False
        } for query in generated_queries]
    }]
    output_dir = "./output/tmp"
    preprocess_lcquad.save_split(
        output_dir, *preprocess_lcquad.split(json_data, self.parser))
    # Parse question and queries into toks/rels/parents files using the
    # Java-based Stanford dependency parser.
    lib_dir = './learning/treelstm/lib/'
    classpath = ':'.join([
        lib_dir,
        os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'),
        os.path.join(lib_dir,
                     'stanford-parser/stanford-parser-3.5.1-models.jar')
    ])
    if question in self.dep_tree_cache:
        # Cache hit: parse only the queries (dep_parse=False skips the
        # question's dependency parse), then write the cached question
        # tokens/parents once per candidate so file line counts line up.
        preprocess_lcquad.parse(output_dir, cp=classpath, dep_parse=False)
        cache_item = self.dep_tree_cache[question]
        with open(os.path.join(output_dir, 'a.parents'), 'w') as f_parent, \
                open(os.path.join(output_dir, 'a.toks'), 'w') as f_token:
            for i in range(len(generated_queries)):
                f_token.write(cache_item[0])
                f_parent.write(cache_item[1])
    else:
        # Cache miss: full parse, then remember the question's first
        # tokens/parents line and persist the cache to disk.
        preprocess_lcquad.parse(output_dir, cp=classpath)
        with open(os.path.join(output_dir, 'a.parents')) as f:
            parents = f.readline()
        with open(os.path.join(output_dir, 'a.toks')) as f:
            tokens = f.readline()
        self.dep_tree_cache[question] = [tokens, parents]
        with open(self.dep_tree_cache_file_path, 'w') as f:
            ujson.dump(self.dep_tree_cache, f)
    test_dataset = QGDataset(output_dir, vocab, args.num_classes)
    test_loss, test_pred = trainer.test(test_dataset)
    return test_pred