# NOTE: import order is kept exactly as in the original; the wildcard imports
# may shadow one another, so reordering them could change name resolution.
from stanford import CoreNLP
from dependency_tree import build_tree
from dependency_similarity import *
from feature import *
from NeuralLearner import *
import torch
import torch.nn as nn
import numpy as np
import sys
import xgboost as xgb

# Data pre-processing.
#
# For every entry of the corpus this stage:
#   1. tokenizes 'Sentence1' / 'Sentence2' into 'token1' / 'token2';
#   2. sends each token list (joined back into a string) through the CoreNLP
#      wrapper and stores the result under 'd-tree1' / 'd-tree2'.
#
# The stage is disabled by the literal `if False:` guard below; change the
# condition to run it.
# NOTE(review): CorpusReader, tokenized_sentence and list_to_string are not
# imported by name here -- presumably they come from one of the wildcard
# imports above; confirm.
# NOTE(review): the variable is called `train_data` but the file being read is
# 'data/test-set.txt' -- confirm which split is intended.
if False:
    reader = CorpusReader('data/test-set.txt')
    train_data = reader.data()

    # Only the records are needed for tokenization, so iterate the values.
    for item in train_data.values():
        item['token1'] = tokenized_sentence(item['Sentence1'])
        item['token2'] = tokenized_sentence(item['Sentence2'])

    corenlp = CoreNLP(sys.argv)
    corenlp.start_server()
    try:
        for k, item in train_data.items():
            # Print the entry key as a progress indicator.
            print(k)
            item['d-tree1'] = corenlp.dependency_parse_tree(
                list_to_string(item['token1']))
            item['d-tree2'] = corenlp.dependency_parse_tree(
                list_to_string(item['token2']))
    finally:
        # Always stop the server, even if parsing an entry raises; otherwise
        # a started server would be left behind.
        corenlp.stop_server()