# Example no. 1
# 0
from stanford import CoreNLP
from dependency_tree import build_tree
from dependency_similarity import *
from feature import *
from NeuralLearner import *
import torch
import torch.nn as nn
import numpy as np
import sys
import xgboost as xgb

# Data pre-processing (currently disabled by the `if False:` guard below).
#
# When enabled, this step:
#   1. reads the corpus at 'data/test-set.txt' through CorpusReader,
#   2. tokenizes both sentences of every item into 'token1' / 'token2',
#   3. starts a CoreNLP server and stores, for each item, the result of
#      corenlp.dependency_parse_tree() for both sentences under
#      'd-tree1' / 'd-tree2'.
if False:
    reader = CorpusReader('data/test-set.txt')

    # NOTE(review): the variable is named train_data although the file read
    # above is the test set -- confirm this is intentional.
    train_data = reader.data()

    # Only the items are needed here, so iterate the values directly.
    for item in train_data.values():
        item['token1'] = tokenized_sentence(item['Sentence1'])
        item['token2'] = tokenized_sentence(item['Sentence2'])

    corenlp = CoreNLP(sys.argv)
    corenlp.start_server()
    try:
        for k, item in train_data.items():
            print(k)  # progress output: one key per processed item
            item['d-tree1'] = corenlp.dependency_parse_tree(
                list_to_string(item['token1']))
            item['d-tree2'] = corenlp.dependency_parse_tree(
                list_to_string(item['token2']))
    finally:
        # Always shut the server down, even if parsing fails part-way
        # through, so a failed run does not leave the server running.
        corenlp.stop_server()