def _parse(sentences):
    """Constituency-parse *sentences* with a running CoreNLP server.

    Args:
        sentences: iterable of raw sentence strings.

    Returns:
        list of parse trees — every tree yielded for every sentence,
        flattened into one list.
    """
    ### 3.4.2 CoreNLP Parser
    # Local import: CoreNLP is only needed when parsing is requested.
    from nltk.parse.corenlp import CoreNLPParser as CP

    # NOTE(review): assumes a CoreNLP server is already listening on
    # localhost:9000 — this function does not start one.
    parser = CP(url='http://localhost:9000')

    # raw_parse_sents yields one iterator of trees per input sentence;
    # flatten them into a single list.
    return [tree
            for tree_iter in parser.raw_parse_sents(sentences)
            for tree in tree_iter]
# Beispiel #2
# 0
    def __init__(self, sentence):
        """Translate *sentence* to English and constituency-parse it.

        Args:
            sentence: source-language sentence string.

        Side effects:
            Prints the English translation and the number of parsed
            sentences; stores the original sentence, the first parse
            tree, and an empty relation list on the instance.
        """
        # Run a local CoreNLP server for the duration of parsing.
        with CoreNLPServer(port=9000) as server:
            en_parser = CoreNLPParser()
            # sg = StanfordTokenizer(path_to_jar='../stanford-parser-full-2018-02-27/stanford-parser.jar')
            self.trans = googletrans.Translator()

            self.sentence = sentence

            # Translate to English (googletrans' default destination
            # language — presumably 'en'; confirm if targets vary).
            result1 = self.trans.translate(sentence).text
            print(result1)

            # raw_parse_sents yields one tree-iterator per input sentence;
            # materialize each iterator as a list. (Replaces the original
            # while/next/StopIteration loop, which also shadowed the
            # builtin `iter`.)
            trees = [list(sub_iter)
                     for sub_iter in en_parser.raw_parse_sents([result1])]
            print(len(trees))

            # Keep the first parse of the first (and only) sentence.
            self.tree = trees[0][0]
            self.rel = []
class Cassim():
    """Sentence-level syntactic similarity via normalized tree edit distance."""

    def __init__(self):
        """Load the Punkt sentence tokenizer and connect to CoreNLP."""
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
        # NOTE(review): assumes a CoreNLP server is already listening on
        # localhost:9000 — this class does not start one.
        self.parser = CoreNLPParser(url='http://localhost:9000')

    def convert_mytree(self, nltktree, pnode):
        """Recursively copy an nltk ParentedTree into a zss Node tree.

        Increments the module-global ``numnodes`` once per child visited
        (subtrees and string leaves alike), so callers can read the node
        count after conversion.

        Args:
            nltktree: the (sub)tree to convert.
            pnode: the Node that receives the converted children.

        Returns:
            pnode, with the converted subtree attached.
        """
        global numnodes
        for child in nltktree:
            numnodes += 1
            # Leaves are plain strings; only descend into subtrees.
            # (isinstance replaces the original `type(...) is` check.)
            if isinstance(child, nltk.ParentedTree):
                kid = Node(child.label())
                pnode.addkid(kid)
                self.convert_mytree(child, kid)
        return pnode

    def syntax_similarity_conversation(self, documents1):
        """Syntactic similarity of each document with the one after it.

        A document containing any sentence longer than 70 tokens is
        marked unparseable ("NA") and yields NaN for both pairs it
        participates in.

        Args:
            documents1: sequence of raw document strings.

        Returns:
            np.ndarray of length ``len(documents1) - 1``; entry i is
            1 - mean normalized tree edit distance between the sentence
            trees of documents i and i+1 (NaN when either is "NA").
        """
        global numnodes
        documents1parsed = []

        # Sentence-split and parse each document.
        for d1 in tqdm(range(len(documents1))):
            tempsents = self.sent_detector.tokenize(documents1[d1].strip())
            for s in tempsents:
                if len(s.split()) > 70:
                    # Overlong sentence: skip the whole document.
                    documents1parsed.append("NA")
                    break
            else:
                temp = list(self.parser.raw_parse_sents(tempsents))
                for i in range(len(temp)):
                    # Keep the first parse of each sentence, converted to
                    # a ParentedTree for convert_mytree.
                    temp[i] = ParentedTree.convert(list(temp[i])[0])
                documents1parsed.append(temp)

        results = []
        for d1 in range(len(documents1parsed) - 1):
            d2 = d1 + 1
            if documents1parsed[d1] == "NA" or documents1parsed[d2] == "NA":
                results.append(float('NaN'))
                continue

            costMatrix = []
            for i in range(len(documents1parsed[d1])):
                numnodes = 0
                tempnode = Node(documents1parsed[d1][i].root().label())
                sentencedoc1 = self.convert_mytree(documents1parsed[d1][i],
                                                   tempnode)
                temp_costMatrix = []
                sen1nodes = numnodes
                for j in range(len(documents1parsed[d2])):
                    # Counter is an int here too (original had `.0`,
                    # silently making it a float).
                    numnodes = 0
                    tempnode = Node(documents1parsed[d2][j].root().label())
                    sentencedoc2 = self.convert_mytree(documents1parsed[d2][j],
                                                       tempnode)
                    # Normalize the edit distance by the combined node
                    # count of both trees.
                    ED = simple_distance(sentencedoc1, sentencedoc2)
                    ED /= (numnodes + sen1nodes)
                    temp_costMatrix.append(ED)
                costMatrix.append(temp_costMatrix)
            costMatrix = np.array(costMatrix)

            # Similarity = 1 - mean normalized distance over all
            # sentence pairs of the two documents.
            results.append(1 - np.mean(costMatrix))

        return np.array(results)