forked from andrecunha/idd3
/
run.py
121 lines (92 loc) · 3.62 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# -*- coding: utf-8 -*-
# IDD3 - Propositional Idea Density from Dependency Trees
# Copyright (C) 2014-2015 Andre Luiz Verucci da Cunha
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function, unicode_literals, division
import idd3
from idd3 import Relation, Engine
from idd3.rules import en, pt
import nltk
from sys import argv
from collections import defaultdict
from idd3.parsers import StanfordUnivDepParser, StanfordParser
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
import os
import shutil

# Terminal width used to print horizontal separators. The original
# `os.popen('stty size', 'r')` raised when stdout was not attached to a
# terminal (e.g. output piped to a file); shutil.get_terminal_size()
# falls back to the COLUMNS environment variable or an 80x24 default.
columns = shutil.get_terminal_size().columns

try:
    from termcolor import colored
except ImportError:
    # Fallback when termcolor is not installed: return the text unstyled.
    def colored(string, color, attrs):
        return string

# Stanford NN dependency parser, trained using the normalized Penn Treebank.
# Change these variables to the path on your system
corenlp_path = os.path.expanduser('~') + \
    "/Develop/stanford_tools/corenlp"
model_path = 'data/nndep.model.txt.gz'
pos_mapping_file = 'data/ENGLISH-fine-to-universal.full.map'

# Traditional Stanford Parser with normalized output
# Change this variable to the path on your system
stanford_path = os.path.expanduser('~') + \
    "/Develop/stanford_tools/stanford-parser"
def get_sentence(graph):
    """Return the sentence encoded in *graph* as a single string.

    Joins the 'word' field of every node that carries a non-empty word;
    nodes whose word is falsy (such as the artificial root, whose word
    is None) are skipped.
    """
    words = (node['word']
             for node in graph.nodes.values()
             if node['word'])
    return ' '.join(words)
def process_graphs(graphs):
    """Analyze each dependency graph and print its propositions.

    :graphs: iterable of dependency graphs (objects exposing
        ``nodes.values()`` whose items unpack into ``Relation`` kwargs).
    :returns: dict mapping proposition kind to its total count over
        all analyzed sentences.
    """
    engine = Engine(idd3.all_rulesets, idd3.all_transformations)
    stats = defaultdict(int)
    # Hoisted loop invariant: the separator is the same every iteration.
    separator = '-' * int(columns)

    # enumerate instead of range(len(...)): idiomatic and avoids repeated
    # indexing into graphs.
    for index, graph in enumerate(graphs, start=1):
        print(separator)

        relations = [Relation(**node) for node in graph.nodes.values()]

        print(colored('Sentence %d:' % index, 'white', attrs=['bold']))
        print('\t' + get_sentence(graph))

        print(colored('Propositions:', 'white', attrs=['bold']))
        try:
            engine.analyze(relations)
            for i, prop in enumerate(engine.props, start=1):
                print(str(i) + ' ' + str(prop))
                stats[prop.kind] += 1
        except Exception as e:
            # Best-effort batch processing: log the failure and move on to
            # the next sentence instead of aborting the whole run.
            logger.error('{0} in engine.analyze: {1}'.format(
                e.__class__.__name__, e))

    print(separator)
    return stats
def print_stats(stats):
    """Print a tab-separated table of proposition counts per kind.

    :stats: mapping from proposition kind to the number of occurrences.
    """
    print('Stats:')
    print('Kind\t#\t')
    for kind in stats:
        print('{0}\t{1}'.format(kind, stats[kind]))
def main():
    """Command-line entry point.

    Loads dependency graphs from the file named in ``argv[1]`` (either a
    pre-parsed ``.conll`` file or raw text to be parsed), extracts
    propositions, and prints per-kind statistics.
    """
    idd3.use_language(pt)

    # Guard clause: require exactly one input-file argument.
    if len(argv) < 2:
        print('Usage: python', argv[0], '<input file>')
        return

    input_file = argv[1]
    if input_file.endswith('.conll'):
        # Pre-parsed CoNLL input: load the dependency graphs directly.
        graphs = nltk.parse.dependencygraph.DependencyGraph.load(input_file)
    else:
        # Raw text: run the Stanford neural dependency parser first.
        parser = StanfordUnivDepParser(corenlp_path, model_path,
                                       pos_mapping_file)
        # Uncomment for normalized Stanford Parser.
        # parser = StanfordParser(stanford_path, pos_mapping_file)
        graphs = parser.parse_raw_file(input_file)

    print_stats(process_graphs(graphs))
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()