def parse_worker(q):
    """Parse every file named on a work queue and write its stats as CSV.

    File names are pulled from ``q.get()`` until the sentinel string
    ``'STOP'`` is received.  Each file is read as UTF-8, every line is split
    with ``pause_splitter``, and the collected pieces are handed to
    ``MetricalTreeParser.stats_raw_parse_sents``.  The result is written next
    to the input as ``<filename>.csv`` (UTF-8, no index column).

    Args:
        q: Object exposing a zero-argument ``get()`` that returns file names
            and finally ``'STOP'`` (presumably a multiprocessing queue --
            confirm against the caller).

    Returns:
        True once the sentinel has been consumed.
    """
    # One parser is built up front and reused for every file on the queue.
    parser = DependencyTreeParser(model_path='StanfordLibrary/stanford-parser-full-%s/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz' % DATE)
    parser = MetricalTreeParser(parser)

    # iter(callable, sentinel) keeps calling q.get() until it returns 'STOP'.
    for filename in iter(q.get, 'STOP'):
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('Working on %s...' % filename)

        sents = []
        with codecs.open(filename, encoding='utf-8') as f:
            for line in f:
                sents.extend(pause_splitter(line))

        df = parser.stats_raw_parse_sents(sents, arto=True)

        # Close the output explicitly so the CSV is flushed and the handle is
        # released before moving on to the next file.
        with codecs.open('%s.csv' % filename, 'w', encoding='utf-8') as out:
            df.to_csv(out, index=False)

        print('Finished with %s.' % filename)

    return True
def parse_worker(q):
    """Parse every file named on a work queue and write its stats as CSV.

    File names are pulled from ``q.get()`` until the sentinel string
    ``'STOP'`` is received.  Each file is read as UTF-8, every line is split
    with ``pause_splitter``, and the collected pieces are handed to
    ``MetricalTreeParser.stats_raw_parse_sents``.  The result is written next
    to the input as ``<filename>.csv`` (UTF-8, no index column).

    Args:
        q: Object exposing a zero-argument ``get()`` that returns file names
            and finally ``'STOP'`` (presumably a multiprocessing queue --
            confirm against the caller).

    Returns:
        True once the sentinel has been consumed.
    """
    # One parser is built up front and reused for every file on the queue.
    parser = DependencyTreeParser(model_path='Stanford Library/stanford-parser-full-%s/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz' % DATE)
    parser = MetricalTreeParser(parser)

    # iter(callable, sentinel) keeps calling q.get() until it returns 'STOP'.
    for filename in iter(q.get, 'STOP'):
        print('Working on %s...' % filename)

        sents = []
        with codecs.open(filename, encoding='utf-8') as f:
            for line in f:
                sents.extend(pause_splitter(line))

        df = parser.stats_raw_parse_sents(sents, arto=True)

        # Close the output explicitly so the CSV is flushed and the handle is
        # released before moving on to the next file.
        with codecs.open('%s.csv' % filename, 'w', encoding='utf-8') as out:
            df.to_csv(out, index=False)

        print('Finished with %s.' % filename)

    return True
def __init__(self, deptreeParser=None):
    """Store the dependency tree parser this object delegates to.

    Args:
        deptreeParser: One of
            * ``None`` -- a notice is written to stderr and ``'PCFG'`` is
              used as if it had been passed;
            * a string -- substituted into the model path
              ``.../lexparser/english<name>.ser.gz`` and used to build a
              ``DependencyTreeParser``;
            * a ``DependencyTreeParser`` instance -- stored as given.

    Raises:
        ValueError: If the argument is none of the above.
    """
    backend = deptreeParser

    if backend is None:
        # Make the silent default visible to whoever is running the tool.
        sys.stderr.write('No deptreeParser provided, defaulting to PCFG\n')
        backend = 'PCFG'

    if isinstance(backend, compat.string_types):
        # A string names the grammar; resolve it to a model file first.
        model_file = 'stanford-parser-full-%s/edu/stanford/nlp/models/lexparser/english%s.ser.gz' % (DATE, backend)
        backend = DependencyTreeParser(model_path=model_file)
    elif not isinstance(backend, DependencyTreeParser):
        raise ValueError('Provided an invalid dependency tree parser')

    self.deptreeParser = backend
def return_parser(dir_root):
    """Build a MetricalTreeParser using the English PCFG model under dir_root.

    Args:
        dir_root: Directory that the relative model path
            ``Stanford Library/stanford-parser-full-<DATE>/...`` is joined
            onto with ``os.path.join``.

    Returns:
        A ``MetricalTreeParser`` wrapping a ``DependencyTreeParser`` that was
        constructed with the joined model path.
    """
    relative_model = 'Stanford Library/stanford-parser-full-%s/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz' % DATE
    dependency_parser = DependencyTreeParser(
        model_path=os.path.join(dir_root, relative_model)
    )
    return MetricalTreeParser(dependency_parser)