Example #1
 # Assumes module-level imports elsewhere in this file: logging and
 # stanfordParse (e.g. from nlpio import stanfordParse).
 def transform(self, documents):
     logging.info("Starting document parsing with StanfordCoreNLP...")
     for i, doc in enumerate(documents):
         processed = False
         if 'article' not in doc.ext:
             processed = True
             doc.ext['article'] = []
             for sentence in doc.ext['sentences']:
                 doc.ext['article'].extend(
                     stanfordParse(sentence)['sentences'])
             # What I'd like to do, but far too slow:
             # stanfordOutput = stanfordParse(doc.text)
             # doc.ext['article'] = stanfordOutput['sentences']
             # doc.ext['coref'] = stanfordOutput['coref']
         if 'models' not in doc.ext:
             processed = True
             doc.ext['models'] = []
             for model in doc.models:
                 doc.ext['models'].extend(
                     stanfordParse(model)['sentences'])
         if processed:
             logging.info("Processed document %i/%i" % (i + 1,
                                                        len(documents)))
         else:
             logging.info("Document %i/%i was already processed" %
                          (i + 1, len(documents)))
     return documents
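For orientation, a rough sketch of the per-document layout this transform expects and produces; the literal values below are placeholders, and the real entries are whatever stanfordParse returns (the code above relies on that output carrying at least a 'sentences' list).

# Hypothetical before/after view of doc.ext (placeholder values only).
doc_ext_before = {
    'sentences': ['The world is so pretty.', 'I like trains.'],
}
doc_ext_after = {
    'sentences': ['The world is so pretty.', 'I like trains.'],
    # Flat list built by extending with stanfordParse(sentence)['sentences'],
    # so roughly one parsed entry per input sentence.
    'article': ['<parsed sentence 1>', '<parsed sentence 2>'],
    # Same idea, but fed from doc.models instead of doc.ext['sentences'].
    'models': ['<parsed model sentence>'],
}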
Example #2
import json

import nlpio

if __name__ == '__main__':
    # Parse a single sentence, then a short two-sentence text, and pretty-print
    # the resulting structures as JSON.
    print(json.dumps(nlpio.stanfordParse('The world is so pretty.'), indent=4))
    print(json.dumps(nlpio.stanfordParse('I like trains. They are nice and clean, almost as clean as their tracks.'), indent=4))
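As a rough guide to what gets dumped above, the following is a minimal sketch, assuming (as Example #1 does) that stanfordParse returns a dict with at least a 'sentences' key, and that a working Stanford CoreNLP setup is available to nlpio:

import nlpio

result = nlpio.stanfordParse('I like trains. They are nice and clean.')
# Top-level keys of the parse result; Example #1 also reads a 'coref' key
# when parsing a whole document at once.
print(sorted(result.keys()))
# One entry per detected sentence.
print(len(result['sentences']))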
Example #3
 def transform(self, documents):
     # Parse each document with Stanford CoreNLP via nlpio and store the full
     # result on the document under the 'coreNLP' key.
     for doc in documents:
         print('Parsing: ' + doc.name)
         doc.ext['coreNLP'] = nlpio.stanfordParse(doc.text)
     return documents
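For context, a minimal standalone driver for a transform like the one above; the CoreNLPTransformer wrapper and the Document stub are assumptions for illustration (the real project defines its own document and pipeline classes), and nlpio.stanfordParse needs a working Stanford CoreNLP installation behind it.

import nlpio

class CoreNLPTransformer(object):
    # Same pattern as Example #3: parse each document and keep the raw output.
    def transform(self, documents):
        for doc in documents:
            print('Parsing: ' + doc.name)
            doc.ext['coreNLP'] = nlpio.stanfordParse(doc.text)
        return documents

class Document(object):
    # Hypothetical stub exposing only the attributes the transform relies on.
    def __init__(self, name, text):
        self.name = name
        self.text = text
        self.ext = {}

docs = [Document('demo', 'The world is so pretty.')]
docs = CoreNLPTransformer().transform(docs)
print(docs[0].ext['coreNLP']['sentences'])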