import nlpio

if __name__ == '__main__':
    # print(nlpio.parseDocFile('data/duc2004/docs/d30001t/APW19981016.0240'))

    # Load the evaluation documents listed in testset.txt.
    documents = nlpio.loadDocumentsFromFile('testset.txt')

    # Score ROUGE using the first peer (system) summary shipped with each document.
    predictions = [doc.peers[0] for doc in documents]
    print(nlpio.evaluateRouge(documents, predictions))

    # Baseline check: score ROUGE against a constant dummy prediction
    # (one placeholder string per document).
    predictions = ['lorem ipsum dolor' for _ in documents]
    print(nlpio.evaluateRouge(documents, predictions))
#TODO: make better def __init__(self): pass def fit(self,documents,y=None): return self def transform(self,documents): for doc in documents: print 'Parsing: ' + doc.name doc.ext['coreNLP'] = nlpio.stanfordParse(doc.text) return documents if __name__ == '__main__': docs = nlpio.loadDocumentsFromFile( 'testset.txt', # 'testset_all.txt', 'data/eval/models/1/', 'data/eval/peers/1/') print 'Do Cleanup...' textCleaner = SimpleTextCleaner() textCleaner.transform(docs) print 'Writing file and start coreNLP processing...' # Writing out the individual files. for doc in docs: write_file('tmp/' + doc.name, doc.text) # Writing the filelist write_file('tmp/filelist.txt', '\n'.join( ['tmp/' + doc.name for doc in docs]))