def ownAZannot(export_annots=False):
    """
        Annotates each sentence of every paper in the corpus using own classifier.

        The classifier is loaded from "trained_az_classifier.pickle" and applied
        to each SciDoc returned by cp.Corpus.listPapers().

        Args:
            export_annots: if True, the "az" value of every sentence is written,
                one per line, to <corpus output dir>/<guid>.annot.txt and the
                SciDoc itself is NOT saved back. If False (default), the
                annotated SciDoc is saved back with cp.Corpus.saveSciDoc().
    """
    # Imported locally, as in the original code, so the classifier module is
    # only loaded when this function is actually called.
    from minerva.az.az_cfc_classification import AZannotator
    annot = AZannotator("trained_az_classifier.pickle")

    # NOTE(review): sys.maxint only exists on Python 2; presumably used here
    # to mean "no limit on the number of results" -- confirm before porting.
    papers = cp.Corpus.listPapers(max_results=sys.maxint)

    progress = ProgressIndicator(True, len(papers), False)

    print("Producing annotations for SciDocs...")
    for guid in papers:
        doc = cp.Corpus.loadSciDoc(guid)
        annot.annotateDoc(doc)
        if export_annots:
            output_filename = os.path.join(
                cp.Corpus.paths.output,
                doc.metadata["guid"] + ".annot.txt",
            )
            # Context manager guarantees the file is closed even if writing
            # one of the sentences raises.
            with open(output_filename, "w") as output_file:
                # Sentences without an "az" key produce an empty line, so the
                # line count always matches the number of sentences.
                output_file.writelines(
                    sentence.get("az", "") + "\n"
                    for sentence in doc.allsentences
                )
        else:
            cp.Corpus.saveSciDoc(doc)

        progress.showProgressReport("Annotating -- %s" % guid)
def testLabels():
    """
        Loads one hard-coded SciDoc and writes it out as <guid>.pos.xml in the
        corpus output directory, using an AZPrimeWriter with save_pos_tags
        enabled.
    """
    sample_guid = "f7921eed-89bc-4f38-a794-7c9a5878a7ee"

    xml_writer = AZPrimeWriter()
    xml_writer.save_pos_tags = True

    scidoc = cp.Corpus.loadSciDoc(sample_guid)
    # The file name is taken from the loaded document's own metadata, not
    # from the guid that was requested.
    target_path = os.path.join(
        cp.Corpus.paths.output,
        doc_name_for_output(scidoc) if False else scidoc.metadata["guid"] + ".pos.xml",
    )
    xml_writer.write(scidoc, target_path)
def exportSciXML():
    """
        Exports every SciDoc returned by cp.Corpus.listPapers() to AZPrime XML
        (<guid>.pos.xml) in the output dir of the corpus.

        Each exported document is also saved back with cp.Corpus.saveSciDoc().
        Documents that contain no sentences are skipped entirely: they are not
        written, not saved, and no progress report is issued for them.
    """
    all_guids = cp.Corpus.listPapers(max_results=sys.maxint)

    xml_writer = AZPrimeWriter()
    xml_writer.save_pos_tags = True
    reporter = ProgressIndicator(True, len(all_guids), False)

    print("Exporting SciXML files")
    for paper_guid in all_guids:
        scidoc = cp.Corpus.loadSciDoc(paper_guid)

        if len(scidoc.allsentences) >= 1:
            target_path = os.path.join(
                cp.Corpus.paths.output,
                scidoc.metadata["guid"] + ".pos.xml",
            )
            xml_writer.write(scidoc, target_path)
            cp.Corpus.saveSciDoc(scidoc)
            reporter.showProgressReport("Exporting -- %s" % paper_guid)