Example #1
    def wrapInSciDoc(self, contexts, doc_from_id, doc_to_id):
        """
            Returns a SciDoc ready to be passed to the standard context_extract
            functions, where each context is a paragraph

            Args:
                contexts: list of context dicts
                doc_from_id: corpus_id of this SciDoc
                doc_to_id: corpus_id of target document (citation)
            Returns:
                SciDoc
        """
        newDocument=SciDoc()

        # Reuse the source document's corpus metadata if it exists, otherwise
        # fall back to using its corpus_id as the guid
        metadata=cp.Corpus.getMetadataByField("metadata.corpus_id",doc_from_id)
        if metadata:
            newDocument.loadExistingMetadata(metadata)
        else:
            newDocument.metadata["guid"]=doc_from_id
        assert newDocument.metadata["guid"] != ""

        newDocument.metadata["corpus_id"]=doc_from_id

        newSection_id=newDocument.addSection("root", "", 0)

        # Resolve the cited (target) document: it must already be in the corpus
        metadata=cp.Corpus.getMetadataByField("metadata.corpus_id",doc_to_id)
        if not metadata:
            raise ValueError("Target document %s is not in corpus!" % doc_to_id)

        ref=newDocument.addExistingReference(metadata)

        ref["corpus_id"]=doc_to_id

        # Each context becomes one paragraph; each of its lines becomes a sentence
        for context in contexts:
            newPar_id=newDocument.addParagraph(newSection_id)
            for line in context["lines"]:
                newSent_id=newDocument.addSentence(newPar_id)
                text=line["text"]
                citations=[]
                if re.search(CIT_MARKER,text):
                    newCit=newDocument.addCitation(newSent_id, ref["id"])
                    text=re.sub(CIT_MARKER, CITATION_FORM % newCit["id"], text)
                    citations.append(newCit["id"])

                sent=newDocument.element_by_id[newSent_id]
                sent["sentiment"]=line["sentiment"]
                sent["text"]=text
                if len(citations) > 0:
                    sent["citations"]=citations

        return newDocument
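
The citation handling inside the loop above is a regex search-and-replace. Below is a minimal, runnable sketch of just that step; the CIT_MARKER and CITATION_FORM values are illustrative assumptions (the real ones are defined elsewhere in the module), and the citation id would normally come from newDocument.addCitation().

import re

CIT_MARKER = r"<CIT>"             # assumed placeholder pattern
CITATION_FORM = "<cit id=%s />"   # assumed inline citation template

text = "Earlier work <CIT> introduced a similar method."
citations = []
if re.search(CIT_MARKER, text):
    new_cit_id = "cit1"           # stand-in for newDocument.addCitation(...)["id"]
    text = re.sub(CIT_MARKER, CITATION_FORM % new_cit_id, text)
    citations.append(new_cit_id)

print(text)        # Earlier work <cit id=cit1 /> introduced a similar method.
print(citations)   # ['cit1']
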
Example #2
def loadAZSciXML(filename):
    """
        Load a Cambridge-style SciXML

    """

    # main loadSciXML
    text=loadFileText(filename)
    soup=BeautifulStoneSoup(text)

    fileno=soup.find("docno")
    fileno=fileno.text if fileno else ""

    # Create a new SciDoc to store the paper
    newDocument=SciDoc()
    newDocument["metadata"]["filename"]=os.path.basename(filename)
    newDocument["metadata"]["filepath"]=filename

    paper=soup.find("paper")
    if not paper:
        debugAddMessage(newDocument,"error","NO <PAPER> IN THIS PAPER! file: "+filename)
        return newDocument

    # Load metadata, either from corpus or from file
##    key=cp.Corpus.getFileUID(newDocument["metadata"]["filename"])
##    if cp.Corpus.metadata_index.has_key(key):
##        metadata=cp.Corpus.metadata_index[key]
##    else:
    metadata=None

    if metadata:
        newDocument["metadata"]["conference"]=""
        for field in metadata:
            newDocument["metadata"][field]=metadata[field]
    else:
        loadMetadata(newDocument, paper, fileno, soup)
##        debugAddMessage(newDocument,"error","PAPER NOT IN METADATA FILE! file: "+filename)

    newDocument["metadata"]["guid"]=cp.Corpus.generateGUID(newDocument["metadata"])

    # Clean up potential weird text in XML metadata
##    makeSureValuesAreReadable(newDocument) # remove if not dealing with crap conversion stuff

    # Load all references (at the end of the document) from the XML
    for ref in soup.findAll("reference"):
        processReferenceXML(ref, newDocument)

    # Load Abstract
    abstract=soup.find("abstract")
    if not abstract:
        debugAddMessage(newDocument,"error","CANNOT LOAD ABSTRACT! file: "+ filename+"\n")
        # TODO: LOAD first paragraph as abstract
    else:
        newSection_id=newDocument.addSection("root","Abstract")
        newPar_id=newDocument.addParagraph(newSection_id)

        for s in abstract.findChildren("a-s"):
            addNewSentenceAndProcessRefs(s, newDocument, newPar_id, newSection_id) # deals with all of the adding of a sentence

        newDocument.abstract=newDocument.element_by_id[newSection_id]

    for div in soup.findAll("div"):
        loadStructureProcessDiv(div, newDocument)

    # try to match each citation with its reference
    matchCitationsWithReferences(newDocument)

# "in press", "forthcoming", "submitted", "to appear" = dates to fix & match
# Does not work due to: unicode
##    for ref in newDocument["references"]:
##        k=ref.get("AZ",["NO AZ"])
##        print k, most_common(k)

    return newDocument
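
To make the expected input concrete, here is a minimal, runnable sketch of the main lookups the loader performs, using bs4's BeautifulSoup with the stdlib parser (the original code uses BeautifulStoneSoup from BeautifulSoup 3); the inline XML snippet is an illustrative assumption about the Cambridge SciXML layout, not a real document.

from bs4 import BeautifulSoup

text = """
<paper>
  <docno>9405001</docno>
  <abstract>
    <a-s id="A-0">We present a method for loading SciXML.</a-s>
  </abstract>
  <div><p><s id="S-0">A body sentence citing a reference.</s></p></div>
  <reference>Author, A. (1994). A cited paper.</reference>
</paper>
"""

soup = BeautifulSoup(text, "html.parser")   # stands in for BeautifulStoneSoup(text)

fileno = soup.find("docno")
fileno = fileno.text if fileno else ""
print("docno:", fileno)                                       # 9405001

abstract = soup.find("abstract")
print([s.text for s in abstract.findChildren("a-s")])         # abstract sentences

print("references:", len(soup.findAll("reference")))          # 1
print("divs:", len(soup.findAll("div")))                      # 1
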