Esempio n. 1
0
    def loadXML(self, filename, sentenceFilter, loadRegistries):
        """ Read an xml file containing an abstract and populate this object.

        Sets self.id from the 'id' attribute of the first <abstract> element
        and then, for each child element that is present, fills in:
          - self.publicationInformation  from <PublicationInformation>
          - self.titleSentences          from <title>
          - self.affiliationSentences    from <affiliation>
          - the private list of all body sentences from <body>; sentences
            accepted by sentenceFilter are also appended to self.sentences
        Finally self.__buildAcronymTable() is called.

        Parameters:
          filename       -- passed unchanged to xml.dom.minidom.parse
          sentenceFilter -- callable invoked with each body sentence; the
                            sentence is selected only when the result
                            compares equal to True
          loadRegistries -- not used by this method

        Raises:
          IndexError -- if the document has no <abstract> element
        """
        xmldoc = xml.dom.minidom.parse(filename)
        try:
            absNodes = xmldoc.getElementsByTagName('abstract')
            if not absNodes:
                # Same exception type the bare absNodes[0] lookup produced,
                # but with a message that names the actual problem.
                raise IndexError(
                    'no <abstract> element found in %r' % (filename,))
            abstractNode = absNodes[0]
            xmlutil.normalizeXMLTree(abstractNode)

            self.id = abstractNode.getAttribute('id')

            # read journal, author, publication info
            nodes = abstractNode.getElementsByTagName('PublicationInformation')
            if len(nodes) > 0:
                self.publicationInformation = publicationinfo.PublicationInfo(
                    nodes[0])

            # read title
            nodes = abstractNode.getElementsByTagName('title')
            if len(nodes) > 0:
                self.titleSentences = xmlutil.parseSentences(nodes[0], self)

            # read affiliation
            nodes = abstractNode.getElementsByTagName('affiliation')
            if len(nodes) > 0:
                self.affiliationSentences = xmlutil.parseSentences(
                    nodes[0], self)

            # read abstract body text
            nodes = abstractNode.getElementsByTagName('body')
            if len(nodes) > 0:
                self.__allSentences = xmlutil.parseSentences(nodes[0], self)
                # self.sentences is only appended to here, so it must already
                # exist (presumably created by the constructor -- not visible
                # from this method).
                for s in self.__allSentences:
                    # NOTE: the explicit comparison is intentional. A truthy
                    # result that is not equal to True does not select the
                    # sentence, exactly as before.
                    if sentenceFilter(s) == True:
                        self.sentences.append(s)
        finally:
            # remove links within the xml doc so GC can reclaim mem faster;
            # done in a finally clause so it also happens when reading fails
            xmldoc.unlink()

        self.__buildAcronymTable()
Esempio n. 2
0
 def getXML(self, doc):
     """
       Build and return a 'PublicationInformation' element created from doc.

       Children are appended in this order: the stored journal node (if not
       None), a 'Country' node made by xmlutil.createNodeWithTextChild from
       self._country, the stored author-list node (if not None) and the
       stored publication-type-list node (if not None). The finished element
       is passed through xmlutil.normalizeXMLTree before it is returned.
     """
     pubInfo = doc.createElement('PublicationInformation')

     # optional journal node comes first
     journal = self._journalNode
     if journal is not None:
         pubInfo.appendChild(journal)

     # the country node is always written
     country = xmlutil.createNodeWithTextChild(doc, 'Country', self._country)
     pubInfo.appendChild(country)

     # remaining optional nodes, in their fixed output order
     for attrName in ('_authorListNode', '_publicationTypeListNode'):
         child = getattr(self, attrName)
         if child is not None:
             pubInfo.appendChild(child)

     xmlutil.normalizeXMLTree(pubInfo)
     return pubInfo
Esempio n. 3
0
    def loadXML(self, filename, sentenceFilter, loadRegistries):
        """ Read an xml file containing an abstract and populate this object.

        Sets self.id from the 'id' attribute of the first <abstract> element
        and then, for each child element that is present, fills in:
          - self.publicationInformation  from <PublicationInformation>
          - self.titleSentences          from <title>
          - self.affiliationSentences    from <affiliation>
          - the private list of all body sentences from <body>; sentences
            accepted by sentenceFilter are also appended to self.sentences
        Finally self.__buildAcronymTable() is called.

        Parameters:
          filename       -- passed unchanged to xml.dom.minidom.parse
          sentenceFilter -- callable invoked with each body sentence; the
                            sentence is selected only when the result
                            compares equal to True
          loadRegistries -- not used by this method

        Raises:
          IndexError -- if the document has no <abstract> element
        """
        xmldoc = xml.dom.minidom.parse(filename)
        try:
            absNodes = xmldoc.getElementsByTagName('abstract')
            if not absNodes:
                # Same exception type the bare absNodes[0] lookup produced,
                # but with a message that names the actual problem.
                raise IndexError(
                    'no <abstract> element found in %r' % (filename,))
            abstractNode = absNodes[0]
            xmlutil.normalizeXMLTree(abstractNode)

            self.id = abstractNode.getAttribute('id')

            # read journal, author, publication info
            nodes = abstractNode.getElementsByTagName('PublicationInformation')
            if len(nodes) > 0:
                self.publicationInformation = publicationinfo.PublicationInfo(
                    nodes[0])

            # read title
            nodes = abstractNode.getElementsByTagName('title')
            if len(nodes) > 0:
                self.titleSentences = xmlutil.parseSentences(nodes[0], self)

            # read affiliation
            nodes = abstractNode.getElementsByTagName('affiliation')
            if len(nodes) > 0:
                self.affiliationSentences = xmlutil.parseSentences(
                    nodes[0], self)

            # read abstract body text
            nodes = abstractNode.getElementsByTagName('body')
            if len(nodes) > 0:
                self.__allSentences = xmlutil.parseSentences(nodes[0], self)
                # self.sentences is only appended to here, so it must already
                # exist (presumably created by the constructor -- not visible
                # from this method).
                for s in self.__allSentences:
                    # NOTE: the explicit comparison is intentional. A truthy
                    # result that is not equal to True does not select the
                    # sentence, exactly as before.
                    if sentenceFilter(s) == True:
                        self.sentences.append(s)
        finally:
            # remove links within the xml doc so GC can reclaim mem faster;
            # done in a finally clause so it also happens when reading fails
            xmldoc.unlink()

        self.__buildAcronymTable()