Esempio n. 1
0
 def getReferences(self, json=False):
     """Extract the bibliographic references found in ``self.soup``.

     Every ``<ref>`` element is converted into a ``Reference`` whose ``id``,
     ``authors``, ``year``, ``articleTitle``, ``source``, ``volume``,
     ``fromPage`` and ``toPage`` attributes are filled from the matching
     child elements. Text values are stored UTF-8 encoded, exactly as
     produced by ``get_text().encode("utf-8")``.

     Extraction is best-effort: a ``<ref>`` without an ``id`` attribute is
     skipped entirely, while any other missing piece is reported on stdout
     and the corresponding attribute is simply left untouched.

     Args:
         json: When exactly ``True``, the collected list is passed through
             ``self.convertToJson`` and that result is returned instead.

     Returns:
         The list of extracted ``Reference`` objects, or the value returned
         by ``self.convertToJson`` for that list when ``json`` is ``True``.
     """
     # (Reference attribute, XML tag, label used in the diagnostic message,
     #  index of the match to keep: 0 = first occurrence, -1 = last one).
     textFields = (
         ('year', 'year', 'year', 0),
         ('articleTitle', 'article-title', 'article title', -1),
         ('source', 'source', 'source', 0),
         ('volume', 'volume', 'volume', 0),
         ('fromPage', 'fpage', 'fromPage', 0),
         ('toPage', 'lpage', 'toPage', 0),
     )

     referenceList = []
     for reference in self.soup.find_all('ref'):
         extractedReference = Reference()

         # A missing attribute is a missing dict key (KeyError), which the
         # previous ``except AttributeError`` never caught; look it up
         # without raising instead.
         referenceId = reference.attrs.get('id')
         if referenceId is None:
             print('id not present')
             continue
         extractedReference.id = referenceId

         # Bind a fresh list instead of clearing the existing one in place,
         # so two references can never end up sharing the same list object
         # (which would happen if ``Reference.authors`` is a class-level
         # default -- NOTE(review): Reference is not visible here, confirm).
         extractedReference.authors = []
         for author in reference.find_all('string-name'):
             surnameTag = author.find('surname')
             givenNameTag = author.find('given-names')
             if surnameTag is None or givenNameTag is None:
                 # Skip only the incomplete author; the remaining authors
                 # of this reference are still collected.
                 print('author not present')
                 continue
             authorObject = Author()
             authorObject.surname = surnameTag.get_text().encode("utf-8")
             authorObject.givenName = givenNameTag.get_text().encode(
                 "utf-8")
             extractedReference.authors.append(authorObject)

         for attribute, tag, label, index in textFields:
             matches = reference.find_all(tag)
             if not matches:
                 # Covers both "no such child" and the empty-result case
                 # that used to escape as an uncaught IndexError for
                 # 'article-title'.
                 print(label + ' not present')
                 continue
             value = matches[index].get_text().encode("utf-8")
             setattr(extractedReference, attribute, value)
             if attribute == 'articleTitle':
                 # Kept from the original: each extracted title is echoed.
                 print(value)

         referenceList.append(extractedReference)

     if json is True:
         # The original also built a per-reference list of converted items
         # but never used it; the returned value is unchanged.
         return self.convertToJson(referenceList)
     return referenceList