def getReferences(self, json=False):
    """Extract every ``ref`` element of ``self.soup`` into ``Reference`` objects.

    For each ``ref`` element the id attribute, the authors (``string-name``
    children), year, article title, source, volume, first page and last page
    are copied onto a new ``Reference``. Text values are stored UTF-8
    encoded. A missing field is reported on stdout and left at whatever
    default ``Reference`` provides; a ``ref`` without an ``id`` attribute is
    reported and skipped entirely.

    NOTE(review): ``self.soup`` is assumed to be a BeautifulSoup-style parsed
    document (it is used through ``find_all``/``find``/``attrs``/
    ``get_text``) -- confirm against where it is created.

    Args:
        json: When exactly ``True``, the collected list is passed through
            ``self.convertToJson`` and that result is returned instead.
            (The name shadows the ``json`` module inside this method only.)

    Returns:
        The list of extracted ``Reference`` objects, or the value of
        ``self.convertToJson(list)`` when ``json is True``.
    """

    def copyText(target, node, attribute, tagName):
        # Store the encoded text of the first <tagName> descendant of
        # ``node`` on ``target.<attribute>``; report it when the tag is absent.
        try:
            setattr(target, attribute,
                    node.find(tagName).get_text().encode("utf-8"))
        except AttributeError:
            # find() returned None, so .get_text() was looked up on None.
            print(attribute + ' not present')

    referenceList = []
    for reference in self.soup.find_all('ref'):
        extractedReference = Reference()

        # A missing attribute surfaces as KeyError from the attrs dict, so it
        # has to be caught explicitly for the "skip this reference" path to
        # run instead of aborting the whole extraction.
        try:
            extractedReference.id = reference.attrs['id']
        except (KeyError, AttributeError):
            print('id not present')
            continue

        try:
            authors = reference.find_all('string-name')
            # Bind a fresh list instead of clearing in place: if ``authors``
            # is a class-level list on Reference (not visible from here), an
            # in-place clear would make all references share one author list.
            extractedReference.authors = []
            for author in authors:
                surname = author.find('surname').get_text().encode("utf-8")
                givenName = author.find(
                    'given-names').get_text().encode("utf-8")
                authorObject = Author()
                authorObject.surname = surname
                authorObject.givenName = givenName
                extractedReference.authors.append(authorObject)
        except AttributeError:
            # An author without surname/given-names ends author extraction
            # for this reference; authors collected so far are kept.
            print('author not present')

        copyText(extractedReference, reference, 'year', 'year')

        # The last <article-title> is used; an empty result list raises
        # IndexError, which must be treated the same as a missing tag.
        try:
            extractedReference.articleTitle = reference.find_all(
                'article-title')[-1].get_text().encode("utf-8")
            print(extractedReference.articleTitle)
        except (IndexError, AttributeError):
            print('article title not present')

        # (Reference attribute, XML tag) pairs, in the original report order.
        for attribute, tagName in (
                ('source', 'source'),
                ('volume', 'volume'),
                ('fromPage', 'fpage'),
                ('toPage', 'lpage'),
        ):
            copyText(extractedReference, reference, attribute, tagName)

        referenceList.append(extractedReference)

    # Identity check kept on purpose: truthy values other than True still
    # receive the plain list, as before.
    if json is True:
        # The whole list is serialised in one call; the former per-reference
        # conversion loop produced a list that was never used.
        return self.convertToJson(referenceList)
    return referenceList