def extract_paper_info(self) -> dict:
    """Populate ``self.paper`` from the parsed document and return its features.

    Steps, in order:

    1. Read DOI, title, authors, abstract, year and affiliations from
       ``self.soup`` (DOI falls back to ``self.document['doi']``).
    2. Look up a university rank for the first usable affiliation name.
    3. Collect every ``biblstruct`` entry as a ``Citation``.
    4. Derive the ``funded`` flag from ``self.get_funding_status()``.
    5. Query ``self.get_sjr`` and run the claim/evidence extractor.
    6. Copy the API metrics onto ``self.paper``, then compute self-citations,
       co-citations and the abstract-based text scores.

    Side effects: mutates ``self.paper``; sets ``self.support``,
    ``self.refute`` and ``self.ratio``; temporarily changes the process
    working directory; performs an HTTP GET; prints to stdout.

    NOTE(review): assumes ``self.soup`` is a BeautifulSoup tree of a TEI
    document (tag names such as ``teiheader``/``analytic``/``listbibl``) --
    confirm against the constructor.
    NOTE(review): when ``self.get_sjr`` returns a falsy value the metric
    attributes are read from ``self.paper`` unchanged, so they presumably
    need defaults on the paper object -- confirm.

    Returns:
        dict: flat mapping of feature name to value for this paper. The
        ``"citations"`` entry is ``None`` when no API response was obtained.
    """
    # DOI: prefer the header value, otherwise use the document record.
    doi = self.soup.teiheader.find("idno", type="DOI")
    if doi:
        self.paper.doi = elem_to_text(doi)
    elif self.document:
        self.paper.doi = self.document['doi']

    # Title
    title = self.soup.teiheader.find("title")
    if title:
        self.paper.title = elem_to_text(title)

    # Authors
    authors = self.get_authors(self.soup.analytic.find_all('author'))
    if authors:
        self.paper.authors = authors

    # Abstract (may be replaced below by the abstract from the API response)
    if self.soup.abstract:
        self.paper.abstract = elem_to_text(self.soup.abstract)

    # Year
    published = self.soup.analytic.find("publicationstmt")
    if published:
        self.paper.year = elem_to_text(published.find("date", type="when"))

    # Organization / Affiliations
    for affiliation in self.soup.analytic.find_all('affiliation'):
        org = Organization()
        org.type = "institution"
        org.name = elem_to_text(affiliation.find("orgname", type="institution"))
        address = Address()
        addr = affiliation.find("address")
        if addr:
            address.place = elem_to_text(addr.find("settlement"))
            address.region = elem_to_text(addr.find("region"))
            address.country = elem_to_text(addr.find("country"))
        org.address = address
        self.paper.affiliations.append(org)

    # University Ranking
    # The previous check compared the Organization object itself with '',
    # which is always unequal, so the fallback to the second affiliation
    # could never run. Compare the *name* instead.
    orgs = self.paper.affiliations
    if orgs:
        rank_name = orgs[0].name
        if not rank_name and len(orgs) > 1:
            rank_name = orgs[1].name
        self.paper.uni_rank = self.uni_rank.get_rank(rank_name)
    else:
        self.paper.uni_rank = self.uni_rank.get_rank('Random')

    # Citations
    for bibl in self.soup.listbibl.find_all('biblstruct'):
        citation = Citation()
        cited_paper = bibl.analytic
        if cited_paper:
            citation.title = elem_to_text(cited_paper.find("title", type="main"))
            citation_authors = self.get_authors(cited_paper.find_all("author"))
            citation.doi = elem_to_text(cited_paper.find("idno", type="DOI"))
            if citation_authors:
                citation.authors = citation_authors
        cited_journal = bibl.monogr
        if cited_journal:
            citation.source = elem_to_text(cited_journal.find("title"))
            try:
                citation.publish_year = cited_journal.imprint.date['when']
            except (AttributeError, KeyError, TypeError):
                # The year is optional. Any link of the chain may be absent:
                # no <imprint> (AttributeError on None), no <date>
                # (TypeError on None['when']), or no "when" attribute
                # (KeyError). One incomplete reference must not abort the
                # whole extraction.
                pass
        self.paper.citations.append(citation)

    # NER - Ack pairs - Funding status
    self.paper.ack_pairs = self.get_funding_status()
    # Funded (1) when any pair carries the 'ORG' label, else 0.
    self.paper.funded = int(
        any(label == 'ORG' for (_entity, label) in self.paper.ack_pairs)
    )

    # SJR
    api_resp = self.get_sjr(self.paper.doi, self.paper.title, self.db)

    # Adding the paragraphs from the paper to the corpus
    extractor = ClaimEvidenceExtractor(self.xml, self.soup, self.test_csv)
    os.chdir("/scifact/")
    try:
        extractor.make_corpus()
        # Get response for claim evidence using request to API.
        # The response is only printed; the results themselves are read
        # through extractor.get_results().
        response = requests.get('http://0.0.0.0:8000/getclaimevidence')
        print(response)
        self.support, self.refute, self.ratio = extractor.get_results()
        print('support:', self.support, 'refute:', self.refute, 'ratio:', self.ratio)
    finally:
        # Always step back out, even if the corpus build or the request
        # fails, so a failure does not leave the process inside /scifact/.
        os.chdir("../")

    if api_resp:
        self.paper.cited_by_count = api_resp["num_citations"]
        self.paper.sjr = api_resp["sjr"]
        self.paper.subject = api_resp["subject"]
        self.paper.subject_code = api_resp["subject_code"]
        self.paper.normalized = api_resp["normalized_citations"]
        self.paper.velocity = api_resp["citationVelocity"]
        self.paper.influentialcitations = api_resp["influentialCitationCount"]
        self.paper.references = api_resp["references_count"]
        self.paper.flag = api_resp["openaccessflag"]
        self.paper.influentialref = api_resp["influentialReferencesCount"]
        self.paper.ref_background = api_resp["reference_background"]
        self.paper.ref_result = api_resp["reference_result"]
        self.paper.ref_method = api_resp["reference_methodology"]
        self.paper.cite_background = api_resp["citations_background"]
        self.paper.cite_result = api_resp["citations_result"]
        self.paper.cite_method = api_resp["citations_methodology"]
        self.paper.cite_next = api_resp["citations_next"]
        self.paper.influential_references_methodology = api_resp["upstream_influential_methodology_count"]
        self.paper.issn = api_resp["ISSN"]
        self.paper.auth = api_resp["authors"]
        self.paper.age = api_resp["age"]
        # A non-empty abstract from the API replaces the parsed one.
        if api_resp["abstract"]:
            self.paper.abstract = api_resp["abstract"]

    # Set self-citations
    self.paper.self_citations = self.paper.set_self_citations()

    # Calculate coCitations
    t2, t3 = coCite(self.paper.doi, self.db)

    # Calculate NLP features (all computed on the abstract)
    reading_score = self.get_reading_score(self.paper.abstract)
    subjectivity = self.get_subjectivity(self.paper.abstract)
    sentiment = self.get_sentiment(self.paper.abstract)

    # api_resp may be falsy (guarded above); subscripting it unconditionally
    # here used to raise right at the end of an otherwise complete run.
    api_citations = api_resp["citations"] if api_resp else None

    return {
        "doi": self.paper.doi,
        "title": self.paper.title,
        "num_citations": self.paper.cited_by_count,
        "author_count": len(self.paper.authors),
        "sjr": self.paper.sjr,
        "u_rank": self.paper.uni_rank,
        "funded": self.paper.funded,
        "self_citations": self.paper.self_citations,
        "subject": self.paper.subject,
        "subject_code": self.paper.subject_code,
        "citationVelocity": self.paper.velocity,
        "influentialCitationCount": self.paper.influentialcitations,
        "references_count": self.paper.references,
        "openaccessflag": self.paper.flag,
        "influentialReferencesCount": self.paper.influentialref,
        "normalized_citations": self.paper.normalized,
        "reference_background": self.paper.ref_background,
        "reference_result": self.paper.ref_result,
        "reference_methodology": self.paper.ref_method,
        "citations_background": self.paper.cite_background,
        "citations_result": self.paper.cite_result,
        "citations_methodology": self.paper.cite_method,
        "citations_next": self.paper.cite_next,
        "upstream_influential_methodology_count": self.paper.influential_references_methodology,
        "coCite2": t2,
        "coCite3": t3,
        "ISSN": self.paper.issn,
        "authors": self.paper.auth,
        "citations": api_citations,
        "age": self.paper.age,
        "reading_score": reading_score,
        "subjectivity": subjectivity,
        "sentiment": sentiment,
        "supporting_sentences": self.support,
        "refuting_sentences": self.refute,
        "ratio_support": self.ratio,
    }
def extract_paper_info(self) -> dict:
    """Populate ``self.paper`` from the parsed document and return its features.

    Steps, in order:

    1. Read DOI, title, authors, year and affiliations from ``self.soup``
       (DOI falls back to ``self.document['doi']``).
    2. Look up a university rank for the first usable affiliation name.
    3. Collect every ``biblstruct`` entry as a ``Citation``.
    4. Derive the ``funded`` flag from ``self.get_funding_status()``.
    5. Copy the metrics returned by ``self.get_sjr`` onto ``self.paper``.
    6. Compute self-citations, the influential-methodology reference count
       and the co-citation values.

    Side effects: mutates ``self.paper``.

    NOTE(review): assumes ``self.soup`` is a BeautifulSoup tree of a TEI
    document (tag names such as ``teiheader``/``analytic``/``listbibl``) --
    confirm against the constructor.
    NOTE(review): when ``self.get_sjr`` returns a falsy value the metric
    attributes are read from ``self.paper`` unchanged, so they presumably
    need defaults on the paper object -- confirm.

    Returns:
        dict: flat mapping of feature name to value for this paper.
    """
    # DOI: prefer the header value, otherwise use the document record.
    doi = self.soup.teiheader.find("idno", type="DOI")
    if doi:
        self.paper.doi = elem_to_text(doi)
    elif self.document:
        self.paper.doi = self.document['doi']

    # Title
    title = self.soup.teiheader.find("title")
    if title:
        self.paper.title = elem_to_text(title)

    # Authors
    authors = self.get_authors(self.soup.analytic.find_all('author'))
    if authors:
        self.paper.authors = authors

    # Year
    published = self.soup.analytic.find("publicationstmt")
    if published:
        self.paper.year = elem_to_text(published.find("date", type="when"))

    # Organization / Affiliations
    for affiliation in self.soup.analytic.find_all('affiliation'):
        org = Organization()
        org.type = "institution"
        org.name = elem_to_text(affiliation.find("orgname", type="institution"))
        address = Address()
        addr = affiliation.find("address")
        if addr:
            address.place = elem_to_text(addr.find("settlement"))
            address.region = elem_to_text(addr.find("region"))
            address.country = elem_to_text(addr.find("country"))
        org.address = address
        self.paper.affiliations.append(org)

    # University Ranking
    # The previous check compared the Organization object itself with '',
    # which is always unequal, so the fallback to the second affiliation
    # could never run. Compare the *name* instead.
    orgs = self.paper.affiliations
    if orgs:
        rank_name = orgs[0].name
        if not rank_name and len(orgs) > 1:
            rank_name = orgs[1].name
        self.paper.uni_rank = self.uni_rank.get_rank(rank_name)
    else:
        self.paper.uni_rank = self.uni_rank.get_rank('Random')

    # Citations
    for bibl in self.soup.listbibl.find_all('biblstruct'):
        citation = Citation()
        cited_paper = bibl.analytic
        if cited_paper:
            citation.title = elem_to_text(cited_paper.find("title", type="main"))
            citation_authors = self.get_authors(cited_paper.find_all("author"))
            citation.doi = elem_to_text(cited_paper.find("idno", type="DOI"))
            if citation_authors:
                citation.authors = citation_authors
        cited_journal = bibl.monogr
        if cited_journal:
            citation.source = elem_to_text(cited_journal.find("title"))
            try:
                citation.publish_year = cited_journal.imprint.date['when']
            except (AttributeError, KeyError, TypeError):
                # The year is optional. Any link of the chain may be absent:
                # no <imprint> (AttributeError on None), no <date>
                # (TypeError on None['when']), or no "when" attribute
                # (KeyError). One incomplete reference must not abort the
                # whole extraction.
                pass
        self.paper.citations.append(citation)

    # NER - Ack pairs - Funding status
    self.paper.ack_pairs = self.get_funding_status()
    # Funded (1) when any pair carries the 'ORG' label, else 0.
    self.paper.funded = int(
        any(label == 'ORG' for (_entity, label) in self.paper.ack_pairs)
    )

    # SJR
    api_resp = self.get_sjr(self.paper.doi, self.paper.title)
    if api_resp:
        self.paper.cited_by_count = api_resp["num_citations"]
        self.paper.sjr = api_resp["sjr"]
        self.paper.subject = api_resp["subject"]
        self.paper.subject_code = api_resp["subject_code"]
        self.paper.normalized = api_resp["normalized_citations"]
        self.paper.velocity = api_resp["citationVelocity"]
        self.paper.influentialcitations = api_resp["influentialCitationCount"]
        self.paper.references = api_resp["references_count"]
        self.paper.flag = api_resp["openaccessflag"]
        self.paper.influentialref = api_resp["influentialReferencesCount"]
        self.paper.ref_background = api_resp["reference_background"]
        self.paper.ref_result = api_resp["reference_result"]
        self.paper.ref_method = api_resp["reference_methodology"]
        self.paper.cite_background = api_resp["citations_background"]
        self.paper.cite_result = api_resp["citations_result"]
        self.paper.cite_method = api_resp["citations_methodology"]
        self.paper.cite_next = api_resp["citations_next"]

    # Set self-citations
    self.paper.self_citations = self.paper.set_self_citations()

    # Set influential_methodology_references
    self.paper.influential_references_methodology = (
        self.set_influential_references_methodology()
    )

    # Co-citation values for the paper's DOI
    t2, t3 = coCite(self.paper.doi)

    return {
        "doi": self.paper.doi,
        "title": self.paper.title,
        "num_citations": self.paper.cited_by_count,
        "author_count": len(self.paper.authors),
        "sjr": self.paper.sjr,
        "u_rank": self.paper.uni_rank,
        "funded": self.paper.funded,
        "self_citations": self.paper.self_citations,
        "subject": self.paper.subject,
        "subject_code": self.paper.subject_code,
        "citationVelocity": self.paper.velocity,
        "influentialCitationCount": self.paper.influentialcitations,
        "references_count": self.paper.references,
        "openaccessflag": self.paper.flag,
        "influentialReferencesCount": self.paper.influentialref,
        "normalized_citations": self.paper.normalized,
        "reference_background": self.paper.ref_background,
        "reference_result": self.paper.ref_result,
        "reference_methodology": self.paper.ref_method,
        "citations_background": self.paper.cite_background,
        "citations_result": self.paper.cite_result,
        "citations_methodology": self.paper.cite_method,
        "citations_next": self.paper.cite_next,
        "upstream_influential_methodology_count": self.paper.influential_references_methodology,
        "coCite2": t2,
        "coCite3": t3,
    }