def _extract_biblio_efetch(self, page, id=None):
    """Extract bibliographic fields from a PubMed efetch XML page.

    Looks up year/month/day, title, abstract, ISSN and journal title via
    ``provider._extract_from_xml``, then adds ``authors`` (comma-joined
    ``LastName`` values) and ``keywords`` (semicolon-joined
    ``DescriptorName`` values).  Hyphens are removed from the ISSN.  When
    year, month and day form a valid date, an ISO-format ``date`` entry is
    added and ``month`` and ``day`` are removed.

    Args:
        page: The efetch response text.  Date fields are looked up under
            ``ArticleDate`` when that string occurs in the page, otherwise
            under ``PubDate``.
        id: Identifier of the item; only used in the debug log message.

    Returns:
        The mapping returned by ``provider._extract_from_xml`` with the
        additions described above.  ``year``, when present, is a string.
    """
    article_path = ["PubmedArticleSet", "MedlineCitation", "Article"]
    journal_path = article_path + ["Journal"]

    # The two lookup tables only ever differed in the date element used.
    date_tag = "ArticleDate" if "ArticleDate" in page else "PubDate"
    date_path = article_path + [date_tag]

    dict_of_keylists = {
        "year": date_path + ["Year"],
        "month": date_path + ["Month"],
        "day": date_path + ["Day"],
        "title": article_path + ["ArticleTitle"],
        "abstract": article_path + ["Abstract", "AbstractText"],
        "issn": journal_path + ["ISSN"],
        "journal": journal_path + ["Title"],
    }
    biblio_dict = provider._extract_from_xml(page, dict_of_keylists)

    # Authors are best-effort: any unusable node leaves the key unset.
    dom_authors = provider._find_all_in_xml(page, "LastName")
    try:
        biblio_dict["authors"] = ", ".join(
            [author.firstChild.data for author in dom_authors])
    except (AttributeError, TypeError):
        pass

    # Keywords are only set when at least one descriptor was found.
    mesh_list = provider._find_all_in_xml(page, "DescriptorName")
    try:
        if mesh_list:
            biblio_dict["keywords"] = "; ".join(
                [mesh_term.firstChild.data for mesh_term in mesh_list])
    except (AttributeError, TypeError):
        pass

    try:
        biblio_dict["issn"] = biblio_dict["issn"].replace("-", "")
    except (AttributeError, KeyError):
        pass

    try:
        datetime_published = datetime.datetime(
            year=biblio_dict["year"],
            month=biblio_dict["month"],
            day=biblio_dict["day"])
    except (AttributeError, TypeError, KeyError, ValueError):
        # ValueError covers out-of-range components (e.g. day 31 in a
        # 30-day month); treat it like any other incomplete date instead
        # of letting it abort the whole extraction.
        logger.debug(u"%20s don't have full date information %s",
                     self.provider_name, id)
    else:
        biblio_dict["date"] = datetime_published.isoformat()
        # Raw string: "\D" in a plain literal is an invalid escape sequence.
        biblio_dict["year"] = re.sub(r"\D", "", str(biblio_dict["year"]))
        del biblio_dict["month"]
        del biblio_dict["day"]

    # Whatever happened above, expose the year as a string if there is one.
    try:
        biblio_dict["year"] = str(biblio_dict["year"])
    except KeyError:
        pass

    return biblio_dict
def _extract_biblio(self, page, id=None):
    """Extract bibliographic fields from a PubMed XML page.

    Looks up the ``ArticleDate`` year/month/day, the article title and the
    journal title via ``provider._extract_from_xml`` and adds ``authors``
    (comma-joined ``LastName`` values).  When year, month and day form a
    valid date, an ISO-format ``date`` entry is added and ``month`` and
    ``day`` are removed; ``year`` is kept.

    Args:
        page: The XML response text.
        id: Identifier of the item; only used in the debug log message.

    Returns:
        The mapping returned by ``provider._extract_from_xml`` with the
        additions described above.
    """
    article_path = ["PubmedArticleSet", "MedlineCitation", "Article"]
    date_path = article_path + ["ArticleDate"]
    dict_of_keylists = {
        "year": date_path + ["Year"],
        "month": date_path + ["Month"],
        "day": date_path + ["Day"],
        "title": article_path + ["ArticleTitle"],
        "journal": article_path + ["Journal", "Title"],
    }
    biblio_dict = provider._extract_from_xml(page, dict_of_keylists)

    # Authors are best-effort: any unusable node leaves the key unset.
    dom_authors = provider._find_all_in_xml(page, "LastName")
    try:
        biblio_dict["authors"] = ", ".join(
            [author.firstChild.data for author in dom_authors])
    except (AttributeError, TypeError):
        pass

    try:
        datetime_published = datetime.datetime(
            year=biblio_dict["year"],
            month=biblio_dict["month"],
            day=biblio_dict["day"])
    except (AttributeError, TypeError, KeyError, ValueError):
        # ValueError covers out-of-range components (e.g. day 31 in a
        # 30-day month); treat it like any other incomplete date instead
        # of letting it abort the whole extraction.
        logger.debug("%20s don't have full date information %s",
                     self.provider_name, id)
    else:
        biblio_dict["date"] = datetime_published.isoformat()
        del biblio_dict["month"]
        del biblio_dict["day"]

    return biblio_dict
def _extract_biblio(self, page, id=None):
    """Extract bibliographic fields from an Atom-style XML page.

    Reads ``title`` and ``date`` (the ``published`` element) through
    ``provider._extract_from_xml``, then adds:

    * ``authors`` - the last space-separated word of every ``name``
      element, joined with ", " (skipped entirely if any node is unusable);
    * ``year`` - the first four characters of ``date``, when present;
    * ``repository`` - always "arXiv";
    * ``free_fulltext_url`` - built by ``self._get_templated_url`` from
      ``self.aliases_url_template`` and ``id``.

    Returns the resulting mapping.
    """
    biblio_dict = provider._extract_from_xml(
        page,
        {
            'title': ['entry', 'title'],
            'date': ['entry', 'published'],
        },
    )

    name_nodes = provider._find_all_in_xml(page, "name")
    try:
        surnames = []
        for node in name_nodes:
            full_name = node.firstChild.data
            surnames.append(full_name.split(" ")[-1])
        # Only assign once every node has been processed successfully.
        biblio_dict["authors"] = ", ".join(surnames)
    except (AttributeError, TypeError):
        pass

    if "date" in biblio_dict:
        biblio_dict["year"] = biblio_dict["date"][0:4]

    biblio_dict["repository"] = "arXiv"
    fulltext_url = self._get_templated_url(
        self.aliases_url_template, id, "aliases")
    biblio_dict["free_fulltext_url"] = fulltext_url
    return biblio_dict
def _extract_biblio(self, page, id=None):
    """Extract year, title, journal and authors from a PubMed XML page.

    ``year`` (the journal ``PubDate`` year), ``title`` and ``journal`` are
    looked up through ``provider._extract_from_xml``.  ``authors`` is the
    ", "-joined text of every ``LastName`` element and is left unset if
    any of those nodes is unusable.  ``id`` is accepted but not used here.

    Returns the resulting mapping.
    """
    article = ["PubmedArticleSet", "MedlineCitation", "Article"]
    keylists = {
        "year": article + ["Journal", "PubDate", "Year"],
        "title": article + ["ArticleTitle"],
        "journal": article + ["Journal", "Title"],
    }
    biblio_dict = provider._extract_from_xml(page, keylists)

    last_name_nodes = provider._find_all_in_xml(page, "LastName")
    try:
        surnames = [node.firstChild.data for node in last_name_nodes]
        biblio_dict["authors"] = ", ".join(surnames)
    except (AttributeError, TypeError):
        pass

    return biblio_dict
def _extract_biblio(self, page, id=None):
    """Build the biblio mapping for an item from its Atom-style XML page.

    ``title`` and ``date`` come from ``provider._extract_from_xml`` (the
    entry's ``title`` and ``published`` elements).  On top of that the
    method sets ``authors`` (last word of each ``name`` element, joined
    with ", "), ``year`` (first four characters of ``date``),
    ``repository`` ("arXiv") and ``free_fulltext_url`` (from
    ``self._get_templated_url``).  ``authors`` and ``year`` are omitted
    when their source data is missing or unusable.
    """
    wanted = {
        'title': ['entry', 'title'],
        'date': ['entry', 'published'],
    }
    biblio_dict = provider._extract_from_xml(page, wanted)
    author_elems = provider._find_all_in_xml(page, "name")

    try:
        full_names = [elem.firstChild.data for elem in author_elems]
        # Keep only the text after the final space of each name.
        last_words = [name.rsplit(" ", 1)[-1] for name in full_names]
        biblio_dict["authors"] = ", ".join(last_words)
    except (AttributeError, TypeError):
        pass

    try:
        published = biblio_dict["date"]
    except KeyError:
        pass
    else:
        biblio_dict["year"] = published[0:4]

    biblio_dict["repository"] = "arXiv"
    biblio_dict["free_fulltext_url"] = self._get_templated_url(
        self.aliases_url_template,
        id,
        "aliases",
    )
    return biblio_dict