Example #1
0
    def _extract_biblio_efetch(self, page, id=None):
        """Extract bibliographic fields from an XML ``page`` into a dict.

        The tag names used (PubmedArticleSet, MedlineCitation, ...) suggest a
        PubMed efetch response -- NOTE(review): confirm against the caller.

        Args:
            page: XML text. It is tested for the substring "ArticleDate" and
                then handed to the ``provider`` XML helpers.
            id: Identifier of the item; only used in the debug log message.

        Returns:
            The mapping produced by ``provider._extract_from_xml`` with these
            adjustments applied on a best-effort basis:
              * "authors": ", "-joined text of every LastName element.
              * "keywords": "; "-joined text of every DescriptorName element
                (only when at least one is found).
              * "issn": hyphens removed.
              * "date": ISO-8601 string, only when year, month and day form
                a valid calendar date; "month" and "day" are then removed.
              * "year": converted to a string when present.
        """
        # Use the ArticleDate fields when that tag name occurs anywhere in the
        # page, otherwise fall back to the PubDate fields.
        date_tag = "ArticleDate" if "ArticleDate" in page else "PubDate"
        article_path = ["PubmedArticleSet", "MedlineCitation", "Article"]
        dict_of_keylists = {
            "year": article_path + [date_tag, "Year"],
            "month": article_path + [date_tag, "Month"],
            "day": article_path + [date_tag, "Day"],
            "title": article_path + ["ArticleTitle"],
            "abstract": article_path + ["Abstract", "AbstractText"],
            "issn": article_path + ["Journal", "ISSN"],
            "journal": article_path + ["Journal", "Title"],
        }
        biblio_dict = provider._extract_from_xml(page, dict_of_keylists)

        # Authors are all-or-nothing: one element without text content (or a
        # non-iterable result) leaves "authors" unset rather than failing.
        dom_authors = provider._find_all_in_xml(page, "LastName")
        try:
            biblio_dict["authors"] = ", ".join(
                [author.firstChild.data for author in dom_authors])
        except (AttributeError, TypeError):
            pass

        mesh_list = provider._find_all_in_xml(page, "DescriptorName")
        try:
            if mesh_list:
                biblio_dict["keywords"] = "; ".join(
                    [mesh_term.firstChild.data for mesh_term in mesh_list])
        except (AttributeError, TypeError):
            pass

        # Store the ISSN without its hyphen; skip when it is missing or is
        # not a string (AttributeError on .replace).
        try:
            biblio_dict["issn"] = biblio_dict["issn"].replace("-", "")
        except (AttributeError, KeyError):
            pass

        try:
            datetime_published = datetime.datetime(year=biblio_dict["year"],
                                                   month=biblio_dict["month"],
                                                   day=biblio_dict["day"])
            biblio_dict["date"] = datetime_published.isoformat()
            biblio_dict["year"] = re.sub(r"\D", "", str(biblio_dict["year"]))
            del biblio_dict["month"]
            del biblio_dict["day"]
        except (AttributeError, TypeError, KeyError, ValueError):
            # KeyError: a date part is missing. TypeError: a part is not an
            # integer (e.g. a month name). ValueError: the parts are integers
            # but out of range (e.g. day 31 in a 30-day month). In every case
            # the record is kept without a "date" entry.
            logger.debug(u"%20s don't have full date information %s" % (self.provider_name, id))

        try:
            biblio_dict["year"] = str(biblio_dict["year"])
        except KeyError:
            pass

        return biblio_dict
Example #2
0
    def _extract_biblio(self, page, id=None):
        """Extract title, journal, authors and publication date from ``page``.

        The tag names used (PubmedArticleSet, MedlineCitation, ...) suggest a
        PubMed XML record -- NOTE(review): confirm against the caller.

        Args:
            page: XML text handed to the ``provider`` XML helpers.
            id: Identifier of the item; only used in the debug log message.

        Returns:
            The mapping produced by ``provider._extract_from_xml`` plus:
              * "authors": ", "-joined text of every LastName element, when
                all of them have text content.
              * "date": ISO-8601 string built from the ArticleDate
                year/month/day, only when they form a valid calendar date;
                "month" and "day" are then removed from the mapping.
        """
        dict_of_keylists = {"year": ["PubmedArticleSet", "MedlineCitation", "Article", "ArticleDate", "Year"],
                            "month": ["PubmedArticleSet", "MedlineCitation", "Article", "ArticleDate", "Month"],
                            "day": ["PubmedArticleSet", "MedlineCitation", "Article", "ArticleDate", "Day"],
                            "title": ["PubmedArticleSet", "MedlineCitation", "Article", "ArticleTitle"],
                            "journal": ["PubmedArticleSet", "MedlineCitation", "Article", "Journal", "Title"],
                            }
        biblio_dict = provider._extract_from_xml(page, dict_of_keylists)

        # Authors are all-or-nothing: one element without text content (or a
        # non-iterable result) leaves "authors" unset rather than failing.
        dom_authors = provider._find_all_in_xml(page, "LastName")
        try:
            biblio_dict["authors"] = ", ".join(
                [author.firstChild.data for author in dom_authors])
        except (AttributeError, TypeError):
            pass

        try:
            datetime_published = datetime.datetime(year=biblio_dict["year"],
                                                   month=biblio_dict["month"],
                                                   day=biblio_dict["day"])
            biblio_dict["date"] = datetime_published.isoformat()
            del biblio_dict["month"]
            del biblio_dict["day"]
        except (AttributeError, TypeError, KeyError, ValueError):
            # KeyError: a date part is missing. TypeError: a part is not an
            # integer. ValueError: the parts are integers but out of range.
            # In every case the record is returned without a "date" entry.
            logger.debug("%20s don't have full date information %s" % (self.provider_name, id))
        return biblio_dict
Example #3
0
    def _extract_biblio(self, page, id=None):
        """Collect title, date, authors and year from an entry-style XML page.

        Args:
            page: XML text handed to the ``provider`` XML helpers.
            id: Identifier substituted into ``self.aliases_url_template`` to
                build the free full-text URL.

        Returns:
            The mapping produced by ``provider._extract_from_xml`` for the
            "title" and "date" fields, extended with "authors" and "year"
            where available, plus the fixed "repository" value "arXiv" and
            a "free_fulltext_url".
        """
        biblio_dict = provider._extract_from_xml(
            page,
            {
                'title': ['entry', 'title'],
                'date': ['entry', 'published'],
            },
        )
        name_nodes = provider._find_all_in_xml(page, "name")

        # All-or-nothing: if any <name> node lacks text content the whole
        # "authors" field is left out.
        try:
            last_tokens = []
            for node in name_nodes:
                full_name = node.firstChild.data
                # Keep only the last space-separated token of each name.
                last_tokens.append(full_name.split(" ")[-1])
            biblio_dict["authors"] = ", ".join(last_tokens)
        except (AttributeError, TypeError):
            pass

        # The year is the first four characters of the date value.
        try:
            published = biblio_dict["date"]
            biblio_dict["year"] = published[0:4]
        except KeyError:
            pass

        fulltext_url = self._get_templated_url(
            self.aliases_url_template, id, "aliases")
        biblio_dict["repository"] = "arXiv"
        biblio_dict["free_fulltext_url"] = fulltext_url

        return biblio_dict
Example #4
0
 def _extract_biblio(self, page, id=None):
     """Extract year, title, journal and authors from an XML ``page``.

     The tag names used (PubmedArticleSet, MedlineCitation, ...) suggest a
     PubMed XML record -- NOTE(review): confirm against the caller.

     Args:
         page: XML text handed to the ``provider`` XML helpers.
         id: Unused by this method.

     Returns:
         The mapping produced by ``provider._extract_from_xml``, with an
         "authors" entry (", "-joined LastName texts) added when every
         LastName element has text content.
     """
     article_path = ["PubmedArticleSet", "MedlineCitation", "Article"]
     dict_of_keylists = {
         "year": article_path + ["Journal", "PubDate", "Year"],
         "title": article_path + ["ArticleTitle"],
         "journal": article_path + ["Journal", "Title"],
     }
     biblio_dict = provider._extract_from_xml(page, dict_of_keylists)

     last_name_nodes = provider._find_all_in_xml(page, "LastName")
     # All-or-nothing: one node without text content leaves "authors" unset.
     try:
         last_names = []
         for node in last_name_nodes:
             last_names.append(node.firstChild.data)
         biblio_dict["authors"] = ", ".join(last_names)
     except (AttributeError, TypeError):
         pass

     return biblio_dict
Example #5
0
    def _extract_biblio(self, page, id=None):
        """Build the bibliographic mapping for an entry-style XML ``page``.

        Args:
            page: XML text handed to the ``provider`` XML helpers.
            id: Identifier substituted into ``self.aliases_url_template`` to
                build the free full-text URL.

        Returns:
            The mapping produced by ``provider._extract_from_xml`` ("title"
            and "date" lookups), with "authors" and "year" added when they
            can be derived, and with "repository" set to "arXiv" and
            "free_fulltext_url" set from the aliases URL template.
        """
        field_paths = {
            'title': ['entry', 'title'],
            'date': ['entry', 'published'],
        }
        biblio_dict = provider._extract_from_xml(page, field_paths)
        name_nodes = provider._find_all_in_xml(page, "name")

        # Each author is reduced to the last space-separated token of the
        # name text; any node without text content drops the whole field.
        try:
            biblio_dict["authors"] = ", ".join(
                node.firstChild.data.split(" ")[-1] for node in name_nodes
            )
        except (AttributeError, TypeError):
            pass

        # The year is the leading four characters of the date value.
        try:
            date_value = biblio_dict["date"]
        except KeyError:
            pass
        else:
            biblio_dict["year"] = date_value[0:4]

        biblio_dict["repository"] = "arXiv"
        fulltext_url = self._get_templated_url(
            self.aliases_url_template, id, "aliases")
        biblio_dict["free_fulltext_url"] = fulltext_url

        return biblio_dict