Example #1
0
    def parse_article_xml(self, document):
        """
        Parse article XML and store the extracted values on this object.

        document is passed to parser.parse_document(); the resulting soup
        is then queried for the DOI, publication date, title, article type,
        authors, related articles, PoA flag and display channel, and each
        value is assigned to an attribute on self.

        Returns True when every step completed, or False when any step
        raised an exception. Attributes assigned before the failing step
        keep their new values.
        """

        try:
            soup = parser.parse_document(document)
            self.doi = parser.doi(soup)
            # The DOI-derived values are only computed when a DOI was found
            if self.doi:
                self.doi_id = self.get_doi_id(self.doi)
                self.doi_url = self.get_doi_url(self.doi)
                self.lens_url = self.get_lens_url(self.doi)
                self.tweet_url = self.get_tweet_url(self.doi)

            self.pub_date = parser.pub_date(soup)
            self.pub_date_timestamp = parser.pub_date_timestamp(soup)

            self.article_title = parser.title(soup)
            self.article_type = parser.article_type(soup)

            self.authors = parser.authors(soup)
            # NOTE(review): the result is stored under the same attribute
            # name that is called on the right-hand side, so a later call
            # on the same instance may look up the stored value instead of
            # the callable - confirm whether that is intended
            self.authors_string = self.authors_string(self.authors)

            self.related_articles = parser.related_article(soup)

            self.is_poa = parser.is_poa(soup)

            #self.subject_area = self.parse_subject_area(soup)

            self.display_channel = parser.display_channel(soup)

            return True
        except Exception:
            # Best-effort parse: report failure to the caller instead of
            # raising. Catching Exception rather than using a bare except
            # lets KeyboardInterrupt and SystemExit propagate.
            return False
 def profile_article(self, document):
     """
     Temporary: profile the article (the original note says the values
     come from folder names in the test data set, and that real code
     should still return the same values).

     Returns a tuple (fid, status, version, volume): fid is the text
     after the last '.' of the DOI, status is 'poa' or 'vor', version is
     self.version_number(document) and volume is parser.volume(soup).
     """
     article_soup = self.article_soup(self.article_xml_file())

     # elife id / doi id / manuscript id: last dot-separated DOI part
     doi_parts = parser.doi(article_soup).split('.')
     fid = doi_parts[-1]

     # article status: only an exact True result counts as PoA
     status = 'poa' if parser.is_poa(article_soup) is True else 'vor'

     # version, then volume
     version = self.version_number(document)
     volume = parser.volume(article_soup)

     return (fid, status, version, volume)
Example #3
0
    def profile_article(self, document):
        """
        Temporary: profile the article (per the original note, by folder
        names in the test data set; real code should return the same
        values).

        Returns the tuple (fid, status, version, volume).
        """
        xml_soup = self.article_soup(self.article_xml_file())

        # elife id / doi id / manuscript id is whatever follows the final
        # '.' in the DOI
        article_doi = parser.doi(xml_soup)
        fid = article_doi.split('.')[-1]

        # article status: 'poa' only when is_poa() returns exactly True
        poa_flag = parser.is_poa(xml_soup)
        if poa_flag is True:
            status = 'poa'
        else:
            status = 'vor'

        # version is derived from the document argument, volume from the XML
        version = self.version_number(document)
        volume = parser.volume(xml_soup)

        profile = (fid, status, version, volume)
        return profile
Example #4
0
    def parse_article_xml(self, document):
        """
        Parse article XML and store the extracted values on this object.

        document is passed to parser.parse_document(); the resulting soup
        is then queried for the DOI, publication date, title, article type,
        authors, related articles, PoA flag and display channel, and each
        value is assigned to an attribute on self.

        Returns True when every step completed, or False when any step
        raised an exception. Attributes assigned before the failing step
        keep their new values.
        """

        try:
            soup = parser.parse_document(document)
            self.doi = parser.doi(soup)
            # The DOI-derived values are only computed when a DOI was found
            if self.doi:
                self.doi_id = self.get_doi_id(self.doi)
                self.doi_url = self.get_doi_url(self.doi)
                self.lens_url = self.get_lens_url(self.doi)
                self.tweet_url = self.get_tweet_url(self.doi)

            self.pub_date = parser.pub_date(soup)
            self.pub_date_timestamp = parser.pub_date_timestamp(soup)

            self.article_title = parser.title(soup)
            self.article_type = parser.article_type(soup)

            self.authors = parser.authors(soup)
            self.authors_string = self.get_authors_string(self.authors)

            self.related_articles = parser.related_article(soup)

            self.is_poa = parser.is_poa(soup)

            #self.subject_area = self.parse_subject_area(soup)

            self.display_channel = parser.display_channel(soup)

            return True
        except Exception:
            # Best-effort parse: report failure to the caller instead of
            # raising. Catching Exception rather than using a bare except
            # lets KeyboardInterrupt and SystemExit propagate.
            return False
Example #5
0
 def test_basic_fetching_of_common_attributes(self):
     """basic extraction of common values from a JATS-NLM XML article"""
     # Title: the expected value contains an embedded newline followed by
     # a run of spaces
     expected_title = u"Bacterial regulation of colony development in the closest living\n                    relatives of animals"
     self.assertEqual(parser.title(self.soup), expected_title)

     # DOI
     expected_doi = u"10.7554/eLife.00013"
     self.assertEqual(parser.doi(self.soup), expected_doi)

     # Keywords: the first entry is expected with surrounding newlines
     expected_keywords = [
         u"\nSalpingoeca rosetta\n",
         u"Algoriphagus",
         u"bacterial sulfonolipid",
         u"multicellular development",
     ]
     self.assertEqual(parser.keywords(self.soup), expected_keywords)
Example #6
0
def doi(item):
    """Return whatever parseJATS.doi() yields for item."""
    found = parseJATS.doi(item)
    return found
Example #7
0
def build_article_from_xml(article_xml_filename, detail="brief"):
    """
    Parse JATS XML with elifetools parser, and populate an
    eLifePOA article object.

    Basic data crossref needs: article_id, doi, title, contributors with
    names set.

    article_xml_filename is handed to parser.parse_document().
    detail is forwarded to parser.contributors() and
    parser.authors_non_byline(): "brief" is normally enough, "full" will
    populate all the contributor affiliations that are linked by xref tags.

    Returns a tuple (article, error_count). error_count is initialised to
    0 and is never incremented inside this function.
    """

    error_count = 0

    soup = parser.parse_document(article_xml_filename)

    # Get DOI
    doi = parser.doi(soup)

    # Create the article object; the title is filled in further down
    article = eLifePOA(doi, title=None)

    # Related articles
    article.related_articles = build_related_articles(parser.related_article(soup))

    # Get publisher_id and set object manuscript value
    article.manuscript = parser.publisher_id(soup)

    # Set the articleType, only when one was found
    article_type = parser.article_type(soup)
    if article_type:
        article.articleType = article_type

    # title
    article.title = parser.full_title(soup)

    # abstract
    article.abstract = clean_abstract(parser.full_abstract(soup))

    # digest
    article.digest = clean_abstract(parser.full_digest(soup))

    # elocation-id
    article.elocation_id = parser.elocation_id(soup)

    # contributors: byline authors are the contributors whose type is
    # 'author' or 'on-behalf-of'. A list comprehension is used instead of
    # filter() so the result is a concrete list on Python 2 and Python 3.
    all_contributors = parser.contributors(soup, detail)
    author_contributors = [
        contributor for contributor in all_contributors
        if contributor.get('type') in ('author', 'on-behalf-of')
    ]
    contributors = build_contributors(author_contributors, "author")

    # non-byline authors are built separately and appended after the
    # byline contributors
    authors = parser.authors_non_byline(soup, detail)
    contributors_non_byline = build_contributors(authors, "author non-byline")
    article.contributors = contributors + contributors_non_byline

    # license href (local name avoids shadowing the `license` builtin)
    article_license = eLifeLicense()
    article_license.href = parser.license_url(soup)
    article.license = article_license

    # article_category
    article.article_categories = parser.category(soup)

    # keywords
    article.author_keywords = parser.keywords(soup)

    # research organisms
    article.research_organisms = parser.research_organism(soup)

    # funding awards
    article.funding_awards = build_funding(parser.full_award_groups(soup))

    # references or citations
    article.ref_list = build_ref_list(parser.refs(soup))

    # components with component DOI
    article.component_list = build_components(parser.components(soup))

    # History dates: each one is added only when the parser returned a value
    for date_type in ("received", "accepted"):
        history_date = parser.history_date(soup, date_type)
        if history_date:
            article.add_date(eLifeDate(date_type, history_date))

    # Pub date
    pub_date = parser.pub_date(soup)
    if pub_date:
        article.add_date(eLifeDate("pub", pub_date))

    # Set the volume if present
    volume = parser.volume(soup)
    if volume:
        article.volume = volume

    article.is_poa = parser.is_poa(soup)

    return article, error_count
Example #8
0
def doi(item):
    """Delegate to parseJATS.doi() and return its result unchanged."""
    result = parseJATS.doi(item)
    return result