Esempio n. 1
0
def add_missing(query, max_count, randomize=False):
    """Search PubMed for articles and scrape those not yet stored.

    PMIDs returned for ``query`` are compared with the ``pmid`` values
    already present in the ``article`` collection. Up to ``max_count`` of
    the missing PMIDs are downloaded, turned into ``Article`` objects via
    ``Article.from_record``, scraped and tagged.

    :param str query: PubMed query
    :param int max_count: Maximum number of articles to process
    :param bool randomize: Shuffle the missing PMIDs before truncating to
        ``max_count`` so that the processed subset is randomly chosen
    :return: Added article objects
    """
    pmids = pubtools.search_pmids(query)
    # Only the ``pmid`` field is projected; a set gives O(1) membership.
    stored_pmids = {
        article['pmid'] for article in mongo['article'].find({}, {'pmid': 1})
    }

    missing_pmids = set(pmids) - stored_pmids
    # ``Logger.warn`` is a deprecated alias of ``warning`` (removed in 3.13).
    logger.warning('Found %d articles to add.', len(missing_pmids))

    pmids_to_add = list(missing_pmids)
    if randomize:
        # Shuffle *before* truncating: shuffling an already-truncated list
        # would only reorder a fixed subset instead of sampling from all
        # missing PMIDs.
        random.shuffle(pmids_to_add)
    pmids_to_add = pmids_to_add[:max_count]

    records = pubtools.download_pmids(pmids_to_add)

    # One scraper instance is shared by every article in this batch.
    scraper = SCRAPE_CLASS(**SCRAPE_KWARGS)

    added = []

    # NOTE(review): assumes ``download_pmids`` returns one record per PMID,
    # in the same order as requested -- confirm against pubtools.
    for pmid, record in zip(pmids_to_add, records):
        logger.debug('Adding article %s', pmid)
        article = Article.from_record(record)
        article.scrape(scraper)
        article.tag()
        added.append(article)

    return added
Esempio n. 2
0
    def update(cls, query, max_count):
        """Fetch and scrape PubMed articles that are not stored yet.

        PMIDs matching ``query`` that have no counterpart in the ``article``
        collection are truncated to ``max_count``, downloaded, converted with
        ``Article.from_record`` and scraped.

        :param str query: PubMed query
        :param int max_count: Maximum number of articles to process
        :return list: Added article objects

        """
        found_pmids = pubtools.search_pmids(query)

        # Project only the ``pmid`` field of each stored document.
        cursor = mongo['article'].find({}, {'pmid': 1})
        known_pmids = [doc['pmid'] for doc in cursor]

        new_pmids = list(set(found_pmids) - set(known_pmids))
        new_pmids = new_pmids[:max_count]

        downloaded = pubtools.download_pmids(new_pmids)
        scraper = SCRAPE_CLASS(**SCRAPE_KWARGS)

        results = []
        for new_pmid, new_record in zip(new_pmids, downloaded):
            logging.debug('Adding article {}'.format(new_pmid))
            new_article = Article.from_record(new_record)
            new_article.scrape(scraper)
            results.append(new_article)
        return results
Esempio n. 3
0
def add_missing(query, max_count, randomize=False):
    """Search PubMed for articles and scrape those not yet stored.

    The PMIDs found for ``query`` are reduced to those without a matching
    ``pmid`` in the ``article`` collection. At most ``max_count`` of them are
    downloaded, built into ``Article`` objects with ``Article.from_record``,
    scraped and tagged.

    :param str query: PubMed query
    :param int max_count: Maximum number of articles to process
    :param bool randomize: Shuffle the missing PMIDs before truncating to
        ``max_count`` so that the processed subset is randomly chosen
    :return: Added article objects
    """
    pmids = pubtools.search_pmids(query)
    # Only the ``pmid`` field is requested from each stored document.
    stored_pmids = {
        article['pmid']
        for article in mongo['article'].find({}, {'pmid': 1})
    }

    missing_pmids = set(pmids) - stored_pmids
    # Use ``warning``: ``Logger.warn`` is deprecated and gone in Python 3.13.
    logger.warning('Found %d articles to add.', len(missing_pmids))

    pmids_to_add = list(missing_pmids)
    if randomize:
        # Randomize first, then cut to ``max_count``; shuffling after the
        # cut would merely reorder a subset that was already fixed.
        random.shuffle(pmids_to_add)
    pmids_to_add = pmids_to_add[:max_count]

    records = pubtools.download_pmids(pmids_to_add)

    # A single scraper instance is reused for the whole batch.
    scraper = SCRAPE_CLASS(**SCRAPE_KWARGS)

    added = []

    # NOTE(review): relies on ``download_pmids`` yielding records aligned
    # with ``pmids_to_add`` -- confirm against pubtools.
    for pmid, record in zip(pmids_to_add, records):
        logger.debug('Adding article %s', pmid)
        article = Article.from_record(record)
        article.scrape(scraper)
        article.tag()
        added.append(article)

    return added
Esempio n. 4
0
    def test_search_pmids(self):
        """Expect ``search_pmids('1[uid]')`` to raise ``EntrezEmailError``.

        NOTE(review): presumably the error arises because no Entrez email is
        configured for this test case -- confirm against the fixture setup.
        """
        assert_raises(
            pubtools.EntrezEmailError,
            pubtools.search_pmids,
            '1[uid]',
        )
Esempio n. 5
0
    def test_search_pmids(self):
        """Querying by UID ``1`` should yield exactly the PMID list ``['1']``."""
        expected = ['1']
        assert_equal(pubtools.search_pmids('1[uid]'), expected)