Beispiel #1
0
    def run_new_query(self, prior_query):
        """
        Run this object's search term through PubMedSearch and store the outcome.

        The returned dictionary has three keys: "ids" (every ID the search
        returned), "added" and "removed". When ``prior_query`` is falsy, all
        IDs count as added and nothing as removed; otherwise the delta is
        computed against the "ids" stored in ``prior_query.results`` (JSON).

        The dictionary is serialized to ``self.results``, the instance is
        saved, and ``self.create_identifiers()`` is called before returning.

        Raises TooManyPubMedResults if the reported ID count is greater
        than ``self.MAX_QUERY_SIZE``.
        """
        pubmed = PubMedSearch(term=self.search.search_string_text)
        pubmed.get_ids_count()

        # Refuse oversized result sets before fetching any IDs.
        found = pubmed.id_count
        if found > self.MAX_QUERY_SIZE:
            message = (
                "Too many PubMed references found: {0}; reduce query scope to "
                "fewer than {1}"
            ).format(found, self.MAX_QUERY_SIZE)
            raise TooManyPubMedResults(message)

        pubmed.get_ids()
        all_ids = pubmed.ids

        if prior_query:
            previous_ids = json.loads(prior_query.results)['ids']
            delta = pubmed.get_changes_from_previous_search(
                old_ids_list=previous_ids)
            added = list(delta['added'])
            removed = list(delta['removed'])
        else:
            # First run: everything is new, nothing has disappeared.
            added = all_ids
            removed = []

        results = {"ids": all_ids, "added": added, "removed": removed}

        self.results = json.dumps(results)
        self.save()
        self.create_identifiers()
        return results
Beispiel #2
0
    def run_new_query(self, prior_query):
        """
        Run this object's search term through PubMedSearch and store the outcome.

        The returned dictionary has three keys: "ids" (every ID the search
        returned), "added" and "removed". When ``prior_query`` is falsy, all
        IDs count as added and "removed" is an empty list; otherwise both are
        lists computed against the "ids" stored in ``prior_query.results``
        (JSON).

        The dictionary is serialized to ``self.results``, the instance is
        saved, and ``self.create_identifiers()`` is called before returning.

        Raises TooManyPubMedResults if the reported ID count is greater
        than the hard-coded limit of 5000.
        """
        # Create a new search
        search = PubMedSearch(term=self.search.search_string_text)
        search.get_ids_count()

        # Refuse oversized result sets before fetching any IDs.
        query_size_limit = 5000
        if search.id_count > query_size_limit:
            raise TooManyPubMedResults(
                "Too many PubMed references found: {0}; reduce query scope to fewer than {1}"
                .format(search.id_count, query_size_limit))

        search.get_ids()
        # "removed" is always a list (empty on the first run) so consumers
        # can iterate or take len() without special-casing None.
        results = {"ids": search.ids, "added": search.ids, "removed": []}

        if prior_query:
            old_ids_list = json.loads(prior_query.results)['ids']
            changes = search.get_changes_from_previous_search(
                old_ids_list=old_ids_list)
            results['added'] = list(changes['added'])
            results['removed'] = list(changes['removed'])

        self.results = json.dumps(results)
        self.save()
        self.create_identifiers()
        return results
Beispiel #3
0
class PubMedSearchTest(TestCase):
    """
    Make sure that a PubMed search returns the expected number of IDs,
    and that all IDs are identical to what was expected. Example from the
    PubMed quickstart guide here:

        http://www.ncbi.nlm.nih.gov/books/NBK25500/

    """
    def setUp(self):
        # Search term and the six IDs it is expected to return, in order.
        self.term = "science[journal] AND breast cancer AND 2008[pdat]"
        self.results_list = [
            19008416, 18927361, 18787170, 18487186, 18239126, 18239125
        ]

    def test_standard_query(self):
        """A default search should collect all IDs in a single request."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """With retmax=3, the six expected IDs should take two requests."""
        self.search = PubMedSearch(term=self.term, retmax=3)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 2)
        self._results_check()

    def test_changes_from_previous_search(self):
        """Added/removed sets are computed relative to an older ID list."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # 999999 is absent from the expected results; two expected IDs are
        # missing from this older list.
        old_ids_list = [999999, 19008416, 18927361, 18787170, 18487186]
        changes = self.search.get_changes_from_previous_search(
            old_ids_list=old_ids_list)
        self.assertEqual(changes['added'], set([18239126, 18239125]))
        self.assertEqual(changes['removed'], set([999999]))

    def test_complex_query(self):
        """
        Make sure that we can send a very complicated search term
        and that at least a minimum number of results is returned. This is
        commonly done when using MeSH search terms in PubMed.
        """
        self.term = """(monomethyl OR MEP OR mono-n-butyl OR MBP OR mono (3-carboxypropyl) OR mcpp OR monobenzyl OR mbzp OR mono-isobutyl OR mibp OR mono (2-ethylhexyl) OR mono (2-ethyl-5-oxohexyl) OR meoph OR mono (2-ethyl-5-carboxypentyl) OR mecpp OR mepp OR mono (2-ethyl-5-hydroxyhexyl) OR mehp OR mono (2-ethyl-5-oxyhexyl) OR mono (2-ethyl-4-hydroxyhexyl) OR mono (2-ethyl-4-oxyhexyl) OR mono (2-carboxymethyl) OR mmhp OR mehp OR dehp OR 2-ethylhexanol OR (phthalic acid)) AND (liver OR hepato* OR hepat*) AND ((cell proliferation) OR (cell growth) OR (dna replication) OR (dna synthesis) OR (replicative dna synthesis) OR mitosis OR (cell division) OR (growth response) OR hyperplasia OR hepatomegaly) AND (mouse OR rat OR hamster OR rodent OR murine OR Mus musculus or Rattus)"""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # Compare the number of IDs, not the list itself: `list >= int`
        # raises TypeError on Python 3 and is always true on Python 2.
        self.assertGreaterEqual(len(self.search.ids), 212)

    def _results_check(self):
        """Shared check: exactly the six expected IDs, in the expected order."""
        self.assertEqual(self.search.id_count, 6)
        self.assertListEqual(self.search.ids, self.results_list)
Beispiel #4
0
class PubMedSearchTest(TestCase):
    """
    Make sure that a PubMed search returns the expected number of IDs,
    and that all IDs are identical to what was expected. Example from the
    PubMed quickstart guide here:

        http://www.ncbi.nlm.nih.gov/books/NBK25500/

    """
    def setUp(self):
        # Search term and the six IDs it is expected to return, in order.
        self.term = "science[journal] AND breast cancer AND 2008[pdat]"
        self.results_list = [19008416, 18927361, 18787170, 18487186, 18239126, 18239125]

    def test_standard_query(self):
        """A default search should collect all IDs in a single request."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """With retmax=3, the six expected IDs should take two requests."""
        self.search = PubMedSearch(term=self.term, retmax=3)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 2)
        self._results_check()

    def test_changes_from_previous_search(self):
        """Added/removed sets are computed relative to an older ID list."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # 999999 is absent from the expected results; two expected IDs are
        # missing from this older list.
        old_ids_list = [999999, 19008416, 18927361, 18787170, 18487186]
        changes = self.search.get_changes_from_previous_search(old_ids_list=old_ids_list)
        self.assertEqual(changes['added'], set([18239126, 18239125]))
        self.assertEqual(changes['removed'], set([999999]))

    def test_complex_query(self):
        """
        Make sure that we can send a very complicated search term
        and that at least a minimum number of results is returned. This is
        commonly done when using MeSH search terms in PubMed.
        """
        self.term = """(monomethyl OR MEP OR mono-n-butyl OR MBP OR mono (3-carboxypropyl) OR mcpp OR monobenzyl OR mbzp OR mono-isobutyl OR mibp OR mono (2-ethylhexyl) OR mono (2-ethyl-5-oxohexyl) OR meoph OR mono (2-ethyl-5-carboxypentyl) OR mecpp OR mepp OR mono (2-ethyl-5-hydroxyhexyl) OR mehp OR mono (2-ethyl-5-oxyhexyl) OR mono (2-ethyl-4-hydroxyhexyl) OR mono (2-ethyl-4-oxyhexyl) OR mono (2-carboxymethyl) OR mmhp OR mehp OR dehp OR 2-ethylhexanol OR (phthalic acid)) AND (liver OR hepato* OR hepat*) AND ((cell proliferation) OR (cell growth) OR (dna replication) OR (dna synthesis) OR (replicative dna synthesis) OR mitosis OR (cell division) OR (growth response) OR hyperplasia OR hepatomegaly) AND (mouse OR rat OR hamster OR rodent OR murine OR Mus musculus or Rattus)"""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # Compare the number of IDs, not the list itself: `list >= int`
        # raises TypeError on Python 3 and is always true on Python 2.
        self.assertGreaterEqual(len(self.search.ids), 212)

    def _results_check(self):
        """Shared check: exactly the six expected IDs, in the expected order."""
        self.assertEqual(self.search.id_count, 6)
        self.assertListEqual(self.search.ids, self.results_list)