Example #1
0
 def test_collective_author(self):
     """Check the short author string of a record with a collective author."""
     # PMID 21860499 has a collective author instead of individual authors
     self.ids = [21860499]
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     self.assertEqual(first_record['authors_short'], u'National Toxicology Program')
Example #2
0
 def test_utf8(self):
     """
     Check that non-ASCII text in an abstract survives the import.

     Both ids are expected to have UTF-8 text in their abstracts; the first
     returned abstract must contain a Greek small letter alpha (U+03B1).
     """
     self.ids = [23878845, 16080930]
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     # assertIn reports both operands on failure, unlike assertTrue(find() > -1)
     self.assertIn(u'\u03b1', self.fetch.content[0]['abstract'])
Example #3
0
 def test_structured_abstract(self):
     """
     Structured (labelled) XML abstracts must be imported with their section
     labels wrapped in span tags and sections separated by <br>.
     For example: http://www.ncbi.nlm.nih.gov/pubmed/21813367
     """
     self.ids = (21813367, )
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     expected_abstract = u"""<span class='abstract_label'>BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class='abstract_label'>OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class='abstract_label'>METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class='abstract_label'>RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class='abstract_label'>CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
     self.assertEqual(first_record['abstract'], expected_abstract)
Example #4
0
 def test_utf8(self):
     """
     Check that non-ASCII text in an abstract survives the import.

     Both ids are expected to have UTF-8 text in their abstracts; the first
     returned abstract must contain a Greek small letter alpha (U+03B1).
     """
     self.ids = [23878845, 16080930]
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     # assertIn reports both operands on failure, unlike assertTrue(find() > -1)
     self.assertIn(u"\u03b1", self.fetch.content[0]["abstract"])
Example #5
0
 def test_collective_author(self):
     """Check the short author string of a record with a collective author."""
     # PMID 21860499 has a collective author instead of individual authors
     self.ids = [21860499]
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     expected_author = u'National Toxicology Program'
     actual_author = self.fetch.content[0]['authors_short']
     self.assertEqual(actual_author, expected_author)
Example #6
0
 def test_structured_abstract(self):
     """
     Structured (labelled) XML abstracts must be imported with their section
     labels wrapped in span tags and sections separated by <br>.
     For example: http://www.ncbi.nlm.nih.gov/pubmed/21813367
     """
     self.ids = (21813367,)
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     expected_abstract = u"""<span class='abstract_label'>BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class='abstract_label'>OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class='abstract_label'>METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class='abstract_label'>RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class='abstract_label'>CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
     self.assertEqual(first_record["abstract"], expected_abstract)
Example #7
0
 def test_doi(self):
     """
     Check that the DOI of a fetched reference is captured.
     For example: http://www.ncbi.nlm.nih.gov/pubmed/21813142?retmod=xml&report=xml&format=text  # NOQA
     """
     self.ids = (21813142, )
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     self.assertEqual(first_record['doi'], u"10.1016/j.medcli.2011.05.017")
Example #8
0
 def test_book_chapter(self):
     """Check the parsed fields of a book-chapter record (PMID 20301382)."""
     self.ids = (20301382, )
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     record = self.fetch.content[0]
     # the raw XML and the abstract are dropped before the comparison
     for ignored_key in ('xml', 'abstract'):
         record.pop(ignored_key)
     expected_record = {
         'PMID': '20301382',
         'authors_list': [u'DiMauro S', u'Hirano M'],
         'authors_short': u'DiMauro S and Hirano M',
         'citation': u'GeneReviews(®) (1993). Seattle (WA): University of Washington, Seattle.',
         'doi': None,
         'title': 'Mitochondrial DNA Deletion Syndromes',
         'year': 1993,
     }
     self.assertEqual(record, expected_record)
Example #9
0
 def test_book(self):
     """Check the parsed fields of a book record (PMID 26468569)."""
     self.ids = (26468569, )
     self.fetch = PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     record = self.fetch.content[0]
     # the raw XML and the abstract are dropped before the comparison
     for ignored_key in ('xml', 'abstract'):
         record.pop(ignored_key)
     expected_authors = [
         'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures',
         'Committee on Toxicology',
         'Board on Environmental Studies and Toxicology',
         'Board on Life Sciences',
         'Division on Earth and Life Studies',
         'The National Academies of Sciences, Engineering, and Medicine',
     ]
     expected_record = {
         'PMID': '26468569',
         'authors_list': expected_authors,
         'authors_short': u'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures et al.',
         'citation': u'(2015). Washington (DC): National Academies Press (US).',
         'doi': '10.17226/21775',
         'title': 'Application of Modern Toxicology Approaches for Predicting Acute Toxicity for Chemical Defense',
         'year': 2015,
     }
     self.assertEqual(record, expected_record)
Example #10
0
 def test_multiquery(self):
     """With retmax=3 the fetch must take two requests and still pass the result checks."""
     fetcher = PubMedFetch(id_list=self.ids, retmax=3)
     self.fetch = fetcher  # _results_check reads self.fetch
     fetcher.get_content()
     self.assertEqual(fetcher.request_count, 2)
     self._results_check()
Example #11
0
 def test_standard_query(self):
     """With default settings the fetch must take one request and pass the result checks."""
     fetcher = PubMedFetch(id_list=self.ids)
     self.fetch = fetcher  # _results_check reads self.fetch
     fetcher.get_content()
     self.assertEqual(fetcher.request_count, 1)
     self._results_check()
Example #12
0
class PubMedFetchTests(TestCase):
    """
    Make sure that a PubMed fetch returns the expected number of records,
    and that all IDs are identical to what was expected. Example from the
    PubMed quickstart guide here:

        http://www.ncbi.nlm.nih.gov/books/NBK25500/

    """
    def setUp(self):
        """Default id list; individual tests may overwrite ``self.ids``."""
        self.ids = [19008416, 18927361, 18787170, 18487186, 18239126, 18239125]

    def test_standard_query(self):
        """Default settings: one request, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """retmax=3: two requests, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids, retmax=3)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 2)
        self._results_check()

    def test_utf8(self):
        """
        These ids have UTF-8 text in the abstract; make sure we can import
        and the abstract field captures this value.
        """
        self.ids = [23878845, 16080930]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        # assertIn reports both operands on failure, unlike assertTrue(find() > -1)
        self.assertIn(u'\u03b1', self.fetch.content[0]['abstract'])

    def test_collective_author(self):
        """Check the short author string of a record with a collective author."""
        # this doesn't have an individual author but rather a collective author
        self.ids = [21860499]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(self.fetch.content[0]['authors_short'],
                         u'National Toxicology Program')

    def test_structured_abstract(self):
        """
        Some abstracts have structure in XML; make sure HAWC can import these.
        For example: http://www.ncbi.nlm.nih.gov/pubmed/21813367
        """
        self.ids = (21813367, )
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        abstract_text = u"""<span class='abstract_label'>BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class='abstract_label'>OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class='abstract_label'>METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class='abstract_label'>RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class='abstract_label'>CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
        self.assertEqual(self.fetch.content[0]['abstract'], abstract_text)

    def _results_check(self):
        """Shared assertions on ``self.fetch.content`` for the default id list."""
        self.assertEqual(len(self.fetch.content), 6)
        # NOTE(review): this comparison is type-sensitive; it only passes if
        # the fetcher returns PMIDs as ints like self.ids -- confirm.
        self.assertListEqual([item['PMID'] for item in self.fetch.content],
                             self.ids)

        citations = [
            "Science 2008; 322 (5908):1695-9", "Science 2008; 322 (5900):357",
            "Science 2008; 321 (5895):1499-502",
            "Science 2008; 320 (5878):903-9", "Science 2008; 319 (5863):620-4",
            "Science 2008; 319 (5863):617-20"
        ]
        self.assertListEqual([item['citation'] for item in self.fetch.content],
                             citations)

        authors_short = [
            "Varambally S et al.", "Couzin J", "Mao JH et al.",
            "Bromberg KD et al.", "Schlabach MR et al.", "Silva JM et al."
        ]
        self.assertListEqual(
            [item['authors_short'] for item in self.fetch.content],
            authors_short)
Example #13
0
 def test_multiquery(self):
     """Fetching with retmax=3 should need exactly two requests."""
     batched_fetch = PubMedFetch(id_list=self.ids, retmax=3)
     self.fetch = batched_fetch  # _results_check reads self.fetch
     batched_fetch.get_content()
     self.assertEqual(batched_fetch.request_count, 2)
     self._results_check()
Example #14
0
 def test_standard_query(self):
     """Fetching with default settings should need exactly one request."""
     single_fetch = PubMedFetch(id_list=self.ids)
     self.fetch = single_fetch  # _results_check reads self.fetch
     single_fetch.get_content()
     self.assertEqual(single_fetch.request_count, 1)
     self._results_check()
Example #15
0
class PubMedFetchTests(TestCase):
    """
    Make sure that a PubMed fetch returns the expected number of records,
    and that all IDs are identical to what was expected. Example from the
    PubMed quickstart guide here:

        http://www.ncbi.nlm.nih.gov/books/NBK25500/

    """

    def setUp(self):
        """Default id list; individual tests may overwrite ``self.ids``."""
        self.ids = [19008416, 18927361, 18787170, 18487186, 18239126, 18239125]

    def test_standard_query(self):
        """Default settings: one request, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """retmax=3: two requests, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids, retmax=3)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 2)
        self._results_check()

    def test_utf8(self):
        """
        These ids have UTF-8 text in the abstract; make sure we can import
        and the abstract field captures this value.
        """
        self.ids = [23878845, 16080930]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        # assertIn reports both operands on failure, unlike assertTrue(find() > -1)
        self.assertIn(u"\u03b1", self.fetch.content[0]["abstract"])

    def test_collective_author(self):
        """Check the short author string of a record with a collective author."""
        # this doesn't have an individual author but rather a collective author
        self.ids = [21860499]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(self.fetch.content[0]["authors_short"], u"National Toxicology Program")

    def test_structured_abstract(self):
        """
        Some abstracts have structure in XML; make sure HAWC can import these.
        For example: http://www.ncbi.nlm.nih.gov/pubmed/21813367
        """
        self.ids = (21813367,)
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        abstract_text = u"""<span class='abstract_label'>BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class='abstract_label'>OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class='abstract_label'>METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class='abstract_label'>RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class='abstract_label'>CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
        self.assertEqual(self.fetch.content[0]["abstract"], abstract_text)

    def _results_check(self):
        """Shared assertions on ``self.fetch.content`` for the default id list."""
        self.assertEqual(len(self.fetch.content), 6)
        # NOTE(review): this comparison is type-sensitive; it only passes if
        # the fetcher returns PMIDs as ints like self.ids -- confirm.
        self.assertListEqual([item["PMID"] for item in self.fetch.content], self.ids)

        citations = [
            "Science 2008; 322 (5908):1695-9",
            "Science 2008; 322 (5900):357",
            "Science 2008; 321 (5895):1499-502",
            "Science 2008; 320 (5878):903-9",
            "Science 2008; 319 (5863):620-4",
            "Science 2008; 319 (5863):617-20",
        ]
        self.assertListEqual([item["citation"] for item in self.fetch.content], citations)

        authors_short = [
            "Varambally S et al.",
            "Couzin J",
            "Mao JH et al.",
            "Bromberg KD et al.",
            "Schlabach MR et al.",
            "Silva JM et al.",
        ]
        self.assertListEqual([item["authors_short"] for item in self.fetch.content], authors_short)
Example #16
0
class PubMedFetchTests(TestCase):
    """
    Make sure that a PubMed fetch returns the expected number of records,
    and that all IDs are identical to what was expected. Example from the
    PubMed quickstart guide here:

        http://www.ncbi.nlm.nih.gov/books/NBK25500/

    """
    def setUp(self):
        """Default id list; individual tests may overwrite ``self.ids``."""
        self.ids = ['19008416', '18927361', '18787170', '18487186', '18239126', '18239125']

    def test_standard_query(self):
        """Default settings: one request, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """retmax=3: two requests, then the shared result checks."""
        self.fetch = PubMedFetch(id_list=self.ids, retmax=3)
        self.fetch.get_content()
        self.assertEqual(self.fetch.request_count, 2)
        self._results_check()

    def test_utf8(self):
        """
        These ids have UTF-8 text in the abstract; make sure we can import
        and the abstract field captures this value.
        """
        self.ids = [23878845, 16080930]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        # assertIn reports both operands on failure, unlike assertTrue(find() > -1)
        self.assertIn(u'\u03b1', self.fetch.content[0]['abstract'])

    def test_collective_author(self):
        """Check the short author string of a record with a collective author."""
        # this doesn't have an individual author but rather a collective author
        self.ids = [21860499]
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        self.assertEqual(
            self.fetch.content[0]['authors_short'],
            u'National Toxicology Program'
        )

    def test_structured_abstract(self):
        """
        Some abstracts have structure in XML; make sure HAWC can import these.
        For example: http://www.ncbi.nlm.nih.gov/pubmed/21813367
        """
        self.ids = (21813367, )
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        abstract_text = u"""<span class='abstract_label'>BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class='abstract_label'>OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class='abstract_label'>METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class='abstract_label'>RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class='abstract_label'>CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
        self.assertEqual(self.fetch.content[0]['abstract'], abstract_text)

    def test_doi(self):
        """
        Make sure HAWC grabs the DOI
        For example: http://www.ncbi.nlm.nih.gov/pubmed/21813142?retmod=xml&report=xml&format=text  # NOQA
        """
        self.ids = (21813142, )
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        doi = u"10.1016/j.medcli.2011.05.017"
        self.assertEqual(self.fetch.content[0]['doi'], doi)

    def test_book(self):
        """Check the parsed fields of a book record (PMID 26468569)."""
        self.ids = (26468569, )
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        obj = self.fetch.content[0]
        # the raw XML and the abstract are dropped before the comparison
        obj.pop('xml')
        obj.pop('abstract')
        expected = {
            'authors_short': u'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures et al.',
            'doi': '10.17226/21775',
            'year': 2015,
            'PMID': '26468569',
            'title': 'Application of Modern Toxicology Approaches for Predicting Acute Toxicity for Chemical Defense',
            'citation': u'(2015). Washington (DC): National Academies Press (US).',
            'authors_list': [
                'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures',
                'Committee on Toxicology',
                'Board on Environmental Studies and Toxicology',
                'Board on Life Sciences',
                'Division on Earth and Life Studies',
                'The National Academies of Sciences, Engineering, and Medicine'
            ]
        }
        self.assertEqual(obj, expected)

    def test_book_chapter(self):
        """Check the parsed fields of a book-chapter record (PMID 20301382)."""
        self.ids = (20301382, )
        self.fetch = PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        obj = self.fetch.content[0]
        # the raw XML and the abstract are dropped before the comparison
        obj.pop('xml')
        obj.pop('abstract')
        expected = {
            'PMID': '20301382',
            'authors_list': [
                u'DiMauro S',
                u'Hirano M'
            ],
            'authors_short': u'DiMauro S and Hirano M',
            'citation': u'GeneReviews(®) (1993). Seattle (WA): University of Washington, Seattle.',
            'doi': None,
            'title': 'Mitochondrial DNA Deletion Syndromes',
            'year': 1993
        }
        self.assertEqual(obj, expected)

    def _results_check(self):
        """Shared assertions on ``self.fetch.content`` for the default id list."""
        self.assertEqual(len(self.fetch.content), 6)
        self.assertListEqual(
            [item['PMID'] for item in self.fetch.content],
            self.ids
        )

        citations = [
            "Science 2008; 322 (5908):1695-9",
            "Science 2008; 322 (5900):357",
            "Science 2008; 321 (5895):1499-502",
            "Science 2008; 320 (5878):903-9",
            "Science 2008; 319 (5863):620-4",
            "Science 2008; 319 (5863):617-20"
        ]
        self.assertListEqual(
            [item['citation'] for item in self.fetch.content],
            citations
        )

        authors_short = [
            "Varambally S et al.",
            "Couzin J",
            "Mao JH et al.",
            "Bromberg KD et al.",
            "Schlabach MR et al.",
            "Silva JM et al."
        ]
        self.assertListEqual(
            [item['authors_short'] for item in self.fetch.content],
            authors_short
        )