Exemple #1
0
    def create_identifiers(self):
        """Create PubMed identifiers for added PMIDs missing from the database.

        Reads the ``added`` list from the JSON document in ``self.results``,
        drops every id that already has a PubMed ``Identifiers`` row, then
        fetches and bulk-creates the remainder one block at a time.
        """
        new_ids = json.loads(self.results)['added']
        existing_pmids = set(
            Identifiers.objects.filter(
                database=constants.PUBMED,
                unique_id__in=new_ids,
            ).values_list('unique_id', flat=True))
        # NOTE(review): the set difference only removes existing rows when the
        # JSON ids and the stored unique_id values share a type -- confirm.
        ids_to_add = list(set(new_ids) - existing_pmids)
        ids_to_add_len = len(ids_to_add)

        # Integer block size: slice bounds come straight from range(), with no
        # float arithmetic or ceil()/int() round-trips.
        block_size = 1000
        logging.debug("%s IDs to be added", ids_to_add_len)
        for start_index in range(0, ids_to_add_len, block_size):
            end_index = min(start_index + block_size, ids_to_add_len)
            logging.debug("Building from %s to %s", start_index, end_index)
            fetch = pubmed.PubMedFetch(
                id_list=ids_to_add[start_index:end_index],
                retmax=block_size)
            # One bulk insert per fetched block.
            Identifiers.objects.bulk_create([
                Identifiers(unique_id=item['PMID'],
                            database=constants.PUBMED,
                            content=json.dumps(item))
                for item in fetch.get_content()
            ])
 def test_collective_author(self):
     """Check ``authors_short`` for a record credited to a collective author."""
     # PMID 21860499 lists a collective author rather than individual ones
     self.ids = [21860499]
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     expected_short = 'National Toxicology Program'
     self.assertEqual(first_record['authors_short'], expected_short)
Exemple #3
0
    def get_pubmed_identifiers(self, pmids: List[int]):
        """Get or create one PubMed identifier for each requested PMID.

        PMIDs without a stored identifier are fetched from PubMed and saved
        with a single bulk insert before the final lookup is returned.

        Args:
            pmids (List[int]): A list of pubmed identifiers

        Returns:
            The result of filtering ``self`` on the PubMed database and the
            requested ids.
        """
        Identifiers = apps.get_model("lit", "Identifiers")

        # Work out which ids are missing; cast to str and back to mirror db fields
        pmids_str = [str(pmid) for pmid in pmids]
        already_stored = set(
            self.filter(
                database=constants.PUBMED,
                unique_id__in=pmids_str,
            ).values_list("unique_id", flat=True)
        )
        need_import = [int(pmid) for pmid in set(pmids_str) - already_stored]

        # Fetch the missing records from PubMed
        fetch = pubmed.PubMedFetch(need_import)

        # Build and persist the new Identifier rows
        new_identifiers = []
        for item in fetch.get_content():
            new_identifiers.append(
                Identifiers(
                    unique_id=str(item["PMID"]),
                    database=constants.PUBMED,
                    content=json.dumps(item),
                )
            )
        Identifiers.objects.bulk_create(new_identifiers)

        return self.filter(database=constants.PUBMED, unique_id__in=pmids_str)
 def test_book(self):
     """Check the parsed fields of the record fetched for PMID 26468569."""
     self.ids = (26468569, )
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     obj = self.fetch.content[0]
     # the raw xml and the abstract are left out of the comparison
     for skipped_key in ('xml', 'abstract'):
         obj.pop(skipped_key)
     expected = {
         'PMID': '26468569',
         'year': 2015,
         'doi': '10.17226/21775',
         'title': 'Application of Modern Toxicology Approaches for Predicting Acute Toxicity for Chemical Defense',
         'citation': '(2015). Washington (DC): National Academies Press (US).',
         'authors_short': 'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures et al.',
         'authors_list': [
             'Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures',
             'Committee on Toxicology',
             'Board on Environmental Studies and Toxicology',
             'Board on Life Sciences',
             'Division on Earth and Life Studies',
             'The National Academies of Sciences, Engineering, and Medicine',
         ],
     }
     self.assertEqual(obj, expected)
Exemple #5
0
 def test_book(self):
     """Check the parsed fields of the record fetched for PMID 26468569."""
     self.ids = (26468569, )
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     obj = self.fetch.content[0]
     # the raw xml and the abstract are left out of the comparison
     for skipped_key in ("xml", "abstract"):
         obj.pop(skipped_key)
     expected = {
         "PMID": 26468569,
         "year": 2015,
         "doi": "10.17226/21775",
         "title": "Application of Modern Toxicology Approaches for Predicting Acute Toxicity for Chemical Defense",
         "citation": "(2015). Washington (DC): National Academies Press (US).",
         "authors_short": "Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures et al.",
         "authors": [
             "Committee on Predictive-Toxicology Approaches for Military Assessments of Acute Exposures",
             "Committee on Toxicology",
             "Board on Environmental Studies and Toxicology",
             "Board on Life Sciences",
             "Division on Earth and Life Studies",
             "The National Academies of Sciences, Engineering, and Medicine",
         ],
     }
     assert obj == expected
 def test_utf8(self):
     """Records with UTF-8 text in the abstract import with that text kept."""
     # both ids have UTF-8 text in their abstracts
     self.ids = [23878845, 16080930]
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     # the first abstract must contain the Greek small letter alpha
     first_abstract = self.fetch.content[0]['abstract']
     self.assertTrue('\u03b1' in first_abstract)
    def test_doi(self):
        """
        Check that the fetched record carries the expected DOI.

        Ex: https://www.ncbi.nlm.nih.gov/pubmed/21813142?retmod=xml&report=xml&format=text  # NOQA
        """
        expected_doi = "10.1016/j.medcli.2011.05.017"
        self.ids = (21813142, )
        self.fetch = pubmed.PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        first_record = self.fetch.content[0]
        self.assertEqual(first_record['doi'], expected_doi)
Exemple #8
0
    def test_abstract_with_child_text(self):
        """
        Check that an abstract containing html spans such as <i></i> is captured in full.

        Example: https://www.ncbi.nlm.nih.gov/pubmed/29186030
        """
        # show the complete diff if the long abstract does not match
        self.maxDiff = None
        abstract_text = """CYP353D1v2 is a cytochrome P450 related to imidacloprid resistance in Laodelphax striatellus. This work was conducted to examine the ability of CYP353D1v2 to metabolize other insecticides. Carbon monoxide difference spectra analysis indicates that CYP353D1v2 was successfully expressed in insect cell Sf9. The catalytic activity of CYP353D1v2 relating to degrading buprofezin, chlorpyrifos, and deltamethrin was tested by measuring substrate depletion and analyzing the formation of metabolites. The results showed the nicotinamide-adenine dinucleotide phosphate (NADPH)-dependent depletion of buprofezin (eluting at 8.7 min) and parallel formation of an unknown metabolite (eluting 9.5 min). However, CYP353D1v2 is unable to metabolize deltamethrin and chlorpyrifos. The recombinant CYP353D1v2 protein efficiently catalyzed the model substrate p-nitroanisole with a maximum velocity of 9.24 nmol/min/mg of protein and a Michaelis constant of Km = 6.21 µM. In addition, imidacloprid was metabolized in vitro by the recombinant CYP353D1v2 microsomes (catalytic constant Kcat) 0.064 pmol/min/pmol P450, Km = 6.41 µM. The mass spectrum of UPLC-MS analysis shows that the metabolite was a product of buprofezin, which was buprofezin sulfone. This result provided direct evidence that L. striatellus cytochrome P450 CYP353D1v2 is capable of metabolizing imidacloprid and buprofezin."""  # NOQA
        self.ids = (29186030, )
        self.fetch = pubmed.PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        fetched_abstract = self.fetch.content[0]["abstract"]
        self.assertEqual(fetched_abstract, abstract_text)
    def test_structured_abstract(self):
        """
        Check that a structured abstract is captured with its section labels.

        Example: https://www.ncbi.nlm.nih.gov/pubmed/21813367/
        """
        # show the complete diff if the long abstract does not match
        self.maxDiff = None
        abstract_text = """<span class="abstract_label">BACKGROUND: </span>People living or working in eastern Ohio and western West Virginia have been exposed to perfluorooctanoic acid (PFOA) released by DuPont Washington Works facilities.<br><span class="abstract_label">OBJECTIVES: </span>Our objective was to estimate historical PFOA exposures and serum concentrations experienced by 45,276 non-occupationally exposed participants in the C8 Health Project who consented to share their residential histories and a 2005-2006 serum PFOA measurement.<br><span class="abstract_label">METHODS: </span>We estimated annual PFOA exposure rates for each individual based on predicted calibrated water concentrations and predicted air concentrations using an environmental fate and transport model, individual residential histories, and maps of public water supply networks. We coupled individual exposure estimates with a one-compartment absorption, distribution, metabolism, and excretion (ADME) model to estimate time-dependent serum concentrations.<br><span class="abstract_label">RESULTS: </span>For all participants (n = 45,276), predicted and observed median serum concentrations in 2005-2006 are 14.2 and 24.3 ppb, respectively [Spearman's rank correlation coefficient (r(s)) = 0.67]. For participants who provided daily public well water consumption rate and who had the same residence and workplace in one of six municipal water districts for 5 years before the serum sample (n = 1,074), predicted and observed median serum concentrations in 2005-2006 are 32.2 and 40.0 ppb, respectively (r(s) = 0.82).<br><span class="abstract_label">CONCLUSIONS: </span>Serum PFOA concentrations predicted by linked exposure and ADME models correlated well with observed 2005-2006 human serum concentrations for C8 Health Project participants. These individualized retrospective exposure and serum estimates are being used in a variety of epidemiologic studies being conducted in this region."""  # NOQA
        self.ids = (21813367, )
        self.fetch = pubmed.PubMedFetch(id_list=self.ids)
        self.fetch.get_content()
        fetched_abstract = self.fetch.content[0]['abstract']
        self.assertEqual(fetched_abstract, abstract_text)
Exemple #10
0
def update_pubmed_content(ids):
    """Refresh stored PubMed identifier content with freshly fetched data.

    Each fetched record is serialised to JSON and written to the ``content``
    field of the identifier rows that match its PMID.
    """
    Identifiers = apps.get_model('lit', 'identifiers')
    for record in pubmed.PubMedFetch(ids).get_content():
        serialised = json.dumps(record)
        matching_rows = Identifiers.objects.filter(
            unique_id=record['PMID'],
            database=constants.PUBMED,
        )
        matching_rows.update(content=serialised)
Exemple #11
0
def update_pubmed_content(ids: List[int]):
    """Refresh stored PubMed identifier content with freshly fetched data.

    Each fetched record is serialised to JSON and written to the ``content``
    field of the identifier rows that match its PMID. Requested ids whose
    content is still empty afterwards are marked with a failed status.
    """
    Identifiers = apps.get_model("lit", "identifiers")
    for record in pubmed.PubMedFetch(ids).get_content():
        serialised = json.dumps(record)
        matching_rows = Identifiers.objects.filter(
            unique_id=str(record["PMID"]),
            database=constants.PUBMED,
        )
        matching_rows.update(content=serialised)

    # rows for the requested ids that still have empty content get a failed status
    requested_ids = list(map(str, ids))
    still_empty = Identifiers.objects.filter(
        unique_id__in=requested_ids,
        database=constants.PUBMED,
        content="",
    )
    still_empty.update(content='{"status": "failed"}')
Exemple #12
0
def getMetaData(papersToFind):
    """Populate each paper's ``s`` record with metadata fetched from PubMed.

    The PubMed ids are read from ``paper.s.pubmed_id`` and fetched in one
    call; fetched references are paired with papers by position.

    Args:
        papersToFind: Sequence of objects exposing an ``s`` attribute whose
            ``pubmed_id`` is concatenated onto a URL (so it must be a str).

    Returns:
        The same ``papersToFind`` object, with the ``s`` records updated in
        place.
    """
    ids = [paper.s.pubmed_id for paper in papersToFind]

    fetch = pubmed.PubMedFetch(id_list=ids)
    refs = fetch.get_content()

    # NOTE(review): pairing by position assumes the fetch returns one
    # reference per requested id, in request order -- confirm.
    for paper, ref in zip(papersToFind, refs):
        record = paper.s
        record.abstract = ref['abstract']
        record.paper_authors = ', '.join(ref['authors'])
        record.title = ref['title']
        record.source_url = (
            "https://www.ncbi.nlm.nih.gov/pubmed/" + record.pubmed_id)
        record.doi = ref['doi']

        root = ET.fromstring(ref["xml"])

        # Compare against None explicitly: an Element without children is
        # falsy, so truth-testing the result of find() is unreliable.
        journal_title = root[0].find('Article/Journal/Title')
        if journal_title is not None:
            record.journal = journal_title.text

        # Only set the date when a 'pubmed' entry exists; otherwise leave it
        # untouched instead of reusing the values parsed for an earlier paper.
        for pub_date in root[1][0].findall('PubMedPubDate'):
            if pub_date.get('PubStatus') == 'pubmed':
                record.publication_date = "-".join(
                    pub_date.find(part).text
                    for part in ('Year', 'Month', 'Day'))
                break

        keyword_list = root[0].find("KeywordList")
        if keyword_list is not None and len(keyword_list) > 0:
            # Keyword elements without text are skipped.
            record.keywords = ', '.join(
                word.text.strip()
                for word in keyword_list.findall("Keyword")
                if word.text)
        else:
            record.keywords = 'No keywords in Pubmed'

    return papersToFind
 def test_book_chapter(self):
     """Check the parsed fields of the book-chapter record for PMID 20301382."""
     self.ids = (20301382, )
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     obj = self.fetch.content[0]
     # the raw xml and the abstract are left out of the comparison
     for skipped_key in ('xml', 'abstract'):
         obj.pop(skipped_key)
     expected = {
         'PMID': '20301382',
         'title': 'Mitochondrial DNA Deletion Syndromes',
         'year': 1993,
         'doi': None,
         'citation': 'GeneReviews(®) (1993). Seattle (WA): University of Washington, Seattle.',
         'authors_short': 'DiMauro S and Hirano M',
         'authors_list': ['DiMauro S', 'Hirano M'],
     }
     self.assertEqual(obj, expected)
Exemple #14
0
 def test_book_chapter(self):
     """Check the parsed fields of the book-chapter record for PMID 20301382."""
     self.ids = (20301382, )
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     obj = self.fetch.content[0]
     # the raw xml and the abstract are left out of the comparison
     for skipped_key in ("xml", "abstract"):
         obj.pop(skipped_key)
     expected = {
         "PMID": 20301382,
         "title": "Mitochondrial DNA Deletion Syndromes",
         "year": 1993,
         "doi": None,
         "citation": "GeneReviews® (1993). Seattle (WA): University of Washington, Seattle.",
         "authors_short": "Goldstein A and Falk MJ",
         "authors": ["Goldstein A", "Falk MJ"],
     }
     assert obj == expected
Exemple #15
0
    def get_pubmed_identifiers(self, ids):
        """Return a queryset of identifiers, one per PubMed ID.

        Identifiers that are not stored yet are fetched from PubMed and
        saved one by one before the final lookup.
        """
        Identifiers = apps.get_model('lit', 'Identifiers')

        # unique ids that already have a PubMed identifier
        stored = self.filter(database=constants.PUBMED, unique_id__in=ids)
        idents = list(stored.values_list('unique_id', flat=True))

        # whatever is left has to be fetched from PubMed
        need_import = tuple(set(ids) - set(idents))
        fetch = pubmed.PubMedFetch(need_import)

        # save each fetched record and remember its unique id
        for item in fetch.get_content():
            ident = Identifiers(
                unique_id=item['PMID'],
                database=constants.PUBMED,
                content=json.dumps(item),
            )
            ident.save()
            idents.append(ident.unique_id)

        return self.filter(database=constants.PUBMED, unique_id__in=idents)
Exemple #16
0
#!/usr/bin/env python

'''
This script takes a PubMed ID and retrieves the article metadata.
'''

from litter_getter import pubmed
import xml.etree.ElementTree as ET
import xml.dom.minidom

# register with user account
pubmed.connect("PUBMED", '*****@*****.**')

ids = [29161754]  # Ref: https://www.ncbi.nlm.nih.gov/pubmed/29161754
fetch = pubmed.PubMedFetch(id_list=ids)
refs = fetch.get_content()

root = ET.fromstring(refs[0]["xml"])

# A record may have no KeywordList, and a Keyword element may have no text;
# handle both instead of failing on a None.
keyword_node = root[0].find("KeywordList")
keywordlist = [] if keyword_node is None else keyword_node.findall("Keyword")

kwl = [word.text.strip() for word in keywordlist if word.text]

print(', '.join(kwl))

# "refs" is a list of dictionaries with keys:
'''
'xml' : the raw XML returned
Exemple #17
0
 def test_title_with_child_text(self):
     """Check the parsed title of PMID 27933116 against the expected text."""
     expected_title = "Phoenix dactylifera mediated green synthesis of Cu2O particles for arsenite uptake from water."
     self.ids = (27933116, )
     self.fetch = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch.get_content()
     first_record = self.fetch.content[0]
     self.assertEqual(first_record["title"], expected_title)
 def test_multiquery(self):
     """With ``retmax=3`` the fetch is expected to report two requests."""
     fetcher = pubmed.PubMedFetch(id_list=self.ids, retmax=3)
     self.fetch = fetcher
     fetcher.get_content()
     expected_requests = 2
     self.assertEqual(fetcher.request_count, expected_requests)
     self._results_check()
 def test_standard_query(self):
     """With default settings the fetch is expected to report one request."""
     fetcher = pubmed.PubMedFetch(id_list=self.ids)
     self.fetch = fetcher
     fetcher.get_content()
     expected_requests = 1
     self.assertEqual(fetcher.request_count, expected_requests)
     self._results_check()