Ejemplo n.º 1
0
def add_paper(pmid, nex_session=None):
    """Fetch the PubMed record for *pmid* and load it into the database.

    Creates the reference entity plus its authors, publication types,
    URLs and relations. Returns the new reference_id.
    """
    if nex_session is None:
        nex_session = get_session()

    # Retrieve the raw Medline text for this PMID and parse the first record.
    raw_records = get_pubmed_record(str(pmid))
    record = Medline.read(StringIO(raw_records[0]))

    source_id = get_source_id(nex_session, 'NCBI')

    # Insert into DBENTITY/REFERENCEDBENTITY/REFERENCEDOCUMENT.
    reference_id, authors, doi_url, pmc_url = insert_referencedbentity(
        nex_session, pmid, source_id, record)

    insert_authors(nex_session, reference_id, authors, source_id)
    insert_pubtypes(nex_session, pmid, reference_id, record.get('PT', []), source_id)
    insert_urls(nex_session, pmid, reference_id, doi_url, pmc_url, source_id)
    insert_relations(nex_session, pmid, reference_id, record)

    return reference_id
Ejemplo n.º 2
0
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL"""
     handle = Entrez.efetch(
         db="pubmed", id="19304878", rettype="medline", retmode="text"
     )
     record = Medline.read(handle)
     self.assertTrue(isinstance(record, dict))
     self.assertEqual("19304878", record["PMID"])
     self.assertEqual("10.1093/bioinformatics/btp163 [doi]", record["LID"])
Ejemplo n.º 3
0
	def processInput(k):
		# Fetch the Medline record for one PMID `k` and store its MeSH
		# headings (truncated to 24 chars) plus the entry date in MeSH002.
		# NOTE(review): Python 2 code — `print` statement and the two-arg
		# str.translate below are both invalid on Python 3.
		print "Querying PMID: "+str(k)+"."
		getall = Medline.read(Entrez.efetch(db="pubmed", id=k, rettype="medline", retmode="text"))
		# MH may be absent; getall.get returns None and len() below would
		# raise TypeError — presumably callers only pass PMIDs with MeSH.
		singlemesh = getall.get("MH")
		singledate = getall.get("EDAT")	
		for j1 in range(len(singlemesh)):
			# NOTE(review): SQL built by string concatenation — injection-prone;
			# translate(None, "'*&") only strips quotes/stars/ampersands from the
			# value. Should use a parameterized query for this DB driver.
			cur.execute("INSERT INTO MeSH002(PMID, MeSH, Dates) VALUES("+str(k)+",'" + getall.get("MH")[j1][0:24].translate(None, "'*&")+"','" +  str(singledate[0:10]) +"')" )
Ejemplo n.º 4
0
 def test_read(self):
     """Parse Medline/pubmed_result1.txt and verify every field of the record."""
     with open("Medline/pubmed_result1.txt") as handle:
         record = Medline.read(handle)
     # Expected field values for PMID 12230038, checked in order.
     expected = [
         ("PMID", "12230038"),
         ("OWN", "NLM"),
         ("STAT", "MEDLINE"),
         ("DA", "20020916"),
         ("DCOM", "20030606"),
         ("LR", "20041117"),
         ("PUBM", "Print"),
         ("IS", "1467-5463 (Print)"),
         ("VI", "3"),
         ("IP", "3"),
         ("DP", "2002 Sep"),
         ("TI", "The Bio* toolkits--a brief overview."),
         ("PG", "296-302"),
         ("AB", "Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer."),
         ("AD", "tacg Informatics, Irvine, CA 92612, USA. [email protected]"),
         ("FAU", ["Mangalam, Harry"]),
         ("AU", ["Mangalam H"]),
         ("LA", ["eng"]),
         ("PT", ["Journal Article"]),
         ("PL", "England"),
         ("TA", "Brief Bioinform"),
         ("JT", "Briefings in bioinformatics"),
         ("JID", "100912837"),
         ("SB", "IM"),
         ("MH", ["*Computational Biology", "Computer Systems", "Humans", "Internet", "*Programming Languages", "*Software", "User-Computer Interface"]),
         ("EDAT", "2002/09/17 10:00"),
         ("MHDA", "2003/06/07 05:00"),
         ("PST", "ppublish"),
         ("SO", "Brief Bioinform. 2002 Sep;3(3):296-302."),
     ]
     for key, value in expected:
         self.assertEqual(record[key], value)
Ejemplo n.º 5
0
def medline_to_csv():
    """
    Create one record per text using XML abstracts scraped from PubMed.

    Reads the list of record ids from pickles/pubmed_records.p, parses each
    pubmed/<id>.txt Medline file, splits title+abstract into sentences and
    writes them to csv/sentences_pubmed.csv (columns: id, sent_num, text).
    """
    files = pickle.load(open('pickles/pubmed_records.p', 'rb'))
    files = ['pubmed/' + f + '.txt' for f in files]
    f_out = 'csv/sentences_pubmed.csv'

    sentence_splitter = set_up_tokenizer()

    # BUG FIX: 'wb' mode plus manual s.encode('utf-8') was a Python 2 idiom;
    # on Python 3 csv.writer requires a text-mode file opened with newline=''.
    with open(f_out, 'w', newline='', encoding='utf-8') as csv_out:
        csv_writer = csv.DictWriter(csv_out, ['id', 'sent_num', 'text'], delimiter=',')
        csv_writer.writeheader()

        for f in files:
            # Text mode: Medline.read expects str lines, not bytes.
            with open(f, encoding='utf-8') as f_in:
                record = Medline.read(f_in)
                pid = record['PMID']
                text = record['TI'] + ' ' + record['AB']

                for i, s in enumerate(sentence_splitter.tokenize(text)):
                    csv_writer.writerow({'id': pid, 'sent_num': i, 'text': s})
def update_database_batch(nex_session, fw, records, pmid_to_reference,
                          reference_id_to_urls, source_id):
    """Refresh PMC and DOI URLs for every record whose PMID is already known."""
    for raw in records:
        record = Medline.read(StringIO(raw))

        pmid = record.get('PMID')
        if pmid is None:
            continue
        reference = pmid_to_reference.get(int(pmid))
        if reference is None:
            continue

        # Build the PMC URL only when the record carries a PMC id.
        pmc_url = (PMC_ROOT + record['PMC'] + '/') if record.get('PMC') else None

        doi, doi_url = get_doi(record)
        # Un-escape HTML entities that sometimes appear in DOI URLs.
        doi_url = doi_url.replace("&lt;", "<").replace("&gt;", ">")

        update_urls(nex_session, fw, pmid, reference.dbentity_id, pmc_url, doi_url,
                    reference_id_to_urls[reference.dbentity_id], source_id)
Ejemplo n.º 7
0
def medline_to_db():
    """
    Create one record per text using medline abstracts scraped from PubMed.

    Skips ids already present in the sentences table, then parses each
    pubmed/<id>.txt file and inserts one row per tokenized sentence.
    """
    sentence_splitter = set_up_tokenizer()
    files = set(pickle.load(open('pickles/pubmed_records_new.p', 'rb')))

    with sqlite3.connect(db_path) as db:
        cursor = db.cursor()

        # don't want to add the same abstract multiple times, so get existing ones first
        cursor.execute('SELECT DISTINCT pubmed_id FROM sentences')
        # using sets to hopefully speed things up
        existing = {p[0] for p in cursor}
        files = {f for f in files if f not in existing}
        files = ['pubmed/' + str(f) + '.txt' for f in files]

        for f in files:
            # BUG FIX: open in text mode — Medline.read expects str lines,
            # and 'rb' hands it bytes on Python 3.
            with open(f) as f_in:
                record = Medline.read(f_in)
                pid = record['PMID']

                try:
                    text = record['TI'] + ' ' + record['AB']
                except KeyError:
                    # Books have no TI; BTI is a list, so take the first
                    # element. BUG FIX: was a bare `except`, which also hid
                    # unrelated errors.
                    text = record['BTI'][0] + ' ' + record['AB']

                for i, s in enumerate(sentence_splitter.tokenize(text)):
                    cursor.execute('''INSERT INTO sentences
                                             VALUES (NULL, ?, ?, ?, ?);''', (pid, i, s, 'pubmed'))
def handle_one_record(db_session, records, gene_list, alias_to_name):
    """Parse each raw Medline record and insert a formatted reference row.

    For every record: extracts the DOI, builds a citation string, pulls gene
    names out of the abstract and writes everything via insert_reference.
    """
    for rec in records:
        record = Medline.read(StringIO(rec))
        pmid = record.get('PMID')

        # Pull the DOI out of AID entries such as
        # ['S0167-7012(17)30042-8 [pii]', '10.1016/j.mimet.2017.02.002 [doi]']
        # (renamed loop var: `id` shadowed the builtin).
        doi_url = ""
        doi = None
        for aid in record.get('AID', []):
            if aid.endswith('[doi]'):
                doi = aid.replace(' [doi]', '')
                break
        if doi:
            doi_url = "/".join(['http://dx.doi.org', doi])

        title = record.get('TI', '')
        authors = record.get('AU', [])
        pubdate = record.get('DP', '')  # 'PubDate': '2012 Mar 20'
        year = pubdate.split(' ')[0]
        journal = record.get('TA', '')
        volume = record.get('VI', '')
        issue = record.get('IP', '')
        pages = record.get('PG', '')
        citation = set_cite(title, authors, year, journal, volume, issue,
                            pages)
        abstract = record.get('AB', '')
        gene_names = extract_gene_names(abstract, gene_list, alias_to_name)

        # insert formatted data to DB
        # (removed: unused counter `i` and unused `pubmed_url` local)
        insert_reference(db_session, pmid, citation, doi_url, abstract,
                         " ".join(gene_names))
 def test_read(self):
     """Parse Medline/pubmed_result1.txt and verify every field of the record."""
     handle = open("Medline/pubmed_result1.txt")
     record = Medline.read(handle)
     handle.close()
     # Expected field values for PMID 12230038, checked in order.
     expected = [
         ("PMID", "12230038"),
         ("OWN", "NLM"),
         ("STAT", "MEDLINE"),
         ("DA", "20020916"),
         ("DCOM", "20030606"),
         ("LR", "20041117"),
         ("PUBM", "Print"),
         ("IS", "1467-5463 (Print)"),
         ("VI", "3"),
         ("IP", "3"),
         ("DP", "2002 Sep"),
         ("TI", "The Bio* toolkits--a brief overview."),
         ("PG", "296-302"),
         ("AB", "Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer."),
         ("AD", "tacg Informatics, Irvine, CA 92612, USA. [email protected]"),
         ("FAU", ["Mangalam, Harry"]),
         ("AU", ["Mangalam H"]),
         ("LA", ["eng"]),
         ("PT", ["Journal Article"]),
         ("PL", "England"),
         ("TA", "Brief Bioinform"),
         ("JT", "Briefings in bioinformatics"),
         ("JID", "100912837"),
         ("SB", "IM"),
         ("MH", ["*Computational Biology", "Computer Systems", "Humans", "Internet", "*Programming Languages", "*Software", "User-Computer Interface"]),
         ("EDAT", "2002/09/17 10:00"),
         ("MHDA", "2003/06/07 05:00"),
         ("PST", "ppublish"),
         ("SO", "Brief Bioinform. 2002 Sep;3(3):296-302."),
     ]
     for key, value in expected:
         self.assertEqual(record[key], value)
Ejemplo n.º 10
0
def handle_query(query: str,
                 records_to_retrieve: set,
                 retrieve_max: int = 100) -> List[Dict]:
    '''
    Takes a query, searches for the relevant papers and returns the abstracts
    Abbreviations:
    AB = Abstract, AID = Article Identifier, (F)AU = (Full) Author, DP = Date of Publication,
    JT = Journal Title, OT = Other Term, PMID = PubMed Unique Identifier, TI = Title
    https://biopython.org/docs/1.75/api/Bio.Medline.html#Bio.Medline.Record
    '''
    rec_handler = search_medline(query=query, retmax=retrieve_max)

    records = []
    for rec_id in rec_handler['IdList']:
        raw = fetch_details(rec_id=rec_id, entrez_handle=rec_handler)
        medline_rec = Medline.read(StringIO(raw))
        # Keep only the fields the caller asked for.
        records.append({key: value for key, value in medline_rec.items()
                        if key in records_to_retrieve})

    return records
Ejemplo n.º 11
0
def main(query, email):
    """Search PubMed for *query* and print the abstract of each hit."""
    handler = search_medline(query, email)

    for rec_id in handler['IdList']:
        raw = fetch_rec(rec_id, handler)
        record = Medline.read(StringIO(raw))
        # Not every record carries an abstract (AB field).
        if 'AB' in record:
            print(record['AB'])
Ejemplo n.º 12
0
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL"""
     handle = Entrez.efetch(
         db="pubmed", id='19304878', rettype="medline", retmode="text")
     record = Medline.read(handle)
     self.assertTrue(isinstance(record, dict))
     self.assertEqual('19304878', record['PMID'])
     self.assertEqual('10.1093/bioinformatics/btp163 [doi]', record['LID'])
Ejemplo n.º 13
0
 def parse_medline(self, text):
     """Populate bibliographic attributes from raw Medline *text*."""
     rec = Medline.read(text.split('\n'))
     self.medline = rec
     for attr, key in (('title', 'TI'), ('journal', 'JT'),
                       ('citation', 'SO'), ('date_pub', 'DP')):
         setattr(self, attr, rec[key])
     # Records without personal authors (AU) fall back to IR.
     try:
         self.authors = rec['AU']
     except KeyError:
         self.authors = rec['IR']
Ejemplo n.º 14
0
 def parse_medline(self, text):
     """Fill in title/journal/citation/date/authors from raw Medline *text*."""
     parsed = Medline.read(text.split('\n'))
     self.medline = parsed
     self.title = parsed['TI']
     self.journal = parsed['JT']
     self.citation = parsed['SO']
     self.date_pub = parsed['DP']
     # Records without personal authors (AU) fall back to IR.
     try:
         self.authors = parsed['AU']
     except KeyError:
         self.authors = parsed['IR']
Ejemplo n.º 15
0
 def test_multiline_mesh(self):
     """MeSH terms wrapped over several lines must be joined correctly."""
     with open("Medline/pubmed_result3.txt") as handle:
         record = Medline.read(handle)
         self.assertEqual(record["PMID"], "23039619")
     expected_mesh = [
         "Blood Circulation",
         "High-Intensity Focused Ultrasound Ablation/adverse effects/instrumentation/*methods",
         "Humans",
         "Models, Biological",
         "Sonication",
         "Temperature",
         "Time Factors",
         "Transducers",
     ]
     self.assertEqual(record["MH"], expected_mesh)
Ejemplo n.º 16
0
def retrieve_abstract(PMID):
    """Return the abstract (AB field) for *PMID*, or '' if the record has none."""
    Entrez.email = app.config['EMAIL']
    handle = Entrez.efetch(db="pubmed", rettype="medline", retmode="text", id=PMID)

    try:
        record = Medline.read(handle)
    finally:
        # Close the handle even if parsing fails.
        handle.close()
    # BUG FIX: was `try: record['AB'] except: ...` — the bare except also
    # swallowed unrelated errors; .get with a default is the intended behavior.
    return record.get('AB', '')
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL."""
     medline_handle = Entrez.efetch(
         db="pubmed", id="19304878", rettype="medline", retmode="text"
     )
     record = Medline.read(medline_handle)
     medline_handle.close()
     self.assertIsInstance(record, dict)
     self.assertEqual("19304878", record["PMID"])
     self.assertEqual("10.1093/bioinformatics/btp163 [doi]", record["LID"])
Ejemplo n.º 18
0
 def test_multiline_mesh(self):
     """MeSH terms wrapped over several lines must be joined correctly."""
     with open("Medline/pubmed_result3.txt") as handle:
         record = Medline.read(handle)
         self.assertEqual(record["PMID"], "23039619")
     self.assertEqual(record["MH"], [
         "Blood Circulation",
         "High-Intensity Focused Ultrasound Ablation/adverse effects/instrumentation/*methods",
         "Humans",
         "Models, Biological",
         "Sonication",
         "Temperature",
         "Time Factors",
         "Transducers",
     ])
Ejemplo n.º 19
0
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL"""
     handle = Entrez.efetch(db="pubmed", id="19304878", rettype="medline", retmode="text")
     # The generated efetch URL must carry tool, email and the requested id.
     url = handle.url
     self.assertTrue(url.startswith(URL_HEAD + "efetch.fcgi?"), url)
     self.assertTrue(URL_TOOL in url)
     self.assertTrue(URL_EMAIL in url)
     self.assertTrue("id=19304878" in url)
     record = Medline.read(handle)
     handle.close()
     self.assertTrue(isinstance(record, dict))
     self.assertEqual("19304878", record["PMID"])
     self.assertEqual("10.1093/bioinformatics/btp163 [doi]", record["LID"])
Ejemplo n.º 20
0
 def test_pubmed_16381885(self):
     """Bio.TogoWS.entry("pubmed", "16381885")"""
     # TogoWS serves Medline plain text for pubmed entries.
     handle = TogoWS.entry("pubmed", "16381885")
     data = Medline.read(handle)
     handle.close()
     expected_authors = ['Kanehisa M', 'Goto S', 'Hattori M',
                         'Aoki-Kinoshita KF', 'Itoh M', 'Kawashima S',
                         'Katayama T', 'Araki M', 'Hirakawa M']
     self.assertEqual(data["TI"],
          'From genomics to chemical genomics: new developments in KEGG.')
     self.assertEqual(data["AU"], expected_authors)
Ejemplo n.º 21
0
 def test_pubmed_16381885(self):
     """Bio.TogoWS.entry("pubmed", "16381885")"""
     # TogoWS serves Medline plain text for pubmed entries.
     handle = TogoWS.entry("pubmed", "16381885")
     data = Medline.read(handle)
     handle.close()
     self.assertEqual(data["TI"],
          'From genomics to chemical genomics: new developments in KEGG.')
     self.assertEqual(data["AU"], ['Kanehisa M', 'Goto S', 'Hattori M',
                                   'Aoki-Kinoshita KF', 'Itoh M',
                                   'Kawashima S', 'Katayama T',
                                   'Araki M', 'Hirakawa M'])
Ejemplo n.º 22
0
def fetchMetadata2(lala, search):
    """Fetch Medline metadata for a batch of PMIDs in one efetch call.

    *lala* is a list of PMID strings; *search* is forwarded to getdata1.
    Returns getdata1's result for all fetched records.
    """
    pmid_string = ",".join(lala)
    print(pmid_string)
    efetch = Entrez.efetch(db, id=pmid_string, rettype="medline", retmode="text")
    # BUG FIX: the old loop called Medline.read() repeatedly on the same
    # handle; Medline.read returns a single record. Medline.parse is the
    # multi-record API and yields one record per PMID.
    records = list(Medline.parse(efetch))
    print("This is records", records)
    alldata = getdata1(records, search)
    return alldata
Ejemplo n.º 23
0
 def processInput(k):
     # Fetch the Medline record for one PMID `k` and store its MeSH
     # headings (truncated to 24 chars) plus the entry date in MeSH002.
     # NOTE(review): Python 2 code — the `print` statement and two-arg
     # str.translate below are both invalid on Python 3.
     print "Querying PMID: " + str(k) + "."
     getall = Medline.read(
         Entrez.efetch(db="pubmed", id=k, rettype="medline",
                       retmode="text"))
     # MH may be absent; getall.get returns None and len() below would
     # raise TypeError — presumably callers only pass PMIDs with MeSH.
     singlemesh = getall.get("MH")
     singledate = getall.get("EDAT")
     for j1 in range(len(singlemesh)):
         # NOTE(review): SQL built by string concatenation — injection-prone;
         # translate(None, "'*&") only strips quotes/stars/ampersands from
         # the value. Should use a parameterized query for this DB driver.
         cur.execute("INSERT INTO MeSH002(PMID, MeSH, Dates) VALUES(" +
                     str(k) + ",'" +
                     getall.get("MH")[j1][0:24].translate(None, "'*&") +
                     "','" + str(singledate[0:10]) + "')")
Ejemplo n.º 24
0
 def test_pubmed_16381885(self):
     """Bio.TogoWS.entry("pubmed", "16381885")."""
     # TogoWS serves Medline plain text for pubmed entries.
     handle = TogoWS.entry("pubmed", "16381885")
     data = Medline.read(handle)
     handle.close()
     self.assertEqual(
         data["TI"],
         "From genomics to chemical genomics: new developments in KEGG.")
     self.assertEqual(data["AU"], [
         "Kanehisa M", "Goto S", "Hattori M", "Aoki-Kinoshita KF",
         "Itoh M", "Kawashima S", "Katayama T", "Araki M", "Hirakawa M",
     ])
Ejemplo n.º 25
0
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL"""
     handle = Entrez.efetch(db="pubmed", id='19304878', rettype="medline",
                            retmode="text")
     # The generated efetch URL must carry tool, email and the requested id.
     url = handle.url
     self.assertTrue(url.startswith(URL_HEAD + "efetch.fcgi?"), url)
     self.assertIn(URL_TOOL, url)
     self.assertIn(URL_EMAIL, url)
     self.assertIn("id=19304878", url)
     record = Medline.read(handle)
     handle.close()
     self.assertTrue(isinstance(record, dict))
     self.assertEqual('19304878', record['PMID'])
     self.assertEqual('10.1093/bioinformatics/btp163 [doi]', record['LID'])
Ejemplo n.º 26
0
def fetchMetadata(pmid_list, search):
    """Fetch Medline metadata for each PMID, one request per second."""
    all_metadata = []
    for pmid in pmid_list:
        time.sleep(1)  # throttle requests to the NCBI servers
        print("fetching")
        handle = Entrez.efetch(db, id=pmid, rettype="medline", retmode="text")
        record = Medline.read(handle)
        # Each row starts with the PMID, then whatever getdata appends.
        row = getdata(record, [pmid], search)
        all_metadata.append(row)

    print(all_metadata)
    return all_metadata
Ejemplo n.º 27
0
def get_abstracts(file_name):
    """Read one PubMed ID per line from *file_name* and fetch each abstract.

    Returns a list of abstract strings; records without an AB field yield ''
    (the old record['AB'] raised KeyError on those).
    """
    # BUG FIX: the input file was never closed; use a context manager.
    with open(file_name) as f:
        pubmed_ids = [int(line.rstrip('\n')) for line in f]
    abstracts = []
    for pubmed_id in pubmed_ids:
        fetch_handler = Entrez.efetch(db='pubmed',
                                      rettype='medline',
                                      retmode='text',
                                      id=str(pubmed_id))
        record = Medline.read(fetch_handler)
        abstracts.append(record.get('AB', ''))
    return abstracts
Ejemplo n.º 28
0
def fetch_all_dates(doi):
    '''
    :param doi: DOI of paper to use for searching
    :return: the date of publication when applicable
    '''
    result = search_pubmed(doi)
    id_list = result['IdList']
    if not id_list:
        return None
    paper = fetch_paper_date(id_list[0])
    medline_rec = Medline.read(StringIO(paper))
    # PHST entries end with a status tag; keep only the date part of the
    # last entry.
    return medline_rec['PHST'][-1].partition(' ')[0]
Ejemplo n.º 29
0
def update_database_batch(nex_session, fw, records, pmid_to_reference, key_to_type, source_to_id):
    """Refresh comment/erratum relations for records whose PMID is already known."""
    for raw in records:
        record = Medline.read(StringIO(raw))

        pmid = record.get('PMID')
        if pmid is None:
            continue
        # Skip records we have no reference entity for.
        if pmid_to_reference.get(int(pmid)) is None:
            continue

        update_comment_erratum(nex_session, fw, record, int(pmid),
                               pmid_to_reference, key_to_type, source_to_id)
Ejemplo n.º 30
0
def get_new_reference_info(request):
    """Validate space-separated PMIDs from the request body and return
    confirmation info for each reference not yet in the database.

    Returns {'references': [...]} on success; any validation or fetch
    failure is logged, the session rolled back, and an HTTPBadRequest
    with the error message returned.
    """
    MAX_PUBS_ADDED = 10
    try:
        params = request.json_body
        if not params:
            raise ValueError('Please enter at least 1 PMID.')
        pmids = params['pmids']
        int_pmids = convert_space_separated_pmids_to_list(pmids)
        if len(int_pmids) > MAX_PUBS_ADDED:
            raise ValueError('Only ' + str(MAX_PUBS_ADDED) +
                             ' may be added at once.')
        # avoid repeat PMIDs
        repeat_pmids = [
            x for x, count in collections.Counter(int_pmids).items()
            if count > 1
        ]
        if len(repeat_pmids):
            str_pmids = [str(x) for x in repeat_pmids]
            str_pmids = ', '.join(str_pmids)
            msg = 'A PMID was repeated: ' + str_pmids
            raise ValueError(msg)
        confirmation_list = []
        for x in int_pmids:
            is_in_db = DBSession.query(Referencedbentity).filter(
                Referencedbentity.pmid == x).one_or_none()
            if is_in_db:
                raise ValueError(
                    'At least 1 PMID is already in the database: ' + str(x))
            record = Medline.read(
                Entrez.efetch(db='pubmed', id=str(x), rettype='medline'))
            warning = Referencedbentity.get_deletion_warnings(x)
            journal_title = record.get('JT', '')
            if len(journal_title) <= 1:
                raise ValueError('Cannot import PMID ' + str(x) +
                                 ' because journal title is blank.')
            confirmation_item = {
                # BUG FIX: record.get('TI') returns None when the title is
                # missing, making the concatenation below raise TypeError.
                'name': record.get('TI', '') + ' PMID: ' + str(x),
                'pmid': x,
                'warning': warning
            }
            confirmation_list.append(confirmation_item)
        return {'references': confirmation_list}
    except Exception as e:
        traceback.print_exc()
        log.error(e)
        DBSession.rollback()
        return HTTPBadRequest(body=json.dumps({'message': str(e)}),
                              content_type='text/json')
def update_database_batch(nex_session, fw, records, pmid_to_reference, journal_id_to_abbrev, source_id):
    """Parse each raw Medline record and push updates for known PMIDs."""
    for raw in records:
        record = Medline.read(StringIO(raw))

        pmid = record.get('PMID')
        if pmid is None:
            continue
        # Skip records we have no reference entity for.
        if pmid_to_reference.get(int(pmid)) is None:
            continue

        update_database(nex_session, fw, record, int(pmid), pmid_to_reference,
                        journal_id_to_abbrev, source_id)
Ejemplo n.º 32
0
def pmid2abstract_info(pmid):
    """Fetch a PubMed record for *pmid* and return its key fields as a dict.

    Missing fields default to "?".
    """
    from Bio import Medline
    handle = Entrez.efetch(db="pubmed",
                           id=pmid,
                           rettype="medline",
                           retmode="text")
    record = Medline.read(handle)

    # BUG FIX: the old Python-2 `print record` statement is a SyntaxError
    # on Python 3.
    print(record)
    pmid_data = {}
    pmid_data["title"] = record.get("TI", "?")
    pmid_data["authors"] = record.get("AU", "?")
    pmid_data["source"] = record.get("SO", "?")
    pmid_data["abstract"] = record.get("AB", "?")
    pmid_data["pmid"] = pmid

    return pmid_data
 def test_medline_from_url(self):
     """Test Entrez into Medline.read from URL."""
     handle = Entrez.efetch(db="pubmed",
                            id="19304878",
                            rettype="medline",
                            retmode="text")
     # The generated efetch URL must carry tool, email, api key and the id.
     url = handle.url
     self.assertTrue(url.startswith(URL_HEAD + "efetch.fcgi?"), url)
     for fragment in (URL_TOOL, URL_EMAIL, URL_API_KEY, "id=19304878"):
         self.assertIn(fragment, url)
     record = Medline.read(handle)
     handle.close()
     self.assertIsInstance(record, dict)
     self.assertEqual("19304878", record["PMID"])
     self.assertEqual("10.1093/bioinformatics/btp163 [doi]", record["LID"])
def update_database_batch(nex_session, fw, records, pmid_to_reference, reference_id_to_authors, source_id):
    """Refresh the author list for every record whose PMID is already known."""
    for rec in records:
        record = Medline.read(StringIO(rec))

        pmid = record.get('PMID')
        if pmid is None:
            continue

        x = pmid_to_reference.get(int(pmid))
        if x is None:
            continue

        # BUG FIX: AU is a list field; default to [] rather than '' so
        # update_authors always receives a list of author names.
        authors = record.get('AU', [])
        update_authors(nex_session, fw, pmid, x.dbentity_id, authors,
                       reference_id_to_authors.get(x.dbentity_id), source_id)
Ejemplo n.º 35
0
def get_first_last_authors(paper_id):
    """Given a paper, returns the first and last authors of the paper.

    Arguments:
    paper_id - str; paper ids

    Returns:
    authors - list of strs; list of full names of the first and last authors of the provided paper id
    """
    handle = Entrez.efetch(db='pubmed', id=paper_id, rettype='medline',
                           retmode="text", retmax=200)
    full_authors = Medline.read(handle).get("FAU", "?")
    # NOTE(review): with a single author (or the "?" fallback) the first and
    # last entries are the same element — confirm this is intended.
    return [full_authors[0], full_authors[-1]]
def update_database_batch(nex_session, fw, records, pmid_to_reference,
                          reference_id_to_pubtypes, source_id):
    """Refresh publication types (PT field) for known PMIDs."""
    for raw in records:
        record = Medline.read(StringIO(raw))
        pmid = record.get('PMID')
        if pmid is None:
            continue

        reference = pmid_to_reference.get(int(pmid))
        if reference is None:
            continue

        # PT is a list of publication types.
        pubtypes = record.get('PT', [])

        update_reftypes(nex_session, fw, pmid, reference.dbentity_id, pubtypes,
                        reference_id_to_pubtypes.get(reference.dbentity_id),
                        source_id)
Ejemplo n.º 37
0
def downloadBibliography():
    """For up to 5 titles in title.txt, download the Medline entry of any
    title with exactly one PubMed match that is not already recorded, via
    writeBibliography.
    """
    from Bio import Entrez
    import re
    from Bio import Medline
    Entrez.email = "*****@*****.**"
    # fp1 appends to the cumulative result file; fp2 collects this run's new entries.
    fp1 = open("SNP_pubmed_result.txt", "a")
    fp2 = open("new_pubmed_result.txt", "w")
    '''
    all_text_tatol = fp1.read()
    all_text_new = fp2.read()
    '''

    # NOTE(review): `input` shadows the builtin. Medline.read returns only
    # the FIRST record of the file even if it contains many — so
    # medline_exist["PMID"] below is a single PMID string, not a list.
    input = open("SNP_pubmed_result.txt")
    medline_exist = Medline.read(input)
    input.close()
    # Only the first 5 titles are processed per run.
    for line in open("title.txt", "r").readlines()[:5]:
        title = line.replace("\n", "")
        handle = Entrez.esearch(db="pubmed", term=title)
        records = Entrez.read(handle)
        # Proceed only when the title matches exactly one PubMed entry.
        if int(records["Count"]) == 1:
            # Build the medline-format report URL for this title.
            term = re.sub(" ", "+", title)
            url = "https://www.ncbi.nlm.nih.gov/pubmed/?term="
            url += ''.join(term)
            url += ''.join("&report=medline&format=text")
            medline_handle = Entrez.efetch(db="pubmed",
                                           id=records["IdList"],
                                           rettype="medline",
                                           retmode="text")
            medline_res = Medline.parse(medline_handle)
            medline_res_list = list(medline_res)
            # PMID ends up as the last record's PMID (or -1 if none).
            PMID = -1
            for record in medline_res_list:
                PMID = record.get("PMID", "?")
            '''      
            if len(all_text_tatol) == 0 :
                writeBibliography(url, fp1, fp2)
            else :       
            '''
            # NOTE(review): this is a substring test against one PMID string,
            # not membership in a list of known PMIDs — verify intent.
            if PMID not in medline_exist["PMID"]:
                writeBibliography(url, fp1, fp2)
    fp1.close()
    fp2.close()
Ejemplo n.º 38
0
 def test_pubmed_16381885(self):
     """Bio.TogoWS.entry("pubmed", "16381885")"""
     # TogoWS serves Medline plain text for pubmed entries.
     handle = TogoWS.entry("pubmed", "16381885")
     data = Medline.read(handle)
     handle.close()
     self.assertEqual(
         data["TI"],
         "From genomics to chemical genomics: new developments in KEGG.")
     self.assertEqual(data["AU"], [
         "Kanehisa M", "Goto S", "Hattori M", "Aoki-Kinoshita KF",
         "Itoh M", "Kawashima S", "Katayama T", "Araki M", "Hirakawa M",
     ])
Ejemplo n.º 39
0
def fetch_from_entrez(index, cache_dir=False):
    """Fetch the Medline record for *index* from Entrez, with optional caching.

    cache_dir, when given, is a sequence of path components for the cache
    location. Returns the parsed record dict, or None if every attempt failed.
    """
    logger = logging.getLogger('build')

    cache_file_path = None
    index_slug = None
    if cache_dir:
        # slugify the index for the cache filename (some indices have symbols
        # not allowed in file names (e.g. /))
        # BUG FIX: this path was previously built unconditionally, raising
        # TypeError ('/'.join(False)) whenever cache_dir was False.
        index_slug = slugify(index)
        cache_file_path = '{}/{}'.format('/'.join(cache_dir), index_slug)

        # try fetching from cache
        d = fetch_from_cache(cache_dir, index_slug)
        if d:
            logger.info('Fetched {} from cache'.format(cache_file_path))
            return d

    # if nothing is found in the cache, use the web API
    logger.info('Fetching {} from Entrez'.format(index))
    max_tries = 5
    for attempt in range(max_tries):
        if attempt > 0:
            # BUG FIX: previously logged the undefined name `full_url`,
            # which raised NameError on any retry.
            logger.warning('Failed fetching {}, retrying'.format(index))

        try:
            Entrez.email = '*****@*****.**'
            handle = Entrez.efetch(
                db="pubmed",
                id=str(index),
                rettype="medline",
                retmode="text"
            )
        except Exception:
            # Narrowed from a bare except; back off before retrying.
            time.sleep(2)
        else:
            d = Medline.read(handle)

            # save to cache only when a cache location was provided
            # (BUG FIX: save_to_cache was previously called with cache_dir=False).
            if cache_dir:
                save_to_cache(cache_dir, index_slug, d)
                logger.info('Saved entry for {} in cache'.format(cache_file_path))
            return d
    return None
Ejemplo n.º 40
0
 def fetcher(self):
     """Fetch this object's PubMed record (looked up by self.name) as a dict."""
     efetch_handle = Entrez.efetch(
         db='pubmed', id=self.name, retmode='text', rettype='medline'
     )
     return Medline.read(efetch_handle)