def record_processor(self, record, database):
    """
    Split the record returned by Entrez into separate values and return them.

    Reads "Count", "WebEnv" and "QueryKey" from ``record`` and then downloads
    the accession of every hit through ``Entrez.efetch`` using the search
    history, one batch at a time.

    Args:
        record: Mapping providing the "Count", "WebEnv" and "QueryKey" keys.
        database: Database name, forwarded to ``Entrez.efetch`` as ``db``.

    Returns:
        The tuple ``(count, IDs, webenv, query_key)``, or ``None`` when the
        search matched nothing and ``self.gui`` is not 0 (the message is then
        sent through ``self.no_match.emit``).

    Raises:
        SystemExit: If the search matched nothing and ``self.gui == 0``.
        AssertionError: If the number of downloaded IDs differs from the
            count reported in ``record``.
    """
    batch_size = 10000  # Used both as retmax and as the retstart step.

    count = int(record["Count"])  # Int
    webenv = record["WebEnv"]  # String
    query_key = record["QueryKey"]  # String

    # An empty result needs no download, so report it before fetching.
    if count == 0:
        message = "Your serch query returned no results!"
        if self.gui == 0:
            sys.exit(message)
        self.no_match.emit(message)
        return None

    IDs = []
    for start in range(0, count, batch_size):
        iter_handle = Entrez.efetch(db=database,
                                    webenv=webenv,
                                    query_key=query_key,
                                    retmax=batch_size,
                                    rettype="acc",
                                    retstart=start)
        try:
            IDs.extend(line.rstrip() for line in iter_handle)
        finally:
            # Release the connection even if reading the batch fails.
            iter_handle.close()

    # Raised explicitly so the check is not stripped when running with -O.
    if count != len(IDs):
        raise AssertionError(
            "Expected {0} IDs but fetched {1}".format(count, len(IDs)))

    return count, IDs, webenv, query_key
def erratum_check(PMID, comments):
    """
    Warn the user when the article ``PMID`` has one or more errata.

    The erratum formatting cannot be done automatically, so this only appends
    a warning to ``comments`` for every entry of the article's
    'CommentsCorrectionsList' whose 'RefType' attribute is 'ErratumIn'.
    The first erratum gets the full instructions; later ones get a short note.

    Args:
        PMID: Identifier forwarded to ``Entrez.efetch`` as ``id``.
        comments: List of message strings; it is extended in place.

    Returns:
        The same ``comments`` list that was passed in.
    """
    Entrez.email = app.config['EMAIL']
    handle = Entrez.efetch(db="pubmed", id=PMID, rettype="gb", retmode="xml")
    try:
        records = Entrez.read(handle)
    finally:
        handle.close()

    try:
        corrections = records['PubmedArticle'][0]['MedlineCitation'][
            'CommentsCorrectionsList']
    except (KeyError, IndexError):
        return comments  # No errata? Do nothing.

    erratum_count = 0
    for correction in corrections:
        try:
            if correction.attributes['RefType'] != 'ErratumIn':
                continue
            source = correction['RefSource']
        except (AttributeError, KeyError, TypeError):
            # A malformed entry should not hide the errata that follow it.
            continue

        if erratum_count == 0:
            comments.append(
                "I smell an erratum: " + source +
                ". \nAdd it to the end of your citation: [Erratum in Journal, Issue(Volume): page. DOI: #. Accessed date.]"
            )
        else:
            # Second (or later) erratum for this article: shorten the report.
            comments.append("There's another erratum! What a mess. " + source)
        erratum_count += 1

    return comments
def genbank_entries_from_accession(accessions, read_out=False, email='*****@*****.**'):
    """
    Return the GI numbers of the nucleotide records for ``accessions``.

    The accessions are posted with ``Entrez.epost`` and the matching records
    are then fetched as XML through the returned WebEnv / QueryKey.

    Args:
        accessions: Iterable of accessions; each one is converted with
            ``str`` and the results are joined with commas.
        read_out: When true, print a short summary line for every record.
        email: Value assigned to ``Entrez.email`` before any request is made.

    Returns:
        List of GI numbers (``int``) for the records that carry one. Records
        without a usable GI are skipped (and printed with ``None`` as the GI
        when ``read_out`` is set).
    """
    # NOTE(review): Biopython ships this module as ``from Bio import Entrez``;
    # confirm that a top-level ``Entrez`` module is really what is intended.
    import Entrez

    entries = []
    Entrez.email = email

    request = Entrez.epost('nucleotide', id=','.join(map(str, accessions)))
    try:
        result = Entrez.read(request)
    finally:
        request.close()

    handle = Entrez.efetch(db='nucleotide',
                           retmode='xml',
                           webenv=result['WebEnv'],
                           query_key=result['QueryKey'])
    try:
        for r in Entrez.parse(handle):
            try:
                # Match the "gi|<number>" seqid itself rather than any seqid
                # that merely contains the letters "gi".
                entry = [x for x in r['GBSeq_other-seqids']
                         if x.startswith('gi|')][0]
                gi = int(entry.split('|')[1])
                entries.append(gi)
            except (KeyError, IndexError, ValueError):
                # No seqid list, no GI entry, or a non-numeric GI field.
                gi = None

            if read_out:
                print(">GI {0} {1} {2}\n{3}".format(gi,
                                                    r['GBSeq_primary-accession'],
                                                    r['GBSeq_definition'],
                                                    r['GBSeq_sequence'][:15]))
    finally:
        # Entrez.parse is lazy, so the handle must stay open until the loop ends.
        handle.close()

    return entries
def retrieve_abstract(PMID):
    """
    Return the abstract of the PubMed article ``PMID``.

    The article is fetched in MEDLINE text format and parsed with
    ``Medline.read``; the abstract is the record's 'AB' field.

    Args:
        PMID: Identifier forwarded to ``Entrez.efetch`` as ``id``.

    Returns:
        The value of the 'AB' field, or an empty string when the record has
        no such field.
    """
    Entrez.email = app.config['EMAIL']
    handle = Entrez.efetch(db="pubmed", rettype="medline", retmode="text", id=PMID)
    try:
        record = Medline.read(handle)
    finally:
        # Close the connection even when parsing fails.
        handle.close()

    try:
        return record['AB']
    except KeyError:
        # The record has no abstract field.
        return ''
def get_description(mail, ID):
    """
    Return the definition of a protein hit looked up at NCBI.

    Used by the XML writer to find the definition of a hit from the accession
    number that appears in the rapsearch output.

    Args:
        mail: Value assigned to ``Entrez.email`` before the request.
        ID: Accession forwarded to ``Entrez.efetch`` as ``id``.

    Returns:
        The text of the second line of the fetched record from its 13th
        character on, with leading and trailing '.' characters removed.
        Only that single line is used; later lines are ignored.

    Raises:
        IndexError: If the fetched record has fewer than two lines.
    """
    Entrez.email = mail
    handle = Entrez.efetch(db="protein", id=ID, rettype="gb", retmode="text")
    try:
        entry = handle.read().strip()
    finally:
        # Close before parsing so a malformed record cannot leak the handle.
        handle.close()

    lines = entry.split("\n")
    # Presumably the DEFINITION line, whose text follows a 12-character label.
    definition = lines[1][12:]
    return definition.strip(".")
def get_description(mail, ID):
    """
    Return the definition of a protein hit looked up at NCBI.

    Used by make description to find the definition of a hit from the
    accession number that appears in the annotation output.
    Adapted from the RapsearchToXml.py file.

    Args:
        mail: Value assigned to ``Entrez.email`` before the request.
        ID: Accession forwarded to ``Entrez.efetch`` as ``id``.

    Returns:
        The text of the second line of the fetched record from its 13th
        character on, cut at the first '[' (which removes the species
        information). Only that single line is used; later lines are ignored.

    Raises:
        IndexError: If the fetched record has fewer than two lines.
    """
    Entrez.email = mail
    handle = Entrez.efetch(db="protein", id=ID, rettype="gb", retmode="text")
    try:
        entry = handle.read().strip()
    finally:
        # Close before parsing so a malformed record cannot leak the handle.
        handle.close()

    lines = entry.split("\n")
    definition = lines[1][12:]  # Get the definition field.
    # Everything from the first "[" on is the species information.
    return definition.split("[")[0]
def fetch_by_id(self, IDs, b_size):
    """
    Fetch NCBI data based on the IDs, rather than on a search query.

    Args:
        IDs: Identifiers forwarded to ``Entrez.efetch`` as ``id``.
        b_size: Batch size, forwarded to ``Entrez.efetch`` as ``retmax``.

    Returns:
        The FASTA text read from the fetch handle for ``self.database``.
    """
    id_handle = Entrez.efetch(db=self.database,
                              id=IDs,
                              rettype="fasta",
                              retmode="text",
                              retmax=b_size)
    try:
        return id_handle.read()
    finally:
        # Close the connection even if reading fails part-way.
        id_handle.close()
def fetch_by_id(self, IDs, b_size):
    """
    Fetch NCBI data based on the IDs, rather than on a search query.

    Args:
        IDs: Identifiers forwarded to ``Entrez.efetch`` as ``id``.
        b_size: Batch size, forwarded to ``Entrez.efetch`` as ``retmax``.

    Returns:
        The FASTA text read from the fetch handle for ``self.database``.
    """
    id_handle = Entrez.efetch(db=self.database,
                              id=IDs,
                              rettype="fasta",
                              retmode="text",
                              retmax=b_size)
    try:
        return id_handle.read()
    finally:
        # Close the connection even if reading fails part-way.
        id_handle.close()
def fetch_by_history(self, start, b_size, webenv, query_key):
    """
    Fetch NCBI data based on the provided search query history.

    Args:
        start: Offset of the first record, forwarded as ``retstart``.
        b_size: Batch size, forwarded as ``retmax``.
        webenv: History WebEnv value, forwarded as ``webenv``.
        query_key: History query key, forwarded as ``query_key``.

    Returns:
        The FASTA text read from the fetch handle for ``self.database``.
    """
    hist_handle = Entrez.efetch(db=self.database,
                                retstart=start,
                                rettype="fasta",
                                retmode="text",
                                retmax=b_size,
                                webenv=webenv,
                                query_key=query_key)
    try:
        return hist_handle.read()
    finally:
        # Close the connection even if reading fails part-way.
        hist_handle.close()
def fetch_by_history(self, start, b_size, webenv, query_key):
    """
    Fetch NCBI data based on the provided search query history.

    Args:
        start: Offset of the first record, forwarded as ``retstart``.
        b_size: Batch size, forwarded as ``retmax``.
        webenv: History WebEnv value, forwarded as ``webenv``.
        query_key: History query key, forwarded as ``query_key``.

    Returns:
        The FASTA text read from the fetch handle for ``self.database``.
    """
    hist_handle = Entrez.efetch(db=self.database,
                                retstart=start,
                                rettype="fasta",
                                retmode="text",
                                retmax=b_size,
                                webenv=webenv,
                                query_key=query_key)
    try:
        return hist_handle.read()
    finally:
        # Close the connection even if reading fails part-way.
        hist_handle.close()