def get_clinvar(self):
    """Download pathogenic ClinVar variants and write a per-gene summary CSV.

    Runs an ``esearch`` against the ``clinvar`` database for variants that
    are (pathogenic or likely pathogenic) and (frameshift, missense or
    nonsense), fetches their summaries with ``esummary``, flattens them to
    one row per (variant, gene, trait) and aggregates the rows by
    ``GeneID``.

    The aggregated table has the columns ``gene_id``, ``variant_type``
    (unique non-null variant types joined with ``;``) and ``source_trait``
    (``Source(Trait)`` pairs joined with ``;``).  Rows whose
    ``source_trait`` is empty are dropped, and the table is written to
    ``self.fn_dest_clinvar`` without the index.

    Returns:
        None.  The only result is the CSV file.
    """
    eu = eutils.EUtils()
    # Filters: (pathogenic or likely pathogenic) and
    # (frameshift, missense or nonsense).  This gives ~28000 records.
    # NOTE: additionally requiring
    #   (var+deletion[Filter] OR var+indel[Filter])
    # reduced the result to ~5000 records but lost the missense mutations,
    # so the deletion/indel filters are intentionally not applied.
    _, args = eu.esearch({
        'db': 'clinvar',
        'term': '(clinsig+pathogenic[Filter] OR clinsig+likely+path[Filter]) AND '
                '(mol+cons+frameshift[Filter] OR mol+cons+missense[Filter] OR mol+cons+nonsense[Filter])',
    })
    args['db'] = 'clinvar'
    out = eu.esummary(args, count=args['count'])
    records = cv.SummaryList(out).to_list()

    # Flatten to one row per (variant, gene, trait key).
    rows = []
    for entry in records:
        variant_fields = [entry['variant_id']]
        variant_fields.extend(entry['variant'])
        for gene in entry['genes']:
            gene_fields = list(variant_fields)
            gene_fields.extend(gene)
            for key, values in entry['trait'].items():
                rows.append(gene_fields + [key, "; ".join(values)])
    df = pd.DataFrame(
        rows,
        columns=['id', 'VariantType', 'VarinatName', 'Pathogenic',
                 'Symbol', 'GeneID', 'Trait', 'Source'],
    )

    # Drop non-ASCII characters from the trait text.  The result is decoded
    # back to text: on Python 3 ``encode`` alone yields ``bytes``, which
    # cannot be concatenated with the ``str`` pieces below.
    df['Trait'] = [
        trait.encode('ascii', 'ignore').decode('ascii')
        for trait in df['Trait'].values
    ]

    data = []
    for gene_id, group in df.groupby('GeneID'):
        variant_types = [
            v for v in util.unique(group['VariantType']) if not pd.isnull(v)
        ]
        source_traits = [
            source + "(" + trait + ")"
            for source, trait in zip(group['Source'], group['Trait'])
            if not pd.isnull(source) and not pd.isnull(trait)
        ]
        data.append({
            'gene_id': gene_id,
            'variant_type': ";".join(variant_types),
            'source_trait': ";".join(source_traits),
        })

    # Explicit columns keep the frame well-formed (and the filter below
    # valid) even when the search produced no rows at all.
    summary = pd.DataFrame(
        data, columns=['gene_id', 'variant_type', 'source_trait'])
    summary = summary[summary['source_trait'] != ""]
    summary.to_csv(self.fn_dest_clinvar, index=False)
def fetch_pmid(self, PMID):
    """Fetch a single PubMed record and return its citation fields.

    Calls ``efetch`` on the ``pubmed`` database for ``PMID`` and parses the
    response with ``pm.FetchList``.

    Returns:
        ``{}`` when the parsed list is empty; otherwise a dict with the
        keys ``PMID``, ``Title``, ``Year``, ``Journal``, ``Month``,
        ``Volume``, ``Issue``, ``Page`` and ``Authors`` (author names
        joined with ``", "``).
    """
    client = eutils.EUtils()
    response = client.efetch({'db': 'pubmed', 'id': [PMID]})
    records = pm.FetchList(response).to_list(
        ['pubmed_id', 'journal', 'title', 'author'])
    if len(records) == 0:
        return {}

    # Example of one parsed record:
    # {'journal.title': 'Pharmaceutical medicine', 'journal.day': None,
    #  'journal.year': '2017',
    #  'title': 'Measuring and Improving Physician Knowledge of Safety Risks
    #            Using Traditional and Online Methods in Pharmacovigilance.',
    #  'journal.month': None, 'journal.volume': '31', 'journal.issue': '4',
    #  'author': ['Liede A', 'Amelio J', 'Bennett J', 'Goodman H',
    #             'Peters PM', 'Barber R', 'Kehler E', 'Michael Sprafka J'],
    #  'pubmed_id': '28824275', 'journal.page': '257-266'}
    first = records[0]

    # (output key, key in the parsed record), in output order.
    field_map = (
        ('PMID', 'pubmed_id'),
        ('Title', 'title'),
        ('Year', 'journal.year'),
        ('Journal', 'journal.title'),
        ('Month', 'journal.month'),
        ('Volume', 'journal.volume'),
        ('Issue', 'journal.issue'),
        ('Page', 'journal.page'),
    )
    citation = {label: first[source_key] for label, source_key in field_map}
    citation['Authors'] = ", ".join(first['author'])
    return citation
def symbol(self):
    """Return whatever ``_get_value`` yields for the ``./Name`` path."""
    return self._get_value("./Name")


def description(self):
    """Return whatever ``_get_value`` yields for the ``./Description`` path."""
    return self._get_value("./Description")


def chromosome(self):
    """Return whatever ``_get_value`` yields for the ``./Chromosome`` path."""
    return self._get_value("./Chromosome")


def mim(self):
    """Return the ``./Mim/int`` entries as space-joined text.

    Each non-None entry of the list returned by ``_get_nodes`` is replaced,
    in place, by the ``text`` of its children joined with single spaces.
    None entries are left as they are.  The same list object is returned.
    """
    nodes = self._get_nodes("./Mim/int")
    for idx, node in enumerate(nodes):
        if node is not None:
            nodes[idx] = " ".join(child.text for child in node)
    return nodes


if __name__ == "__main__":
    # Manual smoke run: fetch and summarize a few gene IDs and print them.
    import eutils

    client = eutils.EUtils()

    fetched = FetchList(client.efetch({'db': 'gene', 'id': '170743,51284'}))
    print(">>>Fetch")
    print(fetched.to_list(fetched.attributes() + ["summary", "type"]))

    summarized = SummaryList(
        client.esummary({'db': 'gene', 'id': '170743,51284,466142'}))
    print(">>>Summary")
    print(summarized.to_list(summarized.attributes()))