def get_clinvar(self):
        """Query ClinVar for pathogenic variants and write a per-gene summary CSV.

        Runs an E-utilities ``esearch`` on the ``clinvar`` database, pulls the
        matching summaries with ``esummary``, flattens them to one row per
        (variant, gene, trait key) and aggregates the rows by ``GeneID``.

        The result is written to ``self.fn_dest_clinvar`` with the fields
        ``gene_id``, ``variant_type`` (unique variant types joined by ``;``)
        and ``source_trait`` (``Source(Trait)`` pairs joined by ``;``).
        Genes whose ``source_trait`` is empty are dropped. Returns ``None``.
        """
        eu = eutils.EUtils()

        # Filters: (pathogenic or likely pathogenic) AND
        # (frameshift, missense or nonsense). This gives ~28000 variants.
        # Adding "var deletion" / "var indel" filters was tried (~5000
        # records) but it dropped missense mutations, so they are not used.
        term = (
            '(clinsig+pathogenic[Filter] OR clinsig+likely+path[Filter]) AND '
            '(mol+cons+frameshift[Filter] OR mol+cons+missense[Filter] OR mol+cons+nonsense[Filter])'
        )
        # The first element of the esearch result is unused here; it is named
        # with a leading underscore so the builtin ``id`` is not shadowed.
        _ids, args = eu.esearch({'db': 'clinvar', 'term': term})

        args['db'] = 'clinvar'
        out = eu.esummary(args, count=args['count'])
        records = cv.SummaryList(out).to_list()

        # Flatten: one row per (variant, gene, trait key).
        rows = []
        for entry in records:
            variant_fields = [entry['variant_id']]
            variant_fields.extend(entry['variant'])
            for gene in entry['genes']:
                gene_fields = variant_fields + list(gene)
                for trait_key, trait_values in entry['trait'].items():
                    rows.append(gene_fields + [trait_key, "; ".join(trait_values)])

        df = pd.DataFrame(
            rows,
            columns=['id', 'VariantType', 'VarinatName', 'Pathogenic',
                     'Symbol', 'GeneID', 'Trait', 'Source'],
        )
        # Strip non-ASCII characters but keep the values as text: a bare
        # .encode() leaves bytes on Python 3, which cannot be concatenated
        # with the str pieces when building "Source(Trait)" below.
        df['Trait'] = [
            trait.encode('ascii', 'ignore').decode('ascii')
            for trait in df['Trait'].values
        ]

        # Aggregate per gene.
        data = []
        for gene_id, group in df.groupby('GeneID'):
            variant_types = [
                v for v in util.unique(group['VariantType']) if not pd.isnull(v)
            ]
            source_traits = [
                source + "(" + trait + ")"
                for source, trait in zip(group['Source'], group['Trait'])
                if not pd.isnull(source) and not pd.isnull(trait)
            ]
            data.append({
                'gene_id': gene_id,
                'variant_type': ";".join(variant_types),
                'source_trait': ";".join(source_traits),
            })

        df = pd.DataFrame(data).query('source_trait != ""')
        df.to_csv(self.fn_dest_clinvar, index=False)
Ejemplo n.º 2
0
 def fetch_pmid(self, PMID):
     """Fetch one PubMed record through E-utilities and flatten it to a dict.

     Calls ``efetch`` on the ``pubmed`` database for ``PMID`` and parses the
     response with ``pm.FetchList``. Returns ``{}`` when the parsed list is
     empty; otherwise returns a dict with the keys ``PMID``, ``Title``,
     ``Year``, ``Journal``, ``Month``, ``Volume``, ``Issue``, ``Page`` and
     ``Authors`` (author names joined by ``", "``), taken from the first
     parsed entry.
     """
     client = eutils.EUtils()
     query = {'db': 'pubmed', 'id': [PMID]}
     raw = client.efetch(query)
     wanted = ['pubmed_id', 'journal', 'title', 'author']
     entries = pm.FetchList(raw).to_list(wanted)
     if len(entries) == 0:
         return {}

     # A parsed entry is a flat mapping, e.g.
     # {'pubmed_id': '28824275', 'title': '...', 'journal.title': '...',
     #  'journal.year': '2017', 'journal.month': None, 'journal.day': None,
     #  'journal.volume': '31', 'journal.issue': '4',
     #  'journal.page': '257-266', 'author': ['Liede A', 'Amelio J', ...]}
     first = entries[0]
     field_map = (
         ('PMID', 'pubmed_id'),
         ('Title', 'title'),
         ('Year', 'journal.year'),
         ('Journal', 'journal.title'),
         ('Month', 'journal.month'),
         ('Volume', 'journal.volume'),
         ('Issue', 'journal.issue'),
         ('Page', 'journal.page'),
     )
     data = {label: first[key] for label, key in field_map}
     data['Authors'] = ", ".join(first['author'])
     return data
Ejemplo n.º 3
0
    def symbol(self):
        """Return what ``self._get_value`` yields for the ``./Name`` path."""
        name_value = self._get_value("./Name")
        return name_value

    def description(self):
        """Return what ``self._get_value`` yields for the ``./Description`` path."""
        description_value = self._get_value("./Description")
        return description_value

    def chromosome(self):
        """Return what ``self._get_value`` yields for the ``./Chromosome`` path."""
        chromosome_value = self._get_value("./Chromosome")
        return chromosome_value

    def mim(self):
        """Return the ``./Mim/int`` entries with each one collapsed to a string.

        Takes the list returned by ``self._get_nodes("./Mim/int")`` and, for
        every entry that is not ``None``, replaces it in place with the
        ``text`` of its children joined by single spaces. ``None`` entries
        are kept as they are. The same (mutated) list object is returned.
        """
        entries = self._get_nodes("./Mim/int")
        for position, entry in enumerate(entries):
            if entry is not None:
                entries[position] = " ".join(child.text for child in entry)
        return entries


if __name__ == "__main__":
    # Manual smoke test: query the "gene" database through E-utilities and
    # dump what the fetch and summary parsers extract.
    import eutils

    client = eutils.EUtils()

    # Full records: every attribute the parser reports, plus "summary" and "type".
    fetched = FetchList(client.efetch({'db': 'gene', 'id': '170743,51284'}))
    print(">>>Fetch")
    fetch_fields = fetched.attributes() + ["summary", "type"]
    print(fetched.to_list(fetch_fields))

    # Summary records: every attribute the parser reports.
    summarized = SummaryList(
        client.esummary({'db': 'gene', 'id': '170743,51284,466142'})
    )
    print(">>>Summary")
    print(summarized.to_list(summarized.attributes()))