def collect_pic50():
    """Concatenate the 'pic50' records from every source queried here.

    Sources are queried, and stacked, in this order: GPCRdb, ChEMBL,
    ChEMBL CYP450, BindingDB.  Each loader is called with
    ``dataframe=True``.

    Returns:
        The concatenated result with its index rebuilt from zero.
    """
    frames = [
        get_gpcrdb(assay_type='pic50', dataframe=True),
        get_chembl_by_assay_type(assay_type='pic50', dataframe=True),
        get_chembl_cyp450_by_assay_type(assay_type='pic50', dataframe=True),
        get_bindingdb_by_assay_type(assay_type='pic50', dataframe=True),
    ]
    combined = pd.concat(frames)
    # The per-source indices overlap after stacking, so discard them.
    return combined.reset_index(drop=True)
def collect_pkd():
    """Concatenate the 'pKd' records from every source queried here.

    Sources are queried, and stacked, in this order: KINOMEscan,
    ChEMBL CYP450, ChEMBL, BindingDB, GPCRdb.  Each loader is called with
    ``dataframe=True``.

    Returns:
        The concatenated result with its index rebuilt from zero.
    """
    frames = [
        get_kinomescan(assay_type='pKd', dataframe=True),
        get_chembl_cyp450_by_assay_type(assay_type='pKd', dataframe=True),
        get_chembl_by_assay_type(assay_type='pKd', dataframe=True),
        get_bindingdb_by_assay_type(assay_type='pKd', dataframe=True),
        get_gpcrdb(assay_type='pKd', dataframe=True),
    ]
    combined = pd.concat(frames)
    # The per-source indices overlap after stacking, so discard them.
    return combined.reset_index(drop=True)
def collect_pki():
    """Concatenate the pKi records from every source queried here.

    Sources are queried, and stacked, in this order: the Science, PLOS,
    PKIS and JCIM loaders, then GPCRdb, ChEMBL, ChEMBL CYP450 and
    BindingDB.  Each loader is called with ``dataframe=True``.

    Returns:
        The concatenated result with its index rebuilt from zero.
    """
    frames = [
        get_science_pki(dataframe=True),
        get_plos_pki(dataframe=True),
        get_pkis_pki(dataframe=True),
        get_jcim_pki(dataframe=True),
        get_gpcrdb(assay_type='pKi', dataframe=True),
        get_chembl_by_assay_type(assay_type='pKi', dataframe=True),
        # NOTE(review): lowercase 'pki' here while the neighbours pass
        # 'pKi' -- confirm the CYP450 loader treats the two the same.
        get_chembl_cyp450_by_assay_type(assay_type='pki', dataframe=True),
        get_bindingdb_by_assay_type(assay_type='pKi', dataframe=True),
    ]
    combined = pd.concat(frames)
    # The per-source indices overlap after stacking, so discard them.
    return combined.reset_index(drop=True)
def collect_kinases():
    """Assemble the kinase dataset from several sources.

    The JCIM, ChEMBL and BindingDB records (ChEMBL/BindingDB for the
    'pKd', 'pKi' and 'pic50' assay types) are restricted to rows whose
    'UniProt' value appears in the first column of
    ``../proteins/kinase_enzyme_classes.tsv``.  The KINOMEscan, Science,
    PLOS and PKIS records are appended afterwards without that filter.

    Returns:
        The concatenated result with its index rebuilt from zero.
    """
    kinomescan_frame = get_kinomescan(assay_type='pKd', dataframe=True)
    science_frame = get_science_pki(dataframe=True)
    plos_frame = get_plos_pki(dataframe=True)
    pkis_frame = get_pkis_pki(dataframe=True)
    jcim_frame = get_jcim_pki(dataframe=True)

    # Query ChEMBL then BindingDB for each assay type in turn, but keep
    # the two sources in separate lists so they can be stacked grouped
    # by source (all ChEMBL first, then all BindingDB).
    chembl_frames = []
    bindingdb_frames = []
    for assay in ('pKd', 'pKi', 'pic50'):
        chembl_frames.append(
            get_chembl_by_assay_type(assay_type=assay, dataframe=True)
        )
        bindingdb_frames.append(
            get_bindingdb_by_assay_type(assay_type=assay, dataframe=True)
        )

    unfiltered = pd.concat([jcim_frame] + chembl_frames + bindingdb_frames)

    # First tab-separated field of every line is taken as a UniProt ID.
    with open('../proteins/kinase_enzyme_classes.tsv', 'r') as handle:
        kinase_ids = [row.strip().split('\t')[0] for row in handle]

    kinase_rows = unfiltered[unfiltered['UniProt'].isin(kinase_ids)]
    combined = pd.concat(
        [kinase_rows, kinomescan_frame, science_frame, plos_frame, pkis_frame]
    )
    return combined.reset_index(drop=True)
def collect_by_uniprot_file(uniprot_file):
    """Collect the records whose 'UniProt' value is listed in a file.

    Args:
        uniprot_file: Path to a tab-separated text file; the first field
            of every line is used as a UniProt ID (for example
            ``../proteins/disease_related_genes_id_mapping.tsv``).

    Returns:
        A ``(df, df_bin)`` tuple.  ``df`` holds the matching rows from
        ChEMBL, BindingDB and GPCRdb (stacked in that order); ``df_bin``
        holds the matching rows from DrugBank.  Both have their index
        rebuilt from zero.
    """
    with open(uniprot_file, 'r') as handle:
        wanted_ids = [row.strip().split('\t')[0] for row in handle]

    # GPCRdb is queried first but stacked last; assay-type spellings are
    # passed to each loader exactly as given here.
    gpcrdb_frames = [
        get_gpcrdb(assay_type=assay, dataframe=True)
        for assay in ('pKd', 'pic50', 'pKi')
    ]
    chembl_frames = [
        get_chembl_by_assay_type(assay_type=assay, dataframe=True)
        for assay in ('pic50', 'pki', 'pkd')
    ]
    bindingdb_frames = [
        get_bindingdb_by_assay_type(assay_type=assay, dataframe=True)
        for assay in ('pic50', 'pki', 'pkd')
    ]
    drugbank_frame = get_drugbank(dataframe=True)

    stacked = pd.concat(chembl_frames + bindingdb_frames + gpcrdb_frames)
    selected = stacked[stacked['UniProt'].isin(wanted_ids)].reset_index(drop=True)

    drugbank_hits = drugbank_frame['UniProt'].isin(wanted_ids)
    selected_bin = drugbank_frame[drugbank_hits].reset_index(drop=True)
    return selected, selected_bin
def collect_cyp450():
    """Collect the records for the UniProt IDs in the CYP450 mapping file.

    IDs are read from the first tab-separated field of every line of
    ``../proteins/cytochrome_p450_id_mapping.tsv``.

    Returns:
        A ``(cyp_df_cont, cyp_df_bin)`` tuple.  The first element holds
        the matching rows from ChEMBL, ChEMBL CYP450 and BindingDB.  The
        second holds the matching rows from PubChem CYP450 and DrugBank,
        de-duplicated on ('InChIKey', 'UniProt', 'Activity') keeping the
        first occurrence.  Both have their index rebuilt from zero.
    """
    with open("../proteins/cytochrome_p450_id_mapping.tsv", 'r') as handle:
        cyp_ids = [row.strip().split('\t')[0] for row in handle]

    assay_types = ('pic50', 'pki', 'pkd')
    chembl_frames = [
        get_chembl_by_assay_type(assay_type=assay, dataframe=True)
        for assay in assay_types
    ]
    # The CYP450-specific ChEMBL loader is only queried for 'pic50'.
    chembl_cyp_frame = get_chembl_cyp450_by_assay_type(
        assay_type='pic50', dataframe=True
    )
    bindingdb_frames = [
        get_bindingdb_by_assay_type(assay_type=assay, dataframe=True)
        for assay in assay_types
    ]
    drugbank_frame = get_drugbank(dataframe=True)
    pubchem_cyp_frame = get_pubchem_cyp450(dataframe=True)

    stacked_cont = pd.concat(chembl_frames + [chembl_cyp_frame] + bindingdb_frames)
    stacked_bin = pd.concat([pubchem_cyp_frame, drugbank_frame])

    selected_cont = stacked_cont[
        stacked_cont['UniProt'].isin(cyp_ids)
    ].reset_index(drop=True)

    # PubChem rows come first in the stack, so keep='first' prefers them
    # over DrugBank rows with the same key.
    selected_bin = (
        stacked_bin[stacked_bin['UniProt'].isin(cyp_ids)]
        .drop_duplicates(subset=['InChIKey', 'UniProt', 'Activity'], keep='first')
        .reset_index(drop=True)
    )
    return selected_cont, selected_bin