def collect_pic50():
    """Stack all 'pic50' tables into one frame with a fresh index.

    Calls, in order, get_gpcrdb, get_chembl_by_assay_type,
    get_chembl_cyp450_by_assay_type and get_bindingdb_by_assay_type,
    each with assay_type='pic50' and dataframe=True, and concatenates
    the results in that same order.

    Returns:
        The concatenated frame, re-indexed from 0 (the per-source
        indices are discarded).
    """
    assay = 'pic50'
    # List elements are evaluated left to right, so the helpers run in
    # the same order in which their results are stacked.
    sources = [
        get_gpcrdb(assay_type=assay, dataframe=True),
        get_chembl_by_assay_type(assay_type=assay, dataframe=True),
        get_chembl_cyp450_by_assay_type(assay_type=assay, dataframe=True),
        get_bindingdb_by_assay_type(assay_type=assay, dataframe=True),
    ]
    combined = pd.concat(sources)
    return combined.reset_index(drop=True)
def collect_pkd():
    """Stack all 'pKd' tables into one frame with a fresh index.

    Calls, in order, get_kinomescan, get_chembl_cyp450_by_assay_type,
    get_chembl_by_assay_type, get_bindingdb_by_assay_type and
    get_gpcrdb, each with assay_type='pKd' and dataframe=True, and
    concatenates the results in that same order.

    Returns:
        The concatenated frame, re-indexed from 0.
    """
    assay = 'pKd'
    # Evaluation order of the list literal matches both the call order
    # and the row order of the final frame.
    sources = [
        get_kinomescan(assay_type=assay, dataframe=True),
        get_chembl_cyp450_by_assay_type(assay_type=assay, dataframe=True),
        get_chembl_by_assay_type(assay_type=assay, dataframe=True),
        get_bindingdb_by_assay_type(assay_type=assay, dataframe=True),
        get_gpcrdb(assay_type=assay, dataframe=True),
    ]
    combined = pd.concat(sources)
    return combined.reset_index(drop=True)
def collect_pki():
    """Stack all pKi tables into one frame with a fresh index.

    Four sources are fetched without an assay-type argument
    (get_science_pki, get_plos_pki, get_pkis_pki, get_jcim_pki); the
    remaining four (get_gpcrdb, get_chembl_by_assay_type,
    get_chembl_cyp450_by_assay_type, get_bindingdb_by_assay_type) are
    asked for the pKi assay type. Results are concatenated in call
    order.

    Returns:
        The concatenated frame, re-indexed from 0.
    """
    sources = [
        get_science_pki(dataframe=True),
        get_plos_pki(dataframe=True),
        get_pkis_pki(dataframe=True),
        get_jcim_pki(dataframe=True),
        get_gpcrdb(assay_type='pKi', dataframe=True),
        get_chembl_by_assay_type(assay_type='pKi', dataframe=True),
        # NOTE(review): lowercase 'pki' here, 'pKi' in the neighbouring
        # calls -- confirm the helper treats assay_type
        # case-insensitively.
        get_chembl_cyp450_by_assay_type(assay_type='pki', dataframe=True),
        get_bindingdb_by_assay_type(assay_type='pKi', dataframe=True),
    ]
    combined = pd.concat(sources)
    return combined.reset_index(drop=True)
def collect_kinases():
    """Assemble the kinase frame from filtered and unfiltered sources.

    The get_jcim_pki, get_chembl_by_assay_type and
    get_bindingdb_by_assay_type results (pKd, pKi and pic50 for the
    latter two) are concatenated and restricted to rows whose 'UniProt'
    value appears in the first tab-separated column of
    '../proteins/kinase_enzyme_classes.tsv'. The get_kinomescan (pKd),
    get_science_pki, get_plos_pki and get_pkis_pki results are then
    appended without any filtering.

    Returns:
        The combined frame, re-indexed from 0.
    """
    # Sources appended as-is at the end (not filtered by the ID list).
    kinomescan_tbl = get_kinomescan(assay_type='pKd', dataframe=True)
    science_tbl = get_science_pki(dataframe=True)
    plos_tbl = get_plos_pki(dataframe=True)
    pkis_tbl = get_pkis_pki(dataframe=True)

    # Sources that go through the kinase UniProt filter.
    jcim_tbl = get_jcim_pki(dataframe=True)
    chembl_pkd = get_chembl_by_assay_type(assay_type='pKd', dataframe=True)
    bindingdb_pkd = get_bindingdb_by_assay_type(assay_type='pKd', dataframe=True)
    chembl_pki = get_chembl_by_assay_type(assay_type='pKi', dataframe=True)
    bindingdb_pki = get_bindingdb_by_assay_type(assay_type='pKi', dataframe=True)
    chembl_pic50 = get_chembl_by_assay_type(assay_type='pic50', dataframe=True)
    bindingdb_pic50 = get_bindingdb_by_assay_type(assay_type='pic50', dataframe=True)

    candidates = pd.concat([
        jcim_tbl,
        chembl_pkd, chembl_pki, chembl_pic50,
        bindingdb_pkd, bindingdb_pki, bindingdb_pic50,
    ])

    # First tab-separated field of every line is taken as a UniProt ID.
    with open('../proteins/kinase_enzyme_classes.tsv', 'r') as handle:
        kinase_ids = [row.strip().split('\t')[0] for row in handle]

    is_kinase = candidates['UniProt'].isin(kinase_ids)
    kinase_rows = candidates[is_kinase]

    merged = pd.concat([kinase_rows, kinomescan_tbl, science_tbl, plos_tbl, pkis_tbl])
    return merged.reset_index(drop=True)
def collect_by_uniprot_file(uniprot_file):
    """Collect activity rows for the UniProt IDs listed in a file.

    Args:
        uniprot_file: Path to a tab-separated text file; the first
            field of each line is used as a UniProt ID (an earlier
            version hard-coded
            '../proteins/disease_related_genes_id_mapping.tsv').

    Returns:
        A pair ``(df, df_bin)``:
        * ``df`` -- rows from the get_chembl_by_assay_type,
          get_bindingdb_by_assay_type and get_gpcrdb results (three
          assay types each) whose 'UniProt' value is in the ID list,
          re-indexed from 0.
        * ``df_bin`` -- rows of the get_drugbank result whose 'UniProt'
          value is in the ID list, re-indexed from 0.
    """
    with open(uniprot_file, 'r') as handle:
        wanted_ids = [row.strip().split('\t')[0] for row in handle]

    gpcrdb_pkd = get_gpcrdb(assay_type='pKd', dataframe=True)
    gpcrdb_pic50 = get_gpcrdb(assay_type='pic50', dataframe=True)
    gpcrdb_pki = get_gpcrdb(assay_type='pKi', dataframe=True)
    chembl_pic50 = get_chembl_by_assay_type(assay_type='pic50', dataframe=True)
    chembl_pki = get_chembl_by_assay_type(assay_type='pki', dataframe=True)
    chembl_pkd = get_chembl_by_assay_type(assay_type='pkd', dataframe=True)
    bindingdb_pic50 = get_bindingdb_by_assay_type(assay_type='pic50', dataframe=True)
    bindingdb_pki = get_bindingdb_by_assay_type(assay_type='pki', dataframe=True)
    bindingdb_pkd = get_bindingdb_by_assay_type(assay_type='pkd', dataframe=True)
    drugbank_tbl = get_drugbank(dataframe=True)

    # Row order of the result: ChEMBL, then BindingDB, then GPCRdb.
    stacked = pd.concat([
        chembl_pic50, chembl_pki, chembl_pkd,
        bindingdb_pic50, bindingdb_pki, bindingdb_pkd,
        gpcrdb_pkd, gpcrdb_pic50, gpcrdb_pki,
    ])
    df = stacked[stacked['UniProt'].isin(wanted_ids)].reset_index(drop=True)

    in_list = drugbank_tbl['UniProt'].isin(wanted_ids)
    df_bin = drugbank_tbl[in_list].reset_index(drop=True)
    return df, df_bin
def collect_cyp450():
    """Collect rows for the cytochrome P450 UniProt ID list.

    The ID list is the first tab-separated field of every line in
    "../proteins/cytochrome_p450_id_mapping.tsv".

    Returns:
        A pair ``(cyp_df_cont, cyp_df_bin)``:
        * ``cyp_df_cont`` -- rows from the get_chembl_by_assay_type,
          get_chembl_cyp450_by_assay_type and
          get_bindingdb_by_assay_type results whose 'UniProt' value is
          in the ID list, re-indexed from 0.
        * ``cyp_df_bin`` -- rows from the get_pubchem_cyp450 and
          get_drugbank results whose 'UniProt' value is in the ID list,
          de-duplicated on ('InChIKey', 'UniProt', 'Activity') keeping
          the first occurrence, re-indexed from 0.
    """
    with open("../proteins/cytochrome_p450_id_mapping.tsv", 'r') as handle:
        cyp_ids = [row.strip().split('\t')[0] for row in handle]

    chembl_pic50 = get_chembl_by_assay_type(assay_type='pic50', dataframe=True)
    chembl_pki = get_chembl_by_assay_type(assay_type='pki', dataframe=True)
    chembl_pkd = get_chembl_by_assay_type(assay_type='pkd', dataframe=True)
    # NOTE(review): only the 'pic50' assay type is requested from the
    # CYP450-specific ChEMBL helper -- confirm that is intended.
    chembl_cyp_pic50 = get_chembl_cyp450_by_assay_type(assay_type='pic50', dataframe=True)
    bindingdb_pic50 = get_bindingdb_by_assay_type(assay_type='pic50', dataframe=True)
    bindingdb_pki = get_bindingdb_by_assay_type(assay_type='pki', dataframe=True)
    bindingdb_pkd = get_bindingdb_by_assay_type(assay_type='pkd', dataframe=True)
    drugbank_tbl = get_drugbank(dataframe=True)
    pubchem_tbl = get_pubchem_cyp450(dataframe=True)

    stacked_cont = pd.concat([
        chembl_pic50, chembl_pki, chembl_pkd, chembl_cyp_pic50,
        bindingdb_pic50, bindingdb_pki, bindingdb_pkd,
    ])
    # PubChem rows come first, so keep='first' below prefers them over
    # DrugBank rows with the same key.
    stacked_bin = pd.concat([pubchem_tbl, drugbank_tbl])

    cont_result = stacked_cont[stacked_cont['UniProt'].isin(cyp_ids)].reset_index(drop=True)

    bin_result = (
        stacked_bin[stacked_bin['UniProt'].isin(cyp_ids)]
        .drop_duplicates(subset=['InChIKey', 'UniProt', 'Activity'], keep='first')
        .reset_index(drop=True)
    )
    return cont_result, bin_result