Example #1
0
def addMolweight(path, vsChembl, user, pword, host, port):
    """Append ``molweight`` and ``logP`` columns to a tab-separated file.

    The file at ``path`` is read completely, a sibling file ``<path>_sed`` is
    written with the two extra columns and then renamed over ``path``.  The
    values are ``mw_freebase`` and ``alogp`` from ``compound_properties``,
    looked up by the ``molregno`` column of each data row.  A compound without
    a matching record gets ``None`` in both columns.

    Inputs:
    path -- tab-separated file whose header row contains a 'molregno' column
    vsChembl, user, pword, host, port -- handed unchanged to
        queryDevice.queryDevice

    Raises ValueError if the header has no 'molregno' column.
    """
    import queryDevice
    import os
    import time

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the column is also found when it
    # is the last one in the header.
    header = lines[0].rstrip('\n').split('\t')
    if 'molregno' not in header:
        raise ValueError("no 'molregno' column in header of %s" % path)
    idx = header.index('molregno')

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\tmolweight\tlogP\n' % lines[0].rstrip('\n'))
        for line in lines[1:]:
            # Short pause before every query, kept from the original
            # (presumably to limit load on the database server).
            time.sleep(0.03)
            molregno = line.rstrip('\n').split('\t')[idx]
            rows = queryDevice.queryDevice(
                "SELECT mw_freebase, alogp FROM compound_properties WHERE molregno = %s" % molregno,
                vsChembl, user, pword, host, port)
            try:
                mw, logP = rows[0][0], rows[0][1]
            except IndexError:
                # No record for this compound: both values are unknown.
                mw = logP = None
            out.write("%s\t%s\t%s\n" % (line.rstrip('\n'), mw, logP))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #2
0
def actSizeMap(chemblTargets, release, user, pword, host, port):
    """Fetch the distinct activity_ids stored in map_pfam for each mapType.

    One query is run per mapType, in the order 'multi', 'single', 'conflict'.
    chemblTargets is accepted but not used.

    Returns the raw query results as the tuple (single, multi, conflict).
    """
    import queryDevice

    template = "SELECT DISTINCT activity_id FROM map_pfam WHERE mapType = '%s'"
    by_type = {}
    for map_type in ('multi', 'single', 'conflict'):
        by_type[map_type] = queryDevice.queryDevice(
            template % map_type, release, user, pword, host, port)
    return by_type['single'], by_type['multi'], by_type['conflict']
Example #3
0
def groupSizeMap(chemblTargets, release, user, pword, host, port):
    """Fetch the distinct protein accessions in map_pfam for each mapType.

    One query is run per mapType, in the order 'multi', 'single', 'conflict'.
    chemblTargets is accepted but not used.

    Returns the raw query results as the tuple (single, multi, conflict).
    """
    import queryDevice

    template = "SELECT DISTINCT protein_accession FROM map_pfam WHERE mapType = '%s'"
    by_type = {}
    for map_type in ('multi', 'single', 'conflict'):
        by_type[map_type] = queryDevice.queryDevice(
            template % map_type, release, user, pword, host, port)
    return by_type['single'], by_type['multi'], by_type['conflict']
Example #4
0
def getUniprotTargets(release, user, pword, host, port):
    """Return the accessions of all SWISS-PROT/TREMBL targets in a release.

    release is split on '_' and its second field is read as the release
    number.  From release 15 upwards the accessions come from
    component_sequences joined to target_components, otherwise from
    target_dictionary.

    Returns a list holding the first column of every result row.
    """
    if int(release.split('_')[1]) >= 15:
        sql = """SELECT cs.accession
        FROM component_sequences cs 
            JOIN target_components tc 
            ON tc.component_id = cs.component_id  
        WHERE db_source IN('SWISS-PROT', 'TREMBL')"""
    else:
        sql = """SELECT protein_accession
        FROM target_dictionary 
        WHERE db_source IN('SWISS-PROT', 'TREMBL')"""
    rows = queryDevice.queryDevice(sql, release, user, pword, host, port)
    return [row[0] for row in rows]
Example #5
0
def researchCode(pfamDict, release, user, pword, host, port):
  """Write the research codes of the compounds listed in data/map_pfam.txt.

  For every data row of data/map_pfam.txt (the header row is skipped) the
  RESEARCH_CODE synonym of the compound is looked up in molecule_synonyms.
  Rows with a hit are written to data/resCodeExamples.tab as research_code,
  pfam, uniprot and the number of domains recorded for that protein in
  pfamDict.  Rows without a hit are skipped.

  Inputs:
  pfamDict -- mapping of uniprot accession to a dict with a 'domains' entry
  release, user, pword, host, port -- handed unchanged to
      queryDevice.queryDevice

  Raises KeyError if an accession in the file is missing from pfamDict.
  Returns None.
  """
  import time
  import queryDevice

  # Context managers make sure both files are closed, also on errors.
  with open('data/map_pfam.txt', 'r') as infile:
    lines = infile.readlines()

  with open('data/resCodeExamples.tab', 'w') as out:
    out.write('research_code\tpfam\tuniprot\tnPfam\n')
    for line in lines[1:]:
      elements = line.split('\t')
      molregno = elements[2]
      pfam = elements[1]
      uniprot = elements[3]
      nPfam = len(pfamDict[uniprot]['domains'])
      # Short pause before every query, kept from the original.
      time.sleep(0.03)
      hits = queryDevice.queryDevice("SELECT synonyms FROM molecule_synonyms WHERE syn_type = 'RESEARCH_CODE' AND molregno = %s" % molregno, release, user, pword, host, port)
      try:
        resCode = hits[0][0]
      except IndexError:
        # No research code recorded for this compound.
        continue
      out.write('%s\t%s\t%s\t%s\n' % (resCode, pfam, uniprot, nPfam))

  return
Example #6
0
def countLigs(humanTargets, chemblTargets, release, user, pword, host, port):
  """Count the distinct ligands per domain and per target in map_pfam.

  Every accession in humanTargets that is also in chemblTargets is looked up
  in map_pfam; accessions missing from chemblTargets are skipped.  The number
  of distinct molregnos per domain is written to data/domLigs.tab and the
  number per accession to data/targLigs.tab.  Both files start with the
  header 'domain\\tfreq'.

  Returns None.
  """
  import queryDevice
  domLigands = {}
  targLigands = {}
  for accession in humanTargets:
    if accession not in chemblTargets:
      continue
    rows = queryDevice.queryDevice("SELECT domain, molregno FROM map_pfam WHERE protein_accession = '%s'" % accession, release, user, pword, host, port)
    for row in rows:
      domain, lig = row[0], row[1]
      # Inner dictionaries are used as sets; the stored value is irrelevant.
      domLigands.setdefault(domain, {})[lig] = 0
      targLigands.setdefault(accession, {})[lig] = 0

  domOut = open('data/domLigs.tab','w')
  domOut.write('domain\tfreq\n')
  targetOut = open('data/targLigs.tab','w')
  targetOut.write('domain\tfreq\n')
  for domain, ligs in domLigands.items():
    domOut.write('%s\t%s\n' % (domain, len(ligs)))
  domOut.close()
  for target, ligs in targLigands.items():
    targetOut.write('%s\t%s\n' % (target, len(ligs)))
  targetOut.close()
Example #7
0
def getRatio(pfamDict,humanTargets, release, user, pword, host, port):
  """Store the domain-to-sequence length ratio of every target in pfamDict.

  For each target the lengths (end - start) of all its domains are summed
  and divided by the sequence length minus one.  The sequence is taken from
  humanTargets when present, otherwise from target_dictionary.  Targets
  for which no sequence is found keep the ratio 'NA'.

  Returns pfamDict, which is modified in place (key 'ratio' per target).
  """
  import numpy as np
  import queryDevice

  for target, entry in pfamDict.items():
    entry['ratio'] = 'NA'
    try:
      seq_len = len(humanTargets[target]) - 1
    except KeyError:
      rows = queryDevice.queryDevice("SELECT protein_sequence FROM target_dictionary WHERE protein_accession = '%s'" % target, release, user, pword, host, port)
      try:
        seq_len = len(rows[0][0]) - 1
      except IndexError:
        # Sequence unknown: leave the ratio at 'NA'.
        continue

    dom_len = 0
    for i in range(len(entry['domains'])):
      dom_start = entry['start'][i]
      dom_end = entry['end'][i]
      dom_len += dom_end - dom_start

    entry['ratio'] = np.true_divide(dom_len, seq_len)

  return pfamDict
def get_el_targets(params):
    """Query ChEMBL for the targets subject to the mapping, with their counts.

    Considers activities from assays of type 'B' with relationship_type 'D',
    of standard_type Ki, Kd, IC50, EC50 or AC50, reported with relation '='
    in nM and a standard_value of at most params['threshold'] * 1000, on
    targets of type SINGLE PROTEIN or PROTEIN COMPLEX.  Activities expressed
    in log-converted form (e.g. pIC50) are not covered.  This function works
    with chembl_15 upwards.

    Inputs:
    params -- dictionary; 'threshold' is read here and the whole dictionary
              is handed to queryDevice.queryDevice

    Returns a list of tuples
    [(tid, target_type, domain_count, assay_count, act_count),...] and prints
    the number of rows retrieved.
    """
    max_value = int(params['threshold']) * 1000
    sql = """
            SELECT DISTINCT dc.tid, dc.target_type, dc.dc, COUNT(DISTINCT act.assay_id), COUNT(DISTINCT activity_id)
            FROM assays ass
            JOIN(
                      SELECT td.tid, td.target_type, COUNT(cd.domain_id) as dc
                      FROM target_dictionary td
                      JOIN target_components tc
                        ON tc.tid = td.tid
		      JOIN component_sequences cs
			ON cs.component_id = tc.component_id
                      JOIN component_domains cd
 			ON cd.component_id = cs.component_id
                      WHERE td.target_type IN('SINGLE PROTEIN', 'PROTEIN COMPLEX')
                      GROUP BY td.tid
                     ) as dc
              ON dc.tid = ass.tid
            JOIN activities act
              ON act.assay_id = ass.assay_id
            WHERE act.standard_type IN('Ki','Kd','IC50','EC50', 'AC50')
            AND ass.relationship_type = 'D'
            AND assay_type IN('B')
            AND act.standard_relation IN('=')
            AND standard_units = 'nM'
            AND standard_value <= %s
            GROUP BY dc.tid ORDER BY COUNT(activity_id)""" % max_value
    data = queryDevice.queryDevice(sql, params)
    # Joining with single spaces reproduces the output of the former
    # comma-separated print statement.
    print(" ".join(["retrieved data for ", str(len(data)), "tids."]))
    return data
Example #9
0
def interAssay():
    """Build the annotated inter-assay table for every configured species.

    The configuration is read from gla.yaml; the keys used here are 'species'
    and 'release', and the whole dictionary is passed on to the helpers.  For
    each species the activities selected by queries.activities are fetched,
    stored by mkDict.activities in data/inter_compDict_<species>_<release>.pkl
    and written by writePairs.interAssaySampled to
    data/interAssay_<species>_<release>.tab.  That table is then extended by
    addProperties.addMolweight, addTargetClass and addSeq100.  Spaces in the
    species name are replaced by underscores in the file names.

    Returns None.
    """
    import queryDevice
    import mkDict
    import queries
    import writePairs
    import addProperties
    # Not referenced below; kept in case importing it has side effects.
    import readIds
    import yaml

    # Read config file; the context manager closes the handle again.
    with open('gla.yaml') as paramFile:
        params = yaml.safe_load(paramFile)
    species = params['species']

    # Get information for all relevant activities from ChEMBL.
    for spec in species:
        # str.replace is equivalent to the Python-2-only string.replace.
        specName = spec.replace(' ', '_')
        dictFile = "data/inter_compDict_%s_%s.pkl" % (specName, params['release'])
        results = "data/interAssay_%s_%s.tab" % (specName, params['release'])
        query = queries.activities(spec)
        acts = queryDevice.queryDevice(query, params)
        mkDict.activities(acts, dictFile)
        writePairs.interAssaySampled(results, dictFile)
        addProperties.addMolweight("molregno", results, params)
        addProperties.addTargetClass("L1", "accession", results, params)
        addProperties.addSeq100(results)
def getRatio(pfamDict, humanTargets, release, user, pword, host, port):
    """Annotate every target in pfamDict with its domain coverage ratio.

    The ratio is the summed length (end - start) of the target's domains
    divided by the sequence length minus one.  Sequences are read from
    humanTargets and, when a target is missing there, from target_dictionary.
    A target without any sequence keeps the ratio 'NA'.

    Returns pfamDict, updated in place under the key 'ratio'.
    """
    import numpy as np
    import queryDevice

    lookup = "SELECT protein_sequence FROM target_dictionary WHERE protein_accession = '%s'"

    for target in pfamDict.keys():
        info = pfamDict[target]
        info['ratio'] = 'NA'
        try:
            seq_len = len(humanTargets[target]) - 1
        except KeyError:
            rows = queryDevice.queryDevice(
                lookup % target, release, user, pword, host, port)
            try:
                seq_len = len(rows[0][0]) - 1
            except IndexError:
                # No sequence available: the ratio stays 'NA'.
                continue

        covered = 0
        for i, _ in enumerate(info['domains']):
            first = info['start'][i]
            last = info['end'][i]
            covered += last - first

        info['ratio'] = np.true_divide(covered, seq_len)

    return pfamDict
Example #11
0
def get_el_targets(RELEASE, USER, PWORD, HOST, PORT):
    """Query ChEMBL for multi-domain targets and their activity counts.

    Selects proteins with more than one row in pfam_domains and counts their
    distinct activities of standard_type Ki, Kd, IC50, EC50 or AC50 with
    relation '=', standard_value below 50000, assay_type 'B' and an
    assay2target link with relationship_type 'D', multi = 0 and complex = 0.

    Inputs:
    RELEASE, USER, PWORD, HOST, PORT -- handed unchanged to
        queryDevice.queryDevice

    Returns the query result, one row per protein:
    (protein_accession, domain_count, activity_count), ordered by activity
    count.
    """
    sql = """
            SELECT DISTINCT td.protein_accession, dc.dc, COUNT(DISTINCT activity_id) 
            FROM target_dictionary td
            JOIN(
                      SELECT pd.protein_accession, COUNT(protein_accession) as dc 
                      FROM pfam_domains pd 
                      GROUP BY pd.protein_accession
                     ) as dc
              ON dc.protein_accession = td.protein_accession
            JOIN assay2target a2t 
              ON td.tid = a2t.tid 
            JOIN assays a 
              ON a.assay_id = a2t.assay_id 
            JOIN activities act 
              ON act.assay_id = a.assay_id
            WHERE act.standard_type IN('Ki','Kd','IC50','EC50', 'AC50') 
            AND a2t.multi=0 
            AND a2t.complex =0 
            AND a2t.relationship_type = 'D' 
            AND act.relation = '=' 
            AND assay_type = 'B' 
            AND standard_value < 50000
            AND dc.dc > 1
            GROUP BY td.protein_accession ORDER BY COUNT(DISTINCT activity_id)"""
    return queryDevice.queryDevice(sql, RELEASE, USER, PWORD, HOST, PORT)
Example #12
0
def addTargetClass(level, key, path, params):
    """Append a ``target_class_<level>`` column to a tab-separated file.

    The accessions found in column ``key`` are looked up with a single query
    against target_class joined to target_dictionary.  The result is written
    to ``<path>_sed``, which is then renamed over ``path``.  Accessions
    without a matching record get the value 0.

    Inputs:
    level -- column of target_class to select (also used in the new header)
    key -- name of the header column holding the protein accession
    path -- tab-separated file to extend in place
    params -- handed unchanged to queryDevice.queryDevice

    Raises ValueError if the header has no column named ``key``.
    """
    import queryDevice
    import os

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the key column is also found
    # (and read cleanly) when it is the last column.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)

    row_keys = [line.rstrip('\n').split('\t')[idx] for line in lines[1:]]
    accessions = dict.fromkeys(row_keys, 0)
    accStr = "','".join(map(str, accessions.keys()))
    data = queryDevice.queryDevice("SELECT protein_accession, %s FROM target_class tc JOIN target_dictionary td ON td.tid = tc.tid  WHERE protein_accession IN('%s') "%(level, accStr), params)
    for tup in data:
        accessions[tup[0]] = tup[1]

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\ttarget_class_%s\n' % (lines[0].rstrip('\n'), level))
        for line, acc in zip(lines[1:], row_keys):
            out.write("%s\t%s\n" % (line.rstrip('\n'), accessions[acc]))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #13
0
def getIntacts(uniDict , release, user, pword, host, port):
  """Collect the compound/target pairs recorded for a set of ChEMBL ids.

  The keys of uniDict are used as chembl_ids.  Only activities on targets
  with a protein accession and compounds with mw_freebase <= 1000 are
  selected.

  Returns a nested dictionary {protein_accession: {chembl_id: {}}}.
  """
  import pickle
  import queryDevice

  idString = "\',\'".join(uniDict.keys())
  sql = "SELECT md.chembl_id, td.protein_accession \
  FROM activities act \
  JOIN assay2target a2t ON act.assay_id = a2t.assay_id \
  JOIN target_dictionary td ON a2t.tid = td.tid \
  JOIN molecule_dictionary md ON act.molregno = md.molregno \
  JOIN compound_properties cp ON md.molregno = cp.molregno \
  WHERE td.protein_accession IS NOT NULL AND cp.mw_freebase <= 1000 AND md.chembl_id IN('%s')" % idString

  intactDict = {}
  for row in queryDevice.queryDevice(sql, release, user, pword, host, port):
    chembl_id, target = row[0], row[1]
    intactDict.setdefault(target, {})[chembl_id] = {}

  return intactDict
Example #14
0
def mkIntactDictAllPDBs(release, user, pword, host, port):
    """Collect compound/target pairs for compound records with src_id = 6.

    Only activities on targets with a protein accession are selected.

    Returns the tuple (intactDict, molDict), where intactDict is
    {protein_accession: {molregno: {}}} and molDict maps each molregno to
    its compound_key.
    """
    import pickle
    import queryDevice

    sql = "SELECT cr.molregno, compound_key,protein_accession \
  FROM compound_records cr \
  JOIN activities act ON cr.molregno = act.molregno \
  JOIN assay2target a2t ON act.assay_id = a2t.assay_id \
  JOIN target_dictionary td ON a2t.tid = td.tid \
  WHERE src_id = 6 AND protein_accession IS NOT NULL"
    rows = queryDevice.queryDevice(sql, release, user, pword, host, port)

    intactDict = {}
    molDict = {}
    for row in rows:
        molregno, cmpdId, target = row[0], row[1], row[2]
        molDict[molregno] = cmpdId
        intactDict.setdefault(target, {})[molregno] = {}

    return (intactDict, molDict)
Example #15
0
def groupSizeMap(chemblTargets, release, user, pword, host, port):
    """Query map_pfam for the distinct protein accessions of each mapType.

    The queries run in the order 'multi', 'single', 'conflict'.
    chemblTargets is accepted but not used.

    Returns the raw query results as the tuple (single, multi, conflict).
    """
    import queryDevice

    def fetch(sql):
        # All three queries share the same connection arguments.
        return queryDevice.queryDevice(sql, release, user, pword, host, port)

    multi = fetch(
        "SELECT DISTINCT protein_accession FROM map_pfam WHERE mapType = 'multi'")
    single = fetch(
        "SELECT DISTINCT protein_accession FROM map_pfam WHERE mapType = 'single'")
    conflict = fetch(
        "SELECT DISTINCT protein_accession FROM map_pfam WHERE mapType = 'conflict'")

    return single, multi, conflict
Example #16
0
def researchCode(pfamDict, release, user, pword, host, port):
    """Write the research codes of the compounds listed in data/map_pfam.txt.

    For every data row of data/map_pfam.txt (the header row is skipped) the
    RESEARCH_CODE synonym of the compound is looked up in molecule_synonyms.
    Rows with a hit are written to data/resCodeExamples.tab as research_code,
    pfam, uniprot and the number of domains recorded for that protein in
    pfamDict.  Rows without a hit are skipped.

    Inputs:
    pfamDict -- mapping of uniprot accession to a dict with a 'domains' entry
    release, user, pword, host, port -- handed unchanged to
        queryDevice.queryDevice

    Raises KeyError if an accession in the file is missing from pfamDict.
    Returns None.
    """
    import time
    import queryDevice

    # Context managers make sure both files are closed, also on errors.
    with open('data/map_pfam.txt', 'r') as infile:
        lines = infile.readlines()

    with open('data/resCodeExamples.tab', 'w') as out:
        out.write('research_code\tpfam\tuniprot\tnPfam\n')
        for line in lines[1:]:
            elements = line.split('\t')
            molregno = elements[2]
            pfam = elements[1]
            uniprot = elements[3]
            nPfam = len(pfamDict[uniprot]['domains'])
            # Short pause before every query, kept from the original.
            time.sleep(0.03)
            hits = queryDevice.queryDevice(
                "SELECT synonyms FROM molecule_synonyms WHERE syn_type = 'RESEARCH_CODE' AND molregno = %s"
                % molregno, release, user, pword, host, port)
            try:
                resCode = hits[0][0]
            except IndexError:
                # No research code recorded for this compound.
                continue
            out.write('%s\t%s\t%s\t%s\n' % (resCode, pfam, uniprot, nPfam))

    return
Example #17
0
def getLigandsForTarget(target, release, user, pword, host, port):
  """Fetch the binding activities recorded against one protein accession.

  Selects activities with relation '=' and standard_type Ki, Kd, IC50, EC50,
  -Log Ki, pKd, pA2, pI or pKa from assays of type 'B' whose assay2target
  link has relationship_type 'D', multi = 0 and complex = 0.

  Inputs:
  target -- protein accession inserted into the WHERE clause
  release, user, pword, host, port -- handed unchanged to
      queryDevice.queryDevice

  Returns the query result; each row holds molregno, standard_value,
  standard_type, standard_units, canonical_smiles, relation and activity_id.
  """
  import queryDevice

  sql = "SELECT DISTINCT act.molregno, standard_value,\
         standard_type, standard_units, canonical_smiles, act.relation, act.activity_id \
                                                                                \
                                        FROM activities act \
                                        JOIN assay2target a2t \
                                            ON act.assay_id = a2t.assay_id\
                                        JOIN target_dictionary td \
                                            ON a2t.tid = td.tid \
                                        JOIN assays ass \
                                            ON ass.assay_id = act.assay_id \
                                        JOIN compound_structures cs \
                                            ON cs.molregno=act.molregno \
                                                                        \
                                 WHERE td.protein_accession = '%s' \
                                 AND ass.assay_type='B'  \
                                 AND act.relation ='='    \
                                 AND a2t.multi=0  \
                                 AND a2t.complex=0 \
                                 AND a2t.relationship_type = 'D'\
                                 AND act.standard_type IN('Ki','Kd','IC50', \
                                'EC50','-Log Ki','pKd' , 'pA2', 'pI', 'pKa')"
  return queryDevice.queryDevice(sql % target, release, user, pword, host, port)
Example #18
0
def addTargetClass(level, key, path, vsChembl, user, pword, host, port):
    """Append a ``targetClass_<level>`` column to a tab-separated file.

    For every data row the accession in column ``key`` is looked up in
    target_class joined to target_dictionary, one query per row.  The result
    is written to ``<path>_sed``, which is then renamed over ``path``.
    Accessions without a matching record get the value None.

    Inputs:
    level -- column of target_class to select (also used in the new header)
    key -- name of the header column holding the protein accession
    path -- tab-separated file to extend in place
    vsChembl, user, pword, host, port -- handed unchanged to
        queryDevice.queryDevice

    Raises ValueError if the header has no column named ``key``.
    """
    import queryDevice
    import os
    import time

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the key column is also found
    # (and read cleanly) when it is the last column.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\ttargetClass_%s\n' % (lines[0].rstrip('\n'), level))
        for line in lines[1:]:
            # Short pause before every query, kept from the original.
            time.sleep(0.03)
            uniprot = line.rstrip('\n').split('\t')[idx]
            rows = queryDevice.queryDevice("SELECT %s FROM target_class tc JOIN target_dictionary td ON td.tid = tc.tid  WHERE protein_accession = '%s' "%(level, uniprot), vsChembl, user, pword, host, port)
            try:
                targetClass = rows[0][0]
            except IndexError:
                # No classification recorded for this accession.
                targetClass = None
            out.write("%s\t%s\n" % (line.rstrip('\n'), targetClass))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #19
0
def get_doms(tids, params):
    """Get the Pfam-A domain names for a list of tids.

    Inputs:
    tids -- iterable of target ids; each one is converted with str()
    params -- handed unchanged to queryDevice.queryDevice

    Returns a dictionary mapping every tid found to the list of its domain
    names, one list entry per row returned by the query.
    """
    tidstr = "', '".join([str(t) for t in tids])
    sql = """
            SELECT tid, domain_name
            FROM target_components tc
	    JOIN component_domains cd
	      ON cd.component_id = tc.component_id
            JOIN domains d
	      ON d.domain_id = cd.domain_id
            WHERE tc.tid IN('%s') and domain_type = 'Pfam-A'""" % tidstr
    rows = queryDevice.queryDevice(sql, params)

    pfam_lkp = {}
    for row in rows:
        tid, dom = row[0], row[1]
        pfam_lkp.setdefault(tid, []).append(dom)
    return pfam_lkp
Example #20
0
def get_el_targets(params):
    """Query ChEMBL for the targets subject to the mapping, with their counts.

    Considers activities from assays of type 'B' with relationship_type 'D',
    of standard_type Ki, Kd, IC50, EC50 or AC50, reported with relation '='
    in nM and a standard_value of at most params['threshold'] * 1000, on
    targets of type SINGLE PROTEIN or PROTEIN COMPLEX.  Activities expressed
    in log-converted form (e.g. pIC50) are not covered.  This function works
    with chembl_15 upwards.

    Inputs:
    params -- dictionary; 'threshold' is read here and the whole dictionary
              is handed to queryDevice.queryDevice

    Returns a list of tuples
    [(tid, target_type, domain_count, assay_count, act_count),...] and prints
    the number of rows retrieved.
    """
    template = """
            SELECT DISTINCT dc.tid, dc.target_type, dc.dc, COUNT(DISTINCT act.assay_id), COUNT(DISTINCT activity_id)
            FROM assays ass
            JOIN(
                      SELECT td.tid, td.target_type, COUNT(cd.domain_id) as dc
                      FROM target_dictionary td
                      JOIN target_components tc
                        ON tc.tid = td.tid
		      JOIN component_sequences cs
			ON cs.component_id = tc.component_id
                      JOIN component_domains cd
 			ON cd.component_id = cs.component_id
                      WHERE td.target_type IN('SINGLE PROTEIN', 'PROTEIN COMPLEX')
                      GROUP BY td.tid
                     ) as dc
              ON dc.tid = ass.tid
            JOIN activities act
              ON act.assay_id = ass.assay_id
            WHERE act.standard_type IN('Ki','Kd','IC50','EC50', 'AC50')
            AND ass.relationship_type = 'D'
            AND assay_type IN('B')
            AND act.standard_relation IN('=')
            AND standard_units = 'nM'
            AND standard_value <= %s
            GROUP BY dc.tid ORDER BY COUNT(activity_id)"""
    upper_bound = int(params['threshold']) * 1000
    data = queryDevice.queryDevice(template % upper_bound, params)
    # Joining with single spaces reproduces the output of the former
    # comma-separated print statement.
    print(" ".join(["retrieved data for ", str(len(data)), "tids."]))
    return data
Example #21
0
def actSizeMap(chemblTargets, release, user, pword, host, port):
    """Query map_pfam for the distinct activity_ids of each mapType.

    The queries run in the order 'multi', 'single', 'conflict'.
    chemblTargets is accepted but not used.

    Returns the raw query results as the tuple (single, multi, conflict).
    """
    import queryDevice

    def fetch(sql):
        # All three queries share the same connection arguments.
        return queryDevice.queryDevice(sql, release, user, pword, host, port)

    multi = fetch(
        "SELECT DISTINCT activity_id FROM map_pfam WHERE mapType = 'multi'")
    single = fetch(
        "SELECT DISTINCT activity_id FROM map_pfam WHERE mapType = 'single'")
    conflict = fetch(
        "SELECT DISTINCT activity_id FROM map_pfam WHERE mapType = 'conflict'")

    return single, multi, conflict
Example #22
0
def addChembl_id(key, path, params):
    """Append a ``chembl_id`` column to a tab-separated file.

    The molregnos found in column ``key`` are looked up with a single query
    against molecule_dictionary.  The result is written to ``<path>_sed``,
    which is then renamed over ``path``.  Molregnos without a matching record
    get the value 0.

    Inputs:
    key -- name of the header column holding the (integer) molregno
    path -- tab-separated file to extend in place
    params -- handed unchanged to queryDevice.queryDevice

    Raises ValueError if the header has no column named ``key`` or a molregno
    is not an integer.
    """
    import queryDevice
    import os

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the key column is also found when
    # it is the last column.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)

    row_ids = [int(line.split('\t')[idx]) for line in lines[1:]]
    molregnos = dict.fromkeys(row_ids, 0)
    molstr = "','".join(map(str, molregnos.keys()))
    # Same output as the former print statement, valid on Python 2 and 3.
    print(" ".join(["Looking up chembl_id for ", str(len(molregnos)), "cmpds."]))
    data = queryDevice.queryDevice("SELECT distinct molregno, chembl_id FROM molecule_dictionary WHERE molregno IN('%s')"% molstr, params)
    for tup in data:
        molregnos[int(tup[0])] = tup[1]

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\tchembl_id\n' % lines[0].rstrip('\n'))
        for line, molregno in zip(lines[1:], row_ids):
            out.write("%s\t%s\n" % (line.rstrip('\n'), molregnos[molregno]))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #23
0
def addMolweight(key, path, params):
    """Append a ``molweight`` column to a tab-separated file.

    The molregnos found in column ``key`` are looked up with a single query
    for mw_freebase in compound_properties.  The result is written to
    ``<path>_sed``, which is then renamed over ``path``.  Molregnos without a
    matching record get the value 0.

    Inputs:
    key -- name of the header column holding the (integer) molregno
    path -- tab-separated file to extend in place
    params -- handed unchanged to queryDevice.queryDevice

    Raises ValueError if the header has no column named ``key`` or a molregno
    is not an integer.
    """
    import queryDevice
    import os

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the key column is also found when
    # it is the last column.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)

    row_ids = [int(line.split('\t')[idx]) for line in lines[1:]]
    molregnos = dict.fromkeys(row_ids, 0)
    molstr = "','".join(map(str, molregnos.keys()))
    # Same output as the former print statement, valid on Python 2 and 3.
    print(" ".join(["Looking up mw_freebase for ", str(len(molregnos)), "cmpds."]))
    data = queryDevice.queryDevice("SELECT distinct molregno, mw_freebase FROM compound_properties WHERE molregno IN('%s')"% molstr, params)
    for tup in data:
        molregnos[int(tup[0])] = tup[1]

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\tmolweight\n' % lines[0].rstrip('\n'))
        for line, molregno in zip(lines[1:], row_ids):
            out.write("%s\t%s\n" % (line.rstrip('\n'), molregnos[molregno]))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #24
0
def addPrefName(key, path, params):
    """Append a ``prefName_<key>`` column to a tab-separated file.

    The accessions found in column ``key`` are looked up with a single query
    for pref_name in target_dictionary.  The result is written to
    ``<path>_sed``, which is then renamed over ``path``.  The new value is
    written in double quotes; accessions without a matching record get 0.

    Inputs:
    key -- name of the header column holding the protein accession
    path -- tab-separated file to extend in place
    params -- handed unchanged to queryDevice.queryDevice

    Raises ValueError if the header has no column named ``key``.
    """
    import queryDevice
    import os

    with open(path, 'r') as infile:
        lines = infile.readlines()

    # Strip the newline before splitting so the key column is also found
    # (and read cleanly) when it is the last column.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)

    row_keys = [line.rstrip('\n').split('\t')[idx] for line in lines[1:]]
    accessions = dict.fromkeys(row_keys, 0)
    accStr = "','".join(map(str, accessions.keys()))
    data = queryDevice.queryDevice("SELECT protein_accession, pref_name FROM target_dictionary WHERE protein_accession IN('%s') "% accStr, params)
    for tup in data:
        accessions[tup[0]] = tup[1]

    tmp_path = '%s_sed' % path
    with open(tmp_path, 'w') as out:
        out.write('%s\tprefName_%s\n' % (lines[0].rstrip('\n'), key))
        for line, acc in zip(lines[1:], row_keys):
            out.write("%s\t\"%s\"\n" % (line.rstrip('\n'), accessions[acc]))

    # Rename instead of shelling out to `mv`, so paths containing spaces or
    # shell metacharacters are handled correctly.
    os.rename(tmp_path, path)
Example #25
0
def getLigandsForTarget(target, release, user, pword, host, port):
  """Fetch binding activities of approved parent compounds for one accession.

  Selects activities of standard_type Ki, Kd, IC50, EC50, -Log Ki, pKd, pA2,
  pI or pKa from assays of type 'B' with relationship_type 'D', restricted
  to compounds whose first_approval is set and whose active_molregno equals
  the parent_molregno in molecule_hierarchy.  The database queried is
  'ChEMBL_<release>'.

  Inputs:
  target -- component accession inserted into the WHERE clause
  release -- release identifier appended to 'ChEMBL_'
  user, pword, host, port -- handed unchanged to queryDevice.queryDevice

  Returns the query result; each row holds molregno, standard_value,
  standard_type, standard_units, canonical_smiles, standard_relation and
  activity_id.
  """
  import queryDevice

  sql = "SELECT DISTINCT act.molregno, standard_value,\
         standard_type, standard_units, canonical_smiles, act.standard_relation, act.activity_id \
                                                                                \
                                        FROM activities act \
                                        JOIN assays ass \
                                            ON ass.assay_id = act.assay_id \
                                        JOIN compound_records cr \
                                            ON act.molregno = cr.molregno \
                                        JOIN molecule_dictionary md \
                                            ON md.molregno = cr.molregno \
                                        JOIN molecule_hierarchy mh \
                                            ON cr.molregno = mh.molregno \
                                        JOIN target_dictionary td \
                                            ON ass.tid = td.tid \
                                        JOIN target_components tc \
                                            ON td.tid = tc.tid \
                                        JOIN component_sequences cos \
                                            ON tc.component_id = cos.component_id \
                                        JOIN compound_structures cs \
                                            ON cs.molregno=act.molregno \
                                                                        \
                                 WHERE cos.accession = '%s' \
                                 AND ass.assay_type='B'  \
                                 AND ass.relationship_type = 'D'\
                                 AND mh.active_molregno = mh.parent_molregno \
                                 AND md.first_approval is not NULL \
                                 AND act.standard_type IN('Ki','Kd','IC50', \
                                'EC50','-Log Ki','pKd' , 'pA2', 'pI', 'pKa')"
  return queryDevice.queryDevice(sql % target, 'ChEMBL_%s' % release, user, pword, host, port)
Example #26
0
def toSam(conflicts, threshold, user, pword, host, release, port):
    """Write one table of activities per conflict string.

    For every target listed under a conflict string the ligands are fetched
    with getLigands.getLigandsForTarget and filtered with
    filterForTarget.filterForTarget.  For every remaining ligand the
    pubmed_id of its activity is looked up.  Each conflict string with at
    least one ligand gets a file data/forSam_<confStr>.pred with the columns
    molregno, pubmed, prediction and activity_id, one row per activity; the
    prediction column always holds 'None'.

    Compared with the earlier implementation, every activity is written
    exactly once (rows used to be repeated once per compound of the conflict
    string) and carries the pubmed_id of its own document (the id of the
    most recently processed activity of the compound used to be written for
    all of its activities).

    Inputs:
    conflicts -- mapping of conflict string to an iterable of targets
    threshold -- handed to filterForTarget.filterForTarget
    user, pword, host, release, port -- connection details handed to
        getLigandsForTarget and queryDevice.queryDevice

    Raises IndexError if no document is found for an activity.
    Returns None.
    """
    import parse
    import getLigands
    import filterForTarget
    import queryDevice

    # conf[confStr][molregno] -> list of (activity_id, pubmed_id) pairs
    conf = {}
    for confStr in conflicts.keys():
        for target in conflicts[confStr]:
            ligands = getLigands.getLigandsForTarget(target, release, user,
                                                     pword, host, port)
            ligands = filterForTarget.filterForTarget(ligands, threshold)

            for ligand in ligands:
                # Presumably filterForTarget returns rows with the molregno at
                # index 2 and the activity_id at index 3 -- verify against it.
                molregno = ligand[2]
                actId = ligand[3]
                pubmed = queryDevice.queryDevice(
                    "SELECT pubmed_id FROM docs JOIN activities act ON act.doc_id = docs.doc_id WHERE activity_id = %s"
                    % actId, release, user, pword, host, port)[0][0]
                per_mol = conf.setdefault(confStr, {})
                per_mol.setdefault(molregno, []).append((actId, pubmed))

    for confStr in conf:
        with open('data/forSam_%s.pred' % confStr, 'w') as out:
            out.write('molregno\tpubmed\tprediction\tactivity_id\n')
            for molregno in conf[confStr]:
                for actId, pubmed in conf[confStr][molregno]:
                    out.write('%s\t%s\t%s\t%s\n' %
                              (molregno, pubmed, 'None', actId))
Example #27
0
def retrieve_acts(params):
    """Run a query for chembl_id, canonical_smiles, molformula.

    Every compound in molecule_dictionary that has a row in
    compound_structures is selected.

    Inputs:
    params -- dictionary holding details of the connection string; the keys
              'release', 'user', 'pword', 'host' and 'port' are read

    Returns the query result as delivered by queryDevice.queryDevice.
    """
    sql = "SELECT md.chembl_id, cs.canonical_smiles, cs.molformula from molecule_dictionary md JOIN compound_structures cs ON md.molregno = cs.molregno"
    return queryDevice.queryDevice(
        sql,
        params['release'],
        params['user'],
        params['pword'],
        params['host'],
        params['port'],
    )
Example #28
0
def getTargets(release, user, pword, host, port):
    """Return the accessions of all human sequence components in a release.

    The accessions are read from component_sequences in the database
    'ChEMBL_<release>', restricted to the organism 'Homo sapiens'.

    Inputs:
    release -- release number (integer or numeric string such as '15')
    user, pword, host, port -- handed unchanged to queryDevice.queryDevice

    Returns a list holding the first column of every result row.
    Raises ValueError if release is not numeric or is lower than 15; only
    the query for release 15 and later is implemented here.
    """
    release_number = int(release)
    if release_number < 15:
        # Used to fail with an unbound-variable error further down.
        raise ValueError(
            "getTargets requires ChEMBL release 15 or later, got %s" % release)
    # The organism literal was corrupted ('H**o sapiens') and matched nothing.
    rawtargets = queryDevice.queryDevice("""SELECT DISTINCT accession 
        FROM component_sequences
        WHERE ORGANISM = 'Homo sapiens'""", 'ChEMBL_%s' % release, user, pword, host, port)
    return [target[0] for target in rawtargets]
Example #29
0
def retrieve_acts(params):
    """Run a query for chembl_id, canonical_smiles, molformula.

    Every compound in molecule_dictionary that has a row in
    compound_structures is selected.

    Inputs:
    params -- dictionary holding details of the connection string; the keys
              'release', 'user', 'pword', 'host' and 'port' are read

    Returns the query result as delivered by queryDevice.queryDevice.
    """
    run_query = queryDevice.queryDevice
    statement = "SELECT md.chembl_id, cs.canonical_smiles, cs.molformula from molecule_dictionary md JOIN compound_structures cs ON md.molregno = cs.molregno"
    return run_query(statement, params['release'], params['user'],
                     params['pword'], params['host'], params['port'])
Example #30
0
def getUniprotTargets(release, user, pword, host, port):
    """Return the accessions of all SWISS-PROT/TREMBL target components.

    The query joins component_sequences to target_components and also
    selects component_id and tid, but only the accession of each row is
    returned.

    Returns a list with one accession per result row.
    """
    import queryDevice
    sql = """SELECT cs.accession, cs.component_id, tid
        FROM component_sequences cs 
            JOIN target_components tc 
            ON tc.component_id = cs.component_id  
        WHERE db_source IN('SWISS-PROT', 'TREMBL')"""
    rows = queryDevice.queryDevice(sql, release, user, pword, host, port)
    return [row[0] for row in rows]
Example #31
0
def toSam(conflicts, threshold, user, pword, host, release, port):
    """Write one 'data/forSam_<conflict>.pred' table per conflict string.

    Inputs:
    conflicts -- dictionary mapping a conflict string to an iterable of targets
    threshold -- value handed to filterForTarget.filterForTarget
    user, pword, host, release, port -- connection details for the queries

    For every target the ligands are fetched with getLigands and passed
    through filterForTarget; element 2 of a ligand is used as molregno and
    element 3 as activity id. The pubmed id of each activity is looked up and
    one row (molregno, pubmed, prediction, activity_id) is written per
    activity, with the prediction column fixed to 'None'.
    """
    import getLigands
    import filterForTarget
    import queryDevice

    # conf[confStr][molregno] -> list of (activity id, pubmed id) pairs.
    # Keeping the pairs together stops the two values drifting apart; the
    # previous version reset the pubmed list on every repeated molregno and
    # reported the first pubmed id for all activities of a molecule.
    conf = {}
    for confStr in conflicts:
        for target in conflicts[confStr]:
            ligands = getLigands.getLigandsForTarget(target, release, user, pword, host, port)
            ligands = filterForTarget.filterForTarget(ligands, threshold)

            for ligand in ligands:
                molregno = ligand[2]
                actId = ligand[3]
                rows = queryDevice.queryDevice("SELECT pubmed_id FROM docs JOIN activities act ON act.doc_id = docs.doc_id WHERE activity_id = %s"%actId,release, user, pword, host, port)
                # An activity without a matching document row yields None
                # instead of aborting the whole export with an IndexError.
                pubmed = rows[0][0] if rows else None
                conf.setdefault(confStr, {}).setdefault(molregno, []).append((actId, pubmed))

    domain = 'None'
    for confStr in conf:
        with open('data/forSam_%s.pred' % confStr, 'w') as out:
            out.write('molregno\tpubmed\tprediction\tactivity_id\n')
            # Single pass over the molecules: the former nested loop over the
            # same dictionary repeated every row once per molecule.
            for molregno in conf[confStr]:
                for actId, pubmed in conf[confStr][molregno]:
                    out.write('%s\t%s\t%s\t%s\n' % (molregno, pubmed, domain, actId))
Example #32
0
def getUniprotTargets(release, user, pword, host, port):
    """List the accessions of target components from SWISS-PROT or TREMBL.

    Inputs:
    release, user, pword, host, port -- connection details forwarded to
                                        queryDevice.queryDevice

    Returns a list with the first column (accession) of every result row;
    the component_id and tid columns of the query are not used.
    """
    import queryDevice
    connection = (release, user, pword, host, port)
    rows = queryDevice.queryDevice(
        """SELECT cs.accession, cs.component_id, tid
        FROM component_sequences cs 
            JOIN target_components tc 
            ON tc.component_id = cs.component_id  
        WHERE db_source IN('SWISS-PROT', 'TREMBL')""", *connection)
    accessions = [row[0] for row in rows]
    return accessions
Example #33
0
def getLigandsForActivity(activity, release, user, pword, host, port):
    """Query the ligand data recorded for one activity.

    Inputs:
    activity -- activity id interpolated into the WHERE clause
    release, user, pword, host, port -- connection details for queryDevice

    Returns the rows from queryDevice.queryDevice; the query selects
    molregno, standard_value, standard_type, standard_units,
    canonical_smiles, relation and activity_id.
    """
    import queryDevice

    # The backslash continuations sit inside the string literal, so the
    # leading whitespace of every continued line is part of the SQL text.
    template = "SELECT DISTINCT act.molregno, standard_value,\
	 standard_type, standard_units, canonical_smiles, relation, act.activity_id \
										\
                                        FROM activities act \
                                        JOIN compound_structures cs\
                                            ON act.molregno = cs.molregno \
     				 WHERE activity_id = %s"
    sql = template % activity
    return queryDevice.queryDevice(sql, release, user, pword, host, port)
Example #34
0
def getLigandsForActivity(activity, release, user, pword, host, port):
    """Return the ligand rows that belong to a single activity id.

    Inputs:
    activity -- activity id substituted into the query
    release, user, pword, host, port -- connection details for queryDevice

    The rows come straight from queryDevice.queryDevice and hold molregno,
    standard_value, standard_type, standard_units, canonical_smiles,
    relation and activity_id.
    """
    import queryDevice

    connection = (release, user, pword, host, port)
    # One string literal continued with backslashes: the indentation of the
    # continued lines belongs to the SQL text and is kept as it was.
    statement = "SELECT DISTINCT act.molregno, standard_value,\
	 standard_type, standard_units, canonical_smiles, relation, act.activity_id \
										\
                                        FROM activities act \
                                        JOIN compound_structures cs\
                                            ON act.molregno = cs.molregno \
     				 WHERE activity_id = %s"
    ligands = queryDevice.queryDevice(statement % activity, *connection)
    return ligands
def getUniprotTargets(release, user, pword, host, port):
    """Return protein accessions of SWISS-PROT/TREMBL targets.

    Inputs:
    release, user, pword, host, port -- connection details forwarded to
                                        queryDevice.queryDevice

    This variant reads protein_accession from target_dictionary and returns
    the first column of every result row as a list.
    """
    import queryDevice

    sql = ("SELECT protein_accession"
           "    FROM target_dictionary WHERE db_source IN('SWISS-PROT', 'TREMBL')")
    rows = queryDevice.queryDevice(sql, release, user, pword, host, port)
    return [row[0] for row in rows]
def add_target_class(level, key, path):
    '''Add target class to each row in a table using a Uniprot accession for release < ChEMBL 15.
    Inputs:
    level: the target class annotation level
    key: the keyword in the header indicating a Uniprot Id
    path: path to the file.

    The file at path is rewritten in place with one extra column named
    target_class_<level>; accessions without a database hit get 'None'.
    Raises ValueError if key is not a column of the header line.
    NOTE(review): release, user, pword, host and port are not parameters of
    this function; they (like queryDevice and os) have to exist at module
    level -- confirm against the rest of the file.
    --------------------
    Felix Kruger
    [email protected]
    '''
    with open(path, 'r') as infile:
        lines = infile.readlines()
    # Strip the line break before splitting, otherwise a key sitting in the
    # last column (and every value below it) carries a trailing newline.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        # Used to surface later as an unbound 'idx'.
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)
    accessions = {}
    for line in lines[1:]:
        accessions[line.rstrip('\n').split('\t')[idx]] = None
    print(len(accessions))
    # Without data rows there is nothing to look up: skip the query.
    if accessions:
        accStr = "','".join(map(str, accessions.keys()))
        data = queryDevice.queryDevice(
            """SELECT protein_accession, %s 
                FROM target_class tc 
                JOIN target_dictionary td 
                  ON td.tid = tc.tid  
                WHERE protein_accession IN('%s')""" % (level, accStr), release,
            user, pword, host, port)
        for tup in data:
            accessions[tup[0]] = tup[1]
    tmp_path = '_'.join([path, "sed"])
    with open(tmp_path, 'w') as out:
        out.write('%s\ttarget_class_%s\n' % (lines[0].rstrip('\n'), level))
        for line in lines[1:]:
            row = line.rstrip('\n')
            targetClass = accessions[row.split('\t')[idx]]
            out.write("%s\t%s\n" % (row, targetClass))
    # Replace the input directly rather than shelling out to 'mv', which
    # broke on paths containing spaces or shell metacharacters.
    os.rename(tmp_path, path)
Example #37
0
def add_target_class(level, key, path):
    '''Add target class to each row in a table using a Uniprot accession for release < ChEMBL 15.
    Inputs:
    level: the target class annotation level
    key: the keyword in the header indicating a Uniprot Id
    path: path to the file.

    The table is rewritten in place and gains a column target_class_<level>;
    rows whose accession has no database hit get 'None' in that column.
    Raises ValueError if key does not name a column of the header line.
    NOTE(review): release, user, pword, host and port are not parameters of
    this function; they (like queryDevice and os) have to exist at module
    level -- confirm against the rest of the file.
    --------------------
    Felix Kruger
    [email protected]
    '''
    with open(path, 'r') as infile:
        lines = infile.readlines()
    # The line break is removed before splitting so that a key in the last
    # column, and the values below it, are compared without a newline.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        # Previously this showed up later as an unbound 'idx'.
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)
    rows = [line.rstrip('\n') for line in lines[1:]]
    accessions = {}
    for row in rows:
        accessions[row.split('\t')[idx]] = None
    print(len(accessions))
    # A header-only table needs no database round trip.
    if accessions:
        accStr = "','".join(map(str, accessions.keys()))
        data = queryDevice.queryDevice("""SELECT protein_accession, %s 
                FROM target_class tc 
                JOIN target_dictionary td 
                  ON td.tid = tc.tid  
                WHERE protein_accession IN('%s')"""%(level, accStr), release, user, pword, host, port)
        for tup in data:
            accessions[tup[0]] = tup[1]
    tmp_path = '_'.join([path, "sed"])
    with open(tmp_path, 'w') as out:
        out.write('%s\ttarget_class_%s\n' % (lines[0].rstrip('\n'), level))
        for row in rows:
            out.write("%s\t%s\n" % (row, accessions[row.split('\t')[idx]]))
    # os.rename swaps the file in without a shell, so unusual characters in
    # path can no longer break (or be interpreted by) an 'mv' command line.
    os.rename(tmp_path, path)
Example #38
0
def conflicts4Sam():
    """Export the conflicting mappings, one table per conflict string.

    Queries map_pfam entries of mapType 'conflict' and writes, for every
    distinct conflict value (column 3 of a row), the file
    'data/forSam_<conflict>.tab' with the columns molregno, pubmed,
    prediction (the mapped domain) and activity_id. Returns None.
    NOTE(review): 'release' is not defined in this function and has to be a
    module-level name -- confirm.
    """
    import queryDevice
    sql = ("SELECT DISTINCT  mpf.molregno, mpf.domain, dcs.pubmed_id,  con.conflict, mpf.activity_id "
           "FROM map_pfam mpf "
           "JOIN activities act ON mpf.activity_id = act.activity_id "
           "JOIN docs dcs ON dcs.doc_id = act.doc_id "
           "JOIN conflicts con ON mpf.protein_accession = con.protein_accession "
           "WHERE mapType = 'conflict'")
    rows = queryDevice.queryDevice(sql, release)
    # Bucket the rows per conflict string, keeping their original order.
    buckets = {}
    for row in rows:
        buckets.setdefault(row[3], []).append(row)
    for label in buckets.keys():
        table = open('data/forSam_%s.tab' % label, 'w')
        table.write('molregno\tpubmed\tprediction\tactivity_id\n')
        for row in buckets[label]:
            # Row layout: molregno, domain, pubmed_id, conflict, activity_id.
            table.write('%s\t%s\t%s\t%s\n' % (row[0], row[2], row[1], row[4]))
        table.close()
    return
Example #39
0
def conflicts4Sam():
    """Write 'data/forSam_<conflict>.tab' for each conflict in map_pfam.

    The query returns (molregno, domain, pubmed_id, conflict, activity_id)
    for all map_pfam entries of mapType 'conflict'. Rows are grouped by their
    conflict value and written as molregno, pubmed, prediction (the domain)
    and activity_id. Returns None.
    NOTE(review): 'release' is not defined in this function and has to be a
    module-level name -- confirm.
    """
    import queryDevice
    header = 'molregno\tpubmed\tprediction\tactivity_id\n'
    line_format = '%s\t%s\t%s\t%s\n'
    sql = ("SELECT DISTINCT  mpf.molregno, mpf.domain, dcs.pubmed_id,  con.conflict, mpf.activity_id"
           " FROM map_pfam mpf"
           " JOIN activities act ON mpf.activity_id = act.activity_id"
           " JOIN docs dcs ON dcs.doc_id = act.doc_id"
           " JOIN conflicts con ON mpf.protein_accession = con.protein_accession"
           " WHERE mapType = 'conflict'")
    conflicts = queryDevice.queryDevice(sql, release)
    # Collect the rows of each conflict string in query order.
    grouped = {}
    for molregno, domain, pubmed, confStr, actId in (
            (c[0], c[1], c[2], c[3], c[4]) for c in conflicts):
        grouped.setdefault(confStr, []).append((molregno, pubmed, domain, actId))
    for confStr in grouped.keys():
        out = open('data/forSam_%s.tab' % confStr, 'w')
        out.write(header)
        for fields in grouped[confStr]:
            out.write(line_format % fields)
        out.close()
    return
Example #40
0
def fullSeq(path, params):
    """Append seq_id and seq_sim columns for the protein pair of each row.

    Inputs:
    path -- tab-separated table with a header line; the first two columns of
            every data row are read as protein accessions
    params -- dictionary passed to queryDevice.queryDevice; its
              'needlepath' entry is passed to needle.needle

    For each distinct pair the two sequences are fetched and aligned with
    needle; the pair of values from needle.parseNeedle is cached so that a
    repeated pair is aligned only once. Rows for which either sequence is
    missing get None in both new columns. The table at path is replaced by
    the annotated version.
    """
    import os
    import queryDevice
    import needle

    with open(path, 'r') as inFile:
        lines = inFile.readlines()
    # Write next to the input and swap the file in at the end (the same
    # '<path>_sed' scheme as the other annotation helpers), so a failing
    # query or alignment no longer leaves a truncated table behind.
    tmpPath = '_'.join([path, "sed"])
    seqIdDict = {}
    with open(tmpPath, 'w') as out:
        out.write("%s\tseq_id\tseq_sim\n" % lines[0].rstrip('\n'))
        for line in lines[1:]:
            # Strip the line break first: in a two-column table the second
            # accession would otherwise end in '\n' and never be found.
            row = line.rstrip('\n')
            elements = row.split('\t')
            proteinAcc_1 = elements[0]
            proteinAcc_2 = elements[1]
            pairName = (proteinAcc_1, proteinAcc_2)
            if pairName not in seqIdDict:
                data = queryDevice.queryDevice("SELECT td.protein_sequence, td.protein_accession FROM target_dictionary td WHERE td.protein_accession IN ('%s')"% "','".join([proteinAcc_1, proteinAcc_2]), params)
                lkp = {}
                for entry in data:
                    lkp[entry[1]] = entry[0]
                if proteinAcc_1 in lkp and proteinAcc_2 in lkp:
                    # Align the sequences using needle from EMBOSS and parse
                    # the output of the alignment.
                    needleReport = needle.needle(params['needlepath'], lkp[proteinAcc_1], lkp[proteinAcc_2])
                    (seqSim, seqId) = needle.parseNeedle(needleReport)
                    seqIdDict[pairName] = (seqSim, seqId)
                else:
                    seqIdDict[pairName] = (None, None)
            (seqSim, seqId) = seqIdDict[pairName]
            out.write("%s\t%s\t%s\n" % (row, seqId, seqSim))
    os.rename(tmpPath, path)
def add_species(key, path):
    '''Add species annotation to each row in a table using a Uniprot accession for release < ChEMBL 15.
    Inputs:
    key: the keyword in the header indicating a Uniprot Id
    path: path to the file.

    The file at path is rewritten in place with one extra, double-quoted
    column named species_<key>; accessions without a database hit get "0".
    Raises ValueError if key is not a column of the header line.
    NOTE(review): release, user, pword, host and port are not parameters of
    this function; they (like queryDevice and os) have to exist at module
    level -- confirm against the rest of the file.
    --------------------
    Felix Kruger
    [email protected]
    '''
    with open(path, 'r') as infile:
        lines = infile.readlines()
    # Strip the line break before splitting, otherwise a key sitting in the
    # last column (and every value below it) carries a trailing newline.
    header = lines[0].rstrip('\n').split('\t')
    if key not in header:
        # Used to surface later as an unbound 'idx'.
        raise ValueError("column %r not found in header of %s" % (key, path))
    idx = header.index(key)
    accessions = {}
    for line in lines[1:]:
        accessions[line.rstrip('\n').split('\t')[idx]] = 0
    # Without data rows there is nothing to look up: skip the query.
    if accessions:
        accStr = "','".join(map(str, accessions.keys()))
        data = queryDevice.queryDevice(
            """SELECT protein_accession, organism 
		FROM target_dictionary 
		WHERE protein_accession IN('%s') """ % accStr, release, user, pword, host,
            port)
        for tup in data:
            accessions[tup[0]] = tup[1]
    tmp_path = '_'.join([path, "sed"])
    with open(tmp_path, 'w') as out:
        out.write('%s\tspecies_%s\n' % (lines[0].rstrip('\n'), key))
        for line in lines[1:]:
            row = line.rstrip('\n')
            org = accessions[row.split('\t')[idx]]
            out.write("%s\t\"%s\"\n" % (row, org))
    # Replace the input directly rather than shelling out to 'mv', which
    # broke on paths containing spaces or shell metacharacters.
    os.rename(tmp_path, path)
Example #42
0
def orthologs():
    """Run the ortholog analysis configured in 'gla.yaml'.

    Takes no arguments and returns nothing. The steps are: read the
    configuration, read the human and rat id files, build the homolog table,
    run the activity query, store the activities, write the pair table and
    finally pass that table to the alignment and property annotation helpers.
    Input and output files live under 'data/' and are named after the
    'vsCompara' and 'release' entries of the configuration.
    """
    import queryDevice
    import mkDict
    import queries
    import writePairs
    import align
    import addProperties
    import mkHomologTable
    import readIds
    import yaml
    # Read config file; 'with' closes the handle, which used to stay open.
    with open('gla.yaml') as paramFile:
        params = yaml.safe_load(paramFile)
    # Same early KeyError as before when the needle path is not configured.
    if 'needlepath' not in params:
        raise KeyError('needlepath')
    vsCompara = params['vsCompara']
    release = params['release']
    comparaOrthologs = "data/orthologs_%s.txt" % vsCompara
    comparaHumanIds = "data/humanIds_%s.txt" % vsCompara
    comparaRatIds = "data/ratIds_%s.txt" % vsCompara
    # Assign output filenames.
    dictFile = "data/ortho_compDict_%s.pkl" % release
    results = "data/orthologs_%s_%s.tab" % (release, vsCompara)
    orthoTab = "data/orthologTable_%s.txt" % vsCompara
    # Create output files.
    humanLkp = readIds.readIds(comparaHumanIds)
    ratLkp = readIds.readIds(comparaRatIds)
    mkHomologTable.homologTable(comparaOrthologs, orthoTab, humanLkp, ratLkp)
    # NOTE(review): the query is built by queries.paralogs although this is
    # the ortholog pipeline -- confirm that this is intended.
    query = queries.paralogs(orthoTab)
    acts = queryDevice.queryDevice(query, params)
    mkDict.activities(acts, dictFile)
    writePairs.homologMedian(params['homologyTypeOrthologs'], orthoTab, dictFile, results)
    # Annotate output files.
    align.pfam_a(results, params)
    align.bSite(results, params)
    addProperties.addMolweight('molregno', results, params)
    addProperties.addTargetClass("L1", "accession1", results, params)
    addProperties.addPrefName("accession1", results, params)
    addProperties.addPrefName("accession2", results, params)
Example #43
0
def drugs(pfamDict, release, user, pword, host, port):
    """Write 'data/drugExamples.tab' from the rows of 'data/map_pfam.txt'.

    Inputs:
    pfamDict -- dictionary keyed by the value of column 3 (uniprot); the
                length of its 'domains' entry is reported as nPfam
    release, user, pword, host, port -- connection details for queryDevice

    For each data row, column 2 (molregno) is looked up in the formulations
    table; rows without an ingredient are skipped. Output columns are
    ingredient, pfam (column 1), uniprot and nPfam.
    """
    import time
    # 'with' closes the input handle, which was left open before.
    with open('data/map_pfam.txt', 'r') as infile:
        lines = infile.readlines()

    # The output is now closed even if a lookup or query fails half-way.
    with open('data/drugExamples.tab', 'w') as out:
        out.write('ingredient\tpfam\tuniprot\tnPfam\n')
        for line in lines[1:]:
            # Drop the line break so a value in the last column stays clean.
            elements = line.rstrip('\n').split('\t')
            molregno = elements[2]
            pfam = elements[1]
            uniprot = elements[3]
            nPfam = len(pfamDict[uniprot]['domains'])
            # Short pause between the per-row queries.
            time.sleep(0.03)
            rows = queryDevice.queryDevice("SELECT ingredient FROM formulations WHERE  molregno = %s" %molregno, release, user, pword, host, port)
            try:
                ingredient = rows[0][0]
            except IndexError:
                # No formulation recorded for this molregno: skip the row.
                continue
            out.write('%s\t%s\t%s\t%s\n' % (ingredient, pfam, uniprot, nPfam))
Example #44
0
def exportProps(selected, propDict, threshold, release, user, pword, host, port):
    """Export compound properties of the selected domains to a table.

    Inputs:
    selected -- iterable of domain names to export
    propDict -- dictionary mapping a domain to entries whose first element
                is a molregno
    threshold -- number used (as int) in the name of the final table
    release -- release identifier, used for the query and the file name
    user, pword, host, port -- connection details for queryDevice

    Writes 'data/cmpdPropssed.tab' with one row per domain and distinct
    molregno that the query returns a result for, then runs sed to replace
    'None' by 'NA' into 'data/cmpdProps_pKi<threshold>_chembl<release>.tab'.
    """
    import os
    import queryDevice

    sql = "SELECT DISTINCT cp.molregno, mw_freebase, alogp, HBA, HBD, PSA, RTB, ACD_MOST_APKA, ACD_MOST_BPKA FROM compound_properties cp JOIN molecule_dictionary md ON cp.molregno = md.molregno WHERE cp.molregno ='%s' AND md.molecule_type = 'Small molecule'"
    row_format = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'

    ### Write output to a table.
    table = open('data/cmpdPropssed.tab', 'w')
    table.write('domain\tmolregno\tmolweight\tlogP\tHBA\tHBD\tPSA\trtb\tacd_most_apka\tacd_most_bpka\n')
    for domain in selected:
        # Dictionary keys serve as the set of distinct molregnos.
        unique = {}
        for entry in propDict[domain]:
            unique[entry[0]] = 0
        for molregno in unique.keys():
            rows = queryDevice.queryDevice(sql % molregno, release, user, pword, host, port)
            try:
                props = rows[0]
                table.write(row_format % (domain, props[0], props[1], props[2],
                                          props[3], props[4], props[5],
                                          props[6], props[7], props[8]))
            except IndexError:
                # No result for this molregno: leave it out.
                pass
    table.close()

    command = "sed \"s/None/NA/g\" data/cmpdPropssed.tab >     data/cmpdProps_pKi%s_chembl%s.tab"
    os.system(command % (int(threshold), release))
Example #45
0
def get_doms(el_targets):
    """Collect the domain names recorded for each eligible target.

    Inputs:
    el_targets -- list of eligible targets; the first element of each entry
                  is used as the tid

    Returns a dictionary mapping a tid to the list of its domain names, in
    the order of the result rows. The connection details come from the
    module-level names RELEASE, USER, PWORD, HOST and PORT.
    """
    tidstr = "', '".join(str(entry[0]) for entry in el_targets)
    rows = queryDevice.queryDevice("""
            SELECT tid, domain_name 
            FROM target_components tc
	    JOIN component_domains cd
	      ON cd.component_id = tc.component_id
            JOIN domains d
	      ON d.domain_id = cd.domain_id
            WHERE tc.tid IN('%s')""" % tidstr, RELEASE, USER, PWORD, HOST, PORT)
    pfam_lkp = {}
    for row in rows:
        tid, dom = row[0], row[1]
        # First domain of a tid creates its list, later ones extend it.
        pfam_lkp.setdefault(tid, []).append(dom)
    return pfam_lkp
Example #46
0
def drugs(pfamDict, release, user, pword, host, port):
    """Write 'data/drugExamples.tab' from the rows of 'data/map_pfam.txt'.

    Inputs:
    pfamDict -- dictionary keyed by the value of column 3 (uniprot); the
                length of its 'domains' entry is reported as nPfam
    release, user, pword, host, port -- connection details for queryDevice

    Each data row contributes column 2 (molregno), which is looked up in the
    formulations table. A row is written (ingredient, pfam from column 1,
    uniprot, nPfam) only when an ingredient is found.
    """
    import time
    # The input handle is closed by 'with'; it was never closed before.
    with open('data/map_pfam.txt', 'r') as infile:
        lines = infile.readlines()

    # 'with' also closes the output if a lookup or query raises.
    with open('data/drugExamples.tab', 'w') as out:
        out.write('ingredient\tpfam\tuniprot\tnPfam\n')
        for line in lines[1:]:
            # Remove the line break so the last column holds a clean value.
            elements = line.rstrip('\n').split('\t')
            molregno = elements[2]
            pfam = elements[1]
            uniprot = elements[3]
            nPfam = len(pfamDict[uniprot]['domains'])
            # Short pause between the per-row queries.
            time.sleep(0.03)
            rows = queryDevice.queryDevice(
                "SELECT ingredient FROM formulations WHERE  molregno = %s" %
                molregno, release, user, pword, host, port)
            try:
                ingredient = rows[0][0]
            except IndexError:
                # Nothing found for this molregno: no output row.
                continue
            out.write('%s\t%s\t%s\t%s\n' % (ingredient, pfam, uniprot, nPfam))
Example #47
0
def analysis(release):

    ####
    #### Load parameters.
    ####

    import yaml
    # Read config file.
    paramFile = open('mpf.yaml')
    params = yaml.safe_load(paramFile)
    user = params['user']
    pword = params['pword']
    host = params['host']
    port = params['port']
    th = params['threshold']

    ####
    #### Load data.
    ####

    ## Set threshold for all calculations.
    import numpy as np
    threshold = -np.log10(th * 10**(-6))

    ## Get all ChEMBL targets with a Uniprot accession.
    import getUniprotTargets
    chemblTargets = getUniprotTargets.getUniprotTargets(
        release, user, pword, host, port)

    ## Get a list of all human (!) ChEMBL targets
    humChembl = {}
    for target in chemblTargets.keys():
        if chemblTargets[target] == 'H**o sapiens':
            humChembl[target] = 0

    ## Read all human protein coding genes
    import parse
    humProtCod = parse.parse2col('data/proteinCoding.tab', True, 1, 0)
    #humanTargets = humanProtCodUniq.keys()
    print "We are dealing with %s human proteins" % len(humProtCod.keys())

    ## Load the pfamDict.
    import pickle
    inFile = open('data/protCodPfamDict_%s.pkl' % release, 'r')
    pfamDict = pickle.load(inFile)
    inFile.close()

    ## Load the pdbDict.
    import pickle
    infile = open('data/pdbDict_%s.pkl' % release, 'r')
    pdbDict = pickle.load(infile)
    infile.close()

    ## Load the uniprotDict.
    import pickle
    infile = open('data/bsDictUniprot_%s.pkl' % release, 'r')
    uniprotDict = pickle.load(infile)
    infile.close()
    print 'number of targets with binding site information', len(
        uniprotDict.keys())

    ## Load the uniDict.
    import parseUniChem
    uniDict = parseUniChem.parse('data/unichemMappings.txt')

    ## Load the propDict.
    import pickle
    infile = open('data/propDict_%s.pkl' % release, 'r')
    propDict = pickle.load(infile)
    infile.close()

    ####
    #### Generate Plots.
    ####

    ## For each target in PfamDict, calculate the ratio of domain over non-domain regions.
    import getRatioUnstruct
    import writeTable
    import os
    pfamDict = getRatioUnstruct.getRatio(pfamDict, humProtCod, release, user,
                                         pword, host, port)
    writeTable.writePfam(pfamDict, humProtCod, humChembl, chemblTargets,
                         release)
    os.system(
        '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s plotPfamStat.R'
        % release)

    ## Assess small molecule binding within Pfam domains for PDBe entries.
    import matchData
    import evaluatePred
    pdbDict = matchData.pdbe(pdbDict, pfamDict, release)
    evaluatePred.pdbe(pdbDict, 'within', release)
    os.system(
        '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s  -%s -%s  ecdf.R'
        % ('within', "PDB", release))

    ## Assess small molecule binding within Pfam domains for Uniprot entries.
    import matchData
    import evaluatePred
    uniprotDict = matchData.uniprot(uniprotDict, pfamDict, release)
    evaluatePred.uniprot(uniprotDict, 'within', release)
    os.system(
        '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s -%s -%s  ecdf.R'
        % ('within', "Uni", release))

    ## Print a summary of the number of targets and domains covered by the mapping.
    import groupSize
    import os
    allDomains = groupSize.uniqueDomains(pfamDict)
    singleDomains = groupSize.singles(chemblTargets, pfamDict)
    groupsAll = groupSize.groupSize(chemblTargets, pfamDict, singles)
    print "all possible groups (single, none, multi, conflict):", groupsAll
    (single, multi, conflict) = groupSize.groupSizeMap(chemblTargets, release,
                                                       user, pword, host, port)
    print "all covered targets (single, multi, conflict): ", len(single), len(
        multi), len(conflict)
    (single, multi, conflict) = groupSize.actSizeMap(chemblTargets, release,
                                                     user, pword, host, port)
    print "all covered targets (single, multi, conflict): ", len(single), len(
        multi), len(conflict)

    ## Plot the evaluation of the mappings.
    import queryDevice
    import matchData
    import evaluatePred
    import os

    intacts = queryDevice.queryDevice(
        """SELECT mpf.protein_accession,
		mpf.domain,mpf.molregno, pfd.start, pfd.end, mpf.maptype,
	 	md.chembl_id FROM map_pfam mpf 
	JOIN pfam_domains pfd 
	  ON pfd.protein_accession = mpf.protein_accession 
	JOIN molecule_dictionary md 
	  ON md.molregno = mpf.molregno 
	WHERE mpf.domain = pfd.domain""", release, user, pword, host, port)

    # ...against PDBe
    pdbDict = matchData.pdbePredicted(pdbDict, intacts, uniDict)
    evaluatePred.pdbePredicted(pdbDict, 'prediction', release)
    os.system(
        '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s -%s -%s  ecdf.R'
        % ('prediction', 'PDB', release))
    # ...against uniprot
    uniprotDict = matchData.uniprotPredicted(uniprotDict, intacts)
    evaluatePred.uniprotPredicted(uniprotDict, 'prediction', release)
    os.system(
        '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s  -%s -%s  ecdf.R'
        % ('prediction', "Uni", release))

    ## Map the overlap
    #import overlap
    #tholds = [50,10,5,1,0.5,0.1,0.05,0.01,0.005,0.001, 0.0005,0.0001, 0.00005,0.000001]
    #overlap.overlap(propDict, tholds, release)

    ## Power Law Distribution of domain occurences
    ##  Prepare the data for the power law plot.
    ##  1. Count the targets and compounds per domain using the propDict
    ##  2. Count a human genes per domain using the Pfam dictionary
    ##  3. Plot the power law distributions for all domains and overlay 25 most
    ##     frequent domains
    import countFreqs
    import plplot
    import plplotRaw
    import parse
    countFreqs.countLigs(humProtCod.keys(), chemblTargets, release, user,
                         pword, host, port)
    countFreqs.countDoms(humProtCod.keys(), pfamDict)
    filenames = ['genFreq.tab', 'domLigs.tab', 'targLigs.tab']

    for filename in filenames:
        os.system(
            '/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s statPowerLaw.R'
            % filename)
        al, minx = parse.rdstatLogs('data/powerLawLog%s' % filename)
        freqs = parse.col2intlist('data/%s' % filename, 1, True)
        print len(freqs), minx, al, filename, type(freqs), type(freqs[1])
        plplot.plplot(freqs, minx, al, filename)
        plplotRaw.plplotRaw(freqs, filename)

    ## Plot the ligand properties.
    import export
    import os
    selected = ['Pkinase', 'Pkinase_Tyr', 'p450', 'SNF', 'Trypsin', 'RVP']
    export.exportProps(selected, propDict, threshold, release, user, pword,
                       host, port)

    filename = 'data/cmpdProps_pKi%s_chembl%s.tab' % (int(threshold), release)
    os.system(
        "/ebi/research/software/Linux_x86_64/bin/R-2.11.0 CMD BATCH --vanilla -%s pca.R"
        % filename)