Example #1
0
def run_subs_query(subs):
    """Print the matched pairs whose left-hand core equals ``subs``.

    Selects compounds from ``core_table`` whose ``core_smi_ni`` is ``subs``
    and pairs each with every other compound sharing the same context,
    keeping pairs where the right-hand compound size minus the context size
    is at most the module-level ``max_size``.

    One comma-separated line is printed per pair whose two cores differ:
    ``subs``, the module-level ``id`` (only when ``have_id`` is true),
    left SMILES, right SMILES, left compound id, right compound id, and the
    SMIRKS and context returned by ``cansmirk``.

    :param subs: value compared against ``core_table.core_smi_ni``.
    """
    # subs is bound as a parameter rather than spliced into the SQL text, so
    # a quote character in it can neither break nor alter the statement.
    # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
    # that the module-level cursor is a sqlite3 cursor.
    # max_size is a module-level setting (presumably numeric), not caller
    # input; it is interpolated exactly as before.
    query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi,
            rhs_smi.cmpd_size-context_table.context_size
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
            core_table rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id
            and lhs.cmpd_id != rhs.cmpd_id
            and rhs_smi.cmpd_size-context_table.context_size <= %s""" % (max_size,)
    cursor.execute(query_sql, (subs,))
    results = cursor.fetchall()

    for r in results:
        # make sure it is not the same core on both sides
        if r[2] == r[5]:
            continue

        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 and Python 3.
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" % (subs, id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" % (subs, r[0], r[3], r[1], r[4], smirks, context))
Example #2
0
def run_subs_query(subs):
  """Print the matched pairs whose left-hand core equals ``subs``.

  Selects compounds from ``core_table`` whose ``core_smi_ni`` is ``subs``
  and pairs each with every other compound sharing the same context,
  keeping pairs where the right-hand compound size minus the context size
  is at most the module-level ``max_size``.

  One comma-separated line is printed per pair whose two cores differ:
  ``subs``, the module-level ``id`` (only when ``have_id`` is true),
  left SMILES, right SMILES, left compound id, right compound id, and the
  SMIRKS and context returned by ``cansmirk``.

  :param subs: value compared against ``core_table.core_smi_ni``.
  """
  # subs is bound as a parameter rather than spliced into the SQL text, so
  # a quote character in it can neither break nor alter the statement.
  # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
  # that the module-level cursor is a sqlite3 cursor.
  # max_size is a module-level setting (presumably numeric), not caller
  # input; it is interpolated exactly as before.
  query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi,
            rhs_smi.cmpd_size-context_table.context_size
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
            core_table rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id
            and lhs.cmpd_id != rhs.cmpd_id
            and rhs_smi.cmpd_size-context_table.context_size <= %s""" % (max_size,)
  cursor.execute(query_sql, (subs,))
  results = cursor.fetchall()

  for r in results:
    # make sure it is not the same core on both sides
    if r[2] == r[5]:
      continue

    smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
    if have_id:
      print("%s,%s,%s,%s,%s,%s,%s,%s" % (subs, id, r[0], r[3], r[1], r[4], smirks, context))
    else:
      print("%s,%s,%s,%s,%s,%s,%s" % (subs, r[0], r[3], r[1], r[4], smirks, context))
Example #3
0
def run_trans_smarts_query(transform):
    """Print the matched pairs whose cores match a SMARTS transform.

    ``transform`` has the form ``"<lhs smarts>>><rhs smarts>"``. Each side is
    passed to RDKit's ``SearchDb.py`` (run against the ``<pre>_smarts``
    database directory), and the lines it prints are used as the set of
    ``core_smi_ni`` values allowed on that side of the pair. Pairs are kept
    when both compound sizes minus the context size are at most the
    module-level ``max_size``.

    One comma-separated line is printed per pair: ``transform``, the
    module-level ``id`` (only when ``have_id`` is true), left SMILES, right
    SMILES, left compound id, right compound id, and the SMIRKS and context
    returned by ``cansmirk``.

    :param transform: two SMARTS patterns separated by ``>>``.
    :raises ValueError: if ``transform`` does not split into exactly two
        parts on ``>>``.
    """
    lhs, rhs = transform.split(">>")

    #set os enviroment for rdkit to use sqllite
    os.environ['RD_USESQLLITE'] = '1'

    def _matching_cores(smarts):
        # Return the lines SearchDb.py prints for one SMARTS pattern.
        # The command is an argument list, so the pattern reaches SearchDb.py
        # verbatim instead of being re-parsed by a shell; $RDBASE is expanded
        # here because no shell does it any more.
        cmd = [
            "python",
            os.path.expandvars("$RDBASE/Projects/DbCLI/SearchDb.py"),
            "--dbDir=%s_smarts" % (pre),
            "--smarts=%s" % (smarts),
            "--silent",
        ]
        # universal_newlines=True yields text on Python 3 as well; without it
        # communicate() returns bytes and the split("\n") below raises
        # TypeError.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
        return proc.communicate()[0].rstrip().split("\n")

    def _sql_in_list(values):
        # Render values as a quoted SQL list body, doubling embedded quotes.
        # NOTE(review): kept as literals rather than bound parameters because
        # the list length is unbounded and SQLite caps the number of host
        # parameters per statement.
        return "'%s'" % ("','".join(v.replace("'", "''") for v in values))

    lhs_q_string = _sql_in_list(_matching_cores(lhs))
    rhs_q_string = _sql_in_list(_matching_cores(rhs))

    query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni in (%s) ) lhs,
            (select cmpd_id,core_smi,context_id from core_table where core_smi_ni in (%s) ) rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id
            and lhs.cmpd_id != rhs.cmpd_id
            and rhs_smi.cmpd_size-context_table.context_size <= %s
            and lhs_smi.cmpd_size-context_table.context_size <= %s """ % (lhs_q_string, rhs_q_string, max_size, max_size)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 and Python 3.
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" % (transform, id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" % (transform, r[0], r[3], r[1], r[4], smirks, context))
Example #4
0
def run_subs_smarts_query(subs_smarts):
    """Print the matched pairs whose left-hand core matches a SMARTS pattern.

    Runs RDKit's ``SearchDb.py`` against the ``<pre>_smarts`` database
    directory, writing its output to a per-process temporary file in the
    current directory. Each line of that file is then used as a
    ``core_smi_ni`` value: compounds with that core are paired with every
    other compound sharing the same context, keeping pairs where both
    compound sizes minus the context size are at most the module-level
    ``max_size``.

    One comma-separated line is printed per pair: the module-level ``id``
    (only when ``have_id`` is true), left SMILES, right SMILES, left compound
    id, right compound id, and the SMIRKS and context returned by
    ``cansmirk``.

    :param subs_smarts: SMARTS pattern handed to ``SearchDb.py``.
    """
    #set os enviroment for rdkit to use sqllite
    os.environ['RD_USESQLLITE'] = '1'
    temp_core_ni_file = 'temp_core_ni_file_%s' % (os.getpid())

    # The command is an argument list, so the pattern reaches SearchDb.py
    # verbatim instead of being re-parsed by a shell; $RDBASE is expanded
    # here because no shell does it any more.
    cmd = [
        "python",
        os.path.expandvars("$RDBASE/Projects/DbCLI/SearchDb.py"),
        "--dbDir=%s_smarts" % (pre),
        "--smarts=%s" % (subs_smarts),
        "--silent",
    ]

    # The statement text is the same for every row, so build it once. The row
    # value is bound as a parameter instead of being spliced into the SQL.
    # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
    # that the module-level cursor is a sqlite3 cursor.
    # max_size is a module-level setting (presumably numeric), not caller
    # input; it is interpolated exactly as before.
    query_sql = """
        select  lhs_smi.smiles,
                lhs.cmpd_id,
                lhs.core_smi,
                rhs_smi.smiles,
                rhs.cmpd_id,
                rhs.core_smi,
                context_table.context_smi,
                rhs_smi.cmpd_size-context_table.context_size
        from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
                core_table rhs,
                cmpd_smisp lhs_smi,
                cmpd_smisp rhs_smi,
                context_table
        where   lhs.context_id = rhs.context_id
                and context_table.context_id = rhs.context_id
                and lhs_smi.cmpd_id = lhs.cmpd_id
                and rhs_smi.cmpd_id = rhs.cmpd_id
                and lhs.cmpd_id != rhs.cmpd_id
                and rhs_smi.cmpd_size-context_table.context_size <= %s
                and lhs_smi.cmpd_size-context_table.context_size <= %s""" % (
        max_size, max_size)

    try:
        # SearchDb.py writes straight into the temporary file (this replaces
        # the shell ">file" redirection).
        with open(temp_core_ni_file, 'w') as outfile:
            subprocess.Popen(cmd, stdout=outfile).wait()

        with open(temp_core_ni_file, 'r') as infile:
            for row in infile:
                cursor.execute(query_sql, (row.rstrip(),))
                results = cursor.fetchall()

                for r in results:
                    #cansmirk
                    smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
                    if have_id:
                        print("%s,%s,%s,%s,%s,%s,%s" %
                              (id, r[0], r[3], r[1], r[4], smirks, context))
                    else:
                        print("%s,%s,%s,%s,%s,%s" %
                              (r[0], r[3], r[1], r[4], smirks, context))
    finally:
        #remove temporary files, even when a query or cansmirk raised
        if os.path.exists(temp_core_ni_file):
            os.unlink(temp_core_ni_file)
Example #5
0
def run_subs_smarts_query(subs_smarts):
  """Print the matched pairs whose left-hand core matches a SMARTS pattern.

  Runs RDKit's ``SearchDb.py`` against the ``<pre>_smarts`` database
  directory, writing its output to a per-process temporary file in the
  current directory. Each line of that file is then used as a
  ``core_smi_ni`` value: compounds with that core are paired with every
  other compound sharing the same context, keeping pairs where both
  compound sizes minus the context size are at most the module-level
  ``max_size``.

  One comma-separated line is printed per pair: the module-level ``id``
  (only when ``have_id`` is true), left SMILES, right SMILES, left compound
  id, right compound id, and the SMIRKS and context returned by
  ``cansmirk``.

  :param subs_smarts: SMARTS pattern handed to ``SearchDb.py``.
  """
  #set os enviroment for rdkit to use sqllite
  os.environ['RD_USESQLLITE'] = '1'
  temp_core_ni_file = 'temp_core_ni_file_%s' % (os.getpid())

  # The command is an argument list, so the pattern reaches SearchDb.py
  # verbatim instead of being re-parsed by a shell; $RDBASE is expanded
  # here because no shell does it any more.
  cmd = [
    "python",
    os.path.expandvars("$RDBASE/Projects/DbCLI/SearchDb.py"),
    "--dbDir=%s_smarts" % (pre),
    "--smarts=%s" % (subs_smarts),
    "--silent",
  ]

  # The statement text is the same for every row, so build it once. The row
  # value is bound as a parameter instead of being spliced into the SQL.
  # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
  # that the module-level cursor is a sqlite3 cursor.
  # max_size is a module-level setting (presumably numeric), not caller
  # input; it is interpolated exactly as before.
  query_sql = """
        select  lhs_smi.smiles,
                lhs.cmpd_id,
                lhs.core_smi,
                rhs_smi.smiles,
                rhs.cmpd_id,
                rhs.core_smi,
                context_table.context_smi,
                rhs_smi.cmpd_size-context_table.context_size
        from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
                core_table rhs,
                cmpd_smisp lhs_smi,
                cmpd_smisp rhs_smi,
                context_table
        where   lhs.context_id = rhs.context_id
                and context_table.context_id = rhs.context_id
                and lhs_smi.cmpd_id = lhs.cmpd_id
                and rhs_smi.cmpd_id = rhs.cmpd_id
                and lhs.cmpd_id != rhs.cmpd_id
                and rhs_smi.cmpd_size-context_table.context_size <= %s
                and lhs_smi.cmpd_size-context_table.context_size <= %s""" % (max_size, max_size)

  try:
    # SearchDb.py writes straight into the temporary file (this replaces
    # the shell ">file" redirection).
    with open(temp_core_ni_file, 'w') as outfile:
      subprocess.Popen(cmd, stdout=outfile).wait()

    with open(temp_core_ni_file, 'r') as infile:
      for row in infile:
        cursor.execute(query_sql, (row.rstrip(),))
        results = cursor.fetchall()

        for r in results:
          #cansmirk
          smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
          if have_id:
            print("%s,%s,%s,%s,%s,%s,%s" % (id, r[0], r[3], r[1], r[4], smirks, context))
          else:
            print("%s,%s,%s,%s,%s,%s" % (r[0], r[3], r[1], r[4], smirks, context))
  finally:
    #remove temporary files, even when a query or cansmirk raised
    if os.path.exists(temp_core_ni_file):
      os.unlink(temp_core_ni_file)
Example #6
0
def run_trans_query(transform):
    """Print the matched pairs that reproduce ``transform`` exactly.

    ``transform`` has the form ``"<lhs>>><rhs>"``. Both sides are passed
    through ``remove_numbers`` (connectivity info removed, per the original
    comment) and used to select left- and right-hand rows of ``core_table``
    by ``core_smi_ni``; rows sharing a context form candidate pairs. A
    candidate is printed only when the SMIRKS that ``cansmirk`` builds for it
    equals ``transform``.

    One comma-separated line is printed per accepted pair: the module-level
    ``id`` (only when ``have_id`` is true), left SMILES, right SMILES, left
    compound id, right compound id, SMIRKS and context.

    :param transform: transformation written as two parts joined by ``>>``.
    :raises ValueError: if ``transform`` does not split into exactly two
        parts on ``>>``.
    """
    lhs, rhs = transform.split(">>")

    #remove connectivity info
    lhs_ni = remove_numbers(lhs)
    rhs_ni = remove_numbers(rhs)

    # Both core values are bound as parameters rather than spliced into the
    # SQL text, so a quote character can neither break nor alter the query.
    # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
    # that the module-level cursor is a sqlite3 cursor.
    query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
            (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id"""

    cursor.execute(query_sql, (lhs_ni, rhs_ni))
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        #make sure connectivity is correct
        if smirks != transform:
            continue

        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s" %
                  (id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s" %
                  (r[0], r[3], r[1], r[4], smirks, context))
Example #7
0
def run_trans_query(transform):
    """Print the matched pairs that reproduce ``transform`` exactly.

    ``transform`` has the form ``"<lhs>>><rhs>"``. Both sides are passed
    through ``remove_numbers`` (connectivity info removed, per the original
    comment) and used to select left- and right-hand rows of ``core_table``
    by ``core_smi_ni``; rows sharing a context form candidate pairs. A
    candidate is printed only when the SMIRKS that ``cansmirk`` builds for it
    equals ``transform``.

    One comma-separated line is printed per accepted pair: the module-level
    ``id`` (only when ``have_id`` is true), left SMILES, right SMILES, left
    compound id, right compound id, SMIRKS and context.

    :param transform: transformation written as two parts joined by ``>>``.
    :raises ValueError: if ``transform`` does not split into exactly two
        parts on ``>>``.
    """
    lhs, rhs = transform.split(">>")

    #remove connectivity info
    lhs_ni = remove_numbers(lhs)
    rhs_ni = remove_numbers(rhs)

    # Both core values are bound as parameters rather than spliced into the
    # SQL text, so a quote character can neither break nor alter the query.
    # NOTE(review): "?" is the sqlite3 (qmark) placeholder style - confirm
    # that the module-level cursor is a sqlite3 cursor.
    query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) lhs,
            (select cmpd_id,core_smi,context_id from core_table where core_smi_ni = ?) rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id"""

    cursor.execute(query_sql, (lhs_ni, rhs_ni))
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        #make sure connectivity is correct
        if smirks != transform:
            continue

        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 and Python 3.
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s" % (id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s" % (r[0], r[3], r[1], r[4], smirks, context))
Example #8
0
def run_trans_smarts_query(transform):
    """Print the matched pairs whose cores match a SMARTS transform.

    ``transform`` has the form ``"<lhs smarts>>><rhs smarts>"``. Each side is
    passed to RDKit's ``SearchDb.py`` (run against the ``<pre>_smarts``
    database directory), and the lines it prints are used as the set of
    ``core_smi_ni`` values allowed on that side of the pair. Pairs are kept
    when both compound sizes minus the context size are at most the
    module-level ``max_size``.

    One comma-separated line is printed per pair: ``transform``, the
    module-level ``id`` (only when ``have_id`` is true), left SMILES, right
    SMILES, left compound id, right compound id, and the SMIRKS and context
    returned by ``cansmirk``.

    :param transform: two SMARTS patterns separated by ``>>``.
    :raises ValueError: if ``transform`` does not split into exactly two
        parts on ``>>``.
    """
    lhs, rhs = transform.split(">>")

    #set os enviroment for rdkit to use sqllite
    os.environ['RD_USESQLLITE'] = '1'

    search_script = os.path.expandvars("$RDBASE/Projects/DbCLI/SearchDb.py")
    quoted_lists = []
    for smarts in (lhs, rhs):
        # The command is an argument list, so the pattern reaches SearchDb.py
        # verbatim instead of being re-parsed by a shell ($RDBASE is expanded
        # above because no shell does it any more).
        cmd = [
            "python",
            search_script,
            "--dbDir=%s_smarts" % (pre),
            "--smarts=%s" % (smarts),
            "--silent",
        ]
        # universal_newlines=True makes communicate() return text; on
        # Python 3 it otherwise returns bytes and split("\n") raises
        # TypeError.
        p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
        matching = p1.communicate()[0].rstrip().split("\n")

        # Render the matches as a quoted SQL list body, doubling embedded
        # quotes.
        # NOTE(review): kept as literals rather than bound parameters because
        # the list length is unbounded and SQLite caps the number of host
        # parameters per statement.
        escaped = [value.replace("'", "''") for value in matching]
        quoted_lists.append("'%s'" % ("','".join(escaped)))

    lhs_q_string, rhs_q_string = quoted_lists

    query_sql = """
    select  lhs_smi.smiles,
            lhs.cmpd_id,
            lhs.core_smi,
            rhs_smi.smiles,
            rhs.cmpd_id,
            rhs.core_smi,
            context_table.context_smi
    from    (select cmpd_id,core_smi,context_id from core_table where core_smi_ni in (%s) ) lhs,
            (select cmpd_id,core_smi,context_id from core_table where core_smi_ni in (%s) ) rhs,
            cmpd_smisp lhs_smi,
            cmpd_smisp rhs_smi,
            context_table
    where   lhs.context_id = rhs.context_id
            and context_table.context_id = rhs.context_id
            and lhs_smi.cmpd_id = lhs.cmpd_id
            and rhs_smi.cmpd_id = rhs.cmpd_id
            and lhs.cmpd_id != rhs.cmpd_id
            and rhs_smi.cmpd_size-context_table.context_size <= %s
            and lhs_smi.cmpd_size-context_table.context_size <= %s """ % (
        lhs_q_string, rhs_q_string, max_size, max_size)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" %
                  (transform, id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" %
                  (transform, r[0], r[3], r[1], r[4], smirks, context))
Example #9
0
            id = line_fields[1]

        lhs, rhs = smirks.split(">>")

        l = Chem.MolFromSmiles(lhs)
        if (l == None):
            sys.stderr.write("Can't generate mol for: %s\n" % (lhs))
            continue

        r = Chem.MolFromSmiles(rhs)
        if (r == None):
            sys.stderr.write("Can't generate mol for: %s\n" % (rhs))
            continue

        clhs = Chem.MolToSmiles(l, isomericSmiles=True)
        crhs = Chem.MolToSmiles(r, isomericSmiles=True)

        #just need to take care of [*H:1]
        if (clhs == '[*H:1]'):
            clhs = '[*:1][H]'

        if (crhs == '[*H:1]'):
            crhs = '[*:1][H]'

        #print clhs
        #print crhs

        csmirk, context = cansmirk(clhs, crhs, "")

        print("%s %s" % (csmirk, id))
Example #10
0
      id = line_fields[1]

    lhs, rhs = smirks.split(">>")

    l = Chem.MolFromSmiles(lhs)
    if (l == None):
      sys.stderr.write("Can't generate mol for: %s\n" % (lhs))
      continue

    r = Chem.MolFromSmiles(rhs)
    if (r == None):
      sys.stderr.write("Can't generate mol for: %s\n" % (rhs))
      continue

    clhs = Chem.MolToSmiles(l, isomericSmiles=True)
    crhs = Chem.MolToSmiles(r, isomericSmiles=True)

    #just need to take care of [*H:1]
    if (clhs == '[*H:1]'):
      clhs = '[*:1][H]'

    if (crhs == '[*H:1]'):
      crhs = '[*:1][H]'

    #print clhs
    #print crhs

    csmirk, context = cansmirk(clhs, crhs, "")

    print("%s %s" % (csmirk, id))