def run_subs_query(subs):
    """Print every matched pair whose left-hand core equals ``subs``.

    Selects the rows of ``core_table`` whose ``core_smi_ni`` equals ``subs``,
    joins them with every other compound that shares the same context, and
    keeps the pairs for which the right-hand compound size minus the context
    size is at most the module-level ``max_size``.  Pairs whose two core
    SMILES are identical are skipped.

    One comma-separated line is printed per remaining pair: ``subs``, the
    module-level ``id`` (only when ``have_id`` is true), the left and right
    compound SMILES, the left and right compound ids, and the two values
    returned by ``cansmirk``.

    Relies on the module-level names ``cursor``, ``max_size``, ``have_id``,
    ``id`` and ``cansmirk``.

    NOTE(review): ``subs`` and ``max_size`` are interpolated straight into
    the SQL text; switch to bound parameters once the paramstyle of
    ``cursor`` is confirmed.
    """
    query_sql = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi,"
        " rhs_smi.cmpd_size-context_table.context_size"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " core_table rhs, cmpd_smisp lhs_smi, cmpd_smisp rhs_smi,"
        " context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
    ) % (subs, max_size)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        # make sure it is not the same core on both sides
        if r[2] != r[5]:
            smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
            # print(...) with a single pre-formatted string behaves the same
            # under Python 2 and Python 3, unlike the print statement.
            if have_id:
                print("%s,%s,%s,%s,%s,%s,%s,%s" %
                      (subs, id, r[0], r[3], r[1], r[4], smirks, context))
            else:
                print("%s,%s,%s,%s,%s,%s,%s" %
                      (subs, r[0], r[3], r[1], r[4], smirks, context))
def run_subs_query(subs):
    """Report matched pairs for one left-hand substructure.

    Runs a single query for the cores in ``core_table`` whose
    ``core_smi_ni`` equals ``subs``, paired with other compounds that share
    the same context and whose size minus the context size does not exceed
    the module-level ``max_size``.  Rows where both sides have the same core
    SMILES are dropped.

    Each surviving row is printed as a comma-separated line: ``subs``, the
    module-level ``id`` when ``have_id`` is true, left SMILES, right SMILES,
    left compound id, right compound id, then the pair returned by
    ``cansmirk``.

    Uses the module-level ``cursor``, ``max_size``, ``have_id``, ``id`` and
    ``cansmirk``.
    """
    sql_template = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi,"
        " rhs_smi.cmpd_size-context_table.context_size"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " core_table rhs, cmpd_smisp lhs_smi, cmpd_smisp rhs_smi,"
        " context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
    )
    cursor.execute(sql_template % (subs, max_size))

    for row in cursor.fetchall():
        lhs_core, rhs_core = row[2], row[5]
        # Identical cores on both sides are not a transformation.
        if lhs_core == rhs_core:
            continue

        smirks, context = cansmirk(str(lhs_core), str(rhs_core), str(row[6]))
        pair_fields = (row[0], row[3], row[1], row[4], smirks, context)
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" % ((subs, id) + pair_fields))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" % ((subs,) + pair_fields))
def run_trans_smarts_query(transform):
    """Print matched pairs for a ``lhs>>rhs`` transform given as SMARTS.

    Each side of ``transform`` is passed to the external ``SearchDb.py``
    script (run through the shell against the ``<pre>_smarts`` database
    directory); every output line of that script is used as a
    ``core_smi_ni`` value.  The pair query then keeps rows whose left core
    is in the left match list, whose right core is in the right match list,
    and where both compounds' size minus the context size is at most the
    module-level ``max_size``.

    One comma-separated line is printed per row: ``transform``, the
    module-level ``id`` (only when ``have_id`` is true), left and right
    SMILES, left and right compound ids, and the two values returned by
    ``cansmirk``.

    Relies on the module-level names ``pre``, ``cursor``, ``max_size``,
    ``have_id``, ``id`` and ``cansmirk``.

    Raises:
        ValueError: if ``transform`` does not split into exactly two parts
            on ``">>"``.

    NOTE(review): the SMARTS is embedded in a shell command and the match
    lists are interpolated straight into the SQL text.
    """
    lhs, rhs = transform.split(">>")

    # set os environment for rdkit to use sqlite
    os.environ['RD_USESQLLITE'] = '1'

    def search_smarts_db(smarts):
        # Return the output lines of SearchDb.py for one SMARTS pattern.
        cmd = "python $RDBASE/Projects/DbCLI/SearchDb.py --dbDir=%s_smarts --smarts='%s' --silent" % (
            pre, smarts)
        # universal_newlines=True makes the pipe yield text on Python 3 as
        # well; without it the output is bytes and .split("\n") raises
        # TypeError.
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0].rstrip()
        return output.split("\n")

    matching_lhs = search_smarts_db(lhs)
    matching_rhs = search_smarts_db(rhs)

    # Build the quoted, comma-separated lists for the two IN (...) clauses.
    lhs_q_string = "'%s'" % ("','".join(matching_lhs))
    rhs_q_string = "'%s'" % ("','".join(matching_rhs))

    query_sql = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni in (%s) ) lhs,"
        " (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni in (%s) ) rhs,"
        " cmpd_smisp lhs_smi, cmpd_smisp rhs_smi, context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
        " and lhs_smi.cmpd_size-context_table.context_size <= %s "
    ) % (lhs_q_string, rhs_q_string, max_size, max_size)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        # print(...) with a single pre-formatted string behaves the same
        # under Python 2 and Python 3, unlike the print statement.
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" %
                  (transform, id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" %
                  (transform, r[0], r[3], r[1], r[4], smirks, context))
def run_subs_smarts_query(subs_smarts):
    """Print matched pairs for every core matching the SMARTS ``subs_smarts``.

    The external ``SearchDb.py`` script is run through the shell against
    the ``<pre>_smarts`` database directory, with its output redirected to
    a temporary file named after the current process id.  Each line of that
    file is used as a ``core_smi_ni`` value in a pair query that keeps rows
    where both compounds' size minus the context size is at most the
    module-level ``max_size``.

    One comma-separated line is printed per row: the module-level ``id``
    (only when ``have_id`` is true), left and right SMILES, left and right
    compound ids, and the two values returned by ``cansmirk``.

    The temporary file is closed and removed even when a query or
    ``cansmirk`` raises.

    Relies on the module-level names ``pre``, ``cursor``, ``max_size``,
    ``have_id``, ``id`` and ``cansmirk``.

    NOTE(review): the SMARTS is embedded in a shell command and each core
    value is interpolated straight into the SQL text.
    """
    # set os environment for rdkit to use sqlite
    os.environ['RD_USESQLLITE'] = '1'

    temp_core_ni_file = 'temp_core_ni_file_%s' % (os.getpid())
    cmd = "python $RDBASE/Projects/DbCLI/SearchDb.py --dbDir=%s_smarts --smarts='%s' --silent >%s" % (
        pre, subs_smarts, temp_core_ni_file)
    subprocess.Popen(cmd, shell=True).wait()

    query_template = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi,"
        " rhs_smi.cmpd_size-context_table.context_size"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " core_table rhs, cmpd_smisp lhs_smi, cmpd_smisp rhs_smi,"
        " context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
        " and lhs_smi.cmpd_size-context_table.context_size <= %s"
    )

    try:
        # The context manager closes the handle on every exit path.
        with open(temp_core_ni_file, 'r') as infile:
            for line in infile:
                core_smi_ni = line.rstrip()
                cursor.execute(query_template %
                               (core_smi_ni, max_size, max_size))
                results = cursor.fetchall()

                for r in results:
                    smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
                    if have_id:
                        print("%s,%s,%s,%s,%s,%s,%s" %
                              (id, r[0], r[3], r[1], r[4], smirks, context))
                    else:
                        print("%s,%s,%s,%s,%s,%s" %
                              (r[0], r[3], r[1], r[4], smirks, context))
    finally:
        # remove temporary files; the existence check keeps a failed open()
        # from being masked by a second error here
        if os.path.exists(temp_core_ni_file):
            os.unlink(temp_core_ni_file)
def run_subs_smarts_query(subs_smarts):
    """Print matched pairs for every core matching the SMARTS ``subs_smarts``.

    The external ``SearchDb.py`` script is run through the shell against
    the ``<pre>_smarts`` database directory, with its output redirected to
    a temporary file named after the current process id.  Each line of that
    file is used as a ``core_smi_ni`` value in a pair query that keeps rows
    where both compounds' size minus the context size is at most the
    module-level ``max_size``.

    One comma-separated line is printed per row: the module-level ``id``
    (only when ``have_id`` is true), left and right SMILES, left and right
    compound ids, and the two values returned by ``cansmirk``.

    The temporary file is closed and removed even when a query or
    ``cansmirk`` raises.

    Relies on the module-level names ``pre``, ``cursor``, ``max_size``,
    ``have_id``, ``id`` and ``cansmirk``.

    NOTE(review): the SMARTS is embedded in a shell command and each core
    value is interpolated straight into the SQL text.
    """
    # set os environment for rdkit to use sqlite
    os.environ['RD_USESQLLITE'] = '1'

    temp_core_ni_file = 'temp_core_ni_file_%s' % (os.getpid())
    cmd = "python $RDBASE/Projects/DbCLI/SearchDb.py --dbDir=%s_smarts --smarts='%s' --silent >%s" % (
        pre, subs_smarts, temp_core_ni_file)
    subprocess.Popen(cmd, shell=True).wait()

    query_template = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi,"
        " rhs_smi.cmpd_size-context_table.context_size"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " core_table rhs, cmpd_smisp lhs_smi, cmpd_smisp rhs_smi,"
        " context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
        " and lhs_smi.cmpd_size-context_table.context_size <= %s"
    )

    try:
        # The context manager closes the handle on every exit path.
        with open(temp_core_ni_file, 'r') as infile:
            for line in infile:
                core_smi_ni = line.rstrip()
                cursor.execute(query_template %
                               (core_smi_ni, max_size, max_size))
                results = cursor.fetchall()

                for r in results:
                    smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
                    if have_id:
                        print("%s,%s,%s,%s,%s,%s,%s" %
                              (id, r[0], r[3], r[1], r[4], smirks, context))
                    else:
                        print("%s,%s,%s,%s,%s,%s" %
                              (r[0], r[3], r[1], r[4], smirks, context))
    finally:
        # remove temporary files; the existence check keeps a failed open()
        # from being masked by a second error here
        if os.path.exists(temp_core_ni_file):
            os.unlink(temp_core_ni_file)
def run_trans_query(transform):
    """Report matched pairs that reproduce ``transform`` exactly.

    ``transform`` is split on ``">>"`` and both halves are passed through
    ``remove_numbers`` before being used as ``core_smi_ni`` lookup keys for
    the left and right sides of the pair query.  For every returned row the
    SMIRKS is rebuilt with ``cansmirk``; only rows whose rebuilt SMIRKS
    equals ``transform`` are printed.

    Each printed line is comma-separated: the module-level ``id`` when
    ``have_id`` is true, left SMILES, right SMILES, left compound id, right
    compound id, then the pair returned by ``cansmirk``.

    Uses the module-level ``cursor``, ``have_id``, ``id``, ``cansmirk`` and
    ``remove_numbers``.

    Raises:
        ValueError: if ``transform`` does not split into exactly two parts
            on ``">>"``.
    """
    lhs_part, rhs_part = transform.split(">>")

    # remove connectivity info
    lookup_keys = (remove_numbers(lhs_part), remove_numbers(rhs_part))

    sql_template = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') rhs,"
        " cmpd_smisp lhs_smi, cmpd_smisp rhs_smi, context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
    )
    cursor.execute(sql_template % lookup_keys)

    for row in cursor.fetchall():
        smirks, context = cansmirk(str(row[2]), str(row[5]), str(row[6]))

        # make sure connectivity is correct
        if not (smirks == transform):
            continue

        pair_fields = (row[0], row[3], row[1], row[4], smirks, context)
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s" % ((id,) + pair_fields))
        else:
            print("%s,%s,%s,%s,%s,%s" % pair_fields)
def run_trans_query(transform):
    """Print the matched pairs that reproduce ``transform`` exactly.

    ``transform`` is split on ``">>"``; both halves are passed through
    ``remove_numbers`` and used as ``core_smi_ni`` lookup keys for the left
    and right sides of the pair query.  For every returned row the SMIRKS
    is rebuilt with ``cansmirk`` and the row is printed only when that
    SMIRKS equals ``transform``.

    Each printed line is comma-separated: the module-level ``id`` (only
    when ``have_id`` is true), left and right SMILES, left and right
    compound ids, and the two values returned by ``cansmirk``.

    Relies on the module-level names ``cursor``, ``have_id``, ``id``,
    ``cansmirk`` and ``remove_numbers``.

    Raises:
        ValueError: if ``transform`` does not split into exactly two parts
            on ``">>"``.

    NOTE(review): the lookup keys are interpolated straight into the SQL
    text; switch to bound parameters once the paramstyle of ``cursor`` is
    confirmed.
    """
    lhs, rhs = transform.split(">>")

    # remove connectivity info
    lhs_ni = remove_numbers(lhs)
    rhs_ni = remove_numbers(rhs)

    query_sql = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') lhs,"
        " (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni = '%s') rhs,"
        " cmpd_smisp lhs_smi, cmpd_smisp rhs_smi, context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
    ) % (lhs_ni, rhs_ni)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))

        # make sure connectivity is correct
        if smirks == transform:
            # print(...) with a single pre-formatted string behaves the
            # same under Python 2 and Python 3, unlike the print statement.
            if have_id:
                print("%s,%s,%s,%s,%s,%s,%s" %
                      (id, r[0], r[3], r[1], r[4], smirks, context))
            else:
                print("%s,%s,%s,%s,%s,%s" %
                      (r[0], r[3], r[1], r[4], smirks, context))
def run_trans_smarts_query(transform):
    """Print matched pairs for a ``lhs>>rhs`` transform given as SMARTS.

    Each side of ``transform`` is passed to the external ``SearchDb.py``
    script (run through the shell against the ``<pre>_smarts`` database
    directory); every output line of that script is used as a
    ``core_smi_ni`` value.  The pair query then keeps rows whose left core
    is in the left match list, whose right core is in the right match list,
    and where both compounds' size minus the context size is at most the
    module-level ``max_size``.

    One comma-separated line is printed per row: ``transform``, the
    module-level ``id`` (only when ``have_id`` is true), left and right
    SMILES, left and right compound ids, and the two values returned by
    ``cansmirk``.

    Relies on the module-level names ``pre``, ``cursor``, ``max_size``,
    ``have_id``, ``id`` and ``cansmirk``.

    Raises:
        ValueError: if ``transform`` does not split into exactly two parts
            on ``">>"``.

    NOTE(review): the SMARTS is embedded in a shell command and the match
    lists are interpolated straight into the SQL text.
    """
    lhs, rhs = transform.split(">>")

    # set os environment for rdkit to use sqlite
    os.environ['RD_USESQLLITE'] = '1'

    def search_smarts_db(smarts):
        # Return the output lines of SearchDb.py for one SMARTS pattern.
        cmd = "python $RDBASE/Projects/DbCLI/SearchDb.py --dbDir=%s_smarts --smarts='%s' --silent" % (
            pre, smarts)
        # universal_newlines=True makes the pipe yield text; without it the
        # output is bytes on Python 3 and .split("\n") raises TypeError.
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0].rstrip()
        return output.split("\n")

    matching_lhs = search_smarts_db(lhs)
    matching_rhs = search_smarts_db(rhs)

    # Build the quoted, comma-separated lists for the two IN (...) clauses.
    lhs_q_string = "'%s'" % ("','".join(matching_lhs))
    rhs_q_string = "'%s'" % ("','".join(matching_rhs))

    query_sql = (
        " select lhs_smi.smiles, lhs.cmpd_id, lhs.core_smi,"
        " rhs_smi.smiles, rhs.cmpd_id, rhs.core_smi,"
        " context_table.context_smi"
        " from (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni in (%s) ) lhs,"
        " (select cmpd_id,core_smi,context_id from core_table"
        " where core_smi_ni in (%s) ) rhs,"
        " cmpd_smisp lhs_smi, cmpd_smisp rhs_smi, context_table"
        " where lhs.context_id = rhs.context_id"
        " and context_table.context_id = rhs.context_id"
        " and lhs_smi.cmpd_id = lhs.cmpd_id"
        " and rhs_smi.cmpd_id = rhs.cmpd_id"
        " and lhs.cmpd_id != rhs.cmpd_id"
        " and rhs_smi.cmpd_size-context_table.context_size <= %s"
        " and lhs_smi.cmpd_size-context_table.context_size <= %s "
    ) % (lhs_q_string, rhs_q_string, max_size, max_size)

    cursor.execute(query_sql)
    results = cursor.fetchall()

    for r in results:
        smirks, context = cansmirk(str(r[2]), str(r[5]), str(r[6]))
        if have_id:
            print("%s,%s,%s,%s,%s,%s,%s,%s" %
                  (transform, id, r[0], r[3], r[1], r[4], smirks, context))
        else:
            print("%s,%s,%s,%s,%s,%s,%s" %
                  (transform, r[0], r[3], r[1], r[4], smirks, context))
id = line_fields[1] lhs, rhs = smirks.split(">>") l = Chem.MolFromSmiles(lhs) if (l == None): sys.stderr.write("Can't generate mol for: %s\n" % (lhs)) continue r = Chem.MolFromSmiles(rhs) if (r == None): sys.stderr.write("Can't generate mol for: %s\n" % (rhs)) continue clhs = Chem.MolToSmiles(l, isomericSmiles=True) crhs = Chem.MolToSmiles(r, isomericSmiles=True) #just need to take care of [*H:1] if (clhs == '[*H:1]'): clhs = '[*:1][H]' if (crhs == '[*H:1]'): crhs = '[*:1][H]' #print clhs #print crhs csmirk, context = cansmirk(clhs, crhs, "") print("%s %s" % (csmirk, id))