def store_kplets_pile(kplets_pile, cdd2id, file2id):
    """Bulk-insert 3-plets and their file associations.

    Two multi-row ``insert ignore`` statements are built and run through
    ``DbClass``: one into ``bacteria_3plets`` and one into
    ``bacteria_3plets_win10``, where every ``kplet_id`` is resolved with a
    sub-select on the three kplet columns.

    Args:
        kplets_pile: iterable of ``(kplets, fname)`` pairs. ``kplets`` is an
            iterable of kplets; each kplet must hold exactly three keys of
            ``cdd2id`` (the row format string has three ``%d`` slots).
        cdd2id: mapping from a kplet member to a value accepted by ``int``.
        file2id: mapping from ``fname`` to a value accepted by ``int``.

    Returns:
        None. When the pile contains no kplets nothing is sent to the
        database.
    """
    kplet_rows = []
    file_rows = []
    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Members are sorted first and only then mapped to ids, so the
            # column order follows the ordering of the original keys.
            ids = tuple(int(cdd2id[member]) for member in sorted(kplet))
            kplet_rows.append("(%d, %d, %d)" % ids)
            file_rows.append(
                ("((select id from bacteria_3plets where "
                 "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                 "%d)") % (ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statements would end in "values;" which is not
        # valid SQL, so there is nothing to execute.
        return

    sql_kplet = (
        "insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n"
        + ",\n".join(kplet_rows) + ";")
    sql_kplet_file = (
        "insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n"
        + ",\n".join(file_rows) + ";")

    _db = DbClass()
    # The kplets are committed first: the second statement looks their ids up.
    _db.cmd = sql_kplet
    _db.execute()
    _db.commit()
    _db.cmd = sql_kplet_file
    _db.execute()
    _db.commit()
def store_kplets_pile(kplets_pile, cdd2id, file2id):
    """Store a pile of 3-plets together with the files they came from.

    Builds one multi-row ``insert ignore`` for ``bacteria_3plets`` and one
    for ``bacteria_3plets_win10`` (whose ``kplet_id`` is looked up with a
    sub-select on the three kplet columns), then executes and commits each
    of them through ``DbClass``.

    Args:
        kplets_pile: iterable of ``(kplets, fname)`` pairs; every kplet in
            ``kplets`` must contain exactly three keys of ``cdd2id``.
        cdd2id: mapping from kplet member to an ``int``-convertible id.
        file2id: mapping from ``fname`` to an ``int``-convertible id.

    Returns:
        None. If no kplet is found in the pile the database is not touched.
    """
    value_rows = []
    link_rows = []
    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Sort the raw members, then translate them to integer ids.
            id_triplet = tuple(int(cdd2id[key]) for key in sorted(kplet))
            value_rows.append("(%d, %d, %d)" % id_triplet)
            link_rows.append(
                ("((select id from bacteria_3plets where "
                 "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                 "%d)") % (id_triplet + (int(file2id[fname]),)))

    if not value_rows:
        # An insert with an empty values list is malformed SQL; skip it.
        return

    statements = (
        "insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n"
        + ",\n".join(value_rows) + ";",
        "insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n"
        + ",\n".join(link_rows) + ";",
    )

    _db = DbClass()
    # Order matters: the link rows select ids of the kplets inserted first.
    for statement in statements:
        _db.cmd = statement
        _db.execute()
        _db.commit()
def insert_source_duplets(kplets_pile, profile2id, source_id):
    """Bulk-insert adjacent duplets and link each of them to a source.

    One multi-row ``insert ignore`` goes into ``prok1402_all_adj_duplet`` and
    one multi-row ``insert`` into ``prok1402_all_adj_duplet_source``; the
    latter resolves ``kplet_id`` with a sub-select on
    ``(kplet_1, kplet_2, multidomain)``. Both statements are executed through
    ``DbClass`` and committed together.

    Args:
        kplets_pile: iterable of ``(duplet, fname, multidomain)`` triples.
            ``duplet`` holds keys of ``profile2id``; after sorting, its first
            two members are used. ``fname`` is not used. ``multidomain`` is
            formatted with ``%d``.
        profile2id: mapping from duplet member to its id.
        source_id: integer id written next to every duplet.

    Returns:
        None. Nothing is executed when ``kplets_pile`` yields no entries.
    """
    duplet_rows = []
    source_rows = []
    for duplet, _fname, multidomain in kplets_pile:
        terms = [profile2id[term] for term in sorted(duplet)]
        duplet_rows.append("(%s, %s, %d)" % (terms[0], terms[1], multidomain))
        source_rows.append(
            ("((select id from prok1402_all_adj_duplet where "
             "kplet_1=%s and kplet_2=%s and multidomain=%d), %d)")
            % (terms[0], terms[1], multidomain, source_id))

    if not duplet_rows:
        # "insert ... values;" with no rows is not valid SQL.
        return

    sql_insert_kplet = (
        "insert ignore into prok1402_all_adj_duplet "
        "(kplet_1, kplet_2, multidomain) values \n"
        + ",\n".join(duplet_rows) + ";")
    sql_insert_kplet_file = (
        "insert into prok1402_all_adj_duplet_source "
        "(kplet_id, source_id) values \n"
        + ",\n".join(source_rows) + ";")

    _db = DbClass()
    _db.cmd = sql_insert_kplet
    _db.execute()
    _db.cmd = sql_insert_kplet_file
    _db.execute()
    # Single commit covering both statements.
    _db.commit()
def insert_duplets(kplets_pile, profile2id, file2id):
    """Bulk-insert baited duplets and the files in which they occur.

    One multi-row ``insert ignore`` goes into ``prok1402_baited_duplet`` and
    one into ``prok1402_baited_duplet_file``; the latter resolves
    ``kplet_id`` with a sub-select on ``(kplet_1, kplet_2)``. Both statements
    are executed through ``DbClass`` and committed together.

    Args:
        kplets_pile: iterable of ``(duplet, fname)`` pairs. ``duplet`` holds
            keys of ``profile2id``; after sorting, its first two members are
            used. Only the base name of ``fname`` is looked up in
            ``file2id``.
        profile2id: mapping from duplet member to its id.
        file2id: mapping from file base name to an integer file id.

    Returns:
        None. Nothing is executed when ``kplets_pile`` yields no entries.
    """
    duplet_rows = []
    file_rows = []
    for duplet, fname in kplets_pile:
        terms = [profile2id[term] for term in sorted(duplet)]
        file_id = file2id[os.path.basename(fname)]
        duplet_rows.append("(%s, %s)" % (terms[0], terms[1]))
        file_rows.append(
            ("((select id from prok1402_baited_duplet where "
             "kplet_1=%s and kplet_2=%s), %d)")
            % (terms[0], terms[1], file_id))

    if not duplet_rows:
        # "insert ... values;" with no rows is not valid SQL.
        return

    sql_insert_kplet = (
        "insert ignore into prok1402_baited_duplet (kplet_1, kplet_2) values \n"
        + ",\n".join(duplet_rows) + ";")
    sql_insert_kplet_file = (
        "insert ignore into prok1402_baited_duplet_file "
        "(kplet_id, file_id) values \n"
        + ",\n".join(file_rows) + ";")

    _db = DbClass()
    _db.cmd = sql_insert_kplet
    _db.execute()
    _db.cmd = sql_insert_kplet_file
    _db.execute()
    # Single commit covering both statements.
    _db.commit()
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):
    """Bulk-insert 3-plets and their file associations for one table prefix.

    Rows go into ``<prefix>_3plets`` and ``<prefix>_3plets_files`` using two
    multi-row ``insert ignore`` statements; the second resolves ``kplet_id``
    with a sub-select on the three kplet columns. Each statement is executed
    and committed through ``DbClass``.

    Args:
        prefix: table-name prefix. It is interpolated directly into the SQL
            text, so it must come from trusted code.
        kplets_pile: iterable of ``(kplets, fname)`` pairs; every kplet in
            ``kplets`` must contain exactly three keys of ``profile2id``.
        profile2id: mapping from kplet member to an ``int``-convertible id.
        file2id: mapping from ``fname`` to an ``int``-convertible id.

    Returns:
        None. If no kplet is found in the pile the database is not touched.
    """
    kplet_table_name = "%s_3plets" % prefix
    kplet_table_file_name = "%s_3plets_files" % prefix

    kplet_rows = []
    file_rows = []
    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Sort the raw members, then translate them to integer ids.
            ids = tuple(int(profile2id[member]) for member in sorted(kplet))
            kplet_rows.append("(%d, %d, %d)" % ids)
            file_rows.append(
                ("((select id from %s where "
                 "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                 "%d)")
                % ((kplet_table_name,) + ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statements would end in "values;" which is not
        # valid SQL, so there is nothing to execute.
        return

    sql_kplet = (
        "insert ignore into %s (kplet_1, kplet_2, kplet_3) values \n"
        % kplet_table_name
        + ",\n".join(kplet_rows) + ";")
    sql_kplet_file = (
        "insert ignore into %s (kplet_id, file_id) values \n"
        % kplet_table_file_name
        + ",\n".join(file_rows) + ";")

    _db = DbClass()
    # The kplets are committed first: the second statement looks their ids up.
    _db.cmd = sql_kplet
    _db.execute()
    _db.commit()
    _db.cmd = sql_kplet_file
    _db.execute()
    _db.commit()
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):
    """Bulk-insert 4-plets and their file associations for one table prefix.

    Rows go into ``<prefix>_4plets`` and ``<prefix>_4plets_files`` using two
    multi-row ``insert ignore`` statements; the second resolves ``kplet_id``
    with a sub-select on the four kplet columns. Each statement is executed
    and committed through ``DbClass``.

    Args:
        prefix: table-name prefix. It is interpolated directly into the SQL
            text, so it must come from trusted code.
        kplets_pile: iterable of ``(kplets, fname)`` pairs; every kplet in
            ``kplets`` must contain exactly four keys of ``profile2id``.
        profile2id: mapping from kplet member to an ``int``-convertible id.
        file2id: mapping from ``fname`` to an ``int``-convertible id.

    Returns:
        None. If no kplet is found in the pile the database is not touched.
    """
    kplet_table_name = "%s_4plets" % prefix
    kplet_table_file_name = "%s_4plets_files" % prefix

    kplet_rows = []
    file_rows = []
    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Sort the raw members, then translate them to integer ids.
            ids = tuple(int(profile2id[member]) for member in sorted(kplet))
            kplet_rows.append("(%d, %d, %d, %d)" % ids)
            file_rows.append(
                ("((select id from %s where "
                 "kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d),"
                 "%d)")
                % ((kplet_table_name,) + ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statements would end in "values;" which is not
        # valid SQL, so there is nothing to execute.
        return

    sql_kplet = (
        "insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4) values \n"
        % kplet_table_name
        + ",\n".join(kplet_rows) + ";")
    sql_kplet_file = (
        "insert ignore into %s (kplet_id, file_id) values \n"
        % kplet_table_file_name
        + ",\n".join(file_rows) + ";")

    _db = DbClass()
    # The kplets are committed first: the second statement looks their ids up.
    _db.cmd = sql_kplet
    _db.execute()
    _db.commit()
    _db.cmd = sql_kplet_file
    _db.execute()
    _db.commit()
print "Duplicates saved in file:", fname continue sys.exit() print("Starting to remove duplicates") total_cnt = 0 chunk_size = 10000 chunks = [table_rows[i: i+chunk_size] for i in range(0, len(table_rows), chunk_size)] for row in table_rows: (_cnt, _kplet_id, _file_id) = row assert _cnt > 1 _db.cmd = """delete from %s where kplet_id=%d and file_id=%d""" % (table_name, _kplet_id, _file_id) _db.execute() _db.commit() _db.cmd = """insert into %s(kplet_id, file_id) values (%d, %d)""" % (table_name, _kplet_id, _file_id) _db.execute() _db.commit() total_cnt += 1 if total_cnt % 10000 ==0: print total_cnt print total_cnt print "Finished:", table_name print print
total_cnt = 0 chunk_size = 10000 chunks = [ table_rows[i:i + chunk_size] for i in range(0, len(table_rows), chunk_size) ] for row in table_rows: (_cnt, _kplet_id, _file_id) = row assert _cnt > 1 _db.cmd = """delete from %s where kplet_id=%d and file_id=%d""" % ( table_name, _kplet_id, _file_id) _db.execute() _db.commit() _db.cmd = """insert into %s(kplet_id, file_id) values (%d, %d)""" % ( table_name, _kplet_id, _file_id) _db.execute() _db.commit() total_cnt += 1 if total_cnt % 10000 == 0: print total_cnt print total_cnt print "Finished:", table_name print print