def get_report_kplets(limit_to=300, load_locations=None): _db = DbClass() _db.cmd = """select apc.*, s1.cnt, s1.wgt, s1.an from ( select ap.id ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an from archea_3plets ap inner join archea_3plets_win10 apw on ap.id = apw.kplet_id inner join archea_win10_files awf on apw.file_id = awf.id inner join sources s on awf.source_id=s.id inner join weights w on w.genome_id=s.genome_id group by ap.id having count(distinct s.genome_id)>1 ) s1 inner join archea_3plets_codes apc on s1.id=apc.id order by s1.wgt desc limit 0,%d""" % limit_to out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = (row[1:4]) if len(set(kplet_codes)) != 3: continue count = row[4] files = row[6].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) _path = neighborhoods_path() if load_locations: [kplet.load_locations(_path) for kplet in out_list] return out_list
def insert_source_duplets(kplets_pile, profile2id, source_id): sql_insert_kplet = """insert ignore into prok1402_all_adj_duplet (kplet_1, kplet_2, multidomain) values \n""" sql_insert_kplet_file = """insert into prok1402_all_adj_duplet_source (kplet_id, source_id) values \n""" for (duplet, fname, multidomain) in kplets_pile: terms = sorted(list(duplet)) terms = [profile2id[_] for _ in terms] sql_insert_kplet += """(%s, %s, %d),\n""" % (terms[0], terms[1], multidomain) sql_insert_kplet_file += """((select id from prok1402_all_adj_duplet where """ + \ """kplet_1=%s and kplet_2=%s and multidomain=%d), %d),\n""" % \ (terms[0], terms[1], multidomain, source_id) sql_insert_kplet_file = sql_insert_kplet_file[:-2] sql_insert_kplet_file += ";" sql_insert_kplet = sql_insert_kplet[:-2] sql_insert_kplet += ";" _db = DbClass() _db.cmd = sql_insert_kplet _db.execute() _db.cmd = sql_insert_kplet_file _db.execute() _db.commit()
def get_code_kplet(kplet_id): _db = DbClass() _db.cmd = """select kplet_1, kplet_2, kplet_3, kplet_4, kplet_5 from bacteria_5plets_codes where id = %s""" % kplet_id return _db.retrieve()[0]
def org2src_src2files_map(files): _db = DbClass() _db.cmd = """select awf.name, s.name, g.name from bacteria_win10_files awf inner join sources s on awf.source_id=s.id inner join genomes g on s.genome_id = g.id where awf.name in ('%s')""" % "','".join(files) _org2src = {} _src2files = {} for row in _db.retrieve(): parts = row [_file, _src, _org] = parts if _org in _org2src: _org2src[_org].update([_src]) else: _org2src[_org] = set([_src]) if _src in _src2files: _src2files[_src].update([_file]) else: _src2files[_src] = set([_file]) return _org2src, _src2files
def insert_duplets(kplets_pile, profile2id, file2id): sql_insert_kplet = """insert ignore into prok1402_baited_duplet (kplet_1, kplet_2) values \n""" sql_insert_kplet_file = """insert ignore into prok1402_baited_duplet_file (kplet_id, file_id) values \n""" for (duplet, fname) in kplets_pile: terms = sorted(list(duplet)) terms = [profile2id[_] for _ in terms] fname = os.path.basename(fname) file_id = file2id[fname] sql_insert_kplet += """(%s, %s),\n""" % (terms[0], terms[1]) sql_insert_kplet_file += """((select id from prok1402_baited_duplet where """ + \ """kplet_1=%s and kplet_2=%s), %d),\n""" % (terms[0], terms[1] ,file_id) sql_insert_kplet_file = sql_insert_kplet_file[:-2] sql_insert_kplet_file += ";" sql_insert_kplet = sql_insert_kplet[:-2] sql_insert_kplet += ";" _db = DbClass() _db.cmd = sql_insert_kplet _db.execute() _db.cmd = sql_insert_kplet_file _db.execute() _db.commit()
def get_report_kplets(limit_to=300, load_locations=None): sql_cmd = """select apc.*, s1.cnt, s1.wgt, s1.an from ( select ap.id ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an from archea_4plets ap inner join archea_4plets_win10 apw on ap.id = apw.kplet_id inner join archea_win10_files awf on apw.file_id = awf.id inner join sources s on awf.source_id=s.id inner join weights w on w.genome_id=s.genome_id group by ap.id having count(distinct s.genome_id)>1 ) s1 inner join archea_4plets_codes apc on s1.id=apc.id order by s1.wgt desc limit 0,%d""" % limit_to _db = DbClass() _db.cmd = sql_cmd out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = (row[1:5]) if len(set(kplet_codes)) != 4: continue count = row[5] files = row[7].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) _path = neighborhoods_path() if load_locations: [kplet.load_locations(_path) for kplet in out_list] return out_list
def insert_kplet(profiles): profile2id = t.map_profile2id(profiles) _db = DbClass() sql_cmd = """insert into archea_4plets (kplet_1, kplet_2, kplet_3, kplet_4) values (%d, %d, %d, %d)""" sql_cmd = sql_cmd % (profile2id[profiles[0]], profile2id[profiles[1]], profile2id[profiles[2]], profile2id[profiles[3]]) _db.cmd = sql_cmd _db.execute()
def get_code_kplet(kplet_id): _sql_cmd = """SELECT cp1.code, cp2.code, cp3.code, cp4.code FROM PatternQuest.archea_4plets ap inner join cdd_profiles cp1 on cp1.id = ap.kplet_1 inner join cdd_profiles cp2 on cp2.id = ap.kplet_2 inner join cdd_profiles cp3 on cp3.id = ap.kplet_3 inner join cdd_profiles cp4 on cp4.id = ap.kplet_4 where ap.id = %d""" % kplet_id _db = DbClass() _db.cmd = _sql_cmd return _db.retrieve()
def archea_kplet_ids2files(id_list): _sql_cmd = """select distinct awf.name from archea_5plets ap inner join archea_5plets_win10 apw on ap.id = apw.kplet_id inner join archea_win10_files awf on apw.file_id = awf.id where ap.id in (%s)""" % " , ".join(id_list) _db = DbClass() _db.cmd = _sql_cmd return [l[0] for l in _db.retrieve()]
def map_profile2id(profile_list): _db = DbClass() if profile_list == 'all': sql_cmd = """select code, id from cdd_profiles""" else: sql_cmd = """select code, id from cdd_profiles where code in ('%s')""" sql_cmd = sql_cmd % "','".join(profile_list) _db.cmd = sql_cmd return {row[0]: int(row[1]) for row in _db.retrieve()}
def get_multiple_kplets(): _sql_cmd = """select ap.id, count(*) cnt, group_concat(convert(apw.file_id, char(15))) as file_ids from archea_5plets ap inner join archea_5plets_win10 apw on ap.id = apw.kplet_id group by ap.id having count(*)>1 order by cnt desc""" _db = DbClass() _db.cmd = _sql_cmd return _db.retrieve()
def map_baited_file2id(): _db = DbClass() _db.cmd = """ select name, id from prok1402_baited_files""" rows = _db.retrieve() file2id = {} id2file = {} for row in rows: file2id[row[0]] = row[1] id2file[row[1]] = row[0] return file2id, id2file
def get_kplet_id(profiles): profile2id = t.map_profile2id(profiles) _db = DbClass() sql_cmd = """select id from archea_4plets where kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d""" sql_cmd = sql_cmd % (profile2id[profiles[0]], profile2id[profiles[1]], profile2id[profiles[2]], profile2id[profiles[3]]) _db.cmd = sql_cmd rows = _db.retrieve() if rows: return rows[0][0] return None
def get_report_kplet(kplet_id, id2cdd, load_locations=None): _db = DbClass() _db.cmd = """SET group_concat_max_len=1500000;""" _db.execute() _db.cmd = """select ap.* ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an from bacteria_5plets ap inner join bacteria_5plets_win10 apw on ap.id = apw.kplet_id inner join bacteria_win10_files awf on apw.file_id = awf.id inner join sources s on awf.source_id=s.id inner join weights w on w.genome_id=s.genome_id where ap.id=%d group by ap.id""" % kplet_id out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = [id2cdd[int(_id)] for _id in row[1:6]] if len(set(kplet_codes)) != 5: continue count = row[6] files = row[8].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) _path = neighborhoods_path() if load_locations: [kplet.load_locations(_path) for kplet in out_list] return out_list[0]
def get_report_kplets(id2cdd, limit_to=500, load_locations=None): _db = DbClass() _db.cmd = """SET group_concat_max_len=1500000""" _db.execute() _db.cmd = """select ap.* ,count(*) as cnt, sum(w.weight) as wgt, group_concat(awf.name) as an from bacteria_3plets ap inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id inner join bacteria_win10_files awf on apw.file_id = awf.id inner join sources s on awf.source_id=s.id inner join weights w on w.genome_id=s.genome_id group by ap.id having count(distinct s.genome_id)>1 order by wgt desc limit 0, %d""" % limit_to out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = ([id2cdd[int(_id)] for _id in row[1:4]]) if len(set(kplet_codes)) != 3: continue count = row[4] weight = row[5] files = row[6].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) _path = neighborhoods_path() if load_locations: [kplet.load_locations(_path) for kplet in out_list] return out_list
def get_report_kplets(id2cdd, limit_to=500): _db = DbClass() _db.cmd = """SET group_concat_max_len=1500000""" _db.execute() _db.cmd = """select cp.*, count(distinct cwf.id) as cnt, group_concat(cwf.name) as files from crispr_2plets cp inner join crispr_2plets_wgs cpw on cp.id = cpw.kplet_id inner join crispr_wgs_files cwf on cpw.file_id = cwf.id group by cp.id having count(distinct cwf.id)>1 order by cnt desc limit 0, %d""" % limit_to out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = ([id2cdd[int(_id)] for _id in row[1:3]]) if len(set(kplet_codes)) != 2: continue count = row[3] files = row[4].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) return out_list
def file2src_src2org_map(files): _db = DbClass() _db.cmd = """select awf.name, s.name, g.name from bacteria_win10_files awf inner join sources s on awf.source_id=s.id inner join genomes g on s.genome_id = g.id where awf.name in ('%s')""" % "','".join(files) _src2org = {} _file2src = {} for row in _db.retrieve(): parts = row [_file, _src, _org] = parts _src2org[_src] = _org _file2src[_file] = _src return _file2src, _src2org
def get_archaea_kplets(): sql_cmd = """select apc.*, s1.cnt, s1.wgt from ( select ap.id ,count(*) as cnt, sum(w.weight) as wgt from archea_5plets ap inner join archea_5plets_win10 apw on ap.id = apw.kplet_id inner join archea_win10_files awf on apw.file_id = awf.id inner join sources s on awf.source_id=s.id inner join weights w on w.genome_id=s.genome_id group by ap.id ) s1 inner join archea_5plets_codes apc on s1.id=apc.id order by s1.wgt desc""" _db = DbClass() _db.cmd = sql_cmd return _db.retrieve()
def extract_baited_duplet_aggregates(duplet_type="adj"): if duplet_type == "adj": sql_cmd = """select d.id, p1.code as profile_1, p2.code as profile_2, count(*) as loci, sum(w.weight) as loci_weight, group_concat(CONVERT(df.file_id, CHAR(20)) separator ',') as files from prok1402_baited_adj_duplet d inner join prok1402_baited_adj_duplet_file df on d.id = df.kplet_id inner join cdd_profiles p1 on d.kplet_1 = p1.id inner join cdd_profiles p2 on d.kplet_2 = p2.id inner join prok1402_baited_files bf on bf.id = df.file_id inner join sources s on bf.source_id = s.id inner join genomes g on g.id = s.genome_id inner join weights w on w.genome_id = g.id group by d.id order by loci desc;""" else: sql_cmd = """select d.id, p1.code as profile_1, p2.code as profile_2, count(*) as loci, sum(w.weight) as loci_weight, group_concat(CONVERT(df.file_id, CHAR(20)) separator ',') as files from prok1402_baited_duplet d inner join prok1402_baited_duplet_file df on d.id = df.kplet_id inner join cdd_profiles p1 on d.kplet_1 = p1.id inner join cdd_profiles p2 on d.kplet_2 = p2.id inner join prok1402_baited_files bf on bf.id = df.file_id inner join sources s on bf.source_id = s.id inner join genomes g on g.id = s.genome_id inner join weights w on w.genome_id = g.id group by d.id order by loci desc;""" _db = DbClass() _db.cmd = sql_cmd rows = _db.retrieve() return rows
def get_report_kplets(id2cdd, limit_to=500): _db = DbClass() _db.cmd = """SET group_concat_max_len=15000000""" _db.execute() _db.cmd = """select cp.*, count(distinct cwf.id) as cnt, group_concat(cwf.name) as files from crispr_5plets cp inner join crispr_5plets_wgs cpw on cp.id = cpw.kplet_id inner join crispr_wgs_files cwf on cpw.file_id = cwf.id group by cp.id having count(distinct cwf.id)>1 order by cnt desc limit 0, %d""" % limit_to out_list = [] for row in _db.retrieve(): id = row[0] kplet_codes = ([id2cdd[int(_id)] for _id in row[1:6]]) if len(set(kplet_codes)) != 5: continue count = row[6] files = row[7].split(',') tmp_kplet = Kplet(id=id, codes=kplet_codes, count=count, files=files) out_list.append(tmp_kplet) return out_list
def get_multiple_kplets(): _db = DbClass() _db.cmd = "SET group_concat_max_len = 100000;" _db.execute() _db.cmd = """ select ap.id, count(*) cnt, group_concat(convert(apw.file_id, char(15))) as file_ids from bacteria_3plets ap inner join bacteria_3plets_win10 apw on ap.id = apw.kplet_id group by ap.id having count(*)>1 order by cnt desc""" return _db.retrieve()
def store_kplets(kplets, fname): _sql_kplet = """insert ignore into bacteria_4plets (kplet_1, kplet_2, kplet_3, kplet_4) values \n""" _sql_kplet_file = """insert ignore into bacteria_4plets_win10 (kplet_id, file_id) values \n""" for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple(kplet) _sql_kplet += ( """((select id from cdd_profiles where code='%s'),""" + """(select id from cdd_profiles where code='%s'),""" + """(select id from cdd_profiles where code='%s'),""" + """(select id from cdd_profiles where code='%s')),\n""") % kplet _sql_kplet_file += ( """((select id from bacteria_4plets where """ + """kplet_1=(select id from cdd_profiles where code='%s') """ + """and kplet_2=(select id from cdd_profiles where code='%s') """ + """and kplet_3=(select id from cdd_profiles where code='%s') """ + """and kplet_4=(select id from cdd_profiles where code='%s')),""" + """(select id from bacteria_win10_files where name='%s')),\n""" ) % (kplet + (fname, )) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.cmd = _sql_kplet_file _db.execute()
def get_code_kplet(kplet_id, id2cdd=None): _db = DbClass() if not id2cdd: _db.cmd = """select cp1.code, cp2.code from bacteria_2plets bp inner join cdd_profiles cp1 on cp1.id = bp.kplet_1 inner join cdd_profiles cp2 on cp2.id = bp.kplet_2 where bp.id = %d""" % kplet_id retval = _db.retrieve()[0] else: _db.cmd = """select kplet_1, kplet_2 from bacteria_2plets where id = %d""" % kplet_id retval = _db.retrieve()[0] retval = set([id2cdd[id] for id in retval]) return retval
def store_kplets_pile(kplets_pile, profile2id, file2id): kplet_table_name = "crispr_2plets" kplet_file_table_name = "crispr_2plets_wgs" _sql_kplet = """insert ignore into %s (kplet_1, kplet_2) values \n""" % kplet_table_name _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_file_table_name for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(profile2id[k]) for k in kplet]) _sql_kplet += """(%d, %d),\n""" % kplet _sql_kplet_file += ("""((select id from crispr_2plets where """ + """kplet_1=%d and kplet_2=%d),""" + """%d),\n""") % (kplet + (int(file2id[fname]), )) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()
def get_code_kplet(kplet_id): _sql_cmd = """select kplet_1, kplet_2, kplet_3, kplet_4, kplet_5 from archea_5plets_codes where id = %s""" % kplet_id _db = DbClass() _db.cmd = _sql_cmd return _db.retrieve()
def store_kplets_pile(kplets_pile, cdd2id, file2id): _sql_kplet = """insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n""" _sql_kplet_file = """insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n""" for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(cdd2id[k]) for k in kplet]) _sql_kplet += """(%d, %d, %d),\n""" % kplet _sql_kplet_file += ("""((select id from bacteria_3plets where """ + """kplet_1=%d and kplet_2=%d and kplet_3=%d),""" + """%d),\n""") % (kplet + (int(file2id[fname]),)) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()
def store_kplets_pile(kplets_pile, profile2id, file2id): kplet_table_name = "crispr_5plets" kplet_table_file_name = "crispr_5plets_wgs" _sql_kplet = """insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4, kplet_5) values \n""" % kplet_table_name _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_table_file_name for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(profile2id[k]) for k in kplet]) _sql_kplet += """(%d, %d, %d, %d, %d),\n""" % kplet _sql_kplet_file += ("""((select id from crispr_5plets where """ + """kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d and kplet_5=%d),""" + """%d),\n""") % (kplet + (int(file2id[fname]),)) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()
def map_file_name2id(): _db = DbClass() _db.cmd = """ select name, id from bacteria_win10_files""" rows = _db.retrieve() return {row[0]: row[1] for row in rows}
__author__ = 'hudaiber' import sys sys.path.append('../..') from lib.db import DbClass _db = DbClass() # for table_name in ['crispr_4plets_wgs', 'crispr_3plets_wgs', 'crispr_2plets_wgs']: for table_name in ['crispr_5plets_wgs']: print "Starting for table:", table_name print print "Loading the duplicates" _db.cmd = """SELECT count(*) as cnt, kplet_id, file_id FROM PatternQuest.%s group by kplet_id, file_id having count(*)>1 order by cnt desc""" % table_name table_rows = [row for row in _db.retrieve()] print("Number of duplicate entries:", len(table_rows)) fname = "duplicates_%s.txt" % table_name with open(fname, 'w') as f: [f.write("%d\t%d\t%d\n" % (row)) for row in table_rows] print "Duplicates saved in file:", fname continue sys.exit() print("Starting to remove duplicates") total_cnt = 0
def map_file_id2name(): _db = DbClass() _db.cmd = """select id, name from bacteria_win10_files""" return {str(l[0]): l[1] for l in _db.retrieve()}
def store_kplets_pile(kplets_pile, cdd2id, file2id): _sql_kplet = """insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n""" _sql_kplet_file = """insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n""" for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(cdd2id[k]) for k in kplet]) _sql_kplet += """(%d, %d, %d),\n""" % kplet _sql_kplet_file += ( """((select id from bacteria_3plets where """ + """kplet_1=%d and kplet_2=%d and kplet_3=%d),""" + """%d),\n""") % (kplet + (int(file2id[fname]), )) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()
def map_source2id(): _db = DbClass() _db.cmd = """ SELECT id, name FROM PatternQuest.sources; """ rows = _db.retrieve() return {row[1]: row[0] for row in rows}
__author__ = 'hudaiber' import sys sys.path.append('../..') from lib.db import DbClass _db = DbClass() # for table_name in ['crispr_4plets_wgs', 'crispr_3plets_wgs', 'crispr_2plets_wgs']: for table_name in ['crispr_5plets_wgs']: print "Starting for table:", table_name print print "Loading the duplicates" _db.cmd = """SELECT count(*) as cnt, kplet_id, file_id FROM PatternQuest.%s group by kplet_id, file_id having count(*)>1 order by cnt desc""" % table_name table_rows = [row for row in _db.retrieve()] print("Number of duplicate entries:", len(table_rows)) fname = "duplicates_%s.txt" % table_name with open(fname, 'w') as f: [f.write("%d\t%d\t%d\n"%(row)) for row in table_rows] print "Duplicates saved in file:", fname continue sys.exit() print("Starting to remove duplicates") total_cnt = 0
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id): kplet_table_name = "%s_5plets" % prefix kplet_table_file_name = "%s_5plets_files" % prefix _sql_kplet = """insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4, kplet_5) values \n""" % kplet_table_name _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_table_file_name for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(profile2id[k]) for k in kplet]) _sql_kplet += """(%d, %d, %d, %d, %d),\n""" % kplet _sql_kplet_file += ( """((select id from %s where """ + """kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d and kplet_5=%d),""" + """%d),\n""") % ((kplet_table_name, ) + kplet + (int(file2id[fname]), )) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id): kplet_table_name = "%s_2plets" % prefix kplet_file_table_name = "%s_2plets_files" % prefix _sql_kplet = """insert ignore into %s (kplet_1, kplet_2) values \n""" % kplet_table_name _sql_kplet_file = """insert ignore into %s (kplet_id, file_id) values \n""" % kplet_file_table_name for (kplets, fname) in kplets_pile: for kplet in kplets: kplet = list(kplet) kplet.sort() kplet = tuple([int(profile2id[k]) for k in kplet]) _sql_kplet += """(%d, %d),\n""" % kplet _sql_kplet_file += ("""((select id from %s where """ + """kplet_1=%d and kplet_2=%d),""" + """%d),\n""") % ((kplet_table_name,) + kplet + (int(file2id[fname]),)) _sql_kplet = _sql_kplet[:-2] _sql_kplet += ';' _sql_kplet_file = _sql_kplet_file[:-2] _sql_kplet_file += ';' _db = DbClass() _db.cmd = _sql_kplet _db.execute() _db.commit() _db.cmd = _sql_kplet_file _db.execute() _db.commit()