예제 #1
0
def store_kplets_pile(kplets_pile, cdd2id, file2id):
    """Bulk-insert 3-plets and their kplet/file links.

    Args:
        kplets_pile: iterable of ``(kplets, fname)`` pairs. Each kplet is an
            iterable of three keys present in ``cdd2id``.
        cdd2id: mapping from a kplet member to a value accepted by ``int()``.
        file2id: mapping from ``fname`` to a value accepted by ``int()``.

    Builds one multi-row ``insert ignore`` for ``bacteria_3plets`` and one for
    ``bacteria_3plets_win10``, then executes and commits each through a
    ``DbClass`` instance. Does nothing when the pile yields no kplets.

    Raises:
        KeyError: if a kplet member or ``fname`` is missing from its mapping.
        TypeError: if a kplet does not have exactly three members.
    """
    kplet_rows = []
    link_rows = []

    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Members are ordered by their original key, then mapped to ids.
            ids = tuple(int(cdd2id[member]) for member in sorted(kplet))

            kplet_rows.append("(%d, %d, %d)" % ids)
            link_rows.append(
                ("((select id from bacteria_3plets where "
                 "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                 "%d)") % (ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statement would degenerate to "... values;",
        # which is not valid SQL, so there is nothing to run.
        return

    _sql_kplet = ("insert ignore into bacteria_3plets "
                  "(kplet_1, kplet_2, kplet_3) values \n" +
                  ",\n".join(kplet_rows) + ";")
    _sql_kplet_file = ("insert ignore into bacteria_3plets_win10 "
                       "(kplet_id, file_id) values \n" +
                       ",\n".join(link_rows) + ";")

    _db = DbClass()

    # The kplets must be committed first: the link rows look their ids up
    # with a sub-select on bacteria_3plets.
    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
예제 #2
0
파일: triplets.py 프로젝트: okurman/CRISPR
def store_kplets_pile(kplets_pile, cdd2id, file2id):
    """Store a pile of 3-plets together with the files they came from.

    Args:
        kplets_pile: iterable of ``(kplets, fname)`` pairs; every kplet is an
            iterable of three keys of ``cdd2id``.
        cdd2id: mapping whose values are converted with ``int()`` to form the
            ``kplet_1``..``kplet_3`` columns.
        file2id: mapping from ``fname`` to a value converted with ``int()``.

    Two multi-row ``insert ignore`` statements are assembled (one for
    ``bacteria_3plets``, one for ``bacteria_3plets_win10``) and each is
    executed and committed via ``DbClass``. An input that produces no rows
    is a no-op.

    Raises:
        KeyError: when a kplet member or ``fname`` has no mapping entry.
        TypeError: when a kplet does not contain exactly three members.
    """
    value_tuples = []
    file_tuples = []

    link_template = ("((select id from bacteria_3plets where "
                     "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                     "%d)")

    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Sort on the raw keys first; the ids keep that order.
            id_triplet = tuple(int(cdd2id[key]) for key in sorted(kplet))
            file_id = int(file2id[fname])

            value_tuples.append("(%d, %d, %d)" % id_triplet)
            file_tuples.append(link_template % (id_triplet + (file_id,)))

    if not value_tuples:
        # An empty values list would yield the malformed "... values;".
        return

    _sql_kplet = (
        "insert ignore into bacteria_3plets (kplet_1, kplet_2, kplet_3) values \n"
        + ",\n".join(value_tuples) + ";")
    _sql_kplet_file = (
        "insert ignore into bacteria_3plets_win10 (kplet_id, file_id) values \n"
        + ",\n".join(file_tuples) + ";")

    _db = DbClass()

    # Commit the kplets before the links, whose sub-select reads their ids.
    for statement in (_sql_kplet, _sql_kplet_file):
        _db.cmd = statement
        _db.execute()
        _db.commit()
예제 #3
0
def insert_source_duplets(kplets_pile, profile2id, source_id):
    """Bulk-insert adjacent duplets and link each one to ``source_id``.

    Args:
        kplets_pile: iterable of ``(duplet, fname, multidomain)`` triples.
            ``duplet`` is an iterable of keys of ``profile2id``; ``fname`` is
            unpacked but not used; ``multidomain`` is formatted with ``%d``.
        profile2id: mapping from a duplet member to the value written into
            the ``kplet_1`` / ``kplet_2`` columns.
        source_id: value formatted with ``%d`` into the ``source_id`` column.

    Builds one ``insert ignore`` for ``prok1402_all_adj_duplet`` and one
    ``insert`` for ``prok1402_all_adj_duplet_source``, executes both through
    ``DbClass`` and commits once at the end. An empty pile is a no-op.

    Raises:
        KeyError: if a duplet member is missing from ``profile2id``.
        IndexError: if a duplet has fewer than two members.
    """
    duplet_rows = []
    source_rows = []

    for duplet, _fname, multidomain in kplets_pile:
        # Order members by their original key, then translate to ids.
        ids = [profile2id[term] for term in sorted(duplet)]

        duplet_rows.append("(%s, %s, %d)" % (ids[0], ids[1], multidomain))
        source_rows.append(
            ("((select id from prok1402_all_adj_duplet where "
             "kplet_1=%s and kplet_2=%s and multidomain=%d), %d)")
            % (ids[0], ids[1], multidomain, source_id))

    if not duplet_rows:
        # Without rows the statement would collapse to "... values;",
        # which is not valid SQL.
        return

    sql_insert_kplet = (
        "insert ignore into prok1402_all_adj_duplet "
        "(kplet_1, kplet_2, multidomain) values \n"
        + ",\n".join(duplet_rows) + ";")
    sql_insert_kplet_file = (
        "insert into prok1402_all_adj_duplet_source "
        "(kplet_id, source_id) values \n"
        + ",\n".join(source_rows) + ";")

    _db = DbClass()

    _db.cmd = sql_insert_kplet
    _db.execute()

    _db.cmd = sql_insert_kplet_file
    _db.execute()

    # Single commit covering both statements.
    _db.commit()
예제 #4
0
def insert_duplets(kplets_pile, profile2id, file2id):
    """Bulk-insert baited duplets and the files they were found in.

    Args:
        kplets_pile: iterable of ``(duplet, fname)`` pairs. ``duplet`` is an
            iterable of keys of ``profile2id``; ``fname`` is a path whose
            basename is looked up in ``file2id``.
        profile2id: mapping from a duplet member to the value written into
            the ``kplet_1`` / ``kplet_2`` columns.
        file2id: mapping from a file basename to a value formatted with
            ``%d`` into the ``file_id`` column.

    Builds one ``insert ignore`` for ``prok1402_baited_duplet`` and one for
    ``prok1402_baited_duplet_file``, executes both through ``DbClass`` and
    commits once at the end. An empty pile is a no-op.

    Raises:
        KeyError: if a duplet member or file basename has no mapping entry.
        IndexError: if a duplet has fewer than two members.
    """
    duplet_rows = []
    file_rows = []

    for duplet, fname in kplets_pile:
        # Order members by their original key, then translate to ids.
        ids = [profile2id[term] for term in sorted(duplet)]

        # file2id is keyed by bare file name, not by full path.
        file_id = file2id[os.path.basename(fname)]

        duplet_rows.append("(%s, %s)" % (ids[0], ids[1]))
        file_rows.append(
            ("((select id from prok1402_baited_duplet where "
             "kplet_1=%s and kplet_2=%s), %d)") % (ids[0], ids[1], file_id))

    if not duplet_rows:
        # Without rows the statement would collapse to "... values;",
        # which is not valid SQL.
        return

    sql_insert_kplet = (
        "insert ignore into prok1402_baited_duplet (kplet_1, kplet_2) values \n"
        + ",\n".join(duplet_rows) + ";")
    sql_insert_kplet_file = (
        "insert ignore into prok1402_baited_duplet_file (kplet_id, file_id) values \n"
        + ",\n".join(file_rows) + ";")

    _db = DbClass()

    _db.cmd = sql_insert_kplet
    _db.execute()

    _db.cmd = sql_insert_kplet_file
    _db.execute()

    # Single commit covering both statements.
    _db.commit()
예제 #5
0
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):
    """Bulk-insert 3-plets and their file links into ``<prefix>_3plets*``.

    Args:
        prefix: table-name prefix; rows go to ``<prefix>_3plets`` and
            ``<prefix>_3plets_files``.
        kplets_pile: iterable of ``(kplets, fname)`` pairs. Each kplet is an
            iterable of three keys present in ``profile2id``.
        profile2id: mapping from a kplet member to a value accepted by
            ``int()``.
        file2id: mapping from ``fname`` to a value accepted by ``int()``.

    One multi-row ``insert ignore`` is built per table; each is executed and
    committed through ``DbClass``. Does nothing when the pile yields no
    kplets.

    Raises:
        KeyError: if a kplet member or ``fname`` is missing from its mapping.
        TypeError: if a kplet does not have exactly three members.
    """
    # NOTE: prefix is interpolated into the SQL text as an identifier and
    # cannot be escaped here; callers must pass a trusted value.
    kplet_table_name = "%s_3plets" % prefix
    kplet_table_file_name = "%s_3plets_files" % prefix

    kplet_rows = []
    link_rows = []

    link_template = ("((select id from %s where "
                     "kplet_1=%d and kplet_2=%d and kplet_3=%d),"
                     "%d)")

    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Members are ordered by their original key, then mapped to ids.
            ids = tuple(int(profile2id[member]) for member in sorted(kplet))

            kplet_rows.append("(%d, %d, %d)" % ids)
            link_rows.append(
                link_template
                % ((kplet_table_name,) + ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statement would degenerate to "... values;",
        # which is not valid SQL.
        return

    _sql_kplet = (
        "insert ignore into %s (kplet_1, kplet_2, kplet_3) values \n"
        % kplet_table_name) + ",\n".join(kplet_rows) + ";"
    _sql_kplet_file = (
        "insert ignore into %s (kplet_id, file_id) values \n"
        % kplet_table_file_name) + ",\n".join(link_rows) + ";"

    _db = DbClass()

    # The kplets must be committed first: the link rows resolve their ids
    # with a sub-select on the kplet table.
    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
예제 #6
0
def store_kplets_pile(prefix, kplets_pile, profile2id, file2id):
    """Bulk-insert 4-plets and their file links into ``<prefix>_4plets*``.

    Args:
        prefix: table-name prefix; rows go to ``<prefix>_4plets`` and
            ``<prefix>_4plets_files``.
        kplets_pile: iterable of ``(kplets, fname)`` pairs. Each kplet is an
            iterable of four keys present in ``profile2id``.
        profile2id: mapping from a kplet member to a value accepted by
            ``int()``.
        file2id: mapping from ``fname`` to a value accepted by ``int()``.

    One multi-row ``insert ignore`` is built per table; each is executed and
    committed through ``DbClass``. Does nothing when the pile yields no
    kplets.

    Raises:
        KeyError: if a kplet member or ``fname`` is missing from its mapping.
        TypeError: if a kplet does not have exactly four members.
    """
    # NOTE: prefix is interpolated into the SQL text as an identifier and
    # cannot be escaped here; callers must pass a trusted value.
    kplet_table_name = "%s_4plets" % prefix
    kplet_table_file_name = "%s_4plets_files" % prefix

    kplet_rows = []
    link_rows = []

    link_template = ("((select id from %s where "
                     "kplet_1=%d and kplet_2=%d and kplet_3=%d and kplet_4=%d),"
                     "%d)")

    for kplets, fname in kplets_pile:
        for kplet in kplets:
            # Members are ordered by their original key, then mapped to ids.
            ids = tuple(int(profile2id[member]) for member in sorted(kplet))

            kplet_rows.append("(%d, %d, %d, %d)" % ids)
            link_rows.append(
                link_template
                % ((kplet_table_name,) + ids + (int(file2id[fname]),)))

    if not kplet_rows:
        # Without rows the statement would degenerate to "... values;",
        # which is not valid SQL.
        return

    _sql_kplet = (
        "insert ignore into %s (kplet_1, kplet_2, kplet_3, kplet_4) values \n"
        % kplet_table_name) + ",\n".join(kplet_rows) + ";"
    _sql_kplet_file = (
        "insert ignore into %s (kplet_id, file_id) values \n"
        % kplet_table_file_name) + ",\n".join(link_rows) + ";"

    _db = DbClass()

    # The kplets must be committed first: the link rows resolve their ids
    # with a sub-select on the kplet table.
    _db.cmd = _sql_kplet
    _db.execute()
    _db.commit()

    _db.cmd = _sql_kplet_file
    _db.execute()
    _db.commit()
    print "Duplicates saved in file:", fname
    continue
    sys.exit()
    print("Starting to remove duplicates")

    total_cnt = 0
    chunk_size = 10000
    chunks = [table_rows[i: i+chunk_size] for i in range(0, len(table_rows), chunk_size)]

    for row in table_rows:

        (_cnt, _kplet_id, _file_id) = row

        assert _cnt > 1

        _db.cmd = """delete from %s where kplet_id=%d and file_id=%d""" % (table_name, _kplet_id, _file_id)
        _db.execute()
        _db.commit()

        _db.cmd = """insert into %s(kplet_id, file_id) values (%d, %d)""" % (table_name, _kplet_id, _file_id)
        _db.execute()
        _db.commit()

        total_cnt += 1
        if total_cnt % 10000 ==0:
            print total_cnt

    print total_cnt
    print "Finished:", table_name
    print
    print
예제 #8
0
    total_cnt = 0
    chunk_size = 10000
    chunks = [
        table_rows[i:i + chunk_size]
        for i in range(0, len(table_rows), chunk_size)
    ]

    for row in table_rows:

        (_cnt, _kplet_id, _file_id) = row

        assert _cnt > 1

        _db.cmd = """delete from %s where kplet_id=%d and file_id=%d""" % (
            table_name, _kplet_id, _file_id)
        _db.execute()
        _db.commit()

        _db.cmd = """insert into %s(kplet_id, file_id) values (%d, %d)""" % (
            table_name, _kplet_id, _file_id)
        _db.execute()
        _db.commit()

        total_cnt += 1
        if total_cnt % 10000 == 0:
            print total_cnt

    print total_cnt
    print "Finished:", table_name
    print
    print