コード例 #1
0
ファイル: ecs.py プロジェクト: felixlyd/py-kallisto
def fix_contigs(k):
    """Split every contig that some transcript covers only partially.

    ``contig_find_trans_list[contig_id]`` holds one record per transcript hit;
    its ``start_in_tran`` / ``stop_in_tran`` fields are used here as a
    half-open k-mer interval ``[start, stop)`` inside the contig.  A contig is
    left alone when every record spans ``[0, n_of_kmer)``.  Otherwise it is cut
    at every distinct interval endpoint:

    * the first piece replaces the original entry of ``contig_list``;
    * each further piece is appended to ``contig_list`` together with a ``-1``
      placeholder in ``ecs_list``;
    * ``kmer_str_dict`` is updated so every k-mer points at its new piece;
    * ``contig_find_trans_list`` receives, per piece, a full-span record for
      every transcript whose interval overlaps that piece.

    Args:
        k: k-mer length; a piece holding ``n`` k-mers is ``n + k - 1`` bases.
    """
    # The range is fixed up front, so pieces appended below are not revisited
    # (their records already span the whole piece).
    for contig_id in range(len(contig_find_trans_list)):
        old_trans_info = contig_find_trans_list[contig_id]
        old_contig = contig_list[contig_id]
        n_of_kmer_in_curr_contig = old_contig.n_of_kmer

        # Nothing to fix when every transcript spans the whole contig.
        if all(info.start_in_tran == 0
               and info.stop_in_tran == n_of_kmer_in_curr_contig
               for info in old_trans_info):
            continue

        break_points = sorted({
            point
            for info in old_trans_info
            for point in (info.start_in_tran, info.stop_in_tran)
        })
        old_contig_seq = old_contig.seq

        for j in range(len(break_points) - 1):
            piece_start = break_points[j]
            piece_stop = break_points[j + 1]

            # A fresh object per piece: reusing a single Contig would leave
            # every slot of contig_list aliasing the data of the last piece.
            new_contig = Contig()
            # Python slices take an absolute end index, not a length: the last
            # k-mer of the piece starts at piece_stop - 1 and is k bases long.
            new_contig.seq = old_contig_seq[piece_start:piece_stop + k - 1]
            new_contig.n_of_kmer = piece_stop - piece_start

            if j == 0:
                new_contig.id = contig_id
                contig_list[contig_id] = new_contig
            else:
                new_contig.id = len(contig_list)
                contig_list.append(new_contig)
                ecs_list.append(-1)

            # Re-point every k-mer of the piece at its new contig.
            for m in range(len(new_contig.seq) - k + 1):
                curr_rep = kmer_rep(new_contig.seq[m:m + k])
                if curr_rep in kmer_str_dict:
                    kmer_info = kmer_str_dict[curr_rep]
                    kmer_info.contig_id = new_contig.id
                    kmer_info.pos_in_contig = m
                    kmer_info.n_of_kmer_in_contig = new_contig.n_of_kmer
                else:
                    program_stop("ecs.py-4")

            # Keep the transcripts whose interval overlaps this piece; within
            # the piece they cover it entirely by construction of the cuts.
            new_trans_info_list = []
            for each_trans_info in old_trans_info:
                if (each_trans_info.start_in_tran < piece_stop
                        and each_trans_info.stop_in_tran > piece_start):
                    new_trans_info = ContigFindTrans()
                    new_trans_info.sense_in_tran = each_trans_info.sense_in_tran
                    new_trans_info.tran_id = each_trans_info.tran_id
                    new_trans_info.start_in_tran = 0
                    new_trans_info.stop_in_tran = new_contig.n_of_kmer
                    new_trans_info_list.append(new_trans_info)

            if j == 0:
                contig_find_trans_list[contig_id] = new_trans_info_list
            else:
                contig_find_trans_list.append(new_trans_info_list)
コード例 #2
0
ファイル: em.py プロジェクト: felixlyd/py-kallisto
def write_em_tsv(file_name, alpha_list, eff_lens):
    """Write the EM result as a tab-separated table.

    The file starts with the header ``tran_name, tran_len, eff_len, counts``
    and then holds one row per entry of ``alpha_list``.  Names and lengths
    come from the module-level ``transcript_info``; the effective length is
    rounded to 4 decimals and the count to 8.

    Args:
        file_name: path of the TSV file to create or overwrite.
        alpha_list: values for the ``counts`` column; its length sets the
            number of rows.
        eff_lens: values for the ``eff_len`` column, indexed like
            ``alpha_list``.
    """
    tran_name = transcript_info.tran_name
    tran_len = transcript_info.tran_len
    try:
        fp = open(file_name, 'w')
    except IOError:
        print("cannot open", file_name)
        # NOTE(review): assumes program_stop() terminates the process;
        # otherwise `fp` is unbound below, exactly as before.
        program_stop("em.py")
    # The context manager closes the handle even if a write fails midway.
    with fp:
        fp.write("tran_name\ttran_len\teff_len\tcounts" + "\n")
        for ec, alpha in enumerate(alpha_list):
            row = [
                tran_name[ec],
                str(tran_len[ec]),
                str(round(eff_lens[ec], 4)),
                str(round(alpha, 8)),
            ]
            fp.write("\t".join(row) + "\n")
コード例 #3
0
ファイル: read_tran.py プロジェクト: felixlyd/py-kallisto
def read_tran_files(file_name):
    """Read a FASTA file of transcripts and return their processed sequences.

    For every record the id and the raw sequence length are appended to
    ``transcript_info.tran_name`` / ``transcript_info.tran_len``; the sequence
    itself is passed through ``replace_base`` and ``check_poly_a_tail`` before
    being collected.  ``transcript_info.tran_num`` is set to the total number
    of names recorded so far.

    Args:
        file_name: path to a FASTA file; a name ending in ``.gz`` is read
            through ``gzip`` in text mode.

    Returns:
        list of processed sequences, in file order.
    """
    try:
        # Test the suffix only: a ".gz" elsewhere in the path (e.g. a
        # directory name) must not switch a plain file to gzip decoding.
        if file_name.endswith(".gz"):
            fp = gzip.open(file_name, 'rt')
        else:
            fp = open(file_name, 'r')
    except IOError:
        print(file_name, "is not exist!")
        # NOTE(review): assumes program_stop() terminates the process.
        program_stop("read_tran.py")

    seqs = []
    # Close the handle even when parsing or post-processing raises.
    with fp:
        for seq_record in SeqIO.parse(fp, "fasta"):
            transcript_info.tran_name.append(seq_record.id)
            transcript_info.tran_len.append(len(seq_record.seq))
            seq = replace_base(seq_record.seq)
            seq = check_poly_a_tail(seq)
            seqs.append(seq)
    transcript_info.tran_num = len(transcript_info.tran_name)
    return seqs
コード例 #4
0
ファイル: quant.py プロジェクト: felixlyd/py-kallisto
def start(args):
    """Validate the quantification options, load the index and process reads.

    Steps, in order:

    1. require ``args.index_path`` to exist, load it with ``load_idx`` (timed)
       and require the stored k to equal ``args.kmer_len``;
    2. copy the options into the shared ``def_args`` object and check that the
       read file(s) exist;
    3. resolve the output directory (a bare name is placed under the current
       working directory) and create it if missing;
    4. hand over to ``read_fq_fa_files``.

    Every validation failure prints a message and calls ``program_stop``.

    Args:
        args: parsed command-line namespace.  Attributes read: ``index_path``,
            ``kmer_len``, ``single_mode``, ``fragment_len``, ``sd_len``,
            ``fa_or_fq_file``, ``fa_or_fq_files_1``, ``fa_or_fq_files_2``,
            ``output_path`` and ``threads``.
    """
    if not os.path.exists(args.index_path):
        print("the index_file is not exist.please check.")
        program_stop("quant.py")
    else:
        begin = datetime.datetime.now()
        load_k = load_idx(args.index_path)
        end = datetime.datetime.now()
        print("load_idx:", end - begin)
        if args.kmer_len != load_k:
            print("the k value is inconsistent.")
            program_stop("quant.py")

    def_args.k = args.kmer_len
    def_args.index = args.index_path
    if args.single_mode:
        def_args.len_frag = args.fragment_len
        def_args.sd = args.sd_len
        def_args.fq_file_single = args.fa_or_fq_file
        if not os.path.exists(def_args.fq_file_single):
            print("fq_or_fa_files is required.please check.")
            program_stop("quant.py")
    else:
        def_args.fq_file_1 = args.fa_or_fq_files_1
        def_args.fq_file_2 = args.fa_or_fq_files_2
        # Paired mode needs BOTH mates; the previous `or` only failed when
        # both files were missing and let a single missing mate through.
        if not (os.path.exists(def_args.fq_file_1)
                and os.path.exists(def_args.fq_file_2)):
            print("fq_or_fa_files is required.please check.")
            program_stop("quant.py")

    # A path without a directory component is placed under the cwd.
    output_dir, output_name = os.path.split(args.output_path)
    if len(output_dir) == 0:
        output_path = os.path.join(os.getcwd(), output_name)
    else:
        output_path = args.output_path
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    def_args.output = output_path
    def_args.threads = args.threads
    def_args.single_mode = args.single_mode
    read_fq_fa_files(def_args)
コード例 #5
0
ファイル: index.py プロジェクト: felixlyd/py-kallisto
def start(args):
    """Check the indexing options, then build and write the index.

    Validation (each failure prints a message and calls ``program_stop``):
    ``args.kmer_len`` must be odd, ``args.input_fna_path`` must be given (not
    the string ``"null"``) and name an existing file, and the directory part
    of ``args.index_path`` must exist (a bare file name is placed under the
    current working directory).

    The options are then stored on the shared ``def_args`` object and the four
    stages ``read_tran_files``, ``build_dbg``, ``build_ecs`` and ``write_idx``
    run in sequence, each followed by a line reporting its elapsed time.

    Args:
        args: parsed command-line namespace providing ``kmer_len``,
            ``input_fna_path`` and ``index_path``.
    """

    def run_timed(label, stage, *stage_args):
        # Run one pipeline stage and print how long it took.
        started = datetime.datetime.now()
        outcome = stage(*stage_args)
        finished = datetime.datetime.now()
        print(label, finished - started)
        return outcome

    if args.kmer_len % 2 == 0:
        print("k value must be odd.")
        program_stop("index.py")

    if args.input_fna_path == "null":
        print("the input_fna_path is required.")
        program_stop("index.py")
    elif not os.path.isfile(args.input_fna_path):
        print("the input_fna_path is wrong.please check.")
        program_stop("index.py")

    out_dir, out_name = os.path.split(args.index_path)
    if not out_dir:
        index_path = os.path.join(os.getcwd(), out_name)
    elif not os.path.exists(out_dir):
        print("the index_out_DIR is not exist.please check.")
        program_stop("index.py")
    else:
        index_path = args.index_path

    def_args.k = args.kmer_len
    def_args.index = index_path
    def_args.fna_file = args.input_fna_path

    seqs = run_timed("read_tran:", read_tran_files, def_args.fna_file)
    run_timed("build_contig:", build_dbg, seqs, def_args.k)
    run_timed("build_ecs:", build_ecs, seqs, def_args.k)
    run_timed("write_idx:", write_idx, def_args.index, def_args.k)
コード例 #6
0
ファイル: process_read.py プロジェクト: felixlyd/py-kallisto
def _open_read_file(file_name):
    """Open a read file in text mode and return ``(handle, format_suffix)``.

    A name whose extension is ``.gz`` is opened through ``gzip``; its format
    suffix is the extension left once ``.gz`` is stripped (``x.fq.gz`` ->
    ``.fq``).  Any other file is opened as plain text and its own extension is
    the format suffix.
    """
    stem, suffix = os.path.splitext(file_name)
    if suffix == ".gz":
        return gzip.open(file_name, 'rt'), os.path.splitext(stem)[1]
    return open(file_name, 'r'), suffix


def _select_record_parser(format_suffix):
    """Return the record generator for a format suffix, or None if unknown."""
    if format_suffix in (".fasta", ".fa"):
        return to_fasta
    if format_suffix in (".fastq", ".fq"):
        return convert_to_fasta
    return None


def read_fq_fa_files(args):
    """Map the reads with a worker pool and, in paired mode, quantify them.

    Single mode maps every record with ``do_something_with_record`` and only
    reports the number of results.  Paired mode maps mate pairs with
    ``do_something_with_record_paired``, assigns each result to an equivalence
    class (updating the module-level ``counts``, ``new_ecs`` and ``f_lens``),
    estimates fragment lengths, runs EM and writes ``dualisto_quant.tsv`` into
    ``args.output``.  Elapsed times of the stages are printed.

    Args:
        args: option object providing ``k``, ``threads``, ``single_mode``,
            ``output`` and either ``fq_file_single`` or ``fq_file_1`` /
            ``fq_file_2``.  Files may be FASTA (``.fa``/``.fasta``) or FASTQ
            (``.fq``/``.fastq``), optionally with a trailing ``.gz``.
    """
    global k, f_len_goal
    k = args.k
    pool = Pool(processes=args.threads)
    begin = datetime.datetime.now()

    if args.single_mode:
        file_name = args.fq_file_single
        try:
            fp, read_format = _open_read_file(file_name)
        except IOError:
            print(file_name, "is not exist!")
            # NOTE(review): assumes program_stop() terminates the process.
            program_stop("process_read.py")
        parse = _select_record_parser(read_format)
        if parse is None:
            fp.close()
            print("cannot find .fa(.fasta) or .fq(.fastq).please check.")
            program_stop("process_read.py")
        res = pool.map(do_something_with_record, parse(fp))
        pool.close()
        pool.join()
        # pool.map has consumed the whole input, so the handle can be released
        # (it was previously left open in single mode).
        fp.close()
        print(len(res))
        print("please waite single mode.")
        return

    # paired mode
    file_name_1 = args.fq_file_1
    file_name_2 = args.fq_file_2
    fp1 = None
    try:
        # gzip is detected from the real extension, as in single mode; a
        # substring test would misread names that merely contain ".gz".
        fp1, read_format_1 = _open_read_file(file_name_1)
        fp2, read_format_2 = _open_read_file(file_name_2)
    except IOError:
        if fp1 is not None:
            fp1.close()
        print(file_name_1, "or", file_name_2, "is not exist!")
        program_stop("process_read.py")

    if read_format_1 != read_format_2:
        fp1.close()
        fp2.close()
        print("suffix is inconsistent.please check.")
        program_stop("process_read.py")
        return

    parse = _select_record_parser(read_format_1)
    if parse is None:
        print("cannot find .fa(.fasta) or .fq(.fastq).please check.")
        program_stop("process_read.py")
    res = pool.starmap(do_something_with_record_paired,
                       zip(parse(fp1), parse(fp2)))
    pool.close()
    pool.join()
    fp1.close()
    fp2.close()
    end = datetime.datetime.now()
    print("process_reads:", end - begin)

    begin = datetime.datetime.now()
    for each_u, each_tl in res:
        ec = find_ec(each_u)
        if ec == -1 or ec >= len(counts):
            # No known equivalence class: keep the raw set for later.
            new_ecs.append(each_u)
        else:
            counts[ec] = counts[ec] + 1
        # Sample fragment lengths only from single-transcript classes and
        # only until the sampling budget f_len_goal is used up.
        if f_len_goal > 0 and 0 <= ec < transcript_info.tran_num:
            if 0 < each_tl < len(f_lens):
                f_lens[each_tl] = f_lens[each_tl] + 1
                f_len_goal = f_len_goal - 1
    res.clear()
    print("counts:", counts)
    end = datetime.datetime.now()
    print("match_ecs:", end - begin)

    begin = datetime.datetime.now()
    mean_f_lens = compute_mean_flg_lens(f_lens)
    f_lens.clear()
    tran_lens_estimated = get_each_tran_len(mean_f_lens)
    mean_f_lens.clear()
    alpha_list, eff_lens = em_run(counts, tran_lens_estimated)
    output_file = os.path.join(args.output, "dualisto_quant.tsv")
    write_em_tsv(output_file, alpha_list, eff_lens)
    end = datetime.datetime.now()
    print("em_run:", end - begin)
コード例 #7
0
ファイル: ecs.py プロジェクト: felixlyd/py-kallisto
def build_ecs(seqs, k):
    """Build equivalence classes and contig-to-transcript links.

    Works on the module-level index structures in four phases:

    1. Every transcript ``i`` gets the singleton class ``[i]`` (appended to
       ``ec_map`` and stored in ``ec_inv_dict[i]``).  The transcript is then
       walked k-mer by k-mer; for each contig it enters, a
       ``ContigFindTrans`` record notes the half-open k-mer interval
       ``[start_in_tran, stop_in_tran)`` of the contig that it covers.
    2. The records are published in ``contig_find_trans_list`` and
       ``fix_contigs`` splits the contigs that are covered only partially.
    3. Each contig is assigned the class equal to the sorted set of transcript
       ids in its records; unseen sets get a new class id.  The id is stored
       in ``ecs_list`` and in the contig's ``ecs_id``.
    4. Each transcript is walked again, contig by contig, appending a
       ``ContigIncludeTrans`` (transcript id, position, orientation) to the
       ``include_trans`` list of every contig it visits.

    Args:
        seqs: transcript sequences; index in the list is the transcript id.
        k: k-mer length.
    """
    # ---- Phase 1: locate every transcript on the contigs ------------------
    trans_per_contig = [[] for _ in contig_list]
    for i in range(len(seqs)):
        each_ec = [i]
        ec_map.append(each_ec)
        ec_inv_dict[i] = each_ec
        seq = seqs[i]
        n_of_kmers_of_seq = len(seq) - k + 1
        j = 0
        while j < n_of_kmers_of_seq:
            curr_kmer = str(seq[j:j + k])
            curr_rep = kmer_rep(curr_kmer)
            if curr_rep in kmer_str_dict:
                curr_kmer_info = kmer_str_dict[curr_rep]
                curr_contig_id = curr_kmer_info.contig_id
                curr_contig_seq = contig_list[curr_contig_id].seq
                # Number of k-mers in the contig.  Positions are found with a
                # substring search instead of materialising every k-mer.
                kmers_in_contig_len = len(curr_contig_seq) - k + 1
                remaining_in_seq = n_of_kmers_of_seq - j

                contig_find_trans = ContigFindTrans()
                contig_find_trans.tran_id = i
                if (curr_kmer == curr_rep) == curr_kmer_info.sense_in_contig:
                    # The transcript reads the contig in the contig's own
                    # orientation, entering at `start` and moving right.
                    contig_find_trans.sense_in_tran = 1
                    start = curr_contig_seq.index(curr_kmer)
                    contig_find_trans.start_in_tran = start
                    if kmers_in_contig_len - start > remaining_in_seq:
                        # The transcript ends inside the contig: the interval
                        # stops short, which fix_contigs later uses as a cut.
                        contig_find_trans.stop_in_tran = start + remaining_in_seq
                        j = n_of_kmers_of_seq
                    else:
                        # The transcript runs to the end of the contig;
                        # continue right after it.
                        contig_find_trans.stop_in_tran = kmers_in_contig_len
                        j = j + kmers_in_contig_len - start
                else:
                    # The transcript reads the contig reverse-complemented,
                    # entering at the twin's position and moving left.
                    contig_find_trans.sense_in_tran = 0
                    curr_twin = kmer_twin(curr_kmer)
                    # stop is exclusive, hence position + 1.
                    stop = curr_contig_seq.index(curr_twin) + 1
                    contig_find_trans.stop_in_tran = stop
                    if stop > remaining_in_seq:
                        contig_find_trans.start_in_tran = stop - remaining_in_seq
                        j = n_of_kmers_of_seq
                    else:
                        contig_find_trans.start_in_tran = 0
                        j = j + stop
                trans_per_contig[curr_contig_id].append(contig_find_trans)
            else:
                print(curr_rep)
                print(j)
                program_stop("ecs.py-2")

    # ---- Phase 2: publish the records and split partial contigs -----------
    contig_find_trans_list.extend(trans_per_contig)
    fix_contigs(k)

    # ---- Phase 3: assign an equivalence class to every contig -------------
    # Reverse index (member tuple -> class id) replaces a linear scan over
    # ec_inv_dict for every contig.
    ec_by_members = {
        tuple(members): ec_id for ec_id, members in ec_inv_dict.items()
    }
    for curr_contig_id in range(len(contig_find_trans_list)):
        members = tuple(sorted({
            info.tran_id for info in contig_find_trans_list[curr_contig_id]
        }))
        ec = ec_by_members.get(members)
        if ec is None:
            ec = len(ec_inv_dict)
            new_members = list(members)
            ec_inv_dict[ec] = new_members
            ec_map.append(new_members)
            ec_by_members[members] = ec
        ecs_list[curr_contig_id] = ec
        contig_list[curr_contig_id].ecs_id = ec

    # ---- Phase 4: record which transcripts each contig is part of ---------
    for i in range(len(seqs)):
        seq = seqs[i]
        n_of_kmers_of_seq = len(seq) - k + 1
        j = 0
        while j < n_of_kmers_of_seq:
            curr_kmer = str(seq[j:j + k])
            curr_rep = kmer_rep(curr_kmer)
            if curr_rep in kmer_str_dict:
                curr_kmer_info = kmer_str_dict[curr_rep]
                curr_contig = contig_list[curr_kmer_info.contig_id]
                contig_include_trans = ContigIncludeTrans()
                contig_include_trans.tran_id = i
                contig_include_trans.pos_in_tran = j
                contig_include_trans.sense_in_tran = int(
                    (curr_kmer == curr_rep) == curr_kmer_info.sense_in_contig)
                curr_contig.include_trans.append(contig_include_trans)
                # Jump over the whole contig to the next one.
                j = j + curr_contig.n_of_kmer
            else:
                program_stop("ecs.py-3")
コード例 #8
0
def load_idx(filename):
    """Load an index file into the module-level structures and return its k.

    The file is line oriented and is read in this order:

    1. version string (must equal ``kallisto_index_version``);
    2. k;
    3. number of transcripts, then one transcript length per line;
    4. number of k-mers, then per k-mer the k-mer string and a line
       ``contig_id,pos_in_contig,n_of_kmer_in_contig,sense_in_contig``;
    5. number of equivalence classes;
    6. per class: its size, then one transcript id per line;
    7. one transcript name per line;
    8. number of contigs, then per contig a line ``id,n_of_kmer,seq``, the
       number of transcript links and one ``tran_id,pos_in_tran,sense_in_tran``
       line per link;
    9. one equivalence-class id per contig.

    Fills ``transcript_info``, ``kmer_str_dict``, ``ec_map``, ``ec_inv_dict``,
    ``counts`` (one zero per class), ``contig_list`` and ``ecs_list``.

    Args:
        filename: path of the index file.

    Returns:
        The k-mer length stored in the index.
    """
    try:
        fp = open(filename, 'r')
    except IOError:
        print(filename, "is not exist!")
        # NOTE(review): assumes program_stop() terminates the process.
        program_stop("index_write_load.py")

    def read_int():
        # Every numeric field sits alone on its own line.
        return int(fp.readline().strip())

    # The context manager releases the handle even if parsing fails.
    with fp:
        # 1.read version
        # NOTE(review): write_idx emits a module-level `version`; confirm it
        # holds the same value as `kallisto_index_version`.
        read_version = fp.readline().strip()
        if read_version != kallisto_index_version:
            program_stop("index_write_load.py")
        # 2.read k
        k = read_int()
        # 3.read num of trans
        transcript_info.tran_num = read_int()
        transcript_info.tran_len = [
            read_int() for _ in range(transcript_info.tran_num)
        ]
        # 4.read kmer_str_dict
        kmer_str_list_len = read_int()
        print("kmer_num:", kmer_str_list_len)
        for _ in range(kmer_str_list_len):
            kmer_str = fp.readline().strip()
            curr_kmer_info = fp.readline().strip().split(",")
            kmer_info = KmerInfo()
            kmer_info.contig_id = int(curr_kmer_info[0])
            kmer_info.pos_in_contig = int(curr_kmer_info[1])
            kmer_info.n_of_kmer_in_contig = int(curr_kmer_info[2])
            kmer_info.sense_in_contig = int(curr_kmer_info[3])
            kmer_str_dict[kmer_str] = kmer_info
        # The shared scratch lists are no longer needed as staging buffers but
        # are still left empty, as before.
        kmer_str_list.clear()
        kmer_info_list.clear()
        # 5.read num of ecs
        ec_map_len = read_int()
        print("ecs_num:", ec_map_len)
        for i in range(ec_map_len):
            ec_map.append([])
            ec_inv_dict[i] = []
            counts.append(0)
        # 6.read each ecs
        for i in range(ec_map_len):
            # 6.1 read num of each ecs
            each_ecs_len = read_int()
            # 6.2 read each trans in each ecs
            ec_map[i].extend(read_int() for _ in range(each_ecs_len))
            # ec_inv_dict shares the list object with ec_map.
            ec_inv_dict[i] = ec_map[i]
        # 7.read trans_names
        transcript_info.tran_name = [
            fp.readline().strip() for _ in range(transcript_info.tran_num)
        ]
        # 8.read contigs
        contig_list_len = read_int()
        print("contig_num:", contig_list_len)
        for _ in range(contig_list_len):
            contig_list.append(0)
            ecs_list.append(0)
        for i in range(contig_list_len):
            load_contig = Contig()
            curr_contig_info = fp.readline().strip().split(",")
            load_contig.id = int(curr_contig_info[0])
            load_contig.n_of_kmer = int(curr_contig_info[1])
            load_contig.seq = curr_contig_info[2]
            # 8.1 read contig_to_trans_info
            curr_contig_include_trans_len = read_int()
            load_contig.include_trans = [0] * curr_contig_include_trans_len
            for j in range(curr_contig_include_trans_len):
                load_include_trans = ContigIncludeTrans()
                curr_include_trans = fp.readline().strip().split(",")
                load_include_trans.tran_id = int(curr_include_trans[0])
                load_include_trans.pos_in_tran = int(curr_include_trans[1])
                load_include_trans.sense_in_tran = int(curr_include_trans[2])
                # Keep the parsed link; it used to be dropped, leaving the
                # placeholder 0 in include_trans.
                load_contig.include_trans[j] = load_include_trans
            # Keep the parsed contig; it used to be dropped, leaving
            # contig_list filled with placeholder zeros.
            contig_list[i] = load_contig
        # 9.read ecs info
        for i in range(contig_list_len):
            ecs_list[i] = read_int()
    return k
コード例 #9
0
def write_idx(filename, k):
    """Write the module-level index structures to a line-oriented text file.

    Sections, in order:

    1. ``version``;
    2. ``k``;
    3. number of transcripts, then one transcript length per line;
    4. number of k-mers, then per k-mer the k-mer string and a line
       ``contig_id,pos_in_contig,n_of_kmer_in_contig,sense_in_contig``;
    5. number of equivalence classes;
    6. per class: its size, then one transcript id per line;
    7. one transcript name per line;
    8. number of contigs, then per contig a line ``id,n_of_kmer,seq``, the
       number of transcript links and one ``tran_id,pos_in_tran,sense_in_tran``
       line per link;
    9. one equivalence-class id per entry of ``ecs_list``.

    As a side effect the keys and values of ``kmer_str_dict`` are appended to
    the shared ``kmer_str_list`` / ``kmer_info_list``.

    Args:
        filename: path of the index file to create or overwrite.
        k: k-mer length to record in the file.
    """
    try:
        fp = open(filename, 'w')
    except IOError:
        print(filename, "is not exist!")
        # NOTE(review): assumes program_stop() terminates the process.
        program_stop("index_write_load.py")

    # Leaving the with-block flushes and closes the file, also on error.
    with fp:
        # 1.write version
        fp.write(version + "\n")
        # 2.write k
        fp.write(str(k) + "\n")
        # 3.write num of trans
        fp.write(str(transcript_info.tran_num) + "\n")
        for i in range(transcript_info.tran_num):
            fp.write(str(transcript_info.tran_len[i]) + "\n")
        # 4.write kmer_str_dict and kmer_info_list
        for key in kmer_str_dict:
            kmer_str_list.append(key)
            kmer_info_list.append(kmer_str_dict[key])
        fp.write(str(len(kmer_str_list)) + "\n")
        for kmer_str, kmer_info in zip(kmer_str_list, kmer_info_list):
            fp.write(kmer_str + "\n")
            fp.write(",".join([
                str(kmer_info.contig_id),
                str(kmer_info.pos_in_contig),
                str(kmer_info.n_of_kmer_in_contig),
                str(kmer_info.sense_in_contig),
            ]) + "\n")
        # 5.write num of ecs
        fp.write(str(len(ec_map)) + "\n")
        # 6.write each ecs
        for each_ec in ec_map:
            # 6.1 write num of each ecs
            fp.write(str(len(each_ec)) + "\n")
            # 6.2 write each trans in each ecs
            for tran_id in each_ec:
                fp.write(str(tran_id) + "\n")
        # 7.write trans_names
        for i in range(transcript_info.tran_num):
            fp.write(transcript_info.tran_name[i] + "\n")
        # 8.write contigs
        fp.write(str(len(contig_list)) + "\n")
        for contig in contig_list:
            fp.write(",".join([
                str(contig.id),
                str(contig.n_of_kmer),
                str(contig.seq),
            ]) + "\n")
            # 8.1 write contig_to_trans_info
            fp.write(str(len(contig.include_trans)) + "\n")
            for curr_include_trans in contig.include_trans:
                fp.write(",".join([
                    str(curr_include_trans.tran_id),
                    str(curr_include_trans.pos_in_tran),
                    str(curr_include_trans.sense_in_tran),
                ]) + "\n")
        # 9.write ecs info
        for ec_id in ecs_list:
            fp.write(str(ec_id) + "\n")
コード例 #10
0
ファイル: contig.py プロジェクト: felixlyd/py-kallisto
def build_dbg(seqs, k):
    """Group the k-mers of ``seqs`` into contigs.

    Every k-mer of every sequence is reduced to ``kmer_rep(kmer)`` and given a
    ``KmerInfo`` entry in the module-level ``kmer_str_dict``.  For each k-mer
    whose ``contig_id`` is still ``-1`` a chain of k-mers is collected by
    stepping with ``fwstep`` from the k-mer itself and from its twin
    (``kmer_twin``).  The chain becomes a new ``Contig`` appended to
    ``contig_list`` (with a ``-1`` placeholder appended to ``ecs_list``), and
    every k-mer of the chain records the contig id, its position in the chain,
    the chain length and whether its representative equals the form used in
    the chain.

    NOTE(review): ``fwstep`` is defined elsewhere; the loops below only work
    if it advances the module-level ``fw_step_kmer`` as a side effect and
    returns a falsy value when no step is possible -- confirm.
    NOTE(review): the ``contig_id == -1`` test presumably relies on ``-1``
    being the ``KmerInfo`` default -- confirm.

    Args:
        seqs: iterable of sequences (sliceable; slices are passed to ``str``).
        k: k-mer length.
    """
    global fw_step_kmer
    tmp_kmer_map = set()
    # Break the sequences up to obtain the set of representative k-mers.
    for seq in seqs:
        for i in range(len(seq) - k + 1):
            curr_kmer = seq[i:i + k]
            curr_kmer = str(curr_kmer)
            curr_rep = kmer_rep(curr_kmer)
            tmp_kmer_map.add(curr_rep)

    # Attach to every k-mer an object that can store its information.
    for each_kmer in tmp_kmer_map:
        kmer_info = KmerInfo()
        kmer_str_dict[each_kmer] = kmer_info

    # Chain the k-mers together to find the contigs.
    for kmer_key in kmer_str_dict:
        curr_kmer = kmer_key
        curr_kmer_info = kmer_str_dict[kmer_key]
        # Only k-mers that have not been placed in a contig yet start a chain.
        if curr_kmer_info.contig_id == -1:
            self_loop = 0
            curr_twin = kmer_twin(curr_kmer)
            # Forward part of the chain, starting with the k-mer itself.
            fw_list = [curr_kmer]
            last_kmer = curr_kmer
            fw_step_kmer = curr_kmer
            while fwstep(fw_step_kmer):
                if fw_step_kmer == curr_kmer:
                    # This branch detects a cycle back to the starting k-mer.
                    self_loop = 1
                    break
                    # Example inputs (k, sequence) that reach this branch:
                    # example(3,"ACTGAC") (5,"ACCAACCA") (5,"TCTGTCTG") (5,"AACAAACA") (5,"CACACA") (5,"ACACAC")
                    # print("begin:",begin,"fw:", fw, "t_kmer", self.tmp_kmer, "seqs:", self.seqs)
                elif fw_step_kmer == curr_twin:
                    # Stepped onto the twin of the start; this counts as a
                    # loop only if the chain already has more than one k-mer.
                    self_loop = (len(fw_list) > 1)
                    break
                elif fw_step_kmer == kmer_twin(last_kmer):
                    # Stepped onto the twin of the previous k-mer: stop here.
                    break
                fw_list.append(fw_step_kmer)
                last_kmer = fw_step_kmer
            # Second walk, started from the twin; skipped when a loop was
            # found above.
            fw_step_kmer = curr_twin
            bw_list = []
            first_kmer = curr_twin
            if not self_loop:
                while fwstep(fw_step_kmer):
                    if fw_step_kmer == curr_twin:
                        break
                    elif fw_step_kmer == curr_kmer:
                        break
                    elif fw_step_kmer == kmer_twin(first_kmer):
                        break
                    bw_list.append(fw_step_kmer)
                    first_kmer = fw_step_kmer
            # Full chain: twins of the second walk in reverse order, followed
            # by the forward walk.
            curr_kmer_list = []
            for bw in reversed(bw_list):
                curr_kmer_list.append(kmer_twin(bw))
            for fw in fw_list:
                curr_kmer_list.append(fw)
            contig_list_len = len(curr_kmer_list)
            curr_contig_id = len(contig_list)
            contig = Contig()
            contig.id = curr_contig_id
            contig.n_of_kmer = contig_list_len
            # Store the k-mer information.
            for j in range(contig_list_len):
                each_kmer = curr_kmer_list[j]
                tmp_rep = kmer_rep(each_kmer)
                if tmp_rep in kmer_str_dict:
                    kmer_str_dict[tmp_rep].contig_id = curr_contig_id
                    kmer_str_dict[tmp_rep].pos_in_contig = j
                    kmer_str_dict[
                        tmp_rep].n_of_kmer_in_contig = contig_list_len
                    # sense_in_contig is 1 when the chain uses the k-mer in
                    # its representative form, 0 otherwise.
                    if tmp_rep == each_kmer:
                        kmer_str_dict[tmp_rep].sense_in_contig = 1
                    else:
                        kmer_str_dict[tmp_rep].sense_in_contig = 0
                else:
                    program_stop("contig.py")
                # The contig sequence is the first k-mer plus the last
                # character of each following k-mer.
                if j == 0:
                    contig.seq = each_kmer
                elif j > 0:
                    contig.seq = contig.seq + each_kmer[-1]
            contig_list.append(contig)
            ecs_list.append(-1)