Beispiel #1
0
def krill_open(q_min, q_percent, p64, in1, in2, pe_1, pe_2, se_1, se_2, f_1,
               f_2):
    '''
    open paired-end inputs and every output file, then run krill on them

    in1 is checked with gzip_test. When the result is truthy, in1 and in2 are
    read in text mode through gzip.open and the last extension of each output
    path (pe_1, pe_2, se_1, se_2, f_1, f_2) is removed with os.path.splitext.
    Otherwise the inputs are read with the built-in open and the output paths
    are used unchanged. All outputs are opened for writing with the built-in
    open.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        # outputs are written uncompressed; the last extension is dropped
        # (presumably a trailing '.gz' - confirm against the caller)
        pe_1, pe_2, se_1, se_2, f_1, f_2 = [
            os.path.splitext(path)[0]
            for path in (pe_1, pe_2, se_1, se_2, f_1, f_2)
        ]

    with reader(in1, 'rt') as r1, reader(in2, 'rt') as r2,\
            open(pe_1, "w") as paired1, open(pe_2, "w") as paired2,\
            open(se_1, "w") as single1, open(se_2, "w") as single2,\
            open(f_1, "w") as failed1, open(f_2, "w") as failed2:
        krill(q_min, q_percent, p64, r1, r2, paired1, paired2, single1,
              single2, failed1, failed2)
Beispiel #2
0
def porifera_open(in1, in2, subseqs1, subseqs2, se_1, pe_1, se_2, pe_2, k,
                  rounds, match, min_l, tiny_ls1, tiny_ls2, tiny):
    '''
    open paired-end inputs and the paired/single outputs, then call porifera

    When gzip_test(in1) is truthy, both inputs are read in text mode through
    gzip.open and the last extension of pe_1, pe_2, se_1 and se_2 is removed
    with os.path.splitext; otherwise the built-in open is used and the
    output paths are left as given.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        # outputs are written with the built-in open, minus their last
        # extension
        pe_1, pe_2, se_1, se_2 = [os.path.splitext(path)[0]
                                  for path in (pe_1, pe_2, se_1, se_2)]

    with reader(in1, 'rt') as r1, reader(in2, 'rt') as r2,\
            open(pe_1, 'w') as paired1, open(pe_2, 'w') as paired2,\
            open(se_1, 'w') as single1, open(se_2, 'w') as single2:
        porifera(r1, r2, subseqs1, subseqs2, paired1, paired2, single1,
                 single2, k, rounds, match, min_l, tiny_ls1, tiny_ls2, tiny)
Beispiel #3
0
def rotifer_open(R1_bases_ls, R2_bases_ls, in1, in2, pe_1, pe_2, se_1, se_2,
                 trim):
    '''
    open paired-end inputs and outputs, then run rotifer or rotifer_trim

    When gzip_test(in1) is truthy, the inputs are read in text mode through
    gzip.open and the last extension of each output path is removed with
    os.path.splitext; otherwise the built-in open is used and output paths
    are unchanged. A truthy trim selects rotifer_trim (which also receives
    trim); a falsy trim selects rotifer.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        pe_1, pe_2, se_1, se_2 = [os.path.splitext(path)[0]
                                  for path in (pe_1, pe_2, se_1, se_2)]

    with reader(in1, 'rt') as r1, reader(in2, 'rt') as r2,\
            open(pe_1, "w") as paired1, open(pe_2, "w") as paired2,\
            open(se_1, "w") as single1, open(se_2, "w") as single2:
        if not trim:
            rotifer(R1_bases_ls, R2_bases_ls, r1, r2, paired1, paired2,
                    single1, single2)
        else:
            rotifer_trim(R1_bases_ls, R2_bases_ls, r1, r2, paired1, paired2,
                         single1, single2, trim)
Beispiel #4
0
def porifera_single_open(args):
    '''
    open the single-end input and output named on args, then call
    porifera_single

    When gzip_test(args.in1) is truthy, the input is read in text mode
    through gzip.open and the output path is args.se_1 without its last
    extension; otherwise the built-in open is used and args.se_1 is taken
    as is.
    '''
    if gzip_test(args.in1):
        reader = gzip.open
        out_path = os.path.splitext(args.se_1)[0]
    else:
        reader = open
        out_path = args.se_1

    with reader(args.in1, 'rt') as source, open(out_path, 'w') as sink:
        porifera_single(args, source, sink)
Beispiel #5
0
def krill_single_open(q_min, q_percent, p64, in1, se_1):
    '''
    open the single-end input and output, then run krill_single on them

    When gzip_test(in1) is truthy, the input is read in text mode through
    gzip.open and the last extension of se_1 is removed with
    os.path.splitext; otherwise the built-in open is used and se_1 is
    unchanged.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        se_1 = os.path.splitext(se_1)[0]

    with reader(in1, 'rt') as source, open(se_1, "w") as sink:
        krill_single(q_min, q_percent, p64, source, sink)
Beispiel #6
0
def scallop_open(in1, front_trim, end_trim, out1):
    '''
    open the single-end input and output, then call scallop

    When gzip_test(in1) is truthy, the input is read in text mode through
    gzip.open and the last extension of out1 is removed with
    os.path.splitext; otherwise the built-in open is used and out1 is
    unchanged.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        out1 = os.path.splitext(out1)[0]

    with reader(in1, 'rt') as source, open(out1, 'w') as sink:
        scallop(front_trim, end_trim, source, sink)
Beispiel #7
0
def scallop_single_end_open(in1, out1, front_trim, end_score, window, min_l):
    '''
    open the single-end input and output, then call scallop_single_end_line

    When gzip_test(in1) is truthy, the input is read in text mode through
    gzip.open and the last extension of out1 is removed with
    os.path.splitext; otherwise the built-in open is used and out1 is
    unchanged.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        out1 = os.path.splitext(out1)[0]

    with reader(in1, 'rt') as source, open(out1, 'w') as sink:
        scallop_single_end_line(source, sink, front_trim, end_score, window,
                                min_l)
Beispiel #8
0
def anemone_single_open(in1, out1, of1_ls, mismatch, bcs, proj, round_one,
                        front_trim):
    '''
    open the single-end input and pass the file object to anemone_single

    The input is read in text mode through gzip.open when gzip_test(in1) is
    truthy and through the built-in open otherwise. out1 is forwarded
    untouched; no output file is opened here.
    '''
    reader = gzip.open if gzip_test(in1) else open
    with reader(in1, 'rt') as source:
        anemone_single(source, out1, of1_ls, mismatch, bcs, proj, round_one,
                       front_trim)
Beispiel #9
0
def crinoid_open(in1, out1, out2, procs, p64):
    '''
    read in1 twice: once for kmer_test, then again for crinoid

    The input is read in text mode through gzip.open when gzip_test(in1) is
    truthy and through the built-in open otherwise. The value returned by
    kmer_test is forwarded to crinoid as its last argument.
    '''
    reader = gzip.open if gzip_test(in1) else open

    # first pass over the file
    with reader(in1, 'rt') as handle:
        k_score = kmer_test(handle)

    # the file is reopened so crinoid gets a fresh handle
    with reader(in1, 'rt') as handle:
        crinoid(handle, out1, out2, procs, p64, k_score)
Beispiel #10
0
def porifera_single_open(in1, subseqs, se_1, k, rounds, match, min_l, tiny_ls,
                         tiny):
    '''
    open the single-end input and output, then call porifera_single

    When gzip_test(in1) is truthy, the input is read in text mode through
    gzip.open and the last extension of se_1 is removed with
    os.path.splitext; otherwise the built-in open is used and se_1 is
    unchanged.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        se_1 = os.path.splitext(se_1)[0]

    with reader(in1, 'rt') as source, open(se_1, 'w') as sink:
        porifera_single(source, subseqs, sink, k, rounds, match, min_l,
                        tiny_ls, tiny)
Beispiel #11
0
def anemone_comp(in1_ls, in2_ls, mismatch, bcs_dict, curr, front_trim, in1):
    '''
    composer entry point to anemone

    Does nothing when in1 is itself listed in in2_ls. Otherwise the mate of
    in1 is looked up at the same position in in2_ls (in2 and out2 are False
    when there is none). If no entry for the file's base name (with or
    without a trailing '.gz') exists in bcs_dict, in1 and its mate are copied
    into curr and the function returns. Otherwise a sub-directory of curr is
    created and anemone_init is called.

    bcs_dict may be modified: a key '<name>.gz' is renamed to '<name>' when
    gzip_test(in1) is truthy and only the '.gz' form is present.

    Raises whatever os.mkdir raises if the sub-directory cannot be created
    (for example when it already exists).
    '''
    if in1 in in2_ls:
        return

    try:
        in2 = in2_ls[in1_ls.index(in1)]
        out2 = os.path.basename(in2)
    except (IndexError, ValueError):
        # in1 is unknown to in1_ls or has no counterpart in in2_ls
        in2 = False
        out2 = False

    out1 = os.path.basename(in1)
    if gzip_test(in1):
        if out1.endswith('.gz'):
            out1 = out1[:-3]
        if out2 and out2.endswith('.gz'):
            out2 = out2[:-3]

    # accept an entry recorded under the '.gz' name as well
    gz_key = out1 + '.gz'
    if out1 not in bcs_dict and gz_key in bcs_dict:
        bcs_dict[out1] = bcs_dict.pop(gz_key)

    # the following copies files that have no entry in bcs_dict
    try:
        bcs_file = bcs_dict[out1]
    except KeyError:
        shutil.copy(in1, curr)
        # in2 is False when there is no mate; test it explicitly rather than
        # relying on shutil.copy raising TypeError for a non-path argument
        if in2:
            shutil.copy(in2, curr)
        return

    base = os.path.basename(in1)
    if in1.endswith('.fastq'):
        tmp_dir = base[:-6]
    elif in1.endswith('.fastq.gz'):
        tmp_dir = base[:-9]
    else:
        tmp_dir = base

    subdir = os.path.join(curr, tmp_dir)
    os.mkdir(subdir)
    anemone_init(in1, in2, out1, out2, mismatch, bcs_file, subdir, front_trim)
Beispiel #12
0
def anemone_open(in1, in2, out1, out2, of1_ls, of2_ls, mismatch, bcs, proj,
                 round_one, front_trim):
    '''
    open both paired-end inputs and pass the file objects to anemone

    The inputs are read in text mode through gzip.open when gzip_test(in1)
    is truthy and through the built-in open otherwise. The pair of values
    returned by anemone is returned to the caller as a 2-tuple.
    '''
    reader = gzip.open if gzip_test(in1) else open
    with reader(in1, 'rt') as r1, reader(in2, 'rt') as r2:
        updated1, updated2 = anemone(r1, r2, out1, out2, of1_ls, of2_ls,
                                     mismatch, bcs, proj, round_one,
                                     front_trim)
    return updated1, updated2
Beispiel #13
0
def rotifer_single_open(R1_bases_ls, in1, se_1, f_1, trim):
    '''
    open the single-end input and both outputs, then call rotifer_single

    When gzip_test(in1) is truthy, the input is read in text mode through
    gzip.open and the last extension of se_1 and f_1 is removed with
    os.path.splitext; otherwise the built-in open is used and both paths
    are unchanged.

    trim is accepted but not used by this function.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        se_1, f_1 = [os.path.splitext(path)[0] for path in (se_1, f_1)]

    with reader(in1, 'rt') as source,\
            open(se_1, "w") as passed,\
            open(f_1, "w") as failed:
        rotifer_single(R1_bases_ls, source, passed, failed)
Beispiel #14
0
def fastq_test(fastq_ls):
    '''
    run fastq_structure over every file and collect the complete pairs

    Each filename is checked with gzip_test. A result of None ends the
    program through sys.exit with a message built from the filename and
    msg.fastq_test1. A result of True reads the file in text mode through
    gzip.open; anything else uses the built-in open. The dictionary
    returned by fastq_structure is carried from one file to the next.

    Returns two parallel lists holding items 0 and 1 of every dictionary
    value that does not contain None.
    '''
    fastq_dt = {}
    for filename in fastq_ls:
        compressed = gzip_test(filename)
        if compressed is None:
            sys.exit('\n\n' + filename + msg.fastq_test1)
        reader = gzip.open if compressed is True else open
        with reader(filename, 'rt') as handle:
            fastq_dt = fastq_structure(handle, filename, fastq_dt)

    # keep only the entries where nothing is missing
    complete = [entry for entry in fastq_dt.values() if None not in entry]
    in1_ls = [entry[0] for entry in complete]
    in2_ls = [entry[1] for entry in complete]
    return in1_ls, in2_ls
Beispiel #15
0
def porifera_open(args):
    '''
    open the paired-end inputs and outputs named on args, then call porifera

    When gzip_test(args.in1) is truthy, both inputs are read in text mode
    through gzip.open and the output paths are args.pe_1, args.pe_2,
    args.se_1 and args.se_2 without their last extension; otherwise the
    built-in open is used and those paths are taken as is.
    '''
    if gzip_test(args.in1):
        reader = gzip.open
        pe_1, pe_2, se_1, se_2 = [
            os.path.splitext(path)[0]
            for path in (args.pe_1, args.pe_2, args.se_1, args.se_2)
        ]
    else:
        reader = open
        pe_1, pe_2, se_1, se_2 = args.pe_1, args.pe_2, args.se_1, args.se_2

    with reader(args.in1, 'rt') as r1, reader(args.in2, 'rt') as r2,\
            open(pe_1, 'w') as paired1, open(pe_2, 'w') as paired2,\
            open(se_1, 'w') as single1, open(se_2, 'w') as single2:
        porifera(args, r1, r2, paired1, paired2, single1, single2)
Beispiel #16
0
def scallop_end_open(in1, in2, pe_1, pe_2, se_1, se_2, front_trim, end_score,
                     window, min_l):
    '''
    open paired-end inputs and outputs, then call scallop_end_line

    When gzip_test(in1) is truthy, both inputs are read in text mode through
    gzip.open and the last extension of pe_1, pe_2, se_1 and se_2 is removed
    with os.path.splitext; otherwise the built-in open is used and the
    output paths are unchanged.
    '''
    reader = open
    if gzip_test(in1):
        reader = gzip.open
        pe_1, pe_2, se_1, se_2 = [os.path.splitext(path)[0]
                                  for path in (pe_1, pe_2, se_1, se_2)]

    with reader(in1, 'rt') as r1, reader(in2, 'rt') as r2,\
            open(pe_1, "w") as paired1, open(pe_2, "w") as paired2,\
            open(se_1, "w") as single1, open(se_2, "w") as single2:
        scallop_end_line(r1, r2, paired1, paired2, single1, single2,
                         front_trim, end_score, window, min_l)