Esempio n. 1
0
def _dnapi_prediction(fn, out_dir):
    """Predict candidate adapter sequences for ``fn``.

    The input is passed through ``_prepare_file`` and the resulting file is
    handed to ``iterative_adapter_prediction``. Each prediction is indexed as
    ``(adapter, score)``; a prediction is kept when its score is strictly
    greater than 40% of a reference score.

    :param fn: input file, forwarded to ``_prepare_file``.
    :param out_dir: output directory, forwarded to ``_prepare_file``.
    :returns: list of kept adapters (item 0 of each prediction), in the
        order the predictor returned them; empty if nothing was predicted.
    """
    end_file = _prepare_file(fn, out_dir)
    iterative_result = iterative_adapter_prediction(
        end_file, [1.2, 1.3, 1.4, 1.7, 2], [7, 11], 500000)

    n_results = len(iterative_result)
    if n_results == 0:
        # Nothing predicted: avoid the IndexError the reference lookup
        # below would otherwise raise.
        return []

    # The reference score has always been taken from the SECOND prediction.
    # With a single prediction that index does not exist, so fall back to
    # the only score available.
    # NOTE(review): the original variable was called ``max_score``; if the
    # predictor returns results sorted by descending score, index 0 may have
    # been intended -- confirm before changing the threshold.
    reference_score = iterative_result[1 if n_results > 1 else 0][1]

    adapters = list()
    for a in iterative_result:
        if a[1] > reference_score * 0.40:
            logger.debug("Adding adapter to the list: %s with score %s" % (a[0], a[1]))
            adapters.append(a[0])
    return adapters
Esempio n. 2
0
def _dnapi_prediction(fn, out_dir):
    """Return the adapters predicted for ``fn`` that pass the score cut-off.

    ``fn`` is prepared with ``_prepare_file(fn, out_dir)`` and the prepared
    file is given to ``iterative_adapter_prediction``. Predictions are read
    as ``(adapter, score)`` pairs, and an adapter is kept when its score is
    strictly above 40% of the reference score.

    :param fn: input file, forwarded to ``_prepare_file``.
    :param out_dir: output directory, forwarded to ``_prepare_file``.
    :returns: list of kept adapter values in predictor order; an empty list
        when the predictor returned nothing.
    """
    end_file = _prepare_file(fn, out_dir)
    iterative_result = iterative_adapter_prediction(
        end_file, [1.2, 1.3, 1.4, 1.7, 2], [7, 11], 500000)

    # Guard the reference lookup: indexing [1] on fewer than two predictions
    # used to raise IndexError.
    if len(iterative_result) == 0:
        return []
    if len(iterative_result) == 1:
        reference_score = iterative_result[0][1]
    else:
        # Unchanged behaviour for two or more predictions: the reference is
        # the score of the second one.
        # NOTE(review): previously named ``max_score``; index 0 may have
        # been the intent if results are sorted by score -- confirm.
        reference_score = iterative_result[1][1]

    threshold = reference_score * 0.40
    adapters = list()
    for a in iterative_result:
        if a[1] > threshold:
            logger.debug("Adding adapter to the list: %s with score %s" % (a[0], a[1]))
            adapters.append(a[0])
    return adapters
Esempio n. 3
0
def _dnapi_prediction(fn):
    """Return the first item of every prediction made for ``fn``.

    Calls ``iterative_adapter_prediction`` with a fixed set of arguments and
    collects element 0 of each returned entry, preserving order.
    """
    predictions = iterative_adapter_prediction(
        fn, [1.2, 1.3, 1.4], [9, 11], 50000)
    adapters = []
    for prediction in predictions:
        adapters.append(prediction[0])
    return adapters
Esempio n. 4
0
def _dnapi_prediction(fn):
    """Collect the leading element of each prediction returned for ``fn``.

    The predictions come from ``iterative_adapter_prediction`` called with
    fixed arguments; the result list keeps the predictor's ordering.
    """
    ratios = [1.2, 1.3, 1.4]
    sizes = [9, 11]
    found = iterative_adapter_prediction(fn, ratios, sizes, 50000)
    return [entry[0] for entry in found]
Esempio n. 5
0
def main():
    """Command-line entry point: predict adapters and optionally assess them.

    When ``MAP_TO_GENOME`` is false, the adapter prediction is run on the
    input FASTQ and the result is printed to stdout (all predictions with
    scores if ``args.show_all`` is set, otherwise only the first adapter).

    When ``MAP_TO_GENOME`` is true, a temporary directory is created, the
    input is prepared with ``fastq_input_prep``, and every candidate adapter
    (taken from ``args.seq`` or predicted for each k/r combination) is
    passed to ``map_clean_reads``. The collected rows are handed to
    ``make_stats_report``.

    Side effects: rebinds the module-level ``TEMP_DIR`` and creates that
    directory when ``MAP_TO_GENOME`` is true.

    :raises Exception: if there are no adapters to process.
    """
    global TEMP_DIR

    args = parse_args()
    fastq = args.FASTQ

    Ks = convert_interval(args.k, "-k", int)
    Rs = convert_interval(args.r, "-r", float)

    if not MAP_TO_GENOME:
        if len(Ks) > 1 or len(Rs) > 1:
            adapts = iterative_adapter_prediction(fastq, Rs, Ks, SAMPLE_NUM)
        else:
            adapts = adapter_prediction(fastq, Rs[0], Ks[0], SAMPLE_NUM)
        if args.show_all:
            for x in adapts:
                print("{}\tscore={:.2f}".format(*x))
        else:
            print(adapts[0][0])
        return

    TEMP_DIR = "{}/DNApi_tmp_{}".format(args.temp_dir, str(uuid.uuid4()))
    # Pass argv as a list: building a command string and splitting it on
    # whitespace broke temp paths that contain spaces.
    subprocess.call(["mkdir", TEMP_DIR])

    original_fastq = fastq
    fastq, total_read, sd = fastq_input_prep(fastq, args.subsample_rate,
                                             TEMP_DIR)

    if args.seq:
        adapts = set(args.seq)
        setstr = ["user-input"] * len(adapts)
    else:
        msg = "warning: predicted adapter is too short (<{0}): '{1}'\n" \
            + "warning: '{1}' will not be further investigated\n"
        # Maps each candidate adapter to the "k:r" settings that produced it.
        params = {}
        for k in Ks:
            for r in Rs:
                aout = adapter_prediction(fastq, r, k, SAMPLE_NUM)[0][0]
                if len(aout) < args.prefix_match:
                    # Previously formatted with the undefined names ``l``
                    # and ``s``, which raised NameError instead of warning.
                    sys.stderr.write(msg.format(args.prefix_match, aout))
                    continue
                aseq = aout[:args.prefix_match + 5]
                params.setdefault(aseq,
                                  []).append("{}:{:.1f}".format(k, r))
        adapts = list(params.keys())
        setstr = [';'.join(s) for s in params.values()]
        # The untreated input is always appended as one more candidate.
        adapts.append("RAW_INPUT")
        setstr.append("NO_TREATMENT")

    # NOTE(review): both branches above appear to always leave ``adapts``
    # non-empty, so this guard looks unreachable; kept as a defensive check.
    if not adapts:
        raise Exception("no valid adapters to further process")

    table = []
    for i, aseq in enumerate(adapts):
        cnts = map_clean_reads(fastq, aseq[:args.prefix_match],
                               args.trim_5p, args.trim_3p, args.min_len,
                               args.max_len, args.map_command, TEMP_DIR)
        # Each count expressed as a percentage of the total reads.
        read_stats = [c / total_read * 100 for c in cnts]
        table.append([
            aseq, cnts[0], read_stats[0], cnts[1], read_stats[1], setstr[i]
        ])
    make_stats_report(table, total_read, args.subsample_rate,
                      args.prefix_match, sd, original_fastq,
                      args.output_dir, TEMP_DIR, args.no_output_files)
Esempio n. 6
0
def main():
    """Run adapter prediction from the command line.

    With ``MAP_TO_GENOME`` false, the prediction for the input FASTQ is
    printed to stdout: every ``(adapter, score)`` entry when
    ``args.show_all`` is set, otherwise just the first adapter.

    With ``MAP_TO_GENOME`` true, a temporary directory is created, the
    input goes through ``fastq_input_prep``, each candidate adapter (from
    ``args.seq`` or predicted per k/r combination) is evaluated with
    ``map_clean_reads``, and the resulting table is passed to
    ``make_stats_report``.

    Side effects: rebinds the module-level ``TEMP_DIR`` and creates that
    directory when ``MAP_TO_GENOME`` is true.

    :raises Exception: if there are no adapters to process.
    """
    global TEMP_DIR

    args = parse_args()
    fastq = args.FASTQ

    Ks = convert_interval(args.k, "-k", int)
    Rs = convert_interval(args.r, "-r", float)

    if not MAP_TO_GENOME:
        if len(Ks) > 1 or len(Rs) > 1:
            adapts = iterative_adapter_prediction(fastq, Rs, Ks, SAMPLE_NUM)
        else:
            adapts = adapter_prediction(fastq, Rs[0], Ks[0], SAMPLE_NUM)
        if args.show_all:
            for x in adapts:
                print("{}\tscore={:.2f}".format(*x))
        else:
            print(adapts[0][0])
        return

    TEMP_DIR = "{}/DNApi_tmp_{}".format(
        args.temp_dir, str(uuid.uuid4()))
    # Give mkdir the path as a single argv element; splitting a formatted
    # command string on whitespace mangled temp paths containing spaces.
    subprocess.call(["mkdir", TEMP_DIR])

    original_fastq = fastq
    fastq, total_read, sd = fastq_input_prep(
        fastq, args.subsample_rate, TEMP_DIR)

    if args.seq:
        adapts = set(args.seq)
        setstr = ["user-input"] * len(adapts)
    else:
        msg = "warning: predicted adapter is too short (<{0}): '{1}'\n" \
            + "warning: '{1}' will not be further investigated\n"
        # Candidate adapter -> list of "k:r" settings that predicted it.
        params = {}
        for k in Ks:
            for r in Rs:
                aout = adapter_prediction(fastq, r, k, SAMPLE_NUM)[0][0]
                if len(aout) < args.prefix_match:
                    # The warning used to reference undefined names ``l``
                    # and ``s`` and crashed with NameError; report the
                    # length limit and the rejected adapter instead.
                    sys.stderr.write(msg.format(args.prefix_match, aout))
                    continue
                aseq = aout[:args.prefix_match + 5]
                params.setdefault(aseq, []).append(
                    "{}:{:.1f}".format(k, r))
        adapts = list(params.keys())
        setstr = [';'.join(s) for s in params.values()]
        # The untreated input is always added as a final candidate.
        adapts.append("RAW_INPUT")
        setstr.append("NO_TREATMENT")

    # NOTE(review): ``adapts`` appears to be non-empty on every path that
    # reaches this point, so the guard looks unreachable; kept defensively.
    if not adapts:
        raise Exception("no valid adapters to further process")

    table = []
    for i, aseq in enumerate(adapts):
        cnts = map_clean_reads(
            fastq, aseq[:args.prefix_match], args.trim_5p,
            args.trim_3p, args.min_len, args.max_len,
            args.map_command, TEMP_DIR)
        # Counts converted to percentages of the total read number.
        read_stats = [c / total_read * 100 for c in cnts]
        table.append([aseq, cnts[0], read_stats[0],
                      cnts[1], read_stats[1], setstr[i]])
    make_stats_report(
        table, total_read, args.subsample_rate, args.prefix_match,
        sd, original_fastq, args.output_dir, TEMP_DIR, args.no_output_files)