def _dnapi_prediction(fn, out_dir):
    """Return adapter candidates for ``fn`` that score above a relative cutoff.

    The input is first passed through ``_prepare_file(fn, out_dir)``; the file
    it returns is handed to ``iterative_adapter_prediction``.  Each entry of
    the prediction result is indexed as ``(adapter, score)``.

    Args:
        fn: Input file, forwarded to ``_prepare_file``.
        out_dir: Output location, forwarded to ``_prepare_file``.

    Returns:
        A list with the first element of every prediction entry whose score
        is strictly greater than 40% of the reference score.  The list is
        empty when the prediction produced no candidates.
    """
    score_fraction = 0.40

    end_file = _prepare_file(fn, out_dir)
    iterative_result = iterative_adapter_prediction(
        end_file, [1.2, 1.3, 1.4, 1.7, 2], [7, 11], 500000)

    # Nothing predicted: there is no reference score to compare against.
    if not iterative_result:
        return []

    # The reference is the score of the SECOND entry, as in the original code.
    # With a single candidate there is no second entry, so fall back to the
    # only one instead of raising IndexError.
    # NOTE(review): the original called this value "max_score" although it
    # reads index 1, not index 0 -- confirm which entry is intended.
    reference_index = min(1, len(iterative_result) - 1)
    reference_score = iterative_result[reference_index][1]

    adapters = []
    for candidate in iterative_result:
        if candidate[1] > reference_score * score_fraction:
            logger.debug("Adding adapter to the list: %s with score %s" % (candidate[0], candidate[1]))
            adapters.append(candidate[0])
    return adapters
def _dnapi_prediction(fn):
    """Collect the first element of every entry predicted for ``fn``.

    ``iterative_adapter_prediction`` is invoked with fixed settings
    (``[1.2, 1.3, 1.4]``, ``[9, 11]`` and ``50000``); every entry of its
    result contributes its element at index 0 to the returned list, in the
    order the entries were produced.

    Args:
        fn: Input passed straight to ``iterative_adapter_prediction``.

    Returns:
        A list holding ``entry[0]`` for each prediction entry.
    """
    predictions = iterative_adapter_prediction(
        fn,
        [1.2, 1.3, 1.4],
        [9, 11],
        50000,
    )
    sequences = []
    for entry in predictions:
        sequences.append(entry[0])
    return sequences
def main():
    """Command-line entry point: predict adapters and optionally evaluate them.

    Arguments come from ``parse_args()``; the ``-k`` and ``-r`` options are
    expanded with ``convert_interval`` into lists of ints and floats.

    * When ``MAP_TO_GENOME`` is false, the prediction is printed: every
      ``(sequence, score)`` entry with ``--show-all`` style output when
      ``args.show_all`` is set, otherwise only the sequence of the first
      entry.
    * Otherwise a temporary directory is created (its path is stored in the
      module-level ``TEMP_DIR``), the input is prepared with
      ``fastq_input_prep``, and each candidate adapter (user supplied via
      ``args.seq`` or predicted for every k/r combination) is run through
      ``map_clean_reads``.  The counts, expressed both raw and as a
      percentage of ``total_read``, are passed to ``make_stats_report``.

    Raises:
        Exception: if the list of adapters to process is empty.
    """
    global TEMP_DIR

    args = parse_args()
    fastq = args.FASTQ
    Ks = convert_interval(args.k, "-k", int)
    Rs = convert_interval(args.r, "-r", float)

    if not MAP_TO_GENOME:
        # Several k or r values require the iterative predictor.
        if len(Ks) > 1 or len(Rs) > 1:
            adapts = iterative_adapter_prediction(fastq, Rs, Ks, SAMPLE_NUM)
        else:
            adapts = adapter_prediction(fastq, Rs[0], Ks[0], SAMPLE_NUM)
        if args.show_all:
            for x in adapts:
                print("{}\tscore={:.2f}".format(*x))
        else:
            print(adapts[0][0])
        return

    TEMP_DIR = "{}/DNApi_tmp_{}".format(args.temp_dir, str(uuid.uuid4()))
    # Pass argv as a list: splitting a formatted command string would break
    # a temporary directory path that contains whitespace.
    subprocess.call(["mkdir", TEMP_DIR])
    original_fastq = fastq
    fastq, total_read, sd = fastq_input_prep(
        fastq, args.subsample_rate, TEMP_DIR)

    if args.seq:
        # De-duplicate while keeping a list: the code below appends to and
        # enumerates ``adapts``, which a set does not support (no ``append``).
        adapts = list(dict.fromkeys(args.seq))
        setstr = ["user-input"] * len(adapts)
    else:
        msg = ("warning: predicted adapter is too short (<{0}): '{1}'\n"
               "warning: '{1}' will not be further investigated\n")
        # Maps each truncated adapter to the "k:r" settings that produced it.
        params = {}
        for k in Ks:
            for r in Rs:
                aout = adapter_prediction(fastq, r, k, SAMPLE_NUM)[0][0]
                if len(aout) < args.prefix_match:
                    # {0} is the minimum length, {1} the rejected prediction.
                    sys.stderr.write(msg.format(args.prefix_match, aout))
                    continue
                aseq = aout[:args.prefix_match + 5]
                params.setdefault(aseq, []).append(
                    "{}:{:.1f}".format(k, r))
        adapts = list(params.keys())
        setstr = [';'.join(s) for s in params.values()]

    # The untreated input is always evaluated alongside the candidates.
    adapts.append("RAW_INPUT")
    setstr.append("NO_TREATMENT")
    # NOTE(review): after the append above this check can never fire; kept in
    # its original position to avoid changing behavior -- confirm whether it
    # was meant to run before "RAW_INPUT" is added.
    if not adapts:
        raise Exception("no valid adapters to further process")

    table = []
    for i, aseq in enumerate(adapts):
        cnts = map_clean_reads(
            fastq, aseq[:args.prefix_match], args.trim_5p, args.trim_3p,
            args.min_len, args.max_len, args.map_command, TEMP_DIR)
        # Counts as a percentage of the total number of reads.
        read_stats = [c / total_read * 100 for c in cnts]
        table.append([aseq, cnts[0], read_stats[0],
                      cnts[1], read_stats[1], setstr[i]])

    make_stats_report(
        table, total_read, args.subsample_rate, args.prefix_match, sd,
        original_fastq, args.output_dir, TEMP_DIR, args.no_output_files)
def main():
    """Command-line entry point: predict adapters and optionally evaluate them.

    Arguments come from ``parse_args()``; the ``-k`` and ``-r`` options are
    expanded with ``convert_interval`` into lists of ints and floats.

    * When ``MAP_TO_GENOME`` is false, the prediction is printed: every
      ``(sequence, score)`` entry when ``args.show_all`` is set, otherwise
      only the sequence of the first entry.
    * Otherwise a temporary directory is created (its path is stored in the
      module-level ``TEMP_DIR``), the input is prepared with
      ``fastq_input_prep``, and each candidate adapter (user supplied via
      ``args.seq`` or predicted for every k/r combination) is run through
      ``map_clean_reads``.  The counts, expressed both raw and as a
      percentage of ``total_read``, are passed to ``make_stats_report``.

    Raises:
        Exception: if the list of adapters to process is empty.
    """
    global TEMP_DIR

    args = parse_args()
    fastq = args.FASTQ
    Ks = convert_interval(args.k, "-k", int)
    Rs = convert_interval(args.r, "-r", float)

    if not MAP_TO_GENOME:
        # Several k or r values require the iterative predictor.
        if len(Ks) > 1 or len(Rs) > 1:
            adapts = iterative_adapter_prediction(fastq, Rs, Ks, SAMPLE_NUM)
        else:
            adapts = adapter_prediction(fastq, Rs[0], Ks[0], SAMPLE_NUM)
        if args.show_all:
            for x in adapts:
                print("{}\tscore={:.2f}".format(*x))
        else:
            print(adapts[0][0])
        return

    TEMP_DIR = "{}/DNApi_tmp_{}".format(args.temp_dir, str(uuid.uuid4()))
    # Pass argv as a list: splitting a formatted command string would break
    # a temporary directory path that contains whitespace.
    subprocess.call(["mkdir", TEMP_DIR])
    original_fastq = fastq
    fastq, total_read, sd = fastq_input_prep(
        fastq, args.subsample_rate, TEMP_DIR)

    if args.seq:
        # De-duplicate while keeping a list: the code below appends to and
        # enumerates ``adapts``, which a set does not support (no ``append``).
        adapts = list(dict.fromkeys(args.seq))
        setstr = ["user-input"] * len(adapts)
    else:
        msg = ("warning: predicted adapter is too short (<{0}): '{1}'\n"
               "warning: '{1}' will not be further investigated\n")
        # Maps each truncated adapter to the "k:r" settings that produced it.
        params = {}
        for k in Ks:
            for r in Rs:
                aout = adapter_prediction(fastq, r, k, SAMPLE_NUM)[0][0]
                if len(aout) < args.prefix_match:
                    # {0} is the minimum length, {1} the rejected prediction.
                    sys.stderr.write(msg.format(args.prefix_match, aout))
                    continue
                aseq = aout[:args.prefix_match + 5]
                params.setdefault(aseq, []).append(
                    "{}:{:.1f}".format(k, r))
        adapts = list(params.keys())
        setstr = [';'.join(s) for s in params.values()]

    # The untreated input is always evaluated alongside the candidates.
    adapts.append("RAW_INPUT")
    setstr.append("NO_TREATMENT")
    # NOTE(review): after the append above this check can never fire; kept in
    # its original position to avoid changing behavior -- confirm whether it
    # was meant to run before "RAW_INPUT" is added.
    if not adapts:
        raise Exception("no valid adapters to further process")

    table = []
    for i, aseq in enumerate(adapts):
        cnts = map_clean_reads(
            fastq, aseq[:args.prefix_match], args.trim_5p, args.trim_3p,
            args.min_len, args.max_len, args.map_command, TEMP_DIR)
        # Counts as a percentage of the total number of reads.
        read_stats = [c / total_read * 100 for c in cnts]
        table.append([aseq, cnts[0], read_stats[0],
                      cnts[1], read_stats[1], setstr[i]])

    make_stats_report(
        table, total_read, args.subsample_rate, args.prefix_match, sd,
        original_fastq, args.output_dir, TEMP_DIR, args.no_output_files)