def trim_srna_sample(data):
    """Remove the 3' adapter from a smallRNA-seq sample with atropos.

    The cleaned reads are written to ``<work_dir>/trimmed/<sample>/`` and the
    sample dictionary is updated in place with the keys ``files[0]``,
    ``clean_fastq``, ``collapse``, ``size_stats`` and, when trimming is run
    (or a previous output is reused), ``log_trimming``.

    NOTE(review): a later definition of ``trim_srna_sample`` in this file
    rebinds the name, so this version is shadowed once the module is loaded.

    :param data: sample dictionary; reads ``files``, ``rgnames``, ``config``
        and optionally ``resources``.
    :returns: ``[[data]]`` with the updated sample dictionary.
    :raises ValueError: if trimming is requested, no adapter is configured
        and the module-level ``error_dnapi`` is set; or if cutadapt options
        are present in ``resources`` (atropos is used instead).
    """
    data = umi_transform(data)
    in_file = data["files"][0]
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "trimmed")
    out_dir = os.path.join(work_dir, names)
    log_out = os.path.join(out_dir, "%s.log" % names)
    utils.safe_makedir(out_dir)
    out_file = replace_directory(append_stem(in_file, ".clean"), out_dir)
    trim_reads = data["config"]["algorithm"].get("trim_reads", True)

    # Reuse a previous run: only refresh the derived outputs.
    if utils.file_exists(out_file):
        data["files"][0] = out_file
        data["clean_fastq"] = out_file
        data["collapse"] = _collapse(data["clean_fastq"])
        data["size_stats"] = _summary(data['collapse'])
        data["log_trimming"] = log_out
        return [[data]]

    adapter = dd.get_adapters(data)
    # "4N" is a marker entry, not a sequence: it requests the extra pass
    # below that runs atropos with "-u 4 -u -4".
    is_4n = any(a == "4N" for a in adapter)
    nucleotide_re = re.compile("^([NATGC]+)$")
    adapter = [a for a in adapter if nucleotide_re.match(a)]
    if adapter and not trim_reads:
        # An explicit adapter wins over trim_reads=False.
        trim_reads = True
        logger.info("Adapter is set up in config file, but trim_reads is not true. "
                    "If you want to skip trimming, skip adapter option from config.")
    if trim_reads and not adapter and error_dnapi:
        raise ValueError(error_dnapi)
    if trim_reads:
        # Fall back to adapter prediction when none is configured.
        adapters = adapter if adapter else _dnapi_prediction(in_file, out_dir)
    # `adapters` only exists when trim_reads is true; the short-circuit
    # below keeps it from being evaluated otherwise.
    times = "" if not trim_reads or len(adapters) == 1 else "--times %s" % len(adapters)

    if trim_reads and adapters:
        # NOTE: the local names below (adapter_cmd, times, options, cores,
        # atropos, in_file, out_noadapter_file, out_short_file, log_out,
        # tx_out_file, ...) are consumed through cmd.format(**locals()),
        # so they must not be renamed or removed.
        adapter_cmd = " ".join("-a " + a for a in adapters)
        only_n_re = re.compile("^N+$")
        if any(only_n_re.match(a) for a in adapters):
            adapter_cmd = "-N %s" % adapter_cmd
        out_noadapter_file = replace_directory(append_stem(in_file, ".fragments"), out_dir)
        out_short_file = replace_directory(append_stem(in_file, ".short"), out_dir)
        atropos = _get_atropos()
        options = " ".join(data.get('resources', {}).get('atropos', {}).get("options", ""))
        if options.strip() == "-u 4 -u -4":
            # These options are applied in the dedicated second pass instead.
            options = ""
            is_4n = "4N"
        num_cores = dd.get_num_cores(data)
        cores = "--threads %s" % num_cores if num_cores > 1 else ""
        if " ".join(data.get('resources', {}).get('cutadapt', {}).get("options", "")):
            raise ValueError("Atropos is now used, but cutadapt options found in YAML file. "
                             "See https://atropos.readthedocs.io/en/latest/")
        cmd = _cmd_atropos()
        if not utils.file_exists(out_file):
            with file_transaction(out_file) as tx_out_file:
                do.run(cmd.format(**locals()), "remove adapter for %s" % names)
                if utils.file_exists(log_out):
                    # Replace the short-reads file path with the sample
                    # name inside the log.
                    with open(log_out) as log_handle:
                        content = log_handle.read().replace(out_short_file, names)
                    with open(log_out, 'w') as log_handle:
                        log_handle.write(content)
                if is_4n:
                    # Second pass: move the first result aside and use it as
                    # input for a run with "-u 4 -u -4".
                    options = "-u 4 -u -4"
                    in_file = append_stem(tx_out_file, ".tmp")
                    utils.move_safe(tx_out_file, in_file)
                    cmd = "{atropos} {cores} {options} -se {in_file} -o {tx_out_file} -m 17"
                    do.run(cmd.format(**locals()),
                           "atropos with this parameters %s for %s" % (options, names))
        data["log_trimming"] = log_out
    else:
        if not trim_reads:
            logger.debug("Skip trimming for: %s" % names)
        elif not adapters:
            logger.info("No adapter founds in %s, this is an issue related"
                        " to no small RNA enrichment in your sample." % names)
        # Nothing to trim: expose the input under the expected output name.
        symlink_plus(in_file, out_file)

    data["files"][0] = out_file
    data["clean_fastq"] = out_file
    data["collapse"] = _collapse(data["clean_fastq"])
    data["size_stats"] = _summary(data['collapse'])
    return [[data]]
def trim_srna_sample(data):
    """Remove the 3' adapter from a smallRNA-seq sample with atropos.

    The cleaned reads are written to ``<work_dir>/trimmed/<sample>/`` and the
    sample dictionary is updated in place with the keys ``files[0]``,
    ``clean_fastq``, ``collapse``, ``size_stats`` and, when trimming is run
    (or a previous output is reused), ``log_trimming``.

    NOTE(review): this redefines the ``trim_srna_sample`` declared earlier in
    this file; this version resolves the atropos executable through
    ``config_utils.get_program`` and is the one bound after module load.

    :param data: sample dictionary; reads ``files``, ``rgnames``, ``config``
        and optionally ``resources``.
    :returns: ``[[data]]`` with the updated sample dictionary.
    :raises ValueError: if trimming is requested, no adapter is configured
        and the module-level ``error_dnapi`` is set; or if cutadapt options
        are present in ``resources`` (atropos is used instead).
    """
    data = umi_transform(data)
    in_file = data["files"][0]
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "trimmed")
    out_dir = os.path.join(work_dir, names)
    log_out = os.path.join(out_dir, "%s.log" % names)
    utils.safe_makedir(out_dir)
    out_file = replace_directory(append_stem(in_file, ".clean"), out_dir)
    trim_reads = data["config"]["algorithm"].get("trim_reads", True)

    # Reuse a previous run: only refresh the derived outputs.
    if utils.file_exists(out_file):
        data["files"][0] = out_file
        data["clean_fastq"] = out_file
        data["collapse"] = _collapse(data["clean_fastq"])
        data["size_stats"] = _summary(data['collapse'])
        data["log_trimming"] = log_out
        return [[data]]

    adapter = dd.get_adapters(data)
    # "4N" is a marker entry, not a sequence: it requests the extra pass
    # below that runs atropos with "-u 4 -u -4".
    is_4n = any(a == "4N" for a in adapter)
    nucleotide_re = re.compile("^([NATGC]+)$")
    adapter = [a for a in adapter if nucleotide_re.match(a)]
    if adapter and not trim_reads:
        # An explicit adapter wins over trim_reads=False.
        trim_reads = True
        logger.info("Adapter is set up in config file, but trim_reads is not true. "
                    "If you want to skip trimming, skip adapter option from config.")
    if trim_reads and not adapter and error_dnapi:
        raise ValueError(error_dnapi)
    if trim_reads:
        # Fall back to adapter prediction when none is configured.
        adapters = adapter if adapter else _dnapi_prediction(in_file, out_dir)
    # `adapters` only exists when trim_reads is true; the short-circuit
    # below keeps it from being evaluated otherwise.
    times = "" if not trim_reads or len(adapters) == 1 else "--times %s" % len(adapters)

    if trim_reads and adapters:
        # NOTE: the local names below (adapter_cmd, times, options, cores,
        # atropos, in_file, out_noadapter_file, out_short_file, log_out,
        # tx_out_file, ...) are consumed through cmd.format(**locals()),
        # so they must not be renamed or removed.
        adapter_cmd = " ".join("-a " + a for a in adapters)
        only_n_re = re.compile("^N+$")
        if any(only_n_re.match(a) for a in adapters):
            adapter_cmd = "-N %s" % adapter_cmd
        out_noadapter_file = replace_directory(append_stem(in_file, ".fragments"), out_dir)
        out_short_file = replace_directory(append_stem(in_file, ".short"), out_dir)
        atropos = config_utils.get_program("atropos", data, default="atropos")
        options = " ".join(data.get('resources', {}).get('atropos', {}).get("options", ""))
        if options.strip() == "-u 4 -u -4":
            # These options are applied in the dedicated second pass instead.
            options = ""
            is_4n = "4N"
        num_cores = dd.get_num_cores(data)
        cores = "--threads %s" % num_cores if num_cores > 1 else ""
        if " ".join(data.get('resources', {}).get('cutadapt', {}).get("options", "")):
            raise ValueError("Atropos is now used, but cutadapt options found in YAML file. "
                             "See https://atropos.readthedocs.io/en/latest/")
        cmd = _cmd_atropos()
        if not utils.file_exists(out_file):
            with file_transaction(out_file) as tx_out_file:
                do.run(cmd.format(**locals()), "remove adapter for %s" % names)
                if utils.file_exists(log_out):
                    # Replace the short-reads file path with the sample
                    # name inside the log.
                    with open(log_out) as log_handle:
                        content = log_handle.read().replace(out_short_file, names)
                    with open(log_out, 'w') as log_handle:
                        log_handle.write(content)
                if is_4n:
                    # Second pass: move the first result aside and use it as
                    # input for a run with "-u 4 -u -4".
                    options = "-u 4 -u -4"
                    in_file = append_stem(tx_out_file, ".tmp")
                    utils.move_safe(tx_out_file, in_file)
                    cmd = "{atropos} {cores} {options} -se {in_file} -o {tx_out_file} -m 17"
                    do.run(cmd.format(**locals()),
                           "atropos with this parameters %s for %s" % (options, names))
        data["log_trimming"] = log_out
    else:
        if not trim_reads:
            logger.debug("Skip trimming for: %s" % names)
        elif not adapters:
            logger.info("No adapter founds in %s, this is an issue related"
                        " to no small RNA enrichment in your sample." % names)
        # Nothing to trim: expose the input under the expected output name.
        symlink_plus(in_file, out_file)

    data["files"][0] = out_file
    data["clean_fastq"] = out_file
    data["collapse"] = _collapse(data["clean_fastq"])
    data["size_stats"] = _summary(data['collapse'])
    return [[data]]