def run(data):
    """Run miRDeep2 for the first sample in ``data`` and parse its predictions.

    All settings are read from ``data[0][0]``. The miRBase hairpin/mature
    files and the species are only handed to miRDeep2 when the hairpin file
    exists; otherwise the placeholders "none", "none" and "na" are used.
    miRDeep2 is launched only when the miRDeep2.pl script and the Rfam file
    exist and ``result_res.csv`` has not been produced yet. A failing run is
    logged as a warning instead of being raised.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise None.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    # The script is looked up next to the running Python interpreter.
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    hairpin, mature, species = "none", "none", "na"
    rfam_file = dd.get_mirdeep2_file(sample)
    if file_exists(dd.get_mirbase_hairpin(sample)):
        species = dd.get_species(sample)
        hairpin = dd.get_mirbase_hairpin(sample)
        mature = dd.get_mirbase_mature(sample)

    logger.debug("Preparing for mirdeep2 analysis.")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        # _prepare_inputs supplies the collapsed/bam paths given to miRDeep2.
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once with explicit fields: a second pass over the
        # rendered command would choke on any path containing "{" or "}".
        cmd = ("{perl_exports} && perl {mirdeep2} {collapsed} {genome} "
               "{bam_file} {mature} none {hairpin} -f {rfam_file} "
               "-r simple -c -P -t {species} -z res").format(
                   perl_exports=perl_exports, mirdeep2=mirdeep2,
                   collapsed=collapsed, genome=genome, bam_file=bam_file,
                   mature=mature, hairpin=hairpin, rfam_file=rfam_file,
                   species=species)
        if (file_exists(mirdeep2) and not file_exists(out_file)
                and file_exists(rfam_file)):
            try:
                do.run(cmd, "Running mirdeep2.")
            except Exception:
                # Best effort: keep the pipeline going, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                logger.warning("mirdeep2 failed. Please report the error to https://github.com/lpantano/mirdeep2_core/issues.")
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
def sample_annotation(data):
    """Annotate one sample's collapsed reads with seqbuster (miraligner).

    Fills ``data['seqbuster']`` when a miRBase hairpin file is configured,
    ``data['seqbuster_novel']`` when the mirdeep2 novel database exists in the
    work directory, and ``data['trna']`` when "trna" is among the expression
    callers. ``spikein.counts_spikein`` is applied last.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]["sample"]
    callers = dd.get_expression_caller(data)
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if not hairpin:
        logger.debug("No annotation file from miRBase.")
    else:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data["seqbuster"] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data["config"])

    # The novel database is annotated with the literal "None" when no species
    # is configured.
    species = dd.get_species(data) or "None"
    logger.debug("Looking for mirdeep2 database for %s" % sample_name)
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data["seqbuster_novel"] = _miraligner(
            data["collapse"], out_prefix + "_novel", species,
            novel_dir, data["config"])

    if "trna" in callers:
        data["trna"] = _trna_annotation(data)
    data = spikein.counts_spikein(data)
    return [[data]]
def sample_annotation(data):
    """Align and annotate one sample's collapsed reads against miRBase.

    When a miRBase hairpin file is configured and the collapsed file exists,
    ``data['transcriptome_bam']`` (from ``_align``) and ``data['seqbuster']``
    (from ``_miraligner``) are filled in; otherwise a debug message is logged.
    ``data['seqbuster_novel']`` is added when the mirdeep2 novel database
    exists in the work directory, ``data['trna']`` when "trna" is among the
    expression callers, and ``spikein.counts_spikein`` is applied last.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]["sample"]
    callers = dd.get_expression_caller(data)
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if not hairpin:
        logger.debug("No annotation file from miRBase.")
    elif not utils.file_exists(data["collapse"]):
        logger.debug("Trimmed collapsed file is empty for %s." % sample_name)
    else:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data["transcriptome_bam"] = _align(
            data["collapse"], hairpin, out_prefix, data)
        data["seqbuster"] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data["config"])

    # The novel database is annotated with the literal "None" when no species
    # is configured.
    species = dd.get_species(data) or "None"
    logger.debug("Looking for mirdeep2 database for %s" % sample_name)
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data["seqbuster_novel"] = _miraligner(
            data["collapse"], out_prefix + "_novel", species,
            novel_dir, data["config"])

    if "trna" in callers:
        data["trna"] = _mint_trna_annotation(data)
    data = spikein.counts_spikein(data)
    return [[data]]
def run(data):
    """Run miRDeep2 for the first sample in ``data`` and parse novel miRNAs.

    hairpin.fa, mature.fa and Rfam_for_miRDeep.fa are taken from the directory
    that holds the miRBase reference of ``data[0][0]``. miRDeep2 is launched
    only when the miRDeep2.pl script, mature.fa and the Rfam file exist and
    ``result_res.csv`` has not been produced yet.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise None.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    # The script is looked up next to the running Python interpreter.
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase_dir, "hairpin.fa")
    mature = op.join(mirbase_dir, "mature.fa")
    rfam_file = op.join(mirbase_dir, "Rfam_for_miRDeep.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        # _prepare_inputs supplies the collapsed/bam paths given to miRDeep2.
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once with explicit fields: a second pass over the
        # rendered command would choke on any path containing "{" or "}".
        cmd = ("{perl_exports} && {mirdeep2} {collapsed} {genome} "
               "{bam_file} {mature} none {hairpin} -f {rfam_file} "
               "-r simple -c -d -P -t {species} -z res").format(
                   perl_exports=perl_exports, mirdeep2=mirdeep2,
                   collapsed=collapsed, genome=genome, bam_file=bam_file,
                   mature=mature, hairpin=hairpin, rfam_file=rfam_file,
                   species=species)
        if (file_exists(mirdeep2) and not file_exists(out_file)
                and file_exists(mature) and file_exists(rfam_file)):
            do.run(cmd, "Running mirdeep2.")
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
def sample_annotation(data):
    """Annotate one sample's collapsed reads with seqbuster (miraligner).

    Fills ``data['seqbuster']`` using the directory of the miRBase reference,
    ``data['seqbuster_novel']`` when the mirdeep2 novel database exists in the
    work directory, and ``data['trna']`` from ``_trna_annotation``.

    Returns the updated sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured for the sample.
    """
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Trailing space added: the two literals are concatenated implicitly
        # and previously rendered as "data.Please".
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase,
                                    data['config'])
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"],
                                              "%s_novel" % out_file,
                                              dd.get_species(data),
                                              novel_dir, data['config'])
    data['trna'] = _trna_annotation(data)
    return [[data]]
def sample_annotation(data):
    """Annotate one sample's collapsed reads with seqbuster (miraligner).

    Fills ``data['seqbuster']`` when a miRBase hairpin file is configured,
    ``data['seqbuster_novel']`` when the mirdeep2 novel database exists in the
    work directory, and always ``data['trna']`` from ``_trna_annotation``.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]["sample"]
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if hairpin:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data["seqbuster"] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data["config"])

    # The novel database is annotated with the literal "None" when no species
    # is configured.
    species = dd.get_species(data) or "None"
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data["seqbuster_novel"] = _miraligner(
            data["collapse"], out_prefix + "_novel", species,
            novel_dir, data["config"])

    data["trna"] = _trna_annotation(data)
    return [[data]]
def run(data):
    """Run miRge on the samples in ``data`` unless its output already exists.

    Settings come from the first sample (``data[0][0]``). The tool runs inside
    a temporary directory that is moved to ``<work_dir>/mirge`` afterwards.

    Returns the absolute paths matching ``<work_dir>/mirge/*/*``.

    Raises ValueError when the species is not in ``SPS`` or when no library
    was found for the tool.
    """
    # Local names are deliberately kept: the template returned by _cmd() is
    # filled from locals(), so it looks these variables up by name.
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    out_dir = os.path.join(work_dir, "mirge")
    lib = _find_lib(sample)
    mirge = _find_mirge(sample)
    bowtie = _find_bowtie(sample)
    sps = dd.get_species(sample)
    species = SPS.get(sps, "")

    if not species:
        raise ValueError("species not supported (hsa, mmu, rno, dre, cel, dme): %s" % sps)
    if not lib:
        raise ValueError("-lib option is not set up in resources for mirge tool. Read above warnings lines.")

    if not utils.file_exists(out_dir):
        with tx_tmpdir() as tmp_dir:
            sample_file = _create_sample_file(data, tmp_dir)
            do.run(_cmd().format(**locals()), "Running miRge2.0.")
            shutil.move(tmp_dir, out_dir)

    return list(map(os.path.abspath,
                    glob.glob(os.path.join(out_dir, "*", "*"))))
def sample_annotation(data):
    """Annotate one sample's collapsed reads with seqbuster (miraligner).

    Fills ``data['seqbuster']`` when a miRBase hairpin file is configured,
    ``data['seqbuster_novel']`` when the mirdeep2 novel database exists in the
    work directory, and always ``data['trna']`` from ``_trna_annotation``.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]["sample"]
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if hairpin:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data["seqbuster"] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data["config"])

    # The novel database is annotated with the literal "None" when no species
    # is configured.
    species = dd.get_species(data) or "None"
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data["seqbuster_novel"] = _miraligner(
            data["collapse"], out_prefix + "_novel", species,
            novel_dir, data["config"])

    data["trna"] = _trna_annotation(data)
    return [[data]]
def mirbase(data):
    """Annotate one sample's collapsed reads against miRBase with miraligner.

    The miraligner output is stored under ``data['seqbuster']``; the database
    directory is the one holding the sample's miRBase reference. A missing
    reference is not checked here.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    # Computed once (the original assigned it twice) and named so it no
    # longer shadows this function's own name.
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def run(data):
    """Run miRDeep2 for the first sample in ``data`` and parse its predictions.

    All settings are read from ``data[0][0]``. The miRBase hairpin/mature
    files and the species are only handed to miRDeep2 when the hairpin file
    exists; otherwise the placeholders "none", "none" and "na" are used.
    The step is skipped with a warning when the Rfam file or the miRDeep2.pl
    script is missing. miRDeep2 itself is launched only when
    ``result_res.csv`` has not been produced yet, and a failing run is logged
    as a warning instead of being raised.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise None.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    # The script is looked up next to the running Python interpreter.
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    hairpin, mature, species = "none", "none", "na"
    rfam_file = dd.get_mirdeep2_file(sample)
    if file_exists(dd.get_mirbase_hairpin(sample)):
        species = dd.get_species(sample)
        hairpin = dd.get_mirbase_hairpin(sample)
        mature = dd.get_mirbase_mature(sample)

    logger.debug("Preparing for mirdeep2 analysis.")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    if not file_exists(rfam_file):
        logger.warning("mirdeep2 Rfam file not instaled. Skipping...")
        return None
    if not file_exists(mirdeep2):
        logger.warning("mirdeep2 executable file not found. Skipping...")
        return None
    with chdir(out_dir):
        # _prepare_inputs supplies the collapsed/bam paths given to miRDeep2.
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once with explicit fields: a second pass over the
        # rendered command would choke on any path containing "{" or "}".
        cmd = ("{perl_exports} && perl {mirdeep2} {collapsed} {genome} "
               "{bam_file} {mature} none {hairpin} -f {rfam_file} "
               "-r simple -c -P -t {species} -z res").format(
                   perl_exports=perl_exports, mirdeep2=mirdeep2,
                   collapsed=collapsed, genome=genome, bam_file=bam_file,
                   mature=mature, hairpin=hairpin, rfam_file=rfam_file,
                   species=species)
        if not file_exists(out_file):
            try:
                do.run(cmd, "Running mirdeep2.")
            except Exception:
                # Best effort: keep the pipeline going, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                logger.warning("mirdeep2 failed. Please report the error to https://github.com/lpantano/mirdeep2_core/issues.")
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
def mirbase(data):
    """Annotate one sample's collapsed reads against miRBase with miraligner.

    The miraligner output is stored under ``data['seqbuster']``; the database
    directory is the one holding the sample's miRBase reference.

    Returns the updated sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured for the sample.
    """
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Trailing space added: the two literals are concatenated implicitly
        # and previously rendered as "data.Please".
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Computed once (the original assigned it twice) and named so it no
    # longer shadows this function's own name.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def mirbase(data):
    """Annotate one sample's collapsed reads against miRBase with miraligner.

    The miraligner output is stored under ``data['seqbuster']``; the database
    directory is the one holding the sample's miRBase reference.

    Returns the updated sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured for the sample.
    """
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Trailing space added: the two literals are concatenated implicitly
        # and previously rendered as "data.Please".
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Computed once (the original assigned it twice) and named so it no
    # longer shadows this function's own name.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def sample_annotation(data):
    """Annotate one sample's collapsed reads with seqbuster (miraligner).

    Fills ``data['seqbuster']`` using the directory of the miRBase reference,
    ``data['seqbuster_novel']`` when the mirdeep2 novel database exists in the
    work directory, and ``data['trna']`` from ``_trna_annotation``.

    Returns the updated sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured for the sample.
    """
    names = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Trailing space added: the two literals are concatenated implicitly
        # and previously rendered as "data.Please".
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase,
                                    data['config'])
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"],
                                              "%s_novel" % out_file,
                                              dd.get_species(data),
                                              novel_dir, data['config'])
    data['trna'] = _trna_annotation(data)
    return [[data]]
def run(data):
    """Run miRDeep2 for the first sample in ``data`` and parse the result file.

    The program is resolved through ``config_utils.get_program``; hairpin.fa
    and mature.fa are taken from the directory that holds the miRBase
    reference of ``data[0][0]``. miRDeep2 is launched only when the program
    was resolved, mature.fa exists and ``result_res.csv`` has not been
    produced yet. Nothing is returned.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase_dir, "hairpin.fa")
    mature = op.join(mirbase_dir, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        # _prepare_inputs supplies the collapsed/bam paths given to miRDeep2.
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once with explicit fields: a second pass over the
        # rendered command would choke on any path containing "{" or "}".
        cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} {mature} none "
               "{hairpin} -r simple -c -d -P -t {species} -z res").format(
                   mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
                   bam_file=bam_file, mature=mature, hairpin=hairpin,
                   species=species)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): parsing is attempted even when miRDeep2 was skipped
        # above -- confirm _parse_novel copes with a missing result file.
        _parse_novel(out_file)
def run(data):
    """Run miRge on the samples in ``data`` unless its output already exists.

    Settings come from the first sample (``data[0][0]``). The tool runs inside
    a temporary directory that is moved to ``<work_dir>/mirge`` afterwards.

    Returns the absolute paths matching ``<work_dir>/mirge/*/*``.

    Raises ValueError when the species is not in ``SPS`` or when no library
    was found for the tool.
    """
    # Local names are deliberately kept: the template returned by _cmd() is
    # filled from locals(), so it looks these variables up by name.
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    out_dir = os.path.join(work_dir, "mirge")
    lib = _find_lib(sample)
    mirge = _find_mirge(sample)
    bowtie = _find_bowtie(sample)
    sps = dd.get_species(sample)
    species = SPS.get(sps, "")

    if not species:
        raise ValueError("species not supported (hsa, mmu, rno, dre, cel, dme): %s" % sps)
    if not lib:
        raise ValueError("-lib option is not set up in resources for mirge tool. Read above warnings lines.")

    if not utils.file_exists(out_dir):
        with tx_tmpdir() as tmp_dir:
            sample_file = _create_sample_file(data, tmp_dir)
            do.run(_cmd().format(**locals()), "Running miRge2.0.")
            shutil.move(tmp_dir, out_dir)

    return list(map(os.path.abspath,
                    glob.glob(os.path.join(out_dir, "*", "*"))))
def run(data):
    """Run miRDeep2 for the first sample in ``data`` and parse the result file.

    The program is resolved through ``config_utils.get_program``; hairpin.fa
    and mature.fa are taken from the directory that holds the miRBase
    reference of ``data[0][0]``. miRDeep2 is launched only when the program
    was resolved, mature.fa exists and ``result_res.csv`` has not been
    produced yet. Nothing is returned.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase_dir, "hairpin.fa")
    mature = op.join(mirbase_dir, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        # _prepare_inputs supplies the collapsed/bam paths given to miRDeep2.
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once with explicit fields: a second pass over the
        # rendered command would choke on any path containing "{" or "}".
        cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} {mature} none "
               "{hairpin} -r simple -c -d -P -t {species} -z res").format(
                   mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
                   bam_file=bam_file, mature=mature, hairpin=hairpin,
                   species=species)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): parsing is attempted even when miRDeep2 was skipped
        # above -- confirm _parse_novel copes with a missing result file.
        _parse_novel(out_file)