def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    holds the miRBase reference and stores the result in ``data['seqbuster']``.
    When ``<work_dir>/mirdeep2/novel/hairpin.fa`` exists, a second run against
    that directory is stored in ``data['seqbuster_novel']``. The value returned
    by ``_trna_annotation`` is stored in ``data['trna']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read here.
    :returns: ``[[data]]`` with the annotation entries added.
    :raises ValueError: when no miRBase reference is set for the sample.
    """
    sample_name = data["rgnames"]['sample']
    work_dir = dd.get_work_dir(data)
    out_dir = op.join(work_dir, "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample_name)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the final message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    species = dd.get_species(data)

    data['seqbuster'] = _miraligner(data["collapse"], out_file, species,
                                    mirbase_dir, data['config'])

    # Only annotate against the novel database when its hairpin file exists.
    novel_dir = op.join(work_dir, "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"],
                                              "%s_novel" % out_file,
                                              species, novel_dir,
                                              data['config'])

    data['trna'] = _trna_annotation(data)
    return [[data]]
def mirbase(data):
    """
    Annotate the collapsed reads of one sample against miRBase.

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    holds the miRBase reference and stores the result in ``data['seqbuster']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read here.
    :returns: ``[[data]]`` with ``data['seqbuster']`` set.
    :raises ValueError: when no miRBase reference is set for the sample.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = op.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample_name)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Fail with an actionable message instead of passing an empty
        # reference on to the path helpers.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Named mirbase_dir so the local does not shadow this function's name.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def mirbase(data):
    """
    Annotate the collapsed reads of one sample against miRBase.

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    holds the miRBase reference and stores the result in ``data['seqbuster']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read here.
    :returns: ``[[data]]`` with ``data['seqbuster']`` set.
    :raises ValueError: when no miRBase reference is set for the sample.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = op.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample_name)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the final message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Computed once; named mirbase_dir so it does not shadow this function.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def run(data):
    """
    Run miRDeep2 for the project and parse its result table.

    Paths are derived from the first sample, ``data[0][0]``: the genome
    reference, the directory of the miRBase reference (``hairpin.fa``,
    ``mature.fa``, ``Rfam_for_miRDeep.fa``) and the work directory
    (``align/seqs.bam``, ``seqcluster/prepare/seqs.ma``). Everything runs
    from inside ``<work_dir>/mirdeep2``.

    The command is executed only when the ``miRDeep2.pl`` script next to the
    Python executable exists, ``result_res.csv`` is not there yet, and both
    ``mature.fa`` and ``Rfam_for_miRDeep.fa`` exist.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: the value of ``_parse_novel`` when ``result_res.csv`` exists,
        otherwise ``None``.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    rfam_file = op.join(mirbase, "Rfam_for_miRDeep.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        can_run = (file_exists(mirdeep2) and not file_exists(out_file)
                   and file_exists(mature) and file_exists(rfam_file))
        if can_run:
            # Format exactly once with explicit fields: a second pass over
            # the rendered string would choke on any path containing braces.
            cmd = ("{perl_exports} && {mirdeep2} {collapsed} {genome} {bam_file} "
                   "{mature} none {hairpin} -f {rfam_file} "
                   "-r simple -c -d -P -t {species} -z res").format(
                       perl_exports=perl_exports, mirdeep2=mirdeep2,
                       collapsed=collapsed, genome=genome, bam_file=bam_file,
                       mature=mature, hairpin=hairpin, rfam_file=rfam_file,
                       species=species)
            do.run(cmd, "Running mirdeep2.")
        # Parsing stays inside the chdir block so it runs from out_dir.
        if file_exists(out_file):
            return _parse_novel(out_file, species)
    return None
def _trna_annotation(data):
    """
    Quantify tRNAs for one sample with tDRmapper.

    Looks for ``trna_mature_pre.fa`` next to the miRBase reference and for
    ``TdrMappingScripts.pl`` next to the Python executable. When both exist
    and the expected output is missing, the script is run on
    ``data["clean_fastq"]`` inside a temporary directory and every file
    matching ``*mapped*`` is moved to ``<work_dir>/trna/<sample name>``.

    :param data: sample dictionary; ``data["clean_fastq"]`` is read here.
    :returns: path to ``<input name>.hq_cs.mapped`` in the sample's tRNA
        directory, or ``None`` when the reference or the script is missing.
    """
    ref_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    trna_ref = op.join(ref_dir, "trna_mature_pre.fa")
    name = dd.get_sample_name(data)
    trna_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "trna", name))
    in_file = op.basename(data["clean_fastq"])
    tdrmapper = os.path.join(os.path.dirname(sys.executable),
                             "TdrMappingScripts.pl")

    if not (file_exists(trna_ref) and file_exists(tdrmapper)):
        logger.info("There is no tRNA annotation to run TdrMapper.")
        return None

    out_file = op.join(trna_dir, in_file + ".hq_cs.mapped")
    if file_exists(out_file):
        # Output already present: nothing to run.
        return out_file

    with tx_tmpdir(data) as txdir, utils.chdir(txdir):
        # The script is given the bare file name, so link the reads into
        # the temporary directory we are running from.
        utils.symlink_plus(data["clean_fastq"], op.join(txdir, in_file))
        cmd = "perl {tdrmapper} {trna_ref} {in_file}".format(
            tdrmapper=tdrmapper, trna_ref=trna_ref, in_file=in_file)
        do.run(cmd, "tRNA for %s" % name)
        for mapped in glob.glob("*mapped*"):
            shutil.move(mapped, trna_dir)
    return out_file
def mirbase(data):
    """
    Annotate the collapsed reads of one sample against miRBase.

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    holds the miRBase reference and stores the result in ``data['seqbuster']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read here.
    :returns: ``[[data]]`` with ``data['seqbuster']`` set.
    :raises ValueError: when no miRBase reference is set for the sample.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = op.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample_name)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the final message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Computed once; named mirbase_dir so it does not shadow this function.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    holds the miRBase reference and stores the result in ``data['seqbuster']``.
    When ``<work_dir>/mirdeep2/novel/hairpin.fa`` exists, a second run against
    that directory is stored in ``data['seqbuster_novel']``. The value returned
    by ``_trna_annotation`` is stored in ``data['trna']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read here.
    :returns: ``[[data]]`` with the annotation entries added.
    :raises ValueError: when no miRBase reference is set for the sample.
    """
    sample_name = data["rgnames"]['sample']
    work_dir = dd.get_work_dir(data)
    out_dir = op.join(work_dir, "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample_name)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the final message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    species = dd.get_species(data)

    data['seqbuster'] = _miraligner(data["collapse"], out_file, species,
                                    mirbase_dir, data['config'])

    # Only annotate against the novel database when its hairpin file exists.
    novel_dir = op.join(work_dir, "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"],
                                              "%s_novel" % out_file,
                                              species, novel_dir,
                                              data['config'])

    data['trna'] = _trna_annotation(data)
    return [[data]]
def run(data):
    """
    Run miRDeep2 for the project and parse its result table.

    Paths are derived from the first sample, ``data[0][0]``: the genome
    reference, the directory of the miRBase reference (``hairpin.fa``,
    ``mature.fa``) and the work directory (``align/seqs.bam``,
    ``seqcluster/prepare/seqs.ma``). Everything runs from inside
    ``<work_dir>/mirdeep2``.

    The command is executed only when ``config_utils.get_program`` returns a
    truthy value for ``miRDeep2.pl``, ``result_res.csv`` is not there yet and
    ``mature.fa`` exists. ``_parse_novel`` is called whenever
    ``result_res.csv`` exists afterwards.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: ``None``.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            # Format exactly once with explicit fields: a second pass over
            # the rendered string would choke on any path containing braces.
            cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} "
                   "{mature} none {hairpin} "
                   "-r simple -c -d -P -t {species} -z res").format(
                       mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
                       bam_file=bam_file, mature=mature, hairpin=hairpin,
                       species=species)
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): the original nesting of this call was lost with the
        # file's indentation; it is guarded so a missing result table is
        # never handed to the parser -- confirm against upstream.
        if file_exists(out_file):
            _parse_novel(out_file)
def _trna_annotation(data):
    """
    Quantify tRNAs for one sample with tDRmapper.

    Looks for ``trna_mature_pre.fa`` next to the miRBase reference and for
    ``TdrMappingScripts.pl`` next to the Python executable. When both exist
    and the expected output is missing, the script is run on
    ``data["clean_fastq"]`` inside a temporary directory and every file
    matching ``*mapped*`` is moved to ``<work_dir>/trna/<sample name>``.

    :param data: sample dictionary; ``data["clean_fastq"]`` is read here.
    :returns: path to ``<input name>.hq_cs.mapped`` in the sample's tRNA
        directory, or ``None`` when the reference or the script is missing.
    """
    ref_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    trna_ref = op.join(ref_dir, "trna_mature_pre.fa")
    name = dd.get_sample_name(data)
    trna_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "trna", name))
    in_file = op.basename(data["clean_fastq"])
    tdrmapper = os.path.join(os.path.dirname(sys.executable),
                             "TdrMappingScripts.pl")

    if not (file_exists(trna_ref) and file_exists(tdrmapper)):
        logger.info("There is no tRNA annotation to run TdrMapper.")
        return None

    out_file = op.join(trna_dir, in_file + ".hq_cs.mapped")
    if file_exists(out_file):
        # Output already present: nothing to run.
        return out_file

    with tx_tmpdir(data) as txdir, utils.chdir(txdir):
        # The script is given the bare file name, so link the reads into
        # the temporary directory we are running from.
        utils.symlink_plus(data["clean_fastq"], op.join(txdir, in_file))
        cmd = "perl {tdrmapper} {trna_ref} {in_file}".format(
            tdrmapper=tdrmapper, trna_ref=trna_ref, in_file=in_file)
        do.run(cmd, "tRNA for %s" % name)
        for mapped in glob.glob("*mapped*"):
            shutil.move(mapped, trna_dir)
    return out_file
def run(data):
    """
    Run miRDeep2 for the project and parse its result table.

    Paths are derived from the first sample, ``data[0][0]``: the genome
    reference, the directory of the miRBase reference (``hairpin.fa``,
    ``mature.fa``) and the work directory (``align/seqs.bam``,
    ``seqcluster/prepare/seqs.ma``). Everything runs from inside
    ``<work_dir>/mirdeep2``.

    The command is executed only when ``config_utils.get_program`` returns a
    truthy value for ``miRDeep2.pl``, ``result_res.csv`` is not there yet and
    ``mature.fa`` exists. ``_parse_novel`` is called whenever
    ``result_res.csv`` exists afterwards.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: ``None``.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            # Format exactly once with explicit fields: a second pass over
            # the rendered string would choke on any path containing braces.
            cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} "
                   "{mature} none {hairpin} "
                   "-r simple -c -d -P -t {species} -z res").format(
                       mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
                       bam_file=bam_file, mature=mature, hairpin=hairpin,
                       species=species)
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): the original nesting of this call was lost with the
        # file's indentation; it is guarded so a missing result table is
        # never handed to the parser -- confirm against upstream.
        if file_exists(out_file):
            _parse_novel(out_file)