Example #1
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Runs ``_miraligner`` on ``data["collapse"]`` against the directory that
    contains the miRBase reference and stores the result in
    ``data['seqbuster']``. When ``<work_dir>/mirdeep2/novel/hairpin.fa``
    exists, a second ``_miraligner`` run against that ``novel`` directory is
    stored in ``data['seqbuster_novel']``. Finally the value returned by
    ``_trna_annotation`` is stored in ``data['trna']``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` with the keys described above added.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    work_dir = dd.get_work_dir(data)
    out_dir = os.path.join(work_dir, "mirbase", sample)
    utils.safe_makedir(out_dir)
    # Output prefix handed to _miraligner: <work_dir>/mirbase/<sample>/<sample>
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the final message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    mirbase = op.abspath(op.dirname(mirbase_ref))
    species = dd.get_species(data)

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    species, mirbase,
                                    data['config'])

    # Optional second pass against the hairpins found under mirdeep2/novel.
    novel_dir = op.join(work_dir, "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_file, species,
            novel_dir, data['config'])
    data['trna'] = _trna_annotation(data)
    return [[data]]
Example #2
0
def mirbase(data):
    """
    Annotate miRNAs for one sample against miRBase with ``_miraligner``.

    The output prefix is ``<work_dir>/mirbase/<sample>/<sample>`` and the
    miRBase directory is the folder containing the file returned by
    ``dd.get_mirbase_ref``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` with ``data['seqbuster']`` set to the value
        returned by ``_miraligner``.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Fail with an actionable message rather than letting op.dirname
        # choke on a missing reference.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    # Directory that holds the miRBase reference file.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Example #3
0
def mirbase(data):
    """
    Run ``_miraligner`` for one sample against the miRBase reference.

    Creates ``<work_dir>/mirbase/<sample>`` and uses
    ``<work_dir>/mirbase/<sample>/<sample>`` as the output prefix. The
    database directory is the parent folder of the path returned by
    ``dd.get_mirbase_ref``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` after storing the ``_miraligner`` result under
        ``data['seqbuster']``.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Without this guard a missing reference surfaces as an opaque
        # error from op.dirname.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Example #4
0
def mirbase(data):
    """
    Annotate miRNAs for one sample against miRBase with ``_miraligner``.

    The output prefix is ``<work_dir>/mirbase/<sample>/<sample>``; the
    database directory is the parent folder of the path returned by
    ``dd.get_mirbase_ref``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` with ``data['seqbuster']`` set to the value
        returned by ``_miraligner``.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Adjacent literals are concatenated as-is, so the first one must
        # end with a space to keep the sentences apart.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Example #5
0
def run(data):
    """
    Run miRDeep2 for the project and parse its result table.

    Paths are derived from the first sample (``data[0][0]``): the collapsed
    reads at ``<work_dir>/seqcluster/prepare/seqs.ma``, the alignments at
    ``<work_dir>/align/seqs.bam`` and the miRBase directory next to the
    file returned by ``dd.get_mirbase_ref``. miRDeep2 is executed inside
    ``<work_dir>/mirdeep2`` only when the ``miRDeep2.pl`` script,
    ``mature.fa`` and ``Rfam_for_miRDeep.fa`` exist and ``result_res.csv``
    has not been produced yet.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: the value returned by ``_parse_novel`` when
        ``result_res.csv`` exists, otherwise ``None``.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    # The script is looked up next to the running Python interpreter.
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    rfam_file = op.join(mirbase, "Rfam_for_miRDeep.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        if (file_exists(mirdeep2) and not file_exists(out_file)
                and file_exists(mature) and file_exists(rfam_file)):
            # Format exactly once with explicit fields: a second pass over
            # the rendered command would fail on any path containing
            # '{' or '}'.
            cmd = ("{perl_exports} && {mirdeep2} {collapsed} {genome} "
                   "{bam_file} {mature} none {hairpin} -f {rfam_file} "
                   "-r simple -c -d -P -t {species} -z res").format(
                       perl_exports=perl_exports, mirdeep2=mirdeep2,
                       collapsed=collapsed, genome=genome,
                       bam_file=bam_file, mature=mature, hairpin=hairpin,
                       rfam_file=rfam_file, species=species)
            do.run(cmd, "Running mirdeep2.")
        if file_exists(out_file):
            return _parse_novel(out_file, species)
    # No result table: miRDeep2 was skipped or produced no output.
    return None
Example #6
0
def _trna_annotation(data):
    """
    use tDRmapper to quantify tRNAs

    Looks for ``trna_mature_pre.fa`` in the directory of the miRBase
    reference and for ``TdrMappingScripts.pl`` next to the Python
    executable. If either is missing, logs a message and returns ``None``.
    Otherwise the script is run on ``data["clean_fastq"]`` inside a
    transactional temporary directory and every ``*mapped*`` file it
    leaves there is moved to ``<work_dir>/trna/<sample>``.

    :param data: sample dictionary; ``data["clean_fastq"]`` is read.
    :returns: ``<trna dir>/<fastq basename>.hq_cs.mapped`` or ``None``.
    """
    ref_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    trna_fasta = op.join(ref_dir, "trna_mature_pre.fa")
    sample = dd.get_sample_name(data)
    trna_dir = os.path.join(dd.get_work_dir(data), "trna", sample)
    trna_dir = utils.safe_makedir(trna_dir)
    fastq_name = op.basename(data["clean_fastq"])
    script = os.path.join(os.path.dirname(sys.executable),
                          "TdrMappingScripts.pl")

    # Both the tRNA reference and the script are required.
    if not (file_exists(trna_fasta) and file_exists(script)):
        logger.info("There is no tRNA annotation to run TdrMapper.")
        return None

    mapped = op.join(trna_dir, fastq_name + ".hq_cs.mapped")
    if file_exists(mapped):
        # Already computed on a previous run.
        return mapped

    with tx_tmpdir(data) as txdir:
        with utils.chdir(txdir):
            # The script receives the bare file name, so the fastq is
            # linked into the temporary directory first.
            linked_fastq = op.join(txdir, fastq_name)
            utils.symlink_plus(data["clean_fastq"], linked_fastq)
            cmd = "perl {tdrmapper} {trna_ref} {in_file}".format(
                tdrmapper=script, trna_ref=trna_fasta, in_file=fastq_name)
            do.run(cmd, "tRNA for %s" % sample)
            for produced in glob.glob("*mapped*"):
                shutil.move(produced, trna_dir)
    return mapped
Example #7
0
def mirbase(data):
    """
    Run ``_miraligner`` for one sample against the miRBase reference.

    Creates ``<work_dir>/mirbase/<sample>`` and uses
    ``<work_dir>/mirbase/<sample>/<sample>`` as the output prefix. The
    database directory is the parent folder of the path returned by
    ``dd.get_mirbase_ref``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` after storing the ``_miraligner`` result under
        ``data['seqbuster']``.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The first literal ends with a space so the two sentences do not
        # run together once Python concatenates them.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Example #8
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Stores three results on ``data``:

    * ``data['seqbuster']``: ``_miraligner`` run against the directory that
      contains the miRBase reference.
    * ``data['seqbuster_novel']``: ``_miraligner`` run against
      ``<work_dir>/mirdeep2/novel``, only when that directory contains
      ``hairpin.fa``.
    * ``data['trna']``: the value returned by ``_trna_annotation``.

    :param data: sample dictionary; ``data["rgnames"]['sample']``,
        ``data["collapse"]`` and ``data['config']`` are read.
    :returns: ``[[data]]`` with the keys above added.
    :raises ValueError: if ``dd.get_mirbase_ref`` returns nothing.
    """
    sample = data["rgnames"]['sample']
    work_dir = dd.get_work_dir(data)
    out_dir = os.path.join(work_dir, "mirbase", sample)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, sample)

    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # Keep a space between the sentences; adjacent literals are joined
        # verbatim.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase = op.abspath(op.dirname(mirbase_ref))
    species = dd.get_species(data)
    config = data['config']

    data['seqbuster'] = _miraligner(data["collapse"], out_file, species,
                                    mirbase, config)

    novel_dir = op.join(work_dir, "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_file, species, novel_dir,
            config)
    data['trna'] = _trna_annotation(data)
    return [[data]]
Example #9
0
def run(data):
    """
    Run miRDeep2 for the project and hand its result table to
    ``_parse_novel``.

    Paths are derived from the first sample (``data[0][0]``): the collapsed
    reads at ``<work_dir>/seqcluster/prepare/seqs.ma``, the alignments at
    ``<work_dir>/align/seqs.bam`` and the miRBase directory next to the
    file returned by ``dd.get_mirbase_ref``. miRDeep2 is executed inside
    ``<work_dir>/mirdeep2`` only when ``config_utils.get_program`` returns
    a program, ``mature.fa`` exists and ``result_res.csv`` has not been
    produced yet.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: ``None``.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            # Format exactly once with explicit fields: a second pass over
            # the rendered command would fail on any path containing
            # '{' or '}'.
            cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} {mature} "
                   "none {hairpin} -r simple -c -d -P "
                   "-t {species} -z res").format(
                       mirdeep2=mirdeep2, collapsed=collapsed,
                       genome=genome, bam_file=bam_file, mature=mature,
                       hairpin=hairpin, species=species)
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): called even when miRDeep2 was skipped and out_file
        # may not exist -- confirm _parse_novel tolerates a missing file.
        _parse_novel(out_file)
Example #10
0
def _trna_annotation(data):
    """
    use tDRmapper to quantify tRNAs

    Requires ``trna_mature_pre.fa`` in the directory of the miRBase
    reference and ``TdrMappingScripts.pl`` next to the Python executable;
    when either is absent a message is logged and ``None`` is returned.
    Otherwise, unless the expected output already exists, the script is
    run on ``data["clean_fastq"]`` in a transactional temporary directory
    and all ``*mapped*`` files found there are moved into
    ``<work_dir>/trna/<sample>``.

    :param data: sample dictionary; ``data["clean_fastq"]`` is read.
    :returns: ``<trna dir>/<fastq basename>.hq_cs.mapped`` or ``None``.
    """
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    reference = op.join(mirbase_dir, "trna_mature_pre.fa")
    sample_name = dd.get_sample_name(data)
    dest_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "trna", sample_name))
    fastq = data["clean_fastq"]
    fastq_base = op.basename(fastq)
    bin_dir = os.path.dirname(sys.executable)
    mapper_script = os.path.join(bin_dir, "TdrMappingScripts.pl")

    have_inputs = file_exists(reference) and file_exists(mapper_script)
    if not have_inputs:
        logger.info("There is no tRNA annotation to run TdrMapper.")
        return None

    result = op.join(dest_dir, fastq_base + ".hq_cs.mapped")
    if file_exists(result):
        return result

    # Run inside a scratch directory so partial outputs never reach
    # dest_dir.
    with tx_tmpdir(data) as txdir, utils.chdir(txdir):
        utils.symlink_plus(data["clean_fastq"], op.join(txdir, fastq_base))
        do.run(
            "perl {tdrmapper} {trna_ref} {in_file}".format(
                tdrmapper=mapper_script, trna_ref=reference,
                in_file=fastq_base),
            "tRNA for %s" % sample_name)
        for mapped_file in glob.glob("*mapped*"):
            shutil.move(mapped_file, dest_dir)
    return result
Example #11
0
def run(data):
    """
    Run miRDeep2 for the project and hand its result table to
    ``_parse_novel``.

    All paths come from the first sample (``data[0][0]``). Inputs are
    ``<work_dir>/seqcluster/prepare/seqs.ma`` and
    ``<work_dir>/align/seqs.bam`` (both passed through
    ``_prepare_inputs``), plus ``hairpin.fa`` / ``mature.fa`` from the
    directory of the miRBase reference. The command runs inside
    ``<work_dir>/mirdeep2`` only when ``config_utils.get_program`` returns
    a program, ``mature.fa`` exists and ``result_res.csv`` is not there
    yet.

    :param data: nested list of sample dictionaries; only ``data[0][0]``
        is read.
    :returns: ``None``.
    """
    first = data[0][0]
    config = first['config']
    work_dir = dd.get_work_dir(first)
    genome = dd.get_ref_file(first)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(first)))
    species = dd.get_species(first)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            # Render the template a single time with named fields;
            # re-formatting the rendered string would break on paths that
            # contain braces.
            template = ("{mirdeep2} {collapsed} {genome} {bam_file} "
                        "{mature} none {hairpin} -r simple -c -d -P "
                        "-t {species} -z res")
            cmd = template.format(
                mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
                bam_file=bam_file, mature=mature, hairpin=hairpin,
                species=species)
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): runs even if miRDeep2 was skipped and out_file is
        # absent -- confirm _parse_novel handles that case.
        _parse_novel(out_file)