Beispiel #1
0
def run(data):
    """Run miRDeep2 on the pooled small RNA reads and parse novel miRNAs.

    All settings are read from the first sample, ``data[0][0]``. The miRBase
    hairpin/mature files and the species are only handed to miRDeep2 when
    the hairpin file exists; otherwise the "none"/"na" placeholders are used.

    miRDeep2 is executed only when the script and the Rfam file exist and
    the result file is not there yet. A failing run is logged as a warning
    instead of being propagated.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise ``None``.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    hairpin, mature, species = "none", "none", "na"
    rfam_file = dd.get_mirdeep2_file(sample)
    if file_exists(dd.get_mirbase_hairpin(sample)):
        species = dd.get_species(sample)
        hairpin = dd.get_mirbase_hairpin(sample)
        mature = dd.get_mirbase_mature(sample)

    logger.debug("Preparing for mirdeep2 analysis.")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once, with explicit fields: formatting the expanded
        # command a second time would re-parse the substituted paths and
        # fail on any "{" or "}" they contain.
        cmd = ("{perl_exports} && perl {mirdeep2} {collapsed} {genome} {bam_file} {mature} none {hairpin} -f {rfam_file} -r simple -c -P -t {species} -z res").format(
            perl_exports=perl_exports, mirdeep2=mirdeep2,
            collapsed=collapsed, genome=genome, bam_file=bam_file,
            mature=mature, hairpin=hairpin, rfam_file=rfam_file,
            species=species)
        if file_exists(mirdeep2) and not file_exists(out_file) and file_exists(rfam_file):
            try:
                do.run(cmd, "Running mirdeep2.")
            except Exception:
                # Best effort: a failed miRDeep2 run must not abort the
                # pipeline, but KeyboardInterrupt/SystemExit still propagate.
                logger.warning("mirdeep2 failed. Please report the error to https://github.com/lpantano/mirdeep2_core/issues.")
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
Beispiel #2
0
def sample_annotation(data):
    """
    Annotate one sample's collapsed reads against miRBase.

    Fills ``data['seqbuster']`` when a miRBase hairpin file is configured,
    ``data['seqbuster_novel']`` when the miRDeep2 novel hairpin file exists
    in the work directory, and ``data['trna']`` when "trna" is among the
    expression callers. Spike-in counting runs last and the sample is
    returned wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    tools = dd.get_expression_caller(data)
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if not hairpin:
        logger.debug("No annotation file from miRBase.")
    else:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data['seqbuster'] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data['config'])

    # The novel database is annotated even without a configured species;
    # the literal string "None" is passed in that case.
    species = dd.get_species(data) or "None"
    logger.debug("Looking for mirdeep2 database for %s" % sample_name)
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_prefix, species,
            novel_dir, data['config'])

    if "trna" in tools:
        data['trna'] = _trna_annotation(data)

    return [[spikein.counts_spikein(data)]]
Beispiel #3
0
def sample_annotation(data):
    """
    Annotate one sample's collapsed reads against miRBase.

    With a configured miRBase hairpin file and a non-empty collapsed file,
    the reads are aligned (``data['transcriptome_bam']``) and annotated
    (``data['seqbuster']``). The miRDeep2 novel database is annotated when
    its hairpin file exists, tRNA annotation runs when "trna" is among the
    expression callers, and spike-ins are counted last. Returns ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    tools = dd.get_expression_caller(data)
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if not hairpin:
        logger.debug("No annotation file from miRBase.")
    else:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        if not utils.file_exists(data["collapse"]):
            logger.debug("Trimmed collapsed file is empty for %s." % sample_name)
        else:
            data['transcriptome_bam'] = _align(
                data["collapse"], hairpin, out_prefix, data)
            data['seqbuster'] = _miraligner(
                data["collapse"], out_prefix, dd.get_species(data),
                mirbase_dir, data['config'])

    # Without a configured species the literal string "None" is passed on.
    species = dd.get_species(data) or "None"
    logger.debug("Looking for mirdeep2 database for %s" % sample_name)
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_prefix, species,
            novel_dir, data['config'])

    if "trna" in tools:
        data['trna'] = _mint_trna_annotation(data)

    return [[spikein.counts_spikein(data)]]
Beispiel #4
0
def run(data):
    """Run miRDeep2 on the pooled small RNA reads and parse novel miRNAs.

    All settings are read from the first sample, ``data[0][0]``. The
    hairpin, mature and Rfam FASTA files are expected next to the miRBase
    reference file. miRDeep2 is executed only when the script, the mature
    file and the Rfam file exist and the result file is not there yet.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise ``None``.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    rfam_file = op.join(mirbase, "Rfam_for_miRDeep.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once, with explicit fields: formatting the expanded
        # command a second time would re-parse the substituted paths and
        # fail on any "{" or "}" they contain.
        cmd = ("{perl_exports} && {mirdeep2} {collapsed} {genome} {bam_file} {mature} none {hairpin} -f {rfam_file} -r simple -c -d -P -t {species} -z res").format(
            perl_exports=perl_exports, mirdeep2=mirdeep2,
            collapsed=collapsed, genome=genome, bam_file=bam_file,
            mature=mature, hairpin=hairpin, rfam_file=rfam_file,
            species=species)
        if file_exists(mirdeep2) and not file_exists(out_file) and file_exists(mature) and file_exists(rfam_file):
            do.run(cmd, "Running mirdeep2.")
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
Beispiel #5
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Fills ``data['seqbuster']``, ``data['seqbuster_novel']`` (only when the
    miRDeep2 novel hairpin file exists in the work directory) and
    ``data['trna']``, then returns the sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the rendered message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_prefix,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_prefix, dd.get_species(data),
            novel_dir, data['config'])
    data['trna'] = _trna_annotation(data)
    return [[data]]
Beispiel #6
0
def sample_annotation(data):
    """
    Run miRBase, novel-miRNA and tRNA annotation for a single sample.

    ``data['seqbuster']`` is only set when a miRBase hairpin file is
    configured, ``data['seqbuster_novel']`` only when the miRDeep2 novel
    hairpin file exists in the work directory; ``data['trna']`` is always
    set. Returns the sample wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)

    hairpin = dd.get_mirbase_hairpin(data)
    if hairpin:
        mirbase_dir = op.abspath(op.dirname(hairpin))
        data['seqbuster'] = _miraligner(
            data["collapse"], out_prefix, dd.get_species(data),
            mirbase_dir, data['config'])

    # Without a configured species the literal string "None" is passed on.
    species = dd.get_species(data) or "None"
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    novel_hairpin = op.join(novel_dir, "hairpin.fa")
    if file_exists(novel_hairpin):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_prefix, species,
            novel_dir, data['config'])

    data['trna'] = _trna_annotation(data)
    return [[data]]
Beispiel #7
0
def run(data):
    """Proxy function to run the tool

    Settings are read from the first sample, ``data[0][0]``. The tool is
    only executed when ``<work_dir>/mirge`` does not exist yet; it runs in
    a temporary directory that is then moved to that location.

    Returns the absolute paths matching ``<work_dir>/mirge/*/*``.

    Raises ValueError when the species has no entry in ``SPS`` or when
    ``_find_lib`` returns nothing.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    out_dir = os.path.join(work_dir, "mirge")
    lib = _find_lib(sample)
    mirge = _find_mirge(sample)
    # NOTE(review): bowtie is not referenced by name below; presumably it is
    # consumed through the locals() formatting of _cmd() - confirm.
    bowtie = _find_bowtie(sample)
    sps = dd.get_species(sample)
    # A species missing from SPS yields "" and is rejected right below.
    species = SPS.get(sps, "")
    if not species:
        raise ValueError(
            "species not supported (hsa, mmu, rno, dre, cel, dme): %s" % sps)
    if not lib:
        raise ValueError(
            "-lib option is not set up in resources for mirge tool."
            " Read above warnings lines.")

    if not utils.file_exists(out_dir):
        with tx_tmpdir() as tmp_dir:
            sample_file = _create_sample_file(data, tmp_dir)
            # The template returned by _cmd() is filled from locals(), so the
            # local variable names in this function are part of its contract:
            # do not rename or remove them without checking _cmd().
            do.run(_cmd().format(**locals()), "Running miRge2.0.")
            shutil.move(tmp_dir, out_dir)
    return [
        os.path.abspath(fn)
        for fn in glob.glob(os.path.join(out_dir, "*", "*"))
    ]
Beispiel #8
0
def sample_annotation(data):
    """
    Annotate a single sample against miRBase, novel miRNAs and tRNAs.

    Sets ``data['seqbuster']`` if a miRBase hairpin file is configured,
    ``data['seqbuster_novel']`` if the miRDeep2 novel hairpin file exists
    in the work directory, and always ``data['trna']``. The sample is
    returned wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    mirbase_out = os.path.join(dd.get_work_dir(data), "mirbase")
    sample_dir = os.path.join(mirbase_out, sample_name)
    utils.safe_makedir(sample_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    prefix = op.join(sample_dir, sample_name)

    hairpin_fa = dd.get_mirbase_hairpin(data)
    if hairpin_fa:
        db_dir = op.abspath(op.dirname(hairpin_fa))
        data['seqbuster'] = _miraligner(
            data["collapse"],
            prefix,
            dd.get_species(data),
            db_dir,
            data['config'],
        )

    # The novel database does not require a species; "None" is the fallback.
    species = dd.get_species(data)
    if not species:
        species = "None"
    novel_db = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_db, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"],
            "%s_novel" % prefix,
            species,
            novel_db,
            data['config'],
        )

    data['trna'] = _trna_annotation(data)
    return [[data]]
Beispiel #9
0
def mirbase(data):
    """Annotate one sample's collapsed reads against the miRBase files.

    The result of ``_miraligner`` is stored under ``data['seqbuster']`` and
    the sample is returned wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    # Directory holding the miRBase reference; named mirbase_dir so the
    # local does not shadow this function.
    # NOTE(review): a missing reference is not checked here, so op.dirname
    # receives whatever dd.get_mirbase_ref returns - confirm it is always set.
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    data['seqbuster'] = _miraligner(data["collapse"], out_prefix, dd.get_species(data), mirbase_dir, data['config'])
    return [[data]]
Beispiel #10
0
def run(data):
    """Run miRDeep2 on the pooled small RNA reads and parse novel miRNAs.

    All settings are read from the first sample, ``data[0][0]``. The miRBase
    hairpin/mature files and the species are only handed to miRDeep2 when
    the hairpin file exists; otherwise the "none"/"na" placeholders are used.

    The analysis is skipped with a warning (returning ``None``) when the
    Rfam file or the miRDeep2 script is missing. A failing miRDeep2 run is
    logged as a warning instead of being propagated.

    Returns the value of ``_parse_novel`` when the result file exists,
    otherwise ``None``.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = os.path.join(os.path.dirname(sys.executable), "miRDeep2.pl")
    perl_exports = get_perl_exports()
    hairpin, mature, species = "none", "none", "na"
    rfam_file = dd.get_mirdeep2_file(sample)
    if file_exists(dd.get_mirbase_hairpin(sample)):
        species = dd.get_species(sample)
        hairpin = dd.get_mirbase_hairpin(sample)
        mature = dd.get_mirbase_mature(sample)

    logger.debug("Preparing for mirdeep2 analysis.")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    if not file_exists(rfam_file):
        logger.warning("mirdeep2 Rfam file not instaled. Skipping...")
        return None
    if not file_exists(mirdeep2):
        logger.warning("mirdeep2 executable file not found. Skipping...")
        return None
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once, with explicit fields: formatting the expanded
        # command a second time would re-parse the substituted paths and
        # fail on any "{" or "}" they contain.
        cmd = (
            "{perl_exports} && perl {mirdeep2} {collapsed} {genome} {bam_file} {mature} none {hairpin} -f {rfam_file} -r simple -c -P -t {species} -z res"
        ).format(
            perl_exports=perl_exports, mirdeep2=mirdeep2,
            collapsed=collapsed, genome=genome, bam_file=bam_file,
            mature=mature, hairpin=hairpin, rfam_file=rfam_file,
            species=species)
        if not file_exists(out_file):
            try:
                do.run(cmd, "Running mirdeep2.")
            except Exception:
                # Best effort: a failed miRDeep2 run must not abort the
                # pipeline, but KeyboardInterrupt/SystemExit still propagate.
                logger.warning(
                    "mirdeep2 failed. Please report the error to https://github.com/lpantano/mirdeep2_core/issues."
                )
        if file_exists(out_file):
            return _parse_novel(out_file, dd.get_species(sample))
    return None
Beispiel #11
0
def mirbase(data):
    """Annotate one sample's collapsed reads against the miRBase files.

    The result of ``_miraligner`` is stored under ``data['seqbuster']`` and
    the sample is returned wrapped as ``[[data]]``.
    """
    sample_name = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    # Directory holding the miRBase reference; named mirbase_dir so the
    # local does not shadow this function.
    # NOTE(review): a missing reference is not checked here, so op.dirname
    # receives whatever dd.get_mirbase_ref returns - confirm it is always set.
    mirbase_dir = op.abspath(op.dirname(dd.get_mirbase_ref(data)))
    data['seqbuster'] = _miraligner(data["collapse"], out_prefix,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Beispiel #12
0
def mirbase(data):
    """Annotate one sample's collapsed reads against the miRBase files.

    The result of ``_miraligner`` is stored under ``data['seqbuster']`` and
    the sample is returned wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured.
    """
    sample_name = data["rgnames"]['sample']
    out_dir = os.path.join(dd.get_work_dir(data), "mirbase", sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the rendered message.
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    # Named mirbase_dir so the local does not shadow this function.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_prefix, dd.get_species(data), mirbase_dir, data['config'])
    return [[data]]
Beispiel #13
0
def mirbase(data):
    """Annotate one sample's collapsed reads against the miRBase files.

    The result of ``_miraligner`` is stored under ``data['seqbuster']`` and
    the sample is returned wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured.
    """
    sample_name = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the rendered message.
        raise ValueError(
            "There is no smallRNA genome data. "
            "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name."
        )
    # Named mirbase_dir so the local does not shadow this function.
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))
    data['seqbuster'] = _miraligner(data["collapse"], out_prefix,
                                    dd.get_species(data), mirbase_dir,
                                    data['config'])
    return [[data]]
Beispiel #14
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool

    Fills ``data['seqbuster']``, ``data['seqbuster_novel']`` (only when the
    miRDeep2 novel hairpin file exists in the work directory) and
    ``data['trna']``, then returns the sample wrapped as ``[[data]]``.

    Raises ValueError when no miRBase reference is configured.
    """
    sample_name = data["rgnames"]['sample']
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, sample_name)
    utils.safe_makedir(out_dir)
    # Output prefix: <work_dir>/mirbase/<sample>/<sample>
    out_prefix = op.join(out_dir, sample_name)
    mirbase_ref = dd.get_mirbase_ref(data)
    if not mirbase_ref:
        # The two sentences are separate literals; the trailing space keeps
        # them from running together in the rendered message.
        raise ValueError("There is no smallRNA genome data. "
                         "Please, run bcbio_nextgen.py upgrade -u skip --genome build_name.")
    mirbase_dir = op.abspath(op.dirname(mirbase_ref))

    data['seqbuster'] = _miraligner(data["collapse"], out_prefix, dd.get_species(data), mirbase_dir, data['config'])
    novel_dir = op.join(dd.get_work_dir(data), "mirdeep2", "novel")
    if file_exists(op.join(novel_dir, "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"], "%s_novel" % out_prefix, dd.get_species(data), novel_dir, data['config'])
    data['trna'] = _trna_annotation(data)
    return [[data]]
Beispiel #15
0
def run(data):
    """Run miRDeep2 on the pooled small RNA reads and parse its result file.

    All settings are read from the first sample, ``data[0][0]``. The hairpin
    and mature FASTA files are expected next to the miRBase reference file.
    miRDeep2 is executed only when a program was resolved, the mature file
    exists and the result file is not there yet.

    Returns ``None``; the outcome of ``_parse_novel`` is not passed on.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    collapsed = op.join(work_dir, "seqcluster", "prepare", "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once, with explicit fields: formatting the expanded
        # command a second time would re-parse the substituted paths and
        # fail on any "{" or "}" they contain.
        cmd = ("{mirdeep2} {collapsed} {genome} {bam_file} {mature} none {hairpin} -r simple -c -d -P -t {species} -z res").format(
            mirdeep2=mirdeep2, collapsed=collapsed, genome=genome,
            bam_file=bam_file, mature=mature, hairpin=hairpin,
            species=species)
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): called even when miRDeep2 was skipped and out_file
        # may not exist - confirm _parse_novel copes with a missing file.
        _parse_novel(out_file)
Beispiel #16
0
def run(data):
    """Proxy function to run the tool

    Settings are read from the first sample, ``data[0][0]``. The tool is
    only executed when ``<work_dir>/mirge`` does not exist yet; it runs in
    a temporary directory that is then moved to that location.

    Returns the absolute paths matching ``<work_dir>/mirge/*/*``.

    Raises ValueError when the species has no entry in ``SPS`` or when
    ``_find_lib`` returns nothing.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    out_dir = os.path.join(work_dir, "mirge")
    lib = _find_lib(sample)
    mirge = _find_mirge(sample)
    # NOTE(review): bowtie is not referenced by name below; presumably it is
    # consumed through the locals() formatting of _cmd() - confirm.
    bowtie = _find_bowtie(sample)
    sps = dd.get_species(sample)
    # A species missing from SPS yields "" and is rejected right below.
    species = SPS.get(sps, "")
    if not species:
        raise ValueError("species not supported (hsa, mmu, rno, dre, cel, dme): %s" % sps)
    if not lib:
        raise ValueError("-lib option is not set up in resources for mirge tool."
                         " Read above warnings lines.")

    if not utils.file_exists(out_dir):
        with tx_tmpdir() as tmp_dir:
            sample_file = _create_sample_file(data, tmp_dir)
            # The template returned by _cmd() is filled from locals(), so the
            # local variable names in this function are part of its contract:
            # do not rename or remove them without checking _cmd().
            do.run(_cmd().format(**locals()), "Running miRge2.0.")
            shutil.move(tmp_dir, out_dir)
    return [os.path.abspath(fn) for fn in glob.glob(os.path.join(out_dir, "*", "*"))]
Beispiel #17
0
def run(data):
    """Run miRDeep2 on the pooled small RNA reads and parse its result file.

    All settings are read from the first sample, ``data[0][0]``. The hairpin
    and mature FASTA files are expected next to the miRBase reference file.
    miRDeep2 is executed only when a program was resolved, the mature file
    exists and the result file is not there yet.

    Returns ``None``; the outcome of ``_parse_novel`` is not passed on.
    """
    sample = data[0][0]
    config = sample['config']
    work_dir = dd.get_work_dir(sample)
    genome = dd.get_ref_file(sample)
    mirdeep2 = config_utils.get_program("miRDeep2.pl", config)
    mirbase = op.abspath(op.dirname(dd.get_mirbase_ref(sample)))
    species = dd.get_species(sample)
    hairpin = op.join(mirbase, "hairpin.fa")
    mature = op.join(mirbase, "mature.fa")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    seqs_dir = op.join(work_dir, "seqcluster", "prepare")
    collapsed = op.join(seqs_dir, "seqs.ma")
    out_dir = op.join(work_dir, "mirdeep2")
    out_file = op.join(out_dir, "result_res.csv")
    safe_makedir(out_dir)
    with chdir(out_dir):
        collapsed, bam_file = _prepare_inputs(collapsed, bam_file, out_dir)
        # Format exactly once, with explicit fields: formatting the expanded
        # command a second time would re-parse the substituted paths and
        # fail on any "{" or "}" they contain.
        cmd = (
            "{mirdeep2} {collapsed} {genome} {bam_file} {mature} none {hairpin} -r simple -c -d -P -t {species} -z res"
        ).format(
            mirdeep2=mirdeep2,
            collapsed=collapsed,
            genome=genome,
            bam_file=bam_file,
            mature=mature,
            hairpin=hairpin,
            species=species,
        )
        if mirdeep2 and not file_exists(out_file) and file_exists(mature):
            do.run(cmd, "Running mirdeep2.")
        # NOTE(review): called even when miRDeep2 was skipped and out_file
        # may not exist - confirm _parse_novel copes with a missing file.
        _parse_novel(out_file)