Ejemplo n.º 1
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool
    """
    names = data["rgnames"]['sample']
    tools = dd.get_expression_caller(data)
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    if dd.get_mirbase_hairpin(data):
        mirbase = op.abspath(op.dirname(dd.get_mirbase_hairpin(data)))
        if utils.file_exists(data["collapse"]):
            data['transcriptome_bam'] = _align(data["collapse"], dd.get_mirbase_hairpin(data), out_file, data)
            data['seqbuster'] = _miraligner(data["collapse"], out_file, dd.get_species(data), mirbase, data['config'])
        else:
            logger.debug("Trimmed collapsed file is empty for %s." % names)
    else:
        logger.debug("No annotation file from miRBase.")

    sps = dd.get_species(data) if dd.get_species(data) else "None"
    logger.debug("Looking for mirdeep2 database for %s" % names)
    if file_exists(op.join(dd.get_work_dir(data), "mirdeep2", "novel", "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(data["collapse"], "%s_novel" % out_file, sps,  op.join(dd.get_work_dir(data), "mirdeep2", "novel"), data['config'])

    if "trna" in tools:
        data['trna'] = _mint_trna_annotation(data)

    data = spikein.counts_spikein(data)
    return [[data]]
Ejemplo n.º 2
0
def sample_annotation(data):
    """
    Annotate miRNAs using miRBase database with seqbuster tool
    """
    names = data["rgnames"]['sample']
    tools = dd.get_expression_caller(data)
    work_dir = os.path.join(dd.get_work_dir(data), "mirbase")
    out_dir = os.path.join(work_dir, names)
    utils.safe_makedir(out_dir)
    out_file = op.join(out_dir, names)
    if dd.get_mirbase_hairpin(data):
        mirbase = op.abspath(op.dirname(dd.get_mirbase_hairpin(data)))
        data['seqbuster'] = _miraligner(data["collapse"], out_file,
                                        dd.get_species(data), mirbase,
                                        data['config'])
    else:
        logger.debug("No annotation file from miRBase.")

    sps = dd.get_species(data) if dd.get_species(data) else "None"
    logger.debug("Looking for mirdeep2 database for %s" % names)
    if file_exists(
            op.join(dd.get_work_dir(data), "mirdeep2", "novel", "hairpin.fa")):
        data['seqbuster_novel'] = _miraligner(
            data["collapse"], "%s_novel" % out_file, sps,
            op.join(dd.get_work_dir(data), "mirdeep2", "novel"),
            data['config'])

    if "trna" in tools:
        data['trna'] = _trna_annotation(data)

    data = spikein.counts_spikein(data)
    return [[data]]
Ejemplo n.º 3
0
def generate_transcript_counts(data):
    """Generate counts per transcript and per exon from an alignment"""
    data["count_file"] = featureCounts.count(data)

    if dd.get_transcriptome_align(data):
        # to create a disambiguated transcriptome file realign with bowtie2
        if dd.get_disambiguate(data):
            logger.info("Aligning to the transcriptome with bowtie2 using the "
                        "disambiguated reads.")
            bam_path = data["work_bam"]
            fastq_paths = alignprep._bgzip_from_bam(
                bam_path,
                data["dirs"],
                data,
                is_retry=False,
                output_infix='-transcriptome')
            if len(fastq_paths) == 2:
                file1, file2 = fastq_paths
            else:
                file1, file2 = fastq_paths[0], None
            ref_file = dd.get_ref_file(data)
            data = bowtie2.align_transcriptome(file1, file2, ref_file, data)
        else:
            file1, file2 = dd.get_input_sequence_files(data)
        if not dd.get_transcriptome_bam(data):
            ref_file = dd.get_ref_file(data)
            logger.info(
                "Transcriptome alignment was flagged to run, but the "
                "transcriptome BAM file was not found. Aligning to the "
                "transcriptome with bowtie2.")
            data = bowtie2.align_transcriptome(file1, file2, ref_file, data)
    data = spikein.counts_spikein(data)
    return [[data]]
Ejemplo n.º 4
0
def generate_transcript_counts(data):
    """Generate counts per transcript and per exon from an alignment"""
    data["count_file"] = featureCounts.count(data)

    if dd.get_fusion_mode(data, False):
        oncofuse_file = oncofuse.run(data)
        if oncofuse_file:
            data = dd.set_oncofuse_file(data, oncofuse_file)

    if dd.get_transcriptome_align(data):
        # to create a disambiguated transcriptome file realign with bowtie2
        if dd.get_disambiguate(data):
            logger.info("Aligning to the transcriptome with bowtie2 using the "
                        "disambiguated reads.")
            bam_path = data["work_bam"]
            fastq_paths = alignprep._bgzip_from_bam(bam_path, data["dirs"], data["config"], is_retry=False, output_infix='-transcriptome')
            if len(fastq_paths) == 2:
                file1, file2 = fastq_paths
            else:
                file1, file2 = fastq_paths[0], None
            ref_file = dd.get_ref_file(data)
            data = bowtie2.align_transcriptome(file1, file2, ref_file, data)
        else:
            file1, file2 = dd.get_input_sequence_files(data)
        if not dd.get_transcriptome_bam(data):
            ref_file = dd.get_ref_file(data)
            logger.info("Transcriptome alignment was flagged to run, but the "
                        "transcriptome BAM file was not found. Aligning to the "
                        "transcriptome with bowtie2.")
            data = bowtie2.align_transcriptome(file1, file2, ref_file, data)
    data = spikein.counts_spikein(data)
    return [[data]]