def sample_annotation(data): """ Annotate miRNAs using miRBase database with seqbuster tool """ names = data["rgnames"]['sample'] tools = dd.get_expression_caller(data) work_dir = os.path.join(dd.get_work_dir(data), "mirbase") out_dir = os.path.join(work_dir, names) utils.safe_makedir(out_dir) out_file = op.join(out_dir, names) if dd.get_mirbase_hairpin(data): mirbase = op.abspath(op.dirname(dd.get_mirbase_hairpin(data))) if utils.file_exists(data["collapse"]): data['transcriptome_bam'] = _align(data["collapse"], dd.get_mirbase_hairpin(data), out_file, data) data['seqbuster'] = _miraligner(data["collapse"], out_file, dd.get_species(data), mirbase, data['config']) else: logger.debug("Trimmed collapsed file is empty for %s." % names) else: logger.debug("No annotation file from miRBase.") sps = dd.get_species(data) if dd.get_species(data) else "None" logger.debug("Looking for mirdeep2 database for %s" % names) if file_exists(op.join(dd.get_work_dir(data), "mirdeep2", "novel", "hairpin.fa")): data['seqbuster_novel'] = _miraligner(data["collapse"], "%s_novel" % out_file, sps, op.join(dd.get_work_dir(data), "mirdeep2", "novel"), data['config']) if "trna" in tools: data['trna'] = _mint_trna_annotation(data) data = spikein.counts_spikein(data) return [[data]]
def sample_annotation(data): """ Annotate miRNAs using miRBase database with seqbuster tool """ names = data["rgnames"]['sample'] tools = dd.get_expression_caller(data) work_dir = os.path.join(dd.get_work_dir(data), "mirbase") out_dir = os.path.join(work_dir, names) utils.safe_makedir(out_dir) out_file = op.join(out_dir, names) if dd.get_mirbase_hairpin(data): mirbase = op.abspath(op.dirname(dd.get_mirbase_hairpin(data))) data['seqbuster'] = _miraligner(data["collapse"], out_file, dd.get_species(data), mirbase, data['config']) else: logger.debug("No annotation file from miRBase.") sps = dd.get_species(data) if dd.get_species(data) else "None" logger.debug("Looking for mirdeep2 database for %s" % names) if file_exists( op.join(dd.get_work_dir(data), "mirdeep2", "novel", "hairpin.fa")): data['seqbuster_novel'] = _miraligner( data["collapse"], "%s_novel" % out_file, sps, op.join(dd.get_work_dir(data), "mirdeep2", "novel"), data['config']) if "trna" in tools: data['trna'] = _trna_annotation(data) data = spikein.counts_spikein(data) return [[data]]
def generate_transcript_counts(data): """Generate counts per transcript and per exon from an alignment""" data["count_file"] = featureCounts.count(data) if dd.get_transcriptome_align(data): # to create a disambiguated transcriptome file realign with bowtie2 if dd.get_disambiguate(data): logger.info("Aligning to the transcriptome with bowtie2 using the " "disambiguated reads.") bam_path = data["work_bam"] fastq_paths = alignprep._bgzip_from_bam( bam_path, data["dirs"], data, is_retry=False, output_infix='-transcriptome') if len(fastq_paths) == 2: file1, file2 = fastq_paths else: file1, file2 = fastq_paths[0], None ref_file = dd.get_ref_file(data) data = bowtie2.align_transcriptome(file1, file2, ref_file, data) else: file1, file2 = dd.get_input_sequence_files(data) if not dd.get_transcriptome_bam(data): ref_file = dd.get_ref_file(data) logger.info( "Transcriptome alignment was flagged to run, but the " "transcriptome BAM file was not found. Aligning to the " "transcriptome with bowtie2.") data = bowtie2.align_transcriptome(file1, file2, ref_file, data) data = spikein.counts_spikein(data) return [[data]]
def generate_transcript_counts(data): """Generate counts per transcript and per exon from an alignment""" data["count_file"] = featureCounts.count(data) if dd.get_fusion_mode(data, False): oncofuse_file = oncofuse.run(data) if oncofuse_file: data = dd.set_oncofuse_file(data, oncofuse_file) if dd.get_transcriptome_align(data): # to create a disambiguated transcriptome file realign with bowtie2 if dd.get_disambiguate(data): logger.info("Aligning to the transcriptome with bowtie2 using the " "disambiguated reads.") bam_path = data["work_bam"] fastq_paths = alignprep._bgzip_from_bam(bam_path, data["dirs"], data["config"], is_retry=False, output_infix='-transcriptome') if len(fastq_paths) == 2: file1, file2 = fastq_paths else: file1, file2 = fastq_paths[0], None ref_file = dd.get_ref_file(data) data = bowtie2.align_transcriptome(file1, file2, ref_file, data) else: file1, file2 = dd.get_input_sequence_files(data) if not dd.get_transcriptome_bam(data): ref_file = dd.get_ref_file(data) logger.info("Transcriptome alignment was flagged to run, but the " "transcriptome BAM file was not found. Aligning to the " "transcriptome with bowtie2.") data = bowtie2.align_transcriptome(file1, file2, ref_file, data) data = spikein.counts_spikein(data) return [[data]]