def get(self, filename, name=None, title=None, prefix=None, image=False):
    """Link a file into the report workspace and return its href markup.

    filename -- path of the file to expose in the report.
    name     -- name for the link; defaults to the last path component
                of ``filename``.
    title, image -- passed through unchanged to ``self.href``.
    prefix   -- string prepended to ``name``; defaults to
                ``self.file_prefix``.

    The file is symlinked (not copied) to
    ``self.workspace / (prefix + name)``.
    """
    link_name = os.path.split(filename)[1] if name is None else name
    link_prefix = self.file_prefix if prefix is None else prefix

    dest = self.workspace / (link_prefix + link_name)
    io.symbolic_link(source=filename, link_name=dest)
    return self.href(dest, title, image)
def get(self, filename, name=None, title=None, prefix=None, image=False):
    """Symlink ``filename`` into ``self.workspace`` and return a link to it.

    When ``name`` is None the base name of ``filename`` is used; when
    ``prefix`` is None ``self.file_prefix`` is used.  The return value is
    whatever ``self.href(dest, title, image)`` produces for the new link.
    """
    if name is None:
        # basename(p) is the second element of os.path.split(p)
        name = os.path.basename(filename)
    if prefix is None:
        prefix = self.file_prefix

    target = self.workspace / (prefix + name)
    io.symbolic_link(source=filename, link_name=target)
    return self.href(target, title, image)
def run(self):
    """Create bigwig tracks and an IGV index page for a set of samples.

    Each entry of ``self.working_dirs`` is either used directly as a
    sample directory or, if it contains an
    ``analyse-polya-batch.state`` file, expanded into the sample
    directories recorded in that pickled state.  BAM files, their
    indexes and the peaks file are symlinked into the output
    workspace, ``index.html`` is emitted from the ``igv.html``
    template, and ``Bam_to_bigwig`` is run for the pooled samples and
    for every individual sample (raw, plus normalized when
    ``self.norm_file`` is given).
    """
    sample_dirs = []
    peaks_file = self.peaks_file

    for item in self.working_dirs:
        state_filename = os.path.join(item, 'analyse-polya-batch.state')
        if os.path.exists(state_filename):
            # A pipeline output directory: expand to its sample directories.
            with open(state_filename, 'rb') as f:
                state = pickle.load(f)
            sample_dirs.extend(
                os.path.join(item, 'samples', sample.output_dir)
                for sample in state.samples
            )
        else:
            sample_dirs.append(item)

    if not peaks_file:
        peaks_file = os.path.join(
            self.pipeline_dir, "peaks", "relation-child.gff")

    sample_names = [os.path.split(dirname)[1] for dirname in sample_dirs]
    workspaces = [
        working_directory.Working(dirname, must_exist=True)
        for dirname in sample_dirs
    ]

    workspace = self.get_workspace()

    with open(workspace / "index.html", "wb") as f:
        page_vars = dict(
            SAMPLES=json.dumps(sample_names),
            HAVE_NORM=json.dumps(bool(self.norm_file)),
            TITLE=self.title,
        )
        web.emit(f, "igv.html", page_vars)

    bams = [item / "alignments_filtered_sorted.bam" for item in workspaces]

    for sample_name, bam in zip(sample_names, bams):
        io.symbolic_link(bam, workspace / (sample_name + ".bam"))
        io.symbolic_link(bam + ".bai", workspace / (sample_name + ".bam.bai"))

    io.symbolic_link(peaks_file, workspace / "peaks.gff")

    if self.norm_file:
        mults = io.read_grouped_table(self.norm_file)['All']
        norm_mult = [
            float(mults[name]['Normalizing.multiplier'])
            for name in sample_names
        ]

    with nesoni.Stage() as stage:
        Bam_to_bigwig(
            workspace / "total",
            bam_files=bams,
            what="ambiguity,span,3p,polyaspan,polya3p",
        ).process_make(stage)

        for i, sample_name in enumerate(sample_names):
            # Always a raw track; a normalized one only with a norm file.
            scales = [("raw", 1.0)]
            if self.norm_file:
                scales.append(("norm", norm_mult[i]))

            for scale_desc, scale in scales:
                Bam_to_bigwig(
                    workspace / (sample_name + "-" + scale_desc),
                    bam_files=[bams[i]],
                    what='span,3p,polyaspan,polya3p',
                    scale=scale,
                ).process_make(stage)
def run(self):
    """Run the batch poly(A) analysis and write its HTML report.

    Steps, in order:
      1. sanity-check sample names, read filenames and test names;
      2. process every sample;
      3. build and execute the job graph (gene counts, peak calling,
         normalization + bigwigs, UTR calling + primary-peak counts,
         differential tests, raw data extraction);
      4. write the report under ``<output_dir>/report``.
    """
    #===============================================
    #                Sanity checks
    #===============================================

    sample_dirs_seen = set(item.output_dir for item in self.samples)
    assert len(sample_dirs_seen) == len(self.samples), "Duplicate sample name."

    all_inputs = [
        read
        for sample in self.samples
        for read in sample.reads
    ]
    assert len(set(all_inputs)) == len(all_inputs), "Duplicate read filename."

    test_dirs_seen = set(item.output_dir for item in self.tests)
    assert len(test_dirs_seen) == len(self.tests), "Duplicate test name."

    for test in self.tests:
        assert not test.analysis, "analysis parameter for tests should not be set, will be filled in automatically"

    #===============================================
    #                Run pipeline
    #===============================================

    reference = reference_directory.Reference(self.reference, must_exist=True)

    workspace = io.Workspace(self.output_dir, must_exist=False)
    samplespace = io.Workspace(workspace / 'samples', must_exist=False)
    expressionspace = io.Workspace(workspace / 'expression', must_exist=False)
    testspace = io.Workspace(workspace / 'test', must_exist=False)

    self._create_json()

    # Make sure a non-empty prefix ends with a dash.
    file_prefix = self.file_prefix
    if file_prefix and not file_prefix.endswith('-'):
        file_prefix += '-'

    samples = [
        sample(
            samplespace / sample.output_dir,
            reference=self.reference,
        )
        for sample in self.samples
    ]

    dirs = [item.output_dir for item in samples]

    clipper_logs = [join(dirname, 'clipped_reads_log.txt') for dirname in dirs]
    filter_logs = [join(dirname, 'filter_log.txt') for dirname in dirs]

    analyse_template = tail_lengths.Analyse_tail_counts(
        working_dirs=dirs,
        extension=self.extension,
        annotations=reference / 'reference.gff',
        types=self.types,
        parts=self.parts,
    )

    with nesoni.Stage() as stage:
        for item in samples:
            item.process_make(stage)

    # The job_* values are callables; nothing below runs until job_all().
    job_gene_counts = analyse_template(
        output_dir=expressionspace / 'genewise',
        extension=self.extension,
        title='Genewise expression - ' + self.title,
        file_prefix=file_prefix + 'genewise-',
    ).make

    job_peaks = _call(
        self._run_peaks,
        workspace=workspace,
        expressionspace=expressionspace,
        reference=reference,
        dirs=dirs,
        analyse_template=analyse_template,
        file_prefix=file_prefix,
    )

    job_norm = nesoni.Norm_from_samples(
        workspace / 'norm',
        working_dirs=dirs,
    ).make

    job_bigwig = bigwig.Polya_bigwigs(
        workspace / 'bigwigs',
        working_dirs=dirs,
        norm_file=workspace / "norm.csv",
        peaks_file=workspace / ("peaks", "relation-child.gff"),
        title="IGV tracks - " + self.title,
    ).make

    job_norm_bigwig = _call(_serial, job_norm, job_bigwig)

    job_utrs = tail_tools.Call_utrs(
        workspace / ('peaks', 'primary-peak'),
        self.reference,
        self.output_dir,
        extension=self.extension,
    ).make

    job_primpeak_counts = analyse_template(
        expressionspace / 'primarypeakwise',
        annotations=workspace / ('peaks', 'primary-peak-peaks.gff'),
        extension=0,
        types='peak',
        parts='peak',
        title='Primary-peakwise expression - ' + self.title,
        file_prefix=file_prefix + 'primarypeakwise-',
    ).make

    job_primpeak = _call(_serial, job_utrs, job_primpeak_counts)

    job_peak_primpeak_bigwig = _call(
        _serial,
        job_peaks,
        _call(_parallel, job_norm_bigwig, job_primpeak),
    )

    job_count = _call(_parallel, job_gene_counts, job_peak_primpeak_bigwig)

    test_jobs = [
        test(
            output_dir=testspace / test.output_dir,
            analysis=self.output_dir,
        ).make
        for test in self.tests
    ]

    job_test = _call(_parallel, *test_jobs)

    job_raw = self._extract_raw

    job_all = _call(_serial, job_count, _call(_parallel, job_raw, job_test))

    job_all()

    #===============================================
    #                   Report
    #===============================================

    r = reporting.Reporter(workspace / 'report', self.title, self.file_prefix, style=web.style())

    io.symbolic_link(source=workspace / 'bigwigs', link_name=r.workspace / 'bigwigs')
    r.write('<div style="font-size: 150%; margin-top: 1em; margin-bottom: 1em;"><a href="bigwigs/index.html">→ Load tracks into IGV</a></div>')

    tail_tools.Shiny(
        workspace / ('report', 'shiny'),
        self.output_dir,
        title=self.title,
        species=self.species,
    ).run()
    r.write('<div style="font-size: 150%; margin-top: 1em; margin-bottom: 1em;"><a href="shiny/" target="_blank">→ Interactive report (shiny)</a></div>')

    r.heading('Alignment to reference')

    # Statistics fields left out of the alignment table.
    hidden_fields = [
        'fragments', 'fragments aligned to the reference', 'reads kept',
        'average depth of coverage, ambiguous',
        'average depth of coverage, unambiguous',
    ]
    alignment_logs = (
        clipper_logs + filter_logs +
        [expressionspace / ('genewise', 'aggregate-tail-counts_log.txt')]
    )
    r.report_logs(
        'alignment-statistics',
        alignment_logs,
        filter=lambda sample, field: field not in hidden_fields,
    )

    r.heading('Genewise expression')

    r.p("This is based on all reads within each gene (possibly from multiple peaks, or decay products).")

    io.symbolic_link(source=expressionspace / ('genewise', 'report'), link_name=r.workspace / 'genewise')
    r.p('<a href="genewise/index.html">→ Genewise expression</a>')

    r.heading('Peakwise expression')

    r.p("This shows results from all called peaks.")

    peak_filename = expressionspace / ('peakwise', 'features-with-data.gff')
    r.p(r.get(peak_filename, name='peaks.gff') + ' - peaks called')

    self._describe_peaks(r)

    io.symbolic_link(source=expressionspace / ('peakwise', 'report'), link_name=r.workspace / 'peakwise')
    r.p('<a href="peakwise/index.html">→ Peakwise expression</a>')

    r.subheading('Primary-peakwise expression')

    r.p(
        (
            "This is based on the most prominent peak in the 3'UTR for each gene. "
            "(Peak can be up to %d bases downstrand of the annotated 3'UTR end, but not inside another gene on the same strand.)"
        ) % self.extension
    )

    io.symbolic_link(source=expressionspace / ('primarypeakwise', 'report'), link_name=r.workspace / 'primarypeakwise')
    r.p('<a href="primarypeakwise/index.html">→ Primary-peakwise expression</a>')

    r.p(r.get(workspace / ('peaks', 'primary-peak-peaks.gff')) + ' - primary peaks for each gene.')
    r.p(r.get(workspace / ('peaks', 'primary-peak-utrs.gff')) + ' - 3\' UTR regions, based on primary peak call.')
    r.p(r.get(workspace / ('peaks', 'primary-peak-genes.gff')) + ' - full extent of gene, based on primary peak call.')

    if self.tests:
        r.heading('Differential tests')
        for test in self.tests:
            io.symbolic_link(source=testspace / test.output_dir, link_name=r.workspace / ('test-' + test.output_dir))
            r.p('<a href="test-%s">→ %s</a> ' % (test.output_dir, test.get_title()))

    web.Geneview_webapp(r.workspace / 'view').run()

    r.heading('Gene viewers')
    r.p('Having identified interesting genes from heatmaps and differential tests above, '
        'these viewers allow specific genes to be examined in detail.')

    if self.groups:
        r.get(workspace / ('peak-shift', 'grouped.json'))
        r.p('<a href="view.html?json=%sgrouped.json">→ Gene viewer, grouped samples</a>' % r.file_prefix)
    r.get(workspace / ('peak-shift', 'individual.json'))
    r.p('<a href="view.html?json=%sindividual.json">→ Gene viewer, individual samples</a>' % r.file_prefix)

    r.heading('Raw data')

    r.p(r.tar('csv-files', glob.glob(workspace / ('raw', '*.csv'))))

    # Legend describing the CSV files in the archive above.
    legend_lines = (
        '<ul>\n',
        '<li> -info.csv = gene name and product, etc\n',
        '<li> -count.csv = read count\n',
        '<li> -mlog2-RPM.csv = moderated log2 Reads Per Million\n',
        '<li> -tail.csv = average poly(A) tail length\n',
        '<li> -tail-count.csv = poly(A) read count\n',
        '<li> -proportion.csv = proportion of reads with poly(A)\n',
        '<li> -norm.csv = read count normalization used for log2 transformation, heatmaps, differential tests, etc etc\n',
        '</ul>\n',
    )
    for legend_line in legend_lines:
        r.write(legend_line)

    r.p('This set of genes was used in the analysis:')

    r.p(r.get(reference / 'reference.gff') + ' - Reference annotations in GFF3 format')
    r.p(r.get(reference / 'utr.gff') + ' - 3\' UTR regions')

    r.p('<b>%d further bases 3\' extension was allowed</b> beyond the GFF files above (but not extending into the next gene on the same strand).' % self.extension)

    r.write('<p/><hr>\n')
    r.subheading('About normalization and log transformation')

    r.p('Counts are converted to '
        'log2 Reads Per Million using Anscombe\'s variance stabilizing transformation '
        'for the negative binomial distribution, implemented in '
        'R package "varistran".')

    r.write('<p/><hr>\n')

    r.p('Reference directory ' + self.reference)
    r.p('Tail Tools version ' + tail_tools.VERSION)
    r.p('Nesoni version ' + nesoni.VERSION)

    r.close()
def run(self):
    """Process all samples, run the optional analyses, and write the report.

    Samples are processed in one stage; the variants and expression
    analyses (when configured on the context) in a second stage.  IGV
    plots are then made if ``self.igv_plots`` is set.  Finally a report
    is written to ``<space>/report`` with alignment statistics, links
    to the sub-reports and optional tarballs of IGV plots and BAM files.
    """
    context = self.get_context()

    with nesoni.Stage() as stage:
        for sample in context.samples:
            sample.process_make(stage)

    with nesoni.Stage() as stage:
        for action in (context.variants, context.expression):
            if action:
                action.process_make(stage)

    if self.igv_plots:
        plot_space = workspace.Workspace(context.space / 'plot', False)
        if context.expression:
            norm_file = context.space / ('expression', 'norm.csv')
        else:
            norm_file = None
        self.igv_plots(
            prefix=plot_space / ('plot'),
            genome=context.reference.get_genome_filename(),
            norm_file=norm_file,
            working_dirs=context.sample_dirs,
        ).make()

    # =================================================================================
    # Report
    # =================================================================================

    reporter = reporting.Reporter(context.space / 'report', self.report_title, context.name)

    def stats_log(sample):
        # The count log takes the place of the filter log when counting is on.
        sample_context = sample.get_context()
        if not sample.count:
            return sample_context.filter.log_filename()
        return sample_context.count.log_filename()

    clip_logs = [
        sample.get_context().clip.log_filename()
        for sample in context.samples
        if sample.clip
    ]
    align_logs = [
        stats_log(sample)
        for sample in context.samples
        if sample.filter or sample.count
    ]

    reporter.report_logs(
        'alignment-statistics',
        clip_logs + align_logs,
        filter=lambda sample, field: field != 'fragments',
    )

    if self.expression:
        io.symbolic_link(
            source=context.space / ('expression', 'report'),
            link_name=context.space / ('report', 'expression'),
        )
        reporter.heading('<a href="expression/index.html">> Expression analysis</a>')

    if self.variants:
        io.symbolic_link(
            source=context.space / ('variants', 'report'),
            link_name=context.space / ('report', 'variants'),
        )
        reporter.heading('<a href="variants/index.html">> Variants analysis</a>')

    if self.igv_plots:
        reporter.heading('IGV plots')
        reporter.p('These files show the depth of coverage. They can be viewed with the IGV genome browser.')

        genome_files = []
        if self.include_genome:
            genome_filename = context.reference.get_genome_filename()
            genome_dir = context.reference.get_genome_dir()
            genome_files.append(genome_filename)
            if genome_dir:
                base = os.path.split(genome_dir)[1]
                # (source path, name inside the archive) pairs
                genome_files.extend(
                    (os.path.join(genome_dir, filename), os.path.join(base, filename))
                    for filename in os.listdir(genome_dir)
                )

        reporter.p(
            reporter.tar('igv-plots', genome_files + glob.glob(plot_space / '*.tdf'))
        )

    if self.include_bams:
        reporter.heading('BAM files')
        reporter.p('These BAM files contain the alignments of reads to the reference sequences.'
                   ' They can also be viewed using IGV.')

        bam_files = []
        for sample in self.samples:
            name = sample.output_dir
            for source_name, suffix in (
                ('alignments_filtered_sorted.bam', '.bam'),
                ('alignments_filtered_sorted.bam.bai', '.bam.bai'),
            ):
                bam_files.append(
                    (context.space / ('samples', name, source_name), name + suffix)
                )
        reporter.p(reporter.tar('bam-files', bam_files))

    reporter.write('<p/><hr/>\n')
    reporter.p('nesoni version ' + nesoni.VERSION)
    reporter.close()
def run(self):
    """Call, filter and optionally annotate variants, then write a report.

    Runs (in order) the optional power analysis, freebayes, the VCF
    filter and the optional snpEff annotation; links the resulting VCF
    as ``variants.vcf``; runs ``Vcf_patch`` and ``Vcf_nway``; and
    writes the report to ``<workspace>/report``.

    Raises AssertionError if ``self.reference`` is None.
    """
    assert self.reference is not None, 'No reference directory given.'
    space = self.get_workspace()

    if self.analysis:
        power_action = nesoni.Power_variant_call(
            space / 'power',
            template__analysis=self.analysis,
            template__freebayes=self.freebayes,
            template__vcf_filter=self.vcf_filter,
            legacy=False,
        )
        power_action.make()

    caller = self.freebayes(
        space / 'variants-raw',
        samples=self.samples,
    )
    caller.make()

    quality_filter = self.vcf_filter(
        space / 'variants-filtered',
        space / 'variants-raw.vcf',
    )
    quality_filter.make()

    filename = space / 'variants-filtered.vcf'

    if self.snpeff:
        annotator = self.snpeff(
            space / 'variants-filtered-annotated',
            self.reference,
            space / 'variants-filtered.vcf',
        )
        annotator.make()
        # Report the annotated file in place of the plain filtered one.
        filename = space / 'variants-filtered-annotated.vcf'

    io.symbolic_link(source=filename, link_name=space / 'variants.vcf')
    if os.path.exists(filename + '.idx'):
        io.symbolic_link(source=filename + '.idx', link_name=space / 'variants.vcf.idx')

    nesoni.Vcf_patch(
        space / 'patched',
        self.reference,
        space / 'variants.vcf',
    ).make()

    nesoni.Vcf_nway(
        space / 'net',
        space / 'variants.vcf',
        require='all',
        as_='splitstree',
    ).make()

    reporter = reporting.Reporter(space / 'report', 'Variants analysis')

    log_labels = {
        'input': 'Found by freebayes',
        'kept': 'Kept after quality filtering',
    }
    reporter.report_logs(
        None,
        [space / 'variants-filtered_log.txt'],
        renaming=log_labels,
    )

    reporter.p(reporter.get(filename))
    if os.path.exists(filename + '.idx'):
        reporter.p(
            reporter.get(filename + '.idx')
            + ' (needed to view VCF file in IGV)'
        )

    reporter.p(reporter.get(space / 'net.svg', title='Phylogenetic net'))

    if self.analysis:
        reporter.p(
            reporter.get(space / 'power_log.txt', title='Power report')
            + '<br/>(Test of the ability of the pipeline to call various variants at various depths of coverage and in the presence of errors, using synthetic reads.)'
        )

    reporter.close()
def run(self):
    """Run the poly(A) pipeline (with deduplicated variants) and report.

    Steps, in order:
      1. process every sample;
      2. compute normalization and derive ``norm-polyA.csv`` from
         ``norm.csv`` by appending ``-polyA`` to each sample name;
      3. in one stage: optional IGV plots, genewise counts with and
         without read deduplication, and peak calling;
      4. run every differential test, plain and deduplicated;
      5. write the report under ``<output_dir>/report``.

    Fix relative to the previous version: the genome directory
    bundled with the IGV plots is taken from the local ``genome_dir``
    returned by ``reference.get_genome_dir()`` rather than from a
    ``self.genome_dir`` attribute, and is skipped when no genome
    directory is returned.
    """
    names = [sample.output_dir for sample in self.samples]

    reference = reference_directory.Reference(self.reference, must_exist=True)

    workspace = io.Workspace(self.output_dir, must_exist=False)
    samplespace = io.Workspace(workspace / 'samples', must_exist=False)
    plotspace = io.Workspace(workspace / 'plots', must_exist=False)
    expressionspace = io.Workspace(workspace / 'expression', must_exist=False)
    testspace = io.Workspace(workspace / 'test', must_exist=False)
    testspace_dedup = io.Workspace(workspace / 'test-dedup', must_exist=False)

    # Make sure a non-empty prefix ends with a dash.
    file_prefix = self.file_prefix
    if file_prefix and not file_prefix.endswith('-'):
        file_prefix += '-'

    samples = [
        sample(
            samplespace / sample.output_dir,
            reference=self.reference,
        )
        for sample in self.samples
    ]

    dirs = [item.output_dir for item in samples]
    polya_dirs = [item + '-polyA' for item in dirs]

    clipper_logs = [join(item.output_dir, 'clipped_reads_log.txt') for item in samples]
    filter_logs = [join(item.output_dir, 'filter_log.txt') for item in samples]

    analyse_template = tail_lengths.Analyse_tail_counts(
        working_dirs=dirs,
        saturation=0,
        extension=self.extension,
        annotations=reference / 'reference.gff',
        types='gene',
    )

    with nesoni.Stage() as stage:
        for item in samples:
            item.process_make(stage)

    nesoni.Norm_from_samples(
        workspace / 'norm',
        working_dirs=dirs,
    ).make()

    def writer():
        # Same rows as norm.csv, renamed to match the -polyA sample dirs.
        for row in io.read_table(workspace / 'norm.csv'):
            row['Name'] = row['Name'] + '-polyA'
            yield row
    io.write_csv(workspace / 'norm-polyA.csv', writer(), comments=['Normalization'])

    with nesoni.Stage() as stage:
        if self.include_plots:
            for plot_name, directories, norm_filename in [
                ('all', dirs, workspace / 'norm.csv'),
                ('polyA', polya_dirs, workspace / 'norm-polyA.csv'),
            ]:
                nesoni.IGV_plots(
                    plotspace / plot_name,
                    working_dirs=directories,
                    label_prefix=plot_name + ' ',
                    raw=True,
                    norm=True,
                    genome=reference.get_genome_filename(),
                    norm_file=norm_filename,
                ).process_make(stage)

        analyse_gene_counts_0 = analyse_template(
            output_dir=expressionspace / 'genewise',
            saturation=0,
            extension=self.extension,
            title='Genewise expression - ' + self.title,
            file_prefix=file_prefix + 'genewise-',
        )
        analyse_gene_counts_0.process_make(stage)

        analyse_gene_counts_1 = analyse_template(
            output_dir=expressionspace / 'genewise-dedup',
            saturation=1,
            title='Genewise expression with read deduplication - ' + self.title,
            file_prefix=file_prefix + 'genewise-dedup-',
        )
        analyse_gene_counts_1.process_make(stage)

        stage.process(
            self._run_peaks,
            workspace=workspace,
            expressionspace=expressionspace,
            reference=reference,
            polya_dirs=polya_dirs,
            analyse_template=analyse_template,
            file_prefix=file_prefix,
        )

    with nesoni.Stage() as stage:
        for test in self.tests:
            test(
                output_dir=testspace / test.output_dir,
                analysis=self.output_dir,
            ).process_make(stage)

            test(
                output_dir=testspace_dedup / test.output_dir,
                analysis=self.output_dir,
                dedup=True,
            ).process_make(stage)

    #===============================================
    #                   Report
    #===============================================

    r = reporting.Reporter(os.path.join(self.output_dir, 'report'), self.title, self.file_prefix)

    r.heading('Alignment to reference')

    r.report_logs(
        'alignment-statistics',
        clipper_logs + filter_logs +
        [expressionspace / ('genewise', 'aggregate-tail-counts_log.txt')],
        filter=lambda sample, field: (
            field not in [
                'fragments', 'fragments aligned to the reference', 'reads kept',
                'average depth of coverage, ambiguous',
                'average depth of coverage, unambiguous',
            ]
        ),
    )

    if self.include_plots:
        r.heading('IGV plots')

        r.p('These files show the depth of coverage. They can be viewed with the IGV genome browser.')

        genome_files = []
        if self.include_genome:
            genome_files.append(reference.get_genome_filename())
            genome_dir = reference.get_genome_dir()
            # Use the directory just looked up (not an attribute of self),
            # and only when there is one to list.
            if genome_dir:
                base = os.path.split(genome_dir)[1]
                for filename in os.listdir(genome_dir):
                    genome_files.append((
                        os.path.join(genome_dir, filename),
                        os.path.join(base, filename),
                    ))

        r.p(r.tar('igv-plots', genome_files + glob.glob(plotspace / '*.tdf')))

    if self.include_bams:
        r.heading('BAM files')

        r.p('These BAM files contain the alignments of reads to the reference sequences.')

        r.p('Reads with a poly(A) tail have an \'AN\' attribute giving the length of non-templated poly(A) sequence. '
            'Tail-tools only treats a read as having a tail if this length is at least 4.')

        bam_files = []
        for name in names:
            bam_files.append((samplespace / (name, 'alignments_filtered_sorted.bam'), name + '.bam'))
            bam_files.append((samplespace / (name, 'alignments_filtered_sorted.bam.bai'), name + '.bam.bai'))
        r.p(r.tar('bam-files', bam_files))

    r.heading('Genewise expression')

    io.symbolic_link(source=expressionspace / ('genewise', 'report'), link_name=r.workspace / 'genewise')
    r.p('<a href="genewise/index.html">→ Genewise expression</a>')

    io.symbolic_link(source=expressionspace / ('genewise-dedup', 'report'), link_name=r.workspace / 'genewise-dedup')
    r.p('<a href="genewise-dedup/index.html">→ Genewise expression with read deduplication</a>')

    r.heading('Peakwise expression')

    web.Geneview_webapp(r.workspace / 'view').run()

    peak_filename = expressionspace / ('peakwise', 'features-with-data.gff')
    n_peaks = len(list(annotation.read_annotations(peak_filename)))
    r.p('%d peaks called (%d poly(A) reads were required to call a peak).' % (n_peaks, self.peak_min_depth))

    r.p(r.get(peak_filename, name='peaks.gff') + ' - peaks called')

    io.symbolic_link(source=expressionspace / ('peakwise', 'report'), link_name=r.workspace / 'peakwise')
    r.p('<a href="peakwise/index.html">→ Peakwise expression</a>')

    io.symbolic_link(source=expressionspace / ('peakwise-dedup', 'report'), link_name=r.workspace / 'peakwise-dedup')
    r.p('<a href="peakwise-dedup/index.html">→ Peakwise expression with read deduplication</a>')

    if self.tests:
        r.heading('Differential tests')
        for test in self.tests:
            io.symbolic_link(source=testspace / test.output_dir, link_name=r.workspace / ('test-' + test.output_dir))
            io.symbolic_link(source=testspace_dedup / test.output_dir, link_name=r.workspace / ('test-dedup-' + test.output_dir))

            r.p('<a href="test-%s">→ %s</a> '
                ' <a href="test-dedup-%s" style="font-size: 66%%">[→ Deduplicated version]</a>'
                % (test.output_dir, test.get_title(), test.output_dir))

    r.heading('Gene viewers')
    r.p('Having identified interesting genes from heatmaps and differential tests above, '
        'these viewers allow specific genes to be examined in detail.')

    if self.groups:
        r.p('<a href="view.html?json=%sgrouped.json">→ Gene viewer, grouped samples</a>' % r.file_prefix)
    r.p('<a href="view.html?json=%sindividual.json">→ Gene viewer, individual samples</a>' % r.file_prefix)

    r.write('<p/><hr>\n')

    r.p('Note: Use deduplicated versions with care. '
        'They may possibly provide more significant results, however they are less quantitative. '
        'Read deduplication involves throwing away a large amount of data, much of which will not be a technical artifact. '
        'Deduplicated versions might best be viewed as a check on data quality.')

    r.p('This set of genes was used in the analysis:')

    r.p(r.get(reference / 'reference.gff') + ' - Reference annotations in GFF3 format')
    r.p(r.get(reference / 'utr.gff') + ' - 3\' UTR regions')

    r.p('tail-tools version ' + tail_tools.VERSION)
    r.p('nesoni version ' + nesoni.VERSION)

    r.close()
def run(self):
    """Process samples, run variants/expression analyses, and report.

    After the samples stage and the analysis stage, IGV plots are made
    when ``self.igv_plots`` is set.  The report's alignment statistics
    combine the clip logs with either the per-sample filter logs (no
    expression analysis) or the expression ``counts_log.txt``.
    """
    context = self.get_context()

    with nesoni.Stage() as stage:
        for sample in context.samples:
            sample.process_make(stage)

    with nesoni.Stage() as stage:
        if context.variants:
            context.variants.process_make(stage)

        if context.expression:
            context.expression.process_make(stage)

    if self.igv_plots:
        plot_space = workspace.Workspace(context.space / 'plot', False)
        plot_action = self.igv_plots(
            prefix=plot_space / ('plot'),
            genome=context.reference.get_genome_filename(),
            norm_file=context.space / ('expression', 'norm.csv') if context.expression else None,
            working_dirs=context.sample_dirs,
        )
        plot_action.make()

    # =================================================================================
    # Report
    # =================================================================================

    reporter = reporting.Reporter(context.space / 'report', self.report_title, context.name)

    log_files = [
        sample.get_context().clip.log_filename()
        for sample in context.samples
        if sample.clip
    ]
    if context.expression:
        # The expression count log takes the place of the filter logs.
        log_files.append(context.space / ('expression', 'counts_log.txt'))
    else:
        log_files.extend(
            sample.get_context().filter.log_filename()
            for sample in context.samples
            if sample.filter
        )

    reporter.report_logs(
        'alignment-statistics',
        log_files,
        filter=lambda sample, field: field != 'fragments',
    )

    if self.expression:
        io.symbolic_link(source=context.space / ('expression', 'report'), link_name=context.space / ('report', 'expression'))
        reporter.heading('<a href="expression/index.html">> Expression analysis</a>')

    if self.variants:
        io.symbolic_link(source=context.space / ('variants', 'report'), link_name=context.space / ('report', 'variants'))
        reporter.heading('<a href="variants/index.html">> Variants analysis</a>')

    if self.igv_plots:
        reporter.heading('IGV plots')
        reporter.p('These files show the depth of coverage. They can be viewed with the IGV genome browser.')

        genome_files = []
        if self.include_genome:
            genome_filename = context.reference.get_genome_filename()
            genome_dir = context.reference.get_genome_dir()
            genome_files.append(genome_filename)
            if genome_dir:
                base = os.path.split(genome_dir)[1]
                for entry in os.listdir(genome_dir):
                    source_path = os.path.join(genome_dir, entry)
                    archive_path = os.path.join(base, entry)
                    genome_files.append((source_path, archive_path))

        tdf_files = glob.glob(plot_space / '*.tdf')
        reporter.p(reporter.tar('igv-plots', genome_files + tdf_files))

    if self.include_bams:
        reporter.heading('BAM files')
        reporter.p('These BAM files contain the alignments of reads to the reference sequences.'
                   ' They can also be viewed using IGV.')

        bam_files = []
        for sample in self.samples:
            name = sample.output_dir
            bam_path = context.space / ('samples', name, 'alignments_filtered_sorted.bam')
            index_path = context.space / ('samples', name, 'alignments_filtered_sorted.bam.bai')
            bam_files.append((bam_path, name + '.bam'))
            bam_files.append((index_path, name + '.bam.bai'))
        reporter.p(reporter.tar('bam-files', bam_files))

    reporter.write('<p/><hr/>\n')
    reporter.p('nesoni version ' + nesoni.VERSION)
    reporter.close()
def run(self):
    """Run variant calling end to end and produce the variants report.

    Pipeline: optional ``Power_variant_call`` -> freebayes -> VCF
    filter -> optional snpEff -> ``variants.vcf`` symlink (plus
    ``.idx`` when present) -> ``Vcf_patch`` -> ``Vcf_nway``.  The
    report lists the filter log, the final VCF, the phylogenetic net
    and, when the power analysis ran, its log.

    Raises AssertionError if ``self.reference`` is None.
    """
    assert self.reference is not None, 'No reference directory given.'
    space = self.get_workspace()

    raw_prefix = space / 'variants-raw'
    filtered_prefix = space / 'variants-filtered'
    linked_vcf = space / 'variants.vcf'

    if self.analysis:
        nesoni.Power_variant_call(
            space / 'power',
            template__analysis=self.analysis,
            template__freebayes=self.freebayes,
            template__vcf_filter=self.vcf_filter,
            legacy=False,
        ).make()

    self.freebayes(raw_prefix, samples=self.samples).make()

    self.vcf_filter(filtered_prefix, space / 'variants-raw.vcf').make()

    if self.snpeff:
        self.snpeff(
            space / 'variants-filtered-annotated',
            self.reference,
            space / 'variants-filtered.vcf',
        ).make()
        filename = space / 'variants-filtered-annotated.vcf'
    else:
        filename = space / 'variants-filtered.vcf'

    index_filename = filename + '.idx'

    io.symbolic_link(source=filename, link_name=linked_vcf)
    if os.path.exists(index_filename):
        io.symbolic_link(source=index_filename, link_name=space / 'variants.vcf.idx')

    nesoni.Vcf_patch(space / 'patched', self.reference, space / 'variants.vcf').make()

    nesoni.Vcf_nway(
        space / 'net',
        space / 'variants.vcf',
        require='all',
        as_='splitstree',
    ).make()

    reporter = reporting.Reporter(space / 'report', 'Variants analysis')

    reporter.report_logs(
        None,
        [space / 'variants-filtered_log.txt'],
        renaming={'input': 'Found by freebayes',
                  'kept': 'Kept after quality filtering'},
    )

    reporter.p(reporter.get(filename))
    # Checked again here rather than reusing the earlier result.
    if os.path.exists(index_filename):
        reporter.p(reporter.get(index_filename) + ' (needed to view VCF file in IGV)')

    reporter.p(reporter.get(space / 'net.svg', title='Phylogenetic net'))

    if self.analysis:
        power_link = reporter.get(space / 'power_log.txt', title='Power report')
        reporter.p(
            power_link +
            '<br/>(Test of the ability of the pipeline to call various variants at various depths of coverage and in the presence of errors, using synthetic reads.)'
        )

    reporter.close()
def run(self):
    """Produce bigwig files and an ``index.html`` page for IGV.

    Input directories that hold an ``analyse-polya-batch.state`` file
    are replaced by the sample directories listed in that pickled
    state; all others are taken as sample directories themselves.
    For each sample the filtered, sorted BAM and its ``.bai`` index
    are symlinked into the workspace under the sample's name, the
    peaks file is linked as ``peaks.gff``, and ``Bam_to_bigwig`` is
    scheduled for the pooled BAMs and per sample at each scale.
    """
    working_dirs = []
    peaks_file = self.peaks_file

    for item in self.working_dirs:
        state_filename = os.path.join(item, 'analyse-polya-batch.state')
        if not os.path.exists(state_filename):
            working_dirs.append(item)
            continue

        with open(state_filename, 'rb') as f:
            state = pickle.load(f)
        for sample in state.samples:
            working_dirs.append(
                os.path.join(item, 'samples', sample.output_dir))

    if not peaks_file:
        peaks_file = os.path.join(self.pipeline_dir, "peaks",
                                  "relation-child.gff")

    sample_names = []
    workspaces = []
    for dirname in working_dirs:
        sample_names.append(os.path.split(dirname)[1])
    for dirname in working_dirs:
        workspaces.append(working_directory.Working(dirname, must_exist=True))

    workspace = self.get_workspace()

    with open(workspace / "index.html", "wb") as f:
        web.emit(
            f,
            "igv.html",
            dict(
                SAMPLES=json.dumps(sample_names),
                HAVE_NORM=json.dumps(bool(self.norm_file)),
                TITLE=self.title,
            ),
        )

    bams = [item / "alignments_filtered_sorted.bam" for item in workspaces]

    for i, sample_name in enumerate(sample_names):
        bam = bams[i]
        io.symbolic_link(bam, workspace / (sample_name + ".bam"))
        io.symbolic_link(bam + ".bai", workspace / (sample_name + ".bam.bai"))

    io.symbolic_link(peaks_file, workspace / "peaks.gff")

    if self.norm_file:
        mults = io.read_grouped_table(self.norm_file)['All']
        norm_mult = [
            float(mults[name]['Normalizing.multiplier'])
            for name in sample_names
        ]

    with nesoni.Stage() as stage:
        total_action = Bam_to_bigwig(
            workspace / "total",
            bam_files=bams,
            what="ambiguity,span,3p,polyaspan,polya3p",
        )
        total_action.process_make(stage)

        for i, (sample_name, bam) in enumerate(zip(sample_names, bams)):
            # norm_mult only exists when a normalization file was given.
            if self.norm_file:
                scales = [("raw", 1.0), ("norm", norm_mult[i])]
            else:
                scales = [("raw", 1.0)]

            for scale_desc, scale in scales:
                track_action = Bam_to_bigwig(
                    workspace / (sample_name + "-" + scale_desc),
                    bam_files=[bam],
                    what='span,3p,polyaspan,polya3p',
                    scale=scale,
                )
                track_action.process_make(stage)