def __init__(self, working_dir, must_exist):
    workspace.Workspace.__init__(self, working_dir, must_exist)
    # Instantiated purely for their side effect: create the images/ and
    # config/ subdirectories if they don't already exist.
    workspace.Workspace(self/'images', must_exist=False)
    workspace.Workspace(self/'config', must_exist=False)
    if must_exist:
        self.index = util.load(self/('config','index.pgz'))
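# Usage sketch (not in the original source; `Image_workspace` is an assumed
# name for the class this __init__ belongs to). nesoni workspaces create their
# directory when must_exist=False, so:
#
#     space = Image_workspace('run1', must_exist=False)  # lays out run1/images and run1/config
#     space = Image_workspace('run1', must_exist=True)   # also loads run1/config/index.pgz into space.index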
def run(self):
    assert self.extension is not None, '--extension must be specified'
    assert self.annotations is not None, '--annotations must be specified'
    
    outspace = self.get_workspace()
    working = workspace.Workspace(outspace/'working', must_exist=False)
    
    # Find modes of read ends in the samples.
    Find_peaks(
        working/'modes',
        filenames = self.samples,
        lap = self.lap,
        radius = self.radius,
        min_depth = self.min_depth,
        polya = self.polya,
        ).make()
    
    # Grow each mode into a fixed-length peak by shifting feature starts back.
    nesoni.Modify_features(
        working/'peaks',
        working/'modes.gff',
        shift_start = str(-self.peak_length),
        ).make()
    
    # Filter the peaks and relate them to genes in the annotation.
    Filter_and_relate_peaks_to_genes(
        outspace/'relation',
        parent = self.annotations,
        child = working/'peaks.gff',
        extension = self.extension,
        min_tail = self.min_tail,
        ).make()
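# Usage sketch (not in the original source): a hypothetical invocation of the
# peak-calling action above. The class name `Call_peaks` is an assumption; the
# parameter names are the ones run() actually reads.
#
#     Call_peaks(
#         'peaks-output',                            # output workspace
#         annotations = 'reference/annotations.gff',
#         extension = 400,                           # assumed value
#         samples = my_sample_files,                 # whatever Find_peaks accepts as `filenames`
#         ).make()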
def get_context(self):
    assert self.reference is not None, 'reference not given.'
    context = Context()
    context.action = self
    context.space = self.get_workspace()
    context.name = context.space.name
    context.sample_space = workspace.Workspace(context.space/'samples', False)
    context.reference = reference_directory.Reference(self.reference, True)
    
    for sample in self.samples:
        assert sample.reference is None, 'reference should not be specified within sample.'
        assert sample.output_dir, 'sample given without name.'
        assert os.path.sep not in sample.output_dir, 'sample name contains '+os.path.sep
    context.samples = [
        sample(
            output_dir = context.sample_space/sample.output_dir,
            reference = self.reference,
            )
        for sample in self.samples
        ]
    context.sample_dirs = [ item.output_dir for item in context.samples ]
    
    if not self.variants:
        context.variants = None
    else:
        context.variants = self.variants(
            context.space/'variants',
            self.reference,
            samples = context.sample_dirs,
            analysis = self.variants.analysis or self.template, # Use aligner from template unless explicitly given
            )
    
    if not self.expression:
        context.expression = None
    else:
        context.expression = self.expression(
            context.space/'expression',
            samples = context.sample_dirs,
            )
    
    return context
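# Note (not in the original source, but consistent with the calls above):
# nesoni action instances are callable, and calling one with keyword arguments
# returns a copy with those parameters overridden. That is what
# sample(output_dir=..., reference=...), self.variants(...) and
# self.expression(...) rely on here.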
def run(self):
    work = self.get_workspace()
    config = work.get_config()
    
    work_coordinates = workspace.Workspace(work/'coordinates', must_exist=False)
    
    # Per-image tallies of automatic and manual calls:
    # one row per image, one column per label.
    counts = [ [0]*len(config.labels) for i in xrange(len(work.index)) ]
    manual_counts = [ [0]*len(config.labels) for i in xrange(len(work.index)) ]
    
    for i, name in enumerate(work.index):
        print i, name
        seg = work.get_segmentation(i)
        calls = work.get_calls(i, True, True)
        manual_calls = work.get_calls(i, False, True)
        
        # Write the centre of each segmented object with its calls.
        with open(work_coordinates/(name+'.csv'),'wb') as f:
            print >> f, 'x,y,call,manual_label'
            for j in xrange(len(seg.bounds)):
                print >> f, '%d,%d,%s,%s' % (
                    seg.bounds[j].x+seg.bounds[j].width//2,
                    seg.bounds[j].y+seg.bounds[j].height//2,
                    calls[j] or '',
                    manual_calls[j] or '',
                    )
        
        for k, label in enumerate(config.labels):
            for item in calls:
                if item == label:
                    counts[i][k] += 1
            for item in manual_calls:
                if item == label:
                    manual_counts[i][k] += 1
    
    for filename, matrix in [
            ('totals.csv', counts),
            ('manual_totals.csv', manual_counts),
            ]:
        with open(work/filename,'wb') as f:
            print >> f, 'image,'+','.join(config.labels)
            for i, name in enumerate(work.index):
                print >> f, name+','+','.join(str(item) for item in matrix[i])
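# Sketch (not in the original source): the per-label tallying loops above can
# be written as a single pass with collections.Counter, which returns 0 for
# missing keys. A minimal standalone equivalent:
#
#     from collections import Counter
#
#     def tally(calls, labels):
#         c = Counter(item for item in calls if item)
#         return [ c[label] for label in labels ]
#
# counts[i] would then be tally(calls, config.labels).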
def run(self):
    context = self.get_context()
    
    with nesoni.Stage() as stage:
        for sample in context.samples:
            sample.process_make(stage)
    
    with nesoni.Stage() as stage:
        if context.variants:
            context.variants.process_make(stage)
        if context.expression:
            context.expression.process_make(stage)
    
    if self.igv_plots:
        plot_space = workspace.Workspace(context.space/'plot', False)
        self.igv_plots(
            prefix = plot_space/'plot',
            genome = context.reference.get_genome_filename(),
            norm_file = context.space/('expression','norm.csv') if context.expression else None,
            working_dirs = context.sample_dirs,
            ).make()
    
    # Reporting
    
    reporter = reporting.Reporter(context.space/'report', self.report_title, context.name)
    
    reporter.report_logs('alignment-statistics',
        [ sample.get_context().clip.log_filename()
          for sample in context.samples
          if sample.clip
          ] +
        [ sample.get_context().filter.log_filename()
          if not sample.count else
          sample.get_context().count.log_filename()
          for sample in context.samples
          if sample.filter or sample.count
          ],
        filter = lambda sample, field: field != 'fragments',
        )
    
    if self.expression:
        io.symbolic_link(source=context.space/('expression','report'), link_name=context.space/('report','expression'))
        reporter.heading('<a href="expression/index.html">> Expression analysis</a>')
    
    if self.variants:
        io.symbolic_link(source=context.space/('variants','report'), link_name=context.space/('report','variants'))
        reporter.heading('<a href="variants/index.html">> Variants analysis</a>')
    
    if self.igv_plots:
        reporter.heading('IGV plots')
        reporter.p(
            'These files show the depth of coverage. '
            'They can be viewed with the IGV genome browser.'
            )
        
        genome_files = []
        if self.include_genome:
            genome_filename = context.reference.get_genome_filename()
            genome_dir = context.reference.get_genome_dir()
            genome_files.append(genome_filename)
            if genome_dir:
                base = os.path.split(genome_dir)[1]
                for filename in os.listdir(genome_dir):
                    genome_files.append((
                        os.path.join(genome_dir, filename),
                        os.path.join(base, filename),
                        ))
        
        reporter.p(reporter.tar('igv-plots',
            genome_files + glob.glob(plot_space/'*.tdf'),
            ))
    
    if self.include_bams:
        reporter.heading('BAM files')
        reporter.p(
            'These BAM files contain the alignments of reads to the reference sequences.'
            ' They can also be viewed using IGV.'
            )
        
        bam_files = []
        for sample in self.samples:
            name = sample.output_dir
            bam_files.append((context.space/('samples',name,'alignments_filtered_sorted.bam'), name+'.bam'))
            bam_files.append((context.space/('samples',name,'alignments_filtered_sorted.bam.bai'), name+'.bam.bai'))
        reporter.p(reporter.tar('bam-files', bam_files))
    
    reporter.write('<p/><hr/>\n')
    reporter.p('nesoni version '+nesoni.VERSION)
    reporter.close()
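# Note (not in the original source): nesoni.Stage acts as a parallelism
# barrier here. Actions submitted with process_make(stage) run concurrently,
# and leaving the `with` block waits for all of them, so every sample is
# processed before variant/expression analysis begins, and both complete
# before the report is assembled.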
def __init__(self, action):
    self.action = action
    self.work = action.get_workspace()
    self.ucsc = workspace.Workspace(self.work/'ucsc')
    self.tables = {}
def run(self):
    assert self.extension is not None, '--extension must be specified'
    
    # Also allow simply the analyse-polya-batch directory
    working_dirs = []
    for item in self.working_dirs:
        state_filename = os.path.join(item,'analyse-polya-batch.state')
        if not os.path.exists(state_filename):
            working_dirs.append(item)
        else:
            with open(state_filename,'rb') as f:
                state = pickle.load(f)
            for sample in state.samples:
                working_dirs.append(os.path.join(item,'samples',sample.output_dir))
    
    work = self.get_workspace()
    
    if self.reuse:
        pickle_workspace = workspace.Workspace(os.path.join(self.reuse,'pickles'))
    else:
        pickle_workspace = workspace.Workspace(work/'pickles')
    plot_workspace = workspace.Workspace(work/'plots')
    
    pickle_filenames = []
    
    file_prefix = self.file_prefix
    if file_prefix and not file_prefix.endswith('-'):
        file_prefix += '-'
    
    with nesoni.Stage() as stage:
        for dir in working_dirs:
            working = working_directory.Working(dir, must_exist=True)
            pickle_filenames.append(pickle_workspace/working.name + '.pickle.gz')
            if self.reuse: continue
            Tail_count(
                pickle_workspace/working.name,
                working_dir = dir,
                annotations = self.annotations,
                types = self.types,
                parts = self.parts,
                extension = self.extension,
                ).process_make(stage)
    
    assert len(set(pickle_filenames)) == len(pickle_filenames), "Duplicate sample name."
    
    with nesoni.Stage() as stage:
        Aggregate_tail_counts(
            output_dir = self.output_dir,
            pickles = pickle_filenames,
            tail = self.tail,
            adaptor = self.adaptor,
            ).process_make(stage)
    
    nesoni.Norm_from_counts(
        prefix = work/'norm',
        counts_filename = work/'counts.csv',
        ).make()
    
    similarity = nesoni.Similarity(
        prefix = plot_workspace/'similarity',
        counts = work/'counts.csv',
        )
    
    plot_pooleds = [
        Plot_pooled(
            prefix = plot_workspace/'pooled-heatmap',
            aggregate = self.output_dir,
            #min_tails = min_tails,
            min_tails = 1,
            top = 100,
            )
        #for min_tails in (20,50,100,200,500,1000,2000)
        ]
    
    #plot_comparisons = [
    #    Plot_comparison(
    #        prefix = plot_workspace/('comparison-min-tails-%d-min-span-%.1f' % (min_tails,min_span)),
    #        aggregate = self.output_dir,
    #        min_tails = min_tails,
    #        min_span = min_span,
    #        )
    #    for min_tails in [50,100,200,500]
    #    for min_span in [2,4,8,10,15,20,25,30]
    #    ]
    #
    heatmaps = [
        nesoni.Heatmap(
            prefix = plot_workspace/('heatmap-min-fold-%.1f' % fold),
            counts = work/'counts.csv',
            norm_file = work/'norm.csv',
            min_span = math.log(fold)/math.log(2.0),
            )
        for fold in [1.5, 2.0, 4.0, 6.0, 8.0, 10.0, 20.0, 30.0, 40.0]
        ]
    
    with nesoni.Stage() as stage:
        similarity.process_make(stage)
        for action in plot_pooleds + heatmaps: #+ plot_comparisons:
            action.process_make(stage)
    
    r = reporting.Reporter(
        work/'report',
        self.title,
        file_prefix,
        style = web.style(),
        )
    
    similarity.report(r)
    
    r.heading('Poly(A) tail length distribution')
    
    r.p('This plot shows the distribution of lengths of poly(A) tail sequence in top expressed features. '
        'Its main purpose is to assess data quality. '
        'If the plot has many bright spots there may be many identical reads, possibly due to non-random digestion.')
    
    r.p('Only reads with a poly(A) sequence of four or more bases are used.')
    
    for heatmap in plot_pooleds:
        r.report_heatmap(heatmap)
    
    r.heading('Heatmaps')
    
    r.p('Genes were selected based '
        'on there being at least some fold change difference between '
        'some pair of samples.')
    
    for heatmap in heatmaps:
        r.report_heatmap(heatmap)
    
    #r.heading('Average poly(A) tail length and its relation to expression levels')
    #
    #r.p(
    #    'Only reads with a poly(A) sequence of four or more bases were included in the averages.'
    #    )
    #
    #r.p(
    #    'Genes were selected based on there being at least a certain number of reads with poly(A) sequence in <i>each</i> sample (min-tails), '
    #    'and on there being at least some amount of difference in average tail length between samples (min-span).'
    #    )
    #
    #for heatmap in plot_comparisons:
    #    r.report_heatmap(heatmap)
    
    r.close()
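# Note (not in the original source): min_span above is the heatmap selection
# threshold expressed as a log2 fold change, since log(fold)/log(2.0) is
# log2(fold); fold = 2.0 gives min_span = 1.0, fold = 8.0 gives 3.0.
# An equivalent, arguably clearer spelling:
#
#     min_span = math.log(fold, 2.0)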