def __init__(self, working_dir, must_exist):
     # Initialise the underlying workspace directory.
     workspace.Workspace.__init__(self, working_dir, must_exist)
     
     # Make sure the expected sub-directories are present
     # (constructing a Workspace with must_exist=False creates it).
     for subdir in ('images', 'config'):
         workspace.Workspace(self/subdir, must_exist=False)
     
     # An already-existing workspace carries a serialized index on disk.
     if must_exist:
         self.index = util.load(self/('config','index.pgz'))
Example #2
0
 def run(self):
     """Call peak 'modes', widen them into fixed-length peaks, then
     relate the peaks to the supplied gene annotations."""
     assert self.extension is not None, '--extension must be specified'
     assert self.annotations is not None, '--annotations must be specified'
 
     out_space = self.get_workspace()
     scratch = workspace.Workspace(out_space / 'working', must_exist=False)
     
     # Step 1: find candidate peaks ("modes") in the sample files.
     finder = Find_peaks(
         scratch/'modes',
         filenames = self.samples,
         lap = self.lap,
         radius = self.radius,
         min_depth = self.min_depth,
         polya = self.polya,
         )
     finder.make()
     
     # Step 2: shift each mode's start back by peak_length to form the peak.
     widener = nesoni.Modify_features(
         scratch/'peaks',
         scratch/'modes.gff',
         shift_start = str(-self.peak_length),
         )
     widener.make()
     
     # Step 3: filter the peaks and associate them with annotated genes.
     relater = Filter_and_relate_peaks_to_genes(
         out_space/'relation',
         parent = self.annotations,
         child = scratch/'peaks.gff',
         extension = self.extension,
         min_tail = self.min_tail,
         )
     relater.make()
Example #3
0
    def get_context(self):
        """Build and return a Context wiring together the workspace,
        reference, per-sample actions, and the optional variants and
        expression sub-analyses."""
        assert self.reference is not None, 'reference not given.'

        ctx = Context()
        ctx.action = self
        ctx.space = self.get_workspace()
        ctx.name = ctx.space.name
        ctx.sample_space = workspace.Workspace(ctx.space / 'samples', False)
        ctx.reference = reference_directory.Reference(self.reference, True)

        # Validate every sample specification up front.
        for sample in self.samples:
            assert sample.reference is None, 'reference should not be specified within sample.'
            assert sample.output_dir, 'sample given without name.'
            assert os.path.sep not in sample.output_dir, 'sample name contains ' + os.path.sep

        # Re-instantiate each sample action with its output directory and
        # the shared reference filled in.
        ctx.samples = []
        for sample in self.samples:
            ctx.samples.append(sample(
                output_dir=ctx.sample_space / sample.output_dir,
                reference=self.reference,
                ))
        ctx.sample_dirs = [item.output_dir for item in ctx.samples]

        if self.variants:
            ctx.variants = self.variants(
                ctx.space / 'variants',
                self.reference,
                samples=ctx.sample_dirs,
                #Use aligner from template unless aligner explicitly given
                analysis=self.variants.analysis or self.template,
                )
        else:
            ctx.variants = None

        if self.expression:
            ctx.expression = self.expression(
                ctx.space / 'expression',
                samples=ctx.sample_dirs,
                )
        else:
            ctx.expression = None

        return ctx
Example #4
0
    def run(self):
        work = self.get_workspace()
        config = work.get_config()
        
        work_coordinates = workspace.Workspace(work/'coordinates', must_exist=False)

        counts = [ [0]*len(config.labels) for i in xrange(len(work.index)) ]
        manual_counts = [ [0]*len(config.labels) for i in xrange(len(work.index)) ]
        
        for i, name in enumerate(work.index):
            print i, name
            
            seg = work.get_segmentation(i)
            calls = work.get_calls(i, True, True)
            manual_calls = work.get_calls(i, False, True)
            
            with open(work_coordinates/(name+'.csv'),'wb') as f:
                print >> f, 'x,y,call,manual_label'
                for j in xrange(len(seg.bounds)):
                    print >> f, '%d,%d,%s,%s' % (
                        seg.bounds[j].x+seg.bounds[j].width//2,
                        seg.bounds[j].y+seg.bounds[j].height//2,
                        calls[j] or '',
                        manual_calls[j] or '',
                        )

            for k, label in enumerate(config.labels):
                for item in calls:
                    if item == label:
                        counts[i][k] += 1
                for item in manual_calls:
                    if item == label:
                        manual_counts[i][k] += 1
        
        for filename,matrix in [
            ('totals.csv', counts),
            ('manual_totals.csv', manual_counts),
            ]:
            with open(work/filename,'wb') as f:
                print >> f, 'image,' + ','.join(config.labels)
                for i, name in enumerate(work.index):
                    print >> f, name+','+','.join(str(item) for item in matrix[i])
Example #5
0
    def run(self):
        """Run the complete analysis and assemble its HTML report.

        Stages: per-sample processing, then the variants/expression
        sub-analyses, optional IGV depth plots, and finally a report
        containing alignment statistics, links to sub-reports, and
        (optionally) tar files of plots and BAMs.
        """
        context = self.get_context()

        # All samples are processed in parallel within one stage.
        with nesoni.Stage() as stage:
            for sample in context.samples:
                sample.process_make(stage)

        # Variants and expression both depend on the processed samples but
        # not on each other, so they share a stage.
        with nesoni.Stage() as stage:
            if context.variants:
                context.variants.process_make(stage)

            if context.expression:
                context.expression.process_make(stage)

        if self.igv_plots:
            # NOTE: plot_space is only defined under this guard; it is used
            # again below inside another `if self.igv_plots:` block.
            plot_space = workspace.Workspace(context.space / 'plot', False)
            self.igv_plots(
                prefix=plot_space / ('plot'),
                genome=context.reference.get_genome_filename(),
                norm_file=context.space /
                ('expression', 'norm.csv') if context.expression else None,
                working_dirs=context.sample_dirs,
            ).make()

        # =================================================================================
        # =================================================================================
        # =================================================================================

        reporter = reporting.Reporter(context.space / 'report',
                                      self.report_title, context.name)

        # Clip logs, plus either the count log (when counting was done) or
        # the filter log, per sample; the 'fragments' field is excluded.
        reporter.report_logs(
            'alignment-statistics',
            [
                sample.get_context().clip.log_filename()
                for sample in context.samples if sample.clip
            ] + [
                sample.get_context().filter.log_filename() if not sample.count
                else sample.get_context().count.log_filename()
                for sample in context.samples if sample.filter or sample.count
            ],
            filter=lambda sample, field: field != 'fragments',
        )

        # Link each sub-analysis's own report into this report directory and
        # expose it via a heading.
        if self.expression:
            io.symbolic_link(source=context.space / ('expression', 'report'),
                             link_name=context.space /
                             ('report', 'expression'))
            reporter.heading(
                '<a href="expression/index.html">&gt; Expression analysis</a>')

        if self.variants:
            io.symbolic_link(source=context.space / ('variants', 'report'),
                             link_name=context.space / ('report', 'variants'))
            reporter.heading(
                '<a href="variants/index.html">&gt; Variants analysis</a>')

        if self.igv_plots:
            reporter.heading('IGV plots')
            reporter.p(
                'These files show the depth of coverage. They can be viewed with the IGV genome browser.'
            )

            genome_files = []
            if self.include_genome:
                genome_filename = context.reference.get_genome_filename()
                genome_dir = context.reference.get_genome_dir()
                genome_files.append(genome_filename)
                if genome_dir:
                    # Bundle every file in the genome directory under its
                    # directory's base name inside the tar.
                    base = os.path.split(genome_dir)[1]
                    for filename in os.listdir(genome_dir):
                        genome_files.append(
                            (os.path.join(genome_dir, filename),
                             os.path.join(base, filename)))

            reporter.p(
                reporter.tar('igv-plots',
                             genome_files + glob.glob(plot_space / '*.tdf')))

        if self.include_bams:
            reporter.heading('BAM files')

            reporter.p(
                'These BAM files contain the alignments of reads to the reference sequences.'
                ' They can also be viewed using IGV.')

            # Each sample contributes its filtered+sorted BAM and its .bai
            # index, renamed to <sample>.bam / <sample>.bam.bai in the tar.
            bam_files = []
            for sample in self.samples:
                name = sample.output_dir
                bam_files.append(
                    (context.space /
                     ('samples', name, 'alignments_filtered_sorted.bam'),
                     name + '.bam'))
                bam_files.append(
                    (context.space /
                     ('samples', name, 'alignments_filtered_sorted.bam.bai'),
                     name + '.bam.bai'))
            reporter.p(reporter.tar('bam-files', bam_files))

        reporter.write('<p/><hr/>\n')
        reporter.p('nesoni version ' + nesoni.VERSION)
        reporter.close()
 def __init__(self, action):
     """Cache the action, its workspace, a 'ucsc' sub-workspace, and an
     empty table registry."""
     work = action.get_workspace()
     self.action = action
     self.work = work
     self.ucsc = workspace.Workspace(work / 'ucsc')
     self.tables = {}
Example #7
0
    def run(self):
        """Count poly(A) tails across sample directories and report.

        Pipeline: expand any analyse-polya-batch directory into its
        per-sample directories, pickle per-sample tail counts (or reuse a
        previous run's pickles), aggregate them, compute normalisation,
        draw similarity/heatmap plots, then write an HTML report.
        """
        assert self.extension is not None, '--extension must be specified'

        # Also allow simply the analyse-polya-batch directory
        working_dirs = []
        for item in self.working_dirs:
            state_filename = os.path.join(item, 'analyse-polya-batch.state')
            if not os.path.exists(state_filename):
                working_dirs.append(item)
            else:
                # Batch directory: expand to its individual sample dirs.
                with open(state_filename, 'rb') as f:
                    state = pickle.load(f)

                for sample in state.samples:
                    working_dirs.append(
                        os.path.join(item, 'samples', sample.output_dir))

        work = self.get_workspace()

        # With --reuse, take pickles from a previous run's output directory
        # instead of recomputing Tail_count below.
        if self.reuse:
            pickle_workspace = workspace.Workspace(
                os.path.join(self.reuse, 'pickles'))
        else:
            pickle_workspace = workspace.Workspace(work / 'pickles')
        plot_workspace = workspace.Workspace(work / 'plots')

        pickle_filenames = []

        # Normalise the report file prefix to end with a dash.
        file_prefix = self.file_prefix
        if file_prefix and not file_prefix.endswith('-'):
            file_prefix += '-'

        with nesoni.Stage() as stage:
            for dir in working_dirs:
                working = working_directory.Working(dir, must_exist=True)
                # (workspace / name) + suffix: path first, then extension.
                pickle_filenames.append(pickle_workspace / working.name +
                                        '.pickle.gz')
                # When reusing, the pickle already exists; skip recomputation.
                if self.reuse: continue
                Tail_count(
                    pickle_workspace / working.name,
                    working_dir=dir,
                    annotations=self.annotations,
                    types=self.types,
                    parts=self.parts,
                    extension=self.extension,
                ).process_make(stage)

        # Sample names double as pickle filenames, so they must be unique.
        assert len(set(pickle_filenames)) == len(
            pickle_filenames), "Duplicate sample name."

        with nesoni.Stage() as stage:
            Aggregate_tail_counts(output_dir=self.output_dir,
                                  pickles=pickle_filenames,
                                  tail=self.tail,
                                  adaptor=self.adaptor).process_make(stage)

        # Library-size normalisation factors derived from the count matrix.
        nesoni.Norm_from_counts(
            prefix=work / 'norm',
            counts_filename=work / 'counts.csv',
        ).make()

        similarity = nesoni.Similarity(
            prefix=plot_workspace / 'similarity',
            counts=work / 'counts.csv',
        )

        plot_pooleds = [
            Plot_pooled(
                prefix=plot_workspace / 'pooled-heatmap',
                aggregate=self.output_dir,
                #min_tails = min_tails,
                min_tails=1,
                top=100,
            )
            #for min_tails in (20,50,100,200,500,1000,2000)
        ]

        #plot_comparisons = [
        #    Plot_comparison(
        #        prefix = plot_workspace/('comparison-min-tails-%d-min-span-%.1f' % (min_tails,min_span)),
        #        aggregate = self.output_dir,
        #        min_tails = min_tails,
        #        min_span = min_span,
        #        )
        #    for min_tails in [50,100,200,500]
        #    for min_span in [2,4,8,10,15,20,25,30]
        #    ]
        #
        heatmaps = [
            nesoni.Heatmap(
                prefix=plot_workspace / ('heatmap-min-fold-%.1f' % fold),
                counts=work / 'counts.csv',
                norm_file=work / 'norm.csv',
                # Convert the fold-change threshold to a log2 span.
                min_span=math.log(fold) / math.log(2.0),
            ) for fold in [1.5, 2.0, 4.0, 6.0, 8.0, 10.0, 20.0, 30.0, 40.0]
        ]

        # All plots are independent of each other, so they share a stage.
        with nesoni.Stage() as stage:
            similarity.process_make(stage)
            for action in plot_pooleds + heatmaps:  #+ plot_comparisons:
                action.process_make(stage)

        r = reporting.Reporter(
            work / 'report',
            self.title,
            file_prefix,
            style=web.style(),
        )

        similarity.report(r)

        r.heading('Poly(A) tail length distribution')

        r.p('This plot shows the distribution of lengths of poly(A) tail sequence in top expressed features. '
            'Its main purpose is to assess data quality. '
            'If the plot has many bright spots there may be many identical reads, possibly due to non-random digestion.'
            )

        r.p('Only reads with a poly(A) sequence of four or more bases are used.'
            )

        for heatmap in plot_pooleds:
            r.report_heatmap(heatmap)

        r.heading('Heatmaps')

        r.p('Genes were selected based '
            'on there being at least some fold change difference between '
            'some pair of samples.')

        for heatmap in heatmaps:
            r.report_heatmap(heatmap)

        #r.heading('Average poly(A) tail length and its relation to expression levels')
        #
        #r.p(
        #    'Only reads with a poly(A) sequence of four or more bases was included in the averages.'
        #    )
        #
        #r.p(
        #    'Genes were selected based on there being at least a certain number of reads with poly(A) sequence in <i>each</i> sample (min-tails), '
        #    'and on there being at least some amount of difference in average tail length between samples (min-span).'
        #    )
        #
        #for heatmap in plot_comparisons:
        #    r.report_heatmap(heatmap)

        r.close()