예제 #1
0
 def run(self):
     """Sort the input files into sequences and annotations, then build indexes.

     Every entry of ``self.filenames`` is tested with ``io.is_sequence_file``
     and ``annotation.is_annotation_file``; a file may be counted as both.

     If no sequence files were given, the reference in ``self.output_dir`` is
     opened with ``must_exist=True``. Otherwise it is opened with
     ``must_exist=False`` and the given sequences and annotations are set on it.

     The build steps selected by ``self.genome``, ``self.bowtie``, ``self.ls``,
     ``self.cs`` and ``self.snpeff`` are then submitted to a ``legion.Stage``.

     Raises:
         grace.Error: if a file is neither a sequence nor an annotation file,
             or if annotation files are given without any sequence file.
     """
     sequences = [ ]
     annotations = [ ]
     for filename in self.filenames:
         is_sequence = io.is_sequence_file(filename)
         if is_sequence:
             sequences.append(filename)
         is_annotation = annotation.is_annotation_file(filename)
         if is_annotation:
             annotations.append(filename)
         if not (is_sequence or is_annotation):
             raise grace.Error(filename + ' is neither a sequence file nor an annotation file that nesoni can read.')

     if not sequences:
         # Explicit raise rather than assert: asserts are removed under
         # "python -O", which would let the annotations be silently ignored.
         if annotations:
             raise grace.Error('Annotations given without any reference sequences.')
         reference = Reference(self.output_dir, must_exist=True)
     else:
         reference = Reference(self.output_dir, must_exist=False)
         reference.set_sequences(sequences)
         reference.set_annotations(annotations)

     with legion.Stage() as stage:
         if self.genome:
             stage.process(reference.build_genome, self.genome_select)
         # The external-tool builds are gated by config.apply_ifavailable_*,
         # called with the option value and the program/jar name.
         if config.apply_ifavailable_program(self.bowtie, 'bowtie2-build'):
             stage.process(reference.build_bowtie_index)
         if config.apply_ifavailable_program(self.ls, 'gmapper-ls'):
             stage.process(reference.build_shrimp_mmap, False)
         if config.apply_ifavailable_program(self.cs, 'gmapper-cs'):
             stage.process(reference.build_shrimp_mmap, True)
         if config.apply_ifavailable_jar(self.snpeff, 'snpEff.jar'):
             stage.process(reference.build_snpeff)
예제 #2
0
 def run(self):
     """Sort the input files into sequences and annotations, then build indexes.

     Every entry of ``self.filenames`` is tested with ``io.is_sequence_file``
     and ``annotation.is_annotation_file``; a file may be counted as both.

     If no sequence files were given, the reference in ``self.output_dir`` is
     opened with ``must_exist=True``. Otherwise it is opened with
     ``must_exist=False`` and the given sequences and annotations are set on it.

     The builds selected by ``self.ls``, ``self.cs``, ``self.bowtie``,
     ``self.genome`` and ``self.snpeff`` then run one after another. The file
     at ``self.log_filename()`` is opened for writing and passed to the
     shrimp and bowtie builders.

     Raises:
         grace.Error: if a file is neither a sequence nor an annotation file,
             or if annotation files are given without any sequence file.
     """
     sequences = [ ]
     annotations = [ ]
     for filename in self.filenames:
         is_sequence = io.is_sequence_file(filename)
         if is_sequence:
             sequences.append(filename)
         is_annotation = annotation.is_annotation_file(filename)
         if is_annotation:
             annotations.append(filename)
         if not (is_sequence or is_annotation):
             raise grace.Error(filename + ' is neither a sequence file nor an annotation file that nesoni can read.')

     if not sequences:
         # Explicit raise rather than assert: asserts are removed under
         # "python -O", which would let the annotations be silently ignored.
         if annotations:
             raise grace.Error('Annotations given without any reference sequences.')
         reference = Reference(self.output_dir, must_exist=True)
     else:
         reference = Reference(self.output_dir, must_exist=False)
         reference.set_sequences(sequences)
         reference.set_annotations(annotations)

     # The log file is opened (and truncated) even when no build is selected.
     with open(self.log_filename(),'wb') as f:
         if self.ls:
             reference.build_shrimp_mmap(False, f)
         if self.cs:
             reference.build_shrimp_mmap(True, f)
         if self.bowtie:
             reference.build_bowtie_index(f)
         if self.genome:
             reference.build_genome(self.genome_select)
         if self.snpeff:
             reference.build_snpeff()
예제 #3
0
    def run(self):
        """Write a ``<prefix>.genome`` zip archive plus per-sequence text files.

        Every entry of ``self.filenames`` is classified as a sequence file
        and/or an annotation file. Inside the directory ``self.prefix``
        (created if missing) this writes:

        * ``property.txt``: key=value settings naming the other files,
        * ``<base>.gff``: produced by ``trivia.As_gff`` from the annotations,
        * ``<base>_cytoband.txt``: one ``name<TAB>0<TAB>length`` line per sequence,
        * ``<name>.txt``: the raw sequence, one file per sequence.

        The first three are zipped with ``zip -j`` into ``self.prefix +
        '.genome'`` (any existing archive is removed first) and then deleted
        from the directory. The per-sequence ``.txt`` files are left in place.

        Raises:
            AssertionError: if a file is neither a sequence nor an annotation
                file, or if a sequence name contains ``/``.
        """
        base = os.path.split(self.prefix)[1]

        annotations = [ ]
        sequences = [ ]

        for filename in self.filenames:
            recognized = False
            if io.is_sequence_file(filename):
                sequences.append(filename)
                recognized = True
            if annotation.is_annotation_file(filename):
                annotations.append(filename)
                recognized = True
            assert recognized, 'File is neither a recognized sequence or annotation file'

        cytoband_filename = os.path.join(self.prefix,base+'_cytoband.txt')
        property_filename = os.path.join(self.prefix,'property.txt')
        gff_filename = os.path.join(self.prefix,base+'.gff')
        output_filenames = [ cytoband_filename, property_filename, gff_filename ]

        if not os.path.exists(self.prefix):
            os.mkdir(self.prefix)

        # "with" guarantees the handle is closed even if a write fails.
        with open(property_filename,'wb') as f_prop:
            print >> f_prop, 'ordered=true'
            print >> f_prop, 'id=%s' % base
            print >> f_prop, 'name=%s' % (self.name or base)
            print >> f_prop, 'cytobandFile=%s_cytoband.txt' % base
            print >> f_prop, 'geneFile=%s.gff' % base
            print >> f_prop, 'sequenceLocation=%s' % base

        trivia.As_gff(output=gff_filename,
               filenames=annotations,
               exclude=[ 'gene', 'source' ]
        ).run()

        with open(cytoband_filename,'wb') as f_cyt:
            for filename in sequences:
                for name, seq in io.read_sequences(filename):
                    # The name becomes a file name inside self.prefix, so it
                    # must not contain a path separator.
                    assert '/' not in name
                    with open(os.path.join(self.prefix, name + '.txt'), 'wb') as f_seq:
                        f_seq.write(seq)
                    print >> f_cyt, '%s\t0\t%d' % (name, len(seq))

        genome_filename = self.prefix + '.genome'
        if os.path.exists(genome_filename):
            os.unlink(genome_filename)
        io.execute(
            ['zip', '-j', io.abspath(genome_filename)] +
            [ io.abspath(item) for item in output_filenames ]
        )
        # The archived files are no longer needed as loose files.
        for filename in output_filenames:
            if os.path.exists(filename):
                os.unlink(filename)
예제 #4
0
 def run(self):
     """Create the reference in ``self.output_dir`` and build shrimp mmaps.

     Each entry of ``self.filenames`` is checked with
     ``io.is_sequence_file`` and ``annotation.is_annotation_file``; a file
     may be counted as both. The collected lists are set on a ``Reference``
     opened with ``must_exist=False``. ``build_shrimp_mmap`` is then called
     with ``False`` when ``self.ls`` is set and with ``True`` when
     ``self.cs`` is set.

     Raises:
         grace.Error: if a file is neither a sequence nor an annotation file.
     """
     sequences = [ ]
     annotations = [ ]
     for path in self.filenames:
         as_sequence = io.is_sequence_file(path)
         if as_sequence:
             sequences.append(path)
         as_annotation = annotation.is_annotation_file(path)
         if as_annotation:
             annotations.append(path)
         if as_sequence or as_annotation:
             continue
         raise grace.Error(path + ' is neither a sequence file nor an annotation file that nesoni can read.')

     reference = Reference(self.output_dir, must_exist=False)
     reference.set_sequences(sequences)
     reference.set_annotations(annotations)

     if self.ls:
         reference.build_shrimp_mmap(False)
     if self.cs:
         reference.build_shrimp_mmap(True)
예제 #5
0
    def run(self):
        """Set up the reference directory and build the requested shrimp mmaps.

        Input files from ``self.filenames`` are split into sequence files and
        annotation files (one file can land in both lists). A ``Reference``
        for ``self.output_dir`` is opened with ``must_exist=False`` and given
        both lists. ``build_shrimp_mmap(False)`` runs if ``self.ls`` is truthy
        and ``build_shrimp_mmap(True)`` runs if ``self.cs`` is truthy.

        Raises:
            grace.Error: if a file is neither a sequence nor an annotation file.
        """
        sequences = []
        annotations = []
        for candidate in self.filenames:
            matches = 0
            if io.is_sequence_file(candidate):
                sequences.append(candidate)
                matches += 1
            if annotation.is_annotation_file(candidate):
                annotations.append(candidate)
                matches += 1
            if matches == 0:
                message = (
                    candidate +
                    ' is neither a sequence file nor an annotation file that nesoni can read.'
                )
                raise grace.Error(message)

        reference = Reference(self.output_dir, must_exist=False)
        reference.set_sequences(sequences)
        reference.set_annotations(annotations)

        if self.ls:
            reference.build_shrimp_mmap(False)
        if self.cs:
            reference.build_shrimp_mmap(True)
예제 #6
0
    def run(self):
        """Run FreeBayes on the given samples and write ``<prefix>.vcf``.

        Each entry of ``self.samples`` is handled as follows:

        * a BAM file (``sam.is_bam``) is used directly;
        * a directory is opened as a working directory, contributing its
          filtered sorted BAM, a ``##sampleTags=`` header line and, if no
          other reference is found, its reference FASTA;
        * a sequence file is taken as the reference FASTA.

        Entries matching none of these are ignored.

        If ``self.depth_limit`` is set, each BAM is first passed through
        ``sam.Bam_depth_limit``. Multiple BAMs are merged into one before
        FreeBayes runs. The FreeBayes output is copied to the VCF with the
        extra header lines inserted before the first line that does not start
        with ``##``. Finally ``index_vcf`` is called on the result.

        Raises:
            grace.Error: if more than one reference FASTA file is given, or
                if no reference can be determined.
        """
        bams = [ ]
        reference = None
        fallback_reference = None

        extra = [ ]

        for sample in self.samples:
            if sam.is_bam(sample):
                bams.append(sample)
            elif os.path.isdir(sample):
                working = working_directory.Working(sample,True)
                bams.append( working.get_filtered_sorted_bam() )
                extra.append( '##sampleTags=' + ','.join(working.get_tags()) )
                if fallback_reference is None:
                    fallback_reference = working.get_reference().reference_fasta_filename()
            elif io.is_sequence_file(sample):
                # Explicit raise rather than assert: asserts are removed under
                # "python -O", which would let a later FASTA silently win.
                if reference is not None:
                    raise grace.Error('Only one reference FASTA file allowed.')
                reference = sample

        # An explicitly given FASTA takes priority over one found via a
        # working directory.
        if reference is None:
            reference = fallback_reference
        if reference is None:
            raise grace.Error('No reference FASTA file given.')

        with nesoni.Stage() as stage:
            tempspace = stage.enter( workspace.tempspace() )
            if self.depth_limit:
                with nesoni.Stage() as stage2:
                    for i, bam in enumerate(bams):
                        sam.Bam_depth_limit(
                            tempspace/('%d'%i),
                            bam,
                            depth=self.depth_limit
                            ).process_make(stage2)
                        # From here on use the depth-limited copy.
                        bams[i] = tempspace/('%d.bam'%i)

            # FreeBayes claims to handle multiple bams, but it doesn't actually work
            if len(bams) > 1:
                sam.Bam_merge(tempspace/'merged', bams=bams, index=False).run()
                bams = [ tempspace/'merged.bam' ]

            command = [
                'freebayes',
                '-f', reference,
                '--ploidy',str(self.ploidy),
                '--pvar',str(self.pvar),
                ] + self.freebayes_options + bams

            self.log.log('Running: '+' '.join(command)+'\n')

            f_out = stage.enter( open(self.prefix+'.vcf','wb') )
            f_in  = stage.enter( io.pipe_from(command) )
            done_extra = False
            for line in f_in:
                # Insert the extra header lines once, just before the first
                # line that is not a "##" header line.
                if not done_extra and not line.startswith('##'):
                    for extra_line in extra:
                        f_out.write(extra_line+'\n')
                    done_extra = True
                f_out.write(line)

        index_vcf(self.prefix+'.vcf')
예제 #7
0
    def run(self):
        """Call variants with FreeBayes and write an indexed ``<prefix>.vcf``.

        ``self.samples`` may mix BAM files, working directories and at most
        one reference FASTA file. A working directory supplies its filtered
        sorted BAM and a ``##sampleTags=`` header line; the first one seen
        also supplies a fallback reference FASTA, used only when no FASTA
        file was given directly. Samples of any other kind are skipped.

        When ``self.depth_limit`` is truthy every BAM is replaced by the
        output of ``sam.Bam_depth_limit``. More than one BAM is merged with
        ``sam.Bam_merge`` before FreeBayes is invoked. The extra header lines
        are written into the VCF immediately before the first output line
        that does not start with ``##``. ``index_vcf`` runs last.

        Raises:
            grace.Error: if more than one reference FASTA file is given, or
                if no reference FASTA is available at all.
        """
        bams = []
        reference = None
        reference2 = None

        extra = []

        for sample in self.samples:
            if sam.is_bam(sample):
                bams.append(sample)
            elif os.path.isdir(sample):
                working = working_directory.Working(sample, True)
                bams.append(working.get_filtered_sorted_bam())
                extra.append('##sampleTags=' + ','.join(working.get_tags()))
                if reference2 is None:
                    reference2 = working.get_reference(
                    ).reference_fasta_filename()
            elif io.is_sequence_file(sample):
                # Raised explicitly instead of asserted, so the check still
                # runs when Python is started with -O.
                if reference is not None:
                    raise grace.Error('Only one reference FASTA file allowed.')
                reference = sample

        # Fall back to the reference from a working directory.
        if reference is None:
            reference = reference2
        if reference is None:
            raise grace.Error('No reference FASTA file given.')

        with nesoni.Stage() as stage:
            tempspace = stage.enter(workspace.tempspace())
            if self.depth_limit:
                with nesoni.Stage() as stage2:
                    for index, bam in enumerate(bams):
                        sam.Bam_depth_limit(
                            tempspace / ('%d' % index),
                            bam,
                            depth=self.depth_limit).process_make(stage2)
                        # Later steps read the depth-limited file instead.
                        bams[index] = tempspace / ('%d.bam' % index)

            # FreeBayes claims to handle multiple bams, but it doesn't actually work
            if len(bams) > 1:
                sam.Bam_merge(tempspace / 'merged', bams=bams,
                              index=False).run()
                bams = [tempspace / 'merged.bam']

            command = [
                'freebayes',
                '-f',
                reference,
                '--ploidy',
                str(self.ploidy),
                '--pvar',
                str(self.pvar),
            ] + self.freebayes_options + bams

            self.log.log('Running: ' + ' '.join(command) + '\n')

            f_out = stage.enter(open(self.prefix + '.vcf', 'wb'))
            f_in = stage.enter(io.pipe_from(command))
            done_extra = False
            for line in f_in:
                # The extra lines go in once, ahead of the first line that
                # is not a "##" header line.
                if not done_extra and not line.startswith('##'):
                    for extra_line in extra:
                        f_out.write(extra_line + '\n')
                    done_extra = True
                f_out.write(line)

        index_vcf(self.prefix + '.vcf')