Beispiel #1
0
    def phylogeny(self) :
        """
        Align the cluster sequences with PAGAN and store the results.

        Reads its configuration from self.options:
          'cluster-fasta'   - FASTA file of sequences to align
          'subset'          - if truthy, only this subset is aligned
          'silva-fasta'     - if truthy, align against this reference
          'silva-tree'      - reference tree used with 'silva-fasta'
          'phylogeny-fasta' - destination for the alignment
          'phylogeny-tree'  - destination for the tree
          'phylogeny-xml'   - destination for the XML output (if produced)

        Side effects: when 'subset' is set, self.seqdb is replaced and
        self.options['cluster-fasta'] is re-pointed at whatever
        self.__fasta() returns (presumably the path of the subset FASTA
        file it wrote -- verify against __fasta).

        Returns 0.
        """
        # NOTE(review): the count is taken before any subset is applied, so
        # the "aligning N sequences" message below may report the size of
        # the full input rather than of the subset -- confirm intent.
        num_sequences = self.__count(self.options['cluster-fasta'])
        p = Pagan()

        if self.options['subset'] :
            self.seqdb = self.__read_fasta(self.options['cluster-fasta'], include=self.options['subset'])
            self.options['cluster-fasta'] = self.__fasta(self.options['cluster-fasta'] + '.subset', self.seqdb.keys())

            self.log.info("read %d cluster centroids using subset(%s)" % (len(self.seqdb), self.options['subset']))

        if not self.options['silva-fasta'] :
            self.log.info("aligning %s sequences with PAGAN ..." % (num_sequences))
            alignment,tree,xmlfile = p.phylogenetic_alignment(self.options['cluster-fasta'])
        else :
            self.log.info("aligning %s sequences with PAGAN against SILVA ..." % (num_sequences))
            alignment,tree,xmlfile = p.silva_phylogenetic_alignment(self.options['silva-fasta'], 
                                                                    self.options['silva-tree'], 
                                                                    self.options['cluster-fasta'])

        # move the aligner's output files to their configured destinations
        os.rename(alignment, self.options['phylogeny-fasta'])
        os.rename(tree,      self.options['phylogeny-tree'])

        # The XML file is optional: only rename it when the aligner actually
        # reported one, otherwise os.rename() would fail on an empty/None path.
        if xmlfile :
            os.rename(xmlfile, self.options['phylogeny-xml'])

        self.log.info("created %s" % self.options['phylogeny-fasta'])
        self.log.info("created %s" % self.options['phylogeny-tree'])

        if xmlfile :
            self.log.info("created %s" % self.options['phylogeny-xml'])

        return 0
Beispiel #2
0
    def alignment_similarity(self, seq1, seq2, homopolymer_correction) :
        """
        Align two sequences with PAGAN and score the aligned pair.

        seq1, seq2             - sequence objects providing a fasta() method
        homopolymer_correction - if truthy, use PAGAN's 454 alignment mode;
                                 the flag is also passed on to distance2()

        Both sequences are written to a temporary FASTA file, aligned, and
        the aligned sequences (skipping the ">consensus" record) are handed
        to self.distance2(). The temporary input and the alignment output
        file are removed afterwards, even if alignment or parsing fails.

        Returns 0.0 when the alignment does not contain exactly two
        sequences, otherwise the value of self.distance2().
        """
        f = open(System.tempfilename(ext='cluster'), 'w')
        fq = None
        aligned = []

        try :
            # write out
            try :
                # one record per line-terminated write; same text that
                # "print >> f, ..." produced, but valid on Python 2 and 3
                f.write("%s\n" % (seq1.fasta(),))
                f.write("%s\n" % (seq2.fasta(),))
            finally :
                f.close()

            # align
            if homopolymer_correction :
                fq = Pagan().get_454_alignment(f.name)
            else :
                fq = Pagan().get_alignment(f.name)

            fq.open()
            try :
                for seq in fq :
                    # the consensus record is not one of the two inputs
                    if seq.id == ">consensus" :
                        continue

                    aligned.append(seq.sequence)
            finally :
                fq.close()

        finally :
            # delete tmp files, also on the error path
            os.remove(f.name)

            # the output may not exist if the aligner failed part-way;
            # do not let the cleanup hide the original error in that case
            if fq is not None and os.path.exists(fq.get_filename()) :
                os.remove(fq.get_filename())

        # if things are really dissimilar they do not align
        # so just give up here for this cluster
        if len(aligned) != 2 :
            return 0.0

        return self.distance2(aligned, homopolymer_correction)