def phylogeny(self):
    """Align the cluster centroids with PAGAN and move the results into place.

    When the 'subset' option is set, only the listed sequences are read
    from the 'cluster-fasta' file into ``self.seqdb``; their ids are handed
    to ``self.__fasta`` together with the path '<cluster-fasta>.subset' and
    the value it returns replaces ``self.options['cluster-fasta']``
    (presumably the path of the newly written subset file - confirm
    against ``__fasta``).

    The sequences are then aligned on their own or, when 'silva-fasta' is
    set, against the SILVA reference given by 'silva-fasta' and
    'silva-tree'.  The alignment and tree reported by PAGAN are renamed to
    the 'phylogeny-fasta' and 'phylogeny-tree' option paths; the XML file
    is renamed to 'phylogeny-xml' only if PAGAN reported one.

    Returns:
        0, always.
    """
    cluster_fasta = self.options['cluster-fasta']
    subset = self.options['subset']

    if subset:
        self.seqdb = self.__read_fasta(cluster_fasta, include=subset)
        cluster_fasta = self.__fasta(cluster_fasta + '.subset', self.seqdb.keys())
        self.options['cluster-fasta'] = cluster_fasta
        self.log.info("read %d cluster centroids using subset(%s)" % (len(self.seqdb), subset))

    # Count after subsetting so the log line below reports the number of
    # sequences in the file that is actually handed to PAGAN.
    num_sequences = self.__count(cluster_fasta)

    p = Pagan()

    if not self.options['silva-fasta']:
        self.log.info("aligning %s sequences with PAGAN ..." % (num_sequences))
        alignment, tree, xmlfile = p.phylogenetic_alignment(cluster_fasta)
    else:
        self.log.info("aligning %s sequences with PAGAN against SILVA ..." % (num_sequences))
        alignment, tree, xmlfile = p.silva_phylogenetic_alignment(
            self.options['silva-fasta'],
            self.options['silva-tree'],
            cluster_fasta)

    os.rename(alignment, self.options['phylogeny-fasta'])
    os.rename(tree, self.options['phylogeny-tree'])

    self.log.info("created %s" % self.options['phylogeny-fasta'])
    self.log.info("created %s" % self.options['phylogeny-tree'])

    # The XML output is optional: only move (and report) it when PAGAN
    # returned one, otherwise os.rename would fail on the empty value.
    if xmlfile:
        os.rename(xmlfile, self.options['phylogeny-xml'])
        self.log.info("created %s" % self.options['phylogeny-xml'])

    return 0
def alignment_similarity(self, seq1, seq2, homopolymer_correction):
    """Align two sequences with PAGAN and score the resulting alignment.

    Both sequences are written in FASTA form (via their ``fasta()``
    method) to a temporary file, which is aligned with
    ``Pagan().get_454_alignment`` when ``homopolymer_correction`` is true
    and ``Pagan().get_alignment`` otherwise.  Records whose id is
    ">consensus" are skipped when the alignment is read back.

    Args:
        seq1: first sequence; must provide ``fasta()``.
        seq2: second sequence; must provide ``fasta()``.
        homopolymer_correction: selects the PAGAN alignment mode and is
            passed on to ``self.distance2``.

    Returns:
        0.0 if the alignment does not contain exactly two sequences,
        otherwise the value of
        ``self.distance2(aligned, homopolymer_correction)``.
    """
    tmp_name = System.tempfilename(ext='cluster')
    fq = None

    try:
        # write out
        # (file.write instead of the Python 2 only "print >>" statement;
        # the bytes written are the same: the record followed by a newline)
        with open(tmp_name, 'w') as f:
            f.write("%s\n" % (seq1.fasta(),))
            f.write("%s\n" % (seq2.fasta(),))

        # align
        pagan = Pagan()
        if homopolymer_correction:
            fq = pagan.get_454_alignment(tmp_name)
        else:
            fq = pagan.get_alignment(tmp_name)

        fq.open()
        try:
            aligned = [seq.sequence for seq in fq if seq.id != ">consensus"]
        finally:
            fq.close()
    finally:
        # delete tmp files, also when the alignment step raised, so that
        # failed comparisons do not leave files behind
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
        if fq is not None:
            aligned_name = fq.get_filename()
            if os.path.exists(aligned_name):
                os.remove(aligned_name)

    # if things are really dissimilar they do not align
    # so just give up here for this cluster
    if len(aligned) != 2:
        return 0.0

    return self.distance2(aligned, homopolymer_correction)