def align_clone_to_ref(clone, reference):
    '''Smith-Waterman align a clone against a reference sequence.

    Both arguments must be SequenceI instances. The pair is aligned with
    sw_align and wrapped in a CloneAlignment, which is then annotated with:

    * first_ref_pos / first_clone_pos -- index at which the matched text is
      first found in str(reference) / str(clone)
    * last_ref_pos / last_clone_pos -- first_*_pos plus the matched length
      (i.e. an exclusive end index)
    * reference_len -- len(reference)
    * is_truncated -- True when the matched reference text is not as long
      as the whole reference
    * has_gaps -- result of isGapped() on the aligned reference
    * has_mismatches -- True when the aligned reference cannot match the
      aligned clone according to canMatch()

    Raises ValueError if either argument is not a SequenceI, and
    AlignmentError when the matched reference text is empty.
    '''
    type_checks = (
        (clone, 'clone must be a cogent.SequenceI object'),
        (reference, 'reference must be a cogent.SequenceI object'),
    )
    for candidate, complaint in type_checks:
        if not isinstance(candidate, SequenceI):
            raise ValueError(complaint)

    aligned = sw_align(clone, reference)
    ref_len = len(reference)

    # NOTE(review): element [1] of parseOutGaps() is presumably the
    # gap-free sequence -- confirm against the cogent API.
    clone_text = str(aligned[0].parseOutGaps()[1])
    ref_text = str(aligned[1].parseOutGaps()[1])
    if not ref_text:
        raise AlignmentError('No alignment')

    aln = CloneAlignment(aligned)

    # Locate the matched text inside each original sequence. str.index
    # reports the first occurrence only.
    aln.first_ref_pos = str(reference).index(ref_text)
    aln.first_clone_pos = str(clone).index(clone_text)
    aln.last_ref_pos = aln.first_ref_pos + len(ref_text)
    aln.last_clone_pos = aln.first_clone_pos + len(clone_text)

    aln.reference_len = ref_len
    aln.is_truncated = len(ref_text) != ref_len
    aln.has_gaps = aligned[1].isGapped()
    aln.has_mismatches = not aligned[1].canMatch(aligned[0])

    # Carry the input names over to the aligned sequences.
    aln.Seqs[0].Name = clone.Name
    aln.Seqs[1].Name = reference.Name
    return aln
def main():
    """Demo driver: cluster a fixed set of five 5-mers.

    Builds a MotifCluster from the k-mers, prints the k-mers as a NumPy
    array, then calls clustering.hierarchical_clust with average linkage
    and a Smith-Waterman based pairwise function. Nothing is returned.
    """
    kmers = [
        "TGTAT",
        "CGTAT",
        "TTAGT",
        "TCTAT",
        "TCTAC",
    ]

    # Constructed here but not used further in this function.
    motif_clust = MotifCluster(kmers)

    # The k-mers form a 1-D array, so the transpose leaves it unchanged.
    data = np.transpose(np.array(kmers))
    # Single-argument parenthesised print emits the same text as the
    # print statement form.
    print("DATA: ")
    print(data)

    def dist_func(x, y):
        # NOTE(review): with return_score=True, sw_align appears to return
        # an (alignment, score) pair rather than a bare number -- confirm
        # that hierarchical_clust expects this.
        return sw_align(x, y, return_score=True)

    linkage_method = "average"
    hclust = clustering.hierarchical_clust(
        np.array(kmers), dist_func, linkage_method
    )
def cluster_by_sw(self):
    """
    Score every ordered pair of k-mers with Smith-Waterman.

    Entry [i][j] of the result is element [1] of
    sw_align(kmers[i], kmers[j], return_score=True), i.e. the alignment
    score. Every pair is scored, including each k-mer against itself.

    Returns the scores as a NumPy array.
    """
    sequences = self.kmers
    return np.array([
        [sw_align(row_seq, col_seq, return_score=True)[1]
         for col_seq in sequences]
        for row_seq in sequences
    ])
def test_sw_align(self):
    """sw_align('ACGU', 'CAGU') is expected to align 'GU'/'GU', score 2."""
    alignment, score = sw_align('ACGU', 'CAGU', return_score=True)
    first, second = alignment
    self.assertEqual(first, 'GU')
    self.assertEqual(second, 'GU')
    self.assertEqual(score, 2)
def test_sw_align_empty(self):
    """sw_align on two empty strings: empty alignment, score 0."""
    alignment, score = sw_align('', '', return_score=True)
    first, second = alignment
    self.assertEqual(first, '')
    self.assertEqual(second, '')
    self.assertEqual(score, 0)
def test_sw_align(self):
    """Check the aligned pair and score for "ACGU" against "CAGU"."""
    outcome = sw_align("ACGU", "CAGU", return_score=True)
    (first, second), score = outcome
    # Same three checks as before, in the same order.
    checks = ((first, "GU"), (second, "GU"), (score, 2))
    for observed, expected in checks:
        self.assertEqual(observed, expected)
def test_sw_align_empty(self):
    """Check the aligned pair and score when both inputs are empty."""
    outcome = sw_align("", "", return_score=True)
    (first, second), score = outcome
    # Same three checks as before, in the same order.
    checks = ((first, ""), (second, ""), (score, 0))
    for observed, expected in checks:
        self.assertEqual(observed, expected)
def test_sw_align(self):
    """Verify sw_align output for the inputs 'ACGU' and 'CAGU'.

    The test expects both aligned strings to be 'GU' and the score to be 2.
    """
    query, target = 'ACGU', 'CAGU'
    expected_match = 'GU'
    expected_score = 2

    aligned_pair, score = sw_align(query, target, return_score=True)
    first, second = aligned_pair

    self.assertEqual(first, expected_match)
    self.assertEqual(second, expected_match)
    self.assertEqual(score, expected_score)