def test_score_threshold_rule(kmer_alphabet, ref_kmer, threshold):
    """
    Check that :class:`ScoreThresholdRule` reports exactly the k-mers
    that an exhaustive brute-force comparison finds similar to the
    reference k-mer.

    Every k-mer code in ``range(len(kmer_alphabet))`` is aligned
    ungapped against the reference k-mer; the codes whose score is at
    least `threshold` form the expected set.
    """
    matrix = align.SubstitutionMatrix.std_protein_matrix()

    def as_sequence(kmer_code):
        # Wrap the split k-mer code in a sequence object, so that it
        # can be passed to the ungapped alignment
        sequence = seq.ProteinSequence()
        sequence.code = kmer_alphabet.split(kmer_code)
        return sequence

    reference = as_sequence(ref_kmer)

    # Brute force: score each possible k-mer against the reference and
    # keep those that reach the threshold
    expected_kmers = {
        kmer_code
        for kmer_code in range(len(kmer_alphabet))
        if align.align_ungapped(
            reference, as_sequence(kmer_code), matrix, score_only=True
        )
        >= threshold
    }

    rule = align.ScoreThresholdRule(matrix, threshold)
    observed_kmers = set(rule.similar_kmers(kmer_alphabet, ref_kmer))

    assert observed_kmers == expected_kmers
def test_align_ungapped():
    """
    Check :func:`align_ungapped` on two equally long nucleotide
    sequences: both the alignment score and the string representation
    of the alignment must match the expected values.
    """
    first = seq.NucleotideSequence("ACCTGA")
    second = seq.NucleotideSequence("ACTGGT")
    substitution_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()

    alignment = align.align_ungapped(first, second, substitution_matrix)

    # The sequences agree at three positions and differ at three
    assert alignment.score == 3
    # No gaps are introduced: each sequence appears unchanged on its own line
    assert str(alignment) == "ACCTGA\nACTGGT"
consensus_sequence.code = consensus_code return consensus_sequence drug_type_consensus = create_consensus( [sequences[strain] for strain in (1, 10, 13, 20, 53, 54)] ) fiber_type_consensus = create_consensus( [sequences[strain] for strain in (9, 5, 11, 45, 66, 68, 78)] ) # Create an alignment for visualization purposes # No insertion/deletions -> Align ungapped matrix = align.SubstitutionMatrix.std_protein_matrix() alignment = align.align_ungapped( drug_type_consensus, fiber_type_consensus, matrix=matrix ) # A colormap for hightlighting sequence dissimilarity: # At low similarity the symbols are colored red, # at high similarity the symbols are colored white cmap = LinearSegmentedColormap.from_list( "custom", colors=[(1.0, 0.3, 0.3), (1.0, 1.0, 1.0)] # ^ reddish ^ white ) fig = plt.figure(figsize=(8.0, 6.0)) ax = fig.add_subplot(111) graphics.plot_alignment_similarity_based( ax, alignment, matrix=matrix, symbols_per_line=50,