def align(self, seq1, seq2):
    """Globally align ``seq2`` against ``seq1`` and return the aligned ``seq2``.

    Goal: align seq2 (automatically detected conditions) with seq1 (truth
    conditions) and return the best alignment.

    The input lengths, the alignment, its score and its percent identity are
    printed to stdout.

    Args:
        seq1: Sized iterable of truth conditions.
        seq2: Sized iterable of automatically detected conditions.

    Returns:
        list: The second row of the first alignment reported by the aligner,
        i.e. ``seq2`` as laid out against ``seq1``.

    Raises:
        ValueError: If the aligner reports no alignment at all.
        AssertionError: If the percent identity of the alignment is below 97.
    """
    print("len(truth_conditions) = {}, len(detected_conditions) = {}".format(len(seq1), len(seq2)))
    from alignment.sequence import Sequence
    from alignment.vocabulary import Vocabulary
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner

    # Create sequences to be aligned.
    a = Sequence(seq1)
    b = Sequence(seq2)

    # Create a vocabulary and encode the sequences.
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)

    # Create a scoring and align the sequences using global aligner.
    scoring = SimpleScoring(2, -1)
    aligner = GlobalSequenceAligner(scoring, -2)
    score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)

    # Only the first reported alignment is used; fail loudly instead of
    # hitting an unbound local when there is none.
    best = next(iter(encodeds), None)
    if best is None:
        raise ValueError("aligner returned no alignment")

    alignment = v.decodeSequenceAlignment(best)
    identity = alignment.percentIdentity()
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(alignment)
    print('Alignment score: {}'.format(alignment.score))
    print('Percent identity: {}'.format(identity))

    # Raised explicitly (rather than via ``assert``) so the check survives
    # ``python -O`` while callers still see an AssertionError.
    if not identity >= 97.0:
        raise AssertionError(
            'percent identity {} is below 97.0'.format(identity))

    return list(alignment.second)
def align(s1, s2):
    """Return the words of ``s1`` that align with an identical word in ``s2``.

    Both strings are split on whitespace, globally aligned (match +2,
    mismatch -1, gap -2) and the first reported alignment is walked column
    by column.

    Args:
        s1: Whitespace-separated text whose matching words are collected.
        s2: Whitespace-separated text to align ``s1`` against.

    Returns:
        list: Words of ``s1`` sitting in a column where both rows hold the
        same encoded value, in alignment order.

    Raises:
        IndexError: If the aligner returns no alignment.
    """
    # Create sequences to be aligned.
    a = Sequence(s1.split())
    b = Sequence(s2.split())

    # Create a vocabulary and encode the sequences.
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)

    # Create a scoring and align the sequences using global aligner.
    scoring = SimpleScoring(2, -1)
    aligner = GlobalSequenceAligner(scoring, -2)
    score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)
    encoded = encodeds[0]

    correct_words = []
    # Number of columns seen so far in which the first row holds code 0
    # (presumably the vocabulary's gap code -- confirm). Subtracting it maps
    # an alignment column back to an index into the ungapped sequence ``a``.
    offset = 0
    for i, (x, y) in enumerate(encoded):
        if x == y:
            correct_words.append(a[i - offset])
        elif x == 0:
            offset += 1
    return correct_words
def align(sequence1, sequence2):
    """Globally align two word sequences and return the first alignment.

    A literal ``'-'`` word is replaced by ``'<DASH />'`` in both inputs
    before aligning, because the aligner uses the dash as its gap element.

    Args:
        sequence1: Iterable of words for the first row.
        sequence2: Iterable of words for the second row.

    Returns:
        The decoded form of the first alignment reported by the aligner
        (match +2, mismatch -1, gap -2).
    """
    def _escape_dashes(words):
        # Keep real dashes distinguishable from the aligner's gap marker.
        return ['<DASH />' if word == '-' else word for word in words]

    escaped_first = _escape_dashes(sequence1)
    escaped_second = _escape_dashes(sequence2)

    first_seq = Sequence(escaped_first)
    second_seq = Sequence(escaped_second)

    vocabulary = Vocabulary()
    first_encoded = vocabulary.encodeSequence(first_seq)
    second_encoded = vocabulary.encodeSequence(second_seq)

    aligner = GlobalSequenceAligner(SimpleScoring(2, -1), -2)
    _, candidates = aligner.align(first_encoded, second_encoded, backtrace=True)

    return vocabulary.decodeSequenceAlignment(candidates[0])
def align(trace1, trace2):
    """Globally align two traces and return both aligned rows as lists.

    The traces are aligned (match +2, mismatch -1, gap -2), the alignment is
    rendered with ``str()``, and the rendered text is cut at its midpoint.
    Each half is handed to ``align_to_list``.

    Args:
        trace1: Iterable of elements for the first row.
        trace2: Iterable of elements for the second row.

    Returns:
        tuple: ``(s1, s2)``, the results of ``align_to_list`` for the first
        and second half of the rendered alignment.

    Raises:
        ValueError: If the aligner reports no alignment.
    """
    # Create sequences to be aligned.
    a = Sequence(trace1)
    b = Sequence(trace2)

    # Create a vocabulary and encode the sequences.
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)

    # Create a scoring and align the sequences using global aligner.
    scoring = SimpleScoring(2, -1)
    aligner = GlobalSequenceAligner(scoring, -2)
    score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)

    # The text of the last reported alignment is the one that gets split.
    # NOTE(review): confirm "last" (rather than "first") is what is wanted
    # when several optimal alignments exist.
    rendered = None
    for encoded in encodeds:
        rendered = str(v.decodeSequenceAlignment(encoded))
    if rendered is None:
        raise ValueError("aligner returned no alignment")

    # Convert aligned sequences into list, escaping multiple characters.
    # Floor division keeps the midpoint an int on Python 3 as well.
    seq_size = len(rendered)
    half_size = seq_size // 2

    # The trailing 4 is passed through to align_to_list unchanged; its
    # meaning is defined by that helper.
    s1 = align_to_list(rendered, 0, half_size, 4)          # first half
    s2 = align_to_list(rendered, half_size, seq_size, 4)   # second half

    return s1, s2
def score_align(x, y):
    """Return ``1 - best_identity / 100`` for the global alignment of x and y.

    ``best_identity`` is the highest percent identity among all alignments
    reported by the aligner (match +2, mismatch -1, gap -2), floored at 0.0
    when there are none.

    Args:
        x: Iterable of elements forming the first sequence.
        y: Iterable of elements forming the second sequence.

    Returns:
        float: ``1 - best_identity / 100.0``.
    """
    seq_x = Sequence(x)
    seq_y = Sequence(y)

    vocab = Vocabulary()
    enc_x = vocab.encodeSequence(seq_x)
    enc_y = vocab.encodeSequence(seq_y)

    aligner = GlobalSequenceAligner(SimpleScoring(2, -1), -2)
    _, candidates = aligner.align(enc_x, enc_y, backtrace=True)

    identities = [
        vocab.decodeSequenceAlignment(candidate).percentIdentity()
        for candidate in candidates
    ]
    # Leading 0.0 is the floor used when no alignment is reported.
    best_identity = max([0.0] + identities)
    return 1 - best_identity / 100.0
def align_pair(
    first: NamedSequence,
    second: NamedSequence,
    vocabulary: Vocabulary,
) -> AlignmentResult:
    """Align the sequences of two named sequences and package the outcome.

    Args:
        first: Named sequence whose ``sequence`` is the first aligner input.
        second: Named sequence whose ``sequence`` is the second aligner input.
        vocabulary: Vocabulary used to build the scoring and to decode the
            resulting alignments.

    Returns:
        An ``AlignmentResult`` built from the score, both inputs and the
        list of decoded alignments.
    """
    # The second constructor argument is passed as ``True`` exactly as
    # before; its meaning is defined by this project's aligner.
    aligner = GlobalSequenceAligner(EblScoring(vocabulary), True)
    score, alignments = aligner.align(
        first.sequence, second.sequence, backtrace=True
    )
    decoded = [
        vocabulary.decodeSequenceAlignment(encoded) for encoded in alignments
    ]
    return AlignmentResult(score, first, second, decoded)
def score_align(x, y):
    """Turn the best percent identity of a global alignment into a distance.

    Args:
        x: Iterable of elements forming the first sequence.
        y: Iterable of elements forming the second sequence.

    Returns:
        float: ``1 - p / 100.0`` where ``p`` is the largest percent identity
        over all alignments reported by the aligner (match +2, mismatch -1,
        gap -2), or 0.0 if none is reported.
    """
    first = Sequence(x)
    second = Sequence(y)

    vocabulary = Vocabulary()
    first_encoded = vocabulary.encodeSequence(first)
    second_encoded = vocabulary.encodeSequence(second)

    aligner = GlobalSequenceAligner(SimpleScoring(2, -1), -2)
    _, candidates = aligner.align(first_encoded, second_encoded, backtrace=True)

    best_identity = 0.0
    for candidate in candidates:
        identity = vocabulary.decodeSequenceAlignment(candidate).percentIdentity()
        if identity > best_identity:
            best_identity = identity

    return 1 - best_identity / 100.0
def getAlignment(timit, utterance):
    """Align the flattened ``timit`` items against ``utterance``.

    Args:
        timit: Iterable of iterables; its inner items are concatenated in
            order to form the first sequence.
        utterance: Iterable of items forming the second sequence.

    Returns:
        The decoded form of the last alignment reported by the global
        aligner (match +2, mismatch -1, gap -2).
    """
    # Flatten one nesting level.
    flattened = [ph for group in timit for ph in group]

    first = Sequence(flattened)
    second = Sequence(utterance)

    vocabulary = Vocabulary()
    first_encoded = vocabulary.encodeSequence(first)
    second_encoded = vocabulary.encodeSequence(second)

    aligner = GlobalSequenceAligner(SimpleScoring(2, -1), -2)
    _, candidates = aligner.align(first_encoded, second_encoded, backtrace=True)

    # Every candidate is decoded; the last one is what gets returned.
    for candidate in candidates:
        alignment = vocabulary.decodeSequenceAlignment(candidate)
    return alignment
def match_word_sorted(code1, code2):
    """Return the max scored alignment between the two input codes.

    Both codes are split on single spaces. For every distinct non-empty word
    occurring in either code, ``index_word_pairs`` supplies that word's
    entries for each code. The entries are sorted by their second item and
    the two sorted lists are globally aligned.

    Args:
        code1: First code as a space-separated string.
        code2: Second code as a space-separated string.

    Returns:
        The highest ``score`` among the reported alignments, or 0 when no
        alignment scores above 0.
    """
    list1 = code1.split(" ")
    list2 = code2.split(" ")

    # Union of both vocabularies: every word of either code is indexed.
    all_words = set(list1) | set(list2)
    # Splitting on " " yields empty strings for repeated spaces; drop them.
    all_words.discard("")

    words1 = []
    words2 = []
    for word in all_words:
        words1 += index_word_pairs(word, list1)
        words2 += index_word_pairs(word, list2)

    sorted1 = sorted(words1, key=lambda t: t[1])
    sorted2 = sorted(words2, key=lambda t: t[1])

    a = Sequence(sorted1)
    b = Sequence(sorted2)
    v = Vocabulary()
    a_encoded = v.encodeSequence(a)
    b_encoded = v.encodeSequence(b)

    scoring = SimpleScoring(MATCH_SCORE, MISMATCH_SCORE)
    aligner = GlobalSequenceAligner(scoring, GAP_SCORE)
    score, encoders = aligner.align(a_encoded, b_encoded, backtrace=True)

    max_score = 0
    for encoded in encoders:
        alignment = v.decodeSequenceAlignment(encoded)
        if alignment.score > max_score:
            max_score = alignment.score
    return max_score
def recommendation(name, movies):
    """Order movie names by their alignment score against the key words.

    Args:
        name: a string of key words separated by white space
        movies: an indexable list of movie names to choose from

    Returns:
        A list containing every entry of ``movies``, ordered by ascending
        global-alignment score against ``name`` (match 1, mismatch 0,
        gap -2). Ties keep their original relative order.

    NOTE(review): the earlier documentation spoke of a "top ten"/"top five"
    nearest match, but the code returns all movies, lowest score first --
    confirm which behaviour callers expect.
    """
    # Tokenise the query and every candidate title.
    query = Sequence(name.split())
    titles = [Sequence(movie.split()) for movie in movies]

    # One shared vocabulary so equal words get equal codes.
    vocabulary = Vocabulary()
    query_encoded = vocabulary.encodeSequence(query)
    titles_encoded = [vocabulary.encodeSequence(title) for title in titles]

    aligner = GlobalSequenceAligner(SimpleScoring(1, 0), -2)
    scores = [
        aligner.align(query_encoded, title_encoded, backtrace=False)
        for title_encoded in titles_encoded
    ]

    # Rank positions by score; sorted() is stable, so ties stay in order.
    ranked_positions = sorted(range(len(scores)), key=scores.__getitem__)
    return [movies[position] for position in ranked_positions]
def match_word_sorted(code1, code2):
    """Return the max scored alignment between the two input codes.

    Only non-empty words present in both codes are considered. They are
    ordered once by first occurrence in ``code1`` and once by first
    occurrence in ``code2``, and the two orderings are globally aligned.

    Args:
        code1: First code as a space-separated string.
        code2: Second code as a space-separated string.

    Returns:
        The highest ``score`` among the reported alignments, or 0 when no
        alignment scores above 0.
    """
    list1 = code1.split(" ")
    list2 = code2.split(" ")

    common_words = set(list1) & set(list2)
    # Splitting on " " yields empty strings for repeated spaces; drop them.
    common_words.discard("")

    # word -> (first index in code1, first index in code2)
    words_to_index = {
        word: (list1.index(word), list2.index(word)) for word in common_words
    }

    # Plain lists (not dict key views) so the result is the same kind of
    # object on Python 2 and Python 3.
    sorted1 = sorted(words_to_index, key=lambda word: words_to_index[word][0])
    sorted2 = sorted(words_to_index, key=lambda word: words_to_index[word][1])

    a = Sequence(sorted1)
    b = Sequence(sorted2)
    v = Vocabulary()
    a_encoded = v.encodeSequence(a)
    b_encoded = v.encodeSequence(b)

    scoring = SimpleScoring(MATCH_SCORE, MISMATCH_SCORE)
    aligner = GlobalSequenceAligner(scoring, GAP_SCORE)
    score, encoders = aligner.align(a_encoded, b_encoded, backtrace=True)

    max_score = 0
    for encoded in encoders:
        alignment = v.decodeSequenceAlignment(encoded)
        if alignment.score > max_score:
            max_score = alignment.score
    return max_score
b = Sequence('what a disappointingly bad day'.split())
# Single-argument print() calls produce the same output on Python 2 and 3.
print('Sequence A: {}'.format(a))
print('Sequence B: {}'.format(b))
print('')

# Create a vocabulary and encode the sequences.
v = Vocabulary()
aEncoded = v.encodeSequence(a)
bEncoded = v.encodeSequence(b)
print('Encoded A: {}'.format(aEncoded))
print('Encoded B: {}'.format(bEncoded))
print('')

# Create a scoring and align the sequences using global aligner.
scoring = SimpleScoring(2, -1)
aligner = GlobalSequenceAligner(scoring, -2)
score, alignments = aligner.align(aEncoded, bEncoded, backtrace=True)

# Create sequence profiles out of alignments.
# The loop variable is deliberately not called ``a``: on Python 2 a list
# comprehension leaks its variable and would overwrite sequence A.
profiles = [
    Profile.fromSequenceAlignment(encoded_alignment)
    for encoded_alignment in alignments
]
for encoded in profiles:
    profile = v.decodeProfile(encoded)
    print(profile)
print('')

# Create a soft scoring and align the first profile against sequence A.
scoring = SoftScoring(scoring)
aligner = GlobalProfileAligner(scoring, -2)
score, alignments = aligner.align(profiles[0], Profile.fromSequence(aEncoded), backtrace=True)
from alignment.sequence import Sequence
from alignment.vocabulary import Vocabulary
from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner

# Create sequences to be aligned.
a = Sequence('amazing'.split())
b = Sequence('what a amazing disappointingly bad day'.split())

# Create a vocabulary and encode the sequences.
v = Vocabulary()
aEncoded = v.encodeSequence(a)
bEncoded = v.encodeSequence(b)

# Create a scoring and align the sequences using global aligner.
scoring = SimpleScoring(2, -1)
aligner = GlobalSequenceAligner(scoring, -2)
score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)

# Iterate over optimal alignments and print them.
# Single-argument print() calls produce the same output on Python 2 and 3.
for encoded in encodeds:
    alignment = v.decodeSequenceAlignment(encoded)
    print(alignment)
    print('Alignment score: {}'.format(alignment.score))
    print('Percent identity: {}'.format(alignment.percentIdentity()))
    print('')

from alignment.sequence import Sequence, GAP_ELEMENT
from alignment.vocabulary import Vocabulary
from alignment.sequencealigner import SimpleScoring, LocalSequenceAligner
# Tests -----------------------------------------------------------------------

if __name__ == '__main__':
    # Two toy sentences, tokenised on whitespace.
    s1 = Sequence('what a beautiful day'.split())
    s2 = Sequence('what a disappointingly bad day'.split())
    for label, value in (('s1', s1), ('s2', s2)):
        print(label, value)
    print('')

    # Encode both sentences with one shared vocabulary.
    from alignment.vocabulary import Vocabulary
    v = Vocabulary()
    e1 = v.encodeSequence(s1)
    e2 = v.encodeSequence(s2)
    for label, value in (('v', v), ('e1', e1), ('e2', e2)):
        print(label, value)
    print('')

    # Global alignment: match +2, mismatch -1, gap -2.
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner
    s = SimpleScoring(2, -1)
    a = GlobalSequenceAligner(s, -2)
    score, alignments = a.align(e1, e2, backtrace=True)

    # Show the percent identity and both decoded rows of every alignment.
    for alignment in alignments:
        as1 = v.decodeSequence(alignment.first)
        as2 = v.decodeSequence(alignment.second)
        for line in (alignment.percentIdentity(), as1, as2, ''):
            print(line)
def ScorePhonemes(self, source=None, target=None):
    """Compare the phonemes of a source and target sentence and determine
    which of the target items were correctly transcribed.

    Args:
        source: Per-sentence lists of ``(phoneme, word number, word)``
            triples for the source. Falls back to ``self.source_phonemes``
            when omitted or empty.
        target: Same structure for the target. Falls back to
            ``self.target_phonemes`` when omitted or empty.

    Returns:
        None. Results are stored on the instance:

        * ``self.hits_phonemes`` (nested list): one list of bools per
          sentence, one bool per target phoneme.
        * ``self.source_matched`` (nested list): per sentence, the source
          phonemes laid out against the target; ``'-'`` for every position
          when the source sentence is empty.

    Note:
        This scoring method has no word accuracy awareness. Phonemes from
        correctly input words may wind up as labeled wrong
        (i.e. target: "with the", source: "with a").

        Modified from Eser Aygün (https://pypi.python.org/pypi/alignment/1.0.9)
    """
    # ``None`` defaults instead of shared mutable ``[]`` defaults; the
    # falsy checks below treat both the same way.
    if not source:
        source = self.source_phonemes
    if not target:
        target = self.target_phonemes

    self.source_matched = []
    hits = []
    for x, ttup in enumerate(target):
        # Each entry must be a 3-tuple; only the phoneme column is used.
        tphon, twordnum, tword = zip(*ttup)
        stup = source[x]
        if not stup:
            # Nothing was transcribed: every target phoneme is a miss.
            hitlist = [False] * len(tphon)
            bPhonOut = ['-'] * len(tphon)
        else:
            sphon, swordnum, sword = zip(*stup)

            # Create sequences to be aligned.
            a = Sequence(tphon)
            b = Sequence(sphon)

            # Create a vocabulary and encode the sequences.
            v = Vocabulary()
            aEncoded = v.encodeSequence(a)
            bEncoded = v.encodeSequence(b)

            # Create a scoring and align the sequences using global aligner.
            scoring = SimpleScoring(2, -1)
            aligner = GlobalSequenceAligner(scoring, -2)
            score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)
            encoded = encodeds[0]

            # Score based only on hits vs misses, insertions are ignored.
            # NOTE(review): the indexing below assumes the (modified)
            # alignment classes support array-style masking and arithmetic
            # -- confirm against the alignment module in use.
            notInsert = encoded[:][0] != 0
            nonInsertMatched = encoded[notInsert][:]

            # Find the alignment in the target sequence.
            aSeq = nonInsertMatched[:][0]
            bSeq = nonInsertMatched[:][1]

            # Label all items not aligned to the target as false.
            hitlist = []
            y = 0
            # Slide over the encoded target until the window matches aSeq;
            # y keeps its last value if no window matches.
            for y in range(0, len(aEncoded) - len(aSeq) + 1):
                aChunk = aEncoded[y:y + len(aSeq)]
                if sum(aChunk - aSeq) == 0:
                    break
            hitlist.extend([False] * (y))
            hitlist.extend(list(aSeq - bSeq == 0))
            hitlist.extend([False] * (len(aEncoded) - y - len(aSeq)))

            # Export the target aligned phonemes of the source sequence.
            bPhons = np.zeros(len(aEncoded), int)
            bPhons[y:y + len(bSeq)] = bSeq
            bPhonOut = np.array(v.elements())[bPhons].tolist()

        hits.append(hitlist)
        self.source_matched.append(bPhonOut)

    self.hits_phonemes = hits
def ScoreWords(self):
    """Align the words of each source sentence to its target sentence to
    determine hit vs missed words.

    Reads ``self.target`` and ``self.source`` (parallel sequences of
    whitespace-separated sentences).

    Returns:
        None. Results are stored on the instance:

        * ``self.hits`` (nested list): one list of bools per sentence, one
          bool per target word.
        * ``self.wscore`` (numpy array): per-sentence percentage of hits.
        * ``self.source_matchWords`` (nested list): per sentence, the source
          words laid out against the target.

        The laid-out source words of every sentence are also printed.

    Note:
        Modified from Eser Aygün (https://pypi.python.org/pypi/alignment/1.0.9)
    """
    target = self.target
    source = self.source
    self.source_matchWords = []
    hits = []
    wscore = np.empty(0)
    for tnum, tsent in enumerate(target):
        ssent = source[tnum]

        # Create sequences to be aligned.
        a = Sequence(tsent.split())
        b = Sequence(ssent.split())

        # Create a vocabulary and encode the sequences.
        v = Vocabulary()
        aEncoded = v.encodeSequence(a)
        bEncoded = v.encodeSequence(b)

        # Create a scoring and align the sequences using global aligner.
        scoring = SimpleScoring(5, -1)
        aligner = GlobalSequenceAligner(scoring, -1)
        score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)
        encoded = encodeds[0]

        # Score based only on hits vs misses, insertions are ignored.
        # NOTE(review): the indexing below assumes the (modified) alignment
        # classes support array-style masking and arithmetic -- confirm
        # against the alignment module in use.
        notInsert = encoded[:][0] != 0
        nonInsertMatched = encoded[notInsert][:]

        # Find the alignment in the target sequence.
        aSeq = nonInsertMatched[:][0]
        bSeq = nonInsertMatched[:][1]

        # Label all items not aligned to the target as false.
        hitlist = []
        x = 0
        # Slide over the encoded target until the window matches aSeq;
        # x keeps its last value if no window matches.
        for x in range(0, len(aEncoded) - len(aSeq) + 1):
            aChunk = aEncoded[x:x + len(aSeq)]
            if sum(aChunk - aSeq) == 0:
                break
        hitlist.extend([False] * (x))
        hitlist.extend(list(aSeq - bSeq == 0))
        hitlist.extend([False] * (len(aEncoded) - x - len(aSeq)))

        # Export the target aligned words of the source sequence.
        bWords = np.zeros(len(aEncoded), int)
        bWords[x:x + len(bSeq)] = bSeq
        bWordOut = np.array(v.elements())[bWords].tolist()

        hits.append(hitlist)
        # Percentage of target words that were hit in this sentence.
        iwscore = sum(hitlist) * 100 / float(len(hitlist))
        wscore = np.hstack([wscore, iwscore])
        # Single-argument print() behaves the same on Python 2 and 3.
        print(bWordOut)
        self.source_matchWords.append(bWordOut)

    self.hits = hits
    self.wscore = wscore
# Build one concatenated sequence per split position: the first item of
# each of the eight entries following position i.
sequence = []
for i in spilt_pos:
    ss = sequence_family[i + 1][0]
    for ii in range(i + 2, i + 9):
        ss = ss + sequence_family[ii][0]
    sequence.append(ss)
#%%
# Encode every sequence with one shared vocabulary.
v = Vocabulary()
sequence_encoded = [
    v.encodeSequence(Sequence(split_sequence(item))) for item in sequence
]

scoring = SimpleScoring(2, -1)
aligner = GlobalSequenceAligner(scoring, -2)

# 9 x 9 matrix; only entries with i < j are filled below.
Matrix = np.zeros((9, 9))
for i, first_encoded in enumerate(sequence_encoded):
    for j in range(i + 1, len(sequence_encoded)):
        score, encodeds = aligner.align(first_encoded, sequence_encoded[j],
                                        backtrace=True)
        for encoded in encodeds:
            alignment = v.decodeSequenceAlignment(encoded)
            # Non-identical fraction scaled by the alignment length.
            mismatch_share = 100 - alignment.percentIdentity()
            score = np.floor(mismatch_share * len(np.array(alignment)) / 100)
            print(i, j, score)
            Matrix[i, j] = score
# Transpose so the filled entries end up below the diagonal.
Matrix = Matrix.T
def text_to_text_alignment_and_score(text_ref, text_pred):
    """
    Find a word to word alignment between two texts, considering the first
    is the reference and the second the predicted.

    Both texts are lower-cased. The reference is additionally passed through
    a translation table built by ``to_translation_map`` for ``.`` and ``,``
    (junk characters). The alignment found by the aligner is then padded so
    that words missing at the start or end of either text appear opposite a
    ``'-'`` placeholder.

    The raw aligner output and the alignment (before and after padding) are
    printed.

    :param text_ref: text reference
    :param text_pred: predicted text
    :return: tuple ``(alignment, rec, pre)``. ``rec`` is
        ``alignment.score * 100 / number of reference words`` and ``pre`` is
        ``alignment.score * 100 / number of predicted words``. When the first
        alignment is empty the result is ``([], 0, 0)``.
    """
    text_ref = text_ref.lower()
    text_pred = text_pred.lower()
    iterable = [".", ","]

    # convert the reference text in order not to contain , and . (junk characters)
    translation_map = str.maketrans(to_translation_map(iterable))
    text_ref = text_ref.translate(translation_map)

    # Create sequences to be aligned.
    a = Sequence(text_ref.split())
    b = Sequence(text_pred.split())

    # Create a vocabulary and encode the sequences.
    v = Vocabulary()
    a_enc = v.encodeSequence(a)
    b_enc = v.encodeSequence(b)

    # Create a scoring and align the sequences using global aligner.
    scoring = SimpleScoring(1, 0)
    aligner = GlobalSequenceAligner(scoring, 0)
    f, score, encodeds = aligner.align(a_enc, b_enc, text_ref.split(), text_pred.split(), backtrace=True)

    # get the first alignment if exists:
    print(encodeds)
    if len(encodeds[0]) > 0:
        alignment = v.decodeSequenceAlignment(encodeds[0])
        print(alignment)

        ref_words = text_ref.split()
        pred_words = text_pred.split()

        # In every pass below, ``second_pad`` is attached to the predicted
        # row and ``first_pad`` to the reference row. Nothing is attached if
        # the anchor word is never found.

        # Leading predicted words missing from the alignment.
        second_pad = []
        first_pad = []
        for word in pred_words:
            if word != alignment.second.elements[0]:
                second_pad.append(word)
                first_pad.append('-')
            else:
                alignment.second.elements = second_pad + alignment.second.elements
                alignment.first.elements = first_pad + alignment.first.elements
                break

        # Trailing predicted words missing from the alignment.
        second_pad = []
        first_pad = []
        for word in reversed(pred_words):
            if word != alignment.second.elements[-1]:
                second_pad = [word] + second_pad
                first_pad.append('-')
            else:
                alignment.second.elements = alignment.second.elements + second_pad
                alignment.first.elements = alignment.first.elements + first_pad
                break

        # Leading reference words missing from the alignment.
        second_pad = []
        first_pad = []
        for word in ref_words:
            if word != alignment.first.elements[0]:
                first_pad.append(word)
                second_pad.append('-')
            else:
                alignment.second.elements = second_pad + alignment.second.elements
                alignment.first.elements = first_pad + alignment.first.elements
                break

        # Trailing reference words missing from the alignment.
        second_pad = []
        first_pad = []
        for word in reversed(ref_words):
            if word != alignment.first.elements[-1]:
                # Prepend to the reference-side list. The previous code
                # prepended to the placeholder list here, which mixed '-'
                # markers into the reference words.
                first_pad = [word] + first_pad
                second_pad.append('-')
            else:
                alignment.second.elements = alignment.second.elements + second_pad
                alignment.first.elements = alignment.first.elements + first_pad
                break

        print(alignment)
        rec = alignment.score * 100 / len(ref_words)
        pre = alignment.score * 100 / len(pred_words)
    else:
        alignment = []
        rec, pre = 0, 0
    return alignment, rec, pre
def test_utterance_transcriptions(self):
    """Decode every utterance and report those that differ from their text.

    Steps:

    1. Compile utterance graphs and decode all utterances in a process pool,
       one job per ``self.corpus.num_jobs``.
    2. For each job, map the decoded word ids back to words, write them to
       ``aligned.<job>`` in the trainer's align directory, and compare them
       with the reference text to collect inserted and deleted words.
    3. If any utterance has insertions or deletions, write
       ``transcription_problems.csv`` to the corpus output directory, worst
       utterances first.
    4. Print a summary through ``self.transcription_analysis_template``.

    Returns:
        None.
    """
    # Local import: only this report needs the csv module.
    import csv

    print('Checking utterance transcriptions...')
    split_directory = self.corpus.split_directory()
    model_directory = self.trainer.align_directory
    with mp.Pool(processes=self.corpus.num_jobs) as pool:
        jobs = [(self, x) for x in range(self.corpus.num_jobs)]
        results = [pool.apply_async(compile_utterance_train_graphs_func, args=i) for i in jobs]
        # .get() waits for each job to finish; the results are not used.
        for p in results:
            p.get()
        print('Utterance FSTs compiled!')
        print('Decoding utterances (this will take some time)...')
        results = [pool.apply_async(test_utterances_func, args=i) for i in jobs]
        for p in results:
            p.get()
        print('Finished decoding utterances!')

    word_mapping = self.dictionary.reversed_word_mapping
    v = Vocabulary()
    # utt -> (insertions, deletions, ref_text, text)
    errors = {}

    for job in range(self.corpus.num_jobs):
        text_path = os.path.join(split_directory, 'text.{}'.format(job))
        texts = load_scp(text_path)
        aligned_int = load_scp(os.path.join(model_directory, 'aligned.{}.int'.format(job)))
        with open(os.path.join(model_directory, 'aligned.{}'.format(job)), 'w') as outf:
            for utt, line in sorted(aligned_int.items()):
                text = [word_mapping[int(t)] for t in line]
                outf.write('{} {}\n'.format(utt, ' '.join(text)))
                ref_text = texts[utt]
                if len(text) < len(ref_text) - 7:
                    # Decoded text is much shorter than the reference: skip
                    # the alignment and compare by membership only.
                    insertions = [x for x in text if x not in ref_text]
                    deletions = [x for x in ref_text if x not in text]
                else:
                    aligned_seq = Sequence(text)
                    ref_seq = Sequence(ref_text)

                    alignedEncoded = v.encodeSequence(aligned_seq)
                    refEncoded = v.encodeSequence(ref_seq)
                    scoring = SimpleScoring(2, -1)
                    a = GlobalSequenceAligner(scoring, -2)
                    score, encodeds = a.align(refEncoded, alignedEncoded, backtrace=True)
                    insertions = []
                    deletions = []
                    for encoded in encodeds:
                        alignment = v.decodeSequenceAlignment(encoded)
                        for i, f in enumerate(alignment.first):
                            s = alignment.second[i]
                            # '-' on the reference row: the decoded word is
                            # extra; '-' on the decoded row: the reference
                            # word is missing.
                            if f == '-':
                                insertions.append(s)
                            if s == '-':
                                deletions.append(f)
                if insertions or deletions:
                    errors[utt] = (insertions, deletions, ref_text, text)

    if not errors:
        message = 'There were no utterances with transcription issues.'
    else:
        out_path = os.path.join(self.corpus.output_directory, 'transcription_problems.csv')
        # Most insertions + deletions first. The previous key indexed the
        # deletions and the reference text instead, so long references
        # dominated the ordering.
        ranked = sorted(errors.items(),
                        key=lambda x: -1 * (len(x[1][0]) + len(x[1][1])))
        with open(out_path, 'w') as problemf:
            # csv.writer quotes fields that contain commas (the insertion and
            # deletion columns are ', '-joined), keeping columns aligned.
            # lineterminator='\n' keeps the same line endings as the earlier
            # plain write() calls.
            writer = csv.writer(problemf, lineterminator='\n')
            writer.writerow(['Utterance', 'Insertions', 'Deletions', 'Reference', 'Decoded'])
            for utt, (insertions, deletions, ref_text, text) in ranked:
                writer.writerow([utt,
                                 ', '.join(insertions),
                                 ', '.join(deletions),
                                 ' '.join(ref_text),
                                 ' '.join(text)])
        message = 'There were {} of {} utterances with at least one transcription issue. '\
                  'Please see the outputted csv file {}.'.format(len(errors), self.corpus.num_utterances, out_path)

    print(self.transcription_analysis_template.format(message))
def score(aEncoded, bEncoded, v):
    """Globally align two encoded sequences (match +1, mismatch -3, gap 0).

    Args:
        aEncoded: First encoded sequence.
        bEncoded: Second encoded sequence.
        v: Accepted but not used by this function.

    Returns:
        tuple: ``(score, encodeds)`` as produced by the aligner with
        backtracing enabled.
    """
    # A LocalSequenceAligner(scoring, -1) variant was tried here previously.
    aligner = GlobalSequenceAligner(SimpleScoring(1, -3), 0)
    total, alignments = aligner.align(aEncoded, bEncoded, backtrace=True)
    return total, alignments
b = Sequence('what a disappointingly bad day'.split()) print 'Sequence A:', a print 'Sequence B:', b print # Create a vocabulary and encode the sequences. v = Vocabulary() aEncoded = v.encodeSequence(a) bEncoded = v.encodeSequence(b) print 'Encoded A:', aEncoded print 'Encoded B:', bEncoded print # Create a scoring and align the sequences using global aligner. scoring = SimpleScoring(2, -1) aligner = GlobalSequenceAligner(scoring, -2) score, alignments = aligner.align(aEncoded, bEncoded, backtrace=True) # Create sequence profiles out of alignments. profiles = [Profile.fromSequenceAlignment(a) for a in alignments] for encoded in profiles: profile = v.decodeProfile(encoded) print profile print # Create a soft scoring and align the first profile against sequence A. scoring = SoftScoring(scoring) aligner = GlobalProfileAligner(scoring, -2) score, alignments = aligner.align(profiles[0], Profile.fromSequence(aEncoded), backtrace=True) for encoded in alignments:
from alignment.sequence import Sequence
from alignment.vocabulary import Vocabulary
from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner

# Create sequences to be aligned.
a = Sequence('what a beautiful day'.split())
b = Sequence('what a disappointingly bad day'.split())

# Create a vocabulary and encode the sequences.
v = Vocabulary()
aEncoded = v.encodeSequence(a)
bEncoded = v.encodeSequence(b)

# Create a scoring and align the sequences using global aligner.
scoring = SimpleScoring(2, -1)
aligner = GlobalSequenceAligner(scoring, -2)
score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)

# Iterate over optimal alignments and print them.
# Single-argument print() calls produce the same output on Python 2 and 3.
for encoded in encodeds:
    alignment = v.decodeSequenceAlignment(encoded)
    print(alignment)
    print('Alignment score: {}'.format(alignment.score))
    print('Percent identity: {}'.format(alignment.percentIdentity()))
    print('')
return self.dist_mat[first, second] from alignment.sequencealigner import GlobalSequenceAligner choice_inds = np.random.choice(len(seqs), int(1e3), replace=False) new_seqs = [] for i, s in enumerate(seqs): if i in choice_inds: new_seqs.append(s) seqs = new_seqs nw_scores = np.zeros((len(seqs), len(seqs))) aligner = GlobalSequenceAligner(DistScoring(pdist), 1000 - med * 1000) for i in tqdm(range(len(seqs))): for j in range(i, len(seqs)): score, encodeds = aligner.align(seqs[i], seqs[j], backtrace=True) s = score / (1000 * max(len(seqs[i]), len(seqs[j]))) nw_scores[i, j] = s # %% [markdown] # ## from graspy.utils import symmetrize sns.heatmap(nw_scores) nw_scores = symmetrize(nw_scores, "triu") nw_dists = 1 - nw_scores # %% [markdown] # ##