def test_empty():
    """Check that inputs sharing nothing, and empty inputs, give no matches."""
    # Two strings without a single symbol in common.
    disjoint_left, disjoint_right = 'abcde', 'fghij'
    found = allcommonsubstrings(disjoint_left, disjoint_right)
    assert found == []

    # Two empty sequences.
    found = allcommonsubstrings([], [])
    assert found == []
def extract_single(tokens1, tokens2, minlength, maxlength, same):
    """Extract gold alignments between two phone lists.

    Parameters
    ----------
    tokens1, tokens2 : list of FragmentTokens
        Each token is unpacked as ``(id, interval, phone)``.  All tokens
        of one list are expected to carry the same id; only the first id
        is used.
    minlength : int
        Minimum number of symbols in a fragment
    maxlength : int
        Forwarded to `allcommonsubstrings` as its `maxlength` argument
        (presumably the maximum number of symbols in a fragment).
    same : boolean
        Whether `tokens1` and `tokens2` are identical.

    Returns
    -------
    l : list of (FragmentToken, FragmentToken)
        List of token pairs containing the cooccurring fragments.  The
        list is empty when either token list is empty, or when
        `allcommonsubstrings` returns None or no slice pairs.
    """
    # Transpose the token lists into parallel columns.  An empty token list
    # transposes to nothing, which cannot be unpacked into three columns, so
    # it is answered directly: there is nothing to align.
    columns1 = list(zip(*tokens1))
    if not columns1:
        return []
    ids1, intervals1, phones1 = columns1

    columns2 = list(zip(*tokens2))
    if not columns2:
        return []
    ids2, intervals2, phones2 = columns2

    id1 = ids1[0]  # ids are all the same
    id2 = ids2[0]

    css = allcommonsubstrings(phones1, phones2,
                              minlength=minlength, maxlength=maxlength,
                              same=same)
    if css is None:
        return []

    def _fragment(name, intervals, phones, span):
        # The fragment runs from the start of the first covered interval to
        # the end of the last one; `span.stop` is exclusive, hence the -1.
        return FragmentToken(name,
                             Interval(intervals[span.start].start,
                                      intervals[span.stop - 1].end),
                             phones[span])

    return [(_fragment(id1, intervals1, phones1, slice1),
             _fragment(id2, intervals2, phones2, slice2))
            for slice1, slice2 in css]
def test_uniqueness(l1, l2):
    """Check that no pair of slices is reported more than once."""
    matches = allcommonsubstrings(l1, l2, minlength=4)
    # Reduce every slice pair to its four boundaries; duplicates collapse
    # in the set, so the sizes agree only if all pairs are distinct.
    boundaries = {
        (left.start, left.stop, right.start, right.stop)
        for left, right in matches
    }
    assert len(boundaries) == len(matches)
def test_equality(l1, l2):
    """Check that each reported slice pair selects equal content."""
    matches = allcommonsubstrings(l1, l2, minlength=4)
    for left, right in matches:
        in_first = l1[left]
        in_second = l2[right]
        assert in_first == in_second
def test_length(l1, l2):
    """Check that every reported slice spans at least `minlength` items."""
    shortest_allowed = 4
    matches = allcommonsubstrings(l1, l2, minlength=shortest_allowed)
    for left, right in matches:
        left_span = left.stop - left.start
        assert left_span >= shortest_allowed
        right_span = right.stop - right.start
        assert right_span >= shortest_allowed
def test_simple():
    """Check the first match between two strings sharing the run 'abc'."""
    first = 'abcde'
    second = 'fghijabc'
    matches = allcommonsubstrings(first, second, minlength=3)
    in_first, in_second = matches[0]
    assert first[in_first] == second[in_second]
def test_single_nonempty():
    """Check self-comparison of a string in which 'abc' occurs twice."""
    text = 'abcdeabc'
    head = slice(0, 3, None)
    tail = slice(5, 8, None)
    # The repeat is expected in both orientations.
    expected = [(head, tail), (tail, head)]
    found = allcommonsubstrings(text, minlength=3, same=True)
    assert found == expected
def test_single_empty():
    """Check self-comparison of a string without any repeated symbol."""
    text = 'abcde'
    found = allcommonsubstrings(text, same=True)
    assert found == []