def Psubs(clsdict, corpus, minlength=3, maxlength=20):
    """Generate Psubs, the substring completion of a set of pairs.

    Psubs is the association between all substrings of the pairs in
    `clsdict`.

    Parameters
    ----------
    clsdict : ClassDict
        Source of the fragment pairs; iterated with
        ``iter_pairs(within=True, order=True)``.
    corpus : Corpus
        Forwarded unchanged to `pairwise_substring_completion`.
    minlength : int, optional
        Minimum number of phones for the substrings.
    maxlength : int, optional
        Maximum number of phones for the substrings.

    Returns
    -------
    Iterator over (FragmentToken, FragmentToken) pairs
        The per-pair completions, passed through `flatten` and then
        `unique`.

    """
    # Obtain the pair iterator up front; the completions themselves are
    # only computed as the generator below is consumed.
    fragment_pairs = clsdict.iter_pairs(within=True, order=True)
    completions = (
        pairwise_substring_completion(
            left, right, corpus, minlength, maxlength
        )
        for left, right in fragment_pairs
    )
    return unique(flatten(completions))
def test_different(self):
    """Substring completion of fragment1 (abcd) against fragment3 (fghi)."""
    # Indices into self.pfragments for each expected pair, listed in the
    # same order as the expected alignments:
    index_pairs = [
        (0, 10),  # abcd - fghi
        (2, 12),  # abc  - fgh
        (3, 13),  # bcd  - ghi
    ]
    expected = {
        (self.pfragments[left], self.pfragments[right])
        for left, right in index_pairs
    }
    observed = set(
        pairwise_substring_completion(
            self.fragment1, self.fragment3, self.ca, 3, 20
        )
    )
    assert expected == observed
def test_same(self):
    """Substring completion of fragment1 (abcd) against fragment2 (abcd)."""
    # Indices into self.pfragments for each expected pair, listed in the
    # same order as the expected alignments:
    index_pairs = [
        (0, 5),  # abcd - abcd
        (2, 7),  # abc  - abc
        (3, 8),  # bcd  - bcd
    ]
    expected = {
        (self.pfragments[left], self.pfragments[right])
        for left, right in index_pairs
    }
    observed = set(
        pairwise_substring_completion(
            self.fragment1, self.fragment2, self.ca, 3, 20
        )
    )
    assert observed == expected
def test_different_and_longer(self):
    """Substring completion of fragment3 (fghi) against fragment4 (abcde)."""
    # Indices into self.pfragments for each expected pair, listed in the
    # same order as the expected alignments:
    index_pairs = [
        (10, 5),  # fghi - abcd
        (12, 7),  # fgh  - abc
        (13, 8),  # ghi  - bcd
        (10, 6),  # fghi - bcde
        (12, 8),  # fgh  - bcd
        (13, 9),  # ghi  - cde
    ]
    expected = {
        (self.pfragments[left], self.pfragments[right])
        for left, right in index_pairs
    }
    observed = set(
        pairwise_substring_completion(
            self.fragment3, self.fragment4, self.ca, 3, 20
        )
    )
    assert expected == observed
def test_longer(self):
    """Substring completion of fragment1 (abcd) against fragment4 (abcde)."""
    # NOTE(review): a second definition named test_longer with the same
    # checks appears right after this one; if both belong to the same
    # class, the later definition shadows this one -- confirm and dedupe.
    #
    # Indices into self.pfragments for each expected pair, listed in the
    # same order as the expected alignments:
    index_pairs = [
        (0, 5),  # abcd - abcd
        (2, 7),  # abc  - abc
        (3, 8),  # bcd  - bcd
        (0, 6),  # abcd - bcde
        (2, 8),  # abc  - bcd
        (3, 9),  # bcd  - cde
    ]
    expected = {
        (self.pfragments[left], self.pfragments[right])
        for left, right in index_pairs
    }
    observed = set(
        pairwise_substring_completion(
            self.fragment1, self.fragment4, self.ca, 3, 20
        )
    )
    assert expected == observed
def test_longer(self):
    """Substring completion of fragment1 (abcd) against fragment4 (abcde)."""
    # NOTE(review): an earlier definition named test_longer with the same
    # checks appears right before this one; if both belong to the same
    # class, this definition shadows the earlier one -- confirm and dedupe.
    #
    # Indices into self.pfragments for each expected pair, listed in the
    # same order as the expected alignments:
    index_pairs = [
        (0, 5),  # abcd - abcd
        (2, 7),  # abc  - abc
        (3, 8),  # bcd  - bcd
        (0, 6),  # abcd - bcde
        (2, 8),  # abc  - bcd
        (3, 9),  # bcd  - cde
    ]
    expected = {
        (self.pfragments[left], self.pfragments[right])
        for left, right in index_pairs
    }
    observed = set(
        pairwise_substring_completion(
            self.fragment1, self.fragment4, self.ca, 3, 20
        )
    )
    assert expected == observed