def Psubs(clsdict, corpus, minlength=3, maxlength=20):
    """
    Build Psubs, the substring completion of the pairs in a class dictionary.

    The pairs are obtained from ``clsdict.iter_pairs(within=True,
    order=True)``.  Each pair is handed to `pairwise_substring_completion`
    together with the corpus and the length limits; the per-pair results are
    flattened into a single stream and passed through `unique`.

    Parameters
    ----------
    clsdict : ClassDict
    corpus : Corpus
    minlength : int, optional
        minimum number of phones for the substrings
    maxlength : int, optional
        maximum number of phones for the substrings

    Returns
    -------
    Iterator over (FragmentToken, FragmentToken) pairs

    """
    # Request the pair stream up front, exactly when this function is called.
    class_pairs = clsdict.iter_pairs(within=True, order=True)
    completions = (
        pairwise_substring_completion(left, right, corpus,
                                      minlength, maxlength)
        for left, right in class_pairs
    )
    return unique(flatten(completions))
def iter_fragments(self, with_class=False):
    """
    Iterate over the FragmentTokens stored in ``self.clsdict``.

    Parameters
    ----------
    with_class : bool, optional
        When true, each fragment is paired with the key (ClassID) it is
        stored under, so (ClassID, FragmentToken) tuples are produced
        instead of bare fragments.

    Returns
    -------
    Iterator over FragmentToken or (ClassID, FragmentToken) pairs

    """
    if not with_class:
        return unique(flatten(self.clsdict.itervalues()))

    # One zipped stream per class: the class id repeated against its members.
    labelled = (
        izip(repeat(class_id), members)
        for class_id, members in self.clsdict.iteritems()
    )
    return unique(flatten(labelled))
def iter_fragments(self, with_class=False):
    """
    Iterate over the FragmentTokens held in ``self.clsdict``.

    Parameters
    ----------
    with_class : bool, optional
        Produce (ClassID, FragmentToken) tuples, where the ClassID is the
        key the fragment is stored under, rather than bare fragments.

    Returns
    -------
    Iterator over FragmentToken or (ClassID, FragmentToken) pairs

    """
    mapping = self.clsdict
    if with_class:
        # Tag every member of a class with that class's key.
        stream = flatten(izip(repeat(label), tokens)
                         for label, tokens in mapping.iteritems())
    else:
        stream = flatten(mapping.itervalues())
    return unique(stream)
def iter_pairs(self, within, order):
    """
    Iterate over FragmentToken pairs.

    Pairs whose intervals overlap (as reported by
    ``interval.overlaps_with``) are dropped, and the remaining stream is
    passed through `unique`.

    Parameters
    ----------
    within : bool
        Only select pairs from the same class.  When false, pairs are drawn
        from the flattened collection of all fragments.
    order : bool
        Also include reverse of a pair.  When false, each pair is instead
        put in a canonical order, sorted by (name, interval start).

    Returns
    -------
    Iterator over (FragmentToken, FragmentToken) pairs.

    """
    def canonical_key(fragment):
        return (fragment.name, fragment.interval.start)

    groups = self.clsdict.itervalues()

    # Step 1: which 2-combinations to consider.
    if within:
        base = (pair
                for members in groups
                for pair in combinations(members, 2))
    else:
        # across classes
        base = combinations(flatten(groups), 2)

    # Step 2: either emit both orientations or one canonical orientation.
    if order:
        candidates = flatten(((a, b), (b, a)) for a, b in base)
    else:
        candidates = (tuple(sorted(pair, key=canonical_key))
                      for pair in base)

    return unique(
        ifilterfalse(
            lambda pair: pair[0].interval.overlaps_with(pair[1].interval),
            candidates))
def iter_pairs(self, within, order):
    """
    Iterate over FragmentToken pairs.

    Candidate pairs are 2-combinations of fragments; any pair whose two
    intervals overlap according to ``interval.overlaps_with`` is removed
    and the result is passed through `unique`.

    Parameters
    ----------
    within : bool
        Only select pairs from the same class.  Otherwise combinations are
        taken over all fragments of all classes flattened together.
    order : bool
        Also include reverse of a pair.  Otherwise each pair is sorted by
        (name, interval start) so it appears in one orientation only.

    Returns
    -------
    Iterator over (FragmentToken, FragmentToken) pairs.

    """
    def _sort_key(fragment):
        return (fragment.name, fragment.interval.start)

    def _overlapping(pair):
        first, second = pair[0], pair[1]
        return first.interval.overlaps_with(second.interval)

    class_members = self.clsdict.itervalues()

    if within:
        combos = (combo
                  for fragments in class_members
                  for combo in combinations(fragments, 2))
    else:
        # across classes
        combos = combinations(flatten(class_members), 2)

    if order:
        # Each combination contributes itself and its mirror image.
        pairs = flatten(((f1, f2), (f2, f1)) for f1, f2 in combos)
    else:
        pairs = (tuple(sorted(combo, key=_sort_key)) for combo in combos)

    return unique(ifilterfalse(_overlapping, pairs))
def typeset(pairs):
    """
    Collect the distinct marks occurring in an iterator of pairs.

    The pairs are flattened into their individual members, each member's
    ``mark`` attribute is read, and the marks are passed through `unique`.

    Parameters
    ----------
    pairs : Iterator over (FragmentToken, FragmentToken) pairs

    Returns
    -------
    Iterator over strings
        Unique marks.

    """
    tokens = flatten(pairs)
    marks = (token.mark for token in tokens)
    return unique(marks)
def __init__(self, container, threshold=0.03):
    """
    Index the interval endpoints of a collection of fragments by name.

    Parameters
    ----------
    container : object
        Either an object exposing ``iter_fragments()`` or any iterable of
        fragments.  Each fragment must provide ``name`` and ``interval``
        (with ``start`` and ``end``) attributes.
    threshold : float, optional
        Stored unchanged as ``self.threshold``; not otherwise used here.

    Attributes set
    --------------
    threshold : the `threshold` argument
    length : number of fragments consumed from `container`
    bounds : dict mapping each fragment name to a sorted numpy array of the
        distinct start/end values of that name's intervals

    """
    self.threshold = threshold

    if hasattr(container, 'iter_fragments'):
        fragments = container.iter_fragments()
    else:
        fragments = iter(container)

    # Group intervals per name while counting the fragments seen.
    intervals_by_name = defaultdict(list)
    count = 0
    for count, fragment in enumerate(fragments, 1):
        intervals_by_name[fragment.name].append(fragment.interval)
    self.length = count

    self.bounds = {}
    for name, intervals in intervals_by_name.iteritems():
        # The set removes endpoints shared by several intervals.
        endpoints = set(flatten((iv.start, iv.end) for iv in intervals))
        self.bounds[name] = np.sort(np.array(list(endpoints)))
def unique_flatten(pairs):
    r"""
    Flatten a sequence of (FragmentToken, FragmentToken) pairs.

    The pairs are flattened into a single stream of their members, and the
    stream is passed through `unique` so that each FragmentToken is yielded
    only once.

    .. math::

       \mathrm{flat}(P) = \{(i, j) | \exists q(((i, j), q) \in P)\}

    TODO check math
    NOTE(review): as written, the formula only names the first member of
    each pair, while the code flattens both members -- confirm.

    Parameters
    ----------
    pairs : Iterator over (FragmentToken, FragmentToken) pairs

    Returns
    -------
    Iterator over FragmentTokens

    """
    members = flatten(pairs)
    return unique(members)
def iter_segments(self):
    """
    Iterate over the segments stored in this mapping.

    Returns
    -------
    The result of applying `flatten` to ``self.itervalues()``, i.e. the
    members of every stored value chained into one stream.

    """
    stored_values = self.itervalues()
    return flatten(stored_values)
def iter_fragments(self):
    """
    Iterate over the fragments contained in this object's segments.

    Returns
    -------
    The result of applying `flatten` to ``self.iter_segments()``, i.e. the
    members of every segment chained into one stream.

    """
    segments = self.iter_segments()
    return flatten(segments)