class TestCheckTruncateIntervals(object):
    """Check ``truncate_intervals`` on a corpus holding a single file 'a'.

    Fixtures (class attributes):

    * ``m1`` -- interval database listing (0.0, 1.0) and (2.0, 3.0) for 'a'.
    * ``d1`` -- one fragment lying exactly on (0.0, 1.0).
    * ``d2`` -- one fragment (0.5, 1.5), running past 1.0.
    * ``d3`` -- one fragment in file 'b', which ``m1`` does not list
      (not used by the tests in this class).
    * ``d4`` -- one fragment (0.5, 2.5), reaching into the second interval.
    * ``sa`` / ``ca`` -- annotation of 'a' as four quarter-length tokens
      marked 'a', 'b', 'c', 'd', and the corpus built from it.
    """

    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})

    d1 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'),),
    })
    d2 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'),),
    })
    d3 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'),),
    })
    d4 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'),),
    })

    sa = [
        SegmentAnnotation('a', [
            FragmentToken('a', Interval(0.0, 0.25), 'a'),
            FragmentToken('a', Interval(0.25, 0.5), 'b'),
            FragmentToken('a', Interval(0.5, 0.75), 'c'),
            FragmentToken('a', Interval(0.75, 1.0), 'd'),
        ]),
    ]
    ca = Corpus(sa)

    def test_good_interval(self):
        """A fragment already inside the database comes back unchanged."""
        outcome = truncate_intervals(self.d1, self.ca, self.m1)
        assert outcome == (self.d1, [], [])

    def test_truncate_interval(self):
        """Fragments running past 1.0 are cut back to (0.5, 1.0).

        Both ``d2`` and ``d4`` are expected to give the same result: the
        shortened fragment carrying the marks ('c', 'd'), plus two empty
        lists.
        """
        for overshooting in (self.d2, self.d4):
            truncated = ClassDict({
                ClassID(0, 'm1'): (
                    FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')),
                ),
            })
            outcome = truncate_intervals(overshooting, self.ca, self.m1)
            assert outcome == (truncated, [], [])
def test_truncate_interval(self):
    """Fragments running past 1.0 are cut back to (0.5, 1.0).

    Both ``self.d2`` and ``self.d4`` are expected to give the same result:
    the shortened fragment carrying the marks ('c', 'd'), plus two empty
    lists.
    """
    for overshooting in (self.d2, self.d4):
        truncated = ClassDict({
            ClassID(0, 'm1'): (
                FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')),
            ),
        })
        outcome = truncate_intervals(overshooting, self.ca, self.m1)
        assert outcome == (truncated, [], [])
class TestCheckIntervals(object):
    """Check the pair of lists returned by ``check_intervals``.

    Fixtures (class attributes):

    * ``m1`` -- interval database listing (0.0, 1.0) and (2.0, 3.0) for 'a'.
    * ``d1`` -- one fragment lying exactly on (0.0, 1.0) in 'a'.
    * ``d2`` -- one fragment (0.5, 1.5) in 'a', running past 1.0.
    * ``d3`` -- one fragment in file 'b', which ``m1`` does not list.
    * ``d4`` -- one fragment (0.5, 2.5) in 'a', reaching into the second
      interval.
    """

    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})

    d1 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'),),
    })
    d2 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'),),
    })
    d3 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'),),
    })
    d4 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'),),
    })

    def test_good_interval(self):
        """A fragment inside the database yields two empty lists."""
        outcome = check_intervals(self.d1, self.m1)
        assert outcome == ([], [])

    def test_interval_errors(self):
        """Fragments leaving the listed intervals show up in the first list."""
        outcome = check_intervals(self.d2, self.m1)
        offending = FragmentToken('a', Interval(0.5, 1.5), 'm1')
        assert outcome == ([offending], [])

        outcome = check_intervals(self.d4, self.m1)
        offending = FragmentToken('a', Interval(0.5, 2.5), 'm1')
        assert outcome == ([offending], [])

    def test_bad_filename(self):
        """A file name missing from the database shows up in the second list."""
        outcome = check_intervals(self.d3, self.m1)
        assert outcome == ([], ['b'])
def read_classfile(contents):
    """Parse the text of a class file.

    The text is read line by line. A line of the form ``Class <ID>`` or
    ``Class <ID> <mark>`` opens a class, every following non-blank line
    ``<name> <start> <end>`` adds a fragment to it, and a blank line (or
    the end of the text) closes it.

    Parameters
    ----------
    contents : string
        Full text of the class file. Lines may end in ``\\n`` or ``\\r\\n``
        and fragment fields may be separated by any run of whitespace.

    Returns
    -------
    r : dict from ClassID to tuple of FragmentToken
        The fragments are created with a mark of None.

    Raises
    ------
    ValueError
        If a class header is found while another class is still open, if a
        fragment line appears while no class is open, if a fragment line
        has fewer than three fields, or if a start/end field is not a
        number.

    """
    classp = re.compile(r"^Class (?P<classID>\d+)(?: (?P<mark>.+))?$")
    r = {}
    curr = []  # fragments collected for the class currently being read
    curr_class = None

    # splitlines() rather than split('\n') so that a trailing '\r' from
    # CRLF input neither hides a class header nor ends up in its mark.
    for lineno, line in enumerate(contents.splitlines(), 1):
        m = classp.match(line)
        if m:
            # class header line
            if curr_class is not None:
                raise ValueError('new class while reading class')
            curr_class = ClassID(int(m.group('classID')), m.group('mark'))
            continue

        fields = line.split()
        if not fields:
            # blank line: close the open class; stray blank lines between
            # classes are tolerated
            if curr_class is not None:
                r[curr_class] = tuple(curr)
                curr = []
                curr_class = None
            continue

        # fragment line
        if curr_class is None:
            # Without this check the fragment would stay in `curr` and be
            # silently attributed to whichever class is opened next.
            raise ValueError('fragment outside of class in line {0}'
                             .format(lineno))
        if len(fields) < 3:
            raise ValueError('expected "name start end" in line {0}'
                             .format(lineno))
        interval = Interval(float(fields[1]), float(fields[2]))
        curr.append(FragmentToken(fields[0], interval, None))

    # the last class need not be followed by a blank line
    if curr_class is not None:
        r[curr_class] = tuple(curr)
    return r
# TestReadClasses: checks read_classfile, read_annotation combined with
# tokenlists_to_corpus, and annotate_classes on a small two-file fixture.
#   tiny_classes -- class-file text with classes 0, 1 and 2, each holding
#                   one fragment from 'f1' and one from 'f2'.
#   tiny_corpus  -- annotation text with four unit-length tokens marked
#                   'a', 'b', 'c', 'd' for each of 'f1' and 'f2'.
#   clsdict_e    -- value test_small expects from
#                   read_classfile(tiny_classes); every mark is None.
#   clsdict_a    -- value test_annotate expects from annotate_classes; each
#                   fragment's mark is the tuple of corpus marks lying
#                   within its interval.
#   tokens, corpus -- value test_corpus expects from
#                   tokenlists_to_corpus(read_annotation(tiny_corpus)).
# NOTE(review): the two triple-quoted fixtures appear on a single line here;
# presumably they originally had one record per line with a blank line
# between classes -- confirm against the upstream file before relying on them.
class TestReadClasses(object): tiny_classes = """Class 0 f1 0.000 4.000 f2 0.000 4.000 Class 1 f1 1.000 4.000 f2 1.000 4.000 Class 2 f1 0.000 3.000 f2 0.000 3.000 """ tiny_corpus = """f1 0.000 1.000 a f1 1.000 2.000 b f1 2.000 3.000 c f1 3.000 4.000 d f2 0.000 1.000 a f2 1.000 2.000 b f2 2.000 3.000 c f2 3.000 4.000 d """ clsdict_e = { ClassID(0, None): (FragmentToken('f1', Interval(0.0, 4.0), None), FragmentToken('f2', Interval(0.0, 4.0), None)), ClassID(1, None): (FragmentToken('f1', Interval(1.0, 4.0), None), FragmentToken('f2', Interval(1.0, 4.0), None)), ClassID(2, None): (FragmentToken('f1', Interval(0.0, 3.0), None), FragmentToken('f2', Interval(0.0, 3.0), None)) } clsdict_a = { ClassID(0, None): (FragmentToken('f1', Interval(0.0, 4.0), ('a', 'b', 'c', 'd')), FragmentToken('f2', Interval(0.0, 4.0), ('a', 'b', 'c', 'd'))), ClassID(1, None): (FragmentToken('f1', Interval(1.0, 4.0), ('b', 'c', 'd')), FragmentToken('f2', Interval(1.0, 4.0), ('b', 'c', 'd'))), ClassID(2, None): (FragmentToken('f1', Interval(0.0, 3.0), ('a', 'b', 'c')), FragmentToken('f2', Interval(0.0, 3.0), ('a', 'b', 'c'))) } tokens = [ FragmentToken('f1', Interval(0.0, 1.0), 'a'), FragmentToken('f1', Interval(1.0, 2.0), 'b'), FragmentToken('f1', Interval(2.0, 3.0), 'c'), FragmentToken('f1', Interval(3.0, 4.0), 'd'), FragmentToken('f2', Interval(0.0, 1.0), 'a'), FragmentToken('f2', Interval(1.0, 2.0), 'b'), FragmentToken('f2', Interval(2.0, 3.0), 'c'), FragmentToken('f2', Interval(3.0, 4.0), 'd') ] corpus = Corpus([ SegmentAnnotation('f1', tokens[:4]), SegmentAnnotation('f2', tokens[4:]) ]) def test_small(self): assert (self.clsdict_e == read_classfile(self.tiny_classes)) def test_corpus(self): assert (self.corpus == tokenlists_to_corpus( read_annotation(self.tiny_corpus))) def test_annotate(self): assert (self.clsdict_a == annotate_classes( read_classfile(self.tiny_classes), tokenlists_to_corpus(read_annotation(self.tiny_corpus))))
class TestClassDict(object):
    """Exercise ``ClassDict.restrict``, ``iter_fragments`` and ``iter_pairs``.

    Fixtures (class attributes):

    * ``tokens`` -- five fragments: 'a', 'b', 'c' marked 'm1' and 'b', 'c'
      marked 'm2'.
    * ``id0`` / ``id1`` -- the two class identifiers used as keys.
    * ``d1`` .. ``d4`` -- plain dicts with two, one, zero and (over two
      classes) four fragments; ``c1`` .. ``c4`` wrap them in ``ClassDict``.
    """

    tokens = [
        FragmentToken('a', Interval(0, 1), 'm1'),
        FragmentToken('b', Interval(2, 3), 'm1'),
        FragmentToken('c', Interval(2, 3), 'm1'),
        FragmentToken('b', Interval(0, 1), 'm2'),
        FragmentToken('c', Interval(0, 1), 'm2'),
    ]
    id0 = ClassID(0, 'c1')
    id1 = ClassID(1, 'c2')

    d1 = {id0: (tokens[0], tokens[1])}
    d2 = {id0: (tokens[0],)}
    d3 = {id0: ()}
    d4 = {
        id0: (tokens[0], tokens[2]),
        id1: (tokens[3], tokens[4]),
    }

    c1 = ClassDict(d1)
    c2 = ClassDict(d2)
    c3 = ClassDict(d3)
    c4 = ClassDict(d4)

    def test_restrict(self):
        """Restrict against a database with and without file 'b'."""
        t0, _, t2, _, t4 = self.tokens

        # db1 lists all three files.
        db1 = IntervalDB({
            'a': [Interval(0, 1)],
            'b': [Interval(0, 3)],
            'c': [Interval(0, 3)],
        })
        assert self.c1.restrict(db1) == self.c1
        assert self.c2.restrict(db1) == self.c2
        assert self.c2.restrict(db1, remove_singletons=True) == ClassDict({})
        assert self.c3.restrict(db1) == ClassDict({})
        assert self.c4.restrict(db1) == self.c4

        # db2 has no entry for file 'b'.
        db2 = IntervalDB({
            'a': [Interval(0, 1)],
            'c': [Interval(0, 3)],
        })
        assert self.c1.restrict(db2) == self.c2
        assert self.c2.restrict(db2) == self.c2
        assert self.c2.restrict(db2, remove_singletons=True) == ClassDict({})
        assert self.c3.restrict(db2) == ClassDict({})

        kept = self.c4.restrict(db2)
        assert kept == ClassDict({self.id0: (t0, t2), self.id1: (t4,)})

        kept = self.c4.restrict(db2, remove_singletons=True)
        assert kept == ClassDict({self.id0: (t0, t2)})

    def test_iter_fragments(self):
        """``iter_fragments()`` yields the bare fragments."""
        t0, t1, t2, t3, t4 = self.tokens

        assert list(self.c1.iter_fragments()) == [t0, t1]
        assert list(self.c2.iter_fragments()) == [t0]
        assert list(self.c3.iter_fragments()) == []
        assert list(self.c4.iter_fragments()) == [t0, t2, t3, t4]

    def test_iter_fragments_with_class(self):
        """``with_class=True`` pairs every fragment with its class id."""
        t0, t1, t2, t3, t4 = self.tokens
        first, second = self.id0, self.id1

        found = list(self.c1.iter_fragments(with_class=True))
        assert found == [(first, t0), (first, t1)]

        found = list(self.c2.iter_fragments(with_class=True))
        assert found == [(first, t0)]

        found = list(self.c3.iter_fragments(with_class=True))
        assert found == []

        found = list(self.c4.iter_fragments(with_class=True))
        assert found == [
            (first, t0),
            (first, t2),
            (second, t3),
            (second, t4),
        ]

    def test_iter_pairs_across_set(self):
        """``iter_pairs(False, False)``: each pair once, across classes."""
        t0, t1, t2, t3, t4 = self.tokens
        within, order = False, False

        assert list(self.c1.iter_pairs(within, order)) == [(t0, t1)]
        assert list(self.c2.iter_pairs(within, order)) == []
        assert list(self.c3.iter_pairs(within, order)) == []

        wanted = set([
            (t0, t2), (t0, t3), (t0, t4),
            (t2, t3), (t2, t4),
            (t3, t4),
        ])
        assert set(self.c4.iter_pairs(within, order)) == wanted

    def test_iter_pairs_across_order(self):
        """``iter_pairs(False, True)``: both orientations, across classes."""
        t0, t1, t2, t3, t4 = self.tokens
        within, order = False, True

        wanted = set([(t0, t1), (t1, t0)])
        assert set(self.c1.iter_pairs(within, order)) == wanted
        assert list(self.c2.iter_pairs(within, order)) == []
        assert list(self.c3.iter_pairs(within, order)) == []

        wanted = set([
            (t0, t2), (t2, t0),
            (t0, t3), (t3, t0),
            (t0, t4), (t4, t0),
            (t2, t3), (t3, t2),
            (t2, t4), (t4, t2),
            (t3, t4), (t4, t3),
        ])
        assert set(self.c4.iter_pairs(within, order)) == wanted

    def test_iter_pairs_within_set(self):
        """``iter_pairs(True, False)``: each pair once, same class only."""
        t0, t1, t2, t3, t4 = self.tokens
        within, order = True, False

        wanted = set([(t0, t1)])
        assert set(self.c1.iter_pairs(within, order)) == wanted
        assert list(self.c2.iter_pairs(within, order)) == []
        assert list(self.c3.iter_pairs(within, order)) == []

        wanted = set([(t0, t2), (t3, t4)])
        assert set(self.c4.iter_pairs(within, order)) == wanted

    def test_iter_pairs_within_order(self):
        """``iter_pairs(True, True)``: both orientations, same class only."""
        t0, t1, t2, t3, t4 = self.tokens
        within, order = True, True

        wanted = set([(t0, t1), (t1, t0)])
        assert set(self.c1.iter_pairs(within, order)) == wanted
        assert list(self.c2.iter_pairs(within, order)) == []
        assert list(self.c3.iter_pairs(within, order)) == []

        wanted = set([
            (t0, t2), (t2, t0),
            (t3, t4), (t4, t3),
        ])
        assert set(self.c4.iter_pairs(within, order)) == wanted
def test_no_mark(self):
    """A ClassID built with mark None keeps its ID and has a bare repr."""
    unmarked = ClassID(1, None)

    assert unmarked.ID == 1
    assert unmarked.mark is None
    assert repr(unmarked) == 'ClassID(1)'
def test_mark(self):
    """A ClassID built with a mark exposes it and shows it in its repr."""
    marked = ClassID(1, 'markymark')

    assert marked.ID == 1
    assert marked.mark == 'markymark'
    assert repr(marked) == 'ClassID(1(markymark))'