Esempio n. 1
0
class TestCheckTruncateIntervals(object):
    """Exercise ``truncate_intervals`` on a corpus holding a single file."""

    # Interval database: file 'a' has the intervals (0, 1) and (2, 3).
    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})

    # d1: the fragment coincides with the first database interval of 'a'.
    d1 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'), )
    })
    # d2: the fragment starts inside the first interval and ends past it.
    d2 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'), )
    })
    # d3: the fragment names file 'b', which has no entry in m1.
    d3 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'), )
    })
    # d4: the fragment starts in the first interval and ends in the second.
    d4 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'), )
    })

    # Annotation of file 'a': four consecutive quarter-length tokens a-d.
    sa = [
        SegmentAnnotation(
            'a',
            [
                FragmentToken('a', Interval(0.0, 0.25), 'a'),
                FragmentToken('a', Interval(0.25, 0.5), 'b'),
                FragmentToken('a', Interval(0.5, 0.75), 'c'),
                FragmentToken('a', Interval(0.75, 1.0), 'd')
            ])
    ]
    ca = Corpus(sa)

    def test_good_interval(self):
        """The d1 class dict comes back unchanged with two empty lists."""
        outcome = truncate_intervals(self.d1, self.ca, self.m1)
        assert outcome == (self.d1, [], [])

    def test_truncate_interval(self):
        """d2 and d4 are both expected to be cut down to (0.5, 1.0)."""
        for clsdict in (self.d2, self.d4):
            outcome = truncate_intervals(clsdict, self.ca, self.m1)
            truncated = FragmentToken('a', Interval(0.5, 1.0), ('c', 'd'))
            expected = (ClassDict({ClassID(0, 'm1'): (truncated, )}), [], [])
            assert outcome == expected
Esempio n. 2
0
 def test_truncate_interval(self):
     """Both d2 and d4 are expected to be truncated to (0.5, 1.0)."""
     outcome_d2 = truncate_intervals(self.d2, self.ca, self.m1)
     expected_d2 = ClassDict({
         ClassID(0, 'm1'):
         (FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')), )
     })
     assert outcome_d2 == (expected_d2, [], [])

     outcome_d4 = truncate_intervals(self.d4, self.ca, self.m1)
     expected_d4 = ClassDict({
         ClassID(0, 'm1'):
         (FragmentToken('a', Interval(0.5, 1.0), ('c', 'd')), )
     })
     assert outcome_d4 == (expected_d4, [], [])
Esempio n. 3
0
class TestCheckIntervals(object):
    """Exercise ``check_intervals`` against a two-interval database."""

    # Interval database: file 'a' has the intervals (0, 1) and (2, 3).
    m1 = IntervalDB({'a': [(0.0, 1.0), (2.0, 3.0)]})

    # d1: the fragment coincides with the first database interval of 'a'.
    d1 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.0, 1.0), 'm1'), )
    })
    # d2: the fragment starts inside the first interval and ends past it.
    d2 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 1.5), 'm1'), )
    })
    # d3: the fragment names file 'b', which has no entry in m1.
    d3 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('b', Interval(0.0, 1.0), 'm1'), )
    })
    # d4: the fragment starts in the first interval and ends in the second.
    d4 = ClassDict({
        ClassID(0, 'm1'): (FragmentToken('a', Interval(0.5, 2.5), 'm1'), )
    })

    def test_good_interval(self):
        """d1 is expected to produce two empty lists."""
        outcome = check_intervals(self.d1, self.m1)
        assert outcome == ([], [])

    def test_interval_errors(self):
        """d2 and d4 each report their fragment in the first list."""
        outcome_d2 = check_intervals(self.d2, self.m1)
        assert outcome_d2 == (
            [FragmentToken('a', Interval(0.5, 1.5), 'm1')], [])

        outcome_d4 = check_intervals(self.d4, self.m1)
        assert outcome_d4 == (
            [FragmentToken('a', Interval(0.5, 2.5), 'm1')], [])

    def test_bad_filename(self):
        """d3 reports the filename 'b' in the second list."""
        outcome = check_intervals(self.d3, self.m1)
        assert outcome == ([], ['b'])
Esempio n. 4
0
def read_classfile(contents):
    """Parse the text of a class file into a mapping of classes to fragments.

    The text is a sequence of records separated by blank lines.  A record
    opens with a header line ``Class <ID>``, optionally followed by a space
    and a mark, and continues with one fragment per line in the form
    ``<name> <start> <end>`` (fields separated by any whitespace).

    Parameters
    ----------
    contents : string
        Full text of the class file.

    Returns
    -------
    r : dict from ClassID to tuple of FragmentToken
        The fragments are created with ``None`` as their mark.

    Raises
    ------
    ValueError
        If a class header appears while the previous class has not been
        closed by a blank line, if a fragment line appears outside of any
        class, or if a start/end field cannot be converted to float.
    IndexError
        If a fragment line has fewer than three fields.

    """
    classp = re.compile(r"^Class (?P<classID>\d+)(?: (?P<mark>.+))?$")
    r = {}
    curr = []  # fragments collected for the class currently being read
    curr_class = None

    # splitlines() also copes with '\r\n' line endings, which would otherwise
    # leave a stray '\r' that breaks header matching or ends up in the mark.
    for lineno, line in enumerate(contents.splitlines(), 1):
        m = classp.match(line)
        if m:  # on a line with a class label
            if curr_class is not None:
                raise ValueError('new class while reading class')
            curr_class = ClassID(int(m.group('classID')), m.group('mark'))
            continue

        # split() without an argument tolerates tabs and repeated spaces.
        fields = line.split()
        if not fields:  # whitespace line: close the open class, if any
            if curr_class is not None:
                r[curr_class] = tuple(curr)
                curr = []
                curr_class = None
            continue

        if curr_class is None:
            # Without this check the fragment would silently be attached to
            # whichever class header happens to come next.
            raise ValueError(
                'fragment outside of a class in line {0}'.format(lineno))

        name = fields[0]
        start = float(fields[1])
        end = float(fields[2])
        curr.append(FragmentToken(name, Interval(start, end), None))

    # The last class need not be followed by a blank line.
    if curr_class is not None:
        r[curr_class] = tuple(curr)
    return r
Esempio n. 5
0
class TestReadClasses(object):
    """Tests for reading a class file, reading a corpus and annotating."""

    # Class file text: three classes with two fragments each.
    tiny_classes = """Class 0
f1 0.000 4.000
f2 0.000 4.000

Class 1
f1 1.000 4.000
f2 1.000 4.000

Class 2
f1 0.000 3.000
f2 0.000 3.000


"""
    # Annotation text: files f1 and f2, each with the tokens a, b, c, d.
    tiny_corpus = """f1 0.000 1.000 a
f1 1.000 2.000 b
f1 2.000 3.000 c
f1 3.000 4.000 d
f2 0.000 1.000 a
f2 1.000 2.000 b
f2 2.000 3.000 c
f2 3.000 4.000 d

"""
    # Expected result of reading tiny_classes: fragments carry no mark.
    clsdict_e = {
        ClassID(0, None): (
            FragmentToken('f1', Interval(0.0, 4.0), None),
            FragmentToken('f2', Interval(0.0, 4.0), None)
        ),
        ClassID(1, None): (
            FragmentToken('f1', Interval(1.0, 4.0), None),
            FragmentToken('f2', Interval(1.0, 4.0), None)
        ),
        ClassID(2, None): (
            FragmentToken('f1', Interval(0.0, 3.0), None),
            FragmentToken('f2', Interval(0.0, 3.0), None)
        )
    }
    # Expected result after annotation: marks are tuples of corpus labels.
    clsdict_a = {
        ClassID(0, None): (
            FragmentToken('f1', Interval(0.0, 4.0), ('a', 'b', 'c', 'd')),
            FragmentToken('f2', Interval(0.0, 4.0), ('a', 'b', 'c', 'd'))
        ),
        ClassID(1, None): (
            FragmentToken('f1', Interval(1.0, 4.0), ('b', 'c', 'd')),
            FragmentToken('f2', Interval(1.0, 4.0), ('b', 'c', 'd'))
        ),
        ClassID(2, None): (
            FragmentToken('f1', Interval(0.0, 3.0), ('a', 'b', 'c')),
            FragmentToken('f2', Interval(0.0, 3.0), ('a', 'b', 'c'))
        )
    }
    # Tokens of tiny_corpus: the first four belong to f1, the rest to f2.
    tokens = [
        FragmentToken('f1', Interval(0.0, 1.0), 'a'),
        FragmentToken('f1', Interval(1.0, 2.0), 'b'),
        FragmentToken('f1', Interval(2.0, 3.0), 'c'),
        FragmentToken('f1', Interval(3.0, 4.0), 'd'),
        FragmentToken('f2', Interval(0.0, 1.0), 'a'),
        FragmentToken('f2', Interval(1.0, 2.0), 'b'),
        FragmentToken('f2', Interval(2.0, 3.0), 'c'),
        FragmentToken('f2', Interval(3.0, 4.0), 'd')
    ]
    corpus = Corpus([
        SegmentAnnotation('f1', tokens[:4]),
        SegmentAnnotation('f2', tokens[4:])
    ])

    def test_small(self):
        """Reading tiny_classes yields clsdict_e."""
        parsed = read_classfile(self.tiny_classes)
        assert self.clsdict_e == parsed

    def test_corpus(self):
        """Reading tiny_corpus and converting it yields the corpus fixture."""
        token_lists = read_annotation(self.tiny_corpus)
        built = tokenlists_to_corpus(token_lists)
        assert self.corpus == built

    def test_annotate(self):
        """Annotating the parsed classes with the corpus yields clsdict_a."""
        parsed = read_classfile(self.tiny_classes)
        built = tokenlists_to_corpus(read_annotation(self.tiny_corpus))
        annotated = annotate_classes(parsed, built)
        assert self.clsdict_a == annotated
Esempio n. 6
0
class TestClassDict(object):
    """Tests for ``ClassDict.restrict``, ``iter_fragments``, ``iter_pairs``."""

    # Five fragments over the files a, b and c, referred to by index below.
    tokens = [
        FragmentToken('a', Interval(0, 1), 'm1'),
        FragmentToken('b', Interval(2, 3), 'm1'),
        FragmentToken('c', Interval(2, 3), 'm1'),
        FragmentToken('b', Interval(0, 1), 'm2'),
        FragmentToken('c', Interval(0, 1), 'm2')
    ]
    id0 = ClassID(0, 'c1')
    id1 = ClassID(1, 'c2')

    # Plain dicts: one class with two tokens, one token, no tokens, and
    # finally two classes with two tokens each.
    d1 = {id0: (tokens[0], tokens[1])}
    d2 = {id0: (tokens[0], )}
    d3 = {id0: tuple()}
    d4 = {
        id0: (tokens[0], tokens[2]),
        id1: (tokens[3], tokens[4])
    }
    c1 = ClassDict(d1)
    c2 = ClassDict(d2)
    c3 = ClassDict(d3)
    c4 = ClassDict(d4)

    def test_restrict(self):
        """Restrict against a database with, then without, an entry for 'b'."""
        t = self.tokens

        db1 = IntervalDB({
            'a': [Interval(0, 1)],
            'b': [Interval(0, 3)],
            'c': [Interval(0, 3)]
        })
        assert self.c1.restrict(db1) == self.c1
        assert self.c2.restrict(db1) == self.c2
        assert self.c2.restrict(db1, remove_singletons=True) == ClassDict({})
        assert self.c3.restrict(db1) == ClassDict({})
        assert self.c4.restrict(db1) == self.c4

        db2 = IntervalDB({
            'a': [Interval(0, 1)],
            'c': [Interval(0, 3)]
        })
        assert self.c1.restrict(db2) == self.c2
        assert self.c2.restrict(db2) == self.c2
        assert self.c2.restrict(db2, remove_singletons=True) == ClassDict({})
        assert self.c3.restrict(db2) == ClassDict({})

        kept = self.c4.restrict(db2)
        assert kept == ClassDict({
            self.id0: (t[0], t[2]),
            self.id1: (t[4], )
        })

        kept_no_singletons = self.c4.restrict(db2, remove_singletons=True)
        assert kept_no_singletons == ClassDict({self.id0: (t[0], t[2])})

    def test_iter_fragments(self):
        """Fragments are listed in the order expected for each fixture."""
        t = self.tokens
        assert list(self.c1.iter_fragments()) == [t[0], t[1]]
        assert list(self.c2.iter_fragments()) == [t[0]]
        assert list(self.c3.iter_fragments()) == []
        assert list(self.c4.iter_fragments()) == [t[0], t[2], t[3], t[4]]

    def test_iter_fragments_with_class(self):
        """With ``with_class=True`` each fragment is paired with its class."""
        t = self.tokens
        first, second = self.id0, self.id1

        assert list(self.c1.iter_fragments(with_class=True)) == [
            (first, t[0]),
            (first, t[1])
        ]
        assert list(self.c2.iter_fragments(with_class=True)) == [
            (first, t[0])
        ]
        assert list(self.c3.iter_fragments(with_class=True)) == []
        assert list(self.c4.iter_fragments(with_class=True)) == [
            (first, t[0]),
            (first, t[2]),
            (second, t[3]),
            (second, t[4])
        ]

    def test_iter_pairs_across_set(self):
        """``iter_pairs(False, False)``: expected pairs may span classes."""
        t = self.tokens
        assert list(self.c1.iter_pairs(False, False)) == [(t[0], t[1])]
        assert list(self.c2.iter_pairs(False, False)) == []
        assert list(self.c3.iter_pairs(False, False)) == []
        assert set(self.c4.iter_pairs(False, False)) == {
            (t[0], t[2]), (t[0], t[3]), (t[0], t[4]),
            (t[2], t[3]), (t[2], t[4]),
            (t[3], t[4])
        }

    def test_iter_pairs_across_order(self):
        """``iter_pairs(False, True)``: each pair is expected both ways."""
        t = self.tokens
        assert set(self.c1.iter_pairs(False, True)) == {
            (t[0], t[1]), (t[1], t[0])
        }
        assert list(self.c2.iter_pairs(False, True)) == []
        assert list(self.c3.iter_pairs(False, True)) == []
        assert set(self.c4.iter_pairs(False, True)) == {
            (t[0], t[2]), (t[2], t[0]),
            (t[0], t[3]), (t[3], t[0]),
            (t[0], t[4]), (t[4], t[0]),
            (t[2], t[3]), (t[3], t[2]),
            (t[2], t[4]), (t[4], t[2]),
            (t[3], t[4]), (t[4], t[3])
        }

    def test_iter_pairs_within_set(self):
        """``iter_pairs(True, False)``: expected pairs stay inside a class."""
        t = self.tokens
        assert set(self.c1.iter_pairs(True, False)) == {(t[0], t[1])}
        assert list(self.c2.iter_pairs(True, False)) == []
        assert list(self.c3.iter_pairs(True, False)) == []
        assert set(self.c4.iter_pairs(True, False)) == {
            (t[0], t[2]),
            (t[3], t[4])
        }

    def test_iter_pairs_within_order(self):
        """``iter_pairs(True, True)``: in-class pairs, expected both ways."""
        t = self.tokens
        assert set(self.c1.iter_pairs(True, True)) == {
            (t[0], t[1]), (t[1], t[0])
        }
        assert list(self.c2.iter_pairs(True, True)) == []
        assert list(self.c3.iter_pairs(True, True)) == []
        assert set(self.c4.iter_pairs(True, True)) == {
            (t[0], t[2]), (t[2], t[0]),
            (t[3], t[4]), (t[4], t[3])
        }
Esempio n. 7
0
 def test_no_mark(self):
     """A ClassID built with mark None exposes ID 1 and reprs without mark."""
     unmarked = ClassID(1, None)
     assert unmarked.ID == 1
     assert unmarked.mark is None
     assert repr(unmarked) == 'ClassID(1)'
Esempio n. 8
0
 def test_mark(self):
     """A ClassID built with a mark exposes it and shows it in its repr."""
     marked = ClassID(1, 'markymark')
     assert marked.ID == 1
     assert marked.mark == 'markymark'
     assert repr(marked) == 'ClassID(1(markymark))'