Example #1
0
class IncRecoTest(unittest.TestCase):
    """Check indexing, slicing, iteration and chunk lookups on ``IncReco``.

    All expected values are tied to the contents of the fixture file
    ``data/test.inc_reco``.
    """

    def setUp(self):
        """Load the fixture file into ``self.reco``."""
        self.reco = IncReco('data/test.inc_reco')

    def test_methods(self):
        """Compare the ``IncReco`` accessors against known fixture values.

        ``assertEqual`` is used instead of the ``failUnlessEqual`` alias,
        which was deprecated and has been removed from ``unittest`` in
        Python 3.12.
        """
        # Integer indexing returns one chunk as a dict with the keys
        # 'Chunk' and 'Time'.
        # NOTE(review): '\xc3\xa4h' looks like the UTF-8 bytes of "äh",
        # i.e. the words are expected as undecoded byte strings -- confirm.
        self.assertEqual(self.reco[0], {
            'Chunk': [['0.08', '0.13', '\xc3\xa4h']],
            'Time': 0.13
        })

        # The string form is only spot-checked at a fixed offset.
        self.assertEqual(str(self.reco)[143:155], "'Time': 0.71")

        self.assertEqual(len(self.reco), 106)

        # Iteration yields the same chunk dicts as indexing does.
        self.assertEqual(
            list(self.reco)[55], {
                'Chunk': [['6.95', '7.10', 'ph']],
                'Time': 7.1
            })

        # The three lookups around 13.67 pin the boundary behaviour: a chunk
        # whose time equals the query is returned, a later one is not.
        self.assertEqual(self.reco.get_latest_chunk(13.66)['Time'], 13.58)

        self.assertEqual(self.reco.get_latest_chunk(13.67)['Time'], 13.67)

        self.assertEqual(self.reco.get_latest_chunk(13.68)['Time'], 13.67)

        self.assertEqual(self.reco.get_last_chunk()['Time'], 38.4)

        self.assertEqual(self.reco.get_times()[33], 3.56)

        # Slicing yields chunks whose times can be read back in order.
        self.assertEqual([x['Time'] for x in self.reco[5:10]],
                         [0.82, 0.93, 0.98, 1.02, 1.08])
Example #2
0
class IncRecoTest(unittest.TestCase):
    """Check indexing, slicing, iteration and chunk lookups on ``IncReco``.

    All expected values are tied to the contents of the fixture file
    ``data/test.inc_reco``.
    """

    def setUp(self):
        """Load the fixture file into ``self.reco``."""
        self.reco = IncReco('data/test.inc_reco')

    def test_methods(self):
        """Compare the ``IncReco`` accessors against known fixture values.

        ``assertEqual`` is used instead of the ``failUnlessEqual`` alias,
        which was deprecated and has been removed from ``unittest`` in
        Python 3.12.
        """
        # Integer indexing returns one chunk as a dict with the keys
        # 'Chunk' and 'Time'.
        # NOTE(review): '\xc3\xa4h' looks like the UTF-8 bytes of "äh",
        # i.e. the words are expected as undecoded byte strings -- confirm.
        self.assertEqual(self.reco[0], {'Chunk': [['0.08',
                                                   '0.13',
                                                   '\xc3\xa4h']],
                                        'Time': 0.13})

        # The string form is only spot-checked at a fixed offset.
        self.assertEqual(str(self.reco)[143:155], "'Time': 0.71")

        self.assertEqual(len(self.reco), 106)

        # Iteration yields the same chunk dicts as indexing does.
        self.assertEqual(list(self.reco)[55], {'Chunk': [['6.95',
                                                          '7.10',
                                                          'ph']],
                                               'Time': 7.1})

        # The three lookups around 13.67 pin the boundary behaviour: a chunk
        # whose time equals the query is returned, a later one is not.
        self.assertEqual(self.reco.get_latest_chunk(13.66)['Time'], 13.58)

        self.assertEqual(self.reco.get_latest_chunk(13.67)['Time'], 13.67)

        self.assertEqual(self.reco.get_latest_chunk(13.68)['Time'], 13.67)

        self.assertEqual(self.reco.get_last_chunk()['Time'], 38.4)

        self.assertEqual(self.reco.get_times()[33], 3.56)

        # Slicing yields chunks whose times can be read back in order.
        self.assertEqual([x['Time'] for x in self.reco[5:10]],
                         [0.82, 0.93, 0.98, 1.02, 1.08])
Example #3
0
def _increco_chunk_to_frame(lines, encoding):
    """Build the interval frame for the word lines of one inc_reco chunk."""

    def as_text(value):
        # Decode byte strings only; text that is already decoded is passed
        # through, so the function works with either kind of input.
        if isinstance(value, bytes):
            return value.decode(encoding)
        return value

    rows = []
    previous_end = None
    for line in lines:
        start_time = float(as_text(line[0]))
        end_time = float(as_text(line[1]))
        # Check for minor errors in the times: sometimes the end of the
        # previous word is 1 ms later than the start of the next word, so
        # a start time is never allowed to precede the previous end time.
        if previous_end is not None:
            start_time = max(start_time, previous_end)
        rows.append({"start_time": start_time,
                     "end_time": end_time,
                     "text": as_text(line[2])})
        previous_end = end_time

    # Constructing the frame once avoids the quadratic row-by-row append
    # and yields a well-formed empty frame for a chunk without words.
    return pd.DataFrame(rows, columns=['start_time', 'end_time', 'text'])


def open_intervalframe_from_increco(filepath,
                                    encoding='utf-8',
                                    lastonly=False):
    """ Create an interval frame from an inc_reco file

    Creates a dictionary with an intervalframe per chunk. The dictionary
    keys are the times of the chunks, converted with str().

    Arguments:

    filepath - the path to the inc_reco file

    Kwargs:

    encoding -- the encoding used to decode byte strings read from the
                file; values that are already text are used as they are

    lastonly -- Read only the last chunk in the inc_reco file
                rather than all chunks

    Returns:

    A dict mapping str(chunk['Time']) to a DataFrame with the columns
    'start_time', 'end_time' and 'text', one row per word of the chunk.
    A chunk without words maps to an empty DataFrame with those columns.
    If two chunks share the same time, the later one wins.

    """

    reco = IncReco(filepath)
    # A start index of -1 makes the slice cover only the final chunk.
    start_chunk = -1 if lastonly else 0

    frame_dict = dict()
    for chunk in reco[start_chunk:]:
        frame_dict[str(chunk['Time'])] = _increco_chunk_to_frame(
            chunk['Chunk'], encoding)
    return frame_dict
Example #4
0
 def setUp(self):
     """Build ``self.reco`` from the ``data/test.inc_reco`` file."""
     fixture_path = 'data/test.inc_reco'
     self.reco = IncReco(fixture_path)
Example #5
0
 def setUp(self):
     """Create the ``IncReco`` object stored in ``self.reco``."""
     # The path is relative; it is handed to ``IncReco`` unchanged.
     reco_file = 'data/test.inc_reco'
     self.reco = IncReco(reco_file)