class IncRecoTest(unittest.TestCase):
    """Exercise the sequence-style interface of ``IncReco`` on a fixture."""

    def setUp(self):
        # unittest calls setUp before each test method, so every test starts
        # from a freshly loaded fixture.
        self.reco = IncReco('data/test.inc_reco')

    def test_methods(self):
        """Check indexing, slicing, iteration, length and chunk lookups."""
        # ``assertEqual`` is used instead of the ``failUnlessEqual`` alias:
        # the alias is deprecated and no longer exists in Python 3.12+.

        # Integer indexing yields a dict with the chunk lines and its time.
        self.assertEqual(
            self.reco[0],
            {'Chunk': [['0.08', '0.13', '\xc3\xa4h']], 'Time': 0.13})

        # String representation, length and iteration.
        self.assertEqual(str(self.reco)[143:155], "'Time': 0.71")
        self.assertEqual(len(self.reco), 106)
        self.assertEqual(
            list(self.reco)[55],
            {'Chunk': [['6.95', '7.10', 'ph']], 'Time': 7.1})

        # The expectations below pin get_latest_chunk to the most recent
        # chunk whose time does not exceed the queried time.
        self.assertEqual(self.reco.get_latest_chunk(13.66)['Time'], 13.58)
        self.assertEqual(self.reco.get_latest_chunk(13.67)['Time'], 13.67)
        self.assertEqual(self.reco.get_latest_chunk(13.68)['Time'], 13.67)

        self.assertEqual(self.reco.get_last_chunk()['Time'], 38.4)
        self.assertEqual(self.reco.get_times()[33], 3.56)

        # Slicing yields the chunks at positions 5..9.
        self.assertEqual(
            [x['Time'] for x in self.reco[5:10]],
            [0.82, 0.93, 0.98, 1.02, 1.08])
class IncRecoTest(unittest.TestCase):
    """Tests for ``IncReco`` run against the ``data/test.inc_reco`` file."""

    def setUp(self):
        # Load the fixture; unittest runs this before each test method.
        self.reco = IncReco('data/test.inc_reco')

    def test_methods(self):
        """Verify item access, str(), len(), iteration and time lookups."""
        # NOTE: ``failUnlessEqual`` was a deprecated alias of ``assertEqual``
        # and was removed in Python 3.12, so ``assertEqual`` is used here.
        reco = self.reco

        # First chunk: one line of [start, end, text] strings plus the time.
        self.assertEqual(
            reco[0],
            {'Chunk': [['0.08', '0.13', '\xc3\xa4h']], 'Time': 0.13})
        self.assertEqual(str(reco)[143:155], "'Time': 0.71")
        self.assertEqual(len(reco), 106)
        self.assertEqual(
            list(reco)[55],
            {'Chunk': [['6.95', '7.10', 'ph']], 'Time': 7.1})

        # A query just before, exactly at, and just after the 13.67 chunk:
        # the result never has a time later than the query.
        self.assertEqual(reco.get_latest_chunk(13.66)['Time'], 13.58)
        self.assertEqual(reco.get_latest_chunk(13.67)['Time'], 13.67)
        self.assertEqual(reco.get_latest_chunk(13.68)['Time'], 13.67)

        self.assertEqual(reco.get_last_chunk()['Time'], 38.4)
        self.assertEqual(reco.get_times()[33], 3.56)
        self.assertEqual(
            [x['Time'] for x in reco[5:10]],
            [0.82, 0.93, 0.98, 1.02, 1.08])
def open_intervalframe_from_increco(filepath, encoding='utf-8', lastonly=False):
    """Create interval frames from an inc_reco file.

    Creates a dictionary with one interval frame per chunk. The dictionary
    keys are the times of the chunks, converted with ``str``.

    Arguments:
    filepath -- the path to the inc_reco file

    Kwargs:
    encoding -- the encoding used to decode byte-string fields of the file
    lastonly -- read only the last chunk in the inc_reco file rather
                than all chunks

    Returns:
    A dict mapping ``str(chunk['Time'])`` to a pandas DataFrame with the
    columns ``start_time``, ``end_time`` (floats) and ``text``. A chunk
    without any lines yields an empty DataFrame with these columns.
    """
    columns = ['start_time', 'end_time', 'text']

    def as_text(field):
        # Only byte strings need decoding; fields that are already text are
        # passed through unchanged.
        if isinstance(field, bytes):
            return field.decode(encoding)
        return field

    reco = IncReco(filepath)
    start_chunk = -1 if lastonly else 0
    frame_dict = dict()
    for chunk in reco[start_chunk:]:
        # Collect plain rows and build the frame once per chunk instead of
        # appending one single-row DataFrame per line.
        rows = []
        previous_end = None
        for line in chunk['Chunk']:
            start_time = float(as_text(line[0]))
            end_time = float(as_text(line[1]))
            # Check for minor errors in the times: sometimes the end of the
            # previous word is 1 ms later than the start of the next word,
            # so the start is moved up to the previous (unmodified) end.
            if previous_end is not None:
                start_time = max(start_time, previous_end)
            rows.append((start_time, end_time, as_text(line[2])))
            previous_end = end_time
        frame_dict[str(chunk['Time'])] = pd.DataFrame(rows, columns=columns)
    return frame_dict
def setUp(self):
    """Load the inc_reco fixture file into ``self.reco``."""
    fixture_path = 'data/test.inc_reco'
    self.reco = IncReco(fixture_path)