def test_fill_grids_file2_labels(self):
    """
    Check the grid that fill_grids() produces for "File2.txt" when the
    second feature selection and the second data frame are used.

    The grid is expected to hold a single tier named "word_label" with
    exactly three intervals whose boundaries and texts are listed below.
    """
    options.cfg.selected_features = self.selected_features2
    writer = textgrids.TextgridWriter(self.df2, self.session)
    grid = writer.fill_grids()[("File2.txt", )]

    # (start_time, end_time, text) for each interval, in tier order
    expected_intervals = [
        (4, 4.5, "a"),
        (5, 6, "tiny"),
        (8, 8.5, "boat"),
    ]

    # exactly one tier, and it carries the word labels
    self.assertEqual(len(grid.tiers), 1)
    tier = grid.tiers[0]
    self.assertEqual(tier.name, "word_label")

    # the number of intervals is verified before any interval is indexed
    self.assertEqual(len(tier.intervals), 3)
    for position, (start, end, text) in enumerate(expected_intervals):
        interval = tier.intervals[position]
        self.assertEqual(interval.start_time, start)
        self.assertEqual(interval.end_time, end)
        self.assertEqual(interval.text, text)
def test_get_file_data(self):
    """
    Check that TextgridWriter.get_file_data() returns the same data frame
    as the module-level _get_file_data() helper.

    The helper is called with None, the list [1, 2, 3, 4, 5] and the
    resource's corpus_id, file_name and file_duration attributes.
    """
    options.cfg.selected_features = self.selected_features1
    writer = textgrids.TextgridWriter(self.df1, self.session)

    resource = self.resource
    requested = [
        resource.corpus_id,
        resource.file_name,
        resource.file_duration,
    ]
    expected = _get_file_data(None, [1, 2, 3, 4, 5], requested)

    actual = writer.get_file_data()
    assert_frame_equal(actual, expected)
def test_prepare_textgrids_feature_timing1(self):
    """
    Check the feature timing for a query that has only corpus timings,
    but no additional lexical features.

    After prepare_textgrids() has run, "corpus_id" should be the only key
    in writer.feature_timing, and it should be mapped to the corpus start
    and end time columns.
    """
    options.cfg.selected_features = self.selected_features1
    writer = textgrids.TextgridWriter(self.df1, self.session)

    # The returned grids are not inspected by this test; the call is made
    # so that the writer's feature_timing can be examined afterwards.
    _grids = writer.prepare_textgrids()

    expected_span = ("corpus_starttime", "corpus_endtime")
    self.assertEqual(list(writer.feature_timing.keys()), ["corpus_id"])
    self.assertEqual(writer.feature_timing["corpus_id"], expected_span)
def test_prepare_textgrids_feature_timing2(self):
    """
    Check the feature timing for a query that has a lexical feature
    (word_label) in addition to the corpus timings.

    After prepare_textgrids() has run, writer.feature_timing should
    contain exactly the keys "corpus_id" and "word_label" (in any order),
    each mapped to the corpus start and end time columns.
    """
    options.cfg.selected_features = self.selected_features2
    writer = textgrids.TextgridWriter(self.df2, self.session)

    # The returned grids are not inspected by this test; the call is made
    # so that the writer's feature_timing can be examined afterwards.
    _grids = writer.prepare_textgrids()

    expected_span = ("corpus_starttime", "corpus_endtime")

    # key order is irrelevant here, hence assertCountEqual
    self.assertCountEqual(list(writer.feature_timing.keys()),
                          ["corpus_id", "word_label"])
    for feature in ("word_label", "corpus_id"):
        self.assertEqual(writer.feature_timing[feature], expected_span)
def test_fill_grids_file1_no_labels(self):
    """
    Check the grid that fill_grids() produces for "File1.txt" when the
    first feature selection and the first data frame are used.

    The grid is expected to hold a single tier named "corpus_id" with
    exactly two intervals whose boundaries and texts are listed below.
    """
    options.cfg.selected_features = self.selected_features1
    writer = textgrids.TextgridWriter(self.df1, self.session)
    grid = writer.fill_grids()[("File1.txt", )]

    # (start_time, end_time, text) for each interval, in tier order
    expected_intervals = [
        (4, 4.5, "1"),
        (5, 5.5, "2"),
    ]

    # exactly one tier, and it is named after the corpus id
    self.assertEqual(len(grid.tiers), 1)
    tier = grid.tiers[0]
    self.assertEqual(tier.name, "corpus_id")

    # the number of intervals is verified before any interval is indexed
    self.assertEqual(len(tier.intervals), 2)
    for position, (start, end, text) in enumerate(expected_intervals):
        interval = tier.intervals[position]
        self.assertEqual(interval.start_time, start)
        self.assertEqual(interval.end_time, end)
        self.assertEqual(interval.text, text)
def test_prepare_textgrids_number_of_grids(self):
    """
    Check that prepare_textgrids() returns as many grids as there are
    distinct values in the "Filename" column of the writer's file data.
    """
    options.cfg.selected_features = self.selected_features1
    writer = textgrids.TextgridWriter(self.df1, self.session)

    grids = writer.prepare_textgrids()
    grid_count = len(grids)

    # one grid is expected per unique file name
    file_names = writer.get_file_data()["Filename"].unique()
    self.assertEqual(grid_count, len(file_names))