class TestIntersectionOverUnion(unittest.TestCase):
    """
    Tests lib.src.measurement.intersection_over_union
    """

    intersection_over_union_data_provider = lambda: (
        (IntervalMock(1.0, 1.0), Interval(0.0, 0.0), 1.0),  # Sanity check
        (IntervalMock(3.0, 6.0), Interval(0.0, 0.0), 0.5),  # Sanity check
        (IntervalMock(1.0, 0.0), Interval(0.0, 0.0), 0.0),  # Shouldn't do a division by 0
        (IntervalMock(0.0, 1.0), Interval(0.0, 0.0), 0.0),  # One 0 should yield value 0
    )

    @data_provider(intersection_over_union_data_provider)
    def test_intersection_over_union(self, a: IntervalMock, b: IntervalMock, expected_value: float) -> None:
        """
        Tests intersection_over_union function.

        :param a: Mocked interval supplying fixed intersection/union values
        :param b: Interval passed as the second operand
        :param expected_value: Expected IOU score
        :return: None
        """
        actual_score = intersection_over_union(a, b)
        self.assertEqual(actual_score, expected_value)
def test_get_length(self, start: Any, end: Any, expected_length: float) -> None:
    """
    Tests get_length method's behaviour

    :param start: Start of the interval
    :param end: End of the interval
    :param expected_length: Expected calculated length
    :return: None
    """
    interval = Interval(start, end)
    actual_length = interval.get_length()
    self.assertEqual(actual_length, expected_length)
def test_to_formatted(self, start: Any, end: Any, expected_formatted: str) -> None:
    """
    Tests to_formatted method's behaviour.

    :param start: Start of the interval
    :param end: End of the interval
    :param expected_formatted: Expected formatted output
    :return: None
    """
    interval = Interval(start, end)
    actual_formatted = interval.to_formatted()
    self.assertEqual(actual_formatted, expected_formatted)
def test_get_union(self, start: Any, end: Any, other_start: Any, other_end: Any, expected_union: float) -> None:
    """
    Tests the get_union method's behaviour

    :param start: Start of interval a
    :param end: End of interval a
    :param other_start: Start of interval b
    :param other_end: End of interval b
    :param expected_union: Value to check against
    :return: None
    """
    first = Interval(start, end)
    second = Interval(other_start, other_end)
    self.assertEqual(first.get_union(second), expected_union)
def sentence_from_string(string: str) -> Sentence:
    """
    Creates a Sentence object from a given single line of an alignment

    Expected tab-separated layout: "<start>\t<end>\t<sentence>[\t<f1>\t<f2>\t<f3>\t<f4>]"
    where the four optional trailing fields feed AdditionalData.

    :param string: Input string to parse
    :return: Sentence
    """
    def _parse_bound(raw: str) -> Any:
        # Interval bounds are normally floats; non-numeric placeholders
        # (e.g. "-") are kept verbatim as strings.
        try:
            return float(raw)
        except ValueError:
            return raw

    parts = string.split("\t")
    interval_start = _parse_bound(parts[0])
    interval_end = _parse_bound(parts[1])

    additional_data = None
    # Bug fix: the guard used to be `len(parts) > 3`, which raised IndexError
    # for lines with 4-6 columns even though indices 3-6 are all accessed.
    # All four extra fields must be present for AdditionalData to be built.
    if len(parts) > 6:
        additional_data = AdditionalData(float(parts[3]), float(parts[4]), float(parts[5]), float(parts[6]))

    return Sentence(parts[2].strip(), Interval(interval_start, interval_end), additional_data)
def intersection_over_union(ground_truth: Interval, prediction: Interval) -> float:
    """
    Calculates the IOU score for two pairs, ground truth and prediction

    :param ground_truth: Interval
    :param prediction: Interval
    :return: IOU score; 0.0 when either the intersection or the union is empty
             (the union check also prevents a division by zero)
    """
    intersection = ground_truth.get_intersection(prediction)
    union = ground_truth.get_union(prediction)

    if intersection == 0.0 or union == 0.0:
        # Bug fix: return a float to match the declared return type (was the int 0).
        return 0.0

    return intersection / union
def transcript_to_sentences(transcript: str) -> List[Sentence]:
    """
    Creates a list of Sentence instances with empty intervals from a given text.

    :param transcript: String
    :return: List of Sentence instances
    """
    sentences = []
    for fragment in sent_tokenize(transcript, "german"):
        # Timing information is unknown at this stage, hence Interval(None, None).
        sentences.append(Sentence(fragment, Interval(None, None), None))
    return sentences
def merge_with(self, other: "Sentence") -> "Sentence":
    """
    Merges two sentences

    The sentence with the earlier interval start comes first; when either start
    is not a float (i.e. the starts are not comparable), this sentence leads.
    The merged sentence always keeps this sentence's additional_data.

    :param other: Another sentence
    :return: Merged sentence
    """
    starts_comparable = (isinstance(self.interval.start, float)
                         and isinstance(other.interval.start, float))

    # "other" only leads when both starts are real floats and it does not
    # start after this sentence.
    if starts_comparable and other.interval.start <= self.interval.start:
        first, second = other, self
    else:
        first, second = self, other

    merged_text = str(first.sentence).strip() + " " + str(second.sentence).strip()
    return Sentence(merged_text, Interval(first.interval.start, second.interval.end), self.additional_data)
def test_compare_alignment_data_provider() -> Tuple:
    """
    Data provider function

    Builds (alignment_files, config, expected_statistics) cases for the
    alignment comparison test: an empty case, a worst-score case, a
    perfect-score case, an intermediate case, and a missing-counterpart case.

    :return: Tuple
    """
    # Fixture sentences; AdditionalData fields presumably are
    # (alignment_score, google_gap, transcript_gap, google_confidence) — TODO confirm against AdditionalData.
    sentence_1 = Sentence("foo", Interval(0.0, 0.1), AdditionalData(0.5, 0.4, 0.3, 0.2))
    sentence_2 = Sentence("bar", Interval(0.0, 0.2), AdditionalData(0.6, 0.5, 0.4, 0.3))
    sentence_3 = Sentence("baz", Interval(0.1, 0.2), AdditionalData(0.7, 0.6, 0.5, 0.4))
    sentence_4 = Sentence("qux", Interval(0.0, 0.2), AdditionalData(0.8, 0.7, 0.6, 0.5))
    # Interval shorter than config's no_appearance threshold, so this sentence
    # counts as "not appearing".
    sentence_5 = Sentence("lorem", Interval(0.0001, 0.0002), AdditionalData(0.9, 0.8, 0.7, 0.6))  # Doesn't appear

    # Pairs of type1/type2 alignment files; files sharing a number are
    # compared against each other.
    file_1_type1 = AlignmentFile("file_1_audacity_type1.txt", [sentence_1, sentence_2, sentence_5])
    file_2_type1 = AlignmentFile("file_2_audacity_type1.txt", [sentence_1, sentence_2, sentence_5])
    file_3_type1 = AlignmentFile("file_3_audacity_type1.txt", [sentence_1])
    file_4_type1 = AlignmentFile(
        "file_4_audacity_type1.txt", [sentence_1, sentence_2, sentence_3, sentence_4])
    file_1_type2 = AlignmentFile("file_1_audacity_type2.txt", [sentence_1, sentence_2, sentence_5])
    file_2_type2 = AlignmentFile("file_2_audacity_type2.txt", [sentence_1, sentence_2, sentence_5])
    file_3_type2 = AlignmentFile("file_3_audacity_type2.txt", [sentence_3])
    file_4_type2 = AlignmentFile(
        "file_4_audacity_type2.txt", [sentence_1, sentence_3, sentence_4, sentence_5])

    # Shared configuration: appearance threshold plus score weighting factors.
    config = {
        "no_appearance": {
            "interval_length": 0.0001
        },
        "score_weights": {
            "gaps_google": 1,
            "gaps_transcript": 1,
            "alignment_score": -100,
            "google_confidence": 6.1249349
        }
    }

    return (
        # No data at all, should fallback to zeros etc., no warnings or errors
        ([], config, {
            "no_sentences": {
                "appearing": 0,
                "total": 0,
            },
            "ious": {
                "all": [],
                "all_only": [],
                "low": [],
                "mean": nan,
                "median": nan,
                "per_file": {}
            },
            "appearance": {
                "true_positives": 0,
                "false_positives": 0,
                "true_negatives": 0,
                "false_negatives": 0,
                "precision": 0.0,
                "recall": 0.0,
                "f1_score": 0.0
            },
            "scores": {
                "alignment_scores": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "calculated": {
                    "all": []
                },
                "deviation": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "google_confidence": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "google_gaps": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "transcript_gaps": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                }
            }
        }),
        # Worst possible score with data
        ([file_3_type1, file_3_type2], config, {
            "no_sentences": {
                "appearing": 1,
                "total": 1
            },
            "ious": {
                "all_only": [0],
                "all": [(0, 0.1, 0.1, "foo", "\\file_3")],
                "low": ["\\file_3.wav"],
                "mean": 0.0,
                "median": 0.0,
                "per_file": {
                    "\\file_3": {
                        "mean": 0.0,
                        "median": 0.0,
                        "all": [(0, 0.1, 0.1, "foo", "\\file_3")]
                    }
                }
            },
            "appearance": {
                "true_positives": 1,
                "false_positives": 0,
                "true_negatives": 0,
                "false_negatives": 0,
                "precision": 1.0,
                "recall": 1.0,
                "f1_score": 1.0
            },
            "scores": {
                "alignment_scores": {
                    "all": [0.6],
                    "mean": 0.6,
                    "median": 0.6
                },
                "calculated": {
                    "all": [-65.42503905999999]
                },
                "deviation": {
                    "all": [0.2],
                    "mean": 0.2,
                    "median": 0.2
                },
                "google_confidence": {
                    "all": [0.7],
                    "mean": 0.7,
                    "median": 0.7
                },
                "google_gaps": {
                    "all": [0.4],
                    "mean": 0.4,
                    "median": 0.4
                },
                "transcript_gaps": {
                    "all": [0.5],
                    "mean": 0.5,
                    "median": 0.5
                }
            }
        }),
        # Perfect scores
        ([file_1_type1, file_2_type1, file_1_type2, file_2_type2], config, {
            "no_sentences": {
                "appearing": 4,
                "total": 6
            },
            "ious": {
                "all_only": [1.0, 1.0, 1.0, 1.0],
                "all": [(1.0, 0.1, 0.1, "foo", "\\file_1"),
                        (1.0, 0.2, 0.2, "bar", "\\file_1"),
                        (1.0, 0.1, 0.1, "foo", "\\file_2"),
                        (1.0, 0.2, 0.2, "bar", "\\file_2")],
                "low": [],
                "mean": 1.0,
                "median": 1.0,
                "per_file": {
                    "\\file_1": {
                        "mean": 1.0,
                        "median": 1.0,
                        "all": [(1.0, 0.1, 0.1, "foo", "\\file_1"),
                                (1.0, 0.2, 0.2, "bar", "\\file_1")]
                    },
                    "\\file_2": {
                        "mean": 1.0,
                        "median": 1.0,
                        "all": [(1.0, 0.1, 0.1, "foo", "\\file_2"),
                                (1.0, 0.2, 0.2, "bar", "\\file_2")]
                    }
                }
            },
            "appearance": {
                "true_positives": 4,
                "false_positives": 0,
                "true_negatives": 2,
                "false_negatives": 0,
                "precision": 1.0,
                "recall": 1.0,
                "f1_score": 1.0
            },
            "scores": {
                "alignment_scores": {
                    "all": [0.4, 0.5, 0.4, 0.5],
                    "mean": 0.45,
                    "median": 0.45
                },
                "calculated": {
                    "all": [-47.05002604, -56.23753255, -47.05002604, -56.23753255]
                },
                "deviation": {
                    "all": [0.0, 0.0, 0.0, 0.0],
                    "mean": 0.0,
                    "median": 0.0
                },
                "google_confidence": {
                    "all": [0.5, 0.6, 0.5, 0.6],
                    "mean": 0.55,
                    "median": 0.55
                },
                "google_gaps": {
                    "all": [0.2, 0.3, 0.2, 0.3],
                    "mean": 0.25,
                    "median": 0.25
                },
                "transcript_gaps": {
                    "all": [0.3, 0.4, 0.3, 0.4],
                    "mean": 0.35,
                    "median": 0.35
                }
            }
        }),
        # Some intermediate score
        ([file_4_type1, file_4_type2], config, {
            "no_sentences": {
                "appearing": 3,
                "total": 4
            },
            "ious": {
                "all_only": [1.0, 0.5, 0.5],
                "all": [(1.0, 0.1, 0.1, "foo", "\\file_4"),
                        (0.5, 0.2, 0.1, "bar", "\\file_4"),
                        (0.5, 0.1, 0.2, "baz", "\\file_4")],
                "low": [],
                "mean": 0.66666666666666663,
                "median": 0.5,
                "per_file": {
                    "\\file_4": {
                        "mean": 0.66666666666666663,
                        "median": 0.5,
                        "all": [(1.0, 0.1, 0.1, "foo", "\\file_4"),
                                (0.5, 0.2, 0.1, "bar", "\\file_4"),
                                (0.5, 0.1, 0.2, "baz", "\\file_4")]
                    }
                }
            },
            "appearance": {
                "true_positives": 3,
                "false_positives": 0,
                "true_negatives": 0,
                "false_negatives": 1,
                "precision": 1.0,
                "recall": 0.75,
                "f1_score": 0.8571428571428571
            },
            "scores": {
                "alignment_scores": {
                    "all": [0.4, 0.6, 0.7],
                    "mean": 0.5666666666666667,
                    "median": 0.6
                },
                "calculated": {
                    "all": [-47.05002604, -65.42503905999999, -74.61254557000001]
                },
                "deviation": {
                    "all": [0.0, 0.1, 0.1],
                    "mean": 0.06666666666666667,
                    "median": 0.1
                },
                "google_confidence": {
                    "all": [0.5, 0.7, 0.8],
                    "mean": 0.6666666666666666,
                    "median": 0.7
                },
                "google_gaps": {
                    "all": [0.2, 0.4, 0.5],
                    "mean": 0.3666666666666667,
                    "median": 0.4
                },
                "transcript_gaps": {
                    "all": [0.3, 0.5, 0.6],
                    "mean": 0.4666666666666666,
                    "median": 0.5
                }
            }
        }),
        # One file exists, the other one doesn't, hence no data
        ([file_1_type1], config, {
            "no_sentences": {
                "appearing": 0,
                "total": 0,
            },
            "ious": {
                "all_only": [],
                "all": [],
                "low": [],
                "mean": nan,
                "median": nan,
                "per_file": {}
            },
            "appearance": {
                "true_positives": 0,
                "false_positives": 0,
                "true_negatives": 0,
                "false_negatives": 0,
                "precision": 0.0,
                "recall": 0.0,
                "f1_score": 0.0
            },
            "scores": {
                "alignment_scores": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "calculated": {
                    "all": []
                },
                "deviation": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "google_confidence": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "google_gaps": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                },
                "transcript_gaps": {
                    "all": [],
                    "mean": nan,
                    "median": nan
                }
            }
        }),
    )
class TestSentence(unittest.TestCase):
    """
    Test class lib.src.model.Sentence
    """

    audacity_label_format_data_provider = lambda: (
        ("Lorem ipsum dolor", IntervalMock(0.0, 0.0), "mocked_interval\tLorem ipsum dolor"),  # Sanity check
        (None, IntervalMock(0.0, 0.0), "mocked_interval\tNone"),  # Sentence is None
        ("", IntervalMock(0.0, 0.0), "mocked_interval\t"),  # Empty sentence
    )

    @data_provider(audacity_label_format_data_provider)
    def test_to_audacity_label_format(self, sentence: Any, interval: IntervalMock, expected_format: str) -> None:
        """
        Tests to_audacity_label_format method's behaviour

        :param sentence: Inner sentence
        :param interval: Mocked interval
        :param expected_format: Expected audacity label format
        :return: None
        """
        s = Sentence(sentence, interval, None)
        self.assertEqual(s.to_audacity_label_format(), expected_format)

    merge_sentence_data_provider = lambda: (
        ("foo", IntervalMock(0.0, 0.1), "bar", IntervalMock(0.1, 0.2),
         Sentence("foo bar", Interval(0.0, 0.2), None)),  # Sanity check
        ("foo", IntervalMock(0.1, 0.2), "bar", IntervalMock(0.0, 0.1),
         Sentence("bar foo", Interval(0.0, 0.2), None)),  # Flipped intervals
        (" foo ", IntervalMock(0.0, 0.1), " bar ", IntervalMock(0.1, 0.2),
         Sentence("foo bar", Interval(0.0, 0.2), None)),  # Additional spaces around sentences
        (None, IntervalMock(0.0, 0.1), "bar", IntervalMock(0.1, 0.2),
         Sentence("None bar", Interval(0.0, 0.2), None)),  # One sentence is None
        ("foo", IntervalMock(0.0, 0.1), None, IntervalMock(0.1, 0.2),
         Sentence("foo None", Interval(0.0, 0.2), None)),  # Other sentence is None
        (None, IntervalMock(0.0, 0.1), None, IntervalMock(0.1, 0.2),
         Sentence("None None", Interval(0.0, 0.2), None)),  # Both sentences are none
    )

    @data_provider(merge_sentence_data_provider)
    def test_merge_with(self, sentence: str, interval: IntervalMock, other_sentence: str,
                        other_interval: IntervalMock, expected_sentence: Sentence) -> None:
        """
        Tests the merge_with method's behaviour

        :param sentence: First sentence as string
        :param interval: First interval
        :param other_sentence: Other sentence as string
        :param other_interval: Other interval
        :param expected_sentence: Expected merged sentence
        :return: None
        """
        a = Sentence(sentence, interval, None)
        b = Sentence(other_sentence, other_interval, None)
        # Cleanup: removed a stray dead "pass" statement that followed this assertion.
        self.assertEqual(
            a.merge_with(b).to_audacity_label_format(),
            expected_sentence.to_audacity_label_format())

    from_string_data_provider = lambda: (
        ("0.0000\t1.0000\tfoo bar baz", float, 0.0, float, 1.0, "foo bar baz"),  # Sanity check
        ("-\t0.00\tfoo bar baz", str, "-", float, 0.0, "foo bar baz"),  # One interval part isn't float
        ("0.00\t-\tfoo bar baz", float, 0.0, str, "-", "foo bar baz"),  # Other interval part isn't float
        ("-\t-\tfoo bar baz", str, "-", str, "-", "foo bar baz"),  # Both interval parts aren't float
    )

    @data_provider(from_string_data_provider)
    def test_sentence_from_string(self, input_string: str, expected_start_type: Any, expected_start: Any,
                                  expected_end_type: Any, expected_end: Any, expected_sentence: str) -> None:
        """
        Tests sentence_from_string function's behaviour.

        :param input_string: Formatted string to parse
        :param expected_start_type: Type of the inner Interval's start property
        :param expected_start: Value of the inner Interval's start property
        :param expected_end_type: Type of the inner Interval's end property
        :param expected_end: Value of the inner Interval's end property
        :param expected_sentence: Expected inner sentence as string
        :return: None
        """
        s = sentence_from_string(input_string)
        self.assertIsInstance(s.interval.start, expected_start_type)
        self.assertEqual(s.interval.start, expected_start)
        self.assertIsInstance(s.interval.end, expected_end_type)
        self.assertEqual(s.interval.end, expected_end)
        self.assertEqual(s.sentence, expected_sentence)