Example #1
0
    def test_insert(self):
        """A line that exists only in set_2 is reported as one changed row."""
        set_1 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ])
        set_2 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (500, 800, "Hey 1.5"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ])
        result = diff(set_1, set_2)
        self.assertEqual(result['changed'], True)
        # for both time_changed and text_changed, we calculate them as
        # follows: there are 9 total subs.  8 of those are matches and 1 is
        # new in set_2.  So the change amount is 1/9
        self.assertAlmostEqual(result['time_changed'], 1/9.0)
        self.assertAlmostEqual(result['text_changed'], 1/9.0)
        self.assertEqual(len(result['subtitle_data']), 5)

        # check the lines that haven't changed
        self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3, 4)
        # check the line that was inserted: the set_1 side is an empty
        # placeholder and the set_2 side is the new line
        insert_sub_data = result['subtitle_data'][1]
        self.assertEqual(insert_sub_data['time_changed'], True)
        self.assertEqual(insert_sub_data['text_changed'], True)
        self.assertEqual(insert_sub_data['subtitles'][0], self.empty_line())
        self.assertEqual(insert_sub_data['subtitles'][1], set_2[1])
Example #2
0
    def test_simple_replace(self):
        """Changing only the text of one line flags text, but not time, as changed."""
        set_1 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ])
        set_2 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey New 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ])
        result = diff(set_1, set_2)
        self.assertEqual(result['changed'], True)
        self.assertAlmostEqual(result['time_changed'], 0)
        # for text_changed, we calculate as follows: there are 8 total subs.
        # 6 of those are matches and 1 is different in both sets.  So 2/8.0
        # has been changed.
        self.assertAlmostEqual(result['text_changed'], 2/8.0)
        self.assertEqual(len(result['subtitle_data']), 4)

        # check the lines that haven't changed
        self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
        # check the line that was replaced: same timing, different text
        replaced_sub_data = result['subtitle_data'][1]
        self.assertEqual(replaced_sub_data['time_changed'], False)
        self.assertEqual(replaced_sub_data['text_changed'], True)
        self.assertEqual(replaced_sub_data['subtitles'][0], set_1[1])
        self.assertEqual(replaced_sub_data['subtitles'][1], set_2[1])
Example #3
0
    def test_replace_multiple_lines_with_single(self):
        """Two set_1 lines merged into one set_2 line yield two changed rows."""
        set_1 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1000, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
            (3000, 4000, "Hey 4"),
        ])
        set_2 = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1000, 3000, "Hey 2 and 3"),
            (3000, 4000, "Hey 4"),
        ])
        result = diff(set_1, set_2)
        self.assertEqual(result['changed'], True)
        # for both time_changed and text_changed, we calculate them as
        # follows: there are 7 total subs.  4 of those are matches and 2 in
        # set_1 were replaced with 1 in set_2.  So the change amount is 3/7.
        self.assertAlmostEqual(result['time_changed'], 3/7.0)
        self.assertAlmostEqual(result['text_changed'], 3/7.0)
        self.assertEqual(len(result['subtitle_data']), 4)

        # check the lines that haven't changed
        self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3)
        # check the replaced lines: the first row pairs set_1[1] with the
        # merged set_2 line, the second row pairs set_1[2] with an empty line
        line1 = result['subtitle_data'][1]
        self.assertEqual(line1['time_changed'], True)
        self.assertEqual(line1['text_changed'], True)
        self.assertEqual(line1['subtitles'][0], set_1[1])
        self.assertEqual(line1['subtitles'][1], set_2[1])
        line2 = result['subtitle_data'][2]
        self.assertEqual(line2['time_changed'], True)
        self.assertEqual(line2['text_changed'], True)
        self.assertEqual(line2['subtitles'][0], set_1[2])
        self.assertEqual(line2['subtitles'][1], self.empty_line())
Example #4
0
 def test_one_set_empty(self):
     """Diffing a populated set against an empty one reports full change."""
     populated_lines = [
         (0, 1000, "Hey 1"),
         (1000, 2000, "Hey 2"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ]
     populated = SubtitleSet.from_list("en", populated_lines)
     blank = SubtitleSet("en")
     outcome = diff(populated, blank)
     self.assertEqual(outcome["changed"], True)
     # both ratios are expected to be exactly 1.0
     for ratio_key in ("text_changed", "time_changed"):
         self.assertEqual(outcome[ratio_key], 1.0)
Example #5
0
    def test_data_ordering(self):
        """The 'subtitles' pair in each row follows diff()'s argument order."""
        shared_line = (0, 1000, "Hey 1")
        shorter = SubtitleSet.from_list("en", [shared_line])
        longer = SubtitleSet.from_list(
            "en",
            [shared_line, (1200, 2000, "Hey 2"), (2000, 3000, "Hey 3")],
        )
        rows = diff(shorter, longer)["subtitle_data"]

        pair = rows[2]["subtitles"]
        # Slot 0 must come from the first argument (no text expected there)
        # and slot 1 from the second argument.
        self.assertEqual(pair[0].text, None)
        self.assertEqual(pair[1].text, "Hey 3")
Example #6
0
 def test_one_set_empty(self):
     """A non-empty set diffed against an empty set is 100% changed."""
     four_lines = [(0, 1000, "Hey 1"), (1000, 2000, "Hey 2"),
                   (2000, 3000, "Hey 3"), (3000, 4000, "Hey 4")]
     full_set = SubtitleSet.from_list('en', four_lines)
     empty_set = SubtitleSet('en')
     report = diff(full_set, empty_set)
     expectations = (
         ('changed', True),
         ('text_changed', 1.0),
         ('time_changed', 1.0),
     )
     for key, expected in expectations:
         self.assertEqual(report[key], expected)
Example #7
0
    def test_unsynced_reflect_time_changes(self):
        """Adding a line without timings must count towards time_changed."""
        synced_line = (0, 1000, "Hey 1")
        unsynced_line = (None, None, "Hey 2")
        before = SubtitleSet.from_list('en', [synced_line])
        after = SubtitleSet.from_list('en', [synced_line, unsynced_line])

        time_ratio = diff(before, after)['time_changed']

        self.assertAlmostEqual(time_ratio, 1/3.0)
Example #8
0
    def test_unsynced_reflect_time_changes(self):
        """An extra line with no start/end time shows up in time_changed."""
        base_lines = [(0, 1000, "Hey 1")]
        # second set: the same line plus one whose timings are both None
        extended_lines = base_lines + [(None, None, "Hey 2")]
        outcome = diff(
            SubtitleSet.from_list('en', base_lines),
            SubtitleSet.from_list('en', extended_lines),
        )

        self.assertAlmostEqual(outcome['time_changed'], 1/3.0)
Example #9
0
 def test_text_changes(self):
     """Editing one line's text flags only that row's text as changed."""
     original_lines = [
         (0, 1000, "Hey 1"),
         (1000, 2000, "Hey 2"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ]
     # same lines, except the text at index 1
     edited_lines = list(original_lines)
     edited_lines[1] = (1000, 2000, "Hey 22")

     before = SubtitleSet.from_list("en", original_lines)
     after = SubtitleSet.from_list("en", edited_lines)
     outcome = diff(before, after)

     self.assertEqual(outcome["changed"], True)
     self.assertEqual(outcome["text_changed"], 1 / 4.0)
     self.assertEqual(outcome["time_changed"], 0)
     self.assertEqual(len(outcome["subtitle_data"]), 4)
     # only the row at index 1 should have its text flagged
     for position, row in enumerate(outcome["subtitle_data"]):
         expect_text_change = position == 1
         self.assertEqual(row["text_changed"], expect_text_change)
Example #10
0
    def test_data_ordering(self):
        """Each row's 'subtitles' pair is ordered like diff()'s arguments."""
        first = SubtitleSet.from_list('en', [(0, 1000, "Hey 1")])
        second = SubtitleSet.from_list('en', [
            (0, 1000, "Hey 1"),
            (1200, 2000, "Hey 2"),
            (2000, 3000, "Hey 3"),
        ])
        outcome = diff(first, second)

        third_row_pair = outcome['subtitle_data'][2]['subtitles']
        # index 0 belongs to the first argument and index 1 to the second;
        # the expected texts below are listed in that same order
        for side, expected_text in enumerate((None, "Hey 3")):
            self.assertEqual(third_row_pair[side].text, expected_text)
Example #11
0
 def test_text_changes(self):
     """A text-only edit to one line is reported for that row alone."""
     changed_index = 1
     before = SubtitleSet.from_list('en', [
         (0, 1000, "Hey 1"),
         (1000, 2000, "Hey 2"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ])
     after = SubtitleSet.from_list('en', [
         (0, 1000, "Hey 1"),
         (1000, 2000, "Hey 22"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ])
     report = diff(before, after)
     rows = report['subtitle_data']

     self.assertEqual(report['changed'], True)
     self.assertEqual(report['text_changed'], 1/4.0)
     self.assertEqual(report['time_changed'], 0)
     self.assertEqual(len(rows), 4)
     # every row except the edited one must report unchanged text
     for position, row in enumerate(rows):
         self.assertEqual(row['text_changed'], position == changed_index)
Example #12
0
 def test_time_changes(self):
     """Shifting one line's start time flags only that row's time."""
     shifted_index = 1
     before = SubtitleSet.from_list('en', [
         (0, 1000, "Hey 1"),
         (1000, 2000, "Hey 2"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ])
     after = SubtitleSet.from_list('en', [
         (0, 1000, "Hey 1"),
         (1200, 2000, "Hey 2"),
         (2000, 3000, "Hey 3"),
         (3000, 4000, "Hey 4"),
     ])
     report = diff(before, after)
     rows = report['subtitle_data']

     self.assertEqual(report['changed'], True)
     self.assertEqual(report['time_changed'], 1/4.0)
     self.assertEqual(report['text_changed'], 0)
     self.assertEqual(len(rows), 4)
     # only the row at index 1 should have its time flagged as changed,
     # and no row should report a text change
     for position, row in enumerate(rows):
         self.assertEqual(row['time_changed'], position == shifted_index)
         self.assertFalse(row['text_changed'])
Example #13
0
    def test_delete(self):
        """A line removed from set_1 is reported as one changed row."""
        set_1 = SubtitleSet.from_list(
            "en", [(0, 1000, "Hey 1"), (1000, 2000, "Hey 2"), (2000, 3000, "Hey 3"), (3000, 4000, "Hey 4")]
        )
        set_2 = SubtitleSet.from_list("en", [(0, 1000, "Hey 1"), (2000, 3000, "Hey 3"), (3000, 4000, "Hey 4")])
        result = diff(set_1, set_2)
        self.assertEqual(result["changed"], True)
        # for both time_changed and text_changed, we calculate them as
        # follows: there are 7 total subs.  6 of those are matches and 1 was
        # deleted from set_1.  So the change amount is 1/7
        self.assertAlmostEqual(result["time_changed"], 1 / 7.0)
        self.assertAlmostEqual(result["text_changed"], 1 / 7.0)
        self.assertEqual(len(result["subtitle_data"]), 4)

        # check the lines that haven't changed
        self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 2, 3)
        # check the line that was deleted: the set_2 side is an empty
        # placeholder and the set_1 side is the removed line
        delete_sub_data = result["subtitle_data"][1]
        self.assertEqual(delete_sub_data["time_changed"], True)
        self.assertEqual(delete_sub_data["text_changed"], True)
        self.assertEqual(delete_sub_data["subtitles"][1], self.empty_line())
        self.assertEqual(delete_sub_data["subtitles"][0], set_1[1])
Example #14
0
    def test_replace_single_line_with_multiple(self):
        """One set_1 line split into two set_2 lines yields two changed rows."""
        set_1 = SubtitleSet.from_list(
            "en", [(0, 1000, "Hey 1"), (1000, 2000, "Hey 2"), (2000, 3000, "Hey 3"), (3000, 4000, "Hey 4")]
        )
        set_2 = SubtitleSet.from_list(
            "en",
            [
                (0, 1000, "Hey 1"),
                (1000, 1500, "Hey 2.1"),
                (1500, 2000, "Hey 2.2"),
                (2000, 3000, "Hey 3"),
                (3000, 4000, "Hey 4"),
            ],
        )
        result = diff(set_1, set_2)
        self.assertEqual(result["changed"], True)
        # for both time_changed and text_changed, we calculate them as
        # follows: there are 9 total subs.  6 of those are matches and 1 in
        # set 1 was changed to 2 in set 2.  So the change amount is 3/9.
        self.assertAlmostEqual(result["time_changed"], 3 / 9.0)
        self.assertAlmostEqual(result["text_changed"], 3 / 9.0)
        self.assertEqual(len(result["subtitle_data"]), 5)

        # check the lines that haven't changed
        self.check_unchanged_subtitle_data(result, set_1, set_2, 0, 3, 4)
        # set_1[1] was replaced by set_2[1] and set_2[2]: the first row pairs
        # set_1[1] with set_2[1], the second pairs an empty line with set_2[2]
        line1 = result["subtitle_data"][1]
        self.assertEqual(line1["time_changed"], True)
        self.assertEqual(line1["text_changed"], True)
        self.assertEqual(line1["subtitles"][0], set_1[1])
        self.assertEqual(line1["subtitles"][1], set_2[1])
        line2 = result["subtitle_data"][2]
        self.assertEqual(line2["time_changed"], True)
        self.assertEqual(line2["text_changed"], True)
        self.assertEqual(line2["subtitles"][0], self.empty_line())
        self.assertEqual(line2["subtitles"][1], set_2[2])
Example #15
0
 def test_empty_subs(self):
     """Diffing two empty sets reports no change and no rows."""
     left = SubtitleSet('en')
     right = SubtitleSet('en')
     outcome = diff(left, right)
     self.assertEqual(outcome['changed'], False)
     for ratio_key in ('text_changed', 'time_changed'):
         self.assertEqual(outcome[ratio_key], 0)
     self.assertEqual(len(outcome['subtitle_data']), 0)
Example #16
0
 def test_empty_subs(self):
     """Two empty subtitle sets must diff as unchanged."""
     report = diff(SubtitleSet('en'), SubtitleSet('en'))
     row_count = len(report['subtitle_data'])
     scalar_expectations = (
         ('changed', False),
         ('text_changed', 0),
         ('time_changed', 0),
     )
     for key, expected in scalar_expectations:
         self.assertEqual(report[key], expected)
     # with nothing on either side there should be no per-line rows
     self.assertEqual(row_count, 0)
Example #17
0
 def test_empty_subs(self):
     """An empty set compared with another empty set shows nothing changed."""
     empty_pair = (SubtitleSet("en"), SubtitleSet("en"))
     summary = diff(*empty_pair)
     self.assertEqual(summary["changed"], False)
     self.assertEqual(summary["text_changed"], 0)
     self.assertEqual(summary["time_changed"], 0)
     rows = summary["subtitle_data"]
     self.assertEqual(len(rows), 0)
Example #18
0
    def _get_new_version_for_save(self,
                                  subtitles,
                                  language,
                                  session,
                                  user,
                                  new_title,
                                  new_description,
                                  new_metadata,
                                  save_for_later=None):
        """Return a new subtitle version for this save, or None if not needed.

        A version is created only when the subtitles, title, description or
        metadata differ from the non-public tip of ``language``.  When there
        is no tip yet, any title/description/metadata that is not None counts
        as a change.

        Args:
            subtitles: either a string (handed to ``SubtitleSet`` together
                with ``language.language_code``) or a ``SubtitleSet``.  Any
                other value is treated as "no subtitles".
            language: object whose ``get_tip(public=False)`` gives the
                previous version and whose ``language_code`` is used when
                building a ``SubtitleSet`` from a string.
            session: forwarded to ``_create_version``; its ``language``
                attribute is the language the version is created on.
            user: forwarded to ``_create_version``.
            new_title: new title, or None to leave it out of the comparison.
            new_description: new description, or None to leave it out.
            new_metadata: new metadata, or None to leave it out.
            save_for_later: when truthy the save is treated as incomplete,
                so no review/approve task is created.

        Returns:
            The version returned by ``_create_version``, or None when
            nothing changed.
        """

        new_version = None
        previous_version = language.get_tip(public=False)

        if previous_version:
            title_changed = (new_title is not None
                             and new_title != previous_version.title)
            desc_changed = (new_description is not None and
                            new_description != previous_version.description)
            metadata_changed = (
                new_metadata is not None
                and new_metadata != previous_version.get_metadata())
        else:
            title_changed = new_title is not None
            desc_changed = new_description is not None
            metadata_changed = new_metadata is not None

        # Normalise the incoming subtitles to a SubtitleSet (or None) so they
        # can be compared against the previous version.
        subtitle_set = None
        if isinstance(subtitles, basestring):
            subtitle_set = SubtitleSet(language.language_code, subtitles)
        elif isinstance(subtitles, SubtitleSet):
            subtitle_set = subtitles

        # subtitles have changed if only one of the version is empty
        # or if the versions themselves differ
        if not previous_version and not subtitle_set:
            subtitles_changed = False
        elif not previous_version or not subtitle_set:
            subtitles_changed = True
        else:
            subtitles_changed = diff(previous_version.get_subtitles(),
                                     subtitle_set)['changed']

        should_create_new_version = (subtitles_changed or title_changed
                                     or desc_changed or metadata_changed)

        if should_create_new_version:
            # Note: the original ``subtitles`` argument is passed on, not the
            # normalised ``subtitle_set``.
            new_version, should_create_task = self._create_version(
                session.language,
                user,
                new_title=new_title,
                new_description=new_description,
                new_metadata=new_metadata,
                subtitles=subtitles,
                session=session)

            incomplete = not new_version.is_synced() or save_for_later

            # Record the origin of this set of subtitles.
            #
            # We need to record it *before* creating review/approve tasks (if
            # any) because that means these subs were from a post-publish edit
            # or something similar.  If we record the origin after creating the
            # review task it'll be marked as originating from review, which
            # isn't right because these subs had to come from something else.
            #
            # :(
            record_workflow_origin(new_version,
                                   new_version.video.get_team_video())

            if (not incomplete) and should_create_task:
                self._create_review_or_approve_task(new_version)

        return new_version
Example #19
0
    def test_diffing(self):
        """The diffing view exposes diff() data and renders it row by row.

        Creates two versions of the English subtitles, requests the
        'videos:diffing' page for them, and checks that the context's
        ``diff_data`` equals a direct diff() call and that each rendered
        ``<li>`` matches the corresponding diff row.
        """
        create_langs_and_versions(self.video, ['en'])

        eng = self.video.newsubtitlelanguage_set.get(language_code='en')
        subtitles = SubtitleSet.from_list('en', [
            (10000, 20000, "1 - :D"),
            (20000, 30000, "2 - :D"),
            (30000, 40000, "3 - :D"),
            (40000, 50000, "4 - :D"),
            (50000, 60000, "5 - :D"),
        ])
        subtitles2 = SubtitleSet.from_list(
            'en',
            [
                (10000, 20000, "1 - :D"),
                (20000, 25000, "2 - :D"),  # time change,
                (30000, 40000, "Three - :D"),  # text change,
                # multiple lines replaced by a single line
                (40000, 60000, "45 - :D"),
            ])
        first_version = eng.add_version(subtitles=subtitles)
        second_version = eng.add_version(subtitles=subtitles2)
        # Note on the argument order to diff: we always diff the more recent
        # version against the less recent
        diff_result = diff(subtitles2, subtitles)

        response = self._simple_test('videos:diffing',
                                     [first_version.id, second_version.id])
        self.assertEqual(diff_result, response.context['diff_data'])

        diff_sub_data = diff_result['subtitle_data']

        html = BeautifulSoup(response.content)
        diff_list = html.find('ol', {"class": 'subtitles-diff'})
        diff_items = diff_list.findAll('li')
        # check number of lines
        self.assertEqual(len(diff_items), len(diff_sub_data))

        def check_column_data(column, sub_data):
            """Check the data in the HTML for a column against the data
            from diff()
            """
            # special check for empty lines
            if sub_data.text is None:
                self.assertEqual(column.string.strip(), "")
                return
            time_span, text_span = column.findAll('span', recursive=False)
            self.assertEqual(text_span.string.strip(), sub_data.text)
            time_child_spans = time_span.findAll('span',
                                                 {'class': 'stamp_text'})
            self.assertEqual(time_child_spans[0].string.strip(),
                             format_sub_time(sub_data.start_time))
            self.assertEqual(time_child_spans[1].string.strip(),
                             format_sub_time(sub_data.end_time))

        for li, diff_sub_data_item in zip(diff_items, diff_sub_data):
            # Intuitively, left_column should be compared against
            # ['subtitles'][0], but we do the opposite.  This is because of
            # the way things are ordered:
            #  - diff() was passed (older_version, newer_version)
            #  - The rendered HTML has the newer version on the left and the
            #  older version on the right
            #
            # NOTE(review): the diff() call in this test passes
            # (subtitles2, subtitles), i.e. the newer set first, which does
            # not match the ordering described above -- confirm which
            # description is correct.
            check_column_data(li.find('div', {'class': 'left_column'}),
                              diff_sub_data_item['subtitles'][1])
            check_column_data(li.find('div', {'class': 'right_column'}),
                              diff_sub_data_item['subtitles'][0])
            # we use the time_change class for either text or time changes.
            time_changes = li.findAll('span', {'class': 'time_change'})
            if (diff_sub_data_item['time_changed']
                    or diff_sub_data_item['text_changed']):
                self.assertNotEqual(len(time_changes), 0)
            else:
                self.assertEqual(len(time_changes), 0)