def test_create_translation_dependent_on_dependent(self):
    # Start a French translation on top of the language produced by
    # create_two_sub_dependent_session(), save one subtitle, and check
    # that fetching French afterwards returns exactly that subtitle.
    test_utils.invalidate_widget_video_cache.run_original_for_test()
    request = RequestMockup(self.user_0)
    session = create_two_sub_dependent_session(request)

    editing = rpc.start_editing(
        request, session.video.video_id, 'fr',
        base_language_code=session.language.language_code)
    session_pk = editing['session_pk']

    # The base language is handed back as the "original" subtitles.
    original = SubtitleSet('en', editing['original_subtitles']['subtitles'])
    self.assertEqual(3, len(original))

    rpc.finished_subtitles(
        request, session_pk, create_subtitle_set().to_xml())

    widget = rpc.show_widget(request, VIDEO_URL, False)
    french_entry = [entry for entry in widget['drop_down_contents']
                    if entry['language'] == 'fr'][0]
    fetched = rpc.fetch_subtitles(
        request, session.video.video_id, french_entry['pk'])
    french = SubtitleSet('fr', fetched['subtitles'])

    self.assertEqual(1, len(french))
    first = french[0]
    self.assertEqual('hey you 0', first.text)
    self.assertEqual(0, first.start_time)
    self.assertEqual(1000, first.end_time)
def setUp(self):
    """Create the English, Spanish and French subtitle fixtures.

    English and French each hold a single subtitle; Spanish holds two,
    the second one flagged with ``new_paragraph=True``.
    """
    english = SubtitleSet('en')
    spanish = SubtitleSet('es')
    french = SubtitleSet('fr')

    english.append_subtitle(1000, 1500, 'content')
    spanish.append_subtitle(1000, 1500, 'spanish content')
    spanish.append_subtitle(2000, 2500, 'spanish content 2',
                            new_paragraph=True)
    french.append_subtitle(1000, 1500, 'french content')

    self.en_subs = english
    self.es_subs = spanish
    self.fr_subs = french
def test_start_translating(self):
    # Translate an English version into Spanish twice in a row and check
    # the stored subtitles, the version count and the recorded source
    # language after each save.
    test_utils.invalidate_widget_video_cache.run_original_for_test()
    request = RequestMockup(self.user_0)
    session = self._create_basic_version(request)
    sl_en = session.language

    # Open the translation dialog: nothing has been translated yet.
    response = rpc.start_editing(
        request, session.video.video_id, 'es',
        base_language_code=sl_en.language_code)
    session_pk = response['session_pk']
    initial = response['subtitles']
    self.assertEquals(True, response['can_edit'])
    self.assertEquals(0, initial['version'])
    self.assertEquals(0, len(SubtitleSet('es', initial['subtitles'])))

    # First save: a single subtitle.
    rpc.finished_subtitles(
        request, session_pk, create_subtitle_set().to_xml())
    video = models.Video.objects.get(id=session.video.id)
    fetched = rpc.fetch_subtitles(
        request, video.video_id, video.subtitle_language('es').pk)
    subtitles = SubtitleSet('es', fetched['subtitles'])
    self.assertEquals(1, len(subtitles))
    self.assertEquals('hey you 0', subtitles[0][2])

    language = video.subtitle_language('es')
    self.assertEquals(1, language.subtitleversion_set.full().count())
    self.assertEquals(
        language.get_translation_source_language_code(), 'en')
    tip = language.get_tip()
    self.assertTrue('en' in tip.get_lineage())

    # Second save: three subtitles.
    # NOTE(review): the session_pk from the first start_editing() call is
    # reused below; the response of this second call is not read.
    response = rpc.start_editing(
        request, session.video.video_id, 'es',
        base_language_code=sl_en.language_code)
    rpc.finished_subtitles(
        request, session_pk, create_subtitle_set(2).to_xml())
    fetched = rpc.fetch_subtitles(
        request, video.video_id, video.subtitle_language('es').pk)
    subtitles = SubtitleSet('es', fetched['subtitles'])
    self.assertEquals(3, len(subtitles))
    for index in range(3):
        self.assertEquals('hey you %s' % index, subtitles[index][2])

    language = video.subtitle_language('es')
    self.assertEquals(2, language.subtitleversion_set.full().count())
    self.assertEquals(
        language.get_translation_source_language_code(), 'en')
    tip = language.get_tip()
    self.assertTrue('en' in tip.get_lineage())
def merge_subtitles(cls, subtitle_sets, initial_ttml=None):
    """Merge several subtitle sets into one serialized TTML document.

    For every subtitle set a new ``div`` is appended to the document
    body, tagged with the ``xml:lang`` of that set's root element, and
    the ``div`` elements found in that set's own body are added to it.

    :param subtitle_sets: sized collection of objects exposing
        ``as_etree_node()``.
    :param initial_ttml: optional root element to merge into. It is
        modified in place. When omitted, a blank ``SubtitleSet('')``
        tree with its default ``div`` removed is used.
    :returns: the result of ``etree.tostring()`` on the merged tree.
    :raises TypeError: if ``subtitle_sets`` is empty.
    :raises ValueError: if ``initial_ttml`` has no body element.
    """
    if not len(subtitle_sets):
        raise TypeError(
            "DFXPGenerator.merge_subtitles: No subtitles given")

    body_tag = TTML + 'body'
    div_tag = TTML + 'div'
    lang_attr = XML + 'lang'

    if initial_ttml is not None:
        tt = initial_ttml
        body = tt.find(body_tag)
        if body is None:
            raise ValueError("no body tag")
    else:
        tt = SubtitleSet('').as_etree_node()
        body = tt.find(body_tag)
        body.remove(body.find(div_tag))

    # The document-level language is blanked; each per-set div carries
    # its own xml:lang instead.
    tt.set(lang_attr, '')

    for subtitle_set in subtitle_sets:
        source_root = subtitle_set.as_etree_node()
        language_code = source_root.get(lang_attr)
        lang_div = etree.SubElement(body, div_tag)
        lang_div.set(lang_attr, language_code)
        lang_div.extend(source_root.find(body_tag).findall(div_tag))

    utils.indent_ttml(tt)
    return etree.tostring(tt)
def setUp(self):
    """Create ``self.subs``: ten back-to-back one-second English subtitles.

    Each subtitle's text starts with its index and contains markup-like
    characters (``*``, ``**`` and ``>>``).
    """
    fixture = SubtitleSet(language_code='en')
    for index in range(10):
        begin = index * 1000
        fixture.append_subtitle(
            from_ms=begin,
            to_ms=begin + 1000,
            content="%s - and *italics* and **bold** and >>." % index,
        )
    self.subs = fixture
def to_internal(self):
    """Parse ``self.input_string`` (XML) into a cached ``SubtitleSet``.

    Every child of the XML root is read as one subtitle: its ``start``
    attribute (seconds) gives the start time and its ``dur`` attribute
    (seconds), when present, gives the duration. Times are converted to
    integer milliseconds.

    The parsed set is cached on ``self.sub_set`` only after parsing has
    succeeded, so a failed parse raises again on the next call instead
    of returning a partially filled set.

    :returns: the ``SubtitleSet`` built from the input.
    :raises SubtitleParserError: wrapping any error raised while
        parsing, including the ``ValueError`` raised when the document
        contains no items.
    """
    if not hasattr(self, 'sub_set'):
        try:
            sub_set = SubtitleSet(self.language)
            xml = etree.fromstring(self.input_string.encode('utf-8'))
            total_items = len(xml)
            for i, item in enumerate(xml):
                start = int(float(item.get('start')) * 1000)
                # Look at the XML attribute itself; a Python-level
                # hasattr() check on the element never sees it.
                dur = item.get('dur')
                if dur is not None:
                    duration = int(float(dur) * 1000)
                elif i + 1 < total_items:
                    # youtube sometimes omits the duration attribute;
                    # in this case we display until the next sub starts
                    next_item = xml[i + 1]
                    duration = int(float(next_item.get('start')) * 1000) - start
                else:
                    # hardcode the last sub duration at 3 seconds
                    duration = 3000
                end = start + duration
                text = unescape_html(item.text) if item.text else u''
                sub_set.append_subtitle(start, end, text)
            if total_items == 0:
                raise ValueError("No subs")
        except Exception as e:
            raise SubtitleParserError(original_error=e)
        self.sub_set = sub_set
    return self.sub_set
def forwards(self, orm):
    """Backfill subtitle versions whose serialized subtitles are blank.

    Every ``subtitles.SubtitleVersion`` row with
    ``serialized_subtitles == ''`` gets the compressed XML of an empty
    ``SubtitleSet`` in its own language, and a ``subtitle_count`` of 0.
    """
    version_model = orm['subtitles.SubtitleVersion']
    blank_versions = version_model.objects.filter(serialized_subtitles='')
    for version in chunkediter(blank_versions):
        empty_set = SubtitleSet(version.language_code)
        version.serialized_subtitles = compress(empty_set.to_xml())
        version.subtitle_count = 0
        version.save()
def test_add_alternate_urls(self):
    # A video opened with an additional URL must resolve to the same
    # video id, and expose the saved subtitles, when the widget is later
    # opened through that additional URL.
    test_utils.invalidate_widget_video_cache.run_original_for_test()
    primary_url = VIDEO_URL
    alternate_url = 'http://ia700406.us.archive.org/16/items/PeopleOfHtml5-BruceLawsonmp4Version/PeopleOfHtml5-BruceLawson.mp4'
    request = RequestMockup(self.user_0)

    widget = rpc.show_widget(
        request, primary_url, False,
        additional_video_urls=[alternate_url])
    video_id = widget['video_id']

    editing = rpc.start_editing(
        request, video_id, 'en', original_language_code='en')
    session_pk = editing['session_pk']
    rpc.finished_subtitles(
        request, session_pk, create_subtitle_set().to_xml())

    # Same video, URLs swapped.
    widget = rpc.show_widget(
        request, alternate_url, False,
        additional_video_urls=[primary_url])
    self.assertEqual(video_id, widget['video_id'])
    first_language_pk = widget['drop_down_contents'][0]['pk']
    fetched = rpc.fetch_subtitles(request, video_id, first_language_pk)
    self.assertEquals(1, len(SubtitleSet('en', fetched['subtitles'])))

    # The alternate URL on its own is enough to find the video.
    widget = rpc.show_widget(request, alternate_url, False)
    self.assertEqual(video_id, widget['video_id'])
def test_fetch_subtitles(self):
    # Fetching the language of a freshly created basic version must give
    # back data that parses into a one-subtitle English set.
    request = RequestMockup(self.user_0)
    version = self._create_basic_version(request)
    fetched = rpc.fetch_subtitles(
        request, version.video.video_id, version.language.pk)
    parsed = SubtitleSet('en', initial_data=fetched['subtitles'])
    self.assertEqual(1, len(parsed))
def test_linebreaks(self):
    # Render a two-subtitle set through TXTGenerator and compare the
    # text output against the TXT_LINEBREAKS fixture.
    subtitle_set = SubtitleSet('en')
    # NOTE(review): given the test name, this text may be meant to span
    # several lines; it is reproduced exactly as found. Confirm against
    # TXT_LINEBREAKS.
    subtitle_set.append_subtitle(0, 1000, '''line 1 line 2 line 3''')
    # NOTE(review): the end time (200) is earlier than the start time
    # (1000) - confirm whether that is intentional.
    subtitle_set.append_subtitle(1000, 200, 'second sub')
    rendered = unicode(TXTGenerator(subtitle_set))
    self.assertEqual(rendered, TXT_LINEBREAKS)
def test_add_subtitles_with_complete_true_but_unsynced_subs(self):
    # Corner case: the caller passes complete=True, but the subtitles
    # themselves have no timings. In that situation subtitles_published
    # must not be emitted.
    unsynced = SubtitleSet(language_code='en')
    unsynced.append_subtitle(None, None, 'content')
    pipeline.add_subtitles(self.video, 'en', unsynced, complete=True)
    published_calls = self.subtitles_published_handler.call_count
    assert_equal(published_calls, 0)
def __init__(self, input_string, language=None):
    """Parse ``input_string`` into ``self.subtitle_set``.

    The input is handed to ``SubtitleSet`` together with ``language``
    and ``normalize_time=True``.

    :param input_string: the XML text to parse.
    :param language: language code passed through to ``SubtitleSet``.
    :raises SubtitleParserError: if ``SubtitleSet`` raises
        ``XMLSyntaxError`` or ``ExpatError``; the original exception is
        passed along as the second argument.
    """
    try:
        self.subtitle_set = SubtitleSet(language, input_string,
                                        normalize_time=True)
    # ``except ... as e`` works on Python 2.6+ and Python 3, unlike the
    # comma form.
    except (XMLSyntaxError, ExpatError) as e:
        raise SubtitleParserError(
            "There was an error while we were parsing your xml", e)
def test_fork_translation_dependent_on_forked(self):
    # French is first created as a translation of (already forked)
    # Spanish, then forked itself with new timings. Afterwards French
    # must be forked with the new timing, and Spanish must still be
    # forked with its own timing.
    request = RequestMockup(self.user_0)
    video = self._create_two_sub_forked_subs(request)

    # Create French on top of Spanish.
    editing = rpc.start_editing(
        request, video.video_id, 'fr', base_language_code='es')
    session_pk = editing['session_pk']
    rpc.finished_subtitles(
        request, session_pk, create_subtitle_set(2).to_xml())
    fr_sl = models.Video.objects.get(
        video_id=video.video_id).subtitle_language('fr')
    self.assertEquals(False, fr_sl.is_forked)

    # now fork french
    widget = rpc.show_widget(request, VIDEO_URL, False)
    video_id = widget['video_id']
    fr_sl = models.Video.objects.get(
        video_id=video_id).subtitle_language('fr')
    editing = rpc.start_editing(
        request, video_id, 'fr', subtitle_language_pk=fr_sl.pk)
    session_pk = editing['session_pk']

    current = SubtitleSet('fr', editing['subtitles']['subtitles'])
    self.assertEquals(3, len(current))
    first = current[0]
    self.assertEquals('hey you 0', first.text)
    self.assertEquals(0, first.start_time)
    self.assertEquals(1000, first.end_time)

    # update the timing on the French sub.
    retimed = SubtitleSet('fr')
    retimed.append_subtitle(1020, 1500, 'hey 0')
    retimed.append_subtitle(2500, 3500, 'hey 1')
    rpc.finished_subtitles(
        request, session_pk, retimed.to_xml(), forked=True)

    french_lang = models.Video.objects.get(
        video_id=video_id).subtitle_language('fr')
    french_subtitles = french_lang.get_tip().get_subtitles()
    self.assertTrue(french_lang.is_forked)
    self.assertEquals(1020, french_subtitles[0].start_time)

    spanish_lang = models.Video.objects.get(
        video_id=video_id).subtitle_language('es')
    spanish_subtitles = spanish_lang.get_tip().get_subtitles()
    self.assertEquals(True, spanish_lang.is_forked)
    self.assertEquals(500, spanish_subtitles[0].start_time)
def create_subtitle_set(number_of_subtitles=0, synced=True):
    """Build an English ``SubtitleSet`` filled with 'hey you N' lines.

    Indices run from 0 to ``number_of_subtitles`` inclusive, so the
    result holds ``number_of_subtitles + 1`` subtitles (one subtitle for
    the default of 0).

    :param number_of_subtitles: highest subtitle index to generate.
    :param synced: when true, subtitle N runs from ``N * 1000`` to
        ``N * 1000 + 1000``; otherwise both times are ``None``.
    :returns: the populated ``SubtitleSet``.
    """
    result = SubtitleSet('en')
    for index in xrange(0, number_of_subtitles + 1):
        if synced:
            start = index * 1000
            end = index * 1000 + 1000
        else:
            start = end = None
        result.append_subtitle(start, end, 'hey you %s' % index)
    return result
def test_edit_existing_original(self):
    # Re-opening the existing English language for editing must return
    # its one subtitle and no 'original_subtitles' entry.
    request = RequestMockup(self.user_0)
    session = self._create_basic_version(request)
    language = sub_models.SubtitleLanguage.objects.get(
        pk=session.language.pk)

    # The widget is shown first; its response is not inspected.
    rpc.show_widget(request, VIDEO_URL, False)
    return_value = rpc.start_editing(
        request, session.video.video_id, 'en',
        subtitle_language_pk=language.pk)

    existing = SubtitleSet('en', return_value['subtitles']['subtitles'])
    self.assertEquals(len(existing), 1)
    self.assertFalse('original_subtitles' in return_value)
def test_one_set_empty(self):
    # Diffing a four-subtitle set against an empty one must report a
    # change, with both text and timing fully changed (1.0).
    entries = [
        (index * 1000, (index + 1) * 1000, "Hey %d" % (index + 1))
        for index in range(4)
    ]
    populated = SubtitleSet.from_list('en', entries)
    empty = SubtitleSet('en')

    result = diff(populated, empty)

    self.assertEqual(result['changed'], True)
    self.assertEqual(result['text_changed'], 1.0)
    self.assertEqual(result['time_changed'], 1.0)
def test_autoplay_for_non_finished(self):
    # Request the widget with English preloaded while English only has a
    # draft (editing was started, nothing was saved).
    request = RequestMockup(self.user_0)
    self._start_editing(request)
    base_state = {'language': 'en'}
    return_value = rpc.show_widget(
        request, VIDEO_URL, False, base_state=base_state)
    preloaded = SubtitleSet('en', return_value['subtitles'])
    # This used to be None; a DFXP document is now always sent, even
    # when it is empty, so the parsed set has length 0.
    self.assertEquals(len(preloaded), 0)
def test_zero_out_trans_version_1(self):
    # A second user opens an existing Spanish translation and saves it
    # with no subtitles: a second (empty) version must be stored, and
    # the language must count as having non-empty versions but not a
    # non-empty tip.
    request = RequestMockup(self.user_0)
    session = self._create_basic_dependent_version(request)
    # Lookup kept from the original test; its result is not used.
    session.video.subtitle_language('en')

    # user_1 opens translate dialog
    request_1 = RequestMockup(self.user_1, "b")
    rpc.show_widget(request_1, VIDEO_URL, False)
    response = rpc.start_editing(
        request_1, session.video.video_id, 'es',
        base_language_code='en')
    session_pk = response['session_pk']
    self.assertEquals(True, response['can_edit'])

    current = response['subtitles']
    current_set = SubtitleSet('en', current['subtitles'])
    self.assertEquals(1, current['version'])
    self.assertEquals(1, len(current_set))

    # user_1 deletes the subtitles.
    rpc.finished_subtitles(
        request_1, session_pk, SubtitleSet('en').to_xml())

    language = SubtitlingSession.objects.get(pk=session_pk).language
    self.assertEquals(2, language.subtitleversion_set.full().count())
    self.assertEquals(0, len(language.version().get_subtitles()))

    languages = sub_models.SubtitleLanguage.objects
    self.assertTrue(
        languages.having_nonempty_versions()
        .filter(pk=language.pk).exists())
    self.assertFalse(
        languages.having_nonempty_tip()
        .filter(pk=language.pk).exists())
def to_internal(self):
    """Convert the items from ``self._result_iter()`` into a ``SubtitleSet``.

    Newlines in each item's text are replaced with ``<br/>`` (the item
    dict is updated in place) and the text is appended unescaped. The
    input counts as valid when at least one item has text that is not
    only whitespace.

    The set is cached on ``self.sub_set`` only once it has been
    validated, so a failed conversion raises again on the next call
    instead of returning the rejected set.

    :returns: the populated ``SubtitleSet``.
    :raises SubtitleParserError: if no item has non-whitespace text.
    """
    if not hasattr(self, 'sub_set'):
        sub_set = SubtitleSet(self.language)
        valid = False
        for item in self._result_iter():
            item['text'] = item['text'].replace("\n", '<br/>')
            if not valid and ''.join(item['text'].split()):
                valid = True
            sub_set.append_subtitle(item['start'], item['end'],
                                    item['text'], escape=False)
        if not valid:
            raise SubtitleParserError("No subs")
        self.sub_set = sub_set
    return self.sub_set
def test_zero_out_trans_version_0(self):
    # Saving an empty subtitle set into a brand-new Spanish translation
    # must not create any version for that language.
    request = RequestMockup(self.user_0)
    session = self._create_basic_version(request)
    source_language = session.language

    response = rpc.start_editing(
        request, source_language.video.video_id, 'es',
        base_language_code=source_language.language_code)
    session_pk = response['session_pk']
    new_language = SubtitlingSession.objects.get(pk=session_pk).language

    rpc.finished_subtitles(
        request, session_pk, SubtitleSet('en').to_xml())

    # creating an empty version should not store empty stuff on the db
    self.assertEquals(0, new_language.subtitleversion_set.full().count())
    languages = sub_models.SubtitleLanguage.objects
    self.assertFalse(
        languages.having_nonempty_versions()
        .filter(pk=new_language.pk).exists())
    self.assertFalse(
        languages.having_nonempty_tip()
        .filter(pk=new_language.pk).exists())
def test_dfxp_merge(self):
    # Merge a one-subtitle English set and a one-subtitle Spanish set
    # through self.loader.dfxp_merge() and compare the result with the
    # expected document text below.
    en_subs = SubtitleSet('en')
    es_subs = SubtitleSet('es')
    en_subs.append_subtitle(1000, 1500, 'content')
    es_subs.append_subtitle(1000, 1500, 'spanish content')
    result = self.loader.dfxp_merge([en_subs, es_subs])
    # In the expected text the root element has an empty xml:lang and
    # each language sits in its own <div xml:lang="..."> wrapper.
    # NOTE(review): the literal is whitespace-sensitive and is kept
    # exactly as found - confirm its line layout against the real output.
    utils.assert_long_text_equal(result, """\ <tt xmlns:tts="http://www.w3.org/ns/ttml#styling" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns="http://www.w3.org/ns/ttml" xml:lang=""> <head> <metadata> <ttm:title></ttm:title> <ttm:description></ttm:description> <ttm:copyright/> </metadata> <styling> <style xml:id="test-style" tts:color="white" tts:fontSize="18px"/> </styling> <layout> <region xml:id="bottom" style="test-style" tts:origin="0 80%" tts:extent="100% 20%"/> <region xml:id="top" style="test-style" tts:origin="0 0" tts:extent="100% 20%"/> </layout> </head> <body region="bottom"> <div xml:lang="en"> <div> <p begin="00:00:01.000" end="00:00:01.500">content</p> </div> </div> <div xml:lang="es"> <div> <p begin="00:00:01.000" end="00:00:01.500">spanish content</p> </div> </div> </body> </tt> """)
def _add_subtitles(sub_lang, num_subs, video, translated_from=None):
    """Add a generated version of ``num_subs`` subtitles to ``video``.

    Subtitle ``i`` starts at ``i * 1000`` ms, lasts 800 ms and reads
    ``'hey jude <i>'``.

    :param sub_lang: language object; only its ``language_code`` is read.
    :param num_subs: number of subtitles to generate.
    :param video: video passed through to ``pipeline.add_subtitles``.
    :param translated_from: optional language whose tip (``get_tip()``)
        is passed as the single parent of the new version.
    :returns: whatever ``pipeline.add_subtitles`` returns.
    """
    subtitle_set = SubtitleSet(sub_lang.language_code)
    for i in xrange(0, num_subs):
        start_time = i * 1000
        # The end time is relative to the start; ``i + 800`` placed every
        # end time after the first one before its own start time.
        end_time = start_time + 800
        subtitle_text = 'hey jude %s' % i
        subtitle_set.append_subtitle(start_time, end_time, subtitle_text)
    parents = []
    if translated_from:
        parents.append(translated_from.get_tip())
    return pipeline.add_subtitles(video, sub_lang.language_code,
                                  subtitle_set, parents=parents)
def _create_two_sub_forked_subs(self, request):
    # Starting from create_two_sub_dependent_session(), re-open Spanish
    # and save it with forked=True using two subtitles with their own
    # timings. Returns the video re-read from the database.
    session = create_two_sub_dependent_session(request)
    video = session.video

    # now fork subtitles
    spanish_pk = video.subtitle_language('es').pk
    response = rpc.start_editing(
        request, video.video_id, 'es',
        subtitle_language_pk=spanish_pk)
    session_pk = response['session_pk']

    forked = SubtitleSet('es')
    for start, end, text in ((500, 1500, 'hey'), (1600, 2500, 'you')):
        forked.append_subtitle(start, end, text)

    rpc.finished_subtitles(
        request, session_pk, forked.to_xml(), forked=True)
    return Video.objects.get(pk=session.video.pk)
def test_log_in_then_save(self):
    # A session is started anonymously; the user logs in (the request's
    # user is swapped) before saving. The newest version must hold the
    # one saved subtitle and be authored by the logged-in user.
    request_0 = RequestMockup(NotAuthenticatedUser())
    return_value = rpc.show_widget(request_0, VIDEO_URL, False)
    video_id = return_value['video_id']
    return_value = rpc.start_editing(
        request_0, video_id, 'en', original_language_code='en')
    session_pk = return_value['session_pk']

    sset = SubtitleSet('en')
    sset.append_subtitle(2300, 3400, 'hey')

    response = rpc.regain_lock(request_0, session_pk)
    self.assertEqual('ok', response['response'])

    # Log in, then save.
    request_0.user = self.user_0
    rpc.finished_subtitles(request_0, session_pk, sset.to_xml())

    sversion = sub_models.SubtitleVersion.objects.order_by('-pk')[0]
    # This is now a real check; it used to be a plain assignment to
    # ``subtitle_count``, which verified nothing.
    self.assertEqual(1, sversion.subtitle_count)
    self.assertEqual(request_0.user.pk, sversion.author.pk)
def test_zero_out_version_1(self):
    # A second user re-opens an existing English language and saves it
    # with no subtitles: a second (empty) version must be stored, and
    # the language must count as having non-empty versions but not a
    # non-empty tip.
    request_0 = RequestMockup(self.user_0)
    version = self._create_basic_version(request_0)

    # different user opens dialog for video
    request_1 = RequestMockup(self.user_1, "b")
    rpc.show_widget(request_1, VIDEO_URL, False)
    editing = rpc.start_editing(
        request_1, version.language.video.video_id, 'en')
    session_pk = editing['session_pk']

    # user_1 deletes all the subs
    rpc.finished_subtitles(
        request_1, session_pk, SubtitleSet('en').to_xml())

    # Lookup kept from the original test; its result is not used.
    Video.objects.get(pk=version.language.video.pk)
    language = SubtitlingSession.objects.get(pk=session_pk).language
    self.assertEqual(2, language.subtitleversion_set.full().count())
    self.assertEqual(0, len(language.version().get_subtitles()))

    languages = sub_models.SubtitleLanguage.objects
    self.assertTrue(
        languages.having_nonempty_versions()
        .filter(pk=language.pk).exists())
    self.assertFalse(
        languages.having_nonempty_tip()
        .filter(pk=language.pk).exists())
def to_internal(self):
    """Convert ``self._matches`` into a cached ``SubtitleSet``.

    Each match's ``groupdict()`` goes through ``self._get_data``; the
    item's text goes through ``self.get_markup`` and is appended
    unescaped, together with the item's optional ``region``.

    The set is cached on ``self.sub_set`` only after every match has
    been converted, so a failed conversion raises again on the next
    call instead of returning a partially filled set.

    :returns: the populated ``SubtitleSet``.
    :raises SubtitleParserError: wrapping any error raised during
        conversion, including the ``ValueError`` raised when there are
        no matches at all.
    """
    if not hasattr(self, 'sub_set'):
        # Stays None only when the loop below never runs.
        match = None
        try:
            sub_set = SubtitleSet(self.language)
            for match in self._matches:
                item = self._get_data(match.groupdict())
                text = self.get_markup(item['text'])
                sub_set.append_subtitle(
                    item['start'], item['end'], text,
                    region=item.get('region'), escape=False)
            if match is None:
                raise ValueError("No subs found")
        except Exception as e:
            raise SubtitleParserError(original_error=e)
        self.sub_set = sub_set
    return self.sub_set
def to_internal(self):
    """Parse ``self.input_string`` (JSON) into a cached ``SubtitleSet``.

    The decoded data is sorted by each entry's ``position`` key, and
    every entry's ``start``, ``end`` and ``text`` are appended in that
    order.

    The set is cached on ``self.sub_set`` only after all entries have
    been appended, so an invalid document raises again on the next call
    instead of returning an empty set.

    :returns: the populated ``SubtitleSet``.
    :raises SubtitleParserError: if the input is not valid JSON.
    """
    if not hasattr(self, 'sub_set'):
        sub_set = SubtitleSet(self.language)
        try:
            data = json.loads(self.input_string)
        except ValueError:
            raise SubtitleParserError("Invalid JSON data provided.")
        # Sort by the ``position`` key
        data = sorted(data, key=lambda k: k['position'])
        for sub in data:
            sub_set.append_subtitle(sub['start'], sub['end'], sub['text'])
        self.sub_set = sub_set
    return self.sub_set
def test_finish_then_other_user_opens(self):
    # After user_0 has saved subtitles, user_1 opening the same language
    # must be allowed to edit and must receive the saved subtitles.
    request_0 = RequestMockup(self.user_0)
    widget = rpc.show_widget(request_0, VIDEO_URL, False)
    video_id = widget['video_id']
    editing = rpc.start_editing(
        request_0, video_id, 'en', original_language_code='en')
    session_pk = editing['session_pk']
    rpc.finished_subtitles(
        request_0, session_pk,
        subtitles=create_subtitle_set().to_xml())

    # different user opens the dialog for video
    request_1 = RequestMockup(self.user_1, "b")
    reopened = rpc.start_editing(request_1, video_id, 'en')

    # make sure we are getting back finished subs.
    self.assertEqual(True, reopened['can_edit'])
    finished = reopened['subtitles']
    self.assertEqual(1, finished['version'])
    self.assertEqual(1, len(SubtitleSet('en', finished['subtitles'])))
def test_keep_subtitling_dialog_open(self):
    # Opening the dialog and pinging the lock, without saving anything,
    # must not leave the video with a latest version.
    request = RequestMockup(self.user_0)
    widget = rpc.show_widget(request, VIDEO_URL, False)
    video_id = widget['video_id']
    editing = rpc.start_editing(
        request, video_id, 'en', original_language_code='en')
    self.assertEqual(True, editing['can_edit'])

    draft = editing['subtitles']
    self.assertEqual(0, draft['version'])
    draft_set = SubtitleSet('es', draft['subtitles'])
    self.assertEqual(0, len(draft_set))

    # the subtitling dialog pings the server, even
    # though we've done no subtitling work yet.
    rpc.regain_lock(request, editing['session_pk'])

    video = Video.objects.get(video_id=video_id)
    # if video.latest_version() returns anything other than None,
    # video.html will show that the video has subtitles.
    self.assertEqual(None, video.latest_version())
def test_unsynced_generator(self):
    # Round-trip five untimed subtitles through SBVGenerator and
    # SBVParser: the parsed items must still have no timings, and
    # generating again must render None as 9:59:59.000.
    unsynced = SubtitleSet('en')
    for index in xrange(0, 5):
        unsynced.append_subtitle(None, None, "%s" % index)

    output = unicode(SBVGenerator(unsynced, language='en'))
    internal = SBVParser(output, 'en').to_internal()
    items = [item for item in internal.subtitle_items()]

    self.assertEqual(len(internal), 5)
    for item in items:
        # item[0] and item[1] are compared with None, as before.
        self.assertEqual(item[0], None)
        self.assertEqual(item[1], None)

    generated = SBVGenerator(internal)
    self.assertEqual(generated.format_time(None), u'9:59:59.000')
    self.assertIn(
        u'''9:59:59.000,9:59:59.000\r\n0\r\n\r\n9:59:59.000,9:59:59.000\r\n1\r\n\r\n9:59:59.000,9:59:59.000\r\n2\r\n\r\n9:59:59.000,9:59:59.000\r\n3\r\n\r\n9:59:59.000,9:59:59.000\r\n4\r\n''',
        unicode(generated))