def clean_draft(self):
    """Validate the uploaded ``draft`` file and parse it into subtitles.

    The checks run in this order:

    1. the file size must not exceed ``SUBTITLE_FILESIZE_LIMIT_KB`` kilobytes;
    2. the (lower-cased) file extension must be in ``SUBTITLE_FILE_FORMATS``;
    3. chardet must be able to detect an encoding for the file contents.

    On success the extension is stored on ``self.extension`` and the parsed
    subtitles on ``self._parsed_subtitles``.

    Raises:
        forms.ValidationError: if any check fails, or if loading/parsing the
            subtitles raises ``TypeError``.
    """
    data = self.cleaned_data['draft']

    if data.size > SUBTITLE_FILESIZE_LIMIT_KB * 1024:
        # Interpolate AFTER translating: the message catalog is keyed on the
        # template string, not on the already-formatted text.
        raise forms.ValidationError(
            _(u'File size must be less than %d kb.')
            % SUBTITLE_FILESIZE_LIMIT_KB)

    # With no dot in the name, rsplit yields the whole name, which then
    # fails the format check below.
    parts = data.name.rsplit('.', 1)
    self.extension = parts[-1].lower()
    if self.extension not in SUBTITLE_FILE_FORMATS:
        raise forms.ValidationError(
            _(u'Unsupported format. Please upload one of the following: %s')
            % ", ".join(SUBTITLE_FILE_FORMATS))

    text = data.read()
    encoding = chardet.detect(text)['encoding']
    if not encoding:
        raise forms.ValidationError(_(u'Can not detect file encoding'))

    # For xml based formats we can't just convert to unicode, as the parser
    # will complain that the string encoding doesn't match the encoding
    # declaration in the xml file if it's not utf-8.
    is_xml = self.extension in ('dfxp', 'ttml', 'xml')
    decoded = force_unicode(text, encoding) if not is_xml else text

    try:
        parser = babelsubs.load_from(decoded, type=self.extension)
        self._parsed_subtitles = parser.to_internal()
    except TypeError as e:
        raise forms.ValidationError(e)
def fetch_subs_vimeo(video_url, user, team):
    """Import Vimeo text tracks for languages the video has no versions in.

    For each account returned by ``lookup_vimeo_accounts`` the track list is
    fetched; every track whose converted language code is not already in the
    video's languages-with-versions is downloaded, parsed as VTT and added
    through ``pipeline.add_subtitles``.  When at least one version was
    created for an account, ``subtitles_imported`` is sent and no further
    accounts are tried.

    Failures on an individual track (download, HTTP error status, parsing,
    saving) are logged and do not stop the remaining tracks from being
    processed.
    """
    video_id = video_url.videoid
    existing_langs = set(
        l.language_code
        for l in video_url.video.newsubtitlelanguage_set.having_versions())

    for vimeo_account in lookup_vimeo_accounts(video_url, user, team):
        tracks = vimeo.get_text_tracks(vimeo_account, video_id)
        versions = []
        if tracks is not None and 'data' in tracks:
            for track in tracks['data']:
                language_code = convert_language_code(track['language'])
                if not language_code or language_code in existing_langs:
                    continue
                try:
                    # The download lives inside the try so a network failure
                    # on one track is handled like any other per-track error.
                    response = requests.get(track['link'])
                    # Do not feed an HTTP error page to the VTT parser.
                    response.raise_for_status()
                    subs = load_from(response.content,
                                     type='vtt').to_internal()
                    version = pipeline.add_subtitles(
                        video_url.video, language_code, subs,
                        note="From Vimeo", complete=True,
                        origin=ORIGIN_IMPORTED)
                except Exception as e:
                    # Deliberately best-effort: log and move on to the next
                    # track.
                    logger.error(
                        "Exception while importing subtitles from Vimeo %s",
                        e)
                else:
                    versions.append(version)
        if versions:
            subtitles_imported.send(sender=versions[0].subtitle_language,
                                    versions=versions)
            # NOTE(review): the original layout was lost; the break is taken
            # to belong to this "something was imported" branch -- confirm.
            break
def version_get_subtitles(sv):
    """Build the SubtitleSet stored on the given version.

    The version's ``serialized_subtitles`` are decompressed and loaded as
    DFXP.  A SubtitleSet is always returned; it may be empty when the
    version has no subtitles.
    """
    dfxp_text = decompress(sv.serialized_subtitles)
    parser = load_from(dfxp_text, type='dfxp')
    return parser.to_internal()
def _retrieve(self, format):
    """Convert ``self.subs`` to ``format`` via the widget endpoint.

    Posts the subtitles (as XML, language ``pt-br``) to the
    ``widget:convert_subtitles`` view, asserts a 200 response without an
    ``errors`` key, re-parses the returned result and asserts it contains
    10 subtitle items.

    Returns a ``(raw response content, list of parsed items)`` tuple.
    """
    url = reverse("widget:convert_subtitles")
    payload = {
        'subtitles': self.subs.to_xml(),
        'language_code': 'pt-br',
        'format': format,
    }
    response = self.client.post(url, payload)
    self.assertEqual(response.status_code, 200)

    body = json.loads(response.content)
    self.assertNotIn('errors', body)

    subtitle_set = babelsubs.load_from(body['result'], format).to_internal()
    parsed = list(subtitle_set.subtitle_items())
    self.assertEqual(len(parsed), 10)
    return response.content, parsed
def create_langs_and_versions(video, langs, user=None):
    """Add the sample ``SRT`` subtitles to ``video`` once per language.

    The same parsed subtitle set is passed to ``pipeline.add_subtitles`` for
    every entry of ``langs``; the results are returned as a list in the same
    order.  ``user`` is accepted but not used by this function.
    """
    from subtitles import pipeline

    parser = babelsubs.load_from(SRT, type='srt', language='en')
    subtitle_set = parser.to_internal()

    versions = []
    for language_code in langs:
        versions.append(
            pipeline.add_subtitles(video, language_code, subtitle_set))
    return versions
def _add_language_via_pipeline(video, lang):
    """Add the sample ``SRT`` subtitles to ``video`` for language ``lang``.

    Returns whatever ``pipeline.add_subtitles`` returns.
    """
    parser = babelsubs.load_from(SRT, type='srt', language='en')
    subtitle_set = parser.to_internal()
    return pipeline.add_subtitles(video, lang, subtitle_set)
def test_from_string(self):
    """SRT content loads from a string when the type is given explicitly.

    The fixture file has a non-SRT extension, so the format comes only from
    the ``type='srt'`` argument.  Expects 19 parsed subtitles.
    """
    path = utils.get_data_file_path("simple-srt.badextension")
    # Use a context manager so the file handle is closed deterministically.
    with open(path, 'r') as f:
        content = f.read()

    subs = load_from(content, type='srt')
    parsed = subs.to_internal()
    self.assertEqual(len(parsed), 19)
def test_load_from_string(self):
    """Loading ``simple.dfxp`` from a string and converting must not raise."""
    dfxp_path = utils.get_data_file_path('simple.dfxp')
    with open(dfxp_path) as dfxp_file:
        contents = dfxp_file.read()

    parser = load_from(contents, type='dfxp')
    parser.to_internal()
def test_from_string(self):
    """SRT content loads from a string when the type is given explicitly.

    The fixture file has a non-SRT extension, so the format comes only from
    the ``type='srt'`` argument.  Expects 19 parsed subtitles.
    """
    path = utils.get_data_file_path("simple-srt.badextension")
    # Use a context manager so the file handle is closed deterministically.
    with open(path, 'r') as f:
        content = f.read()

    subs = load_from(content, type='srt')
    parsed = subs.to_internal()
    self.assertEqual(len(parsed), 19)