def test_heal(self, mock_client_init):
    """
    Run ``VedaHeal.discovery`` with the VAL API endpoints stubbed out.

    Both the GET and the PUT on the video's VAL url are registered with
    ``responses`` and answer with a 200.
    """
    video_url = build_url(CONFIG_DATA['val_api_url'], self.video_id)

    encoded_video = {
        'url': 'https://testurl.mp4',
        'file_size': 8499040,
        'bitrate': 131,
        'profile': 'mobile_low',
    }
    val_payload = {
        'courses': [{u'WestonHS/PFLC1x/3T2015': None}],
        'encoded_videos': [encoded_video]
    }

    # GET returns the video's VAL record as JSON.
    responses.add(
        responses.GET,
        video_url,
        body=json.dumps(val_payload),
        content_type='application/json',
        status=200
    )
    # PUT on the same url simply succeeds.
    responses.add(responses.PUT, video_url, status=200)

    VedaHeal().discovery()
def start_3play_transcription_process(self, encoded_file):
    """
    Start the 3PlayMedia transcription flow for the current video.

    Looks up the transcript credentials for the course org and the video's
    provider, sets the video's transcript status to in progress and then asks
    ``ThreePlayMediaClient`` to generate transcripts. When no credentials
    exist, a warning is logged instead.

    Arguments:
        encoded_file (str): name of encoded file to construct video url
    """
    try:
        # The list may hold several course runs, but they all share the same
        # org, so the first entry is enough.
        course_org = extract_course_org(self.video_proto.platform_course_url[0])
        credentials = TranscriptCredentials.objects.get(
            org=course_org,
            provider=self.video_query.provider,
        )

        # Update the transcript status for the video before contacting the provider.
        utils.update_video_status(
            val_api_client=VALAPICall(video_proto=None, val_status=None),
            video=self.video_query,
            status=TranscriptStatus.IN_PROGRESS
        )

        # Location of the encoded file that 3PlayMedia should fetch.
        media_url = build_url(
            self.auth_dict['s3_base_url'],
            self.auth_dict['edx_s3_endpoint_bucket'],
            encoded_file
        )

        # The keyword arguments are additional attributes that will come back
        # with the callback.
        callback_url = build_url(
            self.auth_dict['veda_base_url'],
            reverse(
                '3play_media_callback',
                args=[self.auth_dict['transcript_provider_request_token']]
            ),
            org=course_org,
            edx_video_id=self.video_query.studio_id,
            lang_code=self.video_query.source_language,
        )

        # Initialize the 3PlayMedia client and start the transcription process.
        ThreePlayMediaClient(
            org=course_org,
            video=self.video_query,
            media_url=media_url,
            api_key=credentials.api_key,
            api_secret=credentials.api_secret,
            callback_url=callback_url,
            turnaround_level=self.video_query.three_play_turnaround,
            three_play_api_base_url=self.auth_dict['three_play_api_base_url'],
        ).generate_transcripts()

    except TranscriptCredentials.DoesNotExist:
        LOGGER.warning(
            '[DELIVERY] : Transcript preference is not found for provider=%s, video=%s',
            self.video_query.provider,
            self.video_query.studio_id,
        )
def cielo24_transcription_flow(self, encoded_file):
    """
    Cielo24 transcription flow.

    Resolves the cielo24 api key for the course org, sets the video's
    transcript status to in progress and starts the cielo24 transcription
    flow for the encoded file.

    Arguments:
        encoded_file (str): name of encoded file to construct video url

    Returns:
        None. When no transcript credentials exist for the org and provider,
        a warning is logged and the flow is not started.
    """
    # The first course url is used to resolve the org.
    org = extract_course_org(self.video_proto.platform_course_url[0])

    try:
        api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
    except TranscriptCredentials.DoesNotExist:
        # `Logger.warn` is a deprecated alias; `warning` is the supported call.
        LOGGER.warning('[DELIVERY] Unable to find cielo24 api_key for org=%s', org)
        return None

    # Url of the encoded file that is handed to cielo24.
    s3_video_url = build_url(
        self.auth_dict['s3_base_url'],
        self.auth_dict['edx_s3_endpoint_bucket'],
        encoded_file
    )
    # Base callback url handed to the cielo24 client.
    callback_base_url = build_url(
        self.auth_dict['veda_base_url'],
        reverse(
            'cielo24_transcript_completed',
            args=[self.auth_dict['transcript_provider_request_token']]
        )
    )

    # update transcript status for video.
    val_api_client = VALAPICall(video_proto=None, val_status=None)
    utils.update_video_status(
        val_api_client=val_api_client,
        video=self.video_query,
        status=TranscriptStatus.IN_PROGRESS
    )

    cielo24 = Cielo24Transcript(
        self.video_query,
        org,
        api_key,
        self.video_query.cielo24_turnaround,
        self.video_query.cielo24_fidelity,
        self.video_query.preferred_languages,
        s3_video_url,
        callback_base_url,
        self.auth_dict['cielo24_api_base_url'],
    )
    cielo24.start_transcription_flow()
def get_available_languages(self):
    """
    Fetch the languages supported by 3Play Media.

    Returns:
        The decoded JSON payload of the available-languages endpoint.

    Raises:
        ThreePlayMediaLanguagesRetrievalError: if the request fails or the
            decoded payload is a dict.
    """
    languages_url = build_url(self.base_url, self.available_languages_url, apikey=self.api_key)
    response = requests.get(url=languages_url)

    if not response.ok:
        # The api key is scrubbed from the url before it goes into the message.
        failure = 'Error while retrieving available languages: url={url} -- {response} -- {status}'.format(
            url=scrub_query_params(languages_url, ['apikey']),
            response=response.text,
            status=response.status_code,
        )
        raise ThreePlayMediaLanguagesRetrievalError(failure)

    # A healthy response is a list of supported languages; a dict signals an
    # error: https://support.3playmedia.com/hc/en-us/articles/227729968-Captions-Imports-API
    languages = json.loads(response.text)
    if isinstance(languages, dict):
        unexpected = 'Expected 3Play Media Supported languages but got: {response}'.format(
            response=response.text
        )
        raise ThreePlayMediaLanguagesRetrievalError(unexpected)

    return languages
def embed_media_url(self, job_id):
    """
    Attach the video's media url to a cielo24 job.

    Arguments:
        job_id (str): cielo24 job id

    Returns:
        cielo24 task id

    Raises:
        Cielo24AddMediaError: if the add-media request does not succeed
    """
    add_media_url = build_url(
        self.cielo24_api_base_url,
        self.cielo24_add_media,
        v=CIELO24_API_VERSION,
        job_id=job_id,
        api_token=self.api_key,
        media_url=self.s3_video_url,
    )
    response = requests.get(add_media_url)

    if not response.ok:
        # The api token is scrubbed from the url before it goes into the message.
        scrubbed_url = scrub_query_params(add_media_url, ['api_token'])
        raise Cielo24AddMediaError(
            '[ADD MEDIA ERROR] url={} -- status={} -- text={}'.format(
                scrubbed_url, response.status_code, response.text
            )
        )

    # The response body is evaluated as a Python literal to read the task id.
    parsed_body = ast.literal_eval(response.text)
    task_id = parsed_body['TaskId']
    LOGGER.info(
        '[CIELO24] Media url created for video=%s with job_id=%s and task_id=%s',
        self.video.studio_id,
        job_id,
        task_id,
    )
    return task_id
def create_job(self):
    """
    Create a new cielo24 job for transcription.

    The job is named after the video's studio id and uses the video's source
    language.

    Returns:
        cielo24 job id

    Raises:
        Cielo24CreateJobError: if the new-job request does not succeed
    """
    new_job_url = build_url(
        self.cielo24_api_base_url,
        self.cielo24_new_job,
        v=CIELO24_API_VERSION,
        language=self.video.source_language,
        api_token=self.api_key,
        job_name=self.video.studio_id,
    )
    response = requests.get(new_job_url)

    if not response.ok:
        # The api token is scrubbed from the url before it goes into the message.
        scrubbed_url = scrub_query_params(new_job_url, ['api_token'])
        raise Cielo24CreateJobError(
            '[CREATE JOB ERROR] url={} -- status={} -- text={}'.format(
                scrubbed_url, response.status_code, response.text
            )
        )

    # The response body is evaluated as a Python literal to read the job id.
    parsed_body = ast.literal_eval(response.text)
    job_id = parsed_body['JobId']
    LOGGER.info('[CIELO24] New job created for video=%s with job_id=%s', self.video.studio_id, job_id)
    return job_id
def get_translation_services(api_key):
    """
    Retrieve the translation services offered by 3Play Media.

    Arguments:
        api_key(unicode): api key which is required to make an authentic call to 3Play Media

    Returns:
        Available 3Play Media Translation services.

    Raises:
        TranscriptTranslationError: if the request fails or the decoded
            payload is not a list.
    """
    services_url = build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key)
    response = requests.get(services_url)

    if not response.ok:
        # The api key is scrubbed from the url before it goes into the message.
        failure = u'[3PlayMedia Callback] Error fetching the translation services -- url={url}, {status}, {response}'.format(
            url=scrub_query_params(services_url, ['apikey']),
            status=response.status_code,
            response=response.text,
        )
        raise TranscriptTranslationError(failure)

    # The payload should be a list of services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    services = json.loads(response.text)
    if isinstance(services, list):
        return services

    raise TranscriptTranslationError(
        u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(response=response.text)
    )
def perform_transcript(self, job_id, lang_code):
    """
    Ask cielo24 to generate transcripts for a video.

    Arguments:
        job_id (str): cielo24 job id
        lang_code (str): target language of the transcript

    Returns:
        The ``job_id`` that was passed in (the task id from the response is
        only logged).

    Raises:
        Cielo24PerformTranscriptError: if the request does not succeed
    """
    # '{iwp_name}' is passed through as a literal placeholder in the callback url.
    callback_template = '{}?job_id={}&iwp_name={}&lang_code={}&org={}&video_id={}'
    callback_url = callback_template.format(
        self.callback_base_url,
        job_id,
        '{iwp_name}',
        lang_code,
        self.org,
        self.video.studio_id,
    )

    request_url = build_url(
        self.cielo24_api_base_url,
        self.cielo24_perform_transcription,
        v=CIELO24_API_VERSION,
        job_id=job_id,
        target_language=lang_code,
        callback_url=callback_url,
        api_token=self.api_key,
        priority=self.turnaround,
        transcription_fidelity=self.fidelity,
        options=json.dumps({"return_iwp": ["FINAL"]}),
    )
    response = requests.get(request_url)

    if not response.ok:
        # The api token is scrubbed from the url before it goes into the message.
        scrubbed_url = scrub_query_params(request_url, ['api_token'])
        raise Cielo24PerformTranscriptError(
            '[PERFORM TRANSCRIPT ERROR] url={} -- status={} -- text={}'.format(
                scrubbed_url, response.status_code, response.text
            )
        )

    # The response body is evaluated as a Python literal to read the task id.
    task_id = ast.literal_eval(response.text)['TaskId']
    LOGGER.info(
        '[CIELO24] Perform transcript request successful for video=%s with job_id=%s and task_id=%s',
        self.video.studio_id,
        job_id,
        task_id,
    )
    return job_id
def setUp(self):
    """
    Create the course and video fixtures plus the 3Play Media transcript
    preferences used by the tests.
    """
    self.course = Course.objects.create(
        course_name=u'Intro to VEDA',
        institution=u'MAx',
        edx_classid=u'123',
    )
    self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)

    # Callback url carrying the org, video id and language as query params.
    callback_url = build_url(
        u'https://veda.edx.org/3playmedia/transcripts/handle/123123',
        org=u'MAx',
        edx_video_id=VIDEO_DATA['studio_id'],
        lang_code=VIDEO_DATA['source_language'],
    )

    self.video_transcript_preferences = {
        'org': u'MAx',
        'video': self.video,
        'media_url': u'https://s3.amazonaws.com/bkt/video.mp4',
        'api_key': u'insecure_api_key',
        'api_secret': u'insecure_api_secret',
        'turnaround_level': ThreePlayTurnaround.STANDARD,
        'callback_url': callback_url,
        'three_play_api_base_url': 'https://api.3playmedia.com/',
    }
def get_translations_metadata(api_key, file_id, edx_video_id):
    """
    Fetch the translations metadata of a file from 3Play Media.

    Arguments:
        api_key(unicode): api key
        file_id(unicode): file identifier or process identifier
        edx_video_id(unicode): video studio identifier

    Returns:
        A List containing the translations metadata for a file id or None
        in case of a faulty response (failed request or a payload that is
        not a list); both failures are logged.

        Example:
        [
            {
                "id": 1234,
                "translation_service_id": 12,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "French (Canada)",
                "target_language_iso_639_1_code": "fr",
                "state": "complete"
            },
            {
                "id": 1345,
                "translation_service_id": 32,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "German",
                "target_language_iso_639_1_code": "de",
                "state": "in_progress"
            }
        ]
    """
    metadata_url = build_url(
        THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=file_id),
        apikey=api_key
    )
    response = requests.get(metadata_url)

    if not response.ok:
        # The api key is scrubbed from the url before it is logged.
        LOGGER.error(
            u'[3PlayMedia Task] Translations metadata request failed, url=%s -- video=%s -- process_id=%s -- status=%s',
            scrub_query_params(metadata_url, ['apikey']),
            edx_video_id,
            file_id,
            response.status_code,
        )
        return None

    translations = json.loads(response.text)
    if isinstance(translations, list):
        return translations

    LOGGER.error(
        u'[3PlayMedia Task] unable to get translations metadata for video=%s -- process_id=%s -- response=%s',
        edx_video_id,
        file_id,
        response.text,
    )
    return None
def construct_transcript_names(config):
    """
    Build the transcript names used for 'edxval' and 's3'.

    A random hex uuid is appended to the configured transcripts prefix; the
    second name additionally carries the instance prefix in front.

    Arguments:
        config (dict): instance configuration

    Returns:
        tuple of (name without instance prefix, name with instance prefix)
    """
    unprefixed_name = build_url(config['aws_video_transcripts_prefix'], uuid.uuid4().hex)
    prefixed_name = build_url(config['instance_prefix'], unprefixed_name)
    return unprefixed_name, prefixed_name
def test_build_url(self, urls, params, expected_url):
    """
    Check that utils.build_url produces the expected url for the given url
    parts and query params.
    """
    actual_url = utils.build_url(*urls, **params)
    self.assertEqual(actual_url, expected_url)
def test_build_url(self, urls, params):
    """
    Check that utils.build_url joins the url parts and carries the query
    params.
    """
    url_parser = six.moves.urllib.parse
    built_url = utils.build_url(*urls, **params)

    # Query params as they appear in the built url.
    query_pairs = url_parser.parse_qsl(url_parser.urlparse(built_url).query)
    # Non-empty parts joined with single slashes form the expected prefix.
    url_prefix = '/'.join([part.strip('/') for part in urls if part])

    self.assertDictEqual(params, dict(query_pairs))
    self.assertTrue(built_url.startswith(url_prefix))
def cielo24_url(self, cielo24, endpoint):
    """
    Build the absolute url of a cielo24 endpoint.

    Arguments:
        cielo24 (Cielo24Transcript): object providing the api base url
        endpoint (str): url endpoint

    Returns:
        absolute url
    """
    api_base_url = cielo24.cielo24_api_base_url
    return build_url(api_base_url, endpoint)
def fetch_srt_data(url, **request_params):
    """
    Fetch srt data from a transcript provider.

    Arguments:
        url (str): provider url
        **request_params: query params added to the url

    Returns:
        The response text.

    Raises:
        TranscriptFetchError: if the request does not succeed
    """
    srt_url = build_url(url, **request_params)
    response = requests.get(srt_url)

    if response.ok:
        return response.text

    # Credentials are scrubbed from the url before it goes into the message.
    scrubbed_url = scrub_query_params(srt_url, ['apikey', 'api_token'])
    raise TranscriptFetchError(
        '[TRANSCRIPT FETCH ERROR] url={} -- status={} -- text={}'.format(
            scrubbed_url, response.status_code, response.text
        )
    )
def submit_media(self):
    """
    Submit the media to 3Play Media for transcription.

    Returns:
        The response text of the upload request (expected to be the file id).

    Raises:
        ThreePlayMediaPerformTranscriptionError: error while transcription process
    """
    self.validate_media_url()

    # Mandatory attributes required for transcription.
    payload = {
        'link': self.media_url,
        'apikey': self.api_key,
        'api_secret_key': self.api_secret,
        'turnaround_level': self.turnaround_level,
        'callback_url': self.callback_url,
        'batch_name': self.default_dir,
    }

    # The source language id is only sent when one could be resolved.
    language_id = self.get_source_language_id(
        self.get_available_languages(),
        self.video.source_language,
    )
    if language_id:
        payload['language_id'] = language_id

    response = requests.post(url=build_url(self.base_url, self.upload_media_file_url), json=payload)

    if not response.ok:
        failure = 'Upload file request failed with: {response} -- {status}'.format(
            response=response.text,
            status=response.status_code
        )
        raise ThreePlayMediaPerformTranscriptionError(failure)

    # A healthy response is text holding the file id; a payload that
    # deserializes to a dict signals an error:
    # http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
    decoded_body = json.loads(response.text)
    if isinstance(decoded_body, dict):
        raise ThreePlayMediaPerformTranscriptionError(
            'Expected file id but got: {response}'.format(response=response.text)
        )

    return response.text
def test_transcript_flow(self):
    """
    Verify the cielo24 transcription flow.

    The three cielo24 endpoints are stubbed, the flow is started and every
    recorded request is compared against the expected url, body and method.
    """
    job_id = '000-111-222'
    cielo24 = Cielo24Transcript(video=self.video, **self.video_transcript_preferences)

    # Stub the cielo24 endpoints hit by the flow.
    stubbed_endpoints = (
        (cielo24.cielo24_new_job, {'JobId': job_id}),
        (cielo24.cielo24_add_media, {'TaskId': '000-000-111'}),
        (cielo24.cielo24_perform_transcription, {'TaskId': '000-000-000'}),
    )
    for endpoint, stub_body in stubbed_endpoints:
        responses.add(responses.GET, self.cielo24_url(cielo24, endpoint), body=stub_body, status=200)

    cielo24.start_transcription_flow()

    # Total of 6 HTTP requests are made
    # 3 cielo24 requests for first language(en)
    # 3 cielo24 requests for second language(ur)
    self.assertEqual(len(responses.calls), 6)

    # 'TARGET_LANG' is a placeholder that is swapped for each preferred language below.
    # pylint: disable=line-too-long
    callback_url = '{}?job_id={}&iwp_name={}&lang_code={}&org={}&video_id={}'.format(
        'https://veda.edx.org/cielo24/transcript_completed/1234567890',
        '000-111-222',
        '{iwp_name}',
        'TARGET_LANG',
        'MAx',
        '12345',
    )
    new_job_url = build_url(
        'https://sandbox.cielo24.com/api/job/new',
        v=CIELO24_API_VERSION,
        job_name='12345',
        language='en',  # A job's language.
        api_token='cielo24_api_key',
    )
    add_media_url = build_url(
        'https://sandbox.cielo24.com/api/job/add_media',
        v=CIELO24_API_VERSION,
        job_id='000-111-222',
        api_token='cielo24_api_key',
        media_url='https://s3.amazonaws.com/bkt/video.mp4',
    )
    perform_transcription_url = build_url(
        'https://sandbox.cielo24.com/api/job/perform_transcription',
        v=CIELO24_API_VERSION,
        job_id='000-111-222',
        target_language='TARGET_LANG',
        callback_url=callback_url,
        api_token='cielo24_api_key',
        priority='PRIORITY',
        transcription_fidelity='PROFESSIONAL',
        options='{"return_iwp": ["FINAL"]}',
    )
    expected_data = [
        {'url': new_job_url, 'body': None, 'method': 'GET'},
        {'url': add_media_url, 'body': None, 'method': 'GET'},
        {'url': perform_transcription_url, 'body': None, 'method': 'GET'},
    ]

    # Expand the templates into one expected request per recorded call, in order.
    expected_requests = []
    for preferred_language in self.video_transcript_preferences['preferred_languages']:
        for template in expected_data:
            expected = template
            if 'api/job/perform_transcription' in template['url']:
                # Work on a copy so the template keeps its placeholder.
                expected = dict(
                    template,
                    url=template['url'].replace('TARGET_LANG', preferred_language),
                )
            expected_requests.append(expected)

    for call_index, expected in enumerate(expected_requests):
        self.assert_request(responses.calls[call_index].request, expected)
from .api import token_finisher
from VEDA import utils
from VEDA_OS01.enums import TranscriptionProviderErrorType
from VEDA_OS01.models import (URL, Course, Encode, TranscriptCredentials,
                              TranscriptProvider, Video)
from VEDA_OS01.serializers import (CourseSerializer, EncodeSerializer,
                                   URLSerializer, VideoSerializer)
from VEDA_OS01.transcripts import CIELO24_API_VERSION
from VEDA_OS01.utils import PlainTextParser
from control.http_ingest_celeryapp import ingest_video_and_upload_to_hotstore

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Configuration returned by utils.get_config(), loaded once at import time.
auth_dict = utils.get_config()

# cielo24 login url, built from the configured cielo24 api base url.
CIELO24_LOGIN_URL = utils.build_url(auth_dict['cielo24_api_base_url'], '/account/login')


# Model viewset over all Course objects, serialized with CourseSerializer.
class CourseViewSet(viewsets.ModelViewSet):
    queryset = Course.objects.all()
    serializer_class = CourseSerializer
    # Filtering goes through DjangoFilterBackend on the fields listed below.
    filter_backends = (filters.DjangoFilterBackend, )
    filter_fields = ('institution', 'edx_classid', 'proc_loc', 'course_hold', 'sg_projID')

    # Detail route rendered with StaticHTMLRenderer; responds with the
    # `highlighted` attribute of the course returned by get_object().
    @detail_route(renderer_classes=[renderers.StaticHTMLRenderer])
    def highlight(self, request, *args, **kwargs):
        course = self.get_object()
        return Response(course.highlighted)
# 3PlayMedia possible send-along statuses for a transcription callback. COMPLETE = 'complete' ERROR = 'error' # Transcript format TRANSCRIPT_SJSON = 'sjson' CIELO24_TRANSCRIPT_COMPLETED = django.dispatch.Signal( providing_args=['job_id', 'iwp_name', 'lang_code', 'org', 'video_id']) CONFIG = get_config() # Cielo24 API version CIELO24_API_VERSION = 1 # Cielo24 API URLs CIELO24_GET_CAPTION_URL = build_url(CONFIG['cielo24_api_base_url'], 'job/get_caption') # 3PlayMedia callback signal THREE_PLAY_TRANSCRIPTION_DONE = django.dispatch.Signal(providing_args=[ 'org', 'lang_code', 'edx_video_id', 'file_id', 'status', 'error_description' ]) # 3PlayMedia API URLs. THREE_PLAY_TRANSCRIPT_URL = build_url(CONFIG['three_play_api_transcript_url'], 'files/{file_id}/transcript.srt') THREE_PLAY_TRANSLATION_SERVICES_URL = build_url( CONFIG['three_play_api_transcript_url'], 'translation_services') THREE_PLAY_ORDER_TRANSLATION_URL = build_url( CONFIG['three_play_api_base_url'], 'files/{file_id}/translations/order') THREE_PLAY_TRANSLATIONS_METADATA_URL = build_url( CONFIG['three_play_api_transcript_url'], 'files/{file_id}/translations')
from api import token_finisher
from control.veda_file_discovery import FileDiscovery
from VEDA import utils
from VEDA_OS01.enums import TranscriptionProviderErrorType
from VEDA_OS01.models import (URL, Course, Encode, TranscriptCredentials,
                              TranscriptProvider, Video)
from VEDA_OS01.serializers import (CourseSerializer, EncodeSerializer,
                                   URLSerializer, VideoSerializer)
from VEDA_OS01.transcripts import CIELO24_API_VERSION
from VEDA_OS01.utils import PlainTextParser

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Configuration returned by utils.get_config(), loaded once at import time.
CONFIG = utils.get_config()

# cielo24 login url, built from the configured cielo24 api base url.
CIELO24_LOGIN_URL = utils.build_url(CONFIG['cielo24_api_base_url'], '/account/login')

# Make sure the 'Boto' section exists before an option is set in it. The
# section is only added when missing, so no exception has to be swallowed
# for the "already exists" case and unrelated errors are no longer hidden.
if not boto.config.has_section('Boto'):
    boto.config.add_section('Boto')
# Set boto's http socket timeout option (presumably seconds -- confirm in
# the boto config documentation).
boto.config.set('Boto', 'http_socket_timeout', '100')


# Model viewset over all Course objects, serialized with CourseSerializer.
class CourseViewSet(viewsets.ModelViewSet):
    queryset = Course.objects.all()
    serializer_class = CourseSerializer
    # Filtering goes through DjangoFilterBackend on the fields listed below.
    filter_backends = (filters.DjangoFilterBackend, )
    filter_fields = ('institution', 'edx_classid', 'proc_loc', 'course_hold', 'sg_projID')