Esempio n. 1
0
    def test_heal(self, mock_client_init):
        """
        Run VedaHeal discovery with the VAL API GET and PUT calls for
        `self.video_id` stubbed to answer with status 200.
        """
        # Both stubbed calls target the same VAL url for this video.
        val_video_url = build_url(CONFIG_DATA['val_api_url'], self.video_id)

        encoded_video = {
            'url': 'https://testurl.mp4',
            'file_size': 8499040,
            'bitrate': 131,
            'profile': 'mobile_low',
        }
        val_payload = json.dumps({
            'courses': [{u'WestonHS/PFLC1x/3T2015': None}],
            'encoded_videos': [encoded_video],
        })

        # GET answers with the serialized video payload defined above.
        responses.add(
            responses.GET,
            val_video_url,
            body=val_payload,
            content_type='application/json',
            status=200
        )
        # PUT only needs to succeed.
        responses.add(responses.PUT, val_video_url, status=200)

        VedaHeal().discovery()
Esempio n. 2
0
    def start_3play_transcription_process(self, encoded_file):
        """
        Start the 3PlayMedia transcription flow for this video.

        Looks up the transcript credentials for the video's org and provider,
        marks the video's transcript status as in progress and asks the
        3PlayMedia client to generate transcripts. If no credentials exist,
        a warning is logged instead.

        Arguments:
            encoded_file (str): name of encoded file to construct video url
        """
        try:
            # The list may hold several course runs; they all share the same
            # org, so the first entry is used.
            course_org = extract_course_org(self.video_proto.platform_course_url[0])
            credentials = TranscriptCredentials.objects.get(
                org=course_org,
                provider=self.video_query.provider,
            )

            # Update the transcript status for the video.
            status_client = VALAPICall(video_proto=None, val_status=None)
            utils.update_video_status(
                val_api_client=status_client,
                video=self.video_query,
                status=TranscriptStatus.IN_PROGRESS
            )

            # Url of the encoded file that will be handed to 3PlayMedia.
            media_url = build_url(
                self.auth_dict['s3_base_url'],
                self.auth_dict['edx_s3_endpoint_bucket'],
                encoded_file
            )

            # Additional attributes that'll come back with the callback.
            callback_params = dict(
                org=course_org,
                edx_video_id=self.video_query.studio_id,
                lang_code=self.video_query.source_language,
            )
            callback_url = build_url(
                self.auth_dict['veda_base_url'],
                reverse(
                    '3play_media_callback',
                    args=[self.auth_dict['transcript_provider_request_token']]
                ),
                **callback_params
            )

            # Initialize the 3PlayMedia client and start the transcription process.
            client = ThreePlayMediaClient(
                org=course_org,
                video=self.video_query,
                media_url=media_url,
                api_key=credentials.api_key,
                api_secret=credentials.api_secret,
                callback_url=callback_url,
                turnaround_level=self.video_query.three_play_turnaround,
                three_play_api_base_url=self.auth_dict['three_play_api_base_url'],
            )
            client.generate_transcripts()

        except TranscriptCredentials.DoesNotExist:
            LOGGER.warning(
                '[DELIVERY] : Transcript preference is not found for provider=%s, video=%s',
                self.video_query.provider,
                self.video_query.studio_id,
            )
Esempio n. 3
0
    def cielo24_transcription_flow(self, encoded_file):
        """
        Cielo24 transcription flow.

        Looks up the cielo24 api key for the video's org and provider, marks
        the video's transcript status as in progress and starts the cielo24
        transcription flow. When no credentials are found, a warning is
        logged and nothing is started.

        Arguments:
            encoded_file (str): name of encoded file to construct video url

        Returns:
            None
        """
        # The org is derived from the first course url in the list.
        org = extract_course_org(self.video_proto.platform_course_url[0])

        try:
            api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
        except TranscriptCredentials.DoesNotExist:
            # `Logger.warn` is a deprecated alias of `Logger.warning`.
            LOGGER.warning('[DELIVERY] Unable to find cielo24 api_key for org=%s', org)
            return None

        # Url of the encoded file that cielo24 will be asked to transcribe.
        s3_video_url = build_url(
            self.auth_dict['s3_base_url'],
            self.auth_dict['edx_s3_endpoint_bucket'],
            encoded_file
        )

        # Base callback url; the request token is part of the reversed path.
        callback_base_url = build_url(
            self.auth_dict['veda_base_url'],
            reverse(
                'cielo24_transcript_completed',
                args=[self.auth_dict['transcript_provider_request_token']]
            )
        )

        # update transcript status for video.
        val_api_client = VALAPICall(video_proto=None, val_status=None)
        utils.update_video_status(
            val_api_client=val_api_client,
            video=self.video_query,
            status=TranscriptStatus.IN_PROGRESS
        )

        cielo24 = Cielo24Transcript(
            self.video_query,
            org,
            api_key,
            self.video_query.cielo24_turnaround,
            self.video_query.cielo24_fidelity,
            self.video_query.preferred_languages,
            s3_video_url,
            callback_base_url,
            self.auth_dict['cielo24_api_base_url'],
        )
        cielo24.start_transcription_flow()
    def get_available_languages(self):
        """
        Fetch the languages supported by 3Play Media.

        Returns:
            The deserialized response body (anything but a dict is accepted).

        Raises:
            ThreePlayMediaLanguagesRetrievalError: when the request is not ok
                or the response body deserializes to a dict.
        """
        languages_url = build_url(
            self.base_url,
            self.available_languages_url,
            apikey=self.api_key,
        )
        response = requests.get(url=languages_url)
        if not response.ok:
            # The api key is scrubbed from the url before it is reported.
            error_message = 'Error while retrieving available languages: url={url} -- {response} -- {status}'.format(
                url=scrub_query_params(languages_url, ['apikey']),
                response=response.text,
                status=response.status_code,
            )
            raise ThreePlayMediaLanguagesRetrievalError(error_message)

        # A normal response should be a list containing 3Play Media supported languages and if we're getting a dict,
        # there must be an error: https://support.3playmedia.com/hc/en-us/articles/227729968-Captions-Imports-API
        languages = json.loads(response.text)
        if isinstance(languages, dict):
            raise ThreePlayMediaLanguagesRetrievalError(
                'Expected 3Play Media Supported languages but got: {response}'.format(response=response.text)
            )

        return languages
    def embed_media_url(self, job_id):
        """
        Create cielo24 add media url.

        Arguments:
            job_id (str): cielo24 job id

        Returns:
            cielo24 task id

        Raises:
            Cielo24AddMediaError: when the add media request is not ok
        """
        media_url = build_url(self.cielo24_api_base_url,
                              self.cielo24_add_media,
                              v=CIELO24_API_VERSION,
                              job_id=job_id,
                              api_token=self.api_key,
                              media_url=self.s3_video_url)
        response = requests.get(media_url)

        if not response.ok:
            # The api token is scrubbed from the url before it is reported.
            raise Cielo24AddMediaError(
                '[ADD MEDIA ERROR] url={} -- status={} -- text={}'.format(
                    scrub_query_params(media_url, ['api_token']),
                    response.status_code, response.text))

        # Parse the body as JSON first: ast.literal_eval rejects JSON-only
        # tokens such as true/false/null. Bodies that are not valid JSON are
        # still handed to literal_eval, as before.
        try:
            response_data = response.json()
        except ValueError:
            response_data = ast.literal_eval(response.text)

        task_id = response_data['TaskId']
        LOGGER.info(
            '[CIELO24] Media url created for video=%s with job_id=%s and task_id=%s',
            self.video.studio_id, job_id, task_id)
        return task_id
    def create_job(self):
        """
        Create new job for transcription.

        Returns:
            cielo24 job id

        Raises:
            Cielo24CreateJobError: when the create job request is not ok
        """
        create_job_url = build_url(self.cielo24_api_base_url,
                                   self.cielo24_new_job,
                                   v=CIELO24_API_VERSION,
                                   language=self.video.source_language,
                                   api_token=self.api_key,
                                   job_name=self.video.studio_id)
        response = requests.get(create_job_url)

        if not response.ok:
            # The api token is scrubbed from the url before it is reported.
            raise Cielo24CreateJobError(
                '[CREATE JOB ERROR] url={} -- status={} -- text={}'.format(
                    scrub_query_params(create_job_url, ['api_token']),
                    response.status_code,
                    response.text,
                ))

        # Parse the body as JSON first: ast.literal_eval rejects JSON-only
        # tokens such as true/false/null. Bodies that are not valid JSON are
        # still handed to literal_eval, as before.
        try:
            response_data = response.json()
        except ValueError:
            response_data = ast.literal_eval(response.text)

        job_id = response_data['JobId']
        LOGGER.info('[CIELO24] New job created for video=%s with job_id=%s',
                    self.video.studio_id, job_id)
        return job_id
def get_translation_services(api_key):
    """
    Fetch the translation services available from 3Play Media.

    Arguments:
        api_key(unicode): api key which is required to make an authentic call to 3Play Media

    Returns:
        Available 3Play Media Translation services (a list).

    Raises:
        TranscriptTranslationError: when the request is not ok or the response
            body does not deserialize to a list.
    """
    services_url = build_url(THREE_PLAY_TRANSLATION_SERVICES_URL, apikey=api_key)
    response = requests.get(services_url)
    if not response.ok:
        # The api key is scrubbed from the url before it is reported.
        error_template = (
            u'[3PlayMedia Callback] Error fetching the translation services -- url={url}, {status}, {response}'
        )
        raise TranscriptTranslationError(
            error_template.format(
                url=scrub_query_params(services_url, ['apikey']),
                status=response.status_code,
                response=response.text,
            )
        )

    # Response should be a list containing services, details:
    # http://support.3playmedia.com/hc/en-us/articles/227729988-Translations-API-Methods
    services = json.loads(response.text)
    if isinstance(services, list):
        return services

    raise TranscriptTranslationError(
        u'[3PlayMedia Callback] Expected list but got: -- {response}.'.format(response=response.text)
    )
    def perform_transcript(self, job_id, lang_code):
        """
        Request cielo24 to generate transcripts for a video.

        Arguments:
            job_id: cielo24 job id the transcription is requested for
            lang_code: target language of the transcript

        Returns:
            The `job_id` that was passed in; the task id from the response is
            only logged.

        Raises:
            Cielo24PerformTranscriptError: when the request is not ok
        """
        # '{iwp_name}' is passed through literally, so the callback url keeps
        # it as a placeholder.
        callback_url = '{}?job_id={}&iwp_name={}&lang_code={}&org={}&video_id={}'.format(
            self.callback_base_url, job_id, '{iwp_name}', lang_code, self.org,
            self.video.studio_id)

        perform_transcript_url = build_url(
            self.cielo24_api_base_url,
            self.cielo24_perform_transcription,
            v=CIELO24_API_VERSION,
            job_id=job_id,
            target_language=lang_code,
            callback_url=callback_url,
            api_token=self.api_key,
            priority=self.turnaround,
            transcription_fidelity=self.fidelity,
            options=json.dumps({"return_iwp": ["FINAL"]}))
        response = requests.get(perform_transcript_url)

        if not response.ok:
            # The api token is scrubbed from the url before it is reported.
            raise Cielo24PerformTranscriptError(
                '[PERFORM TRANSCRIPT ERROR] url={} -- status={} -- text={}'.
                format(
                    scrub_query_params(perform_transcript_url, ['api_token']),
                    response.status_code, response.text))

        # Parse the body as JSON first: ast.literal_eval rejects JSON-only
        # tokens such as true/false/null. Bodies that are not valid JSON are
        # still handed to literal_eval, as before.
        try:
            response_data = response.json()
        except ValueError:
            response_data = ast.literal_eval(response.text)

        task_id = response_data['TaskId']
        LOGGER.info(
            '[CIELO24] Perform transcript request successful for video=%s with job_id=%s and task_id=%s',
            self.video.studio_id, job_id, task_id)
        return job_id
Esempio n. 9
0
    def setUp(self):
        """
        Create the course and video records and the transcript preferences
        shared by the tests.
        """
        self.course = Course.objects.create(
            course_name=u'Intro to VEDA',
            institution=u'MAx',
            edx_classid=u'123',
        )
        self.video = Video.objects.create(inst_class=self.course, **VIDEO_DATA)

        # Callback url carrying the org, video id and language as query params.
        callback_url = build_url(
            u'https://veda.edx.org/3playmedia/transcripts/handle/123123',
            org=u'MAx',
            edx_video_id=VIDEO_DATA['studio_id'],
            lang_code=VIDEO_DATA['source_language'],
        )

        self.video_transcript_preferences = {
            'org': u'MAx',
            'video': self.video,
            'media_url': u'https://s3.amazonaws.com/bkt/video.mp4',
            'api_key': u'insecure_api_key',
            'api_secret': u'insecure_api_secret',
            'turnaround_level': ThreePlayTurnaround.STANDARD,
            'callback_url': callback_url,
            'three_play_api_base_url': 'https://api.3playmedia.com/',
        }
Esempio n. 10
0
def get_translations_metadata(api_key, file_id, edx_video_id):
    """
    Fetch the translations metadata of a file from 3Play Media.

    Arguments:
        api_key(unicode): api key
        file_id(unicode): file identifier or process identifier
        edx_video_id(unicode): video studio identifier

    Returns:
        A List containing the translations metadata for a file id or None
        in case of a faulty response (the failure is logged).
        Example:
        [
            {
                "id": 1234,
                "translation_service_id": 12,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "French (Canada)",
                "target_language_iso_639_1_code": "fr",
                "state": "complete"
            },
            {
                "id": 1345,
                "translation_service_id": 32,
                "source_language_name": "English",
                "source_language_iso_639_1_code": "en",
                "target_language_name": "German",
                "target_language_iso_639_1_code": "de",
                "state": "in_progress"
            }
        ]
    """
    metadata_url = build_url(
        THREE_PLAY_TRANSLATIONS_METADATA_URL.format(file_id=file_id),
        apikey=api_key,
    )
    response = requests.get(metadata_url)
    if not response.ok:
        # The api key is scrubbed from the url before it is logged.
        LOGGER.error(
            u'[3PlayMedia Task] Translations metadata request failed, url=%s -- video=%s -- process_id=%s -- status=%s',
            scrub_query_params(metadata_url, ['apikey']),
            edx_video_id,
            file_id,
            response.status_code,
        )
        return None

    metadata = json.loads(response.text)
    if isinstance(metadata, list):
        return metadata

    # Anything other than a list is treated as a faulty response.
    LOGGER.error(
        u'[3PlayMedia Task] unable to get translations metadata for video=%s -- process_id=%s -- response=%s',
        edx_video_id,
        file_id,
        response.text,
    )
    return None
Esempio n. 11
0
def construct_transcript_names(config):
    """
    Build the transcript names for 'edxval' and 's3'.

    A random uuid hex is appended to the configured transcripts prefix; the
    second name additionally carries the instance prefix in front.

    Arguments:
        config (dict): instance configuration

    Returns:
        tuple: (name without instance prefix, name with instance prefix)
    """
    bare_name = build_url(config['aws_video_transcripts_prefix'], uuid.uuid4().hex)
    prefixed_name = build_url(config['instance_prefix'], bare_name)
    return bare_name, prefixed_name
Esempio n. 12
0
 def test_build_url(self, urls, params, expected_url):
     """
     Tests that utils.build_url produces the expected url for the given
     url parts and query params.
     """
     built_url = utils.build_url(*urls, **params)
     self.assertEqual(built_url, expected_url)
Esempio n. 13
0
    def test_build_url(self, urls, params):
        """
        Tests that utils.build_url joins the url parts and carries the
        given params in the query string.
        """
        built_url = utils.build_url(*urls, **params)

        # Query params parsed back out of the url that was built.
        query_string = six.moves.urllib.parse.urlparse(built_url).query
        actual_params = six.moves.urllib.parse.parse_qsl(query_string)

        # Non-empty url parts, stripped of slashes and joined with '/'.
        url_prefix = '/'.join(part.strip('/') for part in urls if part)

        self.assertDictEqual(params, dict(actual_params))

        self.assertTrue(built_url.startswith(url_prefix))
    def cielo24_url(self, cielo24, endpoint):
        """
        Build the absolute url of a cielo24 endpoint.

        Arguments:
            cielo24 (Cielo24Transcript): object providing `cielo24_api_base_url`
            endpoint (str): url endpoint

        Returns:
            absolute url
        """
        api_base_url = cielo24.cielo24_api_base_url
        return build_url(api_base_url, endpoint)
Esempio n. 15
0
def fetch_srt_data(url, **request_params):
    """
    Fetch srt data from transcript provider.

    Arguments:
        url: provider url to fetch from
        **request_params: query params added to the url

    Returns:
        The response text.

    Raises:
        TranscriptFetchError: when the request is not ok
    """
    srt_url = build_url(url, **request_params)
    response = requests.get(srt_url)

    if response.ok:
        return response.text

    # Credentials are scrubbed from the url before it is reported.
    error_message = '[TRANSCRIPT FETCH ERROR] url={} -- status={} -- text={}'.format(
        scrub_query_params(srt_url, ['apikey', 'api_token']),
        response.status_code,
        response.text,
    )
    raise TranscriptFetchError(error_message)
    def submit_media(self):
        """
        Submit the media file for transcription.

        Returns:
            The response text of the upload request.

        Raises:
            ThreePlayMediaPerformTranscriptionError: when the upload request
                is not ok or its response body deserializes to a dict
        """
        self.validate_media_url()

        # Mandatory attributes required for transcription.
        payload = {
            'link': self.media_url,
            'apikey': self.api_key,
            'api_secret_key': self.api_secret,
            'turnaround_level': self.turnaround_level,
            'callback_url': self.callback_url,
            'batch_name': self.default_dir,
        }

        # The language id is only sent when one is found for the source language.
        languages = self.get_available_languages()
        language_id = self.get_source_language_id(languages, self.video.source_language)
        if language_id:
            payload['language_id'] = language_id

        response = requests.post(
            url=build_url(self.base_url, self.upload_media_file_url),
            json=payload,
        )

        if not response.ok:
            error_message = 'Upload file request failed with: {response} -- {status}'.format(
                response=response.text, status=response.status_code
            )
            raise ThreePlayMediaPerformTranscriptionError(error_message)

        # A normal response should be a text containing file id and if we're getting a deserializable dict, there
        # must be an error: http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        deserialized = json.loads(response.text)
        if isinstance(deserialized, dict):
            raise ThreePlayMediaPerformTranscriptionError(
                'Expected file id but got: {response}'.format(response=response.text)
            )

        return response.text
    def test_transcript_flow(self):
        """
        Verify cielo24 transcription flow: each preferred language must
        trigger the new job, add media and perform transcription requests,
        in that order.
        """
        job_id = '000-111-222'

        cielo24 = Cielo24Transcript(video=self.video, **self.video_transcript_preferences)

        # Endpoint -> mocked response body, registered in the same order the
        # flow is expected to call them.
        mocked_endpoints = [
            (cielo24.cielo24_new_job, {'JobId': job_id}),
            (cielo24.cielo24_add_media, {'TaskId': '000-000-111'}),
            (cielo24.cielo24_perform_transcription, {'TaskId': '000-000-000'}),
        ]
        for endpoint, mocked_body in mocked_endpoints:
            responses.add(
                responses.GET,
                self.cielo24_url(cielo24, endpoint),
                body=mocked_body,
                status=200,
            )

        cielo24.start_transcription_flow()

        # Total of 6 HTTP requests are made
        # 3 cielo24 requests for first language(en)
        # 3 cielo24 requests for second language(ur)
        self.assertEqual(len(responses.calls), 6)

        # 'TARGET_LANG' is a placeholder that is swapped for each preferred
        # language further below.
        callback_url = '{}?job_id={}&iwp_name={}&lang_code={}&org={}&video_id={}'.format(
            'https://veda.edx.org/cielo24/transcript_completed/1234567890',
            '000-111-222',
            '{iwp_name}',
            'TARGET_LANG',
            'MAx',
            '12345',
        )

        # pylint: disable=line-too-long
        new_job_request = {
            'url': build_url(
                'https://sandbox.cielo24.com/api/job/new',
                v=CIELO24_API_VERSION,
                job_name='12345',
                language='en',  # A job's language.
                api_token='cielo24_api_key',
            ),
            'body': None,
            'method': 'GET'
        }
        add_media_request = {
            'url': build_url(
                'https://sandbox.cielo24.com/api/job/add_media',
                v=CIELO24_API_VERSION,
                job_id='000-111-222',
                api_token='cielo24_api_key',
                media_url='https://s3.amazonaws.com/bkt/video.mp4',
            ),
            'body': None,
            'method': 'GET',
        }
        perform_transcription_request = {
            'url': build_url(
                'https://sandbox.cielo24.com/api/job/perform_transcription',
                v=CIELO24_API_VERSION,
                job_id='000-111-222',
                target_language='TARGET_LANG',
                callback_url=callback_url,
                api_token='cielo24_api_key',
                priority='PRIORITY',
                transcription_fidelity='PROFESSIONAL',
                options='{"return_iwp": ["FINAL"]}',
            ),
            'body': None,
            'method': 'GET'
        }
        expected_data = [
            new_job_request,
            add_media_request,
            perform_transcription_request,
        ]

        call_index = 0
        languages = self.video_transcript_preferences['preferred_languages']
        for language in languages:
            for template in expected_data:
                expected_request = template
                # replace target language with appropriate value (on a copy,
                # so the template stays reusable for the next language)
                if 'api/job/perform_transcription' in template['url']:
                    expected_request = dict(template)
                    expected_request['url'] = template['url'].replace('TARGET_LANG', language)

                self.assert_request(
                    responses.calls[call_index].request,
                    expected_request,
                )
                call_index += 1
Esempio n. 18
0
from .api import token_finisher
from VEDA import utils
from VEDA_OS01.enums import TranscriptionProviderErrorType
from VEDA_OS01.models import (URL, Course, Encode, TranscriptCredentials,
                              TranscriptProvider, Video)
from VEDA_OS01.serializers import (CourseSerializer, EncodeSerializer,
                                   URLSerializer, VideoSerializer)
from VEDA_OS01.transcripts import CIELO24_API_VERSION
from VEDA_OS01.utils import PlainTextParser
from control.http_ingest_celeryapp import ingest_video_and_upload_to_hotstore

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Configuration is read once, when this module is imported.
auth_dict = utils.get_config()
# cielo24 login url, built from the configured cielo24 API base url.
CIELO24_LOGIN_URL = utils.build_url(auth_dict['cielo24_api_base_url'],
                                    '/account/login')


# Model viewset over all Course records, filterable on a fixed set of fields.
class CourseViewSet(viewsets.ModelViewSet):

    queryset = Course.objects.all()
    serializer_class = CourseSerializer
    filter_backends = (filters.DjangoFilterBackend, )
    filter_fields = (
        'institution',
        'edx_classid',
        'proc_loc',
        'course_hold',
        'sg_projID',
    )

    # Extra detail route that responds with the `highlighted` attribute of
    # the selected course, rendered through the static HTML renderer.
    @detail_route(renderer_classes=[renderers.StaticHTMLRenderer])
    def highlight(self, request, *args, **kwargs):
        selected_course = self.get_object()
        highlighted = selected_course.highlighted
        return Response(highlighted)
Esempio n. 19
0
# 3PlayMedia possible send-along statuses for a transcription callback.
COMPLETE = 'complete'
ERROR = 'error'

# Transcript format
TRANSCRIPT_SJSON = 'sjson'
# Cielo24 transcript completion signal and the arguments it provides.
CIELO24_TRANSCRIPT_COMPLETED = django.dispatch.Signal(
    providing_args=['job_id', 'iwp_name', 'lang_code', 'org', 'video_id'])
# Configuration is read once, when this module is imported; the URL constants
# below are built from it.
CONFIG = get_config()

# Cielo24 API version
CIELO24_API_VERSION = 1

# Cielo24 API URLs
CIELO24_GET_CAPTION_URL = build_url(CONFIG['cielo24_api_base_url'],
                                    'job/get_caption')

# 3PlayMedia callback signal
THREE_PLAY_TRANSCRIPTION_DONE = django.dispatch.Signal(providing_args=[
    'org', 'lang_code', 'edx_video_id', 'file_id', 'status',
    'error_description'
])
# 3PlayMedia API URLs.
# The '{file_id}' placeholders are left unformatted here and are meant to be
# filled in with `.format(file_id=...)` by the code using these URLs.
THREE_PLAY_TRANSCRIPT_URL = build_url(CONFIG['three_play_api_transcript_url'],
                                      'files/{file_id}/transcript.srt')
THREE_PLAY_TRANSLATION_SERVICES_URL = build_url(
    CONFIG['three_play_api_transcript_url'], 'translation_services')
# Note: this one is built from 'three_play_api_base_url', unlike the other
# 3PlayMedia URLs in this group, which use 'three_play_api_transcript_url'.
THREE_PLAY_ORDER_TRANSLATION_URL = build_url(
    CONFIG['three_play_api_base_url'], 'files/{file_id}/translations/order')
THREE_PLAY_TRANSLATIONS_METADATA_URL = build_url(
    CONFIG['three_play_api_transcript_url'], 'files/{file_id}/translations')
Esempio n. 20
0
from api import token_finisher
from control.veda_file_discovery import FileDiscovery
from VEDA import utils
from VEDA_OS01.enums import TranscriptionProviderErrorType
from VEDA_OS01.models import (URL, Course, Encode, TranscriptCredentials,
                              TranscriptProvider, Video)
from VEDA_OS01.serializers import (CourseSerializer, EncodeSerializer,
                                   URLSerializer, VideoSerializer)
from VEDA_OS01.transcripts import CIELO24_API_VERSION
from VEDA_OS01.utils import PlainTextParser

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# Configuration is read once, when this module is imported.
CONFIG = utils.get_config()
# cielo24 login url, built from the configured cielo24 API base url.
CIELO24_LOGIN_URL = utils.build_url(CONFIG['cielo24_api_base_url'],
                                    '/account/login')

# Make sure a 'Boto' section exists before the socket timeout is set on it.
try:
    boto.config.add_section('Boto')
except Exception:  # pylint: disable=broad-except
    # Best effort: presumably this fails when the section already exists
    # (TODO confirm). `Exception` is used instead of a bare `except` so that
    # SystemExit and KeyboardInterrupt are no longer swallowed.
    pass
boto.config.set('Boto', 'http_socket_timeout', '100')


class CourseViewSet(viewsets.ModelViewSet):

    queryset = Course.objects.all()
    serializer_class = CourseSerializer
    filter_backends = (filters.DjangoFilterBackend, )
    filter_fields = ('institution', 'edx_classid', 'proc_loc', 'course_hold',
                     'sg_projID')