def batch_download_transcript():
    """
    Download the transcript of every media item listed in the configured csv.

    The csv path is built from the 'path.local.transcripts' setting (section
    'base') and the 'media.download_transcript.csv' setting (section 'test').
    Element 0 of each parsed row is handed to download_transcript as the
    media uuid.
    """
    csv_dir = settings.get('base', 'path.local.transcripts')
    csv_name = settings.get('test', 'media.download_transcript.csv')
    rows = csv_to_json(csv_filename="%s/%s" % (csv_dir, csv_name))['data']
    for row in rows:
        download_transcript(media_uuid=row[0])
def transcribe(self, client, success_callback_url='', error_callback_url='', **kwargs):
    """
    Transcribe an existing media item.

    @param client: api client used to send the request
    @param success_callback_url: sent as 'success_callback_url' when set
    @param error_callback_url: sent as 'error_callback_url' when set
    @param kwargs: accepted but not used here
    @return: the Process built from the 'process' entry of the json response
             (also stored on self.process_transcription)
    """
    headers = {}
    url = [
        settings.get('base', 'paths.api.media'),
        self.uuid,
        settings.get('base', 'paths.api.media.transcribe'),
    ]

    # The callback urls used to be url-encoded and then discarded: the request
    # always went out with an empty body. They are now sent when the caller
    # supplies at least one; without callbacks the body stays the empty string
    # exactly as before.
    # NOTE(review): assumes client.request sends `data` as the request body and
    # that the endpoint reads url-encoded callbacks from it -- confirm.
    data = ""
    if success_callback_url or error_callback_url:
        data = urllib.urlencode({
            'success_callback_url': success_callback_url,
            'error_callback_url': error_callback_url,
        })

    response = client.request(url=url, data=data, headers=headers)
    response_json = json.loads(response)
    self.process_transcription = Process(fields=response_json['process'])
    return self.process_transcription
def align(self, client, aligndata, success_callback_url='', error_callback_url='', **kwargs):
    """
    Align an existing media item.

    @param client: api client used to send the request
    @param aligndata: value handed to read_file() to obtain the 'aligndata' field
    @param success_callback_url: sent as the 'success_callback_url' field
    @param error_callback_url: sent as the 'error_callback_url' field
    @param kwargs: accepted but not used here
    @return: the Process built from the 'process' entry of the json response
             (also stored on self.process_alignment)
    """
    log.info("aligning %s" % aligndata)

    url = [
        settings.get('base', 'paths.api.media'),
        self.uuid,
        settings.get('base', 'paths.api.media.align'),
    ]
    form_fields = {
        'aligndata': read_file(aligndata),
        'success_callback_url': success_callback_url,
        'error_callback_url': error_callback_url,
    }
    body, encoding_headers = multipart_encode(form_fields)

    # the request headers are exactly the ones produced by the multipart encoder
    request_headers = {}
    request_headers.update(encoding_headers)

    reply = client.request(url=url, data=body, headers=request_headers)
    self.process_alignment = Process(fields=json.loads(reply)['process'])
    return self.process_alignment
def batch_upload_transcribe():
    """
    Upload and transcribe every media file listed in the configured csv.

    The csv path is built from the 'path.local.media' setting (section 'base')
    and the 'media.transcribe.csv' setting (section 'test'). For each parsed
    row, element 0 (stripped of surrounding whitespace) is the media filename
    and element 1 is the title.
    """
    csv_path = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        settings.get('test', 'media.transcribe.csv'),
    )
    for row in csv_to_json(csv_filename=csv_path)['data']:
        upload_transcribe(media_filename=row[0].strip(), title=row[1])
def batch_upload_transcribe():
    """
    Upload and transcribe every media file listed in the configured csv.

    Calls register_openers() first, then reads the csv whose path is built
    from the 'path.local.media' setting (section 'base') and the
    'media.transcribe.csv' setting (section 'test'). Element 0 of each parsed
    row is passed on as the media filename.
    """
    register_openers()
    csv_path = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        settings.get('test', 'media.transcribe.csv'),
    )
    for row in csv_to_json(csv_filename=csv_path)['data']:
        upload_transcribe(media_filename=row[0])
def download_transcript(media_uuid):
    """
    Save the srt transcript of a media item into the local output folder.

    Uses the module-level `client`. The media item is fetched first, then its
    current transcript is requested in 'srt' format and written to
    "<output folder>/<media title>.srt", where the output folder is the
    'path.local.scripts.output' setting (section 'base'). The folder is
    created when it does not exist yet.

    @param media_uuid: uuid of the media item whose transcript is downloaded
    """
    media_item = Media.get(client=client, uuid=media_uuid)
    print(media_item.current_transcript)
    transcript = Transcript.get(
        client=client,
        uuid=media_item.current_transcript['uuid'],
        format='srt',
    )

    output_dir = settings.get('base', 'path.local.scripts.output')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The file used to be opened and never closed, leaving the flush to the
    # garbage collector; `with` closes it even when the write fails.
    with open("%s/%s.srt" % (output_dir, media_item.title), 'w') as srt_file:
        srt_file.write(transcript.content)
def batch_anthrotranscribe():
    """
    Run anthrotranscribe on every media item listed in the configured csv.

    The csv path is built from the 'path.local.media' setting (section 'base')
    and the 'media.existing.anthrotranscribe.csv' setting (section 'test').
    Element 0 of each parsed row is passed on as the media uuid.
    """
    # transcribe all the files from the csv
    csv_dir = settings.get('base', 'path.local.media')
    csv_name = settings.get('test', 'media.existing.anthrotranscribe.csv')
    rows = csv_to_json(csv_filename="%s/%s" % (csv_dir, csv_name))['data']
    for row in rows:
        anthrotranscribe(media_uuid=row[0])
def batch_download_transcript():
    """
    Download the transcript of every media item listed in the configured csv.

    The csv lives at "<path.local.transcripts>/<media.download_transcript.csv>"
    (settings sections 'base' and 'test' respectively). Element 0 of each
    parsed row is used as the media uuid.
    """
    csv_path = "%s/%s" % (
        settings.get('base', 'path.local.transcripts'),
        settings.get('test', 'media.download_transcript.csv'),
    )
    parsed = csv_to_json(csv_filename=csv_path)
    for row in parsed['data']:
        download_transcript(media_uuid=row[0])
def batch_upload_transcribe():
    """
    Upload and transcribe every media file listed in the configured csv.

    The csv lives at "<path.local.media>/<media.transcribe.csv>" (settings
    sections 'base' and 'test' respectively). Each parsed row provides the
    media filename at index 0 (surrounding whitespace removed) and the title
    at index 1.
    """
    csv_dir = settings.get('base', 'path.local.media')
    csv_name = settings.get('test', 'media.transcribe.csv')
    parsed = csv_to_json(csv_filename="%s/%s" % (csv_dir, csv_name))
    for row in parsed['data']:
        filename = row[0].strip()
        upload_transcribe(media_filename=filename, title=row[1])
def test_align_remote(self):
    """
    Create a media item from the remote mp3 with align data (no transcription)
    and check that a pending alignment process is attached to it.
    """
    remote_mp3 = settings.get('test', 'audio_test_remote_mp3')
    transcript_path = "%s/%s" % (
        settings.get('base', 'path.local.transcripts'),
        settings.get('test', 'transcript.align'),
    )

    media_item = Media.create(
        client=self.client,
        media_filename=remote_mp3,
        aligndata=transcript_path,
        transcribe=False,
    )

    assert hasattr(media_item, 'process_alignment')
    process = media_item.process_alignment
    assert process.status == 'PENDING'
    assert process.progress == 0
    assert hasattr(process, 'uuid')
def batch_upload_transcribe():
    """
    Upload and transcribe every media file listed in the configured csv.

    register_openers() is called before anything else. The csv lives at
    "<path.local.media>/<media.transcribe.csv>" (settings sections 'base' and
    'test' respectively); element 0 of each parsed row is the media filename.
    """
    register_openers()
    csv_dir = settings.get('base', 'path.local.media')
    csv_name = settings.get('test', 'media.transcribe.csv')
    parsed = csv_to_json(csv_filename="%s/%s" % (csv_dir, csv_name))
    for row in parsed['data']:
        upload_transcribe(media_filename=row[0])
def test_align_remote(self):
    """
    Aligning at creation time: a media created from the remote mp3 with
    `aligndata` and `transcribe=False` must expose a pending alignment process.
    """
    create_args = {
        'client': self.client,
        'media_filename': settings.get('test', 'audio_test_remote_mp3'),
        'aligndata': "%s/%s" % (
            settings.get('base', 'path.local.transcripts'),
            settings.get('test', 'transcript.align'),
        ),
        'transcribe': False,
    }
    media_item = Media.create(**create_args)

    assert hasattr(media_item, 'process_alignment')
    alignment = media_item.process_alignment
    assert alignment.status == 'PENDING'
    assert alignment.progress == 0
    assert hasattr(alignment, 'uuid')
def batch_anthrotranscribe():
    """
    Run anthrotranscribe on every media item listed in the configured csv.

    The csv lives at "<path.local.media>/<media.existing.anthrotranscribe.csv>"
    (settings sections 'base' and 'test' respectively). Element 0 of each
    parsed row is used as the media uuid.
    """
    # transcribe all the files from the csv
    csv_path = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        settings.get('test', 'media.existing.anthrotranscribe.csv'),
    )
    parsed = csv_to_json(csv_filename=csv_path)
    for row in parsed['data']:
        anthrotranscribe(media_uuid=row[0])
def download_transcript(media_uuid):
    """
    Save the srt transcript of a media item into the local output folder.

    Uses the module-level `client`. Fetches the media item, requests its
    current transcript in 'srt' format and writes the transcript content to
    "<output folder>/<media title>.srt". The output folder comes from the
    'path.local.scripts.output' setting (section 'base') and is created when
    missing.

    @param media_uuid: uuid of the media item whose transcript is downloaded
    """
    media_item = Media.get(client=client, uuid=media_uuid)
    print(media_item.current_transcript)
    transcript = Transcript.get(
        client=client,
        uuid=media_item.current_transcript['uuid'],
        format='srt',
    )

    output_dir = settings.get('base', 'path.local.scripts.output')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    srt_path = "%s/%s.srt" % (output_dir, media_item.title)
    # The handle was previously never closed; the context manager closes
    # (and therefore flushes) it even if write() raises.
    with open(srt_path, 'w') as srt_file:
        srt_file.write(transcript.content)
def __init__(self):
    """
    Create the api client

    Reads the "username" and "password" entries of the "credentials" settings
    section into self.username / self.password.

    @raise Exception: any error raised while reading the settings is logged
                      (message, exception, traceback) and re-raised unchanged
    """
    # init config
    try:
        self.username = settings.get("credentials", "username")
        self.password = settings.get("credentials", "password")
    except Exception as e:
        log.error("Error getting api credentials from settings file")
        log.error(e)
        log.error(traceback.format_exc())
        # bare raise keeps the original traceback; `raise e` restarts it from
        # this line in python 2
        raise
def __init__(self):
    """
    Create the api client

    Loads the api credentials ("username" and "password" of the "credentials"
    settings section) onto the instance.

    @raise Exception: errors from the settings lookup are logged and then
                      propagated unchanged
    """
    # init config
    try:
        self.username = settings.get("credentials", "username")
        self.password = settings.get("credentials", "password")
    except Exception as e:
        log.error("Error getting api credentials from settings file")
        log.error(e)
        log.error(traceback.format_exc())
        # re-raise with the original traceback intact (`raise e` would reset
        # it to this line in python 2)
        raise
def batch_align():
    """
    Upload and align every media file listed in the configured csv.

    register_openers() is called first. The csv lives at
    "<path.local.media>/<media.align.csv>" (settings sections 'base' and
    'test' respectively). For each parsed row, element 1 is the media filename
    and element 0 names the align data file
    "<path.local.transcripts>/<element 0>.txt".
    """
    register_openers()

    # align all the files from the csv
    csv_dir = settings.get('base', 'path.local.media')
    csv_name = settings.get('test', 'media.align.csv')
    rows = csv_to_json(csv_filename="%s/%s" % (csv_dir, csv_name))['data']

    for row in rows:
        media_filename = row[1]
        align_file = "%s/%s.txt" % (settings.get('base', 'path.local.transcripts'), row[0])
        upload_align(media_filename=media_filename, aligndata=align_file)
class BaseClient(object):
    """
    This is the base client, implements basic rest methods
    """

    def __init__(self):
        """
        Create the api client

        Reads the credentials ("credentials" section: "username", "password"),
        resets the request headers through self._reset_headers() and reads the
        api base path ("base" section: "paths.api") into self.base_path.

        @raise Exception: any error raised by a settings lookup is logged
                          (message, exception, traceback) and re-raised
                          unchanged
        """
        # init config
        try:
            self.username = settings.get("credentials", "username")
            self.password = settings.get("credentials", "password")
        except Exception as e:
            log.error("Error getting api credentials from settings file")
            log.error(e)
            log.error(traceback.format_exc())
            # bare raise keeps the original traceback; `raise e` restarts it
            # from this line in python 2
            raise

        self._reset_headers()

        try:
            self.base_path = settings.get("base", "paths.api")
        except Exception as e:
            log.error("Error getting api path from settings file")
            log.error(e)
            log.error(traceback.format_exc())
            raise
def test_create_no_transcribe(self):
    """
    Create a media from the remote mp3 without launching the transcription and
    check that it can be fetched back, is titled after its source and is still
    in the UPLOAD or TRANSCODE status.
    """
    source_url = settings.get('test', 'audio_test_remote_mp3')
    self.model = Media.create(
        client=self.client,
        media_filename=source_url,
        transcribe=False,
    )
    created_uuid = self.model.uuid

    media_item = Media.get(client=self.client, uuid=created_uuid)
    refetched = Media.get(client=self.client, uuid=self.model.uuid)
    assert refetched.uuid == self.model.uuid
    assert media_item.title == source_url

    allowed_statuses = (MEDIA_STATUS['UPLOAD'], MEDIA_STATUS['TRANSCODE'])
    assert media_item.status in allowed_statuses
def transcribe(self, client, success_callback_url='', error_callback_url='', **kwargs):
    """
    Transcribe an existing media item.

    @param client: api client used to send the request
    @param success_callback_url: sent as 'success_callback_url' when set
    @param error_callback_url: sent as 'error_callback_url' when set
    @param kwargs: accepted but not used here
    @return: the Process built from the 'process' entry of the json response
             (also stored on self.process_transcription)
    """
    url = [
        settings.get('base', 'paths.api.media'),
        self.uuid,
        settings.get('base', 'paths.api.media.transcribe'),
    ]

    # Previously the callbacks were url-encoded into `data` but the request was
    # always sent with data="", so they never reached the server. The encoded
    # body is now sent whenever a callback is given; without callbacks the body
    # is still the empty string, as before.
    # NOTE(review): assumes client.request sends `data` as the request body and
    # that the endpoint reads url-encoded callbacks from it -- confirm.
    if success_callback_url or error_callback_url:
        data = urllib.urlencode({
            'success_callback_url': success_callback_url,
            'error_callback_url': error_callback_url,
        })
    else:
        data = ""

    response = client.request(url=url, data=data, headers={})
    response_json = json.loads(response)
    self.process_transcription = Process(fields=response_json['process'])
    return self.process_transcription
def _reset_headers(self, headers=None, accept=None):
    """
    Rebuild self.headers with the basic authentication and accept headers.

    @param headers: dict that becomes self.headers (it is updated in place);
                    a fresh dict is used when omitted
    @param accept: suffix of the 'accept.<suffix>' key looked up in the 'base'
                   settings section; when None, the key stored under
                   'accept.default' is looked up instead

    Logs an error and exits the process when the username or the password is
    empty.
    """
    import base64

    # A `headers={}` default is a single dict shared by every call (and every
    # instance): the update() below would keep mutating that shared object.
    self.headers = {} if headers is None else headers

    if accept is None:
        accept = settings.get('base', settings.get('base', 'accept.default'))
    else:
        accept = settings.get('base', 'accept.%s' % accept)

    if self.username != "" and self.password != "":
        # b64encode never inserts line breaks; encodestring wraps its output
        # with a newline every 76 characters, which corrupts the header value
        # for long credentials.
        auth_string = base64.b64encode('%s:%s' % (self.username, self.password))
        self.headers.update({
            'authorization': 'basic %s' % auth_string,
            'accept': accept,
        })
    else:
        log.error('The username and/or password are empty.')
        exit()
def test_create_remote(self):
    """
    Create a media from each configured remote source (mp3, youtube, youtube
    over https) and check that it can be fetched back, is titled after its
    source and starts in the ASR status with no progress.
    """
    setting_names = (
        'audio_test_remote_mp3',
        'audio_test_remote_youtube',
        'audio_test_remote_youtube_https',
    )
    # resolve every source up front, before the first media is created
    sources = [settings.get('test', name) for name in setting_names]

    for source in sources:
        self.model = Media.create(client=self.client, media_filename=source)
        media_item = Media.get(client=self.client, uuid=self.model.uuid)
        refetched = Media.get(client=self.client, uuid=self.model.uuid)
        assert refetched.uuid == self.model.uuid
        assert media_item.title == source
        assert media_item.status == MEDIA_STATUS['ASR']
        assert media_item.progress == 0
def test_create_local(self):
    """
    Create a media from each local test file and check that it can be fetched
    back, is titled after its full local path and starts in the ASR status
    with no progress.
    """
    def local_path(name):
        # full path of a test file inside the configured local media folder
        return "%s/%s" % (settings.get('base', 'path.local.media'), name)

    for name in ('test_mp4_short.mp4', 'test_64K_short.mp3'):
        self.model = Media.create(
            client=self.client,
            media_filename=local_path(name),
        )
        media_item = Media.get(client=self.client, uuid=self.model.uuid)
        assert media_item.uuid == self.model.uuid
        assert media_item.title == local_path(name)
        assert media_item.status == MEDIA_STATUS['ASR']
        assert media_item.progress == 0
def batch_align():
    """
    Upload and align every media file listed in the configured csv.

    register_openers() is called first. The csv lives at
    "<path.local.media>/<media.align.csv>" (settings sections 'base' and
    'test' respectively). For each parsed row, element 1 (stripped of
    surrounding whitespace) is the media filename and element 0 names the
    align data file "<path.local.transcripts>/<element 0>.txt".
    """
    register_openers()

    # align all the files from the csv
    csv_path = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        settings.get('test', 'media.align.csv'),
    )
    for row in csv_to_json(csv_filename=csv_path)['data']:
        media_filename = row[1].strip()
        transcripts_dir = settings.get('base', 'path.local.transcripts')
        upload_align(
            media_filename=media_filename,
            aligndata="%s/%s.txt" % (transcripts_dir, row[0]),
        )
def test_create_local(self):
    """
    Upload each local test file and verify the created media: same uuid when
    fetched back, title equal to the local path, ASR status, zero progress.
    """
    def in_media_dir(name):
        # path of `name` inside the 'path.local.media' folder
        return "%s/%s" % (settings.get('base', 'path.local.media'), name)

    local_media_files = ('test_mp4_short.mp4', 'test_64K_short.mp3')
    for local_media_file in local_media_files:
        upload_path = in_media_dir(local_media_file)
        self.model = Media.create(client=self.client, media_filename=upload_path)

        fetched = Media.get(client=self.client, uuid=self.model.uuid)
        assert fetched.uuid == self.model.uuid
        assert fetched.title == in_media_dir(local_media_file)
        assert fetched.status == MEDIA_STATUS['ASR']
        assert fetched.progress == 0
def get_all(cls, client, *args, **kwargs):
    """
    Fetch the processes listed by the api.

    @param client: api client used to send the request
    @return: a list with one Process per entry of the 'processes' key of the
             json response (extra positional/keyword arguments are ignored)
    """
    processes_url = [settings.get('base', 'paths.api.processes')]
    payload = json.loads(client.request(url=processes_url))
    return [Process(fields=entry) for entry in payload['processes']]
def _reset_headers(self, headers=None, accept=None):
    """
    Reset self.headers and add the basic authentication and accept headers.

    @param headers: dict to use as self.headers (updated in place); when
                    omitted a new empty dict is created for this call
    @param accept: when given, the 'accept.<accept>' entry of the 'base'
                   settings section is used; when None, the entry whose key is
                   stored under 'accept.default' is used

    When the username or the password is empty an error is logged and the
    process exits.
    """
    import base64

    # `headers={}` as a default is evaluated once, so every call without an
    # explicit dict shared (and kept mutating) the same object.
    if headers is None:
        headers = {}
    self.headers = headers

    if accept is None:
        accept_key = settings.get('base', 'accept.default')
    else:
        accept_key = 'accept.%s' % accept
    accept = settings.get('base', accept_key)

    if self.username != "" and self.password != "":
        # encodestring breaks its output into 76-character lines, so long
        # credentials ended up with a newline inside the header; b64encode
        # returns the same encoding on a single line.
        auth_string = base64.b64encode('%s:%s' % (self.username, self.password))
        self.headers.update({
            'authorization': 'basic %s' % auth_string,
            'accept': accept,
        })
    else:
        log.error('The username and/or password are empty.')
        exit()
def align(self, client, aligndata, success_callback_url='', error_callback_url='', **kwargs):
    """
    Align an existing media item.

    @param client: api client used to send the request
    @param aligndata: passed to read_file(); the result is sent as 'aligndata'
    @param success_callback_url: sent as the 'success_callback_url' field
    @param error_callback_url: sent as the 'error_callback_url' field
    @param kwargs: accepted but not used here
    @return: the Process built from the 'process' entry of the json response
             (also stored on self.process_alignment)
    """
    log.info("aligning %s" % aligndata)

    media_path = settings.get('base', 'paths.api.media')
    align_path = settings.get('base', 'paths.api.media.align')
    url = [media_path, self.uuid, align_path]

    payload, multipart_headers = multipart_encode({
        'aligndata': read_file(aligndata),
        'success_callback_url': success_callback_url,
        'error_callback_url': error_callback_url,
    })
    headers = {}
    headers.update(multipart_headers)

    raw_reply = client.request(url=url, data=payload, headers=headers)
    parsed_reply = json.loads(raw_reply)
    process = Process(fields=parsed_reply['process'])
    self.process_alignment = process
    return self.process_alignment
def get_all(cls, client, *args, **kwargs):
    """
    Fetch the transcripts listed by the api.

    @param client: api client used to send the request
    @return: a list with one Transcript per entry of the 'transcripts' key of
             the json response (extra positional/keyword arguments are ignored)
    """
    transcripts_url = [settings.get('base', 'paths.api.transcripts')]
    payload = json.loads(client.request(url=transcripts_url))
    raw_transcripts = payload['transcripts']
    log.debug(raw_transcripts)
    return [Transcript(fields=entry) for entry in raw_transcripts]
def delete(cls, client, uuid, **kwargs):
    """
    Send a DELETE request for a kobject.

    @param client: api client used to send the request
    @param uuid: uuid of the kobject to delete
    @param kwargs: truthy 'delete_transcripts' / 'delete_storage' values add
                   the matching url parameter set to 'true'
    @return: the KObject built from the 'kobject' entry of the json response
    """
    url_params = {}
    for option in ('delete_transcripts', 'delete_storage'):
        if kwargs.get(option):
            url_params[option] = 'true'

    kobject_url = [settings.get('base', 'paths.api.kobjects'), uuid]
    response = client.request(url=kobject_url, url_params=url_params, method='DELETE')

    # raw response echoed to stdout between two blank lines
    print("")
    print(response)
    print("")

    return KObject(fields=json.loads(response)['kobject'])
def get(cls, client, uuid, deleted=False, format='json'):
    """
    Fetch a transcript.

    @param client: api client used to send the request(s)
    @param uuid: uuid of the transcript
    @param deleted: accepted but not used here
    @param format: value passed as `accept` to the first request
    @return: a Transcript built from the json response; for a non-json format
             the formatted response is added under 'content' (unless it is
             None)
    """
    url = [settings.get('base', 'paths.api.transcripts'), uuid]
    first_reply = client.request(url=url, accept=format)

    if format == 'json':
        formatted, json_reply = None, first_reply
    else:
        # always get the json response to build the object
        formatted, json_reply = first_reply, client.request(url=url, accept='json')

    fields = json.loads(json_reply)
    if formatted is not None:
        fields['content'] = formatted
    return Transcript(fields=fields)
def test_get_all(self):
    """
    Upload the local test files, then check that listing the media returns at
    least one item and that every returned item has a title and a uuid.
    """
    local_media_files = ['test_mp4_short.mp4', 'test_64K_short.mp3']
    for local_media_file in local_media_files:
        media_filename = "%s/%s" % (
            settings.get('base', 'path.local.media'), local_media_file)
        Media.create(
            client=self.client,
            media_filename=media_filename,
        )

    media = Media.get_all(client=self.client)
    assert len(media) > 0
    for m in media:
        assert hasattr(m, 'title')
        # This check used to ask for the attribute '' (an empty name), which
        # is not a meaningful field to look for; 'uuid' is the identifier the
        # sibling tests read from every Media object.
        assert hasattr(m, 'uuid')
def get(cls, client, uuid, deleted=False, format="json"):
    """
    Fetch a transcript.

    @param client: api client used to send the request(s)
    @param uuid: uuid of the transcript
    @param deleted: accepted but not used here
    @param format: value passed as `accept` to the first request
    @return: a Transcript built from the json response, whose 'content' is the
             formatted response for a non-json format, or otherwise the json
             dump of the response's 'segmentation'
    """
    url = [settings.get("base", "paths.api.transcripts"), uuid]
    first_reply = client.request(url=url, accept=format)

    if format == "json":
        formatted, json_reply = None, first_reply
    else:
        # always get the json response to build the object
        formatted, json_reply = first_reply, client.request(url=url, accept="json")

    fields = json.loads(json_reply)
    if formatted is None:
        fields["content"] = json.dumps(fields["segmentation"])
    else:
        fields["content"] = formatted
    return Transcript(fields=fields)
def get_all(cls, client, *args, **kwargs):
    """
    Fetch the media listed by the api, optionally filtered.

    @param client: api client used to send the request
    @param kwargs: 'status' (iterable, joined with '-' into the
                   'status_filter' url parameter) and 'search_query' (sent as
                   is); both are only sent when truthy
    @return: a list with one Media per entry of the 'media' key of the json
             response
    """
    url_params = {}
    statuses = kwargs.get('status')
    if statuses:
        url_params['status_filter'] = '-'.join(str(status) for status in statuses)
    search_query = kwargs.get('search_query')
    if search_query:
        url_params['search_query'] = search_query

    media_url = [settings.get('base', 'paths.api.media')]
    payload = json.loads(client.request(url=media_url, url_params=url_params))
    raw_media = payload['media']
    log.debug(raw_media)
    return [Media(fields=entry) for entry in raw_media]
def get_all(cls, client, *args, **kwargs):
    """
    Fetch the notes listed by the api, optionally filtered.

    @param client: api client used to send the request
    @param kwargs: 'status' (iterable, joined with '-' into 'status_filter')
                   and 'search_query'; both are only used when truthy
    @return: a list with one Note per entry of the 'notes' key of the json
             response
    """
    filters = {}
    statuses = kwargs.get('status')
    if statuses:
        filters['status_filter'] = '-'.join(str(status) for status in statuses)
    search_query = kwargs.get('search_query')
    if search_query:
        filters['search_query'] = search_query

    # NOTE(review): the url-encoded filters are appended as an extra url
    # element rather than passed as url parameters -- confirm this is what
    # client.request expects.
    encoded_filters = urllib.urlencode(filters)
    notes_url = [settings.get('base', 'paths.api.notes'), encoded_filters]

    payload = json.loads(client.request(url=notes_url))
    return [Note(fields=entry) for entry in payload['notes']]
def test_get_all(self):
    """
    Upload the local test files, then check that listing the media returns at
    least one item and that every returned item has a title and a uuid.
    """
    local_media_files = [
        'test_mp4_short.mp4',
        'test_64K_short.mp3',
    ]
    for local_media_file in local_media_files:
        media_filename = "%s/%s" % (settings.get('base', 'path.local.media'), local_media_file)
        Media.create(
            client=self.client,
            media_filename=media_filename,
        )

    media = Media.get_all(client=self.client)
    assert len(media) > 0
    for m in media:
        assert hasattr(m, 'title')
        # The second check was made against an empty attribute name, which
        # does not name any media field; 'uuid' is the identifier the other
        # tests rely on for every Media object.
        assert hasattr(m, 'uuid')
def get_all(cls, client, *args, **kwargs):
    """
    List the media known to the api, optionally filtered.

    @param client: api client used to send the request
    @param kwargs: a truthy 'status' iterable becomes the 'status_filter' url
                   parameter (items joined with '-'); a truthy 'search_query'
                   is forwarded unchanged
    @return: a list with one Media per entry of the 'media' key of the json
             response
    """
    url_params = {}

    status_filter = kwargs.get('status')
    if status_filter:
        url_params['status_filter'] = '-'.join([str(item) for item in status_filter])

    query = kwargs.get('search_query')
    if query:
        url_params['search_query'] = query

    reply = client.request(
        url=[settings.get('base', 'paths.api.media')],
        url_params=url_params,
    )
    entries = json.loads(reply)['media']
    log.debug(entries)
    return [Media(fields=entry) for entry in entries]
def test_get(self):
    """
    Create a media and do a get request on it
    """
    title = 'Test media title'
    description = 'Test media description'
    media_filename = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        'test_64K_short.mp3',
    )

    created = Media.create(
        client=self.client,
        media_filename=media_filename,
        title=title,
        description=description,
    )
    fetched = Media.get(client=self.client, uuid=created.uuid)

    # the metadata given at creation time must come back unchanged
    assert fetched.title == title
    assert fetched.description == description
def create(cls, client, media_item, parent_note=None, **kwargs):
    """
    Create a note.

    @param client: api client used to send the request
    @param media_item: accepted but not used yet
    @param parent_note: accepted but not used yet
    @return: the Note built from the 'note' entry of the json response
    """
    # TODO : fill in note params
    payload = {}
    notes_url = [settings.get('base', 'paths.api.notes')]

    register_openers()
    reply = client.request(url=notes_url, data=payload, headers={})
    return Note(fields=json.loads(reply)['note'])
def test_create_local_metadata(self):
    """
    Test creation of a media from a local file, by specifying some metadata (title, description, ...)
    """
    title = 'Test media title'
    description = 'Test media description'
    media_filename = "%s/%s" % (
        settings.get('base', 'path.local.media'),
        'test_mp4_short.mp4',
    )

    self.model = Media.create(
        client=self.client,
        media_filename=media_filename,
        title=title,
        description=description,
    )

    fetched = Media.get(client=self.client, uuid=self.model.uuid)
    assert fetched.title == title
    assert fetched.description == description
def test_get(self):
    """
    Create a media and do a get request on it
    """
    expected = {
        'title': 'Test media title',
        'description': 'Test media description',
    }
    local_file = 'test_64K_short.mp3'
    media_dir = settings.get('base', 'path.local.media')

    uploaded = Media.create(
        client=self.client,
        media_filename="%s/%s" % (media_dir, local_file),
        title=expected['title'],
        description=expected['description'],
    )
    reloaded = Media.get(client=self.client, uuid=uploaded.uuid)

    assert reloaded.title == expected['title']
    assert reloaded.description == expected['description']
def test_get_all(self):
    """
    Upload thirteen local files, then check the plain listing (ten items, each
    with a title) and the paginated listing (page size respected, pages
    overlapping on the expected item).
    """
    # one mp4 followed by twelve copies of the mp3
    local_media_files = ['test_mp4_short.mp4'] + ['test_64K_short.mp3'] * 12
    for local_media_file in local_media_files:
        media_filename = "%s/%s" % (settings.get('base', 'path.local.media'), local_media_file)
        m = Media.create(client=self.client, media_filename=media_filename)

    # vanilla test
    media = Media.get_all(client=self.client)
    assert len(media) > 0
    for m in media:
        assert hasattr(m, 'title')
    assert len(media) == 10

    # test pagination
    page_size = 5
    last_index = page_size - 1
    media = Media.get_all(client=self.client, start=0, count=page_size)
    assert len(media) == page_size
    # the second page starts on the last item of the first page
    media2 = Media.get_all(client=self.client, start=last_index, count=page_size)
    assert media[last_index].uuid == media2[0].uuid
def test_create_local_metadata(self):
    """
    Test creation of a media from a local file, by specifying some metadata (title, description, ...)
    """
    metadata = {
        'title': 'Test media title',
        'description': 'Test media description',
    }
    media_dir = settings.get('base', 'path.local.media')
    upload_path = "%s/%s" % (media_dir, 'test_mp4_short.mp4')

    self.model = Media.create(
        client=self.client,
        media_filename=upload_path,
        title=metadata['title'],
        description=metadata['description'],
    )

    media_item = Media.get(client=self.client, uuid=self.model.uuid)
    # both metadata fields must round-trip through the api
    assert media_item.title == metadata['title']
    assert media_item.description == metadata['description']
def get(cls, client, uuid, deleted=False):
    """
    Fetch a user.

    @param client: api client used to send the request
    @param uuid: uuid of the user
    @param deleted: accepted but not used here
    @return: the User built from the 'user' entry of the json response
    """
    user_url = [settings.get('base', 'paths.api.users'), uuid]
    payload = json.loads(client.request(url=user_url))
    return User(fields=payload["user"])
def create(cls, client, media_filename, transcribe=True, aligndata=None, **kwargs):
    """
    Create a media item.
    If aligndata is provided the created media is aligned against it,
    otherwise it is transcribed when `transcribe` is true.

    @param client: api client used to send the request
    @param media_filename: local/remote address of the media file; any value
                           containing 'http' is sent as a remote address,
                           anything else is opened as a local file
    @param transcribe: automagically launch transcription (ignored when
                       aligndata is given)
    @param aligndata: when not None, passed to the created media's align()
    @param kwargs: 'service' (sent together with 'item_id'), 'title' (sent
                   when not None) and 'description' (sent when present)
    @return: the Media built from the 'media_item' entry of the json response
    """
    url = [settings.get('base', 'paths.api.media')]

    fields = {}
    # create the media from a service
    if 'service' in kwargs:
        fields['service'] = kwargs.get('service')
        fields['item_id'] = kwargs.get('item_id')
    if kwargs.get('title') is not None:
        fields['title'] = kwargs.get('title')
    if 'description' in kwargs:
        fields['description'] = kwargs.get('description')

    local_file = None
    if 'http' in media_filename:
        # upload from remote url
        fields['media'] = media_filename
    else:
        # upload from local hard drive
        register_openers()
        local_file = open(media_filename, "rb")
        fields['media'] = local_file

    try:
        data, encoding_headers = multipart_encode(fields)
        headers = {}
        headers.update(encoding_headers)
        response = client.request(url=url, data=data, headers=headers)
    finally:
        # The local file used to stay open forever. It has to remain open
        # while the request is being sent and is closed right after, whether
        # the request succeeded or not.
        if local_file is not None:
            local_file.close()

    response_json = json.loads(response)
    media_item = Media(fields=response_json['media_item'])

    if aligndata is not None:
        media_item.align(client=client, aligndata=aligndata)
    elif transcribe:
        media_item.transcribe(client=client)
    return media_item
def get(cls, client, uuid, deleted=False):
    """
    Fetch a media item.

    @param client: api client used to send the request
    @param uuid: uuid of the media item
    @param deleted: accepted but not used here
    @return: the Media built from the 'media_item' entry of the json response
    """
    media_url = [settings.get('base', 'paths.api.media'), uuid]
    payload = json.loads(client.request(url=media_url))
    return Media(fields=payload["media_item"])
def get(cls, client, uuid, deleted=False):
    """
    Fetch a note.

    @param client: api client used to send the request
    @param uuid: uuid of the note
    @param deleted: accepted but not used here
    @return: the Note built from the 'note' entry of the json response
    """
    note_url = [settings.get('base', 'paths.api.notes'), uuid]
    payload = json.loads(client.request(url=note_url))
    return Note(fields=payload["note"])
def has_valid_file_extension(cls, file_path):
    """
    Check a file path against the extensions authorized for alignment.

    @param file_path: path handed to check_file_extension()
    @return: whatever check_file_extension() returns for this path and the
             comma-separated 'transcript.align.authorized_extensions' setting
             (section 'base')
    """
    configured = settings.get('base', 'transcript.align.authorized_extensions')
    authorized = configured.split(',')
    return check_file_extension(file_path=file_path, authorized_file_types=authorized)
def get(cls, client, uuid, deleted=False):
    """
    Fetch a process.

    @param client: api client used to send the request
    @param uuid: uuid of the process
    @param deleted: accepted but not used here
    @return: the Process built from the 'process' entry of the json response
    """
    process_url = [settings.get('base', 'paths.api.processes'), uuid]
    payload = json.loads(client.request(url=process_url))
    return Process(fields=payload["process"])
def has_valid_file_extension(cls, file_path):
    """
    Tell whether a file may be used as align data, judging by its extension.

    @param file_path: path handed to check_file_extension()
    @return: the result of check_file_extension() for this path, given the
             extensions listed (comma-separated) in the 'base' setting
             'transcript.align.authorized_extensions'
    """
    extensions_setting = settings.get('base', 'transcript.align.authorized_extensions')
    return check_file_extension(
        file_path=file_path,
        authorized_file_types=extensions_setting.split(','),
    )
def get(cls, client, uuid, deleted=False):
    """
    Fetch a kobject.

    @param client: api client used to send the request
    @param uuid: uuid of the kobject
    @param deleted: accepted but not used here
    @return: the KObject built from the 'kobject' entry of the json response
    """
    kobject_url = [settings.get('base', 'paths.api.kobjects'), uuid]
    payload = json.loads(client.request(url=kobject_url))
    return KObject(fields=payload["kobject"])