async def commit_change(request):
    """Save the session's edited subtitles as a new record in 'Videos'.

    The working VTT stored in ``request['session']['vtt']`` is converted to
    SRT with ``Transcribe.vtt_mem_to_srt``. The record named by the ``id``
    form field is copied under a fresh uuid, with ``author``,
    ``upload_date``, ``job_status``, ``vote_count`` and ``subs`` overwritten,
    and the client is redirected to ``/job/<new id>``.

    Form fields read: ``id`` (record being edited) and ``author``.

    Aborts with 404 for GET requests and when the source record does not
    exist; aborts with 400 when the session holds no VTT or a form field is
    missing.
    """
    if request.method == 'GET':
        abort(404)

    # Nothing to commit if no edit session was ever started.
    curr_vtt = request['session'].get('vtt')
    if curr_vtt is None:
        abort(400)

    variables = request.form
    try:
        source_id = variables['id'][0]
        author = variables['author'][0]
    except (KeyError, IndexError):
        abort(400)

    table = dynamodb.Table('Videos')
    db_query = table.get_item(Key={'id': source_id}, ConsistentRead=True)
    db_item = db_query.get('Item')
    if db_item is None:
        # The record being edited does not exist (or was removed).
        abort(404)

    transcribe = Transcribe()
    new_srt = transcribe.vtt_mem_to_srt(curr_vtt)

    index = uuid.uuid4().hex
    # Shallow copy: every attribute not overwritten below is inherited
    # from the source record.
    new_item = dict(db_item)
    new_item['id'] = index
    new_item['author'] = author
    new_item['upload_date'] = int(time.time())
    new_item['job_status'] = 'Edited from <a href="{}">{}</a>'.format(
        source_id, source_id)
    new_item['vote_count'] = 0
    new_item['subs'] = new_srt
    table.put_item(Item=new_item)
    return response.redirect('/job/{}'.format(index))
async def post_upload(request):
    """Accept an uploaded file and queue video uploads for processing.

    Video uploads (content type containing ``'video'``) are hashed with
    ``compute_md5`` and looked up in the ``videos`` Elasticsearch index. If a
    record with the same hash exists, the client is redirected to
    ``/hash/<b64hash>``. Otherwise the file is written with ``create_file``,
    a 'Videos' record is created, ``aws_stuff`` is enqueued, and the client
    is redirected to ``/job/<id>``.

    Any other upload is written with ``create_file``, passed to
    ``Transcribe.parseOutput``, and its content type is echoed back as text.

    GET requests abort with 404. Requests without a usable ``file`` part are
    redirected to ``/``.
    """
    if request.method == 'GET':
        return abort(404)
    if 'file' not in request.files:
        return response.redirect('/')

    upload = request.files['file'][0]
    file_body = upload.body
    file_name = upload.name
    file_type = upload.type
    # Falsy test instead of `"" in [...]`: the body is bytes, so an empty
    # upload (b"") never compared equal to "" and slipped through.
    if not (file_body and file_name and file_type):
        return response.redirect('/')

    if 'video' not in file_type:
        # NOTE(review): file_name is client-supplied and is used as the
        # on-disk name here -- confirm create_file guards against path
        # traversal.
        create_file(file_name, file_body)
        trans = Transcribe()
        trans.parseOutput(file_name)
        return response.text(file_type)

    hashcode = compute_md5(file_body)
    log.info('hash generated = {}'.format(hashcode))
    b64hash = base64.b64encode(hashcode).decode("ascii")
    es_search = es.search(
        index='videos',
        body={"query": {"match": {"hash": b64hash}}})
    # Scan the hits directly rather than testing hits.total first: an empty
    # result has no hits, and Elasticsearch 7+ reports total as an object,
    # which cannot be compared with an int. The equality test confirms the
    # match is an exact hash, not just a search hit.
    hits = es_search['hits']['hits']
    if any(hit['_source']['hash'] == b64hash for hit in hits):
        return response.redirect('/hash/{}'.format(b64hash))

    # Only write the upload to disk once it is known to be new; writing it
    # before the duplicate check left an orphan file for every duplicate.
    index = uuid.uuid4().hex
    create_file(index, file_body)
    table = dynamodb.Table('Videos')
    table.put_item(
        Item={
            'id': index,
            'title': file_name,
            'hash': hashcode,
            'job_status': 'Queue for Audio Extraction',
            'transcript': None,
            'subs': None,
            'link': index,
            'vote_count': 0,
            'author': 'anonymous',
            'upload_date': int(time.time()),  # time in seconds
        })
    q.enqueue(aws_stuff, index, timeout='2h', job_id=index)
    return response.redirect('/job/{}'.format(index))
async def vtt(request, srt):
    """Return the stored subtitles of record ``srt`` as a text response.

    The record is fetched from the 'Videos' table with a consistent read and
    its ``subs`` attribute is passed through
    ``Transcribe.srt_mem_to_vtt_mem`` before being sent back.

    Aborts with 404 when no record with that id exists.
    """
    converter = Transcribe()
    lookup = dynamodb.Table('Videos').get_item(
        Key={'id': srt}, ConsistentRead=True)
    record = lookup.get('Item')
    if record is None:
        abort(404)
    converted = converter.srt_mem_to_vtt_mem(record['subs'])
    return response.text(converted)
async def sub_edit(request, id):
    """Build the template context for editing the subtitles of record ``id``.

    Loads the record from the 'Videos' table, stores a VTT rendering of its
    subtitles in ``request['session']['vtt']`` as the working copy for later
    edits, and returns a dict with the keys ``subtitles``, ``vtt``, ``id``
    and ``video``.

    Any failure while loading or converting the record (missing record,
    missing attribute, conversion error) aborts with 404.
    """
    try:
        table = dynamodb.Table('Videos')
        db_query = table.get_item(Key={'id': id}, ConsistentRead=True)
        transcribe = Transcribe()
        db_item = db_query['Item']
        subs = db_item['subs']
        subtitle = transcribe.srt_to_edit(subs)
        request['session']['vtt'] = transcribe.srt_mem_to_vtt_mem(subs)
        return {
            'subtitles': subtitle,
            'vtt': '{}'.format(id),
            'id': id,
            'video': db_item['link'],
        }
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not turned into a 404.
        abort(404)
async def interrim_vtt(request):
    """Apply one subtitle edit to the session's working VTT.

    Form fields read: ``id`` (record id), ``index`` (integer), ``start``,
    ``end`` and ``text``. If the session holds no working VTT yet, it is
    seeded from the record's stored subtitles. The edit is applied with
    ``Transcribe.make_change_vtt`` and the result is written back to
    ``request['session']['vtt']``.

    Returns a JSON body ``{'status': 'ok', 'uri': '/edit/vtt/<id>.vtt'}``.

    Aborts with 400 when a form field is missing or ``index`` is not an
    integer, and with 404 when the session must be seeded but the record
    does not exist.
    """
    variables = request.form
    try:
        srt = variables['id'][0]
        start = variables['start'][0]
        end = variables['end'][0]
        text = variables['text'][0]
        index = int(variables['index'][0])
    except (KeyError, IndexError, ValueError):
        abort(400)

    t = Transcribe()
    session = request['session']
    if session.get('vtt') is None:
        table = dynamodb.Table('Videos')
        db_query = table.get_item(Key={'id': srt}, ConsistentRead=True)
        item = db_query.get('Item')
        if item is None:
            # Unknown record id: nothing to seed the working copy from.
            abort(404)
        session['vtt'] = t.srt_mem_to_vtt_mem(item['subs'])

    session['vtt'] = t.make_change_vtt(
        session['vtt'], index, start, end, text)
    return response.json({
        'status': 'ok',
        'uri': '/edit/vtt/{}.vtt'.format(srt)
    })
def _store_completed_transcript(job_id, transcript_uri, table):
    """Download a finished transcript, save it on the job record, clean up.

    Downloads ``transcript_uri`` to ``./resources/trans<job_id>``, runs
    ``Transcribe.parseOutput`` on it, reads the resulting ``.srt`` file with
    ``Transcribe.srt_mem``, and loads the transcript JSON. Both local files
    are then removed, the job's audio object is deleted from S3, and the
    record is updated with the new status, subtitles and transcript.
    """
    trans_file = './resources/trans{}'.format(job_id)
    srt_file = trans_file + '.srt'
    try:
        urllib.request.urlretrieve(transcript_uri, trans_file)
        trans = Transcribe()
        trans.parseOutput(trans_file)
        srt_mem = trans.srt_mem(srt_file)
        with open(trans_file, "r") as f:
            trans_data = json.load(f)
    finally:
        # Best-effort cleanup of both temporary files, also on failure.
        # The earlier code removed the transcript path twice, so the .srt
        # file was never deleted.
        for leftover in (trans_file, srt_file):
            try:
                os.remove(leftover)
            except OSError:
                pass

    # Delete audio file in S3 bucket
    s3 = boto3.client('s3')
    bucket = 'orbitalphase1'
    audio_file = './resources/{}.flac'.format(job_id)
    s3.delete_object(Bucket=bucket, Key=audio_file)

    table.update_item(
        Key={'id': job_id},
        UpdateExpression=
        "SET job_status=:job_status, subs=:subtitles, transcript=:transcript",
        ExpressionAttributeValues={
            ':job_status': 'Transcription done',
            ':transcript': trans_data,
            ':subtitles': srt_mem
        })


async def retrieve_job(request, id):
    """Build the template context describing the state of job ``id``.

    The returned dict always has ``status``, ``title``, ``date`` and
    ``author``, plus ``duration`` when the record has a ``video_length``.
    When subtitles are available it also has ``srt``, ``flac``, ``ready``
    and ``count``.

    While the record is in the 'Sent Audio For Transcription' state, the AWS
    Transcribe job named ``id`` is polled. On 'COMPLETED' the transcript is
    downloaded and stored on the record. On 'IN_PROGRESS' a friendlier
    status text is reported. Any other AWS status is passed through
    unchanged.

    Aborts with 404 when no record with that id exists.
    """
    table = dynamodb.Table('Videos')
    db_query = table.get_item(Key={'id': id}, ConsistentRead=True)
    db_item = db_query.get('Item')
    if db_item is None:
        abort(404)

    job_status = db_item['job_status']
    count = db_item.get('vote_count')
    if count is None:
        count = 0

    jinja_response = {
        'status': job_status,
        'title': db_item['title'],
        'date': db_item['upload_date'],
        'author': db_item['author'],
    }
    # video_length is optional on the record; only expose it when present.
    if 'video_length' in db_item:
        jinja_response['duration'] = db_item['video_length']

    # Context added once subtitles exist for this job.
    ready_fields = {'srt': id, 'flac': id, 'ready': True, 'count': count}

    if job_status == 'Sent Audio For Transcription':
        transcribe = boto3.client('transcribe')
        result = transcribe.get_transcription_job(TranscriptionJobName=id)
        status = result['TranscriptionJob']['TranscriptionJobStatus']
        jinja_response['status'] = status
        if status == 'COMPLETED':
            trans_uri = result['TranscriptionJob']['Transcript'][
                'TranscriptFileUri']
            _store_completed_transcript(id, trans_uri, table)
            jinja_response['status'] = 'Transcription done'
            jinja_response.update(ready_fields)
        elif status == 'IN_PROGRESS':
            jinja_response['status'] = 'Sent to AWS to Transcribe'
    elif job_status == 'Transcription done' or "Edited" in job_status:
        jinja_response.update(ready_fields)

    return jinja_response
async def post_transcribe(request):
    """Placeholder handler: reject GET, otherwise answer with "TODO".

    For non-GET requests ``Transcribe.parseOutput`` is called on the class
    with no arguments before the text response is returned.

    NOTE(review): another function named ``post_transcribe`` is defined
    later in this file; which one serves requests depends on how the routes
    are registered -- confirm. Elsewhere in this file ``parseOutput`` is
    called on an instance with a file path, so this bare call looks
    unfinished -- confirm the intended argument.
    """
    is_get = request.method == 'GET'
    if is_get:
        return abort(404)
    Transcribe.parseOutput()
    return response.text("TODO")
async def post_transcribe(request):
    """Placeholder handler: call ``Transcribe.parseOutput`` and answer "TODO".

    NOTE(review): this redefines an earlier ``post_transcribe`` in the same
    file, without that version's GET guard -- confirm which one is meant to
    stay. Elsewhere in this file ``parseOutput`` is called on an instance
    with a file path, so this bare call looks unfinished -- confirm the
    intended argument.
    """
    Transcribe.parseOutput()
    placeholder = response.text("TODO")
    return placeholder