async def post_upload(request):
    """Accept an uploaded file and start (or short-circuit) its processing.

    Video uploads are hashed with ``compute_md5`` and looked up in the
    ``videos`` Elasticsearch index. If a record with the same base64 hash
    already exists the client is redirected to it; otherwise the file is
    stored, a row is created in the ``Videos`` DynamoDB table and a job is
    enqueued. Any other content type is written out and handed straight to
    ``Transcribe.parseOutput``.

    Args:
        request: Incoming request. ``request.method`` and
            ``request.files['file']`` are read.

    Returns:
        * the result of ``abort(404)`` for GET requests,
        * a redirect to ``/`` when the upload is missing or incomplete,
        * a redirect to ``/hash/<b64hash>`` for an already-known video,
        * a redirect to ``/job/<id>`` once a new job has been queued,
        * a text response holding the content type for non-video uploads.
    """
    if request.method == 'GET':
        return abort(404)

    if 'file' not in request.files:
        return response.redirect('/')

    uploads = request.files['file']
    if not uploads:
        # The key can be present with an empty list; avoid an IndexError.
        return response.redirect('/')

    upload = uploads[0]
    file_body = upload.body
    file_name = upload.name
    file_type = upload.type

    # Truthiness check instead of comparing against "": the body is bytes, so
    # `"" in [...]` never matched an empty upload (b"" != ""), and None
    # values slipped through to the `in file_type` test below.
    if not (file_body and file_name and file_type):
        return response.redirect('/')

    # check if is valid filetype
    if 'video' not in file_type:
        # NOTE(review): file_name is client-supplied and is passed to
        # create_file unchanged; confirm create_file guards against path
        # traversal (e.g. names containing "../").
        create_file(file_name, file_body)
        trans = Transcribe()
        trans.parseOutput(file_name)
        return response.text(file_type)

    hashcode = compute_md5(file_body)
    log.info('hash generated = {}'.format(hashcode))
    b64hash = base64.b64encode(hashcode).decode("ascii")

    es_search = es.search(
        index='videos',
        body={"query": {"match": {"hash": b64hash}}},
    )
    # Iterate the hit list directly rather than testing hits.total first:
    # an empty list means "no match", and this also works on Elasticsearch
    # versions where hits.total is an object instead of an integer.
    for record in es_search['hits']['hits']:
        # "match" is a full-text query, so confirm the hash is truly equal.
        if record['_source']['hash'] == b64hash:
            # found 1 is enough
            # NOTE(review): standard base64 may contain '/' or '+'; confirm
            # the /hash/<...> route copes with those characters.
            return response.redirect('/hash/{}'.format(b64hash))

    # if cannot find, continue to create new job
    index = uuid.uuid4().hex
    # Written only after the duplicate check so that a duplicate upload does
    # not leave an orphaned file behind.
    create_file(index, file_body)

    table = dynamodb.Table('Videos')
    table.put_item(
        Item={
            'id': index,
            'title': file_name,
            'hash': hashcode,
            'job_status': 'Queue for Audio Extraction',
            'transcript': None,
            'subs': None,
            'link': index,
            'vote_count': 0,
            'author': 'anonymous',
            'upload_date': int(time.time()),  # time in seconds
        })
    q.enqueue(aws_stuff, index, timeout='2h', job_id=index)
    return response.redirect('/job/{}'.format(index))
def _finalize_transcription(job_id, transcript_uri, table):
    """Persist a completed transcript for ``job_id`` and clean up artefacts.

    Downloads the transcript JSON, converts it with ``Transcribe``, removes
    the temporary files, deletes the job's audio object from S3 and finally
    stores transcript and subtitles on the ``Videos`` row.

    Args:
        job_id: Identifier of the job / DynamoDB row.
        transcript_uri: URI of the transcript file reported by AWS.
        table: DynamoDB table object that holds the job row.
    """
    trans_file = './resources/trans{}'.format(job_id)
    srt_file = trans_file + '.srt'

    try:
        urllib.request.urlretrieve(transcript_uri, trans_file)
        trans = Transcribe()
        trans.parseOutput(trans_file)
        srt_mem = trans.srt_mem(srt_file)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(trans_file, "r", encoding="utf-8") as f:
            trans_data = json.load(f)
    finally:
        # Best-effort removal of the temporary files, also when parsing fails.
        # The original removed the transcript path twice and never touched
        # the .srt file its own comment said it was deleting.
        for leftover in (trans_file, srt_file):
            try:
                os.remove(leftover)
            except OSError:
                pass

    # Delete audio file in S3 bucket
    s3 = boto3.client('s3')
    bucket = 'orbitalphase1'
    audio_file = './resources/{}.flac'.format(job_id)
    s3.delete_object(Bucket=bucket, Key=audio_file)

    table.update_item(
        Key={'id': job_id},
        UpdateExpression=
        "SET job_status=:job_status, subs=:subtitles, transcript=:transcript",
        ExpressionAttributeValues={
            ':job_status': 'Transcription done',
            ':transcript': trans_data,
            ':subtitles': srt_mem
        })


async def retrieve_job(request, id):
    """Build the status dictionary for the job identified by ``id``.

    The job row is read from the ``Videos`` DynamoDB table. While the row's
    status is ``'Sent Audio For Transcription'`` the AWS transcription job
    is polled; once it reports ``COMPLETED`` the transcript is stored and
    the row is marked ``'Transcription done'``.

    Args:
        request: Incoming request (not inspected here).
        id: Job identifier; used as the DynamoDB key and as the AWS
            transcription job name.

    Returns:
        A dict with ``status``, ``title``, ``date`` and ``author``; also
        ``duration`` when the row has ``video_length``; and ``srt``,
        ``flac``, ``ready`` and ``count`` once the transcript is available.
        If no row exists, the result of ``abort(404)``.
    """
    table = dynamodb.Table('Videos')
    db_item = table.get_item(Key={'id': id}, ConsistentRead=True).get('Item')
    if db_item is None:
        # `return` matches the other handlers in this file and prevents
        # falling through to the lookups below should abort() ever return.
        return abort(404)

    job_status = db_item['job_status']
    count = db_item.get('vote_count')
    if count is None:
        count = 0

    jinja_response = {
        'status': job_status,
        'title': db_item['title'],
        'date': db_item['upload_date'],
        'author': db_item['author'],
    }
    # Optional attribute: an explicit membership test replaces a bare
    # `except: pass` that would have hidden unrelated errors.
    if 'video_length' in db_item:
        jinja_response['duration'] = db_item['video_length']

    ready_fields = {'srt': id, 'flac': id, 'ready': True, 'count': count}

    if job_status == 'Sent Audio For Transcription':
        transcribe = boto3.client('transcribe')
        result = transcribe.get_transcription_job(TranscriptionJobName=id)
        status = result['TranscriptionJob']['TranscriptionJobStatus']
        jinja_response['status'] = status

        if status == 'COMPLETED':
            trans_uri = result['TranscriptionJob']['Transcript'][
                'TranscriptFileUri']
            _finalize_transcription(id, trans_uri, table)
            jinja_response['status'] = 'Transcription done'
            jinja_response.update(ready_fields)
        elif status == 'IN_PROGRESS':
            jinja_response['status'] = 'Sent to AWS to Transcribe'
        # Any other AWS status is passed through unchanged.
        return jinja_response

    if job_status == 'Transcription done' or "Edited" in job_status:
        jinja_response.update(ready_fields)

    return jinja_response
async def post_transcribe(request):
    """Placeholder handler: run ``Transcribe.parseOutput()`` and reply "TODO".

    GET requests are answered with ``abort(404)`` instead.
    """
    # NOTE(review): other call sites in this file call parseOutput on a
    # Transcribe *instance* and pass a file path; this call does neither.
    # Confirm the intended argument before relying on this endpoint.
    # NOTE(review): this file contains a second definition named
    # post_transcribe further down, which rebinds the module-level name.
    if request.method != 'GET':
        Transcribe.parseOutput()
        return response.text("TODO")
    return abort(404)
async def post_transcribe(request):
    """Placeholder handler: run ``Transcribe.parseOutput()`` and reply "TODO".

    Unlike the earlier definition of the same name, the request method is
    not checked.
    """
    # NOTE(review): this shares its name with an earlier definition in the
    # file and therefore rebinds the module-level name; confirm which of the
    # two is meant to stay.
    # NOTE(review): other call sites in this file call parseOutput on a
    # Transcribe *instance* and pass a file path; this call does neither.
    Transcribe.parseOutput()
    placeholder = response.text("TODO")
    return placeholder