Ejemplo n.º 1
0
async def commit_change(request):
    """Persist the edited subtitles as a new item in the 'Videos' table.

    Reads ``id`` (the item being edited) and ``author`` from the submitted
    form, copies the existing item, and stores the copy under a freshly
    generated id with the subtitles taken from the session's ``vtt`` entry
    (converted back to SRT).

    Responds with 404 for GET requests and when the source item does not
    exist; otherwise redirects to ``/job/<new id>``.
    """
    if request.method == 'GET':
        # NOTE(review): relies on abort() raising, as the other handlers do.
        abort(404)

    variables = request.form
    source_id = variables['id'][0]
    author = variables['author'][0]

    table = dynamodb.Table('Videos')
    db_query = table.get_item(Key={'id': source_id}, ConsistentRead=True)
    db_item = db_query.get('Item')
    if db_item is None:
        # Unknown source id: nothing to copy from (previously this crashed
        # with AttributeError on db_item.items()).
        abort(404)

    curr_vtt = request['session'].get('vtt')
    if curr_vtt is None:
        # No edits are held in the session, so carry the stored subtitles
        # over unchanged instead of failing with KeyError.
        new_srt = db_item['subs']
    else:
        new_srt = Transcribe().vtt_mem_to_srt(curr_vtt)

    index = uuid.uuid4().hex

    # Start from a shallow copy of the source item, then override the fields
    # that describe the new, edited entry.
    new_item = dict(db_item)
    new_item.update({
        'id': index,
        'author': author,
        'upload_date': int(time.time()),  # time in seconds
        'job_status': 'Edited from <a href="{}">{}</a>'.format(
            source_id, source_id),
        'vote_count': 0,
        'subs': new_srt,
    })

    table.put_item(Item=new_item)

    return response.redirect('/job/{}'.format(index))
Ejemplo n.º 2
0
async def post_upload(request):
    """Handle a file upload and queue it for processing.

    For uploads whose content type contains ``video``: compute the file's
    hash, redirect to ``/hash/<b64hash>`` when an identical hash is already
    indexed in Elasticsearch, otherwise store the file, create a 'Videos'
    item and enqueue the ``aws_stuff`` job, then redirect to ``/job/<id>``.

    Any other content type is written to disk under the uploaded name and
    passed to ``Transcribe.parseOutput``; the content type is echoed back.

    GET requests get a 404; a missing or empty upload redirects to ``/``.
    """
    if request.method == 'GET':
        return abort(404)

    if 'file' not in request.files:
        return response.redirect('/')

    upload = request.files['file'][0]
    file_body = upload.body
    file_name = upload.name
    file_type = upload.type
    # Truthiness check: the previous `"" in [...]` test never matched an
    # empty *bytes* body (b"" != ""), so empty uploads slipped through.
    if not (file_body and file_name and file_type):
        return response.redirect('/')

    # check if is valid filetype
    if 'video' not in file_type:
        # NOTE(review): file_name comes straight from the client and is used
        # as a path here; confirm create_file() sanitizes it (path traversal).
        create_file(file_name, file_body)
        trans = Transcribe()
        trans.parseOutput(file_name)
        return response.text(file_type)

    hashcode = compute_md5(file_body)
    log.info('hash generated = {}'.format(hashcode))

    b64hash = base64.b64encode(hashcode).decode("ascii")
    es_search = es.search(index='videos',
                          body={"query": {
                              "match": {
                                  "hash": b64hash
                              }
                          }})
    # A "match" query is not exact, so confirm the stored hash really is
    # identical. Iterating the hits directly (instead of testing
    # hits.total > 0 first) gives the same result for an empty hit list and
    # also works when `total` is reported as an object rather than an int.
    es_record_found = es_search['hits']['hits']
    if any(record['_source']['hash'] == b64hash
           for record in es_record_found):
        # found 1 is enough
        return response.redirect('/hash/{}'.format(b64hash))

    # Only write the upload to disk once we know it is not a duplicate, so
    # duplicates no longer leave an orphaned file behind.
    index = uuid.uuid4().hex
    create_file(index, file_body)

    table = dynamodb.Table('Videos')
    table.put_item(
        Item={
            'id': index,
            'title': file_name,
            'hash': hashcode,
            'job_status': 'Queue for Audio Extraction',
            'transcript': None,
            'subs': None,
            'link': index,
            'vote_count': 0,
            'author': 'anonymous',
            'upload_date': int(time.time())  # time in seconds
        })

    q.enqueue(aws_stuff, index, timeout='2h', job_id=index)
    return response.redirect('/job/{}'.format(index))
Ejemplo n.º 3
0
async def vtt(request, srt):
    """Return the stored subtitles of item ``srt`` converted to VTT text.

    Responds with 404 when the item does not exist or has no subtitles yet
    (items are created with ``subs`` set to ``None`` until a transcription
    has been stored).
    """
    table = dynamodb.Table('Videos')
    db_query = table.get_item(Key={'id': srt}, ConsistentRead=True)
    item = db_query.get('Item')
    if item is None or item.get('subs') is None:
        abort(404)
    return response.text(Transcribe().srt_mem_to_vtt_mem(item['subs']))
Ejemplo n.º 4
0
async def sub_edit(request, id):
    """Build the template context for editing the subtitles of item ``id``.

    Loads the item from the 'Videos' table, converts its subtitles into the
    editable form, and seeds the session's ``vtt`` entry with the VTT version
    so later edits can be applied to it.

    Returns a dict with ``subtitles``, ``vtt``, ``id`` and ``video`` keys
    (presumably rendered by a template decorator outside this view — confirm).
    Any failure while loading or converting the item results in a 404.
    """
    try:
        table = dynamodb.Table('Videos')
        db_query = table.get_item(Key={'id': id}, ConsistentRead=True)
        transcribe = Transcribe()
        db_item = db_query['Item']
        subtitle = transcribe.srt_to_edit(db_item['subs'])
        request['session']['vtt'] = transcribe.srt_mem_to_vtt_mem(
            db_item['subs'])

        return {
            'subtitles': subtitle,
            'vtt': '{}'.format(id),
            'id': id,
            'video': db_item['link'],
        }
    except Exception:
        # Deliberately broad (any lookup/conversion failure means "not
        # found"), but no longer a bare except: task cancellation and
        # interpreter-exit signals must not be turned into a 404.
        abort(404)
Ejemplo n.º 5
0
async def interrim_vtt(request):
    """Apply one subtitle edit to the VTT held in the session.

    Form fields read: ``id`` (item id), ``index`` (cue index, parsed as
    int), ``start``, ``end`` and ``text``. If the session has no ``vtt``
    yet, it is seeded from the item's stored subtitles; a missing item or an
    item without subtitles yields a 404.

    Returns a JSON body with ``status`` and the ``uri`` of the edited VTT.
    """
    variables = request.form
    srt = variables['id'][0]
    start = variables['start'][0]
    end = variables['end'][0]
    text = variables['text'][0]
    index = int(variables['index'][0])

    t = Transcribe()
    session = request['session']
    if session.get('vtt') is None:
        table = dynamodb.Table('Videos')
        db_query = table.get_item(Key={'id': srt}, ConsistentRead=True)
        item = db_query.get('Item')
        if item is None or item.get('subs') is None:
            # Nothing to edit: previously this crashed with TypeError when
            # subscripting a missing item.
            abort(404)
        session['vtt'] = t.srt_mem_to_vtt_mem(item['subs'])

    session['vtt'] = t.make_change_vtt(session['vtt'], index, start, end,
                                       text)
    return response.json({
        'status': 'ok',
        'uri': '/edit/vtt/{}.vtt'.format(srt)
    })
Ejemplo n.º 6
0
async def retrieve_job(request, id):
    """Build the template context describing the state of job ``id``.

    Always includes ``status``, ``title``, ``date`` and ``author`` (plus
    ``duration`` when the item has a ``video_length``). When the item is
    waiting on AWS Transcribe, the transcription job is polled; once it has
    completed, the transcript is downloaded, converted to SRT, stored on the
    item, and the temporary files and the S3 audio object are removed.

    ``srt``, ``flac``, ``ready`` and ``count`` are added once subtitles are
    available. Responds with 404 when the item does not exist.

    Returns a dict (presumably rendered by a template decorator outside this
    view — confirm).
    """
    table = dynamodb.Table('Videos')
    db_query = table.get_item(Key={'id': id}, ConsistentRead=True)
    db_item = db_query.get('Item')
    if db_item is None:
        abort(404)

    job_status = db_item['job_status']
    count = db_item.get('vote_count')
    if count is None:
        count = 0

    jinja_response = {
        'status': job_status,
        'title': db_item['title'],
        'date': db_item['upload_date'],
        'author': db_item['author'],
    }
    # Optional attribute: only present once the video length is known.
    if 'video_length' in db_item:
        jinja_response['duration'] = db_item['video_length']

    # Extra context exposed once subtitles exist for this job.
    ready_fields = {'srt': id, 'flac': id, 'ready': True, 'count': count}

    if job_status == 'Sent Audio For Transcription':
        transcribe = boto3.client('transcribe')
        result = transcribe.get_transcription_job(TranscriptionJobName=id)
        job = result['TranscriptionJob']

        status = job['TranscriptionJobStatus']
        jinja_response['status'] = status
        if status == 'COMPLETED':
            trans_uri = job['Transcript']['TranscriptFileUri']
            trans_file = './resources/trans{}'.format(id)
            srt_file = trans_file + '.srt'

            urllib.request.urlretrieve(trans_uri, trans_file)
            trans = Transcribe()
            trans.parseOutput(trans_file)
            srt_mem = trans.srt_mem(srt_file)
            with open(trans_file, "r") as f:
                trans_data = json.load(f)

            # Best-effort cleanup of the downloaded transcript and the SRT
            # file that was read above. (The old code removed the transcript
            # path twice and never deleted the .srt file.)
            for stale_path in (trans_file, srt_file):
                try:
                    os.remove(stale_path)
                except OSError:
                    pass

            # Delete audio file in S3 bucket
            s3 = boto3.client('s3')
            bucket = 'orbitalphase1'
            audio_file = './resources/{}.flac'.format(id)
            s3.delete_object(Bucket=bucket, Key=audio_file)

            table.update_item(
                Key={'id': id},
                UpdateExpression=
                "SET job_status=:job_status, subs=:subtitles, transcript=:transcript",
                ExpressionAttributeValues={
                    ':job_status': 'Transcription done',
                    ':transcript': trans_data,
                    ':subtitles': srt_mem
                })
            jinja_response['status'] = 'Transcription done'
            jinja_response.update(ready_fields)
        elif status == 'IN_PROGRESS':
            jinja_response['status'] = 'Sent to AWS to Transcribe'
        # Any other job status is reported to the template unchanged.
    elif job_status == 'Transcription done' or "Edited" in job_status:
        jinja_response.update(ready_fields)

    return jinja_response
Ejemplo n.º 7
0
async def post_transcribe(request):
    """Placeholder transcription endpoint; GET requests get a 404.

    NOTE(review): ``parseOutput`` is invoked on the ``Transcribe`` class with
    no arguments, whereas other handlers call it on an instance with a file
    path — confirm this stub is intentional.
    """
    is_get = request.method == 'GET'
    if is_get:
        return abort(404)
    Transcribe.parseOutput()
    placeholder = "TODO"
    return response.text(placeholder)
Ejemplo n.º 8
0
async def post_transcribe(request):
    """Placeholder transcription endpoint that always answers "TODO".

    NOTE(review): ``parseOutput`` is invoked on the ``Transcribe`` class with
    no arguments, whereas other handlers call it on an instance with a file
    path — confirm this stub is intentional.
    """
    Transcribe.parseOutput()
    placeholder = "TODO"
    return response.text(placeholder)