Ejemplo n.º 1
0
def transcribe_document(
    document: Document,
    aws_bucket_name: str,
    lang: str,
) -> None:
    """Transcribe *document* and attach the transcription and words to it.

    Calls ``transcribe`` with ``document.filename`` as the file key, then
    populates ``document.transcription`` with an ``AWSTranscription`` built
    from the raw job result, and ``document.words`` with one ``Word`` per
    non-punctuation item of the result. The document is mutated in place;
    nothing is returned.

    Args:
        document: Object whose ``filename`` is used as the file key and
            whose ``words`` and ``transcription`` attributes are overwritten.
        aws_bucket_name: Forwarded unchanged to ``transcribe``.
        lang: Forwarded unchanged to ``transcribe``.

    Raises:
        ValueError: If a non-punctuation item has no ``"start_time"`` key.
        KeyError, IndexError: If the job result lacks any of the other
            keys/entries read below.
    """
    # NOTE(review): the result layout read here (results.transcripts /
    # results.items) looks like Amazon Transcribe's output JSON -- confirm
    # against what ``transcribe`` actually returns.
    job_raw = transcribe(
        filekey=document.filename,
        aws_bucket_name=aws_bucket_name,
        lang=lang,
    )
    full_text = job_raw["results"]["transcripts"][0]["transcript"]
    transcription = AWSTranscription(raw=job_raw, full_text=full_text)

    # Any words already attached to the document are discarded.
    document.words = []

    for i, item in enumerate(job_raw["results"]["items"]):
        if item["type"] == "punctuation":
            continue

        # Only the first alternative of each item is used.
        alternative = item["alternatives"][0]

        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O``, which would turn this into a bare KeyError below.
        if "start_time" not in item:
            raise ValueError(f"transcription item has no start_time: {item!r}")

        document.words.append(
            Word(
                word=alternative["content"],
                # ``i`` is the index in the raw item list, so skipped
                # punctuation items leave gaps in the ``order`` sequence.
                order=i,
                start_time=timedelta(seconds=float(item["start_time"])),
                end_time=timedelta(seconds=float(item["end_time"])),
                confidence=float(alternative["confidence"]),
            )
        )

    document.transcription = transcription