Beispiel #1
0
    def test_recover(self):
        """Check the upload state and the request made by ``recover()``.

        The upload starts out flagged invalid. The transport comes from
        ``_chunk_mock``, built from a ``PERMANENT_REDIRECT`` status and a
        ``range`` header.
        """
        last_byte = 55555
        next_offset = last_byte + 1

        upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
        upload._resumable_url = u"http://test.invalid?upload_id=big-deal"
        upload._stream = mock.Mock(spec=["seek"])
        upload._invalid = True  # Start out in the invalid state.

        range_headers = {u"range": u"bytes=0-{:d}".format(last_byte)}
        transport = self._chunk_mock(
            resumable_media.PERMANENT_REDIRECT, range_headers
        )

        response = upload.recover(transport)

        # The transport's (mocked) response is handed straight back.
        assert response is transport.request.return_value
        # State of ``upload`` once recovery is done.
        assert upload.bytes_uploaded == next_offset
        assert not upload.invalid
        upload._stream.seek.assert_called_once_with(next_offset)
        # Exactly one PUT request, carrying no payload.
        request_kwargs = dict(
            data=None,
            headers={u"content-range": u"bytes */*"},
            timeout=EXPECTED_TIMEOUT,
        )
        transport.request.assert_called_once_with(
            u"PUT", upload.resumable_url, **request_kwargs
        )
Beispiel #2
0
    def test_initiate_w_custom_timeout(self):
        """A ``timeout`` given to ``initiate()`` must reach the transport."""
        custom_timeout = 12.6
        declared_size = 100

        stream = io.BytesIO(b"Knock knock who is there")
        metadata = {u"name": u"got-jokes.txt"}

        # Transport exposing only ``request``, which returns a response
        # built with a ``location`` header.
        location = (u"http://test.invalid?upload_id=AACODBBBxuw9u3AA",)
        transport = mock.Mock(spec=["request"])
        transport.request.return_value = _make_response(
            headers={u"location": location}
        )

        upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
        upload.initiate(
            transport,
            stream,
            metadata,
            BASIC_CONTENT,
            total_bytes=declared_size,
            timeout=custom_timeout,
        )

        # Make sure timeout was passed to the transport
        transport.request.assert_called_once_with(
            u"POST",
            RESUMABLE_URL,
            data=b'{"name": "got-jokes.txt"}',
            headers={
                u"content-type": JSON_TYPE,
                u"x-upload-content-type": BASIC_CONTENT,
                u"x-upload-content-length": u"{:d}".format(declared_size),
            },
            timeout=custom_timeout,
        )
Beispiel #3
0
 def _upload_in_flight(data, headers=None):
     """Build a ``ResumableUpload`` with its private state already filled in.

     The private attributes are assigned directly: the stream wraps ``data``,
     the total size is ``len(data)``, the content type is ``BASIC_CONTENT``
     and the resumable URL is a fixed placeholder.
     """
     in_flight = upload_mod.ResumableUpload(
         RESUMABLE_URL, ONE_MB, headers=headers
     )
     in_flight._stream = io.BytesIO(data)
     in_flight._total_bytes = len(data)
     in_flight._resumable_url = u"http://test.invalid?upload_id=not-none"
     in_flight._content_type = BASIC_CONTENT
     return in_flight
Beispiel #4
0
    def test_initiate(self):
        """Check ``initiate()``: response returned, URL stored, POST issued."""
        payload = b"Knock knock who is there"
        declared_size = 100
        # Fixture sanity check: the declared total exceeds the payload size.
        assert declared_size > len(payload)

        # Transport exposing only ``request``, which returns a response
        # built with a ``location`` header.
        location = (u"http://test.invalid?upload_id=AACODBBBxuw9u3AA",)
        transport = mock.Mock(spec=["request"])
        transport.request.return_value = _make_response(
            headers={u"location": location}
        )

        upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
        # No resumable URL is set before initiation.
        assert upload._resumable_url is None

        response = upload.initiate(
            transport,
            io.BytesIO(payload),
            {u"name": u"got-jokes.txt"},
            BASIC_CONTENT,
            total_bytes=declared_size,
            stream_final=False,
        )

        # The return value is the transport's (mocked) response.
        assert response is transport.request.return_value
        # The ``location`` header value is now stored as the resumable URL.
        assert upload._resumable_url == location
        # Exactly one POST carrying the JSON metadata and upload headers.
        transport.request.assert_called_once_with(
            u"POST",
            RESUMABLE_URL,
            data=b'{"name": "got-jokes.txt"}',
            headers={
                u"content-type": JSON_TYPE,
                u"x-upload-content-type": BASIC_CONTENT,
                u"x-upload-content-length": u"{:d}".format(declared_size),
            },
            timeout=EXPECTED_TIMEOUT,
        )
    def test_recover(self):
        """Verify what ``recover()`` returns, requests and leaves behind.

        The upload is flagged invalid beforehand; the transport is produced by
        ``_chunk_mock`` from a ``PERMANENT_REDIRECT`` status and a ``range``
        header.
        """
        final_byte = 55555
        resume_from = final_byte + 1

        upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
        upload._stream = mock.Mock(spec=[u'seek'])
        upload._resumable_url = u'http://test.invalid?upload_id=big-deal'
        upload._invalid = True  # Make sure invalid.

        range_header = {u'range': u'bytes=0-{:d}'.format(final_byte)}
        transport = self._chunk_mock(
            resumable_media.PERMANENT_REDIRECT, range_header)

        result = upload.recover(transport)

        # ``recover()`` passes the transport's (mocked) response through.
        assert result is transport.request.return_value
        # Check the state of ``upload`` after.
        assert upload.bytes_uploaded == resume_from
        assert not upload.invalid
        upload._stream.seek.assert_called_once_with(resume_from)
        # Exactly one PUT request, carrying no payload.
        transport.request.assert_called_once_with(
            u'PUT',
            upload.resumable_url,
            data=None,
            headers={u'content-range': u'bytes */*'})
    def test_initiate(self):
        """Run ``initiate()`` on a mocked transport and check the outcome."""
        upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
        # Check resumable_url before.
        assert upload._resumable_url is None

        content = b'Knock knock who is there'
        size_hint = 100
        # Fixture sanity check: the declared total exceeds the content size.
        assert size_hint > len(content)

        # Transport exposing only ``request``, which returns a response
        # built with a ``location`` header.
        location = (u'http://test.invalid?upload_id=AACODBBBxuw9u3AA',)
        canned_response = _make_response(headers={u'location': location})
        transport = mock.Mock(spec=[u'request'])
        transport.request.return_value = canned_response

        # Make request and check the return value (against the mock).
        response = upload.initiate(
            transport,
            io.BytesIO(content),
            {u'name': u'got-jokes.txt'},
            BASIC_CONTENT,
            total_bytes=size_hint,
            stream_final=False)
        assert response is transport.request.return_value

        # Check resumable_url after.
        assert upload._resumable_url == location

        # Make sure the mock was called as expected.
        upload_headers = {
            u'x-upload-content-length': u'{:d}'.format(size_hint),
            u'x-upload-content-type': BASIC_CONTENT,
            u'content-type': JSON_TYPE,
        }
        transport.request.assert_called_once_with(
            u'POST',
            RESUMABLE_URL,
            data=b'{"name": "got-jokes.txt"}',
            headers=upload_headers)
def _offset_to_ms(offset) -> int:
    '''
    Converts a word time offset exposing `seconds` and `microseconds`
    attributes into whole milliseconds.
    '''
    return offset.seconds * 10**3 + offset.microseconds // 10**3


def transcribe(source_audio_path: str, target_srt_path: str,
               progress_callback: Callable[[str, float], None]) -> None:
    '''
    Accesses Google to transcribe a WAV file at `source_audio_path` and stores
    the text transcript in SubRip Subtitle (SRT) format at `target_srt_path`.

    Blocks the thread and reports progress regularly through the
    `progress_callback`, which is called with a stage name (`'upload'` or
    `'transcribe'`) and the fraction of that stage completed so far.

    The audio is uploaded to cloud storage first; the uploaded copy is deleted
    again once recognition has finished, whether it succeeded or raised.
    '''
    blob_name = path.basename(source_audio_path)
    # Upload the audio to Google Cloud Storage (GCS), which is required for
    # audios longer than 1 minute
    blob = _storage_bucket.blob(blob_name)
    resumable = upload.ResumableUpload(_UPLOAD_URL, 2**18)  # 256 KB chunks
    transport = requests.AuthorizedSession(_storage_client._credentials)
    with open(source_audio_path, 'rb') as audio:
        resumable.initiate(transport, audio, {'name': blob.name}, 'audio/wav')
        progress_callback('upload', 0)
        while not resumable.finished:
            resumable.transmit_next_chunk(transport)
            progress_callback('upload',
                              resumable.bytes_uploaded / resumable.total_bytes)
        progress_callback('upload', 1)
    try:
        # Request an online transcription
        with wave.open(source_audio_path, 'rb') as wav:  # rb for read binary
            frame_rate = wav.getframerate()
        config = speech.RecognitionConfig(
            enable_word_time_offsets=True,
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code='en-US',
            sample_rate_hertz=frame_rate)
        recognition_audio = speech.RecognitionAudio(
            uri=_ACCESS_URI_FORMAT % blob_name)
        operation = _speech_client.long_running_recognize(
            config=config, audio=recognition_audio)
        while not operation.done():
            # The operation may not carry metadata yet on the first polls;
            # report no progress instead of failing on a missing attribute.
            metadata = operation.metadata
            percent = (metadata.progress_percent
                       if metadata is not None else 0)
            progress_callback('transcribe', percent * .01)
            time.sleep(_PROGRESS_INTERVAL)
    finally:
        # Delete the uploaded audio to save cloud storage, even when the
        # recognition request or the polling above raised.
        blob.delete()
    # Store the transcription
    line = []
    line_index = 1
    # NOTE(review): the first line keeps a start time of 0 instead of its
    # first word's start time -- confirm this is intended.
    line_start_time = line_end_time = 0  # all in milliseconds
    with open(target_srt_path, 'w') as target:
        for res in operation.result().results:
            for word_data in res.alternatives[0].words:
                word_start_time = _offset_to_ms(word_data.start_time)
                word_end_time = _offset_to_ms(word_data.end_time)
                # Start a new subtitle line when the current one is full or
                # when the pause since the previous word is long enough.
                if (len(line) == _MAX_NUM_WORDS_IN_LINE
                        or (line and word_start_time - line_end_time
                            >= _LINE_INTERVAL)):
                    _write_line(target, line, line_index, line_start_time,
                                line_end_time)
                    line.clear()
                    line_index += 1
                    line_start_time = word_start_time
                line.append(word_data.word)
                line_end_time = word_end_time
        # Flush the words collected after the last line break.
        if line:
            _write_line(target, line, line_index, line_start_time,
                        line_end_time)