def test_recover(self):
    """``recover()`` resyncs an invalid upload from the server-reported range."""
    # Arrange: an upload flagged invalid, with a stream that only supports
    # ``seek`` so any other stream access would fail the test.
    upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
    upload._invalid = True
    upload._stream = mock.Mock(spec=["seek"])
    upload._resumable_url = u"http://test.invalid?upload_id=big-deal"

    last_byte = 55555
    resume_offset = last_byte + 1
    range_headers = {u"range": u"bytes=0-{:d}".format(last_byte)}
    transport = self._chunk_mock(resumable_media.PERMANENT_REDIRECT, range_headers)

    # Act: the return value must be the transport's response, untouched.
    response = upload.recover(transport)
    assert response is transport.request.return_value

    # Assert: the upload is valid again and positioned just past the last
    # byte the server acknowledged.
    assert upload.bytes_uploaded == resume_offset
    assert not upload.invalid
    upload._stream.seek.assert_called_once_with(resume_offset)

    # Assert: exactly one status-query PUT was sent, with no payload.
    status_query_headers = {u"content-range": u"bytes */*"}
    transport.request.assert_called_once_with(
        u"PUT",
        upload.resumable_url,
        data=None,
        headers=status_query_headers,
        timeout=EXPECTED_TIMEOUT,
    )
def test_initiate_w_custom_timeout(self):
    """``initiate()`` forwards a caller-supplied ``timeout`` to the transport."""
    upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
    data = b"Knock knock who is there"
    stream = io.BytesIO(data)
    metadata = {u"name": u"got-jokes.txt"}

    transport = mock.Mock(spec=["request"])
    # HTTP header values are strings, so the fake ``location`` header is a
    # plain URL string (a trailing comma here would make it a 1-tuple).
    location = u"http://test.invalid?upload_id=AACODBBBxuw9u3AA"
    response_headers = {u"location": location}
    post_response = _make_response(headers=response_headers)
    transport.request.return_value = post_response

    upload.initiate(
        transport,
        stream,
        metadata,
        BASIC_CONTENT,
        total_bytes=100,
        timeout=12.6,
    )

    # Make sure timeout was passed to the transport
    json_bytes = b'{"name": "got-jokes.txt"}'
    expected_headers = {
        u"content-type": JSON_TYPE,
        u"x-upload-content-type": BASIC_CONTENT,
        u"x-upload-content-length": u"{:d}".format(100),
    }
    transport.request.assert_called_once_with(
        u"POST",
        RESUMABLE_URL,
        data=json_bytes,
        headers=expected_headers,
        timeout=12.6,
    )
def _upload_in_flight(data, headers=None):
    """Build a ``ResumableUpload`` whose private state is already populated.

    The stream, content type, total size and resumable URL are set directly
    on the instance instead of going through ``initiate()``.

    Args:
        data (bytes): Payload wrapped in an in-memory stream; its length is
            used as the total byte count.
        headers: Passed through unchanged as the ``headers`` keyword of the
            ``ResumableUpload`` constructor.

    Returns:
        The prepared upload object.
    """
    in_flight = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB, headers=headers)
    payload_size = len(data)
    payload_stream = io.BytesIO(data)

    in_flight._stream = payload_stream
    in_flight._content_type = BASIC_CONTENT
    in_flight._total_bytes = payload_size
    in_flight._resumable_url = u"http://test.invalid?upload_id=not-none"
    return in_flight
def test_initiate(self):
    """``initiate()`` POSTs the metadata and stores the returned upload URL."""
    upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
    data = b"Knock knock who is there"
    stream = io.BytesIO(data)
    metadata = {u"name": u"got-jokes.txt"}

    transport = mock.Mock(spec=["request"])
    # HTTP header values are strings, so the fake ``location`` header is a
    # plain URL string (a trailing comma here would make it a 1-tuple and
    # the URL assertion below would compare against a tuple).
    location = u"http://test.invalid?upload_id=AACODBBBxuw9u3AA"
    response_headers = {u"location": location}
    post_response = _make_response(headers=response_headers)
    transport.request.return_value = post_response

    # Check resumable_url before.
    assert upload._resumable_url is None

    # Make request and check the return value (against the mock).
    # ``total_bytes`` is deliberately larger than the stream contents so the
    # expected content-length header below can only come from the argument.
    total_bytes = 100
    assert total_bytes > len(data)
    response = upload.initiate(
        transport,
        stream,
        metadata,
        BASIC_CONTENT,
        total_bytes=total_bytes,
        stream_final=False,
    )
    assert response is transport.request.return_value

    # Check resumable_url after.
    assert upload._resumable_url == location

    # Make sure the mock was called as expected.
    json_bytes = b'{"name": "got-jokes.txt"}'
    expected_headers = {
        u"content-type": JSON_TYPE,
        u"x-upload-content-type": BASIC_CONTENT,
        u"x-upload-content-length": u"{:d}".format(total_bytes),
    }
    transport.request.assert_called_once_with(
        u"POST",
        RESUMABLE_URL,
        data=json_bytes,
        headers=expected_headers,
        timeout=EXPECTED_TIMEOUT,
    )
def test_recover(self):
    """``recover()`` resyncs an invalid upload from the server-reported range."""
    # Arrange: an upload flagged invalid, with a stream that only supports
    # ``seek`` so any other stream access would fail the test.
    upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
    upload._invalid = True
    upload._stream = mock.Mock(spec=[u'seek'])
    upload._resumable_url = u'http://test.invalid?upload_id=big-deal'

    last_byte = 55555
    resume_offset = last_byte + 1
    range_headers = {u'range': u'bytes=0-{:d}'.format(last_byte)}
    transport = self._chunk_mock(resumable_media.PERMANENT_REDIRECT, range_headers)

    # Act: the return value must be the transport's response, untouched.
    response = upload.recover(transport)
    assert response is transport.request.return_value

    # Assert: the upload is valid again and positioned just past the last
    # byte the server acknowledged.
    assert upload.bytes_uploaded == resume_offset
    assert not upload.invalid
    upload._stream.seek.assert_called_once_with(resume_offset)

    # Assert: exactly one status-query PUT was sent, with no payload.
    status_query_headers = {u'content-range': u'bytes */*'}
    transport.request.assert_called_once_with(
        u'PUT',
        upload.resumable_url,
        data=None,
        headers=status_query_headers,
    )
def test_initiate(self):
    """``initiate()`` POSTs the metadata and stores the returned upload URL."""
    upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB)
    data = b'Knock knock who is there'
    stream = io.BytesIO(data)
    metadata = {u'name': u'got-jokes.txt'}

    transport = mock.Mock(spec=[u'request'])
    # HTTP header values are strings, so the fake ``location`` header is a
    # plain URL string (a trailing comma here would make it a 1-tuple and
    # the URL assertion below would compare against a tuple).
    location = u'http://test.invalid?upload_id=AACODBBBxuw9u3AA'
    response_headers = {u'location': location}
    post_response = _make_response(headers=response_headers)
    transport.request.return_value = post_response

    # Check resumable_url before.
    assert upload._resumable_url is None

    # Make request and check the return value (against the mock).
    # ``total_bytes`` is deliberately larger than the stream contents so the
    # expected content-length header below can only come from the argument.
    total_bytes = 100
    assert total_bytes > len(data)
    response = upload.initiate(
        transport,
        stream,
        metadata,
        BASIC_CONTENT,
        total_bytes=total_bytes,
        stream_final=False,
    )
    assert response is transport.request.return_value

    # Check resumable_url after.
    assert upload._resumable_url == location

    # Make sure the mock was called as expected.
    json_bytes = b'{"name": "got-jokes.txt"}'
    expected_headers = {
        u'content-type': JSON_TYPE,
        u'x-upload-content-type': BASIC_CONTENT,
        u'x-upload-content-length': u'{:d}'.format(total_bytes),
    }
    transport.request.assert_called_once_with(
        u'POST',
        RESUMABLE_URL,
        data=json_bytes,
        headers=expected_headers,
    )
def _offset_to_ms(offset) -> int:
    '''
    Converts a word time offset into whole milliseconds, using the offset's
    `seconds` and `microseconds` attributes (sub-millisecond part truncated).
    '''
    return offset.seconds * 10**3 + offset.microseconds // 10**3


def transcribe(source_audio_path: str, target_srt_path: str,
               progress_callback: Callable[[str, float], None]):
    '''
    Accesses Google to transcribe a WAV file at `source_audio_path` and stores
    the text transcript in SubRip Subtitle (SRT) format at `target_srt_path`.
    Blocks the thread and reports progress regularly through the
    `progress_callback`.

    `progress_callback` is called as `progress_callback(stage, fraction)`,
    where `stage` is `'upload'` or `'transcribe'` and `fraction` is the
    completed share of that stage (0 to 1).

    The uploaded audio is deleted from cloud storage once recognition has
    finished, whether it succeeded or raised.
    '''
    blob_name = path.basename(source_audio_path)

    # Upload the audio to Google Cloud Storage (GCS), which is required for
    # audios longer than 1 minute
    blob = _storage_bucket.blob(blob_name)
    resumable = upload.ResumableUpload(_UPLOAD_URL, 2**18)  # 256 KB chunks
    transport = requests.AuthorizedSession(_storage_client._credentials)
    with open(source_audio_path, 'rb') as audio_file:
        resumable.initiate(transport, audio_file, {'name': blob.name},
                           'audio/wav')
        progress_callback('upload', 0)
        while not resumable.finished:
            resumable.transmit_next_chunk(transport)
            progress_callback(
                'upload', resumable.bytes_uploaded / resumable.total_bytes)
    progress_callback('upload', 1)

    # From here on the blob exists in the bucket, so make sure it is removed
    # even when reading the WAV header or the recognition itself fails.
    try:
        # Request an online transcription
        with wave.open(source_audio_path, 'rb') as wav_file:
            frame_rate = wav_file.getframerate()
        config = speech.RecognitionConfig(
            enable_word_time_offsets=True,
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code='en-US',
            sample_rate_hertz=frame_rate)
        recognition_audio = speech.RecognitionAudio(
            uri=_ACCESS_URI_FORMAT % blob_name)
        operation = _speech_client.long_running_recognize(
            config=config, audio=recognition_audio)
        while not operation.done():
            # The operation carries no metadata until the service has
            # reported some; treat that as 0% instead of crashing.
            metadata = operation.metadata
            percent = metadata.progress_percent if metadata is not None else 0
            progress_callback('transcribe', percent * .01)
            time.sleep(_PROGRESS_INTERVAL)
        results = operation.result().results
    finally:
        # Delete the uploaded audio to save cloud storage
        blob.delete()

    # Store the transcription
    line = []
    line_index = 1
    line_start_time = line_end_time = 0  # all in milliseconds
    # SRT files are conventionally UTF-8; do not depend on the platform's
    # default encoding.
    with open(target_srt_path, 'w', encoding='utf-8') as target:
        for res in results:
            if not res.alternatives:
                continue
            for word_data in res.alternatives[0].words:
                word_start_time = _offset_to_ms(word_data.start_time)
                word_end_time = _offset_to_ms(word_data.end_time)
                # Break the line when it is full, or when the pause since the
                # previous word is long enough.
                if (len(line) == _MAX_NUM_WORDS_IN_LINE or
                        line and word_start_time - line_end_time >=
                        _LINE_INTERVAL):
                    _write_line(target, line, line_index, line_start_time,
                                line_end_time)
                    line.clear()
                    line_index += 1
                # Every line, including the very first one, starts when its
                # first word starts.
                if not line:
                    line_start_time = word_start_time
                line.append(word_data.word)
                line_end_time = word_end_time
        if line:
            _write_line(target, line, line_index, line_start_time,
                        line_end_time)