def _testSeekForward(self, initial_seek):
  """Tests seeking to an initial position and then reading.

  This function simulates an upload that is resumed after a process break.
  It seeks from zero to the initial position (as if the server already had
  those bytes). Then it reads to the end of the file, ensuring the hash
  matches the original file upon completion.

  Args:
    initial_seek: Number of bytes to initially seek.

  Raises:
    AssertionError on wrong amount of data remaining or hash mismatch.
  """
  tmp_file = self._GetTestFile()
  tmp_file_len = os.path.getsize(tmp_file)
  self.assertLess(
      initial_seek, tmp_file_len,
      'initial_seek must be less than test file size %s '
      '(but was actually: %s)' % (tmp_file_len, initial_seek))
  digesters = {'md5': GetMd5()}
  with open(tmp_file, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5},
                                       self._dummy_url, self.logger)
    wrapper.seek(initial_seek)
    self.assertEqual(wrapper.tell(), initial_seek)
    data = wrapper.read()
    self.assertEqual(len(data), tmp_file_len - initial_seek)
  with open(tmp_file, 'rb') as stream:
    actual = CalculateMd5FromContents(stream)
  self.assertEqual(actual, digesters['md5'].hexdigest())
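# A minimal sketch of how a driver test might invoke the helper above at
# buffer boundaries. The method name and the specific offsets are
# illustrative assumptions, not taken from the source.
def testSeekForward(self):
  for initial_seek in (0,
                       TRANSFER_BUFFER_SIZE - 1,
                       TRANSFER_BUFFER_SIZE,
                       TRANSFER_BUFFER_SIZE + 1,
                       TRANSFER_BUFFER_SIZE * 2 - 1):
    self._testSeekForward(initial_seek)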
def testGetsMd5HashOnRedHatSystem(self, mock_md5):
  # Can't actually compare output to calling hashlib.md5 because that could
  # trigger an error on a non-Red Hat system.
  # Return one ValueError to simulate a FIPS-mode distribution.
  mock_md5.side_effect = [ValueError, 'hash']
  self.assertEqual(GetMd5(b''), 'hash')
  self.assertEqual(mock_md5.mock_calls,
                   [mock.call(b''),
                    mock.call(b'', usedforsecurity=False)])
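# For context, a minimal sketch of the GetMd5 helper that this test and
# testGetsMd5HashOnNonRedHatSystem exercise, assuming the usual FIPS-mode
# fallback: on FIPS-enabled (for example, Red Hat) builds, hashlib.md5
# raises ValueError unless usedforsecurity=False is passed (a Python 3.9+
# keyword). The exact implementation in the source may differ.
import hashlib

def GetMd5(byte_string=b''):
  """Returns an md5 object, retrying with usedforsecurity=False on FIPS."""
  try:
    return hashlib.md5(byte_string)
  except ValueError:
    # FIPS-mode distributions only allow md5 when it is explicitly marked
    # as not used for security purposes.
    return hashlib.md5(byte_string, usedforsecurity=False)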
def testReadInChunks(self):
  tmp_file = self._GetTestFile()
  with open(tmp_file, 'rb') as stream:
    wrapper = ResumableStreamingJsonUploadWrapper(stream,
                                                  TRANSFER_BUFFER_SIZE,
                                                  test_small_buffer=True)
    hash_dict = {'md5': GetMd5()}
    # CalculateHashesFromContents reads in chunks, but does not seek.
    CalculateHashesFromContents(wrapper, hash_dict)
  with open(tmp_file, 'rb') as stream:
    actual = CalculateMd5FromContents(stream)
  self.assertEqual(actual, hash_dict['md5'].hexdigest())
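# The chunked-read behavior the comment above refers to can be pictured as
# follows: a sketch of a hash-update loop that reads fixed-size chunks,
# updates every digester, and never calls seek() on the file-like object.
# The chunk-size constant is an illustrative assumption, not the source's.
def CalculateHashesFromContents(fp, hash_dict):
  while True:
    data = fp.read(TRANSFER_BUFFER_SIZE)  # chunk size is an assumption
    if not data:
      break
    for hash_alg in hash_dict.values():
      hash_alg.update(data)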
def testReadToEOF(self):
  digesters = {'md5': GetMd5()}
  tmp_file = self.CreateTempFile(contents=b'a' * TRANSFER_BUFFER_SIZE * 4)
  with open(tmp_file, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5},
                                       self._dummy_url, self.logger)
    wrapper.read()
  with open(tmp_file, 'rb') as stream:
    actual = CalculateMd5FromContents(stream)
  self.assertEqual(actual, digesters['md5'].hexdigest())
def _testSeekBack(self, initial_position, seek_back_amount):
  """Tests reading then seeking backwards.

  This function simulates an upload that is resumed after a connection
  break. It reads one transfer buffer at a time until it reaches
  initial_position, then seeks backwards (as if the server did not receive
  some of the bytes) and reads to the end of the file, ensuring the hash
  matches the original file upon completion.

  Args:
    initial_position: Initial number of bytes to read before seek.
    seek_back_amount: Number of bytes to seek backward.

  Raises:
    AssertionError on wrong amount of data remaining or hash mismatch.
  """
  tmp_file = self._GetTestFile()
  tmp_file_len = os.path.getsize(tmp_file)
  self.assertGreaterEqual(
      initial_position, seek_back_amount,
      'seek_back_amount must be less than or equal to initial position %s '
      '(but was actually: %s)' % (initial_position, seek_back_amount))
  self.assertLess(
      initial_position, tmp_file_len,
      'initial_position must be less than test file size %s '
      '(but was actually: %s)' % (tmp_file_len, initial_position))
  digesters = {'md5': GetMd5()}
  with open(tmp_file, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5},
                                       self._dummy_url, self.logger)
    position = 0
    while position < initial_position - TRANSFER_BUFFER_SIZE:
      data = wrapper.read(TRANSFER_BUFFER_SIZE)
      position += len(data)
    wrapper.read(initial_position - position)
    wrapper.seek(initial_position - seek_back_amount)
    self.assertEqual(wrapper.tell(), initial_position - seek_back_amount)
    data = wrapper.read()
    self.assertEqual(len(data),
                     tmp_file_len - (initial_position - seek_back_amount))
  with open(tmp_file, 'rb') as stream:
    actual = CalculateMd5FromContents(stream)
  self.assertEqual(actual, digesters['md5'].hexdigest())
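# As with the seek-forward helper, a driver presumably feeds this helper
# (initial_position, seek_back_amount) pairs around buffer boundaries.
# The method name and the pairs below are illustrative assumptions.
def testSeekBack(self):
  for initial_position in (TRANSFER_BUFFER_SIZE, TRANSFER_BUFFER_SIZE * 2):
    for seek_back_amount in (1,
                             TRANSFER_BUFFER_SIZE - 1,
                             TRANSFER_BUFFER_SIZE):
      self._testSeekBack(initial_position, seek_back_amount)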
def testInvalidSeekAway(self):
  """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
  tmp_file = self._GetTestFile()
  digesters = {'md5': GetMd5()}
  with open(tmp_file, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5},
                                       self._dummy_url, self.logger)
    wrapper.read(TRANSFER_BUFFER_SIZE)
    wrapper.seek(0, os.SEEK_END)
    try:
      wrapper.read()
      self.fail('Expected CommandException for invalid seek.')
    except CommandException as e:
      self.assertIn(
          'Read called on hashing file pointer in an unknown position',
          str(e))
def _testSeekAway(self, initial_read):
  """Tests reading to an initial position and then seeking to EOF and back.

  This function simulates a size check on the input file by seeking to the
  end of the file and then back to the current position. Then it reads to
  the end of the file, ensuring the hash matches the original file upon
  completion.

  Args:
    initial_read: Number of bytes to initially read.

  Raises:
    AssertionError on wrong amount of data remaining or hash mismatch.
  """
  tmp_file = self._GetTestFile()
  tmp_file_len = os.path.getsize(tmp_file)
  self.assertLess(
      initial_read, tmp_file_len,
      'initial_read must be less than test file size %s '
      '(but was actually: %s)' % (tmp_file_len, initial_read))
  digesters = {'md5': GetMd5()}
  with open(tmp_file, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': GetMd5},
                                       self._dummy_url, self.logger)
    wrapper.read(initial_read)
    self.assertEqual(wrapper.tell(), initial_read)
    wrapper.seek(0, os.SEEK_END)
    self.assertEqual(wrapper.tell(), tmp_file_len)
    wrapper.seek(initial_read, os.SEEK_SET)
    data = wrapper.read()
    self.assertEqual(len(data), tmp_file_len - initial_read)
  with open(tmp_file, 'rb') as stream:
    actual = CalculateMd5FromContents(stream)
  self.assertEqual(actual, digesters['md5'].hexdigest())
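# The size-check pattern this helper simulates, shown as a standalone
# sketch: an uploader measures the stream length by seeking to EOF, then
# restores its position before continuing to read. The helper name is
# hypothetical, for illustration only.
def _GetStreamSize(fp):
  current = fp.tell()
  fp.seek(0, os.SEEK_END)  # jump to EOF to learn the total size
  size = fp.tell()
  fp.seek(current, os.SEEK_SET)  # restore position so reads can continue
  return size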
def HashRewriteParameters(src_obj_metadata,
                          dst_obj_metadata,
                          projection,
                          src_generation=None,
                          gen_match=None,
                          meta_gen_match=None,
                          canned_acl=None,
                          max_bytes_per_call=None,
                          src_dec_key_sha256=None,
                          dst_enc_key_sha256=None,
                          fields=None):
  """Creates an MD5 hex digest of the parameters for a rewrite call.

  Resuming rewrites requires that the input parameters are identical. Thus,
  the rewrite tracker file needs to represent the input parameters. For easy
  comparison, hash the input values. If a user performs a
  same-source/same-destination rewrite via a different command (for example,
  with a changed ACL), the hashes will not match and we will restart the
  rewrite from the beginning.

  Args:
    src_obj_metadata: apitools Object describing source object. Must include
        bucket, name, and etag.
    dst_obj_metadata: apitools Object describing destination object. Must
        include bucket and object name.
    projection: Projection used for the API call.
    src_generation: Optional source generation.
    gen_match: Optional generation precondition.
    meta_gen_match: Optional metageneration precondition.
    canned_acl: Optional canned ACL string.
    max_bytes_per_call: Optional maximum bytes rewritten per call.
    src_dec_key_sha256: Optional SHA256 hash string of decryption key for
        source object.
    dst_enc_key_sha256: Optional SHA256 hash string of encryption key for
        destination object.
    fields: Optional fields to include in response to call.

  Returns:
    MD5 hex digest of the input parameters, or None if required parameters
    are missing.
  """
  if (not src_obj_metadata or not src_obj_metadata.bucket or
      not src_obj_metadata.name or not src_obj_metadata.etag or
      not dst_obj_metadata or not dst_obj_metadata.bucket or
      not dst_obj_metadata.name or not projection):
    return
  md5_hash = GetMd5()
  for input_param in (
      src_obj_metadata,
      dst_obj_metadata,
      projection,
      src_generation,
      gen_match,
      meta_gen_match,
      canned_acl,
      fields,
      max_bytes_per_call,
      src_dec_key_sha256,
      dst_enc_key_sha256,
  ):
    # Tracker file matching changed between gsutil 4.15 -> 4.16 and will
    # cause rewrites to start over from the beginning on a gsutil version
    # upgrade.
    if input_param is not None:
      md5_hash.update(six.text_type(input_param).encode('UTF8'))
  return md5_hash.hexdigest()
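# A hedged sketch of how this digest is typically used: compare the hash
# stored in a rewrite tracker file against the hash of the current call's
# parameters, and resume only on a match. The helper name and the tracker
# dict layout are illustrative assumptions, not the source's API.
def _CanResumeRewrite(tracker_data, src_obj_metadata, dst_obj_metadata,
                      projection):
  current_hash = HashRewriteParameters(src_obj_metadata, dst_obj_metadata,
                                       projection)
  # If any required parameter was missing, current_hash is None and we
  # conservatively restart the rewrite from the beginning.
  return (current_hash is not None and
          tracker_data.get('hash') == current_hash)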
def testGetsMd5HashOnNonRedHatSystem(self, mock_md5):
  # Can't actually compare output to calling hashlib.md5 because that could
  # trigger an error on a Red Hat system.
  mock_md5.return_value = 'hash'
  self.assertEqual(GetMd5(b''), 'hash')
  mock_md5.assert_called_once_with(b'')