def test_store_object(self):
    """store_object should PUT exactly once per call and forward the
    extra_info dict to the connection as response_dict."""
    client = self.client
    client.connection.head_container = Mock()
    client.connection.put_object = Mock()
    saved_get_object = client.get_object
    client.get_object = Mock(return_value=self.fake_object)
    content = u"something with ü†ƒ-8"
    checksum = utils.get_checksum(content)
    client.store_object(self.cont_name, self.obj_name, content,
            content_type="test/test", etag=checksum,
            content_encoding="gzip")
    self.assertEqual(client.connection.put_object.call_count, 1)
    # A caller-supplied extra_info must surface as the response_dict kwarg.
    response = {}
    client.store_object(self.cont_name, self.obj_name, content,
            content_type="test/test", etag=checksum,
            content_encoding="gzip", extra_info=response)
    client.connection.put_object.assert_called_with(ANY, ANY, contents=ANY,
            content_type=ANY, etag=ANY, headers=ANY,
            response_dict=response)
    client.get_object = saved_get_object
def test_get_checksum_from_string(self):
    """get_checksum on a plain str must match a direct md5 hexdigest."""
    text = "some random text"
    expected = hashlib.md5(text).hexdigest()
    self.assertEqual(expected, utils.get_checksum(text))
def upload(fileobj, content_type, etag):
    """Upload `fileobj` to the container, segmenting it when it exceeds
    self.max_file_size.

    Small objects are PUT directly. Larger ones are written out as
    numbered segment objects, followed by a manifest object that ties
    the segments together. Returns the result of the final
    put_object() call.
    """
    if isinstance(fileobj, basestring):
        # This is an empty directory file
        fsize = 0
    else:
        fsize = get_file_size(fileobj)
    if fsize < self.max_file_size:
        # We can just upload it as-is.
        return self.connection.put_object(cont.name, obj_name,
                contents=fileobj, content_type=content_type, etag=etag)
    # Files larger than self.max_file_size must be segmented
    # and uploaded separately.
    num_segments = int(math.ceil(float(fsize) / self.max_file_size))
    digits = int(math.log10(num_segments)) + 1
    # NOTE: This could be greatly improved with threading or other
    # async design.
    for segment in xrange(num_segments):
        sequence = str(segment + 1).zfill(digits)
        seg_name = "%s.%s" % (fname, sequence)
        with utils.SelfDeletingTempfile() as tmpname:
            # Use open() rather than the deprecated file() builtin,
            # matching the headers-aware variant of this helper.
            with open(tmpname, "wb") as tmp:
                tmp.write(fileobj.read(self.max_file_size))
            with open(tmpname, "rb") as tmp:
                # We have to calculate the etag for each segment
                etag = utils.get_checksum(tmp)
                self.connection.put_object(cont.name, seg_name,
                        contents=tmp, content_type=content_type,
                        etag=etag)
    # Upload the manifest
    hdr = {"X-Object-Meta-Manifest": "%s." % fname}
    return self.connection.put_object(cont.name, fname, contents=None,
            headers=hdr)
def test_get_checksum_from_string(self):
    """Checksum of random ASCII bytes equals the raw md5 hexdigest."""
    data = utils.random_ascii().encode("ascii")
    self.assertEqual(hashlib.md5(data).hexdigest(),
            utils.get_checksum(data))
def upload(fileobj, content_type, etag, headers):
    """Upload `fileobj` to the container with the given headers,
    segmenting it when it exceeds self.max_file_size.

    Small objects are PUT directly. Larger ones are written out as
    numbered segment objects, followed by a manifest object that ties
    the segments together. Returns the result of the final
    put_object() call. The caller's `headers` dict is not modified.
    """
    if isinstance(fileobj, basestring):
        # This is an empty directory file
        fsize = 0
    else:
        fsize = get_file_size(fileobj)
    if fsize < self.max_file_size:
        # We can just upload it as-is.
        return self.connection.put_object(cont.name, obj_name,
                contents=fileobj, content_type=content_type, etag=etag,
                headers=headers)
    # Files larger than self.max_file_size must be segmented
    # and uploaded separately.
    num_segments = int(math.ceil(float(fsize) / self.max_file_size))
    digits = int(math.log10(num_segments)) + 1
    # NOTE: This could be greatly improved with threading or other
    # async design.
    for segment in xrange(num_segments):
        sequence = str(segment + 1).zfill(digits)
        seg_name = "%s.%s" % (fname, sequence)
        with utils.SelfDeletingTempfile() as tmpname:
            with open(tmpname, "wb") as tmp:
                tmp.write(fileobj.read(self.max_file_size))
            with open(tmpname, "rb") as tmp:
                # We have to calculate the etag for each segment
                etag = utils.get_checksum(tmp)
                self.connection.put_object(cont.name, seg_name,
                        contents=tmp, content_type=content_type,
                        etag=etag, headers=headers)
    # Upload the manifest. Copy the headers first so the manifest
    # entry doesn't leak back into the caller's dict as a side effect.
    manifest_headers = dict(headers)
    manifest_headers["X-Object-Meta-Manifest"] = "%s." % fname
    return self.connection.put_object(cont.name, fname, contents=None,
            headers=manifest_headers)
def test_get_checksum_from_binary(self):
    """get_checksum on raw binary content must match md5 of the bytes."""
    test = fakes.get_png_content()
    md = hashlib.md5()
    # Binary input is hashed as-is; no encoding step is involved.
    md.update(test)
    expected = md.hexdigest()
    received = utils.get_checksum(test)
    self.assertEqual(expected, received)
def test_get_checksum_from_unicode(self):
    """Unicode input is encoded as UTF-8 before hashing by default."""
    text = utils.random_unicode()
    encoding = "utf8"
    expected = hashlib.md5(text.encode(encoding)).hexdigest()
    self.assertEqual(expected, utils.get_checksum(text))
def test_get_checksum_from_unicode_alt_encoding(self):
    """An explicit encoding argument is honored when hashing unicode."""
    text = u"some ñøñåßçîî text"
    encoding = "Windows-1252"
    expected = hashlib.md5(text.encode(encoding)).hexdigest()
    self.assertEqual(expected, utils.get_checksum(text, encoding))
def test_get_checksum_from_binary(self):
    """get_checksum on random binary data must equal md5 of the bytes."""
    test = os.urandom(1024)
    md = hashlib.md5()
    # Binary input is hashed directly; no encoding step is involved.
    md.update(test)
    expected = md.hexdigest()
    received = utils.get_checksum(test)
    self.assertEqual(expected, received)
def test_get_checksum_from_binary(self):
    """get_checksum on PNG file bytes must equal md5 of the raw content."""
    test = fakes.png_file
    md = hashlib.md5()
    # Binary input is hashed directly; no encoding step is involved.
    md.update(test)
    expected = md.hexdigest()
    received = utils.get_checksum(test)
    self.assertEqual(expected, received)
def test_get_checksum_from_file(self):
    """get_checksum should accept an open file object and hash its contents."""
    text = "some random text"
    expected = hashlib.md5(text).hexdigest()
    with utils.SelfDeletingTempfile() as path:
        with open(path, "w") as outfile:
            outfile.write(text)
        with open(path, "r") as infile:
            self.assertEqual(expected, utils.get_checksum(infile))
def add_entry(base, path):
    """Record (or refresh) the manifest entry for `path` under `base`.

    Files whose manifest mtime is already current are skipped.
    """
    node = FSNode(base, path)
    if path in manifest and node.mtime <= manifest[path]["mtime"]:
        # Manifest entry is up to date; nothing to record.
        return
    # Removed leftover debug statement: print "noype"
    file_hash = utils.get_checksum(os.path.join(base, path))
    manifest[path] = {
        "md5": file_hash,
        "atime": node.atime,
        "ctime": node.ctime,
        "mtime": node.mtime,
    }
def test_store_object(self):
    """A single store_object call should issue exactly one PUT."""
    client = self.client
    client.connection.head_container = Mock()
    client.connection.put_object = Mock()
    saved_get_object = client.get_object
    client.get_object = Mock(return_value=self.fake_object)
    content = u"something with ü†ƒ-8"
    checksum = utils.get_checksum(content)
    client.store_object(self.cont_name, self.obj_name, content,
            content_type="test/test", etag=checksum)
    self.assertEqual(client.connection.put_object.call_count, 1)
    client.get_object = saved_get_object
def test_store_object(self):
    """Container.store_object should delegate to a single connection PUT."""
    cont = self.container
    cont.client.connection.head_container = Mock()
    cont.client.connection.put_object = Mock()
    saved_get_object = cont.client.get_object
    cont.client.get_object = Mock(return_value=self.fake_object)
    content = "something"
    checksum = utils.get_checksum(content)
    cont.store_object(self.obj_name, content, content_type="test/test",
            etag=checksum, content_encoding="gzip")
    self.assertEqual(cont.client.connection.put_object.call_count, 1)
    cont.client.get_object = saved_get_object
def _sync_folder_to_container(self, folder_path, cont, prefix, delete,
        include_hidden, ignore, ignore_timestamps):
    """
    This is the internal method that is called recursively to handle
    nested folder structures.

    Walks `folder_path`, uploading files whose checksum differs from
    the remote object's etag. `prefix` accumulates the relative path
    so remote object names mirror the local tree. When `delete` is
    True, objects not seen locally are removed — but only from the
    top-level call (prefix is empty), after the full walk completes.
    """
    fnames = os.listdir(folder_path)
    ignore = utils.coerce_string_to_list(ignore)
    if not include_hidden:
        # Hidden files are excluded by adding a dot-pattern to ignore.
        ignore.append(".*")
    for fname in fnames:
        if utils.match_pattern(fname, ignore):
            continue
        pth = os.path.join(folder_path, fname)
        if os.path.isdir(pth):
            # Recurse into subdirectories with the extended prefix.
            subprefix = fname
            if prefix:
                subprefix = "%s/%s" % (prefix, subprefix)
            self._sync_folder_to_container(pth, cont, prefix=subprefix,
                    delete=delete, include_hidden=include_hidden,
                    ignore=ignore, ignore_timestamps=ignore_timestamps)
            continue
        # Track every local file seen so the delete pass can diff
        # against the remote listing.
        self._local_files.append(os.path.join(prefix, fname))
        local_etag = utils.get_checksum(pth)
        fullname = fname
        if prefix:
            fullname = "%s/%s" % (prefix, fname)
        try:
            obj = cont.get_object(fullname)
            obj_etag = obj.etag
        except exc.NoSuchObject:
            # Not on the remote side yet; force an upload below.
            obj = None
            obj_etag = None
        if local_etag != obj_etag:
            if not ignore_timestamps:
                # Timestamps are compared as ISO-format strings;
                # obj.last_modified[:19] trims to seconds precision.
                if obj:
                    obj_time_str = obj.last_modified[:19]
                else:
                    obj_time_str = EARLY_DATE_STR
                local_mod = datetime.datetime.utcfromtimestamp(
                        os.stat(pth).st_mtime)
                local_mod_str = local_mod.isoformat()
                if obj_time_str >= local_mod_str:
                    # Remote object is newer
                    continue
            cont.upload_file(pth, obj_name=fullname, etag=local_etag,
                    return_none=True)
    if delete and not prefix:
        self._delete_objects_not_in_list(cont)
def delete_file(self, path, prefixed_path, source_storage):
    """
    Compare files using hashes/etags vs modified time

    Returns False (skip) when the local file's checksum matches the
    stored etag; otherwise deletes the existing stored copy (unless
    dry_run) and returns True so the caller can re-copy it.
    """
    if self.storage.exists(prefixed_path):
        full_local_path = source_storage.path(prefixed_path)
        local_etag = get_checksum(full_local_path)
        remote_etag = self.storage.etag(prefixed_path)
        if local_etag == remote_etag:
            # Content unchanged; record it as unmodified exactly once.
            if prefixed_path not in self.unmodified_files:
                self.unmodified_files.append(prefixed_path)
            self.log("Skipping '%s' (not modified)" % path)
            return False
        # Then delete the existing file if really needed
        if self.dry_run:
            self.log("Pretending to delete '%s'" % path)
        else:
            self.log("Deleting '%s'" % path)
            self.storage.delete(prefixed_path)
    return True