def test_hash_path(self):
    # Yes, these tests are deliberately very fragile. We want to make sure
    # that if someone changes the results hash_path produces, they know it
    self.assertEquals(utils.hash_path('a'),
                      '1c84525acb02107ea475dcd3d09c2c58')
    self.assertEquals(utils.hash_path('a', 'c'),
                      '33379ecb053aa5c9e356c68997cbb59e')
    self.assertEquals(utils.hash_path('a', 'c', 'o'),
                      '06fbf0b514e5199dfc4e00f42eb5ea83')
    self.assertEquals(utils.hash_path('a', 'c', 'o', raw_digest=False),
                      '06fbf0b514e5199dfc4e00f42eb5ea83')
    self.assertEquals(
        utils.hash_path('a', 'c', 'o', raw_digest=True),
        '\x06\xfb\xf0\xb5\x14\xe5\x19\x9d\xfcN\x00\xf4.\xb5\xea\x83')
    self.assertRaises(ValueError, utils.hash_path, 'a', object='o')

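# The assertions above pin down the hash_path contract the rest of this
# section relies on: by default it returns a 32-character hex digest, and
# with raw_digest=True it returns the same 16 MD5 bytes unencoded (the
# '\x06\xfb\xf0\xb5...' literal is just the packed form of
# '06fbf0b514e5...'). A minimal sketch of such a helper follows; the path
# joining and the HASH_PATH_SUFFIX value are assumptions for illustration,
# so this sketch alone will not reproduce the exact digests asserted above.
from hashlib import md5

HASH_PATH_SUFFIX = 'changeme'  # placeholder; not the real suffix


def hash_path_sketch(account, container=None, object=None,
                     raw_digest=False):
    """Hash an account/container/object path (illustrative sketch only)."""
    # Mirrors the keyword used above (object='o'): an object without a
    # container is invalid, hence the ValueError assertion.
    if object and not container:
        raise ValueError('container is required if object is provided')
    paths = [account]
    if container:
        paths.append(container)
        if object:
            paths.append(object)
    digest = md5('/' + '/'.join(paths) + HASH_PATH_SUFFIX)
    # raw_digest=True skips the hex encoding so callers can unpack the
    # leading bytes directly (as ring.get_nodes does below).
    return digest.digest() if raw_digest else digest.hexdigest()
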
def test_run_once(self):
    replicator = object_replicator.ObjectReplicator(
        dict(chase_dir=self.testdir, devices=self.devices,
             mount_check='false', timeout='300', stats_interval='1'))
    was_connector = object_replicator.http_connect
    object_replicator.http_connect = mock_http_connect(200)
    cur_part = '0'
    df = DiskFile(self.devices, 'sda', cur_part, 'a', 'c', 'o',
                  FakeLogger())
    mkdirs(df.datadir)
    f = open(os.path.join(df.datadir,
                          normalize_timestamp(time.time()) + '.data'),
             'wb')
    f.write('1234567890')
    f.close()
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, cur_part, data_dir)
    process_arg_checker = []
    nodes = [node for node in self.ring.get_part_nodes(int(cur_part))
             if node['ip'] not in _ips()]
    for node in nodes:
        rsync_mod = '[%s]::object/sda/objects/%s' % (node['ip'], cur_part)
        process_arg_checker.append(
            (0, '', ['rsync', whole_path_from, rsync_mod]))
    with _mock_process(process_arg_checker):
        replicator.run_once()
    self.assertFalse(process_errors)
    object_replicator.http_connect = was_connector

def get_nodes(self, account, container=None, obj=None):
    """
    Get the partition and nodes for an account/container/object.

    :param account: account name
    :param container: container name
    :param obj: object name
    :returns: a tuple of (partition, list of node dicts)

    Each node dict will have at least the following keys:

    ======  ===============================================================
    id      unique integer identifier amongst devices
    weight  a float of the relative weight of this device as compared to
            others; this indicates how many partitions the builder will
            try to assign to this device
    zone    integer indicating which zone the device is in; a given
            partition will not be assigned to multiple devices within the
            same zone
    ip      the ip address of the device
    port    the tcp port of the device
    device  the device's name on disk (sdb1, for example)
    meta    general use 'extra' field; for example: the online date, the
            hardware description
    ======  ===============================================================
    """
    key = hash_path(account, container, obj, raw_digest=True)
    if time() > self._rtime:
        self._reload()
    part = unpack_from('>I', key)[0] >> self._part_shift
    return part, [self.devs[r[part]] for r in self._replica2part2dev_id]

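# get_nodes hashes with raw_digest=True so the partition comes straight
# from the leading four bytes of the MD5 rather than from a re-decoded hex
# string: unpack_from('>I', key)[0] reads those bytes as a big-endian
# unsigned int and >> _part_shift keeps only the top part_power bits. A
# worked example using the 'a'/'c'/'o' digest pinned in test_hash_path and
# an assumed part_power of 18 (so the shift is 32 - 18 = 14):
from struct import unpack_from

raw = '\x06\xfb\xf0\xb5\x14\xe5\x19\x9d\xfcN\x00\xf4.\xb5\xea\x83'
part_shift = 32 - 18  # hypothetical ring with 2**18 partitions
part = unpack_from('>I', raw)[0] >> part_shift
# 0x06fbf0b5 is 117174453; shifted right by 14 that is partition 7151.
print part
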
def test_hash_suffix_multi_file_two(self):
    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(df.datadir)
    for tdiff in [1, 50, 100, 500]:
        suffs = ['.meta', '.data']
        if tdiff > 50:
            suffs.append('.ts')
        for suff in suffs:
            f = open(os.path.join(
                df.datadir,
                normalize_timestamp(int(time.time()) - tdiff) + suff),
                'wb')
            f.write('1234567890')
            f.close()
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, '0', data_dir)
    hsh_path = os.listdir(whole_path_from)[0]
    whole_hsh_path = os.path.join(whole_path_from, hsh_path)

    object_replicator.hash_suffix(whole_path_from, 99)
    # only the meta and data should be left
    self.assertEquals(len(os.listdir(whole_hsh_path)), 2)

def test_run_once_recover_from_timeout(self):
    replicator = object_replicator.ObjectReplicator(
        dict(chase_dir=self.testdir, devices=self.devices,
             mount_check='false', timeout='300', stats_interval='1'))
    was_connector = object_replicator.http_connect
    was_get_hashes = object_replicator.get_hashes
    was_execute = tpool.execute
    self.get_hash_count = 0
    try:

        def fake_get_hashes(*args, **kwargs):
            self.get_hash_count += 1
            if self.get_hash_count == 3:
                # raise timeout on last call to get hashes
                raise Timeout()
            return 2, {'abc': 'def'}

        def fake_exc(tester, *args, **kwargs):
            if 'Error syncing partition' in args[0]:
                tester.i_failed = True

        self.i_failed = False
        object_replicator.http_connect = mock_http_connect(200)
        object_replicator.get_hashes = fake_get_hashes
        replicator.logger.exception = \
            lambda *args, **kwargs: fake_exc(self, *args, **kwargs)
        # Write some files into '1' and run replicate- they should be moved
        # to the other partitions and then the node should get deleted.
        cur_part = '1'
        df = DiskFile(self.devices, 'sda', cur_part, 'a', 'c', 'o',
                      FakeLogger())
        mkdirs(df.datadir)
        f = open(os.path.join(df.datadir,
                              normalize_timestamp(time.time()) + '.data'),
                 'wb')
        f.write('1234567890')
        f.close()
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, cur_part, data_dir)
        process_arg_checker = []
        nodes = [node for node in self.ring.get_part_nodes(int(cur_part))
                 if node['ip'] not in _ips()]
        for node in nodes:
            rsync_mod = '[%s]::object/sda/objects/%s' % (node['ip'],
                                                         cur_part)
            process_arg_checker.append(
                (0, '', ['rsync', whole_path_from, rsync_mod]))
        self.assertTrue(os.access(os.path.join(self.objects,
                                               '1', data_dir, ohash),
                                  os.F_OK))
        with _mock_process(process_arg_checker):
            replicator.run_once()
        self.assertFalse(process_errors)
        self.assertFalse(self.i_failed)
    finally:
        object_replicator.http_connect = was_connector
        object_replicator.get_hashes = was_get_hashes
        tpool.execute = was_execute

def test_invalidate_hash(self):

    def assertFileData(file_path, data):
        with open(file_path, 'r') as fp:
            fdata = fp.read()
            self.assertEquals(pickle.loads(fdata), pickle.loads(data))

    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(df.datadir)
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, '0', data_dir)
    hashes_file = os.path.join(self.objects, '0',
                               object_replicator.HASH_FILE)
    # test that the exception for a nonexistent file is caught
    self.assertEquals(object_replicator.invalidate_hash(whole_path_from),
                      None)
    # test that hashes get cleared
    check_pickle_data = pickle.dumps({data_dir: None},
                                     object_replicator.PICKLE_PROTOCOL)
    for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]:
        with open(hashes_file, 'wb') as fp:
            pickle.dump(data_hash, fp, object_replicator.PICKLE_PROTOCOL)
        object_replicator.invalidate_hash(whole_path_from)
        assertFileData(hashes_file, check_pickle_data)

def setup_bad_zero_byte(self, with_ts=False):
    self.auditor = auditor.ObjectAuditor(self.conf)
    self.auditor.log_time = 0
    ts_file_path = ''
    if with_ts:
        name_hash = hash_path('a', 'c', 'o')
        dir_path = os.path.join(self.devices, 'sda',
                                storage_directory(DATADIR, '0', name_hash))
        ts_file_path = os.path.join(dir_path, '99999.ts')
        if not os.path.exists(dir_path):
            mkdirs(dir_path)
        fp = open(ts_file_path, 'w')
        fp.close()
    etag = md5()
    with self.disk_file.mkstemp() as (fd, tmppath):
        etag = etag.hexdigest()
        metadata = {
            'ETag': etag,
            'X-Timestamp': str(normalize_timestamp(time.time())),
            'Content-Length': 10,
        }
        self.disk_file.put(fd, tmppath, metadata)
        etag = md5()
        etag = etag.hexdigest()
        metadata['ETag'] = etag
        write_metadata(fd, metadata)
    if self.disk_file.data_file:
        return self.disk_file.data_file
    return ts_file_path

def test_delete_partition(self):
    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(df.datadir)
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    part_path = os.path.join(self.objects, '1')
    self.assertTrue(os.access(part_path, os.F_OK))
    self.replicator.replicate()
    self.assertFalse(os.access(part_path, os.F_OK))

def test_run_once(self):
    cu = object_updater.ObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'chase_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '15',
    })
    cu.run_once()
    async_dir = os.path.join(self.sda1, object_server.ASYNCDIR)
    os.mkdir(async_dir)
    cu.run_once()
    self.assert_(os.path.exists(async_dir))

    odd_dir = os.path.join(async_dir, 'not really supposed to be here')
    os.mkdir(odd_dir)
    cu.run_once()
    self.assert_(os.path.exists(async_dir))
    self.assert_(not os.path.exists(odd_dir))

    ohash = hash_path('a', 'c', 'o')
    odir = os.path.join(async_dir, ohash[-3:])
    mkdirs(odir)
    op_path = os.path.join(odir,
                           '%s-%s' % (ohash, normalize_timestamp(time())))
    pickle.dump({'op': 'PUT', 'account': 'a', 'container': 'c', 'obj': 'o',
                 'headers': {'X-Container-Timestamp':
                             normalize_timestamp(0)}},
                open(op_path, 'wb'))
    cu.run_once()
    self.assert_(os.path.exists(op_path))

    bindsock = listen(('127.0.0.1', 0))

    def accepter(sock, return_code):
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                out.write('HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEquals(inc.readline(),
                                  'PUT /sda1/0/a/c/o HTTP/1.1\r\n')
                headers = {}
                line = inc.readline()
                while line and line != '\r\n':
                    headers[line.split(':')[0].lower()] = \
                        line.split(':')[1].strip()
                    line = inc.readline()
                self.assert_('x-container-timestamp' in headers)
        except BaseException, err:
            return err
        return None

def __init__(self, path, device, partition, account, container, obj,
             logger, keep_data_fp=False, disk_chunk_size=65536):
    self.disk_chunk_size = disk_chunk_size
    self.name = '/' + '/'.join((account, container, obj))
    name_hash = hash_path(account, container, obj)
    self.datadir = os.path.join(path, device,
                                storage_directory(DATADIR, partition,
                                                  name_hash))
    self.device_path = os.path.join(path, device)
    self.tmpdir = os.path.join(path, device, 'tmp')
    self.logger = logger
    self.metadata = {}
    self.meta_file = None
    self.data_file = None
    self.fp = None
    self.iter_etag = None
    self.started_at_0 = False
    self.read_to_eof = False
    self.quarantined_dir = None
    self.keep_cache = False
    if not os.path.exists(self.datadir):
        return
    files = sorted(os.listdir(self.datadir), reverse=True)
    for file in files:
        if file.endswith('.ts'):
            self.data_file = self.meta_file = None
            self.metadata = {'deleted': True}
            return
        if file.endswith('.meta') and not self.meta_file:
            self.meta_file = os.path.join(self.datadir, file)
        if file.endswith('.data') and not self.data_file:
            self.data_file = os.path.join(self.datadir, file)
            break
    if not self.data_file:
        return
    self.fp = open(self.data_file, 'rb')
    self.metadata = read_metadata(self.fp)
    if not keep_data_fp:
        self.close(verify_file=False)
    if self.meta_file:
        with open(self.meta_file) as mfp:
            for key in self.metadata.keys():
                if key.lower() not in DISALLOWED_HEADERS:
                    del self.metadata[key]
            self.metadata.update(read_metadata(mfp))

def _get_container_broker(self, drive, part, account, container):
    """
    Get a DB broker for the container.

    :param drive: drive that holds the container
    :param part: partition the container is in
    :param account: account name
    :param container: container name
    :returns: ContainerBroker object
    """
    hsh = hash_path(account, container)
    db_dir = storage_directory(DATADIR, part, hsh)
    db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
    return ContainerBroker(db_path, account=account, container=container,
                           logger=self.logger)

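# A concrete example of the broker path this produces, using the 'a'/'c'
# hash pinned in test_hash_path and assuming the <part>/<hash[-3:]>/<hash>
# layout that the probe test _get_container_db_files below rebuilds by
# hand; the root, drive, and partition values are made up for illustration.
import os

hsh = '33379ecb053aa5c9e356c68997cbb59e'  # hash_path('a', 'c')
db_dir = os.path.join('containers', '123', hsh[-3:], hsh)  # assumed layout
db_path = os.path.join('/srv/node', 'sdb1', db_dir, hsh + '.db')
# -> /srv/node/sdb1/containers/123/59e/33379ecb.../33379ecb....db
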
def _setup_data_file(self, container, obj, data):
    client.put_container(self.url, self.token, container)
    client.put_object(self.url, self.token, container, obj, data)
    odata = client.get_object(self.url, self.token, container, obj)[-1]
    self.assertEquals(odata, data)
    opart, onodes = self.object_ring.get_nodes(self.account, container,
                                               obj)
    onode = onodes[0]
    node_id = (onode["port"] - 6000) / 10
    device = onode["device"]
    hash_str = hash_path(self.account, container, obj)
    obj_server_conf = readconf("/etc/chase/object-server/%s.conf" %
                               node_id)
    devices = obj_server_conf["app:object-server"]["devices"]
    obj_dir = "%s/%s/objects/%s/%s/%s/" % (devices, device, opart,
                                           hash_str[-3:], hash_str)
    data_file = self._get_data_file_path(obj_dir)
    return onode, opart, data_file

def test_run_once_recover_from_failure(self):
    replicator = object_replicator.ObjectReplicator(
        dict(chase_dir=self.testdir, devices=self.devices,
             mount_check='false', timeout='300', stats_interval='1'))
    was_connector = object_replicator.http_connect
    try:
        object_replicator.http_connect = mock_http_connect(200)
        # Write some files into '1' and run replicate- they should be moved
        # to the other partitions and then the node should get deleted.
        cur_part = '1'
        df = DiskFile(self.devices, 'sda', cur_part, 'a', 'c', 'o',
                      FakeLogger())
        mkdirs(df.datadir)
        f = open(os.path.join(df.datadir,
                              normalize_timestamp(time.time()) + '.data'),
                 'wb')
        f.write('1234567890')
        f.close()
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, cur_part, data_dir)
        process_arg_checker = []
        nodes = [node for node in self.ring.get_part_nodes(int(cur_part))
                 if node['ip'] not in _ips()]
        for node in nodes:
            rsync_mod = '[%s]::object/sda/objects/%s' % (node['ip'],
                                                         cur_part)
            process_arg_checker.append(
                (0, '', ['rsync', whole_path_from, rsync_mod]))
        self.assertTrue(os.access(os.path.join(self.objects,
                                               '1', data_dir, ohash),
                                  os.F_OK))
        with _mock_process(process_arg_checker):
            replicator.run_once()
        self.assertFalse(process_errors)
        for i, result in [('0', True), ('1', False),
                          ('2', True), ('3', True)]:
            self.assertEquals(os.access(
                os.path.join(self.objects, i,
                             object_replicator.HASH_FILE),
                os.F_OK), result)
    finally:
        object_replicator.http_connect = was_connector

def test_hash_suffix_one_file(self):
    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(df.datadir)
    f = open(os.path.join(df.datadir,
                          normalize_timestamp(time.time() - 100) + '.ts'),
             'wb')
    f.write('1234567890')
    f.close()
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, '0', data_dir)
    object_replicator.hash_suffix(whole_path_from, 101)
    self.assertEquals(len(os.listdir(self.parts['0'])), 1)

    object_replicator.hash_suffix(whole_path_from, 99)
    self.assertEquals(len(os.listdir(self.parts['0'])), 0)

def _get_container_db_files(self, container):
    opart, onodes = self.container_ring.get_nodes(self.account, container)
    onode = onodes[0]
    db_files = []
    for onode in onodes:
        node_id = (onode['port'] - 6000) / 10
        device = onode['device']
        hash_str = hash_path(self.account, container)
        server_conf = readconf('/etc/chase/container-server/%s.conf' %
                               node_id)
        devices = server_conf['app:container-server']['devices']
        obj_dir = '%s/%s/containers/%s/%s/%s/' % (devices, device, opart,
                                                  hash_str[-3:], hash_str)
        db_files.append(self._get_db_file_path(obj_dir))
    return db_files

def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    Sends or saves an async update.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param host: host that the container is on
    :param partition: partition that the container is on
    :param contdevice: device name that the container is on
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    full_path = '/%s/%s/%s' % (account, container, obj)
    if all([host, partition, contdevice]):
        try:
            with ConnectionTimeout(self.conn_timeout):
                ip, port = host.rsplit(':', 1)
                conn = http_connect(ip, port, contdevice, partition, op,
                                    full_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if 200 <= response.status < 300:
                    return
                else:
                    self.logger.error(_('ERROR Container update failed '
                        '(saving for async update later): %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip,
                         'port': port, 'dev': contdevice})
        except (Exception, Timeout):
            self.logger.exception(_('ERROR container update failed with '
                '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    write_pickle(
        {'op': op, 'account': account, 'container': container, 'obj': obj,
         'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))

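# When the synchronous container update fails, the update is parked on disk
# for the object updater to retry. The pending-file path built by the
# write_pickle call above is
#   <devices>/<objdevice>/ASYNCDIR/<last 3 hex chars of hash>/<hash>-<ts>
# which is the layout the updater tests below reconstruct by hand. A small
# sketch of just that path construction (the helper name is hypothetical;
# hash_path, ASYNCDIR and normalize_timestamp come from the module above):
import os


def pending_file_path(devices, objdevice, account, container, obj,
                      x_timestamp):
    ohash = hash_path(account, container, obj)
    return os.path.join(devices, objdevice, ASYNCDIR, ohash[-3:],
                        ohash + '-' + normalize_timestamp(x_timestamp))
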
def test_hash_suffix_hash_dir_is_file_quarantine(self):
    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(os.path.dirname(df.datadir))
    open(df.datadir, 'wb').close()
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, '0', data_dir)
    orig_quarantine_renamer = object_replicator.quarantine_renamer
    called = [False]

    def wrapped(*args, **kwargs):
        called[0] = True
        return orig_quarantine_renamer(*args, **kwargs)

    try:
        object_replicator.quarantine_renamer = wrapped
        object_replicator.hash_suffix(whole_path_from, 101)
    finally:
        object_replicator.quarantine_renamer = orig_quarantine_renamer
    self.assertTrue(called[0])

def test_hash_suffix_multi_file_one(self):
    df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
    mkdirs(df.datadir)
    for tdiff in [1, 50, 100, 500]:
        for suff in ['.meta', '.data', '.ts']:
            f = open(os.path.join(
                df.datadir,
                normalize_timestamp(int(time.time()) - tdiff) + suff),
                'wb')
            f.write('1234567890')
            f.close()
    ohash = hash_path('a', 'c', 'o')
    data_dir = ohash[-3:]
    whole_path_from = os.path.join(self.objects, '0', data_dir)
    hsh_path = os.listdir(whole_path_from)[0]
    whole_hsh_path = os.path.join(whole_path_from, hsh_path)

    object_replicator.hash_suffix(whole_path_from, 99)
    # only the tombstone should be left
    self.assertEquals(len(os.listdir(whole_hsh_path)), 1)

def test_object_sweep(self):
    prefix_dir = os.path.join(self.sda1, ASYNCDIR, 'abc')
    mkpath(prefix_dir)

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }

    expected = set()
    for o, timestamps in objects.iteritems():
        ohash = hash_path('account', 'container', o)
        for t in timestamps:
            o_path = os.path.join(prefix_dir, ohash + '-' +
                                  normalize_timestamp(t))
            if t == timestamps[0]:
                expected.add(o_path)
            write_pickle({}, o_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        def process_object_update(self, update_path, device):
            seen.add(update_path)
            os.unlink(update_path)

    cu = MockObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'chase_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5',
    })
    cu.object_sweep(self.sda1)
    self.assert_(not os.path.exists(prefix_dir))
    self.assertEqual(expected, seen)

def container_sync(self, path):
    """
    Checks the given path for a container database, determines if syncing
    is turned on for that database and, if so, sends any updates to the
    other container.

    :param path: the path to a container db
    """
    try:
        if not path.endswith('.db'):
            return
        broker = ContainerBroker(path)
        info = broker.get_info()
        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        for ordinal, node in enumerate(nodes):
            if node['ip'] in self._myips and node['port'] == self._myport:
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            sync_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            for key, (value, timestamp) in broker.metadata.iteritems():
                if key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif key.lower() == 'x-container-sync-key':
                    sync_key = value
            if not sync_to or not sync_key:
                self.container_skips += 1
                return
            sync_to = sync_to.rstrip('/')
            err = validate_sync_to(sync_to, self.allowed_sync_hosts)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': broker.db_file,
                     'validate_sync_to_err': err})
                self.container_failures += 1
                return
            stop_at = time() + self.container_time
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] >= sync_point1:
                    break
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.). This section
                # will attempt to sync previously skipped rows in case the
                # other nodes didn't succeed.
                if unpack_from('>I', key)[0] % \
                        self.container_ring.replica_count != ordinal:
                    if not self.container_sync_row(row, sync_to, sync_key,
                                                   broker, info):
                        return
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                # around to the section above and attempt to sync
                # previously skipped rows in case the other nodes didn't
                # succeed.
                if unpack_from('>I', key)[0] % \
                        self.container_ring.replica_count == ordinal:
                    if not self.container_sync_row(row, sync_to, sync_key,
                                                   broker, info):
                        return
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
    except (Exception, Timeout), err:
        self.container_failures += 1
        self.logger.exception(_('ERROR Syncing %s'), (broker.db_file))

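# The two while loops above split the sync work between replicas: a row is
# "owned" by the node whose ordinal equals the row's hashed name modulo the
# replica count, and the first loop re-walks older rows in case another
# owner failed. A small illustration of the ownership test, assuming 3
# replicas and reusing the raw digest pinned in test_hash_path:
from struct import unpack_from

replica_count = 3  # assumed replica count for the example
raw = '\x06\xfb\xf0\xb5\x14\xe5\x19\x9d\xfcN\x00\xf4.\xb5\xea\x83'
owner_ordinal = unpack_from('>I', raw)[0] % replica_count
# 117174453 % 3 == 0, so the first-listed replica syncs this row on its
# first pass; the other two only pick it up on the catch-up pass.
print owner_ordinal
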
def _get_account_broker(self, drive, part, account):
    hsh = hash_path(account)
    db_dir = storage_directory(DATADIR, part, hsh)
    db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
    return AccountBroker(db_path, account=account, logger=self.logger)