def get_data(self, db_path):
    """
    Build one CSV row for the container database at ``db_path``.

    The row has the columns: Account Hash, Container Name (URL-quoted),
    Object Count, Bytes Used.  When ``self.metadata_keys`` is set, one
    extra column per key is appended holding "1" if the container has a
    truthy value stored under that metadata key and '' otherwise.

    :param db_path: path to the container database
    :returns: the CSV line terminated by a newline, or None when the
              broker reports the container as deleted
    :raises sqlite3.Error: does not catch errors connecting to db
    """
    broker = ContainerBroker(db_path)
    if broker.is_deleted():
        return None

    info = broker.get_info()
    quoted_name = urllib.quote(info["container"])
    row = '"%s","%s",%d,%d' % (
        info["account"],
        quoted_name,
        info["object_count"],
        info["bytes_used"],
    )
    if self.metadata_keys:
        # Only presence of the metadata is recorded, never its value.
        flags = ["1" if info["metadata"].get(mkey) else ""
                 for mkey in self.metadata_keys]
        row += ",%s" % ",".join(flags)
    return row + "\n"
def get_reconciler_broker(self, timestamp):
    """
    Return a local broker for the reconciler container that should hold
    a row enqueued at ``timestamp``.

    A broker already recorded in ``self.reconciler_containers`` is
    reused.  Otherwise the database path is derived from the container
    ring partition and a local handoff device, the database is
    initialized if its file does not exist yet, and (when the cache is
    enabled, i.e. not None) the result is remembered.

    :param timestamp: the timestamp of the row to be enqueued
    :returns: a local reconciler broker
    :raises DeviceUnavailable: if no local handoff device is found for
                               the partition
    """
    container = get_reconciler_container_name(timestamp)
    known = self.reconciler_containers
    if known and container in known:
        # cache entries are (part, broker, node id) tuples
        return known[container][1]

    account = MISPLACED_OBJECTS_ACCOUNT
    part = self.ring.get_part(account, container)
    node = self.find_local_handoff_for_part(part)
    if not node:
        raise DeviceUnavailable(
            'No mounted devices found suitable to Handoff reconciler '
            'container %s in partition %s' % (container, part))

    name_hash = hash_path(account, container)
    relative_dir = storage_directory(DATADIR, part, name_hash)
    db_path = os.path.join(
        self.root, node['device'], relative_dir, name_hash + '.db')
    broker = ContainerBroker(db_path, account=account, container=container)
    if not os.path.exists(broker.db_file):
        try:
            broker.initialize(timestamp, 0)
        except DatabaseAlreadyExists:
            # the db appeared between the existence check and initialize
            pass

    if self.reconciler_containers is not None:
        self.reconciler_containers[container] = (part, broker, node['id'])
    return broker
def test_container_stat_get_data(self):
    """
    ContainerStatsCollector.get_data returns the expected CSV row for a
    container holding a single 10 byte object.
    """
    stat = db_stats_collector.ContainerStatsCollector(self.conf)
    container_db = ContainerBroker("%s/con.db" % self.containers,
                                   account='test_acc', container='test_con')
    container_db.initialize()
    container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
    info = stat.get_data("%s/con.db" % self.containers)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('''"test_acc","test_con",1,10\n''', info)
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    A report is sent to every account node only when the container's put
    or delete timestamp is newer than the reported one, or its object
    count or byte total differs from the reported values.  The update
    counts as successful when more reports succeed than fail; otherwise
    the account is suppressed for ``self.account_suppression_time``
    seconds.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    info = broker.get_info()
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if float(info['put_timestamp']) <= 0:
        return
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    needs_report = (
        info['put_timestamp'] > info['reported_put_timestamp'] or
        info['delete_timestamp'] > info['reported_delete_timestamp'] or
        info['object_count'] != info['reported_object_count'] or
        info['bytes_used'] != info['reported_bytes_used'])
    if not needs_report:
        self.logger.increment('no_changes')
        self.no_changes += 1
        return

    container = '/%s/%s' % (info['account'], info['container'])
    part, nodes = self.get_account_ring().get_nodes(info['account'])
    events = [spawn(self.container_report, node, part, container,
                    info['put_timestamp'], info['delete_timestamp'],
                    info['object_count'], info['bytes_used'])
              for node in nodes]
    # Wait for every report, in order, and tally the outcomes.
    outcomes = [is_success(event.wait()) for event in events]
    successes = sum(1 for ok in outcomes if ok)
    failures = len(outcomes) - successes

    log_context = {'container': container, 'dbfile': dbfile}
    if successes > failures:
        self.logger.increment('successes')
        self.successes += 1
        self.logger.debug(
            _('Update report sent for %(container)s %(dbfile)s'),
            log_context)
        broker.reported(info['put_timestamp'], info['delete_timestamp'],
                        info['object_count'], info['bytes_used'])
    else:
        self.logger.increment('failures')
        self.failures += 1
        self.logger.debug(
            _('Update report failed for %(container)s %(dbfile)s'),
            log_context)
        until = time.time() + self.account_suppression_time
        self.account_suppressions[info['account']] = until
        if self.new_account_suppressions:
            print >>self.new_account_suppressions, \
                info['account'], until
    # Only track timing data for attempted updates:
    self.logger.timing_since('timing', start_time)
def _make_broker(self, account='a', container='c', device='sda', part=0):
    """
    Create and initialize a ContainerBroker under ``self.testdir``.

    The database lives at
    ``<testdir>/<device>/containers/<part>/ash/hash/hash.db``.

    :param account: account name given to the broker
    :param container: container name given to the broker
    :param device: device directory name
    :param part: partition number, used as a directory name
    :returns: the initialized ContainerBroker
    """
    db_file = os.path.join(
        self.testdir, device, 'containers', str(part), 'ash', 'hash',
        'hash.db')
    new_broker = ContainerBroker(
        db_file, account=account, container=container)
    new_broker.initialize()
    return new_broker
def _abort_rsync_then_merge(self, db_file, old_filename):
    """
    Decide whether an rsync_then_merge operation should be aborted.

    Aborts when the parent class says so, or when the local database
    has initiated sharding.

    :param db_file: path to the local container db
    :param old_filename: passed through unchanged to the parent check
    :returns: True if the parent check returns a truthy value, otherwise
              the result of ``broker.sharding_initiated()``
    """
    parent_check = super(
        ContainerReplicatorRpc, self)._abort_rsync_then_merge
    if parent_check(db_file, old_filename):
        return True
    # If the local db has started sharding since the original 'sync'
    # request then abort object replication now. A fresh broker is
    # instantiated each time this check is performed so that the latest
    # state is seen.
    return ContainerBroker(db_file).sharding_initiated()
def test_find_replace_enable(self):
    """
    Run ``find_and_replace 10 --enable`` against a container with 100
    objects and check the console output, the enabled state and the
    bounds of the resulting shard ranges.
    """
    broker = ContainerBroker(os.path.join(self.testdir, 'hash.db'))
    broker.account = 'a'
    broker.container = 'c'
    broker.initialize()
    ts = utils.Timestamp.now()
    objects = []
    for i in range(100):
        objects.append({
            'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0,
            'content_type': 'application/octet-stream',
            'etag': 'not-really', 'deleted': 0,
            'storage_policy_index': 0,
            'ctype_timestamp': ts.internal,
            'meta_timestamp': ts.internal})
    broker.merge_items(objects)

    out = StringIO()
    err = StringIO()
    with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
            mock_timestamp_now() as now:
        main([broker.db_file, 'find_and_replace', '10', '--enable'])

    expected_stdout = [
        'No shard ranges found to delete.',
        'Injected 10 shard ranges.',
        'Run container-replicator to replicate them to other nodes.',
        "Container moved to state 'sharding' with epoch %s." %
        now.internal,
        'Run container-sharder on all nodes to shard the container.']
    expected_stderr = ['Loaded db broker for a/c.']
    self.assertEqual(expected_stdout, out.getvalue().splitlines())
    self.assertEqual(expected_stderr, err.getvalue().splitlines())
    self._assert_enabled(broker, now)

    expected_bounds = [(data['lower'], data['upper'])
                       for data in self.shard_data]
    actual_bounds = [(sr.lower_str, sr.upper_str)
                     for sr in broker.get_shard_ranges()]
    self.assertEqual(expected_bounds, actual_bounds)
def get_reconciler_broker(self, timestamp):
    """
    Return a local broker for the reconciler container that should hold
    a row enqueued at ``timestamp``.

    A broker already recorded in ``self.reconciler_containers`` is
    reused; otherwise one is created on a local handoff device via
    ``ContainerBroker.create_broker`` and, when the cache is enabled
    (not None), remembered for later calls.

    :param timestamp: the timestamp of the row to be enqueued
    :returns: a local reconciler broker
    :raises DeviceUnavailable: if no local handoff device is found for
                               the partition
    """
    container = get_reconciler_container_name(timestamp)
    known = self.reconciler_containers
    if known and container in known:
        # cache entries are (part, broker, node id) tuples
        return known[container][1]

    account = MISPLACED_OBJECTS_ACCOUNT
    part = self.ring.get_part(account, container)
    node = self.find_local_handoff_for_part(part)
    if not node:
        raise DeviceUnavailable(
            'No mounted devices found suitable to Handoff reconciler '
            'container %s in partition %s' % (container, part))

    device_path = os.path.join(self.root, node['device'])
    broker = ContainerBroker.create_broker(
        device_path, part, account, container,
        logger=self.logger, put_timestamp=timestamp,
        storage_policy_index=0)

    if self.reconciler_containers is not None:
        self.reconciler_containers[container] = (part, broker, node['id'])
    return broker
def test_run_once_with_get_info_timeout(self, mock_dump_recon):
    """
    When ContainerBroker.get_info raises LockTimeout the updater logs an
    info line saying the container is skipped.
    """
    cu = self._get_container_updater()
    containers_dir = os.path.join(self.sda1, DATADIR)
    subdir = os.path.join(containers_dir, 'subdir')
    for directory in (containers_dir, subdir):
        os.mkdir(directory)
    db_file = os.path.join(subdir, 'hash.db')
    cb = ContainerBroker(db_file, account='a', container='c')
    cb.initialize(normalize_timestamp(1), 0)

    # The exception object is cancelled right away; it is only ever
    # raised through the mock's side_effect.
    timeout = exceptions.LockTimeout(10, db_file)
    timeout.cancel()
    get_info_patch = mock.patch(
        'swift.container.updater.ContainerBroker.get_info',
        side_effect=timeout)
    with get_info_patch:
        cu.run_once()

    expected_line = ('Failed to get container info (Lock timeout: '
                     '10 seconds: %s); skipping.' % db_file)
    self.assertIn(expected_line, self.logger.get_lines_for_level('info'))
def test_error_in_process(self, mock_process, mock_dump_recon):
    """
    An error while processing a container yields exactly one error log
    line naming the db file and the error, and recon data is still
    dumped exactly once.
    """
    cu = self._get_container_updater()
    containers_dir = os.path.join(self.sda1, DATADIR)
    subdir = os.path.join(containers_dir, 'subdir')
    for directory in (containers_dir, subdir):
        os.mkdir(directory)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a',
                         container='c', pending_timeout=1)
    cb.initialize(normalize_timestamp(1), 0)

    cu.run_once()

    error_lines = self.logger.get_lines_for_level('error')
    self.assertTrue(error_lines)
    first_line = error_lines[0]
    for fragment in ('Error processing container ',
                     'devices/sda1/containers/subdir/hash.db',
                     'Boom!'):
        self.assertIn(fragment, first_line)
    # nothing else was logged at error level
    self.assertFalse(error_lines[1:])
    self.assertEqual(1, len(mock_dump_recon.mock_calls))
def setUp(self):
    """
    Build a scratch device tree with one container database.

    Creates ``<tmp>/tmp_test_container_updater/devices/sda1/<DATADIR>/
    subdir/hash.db`` for account 'a', container 'c', holding a single
    3 byte object named 'o'.
    """
    self.testdir = os.path.join(mkdtemp(), 'tmp_test_container_updater')
    rmtree(self.testdir, ignore_errors=True)
    os.mkdir(self.testdir)
    self.devices_dir = os.path.join(self.testdir, 'devices')
    os.mkdir(self.devices_dir)
    self.sda1 = os.path.join(self.devices_dir, 'sda1')
    os.mkdir(self.sda1)
    containers_dir = os.path.join(self.sda1, container_server.DATADIR)
    os.mkdir(containers_dir)
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(os.path.exists(containers_dir))
    self.subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(self.subdir)
    cb = ContainerBroker(os.path.join(self.subdir, 'hash.db'), account='a',
                         container='c')
    cb.initialize(normalize_timestamp(1))
    cb.put_object('o', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')
def container_audit(self, path):
    """
    Audits the given container path

    A container that is not deleted passes the audit when its info can
    be read.  Any exception (or Timeout) is counted and logged as a
    failure.  Timing is recorded in every case.

    :param path: the path to a container db
    """
    start_time = time.time()
    # Bind the name up front: if ContainerBroker(path) itself raises,
    # the handler below must still be able to report which db failed
    # instead of dying with an UnboundLocalError.
    broker = None
    try:
        broker = ContainerBroker(path)
        if not broker.is_deleted():
            broker.get_info()
            self.logger.increment('passes')
            self.container_passes += 1
            self.logger.debug(_('Audit passed for %s'), broker.db_file)
    except (Exception, Timeout):
        self.logger.increment('failures')
        self.container_failures += 1
        self.logger.exception(
            _('ERROR Could not get container info %s'),
            broker.db_file if broker is not None else path)
    self.logger.timing_since('timing', start_time)
def container_crawl(self, path):
    """
    Crawls the given container path.

    Collects the broker's info dict and adds every non-empty system or
    user metadata value stored on the container.  Any exception (or
    Timeout) is counted as a failure; whatever was collected up to that
    point is still returned.

    :param path: the path to a container db
    :returns: dict of container info plus metadata; empty if the
              container is deleted or the broker could not be opened
    """
    collected = {}
    try:
        broker = ContainerBroker(path)
        if not broker.is_deleted():
            # NOTE: an earlier filter on the container's put_timestamp
            # against the crawl window existed only as commented-out
            # code, so every non-deleted container is reported.
            collected = broker.get_info()
            for key, (value, timestamp) in broker.metadata.iteritems():
                if value != '' and is_sys_or_user_meta('container', key):
                    collected[key] = value
    except (Exception, Timeout):
        self.logger.increment('failures')
    return collected
def test_container_stat_get_metadata(self):
    """
    With ``metadata_keys`` configured as 'test1,test2', get_data appends
    one column per key: "1" for the metadata that is set (Test1) and an
    empty column for the one that is not (Test2).
    """
    container_db = ContainerBroker("%s/con.db" % self.containers,
                                   account='test_acc', container='test_con')
    container_db.initialize(storage_policy_index=0)
    container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
    container_db.update_metadata({'X-Container-Meta-Test1': ('val', 1000)})
    self.conf['metadata_keys'] = 'test1,test2'
    stat = db_stats_collector.ContainerStatsCollector(self.conf)
    info = stat.get_data("%s/con.db" % self.containers)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('''"test_acc","test_con",1,10,1,\n''', info)
def test_unicode(self):
    """
    A container and object whose names are UTF-8 encoded bytes are
    reported to fake account servers, after which the reported stats
    match the actual ones.
    """
    conf = {
        "devices": self.devices_dir,
        "mount_check": "false",
        "swift_dir": self.testdir,
        "interval": "1",
        "concurrency": "1",
        "node_timeout": "15",
    }
    cu = container_updater.ContainerUpdater(conf)
    containers_dir = os.path.join(self.sda1, DATADIR)
    subdir = os.path.join(containers_dir, "subdir")
    for directory in (containers_dir, subdir):
        os.mkdir(directory)
    cb = ContainerBroker(os.path.join(subdir, "hash.db"), account="a",
                         container="\xce\xa9")
    cb.initialize(normalize_timestamp(1), 0)
    cb.put_object("\xce\xa9", normalize_timestamp(2), 3, "text/plain",
                  "68b329da9893e34099c7d8ad5cb9c940")

    def accept(sock, addr):
        # Answer one request with 201; hand any error back to the test.
        try:
            with Timeout(3):
                reader = sock.makefile("rb")
                writer = sock.makefile("wb")
                writer.write(
                    "HTTP/1.1 201 OK\r\nContent-Length: 0\r\n\r\n")
                writer.flush()
                reader.read()
        except BaseException as err:
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen(("127.0.0.1", 0))

    def spawn_accepts():
        handlers = []
        for _junk in range(2):
            with Timeout(3):
                conn, peer = bindsock.accept()
                handlers.append(spawn(accept, conn, peer))
        return handlers

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev["port"] = bindsock.getsockname()[1]
    cu.run_once()
    for handler in spawned.wait():
        failure = handler.wait()
        if failure:
            raise failure

    info = cb.get_info()
    for key, expected in (("object_count", 1),
                          ("bytes_used", 3),
                          ("reported_object_count", 1),
                          ("reported_bytes_used", 3)):
        self.assertEqual(info[key], expected)
def test_unicode(self):
    """
    A container and object whose names are UTF-8 encoded bytes are
    reported to fake account servers, after which the reported stats
    match the actual ones.
    """
    cu = container_updater.ContainerUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '15',
    })
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a',
                         container='\xce\xa9')
    cb.initialize(normalize_timestamp(1), 0)
    cb.put_object('\xce\xa9', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')

    def accept(sock, addr):
        # Answer one request with 201; hand any error back to the test.
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                out.write('HTTP/1.1 201 OK\r\nContent-Length: 0\r\n\r\n')
                out.flush()
                inc.read()
        except BaseException as err:
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen(('127.0.0.1', 0))

    def spawn_accepts():
        events = []
        for _junk in range(2):
            with Timeout(3):
                sock, addr = bindsock.accept()
                events.append(spawn(accept, sock, addr))
        return events

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    cu.run_once()
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    info = cb.get_info()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    self.assertEqual(info['reported_object_count'], 1)
    self.assertEqual(info['reported_bytes_used'], 3)
def _get_db_info(self, account, container, number):
    """
    Read the container info from the db held by one container server.

    The server is chosen by ``number`` from the configured
    container-server configs; its device is the ring node whose port
    equals the server's ``bind_port``.

    :param account: account name
    :param container: container name
    :param number: key into ``self.configs['container-server']``
    :returns: the broker's info dict, or None if no ring node for the
              container matches this server's port
    """
    server_type = 'container'
    config_path = self.configs['%s-server' % server_type][number]
    options = utils.readconf(config_path, 'app:container-server')
    root = options.get('devices')
    swift_dir = options.get('swift_dir', '/etc/swift')
    ring = Ring(swift_dir, ring_name=server_type)
    part, nodes = ring.get_nodes(account, container)

    # assumes one to one mapping
    local_node = next(
        (node for node in nodes
         if node['port'] == int(options.get('bind_port'))),
        None)
    if local_node is None:
        return None

    path_hash = utils.hash_path(account, container)
    _dir = utils.storage_directory('%ss' % server_type, part, path_hash)
    db_file = os.path.join(
        root, local_node['device'], _dir, '%s.db' % path_hash)
    return ContainerBroker(db_file).get_info()
def _get_container_broker(self, drive, part, account, container, **kwargs):
    """
    Get a DB broker for the container.

    ``account``, ``container`` and ``logger`` are filled into ``kwargs``
    only when the caller did not supply them.

    :param drive: drive that holds the container
    :param part: partition the container is in
    :param account: account name
    :param container: container name
    :param kwargs: extra keyword arguments passed to ContainerBroker
    :returns: ContainerBroker object
    """
    for option, default in (('account', account),
                            ('container', container),
                            ('logger', self.logger)):
        kwargs.setdefault(option, default)
    name_hash = hash_path(account, container)
    relative_dir = storage_directory(DATADIR, part, name_hash)
    db_path = os.path.join(
        self.root, drive, relative_dir, name_hash + '.db')
    return ContainerBroker(db_path, **kwargs)
def test_unicode(self):
    """
    A container and object with non-ASCII names are reported to fake
    account servers, after which the reported stats match the actual
    ones.
    """
    cu = self._get_container_updater()
    containers_dir = os.path.join(self.sda1, DATADIR)
    subdir = os.path.join(containers_dir, 'subdir')
    for directory in (containers_dir, subdir):
        os.mkdir(directory)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a',
                         container='\xce\xa9')
    cb.initialize(normalize_timestamp(1), 0)
    omega = u'\N{GREEK CAPITAL LETTER OMEGA}'
    # On Python 2 the name is passed as UTF-8 encoded bytes.
    obj_name = omega.encode('utf-8') if six.PY2 else omega
    cb.put_object(obj_name, normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')

    def accept(sock, addr):
        # Answer one request with 201; hand any error back to the test.
        try:
            with Timeout(3):
                reader = sock.makefile('rb')
                writer = sock.makefile('wb')
                writer.write(
                    b'HTTP/1.1 201 OK\r\nContent-Length: 0\r\n\r\n')
                writer.flush()
                reader.read()
        except BaseException as err:
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen_zero()

    def spawn_accepts():
        handlers = []
        for _junk in range(2):
            with Timeout(3):
                conn, peer = bindsock.accept()
                handlers.append(spawn(accept, conn, peer))
        return handlers

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    cu.run_once()
    for handler in spawned.wait():
        failure = handler.wait()
        if failure:
            raise failure

    info = cb.get_info()
    for key, expected in (('object_count', 1),
                          ('bytes_used', 3),
                          ('reported_object_count', 1),
                          ('reported_bytes_used', 3)):
        self.assertEqual(info[key], expected)
def test_find_replace_enable(self):
    """
    Run ``find_and_replace 10 --enable`` against a container with 100
    objects and check the console output, the enabled state and the
    bounds of the resulting shard ranges.
    """
    db_file = os.path.join(self.testdir, 'hash.db')
    broker = ContainerBroker(db_file)
    broker.account = 'a'
    broker.container = 'c'
    broker.initialize()
    ts = utils.Timestamp.now()
    # Everything except the name is shared by all 100 object rows.
    shared_fields = {
        'created_at': ts.internal,
        'size': 0,
        'content_type': 'application/octet-stream',
        'etag': 'not-really',
        'deleted': 0,
        'storage_policy_index': 0,
        'ctype_timestamp': ts.internal,
        'meta_timestamp': ts.internal,
    }
    broker.merge_items(
        [dict(shared_fields, name='obj%02d' % i) for i in range(100)])

    out = StringIO()
    err = StringIO()
    with mock.patch('sys.stdout', out):
        with mock.patch('sys.stderr', err):
            with mock_timestamp_now() as now:
                main([broker.db_file, 'find_and_replace', '10',
                      '--enable'])

    stdout_lines = out.getvalue().splitlines()
    self.assertEqual([
        'No shard ranges found to delete.',
        'Injected 10 shard ranges.',
        'Run container-replicator to replicate them to other nodes.',
        "Container moved to state 'sharding' with epoch %s." %
        now.internal,
        'Run container-sharder on all nodes to shard the container.',
    ], stdout_lines)
    stderr_lines = err.getvalue().splitlines()
    self.assertEqual(['Loaded db broker for a/c.'], stderr_lines)
    self._assert_enabled(broker, now)

    wanted = [(data['lower'], data['upper']) for data in self.shard_data]
    found = [(sr.lower_str, sr.upper_str)
             for sr in broker.get_shard_ranges()]
    self.assertEqual(wanted, found)
def print_info(db_type, db_file, swift_dir='/etc/swift', stale_reads_ok=False,
               drop_prefixes=False, verbose=False):
    """
    Print the info and metadata of an account or container database,
    followed by its ring locations when the ring can be loaded.

    :param db_type: 'account' or 'container'
    :param db_file: path to an existing file ending in '.db'
    :param swift_dir: directory the ring is loaded from
    :param stale_reads_ok: passed through to the broker
    :param drop_prefixes: passed through to print_db_info_metadata
    :param verbose: passed through to print_db_info_metadata
    :raises InfoSystemExit: on an unknown db_type, a missing or
                            mis-named db file, or a db that lacks the
                            expected tables
    """
    if db_type not in ('account', 'container'):
        print("Unrecognized DB type: internal error")
        raise InfoSystemExit()
    if not (os.path.exists(db_file) and db_file.endswith('.db')):
        print("DB file doesn't exist")
        raise InfoSystemExit()
    if not db_file.startswith(('/', './')):
        db_file = './' + db_file  # don't break if the bare db file is given

    is_account = db_type == 'account'
    if is_account:
        broker_cls, datadir = AccountBroker, ABDATADIR
    else:
        broker_cls, datadir = ContainerBroker, CBDATADIR
    broker = broker_cls(db_file, stale_reads_ok=stale_reads_ok)

    try:
        info = broker.get_info()
    except sqlite3.OperationalError as err:
        if 'no such table' not in str(err):
            raise
        print("Does not appear to be a DB of type \"%s\": %s"
              % (db_type, db_file))
        raise InfoSystemExit()

    account = info['account']
    container = None
    info['is_deleted'] = broker.is_deleted()
    if not is_account:
        container = info['container']
        info['is_root'] = broker.is_root_container()
        sranges = broker.get_shard_ranges()
        if sranges:
            info['shard_ranges'] = sranges
    print_db_info_metadata(db_type, info, broker.metadata, drop_prefixes,
                           verbose)

    # Ring locations are best effort: a ring that fails to load is
    # silently skipped.
    try:
        ring = Ring(swift_dir, ring_name=db_type)
    except Exception:
        return
    print_ring_locations(ring, datadir, account, container)
def collect_brokers(conf_path, names2nodes):
    """
    Walk every container db under the devices in the container ring and
    record a broker for each one in ``names2nodes``.

    ``names2nodes`` is updated in place: it is indexed first by
    ``broker_key(broker)`` and then by ``(node_id, node_index)``, where
    ``node_index`` is the node's ring index as a string, or 'handoff'
    if the device is not one of the partition's nodes.

    :param conf_path: config file with a 'container-replicator' section
    :param names2nodes: two-level mapping to fill in
    :returns: a ``defaultdict(dict)``.
              NOTE(review): this mapping is never populated here, so it
              is always empty; the results are only in ``names2nodes``.
    """
    conf = utils.readconf(conf_path, 'container-replicator')
    root = conf.get('devices', '/srv/node')
    swift_dir = conf.get('swift_dir', '/etc/swift')
    c_ring = ring.Ring(swift_dir, ring_name='container')
    brokers = defaultdict(dict)

    def accept_all(*args):
        return True

    dirs = []
    for dev in c_ring.devs:
        if dev is not None:
            datadir = os.path.join(root, dev['device'], DATADIR)
            if os.path.isdir(datadir):
                dirs.append((datadir, dev['id'], accept_all))

    for part, object_file, node_id in roundrobin_datadirs(dirs):
        broker = ContainerBroker(object_file)
        node_index = next(
            (str(node['index'])
             for node in c_ring.get_part_nodes(int(part))
             if node['id'] == node_id),
            'handoff')
        names2nodes[broker_key(broker)][(node_id, node_index)] = broker
    return brokers
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    A report is sent to every account node when the container's put or
    delete timestamp is newer than the reported one, or its object count
    or byte total differs from the reported values.  Outcomes:

    * at least ``majority_size`` of the reports succeed: the reported
      values are recorded on the broker;
    * every report returns 404: the container db is quarantined;
    * otherwise: the account is suppressed for
      ``self.account_suppression_time`` seconds.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    try:
        info = broker.get_info()
    except LockTimeout as e:
        # The db is locked; skip it for this pass.
        self.logger.info(
            "Failed to get container info (Lock timeout: %s); skipping.",
            str(e))
        return
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info['put_timestamp']) <= 0:
        return
    # Skip accounts that are still suppressed after an earlier failure.
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    if not broker.is_root_container():
        # Don't double-up account stats.
        # The sharder should get these stats to the root container,
        # and the root's updater will get them to the right account.
        info['object_count'] = info['bytes_used'] = 0

    if info['put_timestamp'] > info['reported_put_timestamp'] or \
            info['delete_timestamp'] > info['reported_delete_timestamp'] \
            or info['object_count'] != info['reported_object_count'] or \
            info['bytes_used'] != info['reported_bytes_used']:
        container = '/%s/%s' % (info['account'], info['container'])
        part, nodes = self.get_account_ring().get_nodes(info['account'])
        # One report per account node.
        events = [
            spawn(self.container_report, node, part, container,
                  info['put_timestamp'], info['delete_timestamp'],
                  info['object_count'], info['bytes_used'],
                  info['storage_policy_index']) for node in nodes
        ]
        successes = 0
        stub404s = 0
        for event in events:
            result = event.wait()
            if is_success(result):
                successes += 1
            if result == 404:
                stub404s += 1
        if successes >= majority_size(len(events)):
            self.logger.increment('successes')
            self.successes += 1
            self.logger.debug(
                _('Update report sent for %(container)s %(dbfile)s'), {
                    'container': container,
                    'dbfile': dbfile
                })
            # Remember what was reported so unchanged containers are
            # skipped next time.
            broker.reported(info['put_timestamp'],
                            info['delete_timestamp'], info['object_count'],
                            info['bytes_used'])
        elif stub404s == len(events):
            # Every account node answered 404.
            self.logger.increment('failures')
            self.failures += 1
            self.logger.debug(
                _('Update report stub for %(container)s %(dbfile)s'), {
                    'container': container,
                    'dbfile': dbfile
                })
            broker.quarantine('no account replicas exist')
            # All that's left at this point is a few sacks of Gnocchi,
            # easily collected by the dark data watcher in object auditor.
        else:
            self.logger.increment('failures')
            self.failures += 1
            self.logger.debug(
                _('Update report failed for %(container)s %(dbfile)s'), {
                    'container': container,
                    'dbfile': dbfile
                })
            # Back off from this account until `until`; see the
            # suppression check near the top of this method.
            self.account_suppressions[info['account']] = until = \
                time.time() + self.account_suppression_time
            if self.new_account_suppressions:
                print(info['account'], until,
                      file=self.new_account_suppressions)
        # Only track timing data for attempted updates:
        self.logger.timing_since('timing', start_time)
    else:
        self.logger.increment('no_changes')
        self.no_changes += 1
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    A report is sent to every account node only when the container's put
    or delete timestamp is newer than the reported one, or its object
    count or byte total differs from the reported values.  The update
    counts as successful when at least ``quorum_size`` of the reports
    succeed; otherwise the account is suppressed for
    ``self.account_suppression_time`` seconds.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    info = broker.get_info()
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info["put_timestamp"]) <= 0:
        return
    if self.account_suppressions.get(info["account"], 0) > time.time():
        return

    timestamps_advanced = (
        info["put_timestamp"] > info["reported_put_timestamp"]
        or info["delete_timestamp"] > info["reported_delete_timestamp"]
    )
    if not (
        timestamps_advanced
        or info["object_count"] != info["reported_object_count"]
        or info["bytes_used"] != info["reported_bytes_used"]
    ):
        self.logger.increment("no_changes")
        self.no_changes += 1
        return

    account = info["account"]
    container = "/%s/%s" % (account, info["container"])
    part, nodes = self.get_account_ring().get_nodes(account)
    events = []
    for node in nodes:
        events.append(
            spawn(
                self.container_report,
                node,
                part,
                container,
                info["put_timestamp"],
                info["delete_timestamp"],
                info["object_count"],
                info["bytes_used"],
                info["storage_policy_index"],
            )
        )
    # Wait for every report, in order, counting the successful ones.
    successes = sum(1 for event in events if is_success(event.wait()))

    log_context = {"container": container, "dbfile": dbfile}
    if successes >= quorum_size(len(events)):
        self.logger.increment("successes")
        self.successes += 1
        self.logger.debug(
            _("Update report sent for %(container)s %(dbfile)s"),
            log_context)
        broker.reported(
            info["put_timestamp"], info["delete_timestamp"],
            info["object_count"], info["bytes_used"])
    else:
        self.logger.increment("failures")
        self.failures += 1
        self.logger.debug(
            _("Update report failed for %(container)s %(dbfile)s"),
            log_context)
        until = time.time() + self.account_suppression_time
        self.account_suppressions[account] = until
        if self.new_account_suppressions:
            print >>self.new_account_suppressions, account, until
    # Only track timing data for attempted updates:
    self.logger.timing_since("timing", start_time)
def test_shard_container(self):
    """
    Exercise the updater against a shard (non-root) container.

    Stages:

    1. An empty shard container is processed with no account server
       listening; nothing ends up reported.
    2. An object is added and non-zero stats are faked as already
       reported; with no account server listening the reported values
       stay as they were.
    3. Fake account servers answer 201; the put timestamp is recorded
       as reported and the reported object count and bytes used go back
       to zero, while the actual stats are unchanged.
    """
    cu = self._get_container_updater()
    cu.run_once()
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    cu.run_once()
    self.assertTrue(os.path.exists(containers_dir))
    subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'),
                         account='.shards_a', container='c')
    cb.initialize(normalize_timestamp(1), 0)
    # After this sysmeta is set the broker no longer reports itself as a
    # root container, which the next assertion checks.
    cb.set_sharding_sysmeta('Quoted-Root', 'a/c')
    self.assertFalse(cb.is_root_container())
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 0)
    self.assertEqual(info['bytes_used'], 0)
    self.assertEqual(info['reported_put_timestamp'], '0')
    self.assertEqual(info['reported_delete_timestamp'], '0')
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)

    cb.put_object('o', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')
    # Fake us having already reported *bad* stats under swift 2.18.0
    cb.reported('0', '0', 1, 3)

    # Should fail with a bunch of connection-refused
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    self.assertEqual(info['reported_put_timestamp'], '0')
    self.assertEqual(info['reported_delete_timestamp'], '0')
    self.assertEqual(info['reported_object_count'], 1)
    self.assertEqual(info['reported_bytes_used'], 3)

    def accept(sock, addr, return_code):
        # Fake account server: reply with `return_code`, then check the
        # request line and that the expected headers were sent. Any
        # error (including a failed assertion) is returned, not raised,
        # so the test can re-raise it after waiting on this handler.
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b'PUT /sda1/2/.shards_a/c HTTP/1.1\r\n')
                headers = {}
                line = inc.readline()
                # Read header lines up to the blank line.
                while line and line != b'\r\n':
                    headers[line.split(b':')[0].lower()] = \
                        line.split(b':')[1].strip()
                    line = inc.readline()
                self.assertIn(b'x-put-timestamp', headers)
                self.assertIn(b'x-delete-timestamp', headers)
                self.assertIn(b'x-object-count', headers)
                self.assertIn(b'x-bytes-used', headers)
        except BaseException as err:
            import traceback
            traceback.print_exc()
            return err
        return None
    bindsock = listen_zero()

    def spawn_accepts():
        # Accept two connections and serve each one with a 201.
        events = []
        for _junk in range(2):
            sock, addr = bindsock.accept()
            events.append(spawn(accept, sock, addr, 201))
        return events

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    cu.run_once()
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    self.assertEqual(info['reported_put_timestamp'], '0000000001.00000')
    self.assertEqual(info['reported_delete_timestamp'], '0')
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if syncing
    is turned on for that database and, if so, sends any updates to the
    other container.

    Nothing is done when this server (matched by ip and port) is not one
    of the ring nodes for the container.  A container without both
    'x-container-sync-to' and 'x-container-sync-key' metadata is counted
    as a skip; an invalid sync-to value is counted as a failure.

    Syncing runs in two passes, both bounded by ``self.container_time``
    seconds:

    1. rows after sync point 2, up to sync point 1, are all attempted;
    2. rows after sync point 1 are attempted only when the row's name
       hash modulo the number of nodes equals this node's ordinal.

    Any exception (or Timeout) is counted and logged as a failure.

    :param path: the path to a container db
    """
    # Bound before the try so the except handler can test it.
    broker = None
    try:
        broker = ContainerBroker(path)
        info = broker.get_info()
        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        # Find this server's position among the container's nodes;
        # `ordinal` is used below to pick which rows to sync first.
        for ordinal, node in enumerate(nodes):
            if node['ip'] in self._myips and node['port'] == self._myport:
                break
        else:
            # This server is not one of the container's nodes.
            return
        if not broker.is_deleted():
            sync_to = None
            sync_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            # Metadata keys are matched case-insensitively.
            for key, (value, timestamp) in broker.metadata.iteritems():
                if key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif key.lower() == 'x-container-sync-key':
                    sync_key = value
            if not sync_to or not sync_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            sync_to = sync_to.rstrip('/')
            err = validate_sync_to(sync_to, self.allowed_sync_hosts)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': broker.db_file,
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            stop_at = time() + self.container_time
            # Set to the value of sync point 2 in effect when a row
            # first fails to sync in the loop below.
            next_sync_point = None
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                # NOTE(review): `key` is computed here but not used
                # within this loop.
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                # problematic rows as needed in case of faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the nodes
                # didn't succeed.
                if not self.container_sync_row(row, sync_to, sync_key,
                                               broker, info):
                    if not next_sync_point:
                        next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                # Move sync point 2 back so the failed row is picked up
                # again by a later run.
                broker.set_x_container_sync_points(None, next_sync_point)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                # around to the section above and attempt to sync
                # previously skipped rows in case the other nodes didn't
                # succeed or in case it failed to do so the first time.
                if unpack_from('>I', key)[0] % \
                        len(nodes) == ordinal:
                    self.container_sync_row(row, sync_to, sync_key,
                                            broker, info)
                # Sync point 1 advances whether or not this node synced
                # the row.
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
            self.logger.increment('syncs')
    except (Exception, Timeout) as err:
        self.container_failures += 1
        self.logger.increment('failures')
        # `broker` is still None if ContainerBroker(path) itself failed.
        self.logger.exception(_('ERROR Syncing %s'),
                              broker.db_file if broker else path)
def _get_broker(self, part, node, account=None, container=None):
    """
    Build a ContainerBroker for the db file in the storage directory
    that ``self._get_storage_dir`` returns for the given arguments.

    :param part: passed through to ``_get_storage_dir``
    :param node: passed through to ``_get_storage_dir``
    :param account: passed through to ``_get_storage_dir``
    :param container: passed through to ``_get_storage_dir``
    :returns: a ContainerBroker for ``<container_dir>/<hash>.db``
    """
    storage = self._get_storage_dir(
        part, node, account=account, container=container)
    container_dir, container_hash = storage
    return ContainerBroker(
        os.path.join(container_dir, container_hash + '.db'))
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    try:
        info = broker.get_info()
    except LockTimeout as e:
        self.logger.info(
            "Failed to get container info (Lock timeout: %s); skipping.",
            str(e))
        return
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info['put_timestamp']) <= 0:
        return
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    if not broker.is_root_container():
        # Don't double-up account stats.
        # The sharder should get these stats to the root container,
        # and the root's updater will get them to the right account.
        info['object_count'] = info['bytes_used'] = 0

    needs_report = (
        info['put_timestamp'] > info['reported_put_timestamp'] or
        info['delete_timestamp'] > info['reported_delete_timestamp'] or
        info['object_count'] != info['reported_object_count'] or
        info['bytes_used'] != info['reported_bytes_used'])
    if not needs_report:
        self.logger.increment('no_changes')
        self.no_changes += 1
        return

    container = '/%s/%s' % (info['account'], info['container'])
    part, nodes = self.get_account_ring().get_nodes(info['account'])
    # Report to every account node concurrently, then collect the results.
    events = [
        spawn(self.container_report, node, part, container,
              info['put_timestamp'], info['delete_timestamp'],
              info['object_count'], info['bytes_used'],
              info['storage_policy_index'])
        for node in nodes]
    successes = sum(1 for event in events if is_success(event.wait()))
    log_args = {'container': container, 'dbfile': dbfile}
    if successes >= majority_size(len(events)):
        self.logger.increment('successes')
        self.successes += 1
        self.logger.debug(
            _('Update report sent for %(container)s %(dbfile)s'), log_args)
        broker.reported(info['put_timestamp'],
                        info['delete_timestamp'], info['object_count'],
                        info['bytes_used'])
    else:
        self.logger.increment('failures')
        self.failures += 1
        self.logger.debug(
            _('Update report failed for %(container)s %(dbfile)s'),
            log_args)
        # Hold off further updates for this account for a while.
        until = time.time() + self.account_suppression_time
        self.account_suppressions[info['account']] = until
        if self.new_account_suppressions:
            print(info['account'], until,
                  file=self.new_account_suppressions)
    # Only track timing data for attempted updates:
    self.logger.timing_since('timing', start_time)
def test_shard_container(self):
    """
    Run the updater against a shard container ('.shards_a/c') and check
    the object/bytes/reported values in the broker info after each pass.
    """
    def assert_info(expected):
        # Compare selected fields of the broker's current info, in order.
        info = cb.get_info()
        for field, value in expected:
            self.assertEqual(info[field], value)

    cu = self._get_container_updater()
    cu.run_once()
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    cu.run_once()
    self.assertTrue(os.path.exists(containers_dir))
    subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'),
                         account='.shards_a', container='c')
    cb.initialize(normalize_timestamp(1), 0)
    cb.set_sharding_sysmeta('Root', 'a/c')
    self.assertFalse(cb.is_root_container())
    cu.run_once()
    assert_info([
        ('object_count', 0),
        ('bytes_used', 0),
        ('reported_put_timestamp', '0'),
        ('reported_delete_timestamp', '0'),
        ('reported_object_count', 0),
        ('reported_bytes_used', 0),
    ])

    cb.put_object('o', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')
    # Fake us having already reported *bad* stats under swift 2.18.0
    cb.reported('0', '0', 1, 3)

    # Should fail with a bunch of connection-refused
    cu.run_once()
    assert_info([
        ('object_count', 1),
        ('bytes_used', 3),
        ('reported_put_timestamp', '0'),
        ('reported_delete_timestamp', '0'),
        ('reported_object_count', 1),
        ('reported_bytes_used', 3),
    ])

    def accept(sock, addr, return_code):
        # Fake account server: send a canned response, then check the
        # request line and headers. Returns the exception on failure.
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b'PUT /sda1/2/.shards_a/c HTTP/1.1\r\n')
                headers = {}
                line = inc.readline()
                while line and line != b'\r\n':
                    pieces = line.split(b':')
                    header_value = pieces[1].strip()
                    headers[pieces[0].lower()] = header_value
                    line = inc.readline()
                for wanted in (b'x-put-timestamp', b'x-delete-timestamp',
                               b'x-object-count', b'x-bytes-used'):
                    self.assertIn(wanted, headers)
        except BaseException as err:
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen_zero()

    def spawn_accepts():
        # Accept two connections, handling each in its own greenthread.
        handlers = []
        for _junk in range(2):
            sock, addr = bindsock.accept()
            handlers.append(spawn(accept, sock, addr, 201))
        return handlers

    spawned = spawn(spawn_accepts)
    bound_port = bindsock.getsockname()[1]
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bound_port
    cu.run_once()
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    assert_info([
        ('object_count', 1),
        ('bytes_used', 3),
        ('reported_put_timestamp', '0000000001.00000'),
        ('reported_delete_timestamp', '0'),
        ('reported_object_count', 0),
        ('reported_bytes_used', 0),
    ])
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    info = broker.get_info()
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info['put_timestamp']) <= 0:
        return
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    changed = (
        info['put_timestamp'] > info['reported_put_timestamp'] or
        info['delete_timestamp'] > info['reported_delete_timestamp'] or
        info['object_count'] != info['reported_object_count'] or
        info['bytes_used'] != info['reported_bytes_used'])
    if not changed:
        self.logger.increment('no_changes')
        self.no_changes += 1
        return

    container = '/%s/%s' % (info['account'], info['container'])
    part, nodes = self.get_account_ring().get_nodes(info['account'])
    # One concurrent report per account node.
    events = []
    for node in nodes:
        events.append(spawn(
            self.container_report, node, part, container,
            info['put_timestamp'], info['delete_timestamp'],
            info['object_count'], info['bytes_used'],
            info['storage_policy_index']))
    successes = 0
    for event in events:
        if is_success(event.wait()):
            successes += 1

    names = {'container': container, 'dbfile': dbfile}
    if successes >= quorum_size(len(events)):
        self.logger.increment('successes')
        self.successes += 1
        self.logger.debug(
            _('Update report sent for %(container)s %(dbfile)s'), names)
        broker.reported(info['put_timestamp'],
                        info['delete_timestamp'], info['object_count'],
                        info['bytes_used'])
    else:
        self.logger.increment('failures')
        self.failures += 1
        self.logger.debug(
            _('Update report failed for %(container)s %(dbfile)s'), names)
        # Suppress further updates for this account for a while.
        until = time.time() + self.account_suppression_time
        self.account_suppressions[info['account']] = until
        if self.new_account_suppressions:
            print(info['account'], until,
                  file=self.new_account_suppressions)
    # Only track timing data for attempted updates:
    self.logger.timing_since('timing', start_time)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if syncing
    is turned on for that database and, if so, sends any updates to the
    other container.

    Any Exception or Timeout raised while syncing is counted as a failure
    and logged; it is not propagated to the caller.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path)
        info = broker.get_info()
        _part, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
        # Find this server's position in the node list; if it is not one
        # of the container's nodes there is nothing to do.
        for ordinal, node in enumerate(nodes):
            if node['ip'] in self._myips and node['port'] == self._myport:
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            sync_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            # items() (not iteritems()) so this works on py2 and py3.
            for meta_key, (value, _timestamp) in broker.metadata.items():
                if meta_key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif meta_key.lower() == 'x-container-sync-key':
                    sync_key = value
            if not sync_to or not sync_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            sync_to = sync_to.rstrip('/')
            err = validate_sync_to(sync_to, self.allowed_sync_hosts)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': str(broker),
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            stop_at = time() + self.container_time
            next_sync_point = None
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                # problematic rows as needed in case of faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the nodes
                # didn't succeed.
                # (Every row is attempted here, so no per-row hash is
                # needed in this loop.)
                if not self.container_sync_row(row, sync_to, sync_key,
                                               broker, info):
                    if not next_sync_point:
                        # Remember the point before the first failed row.
                        next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                broker.set_x_container_sync_points(None, next_sync_point)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                # around to the section above and attempt to sync
                # previously skipped rows in case the other nodes didn't
                # succeed or in case it failed to do so the first time.
                if unpack_from('>I', key)[0] % len(nodes) == ordinal:
                    self.container_sync_row(row, sync_to, sync_key,
                                            broker, info)
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
            self.logger.increment('syncs')
    except (Exception, Timeout):
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def main(args=None):
    """
    Command-line entry point: parse the arguments, fill in unset options
    (using the 'container-sharder' config section when a config file is
    given) and dispatch to the selected sub-command.

    :param args: argument list handed to the parser's ``parse_args``
    :returns: 1 when no sub-command was given, 2 when the config file or
        the container DB cannot be loaded, otherwise the sub-command's
        return value
    """
    parser = _make_parser()
    args = parser.parse_args(args)
    if not args.subcommand:
        # On py2, subparsers are required; on py3 they are not; see
        # https://bugs.python.org/issue9253. py37 added a `required` kwarg
        # to let you control it, but prior to that, there was no choice in
        # the matter. So, check whether the destination was set and bomb
        # out if not.
        parser.print_help()
        print('\nA sub-command is required.')
        return 1

    conf = {}
    rows_per_shard = DEFAULT_ROWS_PER_SHARD
    shrink_threshold = DEFAULT_SHRINK_THRESHOLD
    expansion_limit = DEFAULT_ROWS_PER_SHARD
    if args.conf_file:
        try:
            conf = readconf(args.conf_file, 'container-sharder')
            threshold_setting = conf.get(
                'shard_container_threshold',
                DEFAULT_SHARD_CONTAINER_THRESHOLD)
            shard_container_threshold = config_positive_int_value(
                threshold_setting)
            if shard_container_threshold:
                rows_per_shard = shard_container_threshold // 2
                shrink_point = config_percent_value(
                    conf.get('shard_shrink_point',
                             DEFAULT_SHARD_SHRINK_POINT))
                shrink_threshold = int(
                    shard_container_threshold * shrink_point)
                merge_point = config_percent_value(
                    conf.get('shard_shrink_merge_point',
                             DEFAULT_SHARD_MERGE_POINT))
                expansion_limit = int(
                    shard_container_threshold * merge_point)
        except Exception as exc:
            print('Error opening config file %s: %s' % (args.conf_file, exc),
                  file=sys.stderr)
            return 2

    # seems having sub parsers mean sometimes an arg wont exist in the args
    # namespace. But we can check if it is with the 'in' statement.
    # These two come from the config (converted lazily, only when needed).
    for option, fallback in (("max_shrinking", DEFAULT_MAX_SHRINKING),
                             ("max_expanding", DEFAULT_MAX_EXPANDING)):
        if option in args and getattr(args, option) is None:
            setattr(args, option, int(conf.get(option, fallback)))
    # These three use the values derived above.
    for option, derived in (("shrink_threshold", shrink_threshold),
                            ("expansion_limit", expansion_limit),
                            ("rows_per_shard", rows_per_shard)):
        if option in args and getattr(args, option) is None:
            setattr(args, option, derived)

    if args.func in (analyze_shard_ranges,):
        # This sub-command takes the parsed args only, not a broker.
        args.input = args.path_to_file
        return args.func(args) or 0

    logger = get_logger({}, name='ContainerBroker', log_to_console=True)
    db_path = os.path.realpath(args.path_to_file)
    broker = ContainerBroker(db_path, logger=logger,
                             skip_commits=not args.force_commits)
    try:
        broker.get_info()
    except Exception as exc:
        print('Error opening container DB %s: %s' % (args.path_to_file, exc),
              file=sys.stderr)
        return 2
    print('Loaded db broker for %s' % broker.path, file=sys.stderr)
    return args.func(broker, args)
def get_broker(self, account, container, part, node):
    """
    Build a ContainerBroker for the given account/container on a node.

    The db path is ``self.root`` / node device / storage directory for
    (part, hash) / ``<hash>.db``, where the hash is taken over the UTF-8
    encoded account and container names.

    :param account: account name
    :param container: container name
    :param part: partition passed to ``storage_directory``
    :param node: node dict; its 'device' entry is used in the path
    :returns: a ContainerBroker for the computed db path
    """
    name_hash = hash_path(account.encode('utf-8'),
                          container.encode('utf-8'))
    relative_dir = storage_directory(DATADIR, part, name_hash)
    db_file = os.path.join(
        self.root, node['device'], relative_dir, name_hash + '.db')
    return ContainerBroker(db_file, account=account, container=container)
def _gen_container_stat(self, set_metadata=False):
    """
    Create ten container DBs, each holding one 10-byte object, and return
    a stats collector together with the expected CSV lines.

    :param set_metadata: when true, configure the 'test1,test2' metadata
        keys and set one of the two metadata items on each container
    :returns: tuple of (ContainerStatsCollector, set of expected lines)
    """
    if set_metadata:
        self.conf['metadata_keys'] = 'test1,test2'
        # webob runs title on all headers
    stat = db_stats_collector.ContainerStatsCollector(self.conf)
    output_data = set()
    for i in range(10):
        db_path = "%s/container-stats-201001010%s-%s.db" % (
            self.containers, i, uuid.uuid4().hex)
        cont_db = ContainerBroker(db_path, account='test_acc_%s' % i,
                                  container='test_con')
        cont_db.initialize(storage_policy_index=0)
        cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
        metadata_output = ''
        if set_metadata and i % 2:
            # Odd containers carry Test1 only.
            cont_db.update_metadata({'X-Container-Meta-Test1': (5, 1)})
            metadata_output = ',1,'
        elif set_metadata:
            # Even containers carry Test2 only.
            cont_db.update_metadata({'X-Container-Meta-Test2': (7, 2)})
            metadata_output = ',,1'
        # this will "commit" the data
        cont_db.get_info()
        expected_line = '''"test_acc_%s","test_con",1,10%s''' % (
            i, metadata_output)
        output_data.add(expected_line)

    self.assertEqual(len(output_data), 10)
    return stat, output_data
from swift.common.utils import Timestamp
from swift.container.backend import ContainerBroker
import time

# Ad-hoc script: merge a single hand-made object row into one container DB.
broker_path = (
    '/mnt/sdb1/1/node/sdb3/containers/155/28b/'
    '9bd49503214e557dec94935a26de428b/'
    '9bd49503214e557dec94935a26de428b.db')

# Timestamp for the new row, in internal format.
t = Timestamp(time.time()).internal
item = {
    'name': 'zzzzzzzz',
    'created_at': t,
    'size': 0,
    'content_type': 'application/octet-stream',
    'etag': 'd41d8cd98f00b204e9800998ecf8427e',
    'deleted': 0,
    'storage_policy_index': 0,
    'record_type': 0,
}
items = [item]

broker = ContainerBroker(broker_path)
broker.merge_items(items)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if syncing
    is turned on for that database and, if so, sends any updates to the
    other container.

    Any Exception or Timeout raised while syncing (including the re-raised
    stale-link error) is counted as a failure and logged; it is not
    propagated to the caller.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path)
        # The path we pass to the ContainerBroker is a real path of
        # a container DB. If we get here, however, it means that this
        # path is linked from the sync_containers dir. In rare cases
        # of race or processes failures the link can be stale and
        # the get_info below will raise a DB doesn't exist exception
        # In this case we remove the stale link and raise an error
        # since in most cases the db should be there.
        try:
            info = broker.get_info()
        except DatabaseConnectionError as db_err:
            if str(db_err).endswith("DB doesn't exist"):
                self.sync_store.remove_synced_container(broker)
            raise

        _part, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
        # Find this server's position in the node list; if it is not one
        # of the container's nodes there is nothing to do.
        for ordinal, node in enumerate(nodes):
            if is_local_device(self._myips, self._myport,
                               node['ip'], node['port']):
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            user_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            for meta_key, (value, _timestamp) in broker.metadata.items():
                if meta_key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif meta_key.lower() == 'x-container-sync-key':
                    user_key = value
            if not sync_to or not user_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            err, sync_to, realm, realm_key = validate_sync_to(
                sync_to, self.allowed_sync_hosts, self.realms_conf)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': str(broker),
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            stop_at = time() + self.container_time
            next_sync_point = None
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                # problematic rows as needed in case of faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the nodes
                # didn't succeed.
                # (Every row is attempted here, so no per-row hash is
                # needed in this loop.)
                if not self.container_sync_row(
                        row, sync_to, user_key, broker, info, realm,
                        realm_key):
                    if not next_sync_point:
                        # Remember the point before the first failed row.
                        next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                broker.set_x_container_sync_points(None, next_sync_point)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of the
                # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                # around to the section above and attempt to sync
                # previously skipped rows in case the other nodes didn't
                # succeed or in case it failed to do so the first time.
                if unpack_from('>I', key)[0] % len(nodes) == ordinal:
                    self.container_sync_row(
                        row, sync_to, user_key, broker, info, realm,
                        realm_key)
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
            self.logger.increment('syncs')
    except (Exception, Timeout):
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def _gen_container_stat(self, set_metadata=False, drop_metadata=False):
    """
    Create ten container DBs, each holding one 10-byte object, and return
    a stats collector together with the expected CSV lines.

    :param set_metadata: when true, configure the 'test1,test2' metadata
        keys and set one of the two metadata items on each container
    :param drop_metadata: when true, expect empty metadata columns and
        call ``self._drop_metadata_col`` on each container DB
    :returns: tuple of (ContainerStatsCollector, set of expected lines)
    """
    if set_metadata:
        self.conf['metadata_keys'] = 'test1,test2'
        # webob runs title on all headers
    stat = db_stats_collector.ContainerStatsCollector(self.conf)
    output_data = set()
    for i in range(10):
        account_name = 'test_acc_%s' % i
        db_path = "%s/container-stats-201001010%s-%s.db" % (
            self.containers, i, uuid.uuid4().hex)
        cont_db = ContainerBroker(db_path, account=account_name,
                                  container='test_con')
        cont_db.initialize()
        cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
        metadata_output = ''
        if set_metadata and i % 2:
            # Odd containers carry Test1 only.
            cont_db.update_metadata({'X-Container-Meta-Test1': (5, 1)})
            metadata_output = ',1,'
        elif set_metadata:
            # Even containers carry Test2 only.
            cont_db.update_metadata({'X-Container-Meta-Test2': (7, 2)})
            metadata_output = ',,1'
        # this will "commit" the data
        cont_db.get_info()
        if drop_metadata:
            output_data.add('''"test_acc_%s","test_con",1,10,,''' % i)
            self._drop_metadata_col(cont_db, account_name)
        else:
            output_data.add('''"test_acc_%s","test_con",1,10%s''' %
                            (i, metadata_output))

    self.assertEqual(len(output_data), 10)
    return stat, output_data
def test_find_shard_ranges(self):
    """
    Run the 'find' sub-command against a 100-object container with
    different rows-per-shard values and check the JSON written to stdout
    and the status lines written to stderr.
    """
    db_file = os.path.join(self.testdir, 'hash.db')
    broker = ContainerBroker(db_file)
    broker.account = 'a'
    broker.container = 'c'
    broker.initialize()
    ts = utils.Timestamp.now()
    objects = []
    for i in range(100):
        objects.append({
            'name': 'obj%02d' % i,
            'created_at': ts.internal,
            'size': 0,
            'content_type': 'application/octet-stream',
            'etag': 'not-really',
            'deleted': 0,
            'storage_policy_index': 0,
            'ctype_timestamp': ts.internal,
            'meta_timestamp': ts.internal,
        })
    broker.merge_items(objects)

    def check_find(extra_args, expected_ranges, found_prefix):
        # Run main() with stdout/stderr captured, then verify both.
        out = StringIO()
        err = StringIO()
        with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
            main([db_file, 'find'] + extra_args)
        self.assert_formatted_json(out.getvalue(), expected_ranges)
        err_lines = err.getvalue().split('\n')
        self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
        self.assert_starts_with(err_lines[1], found_prefix)

    # Default uses a large enough value that sharding isn't required
    check_find([], [], 'Found 0 ranges in ')

    check_find(['100'], [], 'Found 0 ranges in ')

    check_find(['99'], [
        {'index': 0, 'lower': '', 'upper': 'obj98', 'object_count': 99},
        {'index': 1, 'lower': 'obj98', 'upper': '', 'object_count': 1},
    ], 'Found 2 ranges in ')

    # Ten ranges of ten objects: bounds are '', obj09, obj19 ... obj89, ''.
    bounds = [''] + ['obj%d9' % tens for tens in range(9)] + ['']
    ten_ranges = [
        {'index': index,
         'lower': bounds[index],
         'upper': bounds[index + 1],
         'object_count': 10}
        for index in range(10)]
    check_find(['10'], ten_ranges, 'Found 10 ranges in ')
def test_find_shard_ranges(self):
    """
    Run the 'find' sub-command against a 100-object container with
    different rows-per-shard values and check the JSON written to stdout
    and the status lines written to stderr.
    """
    db_file = os.path.join(self.testdir, 'hash.db')
    broker = ContainerBroker(db_file)
    broker.account = 'a'
    broker.container = 'c'
    broker.initialize()
    ts = utils.Timestamp.now()

    def make_row(number):
        # One object row; all timestamps share the same value.
        return {'name': 'obj%02d' % number, 'created_at': ts.internal,
                'size': 0, 'content_type': 'application/octet-stream',
                'etag': 'not-really', 'deleted': 0,
                'storage_policy_index': 0,
                'ctype_timestamp': ts.internal,
                'meta_timestamp': ts.internal}

    broker.merge_items([make_row(i) for i in range(100)])

    def run_find(*rows_per_shard):
        # Invoke main() with captured output; return stdout text and the
        # stderr text split into lines.
        out = StringIO()
        err = StringIO()
        with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
            main([db_file, 'find'] + list(rows_per_shard))
        return out.getvalue(), err.getvalue().split('\n')

    # Default uses a large enough value that sharding isn't required
    stdout_text, err_lines = run_find()
    self.assert_formatted_json(stdout_text, [])
    self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
    self.assert_starts_with(err_lines[1], 'Found 0 ranges in ')

    stdout_text, err_lines = run_find('100')
    self.assert_formatted_json(stdout_text, [])
    self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
    self.assert_starts_with(err_lines[1], 'Found 0 ranges in ')

    stdout_text, err_lines = run_find('99')
    self.assert_formatted_json(stdout_text, [
        {'index': 0, 'lower': '', 'upper': 'obj98', 'object_count': 99},
        {'index': 1, 'lower': 'obj98', 'upper': '', 'object_count': 1},
    ])
    self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
    self.assert_starts_with(err_lines[1], 'Found 2 ranges in ')

    stdout_text, err_lines = run_find('10')
    self.assert_formatted_json(stdout_text, [
        {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10},
        {'index': 1, 'lower': 'obj09', 'upper': 'obj19',
         'object_count': 10},
        {'index': 2, 'lower': 'obj19', 'upper': 'obj29',
         'object_count': 10},
        {'index': 3, 'lower': 'obj29', 'upper': 'obj39',
         'object_count': 10},
        {'index': 4, 'lower': 'obj39', 'upper': 'obj49',
         'object_count': 10},
        {'index': 5, 'lower': 'obj49', 'upper': 'obj59',
         'object_count': 10},
        {'index': 6, 'lower': 'obj59', 'upper': 'obj69',
         'object_count': 10},
        {'index': 7, 'lower': 'obj69', 'upper': 'obj79',
         'object_count': 10},
        {'index': 8, 'lower': 'obj79', 'upper': 'obj89',
         'object_count': 10},
        {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10},
    ])
    self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
    self.assert_starts_with(err_lines[1], 'Found 10 ranges in ')
from swift.container.backend import ContainerBroker
from swift.common.utils import Timestamp
from time import time
import os


def show_result(is_deleted, is_empty):
    """
    Print the deleted/empty state of the container.

    :param is_deleted: value shown after 'deleted?'
    :param is_empty: value shown after 'empty?'
    """
    # Single-argument print(...) produces the same output on py2 and py3.
    print('deleted? %s\nempty? %s\n' % (is_deleted, is_empty))


if __name__ == '__main__':
    # Exercise an in-memory container DB and show its state after a put
    # and after a delete of the same object.
    db_file = ':memory:'
    broker = ContainerBroker(db_file=db_file, account='account',
                             container='container')
    broker.initialize(Timestamp(time()).internal, 0)
    obj = 'object'

    print('put object:')
    broker.put_object(obj, Timestamp(time()).internal, 10, '', '')
    info, is_deleted = broker.get_info_is_deleted()
    show_result(is_deleted, broker.empty())

    print('delete object:')
    broker.delete_object(obj, Timestamp(time()).internal)
    info, is_deleted = broker.get_info_is_deleted()
    show_result(is_deleted, broker.empty())

    # NOTE(review): only the heading is printed; no DB deletion follows
    # in the script as written.
    print('delete db:')
from swift.container.backend import ContainerBroker

# Ad-hoc script: read up to 10000 object rows from the "_pivot" DB, prefix
# every name with '0', and merge the renamed rows back into the same DB.
broker = ContainerBroker('/srv/1/node/sdb3/containers/122/8cc/7a369ae647a78e'
                         '80995744973934e8cc/7a369ae647a78e80995744973934e'
                         '8cc_pivot.db')
# Alternative target (same directory, DB without the "_pivot" suffix):
#broker = ContainerBroker('/srv/1/node/sdb3/containers/122/8cc/7a369ae647a78e'
#                         '80995744973934e8cc/7a369ae647a78e80995744973934e'
#                         '8cc.db')
items = broker.list_objects_iter(10000, '', '', '', '')


def _prefixed(record):
    # Copy the record as a list, prefix its name and append a trailing 0.
    fields = list(record)
    fields[0] = '%d%s' % (0, fields[0])
    fields.append(0)
    return fields


more_items = [_prefixed(item) for item in items]
dlist = [broker._record_to_dict(fields) for fields in more_items]

broker.merge_items(dlist)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if syncing
    is turned on for that database and, if so, sends any updates to the
    other container.

    Once the sync loops have been entered, ``self.container_report`` is
    always called (from a ``finally``) with the timing and sync points.
    Any Exception or Timeout is counted as a failure and logged rather
    than propagated.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path, logger=self.logger)
        # The path we pass to the ContainerBroker is a real path of
        # a container DB. If we get here, however, it means that this
        # path is linked from the sync_containers dir. In rare cases
        # of race or processes failures the link can be stale and
        # the get_info below will raise a DB doesn't exist exception
        # In this case we remove the stale link and raise an error
        # since in most cases the db should be there.
        try:
            info = broker.get_info()
        except DatabaseConnectionError as db_err:
            if str(db_err).endswith("DB doesn't exist"):
                self.sync_store.remove_synced_container(broker)
            raise

        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        # Work out this server's position among the container's nodes.
        for ordinal, node in enumerate(nodes):
            if is_local_device(self._myips, self._myport,
                               node['ip'], node['port']):
                break
        else:
            return
        if broker.is_deleted():
            return

        sync_to = None
        user_key = None
        sync_point1 = info['x_container_sync_point1']
        sync_point2 = info['x_container_sync_point2']
        for key, (value, timestamp) in broker.metadata.items():
            lowered = key.lower()
            if lowered == 'x-container-sync-to':
                sync_to = value
            elif lowered == 'x-container-sync-key':
                user_key = value
        if not sync_to or not user_key:
            self.container_skips += 1
            self.logger.increment('skips')
            return
        err, sync_to, realm, realm_key = validate_sync_to(
            sync_to, self.allowed_sync_hosts, self.realms_conf)
        if err:
            self.logger.info(
                _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                {'db_file': str(broker),
                 'validate_sync_to_err': err})
            self.container_failures += 1
            self.logger.increment('failures')
            return

        start_at = time()
        stop_at = start_at + self.container_time
        next_sync_point = None
        sync_stage_time = start_at
        try:
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                # This node will only initially sync out one third
                # of the objects (if 3 replicas, 1/4 if 4, etc.)
                # and will skip problematic rows as needed in case of
                # faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the
                # nodes didn't succeed.
                row_synced = self.container_sync_row(
                    row, sync_to, user_key, broker, info, realm,
                    realm_key)
                if not row_synced and not next_sync_point:
                    next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                broker.set_x_container_sync_points(None, next_sync_point)
            else:
                next_sync_point = sync_point2

            sync_stage_time = time()
            while sync_stage_time < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of
                # the objects (if 3 replicas, 1/4 if 4, etc.).
                # It'll come back around to the section above
                # and attempt to sync previously skipped rows in case
                # the other nodes didn't succeed or in case it failed
                # to do so the first time.
                if unpack_from('>I', key)[0] % len(nodes) == ordinal:
                    self.container_sync_row(
                        row, sync_to, user_key, broker, info, realm,
                        realm_key)
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
                sync_stage_time = time()
            self.container_syncs += 1
            self.logger.increment('syncs')
        finally:
            self.container_report(start_at, sync_stage_time,
                                  sync_point1, next_sync_point,
                                  info, broker.get_max_row())
    except (Exception, Timeout):
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def test_run_once(self):
    """
    Run the container updater repeatedly against one container DB and
    check that the reported object/byte counts only catch up once fake
    account servers answer the update requests with 201.
    """
    cu = container_updater.ContainerUpdater(
        {
            "devices": self.devices_dir,
            "mount_check": "false",
            "swift_dir": self.testdir,
            "interval": "1",
            "concurrency": "1",
            "node_timeout": "15",
            "account_suppression_time": 0,
        }
    )
    cu.run_once()
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    cu.run_once()
    self.assertTrue(os.path.exists(containers_dir))
    subdir = os.path.join(containers_dir, "subdir")
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, "hash.db"), account="a",
                         container="c")
    cb.initialize(normalize_timestamp(1), 0)
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info["object_count"], 0)
    self.assertEqual(info["bytes_used"], 0)
    self.assertEqual(info["reported_object_count"], 0)
    self.assertEqual(info["reported_bytes_used"], 0)

    cb.put_object("o", normalize_timestamp(2), 3, "text/plain",
                  "68b329da9893e34099c7d8ad5cb9c940")
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info["object_count"], 1)
    self.assertEqual(info["bytes_used"], 3)
    self.assertEqual(info["reported_object_count"], 0)
    self.assertEqual(info["reported_bytes_used"], 0)

    def accept(sock, addr, return_code):
        # Fake account server. The socket files are opened in binary
        # mode, so everything written to or compared with them is bytes
        # (identical to str on py2, required on py3).
        try:
            with Timeout(3):
                inc = sock.makefile("rb")
                out = sock.makefile("wb")
                out.write(b"HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n" %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b"PUT /sda1/0/a/c HTTP/1.1\r\n")
                headers = {}
                line = inc.readline()
                while line and line != b"\r\n":
                    headers[line.split(b":")[0].lower()] = \
                        line.split(b":")[1].strip()
                    line = inc.readline()
                self.assertIn(b"x-put-timestamp", headers)
                self.assertIn(b"x-delete-timestamp", headers)
                self.assertIn(b"x-object-count", headers)
                self.assertIn(b"x-bytes-used", headers)
        except BaseException as err:
            # Hand the failure back so the main greenthread can re-raise.
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen(("127.0.0.1", 0))

    def spawn_accepts():
        # Accept two connections, handling each in its own greenthread.
        events = []
        for _junk in range(2):
            sock, addr = bindsock.accept()
            events.append(spawn(accept, sock, addr, 201))
        return events

    spawned = spawn(spawn_accepts)
    # Point every account-ring device at the fake server's port.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev["port"] = bindsock.getsockname()[1]
    cu.run_once()
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    info = cb.get_info()
    self.assertEqual(info["object_count"], 1)
    self.assertEqual(info["bytes_used"], 3)
    self.assertEqual(info["reported_object_count"], 1)
    self.assertEqual(info["reported_bytes_used"], 3)
def test_run_once(self):
    """Drive ``ContainerUpdater.run_once`` through its main stages.

    Stages checked, in order:

    * a pass with no containers directory, then with an empty one;
    * a pass over a freshly initialized container ``a/c`` (all counts 0);
    * a pass after one 3-byte object is stored: the container's own
      counts change, the ``reported_*`` counts are still 0;
    * a pass with the account ring pointed at a local fake server that
      answers 201: the ``reported_*`` counts now match the real ones.
    """
    cu = container_updater.ContainerUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '15',
        'account_suppression_time': 0
    })
    # First pass runs before the containers directory exists.
    cu.run_once()
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    cu.run_once()
    self.assertTrue(os.path.exists(containers_dir))

    subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a',
                         container='c')
    cb.initialize(normalize_timestamp(1), 0)
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 0)
    self.assertEqual(info['bytes_used'], 0)
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)

    cb.put_object('o', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    # Nothing is listening for the update yet, so nothing is reported.
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)

    def accept(sock, addr, return_code):
        """Serve one fake account-server request; return the error or None."""
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                # The socket files are binary, so everything written to
                # or compared against them must be bytes. Bytes literals
                # behave the same on Python 2 and also work on Python 3.
                out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b'PUT /sda1/0/a/c HTTP/1.1\r\n')
                headers = {}
                line = inc.readline()
                while line and line != b'\r\n':
                    # Split only on the first colon so header values that
                    # themselves contain ':' are kept whole.
                    name, value = line.split(b':', 1)
                    headers[name.lower()] = value.strip()
                    line = inc.readline()
                for expected in (b'x-put-timestamp', b'x-delete-timestamp',
                                 b'x-object-count', b'x-bytes-used'):
                    self.assertIn(expected, headers)
        except BaseException as err:
            # NOTE(review): BaseException rather than Exception is
            # presumably so the Timeout above is captured too - confirm.
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen(('127.0.0.1', 0))

    def spawn_accepts():
        """Accept two connections and hand each to ``accept``."""
        events = []
        for _junk in range(2):
            sock, addr = bindsock.accept()
            events.append(spawn(accept, sock, addr, 201))
        return events

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server's port.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    cu.run_once()
    # Re-raise any failure captured inside the fake server handlers.
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    self.assertEqual(info['reported_object_count'], 1)
    self.assertEqual(info['reported_bytes_used'], 3)
# One-off script: merge a single object row into an existing container DB.
import time

from swift.common.utils import Timestamp
from swift.container.backend import ContainerBroker

# Hard-coded location of the container database to modify.
broker_path = '/mnt/sdb1/1/node/sdb3/containers/155/28b/9bd49503214e557dec94935a26de428b/9bd49503214e557dec94935a26de428b.db'

# The row is stamped with the ``internal`` form of a Timestamp built from
# the current wall-clock time.
t = Timestamp(time.time()).internal

# The single row handed to merge_items(): a zero-size object named
# 'zzzzzzzz'.
item = {
    'name': 'zzzzzzzz',
    'created_at': t,
    'size': 0,
    'content_type': 'application/octet-stream',
    'etag': 'd41d8cd98f00b204e9800998ecf8427e',
    'deleted': 0,
    'storage_policy_index': 0,
    'record_type': 0,
}
items = [item]

broker = ContainerBroker(broker_path)
broker.merge_items(items)
def test_run_once(self):
    """Drive ``ContainerUpdater.run_once`` through its main stages.

    Stages checked, in order:

    * a pass with no containers directory, then with an empty one;
    * a pass over a freshly initialized container ``a/c`` (all counts 0);
    * a pass after one 3-byte object is stored: the container's own
      counts change, the ``reported_*`` counts are still 0;
    * a pass with the account ring pointed at a local fake server that
      answers 201: the ``reported_*`` counts now match the real ones.
    """
    cu = self._get_container_updater()
    # First pass runs before the containers directory exists.
    cu.run_once()
    containers_dir = os.path.join(self.sda1, DATADIR)
    os.mkdir(containers_dir)
    cu.run_once()
    self.assertTrue(os.path.exists(containers_dir))

    subdir = os.path.join(containers_dir, 'subdir')
    os.mkdir(subdir)
    cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a',
                         container='c')
    cb.initialize(normalize_timestamp(1), 0)
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 0)
    self.assertEqual(info['bytes_used'], 0)
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)

    cb.put_object('o', normalize_timestamp(2), 3, 'text/plain',
                  '68b329da9893e34099c7d8ad5cb9c940')
    cu.run_once()
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    # Nothing is listening for the update yet, so nothing is reported.
    self.assertEqual(info['reported_object_count'], 0)
    self.assertEqual(info['reported_bytes_used'], 0)

    def accept(sock, addr, return_code):
        """Serve one fake account-server request; return the error or None."""
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                # The socket files are binary, so everything written to
                # or compared against them must be bytes. Bytes literals
                # behave the same on Python 2 and also work on Python 3.
                out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b'PUT /sda1/0/a/c HTTP/1.1\r\n')
                headers = {}
                line = inc.readline()
                while line and line != b'\r\n':
                    # Split only on the first colon so header values that
                    # themselves contain ':' are kept whole.
                    name, value = line.split(b':', 1)
                    headers[name.lower()] = value.strip()
                    line = inc.readline()
                for expected in (b'x-put-timestamp', b'x-delete-timestamp',
                                 b'x-object-count', b'x-bytes-used'):
                    self.assertIn(expected, headers)
        except BaseException as err:
            # NOTE(review): BaseException rather than Exception is
            # presumably so the Timeout above is captured too - confirm.
            import traceback
            traceback.print_exc()
            return err
        return None

    bindsock = listen_zero()

    def spawn_accepts():
        """Accept two connections and hand each to ``accept``."""
        events = []
        for _junk in range(2):
            sock, addr = bindsock.accept()
            events.append(spawn(accept, sock, addr, 201))
        return events

    spawned = spawn(spawn_accepts)
    # Point every account ring device at the fake server's port.
    for dev in cu.get_account_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    cu.run_once()
    # Re-raise any failure captured inside the fake server handlers.
    for event in spawned.wait():
        err = event.wait()
        if err:
            raise err
    info = cb.get_info()
    self.assertEqual(info['object_count'], 1)
    self.assertEqual(info['bytes_used'], 3)
    self.assertEqual(info['reported_object_count'], 1)
    self.assertEqual(info['reported_bytes_used'], 3)
def test_update_pending(self):
    """Check that no container update stays pending once the account DBs
    are deleted and the container-updater has run.

    The test records which container DBs have a ``put_timestamp`` newer
    than their ``reported_put_timestamp``, removes every account DB file,
    runs the updater once, and then expects that set to be empty among
    the container DB files that still exist.
    """

    def scan_outstanding(db_files, skip_missing):
        """Return the DB files whose put_timestamp exceeds the reported one.

        With ``skip_missing`` set, paths that no longer exist are ignored.
        """
        pending = []
        for db_file in db_files:
            if skip_missing and not os.path.exists(db_file):
                continue
            broker = ContainerBroker(db_file)
            try:
                info = broker.get_info()
            except LockTimeout:
                self.fail('LockTimeout at %s' % (db_file, ))
            if Timestamp(info['put_timestamp']) <= 0:
                self.fail('No put_timestamp at %s' % (db_file, ))
            # Correct even if reported_put_timestamp is zero.
            if info['put_timestamp'] > info['reported_put_timestamp']:
                pending.append(db_file)
        return pending

    # Create container
    container = 'contx'
    client.put_container(self.url, self.token, container)

    part, nodes = self.account_ring.get_nodes(self.account)
    anode = nodes[0]
    primary_ipport = (anode['ip'], anode['port'])

    # Stop a quorum of account servers
    # This allows the put to continue later.
    kill_nonprimary_server(nodes, self.ipport2server)
    kill_server(primary_ipport, self.ipport2server)

    # Put object
    # This creates an outstanding update.
    client.put_object(self.url, self.token, container, 'object1', b'123')

    cont_db_files = self.get_container_db_files(container)
    self.assertEqual(len(cont_db_files), 3)

    # Collect the observable state from containers
    outstanding_files = scan_outstanding(cont_db_files, skip_missing=False)
    self.assertGreater(len(outstanding_files), 0)

    # At this point the users shut everything down and screw up the
    # hash in swift.conf. But we destroy the account DB instead.
    for afile in self.get_account_db_files(self.account):
        os.unlink(afile)

    # Restart the stopped primary server
    start_server(primary_ipport, self.ipport2server)

    # Make sure updaters run
    Manager(['container-updater']).once()

    # Collect the observable state from containers again and examine it.
    # We aren't catching DatabaseConnectionError, because
    # we only want to approve of DBs that were quarantined,
    # and not otherwise damaged. So if the scan throws
    # an exception for other reason, we want the test to fail.
    outstanding_files_new = scan_outstanding(cont_db_files,
                                             skip_missing=True)
    self.assertLengthEqual(outstanding_files_new, 0)

    self.get_to_final_state()