def __init__(self, namespace, logger=None, **kwargs):
    """
    Initialize the object storage API.

    :param namespace: name of the namespace to interact with
    :type namespace: `str`
    :param logger: logger to use; when omitted (or falsy), one is
        obtained from `get_logger` with the namespace configuration
    :keyword connection_timeout: connection timeout towards rawx services
    :type connection_timeout: `float` seconds
    :keyword read_timeout: timeout for rawx responses and data reads
        from the caller (when uploading)
    :type read_timeout: `float` seconds
    :keyword write_timeout: timeout for rawx write requests
    :type write_timeout: `float` seconds
    :keyword pool_manager: a pooled connection manager that will be used
        for all HTTP based APIs (except rawx)
    :type pool_manager: `urllib3.PoolManager`
    """
    self.namespace = namespace
    conf = {"namespace": self.namespace}
    self.logger = logger or get_logger(conf)

    # Keep only the timeout-related keywords, converted to floats
    # (None when the conversion is not possible).
    known_timeouts = self.__class__.TIMEOUT_KEYS
    self.timeouts = {}
    for key, raw_value in kwargs.items():
        if key in known_timeouts:
            self.timeouts[key] = float_value(raw_value, None)

    from oio.account.client import AccountClient
    from oio.container.client import ContainerClient
    from oio.directory.client import DirectoryClient

    self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
    self.container = ContainerClient(conf, logger=self.logger, **kwargs)

    # In AccountClient, "endpoint" is the account service, not the proxy:
    # whatever the caller gave as "endpoint" is forwarded as
    # "proxy_endpoint" instead.
    acct_kwargs = dict(kwargs)
    acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None)
    self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)
def __init__(self, app, conf, **kwargs):
    """
    Build the filter and the account client it relies on.

    :param app: next application of the pipeline (passed to the parent)
    :param conf: configuration, used to build the logger and the
        account client
    """
    filter_logger = get_logger(conf)
    self.logger = filter_logger
    super(AccountUpdateFilter, self).__init__(
        app, conf, logger=filter_logger, **kwargs)
    # Read the logger back from the instance, after the parent
    # constructor has run.
    self.account = AccountClient(conf, logger=self.logger)
def __init__(self, namespace, concurrency=50, error_file=None):
    """
    Set up the green thread pool, the service clients and the counters.

    :param namespace: name of the namespace the clients will talk to
    :param concurrency: size of the green thread pool
    :param error_file: optional path of a file errors are appended to,
        as space-delimited CSV rows
    """
    self.pool = GreenPool(concurrency)

    self.error_file = error_file
    if self.error_file:
        # The file object is only referenced by the CSV writer; it is
        # not closed in this constructor.
        error_fd = open(self.error_file, 'a')
        self.error_writer = csv.writer(error_fd, delimiter=' ')

    conf = {'namespace': namespace}
    self.account_client = AccountClient(conf)
    self.container_client = ContainerClient(conf)
    self.blob_client = BlobClient()

    # Number of items visited, per level
    self.accounts_checked = self.containers_checked = 0
    self.objects_checked = self.chunks_checked = 0

    # Number of items which could not be found, per level
    self.account_not_found = self.container_not_found = 0
    self.object_not_found = self.chunk_not_found = 0

    # Number of exceptions, per level
    self.account_exceptions = self.container_exceptions = 0
    self.object_exceptions = self.chunk_exceptions = 0

    self.list_cache = {}
    self.running = {}
def __init__(self, namespace, **kwargs):
    """
    Initialize the object storage API.

    :param namespace: name of the namespace to interact with
    :type namespace: `str`
    :keyword connection_timeout: connection timeout towards rawx services
    :type connection_timeout: `float` seconds
    :keyword read_timeout: timeout for rawx responses and data reads
        from the caller (when uploading)
    :type read_timeout: `float` seconds
    :keyword write_timeout: timeout for rawx write requests
    :type write_timeout: `float` seconds
    """
    def _timeout(key):
        # Float value of the keyword, or None when absent/unparsable
        return utils.float_value(kwargs.get(key), None)

    self.namespace = namespace
    self.connection_timeout = _timeout("connection_timeout")
    self.read_timeout = _timeout("read_timeout")
    self.write_timeout = _timeout("write_timeout")

    # FIXME: share session between all the clients
    # Each client receives its own configuration dictionary.
    self.directory = DirectoryClient(
        {"namespace": self.namespace}, **kwargs)
    self.account = AccountClient(
        {"namespace": self.namespace}, **kwargs)
    self.container = ContainerClient(
        {"namespace": self.namespace}, **kwargs)
def __init__(self, conf, accounts=None, **kwargs):
    """
    Initialize the rebuilder.

    :param conf: configuration, handed to the parent constructor
    :param accounts: input of the rebuilder (stored as-is)
    """
    super(AccountRebuilder, self).__init__(conf, **kwargs)

    # input
    self.accounts = accounts

    # Bookkeeping of the accounts to refresh and of those already done
    self._accounts_to_refresh = set()
    self._accounts_refreshed = eventlet.Queue()

    # self.conf and self.logger are expected to have been set by the
    # parent constructor.
    self.account_client = AccountClient(self.conf, logger=self.logger)
def _build_account_client(self, **kwargs):
    """
    Build an `AccountClient` whose `_direct_request` and
    `_get_account_addr` methods are replaced by mocks.

    Extra keyword arguments are forwarded to the `AccountClient`
    constructor.

    :returns: the patched client
    """
    fake_endpoint = "http://1.2.3.4:8000"
    # Canned (response, body) pair returned by every direct request
    canned_reply = (FakeApiResponse(), {"listing": [['ct', 0, 0, 0]]})

    account_client = AccountClient({'namespace': 'fake'},
                                   endpoint=fake_endpoint,
                                   proxy_endpoint=fake_endpoint,
                                   **kwargs)
    account_client._direct_request = Mock(return_value=canned_reply)
    account_client._get_account_addr = Mock(return_value=fake_endpoint)
    return account_client
def test_create_without_account(self):
    """
    Creating through the API in an account which does not exist yet
    must end up with the account being visible.
    """
    account = random_str(32)
    name = random_str(32)
    account_client = AccountClient(self.conf)

    # The account must not exist beforehand
    with self.assertRaises(exc.NotFound):
        account_client.account_show(account)

    self.api.create(account, name)
    time.sleep(0.5)  # ensure account event have been processed

    account_info = account_client.account_show(account)
    self.assertEqual(account_info['id'], account)

    # clean
    self.api.delete(account, name)
    account_client.account_delete(account)
def setUp(self):
    """Create a uniquely named account holding two containers."""
    super(TestAccountClient, self).setUp()
    # The current time makes the account name unique for each run
    self.account_id = "test_account_%f" % time.time()
    self.account_client = AccountClient(self.conf)
    self.container_client = ContainerClient(self.conf)

    self.account_client.account_create(self.account_id)
    for container_name in ("container1", "container2"):
        self.container_client.container_create(acct=self.account_id,
                                               ref=container_name)
    time.sleep(.5)  # ensure container event have been processed
def __init__(self, namespace, concurrency=50, error_file=None,
             rebuild_file=None, full=True, limit_listings=0,
             request_attempts=1):
    """
    Set up the green thread pool, the service clients and the counters.

    :param namespace: name of the namespace the clients will talk to
    :param concurrency: size of the green thread pool
    :param error_file: optional path of a file errors are appended to,
        as space-delimited CSV rows
    :param rebuild_file: optional path of a file appended to as
        '|'-delimited CSV rows
    :param full: stored as a boolean in `self.full`
    :param limit_listings: listing limitation level (see the comment
        in the body)
    :param request_attempts: number of attempts per request; the
        clients are given `request_attempts - 1` as `max_retries`
    """
    self.pool = GreenPool(concurrency)
    self.error_file = error_file
    self.full = bool(full)

    # Optimisation for when we are only checking one object
    # or one container.
    # 0 -> do not limit
    # 1 -> limit account listings (list of containers)
    # 2 -> limit container listings (list of objects)
    self.limit_listings = limit_listings

    # Neither output file is closed in this constructor: each one is
    # only referenced by its CSV writer.
    if self.error_file:
        error_fd = open(self.error_file, 'a')
        self.error_writer = csv.writer(error_fd, delimiter=' ')
    self.rebuild_file = rebuild_file
    if self.rebuild_file:
        rebuild_fd = open(self.rebuild_file, 'a')
        self.rebuild_writer = csv.writer(rebuild_fd, delimiter='|')

    conf = {'namespace': namespace}
    self.account_client = AccountClient(
        conf, max_retries=request_attempts - 1)
    self.container_client = ContainerClient(
        conf,
        max_retries=request_attempts - 1,
        request_attempts=request_attempts)
    self.blob_client = BlobClient(conf=conf)

    # Number of items visited, per level
    self.accounts_checked = self.containers_checked = 0
    self.objects_checked = self.chunks_checked = 0

    # Number of items which could not be found, per level
    self.account_not_found = self.container_not_found = 0
    self.object_not_found = self.chunk_not_found = 0

    # Number of exceptions, per level
    self.account_exceptions = self.container_exceptions = 0
    self.object_exceptions = self.chunk_exceptions = 0

    self.list_cache = {}
    self.running = {}
def __init__(self, namespace, **kwargs):
    """
    Build the "meta2" checker for the given namespace.

    The endpoint handed to the parent constructor is made of the
    'proxy' entry of the namespace configuration, followed by
    "v3.0/<namespace>/content".

    :param namespace: name of the namespace to check
    """
    proxy_addr = load_namespace_conf(namespace).get('proxy')
    # "http:/" joined with "/" yields the "http://" scheme prefix
    content_endpoint = "/".join(
        ("http:/", proxy_addr, "v3.0", namespace, "content"))
    super(CheckMeta2, self).__init__(
        namespace, "meta2", endpoint=content_endpoint, **kwargs)

    # self.ns is expected to have been set by the parent constructor.
    # Each client receives its own configuration dictionary.
    self.account = AccountClient({"namespace": self.ns})
    self.container = ContainerClient({"namespace": self.ns})
    self.directory = DirectoryClient({"namespace": self.ns})
    self.reference = random_buffer('0123456789ABCDEF', 64)
class AccountUpdateFilter(Filter):
    """
    Event filter forwarding container and account events to the
    account service, through an `AccountClient`.
    """

    def __init__(self, app, conf, **kwargs):
        """
        Build the filter and the account client it relies on.

        :param app: next application of the pipeline
        :param conf: configuration, used to build the logger and the
            account client
        """
        filter_logger = get_logger(conf)
        self.logger = filter_logger
        super(AccountUpdateFilter, self).__init__(
            app, conf, logger=filter_logger, **kwargs)
        self.account = AccountClient(conf, logger=self.logger)

    def _update_failure(self, env, cb, err):
        """Answer with an `EventError` describing a failed update."""
        msg = 'account update failure: %s' % str(err)
        resp = EventError(event=Event(env), body=msg)
        return resp(env, cb)

    def process(self, env, cb):
        """
        Handle one event, then pass it on to the next application.

        Events whose type is in `CONTAINER_EVENTS` trigger a container
        update on the account service; `ACCOUNT_SERVICES` events
        trigger an account creation. Events of any other type are
        passed on untouched.

        :returns: the result of the next application, or the result of
            an `EventError` response when the container update failed
        """
        event = Event(env)
        event_type = event.event_type

        if event_type in CONTAINER_EVENTS:
            timestamp = event.when / 1000000.0  # convert to seconds
            payload = event.data
            url = event.env.get('url')

            if event_type == EventTypes.CONTAINER_STATE:
                body = {
                    'bytes': payload.get('bytes-count', 0),
                    'objects': payload.get('object-count', 0),
                    'mtime': timestamp,
                }
            elif event_type == EventTypes.CONTAINER_DELETED:
                body = {'dtime': timestamp}
            elif event_type == EventTypes.CONTAINER_NEW:
                body = {'mtime': timestamp}
            else:
                # Other container events send an empty update
                body = {}

            try:
                self.account.container_update(
                    url.get('account'), url.get('user'), body,
                    read_timeout=ACCOUNT_TIMEOUT)
            except OioTimeout as exc:
                return self._update_failure(env, cb, exc)
            except ClientException as exc:
                if (exc.http_status != 409 or
                        "No update needed" not in exc.message):
                    return self._update_failure(env, cb, exc)
                # The account service is already up-to-date:
                # this is not an error.
                self.logger.info(
                    "Discarding event %s (%s): %s",
                    event.job_id, event.event_type, exc.message)
        elif event_type == EventTypes.ACCOUNT_SERVICES:
            url = event.env.get('url')
            self.account.account_create(url.get('account'),
                                        read_timeout=ACCOUNT_TIMEOUT)

        return self.app(env, cb)
def init(self):
    """
    Prepare the worker: read its settings, build the shared clients
    and load the event handlers.

    The account and rdir clients are published in `self.app_env`.

    :raises ValueError: if 'handlers_conf' is not in the configuration
    """
    self.concurrency = int_value(self.conf.get('concurrency'), 10)
    self.tube = self.conf.get("tube", DEFAULT_TUBE)
    acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 3600)

    self.app_env['account_client'] = AccountClient(
        self.conf,
        logger=self.logger,
        refresh_delay=acct_refresh_interval,
        pool_connections=3,  # 1 account, 1 proxy, 1 extra
    )
    self.app_env['rdir_client'] = RdirClient(
        self.conf,
        logger=self.logger,
        pool_maxsize=self.concurrency,  # 1 cnx per greenthread per host
    )

    if 'handlers_conf' not in self.conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                 global_conf=self.conf,
                                 app=self)

    # These options are only reported as deprecated, nothing else
    # is done with them here.
    for opt in ('acct_update', 'rdir_update',
                'retries_per_second', 'batch_size'):
        if opt in self.conf:
            # "warn" is a deprecated alias of "warning", removed from
            # the standard logging module in Python 3.13.
            self.logger.warning('Deprecated option: %s', opt)

    super(EventWorker, self).init()
def setUp(self):
    """Create a uniquely named account holding two containers."""
    super(TestAccountClient, self).setUp()
    # The current time makes the account name unique for each run
    self.account_id = "test_account_%f" % time.time()
    self.account_client = AccountClient(self.conf)
    self.container_client = ContainerClient(self.conf)

    self.account_client.account_create(self.account_id)
    for container_name in ("container1", "container2"):
        self.container_client.container_create(acct=self.account_id,
                                               ref=container_name)
    time.sleep(0.5)  # ensure container event have been processed
class TestAccountClient(BaseTestCase):
    """Tests of the container listing offered by `AccountClient`."""

    def setUp(self):
        """Create a uniquely named account holding two containers."""
        super(TestAccountClient, self).setUp()
        # The current time makes the account name unique for each run
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        self.account_client.account_create(self.account_id)
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(0.5)  # ensure container event have been processed

    def test_containers_list(self):
        """
        Check the listing with and without `limit` and `marker`.

        The "containers" counter is expected to always report both
        containers, whatever the listing is restricted to.
        """
        # NOTE: assertEqual is used everywhere; assertEquals is a
        # deprecated alias which was removed in Python 3.12.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        resp = self.account_client.containers_list(self.account_id,
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker itself is excluded from the listing
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
def __init__(self, conf, logger):
    """
    Initialize the clients, the statistics and the settings.

    :param conf: configuration; it must hold the `CONF_ACCOUNT` key
    :param logger: logger to use
    :raises KeyError: if `CONF_ACCOUNT` is missing from `conf`
    """
    def _int_option(key, default):
        # Integer value of a configuration entry, or its default
        return int_value(conf.get(key), default)

    self.conf = conf
    self.logger = logger
    self.account = conf[CONF_ACCOUNT]

    self.container_client = ContainerClient(self.conf)
    self.account_client = AccountClient(self.conf)
    self.content_factory = ContentFactory(self.conf)

    # Statistics
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.contents_run_time = 0
    self.total_contents_processed = 0

    # Settings
    self.report_interval = _int_option('report_interval', 3600)
    self.max_contents_per_second = _int_option('contents_per_second', 30)
    self.container_fetch_limit = _int_option('container_fetch_limit', 100)
    self.content_fetch_limit = _int_option('content_fetch_limit', 100)
    self.outdated_threshold = _int_option(CONF_OUTDATED_THRESHOLD,
                                          9999999999)
    self.new_policy = conf.get(CONF_NEW_POLICY)
def setUp(self):
    """
    Create a uniquely named account holding two containers.

    The account creation is attempted up to 4 times, with a 2 seconds
    pause after each failure but the last one, which is re-raised.
    """
    super(TestAccountClient, self).setUp()
    # The current time makes the account name unique for each run
    self.account_id = "test_account_%f" % time.time()
    self.account_client = AccountClient(self.conf)
    self.container_client = ContainerClient(self.conf)

    retries_left = 3
    while True:
        try:
            self.account_client.account_create(self.account_id)
            break
        except ClientException:
            if retries_left <= 0:
                raise
            retries_left -= 1
            time.sleep(2)

    for container_name in ("container1", "container2"):
        self.container_client.container_create(account=self.account_id,
                                               reference=container_name)
    time.sleep(.5)  # ensure container event have been processed
class AccountUpdateFilter(Filter):
    """
    Event filter forwarding container and account events to the
    account service, through an `AccountClient`.
    """

    def init(self):
        """Build the account client used by `process`."""
        self.account = AccountClient(self.conf, logger=self.logger)

    def _update_failure(self, env, cb, err):
        """Answer with an `EventError` describing a failed update."""
        msg = 'account update failure: %s' % str(err)
        resp = EventError(event=Event(env), body=msg)
        return resp(env, cb)

    def process(self, env, cb):
        """
        Handle one event, then pass it on to the next application.

        Events whose type is in `CONTAINER_EVENTS` trigger a container
        update on the account service. `ACCOUNT_SERVICES` events
        trigger either an account creation, or a container update
        carrying a deletion time when no meta2 service is listed.
        Events of any other type are passed on untouched.

        :returns: the result of the next application, or the result of
            an `EventError` response when the container update of a
            container event failed
        """
        event = Event(env)
        event_type = event.event_type

        if event_type in CONTAINER_EVENTS:
            timestamp = event.when / 1000000.0  # convert to seconds
            payload = event.data
            url = event.env.get('url')

            if event_type == EventTypes.CONTAINER_STATE:
                body = {
                    'bytes': payload.get('bytes-count', 0),
                    'objects': payload.get('object-count', 0),
                    'mtime': timestamp,
                }
            elif event_type == EventTypes.CONTAINER_NEW:
                body = {'mtime': timestamp}
            else:
                # Other container events send an empty update
                body = {}

            try:
                self.account.container_update(
                    url.get('account'), url.get('user'), body,
                    read_timeout=ACCOUNT_TIMEOUT)
            except OioTimeout as exc:
                return self._update_failure(env, cb, exc)
            except ClientException as exc:
                if (exc.http_status != 409 or
                        "No update needed" not in exc.message):
                    return self._update_failure(env, cb, exc)
                # The account service is already up-to-date:
                # this is not an error.
                self.logger.info(
                    "Discarding event %s (%s): %s",
                    event.job_id, event.event_type, exc.message)
        elif event_type == EventTypes.ACCOUNT_SERVICES:
            url = event.env.get('url')
            if isinstance(event.data, list):
                # Legacy format: list of services
                services = event.data
            else:
                # New format: dictionary with new and deleted services
                services = event.data.get('services') or list()
            meta2_services = [svc for svc in services
                              if svc.get('type') == 'meta2']

            if meta2_services:
                self.account.account_create(
                    url.get('account'), read_timeout=ACCOUNT_TIMEOUT)
            else:
                # No service in charge, container has been deleted
                self.account.container_update(
                    url.get('account'), url.get('user'),
                    {'dtime': event.when / 1000000.0},
                    read_timeout=ACCOUNT_TIMEOUT)

        return self.app(env, cb)
class TestAccountClient(BaseTestCase):
    """Tests of the container listing offered by `AccountClient`."""

    def setUp(self):
        """
        Create a uniquely named account holding two containers.

        The account creation is attempted up to 4 times, with a
        2 seconds pause after each failure but the last one, which is
        re-raised.
        """
        super(TestAccountClient, self).setUp()
        # The current time makes the account name unique for each run
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container event have been processed

    def test_containers_list(self):
        """
        Check the listing with and without `limit` and `marker`.

        The "containers" counter is expected to always report both
        containers, whatever the listing is restricted to.
        """
        # NOTE: assertEqual is used everywhere; assertEquals is a
        # deprecated alias which was removed in Python 3.12.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0],
            ["container2", 0, 0, 0]
        ])

        resp = self.account_client.containers_list(self.account_id,
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0]
        ])

        # The marker itself is excluded from the listing
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container2", 0, 0, 0]
        ])

        # Nothing is left after the last container
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
def main(myid, queue, concurrency, delay=5.0, duration=DURATION):
    """
    Run `concurrency` green threads executing `create_loop` against
    the account service, and report the achieved rate.

    :param myid: identifier of this process, used in the messages and
        in the names handed to `create_loop`
    :param queue: queue in which the overall rate is put at the end
    :param concurrency: number of green threads to run
    :param delay: seconds between two progress reports; also the
        timeout when waiting for a result
    :param duration: how long the measurement lasts, in seconds
    :returns: 0
    """
    counter = 0
    created = list()
    results = LightQueue(concurrency * 10)
    pool = GreenPool(concurrency)
    api = AccountClient({'namespace': NS}, pool_maxsize=concurrency+1)
    now = start = checkpoint = time.time()
    pool.starmap(create_loop, [(api, 'buck-%d-%d' % (myid, n), results)
                               for n in range(concurrency)])

    while now - start < duration:
        try:
            res = results.get(timeout=delay)
            created.append(res)
            counter += 1
        except Empty:
            pass
        if now - checkpoint > delay:
            print("Proc %d: %d updates in %fs, %f updates per second." % (
                  myid, counter, now - checkpoint,
                  counter / (now - checkpoint)))
            counter = 0
            checkpoint = now
        now = time.time()

    # Iterate over a snapshot: the pool may drop entries from
    # `coroutines_running` as green threads die, and a set must not
    # change size while being iterated.
    for coro in list(pool.coroutines_running):
        coro.kill()

    # Collect the results which were produced but not consumed yet
    while not results.empty():
        created.append(results.get(block=False))
    end = time.time()
    rate = len(created) / (end - start)
    print("Proc %d: end. %d updates in %fs, %f updates per second." % (
          myid, len(created), end - start, rate))

    time.sleep(2)
    print("Proc %d: cleaning..." % myid)
    del_req = {'dtime': time.time()}
    # Do not delete twice (or an exception is raised)
    uniq_ct = set(created)
    # The iterator has to be consumed for the calls to be performed
    for _ in pool.starmap(api.container_update,
                          [(ACCOUNT, n, del_req) for n in uniq_ct]):
        pass
    pool.waitall()
    queue.put(rate)
    return 0
def setUp(self):
    """
    Create a uniquely named account holding two containers.

    The account creation is attempted up to 4 times, with a 2 seconds
    pause after each failure but the last one, which is re-raised.
    """
    super(TestAccountClient, self).setUp()
    # The current time makes the account name unique for each run
    self.account_id = "test_account_%f" % time.time()
    self.account_client = AccountClient(self.conf)
    self.container_client = ContainerClient(self.conf)

    retries_left = 3
    while True:
        try:
            self.account_client.account_create(self.account_id)
            break
        except ClientException:
            if retries_left <= 0:
                raise
            retries_left -= 1
            time.sleep(2)

    for container_name in ("container1", "container2"):
        self.container_client.container_create(acct=self.account_id,
                                               ref=container_name)
    time.sleep(.5)  # ensure container event have been processed
class TestAccountClient(BaseTestCase):
    """Tests of the container listing offered by `AccountClient`."""

    def setUp(self):
        """
        Create a uniquely named account holding two containers.

        The account creation is attempted up to 4 times, with a
        2 seconds pause after each failure but the last one, which is
        re-raised.
        """
        super(TestAccountClient, self).setUp()
        # The current time makes the account name unique for each run
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise
        self.container_client.container_create(account=self.account_id,
                                               reference="container1")
        self.container_client.container_create(account=self.account_id,
                                               reference="container2")
        time.sleep(.5)  # ensure container event have been processed

    def test_container_list(self):
        """
        Check the listing with and without `limit` and `marker`.

        The "containers" counter is expected to always report both
        containers, whatever the listing is restricted to.
        """
        # NOTE: assertEqual is used everywhere; assertEquals is a
        # deprecated alias which was removed in Python 3.12.
        resp = self.account_client.container_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id,
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker itself is excluded from the listing
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container1",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container2",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
def __init__(self, conf, logger):
    """
    Initialize the clients, the statistics and the settings.

    :param conf: configuration; it must hold the `CONF_ACCOUNT` key
    :param logger: logger to use
    :raises KeyError: if `CONF_ACCOUNT` is missing from `conf`
    """
    def _int_option(key, default):
        # Integer value of a configuration entry, or its default
        return int_value(conf.get(key), default)

    self.conf = conf
    self.logger = logger
    self.account = conf[CONF_ACCOUNT]

    self.container_client = ContainerClient(self.conf)
    self.account_client = AccountClient(self.conf)
    self.content_factory = ContentFactory(self.conf)

    # Statistics
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.contents_run_time = 0
    self.total_contents_processed = 0

    # Settings
    self.report_interval = _int_option('report_interval', 3600)
    self.max_contents_per_second = _int_option('contents_per_second', 30)
    self.container_fetch_limit = _int_option('container_fetch_limit', 100)
    self.content_fetch_limit = _int_option('content_fetch_limit', 100)
    self.outdated_threshold = _int_option(CONF_OUTDATED_THRESHOLD,
                                          9999999999)
    self.new_policy = conf.get(CONF_NEW_POLICY)
class TestAccountClient(BaseTestCase):
    """Tests of the container listing offered by `AccountClient`."""

    def setUp(self):
        """Create a uniquely named account holding two containers."""
        super(TestAccountClient, self).setUp()
        # The current time makes the account name unique for each run
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        self.account_client.account_create(self.account_id)
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container event have been processed

    def test_containers_list(self):
        """
        Check the listing with and without `limit` and `marker`.

        The "containers" counter is expected to always report both
        containers, whatever the listing is restricted to.
        """
        # NOTE: assertEqual is used everywhere; assertEquals is a
        # deprecated alias which was removed in Python 3.12.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        resp = self.account_client.containers_list(self.account_id,
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker itself is excluded from the listing
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
class ObjectStorageApi(object): """ The Object Storage API. High level API that wraps `AccountClient`, `ContainerClient` and `DirectoryClient` classes. Every method that takes a `kwargs` argument accepts the at least the following keywords: - `headers`: `dict` of extra headers to pass to the proxy - `connection_timeout`: `float` - `read_timeout`: `float` - `write_timeout`: `float` """ TIMEOUT_KEYS = ('connection_timeout', 'read_timeout', 'write_timeout') def __init__(self, namespace, logger=None, **kwargs): """ Initialize the object storage API. :param namespace: name of the namespace to interract with :type namespace: `str` :keyword connection_timeout: connection timeout towards rawx services :type connection_timeout: `float` seconds :keyword read_timeout: timeout for rawx responses and data reads from the caller (when uploading) :type read_timeout: `float` seconds :keyword write_timeout: timeout for rawx write requests :type write_timeout: `float` seconds :keyword pool_manager: a pooled connection manager that will be used for all HTTP based APIs (except rawx) :type pool_manager: `urllib3.PoolManager` """ self.namespace = namespace conf = {"namespace": self.namespace} self.logger = logger or get_logger(conf) self.timeouts = {tok: float_value(tov, None) for tok, tov in kwargs.items() if tok in self.__class__.TIMEOUT_KEYS} from oio.account.client import AccountClient from oio.container.client import ContainerClient from oio.directory.client import DirectoryClient self.directory = DirectoryClient(conf, logger=self.logger, **kwargs) self.container = ContainerClient(conf, logger=self.logger, **kwargs) # In AccountClient, "endpoint" is the account service, not the proxy acct_kwargs = kwargs.copy() acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None) self.account = AccountClient(conf, logger=self.logger, **acct_kwargs) def _patch_timeouts(self, kwargs): """ Insert timeout settings from this class's constructor into `kwargs`, if they are not already there. 
""" for tok, tov in self.timeouts.items(): if tok not in kwargs: kwargs[tok] = tov def account_create(self, account, **kwargs): """ Create an account. :param account: name of the account to create :type account: `str` :returns: `True` if the account has been created """ return self.account.account_create(account, **kwargs) @handle_account_not_found def account_delete(self, account, **kwargs): """ Delete an account. :param account: name of the account to delete :type account: `str` """ self.account.account_delete(account, **kwargs) @handle_account_not_found def account_show(self, account, **kwargs): """ Get information about an account. """ return self.account.account_show(account, **kwargs) def account_list(self, **kwargs): """ List known accounts. Notice that account creation is asynchronous, and an autocreated account may appear in the listing only after several seconds. """ return self.account.account_list(**kwargs) @handle_account_not_found def account_update(self, account, metadata, to_delete=None, **kwargs): warnings.warn("You'd better use account_set_properties()", DeprecationWarning, stacklevel=2) self.account.account_update(account, metadata, to_delete, **kwargs) @handle_account_not_found def account_set_properties(self, account, properties, **kwargs): self.account.account_update(account, properties, None, **kwargs) @handle_account_not_found def account_del_properties(self, account, properties, **kwargs): self.account.account_update(account, None, properties, **kwargs) def container_create(self, account, container, properties=None, **kwargs): """ Create a container. 
:param account: account in which to create the container :type account: `str` :param container: name of the container :type container: `str` :param properties: properties to set on the container :type properties: `dict` :returns: True if the container has been created, False if it already exists """ return self.container.container_create(account, container, properties=properties, **kwargs) @handle_container_not_found @ensure_headers @ensure_request_id def container_touch(self, account, container, **kwargs): """ Trigger a notification about the container state. :param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` """ self.container.container_touch(account, container, **kwargs) def container_create_many(self, account, containers, properties=None, **kwargs): """ Create Many containers :param account: account in which to create the containers :type account: `str` :param containers: names of the containers :type containers: `list` :param properties: properties to set on the containers :type properties: `dict` """ return self.container.container_create_many(account, containers, properties=properties, **kwargs) @handle_container_not_found def container_delete(self, account, container, **kwargs): """ Delete a container. :param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` """ self.container.container_delete(account, container, **kwargs) @handle_account_not_found def container_list(self, account, limit=None, marker=None, end_marker=None, prefix=None, delimiter=None, **kwargs): """ Get the list of containers of an account. 
:param account: account from which to get the container list :type account: `str` :keyword limit: maximum number of results to return :type limit: `int` :keyword marker: name of the container from where to start the listing :type marker: `str` :keyword end_marker: :keyword prefix: :keyword delimiter: :return: the list of containers of an account :rtype: `list` of items (`list`) with 4 fields: name, number of objects, number of bytes, and 1 if the item is a prefix or 0 if the item is actually a container """ resp = self.account.container_list(account, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, **kwargs) return resp["listing"] @handle_container_not_found def container_show(self, account, container, **kwargs): """ Get information about a container (user properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :returns: a `dict` with "properties" containing a `dict` of user properties. """ return self.container.container_show(account, container, **kwargs) @handle_container_not_found def container_snapshot(self, account, container, dst_account, dst_container, batch=100, **kwargs): """ Create a copy of the container (only the content of the database) :param account: account in which the target is :type account: `str` :param container: name of the target :type container: `str` :param dst_account: account in which the snapshot will be. 
:type dst_account: `str` :param dst_container: name of the snapshot :type dst_container: `str` """ try: self.container.container_freeze(account, container) self.container.container_snapshot( account, container, dst_account, dst_container) resp = self.object_list(dst_account, dst_container) obj_gen = resp['objects'] target_beans = [] copy_beans = [] for obj in obj_gen: data = self.object_locate( account, container, obj["name"]) chunks = [chunk['url'] for chunk in data[1]] copies = self._generate_copy(chunks) fullpath = self._generate_fullpath( dst_account, dst_container, obj['name'], obj['version']) self._send_copy(chunks, copies, fullpath[0]) t_beans, c_beans = self._prepare_update_meta2( data[1], copies, dst_account, dst_container, obj['content']) target_beans.extend(t_beans) copy_beans.extend(c_beans) if len(target_beans) > batch: self.container.container_raw_update( target_beans, copy_beans, dst_account, dst_container, frozen=True) target_beans = [] copy_beans = [] if target_beans: self.container.container_raw_update( target_beans, copy_beans, dst_account, dst_container, frozen=True) finally: self.container.container_enable(account, container) @handle_container_not_found def container_get_properties(self, account, container, properties=None, **kwargs): """ Get information about a container (user and system properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :param properties: *ignored* :returns: a `dict` with "properties" and "system" entries, containing respectively a `dict` of user properties and a `dict` of system properties. """ return self.container.container_get_properties(account, container, properties=properties, **kwargs) @handle_container_not_found def container_set_properties(self, account, container, properties=None, clear=False, **kwargs): """ Set properties on a container. 
:param account: name of the account :type account: `str` :param container: name of the container where to set properties :type container: `str` :param properties: a dictionary of properties :type properties: `dict` :param clear: :type clear: `bool` :keyword system: dictionary of system properties to set """ return self.container.container_set_properties( account, container, properties, clear=clear, **kwargs) @handle_container_not_found def container_del_properties(self, account, container, properties, **kwargs): """ Delete properties of a container. :param account: name of the account :type account: `str` :param container: name of the container to deal with :type container: `str` :param properties: a list of property keys :type properties: `list` """ return self.container.container_del_properties( account, container, properties, **kwargs) def container_update(self, account, container, metadata, clear=False, **kwargs): warnings.warn("You'd better use container_set_properties()", DeprecationWarning) if not metadata: self.container_del_properties( account, container, [], **kwargs) else: self.container_set_properties( account, container, metadata, clear, **kwargs) @handle_container_not_found @ensure_headers @ensure_request_id def object_create(self, account, container, file_or_path=None, data=None, etag=None, obj_name=None, mime_type=None, metadata=None, policy=None, key_file=None, append=False, properties=None, **kwargs): """ Create an object or append data to object in *container* of *account* with data taken from either *data* (`str` or `generator`) or *file_or_path* (path to a file or file-like object). The object will be named after *obj_name* if specified, or after the base name of *file_or_path*. 
:param account: name of the account where to create the object :type account: `str` :param container: name of the container where to create the object :type container: `str` :param file_or_path: file-like object or path to a file from which to read object data :type file_or_path: `str` or file-like object :param data: object data (if `file_or_path` is not set) :type data: `str` or `generator` :keyword etag: entity tag of the object :type etag: `str` :keyword obj_name: name of the object to create. If not set, will use the base name of `file_or_path`. :keyword mime_type: MIME type of the object :type mime_type: `str` :keyword properties: a dictionary of properties :type properties: `dict` :keyword policy: name of the storage policy :type policy: `str` :keyword key_file: :param append: if set, data will be append to existing object (or object will be created if unset) :type append: `bool` :returns: `list` of chunks, size and hash of the what has been uploaded """ if (data, file_or_path) == (None, None): raise exc.MissingData() src = data if data is not None else file_or_path if src is file_or_path: if isinstance(file_or_path, basestring): if not os.path.exists(file_or_path): raise exc.FileNotFound("File '%s' not found." 
% file_or_path) file_name = os.path.basename(file_or_path) else: try: file_name = os.path.basename(file_or_path.name) except AttributeError: file_name = None obj_name = obj_name or file_name elif isgenerator(src): file_or_path = GeneratorIO(src) src = file_or_path if not obj_name: raise exc.MissingName( "No name for the object has been specified" ) sysmeta = {'mime_type': mime_type, 'etag': etag} if metadata: warnings.warn( "You'd better use 'properties' instead of 'metadata'", DeprecationWarning, stacklevel=4) if not properties: properties = metadata else: properties.update(metadata) if src is data: return self._object_create( account, container, obj_name, BytesIO(data), sysmeta, properties=properties, policy=policy, key_file=key_file, append=append, **kwargs) elif hasattr(file_or_path, "read"): return self._object_create( account, container, obj_name, src, sysmeta, properties=properties, policy=policy, key_file=key_file, append=append, **kwargs) else: with open(file_or_path, "rb") as f: return self._object_create( account, container, obj_name, f, sysmeta, properties=properties, policy=policy, key_file=key_file, append=append, **kwargs) @ensure_headers @ensure_request_id def object_touch(self, account, container, obj, version=None, **kwargs): """ Trigger a notification about an object (as if it just had been created). :param account: name of the account where to create the object :type account: `str` :param container: name of the container where to create the object :type container: `str` :param obj: name of the object to touch """ self.container.content_touch(account, container, obj, version=version, **kwargs) def object_drain(self, account, container, obj, version=None, **kwargs): """ Remove all the chunks of a content, but keep all the metadata. 
:param account: name of the account where the object is present :type account: `str` :param container: name of the container where the object is present :type container: `str` :param obj: name of the object to drain """ self.container.content_drain(account, container, obj, version=version, **kwargs) @handle_object_not_found @ensure_headers @ensure_request_id def object_delete(self, account, container, obj, version=None, **kwargs): """ Delete an object from a container. If versioning is enabled and no version is specified, the object will be marked as deleted but not actually deleted. :param account: name of the account the object belongs to :type account: `str` :param container: name of the container the object belongs to :type container: `str` :param obj: name of the object to delete :param version: version of the object to delete :returns: True on success """ return self.container.content_delete(account, container, obj, version=version, **kwargs) @ensure_headers @ensure_request_id def object_delete_many(self, account, container, objs, **kwargs): return self.container.content_delete_many( account, container, objs, **kwargs) @handle_object_not_found @ensure_headers @ensure_request_id def object_truncate(self, account, container, obj, version=None, size=None, **kwargs): """ Truncate object at specified size. Only shrink is supported. A download may occur if size is not on chunk boundaries. :param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :param version: version of the object to query :param size: new size of object """ # code copied from object_fetch (should be factorized !) 
meta, raw_chunks = self.object_locate( account, container, obj, version=version, **kwargs) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) chunks = _sort_chunks(raw_chunks, storage_method.ec) for pos in sorted(chunks.keys()): chunk = chunks[pos][0] if (size >= chunk['offset'] and size <= chunk['offset'] + chunk['size']): break else: raise exc.OioException("No chunk found at position %d" % size) if chunk['offset'] != size: # retrieve partial chunk ret = self.object_fetch(account, container, obj, version=version, ranges=[(chunk['offset'], size-1)]) # TODO implement a proper object_update pos = int(chunk['pos'].split('.')[0]) self.object_create(account, container, obj_name=obj, data=ret[1], meta_pos=pos, content_id=meta['id']) return self.container.content_truncate(account, container, obj, version=version, size=size, **kwargs) @handle_container_not_found def object_list(self, account, container, limit=None, marker=None, delimiter=None, prefix=None, end_marker=None, properties=False, versions=False, deleted=False, **kwargs): """ Lists objects inside a container. 
:param properties: if True, list object properties along with objects :param versions: if True, list all versions of objects :param deleted: if True, list also the deleted objects :returns: a dict which contains * 'objects': the list of objects * 'prefixes': common prefixes (only if delimiter and prefix are set) * 'properties': a dict of container properties * 'system': a dict of system metadata """ _, resp_body = self.container.content_list( account, container, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, properties=properties, versions=versions, deleted=deleted, **kwargs) for obj in resp_body['objects']: mtype = obj.get('mime-type') if mtype is not None: obj['mime_type'] = mtype del obj['mime-type'] version = obj.get('ver') if version is not None: obj['version'] = version del obj['ver'] return resp_body @handle_object_not_found def object_locate(self, account, container, obj, version=None, **kwargs): """ Get a description of the object along with the list of its chunks. 
:param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :param version: version of the object to query :returns: a tuple with object metadata `dict` as first element and chunk `list` as second element """ obj_meta, chunks = self.container.content_locate( account, container, obj, version=version, **kwargs) return obj_meta, chunks def object_analyze(self, *args, **kwargs): """ :deprecated: use `object_locate` """ warnings.warn("You'd better use object_locate()", DeprecationWarning) return self.object_locate(*args, **kwargs) @ensure_headers @ensure_request_id def object_fetch(self, account, container, obj, version=None, ranges=None, key_file=None, **kwargs): meta, raw_chunks = self.object_locate( account, container, obj, version=version, **kwargs) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) chunks = _sort_chunks(raw_chunks, storage_method.ec) meta['container_id'] = cid_from_name(account, container).upper() meta['ns'] = self.namespace self._patch_timeouts(kwargs) if storage_method.ec: stream = fetch_stream_ec(chunks, ranges, storage_method, **kwargs) elif storage_method.backblaze: stream = self._fetch_stream_backblaze(meta, chunks, ranges, storage_method, key_file, **kwargs) else: stream = fetch_stream(chunks, ranges, storage_method, **kwargs) return meta, stream @handle_object_not_found def object_get_properties(self, account, container, obj, **kwargs): return self.container.content_get_properties(account, container, obj, **kwargs) @handle_object_not_found def object_show(self, account, container, obj, version=None, **kwargs): """ Get a description of the content along with its user properties. 
:param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :returns: a `dict` describing the object .. python:: {'hash': '6BF60C17CC15EEA108024903B481738F', 'ctime': '1481031763', 'deleted': 'False', 'properties': { u'projet': u'OpenIO-SDS'}, 'length': '43518', 'hash_method': 'md5', 'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3', 'version': '1481031762951972', 'policy': 'EC', 'id': '20BF2194FD420500CD4729AE0B5CBC07', 'mime_type': 'application/octet-stream', 'name': 'Makefile'} """ return self.container.content_show(account, container, obj, version=version, **kwargs) def object_update(self, account, container, obj, metadata, version=None, clear=False, **kwargs): warnings.warn("You'd better use object_set_properties()", DeprecationWarning, stacklevel=2) if clear: self.object_del_properties( account, container, obj, [], version=version, **kwargs) if metadata: self.object_set_properties( account, container, obj, metadata, version=version, **kwargs) @handle_object_not_found def object_set_properties(self, account, container, obj, properties, version=None, **kwargs): return self.container.content_set_properties( account, container, obj, properties={'properties': properties}, version=version, **kwargs) @handle_object_not_found def object_del_properties(self, account, container, obj, properties, version=None, **kwargs): return self.container.content_del_properties( account, container, obj, properties=properties, version=version, **kwargs) def _content_preparer(self, account, container, obj_name, policy=None, **kwargs): # TODO: optimize by asking more than one metachunk at a time obj_meta, first_body = self.container.content_prepare( account, container, obj_name, size=1, stgpol=policy, autocreate=True, **kwargs) storage_method = STORAGE_METHODS.load(obj_meta['chunk_method']) def _fix_mc_pos(chunks, mc_pos): for chunk in chunks: raw_pos = 
chunk["pos"].split(".") if storage_method.ec: chunk['num'] = int(raw_pos[1]) chunk["pos"] = "%d.%d" % (mc_pos, chunk['num']) else: chunk["pos"] = str(mc_pos) def _metachunk_preparer(): mc_pos = kwargs.get('meta_pos', 0) _fix_mc_pos(first_body, mc_pos) yield first_body while True: mc_pos += 1 _, next_body = self.container.content_prepare( account, container, obj_name, 1, stgpol=policy, autocreate=True, **kwargs) _fix_mc_pos(next_body, mc_pos) yield next_body return obj_meta, _metachunk_preparer def _generate_fullpath(self, account, container_name, path, version): return ['{0}/{1}/{2}/{3}'.format(quote_plus(account), quote_plus(container_name), quote_plus(path), version)] def _object_create(self, account, container, obj_name, source, sysmeta, properties=None, policy=None, key_file=None, **kwargs): self._patch_timeouts(kwargs) obj_meta, chunk_prep = self._content_preparer( account, container, obj_name, policy=policy, **kwargs) obj_meta.update(sysmeta) obj_meta['content_path'] = obj_name obj_meta['container_id'] = cid_from_name(account, container).upper() obj_meta['ns'] = self.namespace obj_meta['full_path'] = self._generate_fullpath(account, container, obj_name, obj_meta['version']) obj_meta['oio_version'] = (obj_meta.get('oio_version') or OIO_VERSION) # XXX content_id is necessary to update an existing object kwargs['content_id'] = kwargs.get('content_id', obj_meta['id']) storage_method = STORAGE_METHODS.load(obj_meta['chunk_method']) if storage_method.ec: handler = ECWriteHandler( source, obj_meta, chunk_prep, storage_method, **kwargs) elif storage_method.backblaze: backblaze_info = self._b2_credentials(storage_method, key_file) handler = BackblazeWriteHandler( source, obj_meta, chunk_prep, storage_method, backblaze_info, **kwargs) else: handler = ReplicatedWriteHandler( source, obj_meta, chunk_prep, storage_method, **kwargs) final_chunks, bytes_transferred, content_checksum = handler.stream() etag = obj_meta.get('etag') if etag and etag.lower() != 
content_checksum.lower(): raise exc.EtagMismatch( "given etag %s != computed %s" % (etag, content_checksum)) obj_meta['etag'] = content_checksum data = {'chunks': final_chunks, 'properties': properties or {}} # FIXME: we may just pass **obj_meta self.container.content_create( account, container, obj_name, size=bytes_transferred, checksum=content_checksum, data=data, stgpol=obj_meta['policy'], version=obj_meta['version'], mime_type=obj_meta['mime_type'], chunk_method=obj_meta['chunk_method'], **kwargs) return final_chunks, bytes_transferred, content_checksum def _b2_credentials(self, storage_method, key_file): key_file = key_file or '/etc/oio/sds/b2-appkey.conf' try: return BackblazeUtils.get_credentials(storage_method, key_file) except BackblazeUtilsException as err: raise exc.ConfigurationException(str(err)) def _fetch_stream_backblaze(self, meta, chunks, ranges, storage_method, key_file, **kwargs): backblaze_info = self._b2_credentials(storage_method, key_file) total_bytes = 0 current_offset = 0 size = None offset = 0 for pos in range(len(chunks)): if ranges: offset = ranges[pos][0] size = ranges[pos][1] if size is None: size = int(meta["length"]) chunk_size = int(chunks[pos][0]["size"]) if total_bytes >= size: break if current_offset + chunk_size > offset: if current_offset < offset: _offset = offset - current_offset else: _offset = 0 if chunk_size + total_bytes > size: _size = size - total_bytes else: _size = chunk_size handler = BackblazeChunkDownloadHandler( meta, chunks[pos], _offset, _size, backblaze_info=backblaze_info) stream = handler.get_stream() if not stream: raise exc.OioException("Error while downloading") total_bytes += len(stream) yield stream current_offset += chunk_size @handle_container_not_found def container_refresh(self, account, container, attempts=3, **kwargs): for i in range(attempts): try: self.account.container_reset(account, container, time.time()) except exc.Conflict: if i >= attempts - 1: raise try: 
self.container.container_touch(account, container) except exc.ClientException as e: if e.status != 406 and e.status != 431: raise # CODE_USER_NOTFOUND or CODE_CONTAINER_NOTFOUND metadata = dict() metadata["dtime"] = time.time() self.account.container_update(account, container, metadata) @handle_account_not_found def account_refresh(self, account, **kwargs): self.account.account_refresh(account) containers = self.container_list(account) for container in containers: try: self.container_refresh(account, container[0]) except exc.NoSuchContainer: # container remove in the meantime pass while containers: marker = containers[-1][0] containers = self.container_list(account, marker=marker) if containers: for container in containers: try: self.container_refresh(account, container[0]) except exc.NoSuchContainer: # container remove in the meantime pass def all_accounts_refresh(self, **kwargs): accounts = self.account_list() for account in accounts: try: self.account_refresh(account) except exc.NoSuchAccount: # account remove in the meantime pass @handle_account_not_found def account_flush(self, account): self.account.account_flush(account) def _random_buffer(self, dictionary, n): return ''.join(random.choice(dictionary) for _ in range(n)) def _generate_copy(self, chunks, random_hex=60): # random_hex is the number of hexadecimals characters to generate for # the copy path copies = [] for c in chunks: tmp = ''.join([c[:-random_hex], self._random_buffer('0123456789ABCDEF', random_hex)]) copies.append(tmp) return copies def _send_copy(self, targets, copies, fullpath): headers = {"x-oio-chunk-meta-full-path": fullpath} if not hasattr(self, "blob_client"): from oio.blob.client import BlobClient self.blob_client = BlobClient() for t, c in zip(targets, copies): self.blob_client.chunk_link(t, c, headers=headers).status def _prepare_update_meta2(self, targets, copies, account, container, content): targets_beans = [] copies_beans = [] for t, c in zip(targets, copies): 
targets_beans.append(self._meta2bean(t['url'], t, content)) copies_beans.append(self._meta2bean(c, t, content)) return targets_beans, copies_beans def _meta2bean(self, url, meta, content): return {"type": "chunk", "id": url, "hash": meta['hash'], "size": int(meta["size"]), "pos": meta["pos"], "content": content}
class StorageTiererWorker(object):
    """
    Walk through all the contents of an account and change the storage
    policy of the ones which are older than a configured threshold.
    """

    def __init__(self, conf, logger):
        """
        :param conf: configuration `dict`; provides the account, the new
            policy, the outdated threshold, and optional tuning values
            ('report_interval', 'contents_per_second',
            'container_fetch_limit', 'content_fetch_limit')
        :param logger: logger used for reports and errors
        """
        self.conf = conf
        self.logger = logger
        self.account = conf[CONF_ACCOUNT]
        self.container_client = ContainerClient(self.conf, logger=self.logger)
        self.account_client = AccountClient(self.conf, logger=self.logger)
        self.content_factory = ContentFactory(self.conf)
        # Counters, reset after each periodic report
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.contents_run_time = 0
        self.total_contents_processed = 0
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_contents_per_second = int_value(
            conf.get('contents_per_second'), 30)
        self.container_fetch_limit = int_value(
            conf.get('container_fetch_limit'), 100)
        self.content_fetch_limit = int_value(
            conf.get('content_fetch_limit'), 100)
        self.outdated_threshold = int_value(
            conf.get(CONF_OUTDATED_THRESHOLD), 9999999999)
        self.new_policy = conf.get(CONF_NEW_POLICY)

    @staticmethod
    def _rate(count, duration):
        """
        Compute `count` per second, tolerating a null `duration`
        (two timestamps taken too close to each other).
        """
        return count / (duration or 0.000001)

    def _list_containers(self):
        """Generate the name of each container of the account."""
        container = None
        while True:
            resp = self.account_client.container_list(
                self.account, marker=container,
                limit=self.container_fetch_limit)
            if len(resp["listing"]) == 0:
                break
            for res in resp["listing"]:
                # The last name seen is the marker of the next page
                container = res[0]
                yield container

    def _list_contents(self):
        """
        Generate a (container ID, content ID) tuple for each content
        which is outdated and does not already have the new policy.
        """
        for container in self._list_containers():
            container_id = cid_from_name(self.account, container)
            marker = None
            while True:
                try:
                    _, listing = self.container_client.content_list(
                        account=self.account, reference=container,
                        limit=self.content_fetch_limit, marker=marker)
                except NotFound:
                    self.logger.warning(
                        "Container %s appears in account but doesn't exist",
                        container)
                    break
                if len(listing["objects"]) == 0:
                    break
                for obj in listing["objects"]:
                    marker = obj["name"]
                    if obj["mtime"] > time.time() - self.outdated_threshold:
                        # Too recent
                        continue
                    if obj["policy"] == self.new_policy:
                        # Nothing to change
                        continue
                    yield (container_id, obj["content"])

    def run(self):
        """
        Change the policy of all eligible contents, at a limited rate,
        logging a report every `report_interval` seconds and a final one
        at the end.
        """
        start_time = report_time = time.time()

        total_errors = 0

        for (container_id, content_id) in self._list_contents():
            self.safe_change_policy(container_id, content_id)

            self.contents_run_time = ratelimit(
                self.contents_run_time,
                self.max_contents_per_second
            )
            self.total_contents_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self._rate(self.passes,
                                             now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(content_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'content_rate': self.total_contents_processed / elapsed
            }
        )

    def safe_change_policy(self, container_id, content_id):
        """
        Call `change_policy`, but log and count errors instead of
        propagating them. The content counts as a pass in both cases.
        """
        try:
            self.change_policy(container_id, content_id)
        except Exception:
            self.errors += 1
            self.logger.exception("ERROR while changing policy for content "
                                  "%s/%s", container_id, content_id)
        self.passes += 1

    def change_policy(self, container_id, content_id):
        """Ask the content factory to move the content to the new policy."""
        self.logger.info("Changing policy for content %s/%s",
                         container_id, content_id)
        self.content_factory.change_policy(
            container_id, content_id, self.new_policy)
def setUpClass(cls):
    """
    Run the parent class setup, then create an account client for the
    namespace of the test class and share it with the test application
    through its environment.
    """
    super(TestReplicateFilter, cls).setUpClass()
    client = AccountClient({'namespace': cls._cls_ns})
    cls.account_client = client
    _App.app_env['account_client'] = client
class CheckMeta2(CheckService):
    """
    Check meta2 services by running a full content life cycle
    (prepare, create, locate, delete) through the proxy.
    """

    account_name = "_meta2_probe"

    def __init__(self, namespace, **kwargs):
        """
        :param namespace: name of the namespace; its configuration provides
            the address of the proxy
        """
        ep_parts = ["http:/", load_namespace_conf(namespace).get('proxy'),
                    "v3.0", namespace, "content"]

        super(CheckMeta2, self).__init__(namespace, "meta2",
                                         endpoint="/".join(ep_parts), **kwargs)

        self.account = AccountClient({"namespace": self.ns})
        self.container = ContainerClient({"namespace": self.ns})
        self.directory = DirectoryClient({"namespace": self.ns})
        # Random name of the container used for the checks
        self.reference = random_buffer('0123456789ABCDEF', 64)

    def _get_params(self):
        """Build request parameters targeting a new random content path."""
        path = random_buffer('0123456789ABCDEF', 64)
        return {'acct': self.account_name, 'ref': self.reference,
                'path': path}

    def _compare_chunks(self, chunks1, chunks2):
        """
        Tell if two chunk lists describe the same chunks, whatever their
        order. Only the "url" and "hash" of the chunks are compared.

        :returns: False if the lists differ or cannot be compared
        """
        def light_chunks(chunks):
            # Sortable (no dict ordering, which Python 3 does not support)
            return sorted((chunk["url"], chunk["hash"]) for chunk in chunks)

        try:
            return light_chunks(chunks1) == light_chunks(chunks2)
        except TypeError:
            return False

    def _cycle(self, meta2_host):
        """
        Force the probe container on `meta2_host`, then run a content
        life cycle on it, checking the status of each request.

        :returns: True if every step had the expected result
        """
        self.directory.unlink(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type)
        service = {"host": meta2_host, "type": self.service_type, "args": "",
                   "seq": 1}
        self.directory.force(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type, services=service)

        params = self._get_params()
        global_success = True

        # The content does not exist yet
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        headers = {'X-oio-action-mode': 'autocreate'}
        _, body, success = self._request(
            "POST", "/prepare", params=params, headers=headers,
            json={"size": "1024"}, expected_status=200)
        global_success &= success
        chunks = body

        # Preparing a content must not create it
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        headers = {"x-oio-content-meta-length": "1024"}
        _, _, success = self._request(
            "POST", "/create", params=params, headers=headers, json=chunks,
            expected_status=204)
        global_success &= success

        # The content must now be found, with the prepared chunks
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=200)
        global_success &= success
        success = self._compare_chunks(chunks, body)
        global_success &= success

        _, _, success = self._request(
            "POST", "/delete", params=params, expected_status=204)
        global_success &= success

        # The content must be gone
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        return global_success

    def run(self):
        """
        Create the probe container, run the checks, then delete the probe
        container and account (even when the checks raise).
        Exceptions are printed, not propagated.
        """
        try:
            self.container.container_create(account=self.account_name,
                                            reference=self.reference)
            try:
                super(CheckMeta2, self).run()
            finally:
                self.container.container_delete(account=self.account_name,
                                                reference=self.reference)
                sleep(1)
                self.account.account_delete(self.account_name)
        except Exception as err:
            print("Exception - " + str(err))
class AccountRebuilder(Tool):
    """
    Rebuild the account services.
    """

    def __init__(self, conf, accounts=None, **kwargs):
        """
        :param conf: configuration of the tool
        :param accounts: optional iterable of dictionaries with 'namespace'
            and 'account' keys; when empty, all accounts are processed
        """
        super(AccountRebuilder, self).__init__(conf, **kwargs)
        # Account items dispatched but whose result is still awaited
        self._accounts_to_refresh = set()
        # (namespace, account, error) tuples fed by run()
        self._accounts_refreshed = eventlet.Queue()

        # input
        self.accounts = accounts

        self.account_client = AccountClient(self.conf, logger=self.logger)

    @staticmethod
    def string_from_item(item):
        """Format an item as "namespace|account[|container]"."""
        namespace, account, container = item
        if container is None:
            return '%s|%s' % (namespace, account)
        return '%s|%s|%s' % (namespace, account, container)

    def _fetch_items_from_accounts(self):
        """Generate one account item per entry of `self.accounts`."""
        for obj in self.accounts:
            namespace = obj['namespace']
            account = obj['account']
            item = namespace, account, None
            yield item

    def _fetch_items_from_all_accounts(self):
        """
        Generate one account item per account known by the account
        service. A listing failure is logged and marks the run as failed.
        """
        try:
            accounts = self.account_client.account_list()
            for account in accounts:
                item = self.namespace, account, None
                yield item
        except Exception as exc:
            self.success = False
            self.logger.error('Failed to list accounts: %s', exc)

    def _list_account_containers(self, account):
        """Generate the name of every container of `account`, page by page."""
        marker = None
        while True:
            resp = self.account_client.container_list(account, marker=marker)
            listing = resp["listing"]
            if not listing:
                break
            for container in listing:
                yield container[0]
            # The last name of the page is the marker of the next one
            marker = listing[-1][0]

    def _fetch_items(self):
        """
        Generate the items to process: first the accounts, then,
        as soon as an account has been refreshed without error,
        each of its containers.
        """
        if self.accounts:
            items = self._fetch_items_from_accounts()
        else:
            items = self._fetch_items_from_all_accounts()

        for item in items:
            self._accounts_to_refresh.add(item)
            yield item

        while self._accounts_to_refresh:
            namespace, account, error = self._accounts_refreshed.get()
            account_item = (namespace, account, None)
            if account_item not in self._accounts_to_refresh:
                # Result of a duplicated input, already handled
                continue
            self._accounts_to_refresh.remove(account_item)
            if error:
                continue

            try:
                for container in self._list_account_containers(account):
                    yield namespace, account, container
            except Exception as exc:
                self.success = False
                self.logger.error('Failed to list containers (account=%s): %s',
                                  account, exc)

    def _get_report(self, status, end_time, counters):
        """
        Build a one-line report.

        :param status: text placed at the beginning of the report
        :param end_time: timestamp at which the report is built
        :param counters: tuple (entries processed since the last report,
            total entries processed, errors since the last report,
            total errors)
        """
        entries_processed, total_entries_processed, \
            errors, total_errors = counters
        # Never divide by zero when the timestamps are equal
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'entries=%(entries)d %(entries_rate).2f/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_entries=%(total_entries)d %(total_entries_rate).2f/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status': status,
                'last_report': datetime.fromtimestamp(
                    int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'entries': entries_processed,
                'entries_rate': entries_processed / time_since_last_report,
                'errors': errors,
                'errors_rate': 100 * errors / float(entries_processed or 1),
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'total_time': total_time,
                'total_entries': total_entries_processed,
                'total_entries_rate': total_entries_processed / total_time,
                'total_errors': total_errors,
                'total_errors_rate':
                    100 * total_errors / float(total_entries_processed or 1)
            })
        if self.total_expected_items is not None:
            progress = 100 * total_entries_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_entries_processed, self.total_expected_items, progress)
        return report

    def create_worker(self, queue_workers, queue_reply):
        """Create a worker bound to this tool and to the specified queues."""
        return AccountRebuilderWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        """Set the number of expected items when accounts are a `list`."""
        if self.accounts and isinstance(self.accounts, list):
            self.total_expected_items = len(self.accounts)

    def run(self):
        """
        Generate the task results of the parent class, and notify
        `_fetch_items` of each account which has been processed.
        """
        tasks_res = super(AccountRebuilder, self).run()
        for task_res in tasks_res:
            item, _, error = task_res
            namespace, account, container = item
            if container is None:
                # An account item: its containers may now be listed
                self._accounts_refreshed.put((namespace, account, error))
            yield task_res
def init(self):
    """Create the account client, from the configuration and logger."""
    account_client = AccountClient(self.conf, logger=self.logger)
    self.account = account_client
class Checker(object):
    """
    Check the consistency between accounts, containers, objects and chunks.

    Listings are cached (`list_cache`) and concurrent checks of the same
    item wait for the first one to finish (`running`).
    """

    def __init__(self, namespace, concurrency=50,
                 error_file=None, rebuild_file=None, full=True):
        """
        :param namespace: name of the namespace to check
        :param concurrency: size of the pool of green threads
        :param error_file: path of a file where to append the items
            in error (space-separated values)
        :param rebuild_file: path of a file where to append the chunks
            to rebuild ('|'-separated values)
        :param full: if true, also compare the extended attributes of
            the chunks with the information from the container
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.full = bool(full)
        if self.error_file:
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        # Results of the checks already done, by item key
        self.list_cache = {}
        # Events signaling the end of the checks in progress, by item key
        self.running = {}

    def write_error(self, target):
        """Append the components of `target` to the error file."""
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, obj_meta, ct_meta):
        """
        Append (container ID, content ID, chunk) to the rebuild file.
        The container ID is the part of 'sys.name' before the first dot.
        """
        try:
            cid = ct_meta['system']['sys.name'].split('.', 1)[0]
        except KeyError:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
        self.rebuild_writer.writerow((cid, obj_meta['id'], target.chunk))

    def write_chunk_error(self, target, obj_meta, chunk=None):
        """
        Report a chunk in error, in the error file and/or the rebuild file.

        :param chunk: if set, replaces the chunk of (a copy of) `target`
        """
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        if self.error_file:
            self.write_error(target)
        if self.rebuild_file:
            self.write_rebuilder_input(
                target, obj_meta,
                self.list_cache[(target.account, target.container)][1])

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Compare the size and hash known for a chunk (`obj_meta`) with
        its extended attributes (`xattr_meta`).

        :returns: True if at least one of them differs
        """
        error = False
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            print("  Chunk %s '%s' xattr (%s) "
                  "differs from size in meta2 (%s)" %
                  (target, attr_key, xattr_meta.get(attr_key),
                   obj_meta['size']))
            error = True

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            print("  Chunk %s '%s' xattr (%s) "
                  "differs from hash in meta2 (%s)" %
                  (target, attr_key, xattr_meta.get(attr_key),
                   obj_meta['hash']))
            error = True
        return error

    def check_chunk(self, target):
        """
        Check that a chunk is referenced by its object and that it is
        reachable (and, in full mode, consistent).
        """
        chunk = target.chunk

        obj_listing, obj_meta = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print('  Chunk %s missing from object listing' % target)
            error = True
            db_meta = dict()
        else:
            db_meta = obj_listing[chunk]

        try:
            xattr_meta = self.blob_client.chunk_head(chunk, xattr=self.full)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print('  Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print('  Exception chunk "%s": %s' % (target, str(e)))
        else:
            if db_meta and self.full:
                error = self._check_chunk_xattr(target, db_meta, xattr_meta)

        if error:
            self.write_chunk_error(target, obj_meta)

        self.chunks_checked += 1

    def check_obj_policy(self, target, obj_meta, chunks):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        if stg_met.ec:
            required = stg_met.ec_nb_data + stg_met.ec_nb_parity
        else:
            required = stg_met.nb_copy
        for pos, clist in chunks_by_pos.items():
            if len(clist) < required:
                print('  Missing %d chunks at position %s of %s' % (
                    required - len(clist), pos, target))
                if stg_met.ec:
                    # Report each missing sub-position
                    subs = {x['num'] for x in clist}
                    for sub in range(required):
                        if sub not in subs:
                            self.write_chunk_error(target, obj_meta,
                                                   '%d.%d' % (pos, sub))
                else:
                    self.write_chunk_error(target, obj_meta, str(pos))

    def check_obj(self, target, recurse=False):
        """
        Check that an object is listed by its container and can be located.

        :param recurse: if true, also schedule the check of each chunk
        :returns: a tuple with the chunks of the object (`dict` indexed
            by URL) and the metadata of the object (empty if not found)
        """
        account = target.account
        container = target.container
        obj = target.obj

        if (account, container, obj) in self.running:
            self.running[(account, container, obj)].wait()
        if (account, container, obj) in self.list_cache:
            return self.list_cache[(account, container, obj)]
        self.running[(account, container, obj)] = Event()
        print('Checking object "%s"' % target)
        container_listing, ct_meta = self.check_container(target)
        error = False
        if obj not in container_listing:
            print('  Object %s missing from container listing' % target)
            error = True
            # checksum = None
        else:
            # TODO check checksum match
            # checksum = container_listing[obj]['hash']
            pass

        results = []
        meta = dict()
        try:
            meta, results = self.container_client.content_locate(
                account=account, reference=container, path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print('  Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print('  Exception object "%s": %s' % (target, str(e)))

        chunk_listing = dict()
        for chunk in results:
            chunk_listing[chunk['url']] = chunk

        if meta:
            # Without metadata (locate failed), there is no storage policy
            # to compare with, and trying would abort this check before
            # the waiters are notified.
            self.check_obj_policy(target.copy(), meta, results)

        self.objects_checked += 1
        self.list_cache[(account, container, obj)] = (chunk_listing, meta)
        self.running[(account, container, obj)].send(True)
        del self.running[(account, container, obj)]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing, meta

    def check_container(self, target, recurse=False):
        """
        Check that a container is listed by its account and can be listed.

        :param recurse: if true, also schedule the check of each object
        :returns: a tuple with the objects of the container (`dict` indexed
            by name) and the metadata of the container
        """
        account = target.account
        container = target.container

        if (account, container) in self.running:
            self.running[(account, container)].wait()
        if (account, container) in self.list_cache:
            return self.list_cache[(account, container)]
        self.running[(account, container)] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print('  Container %s missing from account listing' % target)

        marker = None
        results = []
        ct_meta = dict()
        while True:
            try:
                _, resp = self.container_client.content_list(
                    account=account, reference=container, marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print('  Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print('  Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
            else:
                # Last (empty) page: keep everything but the object list
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        for obj in results:
            container_listing[obj['name']] = obj

        self.containers_checked += 1
        self.list_cache[(account, container)] = container_listing, ct_meta
        self.running[(account, container)].send(True)
        del self.running[(account, container)]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing, ct_meta

    def check_account(self, target, recurse=False):
        """
        List all the containers of an account.

        :param recurse: if true, also schedule the check of each container
        :returns: a `dict` indexed by container name; values are the second
            and third fields of the listing entries
        """
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        while True:
            try:
                resp = self.account_client.container_list(
                    account, marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print('  Exception account "%s": %s' % (target, str(e)))
                break
            if resp['listing']:
                marker = resp['listing'][-1][0]
            else:
                break
            results.extend(resp['listing'])

        containers = dict()
        for e in results:
            containers[e[0]] = (e[1], e[2])

        self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)

        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Schedule the check matching the most specific part of `target`."""
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Wait for all the scheduled checks to finish."""
        self.pool.waitall()

    def report(self):
        """Print the counters of the checks. Null error counters are hidden."""
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            _report_stat("Exceptions", self.chunk_exceptions)
action='store_true', default=False, help='Start a controller bond to the given addresses') parser.add_argument("endpoints", metavar='ENDPOINT', type=str, nargs='+', help='Endpoints to connect/bind to') args = parser.parse_args() zctx = zmq.Context() if args.controller: s = ObjectStorageApi("benchmark") #Creating account ac = AccountClient({"namespace": "benchmark"}) retry = 3 for i in range(retry + 1): try: ac.account_create("benchmark_account") break except ClientException: if i < retry: time.sleep(2) else: raise #Creating Container s.container_create(account="benchmark_account", reference="container1") controller(zctx, args.endpoints, ac, s)
class Checker(object):
    """
    Consistency checker walking accounts, containers, objects and chunks.

    Each ``check_*`` method verifies that its target is listed by its
    parent (the chunk in the object, the object in the container, the
    container in the account) and that the target itself can be queried.
    Listings are cached in ``self.list_cache`` and concurrent checks of
    the same target are serialized through the events stored in
    ``self.running``.
    """

    def __init__(self, namespace, concurrency=50, error_file=None):
        """
        :param namespace: name of the namespace, passed to the account
            and container clients
        :param concurrency: size of the green thread pool
        :param error_file: optional path of a file where the targets
            in error are appended, one row per target
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        if self.error_file:
            # NOTE: the file is opened in append mode and is never
            # explicitly closed by this class.
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        # Counters displayed by report()
        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        # Listings already fetched, indexed by account name,
        # (account, container) or (account, container, obj)
        self.list_cache = {}
        # Events of the checks in progress, indexed like list_cache
        self.running = {}

    def write_error(self, target):
        """
        Append a row describing `target` to the error file.

        The row holds the account, followed by the container, the object
        and the chunk when they are set on the target.
        Must only be called when an error file has been configured.
        """
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def check_chunk(self, target):
        """
        Check that the chunk of `target` is referenced by its object
        and answers to a HEAD request.
        """
        chunk = target.chunk

        obj_listing = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing in object listing' % target)
            error = True
            # checksum = None
        else:
            # TODO check checksum match
            # checksum = obj_listing[chunk]['hash']
            pass

        try:
            self.blob_client.chunk_head(chunk)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))

        if error and self.error_file:
            self.write_error(target)
        self.chunks_checked += 1

    def check_obj(self, target, recurse=False):
        """
        Check that the object of `target` is listed in its container
        and can be shown.

        :param recurse: when true, also spawn a check of each chunk
        :returns: a `dict` of the chunks of the object, indexed by URL
            (empty if the object could not be shown)
        """
        account = target.account
        container = target.container
        obj = target.obj
        key = (account, container, obj)

        # Wait for a concurrent check of the same object, then reuse
        # its result if there is one.
        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking object "%s"' % target)
        container_listing = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing in container listing' % target)
            error = True
            # checksum = None
        else:
            # TODO check checksum match
            # checksum = container_listing[obj]['hash']
            pass

        results = []
        try:
            _, resp = self.container_client.content_show(acct=account,
                                                         ref=container,
                                                         path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))
        else:
            results = resp

        chunk_listing = dict()
        for chunk in results:
            chunk_listing[chunk['url']] = chunk

        self.objects_checked += 1
        self.list_cache[key] = chunk_listing
        # Wake up the checks waiting for this listing
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing

    def check_container(self, target, recurse=False):
        """
        Check that the container of `target` is listed in its account
        and can be listed.

        :param recurse: when true, also spawn a recursive check
            of each object
        :returns: a `dict` of the objects of the container, indexed by name
        """
        account = target.account
        container = target.container
        key = (account, container)

        # Wait for a concurrent check of the same container, then reuse
        # its result if there is one.
        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing in account listing' % target)

        marker = None
        results = []
        # Fetch all the pages of the listing; the marker is the name
        # of the last object of the previous page.
        while True:
            try:
                resp = self.container_client.container_list(
                    acct=account, ref=container, marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
            else:
                break
            results.extend(resp['objects'])

        container_listing = dict()
        for obj in results:
            container_listing[obj['name']] = obj

        self.containers_checked += 1
        self.list_cache[key] = container_listing
        # Wake up the checks waiting for this listing
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing

    def check_account(self, target, recurse=False):
        """
        Check that the containers of the account of `target` can be listed.

        :param recurse: when true, also spawn a recursive check
            of each container
        :returns: a `dict` indexed by container name, whose values are
            the second and third fields of each listing entry
        """
        account = target.account

        # Wait for a concurrent check of the same account, then reuse
        # its result if there is one.
        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        # Fetch all the pages of the listing; the marker is the name
        # of the last container of the previous page.
        while True:
            try:
                resp = self.account_client.containers_list(account,
                                                           marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if resp['listing']:
                marker = resp['listing'][-1][0]
            else:
                break
            results.extend(resp['listing'])

        containers = dict()
        for e in results:
            containers[e[0]] = (e[1], e[2])

        self.list_cache[account] = containers
        # Wake up the checks waiting for this listing
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)

        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """
        Spawn the check matching the most specific field set on `target`
        (chunk, then object, then container, and account otherwise).
        Object, container and account checks are recursive.
        """
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Block until all the spawned checks are finished."""
        self.pool.waitall()

    def report(self):
        """
        Print the counters of checked, missing and failed items,
        grouped by item type. "Missing" and "Exceptions" lines are
        only printed when their counter is not zero.
        """
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            # Display the exception counter (the not-found counter
            # used to be printed here by mistake).
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            _report_stat("Exceptions", self.chunk_exceptions)
class TestAccountClient(BaseTestCase):
    """Functional tests of `AccountClient` against a running namespace."""

    def setUp(self):
        """
        Create a uniquely named account holding two empty containers,
        "container1" and "container2".
        """
        super(TestAccountClient, self).setUp()
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        # Try the account creation up to 4 times, 2 seconds apart,
        # and let the last failure propagate.
        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise
        self.container_client.container_create(account=self.account_id,
                                               reference="container1")
        self.container_client.container_create(account=self.account_id,
                                               reference="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_container_list(self):
        """Check the listing of containers with `limit` and `marker`."""
        resp = self.account_client.container_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id,
                                                  marker="container1",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id,
                                                  marker="container2",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])

    # TODO: move this test somewhere under tests/unit/
    def test_account_service_refresh(self):
        """
        Check that a failing request makes the client look up the
        account service address again and use the new endpoint.
        """
        self.account_client.endpoint = "126.0.0.1:6666"
        self.account_client._last_refresh = time.time()
        self.account_client._get_account_addr = Mock(
            return_value="126.0.0.1:6667")
        self.assertRaises(OioNetworkException,
                          self.account_client.account_list)
        self.account_client._get_account_addr.assert_called_once()
        self.assertIn("126.0.0.1:6667", self.account_client.endpoint)

    def test_container_reset(self):
        """Check that a reset zeroes the counters of a container."""
        metadata = dict()
        metadata["mtime"] = time.time()
        metadata["bytes"] = 42
        metadata["objects"] = 12
        self.account_client.container_update(self.account_id, "container1",
                                             metadata=metadata)

        self.account_client.container_reset(self.account_id, "container1",
                                            time.time())
        resp = self.account_client.container_list(self.account_id,
                                                  prefix="container1")
        for container in resp["listing"]:
            name, nb_objects, nb_bytes, _ = container
            if name == 'container1':
                self.assertEqual(nb_objects, 0)
                self.assertEqual(nb_bytes, 0)
                return
        self.fail("No container container1")

    def test_account_refresh(self):
        """
        Check that the account counters match the container counters
        after a refresh.
        """
        metadata = dict()
        metadata["mtime"] = time.time()
        metadata["bytes"] = 42
        metadata["objects"] = 12
        self.account_client.container_update(self.account_id, "container1",
                                             metadata=metadata)

        self.account_client.account_refresh(self.account_id)

        resp = self.account_client.account_show(self.account_id)
        self.assertEqual(resp["bytes"], 42)
        self.assertEqual(resp["objects"], 12)

    def test_account_flush(self):
        """
        Check that a flush zeroes the account counters and empties
        the listing of containers.
        """
        metadata = dict()
        metadata["mtime"] = time.time()
        metadata["bytes"] = 42
        metadata["objects"] = 12
        self.account_client.container_update(self.account_id, "container1",
                                             metadata=metadata)

        self.account_client.account_flush(self.account_id)

        resp = self.account_client.account_show(self.account_id)
        self.assertEqual(resp["bytes"], 0)
        self.assertEqual(resp["objects"], 0)

        resp = self.account_client.container_list(self.account_id)
        self.assertEqual(len(resp["listing"]), 0)
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.
    """

    def __init__(self, namespace, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        """
        self.namespace = namespace
        self.connection_timeout = utils.float_value(
            kwargs.get("connection_timeout"), None)
        self.read_timeout = utils.float_value(kwargs.get("read_timeout"),
                                              None)
        self.write_timeout = utils.float_value(kwargs.get("write_timeout"),
                                               None)

        # FIXME: share session between all the clients
        self.directory = DirectoryClient({"namespace": self.namespace},
                                         **kwargs)
        self.account = AccountClient({"namespace": self.namespace}, **kwargs)
        self.container = ContainerClient({"namespace": self.namespace},
                                         **kwargs)

    def account_create(self, account, headers=None):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, headers=headers)

    @handle_account_not_found
    def account_delete(self, account, headers=None):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account.account_delete(account, headers=headers)

    @handle_account_not_found
    def account_show(self, account, headers=None):
        """
        Get information about an account.
        """
        return self.account.account_show(account, headers=headers)

    def account_list(self, headers=None):
        """
        List accounts
        """
        return self.account.account_list(headers=headers)

    # FIXME: @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, headers=None):
        """
        Update an account, by forwarding `metadata` and `to_delete`
        to the account client.
        """
        self.account.account_update(account, metadata, to_delete,
                                    headers=headers)

    @handle_account_not_found
    def account_set_properties(self, account, properties, headers=None):
        """
        Set properties on an account (calls `account_update` with
        `properties` as metadata).
        """
        self.account_update(account, properties, headers=headers)

    @handle_account_not_found
    def account_del_properties(self, account, properties, headers=None):
        """
        Delete properties of an account (calls `account_update` with
        `properties` as the items to delete).
        """
        self.account_update(account, None, properties, headers=headers)

    def container_create(self, account, container, properties=None,
                         headers=None, **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account, container,
                                               properties=properties,
                                               headers=headers,
                                               autocreate=True, **kwargs)

    @handle_container_not_found
    def container_touch(self, account, container, headers=None, **kwargs):
        """
        Trigger a notification about the container state.

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.container_touch(account, container, headers=headers,
                                       **kwargs)

    def container_create_many(self, account, containers, properties=None,
                              headers=None, **kwargs):
        """
        Create Many containers

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        return self.container.container_create_many(account, containers,
                                                    properties=properties,
                                                    headers=headers,
                                                    autocreate=True,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, headers=None, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        self.container.container_delete(account, container, headers=headers,
                                        **kwargs)

    @handle_account_not_found
    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       headers=None):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: the "listing" entry of the account service response
        """
        resp = self.account.container_list(account, limit=limit,
                                           marker=marker,
                                           end_marker=end_marker,
                                           prefix=prefix,
                                           delimiter=delimiter,
                                           headers=headers)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, headers=None):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        return self.container.container_show(account, container,
                                             headers=headers)

    @handle_container_not_found
    def container_get_properties(self, account, container, properties=None,
                                 headers=None):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        return self.container.container_get_properties(account, container,
                                                       properties=properties,
                                                       headers=headers)

    @handle_container_not_found
    def container_set_properties(self, account, container, properties=None,
                                 clear=False, headers=None, **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear:
        :type clear: `bool`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(
            account, container, properties,
            clear=clear, headers=headers, **kwargs)

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 headers=None, **kwargs):
        """
        Delete properties of a container.

        :param account: name of the account
        :param container: name of the container
        :param properties: the properties to delete, passed as is
            to the container client
        :param headers: extra headers to pass to the proxy
        """
        return self.container.container_del_properties(
            account, container, properties, headers=headers, **kwargs)

    def container_update(self, account, container, metadata, clear=False,
                         headers=None):
        """
        Set `metadata` as properties of a container, or call
        `container_del_properties` with an empty list when `metadata`
        is empty (`clear` is not used in that case).
        """
        if not metadata:
            self.container_del_properties(account, container, [],
                                          headers=headers)
        else:
            self.container_set_properties(account, container, metadata,
                                          clear, headers=headers)

    @handle_container_not_found
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, headers=None,
                      key_file=None,
                      **_kwargs):
        """
        Create an object in *container* of *account* with data taken from
        either *data* (`str` or `generator`) or *file_or_path* (path to a file
        or file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword metadata: a dictionary of properties
        :type metadata: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword key_file:
        :raises exc.MissingData: if neither `data` nor `file_or_path`
            is set
        :raises exc.FileNotFound: if `file_or_path` is a path which
            does not exist
        :raises exc.MissingName: if no object name can be determined
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                # File-like objects do not always have a name
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            # Wrap the generator so it can be read like a file
            file_or_path = utils.GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName("No name for the object has been specified")

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}

        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()

        if src is data:
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=metadata, policy=policy, headers=headers,
                key_file=key_file)
        elif hasattr(file_or_path, "read"):
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=metadata, policy=policy, headers=headers,
                key_file=key_file)
        else:
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=metadata, policy=policy, headers=headers,
                    key_file=key_file)

    def object_touch(self, account, container, obj, headers=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        :param account: name of the account in which the object is
        :type account: `str`
        :param container: name of the container in which the object is
        :type container: `str`
        :param obj: name of the object to touch
        :param headers: extra headers to pass to the proxy
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.content_touch(account, container, obj,
                                     headers=headers, **kwargs)

    @handle_object_not_found
    def object_delete(self, account, container, obj, headers=None, **kwargs):
        """
        Delete an object.

        :param account: name of the account in which the object is
        :param container: name of the container in which the object is
        :param obj: name of the object to delete
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client's `content_delete`
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete(account, container, obj,
                                             headers=headers, **kwargs)

    def object_delete_many(self, account, container, objs, headers=None,
                           **kwargs):
        """
        Delete several objects of the same container.

        :param account: name of the account in which the objects are
        :param container: name of the container in which the objects are
        :param objs: the objects to delete, passed as is to the
            container client
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client's
            `content_delete_many`
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete_many(
            account, container, objs, headers=headers, **kwargs)

    @handle_container_not_found
    def object_list(self, account, container, limit=None, marker=None,
                    delimiter=None, prefix=None, end_marker=None,
                    headers=None, properties=False, **kwargs):
        """
        Lists objects inside a container.

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': system metadata
        """
        _, resp_body = self.container.content_list(
            account, container, limit=limit, marker=marker,
            end_marker=end_marker, prefix=prefix, delimiter=delimiter,
            properties=properties, headers=headers, **kwargs)

        # Rename the 'mime-type' key of each object to 'mime_type'
        for obj in resp_body['objects']:
            mtype = obj.get('mime-type')
            if mtype:
                obj['mime_type'] = mtype
                del obj['mime-type']

        return resp_body

    # FIXME: @handle_object_not_found
    def object_locate(self, account, container, obj, headers=None):
        """
        Get the metadata of an object and the description of its chunks.

        :returns: the (metadata, body) pair returned by the container
            client's `content_locate`
        """
        # NOTE(review): `headers` is accepted but not forwarded to
        # content_locate() -- confirm whether it should be.
        obj_meta, body = self.container.content_locate(account, container,
                                                       obj)
        return obj_meta, body

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        return self.object_locate(*args, **kwargs)

    def object_fetch(self, account, container, obj, ranges=None,
                     headers=None, key_file=None):
        """
        Locate an object and prepare the download of its data.

        :param ranges: optional list of (start, end) ranges to fetch
        :param headers: extra headers; an 'X-oio-req-id' entry is added
            when missing
        :param key_file: passed to the backblaze download path
        :returns: a tuple with the object metadata and a generator
            yielding the object data
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        meta, raw_chunks = self.object_locate(account, container, obj,
                                              headers=headers)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = utils.name2cid(account, container).upper()
        meta['ns'] = self.namespace
        if storage_method.ec:
            stream = self._fetch_stream_ec(meta, chunks, ranges,
                                           storage_method, headers)
        elif storage_method.backblaze:
            stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                                  storage_method, key_file)
        else:
            stream = self._fetch_stream(meta, chunks, ranges, storage_method,
                                        headers)
        return meta, stream

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, headers=None):
        """
        Get the properties of an object, as returned by the container
        client's `content_get_properties`.
        """
        # NOTE(review): `headers` is accepted but not forwarded --
        # confirm whether it should be.
        return self.container.content_get_properties(account, container, obj)

    def object_show(self, account, container, obj, headers=None):
        """
        Get a description of the content along with its user properties.


        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account, container, obj,
                                           headers=headers)

    def object_update(self, account, container, obj, metadata, clear=False,
                      headers=None):
        """
        Update the properties of an object.

        :param metadata: properties to set (skipped when empty)
        :param clear: when true, first call `object_del_properties`
            with an empty list
        """
        if clear:
            self.object_del_properties(
                account, container, obj, [], headers=headers)
        if metadata:
            self.object_set_properties(
                account, container, obj, metadata, headers=headers)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              clear=False, headers=None, **kwargs):
        """
        Set properties on an object.

        :param properties: a dictionary of properties, sent under
            the 'properties' key
        """
        # NOTE(review): `clear` is accepted but not used here.
        return self.container.content_set_properties(
            account, container, obj, properties={'properties': properties},
            headers=headers, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              headers=None, **kwargs):
        """
        Delete properties of an object.

        :param properties: the properties to delete, passed as is
            to the container client
        """
        return self.container.content_del_properties(
            account, container, obj, properties=properties,
            headers=headers, **kwargs)

    # FIXME: remove and call self.container.content_prepare() directly
    def _content_prepare(self, account, container, obj_name, size,
                         policy=None, headers=None):
        """Ask the container client to prepare chunks for an object."""
        return self.container.content_prepare(
            account, container, obj_name, size, stgpol=policy,
            autocreate=True, headers=headers)

    def _content_preparer(self, account, container, obj_name,
                          policy=None, headers=None):
        """
        Prepare the upload of an object.

        :returns: a tuple with the object metadata and a generator
            function; each item of the generator is a list of chunks
            whose "pos" has been rewritten with an increasing
            metachunk position.
        """
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, headers=headers)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            # Each prepare request returns positions of its own:
            # overwrite the metachunk part with `mc_pos` and, for EC,
            # keep the second part in chunk['num'].
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
            mc_pos = 0
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            # Endless generator: one more prepare request per item
            while True:
                mc_pos += 1
                _, next_body = self._content_prepare(
                    account, container, obj_name, 1, policy, headers)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None, headers=None,
                       key_file=None):
        """
        Upload the data read from `source`, then register the object
        in the container.

        :param source: file-like object to read the data from
        :param sysmeta: dictionary merged into the object metadata
            ('mime_type' and 'etag' when called by `object_create`)
        :returns: a tuple with the list of chunks, the number of bytes
            transferred and the checksum of the content
        :raises exc.EtagMismatch: if an etag was given and differs
            (case-insensitively) from the computed checksum
        """
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name,
            policy=policy, headers=headers)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = utils.name2cid(account, container).upper()
        obj_meta['ns'] = self.namespace

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method, headers=headers,
                write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(source, obj_meta,
                                            chunk_prep, storage_method,
                                            headers, backblaze_info)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method, headers=headers,
                write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        etag = obj_meta.get('etag')
        if etag and etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch(
                "given etag %s != computed %s" % (etag, content_checksum))
        obj_meta['etag'] = content_checksum

        data = {'chunks': final_chunks, 'properties': properties or {}}
        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account, container, obj_name, size=bytes_transferred,
            checksum=content_checksum, data=data,
            content_id=obj_meta['id'], stgpol=obj_meta['policy'],
            version=obj_meta['version'], mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            headers=headers)
        return final_chunks, bytes_transferred, content_checksum

    def _fetch_stream(self, meta, chunks, ranges, storage_method, headers):
        """
        Generate the data of an object, one metachunk position
        after the other.

        :param chunks: chunks of the object, indexed by position
        :param ranges: optional list of (start, end) ranges
        :param headers: extra headers to send with each chunk request
            (the dictionary is not modified)
        :raises exc.OioException: if the download of a position
            cannot be started
        """
        # Work on copies: the caller's dictionary must not receive
        # the 'Range' header, and the range of one position must not
        # leak into the request of the next one.
        base_headers = dict(headers or {})
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            for pos, meta_range in meta_range_dict.iteritems():
                meta_start, meta_end = meta_range
                req_headers = dict(base_headers)
                if meta_start is not None and meta_end is not None:
                    req_headers['Range'] = http_header_from_ranges(
                        [meta_range])
                reader = io.ChunkReader(
                    iter(chunks[pos]), io.READ_CHUNK_SIZE, req_headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                try:
                    it = reader.get_iter()
                except Exception as err:
                    raise exc.OioException(
                        "Error while downloading position %d: %s" %
                        (pos, err))
                for part in it:
                    for d in part['iter']:
                        yield d

    def _fetch_stream_ec(self, meta, chunks, ranges, storage_method, headers):
        """
        Generate the data of an erasure-coded object, one metachunk
        position after the other.

        :param chunks: chunks of the object, indexed by position
        :param ranges: optional list of (start, end) ranges
        """
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            for pos, meta_range in meta_range_dict.iteritems():
                meta_start, meta_end = meta_range
                handler = ECChunkDownloadHandler(
                    storage_method, chunks[pos],
                    meta_start, meta_end, headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                stream = handler.get_stream()
                for part_info in stream:
                    for d in part_info['iter']:
                        yield d
                stream.close()

    def _b2_credentials(self, storage_method, key_file):
        """
        Load the backblaze credentials.

        :param key_file: path of the key file
            (defaults to '/etc/oio/sds/b2-appkey.conf')
        :raises exc.ConfigurationException: if the credentials
            cannot be loaded
        """
        key_file = key_file or '/etc/oio/sds/b2-appkey.conf'
        try:
            return BackblazeUtils.get_credentials(storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))

    def _fetch_stream_backblaze(self, meta, chunks, ranges,
                                storage_method, key_file):
        """
        Generate the data of an object stored with the backblaze
        storage method, one chunk position after the other.

        :raises exc.OioException: if the download of a chunk yields
            no data
        """
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            # NOTE(review): `ranges` is indexed by chunk position here
            # -- confirm this is what callers expect.
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            # Only download the chunks overlapping the requested data
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
                handler = BackblazeChunkDownloadHandler(
                    meta, chunks[pos], _offset, _size,
                    backblaze_info=backblaze_info)
                stream = handler.get_stream()
                if not stream:
                    raise exc.OioException("Error while downloading")
                total_bytes += len(stream)
                yield stream
            current_offset += chunk_size
def account_client(self):
    """
    Return the account client, instantiating it on first use.

    The client is built from `self.client_conf` and `self.pool_manager`,
    then kept in `self._account_client` for the next calls.
    """
    client = self._account_client
    if client is not None:
        return client
    # Imported here, only when a client has to be instantiated
    from oio.account.client import AccountClient
    client = AccountClient(self.client_conf,
                           pool_manager=self.pool_manager)
    self._account_client = client
    return client
class StorageTiererWorker(object):
    """
    Walk through the containers of one account and ask the content
    factory to change the storage policy of outdated contents.
    """

    def __init__(self, conf, logger):
        """
        :param conf: configuration dictionary; the account name is read
            from `conf[CONF_ACCOUNT]`
        :param logger: logger used for progress reports and errors
        """
        self.conf = conf
        self.logger = logger
        self.account = conf[CONF_ACCOUNT]
        self.container_client = ContainerClient(self.conf)
        self.account_client = AccountClient(self.conf)
        self.content_factory = ContentFactory(self.conf)
        # Counters reset after each periodic report
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.contents_run_time = 0
        self.total_contents_processed = 0
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_contents_per_second = int_value(
            conf.get('contents_per_second'), 30)
        self.container_fetch_limit = int_value(
            conf.get('container_fetch_limit'), 100)
        self.content_fetch_limit = int_value(
            conf.get('content_fetch_limit'), 100)
        self.outdated_threshold = int_value(
            conf.get(CONF_OUTDATED_THRESHOLD), 9999999999)
        self.new_policy = conf.get(CONF_NEW_POLICY)

    def _list_containers(self):
        """
        Generate the names of the containers of the account, page by page.

        The last name seen is used as marker of the next request, and the
        listing stops at the first empty page.
        """
        marker = None
        while True:
            resp = self.account_client.containers_list(
                self.account, marker=marker,
                limit=self.container_fetch_limit)
            listing = resp["listing"]
            if not listing:
                break
            for entry in listing:
                # Only the first field (the name) is needed, so entries
                # with more fields than expected are accepted as well.
                marker = entry[0]
                yield marker

    def _list_contents(self):
        """
        Generate (container_id, content_id) tuples for the contents
        whose policy must be changed.

        Contents modified less than `outdated_threshold` seconds ago, or
        already stored with the new policy, are skipped. A container which
        is listed in the account but cannot be found is logged and skipped.
        """
        for container in self._list_containers():
            marker = None
            while True:
                try:
                    resp = self.container_client.container_list(
                        acct=self.account, ref=container,
                        limit=self.content_fetch_limit, marker=marker)
                except NotFound:
                    self.logger.warning(
                        "Container %s in account but not found", container)
                    break
                if not resp["objects"]:
                    break
                for obj in resp["objects"]:
                    marker = obj["name"]
                    if obj["mtime"] > time.time() - self.outdated_threshold:
                        continue
                    if obj["policy"] == self.new_policy:
                        continue
                    container_id = cid_from_name(self.account, container)
                    yield (container_id, obj["content"])

    def run(self):
        """
        Change the policy of every eligible content, rate-limited to
        `max_contents_per_second`, logging a report every
        `report_interval` seconds and a summary at the end.
        """
        start_time = report_time = time.time()
        total_errors = 0
        for (container_id, content_id) in self._list_contents():
            self.safe_change_policy(container_id, content_id)
            self.contents_run_time = ratelimit(
                self.contents_run_time,
                self.max_contents_per_second
            )
            self.total_contents_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                # Same guard as the final summary: the first report may
                # be emitted with no measurable time elapsed.
                since_report = (now - report_time) or 0.000001
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / since_report,
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(content_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'content_rate': self.total_contents_processed / elapsed
            }
        )

    def safe_change_policy(self, container_id, content_id):
        """
        Change the policy of one content, never raising.

        Any exception is logged with its traceback and counted in
        `self.errors`; `self.passes` is incremented in all cases.
        """
        try:
            self.change_policy(container_id, content_id)
        except Exception:
            self.errors += 1
            # The identifiers are passed as two separate arguments: a
            # single tuple does not match the two placeholders and makes
            # the formatting of the log record fail.
            self.logger.exception(
                "ERROR while changing policy for content %s/%s",
                container_id, content_id)
        self.passes += 1

    def change_policy(self, container_id, content_id):
        """
        Ask the content factory to move the content to `self.new_policy`.
        """
        self.logger.info("Changing policy for content %s/%s",
                         container_id, content_id)
        self.content_factory.change_policy(
            container_id, content_id, self.new_policy)