def __init__(self, conf, logger, volume, try_chunk_delete=False,
             beanstalkd_addr=None, **kwargs):
    super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
    # rdir
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
    # rawx
    self.try_chunk_delete = try_chunk_delete
    # beanstalk
    if beanstalkd_addr:
        self.beanstalkd_listener = BeanstalkdListener(
            beanstalkd_addr,
            conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
            self.logger, **kwargs)
    else:
        self.beanstalkd_listener = None
    # counters
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_expected_chunks = None
    # distributed
    self.distributed = False
def _deindex_chunk(self, chunk):
    rdir = RdirClient(self.conf, pool_manager=self.conscience.pool_manager)
    url = chunk['url']
    volume_id = url.split('/', 3)[2]
    chunk_id = url.split('/', 3)[3]
    rdir.chunk_delete(volume_id, self.container_id,
                      self.content_id, chunk_id)
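# Illustrative sketch (not part of the original code): how _deindex_chunk
# above splits a chunk URL. A chunk URL has the form
# "http://<rawx-service>/<chunk_id>", so str.split('/', 3) yields
# ['http:', '', '<rawx-service>', '<chunk_id>']: index 2 is the volume
# (rawx) id and index 3 is the chunk id. The sample URL below is made up.
url = 'http://192.168.1.1:6201/0123456789ABCDEF'
parts = url.split('/', 3)
volume_id = parts[2]   # '192.168.1.1:6201'
chunk_id = parts[3]    # '0123456789ABCDEF'
assert volume_id == '192.168.1.1:6201'
assert chunk_id == '0123456789ABCDEF'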
def setUp(self):
    super(TestIndexerCrawler, self).setUp()
    self.namespace = self.conf['namespace']
    self.gridconf = {"namespace": self.namespace}
    self.rdir_client = RdirClient(self.gridconf)
def __init__(self, conf, logger, volume, input_file=None,
             try_chunk_delete=False, beanstalkd_addr=None):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.volume = volume
    self.run_time = 0
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.bytes_running_time = 0
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_chunks_processed = 0
    self.dry_run = true_value(conf.get('dry_run', False))
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    self.max_bytes_per_second = int_value(
        conf.get('bytes_per_second'), 10000000)
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
    self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
    self.input_file = input_file
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.content_factory = ContentFactory(conf)
    self.try_chunk_delete = try_chunk_delete
    self.beanstalkd_addr = beanstalkd_addr
    self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
    self.beanstalk = None
def __init__(self, conf, **kwargs):
    super(BlobIndexer, self).__init__(conf)
    self.logger = get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise exc.ConfigurationException('No volume specified for indexer')
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.successes = 0
    self.last_reported = 0
    self.total_since_last_reported = 0
    self.chunks_run_time = 0
    self.interval = int_value(conf.get('interval'), 300)
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    pm = get_pool_manager(pool_connections=10)
    self.index_client = RdirClient(conf, logger=self.logger,
                                   pool_manager=pm)
    self.namespace, self.volume_id = check_volume(self.volume)
    self.convert_chunks = true_value(conf.get('convert_chunks'))
    if self.convert_chunks:
        converter_conf = self.conf.copy()
        converter_conf['no_backup'] = True
        self.converter = BlobConverter(converter_conf, logger=self.logger,
                                       pool_manager=pm)
    else:
        self.converter = None
def init(self):
    self.concurrency = int_value(self.conf.get('concurrency'), 10)
    self.tube = self.conf.get("tube", DEFAULT_TUBE)
    acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 3600)
    self.app_env['account_client'] = AccountClient(
        self.conf,
        logger=self.logger,
        refresh_delay=acct_refresh_interval,
        pool_connections=3,  # 1 account, 1 proxy, 1 extra
    )
    self.app_env['rdir_client'] = RdirClient(
        self.conf,
        logger=self.logger,
        pool_maxsize=self.concurrency,  # 1 cnx per greenthread per host
    )
    if 'handlers_conf' not in self.conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                 global_conf=self.conf, app=self)
    for opt in ('acct_update', 'rdir_update',
                'retries_per_second', 'batch_size'):
        if opt in self.conf:
            self.logger.warn('Deprecated option: %s', opt)
    super(EventWorker, self).init()
def __init__(self, namespace, concurrency=50, error_file=None,
             rebuild_file=None, check_xattr=True, limit_listings=0,
             request_attempts=1, logger=None, verbose=False,
             check_hash=False, **_kwargs):
    self.pool = GreenPool(concurrency)
    self.error_file = error_file
    self.check_xattr = bool(check_xattr)
    self.check_hash = bool(check_hash)
    self.logger = logger or get_logger(
        {'namespace': namespace}, name='integrity', verbose=verbose)
    # Optimisation for when we are only checking one object
    # or one container.
    # 0 -> do not limit
    # 1 -> limit account listings (list of containers)
    # 2 -> limit container listings (list of objects)
    self.limit_listings = limit_listings
    if self.error_file:
        outfile = open(self.error_file, 'a')
        self.error_writer = csv.writer(outfile, delimiter=' ')
    self.rebuild_file = rebuild_file
    if self.rebuild_file:
        self.fd = open(self.rebuild_file, 'a')
        self.rebuild_writer = csv.writer(self.fd, delimiter='|')
    self.api = ObjectStorageApi(namespace, logger=self.logger,
                                max_retries=request_attempts - 1,
                                request_attempts=request_attempts)
    self.rdir_client = RdirClient({"namespace": namespace},
                                  logger=self.logger)
    self.accounts_checked = 0
    self.containers_checked = 0
    self.objects_checked = 0
    self.chunks_checked = 0
    self.account_not_found = 0
    self.container_not_found = 0
    self.object_not_found = 0
    self.chunk_not_found = 0
    self.account_exceptions = 0
    self.container_exceptions = 0
    self.object_exceptions = 0
    self.chunk_exceptions = 0
    self.list_cache = {}
    self.running = {}
    self.running_lock = Semaphore(1)
    self.result_queue = Queue(concurrency)
    self.run_time = 0
def setUp(self):
    super(TestMeta2Indexing, self).setUp()
    self.rdir_client = RdirClient(self.conf)
    self.directory_client = DirectoryClient(self.conf)
    self.container_client = ContainerClient(self.conf)
    self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
    self.containers_svcs = {}
    self.event_agent_name = 'event-agent-1'
def setUp(self):
    super(TestRdirClient, self).setUp()
    self.rawx_conf = random.choice(self.conf['services']['rawx'])
    self.rawx_id = self.rawx_conf.get('service_id',
                                      self.rawx_conf['addr'])
    self.rdir = RdirClient(self.conf)
    self.rdir.admin_clear(self.rawx_id, clear_all=True)
    self._push_chunks()
    self.rdir._direct_request = Mock(
        side_effect=self.rdir._direct_request)
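# Illustrative helper (an assumption, not part of the original tests): read
# back the records indexed for one rawx volume after _push_chunks(). The
# (container_id, content_id, chunk_id, value) tuple shape mirrors the way
# chunk_fetch() is iterated in test_rebuild_chunk further down; other
# versions of RdirClient may yield a different shape.
def dump_indexed_chunks(rdir_client, rawx_id):
    for container_id, content_id, chunk_id, value in \
            rdir_client.chunk_fetch(rawx_id):
        print(container_id, content_id, chunk_id, value)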
def setUp(self):
    super(TestIndexerCrawler, self).setUp()
    self.namespace = self.conf['namespace']
    self.rawx_conf = self.conf['services']['rawx'][0]
    self.conf = {"namespace": self.namespace,
                 "volume": self.rawx_conf['path']}
    self.rdir_client = RdirClient(self.conf)
def test_rebuild_chunk(self):
    # push a new content
    content = TestContent(self.conf, self.account,
                          self.container_name, "mycontent", "TWOCOPIES")
    data = "azerty"
    content.add_chunk(data, pos='0', rawx=0)
    content.add_chunk(data, pos='0', rawx=1)
    self._push_content(content)

    # rebuild the first rawx
    rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                    self.conf['rawx'][0]['addr'])
    rebuilder.chunk_rebuild(content.container_id, content.content_id,
                            content.chunks[0].id)

    # check meta2 information
    _, res = self.container_client.content_show(
        acct=content.account, ref=content.container_name,
        content=content.content_id)
    new_chunk_info = None
    for c in res:
        if (c['url'] != content.chunks[0].url and
                c['url'] != content.chunks[1].url):
            new_chunk_info = c
    new_chunk_id = new_chunk_info['url'].split('/')[-1]

    self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
    self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
    self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

    # check chunk information
    meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])
    self.assertEqual(meta['content_size'], str(content.chunks[0].size))
    self.assertEqual(meta['content_path'], content.content_name)
    self.assertEqual(meta['content_cid'], content.container_id)
    self.assertEqual(meta['content_id'], content.content_id)
    self.assertEqual(meta['chunk_id'], new_chunk_id)
    self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
    self.assertEqual(meta['content_version'], content.version)
    self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)
    self.assertEqual(stream.next(), content.chunks[0].data)

    # check rtime flag in rdir
    rdir_client = RdirClient(self.gridconf)
    res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
    key = (content.container_id, content.content_id, content.chunks[0].id)
    check_value = None
    for i_container, i_content, i_chunk, i_value in res:
        if (i_container, i_content, i_chunk) == key:
            check_value = i_value
    self.assertIsNotNone(check_value.get('rtime'))
def setUp(self):
    super(TestRdirMeta2Client, self).setUp()
    self.namespace = "dummy"
    self.volid = "e29b4c56-8522-4118-82ea"
    self.container_url = "OPENIO/testing/test1"
    self.container_id = "random833999id"
    self.mtime = 2874884.47
    self.rdir_client = RdirClient({'namespace': self.namespace},
                                  endpoint='127.0.0.0:6000')
def __init__(self, conf, logger, volume, try_chunk_delete=False, **kwargs):
    super(BlobRebuilderWorker, self).__init__(conf, logger, **kwargs)
    self.volume = volume
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.dry_run = true_value(conf.get('dry_run', False))
    self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.content_factory = ContentFactory(conf, logger=self.logger)
    self.try_chunk_delete = try_chunk_delete
def __init__(self, conf, input_file=None, service_id=None, **kwargs):
    super(Meta2Rebuilder, self).__init__(conf, **kwargs)

    # input
    self.input_file = input_file
    self.meta2_id = service_id

    # rawx/rdir
    self.rdir_client = RdirClient(self.conf, logger=self.logger)
    self.rdir_fetch_limit = int_value(
        self.conf.get('rdir_fetch_limit'), self.DEFAULT_RDIR_FETCH_LIMIT)
def __init__(self, ns, max_containers=256, max_contents=256):
    conf = {'namespace': ns}
    self.cs = ConscienceClient(conf)
    self.rdir = RdirClient(conf)
    self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
    self.sent = set()
    self.max_containers = max_containers
    self.max_contents = max_contents
    self.pushed_count = 0
    self.pushed_time = 0
    self.removed_count = 0
    self.removed_time = 0
def tier_content(self, config, stats, control):
    def _set(lock_, field, value):
        lock_.acquire()
        field.value = value
        lock_.release()

    def _add(lock_, field, value):
        lock_.acquire()
        field.value += value
        lock_.release()

    lock = control.get('lock')
    try:
        src = config.get('src')
        del config['src']
        self.client.lock_score(dict(type="rawx", addr=src))
        api = ObjectStorageApi(config["namespace"])
        rdir_client = RdirClient({'namespace': config["namespace"]})

        self.log.info("Starting tierer on %s with policy %s" %
                      (src, config["policy"]))

        policies = dict()
        for part in config["policy"].split(','):
            policies[part.split(':')[0]] = part.split(':')[1]
            self.log.info("Parsed policy: " + part.split(':')[0] +
                          " " + part.split(':')[1])

        for marker in config["markers"]:
            req = dict(
                start_after=marker,
                limit=1000,
            )
            _, resp_body = rdir_client._rdir_request(
                src, 'POST', 'fetch', json=req)
            for (key, value) in resp_body:
                _, _, chunk = key.split('|')
                res = requests.head("http://" + src + "/" + chunk)
                policy = res.headers.get(
                    "x-oio-chunk-meta-content-storage-policy", "")
                if policy not in policies.keys():
                    _add(lock, stats.get("skip"), 1)
                    continue
                path = res.headers.get("x-oio-chunk-meta-full-path", "///")
                path_parts = path.split('/')
                if len(path_parts) < 3:
                    _add(lock, stats.get("skip"), 1)
                    continue
                try:
                    api.object_change_policy(
                        unquote(path_parts[0]), unquote(path_parts[1]),
                        unquote(path_parts[2]), policies[policy])
                    _add(lock, stats.get("success"), 1)
                except Exception as e:
                    self.log.info("Operation failed %s: %s (%s)" % (
                        path, format_exc(e), policies[policy]))
                    _add(lock, stats.get("fail"), 1)
    except Exception as e:
        self.log.error("Tierer failed with %s" % format_exc(e))
    _set(lock, control.get('status'), 2)
    _set(lock, control.get('end'), int(time.time()))
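# Illustrative sketch (an assumption, not part of the original code):
# paginating the raw 'fetch' route the way tier_content above does, but
# following the last key of each page instead of a fixed list of markers.
# Keys come back as 'container|content|chunk', which is why tier_content
# splits them on '|'. _rdir_request is a private helper, so this relies on
# internals that may change between versions.
def iter_all_chunk_records(rdir_client, rawx_addr, page_size=1000):
    marker = None
    while True:
        req = dict(limit=page_size)
        if marker:
            req['start_after'] = marker
        _, resp_body = rdir_client._rdir_request(rawx_addr, 'POST', 'fetch',
                                                 json=req)
        if not resp_body:
            break
        for key, value in resp_body:
            yield key, value
        # resume after the last key returned in this page
        marker = resp_body[-1][0]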
def __init__(self, volume_path, conf, pool_manager=None):
    """
    Initializes an indexing worker for meta2 databases.

    Possible values of conf relating to this worker are:
    - interval: (int) in seconds, time between two full scans.
      Default: 1800 (half an hour).
    - report_interval: (int) in seconds, time between two reports.
      Default: 300.
    - scanned_per_second: (int) maximum number of databases indexed per
      second.
    - try_removing_faulty_indexes: when a database that is not supposed to
      be handled by this volume is encountered, attempt to remove it from
      this volume's rdir index if it exists.
      WARNING: the decision is based on a proxy response that could be
      affected by cache inconsistencies; use at your own risk.
      Default: False.

    :param volume_path: The volume path to be indexed
    :param conf: The configuration to be passed to the needed services
    :param pool_manager: A connection pool manager. If none is given, a
        new one with a default size of 10 will be created.
    """
    self.logger = get_logger(conf)
    self._stop = False
    self.volume = volume_path
    self.success_nb = 0
    self.failed_nb = 0
    self.full_scan_nb = 0
    self.last_report_time = 0
    self.last_scan_time = 0
    self.last_index_time = 0
    self.start_time = 0
    self.indexed_since_last_report = 0
    self.scans_interval = int_value(conf.get('interval'), 1800)
    self.report_interval = int_value(conf.get('report_interval'), 300)
    self.max_indexed_per_second = int_value(
        conf.get('scanned_per_second'), 3000)
    self.namespace, self.volume_id = check_volume_for_service_type(
        self.volume, "meta2")
    self.attempt_bad_index_removal = boolean_value(
        conf.get('try_removing_faulty_indexes', False))
    if not pool_manager:
        pool_manager = get_pool_manager(pool_connections=10)
    self.index_client = RdirClient(conf, logger=self.logger,
                                   pool_manager=pool_manager)
    self.dir_client = DirectoryClient(conf, logger=self.logger,
                                      pool_manager=pool_manager)
def setUp(self):
    super(TestRdirClient, self).setUp()
    self.namespace = self.conf['namespace']
    self.rdir_client = RdirClient({'namespace': self.namespace})
    self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
    self.container_id_1 = random_id(64)
    self.container_id_2 = random_id(64)
    self.container_id_3 = random_id(64)
    self.content_id_1 = random_id(32)
    self.content_id_2 = random_id(32)
    self.content_id_3 = random_id(32)
    self.chunk_id_1 = random_id(64)
    self.chunk_id_2 = random_id(64)
    self.chunk_id_3 = random_id(64)
def init(self):
    eventlet.monkey_patch(os=False)
    self.session = requests.Session()
    self.cs = ConscienceClient(self.conf)
    self.rdir = RdirClient(self.conf)
    self._acct_addr = None
    self.acct_update = 0
    self.graceful_timeout = 1
    self.acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 60)
    self.concurrency = int_value(self.conf.get('concurrency'), 1000)
    self.acct_update = true_value(self.conf.get('acct_update', True))
    self.rdir_update = true_value(self.conf.get('rdir_update', True))
    super(EventWorker, self).init()
def __init__(self, conf, input_file=None, service_id=None, **kwargs):
    super(BlobRebuilder, self).__init__(conf, **kwargs)

    # counters
    self.bytes_processed = 0
    self.total_bytes_processed = 0

    # input
    self.input_file = input_file
    self.rawx_id = service_id

    # rawx/rdir
    self.rdir_client = RdirClient(self.conf, logger=self.logger)
    self.rdir_fetch_limit = int_value(
        self.conf.get('rdir_fetch_limit'), self.DEFAULT_RDIR_FETCH_LIMIT)
def __init__(self, conf, logger, volume):
    self.conf = conf
    self.logger = logger
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.total_chunks_processed = 0
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    self.index_client = RdirClient(conf)
    self.namespace, self.volume_id = check_volume(self.volume)
def setUp(self): super(TestRdirClient, self).setUp() self.namespace = "dummy" self.rdir_client = RdirClient({'namespace': self.namespace}, endpoint='127.0.0.0:6000') self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567") self.container_id_1 = random_id(64) self.container_id_2 = random_id(64) self.container_id_3 = random_id(64) self.content_id_1 = random_id(32) self.content_id_2 = random_id(32) self.content_id_3 = random_id(32) self.chunk_id_1 = random_id(64) self.chunk_id_2 = random_id(64) self.chunk_id_3 = random_id(64)
def __init__(self, conf, name, context, **kwargs):
    self.conf = conf
    self.name = name
    verbose = kwargs.pop('verbose', False)
    self.logger = get_logger(self.conf, verbose=verbose)
    self.init_zmq(context)
    self.cs = ConscienceClient(self.conf)
    self.rdir = RdirClient(self.conf)
    self._acct_addr = None
    self.acct_update = 0
    self.acct_refresh_interval = int_value(
        conf.get('acct_refresh_interval'), 60)
    self.acct_update = true_value(conf.get('acct_update', True))
    self.rdir_update = true_value(conf.get('rdir_update', True))
    self.session = requests.Session()
    self.failed = False
def __init__(self, conf, logger, volume, try_chunk_delete=False,
             beanstalkd_addr=None, **kwargs):
    super(BlobRebuilder, self).__init__(conf, logger, **kwargs)
    self.volume = volume
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.try_chunk_delete = try_chunk_delete
    self.beanstalkd_addr = beanstalkd_addr
    self.beanstalkd_tube = conf.get('beanstalkd_tube',
                                    DEFAULT_REBUILDER_TUBE)
    self.beanstalk = None
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
def __init__(self, conf, **kwargs):
    super(BlobIndexer, self).__init__(conf)
    self.logger = get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise exc.ConfigurationException('No volume specified for indexer')
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.successes = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.interval = int_value(conf.get('interval'), 300)
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    self.index_client = RdirClient(conf, logger=self.logger)
    self.namespace, self.volume_id = check_volume(self.volume)
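# Illustrative sketch (an assumption, not from the original code): a conf
# dict covering the keys read by the BlobIndexer constructor above. The
# namespace name and volume path are made up; such a dict would be passed
# as BlobIndexer(sample_conf) on a host where the volume actually exists.
sample_conf = {
    'namespace': 'OPENIO',
    'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
    'interval': 300,            # seconds between two indexing passes
    'report_interval': 3600,    # seconds between two reports
    'chunks_per_second': 30,    # indexing rate limit
}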
def init(self):
    eventlet.monkey_patch(os=False)
    self.tube = self.conf.get("tube", DEFAULT_TUBE)
    self.cs = ConscienceClient(self.conf, logger=self.logger)
    self.rdir = RdirClient(self.conf, logger=self.logger)
    self._acct_addr = None
    self.acct_update = 0
    self.graceful_timeout = 1
    self.acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 60)
    self.acct_update = true_value(self.conf.get('acct_update', True))
    self.rdir_update = true_value(self.conf.get('rdir_update', True))
    self.app_env['acct_addr'] = self.acct_addr
    if 'handlers_conf' not in self.conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                 global_conf=self.conf, app=self)
    super(EventWorker, self).init()
def setUp(self):
    super(TestBlobIndexer, self).setUp()
    self.rdir_client = RdirClient(self.conf)
    self.blob_client = BlobClient(self.conf)
    _, self.rawx_path, rawx_addr, _ = self.get_service_url('rawx')
    services = self.conscience.all_services('rawx')
    self.rawx_id = None
    for rawx in services:
        if rawx_addr == rawx['addr']:
            self.rawx_id = rawx['tags'].get('tag.service_id', None)
    if self.rawx_id is None:
        self.rawx_id = rawx_addr
    conf = self.conf.copy()
    conf['volume'] = self.rawx_path
    self.blob_indexer = BlobIndexer(conf)
    # clear rawx/rdir
    chunk_files = paths_gen(self.rawx_path)
    for chunk_file in chunk_files:
        os.remove(chunk_file)
    self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
def test_link_rdir_to_zero_scored_rawx(self):
    client = RdirClient({'namespace': self.ns})
    disp = RdirDispatcher({'namespace': self.ns})

    # Register a service, with score locked to zero
    new_rawx = self._srv('rawx', {'tag.loc': 'whatever'})
    new_rawx['score'] = 0
    self._register_srv(new_rawx)
    self._reload_proxy()

    all_rawx = disp.assign_all_rawx()
    all_rawx_keys = [x['addr'] for x in all_rawx]
    self.assertIn(new_rawx['addr'], all_rawx_keys)
    rdir_addr = client._get_rdir_addr(new_rawx['addr'])
    self.assertIsNotNone(rdir_addr)

    try:
        self.api.unlink('_RDIR', new_rawx['addr'], 'rdir')
        self.api.delete('_RDIR', new_rawx['addr'])
        # self._flush_cs('rawx')
    except Exception:
        pass
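# Illustrative sketch (an assumption, not part of the original test): after
# assign_all_rawx(), every rawx returned should resolve to an rdir service
# through the same private _get_rdir_addr() call used above. The import
# path is assumed to be the usual oio.rdir.client module.
from oio.rdir.client import RdirClient, RdirDispatcher

def check_rdir_assignments(ns):
    disp = RdirDispatcher({'namespace': ns})
    client = RdirClient({'namespace': ns})
    for rawx in disp.assign_all_rawx():
        print(rawx['addr'], '->', client._get_rdir_addr(rawx['addr']))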
def init(self):
    self.tube = self.conf.get("tube", DEFAULT_TUBE)
    self.cs = ConscienceClient(self.conf, logger=self.logger)
    self.rdir = RdirClient(self.conf, logger=self.logger)
    self._acct_addr = None
    self.acct_update = 0
    self.graceful_timeout = 1
    self.acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 60)
    self.app_env['acct_addr'] = self.acct_addr
    if 'handlers_conf' not in self.conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                 global_conf=self.conf, app=self)
    for opt in ('acct_update', 'rdir_update',
                'retries_per_second', 'batch_size'):
        if opt in self.conf:
            self.logger.warn('Deprecated option: %s', opt)
    super(EventWorker, self).init()
def __init__(self, conf, logger, volume):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.volume = volume
    self.run_time = 0
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.bytes_running_time = 0
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_chunks_processed = 0
    self.dry_run = true_value(conf.get('dry_run', False))
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    self.max_bytes_per_second = int_value(
        conf.get('bytes_per_second'), 10000000)
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
    self.rdir_client = RdirClient(conf)
    self.content_factory = ContentFactory(conf)