def __init__(self, conf, logger, volume, input_file=None, **kwargs):
    # pylint: disable=no-member
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.namespace = conf['namespace']
    self.volume = volume
    self.input_file = input_file
    self.concurrency = int_value(conf.get('concurrency'),
                                 self.DEFAULT_CONCURRENCY)
    self.success = True

    # exit gracefully
    self.running = True
    signal.signal(signal.SIGINT, self.exit_gracefully)
    signal.signal(signal.SIGTERM, self.exit_gracefully)

    # counters
    self.lock_counters = threading.Lock()
    self.items_processed = 0
    self.errors = 0
    self.total_items_processed = 0
    self.total_errors = 0

    # report
    self.lock_report = threading.Lock()
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(conf.get('report_interval'),
                                     self.DEFAULT_REPORT_INTERVAL)
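
# Illustrative sketch (not the real oio.common.easy_value module) of how the
# int_value/float_value/true_value helpers used throughout these snippets are
# assumed to behave: they fall back to a default when the value is unset.
# The exact library implementations may differ (error handling, accepted
# boolean spellings, etc.); the names below are hypothetical.
def _int_value_sketch(value, default):
    # Return `default` when the value is missing, otherwise cast it to int.
    return default if value is None else int(value)


def _float_value_sketch(value, default):
    return default if value is None else float(value)


def _true_value_sketch(value):
    # Accept common "truthy" spellings found in configuration files.
    return str(value).lower() in ('true', '1', 'yes', 'on')
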
def list_containers(self, account_id, limit=1000, marker=None,
                    end_marker=None, prefix=None, delimiter=None,
                    s3_buckets_only=False):
    raw_list, _next_marker = self._raw_listing(
        self.clistkey(account_id), limit=limit, marker=marker,
        end_marker=end_marker, prefix=prefix, delimiter=delimiter,
        s3_buckets_only=s3_buckets_only)
    pipeline = self.conn_slave.pipeline(True)
    # skip prefix
    for container in [entry for entry in raw_list if not entry[3]]:
        pipeline.hmget(AccountBackend.ckey(account_id, container[0]),
                       'objects', 'bytes', 'mtime')
    res = pipeline.execute()
    i = 0
    for container in raw_list:
        if not container[3]:
            # FIXME(adu) Convert to dict
            container[1] = int_value(res[i][0], 0)
            container[2] = int_value(res[i][1], 0)
            container[4] = float_value(res[i][2], 0.0)
            i += 1
    return raw_list
def sanitize_config(cls, job_config):
    """
    Validate and sanitize the job configuration.
    Ex: cast a string as integer, set a default.
    Also return the lock id if there is one.
    """
    sanitized_job_config = dict()

    tasks_per_second = int_value(job_config.get('tasks_per_second'),
                                 cls.DEFAULT_TASKS_PER_SECOND)
    sanitized_job_config['tasks_per_second'] = tasks_per_second

    tasks_batch_size = int_value(job_config.get('tasks_batch_size'), None)
    if tasks_batch_size is None:
        if tasks_per_second > 0:
            tasks_batch_size = min(tasks_per_second,
                                   cls.MAX_TASKS_BATCH_SIZE)
        else:
            tasks_batch_size = cls.MAX_TASKS_BATCH_SIZE
    elif tasks_batch_size < 1:
        raise ValueError('Tasks batch size should be positive')
    elif tasks_batch_size > cls.MAX_TASKS_BATCH_SIZE:
        raise ValueError('Tasks batch size should be less than %d'
                         % cls.MAX_TASKS_BATCH_SIZE)
    sanitized_job_config['tasks_batch_size'] = tasks_batch_size

    sanitized_job_params, lock = cls.sanitize_params(
        job_config.get('params') or dict())
    sanitized_job_config['params'] = sanitized_job_params

    return sanitized_job_config, lock
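
# Hedged usage sketch for sanitize_config() above. The concrete job class and
# its DEFAULT_TASKS_PER_SECOND / MAX_TASKS_BATCH_SIZE constants are
# assumptions used only for illustration; real jobs define their own values.
#
# config, lock = SomeJob.sanitize_config({
#     'tasks_per_second': '10',            # strings are cast to int
#     'params': {'service_id': 'rawx-1'},
# })
# # With the assumed defaults, `config` would look like:
# # {'tasks_per_second': 10,
# #  'tasks_batch_size': 10,   # min(tasks_per_second, MAX_TASKS_BATCH_SIZE)
# #  'params': {...}}          # whatever sanitize_params() returned
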
def init(self):
    self.concurrency = int_value(self.conf.get('concurrency'), 10)
    self.tube = self.conf.get("tube", DEFAULT_TUBE)
    acct_refresh_interval = int_value(
        self.conf.get('acct_refresh_interval'), 3600)
    self.app_env['account_client'] = AccountClient(
        self.conf,
        logger=self.logger,
        refresh_delay=acct_refresh_interval,
        pool_connections=3,  # 1 account, 1 proxy, 1 extra
    )
    self.app_env['rdir_client'] = RdirClient(
        self.conf,
        logger=self.logger,
        pool_maxsize=self.concurrency,  # 1 cnx per greenthread per host
    )
    if 'handlers_conf' not in self.conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                 global_conf=self.conf, app=self)
    for opt in ('acct_update', 'rdir_update',
                'retries_per_second', 'batch_size'):
        if opt in self.conf:
            self.logger.warn('Deprecated option: %s', opt)
    super(EventWorker, self).init()
def take_action(self, parsed_args):
    from oio.common.easy_value import convert_size

    self.log.debug('take_action(%s)', parsed_args)

    account = self.app.client_manager.account
    self.take_action_container(parsed_args)
    # The command is named 'show' but we must call
    # container_get_properties() because container_show() does
    # not return system properties (and we need them).
    data = self.app.client_manager.storage.container_get_properties(
        account, parsed_args.container, cid=parsed_args.cid,
        admin_mode=True)
    sys = data['system']
    ctime = float(sys[M2_PROP_CTIME]) / 1000000.
    bytes_usage = sys.get(M2_PROP_USAGE, 0)
    objects = sys.get(M2_PROP_OBJECTS, 0)
    damaged_objects = sys.get(M2_PROP_DAMAGED_OBJECTS, 0)
    missing_chunks = sys.get(M2_PROP_MISSING_CHUNKS, 0)
    if parsed_args.formatter == 'table':
        ctime = int(ctime)
        bytes_usage = convert_size(int(bytes_usage), unit="B")
        objects = convert_size(int(objects))
    info = {
        'account': sys['sys.account'],
        'base_name': sys['sys.name'],
        'container': sys['sys.user.name'],
        'ctime': ctime,
        'bytes_usage': bytes_usage,
        'quota': sys.get(M2_PROP_QUOTA, "Namespace default"),
        'objects': objects,
        'damaged_objects': damaged_objects,
        'missing_chunks': missing_chunks,
        'storage_policy': sys.get(M2_PROP_STORAGE_POLICY,
                                  "Namespace default"),
        'max_versions': sys.get(M2_PROP_VERSIONING_POLICY,
                                "Namespace default"),
        'status': OIO_DB_STATUS_NAME.get(sys.get('sys.status'), "Unknown"),
    }
    for k in ('stats.page_count', 'stats.freelist_count',
              'stats.page_size'):
        info[k] = sys.get(k)
    wasted = (float_value(info['stats.freelist_count'], 0) /
              float_value(info['stats.page_count'], 1))
    wasted_bytes = (int_value(info['stats.freelist_count'], 0) *
                    int_value(info['stats.page_size'], 0))
    info['stats.space_wasted'] = "%5.2f%% (est. %s)" % (
        wasted * 100, convert_size(wasted_bytes))
    bucket = sys.get(M2_PROP_BUCKET_NAME, None)
    if bucket is not None:
        info['bucket'] = bucket
    delete_exceeding = sys.get(M2_PROP_DEL_EXC_VERSIONS, None)
    if delete_exceeding is not None:
        info['delete_exceeding_versions'] = delete_exceeding != '0'
    for k, v in iteritems(data['properties']):
        info['meta.' + k] = v
    return list(zip(*sorted(info.items())))
def __init__(self, conf, logger, volume, input_file=None,
             try_chunk_delete=False, beanstalkd_addr=None):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.volume = volume
    self.run_time = 0
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.bytes_running_time = 0
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_chunks_processed = 0
    self.dry_run = true_value(conf.get('dry_run', False))
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                          10000000)
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
    self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
    self.input_file = input_file
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.content_factory = ContentFactory(conf)
    self.try_chunk_delete = try_chunk_delete
    self.beanstalkd_addr = beanstalkd_addr
    self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
    self.beanstalk = None
def list_containers(self, account_id, limit=1000, marker=None,
                    end_marker=None, prefix=None, delimiter=None):
    raw_list = self._raw_listing(account_id, limit=limit, marker=marker,
                                 end_marker=end_marker, prefix=prefix,
                                 delimiter=delimiter)
    pipeline = self.conn.pipeline(True)
    # skip prefix
    for container in [entry for entry in raw_list if not entry[3]]:
        pipeline.hmget(AccountBackend.ckey(account_id, container[0]),
                       'objects', 'bytes', 'mtime')
    res = pipeline.execute()
    i = 0
    for container in raw_list:
        if not container[3]:
            container[1] = int_value(res[i][0], 0)
            container[2] = int_value(res[i][1], 0)
            container[4] = float_value(res[i][2], 0.0)
            i += 1
    return raw_list
def __init__(self, conf, logger, volume):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.volume = volume
    self.run_time = 0
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.last_usage_check = 0
    self.chunks_run_time = 0
    self.bytes_running_time = 0
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_chunks_processed = 0
    self.usage_target = int_value(conf.get('usage_target'), 0)
    self.usage_check_interval = int_value(
        conf.get('usage_check_interval'), 3600)
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                          10000000)
    self.blob_client = BlobClient()
    self.container_client = ContainerClient(conf, logger=self.logger)
    self.content_factory = ContentFactory(conf)
def __init__(self, conf, logger=None, **kwargs):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise ConfigurationException('No volume specified for converter')
    self.volume = volume
    self.namespace, self.volume_id = check_volume(self.volume)
    # cache
    self.name_by_cid = CacheDict()
    self.content_id_by_name = CacheDict()
    # client
    self.container_client = ContainerClient(conf, **kwargs)
    self.content_factory = ContentFactory(conf, self.container_client,
                                          logger=self.logger)
    # stats/logs
    self.errors = 0
    self.passes = 0
    self.total_chunks_processed = 0
    self.start_time = 0
    self.last_reported = 0
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    # speed
    self.chunks_run_time = 0
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    # backup
    self.no_backup = true_value(conf.get('no_backup', False))
    self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
    self.backup_name = 'backup_%s_%f' % (self.volume_id, time.time())
    # dry run
    self.dry_run = true_value(conf.get('dry_run', False))
def __init__(self, conf, **kwargs):
    super(BlobIndexer, self).__init__(conf)
    self.logger = get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise exc.ConfigurationException('No volume specified for indexer')
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.successes = 0
    self.last_reported = 0
    self.total_since_last_reported = 0
    self.chunks_run_time = 0
    self.interval = int_value(conf.get('interval'), 300)
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    pm = get_pool_manager(pool_connections=10)
    self.index_client = RdirClient(conf, logger=self.logger,
                                   pool_manager=pm)
    self.namespace, self.volume_id = check_volume(self.volume)
    self.convert_chunks = true_value(conf.get('convert_chunks'))
    if self.convert_chunks:
        converter_conf = self.conf.copy()
        converter_conf['no_backup'] = True
        self.converter = BlobConverter(converter_conf, logger=self.logger,
                                       pool_manager=pm)
    else:
        self.converter = None
def sanitize_params(cls, job_params):
    sanitized_job_params, _ = super(
        RawxDecommissionJob, cls).sanitize_params(job_params)

    # specific configuration
    service_id = job_params.get('service_id')
    if not service_id:
        raise ValueError('Missing service ID')
    sanitized_job_params['service_id'] = service_id

    sanitized_job_params['rawx_timeout'] = float_value(
        job_params.get('rawx_timeout'), cls.DEFAULT_RAWX_TIMEOUT)

    sanitized_job_params['min_chunk_size'] = int_value(
        job_params.get('min_chunk_size'), cls.DEFAULT_MIN_CHUNK_SIZE)

    sanitized_job_params['max_chunk_size'] = int_value(
        job_params.get('max_chunk_size'), cls.DEFAULT_MAX_CHUNK_SIZE)

    excluded_rawx = job_params.get('excluded_rawx')
    if excluded_rawx:
        excluded_rawx = excluded_rawx.split(',')
    else:
        excluded_rawx = list()
    sanitized_job_params['excluded_rawx'] = excluded_rawx

    sanitized_job_params['usage_target'] = int_value(
        job_params.get('usage_target'), cls.DEFAULT_USAGE_TARGET)

    sanitized_job_params['usage_check_interval'] = float_value(
        job_params.get('usage_check_interval'),
        cls.DEFAULT_USAGE_CHECK_INTERVAL)

    return sanitized_job_params, 'rawx/%s' % service_id
def __init__(self, conf, logger, volume, container_ids):
    self.conf = conf
    self.logger = logger
    self.volume = volume
    self.volume_ns, self.volume_id = check_volume(self.volume)
    self.container_ids = container_ids or list()
    self.container_ids = [container_id.upper()
                          for container_id in self.container_ids]

    self.namespace = self.conf['namespace']
    if self.namespace != self.volume_ns:
        raise ValueError(
            'Namespace (%s) mismatch with volume namespace (%s)'
            % (self.namespace, self.volume_ns))

    # action
    self.action_name = self.conf['action'].lower()
    if self.action_name == 'insert':
        self.action = self._insert_bean
    elif self.action_name == 'update':
        self.action = self._update_bean
    elif self.action_name == 'check':
        self.action = self._check_bean
    else:
        raise ValueError('Unknown action (%s)' % self.action_name)

    # speed
    self.chunks_run_time = 0
    self.max_chunks_per_second = int_value(
        self.conf.get('chunks_per_second'),
        self.DEFAULT_CHUNKS_PER_SECOND)

    # counters
    self.chunks_processed = 0
    self.chunk_errors = 0
    self.beans_processed = dict()
    self.bean_successes = dict()
    self.bean_already_exists = dict()
    self.bean_orphans = dict()
    self.bean_errors = dict()
    for bean_type in self.BEAN_TYPES:
        self.beans_processed[bean_type] = 0
        self.bean_successes[bean_type] = 0
        self.bean_already_exists[bean_type] = 0
        self.bean_orphans[bean_type] = 0
        self.bean_errors[bean_type] = 0

    # report
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(conf.get('report_interval'),
                                     self.DEFAULT_REPORT_INTERVAL)

    self.client = ContainerClient({'namespace': self.namespace},
                                  logger=self.logger)
    self.ctime = int(time.time())
def __init__(self, conf, service, **kwargs):
    self.conf = conf
    self.running = False

    for k in ['host', 'port', 'type']:
        if k not in service:
            raise Exception('Missing field "%s" in service configuration'
                            % k)
    self.name = '%s|%s|%s' % (service['type'], service['host'],
                              service['port'])
    self.service = service

    self.rise = int_value(self._load_item_config('rise'), 1)
    self.fall = int_value(self._load_item_config('fall'), 1)
    self.check_interval = float_value(
        self._load_item_config('check_interval'), 1)
    self.deregister_on_exit = true_value(
        self._load_item_config('deregister_on_exit', False))

    self.logger = get_logger(self.conf)
    self.pool_manager = get_pool_manager()
    self.cs = ConscienceClient(self.conf, pool_manager=self.pool_manager,
                               logger=self.logger)
    # FIXME: explain that
    self.client = ProxyClient(self.conf, pool_manager=self.pool_manager,
                              no_ns_in_url=True, logger=self.logger)
    self.last_status = False
    self.status = False
    self.failed = False
    self.service_definition = {
        'ns': self.conf['namespace'],
        'type': self.service['type'],
        'addr': '%s:%s' % (self.service['host'], self.service['port']),
        'score': 0,
        'tags': {}}
    if self.service.get('slots', None):
        self.service_definition['tags']['tag.slots'] = \
            ','.join(self.service['slots'])
    for name, tag in (('location', 'tag.loc'),
                      ('service_id', 'tag.service_id'),
                      ('tls', 'tag.tls')):
        if self.service.get(name):
            self.service_definition['tags'][tag] = self.service[name]

    self.service_checks = list()
    self.service_stats = list()
    self.init_checkers(service)
    self.init_stats(service)
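
# Illustration of the service registration payload built above, for a
# hypothetical service entry (the values below are made up for the example):
#
# service = {'type': 'rawx', 'host': '127.0.0.1', 'port': 6200,
#            'location': 'rack1.srv2', 'slots': ['rawx', 'fast']}
# # would yield roughly:
# # self.service_definition = {
# #     'ns': conf['namespace'],
# #     'type': 'rawx',
# #     'addr': '127.0.0.1:6200',
# #     'score': 0,
# #     'tags': {'tag.slots': 'rawx,fast', 'tag.loc': 'rack1.srv2'}}
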
def __init__(self, conf, logger, **kwargs):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.passes = 0
    self.errors = 0
    self.last_reported = 0
    self.items_run_time = 0
    self.total_items_processed = 0
    self.waiting_time = 0
    self.rebuilder_time = 0
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_items_per_second = int_value(conf.get('items_per_second'), 30)
def __init__(self, volume_path, conf, pool_manager=None):
    """
    Initializes an indexing worker for meta2 databases.

    Possible values of conf relating to this worker are:
    - interval: (int) in sec, time between two full scans.
      Default: half an hour.
    - report_interval: (int) in sec, time between two reports.
      Default: 300.
    - scanned_per_second: (int) maximum number of indexed databases per
      second.
    - try_removing_faulty_indexes: in the event where we encounter a
      database that is not supposed to be handled by this volume, attempt
      to remove it from this volume's rdir index if it exists.
      WARNING: the decision is based on a proxy response, which could be
      affected by cache inconsistencies for example; use at your own risk.
      Default: False.

    :param volume_path: The volume path to be indexed
    :param conf: The configuration to be passed to the needed services
    :param pool_manager: A connection pool manager. If none is given, a
        new one with a default size of 10 will be created.
    """
    self.logger = get_logger(conf)
    self._stop = False
    self.volume = volume_path
    self.success_nb = 0
    self.failed_nb = 0
    self.full_scan_nb = 0
    self.last_report_time = 0
    self.last_scan_time = 0
    self.last_index_time = 0
    self.start_time = 0
    self.indexed_since_last_report = 0
    self.scans_interval = int_value(conf.get('interval'), 1800)
    self.report_interval = int_value(conf.get('report_interval'), 300)
    self.max_indexed_per_second = int_value(
        conf.get('scanned_per_second'), 3000)
    self.namespace, self.volume_id = check_volume_for_service_type(
        self.volume, "meta2")
    self.attempt_bad_index_removal = boolean_value(
        conf.get('try_removing_faulty_indexes', False))

    if not pool_manager:
        pool_manager = get_pool_manager(pool_connections=10)
    self.index_client = RdirClient(conf, logger=self.logger,
                                   pool_manager=pool_manager)
    self.dir_client = DirectoryClient(conf, logger=self.logger,
                                      pool_manager=pool_manager)
def sanitize_params(cls, job_params):
    sanitized_job_params, _ = super(
        TesterJob, cls).sanitize_params(job_params)

    sanitized_job_params['start'] = int_value(job_params.get('start'),
                                              cls.DEFAULT_START)

    sanitized_job_params['end'] = int_value(job_params.get('end'),
                                            cls.DEFAULT_END)

    sanitized_job_params['error_percentage'] = int_value(
        job_params.get('error_percentage'), cls.DEFAULT_ERROR_PERCENTAGE)

    return sanitized_job_params, job_params.get('lock', cls.DEFAULT_LOCK)
def __init__(self, conf, logger, volume, try_chunk_delete=False,
             beanstalkd_addr=None, **kwargs):
    super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
    # rdir
    self.rdir_client = RdirClient(conf, logger=self.logger)
    self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
    # rawx
    self.try_chunk_delete = try_chunk_delete
    # beanstalk
    if beanstalkd_addr:
        self.beanstalkd_listener = BeanstalkdListener(
            beanstalkd_addr,
            conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
            self.logger, **kwargs)
    else:
        self.beanstalkd_listener = None
    # counters
    self.bytes_processed = 0
    self.total_bytes_processed = 0
    self.total_expected_chunks = None
    # distributed
    self.distributed = False
def on_job_list(self, req):
    limit = int_value(req.args.get('limit'), None)
    marker = req.args.get('marker')

    job_infos = self.backend.list_jobs(limit=limit, marker=marker)
    return Response(json.dumps(job_infos), mimetype='application/json')
def __init__(self, conf, beanstalkd_addr=None, logger=None):
    self.conf = conf
    self.logger = logger or get_logger(self.conf)
    self.namespace = conf['namespace']
    self.success = True

    # counters
    self.items_processed = 0
    self.total_items_processed = 0
    self.errors = 0
    self.total_errors = 0
    self.total_expected_items = None

    # report
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(self.conf.get('report_interval'),
                                     self.DEFAULT_REPORT_INTERVAL)

    # dispatcher
    self.dispatcher = None

    # input
    self.beanstalkd = None
    if beanstalkd_addr:
        self.beanstalkd = BeanstalkdListener(
            beanstalkd_addr,
            self.conf.get('beanstalkd_worker_tube')
            or self.DEFAULT_BEANSTALKD_WORKER_TUBE,
            self.logger)
def __init__(self, conf, tool):
    super(_LocalDispatcher, self).__init__(conf, tool)

    nb_workers = int_value(self.conf.get('workers'),
                           self.tool.DEFAULT_WORKERS)
    self.max_items_per_second = int_value(
        self.conf.get('items_per_second'),
        self.tool.DEFAULT_ITEM_PER_SECOND)
    self.queue_workers = eventlet.Queue(nb_workers * 2)
    self.queue_reply = eventlet.Queue()

    self.workers = list()
    for _ in range(nb_workers):
        worker = self.tool.create_worker(self.queue_workers,
                                         self.queue_reply)
        self.workers.append(worker)
def run(self):
    coros = []
    queue_url = self.conf.get('queue_url', 'beanstalk://127.0.0.1:11300')
    concurrency = int_value(self.conf.get('concurrency'), 10)

    server_gt = greenthread.getcurrent()

    for url in queue_url.split(';'):
        for i in range(concurrency):
            beanstalk = Beanstalk.from_url(url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

    while self.alive:
        self.notify()
        try:
            eventlet.sleep(1.0)
        except AssertionError:
            self.alive = False
            break

    self.notify()
    try:
        with Timeout(self.graceful_timeout) as t:
            [c.kill(StopServe()) for c in coros]
            [c.wait() for c in coros]
    except Timeout as te:
        if te != t:
            raise
        [c.kill() for c in coros]
def on_account_containers(self, req):
    account_id = self._get_account_id(req)

    info = self.backend.info_account(account_id)
    if not info:
        return NotFound('Account not found')

    marker = req.args.get('marker', '')
    end_marker = req.args.get('end_marker', '')
    prefix = req.args.get('prefix', '')
    limit = max(0, min(ACCOUNT_LISTING_MAX_LIMIT,
                       int_value(req.args.get('limit'), 0)))
    if limit <= 0:
        limit = ACCOUNT_LISTING_DEFAULT_LIMIT
    delimiter = req.args.get('delimiter', '')
    s3_buckets_only = true_value(req.args.get('s3_buckets_only', False))

    user_list = self.backend.list_containers(
        account_id, limit=limit, marker=marker, end_marker=end_marker,
        prefix=prefix, delimiter=delimiter,
        s3_buckets_only=s3_buckets_only)

    info['listing'] = user_list
    # TODO(FVE): add "truncated" entry telling if the listing is truncated
    result = json.dumps(info)
    return Response(result, mimetype='text/json')
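
# Worked example of the limit clamping above. The constants are whatever the
# account service defines; the values below are assumptions for illustration:
# with ACCOUNT_LISTING_MAX_LIMIT = 10000 and ACCOUNT_LISTING_DEFAULT_LIMIT =
# 1000:
#   ?limit=50      -> 50
#   ?limit=999999  -> 10000 (capped to the maximum)
#   missing or 0   -> 1000  (falls back to the default)
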
def rebuild(self):
    pile = GreenPile(len(self.meta_chunk))

    nb_data = self.storage_method.ec_nb_data

    headers = {}
    for chunk in self.meta_chunk:
        pile.spawn(self._get_response, chunk, headers)

    # Sort all responses according to the chunk size
    total_resps = 0
    resps_by_size = dict()
    resps_without_chunk_size = list()
    for resp in pile:
        if not resp:
            continue
        chunk_size = int_value(
            resp.getheader(CHUNK_HEADERS['chunk_size'], None), None)
        if chunk_size is None:
            self.logger.warning('Missing chunk size')
            resps_without_chunk_size.append(resp)
            continue
        total_resps += 1
        resps_by_size.setdefault(chunk_size, list()).append(resp)

    # Select the chunk with the majority chunk size
    resps = None
    max_resps = 0
    assumed_chunk_size = None
    for chunk_size, resps in resps_by_size.items():
        nb_resp = len(resps)
        if nb_resp > max_resps:
            max_resps = nb_resp
            assumed_chunk_size = chunk_size
    if assumed_chunk_size is None:
        self.logger.warning(
            'No chunk available with chunk size information')
        resps = list()
    else:
        resps = resps_by_size[assumed_chunk_size]
        if max_resps != total_resps:
            self.logger.warning(
                '%d/%d chunks are not the same size as others (%d), '
                'they should be removed',
                total_resps - max_resps, total_resps, assumed_chunk_size)

    # Check the number of chunks available
    if max_resps < nb_data:
        # Add the chunks without size information
        # assuming they are the correct size
        resps = resps + resps_without_chunk_size
        if len(resps) < nb_data:
            self.logger.error(
                'Unable to read enough valid sources to rebuild')
            raise exceptions.UnrecoverableContent(
                'Not enough valid sources to rebuild')
        self.logger.warning(
            'Use chunk(s) without size information to rebuild a chunk')

    rebuild_iter = self._make_rebuild_iter(resps[:nb_data])
    return assumed_chunk_size, rebuild_iter
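
# Minimal self-contained sketch of the "majority chunk size" selection used
# by rebuild() above, without the network and EC plumbing. `sizes` stands in
# for the chunk sizes read from the response headers; the function name is
# hypothetical and only illustrates the voting logic.
def _assumed_chunk_size_sketch(sizes):
    counts = {}
    for size in sizes:
        counts[size] = counts.get(size, 0) + 1
    if not counts:
        # No response carried a chunk size header.
        return None
    # Keep the size reported by the largest number of chunks.
    return max(counts, key=counts.get)


# Example: five chunks agree on 262144 bytes, one reports a different size.
assert _assumed_chunk_size_sketch([262144] * 5 + [131072]) == 262144
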
def __init__(self, conf, beanstalkd_addr=None, logger=None):
    self.conf = conf
    self.logger = logger or get_logger(self.conf)
    self.namespace = conf['namespace']
    self.success = True

    # exit gracefully
    self.running = True
    signal.signal(signal.SIGINT, self.exit_gracefully)
    signal.signal(signal.SIGTERM, self.exit_gracefully)

    # counters
    self.items_processed = 0
    self.total_items_processed = 0
    self.errors = 0
    self.total_errors = 0
    self.total_expected_items = None

    # report
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(self.conf.get('report_interval'),
                                     self.DEFAULT_REPORT_INTERVAL)

    # dispatcher
    self.dispatcher = None

    # input
    self.beanstalkd = None
    if beanstalkd_addr:
        self.beanstalkd = BeanstalkdListener(
            beanstalkd_addr,
            self.conf.get('beanstalkd_worker_tube')
            or self.DEFAULT_BEANSTALKD_WORKER_TUBE,
            self.logger)

    # retry
    self.retryer = None
    self.retry_queue = None
    if self.beanstalkd:
        self.retryer = BeanstalkdSender(self.beanstalkd.addr,
                                        self.beanstalkd.tube,
                                        self.logger)
        self.retry_queue = eventlet.Queue()
    self.retry_delay = int_value(self.conf.get('retry_delay'),
                                 self.DEFAULT_RETRY_DELAY)
def __init__(self, conf, logger, volume, input_file=None, **kwargs):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    self.namespace = conf['namespace']
    self.volume = volume
    self.input_file = input_file
    self.nworkers = int_value(conf.get('workers'), 1)
    # counters
    self.lock_counters = threading.Lock()
    self.items_processed = 0
    self.errors = 0
    self.total_items_processed = 0
    self.total_errors = 0
    # report
    self.lock_report = threading.Lock()
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(conf.get('report_interval'), 3600)
def __init__(self, conf, **kwargs):
    super(BlobIndexer, self).__init__(conf)
    self.logger = get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise exc.ConfigurationException('No volume specified for indexer')
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.successes = 0
    self.last_reported = 0
    self.chunks_run_time = 0
    self.interval = int_value(conf.get('interval'), 300)
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    self.index_client = RdirClient(conf, logger=self.logger)
    self.namespace, self.volume_id = check_volume(self.volume)
def __init__(self, rebuilder, **kwargs):
    self.rebuilder = rebuilder
    self.conf = rebuilder.conf
    self.logger = rebuilder.logger
    self.namespace = rebuilder.namespace
    self.volume = rebuilder.volume
    self.items_run_time = 0
    self.max_items_per_second = int_value(
        rebuilder.conf.get('items_per_second'), 30)
def list_buckets(self, account_id, limit=1000, marker=None,
                 end_marker=None, prefix=None, **kwargs):
    """
    Get the list of buckets of the specified account.

    :returns: the list of buckets (with metadata), and the next
        marker (in case the list is truncated).
    """
    raw_list, next_marker = self._raw_listing(
        self.blistkey(account_id), limit=limit, marker=marker,
        end_marker=end_marker, prefix=prefix, **kwargs)
    conn = self.get_slave_conn(**kwargs)
    pipeline = conn.pipeline(True)
    for entry in raw_list:
        # For real buckets (not prefixes), fetch metadata.
        if not entry[3]:
            pipeline.hmget(self.bkey(entry[0]),
                           'objects', 'bytes', 'mtime')
    res = pipeline.execute()

    output = list()
    i = 0
    for bucket in raw_list:
        if not bucket[3]:
            bdict = {
                'name': bucket[0],
                'objects': int_value(res[i][0], 0),
                'bytes': int_value(res[i][1], 0),
                'mtime': float_value(res[i][2], 0.0),
            }
            i += 1
        else:
            bdict = {'prefix': bucket}
        output.append(bdict)
    return output, next_marker
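
# Hedged usage note for list_buckets() above: each raw listing entry is
# expected to be a sequence where entry[0] is the bucket name and entry[3] is
# truthy for delimiter prefixes (prefix entries keep the raw entry under the
# 'prefix' key). A made-up result could look like:
#
# buckets, next_marker = backend.list_buckets('AUTH_demo', limit=100)
# # buckets == [{'name': 'photos', 'objects': 42, 'bytes': 1048576,
# #              'mtime': 1546300800.0},
# #             ...]
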
def __init__(self, conf, logger, beanstalkd_addr, **kwargs):
    super(BlobImprover, self).__init__(conf, logger, volume=None, **kwargs)
    self.content_factory = ContentFactory(self.conf, logger=self.logger)
    beanstalkd_tube = self.conf.get('beanstalkd_tube',
                                    DEFAULT_IMPROVER_TUBE)
    self.listener = BeanstalkdListener(beanstalkd_addr, beanstalkd_tube,
                                       self.logger, **kwargs)
    self.sender = BeanstalkdSender(beanstalkd_addr, beanstalkd_tube,
                                   self.logger, **kwargs)
    self.retry_delay = int_value(self.conf.get('retry_delay'), 30)
    self.reqid_prefix = 'blob-impr-'
def __init__(self, conf, input_file=None, service_id=None, **kwargs):
    super(Meta2Rebuilder, self).__init__(conf, **kwargs)

    # input
    self.input_file = input_file
    self.meta2_id = service_id

    # rawx/rdir
    self.rdir_client = RdirClient(self.conf, logger=self.logger)
    self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                      self.DEFAULT_RDIR_FETCH_LIMIT)