def __init__(self, ctx, key, key_infos, missed_groups, node, callback):
    self.ctx = ctx
    self.complete = threading.Event()
    self.callback = callback
    self.stats = RecoverStat()
    self.key = key
    self.key_flags = 0
    self.key_infos = key_infos
    self.diff_groups = []
    self.missed_groups = list(missed_groups)
    self.read_session = elliptics.Session(node)
    self.read_session.trace_id = ctx.trace_id
    self.read_session.set_filter(elliptics.filters.all)
    self.write_session = elliptics.Session(node)
    self.write_session.trace_id = ctx.trace_id
    self.write_session.set_checker(elliptics.checkers.all)
    self.remove_session = elliptics.Session(node)
    self.remove_session.trace_id = ctx.trace_id
    self.remove_session.set_checker(elliptics.checkers.all)
    self.result = False
    self.attempt = 0
    log.debug("Recovering key: {0} from nonempty groups: {1} and missed groups: {2}"
              .format(repr(self.key),
                      [k.group_id for k in self.key_infos],
                      self.missed_groups))
    self.run()
def __init__(self, key, timestamp, size, address, backend_id, group, ctx, node,
             check=True, callback=None):
    self.key = key
    self.key_timestamp = timestamp
    self.address = address
    self.backend_id = backend_id
    self.group = group
    self.node = node
    self.direct_session = elliptics.Session(node)
    self.direct_session.set_direct_id(self.address, self.backend_id)
    self.direct_session.groups = [group]
    self.session = elliptics.Session(node)
    self.session.groups = [group]
    self.ctx = ctx
    self.stats = RecoverStat()
    self.result = True
    self.attempt = 0
    self.total_size = size
    self.recovered_size = 0
    self.just_remove = False
    # If the object is larger than one chunk, it should be read/written in chunks.
    self.chunked = self.total_size > self.ctx.chunk_size
    self.check = check
    self.callback = callback
    self.complete = threading.Event()
    log.debug("Created Recovery object for key: {0}, node: {1}/{2}".format(
        repr(key), address, backend_id))
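# Hedged sketch: how the `chunked` flag above could drive a chunked copy loop.
# This is illustrative only, not the recovery tool's actual method:
# `recover_chunked` is a hypothetical helper, and the read_data/write_data
# offset/size keyword arguments are assumptions based on the elliptics
# Python binding.
def recover_chunked(recovery):
    offset = 0
    while offset < recovery.total_size:
        size = min(recovery.ctx.chunk_size, recovery.total_size - offset)
        # Read one chunk directly from the source backend...
        data = recovery.direct_session.read_data(
            recovery.key, offset=offset, size=size).get()[0].data
        # ...and write it back through the regular session at the same offset.
        recovery.session.write_data(recovery.key, data, offset).get()
        offset += size
        recovery.recovered_size += size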
def test_2_backends_with_equal_ids_and_group(self, servers):
    '''
    Checks correct handling of the situation when a backend on one node
    has the same group and ids as a backend on another node.
    For this test the nodes must not know about each other: their `remote`
    lists are empty.
    The test creates 2 client nodes and connects each of them to a different
    server node. On each server it picks one backend from the same group and
    updates the ids of both backends to make them equal.
    Adding the first node as a remote to the second client node then fails
    the route-list update with -EEXIST and raises an exception.
    After that, reading a nonexistent key from every group must also raise
    an exception. With the old bug this test case caused a `Segmentation
    fault` in read_data_from_groups.
    At the end the test reverts the ids of both backends.
    '''
    address1, address2 = servers.remotes

    address1 = elliptics.Address.from_host_port_family(address1)
    session1 = elliptics.Session(
        elliptics.Node(
            elliptics.Logger("client.log", elliptics.log_level.debug)))
    session1._node.add_remotes(address1)
    routes = session1.routes.filter_by_address(address1)
    group = routes.groups()[-1]
    groups = routes.groups()
    routes = session1.routes.filter_by_group(group)
    ids = [session1.transform('somekey')]
    backend_id1 = routes.get_address_backends(address1)[0]
    old_ids1 = [r.id
                for r in routes.filter_by_address(address1).filter_by_backend(backend_id1)]
    session1.set_backend_ids(address1, backend_id1, ids).get()

    address2 = elliptics.Address.from_host_port_family(address2)
    session2 = elliptics.Session(
        elliptics.Node(
            elliptics.Logger("client.log", elliptics.log_level.debug)))
    session2._node.add_remotes(address2)
    routes = session2.routes.filter_by_group(group)
    backend_id2 = routes.get_address_backends(address2)[0]
    old_ids2 = [r.id
                for r in routes.filter_by_address(address2).filter_by_backend(backend_id2)]
    session2.set_backend_ids(address2, backend_id2, ids).get()

    with pytest.raises(elliptics.core.Error):
        session2._node.add_remotes(address1)

    assert session2.routes.addresses() == (address2, )

    for g in groups:
        with pytest.raises(elliptics.core.Error):
            session2.read_data_from_groups(
                'unique key for test_2_backends_with_equal_ids_and_group',
                [g]).get()

    session1.set_backend_ids(address1, backend_id1, old_ids1).get()
    session2.set_backend_ids(address2, backend_id2, old_ids2).get()
def make_elliptics_node():
    log = elliptics.Logger('/tmp/ell-namespace-convert.log', config["dnet_log_mask"])

    node_config = elliptics.Config()
    meta_node = elliptics.Node(log, node_config)

    addresses = [elliptics.Address(host=str(node[0]), port=node[1], family=node[2])
                 for node in config["metadata"]["nodes"]]
    logger.info('Connecting to meta nodes: {0}'.format(config["metadata"]["nodes"]))
    meta_wait_timeout = config['metadata'].get('wait_timeout', 5)

    try:
        meta_node.add_remotes(addresses)
    except Exception as e:
        logger.error('Failed to connect to any elliptics meta storage node: {0}'.format(e))
        raise ValueError('Failed to connect to any elliptics storage META node')

    meta_session = elliptics.Session(meta_node)
    meta_session.set_timeout(meta_wait_timeout)
    meta_session.add_groups(list(config["metadata"]["groups"]))

    nodes = config.get('elliptics', {}).get('nodes', []) or config["elliptics_nodes"]
    logger.debug("config elliptics nodes: %s" % str(nodes))

    node_config = elliptics.Config()
    node_config.io_thread_num = config.get('io_thread_num', 1)
    node_config.nonblocking_io_thread_num = config.get('nonblocking_io_thread_num', 1)
    node_config.net_thread_num = config.get('net_thread_num', 1)

    logger.info('Node config: io_thread_num {0}, nonblocking_io_thread_num {1}, '
                'net_thread_num {2}'.format(node_config.io_thread_num,
                                            node_config.nonblocking_io_thread_num,
                                            node_config.net_thread_num))

    n = elliptics.Node(log, node_config)

    addresses = [elliptics.Address(host=str(node[0]), port=node[1], family=node[2])
                 for node in nodes]
    try:
        n.add_remotes(addresses)
    except Exception as e:
        logger.error('Failed to connect to any elliptics storage node: {0}'.format(e))
        raise ValueError('Failed to connect to any elliptics storage node')

    n.meta_session = meta_session

    wait_timeout = config.get('elliptics', {}).get('wait_timeout', 5)
    s = elliptics.Session(n)
    s.set_timeout(wait_timeout)

    print 'sleeping for wait timeout: {0} seconds'.format(wait_timeout)
    time.sleep(wait_timeout)

    return n
def __init__(self, key, origin_group, diff_groups, missed_groups, node):
    self.complete = threading.Event()
    self.stats = RecoverStat()
    self.key = key
    self.origin_group = origin_group
    self.diff_groups = set(diff_groups)
    self.missed_groups = set(missed_groups)
    self.origin_session = elliptics.Session(node)
    self.origin_session.groups = [origin_group]
    self.write_session = elliptics.Session(node)
    self.result = False
def test_monitor_stat(self, server, simple_node):
    session = elliptics.Session(simple_node)
    for addr in session.routes.addresses():
        stat = session.monitor_stat(addr).get()[0]
        assert stat.error.code == 0
        assert stat.error.message == ''
        assert type(stat.statistics) == dict
def __init__(self, config):
    cfg = elliptics.Config()
    # Time to wait for an operation to complete.
    cfg.config.wait_timeout = config.get("wait-timeout", 60)
    # Timeout for pinging a node.
    cfg.config.check_timeout = config.get("check_timeout", 60)
    # Number of IO threads in the processing pool.
    cfg.config.io_thread_num = config.get("io-thread-num", 2)
    # Number of threads in the network processing pool.
    cfg.config.net_thread_num = config.get("net-thread-num", 2)
    # Number of IO threads in the processing pool dedicated to nonblocking ops.
    nonblock_io_threads = config.get("nonblocking_io_thread_num", 2)
    cfg.config.nonblocking_io_thread_num = nonblock_io_threads

    groups = config.get('groups', [])
    if len(groups) == 0:
        raise ValueError("Specify groups")

    # Log level of the elliptics logger.
    elliptics_log_level = config.get('verbosity', 0)
    # Path to the log file.
    elliptics_log_file = config.get('logfile', '/dev/stderr')
    log = elliptics.Logger(elliptics_log_file, elliptics_log_level)

    self._elliptics_node = elliptics.Node(log, cfg)
    for host, port in config.get('nodes').iteritems():
        self._elliptics_node.add_remote(host, port)

    self._session = elliptics.Session(self._elliptics_node)
    self._session.groups = groups
    self._session.set_namespace(NAMESPACE)
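# Hedged usage sketch for the constructor above. `EllipticsStorage` is a
# hypothetical name for the enclosing class; 'nodes' maps host -> port, as
# the iteritems() loop implies, and the values below are illustrative only.
# storage = EllipticsStorage({
#     'wait-timeout': 30,
#     'groups': [1, 2],
#     'nodes': {'localhost': 1025},
#     'verbosity': 2,
#     'logfile': '/var/log/elliptics-client.log',
# })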
def make_symm_group(n, couple, namespace):
    logger.info('writing couple info: ' + str(couple))
    logger.info('groups in couple %s are being assigned namespace "%s"' %
                (couple, namespace))

    s = elliptics.Session(n)
    s.set_timeout(config.get('wait_timeout', 5))

    good = []
    bad = ()
    for group in couple:
        try:
            packed = msgpack.packb(couple.compose_group_meta(namespace))
            logger.info('packed couple for group %d: "%s"' %
                        (group.group_id, str(packed).encode('hex')))
            s.add_groups([group.group_id])
            EllAsyncResult(s.write_data(keys.SYMMETRIC_GROUPS_KEY, packed),
                           EllLookupResult).get()
            group.parse_meta(packed)
            good.append(group.group_id)
        except Exception as e:
            logger.error('Failed to write symm group info, group %d: %s\n%s' %
                         (group.group_id, str(e), traceback.format_exc()))
            bad = (group.group_id, e)
            break
    return (good, bad)
def get_group_meta(self, request):
    gid = request[0]
    key = request[1] or keys.SYMMETRIC_GROUPS_KEY
    unpack = request[2]

    if gid not in storage.groups:
        raise ValueError('Group %d is not found' % gid)

    group = storage.groups[gid]

    logger.info('Creating elliptics session')
    s = elliptics.Session(self.node)
    s.set_timeout(config.get('wait_timeout', 5))
    s.add_groups([group.group_id])

    data = s.read_data(key).get()[0]

    logger.info('Read key {0} from group {1}: {2}'.format(
        key.replace('\0', r'\0'), group, data.data))

    return {
        'id': repr(data.id),
        'full_id': str(data.id),
        'data': msgpack.unpackb(data.data) if unpack else data.data,
    }
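# Hedged usage sketch: `request` is a (group_id, key, unpack) tuple, where key
# may be None to fall back to keys.SYMMETRIC_GROUPS_KEY. `handler` is a
# hypothetical instance of the enclosing class; the group id is illustrative.
# meta = handler.get_group_meta((42, None, True))
# print meta['full_id'], meta['data']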
def connect(endpoints, groups, **kw):
    remotes = []
    for r in endpoints:
        parts = r.split(":")
        remotes.append((parts[0], int(parts[1])))

    def rename(new, old):
        if old in kw:
            kw[new] = kw.pop(old)

    kw.pop('elog', None)
    kw.pop('cfg', None)
    kw.pop('remotes', None)
    rename('log_file', 'logfile')
    rename('log_level', 'loglevel')

    n = elliptics.create_node(**kw)

    # def create_node(**kw):
    #     log = elliptics.Logger(kw.get('logfile', '/dev/stderr'), kw.get('loglevel', 1))
    #     config = elliptics.Config()
    #     config.config.wait_timeout = kw.get('wait-timeout', 60)
    #     return elliptics.Node(log, config)
    # n = create_node(**kw)

    for r in remotes:
        try:
            n.add_remote(r[0], r[1])
        except Exception:
            pass

    s = elliptics.Session(n)
    s.add_groups(groups)

    # XXX: Is it time to drop the PassthroughWrapper binder?
    return PassthroughWrapper(n, s)
def elliptics_create_session(node=None, group=None,
                             cflags=elliptics.command_flags.default,
                             trace_id=0):
    log.debug("Creating session: {0}@{1}.{2}".format(node, group, cflags))

    session = elliptics.Session(node)
    session.groups = [group]
    session.cflags = cflags
    session.trace_id = trace_id

    return session
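# Hedged usage sketch for elliptics_create_session(), assuming `node` has
# already been built and connected (e.g. via elliptics_create_node) and that
# group 1 exists; the values are illustrative only.
# session = elliptics_create_session(node=node, group=1,
#                                    cflags=elliptics.command_flags.nolock,
#                                    trace_id=ctx.trace_id)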
def test_indexes_simple(self, server, simple_node):
    session = elliptics.Session(simple_node)
    session.groups = session.routes.groups()

    check_dict = {}
    key = 'simple_key'
    indexes = ['simple_index_1', 'simple_index_2', 'simple_index_3',
               'simple_index_4', 'simple_index_5']
    datas = ['key_data_1', 'key_data_2', 'key_data_3',
             'key_data_4', 'key_data_5']

    session.set_indexes(key, indexes, datas).wait()
    for i, idx in enumerate(indexes):
        check_dict[idx] = datas[i]
    self.check_indexes(session, key, check_dict.keys(), check_dict.values())

    indexes_2 = ['simple_index_4', 'simple_index_5',
                 'simple_index_6', 'simple_index_7']
    datas_2 = ['key_data_4.2', 'key_data_5.2', 'key_data_6.2', 'key_data_7.2']

    session.update_indexes(key, indexes_2, datas_2).wait()
    for i, idx in enumerate(indexes_2):
        check_dict[idx] = datas_2[i]
    self.check_indexes(session, key, check_dict.keys(), check_dict.values())

    removed_indexes = indexes[:3]
    session.remove_indexes(key, removed_indexes).wait()
    for idx in removed_indexes:
        del check_dict[idx]
    self.check_indexes(session, key, check_dict.keys(), check_dict.values())
def test_indexes_dict(self, server, simple_node):
    session = elliptics.Session(simple_node)
    session.groups = session.routes.groups()

    key = 'dict_key'
    indexes = {
        'dict_index_1': 'key_data_1',
        'dict_index_2': 'key_data_2',
        'dict_index_3': 'key_data_3',
        'dict_index_4': 'key_data_4',
        'dict_index_5': 'key_data_5'
    }

    set_session = session.clone()
    # We want to count only successfully finished transactions.
    set_session.set_filter(elliptics.filters.positive_final)
    result = set_session.set_indexes(key, indexes)
    assert len(result.get()) == len(session.groups)
    self.check_indexes(session, key, indexes.keys(), indexes.values())

    indexes_2 = {
        'dict_index_4': 'key_data_4.2',
        'dict_index_5': 'key_data_5.2',
        'dict_index_6': 'key_data_6.2',
        'dict_index_7': 'key_data_7.2'
    }

    session.update_indexes(key, indexes_2).wait()
    indexes.update(indexes_2)
    self.check_indexes(session, key, indexes.keys(), indexes.values())
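# Hedged sketch of the check_indexes() helper the two index tests above rely
# on; its real definition is not part of this section. It assumes
# session.list_indexes(key) returns entries carrying `index` (a transformed
# elliptics.Id) and `data` attributes, per the elliptics Python binding.
def check_indexes(self, session, key, indexes, datas):
    results = session.list_indexes(key).get()
    assert len(results) == len(indexes)
    # Index names come back as transformed ids, so compare transformed pairs.
    expected = sorted((str(session.transform(i)), d) for i, d in zip(indexes, datas))
    actual = sorted((str(r.index), r.data) for r in results)
    assert expected == actual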
def make_meta_session():
    log = elliptics.Logger('/tmp/ell-namespace-convert.log', config["dnet_log_mask"])

    node_config = elliptics.Config()
    meta_node = elliptics.Node(log, node_config)

    nodes = config['metadata']['nodes']
    addresses = [elliptics.Address(host=host, port=port, family=family)
                 for (host, port, family) in nodes]
    logger.info('Connecting to meta nodes: {0}'.format(nodes))

    try:
        meta_node.add_remotes(addresses)
    except Exception:
        raise ValueError('Failed to connect to any elliptics storage META node')

    meta_session = elliptics.Session(meta_node)
    meta_wait_timeout = config['metadata'].get('wait_timeout', 5)
    meta_session.set_timeout(meta_wait_timeout)
    meta_session.add_groups(list(config["metadata"]["groups"]))

    time.sleep(meta_wait_timeout)
    return meta_session
def make_meta_session():
    log = elliptics.Logger('/tmp/ell-namespace-convert.log', config["dnet_log_mask"])

    node_config = elliptics.Config()
    meta_node = elliptics.Node(log, node_config)

    addresses = [elliptics.Address(host=str(node[0]), port=node[1], family=node[2])
                 for node in config["metadata"]["nodes"]]
    logger.info('Connecting to meta nodes: {0}'.format(config["metadata"]["nodes"]))

    meta_wait_timeout = config['metadata'].get('wait_timeout', 5)
    try:
        meta_node.add_remotes(addresses)
    except Exception as e:
        logger.error('Failed to connect to any elliptics meta storage node: {0}'.format(e))
        raise ValueError('Failed to connect to any elliptics storage META node')

    meta_session = elliptics.Session(meta_node)
    meta_session.set_timeout(meta_wait_timeout)
    meta_session.add_groups(list(config["metadata"]["groups"]))

    time.sleep(meta_wait_timeout)
    return meta_session
def __init__(self, node, niu, job_processor, db):
    self.node = node
    self.niu = niu

    self.session = elliptics.Session(self.node)
    wait_timeout = config.get('elliptics', {}).get('wait_timeout', 5)
    self.session.set_timeout(wait_timeout)
    self.service_metakey = str(self.session.transform(keys.SYMMETRIC_GROUPS_KEY))

    try:
        keys_db_uri = config['metadata']['cache']['db']
    except KeyError:
        logger.error('Config parameter metadata.cache.db is required '
                     'for cache manager')
        raise
    self.keys_db = Collection(db[keys_db_uri], 'keys')

    self.distributor = CacheDistributor(self.node, self.keys_db, job_processor)

    self.top_keys = {}

    self.__tq = timed_queue.TimedQueue()

    self.nodes_update()
    self.update_cache_groups()

    self.top_update_timer = periodic_timer(
        seconds=CACHE_CFG.get('top_update_period', 1800))
    self.__tq.add_task_at(
        CacheManager.MONITOR_TOP_STATS,
        self.top_update_timer.next(),
        self.monitor_top_stats)
def connect(endpoints, groups, **kw):
    remotes = []
    for r in endpoints:
        remotes.append(elliptics.Address.from_host_port_family(r))

    def rename(kw, old, new):
        if old in kw:
            kw[new] = kw.pop(old)

    # Drop impeding attrs, just in case.
    kw.pop('elog', None)
    kw.pop('cfg', None)
    kw.pop('remotes', None)
    # Rename good names to the required bad ones.
    rename(kw, 'logfile', 'log_file')
    rename(kw, 'loglevel', 'log_level')

    n = elliptics.create_node(**kw)
    n.add_remotes(remotes)

    s = elliptics.Session(n)
    s.add_groups(groups)

    # return PassthroughWrapper(n, s)
    return s
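# Hedged usage sketch: from_host_port_family() implies "host:port:family"
# endpoint strings; the endpoint, groups, and log settings below are
# illustrative only.
# session = connect(['localhost:1025:2'], groups=[1, 2],
#                   logfile='/dev/stderr', loglevel=1)
# session.write_data('some_key', 'some_data').get()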
def __init__(self, node, keys_db, job_processor):
    self.node = node
    self.session = elliptics.Session(self.node)
    wait_timeout = config.get('elliptics', {}).get('wait_timeout', 5)
    self.session.set_timeout(wait_timeout)

    self.bandwidth_per_copy = CACHE_CFG.get('bandwidth_per_copy', 5242880)

    self.copies_reduce_factor = CACHE_CFG['copies_reduce_factor']
    assert 0.0 < self.copies_reduce_factor <= 1.0, \
        "Copies reduce factor should be in (0.0, 1.0] interval"

    self.copies_expand_step = CACHE_CFG['copies_expand_step']
    assert self.copies_expand_step > 0, \
        "Copies expand step should be > 0"

    self.keys_db = keys_db

    self.cleaner = CacheCleaner(self, job_processor)
    self.groups_units = {}
    self.cache_groups = {}
    self.executing_tasks = []
    self._cache_groups_lock = threading.Lock()

    self.node_types = inventory.get_balancer_node_types()
    self.dc_node_type = inventory.get_dc_node_type()

    self.dryrun = CACHE_CFG.get('dryrun', False)
def __init__(self, node, job_finder, couple_record_finder=None,
             prepare_namespaces_states=False, prepare_flow_stats=False,
             statistics=None):
    logger.info("Created NodeInfoUpdater")
    self.__node = node
    self.statistics = statistics
    self.job_finder = job_finder
    self.couple_record_finder = couple_record_finder
    self._namespaces_states = CachedGzipResponse()
    self._flow_stats = {}
    self.__tq = timed_queue.TimedQueue()
    self.__session = elliptics.Session(self.__node)
    wait_timeout = (config.get('elliptics', {}).get('wait_timeout') or
                    config.get('wait_timeout', 5))
    self.__session.set_timeout(wait_timeout)
    self.__nodeUpdateTimestamps = (time.time(), time.time())
    self.__cluster_update_lock = threading.Lock()

    if prepare_namespaces_states and statistics is None:
        raise AssertionError('Statistics is required for namespaces states calculation')
    if prepare_flow_stats and statistics is None:
        raise AssertionError('Statistics is required for flow stats calculation')
    self._prepare_namespaces_states = prepare_namespaces_states
    self._prepare_flow_stats = prepare_flow_stats
def test_resetting_timeout(self, server, simple_node):
    session = elliptics.Session(simple_node)
    assert session.timeout == 5  # check default timeout value
    session.timeout = 1          # set different value
    assert session.timeout == 1  # check that the value has been set
    session.timeout = 0          # set timeout to 0, which should reset to default
    assert session.timeout == 5  # check default timeout value
def elliptics_create_session(node=None, group=None,
                             cflags=elliptics.command_flags.default):
    log.debug("Creating session: {0}@{1}.{2}".format(node, group, cflags))

    session = elliptics.Session(node)
    session.set_groups([group])
    session.set_cflags(cflags)
    return session
def test_stat_log_count(self, server, simple_node):
    session = elliptics.Session(simple_node)
    stat_count = session.stat_log_count().get()
    assert len(stat_count) == len(session.routes.addresses())
    for stat in stat_count:
        assert stat.error.code == 0
        assert stat.error.message == ''
        assert stat.address.group_id == session.routes.get_address_group_id(stat.address)
def test_properties(self, server, simple_node, prop, setter, getter, values):
    session = elliptics.Session(node=simple_node)
    assert type(session) == elliptics.Session
    for value in values:
        set_property(session, prop, value, setter=setter, getter=getter)
def elliptics_session(self, groups, bucket):
    session = elliptics.Session(self.node)
    session.set_groups(groups)
    if bucket:
        session.set_namespace(bucket)
    return session
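# Hedged usage sketch: per-bucket namespacing keeps keys from different
# buckets from colliding. `obj` is a hypothetical instance of the enclosing
# class; the groups and bucket name are illustrative only.
# s = obj.elliptics_session(groups=[1, 2], bucket='photos')
# s.write_data('some_key', 'some_data').get()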
def test_stat_log(self, server, simple_node):
    session = elliptics.Session(simple_node)
    for addr in session.routes.addresses():
        addr_id = session.routes.get_address_id(addr)
        stat = session.stat_log(addr_id).get()[0]
        assert stat.error.code == 0
        assert stat.error.message == ''
        assert stat.address.group_id == session.routes.get_address_group_id(stat.address)
def __init__(self, ctx, node, group):
    self.routes = self._prepare_routes(ctx, group)
    self.session = elliptics.Session(node)
    self.session.exceptions_policy = elliptics.exceptions_policy.no_exceptions
    self.session.set_filter(elliptics.filters.all)
    self.session.timeout = 60
    self.session.groups = [group]
    self.session.trace_id = ctx.trace_id
    self.ctx = ctx
def test_write_without_groups(self, server, simple_node, key, data):
    session = elliptics.Session(simple_node)
    result = session.write_data(key, data)
    try:
        result.get()
    except elliptics.Error as e:
        assert e.message.message == 'insufficient results count due to'\
            ' checker: 0 of 1 (1): No such device or address: -6'
    else:
        pytest.fail('Failed: DID NOT RAISE')
def combine_logs(users, keys, new_key, batch_size, node, groups):
    log.debug("Creating session for reading and appending logs")
    log_session = elliptics.Session(node)
    log_session.ioflags = elliptics.io_flags.append
    log_session.cflags = elliptics.command_flags.nolock
    log_session.groups = groups

    log.debug("Creating session for reading and updating activity")
    activity_session = elliptics.Session(node)
    activity_session.cflags = elliptics.command_flags.nolock
    activity_session.groups = groups

    for key in keys:
        process_key(users=users,
                    key=key,
                    new_key=new_key,
                    batch_size=batch_size,
                    log_session=log_session,
                    activity_session=activity_session)
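# Hedged usage sketch for combine_logs(): `node` is assumed to be connected
# already, and the daily-log key names below are illustrative only.
# combine_logs(users=set(),
#              keys=['activity_log.2014-01-01', 'activity_log.2014-01-02'],
#              new_key='activity_log.combined',
#              batch_size=1024,
#              node=node,
#              groups=[1, 2])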
def lookup_keys(ctx):
    log.info("Start looking up keys")
    stats = ctx.stats["lookup"]
    stats.timer('process', 'started')

    elog = elliptics.Logger(ctx.log_file, int(ctx.log_level))
    node = elliptics_create_node(address=ctx.address,
                                 elog=elog,
                                 wait_timeout=ctx.wait_timeout,
                                 net_thread_num=1,
                                 io_thread_num=1,
                                 remotes=ctx.remotes)
    session = elliptics.Session(node)
    session.trace_id = ctx.trace_id

    filename = os.path.join(ctx.tmp_dir, 'merged_result')
    rest_keys_filename = os.path.join(ctx.tmp_dir, 'rest_keys')
    ctx.rest_file = open(rest_keys_filename, 'wb')
    ctx.bucket_files = dict()
    group_freq = dict()

    with open(ctx.dump_file, 'r') as dump_f:
        for str_id in dump_f:
            id = elliptics.Id(str_id)
            lookups = []
            for g in ctx.groups:
                session.groups = [g]
                lookups.append(session.read_data(id, size=1))

            key_infos = []
            for i, l in enumerate(lookups):
                try:
                    result = l.get()[0]
                    address = result.address
                    key_infos.append(KeyInfo(address,
                                             ctx.groups[i],
                                             result.timestamp,
                                             result.total_size,
                                             result.user_flags,
                                             result.record_flags))
                except Exception as e:
                    log.debug("Failed to lookup key: {0} in group: {1}: {2}, traceback: {3}"
                              .format(id, ctx.groups[i], repr(e), traceback.format_exc()))
                    stats.counter("lookups", -1)

            if len(key_infos) > 0:
                key_data = (id, key_infos)
                if not skip_key_data(ctx, key_data):
                    key_infos.sort(key=lambda x: (x.timestamp, x.size), reverse=True)
                    newest_key_group = key_infos[0].group_id
                    dump_key(ctx, id, key_infos, newest_key_group)
                    group_freq[newest_key_group] = group_freq.get(newest_key_group, 0) + 1
                stats.counter("lookups", len(key_infos))
            else:
                log.error("Key: {0} is missing in all specified groups: {1}. "
                          "It won't be recovered.".format(id, ctx.groups))
def combine_logs(remotes, groups, min_write, keys, new_key):
    elog = elliptics.Logger("/dev/stderr", 0)
    cfg = elliptics.Config()
    cfg.config.wait_timeout = 60
    cfg.config.check_timeout = 60
    cfg.config.io_thread_num = 16
    cfg.config.nonblocking_io_thread_num = 16
    cfg.config.net_thread_num = 16

    node = elliptics.Node(elog, cfg)
    for r in remotes:
        try:
            node.add_remote(addr=r[0], port=r[1], family=r[2])
        except Exception as e:
            print "Couldn't connect to elliptics node: {0}: {1}".format(r, e)

    log_s = elliptics.Session(node)
    log_s.set_groups(groups)
    log_s.set_ioflags(elliptics.io_flags.append)

    index_s = elliptics.Session(node)
    index_s.set_groups(groups)
    index_s.set_ioflags(elliptics.io_flags.cache)

    users = Set()
    print "Keys: {0}".format(keys)
    for key in keys:
        try:
            users.update(process_key(key, log_s, index_s, new_key))
        except Exception as e:
            print "Process key failed: {0}".format(e)

    print "Users: {0}".format(users)
    for u in users:
        try:
            index_s.update_indexes(elliptics.Id(u), [new_key + ".0"], [u])
        except Exception as e:
            print "Update_indexes failed: {0}".format(e)