def __sync_removed(self, keys_to_remove, namespaces, passive=True):
    """Evict cache keys that are no longer present in the remote meta state.

    :param keys_to_remove: mapping of namespace -> iterable of key names that
        were not seen during the last sync and should be dropped.
    :param namespaces: namespaces to process (each must be a key of
        ``keys_to_remove``).
    :param passive: when False, additionally publish a 'remove' task to the
        transport and remove the key from the upstream meta state; when True,
        only local bookkeeping is updated.
    """
    for ns in namespaces:
        for key in keys_to_remove[ns]:
            existing_key = self.keys[ns][key]
            if not passive:
                # Active mode: announce removal of the key from all of its
                # current distribution groups and drop it upstream.
                task = self.__transport_key(existing_key,
                                            action='remove',
                                            dgroups=existing_key['dgroups'])
                logger.info('Put task for cache distribution: %s' % task)
                transport.put(json.dumps(task))
                self.__upstream_remove_key(ns, existing_key)
            for gid in existing_key['dgroups']:
                # FIX: self.instances is keyed by group objects, not raw
                # group ids — __sync resolves gid via storage.groups before
                # indexing self.instances; do the same here for consistency.
                group = storage.groups[gid]
                # Cache size accounting is per cache instance the key
                # occupied, matching the add_file/remove_file pairing in
                # __sync.
                group_instance = self.instances[group]
                group_instance.remove_file(existing_key[self.ITEM_SIZE_KEY])
                self.__namespaces[ns]['cache_size'] -= existing_key[self.ITEM_SIZE_KEY]
                logger.info('Namespace %s: cache size changed -%s = %s' % (
                    ns,
                    mb(existing_key[self.ITEM_SIZE_KEY]),
                    mb(self.__namespaces[ns]['cache_size'])))
            # Forget the key locally only after all per-instance bookkeeping
            # succeeded.
            del self.keys[ns][key]
def __sync(self, items, namespace=None, passive=True):
    """Keeps the internal state up with remote meta state.

    Updates current state along with remote meta state when passive=False.

    :param items: iterable of key descriptors from the remote meta state;
        each item is a dict with at least 'key', and is expected to carry
        'dgroups', 'sgroups', 'traffic' and the size field
        (self.ITEM_SIZE_KEY) — TODO confirm full schema against the caller.
    :param namespace: when given (and known), restrict the sync to this
        single namespace; otherwise all known namespaces are processed and
        each item's own 'namespace' field is used.
    :param passive: when True, only local bookkeeping mirrors the remote
        state; when False, cache distribution tasks are also emitted and
        the upstream meta state is updated.
    """
    # Per-namespace set of keys we currently know about; every key still in
    # this set after processing `items` was not seen remotely and gets
    # removed by __sync_removed at the end.
    keys_to_remove = {}
    if namespace and namespace in self.__namespaces:
        namespaces = [namespace]
    else:
        namespaces = self.__namespaces
    if not namespaces:
        logger.info('No valid namespaces for synchronizing cache')
        return
    for ns in namespaces:
        keys_to_remove[ns] = set(self.keys[ns].keys())
    for item in items:
        logger.info('Updating cache key %s' % item['key'])
        ns = namespace or item.get('namespace')
        if not ns:
            logger.info('No namespace for key %s' % item['key'])
            continue
        # Seen remotely — do not treat as removed.
        keys_to_remove[ns].discard(item['key'])
        existing_key = self.keys[ns].get(item['key'])
        # ext_groups: the distribution groups the key should end up in,
        # restricted to groups that are actually among our cache instances.
        ext_groups = set()
        for gid in (item.get('dgroups') or []):
            group = storage.groups[gid]
            if not group in self.instances:
                continue
            ext_groups.add(gid)
        # Groups the key currently occupies locally (empty if the key is new).
        cur_groups = set(existing_key and existing_key['dgroups'] or [])
        if existing_key:
            logger.info('Existing key: %s' % item['key'])
        if not passive:
            # Active mode: decide how many cache instances the key still
            # needs based on its traffic, relative to where it already is.
            req_ci_num = self.__cache_instances_num(item['traffic'])
            req_ci_num -= len(cur_groups)
            logger.info('Key %s already dispatched %s cache instances, '
                        '%s more required' % (item['key'], len(cur_groups),
                                              req_ci_num))
            key = {}
            for k in ('key', self.ITEM_SIZE_KEY, 'traffic', 'sgroups'):
                key[k] = item[k]
            # NOTE: updated_key aliases the dict stored in self.keys, so
            # mutations below are visible in the local key registry.
            updated_key = self.keys[ns].setdefault(key['key'], {'dgroups': []})
            updated_key.update(key)
            updated_key['namespace'] = ns
            if req_ci_num == 0:
                # Already on the right number of instances; keep placement.
                ext_groups = cur_groups
            elif req_ci_num < 0:
                # Over-dispatched: pick instances to withdraw the key from
                # and emit a 'remove' task for those groups.
                cis = self.__cis_choose_remove(abs(req_ci_num),
                                               updated_key['dgroups'])
                gids = set([ci.group.group_id for ci in cis])
                ext_groups = cur_groups - gids
                updated_key['dgroups'] = list(ext_groups)
                task = self.__transport_key(updated_key,
                                            action='remove',
                                            dgroups=list(gids))
                transport.put(json.dumps(task))
            else:
                # Under-dispatched: ensure enough free namespace cache space,
                # evicting least popular keys if necessary, then add the key
                # to more instances.
                space_needed = self.__need_space(ns, req_ci_num,
                                                 item[self.ITEM_SIZE_KEY])
                if space_needed > 0:
                    logger.info('Additional space for namespaces required: '
                                '%s' % mb(space_needed))
                    keys_removed, freed_space = self.__pop_least_popular_keys(
                        ns, item['traffic'], space_needed)
                    # Keys evicted here are already gone; do not remove them
                    # again in __sync_removed.
                    keys_to_remove[ns] = keys_to_remove[ns] - keys_removed[ns]
                    if freed_space < space_needed:
                        logger.info('Not enough space for key %s (size: %s, '
                                    'require add.space: %s)' % (
                                        item['key'],
                                        mb(item[self.ITEM_SIZE_KEY]),
                                        mb(space_needed)))
                        continue
                cis = self.__cis_choose_add(req_ci_num, item['sgroups'],
                                            item['traffic'],
                                            item[self.ITEM_SIZE_KEY])
                ext_groups = set(updated_key['dgroups'] +
                                 [ci.group.group_id for ci in cis])
                updated_key['dgroups'] = list(ext_groups)
                # TODO: exclude existing dgroups from task
                task = self.__transport_key(updated_key, action='add')
                logger.info('Put task for cache distribution: %s' % task)
                transport.put(json.dumps(task))
            self.__upstream_update_key(ns, updated_key)
        if passive:
            # Passive mode: mirror the remote record locally as-is.
            updated_key = copy.copy(item)
            # dgroups should contain only groups that are in our cache
            # instances
            updated_key['dgroups'] = list(ext_groups)
            self.keys[ns][item['key']] = updated_key
            logger.info('External key %s' % (updated_key,))
        # Reconcile per-instance bookkeeping with the new placement:
        # groups the key left...
        for gid in cur_groups - ext_groups:
            group = storage.groups[gid]
            self.instances[group].remove_file(item[self.ITEM_SIZE_KEY])
            self.__namespaces[ns]['cache_size'] -= item[self.ITEM_SIZE_KEY]
            logger.info('Namespace %s: cache size changed -%s = %s' % (
                ns, mb(item[self.ITEM_SIZE_KEY]),
                mb(self.__namespaces[ns]['cache_size'])))
        # ...and groups the key joined.
        for gid in ext_groups - cur_groups:
            group = storage.groups[gid]
            self.instances[group].add_file(item[self.ITEM_SIZE_KEY])
            self.__namespaces[ns]['cache_size'] += item[self.ITEM_SIZE_KEY]
            logger.info('Namespace %s: cache size changed +%s = %s' % (
                ns, mb(item[self.ITEM_SIZE_KEY]),
                mb(self.__namespaces[ns]['cache_size'])))
    # Finally drop everything that was not mentioned remotely.
    self.__sync_removed(keys_to_remove, namespaces, passive=passive)