class DispatchHash:
    def __init__(self,
                 sid: str,
                 client: Union[Redis, StrictRedis],
                 fetch_results=False):
        """

        :param sid:
        :param client:
        :param fetch_results: Preload all the results on the redis server.
        """
        self.client = client
        self.sid = sid
        self._dispatch_key = f'{sid}{dispatch_tail}'
        self._finish_key = f'{sid}{finished_tail}'
        self._finish = self.client.register_script(finish_script)

        # cache the schedules calculated for the dispatcher, used to prevent rebuilding the
        # schedule repeatedly, and for telling the UI what services are pending
        self.schedules = ExpiringHash(f'dispatch-hash-schedules-{sid}',
                                      host=self.client)

        # How many services are outstanding for each file in the submission
        self._outstanding_service_count = ExpiringHash(
            f'dispatch-hash-files-{sid}', host=self.client)
        # Track which files have been extracted by what, in order to rebuild the file tree later
        self._file_tree = ExpiringSet(f'dispatch-hash-parents-{sid}',
                                      host=self.client)
        self._attempts = ExpiringHash(f'dispatch-hash-attempts-{sid}',
                                      host=self.client)

        # Local caches for the file set and the finished table
        self._cached_files = set(self._outstanding_service_count.keys())
        self._cached_results = dict()
        if fetch_results:
            self._cached_results = self.all_results()

        # Errors that are related to a submission, but not the terminal errors of a service
        self._other_errors = ExpiringSet(f'dispatch-hash-errors-{sid}',
                                         host=self.client)

        # TODO set these expire times from the global time limit for submissions
        retry_call(self.client.expire, self._dispatch_key, 60 * 60)
        retry_call(self.client.expire, self._finish_key, 60 * 60)
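
        # A summary of the Redis keys set up above (a sketch derived from the key
        # names in this constructor, not an exhaustive contract):
        #   {sid}{dispatch_tail}            hash: '<file>-<service>' -> dispatch timestamp
        #   {sid}{finished_tail}            hash: '<file>-<service>' -> json encoded result/error row
        #   dispatch-hash-files-{sid}       hash: file hash -> outstanding service count
        #   dispatch-hash-parents-{sid}     set:  '<child>-<parent>' edges for the file tree
        #   dispatch-hash-errors-{sid}      set:  error keys not tied to a service result
        #   dispatch-hash-schedules-{sid}   hash: cached per-file service schedules
        #   dispatch-hash-attempts-{sid}    hash: dispatch attempt counters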

    def add_file(self, file_hash: str, file_limit, parent_hash) -> bool:
        """Add a file to a submission.

        Returns: True if the file was added (or was already present), False if it was
        rejected because the submission's file limit has been reached.
        """
        if parent_hash:
            self._file_tree.add(f'{file_hash}-{parent_hash}')
        else:
            self._file_tree.add(file_hash)

        # If it was already in the set, we don't need to check remotely
        if file_hash in self._cached_files:
            return True

        # If the set is already full and it's not in the set, then we don't need to check remotely
        if len(self._cached_files) >= file_limit:
            return False

        # Our local checks are inconclusive, check remotely:
        # 0 => already exists, still want to return true
        # 1 => didn't exist before
        # None => over size limit, return false
        if self._outstanding_service_count.limited_add(file_hash, 0,
                                                       file_limit) is not None:
            # Whether it was just added or already existed, cache it locally so we don't check again
            self._cached_files.add(file_hash)
            return True
        return False
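
    # A minimal usage sketch (hypothetical hashes and limit), illustrating the
    # return values of add_file:
    #   dh = DispatchHash(sid, redis_client)
    #   dh.add_file('aaaa...', file_limit=500, parent_hash=None)       # True: new root file
    #   dh.add_file('bbbb...', file_limit=500, parent_hash='aaaa...')  # True: child of 'aaaa...'
    #   dh.add_file('aaaa...', file_limit=500, parent_hash=None)       # True: already present
    #   # Once file_limit distinct files are present, further new files return False.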

    def add_error(self, error_key: str) -> bool:
        """Add an error to a submission.

        NOTE: This method is for errors occurring outside of those handled via the 'fail_*recoverable' methods

        Returns true if the error is new, false if the error is a duplicate.
        """
        return self._other_errors.add(error_key) > 0

    def dispatch(self, file_hash: str, service: str):
        """Mark that a service has been dispatched for the given sha."""
        if retry_call(self.client.hset, self._dispatch_key,
                      f"{file_hash}-{service}", time.time()):
            self._outstanding_service_count.increment(file_hash, 1)

    def drop_dispatch(self, file_hash: str, service: str):
        """If a dispatch has been found to be un-needed remove the counters."""
        if retry_call(self.client.hdel, self._dispatch_key,
                      f"{file_hash}-{service}"):
            self._outstanding_service_count.increment(file_hash, -1)

    def dispatch_count(self):
        """How many tasks have been dispatched for this submission."""
        return retry_call(self.client.hlen, self._dispatch_key)

    def dispatch_time(self, file_hash: str, service: str) -> float:
        """When was dispatch called for this sha/service pair."""
        result = retry_call(self.client.hget, self._dispatch_key,
                            f"{file_hash}-{service}")
        if result is None:
            return 0
        return float(result)

    def all_dispatches(self) -> Dict[str, Dict[str, float]]:
        """Load the entire table of things that should currently be running."""
        rows = retry_call(self.client.hgetall, self._dispatch_key)
        output = {}
        for key, timestamp in rows.items():
            file_hash, service = key.split(b'-', maxsplit=1)
            file_hash = file_hash.decode()
            service = service.decode()
            if file_hash not in output:
                output[file_hash] = {}
            output[file_hash][service] = float(timestamp)
        return output

    def fail_recoverable(self,
                         file_hash: str,
                         service: str,
                         error_key: str = None):
        """A service task has failed, but should be retried, clear that it has been dispatched.

        After this call, the service is in a non-dispatched state, and the status can't be update
        until it is dispatched again.
        """
        if error_key:
            self._other_errors.add(error_key)
        retry_call(self.client.hdel, self._dispatch_key,
                   f"{file_hash}-{service}")
        self._outstanding_service_count.increment(file_hash, -1)

    def fail_nonrecoverable(self, file_hash: str, service,
                            error_key) -> Tuple[int, bool]:
        """A service task has failed and should not be retried, entry the error as the result.

        Has exactly the same semantics as `finish` but for errors.
        """
        return retry_call(self._finish,
                          args=[
                              self.sid, file_hash, service,
                              json.dumps(['error', error_key, 0, False, ''])
                          ])
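
    # Note: the json payload above mirrors the one built by finish() below, so a
    # non-recoverable failure shows up in the finished table as a row that points
    # at the error document, with a score of zero and an empty classification
    # (assuming DispatchRow unpacks in the same field order as these payloads).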

    def finish(self,
               file_hash,
               service,
               result_key,
               score,
               classification,
               drop=False) -> Tuple[int, bool]:
        """
        As a single transaction:
         - Remove the service from the dispatched list
         - Add the file to the finished list, with the given result key
         - return the number of items left in the dispatched list, and whether this was a duplicate call to finish
        """
        return retry_call(self._finish,
                          args=[
                              self.sid, file_hash, service,
                              json.dumps([
                                  'result', result_key, score, drop,
                                  str(classification)
                              ])
                          ])
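
    # A minimal sketch of the finish contract (hypothetical keys), given a
    # DispatchHash instance dh and assuming the registered script behind
    # self._finish returns the documented (count, duplicate) pair:
    #   remaining, duplicate = dh.finish('aaaa...', 'Extract', 'result_key_0',
    #                                    score=10, classification='TLP:W')
    #   if remaining == 0 and not duplicate:
    #       pass  # no dispatched work left for this submission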

    def finished_count(self) -> int:
        """How many tasks have been finished for this submission."""
        return retry_call(self.client.hlen, self._finish_key)

    def finished(self, file_hash, service) -> Union[DispatchRow, None]:
        """If a service has been finished, return the key of the result document."""
        # Try the local cache
        result = self._cached_results.get(file_hash, {}).get(service, None)
        if result:
            return result
        # Try the server
        result = retry_call(self.client.hget, self._finish_key,
                            f"{file_hash}-{service}")
        if result:
            return DispatchRow(*json.loads(result))
        return None

    def all_finished(self) -> bool:
        """Are there no outstanding tasks, and at least one finished task."""
        return self.finished_count() > 0 and self.dispatch_count() == 0

    def all_results(self) -> Dict[str, Dict[str, DispatchRow]]:
        """Get all the records stored in the dispatch table.

        :return: output[file_hash][service_name] -> DispatchRow
        """
        rows = retry_call(self.client.hgetall, self._finish_key)
        output = {}
        for key, status in rows.items():
            file_hash, service = key.split(b'-', maxsplit=1)
            file_hash = file_hash.decode()
            service = service.decode()
            if file_hash not in output:
                output[file_hash] = {}
            output[file_hash][service] = DispatchRow(*json.loads(status))
        return output

    def all_extra_errors(self):
        """Return the set of errors not part of the dispatch table itself."""
        return self._other_errors.members()

    def all_files(self):
        """Return the hashes of all the files in this submission."""
        return self._outstanding_service_count.keys()

    def file_tree(self):
        """Returns a mapping from file, to a list of files that are that file's parents.

        A none value being in the list indicates that the file is one of the root files of the submission.
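
        For example, if the recorded edges are {'a', 'b-a'} then the result is
        {'a': [None], 'b': ['a']}: 'a' is a root file and 'b' was extracted from 'a'.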
        """
        edges = self._file_tree.members()
        output = {}
        for string in edges:
            if '-' in string:
                child, parent = string.split('-')
            else:
                child, parent = string, None

            if child not in output:
                output[child] = []
            output[child].append(parent)
        return output

    def delete(self):
        """Clear the tables from the redis server."""
        retry_call(self.client.delete, self._dispatch_key)
        retry_call(self.client.delete, self._finish_key)
        self.schedules.delete()
        self._outstanding_service_count.delete()
        self._file_tree.delete()
        self._other_errors.delete()
        self._attempts.delete()


class ScalerServer(CoreBase):
    def __init__(self,
                 config=None,
                 datastore=None,
                 redis=None,
                 redis_persist=None):
        super().__init__('assemblyline.scaler',
                         config=config,
                         datastore=datastore,
                         redis=redis,
                         redis_persist=redis_persist)

        self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE,
                                               host=self.redis_persist)
        self.error_count = {}
        self.status_table = ExpiringHash(SERVICE_STATE_HASH,
                                         host=self.redis,
                                         ttl=30 * 60)

        labels = {
            'app': 'assemblyline',
            'section': 'service',
        }

        if KUBERNETES_AL_CONFIG:
            self.log.info(
                f"Loading Kubernetes cluster interface on namespace: {NAMESPACE}"
            )
            self.controller = KubernetesController(
                logger=self.log,
                prefix='alsvc_',
                labels=labels,
                namespace=NAMESPACE,
                priority='al-service-priority')
            # If we know where to find it, mount the classification into the service containers
            if CLASSIFICATION_CONFIGMAP:
                self.controller.config_mount(
                    'classification-config',
                    config_map=CLASSIFICATION_CONFIGMAP,
                    key=CLASSIFICATION_CONFIGMAP_KEY,
                    target_path='/etc/assemblyline/classification.yml')
        else:
            self.log.info("Loading Docker cluster interface.")
            self.controller = DockerController(
                logger=self.log,
                prefix=NAMESPACE,
                cpu_overallocation=self.config.core.scaler.cpu_overallocation,
                memory_overallocation=self.config.core.scaler.
                memory_overallocation,
                labels=labels)
            # If we know where to find it, mount the classification into the service containers
            if CLASSIFICATION_HOST_PATH:
                self.controller.global_mounts.append(
                    (CLASSIFICATION_HOST_PATH,
                     '/etc/assemblyline/classification.yml'))

        self.profiles: Dict[str, ServiceProfile] = {}

        # Prepare a single threaded scheduler
        self.state = collection.Collection(
            period=self.config.core.metrics.export_interval)
        self.scheduler = sched.scheduler()
        self.scheduler_stopped = threading.Event()

    def add_service(self, profile: ServiceProfile):
        """Register a service profile with the controller and start tracking it for scaling."""
        profile.desired_instances = max(
            self.controller.get_target(profile.name), profile.min_instances)
        profile.running_instances = profile.desired_instances
        self.log.debug(
            f'Starting service {profile.name} with a target of {profile.desired_instances}'
        )
        profile.last_update = time.time()
        self.profiles[profile.name] = profile
        self.controller.add_profile(profile)

    def try_run(self):
        # Do an initial call to the main methods, which will then re-register themselves with the scheduler
        self.sync_services()
        self.sync_metrics()
        self.update_scaling()
        self.expire_errors()
        self.process_timeouts()
        self.export_metrics()
        self.flush_service_status()
        self.log_container_events()
        self.heartbeat()

        # Run as long as we need to
        while self.running:
            delay = self.scheduler.run(False)
            time.sleep(min(delay, 2))
        self.scheduler_stopped.set()

    def stop(self):
        super().stop()
        self.scheduler_stopped.wait(5)
        self.controller.stop()

    def heartbeat(self):
        """Periodically touch a file on disk.

        Since tasks are run serially, the delay between touches will be the maximum of
        HEARTBEAT_INTERVAL and the longest running task.
        """
        if self.config.logging.heartbeat_file:
            self.scheduler.enter(HEARTBEAT_INTERVAL, 0, self.heartbeat)
            super().heartbeat()

    def sync_services(self):
        """Synchronize the scaled services with the service configurations stored in the datastore."""
        self.scheduler.enter(SERVICE_SYNC_INTERVAL, 0, self.sync_services)
        default_settings = self.config.core.scaler.service_defaults
        image_variables = defaultdict(str)
        image_variables.update(self.config.services.image_variables)
        current_services = set(self.profiles.keys())
        discovered_services = []

        # Get all the service data
        for service in self.datastore.list_all_services(full=True):
            service: Service = service
            name = service.name
            stage = self.get_service_stage(service.name)
            discovered_services.append(name)

            # noinspection PyBroadException
            try:
                if service.enabled and stage == ServiceStage.Off:
                    # Enable this service's dependencies
                    self.controller.prepare_network(
                        service.name,
                        service.docker_config.allow_internet_access)
                    for _n, dependency in service.dependencies.items():
                        self.controller.start_stateful_container(
                            service_name=service.name,
                            container_name=_n,
                            spec=dependency,
                            labels={'dependency_for': service.name})

                    # Move to the next service stage
                    if service.update_config and service.update_config.wait_for_update:
                        self._service_stage_hash.set(name, ServiceStage.Update)
                    else:
                        self._service_stage_hash.set(name,
                                                     ServiceStage.Running)

                if not service.enabled:
                    self.stop_service(service.name, stage)
                    continue

                # For services that are enabled and running, make sure their configuration is current
                if service.enabled and stage == ServiceStage.Running:
                    # Compute a hash of service properties not included in the docker config, that
                    # should still result in a service being restarted when changed
                    config_hash = hash(str(sorted(service.config.items())))
                    config_hash = hash(
                        (config_hash, str(service.submission_params)))

                    # Build the docker config for the service; we are going to either create it or
                    # update it, so we need to know what the current configuration is either way
                    docker_config = service.docker_config
                    docker_config.image = Template(
                        docker_config.image).safe_substitute(image_variables)
                    set_keys = set(var.name
                                   for var in docker_config.environment)
                    for var in default_settings.environment:
                        if var.name not in set_keys:
                            docker_config.environment.append(var)

                    # Add the service to the list of services being scaled
                    if name not in self.profiles:
                        self.log.info(f'Adding {service.name} to scaling')
                        self.add_service(
                            ServiceProfile(
                                name=name,
                                min_instances=default_settings.min_instances,
                                growth=default_settings.growth,
                                shrink=default_settings.shrink,
                                config_hash=config_hash,
                                backlog=default_settings.backlog,
                                max_instances=service.licence_count,
                                container_config=docker_config,
                                queue=get_service_queue(name, self.redis),
                                shutdown_seconds=service.timeout +
                                30,  # Give service an extra 30 seconds to upload results
                            ))

                    # Update RAM, CPU, licence requirements for running services
                    else:
                        profile = self.profiles[name]

                        if profile.container_config != docker_config or profile.config_hash != config_hash:
                            self.log.info(
                                f"Updating deployment information for {name}")
                            profile.container_config = docker_config
                            profile.config_hash = config_hash
                            self.controller.restart(profile)
                            self.log.info(
                                f"Deployment information for {name} replaced")

                        if service.licence_count == 0:
                            profile._max_instances = float('inf')
                        else:
                            profile._max_instances = service.licence_count
            except Exception:
                self.log.exception(
                    f"Error applying service settings from: {service.name}")
                self.handle_service_error(service.name)

        # Find any services we have running that are no longer in the database, and remove them
        for stray_service in current_services - set(discovered_services):
            stage = self.get_service_stage(stray_service)
            self.stop_service(stray_service, stage)

    def stop_service(self, name, current_stage):
        """Stop a service's dependency containers, mark it as off, and remove it from scaling."""
        if current_stage != ServiceStage.Off:
            # Disable this service's dependencies
            self.controller.stop_containers(labels={'dependency_for': name})

            # Mark this service as not running in the shared record
            self._service_stage_hash.set(name, ServiceStage.Off)

        # Stop any running disabled services
        if name in self.profiles or self.controller.get_target(name) > 0:
            self.log.info(f'Removing {name} from scaling')
            self.controller.set_target(name, 0)
            self.profiles.pop(name, None)

    def update_scaling(self):
        """Check if we need to scale any services up or down."""
        self.scheduler.enter(SCALE_INTERVAL, 0, self.update_scaling)
        try:
            # Figure out what services are expected to be running and how many
            profiles: List[ServiceProfile] = list(self.profiles.values())
            targets = {
                _p.name: self.controller.get_target(_p.name)
                for _p in profiles
            }

            for name, profile in self.profiles.items():
                self.log.debug(f'{name}')
                self.log.debug(
                    f'Instances \t{profile.min_instances} < {profile.desired_instances} | '
                    f'{targets[name]} < {profile.max_instances}')
                self.log.debug(
                    f'Pressure \t{profile.shrink_threshold} < {profile.pressure} < {profile.growth_threshold}'
                )

            #
            #   1.  Any processes that want to release resources can always be approved first
            #
            for name, profile in self.profiles.items():
                if targets[name] > profile.desired_instances:
                    self.log.info(
                        f"{name} wants less resources changing allocation "
                        f"{targets[name]} -> {profile.desired_instances}")
                    self.controller.set_target(name, profile.desired_instances)
                    targets[name] = profile.desired_instances
                if not self.running:
                    return

            #
            #   2.  Any processes that aren't reaching their min_instances target must be given
            #       more resources before anyone else is considered.
            #
            for name, profile in self.profiles.items():
                if targets[name] < profile.min_instances:
                    self.log.info(
                        f"{name} isn't meeting minimum allocation "
                        f"{targets[name]} -> {profile.min_instances}")
                    self.controller.set_target(name, profile.min_instances)
                    targets[name] = profile.min_instances

            #
            #   3.  Try to estimate available resources, and based on some metric grant the
            #       resources to each service that wants them. While this free memory
            #       pool might be spread across many nodes, we are going to treat it like
            #       it is one big one, and let the orchestration layer sort out the details.
            #
            free_cpu = self.controller.free_cpu()
            free_memory = self.controller.free_memory()

            #
            def trim(prof: List[ServiceProfile]):
                prof = [
                    _p for _p in prof
                    if _p.desired_instances > targets[_p.name]
                ]
                drop = [
                    _p for _p in prof
                    if _p.cpu > free_cpu or _p.ram > free_memory
                ]
                if drop:
                    drop = {_p.name: (_p.cpu, _p.ram) for _p in drop}
                    self.log.debug(
                        f"Can't make more because not enough resources {drop}")
                prof = [
                    _p for _p in prof
                    if _p.cpu <= free_cpu and _p.ram <= free_memory
                ]
                return prof

            profiles = trim(profiles)

            while profiles:
                # TODO do we need to add balancing metrics other than 'least running' for this? probably
                profiles.sort(
                    key=lambda _p: self.controller.get_target(_p.name))

                # Add one for the profile at the bottom
                free_memory -= profiles[0].container_config.ram_mb
                free_cpu -= profiles[0].container_config.cpu_cores
                targets[profiles[0].name] += 1

                # profiles = [_p for _p in profiles if _p.desired_instances > targets[_p.name]]
                # profiles = [_p for _p in profiles if _p.cpu < free_cpu and _p.ram < free_memory]
                profiles = trim(profiles)

            # Apply those adjustments we have made back to the controller
            for name, value in targets.items():
                old = self.controller.get_target(name)
                if value != old:
                    self.log.info(f"Scaling service {name}: {old} -> {value}")
                    self.controller.set_target(name, value)
                if not self.running:
                    return

        except ServiceControlError as error:
            self.log.exception("Error while scaling services.")
            self.handle_service_error(error.service_name)
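
    # A rough trace of the allocation loop above (hypothetical numbers, ample
    # memory): with 4 free CPUs, service A at target 0 wanting 2 instances of
    # 1 CPU each, and service B at target 1 wanting 3 instances of 2 CPUs each,
    # instances are granted one at a time to the profile whose controller target
    # is lowest, so A ends at 2 and B ends at 2 before the free CPU runs out.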

    def handle_service_error(self, service_name):
        """Handle an error occurring in the *analysis* service.

        Errors for core systems should simply be logged, and a best effort to continue made.

        For analysis services, ignore the error a few times, then disable the service.
        """
        self.error_count[service_name] = self.error_count.get(service_name,
                                                              0) + 1

        if self.error_count[service_name] >= MAXIMUM_SERVICE_ERRORS:
            self.datastore.service_delta.update(
                service_name,
                [(self.datastore.service_delta.UPDATE_SET, 'enabled', False)])
            del self.error_count[service_name]

    def sync_metrics(self):
        """Check if there are any pubsub messages we need."""
        self.scheduler.enter(METRIC_SYNC_INTERVAL, 3, self.sync_metrics)

        # Pull service metrics from redis
        service_data = self.status_table.items()
        for host, (service, state, time_limit) in service_data.items():
            # If an entry hasn't expired, take it into account
            if time.time() < time_limit:
                self.state.update(service=service,
                                  host=host,
                                  throughput=0,
                                  busy_seconds=METRIC_SYNC_INTERVAL
                                  if state == ServiceStatus.Running else 0)

            # If an entry expired a while ago, the host is probably not in use any more
            if time.time() > time_limit + 600:
                self.status_table.pop(host)

        # Check the services that might be sitting at zero instances; they produce no
        # heartbeats, so we have to check their queues manually to know if they are needed
        export_interval = self.config.core.metrics.export_interval
        for profile_name, profile in self.profiles.items():
            # Pull out statistics from the metrics regularization
            update = self.state.read(profile_name)
            if update:
                delta = time.time() - profile.last_update
                profile.update(delta=delta,
                               backlog=profile.queue.length(),
                               **update)

            # Check if we expect no messages, if so pull the queue length ourselves since there is no heartbeat
            if self.controller.get_target(
                    profile_name
            ) == 0 and profile.desired_instances == 0 and profile.queue:
                queue_length = profile.queue.length()
                if queue_length > 0:
                    self.log.info(f"Service at zero instances has messages: "
                                  f"{profile.name} ({queue_length} in queue)")
                profile.update(delta=export_interval,
                               instances=0,
                               backlog=queue_length,
                               duty_cycle=profile.target_duty_cycle)

        # TODO maybe find another way of implementing this that is less aggressive
        # for profile_name, profile in self.profiles.items():
        #     # In the case that there should actually be instances running, but we haven't gotten
        #     # any heartbeat messages we might be waiting for a container that can't start properly
        #     if self.services.controller.get_target(profile_name) > 0:
        #         if time.time() - profile.last_update > profile.shutdown_seconds:
        #             self.log.error(f"Starting service {profile_name} has timed out "
        #                            f"({time.time() - profile.last_update} > {profile.shutdown_seconds} seconds)")
        #
        #             # Disable the service
        #             self.datastore.service_delta.update(profile_name, [
        #                 (self.datastore.service_delta.UPDATE_SET, 'enabled', False)
        #             ])

    def expire_errors(self):
        """Decay the per-service error counters so old errors are gradually forgiven."""
        self.scheduler.enter(ERROR_EXPIRY_INTERVAL, 0, self.expire_errors)
        self.error_count = {
            name: err - 1
            for name, err in self.error_count.items() if err > 1
        }
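
    # For example, {'Extract': 3, 'PDFId': 1} becomes {'Extract': 2} after one
    # interval, so a service is only disabled if errors accumulate faster than
    # this decay forgives them.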

    def process_timeouts(self):
        """Stop the containers of services reported on the timeout queue."""
        self.scheduler.enter(PROCESS_TIMEOUT_INTERVAL, 0,
                             self.process_timeouts)
        while True:
            message = self.scaler_timeout_queue.pop(blocking=False)
            if not message:
                break
            # noinspection PyBroadException
            try:
                self.log.info(
                    f"Killing service container: {message['container']} running: {message['service']}"
                )
                self.controller.stop_container(message['service'],
                                               message['container'])
            except Exception:
                self.log.exception(
                    f"Exception trying to stop timed out service container: {message}"
                )

    def export_metrics(self):
        """Export scaling status for each service, plus overall resource metrics."""
        self.scheduler.enter(self.config.logging.export_interval, 0,
                             self.export_metrics)
        for service_name, profile in self.profiles.items():
            metrics = {
                'running': profile.running_instances,
                'target': profile.desired_instances,
                'minimum': profile.min_instances,
                'maximum': profile.instance_limit,
                'dynamic_maximum': profile.max_instances,
                'queue': profile.queue_length,
                'duty_cycle': profile.duty_cycle,
                'pressure': profile.pressure
            }
            export_metrics_once(service_name,
                                Status,
                                metrics,
                                host=HOSTNAME,
                                counter_type='scaler-status',
                                config=self.config,
                                redis=self.redis)

        memory, memory_total = self.controller.memory_info()
        cpu, cpu_total = self.controller.cpu_info()
        metrics = {
            'memory_total': memory_total,
            'cpu_total': cpu_total,
            'memory_free': memory,
            'cpu_free': cpu
        }

        export_metrics_once('scaler',
                            Metrics,
                            metrics,
                            host=HOSTNAME,
                            counter_type='scaler',
                            config=self.config,
                            redis=self.redis)

    def flush_service_status(self):
        """The service status table may have references to containers that have crashed. Try to remove them all."""
        self.scheduler.enter(SERVICE_STATUS_FLUSH, 0,
                             self.flush_service_status)

        # Pull all container names
        names = set(self.controller.get_running_container_names())

        # Get the names we have status for
        for hostname in self.status_table.keys():
            if hostname not in names:
                self.status_table.pop(hostname)

    def log_container_events(self):
        """The service status table may have references to containers that have crashed. Try to remove them all."""
        self.scheduler.enter(CONTAINER_EVENTS_LOG_INTERVAL, 0,
                             self.log_container_events)

        for message in self.controller.new_events():
            self.log.warning("Container Event :: " + message)