def get_resource_usage(raw_csv_usage: str, value_count: int, interval_sec: int) -> List[ResourceUsage]:
    log.debug("raw: {}".format(raw_csv_usage))
    parsed = parse_usage_csv(raw_csv_usage)
    log.debug("parsed: {}".format(parsed))
    padded = pad_usage(parsed, value_count)
    log.debug("padded: {}".format(padded))

    # The last entry in the 'Time' column is the (UTC) end of the sampled window;
    # the window start is derived by walking back value_count intervals.
    TIME = 'Time'
    end_time = datetime.strptime(padded[TIME][-1], "%Y-%m-%d %H:%M:%S")
    end_time = pytz.utc.localize(end_time)
    end_time_epoch = datetime.timestamp(end_time)
    start_time_epoch = end_time_epoch - (value_count * interval_sec)

    usages = []
    for k, v in padded.items():
        if k == TIME:
            continue

        if is_kubernetes():
            w_id, resource_name = parse_kubernetes_csv_usage_heading(k)
        else:
            w_id, resource_name = parse_mesos_csv_usage_heading(k)

        # Empty samples (gaps in the pmrep output) become NaN rather than 0 so they
        # can be distinguished from a genuine zero reading.
        values = [float('nan') if x == '' else float(x) for x in v]
        usage = ResourceUsage(w_id, resource_name, start_time_epoch, interval_sec, values)
        usages.append(usage)

    return usages
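# NOTE: parse_usage_csv and pad_usage are defined elsewhere in the repo, so their exact behavior is
# not shown here. The standalone sketch below only illustrates the shape of data get_resource_usage
# consumes: a pmrep-style CSV with a 'Time' column plus one column per (cgroup, metric), padded to a
# fixed sample count, with empty samples turned into NaN. The headings, padding direction, and
# sample values are illustrative assumptions, not real pmrep output.
import csv
import io

_EXAMPLE_CSV = """Time,"/containers.slice/abc;cgroup.cpuacct.usage","/containers.slice/abc;cgroup.memory.usage"
2023-01-01 00:00:00,100,
2023-01-01 00:01:00,150,2048
"""


def _parse_and_pad_sketch(raw_csv: str, value_count: int) -> dict:
    # Build a column map and left-pad every column with '' up to value_count samples.
    rows = list(csv.reader(io.StringIO(raw_csv)))
    headings, samples = rows[0], rows[1:]
    columns = {h: [r[i] for r in samples] for i, h in enumerate(headings)}
    pad = value_count - len(samples)
    return {h: ([''] * pad) + values for h, values in columns.items()}


padded_example = _parse_and_pad_sketch(_EXAMPLE_CSV, value_count=4)
for heading, values in padded_example.items():
    if heading == 'Time':
        continue
    # Same NaN handling as get_resource_usage above.
    print(heading, [float('nan') if x == '' else float(x) for x in values])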
def get_current_workloads(docker_client):
    workloads = []

    for container in docker_client.containers.list():
        try:
            if is_kubernetes():
                workloads.append(get_workload_from_kubernetes(container.name))
            else:
                workloads.append(get_workload_from_disk(container.name))
        except:
            log.exception("Failed to read environment for container: '%s'", container.name)

    return workloads
def __snapshot_usage_raw(self):
    try:
        # Avoid querying a potentially empty dataset: the query command fails on one, which
        # produces noisy logs that look like real failures.
        workload_manager = get_workload_manager()
        if workload_manager is None or len(workload_manager.get_workloads()) == 0:
            log.info('No workloads so skipping pcp snapshot.')
            return

        instance_filter = "INVALID_INSTANCE_FILTER"
        if is_kubernetes():
            instance_filter = '.*titus-executor.*.service'
        else:
            instance_filter = r'/containers.slice/[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}'

        # e.g. pmrep -a /var/log/pcp/pmlogger/$(hostname)/ -S -60m -t 1m -y s -o csv -i .*titus-executor.*.service cgroup.cpuacct.usage cgroup.memory.usage
        snapshot_cmd_fmt = """
            pmrep -a {0} \
                -S -{1}s \
                -T -0s \
                -t {2}s \
                -y s \
                -o csv \
                -i {3} \
                cgroup.cpuacct.usage \
                cgroup.memory.usage \
                titus.network.in.bytes \
                titus.network.out.bytes \
                titus.disk.bytes_used
        """

        cmd_str = snapshot_cmd_fmt.format(
            get_pcp_archive_path(),
            self.__relative_start_sec,
            self.__interval_sec,
            instance_filter)

        log.info('Snapshotting usage from pcp: {}'.format(' '.join(cmd_str.split())))
        byte_array = subprocess.check_output(cmd_str, shell=True, timeout=self.__query_timeout_sec)
        raw_csv_snapshot = byte_array.decode('utf-8')
        usages = get_resource_usage(raw_csv_snapshot, self.__interval_count, self.__interval_sec)

        with self.__lock:
            self.__usages = usages
    except Exception:
        log.exception("Failed to snapshot pcp data or compute usages")
def get_current_workloads(docker_client):
    workloads = []

    for container in docker_client.containers.list():
        workload = None
        try:
            if is_kubernetes():
                workload = get_workload_from_kubernetes(container.name)
            else:
                workload = get_workload_from_disk(container.name)
        except Exception:
            log.error("Failed to read environment for container: '%s'", container.name)

        if workload is not None:
            workloads.append(workload)

    return workloads
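# For context, a minimal driver for get_current_workloads might look like the following. It assumes
# the docker SDK is installed and that this module's helpers (is_kubernetes, get_workload_from_*)
# are importable; docker.from_env() and containers.list() are standard docker SDK calls, and
# get_id() is the workload accessor used elsewhere in this code (e.g. in handle() below).
import docker

docker_client = docker.from_env()
for workload in get_current_workloads(docker_client):
    log.info("Discovered workload: '%s'", workload.get_id())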
def handle(self, event):
    if not self.__relevant(event):
        return

    workload = None
    container_name = get_container_name(event)
    if is_kubernetes():
        workload = get_workload_from_kubernetes(container_name)
    else:
        workload = get_workload_from_disk(container_name)

    if workload is None:
        raise Exception('failed to construct workload from event')

    self.handling_event(event, "adding workload: '{}'".format(workload.get_id()))
    self.workload_manager.add_workload(workload)
    self.handled_event(event, "added workload: '{}'".format(workload.get_id()))
event_manager.start_processing_events()


if __name__ != '__main__' and not is_testing():
    set_config_manager(ConfigManager(EnvPropertyProvider))

    log.info("Configuring logging...")
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)

    # Set the schedule library's logging level higher so it doesn't spam messages every time it schedules a task
    logging.getLogger('schedule').setLevel(logging.WARN)

    exit_handler = RealExitHandler()

    if is_kubernetes():
        log.info("Setting pod manager...")
        pod_manager = PodManager()
        pod_manager.start()
        set_pod_manager(pod_manager)

    log.info("Setting event log manager...")
    event_log_manager = LocalEventLogManager()
    set_event_log_manager(event_log_manager)

    log.info("Watching property changes for restart...")
    RestartPropertyWatcher(get_config_manager(), exit_handler, RESTART_PROPERTIES)

    log.info("Modeling the CPU...")
    cpu = get_cpu_from_env()