async def test_replacements(cli: CLI) -> None:
    """Check how `@name@` placeholders in a CLI command line are replaced."""

    async def execute(template: str, replace_place_holder: bool = True) -> str:
        # Evaluate `echo <template>` and hand back the args of the first parsed command.
        evaluated = await cli.evaluate_cli_command(
            f"echo {template}", replace_place_holder=replace_place_holder
        )
        return evaluated[0].parsed_commands.commands[0].args  # type: ignore

    # lookup keys are not case-sensitive
    today = utc().date().strftime("%Y-%m-%d")
    both_spellings = await execute("@today@ and @TODAY@")
    assert both_spellings == f"{today} and {today}"

    # if the value is not provided, but a function is called
    in_3_days = utc() + timedelta(days=3)
    computed = from_utc(await execute("@3d.from_now@"))
    assert abs(computed - in_3_days) < timedelta(seconds=1)

    # replacement is not touched if flag is set
    untouched = await execute("@today@", False)
    assert untouched == "@today@"
async def check_outdated_handler(self) -> None:
    """
    Periodic check for subscribers that hold subscriptions but are not connected.

    A subscriber is first recorded in `not_connected_since` when it is seen
    without an active listener. If it is still missing more than 3 minutes
    after that, all of its subscriptions are removed.
    """
    now = utc()
    # In case the service has just been started/restarted:
    # do not remove any subscriptions during the first minutes.
    if (now - self.started_at) <= timedelta(minutes=5):
        return

    expected = set(self._subscribers_by_id.keys())
    connected = set(self.message_bus.active_listener.keys())

    # every connected subscriber is no longer considered missing
    for listener_id in connected:
        self.not_connected_since.pop(listener_id, None)

    for subscriber in expected - connected:
        missing_since = self.not_connected_since.get(subscriber)
        if not missing_since:
            # seen as missing for the first time: remember when
            self.not_connected_since[subscriber] = now
        elif (now - missing_since) > timedelta(minutes=3):
            log.warning(f"Subscriber {subscriber} is missing. Remove all subscription.")
            await self.remove_subscriber(subscriber)
            self.not_connected_since.pop(subscriber, None)
        # otherwise: known to be missing, but not long enough yet - nothing to do
async def __add_task(self, task: WorkerTask, retry_count: int) -> bool:
    """
    Try to hand the task to a matching worker.

    Among all subscriptions whose filter matches the task attributes, the ones
    with the most filter entries are preferred; of those, the worker with the
    lowest work count gets the task.

    :param task: the task to assign.
    :param retry_count: passed on to the bookkeeping entry created for the task.
    :return: True if the task was put on a worker queue, False if it was parked as unassigned.
    """

    def can_perform(subscription: WorkerTaskSubscription) -> bool:
        # the filter criteria in the subscription needs to be matched by the task attributes
        # note: the task can define more attributes, that would be ignored
        for attr_name, allowed_values in subscription.task.filter.items():
            attr_value = task.attrs.get(attr_name)
            if not attr_value or attr_value not in allowed_values:
                return False
        return True

    # all workers that match the task filter
    candidates = [wt for wt in self.worker_by_task_name[task.name] if can_perform(wt)]

    if not candidates:
        # nobody can perform this task right now: keep it as unassigned
        self.outstanding_tasks.pop(task.id, None)
        if task.id not in self.unassigned_tasks:
            self.unassigned_tasks[task.id] = WorkerTaskOnHold(task, retry_count, utc() + task.timeout)
        return False

    # the most specific filter is the one with the most filter entries
    most_specific = max(len(wt.task.filter) for wt in candidates)
    # of those, pick the worker with the least amount of outstanding work (first one wins a tie)
    sub = min(
        (wt for wt in candidates if len(wt.task.filter) == most_specific),
        key=lambda wt: self.work_count[wt.worker_id],
    )
    # todo: store task in db
    self.outstanding_tasks[task.id] = WorkerTaskInProgress(task, sub, retry_count, utc() + task.timeout)
    await sub.queue.put(task)
    self.work_count[sub.worker_id] += 1
    return True
def __init__(
    self,
    uid: str,
    descriptor: TaskDescription,
    subscribers_by_event: Callable[[], Dict[str, List[Subscriber]]],
):
    """
    Set up the task state and the state machine that walks through the descriptor steps.

    :param uid: stored as the id of this task.
    :param descriptor: its steps become the states between the start and the end state.
    :param subscribers_by_event: callable stored for later lookup of subscribers by event.
    """
    self.id = uid
    self.is_error = False
    self.descriptor = descriptor
    self.received_messages: MutableSequence[Message] = []
    self.subscribers_by_event = subscribers_by_event
    # task and first step start at the same instant
    self.task_started_at = utc()
    self.step_started_at = self.task_started_at
    self.update_task: Optional[Task] = None  # type: ignore # pypy
    self.descriptor_alive = True

    # state order: start -> one state per step -> end
    start = StartState(self)
    step_states = [StepState.from_step(step, self) for step in descriptor.steps]
    end = EndState(self)
    states: List[StepState] = [start, *step_states, end]

    self.machine = Machine(self, states, start, auto_transitions=False, queued=True)
    for source, target in interleave(states):
        # move on to the following state once the current step is done
        self.machine.add_transition("_next_state", source.name, target.name, [source.current_step_done])
        # or jump to the end state under the end state's error condition
        self.machine.add_transition("_to_err", source.name, end.name, [end.is_error])
async def test_send_analytics_proper() -> None:
    """A core event created via the PostHog sender carries the requested kind."""
    system_data = SystemData("test", utc(), 1)
    sender_ctx = PostHogEventSender(system_data, client_flush_interval=0.01, client_retries=0)
    async with sender_ctx as sender:
        created = await sender.core_event("test-event")
        assert created.kind == "test-event"
def instances() -> List[RunningTaskData]:
    """Create ten RunningTaskData entries with ids "0".."9" that share the same messages and state data."""
    state_data = {"test": 1}
    messages = [ActionDone(str(idx), "test", "bla", "sf") for idx in range(10)]
    result: List[RunningTaskData] = []
    for idx in range(10):
        ident = str(idx)
        result.append(RunningTaskData(ident, ident, "task_123", messages, "start", state_data, utc()))
    return result
def replacements(**env: str) -> Dict[str, str]:
    """
    Compute the time related replacement values.

    The reference time is parsed from `env["now"]` when that entry is present
    and non-empty, otherwise the current UTC time is used. Date based values
    are derived from the UTC date; clock and time zone values from the
    reference time converted to the local zone, falling back to the
    unconverted time if the conversion fails.

    :param env: optional environment; only the key `now` is read.
    :return: mapping of upper case placeholder name to its string value.
    """
    now_string = env.get("now")
    utc_now = from_utc(now_string) if now_string else utc()
    today = utc_now.date()
    try:
        local_now = utc_now.astimezone(get_localzone())
    except Exception:
        local_now = utc_now

    def upcoming(weekday: int) -> str:
        # date of the next occurrence of the given weekday - today, if today is that weekday
        days_ahead = (weekday - today.weekday()) % 7
        return (today + timedelta(days_ahead)).isoformat()

    values = {
        "UTC": utc_str(utc_now),
        "NOW": utc_str(local_now),
        "TODAY": today.strftime("%Y-%m-%d"),
        "TOMORROW": (today + timedelta(days=1)).isoformat(),
        "YESTERDAY": (today + timedelta(days=-1)).isoformat(),
        "YEAR": today.strftime("%Y"),
        "MONTH": today.strftime("%m"),
        "DAY": today.strftime("%d"),
        "TIME": local_now.strftime("%H:%M:%S"),
        "HOUR": local_now.strftime("%H"),
        "MINUTE": local_now.strftime("%M"),
        "SECOND": local_now.strftime("%S"),
        "TZ_OFFSET": local_now.strftime("%z"),
        "TZ": local_now.strftime("%Z"),
    }
    # the weekday names double as the names of the calendar module constants
    for day_name in ("MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"):
        values[day_name] = upcoming(getattr(calendar, day_name))
    return values
async def core_event(
    self,
    kind: str,
    context: Optional[Mapping[str, JsonElement]] = None,
    **counters: Union[int, float],
) -> AnalyticsEvent:
    """
    Create an analytics event of system "resotocore" stamped with the current UTC time and capture it.

    :param kind: the kind of the event.
    :param context: optional context of the event; an empty dict is used when missing or empty.
    :param counters: numeric counters attached to the event.
    :return: the event that has been captured.
    """
    created = AnalyticsEvent("resotocore", kind, context or {}, counters, utc())
    await self.capture(created)
    return created
def __init__(self, db: SubscriberDb, message_bus: MessageBus) -> None:
    """
    Create the handler with empty lookup tables and a periodic cleaner.

    :param db: the subscriber database.
    :param message_bus: the message bus; its active listeners are consulted by the outdated check.
    """
    # collaborators
    self.db = db
    self.message_bus = message_bus
    # lookup tables, filled later
    self._subscribers_by_id: Dict[str, Subscriber] = {}
    self._subscribers_by_event: Dict[str, List[Subscriber]] = {}
    # subscriber id -> time the subscriber was first seen as not connected
    self.not_connected_since: Dict[str, datetime] = {}
    self.started_at = utc()
    # the outdated check is invoked by the cleaner every 10 seconds
    check_interval = timedelta(seconds=10)
    self.cleaner = Periodic("subscription_cleaner", self.check_outdated_handler, check_interval)
async def test_send_analytics_no_service() -> None:
    """A core event can be created even if the sender points to a local address (127.0.0.1:54321)."""
    system_data = SystemData("test", utc(), 1)
    sender_ctx = PostHogEventSender(
        system_data,
        flush_at=1,
        host="https://127.0.0.1:54321",
        client_flush_interval=0.01,
        client_retries=0,
    )
    async with sender_ctx as sender:
        created = await sender.core_event("test-event")
        assert created.kind == "test-event"
def insert_system_data() -> SystemData:
    """Create a fresh system data entry and store it under key "system" in the "system_data" collection."""
    system = SystemData(uuid_str(), utc(), 1)
    log.info(f"Create new system data entry: {system}")
    document = {"_key": "system", **to_js(system)}
    # overwrite: an already existing document with this key is replaced
    db.insert_document("system_data", document, overwrite=True)
    return system
def check_timeout(self) -> bool:
    """
    Return true if the internal state of the fsm has changed by this event.
    This method is called periodically by the cleaner task.

    The step counts as timed out once the step start time plus the timeout
    lies in the past; in that case `timed_out` is set.
    """
    deadline = self.instance.step_started_at + self.timeout()
    expired = deadline < utc()
    if expired:
        self.timed_out = True
    return expired
async def find() -> AnyT:
    """
    Poll `all_events` until a message of type `t` with the expected `message_type` shows up.

    The events are checked every 0.1 seconds. This is a loop on purpose: polling
    via recursion (`return await find()`) grows the await chain by one frame per
    poll and fails with a RecursionError for long timeouts.

    :return: the first matching message.
    :raises TimeoutError: if no message matched and `stop_at` has passed.
    """
    while True:
        result = first(lambda m: isinstance(m, t) and m.message_type == message_type, all_events)  # type: ignore
        if result:
            return result  # type: ignore
        if utc() > stop_at:
            raise TimeoutError()
        await asyncio.sleep(0.1)
async def read_forever() -> GraphUpdate:
    """
    Read actions from the `read` queue until a `Result` arrives or the deadline has passed.

    `EmitAnalyticsEvent` actions are forwarded to the event sender, the value of
    the first `Result` action is returned, any other action is ignored.

    :return: the value of the first `Result` action.
    :raises ImportAborted: if the deadline passes without a result.
    """
    # both variables live in the enclosing function and are adjusted here
    nonlocal deadline
    nonlocal dead_adjusted
    while utc() < deadline:
        # After exit of updater: adjust the deadline once
        if not updater.is_alive() and not dead_adjusted:
            log.debug("Import process done or dead. Adjust deadline.")
            # from now on only 30 more seconds are granted to pick up pending actions
            deadline = utc() + timedelta(seconds=30)
            dead_adjusted = True
        try:
            # presumably a blocking queue get with `stale` as timeout - confirm against run_async
            action = await run_async(read.get, True, stale)
            if isinstance(action, EmitAnalyticsEvent):
                await event_sender.capture(action.event)
            elif isinstance(action, Result):
                return action.get_value()
        except Empty:
            # empty is fine
            pass
    raise ImportAborted(f"Import process died. (ExitCode: {updater.exitcode})")
async def on_stop() -> None:
    """
    Stop all services one after the other.

    The SystemStopped event, carrying the time since start in whole seconds,
    is sent after api, task handler and cli have been stopped. The event
    sender itself is stopped last.
    """
    uptime = utc() - info.started_at
    uptime_seconds = int(uptime.total_seconds())
    await api.stop()
    await task_handler.stop()
    await cli.stop()
    await event_sender.core_event(CoreEvent.SystemStopped, total_seconds=uptime_seconds)
    await event_emitter.stop()
    await worker_task_queue.stop()
    await scheduler.stop()
    await subscriptions.stop()
    await db.stop()
    await event_sender.stop()
def test_transform() -> None:
    """The duration to datetime transformer coerces durations and renders timestamps back as durations."""
    age = TransformKind("dt", "duration", "datetime", "duration_to_datetime", True)
    kinds = {"duration": DurationKind("duration"), "datetime": DateTimeKind("datetime")}
    age.resolve(kinds)

    # check valid is not allowed on synthetic values (they do not get imported!)
    with pytest.raises(AttributeError):
        age.check_valid("3s")

    # age transforms a duration into a timestamp before now
    one_day_old = from_utc(age.coerce("1d"))
    # difference between 1d and computed utc-24h should be less than 2 seconds (depending on test env less)
    drift = one_day_old - (utc() - timedelta(hours=24))
    assert drift.total_seconds() <= 2

    # transform back from underlying timestamp to timedelta
    expectations = [
        (123, "2min3s"),
        (123456, "1d10h"),
        (1234567, "14d6h"),
        (123456789, "3yr10mo"),
    ]
    for seconds_ago, rendered in expectations:
        assert age.transform(utc_str(utc() - timedelta(seconds=seconds_ago))) == rendered
async def check_outdated_unassigned_tasks(self) -> None:
    """
    Handle all tasks whose deadline has passed and retry the assignment of waiting tasks.

    Outstanding tasks past their deadline are handed to the retry logic.
    Unassigned tasks past their deadline are given up: their callback receives
    an exception. All remaining unassigned tasks are offered to the workers again.
    """
    now = utc()
    # both snapshots are taken before the lock is acquired
    overdue_in_progress = [ip for ip in self.outstanding_tasks.values() if ip.deadline < now]
    overdue_on_hold = [ns for ns in self.unassigned_tasks.values() if ns.deadline < now]
    async with self.lock:
        await self.__retry_tasks(overdue_in_progress)
        for on_hold in overdue_on_hold:
            log.info(f"No worker for task: {on_hold.task.id}. Give up.")
            set_future_result(on_hold.task.callback, Exception(f"No worker for task: {on_hold.task.name}"))
            self.unassigned_tasks.pop(on_hold.task.id, None)
        # unassigned_task now only holds valid tasks
        # iterate over a copy, since entries are removed while looping
        for waiting in list(self.unassigned_tasks.values()):
            assigned = await self.__add_task(waiting.task, waiting.retry_counter)
            if assigned:
                self.unassigned_tasks.pop(waiting.task.id, None)
def __init__(
    self,
    identifier: str,
    name: Optional[str] = None,
    some_int: int = 0,
    some_string: str = "hello",
    # NOTE(review): this default is evaluated once, when the function is defined.
    # All instances created without an explicit value share that same timestamp.
    now_is: datetime = utc(),
    ctime: Optional[datetime] = None,
) -> None:
    """
    Create the resource: the identifier is handed to the base class, all other values are stored as given.
    """
    super().__init__(identifier)
    # optional values
    self.name = name
    self.ctime = ctime
    # values with defaults
    self.some_int = some_int
    self.some_string = some_string
    self.now_is = now_is
def __init__(
    self,
    sub: MultiDiGraph,
    maybe_root_id: Optional[str] = None,
    visited_nodes: Optional[Set[Any]] = None,
    visited_edges: Optional[Set[EdgeKey]] = None,
):
    """
    Wrap the given graph and remember the time of creation.

    :param sub: the graph to access.
    :param maybe_root_id: optional id of the root node.
    :param visited_nodes: nodes already visited; a missing or empty set is replaced by a new empty set.
    :param visited_edges: edges already visited; a missing or empty set is replaced by a new empty set.
    """
    super().__init__()
    self.g = sub
    self.nodes = sub.nodes()
    self.maybe_root_id = maybe_root_id
    self.resolved = False
    self.visited_nodes: Set[object] = visited_nodes or set()
    self.visited_edges: Set[EdgeKey] = visited_edges or set()
    # creation time, as datetime and in its string form
    self.at = utc()
    self.at_json = utc_str(self.at)
async def wait_for_message(
    all_events: List[Message],
    message_type: str,
    t: Type[AnyT],
    timeout: timedelta = timedelta(seconds=1),
) -> AnyT:
    """
    Wait until a message of the given class and message type is available in `all_events`.

    The list is polled every 0.1 seconds. This is a loop on purpose: polling
    via recursion grows the await chain by one frame per poll and fails with a
    RecursionError for long timeouts.

    :param all_events: the list to inspect; presumably filled concurrently by the caller.
    :param message_type: the expected `message_type` of the message.
    :param t: the expected class of the message.
    :param timeout: how long to wait, counted from the time of the call.
    :return: the first matching message.
    :raises TimeoutError: if no matching message is found and the timeout has passed.
    """
    stop_at = utc() + timeout
    while True:
        result = first(lambda m: isinstance(m, t) and m.message_type == message_type, all_events)  # type: ignore
        if result:
            return result  # type: ignore
        # the deadline is only checked after a lookup, so the list is inspected at least once
        if utc() > stop_at:
            raise TimeoutError()
        await asyncio.sleep(0.1)
def replacements(**env: str) -> Dict[str, str]:
    """
    Compute the time related replacement values as a CIKeyDict.

    The reference time is parsed from `env["now"]` when that entry is present
    and non-empty, otherwise the current UTC time is used. Date based values
    are derived from the UTC date; clock and time zone values from the
    reference time converted to the local zone, falling back to the
    unconverted time if the conversion fails.

    :param env: optional environment; only the key `now` is read.
    :return: the replacement values by placeholder name.
    """
    now_string = env.get("now")
    utc_now = from_utc(now_string) if now_string else utc()
    today = utc_now.date()
    try:
        local_now = utc_now.astimezone(get_localzone())
    except Exception:
        local_now = utc_now

    def upcoming(weekday: int) -> str:
        # date of the next occurrence of the given weekday - today, if today is that weekday
        days_ahead = (weekday - today.weekday()) % 7
        return (today + timedelta(days_ahead)).isoformat()

    return CIKeyDict(
        UTC=utc_str(utc_now),
        NOW=local_now.strftime("%Y-%m-%dT%H:%M:%S%z"),
        TODAY=today.strftime("%Y-%m-%d"),
        TOMORROW=(today + timedelta(days=1)).isoformat(),
        YESTERDAY=(today + timedelta(days=-1)).isoformat(),
        YEAR=today.strftime("%Y"),
        MONTH=today.strftime("%m"),
        DAY=today.strftime("%d"),
        TIME=local_now.strftime("%H:%M:%S"),
        HOUR=local_now.strftime("%H"),
        MINUTE=local_now.strftime("%M"),
        SECOND=local_now.strftime("%S"),
        TZ_OFFSET=local_now.strftime("%z"),
        TZ=local_now.strftime("%Z"),
        MONDAY=upcoming(calendar.MONDAY),
        TUESDAY=upcoming(calendar.TUESDAY),
        WEDNESDAY=upcoming(calendar.WEDNESDAY),
        THURSDAY=upcoming(calendar.THURSDAY),
        FRIDAY=upcoming(calendar.FRIDAY),
        SATURDAY=upcoming(calendar.SATURDAY),
        SUNDAY=upcoming(calendar.SUNDAY),
    )
def send_analytics(run_id: str, event: str):
    """
    Send a single analytics event, unless RESOTOCORE_ANALYTICS_OPT_OUT is defined in the environment.

    The public api key is fetched over http and set on the client and all of its consumers.

    :param run_id: attached to the identify call and to the event properties.
    :param event: the name of the event to capture.
    """
    if "RESOTOCORE_ANALYTICS_OPT_OUT" in os.environ:
        return

    # the client is created with a placeholder key, the real key is set once it has been fetched
    client = Client(
        api_key="n/a",
        host="https://analytics.some.engineering",
        flush_interval=0.5,
        max_retries=3,
        gzip=True,
    )
    response = requests.get("https://cdn.some.engineering/posthog/public_api_key")
    api_key = response.text.strip()
    client.api_key = api_key
    for consumer in client.consumers:
        consumer.api_key = api_key

    system_id = "dot-rendering-script"
    now = utc()
    client.identify(system_id, {"run_id": run_id, "created_at": now})
    client.capture(
        distinct_id=system_id,
        event=event,
        properties={"run_id": run_id},  # type: ignore
        timestamp=now,
    )
async def delete_running_task(self, task: RunningTask) -> None:
    """
    Remove a running task: report it, drop it from the running tasks, end it and delete it from the database.

    :param task: the running task to remove.
    """
    descriptor = task.descriptor
    elapsed_seconds = (utc() - task.task_started_at).total_seconds()
    # send analytics event
    await self.event_sender.core_event(
        CoreEvent.TaskCompleted,
        {
            "task_descriptor_id": descriptor.id,
            "task_descriptor_name": descriptor.name,
            "kind": type(descriptor).__name__,
            "success": not task.is_error,
        },
        duration=elapsed_seconds,
        step_count=len(descriptor.steps),
    )
    task.descriptor_alive = False
    # remove tasks from list of running tasks
    self.tasks.pop(task.id, None)
    # a still pending update task is cancelled
    pending_update = task.update_task
    if pending_update and not pending_update.done():
        pending_update.cancel()
    # mark step as error
    task.end()
    # remove from database - failures are ignored on purpose
    with suppress(Exception):
        await self.running_task_db.delete(task.id)
def begin_step(self) -> None:
    """Log the step that begins, reset the step start time and notify the current state."""
    step_name = self.current_step.name
    log.info(f"Task {self.id}: begin step is: {step_name}")
    # update the step started time, whenever a new state is entered
    self.step_started_at = utc()
    self.current_state.step_started()
async def merge_graph_process(
    db: GraphDB,
    event_sender: AnalyticsEventSender,
    args: Namespace,
    content: AsyncGenerator[Union[bytes, Json], None],
    max_wait: timedelta,
    maybe_batch: Optional[str],
) -> GraphUpdate:
    """
    Merge the given content into the graph database via a separate updater process.

    The content is sent in chunks to a `DbUpdaterProcess`, followed by a
    `MergeGraph` action. Concurrently, the actions coming back from the
    process are read until a `Result` arrives. On the way out the process is
    asked to stop (poison pill) and terminated or killed if it stays alive.

    :param db: the graph database; its name is passed to the process and it is used to abort the change.
    :param event_sender: receives the analytics events emitted by the process.
    :param args: passed to the updater process.
    :param content: the content to import, sent to the process in chunks of `BatchSize`.
    :param max_wait: maximum time to wait for the result, counted from the time of the call.
    :param maybe_batch: used as change id if defined, otherwise a new id is generated.
    :return: the graph update reported by the process.
    :raises ImportAborted: if the deadline passes without a result from the process.
    """
    change_id = maybe_batch if maybe_batch else uuid_str()
    write = Queue()  # type: ignore
    read = Queue()  # type: ignore
    updater = DbUpdaterProcess(write, read, args)  # the process reads from our write queue and vice versa
    stale = timedelta(seconds=5).total_seconds()  # consider dead communication after this amount of time
    deadline = utc() + max_wait
    dead_adjusted = False

    async def send_to_child(pa: ProcessAction) -> bool:
        # The action is only sent if the process is alive.
        # Returns the alive state that was observed before sending.
        alive = updater.is_alive()
        if alive:
            await run_async(write.put, pa, True, stale)
        return alive

    def read_results() -> Task:  # type: ignore # pypy
        # Start reading the result queue in a separate asyncio task.
        async def read_forever() -> GraphUpdate:
            # both variables live in the enclosing function and are adjusted here
            nonlocal deadline
            nonlocal dead_adjusted
            while utc() < deadline:
                # After exit of updater: adjust the deadline once
                if not updater.is_alive() and not dead_adjusted:
                    log.debug("Import process done or dead. Adjust deadline.")
                    deadline = utc() + timedelta(seconds=30)
                    dead_adjusted = True
                try:
                    # presumably a blocking queue get with `stale` as timeout - confirm against run_async
                    action = await run_async(read.get, True, stale)
                    if isinstance(action, EmitAnalyticsEvent):
                        await event_sender.capture(action.event)
                    elif isinstance(action, Result):
                        return action.get_value()
                except Empty:
                    # empty is fine
                    pass
            raise ImportAborted(f"Import process died. (ExitCode: {updater.exitcode})")

        return asyncio.create_task(read_forever())

    task: Optional[Task] = None  # type: ignore # pypy
    result: Optional[GraphUpdate] = None
    try:
        reset_process_start_method()  # other libraries might have tampered the value in the mean time
        updater.start()
        task = read_results()  # concurrently read result queue
        chunked: Stream = stream.chunks(content, BatchSize)
        async with chunked.stream() as streamer:  # pylint: disable=no-member
            async for lines in streamer:
                if not await send_to_child(ReadElement(lines)):
                    # in case the child is dead, we should stop
                    break
        await send_to_child(MergeGraph(db.name, change_id, maybe_batch is not None))
        result = cast(GraphUpdate, await task)  # wait for final result
        return result
    finally:
        # the reader task is cancelled if it is still running (e.g. in case of an error)
        if task is not None and not task.done():
            task.cancel()
        if not result:
            # make sure the change is aborted in case of transaction
            log.info(f"Abort update manually: {change_id}")
            await db.abort_update(change_id)
        # stop the process with increasing force: poison pill -> terminate -> kill
        await send_to_child(PoisonPill())
        await run_async(updater.join, stale)
        if updater.is_alive():
            log.warning(f"Process is still alive after poison pill. Terminate process {updater.pid}")
            with suppress(Exception):
                updater.terminate()
            await asyncio.sleep(3)
        if updater.is_alive():
            log.warning(f"Process is still alive after terminate. Kill process {updater.pid}")
            with suppress(Exception):
                updater.kill()
            await asyncio.sleep(3)
        # the process is only closed once it is no longer alive
        if not updater.is_alive():
            with suppress(Exception):
                updater.close()
def from_now(result: str) -> str:
    """Return the string form of the current UTC time shifted by the given duration string."""
    now = utc()
    return utc_str(now + duration(result))
def test_from_now() -> None:
    """Rendering `from_now` on a duration of 4h yields a timestamp 4 hours from now (within a second)."""
    rendered = render_template("{{delta.from_now}}", {"delta": "4h"})
    expected = utc() + timedelta(hours=4)
    drift_seconds = abs((expected - from_utc(rendered)).total_seconds())
    assert drift_seconds < 1
def connect(
    cls, args: Namespace, timeout: timedelta, sleep_time: float = 5, verify: Union[str, bool, None] = None
) -> Tuple[bool, SystemData, StandardDatabase]:
    """
    Connect to the graph database and retry until the connection can be established.

    If the database can be reached but access fails with one of the error codes
    11, 1228 or 1703, an attempt is made to create user and database via the root account.

    :param args: the parsed arguments; the graphdb_* options are read from here.
    :param timeout: once this time has passed, a server error leads to shutdown_process(1).
    :param sleep_time: seconds to sleep between two attempts.
           Note: the log messages state 5 seconds, independent of this value.
    :param verify: passed to the database client.
    :return: tuple of: flag if the system data has been created by this call, the system data, the database.
    """
    deadline = utc() + timeout
    db = cls.client(args, verify)

    def create_database() -> None:
        # Create user and database via the root account. Any failure is logged and not raised.
        try:
            # try to access the system database with default credentials.
            # this only works if arango has been started with default settings.
            http_client = ArangoHTTPClient(args.graphdb_request_timeout, not args.graphdb_no_ssl_verify)
            root_pw = args.graphdb_root_password
            secure_root = not args.graphdb_bootstrap_do_not_secure
            root_db = ArangoClient(hosts=args.graphdb_server, http_client=http_client).db(password=root_pw)
            root_db.echo()  # this call will fail, if we are not allowed to access the system db
            user = args.graphdb_username
            passwd = args.graphdb_password
            database = args.graphdb_database
            change = False
            if not root_db.has_user(user):
                log.info("Configured graph db user does not exist. Create it.")
                root_db.create_user(user, passwd, active=True)
                change = True
            if not root_db.has_database(database):
                log.info("Configured graph db database does not exist. Create it.")
                root_db.create_database(
                    database,
                    [{"username": user, "password": passwd, "active": True, "extra": {"generated": "resoto"}}],
                )
                change = True
            # The root account is only secured, if something has been created, securing is not disabled,
            # root has an empty password and the user password is neither empty nor "test".
            if change and secure_root and root_pw == "" and passwd != "" and passwd not in {"test"}:
                root_db.replace_user("root", passwd, True)
                log.info(
                    "Database is using an empty password. "
                    "Secure the root account with the provided user password. "
                    "Login to the Resoto database via provided username and password. "
                    "Login to the System database via `root` and provided password!"
                )
            if not change:
                log.info("Not allowed to access database, while user and database exist. Wrong password?")
        except Exception as ex:
            log.error(
                "Database or user does not exist or does not have enough permissions. "
                f"Attempt to create user/database via default system account is not possible. Reason: {ex}. "
                "You can provide the password of the root user via --graphdb-root-password to setup "
                "a Resoto user and database automatically."
            )

    def system_data() -> Tuple[bool, SystemData]:
        # Read the system data from the database - create and insert it, if it does not exist.
        # The flag in the result is True, if the system data has been created.
        def insert_system_data() -> SystemData:
            system = SystemData(uuid_str(), utc(), 1)
            log.info(f"Create new system data entry: {system}")
            db.insert_document("system_data", {"_key": "system", **to_js(system)}, overwrite=True)
            return system

        if not db.has_collection("system_data"):
            db.create_collection("system_data")

        sys_js = db.collection("system_data").get("system")
        return (True, insert_system_data()) if not sys_js else (False, from_js(sys_js, SystemData))

    while True:
        try:
            db.echo()
            try:
                db_version = int(db.required_db_version())
            except Exception as ex:
                # a version that can not be retrieved is not considered a problem
                log.warning(f"Not able to retrieve version of arangodb. Reason: {ex}. Continue.")
            else:
                # 30802 corresponds to version 3.8.2 (see error message)
                if db_version < 30802:
                    raise RequiredDependencyMissingError("Need arangodb in version 3.8.2 or later")

            created, sys_data = system_data()
            return created, sys_data, db
        except ArangoServerError as ex:
            if utc() > deadline:
                log.error("Can not connect to database. Giving up.")
                shutdown_process(1)
            elif ex.error_code in (11, 1228, 1703):
                # https://www.arangodb.com/docs/stable/appendix-error-codes.html
                # This means we can reach the database, but are either not allowed to access it
                # or the related user and or database could not be found.
                # We assume the database does not exist and try to create it.
                create_database()
            else:
                log.warning(f"Problem accessing the graph database: {ex}. Trying again in 5 seconds.")
            # Retry directly after the first attempt
            sleep(sleep_time)
        except (RequestException, ConnectionError) as ex:
            log.warning(f"Can not access database. Trying again in 5 seconds: {ex}")
            sleep(sleep_time)
def from_duration(value: str, now: Optional[datetime] = None) -> str:
    """
    Compute the timestamp `now + duration` and render it in the DateTimeKind format.

    :param value: the duration as string.
    :param now: the reference time; the current UTC time is used if not defined.
    :return: the computed timestamp, formatted with `DateTimeKind.Format`.
    """
    # The reference time is resolved per call on purpose. A `utc()` default in the
    # signature is evaluated only once, when the function is defined, and would
    # freeze "now" at import time.
    reference = utc() if now is None else now
    # in case of duration, compute the timestamp as: now + duration
    delta = duration(value)
    instant = reference + delta
    return instant.strftime(DateTimeKind.Format)
from arango.database import StandardDatabase from resotolib.args import ArgumentParser from resotolib.jwt import add_args as jwt_add_args from resotolib.utils import iec_size_format from resotocore import async_extensions, version from resotocore.analytics import AnalyticsEventSender from resotocore.db.db_access import DbAccess from resotocore.durations import parse_duration from resotocore.model.adjust_node import DirectAdjuster from resotocore.util import utc log = logging.getLogger(__name__) SystemInfo = namedtuple("SystemInfo", ["version", "cpus", "mem_available", "mem_total", "inside_docker", "started_at"]) started_at = utc() def system_info() -> SystemInfo: mem = psutil.virtual_memory() return SystemInfo( version=version(), cpus=mp.cpu_count(), mem_available=iec_size_format(mem.available), mem_total=iec_size_format(mem.total), inside_docker=os.path.exists("/.dockerenv"), # this file is created by the docker runtime started_at=started_at, ) def parse_args(args: Optional[List[str]] = None, namespace: Optional[str] = None) -> Namespace: