def test_auth_logout_api_token_removes_api_token(patch_post, cloud_api):
    patch_post(dict(data=dict(tenant="id")))

    client = prefect.Client(api_token="foo")
    client._save_local_settings({"api_token": client._api_token})

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="Y")
    assert result.exit_code == 0
    assert "This will remove your API token" in result.output

    client = prefect.Client()
    assert "api_token" not in client._load_local_settings()
def deploy(
    self,
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    **kwargs: Any,
) -> str:
    """
    Deploy the flow to Prefect Cloud; if no storage is present on the Flow,
    the default value from your config will be used and initialized with
    `**kwargs`.

    Args:
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the schedule
            to inactive in the database to prevent auto-scheduling runs (if the
            Flow has a schedule). Defaults to `True`. This can be changed later.
        - **kwargs (Any): if instantiating a Storage object from default settings,
            these keyword arguments will be passed to the initialization method
            of the default Storage class

    Returns:
        - str: the ID of the flow that was deployed
    """
    if self.storage is None:
        self.storage = get_default_storage_class()(**kwargs)

    client = prefect.Client()
    deployed_flow = client.deploy(
        flow=self,
        build=build,
        project_name=project_name,
        set_schedule_active=set_schedule_active,
    )
    return deployed_flow
def find_failing_flows():
    client = prefect.Client(api_server=API_SERVER)
    flow_groups_query = {"query": {"flow_group": {"id"}}}
    response = graphql_query(client, flow_groups_query)
    flow_group_ids = [data["id"] for data in response["data"]["flow_group"]]

    failing_flows = []
    for flow_group_id in flow_group_ids:
        flow_results = run_and_parse_flow_group_results(client, flow_group_id)
        if not flow_results:
            continue
        if not flow_results.is_recently_run():
            continue
        if flow_results.is_recent_success():
            continue

        delta_seconds = flow_results.last_success_delta_seconds()
        if not delta_seconds:
            failing_flows.append(flow_results)
            continue

        last_success_hours = delta_seconds / 3600
        if last_success_hours > 24:
            failing_flows.append(flow_results)

    return failing_flows
def deploy(
    self,
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
) -> str:
    """
    Deploy the flow to Prefect Cloud

    Args:
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the schedule
            to inactive in the database to prevent auto-scheduling runs (if the
            Flow has a schedule). Defaults to `True`. This can be changed later.

    Returns:
        - str: the ID of the flow that was deployed
    """
    client = prefect.Client()
    deployed_flow = client.deploy(
        flow=self,
        build=build,
        project_name=project_name,
        set_schedule_active=set_schedule_active,
    )
    return deployed_flow
def _query_for_flows(
    where: dict,
    order_by: dict = None,
    error_on_empty: bool = True,
) -> List[dict]:
    """
    Query for flow data necessary to initialize `Flow` instances with
    `_Flow.from_flow_data`.

    Args:
        - where (required): The Hasura `where` clause to filter by
        - order_by (optional): An optional Hasura `order_by` clause to order
            results by
        - error_on_empty (optional): If `True` and no flows are found, a
            `ValueError` will be raised

    Returns:
        A list of dicts of flow information
    """
    client = prefect.Client()

    query_args = {"where": where}
    if order_by is not None:
        query_args["order_by"] = order_by

    flow_query = {
        "query": {
            with_args("flow", query_args): {
                "id": True,
                "settings": True,
                "run_config": True,
                "serialized_flow": True,
                "name": True,
                "archived": True,
                "project": {"name"},
                "core_version": True,
                "storage": True,
                "flow_group": {"labels"},
            }
        }
    }

    result = client.graphql(flow_query)

    flows = result.get("data", {}).get("flow", None)
    if flows is None:
        raise ValueError(
            f"Received bad result while querying for flows where {where}: "
            f"{result}"
        )

    if not flows:  # Empty list
        if error_on_empty:
            raise ValueError(
                f"No results found while querying for flows where {where!r}"
            )
        return []

    # Return a list
    return flows
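# A minimal usage sketch for `_query_for_flows` above; the flow name and
# project name are hypothetical placeholders. The `where` clause uses
# Hasura's boolean-expression syntax, and `EnumValue` keeps the `desc`
# ordering keyword unquoted in the rendered GraphQL.
from prefect.utilities.graphql import EnumValue

flows = _query_for_flows(
    where={
        "name": {"_eq": "example-flow"},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": "example-project"}},
    },
    order_by={"created": EnumValue("desc")},
    error_on_empty=False,
)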
def get_logs(
    self,
    start_time: pendulum.DateTime = None,
    end_time: pendulum.DateTime = None,
) -> List["FlowRunLog"]:
    """
    Get logs for this flow run from `start_time` to `end_time`.

    Args:
        - start_time (optional): A time to start the log query at, useful for
            limiting the scope. If not provided, all logs up to `updated_at`
            are retrieved.
        - end_time (optional): A time to end the log query at. By default, this
            is set to `self.updated_at` which is the last time that the flow run
            was updated in the backend before this object was created.

    Returns:
        A list of `FlowRunLog` objects sorted by timestamp
    """
    client = prefect.Client()

    end_time = end_time or self.updated_at

    logs_query = {
        with_args(
            "logs",
            {
                "order_by": {EnumValue("timestamp"): EnumValue("asc")},
                "where": {
                    "_and": [
                        {"timestamp": {"_lte": end_time.isoformat()}},
                        (
                            {"timestamp": {"_gt": start_time.isoformat()}}
                            if start_time
                            else {}
                        ),
                    ]
                },
            },
        ): {"timestamp": True, "message": True, "level": True}
    }

    result = client.graphql(
        {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": self.flow_run_id}},
                    },
                ): logs_query
            }
        }
    )

    # Unpack the result
    logs = result.get("data", {}).get("flow_run", [{}])[0].get("logs", [])

    return [FlowRunLog.from_dict(log) for log in logs]
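# Hypothetical usage sketch for `get_logs` above, assuming `flow_run` is an
# instance of the view class the method belongs to. Each `FlowRunLog` exposes
# `timestamp`, `level`, and `message` fields.
import pendulum

for log in flow_run.get_logs(start_time=pendulum.now("utc").subtract(hours=1)):
    print(f"{log.timestamp} [{log.level}] {log.message}")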
def test_auth_logout_not_confirm(patch_post, cloud_api):
    patch_post(dict(data=dict(auth_info=dict(tenant_id="id"))))

    client = prefect.Client(api_key="foo")
    client.save_auth_to_disk()

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="N")
    assert result.exit_code == 1
def _get_next_task_run_start_time(flow_run_id: str) -> Optional[pendulum.DateTime]:
    """
    Queries task runs associated with a flow run to get the earliest state start
    time. This time _may_ be in the past.

    Long retries are handled by exiting flow execution, leaving the flow run in
    a 'Running' state and attaching a start time to the task runs that need to
    be retried. This function checks for a long retry by querying for task runs
    that have a start time set. This allows us to wait until this run time is
    reached before starting flow run execution. If we started execution, the
    runner would just walk the DAG and exit since the task run is not ready to
    begin yet.

    Args:
        - flow_run_id: The flow run the task runs belong to

    Returns:
        None: If no scheduled task runs are found, otherwise
        pendulum.DateTime: The earliest scheduled task run start time
    """
    client = prefect.Client()
    result = client.graphql(
        {
            "query": {
                with_args(
                    "task_run",
                    {
                        "where": {
                            "state_start_time": {"_is_null": False},
                            "flow_run_id": {"_eq": flow_run_id},
                            "flow_run": {
                                # Only include flow runs in a 'Running' state to
                                # reduce the scope of the query to retrying flow
                                # runs
                                "state": {"_eq": "Running"}
                            },
                        }
                    },
                ): {"state_start_time"}
            }
        }
    )

    task_runs = result.get("data", {}).get("task_run")
    if task_runs is None:
        raise ValueError(f"Unexpected result while querying for task runs: {result}")
    elif not task_runs:
        return None  # No scheduled task runs

    task_run = min(task_runs, key=lambda task_run: task_run.state_start_time)
    next_start_time = task_run.state_start_time
    return cast(pendulum.DateTime, pendulum.parse(next_start_time))
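# A sketch of how a caller might use `_get_next_task_run_start_time` above to
# sleep through a long retry before resuming flow run execution; the flow run
# id is a hypothetical placeholder.
import time

import pendulum

next_start = _get_next_task_run_start_time("00000000-0000-0000-0000-000000000000")
if next_start is not None:
    wait_seconds = (next_start - pendulum.now("utc")).total_seconds()
    if wait_seconds > 0:
        time.sleep(wait_seconds)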
def ensure_started(self) -> None:
    """Ensure the log manager is started"""
    if self.thread is None:
        self.client = prefect.Client()
        self.logging_period = context.config.cloud.logging_heartbeat
        self.thread = threading.Thread(
            target=self._write_logs_loop,
            name="prefect-log-manager",
            daemon=True,
        )
        self.thread.start()
        atexit.register(self._on_shutdown)
def ensure_project_exists():
    client = prefect.Client()
    print(f"Ensuring prefect project named '{prefect_project_name}' exists.")
    try:
        client.create_project(project_name=prefect_project_name)
        print(f"{prefect_project_name} has been created.")
    except prefect.utilities.exceptions.ClientError as ce:
        if "Uniqueness violation" in str(ce):
            print(f"Project: {prefect_project_name} exists")
        else:
            raise ce
def create_tenant(name, slug):
    """
    This command creates a tenant for the Prefect Server

    \b
    Options:
        --name, -n  TEXT    The name of a tenant to create
        --slug, -s  TEXT    The slug of a tenant to create
    """
    client = prefect.Client()
    tenant_id = client.create_tenant(name=name, slug=slug)

    click.secho(f"Tenant created with ID: {tenant_id}", fg="green")
def _fail_flow_run(flow_run_id: str, message: str) -> None:
    """
    Set a flow run to a 'Failed' state and write a failure message log
    """
    client = prefect.Client()
    client.set_flow_run_state(
        flow_run_id=flow_run_id, state=prefect.engine.state.Failed(message)
    )
    client.write_run_logs(
        [
            dict(
                flow_run_id=flow_run_id,  # type: ignore
                name="prefect.backend.execution",
                message=message,
                level="ERROR",
            )
        ]
    )
def deploy(
    self,
    project_name: str,
    build: bool = True,
    labels: List[str] = None,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    **kwargs: Any,
) -> str:
    """
    Deploy the flow to Prefect Cloud; if no storage is present on the Flow,
    the default value from your config will be used and initialized with
    `**kwargs`.

    Args:
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - labels (List[str], optional): a list of labels to add to this Flow's
            environment; useful for associating Flows with individual Agents; see
            http://docs.prefect.io/cloud/agent/overview.html#flow-affinity-labels
        - set_schedule_active (bool, optional): if `False`, will set the schedule
            to inactive in the database to prevent auto-scheduling runs (if the
            Flow has a schedule). Defaults to `True`. This can be changed later.
        - version_group_id (str, optional): the UUID version group ID to use for
            versioning this Flow in Cloud; if not provided, the version group ID
            associated with this Flow's project and name will be used.
        - **kwargs (Any): if instantiating a Storage object from default settings,
            these keyword arguments will be passed to the initialization method
            of the default Storage class

    Returns:
        - str: the ID of the flow that was deployed
    """
    if self.storage is None:
        self.storage = get_default_storage_class()(**kwargs)

    if isinstance(self.storage, prefect.environments.storage.Local):
        self.environment.labels.add("local")
        self.environment.labels.add(slugify(self.name))

    if labels:
        self.environment.labels.update(labels)

    client = prefect.Client()
    deployed_flow = client.deploy(
        flow=self,
        build=build,
        project_name=project_name,
        set_schedule_active=set_schedule_active,
        version_group_id=version_group_id,
    )
    return deployed_flow
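# Hypothetical usage sketch for the `deploy` method above; `my_flow` and the
# project name are placeholders. Extra keyword arguments would be forwarded to
# the default Storage class if the flow has no storage configured.
flow_id = my_flow.deploy(
    project_name="example-project",
    labels=["local-only"],
    set_schedule_active=False,
)
print(f"Deployed flow with id {flow_id}")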
def logger_helper():
    time.sleep(6)
    TOKEN = os.getenv('AZURE_TMP_TOKEN')
    client = prefect.Client(api_token=TOKEN)
    client.login_to_tenant(tenant_slug="km-inc")
    client.graphql(
        """
        mutation {
            create_flow_run(
                input: {
                    flow_id: "61d6071b-cd81-4505-877b-18081c129b4b",
                }
            ) {
                id
            }
        }
        """
    )
def get_task_run_ids(self) -> List[str]:
    """
    Get all task run ids associated with this flow run. Lazily loaded at call
    time then cached for future calls.

    Returns:
        A list of string task run ids
    """
    # Return the cached value immediately if it exists
    if self._task_run_ids:
        return self._task_run_ids

    client = prefect.Client()

    task_query = {
        "query": {
            with_args(
                "task_run",
                {
                    "where": {
                        "flow_run_id": {"_eq": self.flow_run_id},
                    }
                },
            ): {
                "id": True,
            }
        }
    }
    result = client.graphql(task_query)
    task_runs = result.get("data", {}).get("task_run", None)

    if task_runs is None:
        logger.warning(
            f"Failed to load task run ids for flow run {self.flow_run_id}: "
            f"{result}"
        )
        # Fall back to an empty list so the comprehension below does not
        # raise while iterating `None`
        task_runs = []

    task_run_ids = [task_run["id"] for task_run in task_runs]

    # If the flow run is done, we can safely cache this value
    if self.state.is_finished():
        self._task_run_ids = task_run_ids

    return task_run_ids
def test_auth_logout_api_token_with_tenant_removes_tenant_id(patch_posts, cloud_api):
    patch_posts(
        [
            # Login to tenant call during setup
            dict(data=dict(tenant=[dict(id=str(uuid.uuid4()))])),
            # Access token retrieval call during setup
            dict(
                data=dict(
                    switch_tenant=dict(
                        access_token="access-token",
                        expires_at=pendulum.now().isoformat(),
                        refresh_token="refresh-token",
                    )
                )
            ),
            # Login to tenant call during logout
            dict(data=dict(tenant=[dict(id=str(uuid.uuid4()))])),
            # Access token retrieval call during logout
            dict(
                data=dict(
                    switch_tenant=dict(
                        access_token="access-token",
                        expires_at=pendulum.now().isoformat(),
                        refresh_token="refresh-token",
                    )
                )
            ),
        ]
    )

    client = prefect.Client()
    client._save_local_settings(
        {"api_token": "token", "active_tenant_id": str(uuid.uuid4())}
    )

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="Y")

    assert result.exit_code == 0

    settings = client._load_local_settings()

    # Does not remove the API token
    assert "This will remove your API token" not in result.output
    assert "api_token" in settings

    # Removes the tenant id
    assert "Logged out from tenant" in result.output
    assert "active_tenant_id" not in settings
def _query_for_flow_run(where: dict) -> dict:
    client = prefect.Client()

    flow_run_query = {
        "query": {
            with_args("flow_run", {"where": where}): {
                "id": True,
                "name": True,
                "flow_id": True,
                "serialized_state": True,
                "states": {"timestamp", "serialized_state"},
                "labels": True,
                "parameters": True,
                "context": True,
                "updated": True,
                "run_config": True,
            }
        }
    }

    result = client.graphql(flow_run_query)
    flow_runs = result.get("data", {}).get("flow_run", None)

    if flow_runs is None:
        raise ValueError(
            f"Received bad result while querying for flow runs where {where}: "
            f"{result}"
        )

    if not flow_runs:
        raise ValueError(
            f"No flow runs found while querying for flow runs where {where}"
        )

    if len(flow_runs) > 1:
        raise ValueError(
            f"Found multiple ({len(flow_runs)}) flow runs while querying for flow "
            f"runs where {where}: {flow_runs}"
        )

    return flow_runs[0]
def create_tenant(name, slug):
    """
    This command creates a tenant for the Prefect Server

    \b
    Options:
        --name, -n  TEXT    The name of a tenant to create
        --slug, -s  TEXT    The slug of a tenant to create
    """
    # client = prefect.Client()
    # if not client.get_available_tenants():
    #     tenant_id = client.create_tenant(name=name, slug=slug)
    #     click.secho(f"Tenant created with ID: {tenant_id}", fg="green")
    # print(ascii_welcome())

    started = False
    with prefect.utilities.configuration.set_temporary_config(
        {
            "cloud.api": "http://apollo:4200",
            "cloud.graphql": "http://apollo:4200/graphql",
            "backend": "server",
        }
    ):
        while not started:
            try:
                client = prefect.Client()
                client.graphql("query{hello}", retry_on_api_error=False)
                started = True
                # Create a default tenant if no tenant exists
                if not client.get_available_tenants():
                    client.create_tenant(name="default")
                print(ascii_welcome())
            except Exception:
                time.sleep(0.5)

        while True:
            time.sleep(0.5)
def start(
    version,
    ui_version,
    skip_pull,
    no_upgrade,
    no_ui,
    postgres_port,
    hasura_port,
    graphql_port,
    ui_port,
    server_port,
    no_postgres_port,
    no_hasura_port,
    no_graphql_port,
    no_ui_port,
    no_server_port,
    use_volume,
    volume_path,
):
    """
    This command spins up all infrastructure and services for the Prefect Core server

    \b
    Options:
        --version, -v       TEXT    The server image versions to use (for example,
                                    '0.1.0' or 'master'). Defaults to `core-a.b.c`
                                    where `a.b.c.` is the version of Prefect Core
                                    currently running.
        --ui-version, -uv   TEXT    The UI image version to use (for example, '0.1.0'
                                    or 'master'). Defaults to `core-a.b.c` where
                                    `a.b.c.` is the version of Prefect Core currently
                                    running.
        --skip-pull                 Flag to skip pulling new images (if available)
        --no-upgrade, -n            Flag to avoid running a database upgrade when
                                    the database spins up
        --no-ui, -u                 Flag to avoid starting the UI

    \b
        --postgres-port     TEXT    Port used to serve Postgres, defaults to '5432'
        --hasura-port       TEXT    Port used to serve Hasura, defaults to '3001'
        --graphql-port      TEXT    Port used to serve the GraphQL API, defaults
                                    to '4001'
        --ui-port           TEXT    Port used to serve the UI, defaults to '8080'
        --server-port       TEXT    Port used to serve the Core server, defaults
                                    to '4200'

    \b
        --no-postgres-port          Disable port map of Postgres to host
        --no-hasura-port            Disable port map of Hasura to host
        --no-graphql-port           Disable port map of the GraphQL API to host
        --no-ui-port                Disable port map of the UI to host
        --no-server-port            Disable port map of the Core server to host

    \b
        --use-volume                Enable the use of a volume for the Postgres service
        --volume-path       TEXT    A path to use for the Postgres volume, defaults
                                    to '~/.prefect/pg_data'
    """
    docker_dir = Path(__file__).parents[0]
    compose_dir_path = docker_dir

    # Remove port mappings if specified
    if (
        no_postgres_port
        or no_hasura_port
        or no_graphql_port
        or no_ui_port
        or no_server_port
        or not use_volume
        or no_ui
    ):
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, "docker-compose.yml")
        shutil.copy2(os.path.join(docker_dir, "docker-compose.yml"), temp_path)

        with open(temp_path, "r") as file:
            y = yaml.safe_load(file)

        if no_postgres_port:
            del y["services"]["postgres"]["ports"]

        if no_hasura_port:
            del y["services"]["hasura"]["ports"]

        if no_graphql_port:
            del y["services"]["graphql"]["ports"]

        if no_ui_port:
            del y["services"]["ui"]["ports"]

        if no_server_port:
            del y["services"]["apollo"]["ports"]

        if not use_volume:
            del y["services"]["postgres"]["volumes"]

        if no_ui:
            del y["services"]["ui"]

        with open(temp_path, "w") as f:
            yaml.safe_dump(y, f)

        compose_dir_path = temp_dir

    # Temporary config set for port allocation
    with set_temporary_config(
        {
            "server.database.host_port": str(postgres_port),
            "server.hasura.host_port": str(hasura_port),
            "server.graphql.host_port": str(graphql_port),
            "server.ui.host_port": str(ui_port),
            "server.host_port": str(server_port),
            "server.database.volume_path": volume_path,
        }
    ):
        env = make_env()

        base_version = prefect.__version__.split("+")
        if len(base_version) > 1:
            default_tag = "master"
        else:
            default_tag = f"core-{base_version[0]}"

        if "PREFECT_SERVER_TAG" not in env:
            env.update(PREFECT_SERVER_TAG=version or default_tag)
        if "PREFECT_UI_TAG" not in env:
            env.update(PREFECT_UI_TAG=ui_version or default_tag)
        if "PREFECT_SERVER_DB_CMD" not in env:
            cmd = (
                "prefect-server database upgrade -y"
                if not no_upgrade
                else "echo 'DATABASE MIGRATIONS SKIPPED'"
            )
            env.update(PREFECT_SERVER_DB_CMD=cmd)

        proc = None
        try:
            if not skip_pull:
                subprocess.check_call(
                    ["docker-compose", "pull"], cwd=compose_dir_path, env=env
                )

            cmd = ["docker-compose", "up"]
            proc = subprocess.Popen(cmd, cwd=compose_dir_path, env=env)

            started = False
            with prefect.utilities.configuration.set_temporary_config(
                {
                    "cloud.api": "http://localhost:4200",
                    "cloud.graphql": "http://localhost:4200/graphql",
                    "backend": "server",
                }
            ):
                while not started:
                    try:
                        client = prefect.Client()
                        client.graphql("query{hello}", retry_on_api_error=False)
                        started = True
                        # Create a default tenant if no tenant exists
                        if not client.get_available_tenants():
                            client.create_tenant(name="default")
                        print(ascii_welcome(ui_port=str(ui_port)))
                    except Exception:
                        time.sleep(0.5)

                while True:
                    time.sleep(0.5)
        except BaseException:
            click.secho(
                "Exception caught; killing services (press ctrl-C to force)",
                fg="white",
                bg="red",
            )
            subprocess.check_output(
                ["docker-compose", "down"], cwd=compose_dir_path, env=env
            )
            if proc:
                proc.kill()
            raise
def start(
    version,
    ui_version,
    skip_pull,
    no_upgrade,
    no_ui,
    external_postgres,
    postgres_url,
    detach,
    postgres_port,
    hasura_port,
    graphql_port,
    ui_port,
    server_port,
    no_postgres_port,
    no_hasura_port,
    no_graphql_port,
    no_ui_port,
    no_server_port,
    use_volume,
    volume_path,
):
    """
    This command spins up all infrastructure and services for the Prefect Core server

    \b
    Options:
        --version, -v           TEXT    The server image versions to use (for example,
                                        '0.1.0' or 'master'). Defaults to `core-a.b.c`
                                        where `a.b.c.` is the version of Prefect Core
                                        currently running.
        --ui-version, -uv       TEXT    The UI image version to use (for example,
                                        '0.1.0' or 'master'). Defaults to `core-a.b.c`
                                        where `a.b.c.` is the version of Prefect Core
                                        currently running.
        --no-upgrade, -n                Flag to avoid running a database upgrade when
                                        the database spins up
        --no-ui, -u                     Flag to avoid starting the UI

    \b
        --external-postgres, -ep        Disable the Postgres service, connect to an
                                        external one instead
        --postgres-url          TEXT    Postgres connection url to use. Expected format
                                        is postgres://<username>:<password>@hostname:<port>/<dbname>

    \b
        --postgres-port         TEXT    Port used to serve Postgres, defaults to '5432'.
                                        Not valid for external Postgres.
        --hasura-port           TEXT    Port used to serve Hasura, defaults to '3000'
        --graphql-port          TEXT    Port used to serve the GraphQL API, defaults
                                        to '4201'
        --ui-port               TEXT    Port used to serve the UI, defaults to '8080'
        --server-port           TEXT    Port used to serve the Core server, defaults
                                        to '4200'

    \b
        --no-postgres-port              Disable port map of Postgres to host. Not valid
                                        for external Postgres.
        --no-hasura-port                Disable port map of Hasura to host
        --no-graphql-port               Disable port map of the GraphQL API to host
        --no-ui-port                    Disable port map of the UI to host
        --no-server-port                Disable port map of the Core server to host

    \b
        --use-volume                    Enable the use of a volume for the Postgres
                                        service. Not valid for external Postgres.
        --volume-path           TEXT    A path to use for the Postgres volume, defaults
                                        to '~/.prefect/pg_data'. Not valid for external
                                        Postgres.

    \b
        --detach, -d                    Detached mode. Runs Server containers in the
                                        background
        --skip-pull                     Flag to skip pulling new images (if available)
    """
    # set external postgres flag if the user has provided `--postgres-url`
    if postgres_url is not None:
        external_postgres = True

    if external_postgres:
        warn_for_postgres_settings_when_using_external_postgres(
            no_postgres_port=no_postgres_port,
            postgres_port=postgres_port,
            use_volume=use_volume,
            volume_path=volume_path,
        )

    compose_path = setup_compose_file(
        no_ui=no_ui,
        external_postgres=external_postgres,
        no_postgres_port=no_postgres_port,
        no_hasura_port=no_hasura_port,
        no_graphql_port=no_graphql_port,
        no_ui_port=no_ui_port,
        no_server_port=no_server_port,
        use_volume=use_volume,
    )
    compose_dir_path = str(Path(compose_path).parent)

    env = setup_compose_env(
        version=version,
        ui_version=ui_version,
        no_upgrade=no_upgrade,
        external_postgres=external_postgres,
        postgres_url=postgres_url,
        postgres_port=postgres_port,
        hasura_port=hasura_port,
        graphql_port=graphql_port,
        ui_port=ui_port,
        server_port=server_port,
        volume_path=volume_path,
    )

    proc = None
    try:
        if not skip_pull:
            subprocess.check_call(
                ["docker-compose", "pull"], cwd=compose_dir_path, env=env
            )

        cmd = ["docker-compose", "up"]
        if detach:
            cmd.append("--detach")
        proc = subprocess.Popen(cmd, cwd=compose_dir_path, env=env)

        started = False
        with prefect.utilities.configuration.set_temporary_config(
            {
                "cloud.api": "http://localhost:4200",
                "cloud.graphql": "http://localhost:4200/graphql",
                "backend": "server",
            }
        ):
            while not started:
                try:
                    # Get a client with the correct server port
                    client = prefect.Client(
                        api_server=f"{config.server.host}:{server_port}"
                    )
                    client.graphql("query{hello}", retry_on_api_error=False)
                    started = True
                    # Create a default tenant if no tenant exists
                    if not client.get_available_tenants():
                        client.create_tenant(name="default")
                    print(ascii_welcome(ui_port=str(ui_port)))
                except Exception:
                    time.sleep(0.5)

        if detach:
            return

        while True:
            time.sleep(0.5)
    except BaseException:
        click.secho(
            "Exception caught; killing services (press ctrl-C to force)",
            fg="white",
            bg="red",
        )
        subprocess.check_output(
            ["docker-compose", "down"], cwd=compose_dir_path, env=env
        )
        if proc:
            proc.kill()
        raise
def _get_flow_run_scheduled_start_time(flow_run_id: str) -> Optional[pendulum.DateTime]:
    """
    Queries for the current scheduled start time of a flow run

    Flow runs store a `scheduled_start_time` as their originally scheduled time
    to start. 'Scheduled' flow run states also store a `start_time` that
    supersedes the time on the flow run object itself. For example, if a flow run
    is scheduled for some time in the future and a user clicks the 'Start Now'
    button in the UI, we'll create a new 'Scheduled' state with an updated start
    time. This allows us to preserve start time history while making the state
    the source of truth.

    This function will always return the start time associated with the most
    recently created 'Scheduled' state, if available. If the most recent
    'Scheduled' state has a null `start_time`, we will fall back to the flow
    run's `scheduled_start_time`.

    Args:
        - flow_run_id: The flow run of interest

    Returns:
        pendulum.DateTime: The most recent scheduled flow run start time

    Raises:
        - ValueError: On API error
        - ValueError: When zero or more than one flow runs are found
    """
    client = prefect.Client()
    result = client.graphql(
        {
            # We cannot query for states directly and must go through the
            # `flow_run` object
            "query": {
                with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                    with_args("states", {"where": {"state": {"_eq": "Scheduled"}}}): {
                        "start_time",
                        "created",
                    },
                    "scheduled_start_time": True,
                }
            }
        }
    )

    flow_runs = result.get("data", {}).get("flow_run")
    if flow_runs is None:
        raise ValueError(
            f"Unexpected result while querying for flow run states: {result}"
        )
    elif len(flow_runs) > 1:
        raise ValueError(
            f"Found more than one flow run matching id {flow_run_id!r}: {result}"
        )
    elif not flow_runs:
        raise ValueError(f"No flow run exists with id {flow_run_id!r}.")

    # Get the one found flow run
    flow_run = flow_runs[0]

    # Get the most recently created state
    states = sorted(
        flow_run.states, key=lambda state: state.get("created", ""), reverse=True
    )
    state = states[0] if states else None

    # Return the most recently created state start time; default to the flow run
    # scheduled start time in case there are no state times
    start_time = (
        state.start_time
        if state and state.get("start_time")
        else flow_run.scheduled_start_time
    )

    if not start_time:
        return None  # There is no scheduled start time in the states or on the run

    return cast(pendulum.DateTime, pendulum.parse(start_time))
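# A minimal sketch showing how the lookup above might gate execution; the flow
# run id is a hypothetical placeholder.
import pendulum

scheduled = _get_flow_run_scheduled_start_time("00000000-0000-0000-0000-000000000000")
if scheduled is not None and scheduled > pendulum.now("utc"):
    print(f"Flow run is not scheduled to start until {scheduled.isoformat()}")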
def generate_flow_run_environ(
    flow_run_id: str,
    flow_id: str,
    run_config: RunConfig,
    run_api_key: str = None,
    include_local_env: bool = False,
) -> Dict[str, str]:
    """
    Utility to generate the environment variables required for a flow run

    Args:
        - flow_run_id: The id for the flow run that will be executed
        - flow_id: The id for the flow of the flow run that will be executed
        - run_config: The run config for the flow run, contributes environment
            variables
        - run_api_key: An optional API key to pass to the flow run for
            authenticating with the backend. If not set, it will be pulled from
            the Client
        - include_local_env: If `True`, the currently available environment
            variables will be passed through to the flow run. Defaults to
            `False` for security.

    Returns:
        - A dictionary of environment variables
    """
    # TODO: Generalize this and use it for all agents

    # Local environment
    env = cast(
        Dict[str, Optional[str]], os.environ.copy() if include_local_env else {}
    )

    # Pass through config options that can be overridden by run config
    env.update(
        to_environment_variables(
            prefect.config,
            include={
                "logging.level",
                "logging.format",
                "logging.datefmt",
                "cloud.send_flow_run_logs",
            },
        )
    )

    # Update with run config environment
    if run_config is not None and run_config.env is not None:
        env.update(run_config.env)

    # Update with config options that cannot be overridden by the run config
    env.update(
        to_environment_variables(
            prefect.config,
            include={"backend", "cloud.api", "cloud.tenant_id"},
        )
    )

    # Pass authentication through
    client = prefect.Client()  # Instantiate a client to get the current API key
    env["PREFECT__CLOUD__API_KEY"] = run_api_key or client.api_key or ""
    # Backwards compat for auth tokens
    env["PREFECT__CLOUD__AUTH_TOKEN"] = (
        run_api_key
        or prefect.config.cloud.agent.get("auth_token")
        or prefect.config.cloud.get("auth_token")
    )

    # Add context information for the run
    env.update(
        {
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run_id,
            "PREFECT__CONTEXT__FLOW_ID": flow_id,
        }
    )

    # Update hardcoded execution variables
    env.update(
        {
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
    )

    # Filter out `None` values
    return {k: v for k, v in env.items() if v is not None}
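# Hypothetical usage sketch for `generate_flow_run_environ` above: launch a
# flow run in a subprocess the way an agent might. The ids are placeholders;
# `include_local_env=True` keeps `PATH` so the `prefect` executable resolves.
import subprocess

from prefect.run_configs import UniversalRun

env = generate_flow_run_environ(
    flow_run_id="00000000-0000-0000-0000-000000000000",  # placeholder
    flow_id="11111111-1111-1111-1111-111111111111",  # placeholder
    run_config=UniversalRun(env={"EXAMPLE_SETTING": "1"}),
    include_local_env=True,
)
subprocess.run(["prefect", "execute", "flow-run"], env=env)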
def register_internal(
    project: str,
    paths: List[str],
    modules: List[str],
    names: List[str] = None,
    labels: List[str] = None,
    force: bool = False,
    in_watch: bool = False,
) -> None:
    """Do a single registration pass, loading, building, and registering the
    requested flows.

    Args:
        - project (str): the project in which to register the flows.
        - paths (List[str]): a list of file paths containing flows.
        - modules (List[str]): a list of python modules containing flows.
        - names (List[str], optional): a list of flow names that should be
            registered. If not provided, all flows found will be registered.
        - labels (List[str], optional): a list of extra labels to set on all
            flows.
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.
        - in_watch (bool, optional): Whether this call resulted from a
            `register --watch` call.
    """
    # Load flows from all files/modules requested
    click.echo("Collecting flows...")
    source_to_flows = collect_flows(paths, modules, in_watch)

    # Filter flows by name if requested
    if names:
        names = set(names)
        source_to_flows = {
            source: [f for f in flows if f.name in names]
            for source, flows in source_to_flows.items()
        }
        missing = names.difference(
            f.name for flows in source_to_flows.values() for f in flows
        )
        if missing:
            missing_flows = "\n".join(f"- {n}" for n in sorted(missing))
            click.secho(
                f"Failed to find the following flows:\n{missing_flows}", fg="red"
            )
            if not in_watch:
                raise TerminalError

    # Iterate through each file, building all storage and registering all flows
    # Log errors as they happen, but only exit once all files have been processed
    client = prefect.Client()
    stats = Counter(registered=0, errored=0, skipped=0)
    for source, flows in source_to_flows.items():
        if flows:
            click.echo(f"Processing {source.location!r}:")
            stats += build_and_register(
                client, flows, project, labels=labels, force=force
            )

    # Output summary message
    registered = stats["registered"]
    skipped = stats["skipped"]
    errored = stats["errored"]
    parts = [click.style(f"{registered} registered", fg="green")]
    if skipped:
        parts.append(click.style(f"{skipped} skipped", fg="yellow"))
    if errored:
        parts.append(click.style(f"{errored} errored", fg="red"))
    msg = ", ".join(parts)
    bar_length = max(60 - len(click.unstyle(msg)), 4) // 2
    bar = "=" * bar_length
    click.echo(f"{bar} {msg} {bar}")

    # If not in a watch call, exit with appropriate exit code
    if not in_watch and stats["errored"]:
        raise TerminalError
def register_internal(
    project: str,
    paths: List[str],
    modules: List[str],
    json_paths: List[str] = None,
    names: List[str] = None,
    labels: List[str] = None,
    force: bool = False,
    schedule: bool = True,
    in_watch: bool = False,
) -> None:
    """Do a single registration pass, loading, building, and registering the
    requested flows.

    Args:
        - project (str): the project in which to register the flows.
        - paths (List[str]): a list of file paths containing flows.
        - modules (List[str]): a list of python modules containing flows.
        - json_paths (List[str]): a list of file paths containing serialized
            flows produced by `prefect build`.
        - names (List[str], optional): a list of flow names that should be
            registered. If not provided, all flows found will be registered.
        - labels (List[str], optional): a list of extra labels to set on all
            flows.
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.
        - schedule (bool, optional): If `True` (default) activates the flow
            schedule upon registering.
        - in_watch (bool, optional): Whether this call resulted from a
            `register --watch` call.
    """
    client = prefect.Client()

    # Determine the project id
    project_id = get_project_id(client, project)

    # Load flows from all files/modules requested
    click.echo("Collecting flows...")
    source_to_flows = collect_flows(
        paths, modules, json_paths, names=names, in_watch=in_watch
    )

    # Iterate through each file, building all storage and registering all flows
    # Log errors as they happen, but only exit once all files have been processed
    stats = Counter(registered=0, errored=0, skipped=0)
    for source, flows in source_to_flows.items():
        click.echo(f"Processing {source.location!r}:")
        stats += build_and_register(
            client, flows, project_id, labels=labels, force=force, schedule=schedule
        )

    # Output summary message
    registered = stats["registered"]
    skipped = stats["skipped"]
    errored = stats["errored"]
    parts = [click.style(f"{registered} registered", fg="green")]
    if skipped:
        parts.append(click.style(f"{skipped} skipped", fg="yellow"))
    if errored:
        parts.append(click.style(f"{errored} errored", fg="red"))
    msg = ", ".join(parts)
    bar_length = max(60 - len(click.unstyle(msg)), 4) // 2
    bar = "=" * bar_length
    click.echo(f"{bar} {msg} {bar}")

    # If not in a watch call, exit with appropriate exit code
    if not in_watch and stats["errored"]:
        raise TerminalError
def check_for_compatible_agents(labels: Iterable[str], since_minutes: int = 1) -> str:
    """
    Checks for agents compatible with a set of labels, returning a user-friendly
    message indicating the status, roughly one of the following cases:

    - There is a healthy agent with matching labels
    - There are N healthy agents with matching labels
    - There is an unhealthy agent with matching labels but no healthy agents matching
    - There are N unhealthy agents with matching labels but no healthy agents matching
    - There are no healthy agents at all and no unhealthy agents with matching labels
    - There are healthy agents but no healthy or unhealthy agent has matching labels

    Args:
        - labels: A set of labels; typically associated with a flow run
        - since_minutes: The amount of time in minutes to allow an agent to be
            idle and still be considered active/healthy

    Returns:
        A message string
    """
    client = prefect.Client()

    labels = set(labels)
    labels_blurb = f"labels {labels!r}" if labels else "empty labels"

    result = client.graphql(
        {"query": {"agent": {"last_queried", "labels", "name", "id"}}}
    )

    agents = result.get("data", {}).get("agent")
    if agents is None:
        raise ValueError(f"Received bad result while querying for agents: {result}")

    # Parse last query times
    for agent in agents:
        agent.last_queried = cast(
            Optional[pendulum.DateTime],
            pendulum.parse(agent.last_queried) if agent.last_queried else None,
        )

    # Drop agents that have not queried
    agents = [agent for agent in agents if agent.last_queried is not None]

    # Drop agents that have not sent a recent heartbeat
    since = pendulum.now().subtract(minutes=since_minutes)
    healthy_agents = [agent for agent in agents if agent.last_queried >= since]

    # Search for the flow run labels in running agents
    matching_healthy = []
    matching_unhealthy = []
    for agent in agents:
        empty_labels_match = not agent.labels and not labels
        if empty_labels_match or (labels and labels.issubset(agent.labels)):
            if agent in healthy_agents:
                matching_healthy.append(agent)
            else:
                matching_unhealthy.append(agent)

    if len(matching_healthy) == 1:
        agent = matching_healthy[0]
        # Display the single matching agent
        name_blurb = f" ({agent.name})" if agent.name else ""
        return (
            f"Agent {agent.id}{name_blurb} has matching labels and last queried "
            f"{agent.last_queried.diff_for_humans()}. It should deploy your flow run."
        )

    if len(matching_healthy) > 1:
        # Display that there are multiple matching agents
        return (
            f"Found {len(matching_healthy)} healthy agents with matching labels. One "
            "of them should pick up your flow."
        )

    # We now know we have no matching healthy agents...

    if not healthy_agents and not matching_unhealthy:
        # Display that there are no matching agents all-time
        return (
            "There are no healthy agents in your tenant and it does not look like an "
            "agent with the required labels has been run recently. Start an agent with "
            f"{labels_blurb} to run your flow."
        )

    if len(matching_unhealthy) == 1:
        agent = matching_unhealthy[0]
        # Display that there is a single matching unhealthy agent
        name_blurb = f" ({agent.name})" if agent.name else ""
        return (
            f"Agent {agent.id}{name_blurb} has matching labels and last queried "
            f"{agent.last_queried.diff_for_humans()}. Since it hasn't queried "
            f"recently, it looks unhealthy. Restart it or start a new agent with "
            f"{labels_blurb} to deploy your flow run."
        )

    if len(matching_unhealthy) > 1:
        # Display that there are multiple matching unhealthy agents
        return (
            f"Found {len(matching_unhealthy)} agents with matching labels but they "
            "have not queried recently and look unhealthy. Restart one of them or "
            f"start a new agent with {labels_blurb} to deploy your flow run."
        )

    # No matching healthy or unhealthy agents
    return (
        f"You have {len(healthy_agents)} healthy agents in your tenant but do not have "
        f"an agent with {labels_blurb}. Start an agent with matching labels to deploy "
        "your flow run."
    )
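# Hypothetical usage sketch for `check_for_compatible_agents` above, e.g.
# printed after creating a flow run so the user knows whether an agent is
# likely to pick it up; the labels are placeholders.
message = check_for_compatible_agents(labels={"prod", "docker"})
print(message)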
def register_flow(f: prefect.Flow, project_name: str) -> None:
    """Registers flow `f` to the given project.

    Args:
        f (prefect.Flow): Prefect flow to register
        project_name (str): name of the project to register the flow to
    """
    f.register(project_name)


if __name__ == "__main__":
    # Initialize a client, which can interact with the Prefect orchestrator.
    # The communication with the orchestrator is done through the Prefect GraphQL API.
    # This API is served on localhost:4200.
    print("Create client")
    client = prefect.Client()

    # Create the project "Monitorfish" in the orchestrator if it does not yet exist
    print("Create project")
    create_project_if_not_exists(client, PROJECT_NAME)

    # Register all flows
    print("Register flows")
    for f in flows_to_register:
        print(f"Register flow {f.name}")
        register_flow(f, PROJECT_NAME)

    # Start local "agent" process
    # This process queries the Prefect GraphQL API every second to ask if any new
    # flows should be run
    agent = LocalAgent(show_flow_logs=True)
    agent.start()