Example 1
def test_auth_logout_api_token_removes_api_token(patch_post, cloud_api):
    patch_post(dict(data=dict(tenant="id")))

    client = prefect.Client(api_token="foo")
    client._save_local_settings({"api_token": client._api_token})

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="Y")
    assert result.exit_code == 0
    assert "This will remove your API token" in result.output

    client = prefect.Client()
    assert "api_token" not in client._load_local_settings()
Example 2
    def deploy(self,
               project_name: str,
               build: bool = True,
               set_schedule_active: bool = True,
               **kwargs: Any) -> str:
        """
        Deploy the flow to Prefect Cloud; if no storage is present on the Flow, the default value from your config
        will be used and initialized with `**kwargs`.

        Args:
            - project_name (str): the project that should contain this flow.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - set_schedule_active (bool, optional): if `False`, will set the
                schedule to inactive in the database to prevent auto-scheduling runs (if the Flow has a schedule).
                Defaults to `True`. This can be changed later.
            - **kwargs (Any): if instantiating a Storage object from default settings, these keyword arguments
                will be passed to the initialization method of the default Storage class

        Returns:
            - str: the ID of the flow that was deployed
        """
        if self.storage is None:
            self.storage = get_default_storage_class()(**kwargs)

        client = prefect.Client()
        deployed_flow = client.deploy(
            flow=self,
            build=build,
            project_name=project_name,
            set_schedule_active=set_schedule_active,
        )
        return deployed_flow
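A hedged usage sketch for the method above (the flow and project names are placeholders):

flow = prefect.Flow("example-flow")  # placeholder flow
flow_id = flow.deploy(project_name="example-project", build=True)
print(f"Deployed flow with id: {flow_id}")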
Example 3
def find_failing_flows():
    client = prefect.Client(api_server=API_SERVER)

    flow_groups_query = {"query": {"flow_group": {"id"}}}

    response = graphql_query(client, flow_groups_query)
    flow_group_ids = [data["id"] for data in response["data"]["flow_group"]]

    failing_flows = []
    for flow_group_id in flow_group_ids:
        flow_results = run_and_parse_flow_group_results(client, flow_group_id)
        if not flow_results:
            continue

        if not flow_results.is_recently_run():
            continue

        if flow_results.is_recent_success():
            continue

        delta_seconds = flow_results.last_success_delta_seconds()
        if not delta_seconds:
            failing_flows.append(flow_results)
            continue

        last_success_hours = delta_seconds / 3600
        if last_success_hours > 24:
            failing_flows.append(flow_results)

    return failing_flows
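The helpers `graphql_query` and `run_and_parse_flow_group_results` are not shown above. A minimal sketch of `graphql_query`, assuming it is a thin wrapper around `Client.graphql` with basic error surfacing:

def graphql_query(client: "prefect.Client", query: dict) -> dict:
    # Assumed behavior: execute the query and raise on GraphQL errors
    result = client.graphql(query)
    if result.get("errors"):
        raise RuntimeError(f"GraphQL query failed: {result['errors']}")
    return result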
Example 4
    def deploy(self,
               project_name: str,
               build: bool = True,
               set_schedule_active: bool = True) -> str:
        """
        Deploy the flow to Prefect Cloud

        Args:
            - project_name (str): the project that should contain this flow.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - set_schedule_active (bool, optional): if `False`, will set the
                schedule to inactive in the database to prevent auto-scheduling runs (if the Flow has a schedule).
                Defaults to `True`. This can be changed later.

        Returns:
            - str: the ID of the flow that was deployed
        """
        client = prefect.Client()
        deployed_flow = client.deploy(
            flow=self,
            build=build,
            project_name=project_name,
            set_schedule_active=set_schedule_active,
        )
        return deployed_flow
Example 5
    def _query_for_flows(
        where: dict,
        order_by: dict = None,
        error_on_empty: bool = True,
    ) -> List[dict]:
        """
        Query for flow data necessary to initialize `Flow` instances with
        `_Flow.from_flow_data`.

        Args:
            - where (required): The Hasura `where` clause to filter by
            - order_by (optional): An optional Hasura `order_by` clause to order
                 results by
            - error_on_empty (optional): If `True` and no flows are found, a
                `ValueError` will be raised

        Returns:
            A list of dicts of flow information
        """
        client = prefect.Client()

        query_args = {"where": where}
        if order_by is not None:
            query_args["order_by"] = order_by

        flow_query = {
            "query": {
                with_args("flow", query_args): {
                    "id": True,
                    "settings": True,
                    "run_config": True,
                    "serialized_flow": True,
                    "name": True,
                    "archived": True,
                    "project": {"name"},
                    "core_version": True,
                    "storage": True,
                    "flow_group": {"labels"},
                }
            }
        }

        result = client.graphql(flow_query)
        flows = result.get("data", {}).get("flow", None)

        if flows is None:
            raise ValueError(
                f"Received bad result while querying for flows where {where}: "
                f"{result}")

        if not flows:  # Empty list
            if error_on_empty:
                raise ValueError(
                    f"No results found while querying for flows where {where!r}"
                )
            return []

        # Return a list
        return flows
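A possible call site, assuming the helper is reachable as shown and using Hasura-style filter syntax (the filter values are placeholders):

flows = _query_for_flows(
    where={"name": {"_eq": "my-flow"}, "archived": {"_eq": False}},
    order_by={"created": EnumValue("desc")},
)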
Example 6
    def get_logs(
        self,
        start_time: pendulum.DateTime = None,
        end_time: pendulum.DateTime = None,
    ) -> List["FlowRunLog"]:
        """
        Get logs for this flow run from `start_time` to `end_time`.

        Args:
            - start_time (optional): A time to start the log query at, useful for
                limiting the scope. If not provided, all logs up to `updated_at` are
                retrieved.
            - end_time (optional): A time to end the log query at. By default, this is
                set to `self.updated_at` which is the last time that the flow run was
                updated in the backend before this object was created.

        Returns:
            A list of `FlowRunLog` objects sorted by timestamp
        """

        client = prefect.Client()
        end_time = end_time or self.updated_at

        logs_query = {
            with_args(
                "logs",
                {
                    "order_by": {EnumValue("timestamp"): EnumValue("asc")},
                    "where": {
                        "_and": [
                            {"timestamp": {"_lte": end_time.isoformat()}},
                            (
                                {"timestamp": {"_gt": start_time.isoformat()}}
                                if start_time
                                else {}
                            ),
                        ]
                    },
                },
            ): {"timestamp": True, "message": True, "level": True}
        }

        result = client.graphql(
            {
                "query": {
                    with_args(
                        "flow_run",
                        {
                            "where": {"id": {"_eq": self.flow_run_id}},
                        },
                    ): logs_query
                }
            }
        )

        # Unpack the result
        logs = result.get("data", {}).get("flow_run", [{}])[0].get("logs", [])

        return [FlowRunLog.from_dict(log) for log in logs]
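Usage sketch, assuming `flow_run` is an object exposing this method and that `FlowRunLog` carries `timestamp`, `level`, and `message` fields:

logs = flow_run.get_logs(start_time=pendulum.now().subtract(hours=1))
for log in logs:
    print(f"{log.timestamp} [{log.level}] {log.message}")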
Example 7
def test_auth_logout_not_confirm(patch_post, cloud_api):
    patch_post(dict(data=dict(auth_info=dict(tenant_id="id"))))

    client = prefect.Client(api_key="foo")
    client.save_auth_to_disk()

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="N")
    assert result.exit_code == 1
Example 8
def _get_next_task_run_start_time(
        flow_run_id: str) -> Optional[pendulum.DateTime]:
    """
    Queries task runs associated with a flow run to get the earliest state start time.
    This time _may_ be in the past.

    Long retries are handled by exiting flow execution leaving the flow run in a
    'Running' state and attaching a start time to the task runs that need to be retried.
    This function checks for a long retry by querying for task runs that have a start
    time set. This allows us to wait until this run time is reached before starting
    flow run execution. If we started execution, the runner would just walk the DAG and
    exit since the task run is not ready to begin yet.

    Args:
        - flow_run_id: The flow run the task runs belong to

    Returns:
        None: If no scheduled task runs are found, otherwise
        pendulum.DateTime: The earliest scheduled task run start time.
    """
    client = prefect.Client()
    result = client.graphql({
        "query": {
            with_args(
                "task_run",
                {
                    "where": {
                        "state_start_time": {
                            "_is_null": False
                        },
                        "flow_run_id": {
                            "_eq": flow_run_id
                        },
                        "flow_run": {
                            # Only include flow runs in a 'Running' state to reduce
                            # the scope of the query to retrying flow runs
                            "state": {
                                "_eq": "Running"
                            }
                        },
                    }
                },
            ): {"state_start_time"}
        }
    })
    task_runs = result.get("data", {}).get("task_run")
    if task_runs is None:
        raise ValueError(
            f"Unexpected result while querying for task runs: {result}")
    elif not task_runs:
        return None  # No scheduled task runs

    task_run = min(task_runs, key=lambda task_run: task_run.state_start_time)
    next_start_time = task_run.state_start_time
    return cast(pendulum.DateTime, pendulum.parse(next_start_time))
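To illustrate the long-retry handling described in the docstring, a caller could sleep until the earliest task run start time (sketch; assumes `time` and `pendulum` are imported and omits error handling):

next_start_time = _get_next_task_run_start_time(flow_run_id)
if next_start_time is not None:
    # Wait out the retry delay before resuming flow run execution
    wait_seconds = max(0, (next_start_time - pendulum.now()).total_seconds())
    time.sleep(wait_seconds)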
Example 9
    def ensure_started(self) -> None:
        """Ensure the log manager is started"""
        if self.thread is None:
            self.client = prefect.Client()
            self.logging_period = context.config.cloud.logging_heartbeat
            self.thread = threading.Thread(
                target=self._write_logs_loop,
                name="prefect-log-manager",
                daemon=True,
            )
            self.thread.start()
            atexit.register(self._on_shutdown)
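The thread target `_write_logs_loop` is not shown; a plausible shape, assuming it periodically flushes buffered logs (hypothetical sketch, not necessarily Prefect's actual implementation):

    def _write_logs_loop(self) -> None:
        # Hypothetical: flush pending logs every `logging_period` seconds
        while True:
            time.sleep(self.logging_period)
            self._write_logs()  # assumed helper that sends buffered logs via the client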
Example 10
def ensure_project_exists():
    client = prefect.Client()

    print("Ensuring prefect project named '{prefect_project_name}'exists.")

    try:
        client.create_project(project_name=prefect_project_name)
        print(f"{prefect_project_name} has been created.")
    except prefect.utilities.exceptions.ClientError as ce:
        if "Uniqueness violation" in str(ce):
            print(f"Project: {prefect_project_name} exists")
        else:
            raise ce
Example 11
def create_tenant(name, slug):
    """
    This command creates a tenant for the Prefect Server

    \b
    Options:
        --name, -n       TEXT    The name of a tenant to create
        --slug, -s       TEXT    The slug of a tenant to create
    """
    client = prefect.Client()
    tenant_id = client.create_tenant(name=name, slug=slug)

    click.secho(f"Tenant created with ID: {tenant_id}", fg="green")
Example 12
def _fail_flow_run(flow_run_id: str, message: str) -> None:
    """
    Set a flow run to a 'Failed' state and write a failure message log
    """
    client = prefect.Client()
    client.set_flow_run_state(flow_run_id=flow_run_id,
                              state=prefect.engine.state.Failed(message))
    client.write_run_logs([
        dict(
            flow_run_id=flow_run_id,  # type: ignore
            name="prefect.backend.execution",
            message=message,
            level="ERROR",
        )
    ])
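Usage sketch (the flow run id is a placeholder):

_fail_flow_run(
    flow_run_id="00000000-0000-0000-0000-000000000000",  # placeholder id
    message="Flow run exceeded its scheduled start window",
)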
Example 13
    def deploy(self,
               project_name: str,
               build: bool = True,
               labels: List[str] = None,
               set_schedule_active: bool = True,
               version_group_id: str = None,
               **kwargs: Any) -> str:
        """
        Deploy the flow to Prefect Cloud; if no storage is present on the Flow, the default value from your config
        will be used and initialized with `**kwargs`.

        Args:
            - project_name (str): the project that should contain this flow.
            - build (bool, optional): if `True`, the flow's environment is built
                prior to serialization; defaults to `True`
            - labels (List[str], optional): a list of labels to add to this Flow's environment; useful for
                associating Flows with individual Agents; see http://docs.prefect.io/cloud/agent/overview.html#flow-affinity-labels
            - set_schedule_active (bool, optional): if `False`, will set the
                schedule to inactive in the database to prevent auto-scheduling runs (if the Flow has a schedule).
                Defaults to `True`. This can be changed later.
            - version_group_id (str, optional): the UUID version group ID to use for versioning this Flow
                in Cloud; if not provided, the version group ID associated with this Flow's project and name
                will be used.
            - **kwargs (Any): if instantiating a Storage object from default settings, these keyword arguments
                will be passed to the initialization method of the default Storage class

        Returns:
            - str: the ID of the flow that was deployed
        """
        if self.storage is None:
            self.storage = get_default_storage_class()(**kwargs)

        if isinstance(self.storage, prefect.environments.storage.Local):
            self.environment.labels.add("local")
            self.environment.labels.add(slugify(self.name))

        if labels:
            self.environment.labels.update(labels)

        client = prefect.Client()
        deployed_flow = client.deploy(
            flow=self,
            build=build,
            project_name=project_name,
            set_schedule_active=set_schedule_active,
            version_group_id=version_group_id,
        )
        return deployed_flow
Example 14
def logger_helper():
    time.sleep(6)
    TOKEN = os.getenv('AZURE_TMP_TOKEN')
    client = prefect.Client(api_token=TOKEN)
    client.login_to_tenant(tenant_slug="km-inc")
    client.graphql("""
        mutation {
            create_flow_run(
                input: {
                    flow_id: "61d6071b-cd81-4505-877b-18081c129b4b",
                }
            ) {
                id
            }
        }
        """)
Example 15
    def get_task_run_ids(self) -> List[str]:
        """
        Get all task run ids associated with this flow run. Lazily loaded at call time
        then cached for future calls.

        Returns:
            A list of string task run ids
        """
        # Return the cached value immediately if it exists
        if self._task_run_ids:
            return self._task_run_ids

        client = prefect.Client()

        task_query = {
            "query": {
                with_args(
                    "task_run",
                    {
                        "where": {
                            "flow_run_id": {"_eq": self.flow_run_id},
                        }
                    },
                ): {
                    "id": True,
                }
            }
        }
        result = client.graphql(task_query)
        task_runs = result.get("data", {}).get("task_run", None)

        if task_runs is None:
            logger.warning(
                f"Failed to load task run ids for flow run {self.flow_run_id}: "
                f"{result}"
            )
            # Fall back to an empty list so the comprehension below does not
            # fail when the query returns no data
            task_runs = []

        task_run_ids = [task_run["id"] for task_run in task_runs]

        # If the flow run is done, we can safely cache this value
        if self.state.is_finished():
            self._task_run_ids = task_run_ids

        return task_run_ids
Example 16
def test_auth_logout_api_token_with_tenant_removes_tenant_id(
        patch_posts, cloud_api):
    patch_posts([
        # Login to tenant call during setup
        dict(data=dict(tenant=[dict(id=str(uuid.uuid4()))])),
        # Access token retrieval call during setup
        dict(data=dict(switch_tenant=dict(
            access_token="access-token",
            expires_at=pendulum.now().isoformat(),
            refresh_token="refresh-token",
        ))),
        # Login to tenant call during logout
        dict(data=dict(tenant=[dict(id=str(uuid.uuid4()))])),
        # Access token retrieval call during logout
        dict(data=dict(switch_tenant=dict(
            access_token="access-token",
            expires_at=pendulum.now().isoformat(),
            refresh_token="refresh-token",
        ))),
    ])

    client = prefect.Client()
    client._save_local_settings({
        "api_token": "token",
        "active_tenant_id": str(uuid.uuid4())
    })

    runner = CliRunner()
    result = runner.invoke(auth, ["logout"], input="Y")

    assert result.exit_code == 0

    settings = client._load_local_settings()

    # Does not remove the API token
    assert "This will remove your API token" not in result.output
    assert "api_token" in settings

    # Removes the tenant id
    assert "Logged out from tenant" in result.output
    assert "active_tenant_id" not in settings
Example 17
    def _query_for_flow_run(where: dict) -> dict:
        client = prefect.Client()

        flow_run_query = {
            "query": {
                with_args("flow_run", {"where": where}): {
                    "id": True,
                    "name": True,
                    "flow_id": True,
                    "serialized_state": True,
                    "states": {"timestamp", "serialized_state"},
                    "labels": True,
                    "parameters": True,
                    "context": True,
                    "updated": True,
                    "run_config": True,
                }
            }
        }

        result = client.graphql(flow_run_query)
        flow_runs = result.get("data", {}).get("flow_run", None)

        if flow_runs is None:
            raise ValueError(
                f"Received bad result while querying for flow runs where {where}: "
                f"{result}"
            )

        if not flow_runs:
            raise ValueError(
                f"No flow runs found while querying for flow runs where {where}"
            )

        if len(flow_runs) > 1:
            raise ValueError(
                f"Found multiple ({len(flow_runs)}) flow runs while querying for flow "
                f"runs where {where}: {flow_runs}"
            )

        return flow_runs[0]
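A possible call site, assuming the helper is reachable as shown (the id filter is a placeholder):

flow_run = _query_for_flow_run(where={"id": {"_eq": flow_run_id}})
print(f"Found flow run {flow_run['id']} named {flow_run['name']!r}")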
Example 18
def create_tenant(name, slug):
    """
    This command creates a tenant for the Prefect Server

    \b
    Options:
        --name, -n       TEXT    The name of a tenant to create
        --slug, -s       TEXT    The slug of a tenant to create
    """
    # client = prefect.Client()
    # if not client.get_available_tenants():
    #     tenant_id = client.create_tenant(name=name, slug=slug)
    #     click.secho(f"Tenant created with ID: {tenant_id}", fg="green")
    # print(ascii_welcome())
    started = False
    with prefect.utilities.configuration.set_temporary_config(
        {
            "cloud.api": "http://apollo:4200",
            "cloud.graphql": "http://apollo:4200/graphql",
            "backend": "server",
        }
    ):
        while not started:
            try:
                client = prefect.Client()
                client.graphql("query{hello}", retry_on_api_error=False)
                started = True
                # Create a default tenant if no tenant exists
                if not client.get_available_tenants():
                    client.create_tenant(name="default")
                print(ascii_welcome())
            except Exception:
                time.sleep(0.5)
                pass
        while True:
            time.sleep(0.5)
Example 19
def start(
    version,
    ui_version,
    skip_pull,
    no_upgrade,
    no_ui,
    postgres_port,
    hasura_port,
    graphql_port,
    ui_port,
    server_port,
    no_postgres_port,
    no_hasura_port,
    no_graphql_port,
    no_ui_port,
    no_server_port,
    use_volume,
    volume_path,
):
    """
    This command spins up all infrastructure and services for the Prefect Core server

    \b
    Options:
        --version, -v       TEXT    The server image versions to use (for example, '0.1.0'
                                    or 'master'). Defaults to `core-a.b.c` where `a.b.c.`
                                    is the version of Prefect Core currently running.
        --ui-version, -uv   TEXT    The UI image version to use (for example, '0.1.0' or
                                    'master'). Defaults to `core-a.b.c` where `a.b.c.` is
                                    the version of Prefect Core currently running.
        --skip-pull                 Flag to skip pulling new images (if available)
        --no-upgrade, -n            Flag to avoid running a database upgrade when the
                                    database spins up
        --no-ui, -u                 Flag to avoid starting the UI

    \b
        --postgres-port     TEXT    Port used to serve Postgres, defaults to '5432'
        --hasura-port       TEXT    Port used to serve Hasura, defaults to '3001'
        --graphql-port      TEXT    Port used to serve the GraphQL API, defaults to '4001'
        --ui-port           TEXT    Port used to serve the UI, defaults to '8080'
        --server-port       TEXT    Port used to serve the Core server, defaults to '4200'

    \b
        --no-postgres-port          Disable port map of Postgres to host
        --no-hasura-port            Disable port map of Hasura to host
        --no-graphql-port           Disable port map of the GraphQL API to host
        --no-ui-port                Disable port map of the UI to host
        --no-server-port            Disable port map of the Core server to host

    \b
        --use-volume                Enable the use of a volume for the Postgres service
        --volume-path       TEXT    A path to use for the Postgres volume, defaults to
                                    '~/.prefect/pg_data'
    """

    docker_dir = Path(__file__).parents[0]
    compose_dir_path = docker_dir

    # Remove port mappings if specified
    if (no_postgres_port or no_hasura_port or no_graphql_port or no_ui_port
            or no_server_port or not use_volume or no_ui):
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, "docker-compose.yml")
        shutil.copy2(os.path.join(docker_dir, "docker-compose.yml"), temp_path)

        with open(temp_path, "r") as file:
            y = yaml.safe_load(file)

            if no_postgres_port:
                del y["services"]["postgres"]["ports"]

            if no_hasura_port:
                del y["services"]["hasura"]["ports"]

            if no_graphql_port:
                del y["services"]["graphql"]["ports"]

            if no_ui_port:
                del y["services"]["ui"]["ports"]

            if no_server_port:
                del y["services"]["apollo"]["ports"]

            if not use_volume:
                del y["services"]["postgres"]["volumes"]

            if no_ui:
                del y["services"]["ui"]

        with open(temp_path, "w") as f:
            yaml.safe_dump(y, f)  # write the modified compose file back out

        compose_dir_path = temp_dir

    # Temporary config set for port allocation
    with set_temporary_config({
            "server.database.host_port": str(postgres_port),
            "server.hasura.host_port": str(hasura_port),
            "server.graphql.host_port": str(graphql_port),
            "server.ui.host_port": str(ui_port),
            "server.host_port": str(server_port),
            "server.database.volume_path": volume_path,
    }):
        env = make_env()

    base_version = prefect.__version__.split("+")
    if len(base_version) > 1:
        default_tag = "master"
    else:
        default_tag = f"core-{base_version[0]}"
    if "PREFECT_SERVER_TAG" not in env:
        env.update(PREFECT_SERVER_TAG=version or default_tag)
    if "PREFECT_UI_TAG" not in env:
        env.update(PREFECT_UI_TAG=ui_version or default_tag)
    if "PREFECT_SERVER_DB_CMD" not in env:
        cmd = ("prefect-server database upgrade -y"
               if not no_upgrade else "echo 'DATABASE MIGRATIONS SKIPPED'")
        env.update(PREFECT_SERVER_DB_CMD=cmd)

    proc = None
    try:
        if not skip_pull:
            subprocess.check_call(["docker-compose", "pull"],
                                  cwd=compose_dir_path,
                                  env=env)

        cmd = ["docker-compose", "up"]
        proc = subprocess.Popen(cmd, cwd=compose_dir_path, env=env)
        started = False
        with prefect.utilities.configuration.set_temporary_config({
                "cloud.api":
                "http://localhost:4200",
                "cloud.graphql":
                "http://localhost:4200/graphql",
                "backend":
                "server",
        }):
            while not started:
                try:
                    client = prefect.Client()
                    client.graphql("query{hello}", retry_on_api_error=False)
                    started = True
                    # Create a default tenant if no tenant exists
                    if not client.get_available_tenants():
                        client.create_tenant(name="default")
                    print(ascii_welcome(ui_port=str(ui_port)))
                except Exception:
                    time.sleep(0.5)
                    pass
            while True:
                time.sleep(0.5)
    except BaseException:
        click.secho(
            "Exception caught; killing services (press ctrl-C to force)",
            fg="white",
            bg="red",
        )
        subprocess.check_output(["docker-compose", "down"],
                                cwd=compose_dir_path,
                                env=env)
        if proc:
            proc.kill()
        raise
Example 20
def start(
    version,
    ui_version,
    skip_pull,
    no_upgrade,
    no_ui,
    external_postgres,
    postgres_url,
    detach,
    postgres_port,
    hasura_port,
    graphql_port,
    ui_port,
    server_port,
    no_postgres_port,
    no_hasura_port,
    no_graphql_port,
    no_ui_port,
    no_server_port,
    use_volume,
    volume_path,
):
    """
    This command spins up all infrastructure and services for the Prefect Core server

    \b
    Options:
        --version, -v       TEXT    The server image versions to use (for example, '0.1.0'
                                    or 'master'). Defaults to `core-a.b.c` where `a.b.c.`
                                    is the version of Prefect Core currently running.
        --ui-version, -uv   TEXT    The UI image version to use (for example, '0.1.0' or
                                    'master'). Defaults to `core-a.b.c` where `a.b.c.` is
                                    the version of Prefect Core currently running.
        --no-upgrade, -n            Flag to avoid running a database upgrade when the
                                    database spins up
        --no-ui, -u                 Flag to avoid starting the UI

    \b
        --external-postgres, -ep    Disable the Postgres service, connect to an external one instead
        --postgres-url      TEXT    Postgres connection url to use. Expected format
                                    is postgres://<username>:<password>@hostname:<port>/<dbname>

    \b
        --postgres-port     TEXT    Port used to serve Postgres, defaults to '5432'.
                                    Not valid for external Postgres.
        --hasura-port       TEXT    Port used to serve Hasura, defaults to '3000'
        --graphql-port      TEXT    Port used to serve the GraphQL API, defaults to '4201'
        --ui-port           TEXT    Port used to serve the UI, defaults to '8080'
        --server-port       TEXT    Port used to serve the Core server, defaults to '4200'

    \b
        --no-postgres-port          Disable port map of Postgres to host.
                                    Not valid for external Postgres.
        --no-hasura-port            Disable port map of Hasura to host
        --no-graphql-port           Disable port map of the GraphQL API to host
        --no-ui-port                Disable port map of the UI to host
        --no-server-port            Disable port map of the Core server to host

    \b
        --use-volume                Enable the use of a volume for the Postgres service.
                                    Not valid for external Postgres.
        --volume-path       TEXT    A path to use for the Postgres volume, defaults to
                                    '~/.prefect/pg_data' Not valid for external Postgres.

    \b
        --detach, -d                Detached mode. Runs Server containers in the background
        --skip-pull                 Flag to skip pulling new images (if available)
    """
    # set external postgres flag if the user has provided `--postgres-url`
    if postgres_url is not None:
        external_postgres = True

    if external_postgres:
        warn_for_postgres_settings_when_using_external_postgres(
            no_postgres_port=no_postgres_port,
            postgres_port=postgres_port,
            use_volume=use_volume,
            volume_path=volume_path,
        )

    compose_path = setup_compose_file(
        no_ui=no_ui,
        external_postgres=external_postgres,
        no_postgres_port=no_postgres_port,
        no_hasura_port=no_hasura_port,
        no_graphql_port=no_graphql_port,
        no_ui_port=no_ui_port,
        no_server_port=no_server_port,
        use_volume=use_volume,
    )

    compose_dir_path = str(Path(compose_path).parent)

    env = setup_compose_env(
        version=version,
        ui_version=ui_version,
        no_upgrade=no_upgrade,
        external_postgres=external_postgres,
        postgres_url=postgres_url,
        postgres_port=postgres_port,
        hasura_port=hasura_port,
        graphql_port=graphql_port,
        ui_port=ui_port,
        server_port=server_port,
        volume_path=volume_path,
    )

    proc = None
    try:
        if not skip_pull:
            subprocess.check_call(["docker-compose", "pull"],
                                  cwd=compose_dir_path,
                                  env=env)

        cmd = ["docker-compose", "up"]
        if detach:
            cmd.append("--detach")
        proc = subprocess.Popen(cmd, cwd=compose_dir_path, env=env)
        started = False
        with prefect.utilities.configuration.set_temporary_config({
                "cloud.api":
                "http://localhost:4200",
                "cloud.graphql":
                "http://localhost:4200/graphql",
                "backend":
                "server",
        }):
            while not started:
                try:
                    # Get a client with the correct server port
                    client = prefect.Client(
                        api_server=f"{config.server.host}:{server_port}")
                    client.graphql("query{hello}", retry_on_api_error=False)
                    started = True
                    # Create a default tenant if no tenant exists
                    if not client.get_available_tenants():
                        client.create_tenant(name="default")
                    print(ascii_welcome(ui_port=str(ui_port)))
                except Exception:
                    time.sleep(0.5)
                    pass
            if detach:
                return
            while True:
                time.sleep(0.5)
    except BaseException:
        click.secho(
            "Exception caught; killing services (press ctrl-C to force)",
            fg="white",
            bg="red",
        )
        subprocess.check_output(["docker-compose", "down"],
                                cwd=compose_dir_path,
                                env=env)
        if proc:
            proc.kill()
        raise
Example 21
def _get_flow_run_scheduled_start_time(flow_run_id: str) -> Optional[pendulum.DateTime]:
    """
    Queries for the current scheduled start time of a flow

    Flow runs store a `scheduled_start_time` as their originally scheduled time to
    start. 'Scheduled' flow run states also store a `start_time` that supersedes the
    time on the flow run object itself. For example, if a flow run is scheduled for some
    time in the future and a user clicks the 'Start Now' button in the UI, we'll create
    a new 'Scheduled' state with an updated start time. This allows us to preserve
    start time history while making the state the source of truth.

    This function will always return the start time associated with the most recently
    created 'Scheduled' state, if available. If the most recent 'Scheduled' state has a
    null `start_time`, we will fall back to the flow run's `scheduled_start_time`.

    Args:
        - flow_run_id: The flow run of interest

    Returns:
        pendulum.DateTime: The most recent scheduled flow run start time

    Raises:
        - ValueError: On API error
        - ValueError: When zero or more than one flow run is found

    """
    client = prefect.Client()
    result = client.graphql(
        {
            # We cannot query for states directly and must go through the `flow_run`
            # object
            "query": {
                with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                    with_args("states", {"where": {"state": {"_eq": "Scheduled"}}}): {
                        "start_time",
                        "created",
                    },
                    "scheduled_start_time": True,
                }
            }
        }
    )
    flow_runs = result.get("data", {}).get("flow_run")
    if flow_runs is None:
        raise ValueError(
            f"Unexpected result while querying for flow run states: {result}"
        )
    elif len(flow_runs) > 1:
        raise ValueError(
            f"Found more than one flow run matching id {flow_run_id!r}: {result}"
        )
    elif not flow_runs:
        raise ValueError(f"No flow run exists with id {flow_run_id!r}.")

    # Get the one found flow run
    flow_run = flow_runs[0]

    # Get the most recently created state
    states = sorted(
        flow_run.states, key=lambda state: state.get("created", ""), reverse=True
    )
    state = states[0] if states else None

    # Return the most recently created state start time; default to the flow run
    # scheduled start time in case there are no state times
    start_time = (
        state.start_time
        if state and state.get("start_time")
        else flow_run.scheduled_start_time
    )

    if not start_time:
        return None  # There is no scheduled start time in the states or on the run

    return cast(pendulum.DateTime, pendulum.parse(start_time))
Example 22
def generate_flow_run_environ(
    flow_run_id: str,
    flow_id: str,
    run_config: RunConfig,
    run_api_key: str = None,
    include_local_env: bool = False,
) -> Dict[str, str]:
    """
    Utility to generate the environment variables required for a flow run

    Args:
        - flow_run_id: The id for the flow run that will be executed
        - flow_id: The id for the flow of the flow run that will be executed
        - run_config: The run config for the flow run, contributes environment variables
        - run_api_key: An optional API key to pass to the flow run for authenticating
            with the backend. If not set, it will be pulled from the Client
        - include_local_env: If `True`, the currently available environment variables
            will be passed through to the flow run. Defaults to `False` for security.

    Returns:
        - A dictionary of environment variables
    """
    # TODO: Generalize this and use it for all agents

    # Local environment
    env = cast(Dict[str, Optional[str]], os.environ.copy() if include_local_env else {})

    # Pass through config options that can be overridden by run config
    env.update(
        to_environment_variables(
            prefect.config,
            include={
                "logging.level",
                "logging.format",
                "logging.datefmt",
                "cloud.send_flow_run_logs",
            },
        )
    )

    # Update with run config environment
    if run_config is not None and run_config.env is not None:
        env.update(run_config.env)

    # Update with config options that cannot be overridden by the run config
    env.update(
        to_environment_variables(
            prefect.config,
            include={"backend", "cloud.api", "cloud.tenant_id"},
        )
    )

    # Pass authentication through
    client = prefect.Client()  # Instantiate a client to get the current API key
    env["PREFECT__CLOUD__API_KEY"] = run_api_key or client.api_key or ""
    # Backwards compat for auth tokens
    env["PREFECT__CLOUD__AUTH_TOKEN"] = (
        run_api_key
        or prefect.config.cloud.agent.get("auth_token")
        or prefect.config.cloud.get("auth_token")
    )

    # Add context information for the run
    env.update(
        {
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run_id,
            "PREFECT__CONTEXT__FLOW_ID": flow_id,
        }
    )

    # Update hardcoded execution variables
    env.update(
        {
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
    )

    # Filter out `None` values
    return {k: v for k, v in env.items() if v is not None}
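Usage sketch, assuming a `UniversalRun` run config and placeholder ids; the environment is then handed to a subprocess that executes the run:

import subprocess

from prefect.run_configs import UniversalRun

env = generate_flow_run_environ(
    flow_run_id="<flow-run-id>",  # placeholder
    flow_id="<flow-id>",          # placeholder
    run_config=UniversalRun(env={"MY_SETTING": "value"}),
)
subprocess.Popen(["prefect", "execute", "flow-run"], env=env)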
Example 23
def register_internal(
    project: str,
    paths: List[str],
    modules: List[str],
    names: List[str] = None,
    labels: List[str] = None,
    force: bool = False,
    in_watch: bool = False,
) -> None:
    """Do a single registration pass, loading, building, and registering the
    requested flows.

    Args:
        - project (str): the project in which to register the flows.
        - paths (List[str]): a list of file paths containing flows.
        - modules (List[str]): a list of python modules containing flows.
        - names (List[str], optional): a list of flow names that should be
            registered. If not provided, all flows found will be registered.
        - labels (List[str], optional): a list of extra labels to set on all
            flows.
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.
        - in_watch (bool, optional): Whether this call resulted from a
            `register --watch` call.
    """
    # Load flows from all files/modules requested
    click.echo("Collecting flows...")
    source_to_flows = collect_flows(paths, modules, in_watch)

    # Filter flows by name if requested
    if names:
        names = set(names)
        source_to_flows = {
            source: [f for f in flows if f.name in names]
            for source, flows in source_to_flows.items()
        }
        missing = names.difference(f.name
                                   for flows in source_to_flows.values()
                                   for f in flows)
        if missing:
            missing_flows = "\n".join(f"- {n}" for n in sorted(missing))
            click.secho(
                f"Failed to find the following flows:\n{missing_flows}",
                fg="red")
            if not in_watch:
                raise TerminalError

    # Iterate through each file, building all storage and registering all flows
    # Log errors as they happen, but only exit once all files have been processed
    client = prefect.Client()
    stats = Counter(registered=0, errored=0, skipped=0)
    for source, flows in source_to_flows.items():
        if flows:
            click.echo(f"Processing {source.location!r}:")
            stats += build_and_register(client,
                                        flows,
                                        project,
                                        labels=labels,
                                        force=force)

    # Output summary message
    registered = stats["registered"]
    skipped = stats["skipped"]
    errored = stats["errored"]
    parts = [click.style(f"{registered} registered", fg="green")]
    if skipped:
        parts.append(click.style(f"{skipped} skipped", fg="yellow"))
    if errored:
        parts.append(click.style(f"{errored} errored", fg="red"))

    msg = ", ".join(parts)
    bar_length = max(60 - len(click.unstyle(msg)), 4) // 2
    bar = "=" * bar_length
    click.echo(f"{bar} {msg} {bar}")

    # If not in a watch call, exit with appropriate exit code
    if not in_watch and stats["errored"]:
        raise TerminalError
Example 24
def register_internal(
    project: str,
    paths: List[str],
    modules: List[str],
    json_paths: List[str] = None,
    names: List[str] = None,
    labels: List[str] = None,
    force: bool = False,
    schedule: bool = True,
    in_watch: bool = False,
) -> None:
    """Do a single registration pass, loading, building, and registering the
    requested flows.

    Args:
        - project (str): the project in which to register the flows.
        - paths (List[str]): a list of file paths containing flows.
        - modules (List[str]): a list of python modules containing flows.
        - json_paths (List[str]): a list of file paths containing serialized
            flows produced by `prefect build`.
        - names (List[str], optional): a list of flow names that should be
            registered. If not provided, all flows found will be registered.
        - labels (List[str], optional): a list of extra labels to set on all
            flows.
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.
        - schedule (bool, optional): If `True` (default) activates the flow schedule
            upon registering.
        - in_watch (bool, optional): Whether this call resulted from a
            `register --watch` call.
    """
    client = prefect.Client()

    # Determine the project id
    project_id = get_project_id(client, project)

    # Load flows from all files/modules requested
    click.echo("Collecting flows...")
    source_to_flows = collect_flows(paths,
                                    modules,
                                    json_paths,
                                    names=names,
                                    in_watch=in_watch)

    # Iterate through each file, building all storage and registering all flows
    # Log errors as they happen, but only exit once all files have been processed
    stats = Counter(registered=0, errored=0, skipped=0)
    for source, flows in source_to_flows.items():
        click.echo(f"Processing {source.location!r}:")
        stats += build_and_register(client,
                                    flows,
                                    project_id,
                                    labels=labels,
                                    force=force,
                                    schedule=schedule)

    # Output summary message
    registered = stats["registered"]
    skipped = stats["skipped"]
    errored = stats["errored"]
    parts = [click.style(f"{registered} registered", fg="green")]
    if skipped:
        parts.append(click.style(f"{skipped} skipped", fg="yellow"))
    if errored:
        parts.append(click.style(f"{errored} errored", fg="red"))

    msg = ", ".join(parts)
    bar_length = max(60 - len(click.unstyle(msg)), 4) // 2
    bar = "=" * bar_length
    click.echo(f"{bar} {msg} {bar}")

    # If not in a watch call, exit with appropriate exit code
    if not in_watch and stats["errored"]:
        raise TerminalError
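A hedged example of invoking the registration pass directly (paths and project name are placeholders):

register_internal(
    project="example-project",
    paths=["flows/my_flow.py"],
    modules=[],
    labels=["dev"],
)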
Example 25
def check_for_compatible_agents(labels: Iterable[str],
                                since_minutes: int = 1) -> str:
    """
    Checks for agents compatible with a set of labels returning a user-friendly message
    indicating the status, roughly one of the following cases:

    - There is a healthy agent with matching labels
    - There are N healthy agents with matching labels
    - There is an unhealthy agent with matching labels but no healthy agents matching
    - There are N unhealthy agents with matching labels but no healthy agents matching
    - There are no healthy agents at all and no unhealthy agents with matching labels
    - There are healthy agents but no healthy or unhealthy agent has matching labels

    Args:
        - labels: A set of labels; typically associated with a flow run
        - since_minutes: The amount of time in minutes to allow an agent to be idle and
            considered active/healthy still

    Returns:
        A message string
    """
    client = prefect.Client()

    labels = set(labels)
    labels_blurb = f"labels {labels!r}" if labels else "empty labels"

    result = client.graphql(
        {"query": {
            "agent": {"last_queried", "labels", "name", "id"}
        }})

    agents = result.get("data", {}).get("agent")
    if agents is None:
        raise ValueError(
            f"Received bad result while querying for agents: {result}")

    # Parse last query times
    for agent in agents:
        agent.last_queried = cast(
            Optional[pendulum.DateTime],
            pendulum.parse(agent.last_queried) if agent.last_queried else None,
        )

    # Drop agents that have not queried
    agents = [agent for agent in agents if agent.last_queried is not None]

    # Drop agents that have not sent a recent heartbeat
    since = pendulum.now().subtract(minutes=since_minutes)
    healthy_agents = [agent for agent in agents if agent.last_queried >= since]

    # Search for the flow run labels in running agents
    matching_healthy = []
    matching_unhealthy = []

    for agent in agents:
        empty_labels_match = not agent.labels and not labels
        if empty_labels_match or (labels and labels.issubset(agent.labels)):
            if agent in healthy_agents:
                matching_healthy.append(agent)
            else:
                matching_unhealthy.append(agent)

    if len(matching_healthy) == 1:
        agent = matching_healthy[0]
        # Display the single matching agent
        name_blurb = f" ({agent.name})" if agent.name else ""
        return (
            f"Agent {agent.id}{name_blurb} has matching labels and last queried "
            f"{agent.last_queried.diff_for_humans()}. It should deploy your flow run."
        )

    if len(matching_healthy) > 1:
        # Display that there are multiple matching agents
        return (
            f"Found {len(matching_healthy)} healthy agents with matching labels. One "
            "of them should pick up your flow.")

    # We now know we have no matching healthy agents...

    if not healthy_agents and not matching_unhealthy:
        # Display that there are no matching agents all-time
        return (
            "There are no healthy agents in your tenant and it does not look like an "
            "agent with the required labels has been run recently. Start an agent with "
            f"{labels_blurb} to run your flow.")

    if len(matching_unhealthy) == 1:
        agent = matching_unhealthy[0]
        # Display that there is a single matching unhealthy agent
        name_blurb = f" ({agent.name})" if agent.name else ""
        return (
            f"Agent {agent.id}{name_blurb} has matching labels and last queried "
            f"{agent.last_queried.diff_for_humans()}. Since it hasn't queried recently, it looks "
            f"unhealthy. Restart it or start a new agent with {labels_blurb} to deploy "
            f"your flow run.")

    if len(matching_unhealthy) > 1:
        # Display that there are multiple matching unhealthy agents
        return (
            f"Found {len(matching_unhealthy)} agents with matching labels but they "
            "have not queried recently and look unhealthy. Restart one of them or "
            f"start a new agent with {labels_blurb} deploy your flow run.")

    # No matching healthy or unhealthy agents
    return (
        f"You have {len(healthy_agents)} healthy agents in your tenant but do not have "
        f"an agent with {labels_blurb}. Start an agent with matching labels to deploy "
        "your flow run.")
Example 26
def register_flow(f: prefect.Flow, project_name: str) -> None:
    """Registers the flow `f` to the given project.

    Args:
        f (prefect.Flow): Prefect flow to register
        project_name (str): name of the project to register the flow to
    """
    f.register(project_name)


if __name__ == "__main__":
    # Initialize a client, which can interact with the Prefect orchestrator.
    # The communication with the orchestrator is done through the Prefect GraphQL API.
    # This API is served on localhost:4200.
    print("Create client")
    client = prefect.Client()

    # Create the project "Monitorfish" in the orchestrator if it does not yet exist
    print("Create project")
    create_project_if_not_exists(client, PROJECT_NAME)

    # Register all flows
    print("Register flows")
    for f in flows_to_register:
        print(f"Register flow {f.name}")
        register_flow(f, PROJECT_NAME)

    # Start local "agent" process
    # This process queries the Prefect GraphQL API every second to ask if any new flows
    # should be run
    agent = LocalAgent(show_flow_logs=True)
    agent.start()