コード例 #1
0
ファイル: describe.py プロジェクト: sebastianbertoli/prefect
def flows(name, version, project):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query                [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Match on flow name, flow version and the owning project's name.
    flow_filter = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    # Sort by name ascending, then version descending; distinct_on is applied to name.
    flow_args = {
        "where": flow_filter,
        "order_by": {
            "name": EnumValue("asc"),
            "version": EnumValue("desc"),
        },
        "distinct_on": EnumValue("name"),
    }
    # Fields requested for each matching flow.
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }
    query = {"query": {with_args("flow", flow_args): selection}}

    matches = Client().graphql(query).data.flow

    # Nothing matched: report it in red and stop.
    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Only the first returned flow is printed.
    click.echo(matches[0])
コード例 #2
0
    async def test_traverse_upstream_with_where_clause(self, flow_id):
        """Upstream traversal from task "t5" applies the `where` filter to its rows."""
        # Look up the id of the task with slug "t5" in this flow.
        start = await models.Task.where(
            {"flow_id": {"_eq": flow_id}, "slug": {"_eq": "t5"}}
        ).first({"id"})

        traversal_args = {
            "args": {"start_task_ids": LiteralSetValue([start.id])},
            "order_by": {
                "depth": EnumValue("asc"),
                "task": {"slug": EnumValue("asc")},
            },
            # Filter out the task with slug "t4" and every row at depth 4.
            "where": {
                "task": {"slug": {"_neq": "t4"}},
                "depth": {"_neq": 4},
            },
        }
        selection = {"task": {"slug"}, "depth": True}
        query = {
            "query": {with_args("utility_upstream_tasks", traversal_args): selection}
        }
        result = await prefect.plugins.hasura.client.execute(query)

        # Expected (slug, depth) rows, ordered by depth and then slug.
        expected = [
            {"task": {"slug": slug}, "depth": depth}
            for slug, depth in [
                ("t5", 0),
                ("t9", 1),
                ("t3", 2),
                ("t8", 2),
                ("t7", 3),
                ("t1", 5),
            ]
        ]
        assert result.data.utility_upstream_tasks == expected
コード例 #3
0
ファイル: flow_run.py プロジェクト: limx0/prefect
    def get_logs(
        self,
        start_time: pendulum.DateTime = None,
        end_time: pendulum.DateTime = None,
    ) -> List["FlowRunLog"]:
        """
        Get logs for this flow run from `start_time` to `end_time`.

        Args:
            - start_time (optional): A time to start the log query at, useful for
                limiting the scope. Only logs with a timestamp strictly greater than
                this value are requested. If not provided, all logs up to `end_time`
                are retrieved.
            - end_time (optional): A time to end the log query at (inclusive). By
                default, this is set to `self.updated_at`.

        Returns:
            A list of `FlowRunLog` objects, requested in ascending timestamp order.
            The list is empty when the backend returns no flow run for
            `self.flow_run_id`.
        """

        client = prefect.Client()
        end_time = end_time or self.updated_at

        # The upper bound always applies; without a `start_time` the lower bound is
        # replaced by an empty filter so the `_and` list keeps the same shape.
        timestamp_filters = [
            {"timestamp": {"_lte": end_time.isoformat()}},
            {"timestamp": {"_gt": start_time.isoformat()}} if start_time else {},
        ]

        logs_query = {
            with_args(
                "logs",
                {
                    "order_by": {EnumValue("timestamp"): EnumValue("asc")},
                    "where": {"_and": timestamp_filters},
                },
            ): {"timestamp": True, "message": True, "level": True}
        }

        # The logs are requested as a nested field of the flow run with our id.
        result = client.graphql(
            {
                "query": {
                    with_args(
                        "flow_run",
                        {
                            "where": {"id": {"_eq": self.flow_run_id}},
                        },
                    ): logs_query
                }
            }
        )

        # Unpack the result. An empty `flow_run` list means no run matched; return
        # no logs instead of failing with an IndexError on `[0]`.
        flow_runs = result.get("data", {}).get("flow_run", [{}])
        if not flow_runs:
            return []
        logs = flow_runs[0].get("logs", [])

        return [FlowRunLog.from_dict(log) for log in logs]
コード例 #4
0
    async def test_traverse_downstream_with_limit(self, flow_id):
        """Downstream traversal from task "t1" with `depth_limit` 2 returns depths 0-2."""
        # Look up the id of the task with slug "t1" in this flow.
        root = await models.Task.where(
            {"flow_id": {"_eq": flow_id}, "slug": {"_eq": "t1"}}
        ).first({"id"})

        client = hasura.HasuraClient()
        traversal_args = {
            "args": {
                "start_task_ids": LiteralSetValue([root.id]),
                "depth_limit": 2,
            },
            "order_by": {
                "depth": EnumValue("asc"),
                "task": {"slug": EnumValue("asc")},
            },
        }
        selection = {"task": {"slug"}, "depth": True}
        result = await client.execute(
            {"query": {with_args("utility_downstream_tasks", traversal_args): selection}}
        )

        # Expected (slug, depth) rows, ordered by depth and then slug.
        expected = [
            {"task": {"slug": slug}, "depth": depth}
            for slug, depth in [("t1", 0), ("t2", 1), ("t3", 2), ("t7", 2)]
        ]
        assert result.data.utility_downstream_tasks == expected
コード例 #5
0
def projects(name):
    """
    Query information regarding your Prefect projects.

    \b
    Options:
        --name, -n      TEXT    A project name to query
    """
    # Filter on the project name and sort the projects by name.
    project_args = {
        "where": {"_and": {"name": {"_eq": name}}},
        "order_by": {"name": EnumValue("asc")},
    }
    # Nested aggregate over each project's flows, distinct on flow name.
    flow_count_field = with_args(
        "flows_aggregate", {"distinct_on": EnumValue("name")}
    )
    selection = {
        "name": True,
        "created": True,
        "description": True,
        flow_count_field: {EnumValue("aggregate"): EnumValue("count")},
    }
    query = {"query": {with_args("project", project_args): selection}}

    response = Client().graphql(query)

    # One table row per project: name, flow count, humanized age, description.
    rows = [
        [
            entry.name,
            entry.flows_aggregate.aggregate.count,
            pendulum.parse(entry.created).diff_for_humans(),
            entry.description,
        ]
        for entry in response.data.project
    ]

    table = tabulate(
        rows,
        headers=["NAME", "FLOW COUNT", "AGE", "DESCRIPTION"],
        tablefmt="plain",
        numalign="left",
        stralign="left",
    )
    click.echo(table)
コード例 #6
0
def tasks(name, version, project):
    """
    Describe tasks from a Prefect flow. This command is similar to `prefect describe flow`
    but instead of flow metadata it outputs task metadata.

    \b
    Options:
        --name, -n      TEXT    A flow name to query                [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Match on flow name, flow version and the owning project's name.
    flow_filter = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    flow_args = {
        "where": flow_filter,
        "order_by": {
            "name": EnumValue("asc"),
            "version": EnumValue("desc"),
        },
        "distinct_on": EnumValue("name"),
    }
    # Only the flow's tasks are selected, with these fields per task.
    task_fields = {
        "name": True,
        "created": True,
        "slug": True,
        "description": True,
        "type": True,
        "max_retries": True,
        "retry_delay": True,
        "mapped": True,
    }
    query = {"query": {with_args("flow", flow_args): {"tasks": task_fields}}}

    matches = Client().graphql(query).data.flow
    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Tasks are read from the first returned flow only.
    task_rows = matches[0].tasks
    if not task_rows:
        click.secho("No tasks found for flow {}".format(name), fg="red")
        return

    for row in task_rows:
        click.echo(row)
コード例 #7
0
ファイル: describe.py プロジェクト: zpencerq/prefect
def flows(name, version, project, output):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query                [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
        --output, -o    TEXT    Output format, one of {'json', 'yaml'}.
                                Defaults to json.
    """

    # Arguments for the `flow` query: filter, ordering and distinct column.
    flow_args = {
        "where": {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        },
        "order_by": {
            "name": EnumValue("asc"),
            "version": EnumValue("desc"),
        },
        "distinct_on": EnumValue("name"),
    }
    # Fields requested for each matching flow.
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }
    query = {"query": {with_args("flow", flow_args): selection}}

    matches = Client().graphql(query).data.flow
    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Render the first returned flow through display_output in the requested format.
    display_output(matches[0].to_dict(), output=output)
コード例 #8
0
ファイル: test_flows.py プロジェクト: mindkhichdi/server-1
    async def test_schedule_runs_gives_preference_to_flow_group_schedule(
            self, flow_id, flow_group_id):
        """Scheduling uses the flow group's schedule rather than the flow's own."""
        # flow group schedule: cron "0 0 1 * *", i.e. midnight on the 1st of each month
        group_schedule = {
            "type": "Schedule",
            "clocks": [{"type": "CronClock", "cron": "0 0 1 * *"}],
        }
        # flow schedule: cron "* * * * *", i.e. once a minute
        flow_schedule = {
            "type": "Schedule",
            "clocks": [{"type": "CronClock", "cron": "* * * * *"}],
        }

        await models.FlowGroup.where(id=flow_group_id).update(
            set={"schedule": group_schedule})
        await models.Flow.where(id=flow_id).update(set={"schedule": flow_schedule})
        await models.Flow.where(id=flow_id).update(set={"is_schedule_active": True})

        # remove any existing runs for the flow before scheduling
        await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).delete()
        await api.flows.schedule_flow_runs(flow_id)

        # the 10 scheduled runs should follow the monthly group schedule, so the
        # latest one must lie further out than the next few minutes
        scheduled = await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).get(
            selection_set={"scheduled_start_time"},
            order_by={"scheduled_start_time": EnumValue("desc")},
        )
        assert len(scheduled) == 10
        latest_start = scheduled[0].scheduled_start_time
        assert latest_start > pendulum.now("utc").add(minutes=15)
コード例 #9
0
    async def test_get_or_create_mapped_children_creates_children(
        self, flow_id, flow_run_id
    ):
        """Requesting mapped children up to index 10 yields 11 new, ordered task runs."""

        async def count_task_runs():
            # Number of task runs currently stored for the chosen task.
            return len(await models.TaskRun.where({"task_id": {"_eq": task.id}}).get())

        # pick a task from the flow and note how many runs it has beforehand
        task = await models.Task.where({"flow_id": {"_eq": flow_id}}).first({"id"})
        runs_before = await count_task_runs()

        mapped_children = await api.runs.get_or_create_mapped_task_run_children(
            flow_run_id=flow_run_id, task_id=task.id, max_map_index=10
        )
        # indices 0 through 10 -> 11 children returned ...
        assert len(mapped_children) == 11
        # ... and 11 more task runs stored in the DB
        assert runs_before + 11 == await count_task_runs()

        # every child must have a state, and map indices must come back in order;
        # only the highest-version state of each run is requested
        latest_state = with_args(
            "states", {"order_by": {"version": EnumValue("desc")}, "limit": 1}
        )
        seen_indices = []
        for child_id in mapped_children:
            child_run = await models.TaskRun.where(id=child_id).first(
                {"map_index": True, latest_state: {"id"}}
            )
            seen_indices.append(child_run.map_index)
            assert child_run.states[0] is not None
        assert seen_indices == sorted(seen_indices)
コード例 #10
0
ファイル: client.py プロジェクト: quickpanda/prefect
    def set_flow_run_state(self, flow_run_id: str, version: int,
                           state: "prefect.engine.state.State") -> None:
        """
        Send a `setFlowRunState` mutation recording a new state for a flow run.

        Args:
            - flow_run_id (str): the id of the flow run to set state for
            - version (int): the current version of the flow run state
            - state (State): the new state for this flow run

        Raises:
            - ClientError: if the GraphQL mutation is bad for any reason
        """
        # The state is referenced as the `$state` variable declared in the mutation
        # header; its serialized form is handed to `self.graphql` as `state=`.
        state_input = {
            "flowRunId": flow_run_id,
            "version": version,
            "state": EnumValue("$state"),
        }
        request = {
            "mutation($state: JSON!)": {
                with_args("setFlowRunState", {"input": state_input}): {"id"}
            }
        }

        self.graphql(request, state=state.serialize())
コード例 #11
0
ファイル: test_flows.py プロジェクト: lixiangbins/server
    async def test_schedule_creates_parametrized_flow_runs(self, project_id):
        """Runs scheduled from two interleaved clocks carry each clock's parameters."""
        every_two_minutes = datetime.timedelta(minutes=2)
        # two clocks with the same interval, offset by one minute:
        # the first supplies x="a", the second supplies x="b"
        clock_a = prefect.schedules.clocks.IntervalClock(
            start_date=pendulum.now("UTC").add(minutes=1),
            interval=every_two_minutes,
            parameter_defaults={"x": "a"},
        )
        clock_b = prefect.schedules.clocks.IntervalClock(
            start_date=pendulum.now("UTC"),
            interval=every_two_minutes,
            parameter_defaults={"x": "b"},
        )

        schedule = prefect.schedules.Schedule(clocks=[clock_a, clock_b])
        flow = prefect.Flow(name="Test Scheduled Flow", schedule=schedule)
        flow.add_task(prefect.Parameter("x", default=1))
        flow_id = await api.flows.create_flow(
            project_id=project_id, serialized_flow=flow.serialize()
        )

        # clear any runs that already exist for the flow, then schedule explicitly
        await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).delete()
        scheduled_ids = await api.flows.schedule_flow_runs(flow_id)
        assert len(scheduled_ids) == 10

        flow_runs = await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).get(
            selection_set={"parameters": True, "scheduled_start_time": True},
            order_by={"scheduled_start_time": EnumValue("asc")},
        )

        # ordered by start time, the runs alternate between the two parameter sets
        assert all(run.parameters == {"x": "a"} for run in flow_runs[::2])
        assert all(run.parameters == {"x": "b"} for run in flow_runs[1::2])
コード例 #12
0
    def get_latest_cached_states(
        self, task_id: str, cache_key: Optional[str], created_after: datetime.datetime
    ) -> List["prefect.engine.state.State"]:
        """
        Fetch the Cached states of matching task runs, newest state timestamp first.

        A task run matches when its state is "Cached", its state timestamp is at or
        after `created_after`, and either its cache key equals `cache_key` or its task
        id equals `task_id`.

        Args:
            - task_id (str): the task id for this task run
            - cache_key (Optional[str]): the cache key for this Task's cache
            - created_after (datetime.datetime): the earliest date the state should have been created at

        Returns:
            - List[State]: the deserialized states of the matching task runs
        """
        cached_filter = {
            "state": {"_eq": "Cached"},
            "_or": [
                {"cache_key": {"_eq": cache_key}},
                {"task_id": {"_eq": task_id}},
            ],
            "state_timestamp": {"_gte": created_after.isoformat()},
        }
        task_run_args = {
            "where": cached_filter,
            "order_by": {"state_timestamp": EnumValue("desc")},
        }
        response = self.graphql(
            {"query": {with_args("task_run", task_run_args): "serialized_state"}}
        )  # type: Any

        # Turn each returned serialized state back into a State object.
        deserialize = prefect.engine.state.State.deserialize
        return [deserialize(row.serialized_state) for row in response.data.task_run]
コード例 #13
0
async def resolve_create_flow(obj: Any, info: GraphQLResolveInfo, input: dict) -> dict:
    """
    Create a flow from the mutation input and archive the other unarchived
    versions in its version group.

    Args:
        - obj (Any): unused by this resolver
        - info (GraphQLResolveInfo): unused by this resolver
        - input (dict): the mutation input. `serialized_flow` and `project_id` are
            required keys; `version_group_id`, `set_schedule_active` (default `True`),
            `description` and `idempotency_key` are optional

    Returns:
        - dict: `{"id": <id of the created flow>}`

    Raises:
        - KeyError: if `serialized_flow` or `project_id` is missing from `input`
        - ValueError: if `project_id` is `None`
    """
    serialized_flow = input["serialized_flow"]
    project_id = input["project_id"]
    version_group_id = input.get("version_group_id", None)
    set_schedule_active = input.get("set_schedule_active", True)
    description = input.get("description", None)
    idempotency_key = input.get("idempotency_key", None)

    if project_id is None:
        raise ValueError("Invalid project ID")

    # if no version_group_id is supplied, reuse the version group of the most
    # recently created flow with the same name in this project, if there is one
    if not version_group_id:
        flow = await models.Flow.where(
            {
                "project_id": {"_eq": project_id},
                "name": {"_eq": serialized_flow.get("name")},
            }
        ).first(
            order_by={"created": EnumValue("desc")}, selection_set={"version_group_id"}
        )
        if flow:
            version_group_id = flow.version_group_id  # type:ignore

    flow_id = await api.flows.create_flow(
        project_id=project_id,
        serialized_flow=serialized_flow,
        version_group_id=version_group_id,
        set_schedule_active=set_schedule_active,
        description=description,
        idempotency_key=idempotency_key,
    )

    # archive all other versions
    if version_group_id:
        all_other_unarchived_versions = await models.Flow.where(
            {
                "version_group_id": {"_eq": version_group_id},
                "id": {"_neq": flow_id},
                "archived": {"_eq": False},
            }
        ).get(
            {"id"}
        )  # type: Any

        for version in all_other_unarchived_versions:
            await api.flows.archive_flow(version.id)  # type: ignore

    return {"id": flow_id}
コード例 #14
0
    async def schedule_flows(self, n_flows=100) -> int:
        """
        Schedule flow runs for up to `n_flows` active schedules, taking the least
        recently checked schedules (never-checked ones first).

        Args:
            - n_flows (int): the maximum number of flows to schedule

        Returns:
            - int: The number of scheduled runs
        """

        now = pendulum.now("utc")

        # schedule has already started, or will start within the next day
        started_or_starting_soon = {
            "_or": [
                {"schedule_start": {"_lte": str(now.add(days=1))}},
                {"schedule_start": {"_is_null": True}},
            ]
        }
        # schedule has not yet ended
        not_yet_ended = {
            "_or": [
                {"schedule_end": {"_gte": str(now)}},
                {"schedule_end": {"_is_null": True}},
            ]
        }
        eligible = {
            # schedule is active
            "active": {"_eq": True},
            # ensure the flow is not archived
            "flow": {"archived": {"_eq": False}},
            "_and": [started_or_starting_soon, not_yet_ended],
        }

        # load at most `n_flows` rows from the schedules table
        schedules = await models.Schedule.where(eligible).get(
            selection_set={"id", "flow_id", "last_checked"},
            order_by=[{"last_checked": EnumValue("asc_nulls_first")}],
            limit=n_flows,
        )

        # concurrently schedule all runs
        pending = [
            api.schedules.schedule_flow_runs(
                schedule.id, seconds_since_last_checked=60
            )
            for schedule in schedules
        ]
        all_run_ids = await asyncio.gather(*pending)

        new_runs = sum(map(len, all_run_ids))
        self.logger.info(f"Scheduled {new_runs} flow runs.")

        return new_runs
コード例 #15
0
ファイル: zombie_killer.py プロジェクト: zhangguiyu/server
    async def reap_zombie_cancelling_flow_runs(
            self, heartbeat_cutoff: datetime.datetime = None) -> int:
        """
        Marks flow runs that are in a `Cancelling` state but fail to move to a
        `Cancelled` state as `Failed`.

        The runs to handle are selected with the filter returned by
        `self.get_flow_runs_where_clause`; at most 5000 runs are processed per call,
        most recently updated first.

        Args:
            - heartbeat_cutoff (datetime.datetime, optional): cutoff passed to
                `get_flow_runs_where_clause`; defaults to 10 minutes before now (UTC)

        Returns:
            - int: the number of flow runs that were handled
        """
        if not heartbeat_cutoff:
            heartbeat_cutoff = pendulum.now("utc").subtract(minutes=10)

        where_clause = await self.get_flow_runs_where_clause(
            heartbeat_cutoff=heartbeat_cutoff)
        flow_runs = await models.FlowRun.where(where_clause).get(
            selection_set={"id", "tenant_id"},
            order_by={"updated": EnumValue("desc")},
            limit=5000,
        )

        if flow_runs:
            self.logger.info(
                f"Zombie killer found {len(flow_runs)} flow runs.")

        # The same text is used for the Failed state and for the run's log entry.
        message = "No heartbeat detected from the flow run; marking the run as failed."
        zombies = 0

        # Set flow run states to failed
        for run in flow_runs:
            try:
                await prefect.api.states.set_flow_run_state(
                    flow_run_id=run.id,
                    state=Failed(message=message),
                )

                # log the state change to the flow run
                log_entry = {
                    "tenant_id": run.tenant_id,
                    "flow_run_id": run.id,
                    "name": f"{self.logger.name}.FlowRun",
                    "message": message,
                    "level": "ERROR",
                }
                await prefect.api.logs.create_logs([log_entry],
                                                   defer_db_write=False)
            except ValueError:
                # A ValueError for one run is logged and the loop moves on.
                self.logger.error("Error updating flow run %s",
                                  run.id,
                                  exc_info=True)
            else:
                zombies += 1

        if zombies:
            self.logger.info(f"Addressed {zombies} zombie flow runs.")

        return zombies
コード例 #16
0
def test_tenant_view_query_for_tenants_uses_order_by_in_query(monkeypatch):
    """The `order_by` argument is rendered into the tenant query that gets posted."""
    # Replace Client.post with a mock that returns one canned tenant.
    fake_post = MagicMock(return_value={"data": {"tenant": [TENANT_DATA_1]}})
    monkeypatch.setattr("prefect.client.client.Client.post", fake_post)

    TenantView._query_for_tenants(where={}, order_by={"foo": EnumValue("asc")})

    # The query text is read from the `params` keyword of the recorded call.
    sent_query = fake_post.call_args[1]["params"]["query"]
    assert "tenant(where: {}, order_by: { foo: asc })" in sent_query
コード例 #17
0
async def query_upstream(*ids,):
    """
    Run the `utility_upstream_tasks` query starting from the given task ids.

    Rows are requested ordered by depth and then by task slug, selecting each row's
    task slug and depth. Returns whatever the hasura client's `execute` returns.
    """
    upstream_args = {
        "args": {"start_task_ids": LiteralSetValue(list(ids))},
        "order_by": {
            "depth": EnumValue("asc"),
            "task": {"slug": EnumValue("asc")},
        },
    }
    selection = {"task": {"slug"}, "depth": True}
    query = {"query": {with_args("utility_upstream_tasks", upstream_args): selection}}
    return await prefect.plugins.hasura.client.execute(query)
コード例 #18
0
ファイル: test_runs.py プロジェクト: harupy/server
    async def test_get_or_create_mapped_children_handles_partial_children(
            self, flow_id, flow_run_id):
        """Pre-existing mapped children are kept while the missing ones are created."""
        # get a task from the flow
        task = await models.Task.where({"flow_id": {"_eq": flow_id}}).first(
            {"id", "cache_key"})

        # create a few mapped children up front: map index 3 is inserted with no
        # states, map index 6 is inserted with a single Pending state
        await models.TaskRun(
            flow_run_id=flow_run_id,
            task_id=task.id,
            map_index=3,
            cache_key=task.cache_key,
        ).insert()
        pending_state = models.TaskRunState(
            **models.TaskRunState.fields_from_state(
                Pending(message="Task run created")))
        stateful_child = await models.TaskRun(
            flow_run_id=flow_run_id,
            task_id=task.id,
            map_index=6,
            cache_key=task.cache_key,
            states=[pending_state],
        ).insert()

        # retrieve mapped children
        mapped_children = await api.runs.get_or_create_mapped_task_run_children(
            flow_run_id=flow_run_id, task_id=task.id, max_map_index=10)
        assert len(mapped_children) == 11

        # confirm each of the mapped children has a state and is ordered properly;
        # only the highest-version state of each run is requested
        latest_state = with_args(
            "states", {"order_by": {"version": EnumValue("desc")}, "limit": 1})
        seen_indices = []
        for child_id in mapped_children:
            child_run = await models.TaskRun.where(id=child_id).first(
                {"map_index": True, latest_state: {"id"}})
            seen_indices.append(child_run.map_index)
            assert child_run.states[0] is not None
        assert seen_indices == sorted(seen_indices)

        # confirm the one child created with a state only has the one state
        stored_states = await models.TaskRunState.where(
            {"task_run_id": {"_eq": stateful_child}}).get()
        assert len(stored_states) == 1
コード例 #19
0
ファイル: runs.py プロジェクト: dhfromkorea/server
async def create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
    idempotency_key: str = None,
) -> Any:
    """
    Create a flow run, reusing a recent run that carries the same idempotency key.

    When `idempotency_key` is supplied, flow runs created within the last day are
    searched for that key (narrowed by `flow_id` and / or `version_group_id` when
    those are given). If a match exists, its ID is returned and nothing is created.
    Otherwise a new run is created and then stamped with the key.

    Args:
        - flow_id (str, optional): The ID of the flow to run
        - parameters (dict, optional): Parameter values for the flow run
        - context (dict, optional): Context values for the flow run
        - scheduled_start_time (datetime.datetime, optional): When the run should start;
            forwarded unchanged to the underlying creation helper
        - flow_run_name (str, optional): A name for the flow run
        - version_group_id (str, optional): A version group ID; forwarded to the
            underlying creation helper and also used to narrow the idempotency lookup
        - idempotency_key (str, optional): A key used to avoid creating duplicate runs.
            Only flow runs created during the previous 24 hours are checked for it.

    Returns:
        - The ID of the matching existing flow run, or of the newly created one
    """
    has_key = idempotency_key is not None

    if has_key:
        # only runs created after this instant are eligible for de-duplication
        cutoff = str(pendulum.now().subtract(days=1))
        where = {
            "idempotency_key": {"_eq": idempotency_key},
            "created": {"_gt": cutoff},
        }
        if flow_id is not None:
            where["flow_id"] = {"_eq": flow_id}
        if version_group_id is not None:
            where["flow"] = {"version_group_id": {"_eq": version_group_id}}

        # prefer the most recently created match
        existing = await models.FlowRun.where(where).first(
            {"id"}, order_by={"created": EnumValue("desc")}
        )
        if existing is not None:
            return existing.id

    new_run_id = await _create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        context=context,
        scheduled_start_time=scheduled_start_time,
        flow_run_name=flow_run_name,
        version_group_id=version_group_id,
    )

    if has_key:
        # record the key on the new run so later calls can find it
        await models.FlowRun.where(id=new_run_id).update(
            {"idempotency_key": idempotency_key}
        )

    return new_run_id
コード例 #20
0
ファイル: scheduler.py プロジェクト: jacoblonghurst/server
    async def run_once(self) -> int:
        """
        Schedule flow runs for every flow whose schedule is active and which is not archived.

        Flows are read in pages of 500, ordered by ID, and
        `api.flows.schedule_flow_runs` is awaited concurrently for each page.
        Results that are not lists (for example exceptions returned by
        `asyncio.gather`) are left out of the count.

        Returns:
            - int: The number of scheduled runs
        """
        page_size = 500
        runs_scheduled = 0
        page = 0

        while True:
            eligible = models.Flow.where(
                {
                    # schedule is active
                    "is_schedule_active": {"_eq": True},
                    # flow is not archived
                    "archived": {"_eq": False},
                }
            )
            flows = await eligible.get(
                selection_set={"id"},
                # deterministic sort for batching
                order_by=[{"id": EnumValue("desc")}],
                limit=page_size,
                offset=page_size * page,
            )
            if not flows:
                break
            page += 1

            # build one scheduling call per flow and await them together
            pending = [
                api.flows.schedule_flow_runs(
                    flow.id,
                    max_runs=config.services.towel.max_scheduled_runs_per_flow,
                )
                for flow in flows
            ]
            outcomes = await asyncio.gather(*pending, return_exceptions=True)

            for outcome in outcomes:
                # only lists are counted, so returned exceptions are ignored here
                if isinstance(outcome, list):
                    runs_scheduled += len(outcome)

        self.logger.info(f"Scheduled {runs_scheduled} flow runs.")
        return runs_scheduled
コード例 #21
0
ファイル: test_runs.py プロジェクト: kmoonwright/server
 async def test_new_run_state_is_in_history(self, simple_flow_id):
     """
     Creating a flow run leaves two entries in its state history, and the
     later one is the `Scheduled` state carrying the requested start time.
     """
     scheduled_at = pendulum.datetime(2020, 1, 1)
     run_id = await api.runs.create_flow_run(
         flow_id=simple_flow_id, scheduled_start_time=scheduled_at
     )

     # read the run's state history, oldest entry first
     run_filter = {"flow_run_id": {"_eq": run_id}}
     history = await models.FlowRunState.where(run_filter).get(
         {"state", "start_time", "message"},
         order_by={"timestamp": EnumValue("asc")},
     )

     assert len(history) == 2
     newest = history[1]
     assert newest.state == "Scheduled"
     assert newest.start_time == scheduled_at
     assert newest.message == "Flow run scheduled."
コード例 #22
0
ファイル: flow.py プロジェクト: omarbelkady/prefect
    def from_flow_name(
        cls,
        flow_name: str,
        project_name: str = "",
        last_updated: bool = False,
    ) -> "FlowView":
        """
        Build a FlowView from an unarchived flow looked up by name.

        Flow names may repeat across projects, so the lookup can optionally be
        restricted to one project.

        Args:
            - flow_name: The name of the flow to look up
            - project_name: The project to restrict the lookup to. `""` (default)
                applies no project filter; any other falsy value such as `None`
                matches flows whose project name is null.
            - last_updated: When more than one flow matches, `False` (default)
                raises an error; `True` uses the first match, with matches ordered
                by `created` descending.

        Returns:
            A new instance of FlowView

        Raises:
            - ValueError: if several flows match and `last_updated` is not set
        """
        where: Dict[str, Any] = {
            "name": {"_eq": flow_name},
            "archived": {"_eq": False},
        }

        # an empty string means "any project"; None means "project name is null"
        if project_name != "":
            if project_name:
                name_clause = {"_eq": project_name}
            else:
                name_clause = {"_is_null": True}
            where["project"] = {"name": name_clause}

        matches = cls._query_for_flows(
            where=where,
            order_by={"created": EnumValue("desc")},
        )

        ambiguous = len(matches) > 1
        if ambiguous and not last_updated:
            raise ValueError(
                f"Found multiple flows matching {where}. "
                "Provide a `project_name` as well or toggle `last_updated` "
                "to use the flow that was most recently updated")

        return cls._from_flow_data(matches[0])
コード例 #23
0
    async def run_once(self) -> int:
        """
        Schedule flow runs for every flow whose schedule is active and which is not archived.

        Flows are read in pages of 500 and `api.flows.schedule_flow_runs` is awaited
        concurrently for each page. A failure while scheduling one flow is logged and
        skipped so that it cannot abort scheduling for the remaining flows.

        Returns:
            - int: The number of scheduled runs
        """

        runs_scheduled = 0
        iterations = 0

        # visit all flows in batches of 500
        while True:

            flows = await models.Flow.where({
                # schedule is active
                "is_schedule_active": {
                    "_eq": True
                },
                # flow is not archived
                "archived": {
                    "_eq": False
                },
            }).get(
                selection_set={
                    "id",
                },
                # NOTE(review): the sort key is derived from the flows' run
                # `scheduled_start_time`, which scheduling presumably changes while
                # we page by offset; flows might be skipped or revisited - confirm.
                order_by=[{
                    "flow_runs_aggregate": {
                        "max": {
                            "scheduled_start_time": EnumValue("asc_nulls_last")
                        }
                    }
                }],
                limit=500,
                offset=500 * iterations,
            )

            if not flows:
                break

            iterations += 1

            # concurrently schedule all runs; exceptions are returned rather than
            # raised so a single bad flow does not cancel the whole pass
            results = await asyncio.gather(
                *[api.flows.schedule_flow_runs(flow.id) for flow in flows],
                return_exceptions=True,
            )

            for flow, result in zip(flows, results):
                if isinstance(result, BaseException):
                    self.logger.error(
                        f"Failed to schedule runs for flow {flow.id}: {result!r}")
                    continue
                runs_scheduled += len(result)

        self.logger.info(f"Scheduled {runs_scheduled} flow runs.")
        return runs_scheduled
コード例 #24
0
ファイル: client.py プロジェクト: shasm13/prefect
    def set_task_run_state(
        self,
        task_run_id: str,
        version: int,
        state: "prefect.engine.state.State",
        cache_for: datetime.timedelta = None,
    ) -> None:
        """
        Send a `setTaskRunState` GraphQL mutation for a task run.

        The state is serialized with `state.serialize()` and passed as the
        `$state` query variable; the task run id and version are embedded in the
        mutation input directly.

        Args:
            - task_run_id (str): the id of the task run to set state for
            - version (int): the current version of the task run state
            - state (State): the new state for this task run
            - cache_for (timedelta, optional): part of the signature, but not read
                anywhere in this method body

        Raises:
            - whatever `self.graphql` raises for a failed request
        """
        mutation_input = {
            "taskRunId": task_run_id,
            "version": version,
            # refers to the `$state` variable declared in the operation header below
            "state": EnumValue("$state"),
        }
        set_state_field = with_args("setTaskRunState", {"input": mutation_input})
        mutation = {"mutation($state: JSON!)": {set_state_field: {"id"}}}

        self.graphql(mutation, variables={"state": state.serialize()})
コード例 #25
0
ファイル: flow.py プロジェクト: omarbelkady/prefect
    def from_flow_group_id(cls, flow_group_id: str) -> "FlowView":
        """
        Build a FlowView from the newest flow in a flow group.

        Flows in the group are queried ordered by `created` descending and the
        first result is used.

        Args:
            - flow_group_id: The uuid of the flow group

        Returns:
            A new instance of FlowView

        Raises:
            - TypeError: if `flow_group_id` is not a string
        """
        if not isinstance(flow_group_id, str):
            raise TypeError(
                f"Unexpected type {type(flow_group_id)!r} for `flow_group_id`, "
                f"expected 'str'.")

        group_filter = {"flow_group_id": {"_eq": flow_group_id}}
        newest_first = cls._query_for_flows(
            where=group_filter,
            order_by={"created": EnumValue("desc")},
        )

        # the first entry is the most recently created flow in the group
        return cls._from_flow_data(newest_first[0])
コード例 #26
0
def cloud(name, project, version, watch, logs):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT    The name of a flow to run                                       [required]
        --project, -p   TEXT    The name of a project that contains the flow                    [required]
        --version, -v   INTEGER A flow version to run
        --watch, -w             Watch current state of the flow run, stream output to stdout
        --logs, -l              Get logs of the flow run, stream output to stdout
    """
    # NOTE: the docstring above carries click's `\b` marker and is presumably
    # rendered as the command's help text, so it is kept verbatim.
    #
    # Flow of this command:
    #   1. look up the flow by name / version / project
    #   2. create a flow run for it and print the run ID
    #   3. optionally poll every 3 seconds, streaming either state changes
    #      (`watch`) or new log rows (`logs`) until the run reports `Success`
    #      or `Failed`

    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.",
            fg="red")
        return

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {
                                "_eq": name
                            },
                            "version": {
                                "_eq": version
                            },
                            "project": {
                                "name": {
                                    "_eq": project
                                }
                            },
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {
                "id": True
            }
        }
    }

    # a single client is reused for every request made by this command
    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if not flow_data:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_id = flow_data[0].id

    flow_run_id = client.create_flow_run(flow_id=flow_id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    # the only states that end the polling loops below
    terminal_states = ("Success", "Failed")

    if watch:
        # the query does not change between polls, so build it once
        states_query = {
            "query": {
                with_args("flow_run_by_pk", {"id": flow_run_id}): {
                    with_args(
                        "states",
                        {
                            "order_by": {
                                EnumValue("timestamp"): EnumValue("asc")
                            }
                        },
                    ): {
                        "state": True,
                        "timestamp": True
                    }
                }
            }
        }
        seen_states = set()

        while True:
            result = client.graphql(states_query)

            # Filter through retrieved states and output in order
            for state_entry in result.data.flow_run_by_pk.states:
                state = state_entry.state
                if state in seen_states:
                    continue

                if state in terminal_states:
                    click.echo(state)
                    return

                click.echo("{} -> ".format(state), nl=False)
                seen_states.add(state)

            time.sleep(3)

    if logs:
        # rows already printed, kept as hashable tuples for O(1) membership checks
        seen_rows = set()

        log_query = {
            with_args("logs", {
                "order_by": {
                    EnumValue("timestamp"): EnumValue("asc")
                }
            }): {
                "timestamp": True,
                "message": True,
                "level": True
            },
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {
                            "id": {
                                "_eq": flow_run_id
                            }
                        },
                        "order_by": {
                            EnumValue("start_time"): EnumValue("desc")
                        },
                    },
                ):
                log_query
            }
        }

        # used after each poll to decide whether the run has finished
        state_query = {
            "query": {
                with_args("flow_run_by_pk", {"id": flow_run_id}): {
                    "state": True
                }
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_rows = []

            for entry in flow_run[0].logs:
                row = (entry.timestamp, entry.level, entry.message)
                if row in seen_rows:
                    continue

                if not seen_rows:
                    # the very first row is printed on its own, with the headers
                    click.echo(
                        tabulate(
                            [row],
                            headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                            tablefmt="plain",
                            numalign="left",
                            stralign="left",
                        ))
                else:
                    new_rows.append(row)

                seen_rows.add(row)

            if new_rows:
                click.echo(
                    tabulate(new_rows,
                             tablefmt="plain",
                             numalign="left",
                             stralign="left"))

            # Check if state is either Success or Failed, exit if it is
            result = client.graphql(state_query)

            if result.data.flow_run_by_pk.state in terminal_states:
                return

            time.sleep(3)
コード例 #27
0
def flows(name, version, project, limit, all_versions):
    """
    Query information regarding your Prefect flows.

    \b
    Options:
        --name, -n      TEXT    A flow name to query
        --version, -v   TEXT    A flow version to query
        --project, -p   TEXT    The name of a project to query
        --limit, -l     INTEGER A limit amount of flows to query, defaults to 10
        --all-versions          Output all versions of a flow, default shows most recent
    """
    # de-duplicate by flow name unless every version was requested
    distinct_on = None if all_versions else EnumValue("name")

    flow_field = with_args(
        "flow",
        {
            "where": {
                "_and": {
                    "name": {"_eq": name},
                    "version": {"_eq": version},
                    "project": {"name": {"_eq": project}},
                }
            },
            "order_by": {
                "name": EnumValue("asc"),
                "version": EnumValue("desc"),
            },
            "distinct_on": distinct_on,
            "limit": limit,
        },
    )
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
    }

    result = Client().graphql({"query": {flow_field: selection}})

    # one table row per returned flow; AGE is the creation time in relative form
    rows = [
        [
            flow.name,
            flow.version,
            flow.project.name,
            pendulum.parse(flow.created).diff_for_humans(),
        ]
        for flow in result.data.flow
    ]

    table = tabulate(
        rows,
        headers=["NAME", "VERSION", "PROJECT NAME", "AGE"],
        tablefmt="plain",
        numalign="left",
        stralign="left",
    )
    click.echo(table)
コード例 #28
0
def logs(name, info):
    """
    Query logs for a flow run.

    \b
    Options:
        --name, -n      TEXT    A flow run name to query        [required]
        --info, -i              Retrieve detailed logging info
    """
    # per-log fields: the `info` payload when requested, else the table columns
    if info:
        log_fields = {"timestamp": True, "info": True}
    else:
        log_fields = {"timestamp": True, "message": True, "level": True}

    logs_field = with_args(
        "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
    )
    log_query = {logs_field: log_fields, "start_time": True}

    flow_run_field = with_args(
        "flow_run",
        {
            "where": {"name": {"_eq": name}},
            "order_by": {EnumValue("start_time"): EnumValue("desc")},
        },
    )

    result = Client().graphql({"query": {flow_run_field: log_query}})

    flow_run = result.data.flow_run
    if not flow_run:
        click.secho("{} not found".format(name), fg="red")
        return

    # the query sorts by start_time descending, so the first run is used
    entries = flow_run[0].logs

    if info:
        for entry in entries:
            click.echo(entry.info)
        return

    rows = [[entry.timestamp, entry.level, entry.message] for entry in entries]
    click.echo(
        tabulate(
            rows,
            headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        ))
コード例 #29
0
def tasks(name, flow_name, flow_version, project, limit):
    """
    Query information regarding your Prefect tasks.

    \b
    Options:
        --name, -n          TEXT    A task name to query
        --flow-name, -fn    TEXT    A flow name to query
        --flow-version, -fx INTEGER A flow version to query
        --project, -p       TEXT    The name of a project to query
        --limit, -l         INTEGER A limit amount of tasks to query, defaults to 10
    """
    # filter on the task's own name plus attributes of the flow it belongs to
    task_filter = {
        "_and": {
            "name": {"_eq": name},
            "flow": {
                "name": {"_eq": flow_name},
                "project": {"name": {"_eq": project}},
                "version": {"_eq": flow_version},
            },
        }
    }
    task_field = with_args(
        "task",
        {
            "where": task_filter,
            "limit": limit,
            "order_by": {"created": EnumValue("desc")},
        },
    )
    selection = {
        "name": True,
        "created": True,
        "flow": {"name": True, "version": True},
        "mapped": True,
        "type": True,
    }

    result = Client().graphql({"query": {task_field: selection}})

    # one table row per returned task; AGE is the creation time in relative form
    rows = [
        [
            task.name,
            task.flow.name,
            task.flow.version,
            pendulum.parse(task.created).diff_for_humans(),
            task.mapped,
            task.type,
        ]
        for task in result.data.task
    ]

    column_titles = ["NAME", "FLOW NAME", "FLOW VERSION", "AGE", "MAPPED", "TYPE"]
    click.echo(
        tabulate(
            rows,
            headers=column_titles,
            tablefmt="plain",
            numalign="left",
            stralign="left",
        ))
コード例 #30
0
def flow_runs(limit, flow, project, started):
    """
    Query information regarding Prefect flow runs.

    \b
    Options:
        --limit, -l         INTEGER A limit amount of flow runs to query, defaults to 10
        --flow, -f          TEXT    Name of a flow to query for runs
        --project, -p       TEXT    Name of a project to query
        --started, -s               Only retrieve started flow runs, default shows `Scheduled` runs
    """
    # both modes restrict runs to the same flow name / project name
    flow_filter = {
        "_and": {
            "name": {
                "_eq": flow
            },
            "project": {
                "name": {
                    "_eq": project
                }
            },
        }
    }

    if started:
        # only runs that have a start time, most recently started first
        order = {"start_time": EnumValue("desc")}
        where = {
            "_and": {
                "flow": flow_filter,
                "start_time": {
                    "_is_null": False
                },
            }
        }
    else:
        # any run, most recently created first
        order = {"created": EnumValue("desc")}
        where = {"flow": flow_filter}

    query = {
        "query": {
            with_args("flow_run", {
                "where": where,
                "limit": limit,
                "order_by": order
            }): {
                "flow": {
                    "name": True
                },
                "created": True,
                "state": True,
                "name": True,
                "duration": True,
                "start_time": True,
            }
        }
    }

    result = Client().graphql(query)

    output = []
    for item in result.data.flow_run:
        # runs without a start time show an empty START TIME cell
        start_time = (pendulum.parse(item.start_time).to_datetime_string()
                      if item.start_time else None)
        output.append([
            item.name,
            item.flow.name,
            item.state,
            pendulum.parse(item.created).diff_for_humans(),
            start_time,
            item.duration,
        ])

    click.echo(
        tabulate(
            output,
            headers=[
                "NAME", "FLOW NAME", "STATE", "AGE", "START TIME", "DURATION"
            ],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        ))