async def test_create_compressed_flow(self, run_query, project_id):
    serialized_flow = compress(prefect.Flow(name="test").serialize(build=False))
    result = await run_query(
        query=self.create_compressed_flow_mutation,
        variables=dict(
            input=dict(serialized_flow=serialized_flow, project_id=project_id)
        ),
    )
    flow = await models.Flow.where(
        id=result.data.create_flow_from_compressed_string.id
    ).first({"project_id", "name"})
    assert flow.project_id == project_id
    assert flow.name == "test"
def deploy(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    compressed: bool = True,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to deploy
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the
            schedule to inactive in the database to prevent auto-scheduling
            runs (if the Flow has a schedule). Defaults to `True`. This can
            be changed later.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`

    Returns:
        - str: the ID of the newly-deployed flow

    Raises:
        - ClientError: if the deploy failed
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        raise ClientError(
            "Flows with required parameters can not be scheduled automatically."
        )
    if compressed:
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }

    query_project = {
        "query": {
            with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                "id": True
            }
        }
    }

    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                project_name, project_name
            )
        )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # verify that the serialized flow can be deserialized
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        variables=dict(
            input=dict(
                projectId=project[0].id,
                serializedFlow=serialized_flow,
                setScheduleActive=set_schedule_active,
            )
        ),
    )  # type: Any

    flow_id = (
        res.data.createFlowFromCompressedString.id
        if compressed
        else res.data.createFlow.id
    )
    return flow_id
def register(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    compressed: bool = True,
    no_url: bool = False,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to register
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the
            schedule to inactive in the database to prevent auto-scheduling
            runs (if the Flow has a schedule). Defaults to `True`. This can
            be changed later.
        - version_group_id (str, optional): the UUID version group ID to use
            for versioning this Flow in Cloud; if not provided, the version
            group ID associated with this Flow's project and name will be used.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`
        - no_url (bool, optional): if `True`, the stdout from this function
            will not contain the URL link to the newly-registered flow in the
            Cloud UI

    Returns:
        - str: the ID of the newly-registered flow

    Raises:
        - ClientError: if the register failed
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        required_names = {p.name for p in required_parameters}
        if not all(
            [
                required_names == set(c.parameter_defaults.keys())
                for c in flow.schedule.clocks
            ]
        ):
            raise ClientError(
                "Flows with required parameters can not be scheduled automatically."
            )
    if any(e.key for e in flow.edges) and flow.result_handler is None:
        warnings.warn(
            "No result handler was specified on your Flow. Cloud features such as "
            "input caching and resuming task runs from failure may not work properly.",
            UserWarning,
        )
    if compressed:
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }

    query_project = {
        "query": {
            with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                "id": True
            }
        }
    }

    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                project_name, project_name
            )
        )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # verify that the serialized flow can be deserialized
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        variables=dict(
            input=dict(
                projectId=project[0].id,
                serializedFlow=serialized_flow,
                setScheduleActive=set_schedule_active,
                versionGroupId=version_group_id,
            )
        ),
    )  # type: Any

    flow_id = (
        res.data.createFlowFromCompressedString.id
        if compressed
        else res.data.createFlow.id
    )

    if not no_url:
        # Generate direct link to Cloud flow
        flow_url = self.get_cloud_url("flow", flow_id)
        print("Flow: {}".format(flow_url))

    return flow_id
def register_serialized_flow(
    client: "prefect.Client",
    serialized_flow: dict,
    project_id: str,
    force: bool = False,
    schedule: bool = True,
) -> Tuple[str, int, bool]:
    """Register a pre-serialized flow.

    Args:
        - client (prefect.Client): the prefect client
        - serialized_flow (dict): the serialized flow
        - project_id (str): the project id
        - force (bool, optional): If `False` (default), an idempotency key will
            be generated to avoid unnecessary re-registration. Set to `True` to
            force re-registration.
        - schedule (bool, optional): If `True` (default) activates the flow
            schedule upon registering.

    Returns:
        - flow_id (str): the flow id
        - flow_version (int): the flow version
        - is_new (bool): True if this is a new flow version, false if
            re-registration was skipped.
    """
    # Get most recent flow id for this flow. This can be removed once
    # the registration graphql routes return more information
    flow_name = serialized_flow["name"]
    resp = client.graphql(
        {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": {
                            "_and": {
                                "name": {"_eq": flow_name},
                                "project": {"id": {"_eq": project_id}},
                            }
                        },
                        "order_by": {"version": EnumValue("desc")},
                        "limit": 1,
                    },
                ): {"id", "version"}
            }
        }
    )
    if resp.data.flow:
        prev_id = resp.data.flow[0].id
        prev_version = resp.data.flow[0].version
    else:
        prev_id = None
        prev_version = 0

    inputs = dict(
        project_id=project_id,
        serialized_flow=compress(serialized_flow),
        set_schedule_active=schedule,
    )
    if not force:
        inputs["idempotency_key"] = hashlib.sha256(
            json.dumps(serialized_flow, sort_keys=True).encode()
        ).hexdigest()

    res = client.graphql(
        {
            "mutation($input: create_flow_from_compressed_string_input!)": {
                "create_flow_from_compressed_string(input: $input)": {"id"}
            }
        },
        variables=dict(input=inputs),
        retry_on_api_error=False,
    )

    new_id = res.data.create_flow_from_compressed_string.id

    if new_id == prev_id:
        return new_id, prev_version, False
    else:
        return new_id, prev_version + 1, True
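# A minimal usage sketch for register_serialized_flow. The project id is an
# illustrative placeholder and an authenticated prefect.Client with access to
# that project is assumed; this is not part of the original source.
import prefect
from prefect import Flow

client = prefect.Client()
serialized = Flow("example").serialize(build=False)
flow_id, version, is_new = register_serialized_flow(
    client=client,
    serialized_flow=serialized,
    project_id="<project-id>",  # hypothetical project id
    force=False,  # keep the idempotency key so unchanged flows skip re-registration
    schedule=True,
)
if not is_new:
    # the backend matched the sha256 idempotency key (a hash of the
    # sorted-keys JSON serialization) and returned the existing flow
    print("Flow {} unchanged at version {}".format(flow_id, version))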
def test_compression_back_translation(obj):
    assert decompress(compress(obj)) == obj


def test_decompress():
    test_str = compress({"test": 42})
    result = decompress(test_str)
    assert isinstance(result, dict)


def test_compress():
    result = compress({"test": 42})
    assert isinstance(result, str)
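# The tests above only exercise round-tripping. For reference, a minimal sketch
# of what a compress/decompress pair like this could look like; JSON + zlib +
# base64 is an assumption here, not a statement about Prefect's exact internals.
import base64
import json
import zlib
from typing import Any


def compress(value: Any) -> str:
    """Serialize a JSON-compatible object, zlib-compress it, and base64-encode it."""
    return base64.b64encode(zlib.compress(json.dumps(value).encode())).decode("utf-8")


def decompress(string: str) -> Any:
    """Invert compress: base64-decode, decompress, and load the JSON payload."""
    return json.loads(zlib.decompress(base64.b64decode(string)))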
def register(
    self,
    flow: "Flow",
    project_name: str = None,
    build: bool = True,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    compressed: bool = True,
    no_url: bool = False,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to register
        - project_name (str, optional): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the
            schedule to inactive in the database to prevent auto-scheduling
            runs (if the Flow has a schedule). Defaults to `True`. This can
            be changed later.
        - version_group_id (str, optional): the UUID version group ID to use
            for versioning this Flow in Cloud; if not provided, the version
            group ID associated with this Flow's project and name will be used.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`
        - no_url (bool, optional): if `True`, the stdout from this function
            will not contain the URL link to the newly-registered flow in the
            Cloud UI

    Returns:
        - str: the ID of the newly-registered flow

    Raises:
        - ClientError: if the register failed
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        required_names = {p.name for p in required_parameters}
        if not all(
            [
                required_names <= set(c.parameter_defaults.keys())
                for c in flow.schedule.clocks
            ]
        ):
            raise ClientError(
                "Flows with required parameters can not be scheduled automatically."
            )
    if any(e.key for e in flow.edges) and flow.result is None:
        warnings.warn(
            "No result handler was specified on your Flow. Cloud features such as "
            "input caching and resuming task runs from failure may not work properly."
        )
    if compressed:
        create_mutation = {
            "mutation($input: create_flow_from_compressed_string_input!)": {
                "create_flow_from_compressed_string(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: create_flow_input!)": {
                "create_flow(input: $input)": {"id"}
            }
        }

    project = None

    if prefect.config.backend == "cloud":
        if project_name is None:
            raise TypeError(
                "'project_name' is a required field when registering a flow with Cloud. "
                "If you are attempting to register a Flow with a local Prefect server "
                "you may need to run `prefect backend server` first."
            )

        query_project = {
            "query": {
                with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                    "id": True
                }
            }
        }

        project = self.graphql(query_project).data.project  # type: ignore

        if not project:
            raise ValueError(
                'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                    project_name, project_name
                )
            )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # Set Docker storage image in environment metadata if provided
    if isinstance(flow.storage, prefect.environments.storage.Docker):
        flow.environment.metadata["image"] = flow.storage.name
        serialized_flow = flow.serialize(build=False)

    # If no image ever set, default metadata to all_extras image on current version
    if not flow.environment.metadata.get("image"):
        version = prefect.__version__.split("+")[0]
        flow.environment.metadata["image"] = f"prefecthq/prefect:all_extras-{version}"
        serialized_flow = flow.serialize(build=False)

    # verify that the serialized flow can be deserialized
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        variables=dict(
            input=dict(
                project_id=(project[0].id if project else None),
                serialized_flow=serialized_flow,
                set_schedule_active=set_schedule_active,
                version_group_id=version_group_id,
            )
        ),
    )  # type: Any

    flow_id = (
        res.data.create_flow_from_compressed_string.id
        if compressed
        else res.data.create_flow.id
    )

    if not no_url:
        # Generate direct link to Cloud flow
        flow_url = self.get_cloud_url("flow", flow_id)
        print("Flow: {}".format(flow_url))

    return flow_id
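# A minimal usage sketch for Client.register. The project name and task are
# illustrative assumptions; the client must be pointed at Cloud (or a local
# Prefect server) and authenticated as appropriate.
import prefect
from prefect import Client, Flow, task


@task
def say_hello():
    print("hello")


with Flow("example-flow") as flow:
    say_hello()

client = Client()
flow_id = client.register(
    flow,
    project_name="Demo",      # required when the backend is Cloud
    compressed=True,          # send the flow as a compressed string
    set_schedule_active=True,
)
print(flow_id)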