Ejemplo n.º 1
0
    def test_result(self, stub):
        """
        Check that downloading a succeeded job returns the stored value.

        A mocked GET response carrying a pyarrow-serialized body is registered
        at the job's bucket URL, and ``download(job)`` must give back the same
        value that was serialized.
        """
        expected = 2
        codec = "lz4"

        format_proto = user_format_to_proto(
            {"type": "pyarrow", "compression": "brotli"}
        )
        destination_proto = user_destination_to_proto("download")
        succeeded = job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED)
        job_message = job_pb2.Job(
            id="foo",
            state=succeeded,
            type=9,
            format=format_proto,
            destination=destination_proto,
        )
        job = Job._from_proto(job_message)

        # The codec used for the body is also sent in the response headers.
        response_headers = {
            "x-goog-stored-content-encoding": "application/vnd.pyarrow",
            "x-goog-meta-X-Arrow-Codec": codec,
        }
        responses.add(
            responses.GET,
            Job.BUCKET_PREFIX.format(job.id),
            body=serialize_pyarrow(expected, codec),
            headers=response_headers,
            status=200,
        )

        assert download(job) == expected
Ejemplo n.º 2
0
    def test_wait_success(self, stub):
        """
        Check that ``wait`` records the stage reported by ``WatchJob``.

        The stubbed ``WatchJob`` call returns a single SUCCEEDED state, which
        must end up on the job's underlying message after waiting.
        """
        job = Job._from_proto(
            job_pb2.Job(
                id="foo",
                destination=user_destination_to_proto({"type": "download"}),
            )
        )
        succeeded = job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED)

        # WatchJob hands back exactly one state update.
        stub.return_value.WatchJob.return_value = [succeeded]

        job.wait()

        assert job._message.state.stage == succeeded.stage
Ejemplo n.º 3
0
 def create_side_effect(req, **kwargs):
     """
     Build the ``Job`` message returned for a create request.

     Request fields are echoed back unchanged; ``id_``, ``job_state``,
     ``format`` and ``destination`` come from the enclosing scope. Extra
     keyword arguments are accepted and ignored.
     """
     job_fields = {
         "id": id_,
         "parameters": req.parameters,
         "serialized_graft": req.serialized_graft,
         "typespec": req.typespec,
         "type": req.type,
         "channel": req.channel,
         "state": job_state,
         "format": user_format_to_proto(format),
         "destination": user_destination_to_proto(destination),
     }
     return job_pb2.Job(**job_fields)
Ejemplo n.º 4
0
    def test_create(self, stub):
        """
        Check that constructing a ``Job`` issues the expected ``CreateJob`` call.

        The stubbed ``CreateJob`` returns a prepared ``Job`` message; the test
        asserts that it was called exactly once with the expected request,
        timeout and channel metadata, and that the new job keeps the returned
        message object.
        """
        proxy = types.Int(1)
        params = {"foo": types.Str("bar")}

        typespec = cereal.serialize_typespec(type(proxy))
        format_proto = user_format_to_proto(
            {"type": "pyarrow", "compression": "brotli"}
        )
        destination_proto = user_destination_to_proto({"type": "download"})
        expected_request = job_pb2.CreateJobRequest(
            parameters=json.dumps(parameters_to_grafts(**params)),
            serialized_graft=json.dumps(proxy.graft),
            typespec=typespec,
            type=types_pb2.ResultType.Value(
                cereal.typespec_to_unmarshal_str(typespec)
            ),
            format=format_proto,
            destination=destination_proto,
            no_cache=False,
            channel=_channel.__channel__,
        )

        # The canned response mirrors every field of the request, plus an id.
        mirrored_fields = (
            "parameters",
            "serialized_graft",
            "typespec",
            "type",
            "format",
            "destination",
            "no_cache",
            "channel",
        )
        response = job_pb2.Job(
            id="foo",
            **{name: getattr(expected_request, name) for name in mirrored_fields},
        )
        create_job = stub.return_value.CreateJob
        create_job.return_value = response

        job = Job(
            proxy,
            params,
            format={"type": "pyarrow", "compression": "brotli"},
            destination="download",
        )

        create_job.assert_called_once_with(
            expected_request,
            timeout=Client.DEFAULT_TIMEOUT,
            metadata=(("x-wf-channel", expected_request.channel),),
        )

        assert job._message is response
Ejemplo n.º 5
0
    def test_wait_terminated(self, stub):
        """
        Check that ``wait`` raises ``JobTerminated`` for a terminated job.

        The stubbed ``WatchJob`` call returns a FAILED state whose error code
        is ``ERROR_TERMINATED``; the stage must still be recorded on the job's
        message even though waiting raises.
        """
        job = Job._from_proto(
            job_pb2.Job(
                id="foo",
                destination=user_destination_to_proto({"type": "download"}),
            )
        )

        terminated_error = job_pb2.Job.Error(code=errors_pb2.ERROR_TERMINATED)
        terminated = job_pb2.Job.State(
            stage=job_pb2.Job.Stage.FAILED,
            error=terminated_error,
        )

        # WatchJob hands back exactly one state update.
        stub.return_value.WatchJob.return_value = [terminated]

        with pytest.raises(JobTerminated):
            job.wait()

        assert job._message.state.stage == terminated.stage
Ejemplo n.º 6
0
 def create_side_effect(req, **kwargs):
     """
     Build the ``Job`` message returned for a create request.

     Request fields are echoed back unchanged; ``id_``, ``__version__``,
     ``expires_timestamp``, ``job_state``, ``format`` and ``destination`` come
     from the enclosing scope. Extra keyword arguments are accepted and
     ignored.
     """
     job_fields = {
         "id": id_,
         "serialized_graft": req.serialized_graft,
         "typespec": req.typespec,
         "arguments": req.arguments,
         "geoctx_graft": req.geoctx_graft,
         "no_ruster": req.no_ruster,
         "channel": req.channel,
         "client_version": __version__,
         "expires_timestamp": expires_timestamp,
         "no_cache": req.no_cache,
         "trace": req.trace,
         "state": job_state,
         "type": req.type,
         "format": user_format_to_proto(format),
         "destination": user_destination_to_proto(destination),
     }
     return job_pb2.Job(**job_fields)
Ejemplo n.º 7
0
    def test_wait_timeout(self, stub):
        """
        Check that ``wait`` raises ``JobTimeoutError`` on a deadline error.

        The stubbed ``WatchJob`` stream yields one QUEUED state and then raises
        a DEADLINE_EXCEEDED RPC error. The QUEUED stage must be recorded on the
        job's message before the timeout is raised.
        """
        job = Job._from_proto(
            job_pb2.Job(
                id="foo",
                destination=user_destination_to_proto({"type": "download"}),
            )
        )

        queued = job_pb2.Job.State(stage=job_pb2.Job.Stage.QUEUED)

        def watch_then_expire(*args, **kwargs):
            # Emit one state update, then fail the stream with a deadline error.
            yield queued
            raise MockRpcError(grpc.StatusCode.DEADLINE_EXCEEDED)

        watch_job = stub.return_value.WatchJob
        watch_job.side_effect = watch_then_expire

        with pytest.raises(JobTimeoutError):
            job.wait(timeout=1)

        watch_job.assert_called()
        assert job._message.state.stage == queued.stage
Ejemplo n.º 8
0
    def test_result_to_file(self, stub, file_path, tmpdir):
        """
        Check that ``result_to_file`` writes the downloaded result to disk.

        Parameters
        ----------
        stub
            Fixture for the stubbed API; not used directly in this test.
        file_path
            When truthy, the target is passed as a path string; otherwise an
            already-open binary file object is passed.
        tmpdir
            Temporary directory fixture in which ``test.json`` is created.

        When a file object is passed, ``result_to_file`` must leave it open.
        The handle is closed in a ``finally`` block so that it is released
        even if the call or one of the assertions fails.
        """
        format_proto = user_format_to_proto("json")
        destination_proto = user_destination_to_proto("download")
        destination_proto.download.result_url = (
            "https://storage.googleapis.com/dl-compute-dev-results")

        job = Job._from_proto(
            job_pb2.Job(
                id="foo",
                state=job_pb2.Job.State(stage=job_pb2.Job.Stage.SUCCEEDED),
                format=format_proto,
                destination=destination_proto,
            ))

        result = [1, 2, 3, 4]
        responses.add(
            responses.GET,
            job.url,
            body=json.dumps(result),
            headers={"x-goog-stored-content-encoding": "application/json"},
            status=200,
            stream=True,
        )

        path = tmpdir.join("test.json")
        file_arg = str(path) if file_path else path.open("wb")

        try:
            job.result_to_file(file_arg)

            if not file_path:
                # The caller owns the handle: it must not have been closed.
                assert not file_arg.closed
                # Flush so the content is visible when re-read by path below.
                file_arg.flush()

            with open(str(path), "r") as f:
                assert result == json.load(f)
        finally:
            # Only a handle opened by this test needs closing; a path string
            # has nothing to release.
            if not file_path:
                file_arg.close()
Ejemplo n.º 9
0
    def __init__(
        self,
        proxy_object,
        parameters,
        format="pyarrow",
        destination="download",
        client=None,
        cache=True,
    ):
        """
        Create a new `Job` that computes a proxy object with the given parameters.

        Parameters
        ----------
        proxy_object: Proxytype
            Proxy object to compute
        parameters: dict[str, Proxytype]
            Python dictionary of parameter names and values
        format: str or dict, default "pyarrow"
            The serialization format for the result.
        destination: str or dict, default "download"
            The destination for the result.
        client : `.workflows.client.Client`, optional
            Allows you to use a specific client instance with non-default
            auth and parameters. When omitted, the global gRPC client is used.
        cache : bool, default True
            Whether to use the cache for this job.

        Returns
        -------
        Job
            The job that's executing.

        Example
        -------
        >>> from descarteslabs.workflows import Job, Int, parameter
        >>> my_int = Int(1) + parameter("other_int", Int)
        >>> job = Job(my_int, {"other_int": 10}) # doctest: +SKIP
        >>> job.stage # doctest: +SKIP
        QUEUED
        """
        client = get_global_grpc_client() if client is None else client

        typespec = serialize_typespec(type(proxy_object))
        # Resolving the result type up front also preemptively checks that the
        # result type is something we'll know how to unmarshal.
        result_type = typespec_to_unmarshal_str(typespec)

        format_proto = user_format_to_proto(format)
        destination_proto = user_destination_to_proto(destination)

        parameter_grafts = parameters_to_grafts(**parameters)

        create_job = client.api["CreateJob"]
        request = job_pb2.CreateJobRequest(
            parameters=json.dumps(parameter_grafts),
            serialized_graft=json.dumps(proxy_object.graft),
            typespec=typespec,
            type=types_pb2.ResultType.Value(result_type),
            format=format_proto,
            destination=destination_proto,
            # The request carries the inverse of the user-facing ``cache`` flag.
            no_cache=not cache,
            channel=_channel.__channel__,
        )

        self._message = create_job(request, timeout=client.DEFAULT_TIMEOUT)
        self._client = client
        self._object = proxy_object
Ejemplo n.º 10
0
    def __init__(
        self,
        obj,
        geoctx=None,
        format="pyarrow",
        destination="download",
        cache=True,
        _ruster=None,
        _trace=False,
        client=None,
        **arguments,
    ):
        """
        Creates a new `Job` to compute the provided proxy object with the given
        arguments.

        Parameters
        ----------
        obj: Proxytype
            Proxy object to compute, or list/tuple of proxy objects.
            If it depends on parameters, ``obj`` is first converted
            to a `.Function` that takes those parameters.
        geoctx: `~.workflows.types.geospatial.GeoContext`, or None
            The GeoContext parameter under which to run the computation.
            Almost all computations will require a `~.workflows.types.geospatial.GeoContext`,
            but for operations that only involve non-geospatial types,
            this parameter is optional.
        format: str or dict, default "pyarrow"
            The serialization format for the result.
        destination: str or dict, default "download"
            The destination for the result.
        cache: bool, default True
            Whether to use the cache for this job.
        _ruster: bool or None, default None
            Private. The request's ``no_ruster`` flag is set only when this is
            exactly ``False``; ``None`` and ``True`` both leave it unset.
        _trace: bool, default False
            Private. Forwarded as the request's ``trace`` field.
        client: `.workflows.client.Client`, optional
            Allows you to use a specific client instance with non-default
            auth and parameters
        **arguments: Any
            Values for all parameters that ``obj`` depends on
            (or arguments that ``obj`` takes, if it's a `.Function`).
            Can be given as Proxytypes, or as Python objects like numbers,
            lists, and dicts that can be promoted to them.
            These arguments cannot depend on any parameters.

        Raises
        ------
        TypeError
            If ``geoctx`` is given but cannot be promoted to a GeoContext.
            The underlying ``ProxyTypeError`` is attached as ``__cause__``.

        Example
        -------
        >>> from descarteslabs.workflows import Job, Int, parameter
        >>> my_int = Int(1) + parameter("other_int", Int)
        >>> job = Job(my_int, other_int=10) # doctest: +SKIP
        >>> job.stage # doctest: +SKIP
        QUEUED
        """
        if client is None:
            client = get_global_grpc_client()

        if geoctx is not None:
            try:
                geoctx = GeoContext._promote(geoctx)
            except ProxyTypeError as e:
                # Chain explicitly so the traceback presents the promotion
                # failure as the direct cause of this TypeError.
                raise TypeError(f"Invalid GeoContext {geoctx!r}: {e}") from e

        obj, argument_grafts, typespec, result_type = to_computable(
            obj, arguments)

        format_proto = user_format_to_proto(format)
        destination_proto = user_destination_to_proto(destination)

        message = client.api["CreateJob"](
            job_pb2.CreateJobRequest(
                serialized_graft=json.dumps(obj.graft),
                typespec=typespec,
                # Each argument graft is JSON-encoded individually, keyed by name.
                arguments={
                    name: json.dumps(arg)
                    for name, arg in argument_grafts.items()
                },
                geoctx_graft=json.dumps(geoctx.graft)
                if geoctx is not None else None,
                # Only an explicit ``False`` disables it; the default ``None`` does not.
                no_ruster=_ruster is False,
                channel=client._wf_channel,
                client_version=__version__,
                # The request carries the inverse of the user-facing ``cache`` flag.
                no_cache=not cache,
                trace=_trace,
                type=types_pb2.ResultType.Value(result_type),
                format=format_proto,
                destination=destination_proto,
            ),
            timeout=client.DEFAULT_TIMEOUT,
        )

        self._message = message
        self._client = client
        self._object = obj
        self._arguments = None