Ejemplo n.º 1
0
    def _query_request_status(self, client_request_id):
        """Gathers the current state of every batch which belongs to an
        estimation request into a single `RequestResult`.

        Parameters
        ----------
        client_request_id: str
            The id of the request to query.

        Returns
        -------
        RequestResult, optional
            The combined state of the request, or `None` if an internal
            error was encountered.
        EvaluatorException, optional
            The internal error encountered while retrieving the status,
            or `None` if there was none.
        """

        combined_result = RequestResult()

        # The collections which are merged from each batch, in order.
        property_lists = (
            "queued_properties",
            "unsuccessful_properties",
            "estimated_properties",
        )

        for batch_id in self._batch_ids_per_client_id[client_request_id]:

            is_queued = batch_id in self._queued_batches

            if not is_queued and batch_id not in self._finished_batches:

                # The batch is tracked by neither the queued nor the
                # finished collection.
                not_found = EvaluatorException(
                    message=f"An internal error occurred - the {batch_id} "
                    f"request was not found on the server.")

                return None, not_found

            if is_queued:
                batch = self._queued_batches[batch_id]

            else:
                batch = self._finished_batches[batch_id]

                if len(batch.queued_properties) > 0:

                    # A finished batch should have nothing left queued.
                    still_queued = EvaluatorException(
                        message=f"An internal error occurred - the {batch_id} "
                        f"batch was prematurely marked us finished.")

                    return None, still_queued

            # Fold the state of this batch into the overall result.
            for list_name in property_lists:
                getattr(combined_result, list_name).add_properties(
                    *getattr(batch, list_name))

            combined_result.exceptions.extend(batch.exceptions)

        return combined_result, None
Ejemplo n.º 2
0
    def _handle_job_query(self, connection, message_length):
        """Receives a request status query from a client, looks up the
        state of the request and sends the reply back over the same
        connection.

        The reply is a JSON encoded `(response, error)` pair which is
        prefixed by its packed length.

        Parameters
        ----------
        connection:
            An IO stream used to pass messages between the
            server and client.
        message_length: int
            The length of the message being received.
        """

        client_request_id = recvall(connection, message_length).decode()

        if client_request_id in self._batch_ids_per_client_id:
            response, error = self._query_request_status(client_request_id)

        else:
            # Unknown ids are reported back to the client as an error.
            response = None
            error = EvaluatorException(
                message=f"The request id ({client_request_id}) was not found "
                f"on the server.", )

        payload = json.dumps((response, error), cls=TypedJSONEncoder).encode()
        header = pack_int(len(payload))

        connection.sendall(header + payload)
Ejemplo n.º 3
0
    def process_failed_property(physical_property, **_):
        """Builds the result a layer would return had it failed to
        estimate `physical_property`.

        Any extra keyword arguments are accepted and ignored.
        """

        failed_result = CalculationLayerResult()
        failed_result.physical_property = physical_property

        failure = EvaluatorException(message="Failure Message")
        failed_result.exceptions = [failure]

        return failed_result
Ejemplo n.º 4
0
    def _launch_batch(self, batch):
        """Hands a batch of properties to the next calculation layer which
        it is allowed to use.

        The next layer type is popped from the front of
        `batch.options.calculation_layers`. This method is passed to the
        layer's `schedule_calculation` as the final argument, and it calls
        itself directly when a layer type is not registered.

        The batch is marked as finished once there are either no layers or
        no queued properties left. Any properties which are still queued at
        that point are recorded as unsuccessful.

        Parameters
        ----------
        batch : Batch
            The batch to launch.
        """

        nothing_left_to_do = (len(batch.options.calculation_layers) == 0
                              or len(batch.queued_properties) == 0)

        if nothing_left_to_do:

            # Whatever is still queued could not be estimated.
            batch.unsuccessful_properties = list(batch.queued_properties)
            batch.queued_properties = []

            # Move the batch from the queued to the finished collection.
            self._queued_batches.pop(batch.id)
            self._finished_batches[batch.id] = batch

            logger.info(f"Finished server request {batch.id}")
            return

        layer_type = batch.options.calculation_layers.pop(0)

        if layer_type in registered_calculation_layers:

            logger.info(
                f"Launching batch {batch.id} using the {layer_type} layer")

            # Each layer / batch pair works in its own directory.
            layer_directory = os.path.join(self._working_directory,
                                           layer_type, batch.id)
            os.makedirs(layer_directory, exist_ok=True)

            layer = registered_calculation_layers[layer_type]

            layer.schedule_calculation(
                self._calculation_backend,
                self._storage_backend,
                layer_directory,
                batch,
                self._launch_batch,
            )
            return

        # The requested layer is not registered - record this against the
        # batch and move straight on to the next layer.
        unsupported_layer = EvaluatorException(
            message=f"The {layer_type} layer is not "
            f"supported by / available on the server.")

        batch.exceptions.append(unsupported_layer)
        self._launch_batch(batch)
Ejemplo n.º 5
0
def test_serialize_layer_result():
    """Checks that a `CalculationLayerResult` produces the same JSON
    after being serialized, deserialized and serialized again."""

    def to_json(value):
        return json.dumps(value, cls=TypedJSONEncoder)

    original = CalculationLayerResult()
    original.physical_property = create_dummy_property(Density)
    original.exceptions = [EvaluatorException()]
    original.data_to_store = [("dummy_object_path", "dummy_directory")]

    original_json = to_json(original)
    round_tripped = json.loads(original_json, cls=TypedJSONDecoder)

    assert to_json(round_tripped) == original_json
Ejemplo n.º 6
0
    def _process_results(results_future, batch, layer_name, storage_backend,
                         callback):
        """Processes the results of a calculation layer, updates the server request,
        then passes it back to the callback ready for propagation to the next layer
        in the stack.

        Parameters
        ----------
        results_future: distributed.Future
            The future object which will hold the results.
        batch: Batch
            The batch which spawned the awaited results.
        layer_name: str
            The name of the layer processing the results.
        storage_backend: StorageBackend
            The backend used to store / retrieve data from previous calculations.
        callback: function
            The function to call when the backend returns the results (or an error).
        """

        # Wrap everything in a try catch to make sure the whole calculation backend /
        # server doesn't go down when an unexpected exception occurs.
        try:

            results = list(results_future.result())

            if len(results) > 0 and isinstance(results[0],
                                               collections.Iterable):
                results = results[0]

            results_future.release()

            for returned_output in results:

                if returned_output is None:
                    # Indicates the layer could not calculate this
                    # particular property.
                    continue

                if not isinstance(returned_output, CalculationLayerResult):

                    # Make sure we are actually dealing with the object we expect.
                    raise ValueError("The output of the calculation was not "
                                     "a CalculationLayerResult as expected.")

                if len(returned_output.exceptions) > 0:

                    # If exceptions were raised, make sure to add them to the list.
                    batch.exceptions.extend(returned_output.exceptions)

                    logger.info(
                        f"Exceptions were raised while executing batch {batch.id}"
                    )

                    for exception in returned_output.exceptions:
                        logger.info(str(exception))

                else:

                    # Make sure to store any important calculation data if no exceptions
                    # were thrown.
                    if returned_output.data_to_store is not None:

                        CalculationLayer._store_cached_output(
                            batch, returned_output, storage_backend)

                matches = []

                if returned_output.physical_property != UNDEFINED:

                    matches = [
                        x for x in batch.queued_properties
                        if x.id == returned_output.physical_property.id
                    ]

                    if len(matches) > 1:

                        raise ValueError(
                            f"A property id ({returned_output.physical_property.id}) "
                            f"conflict occurred.")

                    elif len(matches) == 0:

                        logger.info(
                            "A calculation layer returned results for a property not in "
                            "the queue. This sometimes and expectedly occurs when using "
                            "queue based calculation backends, but should be investigated."
                        )

                        continue

                if returned_output.physical_property == UNDEFINED:

                    if len(returned_output.exceptions) == 0:

                        logger.info(
                            "A calculation layer did not return an estimated property nor did it "
                            "raise an Exception. This sometimes and expectedly occurs when using "
                            "queue based calculation backends, but should be investigated."
                        )

                    continue

                if len(returned_output.exceptions) > 0:
                    continue

                # Check that the property has been estimated to within the
                # requested tolerance.
                uncertainty = returned_output.physical_property.uncertainty
                options = batch.options.calculation_schemas[
                    returned_output.physical_property.__class__.
                    __name__][layer_name]

                if (options.absolute_tolerance != UNDEFINED
                        and options.absolute_tolerance < uncertainty):
                    continue
                elif (options.relative_tolerance != UNDEFINED and
                      options.relative_tolerance * uncertainty < uncertainty):
                    continue

                # Move the property from queued to estimated.
                for match in matches:
                    batch.queued_properties.remove(match)

                batch.estimated_properties.append(
                    returned_output.physical_property)

        except Exception as e:

            logger.exception(
                f"Error processing layer results for request {batch.id}")
            exception = EvaluatorException.from_exception(e)

            batch.exceptions.append(exception)

        callback(batch)
Ejemplo n.º 7
0
    def _handle_job_submission(self, connection, address, message_length):
        """Receives a job submission from a client, replies with the id
        assigned to it (or the error raised while parsing it), and then
        launches the submission as a set of batches.

        Parameters
        ----------
        connection:
            An IO stream used to pass messages between the
            server and client.
        address: str
            The address from which the request came.
        message_length: int
            The length of the message being received.
        """

        logger.info(f"Received estimation request from {address}")

        # Read and decode the json submitted by the client.
        json_model = recvall(connection, message_length).decode()

        request_id = None
        error = None

        try:

            # noinspection PyProtectedMember
            submission = EvaluatorClient._Submission.parse_json(json_model)
            submission.validate()

        except Exception as e:

            formatted_trace = traceback.format_exception(
                None, e, e.__traceback__)

            error = EvaluatorException(
                message=f"An exception occured when parsing "
                f"the submission: {formatted_trace}")

            submission = None

        if error is None:

            # Keep drawing ids until one which is not already in use is found.
            request_id = uuid.uuid4().hex

            while request_id in self._batch_ids_per_client_id:
                request_id = uuid.uuid4().hex

            self._batch_ids_per_client_id[request_id] = []

        # Tell the client which id the request was assigned, as well
        # as about any error which may have occurred.
        reply = json.dumps((request_id, error), cls=TypedJSONEncoder).encode()
        connection.sendall(pack_int(len(reply)) + reply)

        if error is not None:
            # There is nothing to launch if the submission was invalid.
            return

        # Split the request into more manageable chunks and launch each one.
        for batch in self._prepare_batches(submission, request_id):
            self._launch_batch(batch)
Ejemplo n.º 8
0
    def _gather_results(
        directory,
        workflow_id,
        value_reference,
        gradient_sources,
        outputs_to_store,
        *protocol_result_paths,
        **_,
    ):
        """Collects the outputs of the protocols of a workflow into a
        single `WorkflowResult`.

        Parameters
        ----------
        directory: str
            The directory to store any working files in.
        workflow_id: str
            The id of the workflow associated with this result.
        value_reference: ProtocolPath, optional
            A reference to which of the gathered outputs is the actual value.
        gradient_sources: list of ProtocolPath
            A list of references to those gathered outputs which correspond
            to parameter gradients.
        outputs_to_store: dict of str and WorkflowOutputToStore
            The data which should be stored on the storage backend.
        protocol_result_paths: tuple of str and str
            Pairs of a protocol id and the path to the JSON file which
            contains the outputs of that protocol.

        Returns
        -------
        WorkflowResult
            The gathered result. If one of the result files contains an
            `EvaluatorException`, or an exception is raised while gathering,
            it is recorded in the `exceptions` of the returned object
            rather than raised.
        """

        workflow_result = WorkflowResult()
        workflow_result.workflow_id = workflow_id

        try:

            outputs_by_path = {}

            for protocol_id, result_file_path in protocol_result_paths:

                with open(result_file_path) as file:
                    loaded_results = json.load(file, cls=TypedJSONDecoder)

                # A protocol which failed stores its exception in place of
                # its outputs - stop gathering as soon as one is found.
                if isinstance(loaded_results, EvaluatorException):

                    workflow_result.exceptions.append(loaded_results)
                    return workflow_result

                # Key each output by the full path to the protocol output.
                for raw_path, output_value in loaded_results.items():

                    output_path = ProtocolPath.from_string(raw_path)
                    owner = output_path.start_protocol

                    if owner is None or owner != protocol_id:
                        output_path.prepend_protocol_id(protocol_id)

                    outputs_by_path[output_path] = output_value

            if value_reference is not None:
                workflow_result.value = outputs_by_path[value_reference]

            for gradient_source in gradient_sources:
                workflow_result.gradients.append(
                    outputs_by_path[gradient_source])

            workflow_result.data_to_store = []

            for output_to_store in outputs_to_store.values():

                # Each stored output gets a uniquely named object file
                # and data directory.
                unique_name = f"data_{uuid.uuid4().hex}"

                data_object_path = path.join(directory, f"{unique_name}.json")
                data_directory = path.join(directory, unique_name)

                WorkflowGraph._store_output_data(
                    data_object_path,
                    data_directory,
                    output_to_store,
                    outputs_by_path,
                )

                workflow_result.data_to_store.append(
                    (data_object_path, data_directory))

        except Exception as e:
            workflow_result.exceptions.append(
                EvaluatorException.from_exception(e))

        return workflow_result
Ejemplo n.º 9
0
    def _send_calculations_to_server(self, submission):
        """Attempts to connect to the calculation server, and
        submit the requested calculations.

        Parameters
        ----------
        submission: _Submission
            The jobs to submit.

        Returns
        -------
        str, optional:
           The id which the server has assigned the submitted calculations.
           This can be used to query the server for when the calculation
           has completed.

           Returns None if the calculation could not be submitted.
        EvaluatorException, optional
            Any exceptions raised while attempting the submit the request.

        Raises
        ------
        OSError
            If a connection to the server could not be established. Errors
            raised after the connection has been made are returned rather
            than raised.
        """

        connection_settings = (
            self._connection_options.server_address,
            self._connection_options.server_port,
        )

        request_id = None

        # Using the socket as a context manager guarantees that it is closed
        # on every path, including when `connect` itself fails - previously
        # the socket was leaked in that case.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as connection:

            # Attempt to establish a connection to the server. Failures here
            # are deliberately left to propagate to the caller.
            connection.connect(connection_settings)

            try:

                # Encode the submission json into an encoded
                # packet ready to submit to the server.
                message_type = pack_int(EvaluatorMessageTypes.Submission)

                encoded_json = submission.json().encode()
                length = pack_int(len(encoded_json))

                connection.sendall(message_type + length + encoded_json)

                # Wait for confirmation that the server has received
                # the jobs. The first four bytes of the reply hold the
                # length of the message which follows.
                header = recvall(connection, 4)
                length = unpack_int(header)[0]

                # Decode the response from the server. If everything
                # went well, this should be the id of the submitted
                # calculations.
                encoded_json = recvall(connection, length)
                request_id, error = json.loads(encoded_json.decode(),
                                               cls=TypedJSONDecoder)

            except Exception as e:

                trace = traceback.format_exception(None, e, e.__traceback__)
                error = EvaluatorException(message=trace)

        # Return the ids of the submitted jobs.
        return request_id, error