Code example #1
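The excerpt below omits its imports and the OUTLIERS constant it references. A minimal sketch is added here, assuming the same openff.evaluator.datasets import used in code example #7 and treating OUTLIERS as a module-level collection of property ids defined elsewhere in the original script.

import os
from typing import List

import pandas

from openff.evaluator.datasets import PhysicalPropertyDataSet

# Assumed: ids of properties to exclude from the analysis, populated in the original script.
OUTLIERS = set()
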
def results_to_pandas(force_fields: List[str]) -> pandas.DataFrame:
    """Imports the experimental and estimated data sets and stores them in a
    pandas data frame.
    """

    # Load in the experimental data set.
    training_set = {
        physical_property.id: physical_property
        for physical_property in PhysicalPropertyDataSet.from_json(
            os.path.join("raw_data_v2", "curated_data_set.json"))
    }

    # Load in the results.
    estimated_results = {
        force_field: {
            physical_property.id: physical_property
            for physical_property in PhysicalPropertyDataSet.from_json(
                os.path.join("raw_data_v2", f"{force_field}.json"))
        }
        for force_field in force_fields
    }

    # Reshape the experimental and estimated data into a single data frame.
    data_rows = []

    for property_id in training_set:

        experimental_property = training_set[property_id]

        estimated_properties = {
            force_field: estimated_results[force_field].get(property_id, None)
            for force_field in force_fields
        }

        if (any(estimated_property is None
                for estimated_property in estimated_properties.values())
                or property_id in OUTLIERS):
            print(f"Skipping property {property_id}")
            continue

        data_rows.extend(
            {
                "Id": property_id,
                "Type": (
                    f"{experimental_property.__class__.__name__}_"
                    f"{len(experimental_property.substance)}"
                ),
                "Force Field": force_field,
                "NIST ThermoML": experimental_property.value.to(
                    experimental_property.default_unit()
                ).magnitude,
                "Estimated": estimated_properties[force_field].value.to(
                    experimental_property.default_unit()
                ).magnitude,
                "Estimated Uncertainty": estimated_properties[force_field]
                .uncertainty.to(experimental_property.default_unit())
                .magnitude,
            }
            for force_field in force_fields
        )

    return pandas.DataFrame(data_rows)
Code example #2
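This script excerpt likewise omits its imports. The sketch below follows the openff-evaluator client and force field API; the module path for setup_timestamp_logging is an assumption, and setup_server and BackendType appear to be helpers defined alongside the original script.

import re

from openff.evaluator.client import ConnectionOptions, EvaluatorClient, RequestOptions
from openff.evaluator.datasets import PhysicalPropertyDataSet
from openff.evaluator.forcefield import ParameterGradientKey, SmirnoffForceFieldSource
from openff.evaluator.utils import setup_timestamp_logging  # assumed module path

# Assumed: setup_server and BackendType are defined in a helper module next to this script.
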
def main():

    setup_timestamp_logging()

    # Load in the force field
    force_field_path = "smirnoff99Frosst-1.1.0.offxml"
    force_field_source = SmirnoffForceFieldSource.from_path(force_field_path)

    # Load in the data set containing the pure and binary properties.
    data_set = PhysicalPropertyDataSet.from_json("pure_data_set.json")
    data_set.merge(PhysicalPropertyDataSet.from_json("binary_data_set.json"))

    # Set up a server object to run the calculations using.
    server = setup_server(backend_type=BackendType.LocalGPU,
                          max_number_of_workers=1,
                          port=8001)

    with server:

        # Request the estimates.
        property_estimator = EvaluatorClient(
            ConnectionOptions(server_port=8001))

        for calculation_layer in ["SimulationLayer", "ReweightingLayer"]:

            options = RequestOptions()
            options.calculation_layers = [calculation_layer]

            parameter_gradient_keys = [
                ParameterGradientKey(tag="vdW",
                                     smirks="[#6X4:1]",
                                     attribute="epsilon"),
                ParameterGradientKey(tag="vdW",
                                     smirks="[#6X4:1]",
                                     attribute="rmin_half"),
            ]

            request, _ = property_estimator.request_estimate(
                property_set=data_set,
                force_field_source=force_field_source,
                options=options,
                parameter_gradient_keys=parameter_gradient_keys,
            )

            # Wait for the results.
            results, _ = request.results(True, 5)

            layer_name = re.sub(r"(?<!^)(?=[A-Z])", "_",
                                calculation_layer).lower()
            results.json(f"pure_binary_{layer_name}.json", True)
Code example #3
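This pytest excerpt relies on the mock_target and caplog fixtures and omits its imports. Only the pieces that can be stated with reasonable confidence are sketched below; the import path for EvaluatorAnalysisFactory (part of the package under test) is deliberately not guessed.

import logging
import os

import numpy

from openff.evaluator.client import RequestResult
from openff.evaluator.datasets import PhysicalPropertyDataSet

# Assumed: EvaluatorAnalysisFactory and the mock_target fixture are provided by the
# package under test and its test suite.
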
def test_analyze_non_integer_ids(mock_target, caplog):

    optimization, target, directory = mock_target

    reference_data_set: PhysicalPropertyDataSet = PhysicalPropertyDataSet.from_json(
        os.path.join(directory, "training-set.json"))
    # Sanity check in case this changes in future.
    assert len(reference_data_set) == 1
    reference_data_set.properties[0].id = "a"
    reference_data_set.json(os.path.join(directory, "training-set.json"))

    results = RequestResult()
    results.estimated_properties = reference_data_set
    results.json(os.path.join(directory, "results.json"))

    with caplog.at_level(logging.WARNING):

        target_result = EvaluatorAnalysisFactory.analyze(
            optimization=optimization,
            target=target,
            target_directory=directory,
            result_directory=directory,
            reindex=False,
        )

    assert ("The reference data set contains properties "
            "with ids that cannot be cast to integers" in caplog.text)

    assert numpy.isclose(target_result.objective_function, 1.0)
    assert len(target_result.statistic_entries) == 1
Code example #4
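This test method is excerpted from a larger test class, and the helpers it calls are not shown. The sketch below lists only the imports that are certain and flags the remaining names as assumptions.

import os

from openff.evaluator.datasets import PhysicalPropertyDataSet

# Assumed: create_data_set, mock_get_data_set, create_evaluator_target, temporary_cd and
# OptimizationInputFactory are provided by the package under test and its test utilities;
# requests_mock is the pytest fixture from the requests-mock plugin.
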
    def test_generate_evaluator_target(self, requests_mock):

        data_set = create_data_set("data-set-1")
        mock_get_data_set(requests_mock, data_set)

        target = create_evaluator_target("evaluator-target-1", [data_set.id])

        with temporary_cd():

            OptimizationInputFactory._generate_evaluator_target(
                target, 8000, None)

            assert os.path.isfile("training-set.json")
            off_data_set = PhysicalPropertyDataSet.from_json(
                "training-set.json")
            assert off_data_set.json() == data_set.to_evaluator().json()

            assert os.path.isfile("options.json")
Code example #5
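As with code example #2, the imports and helpers are omitted from this excerpt. The sketch below makes the same assumptions about setup_server and BackendType, and treats _get_fixed_lambda_schema as a helper defined elsewhere in the original script.

from openff.evaluator.client import ConnectionOptions, EvaluatorClient, RequestOptions
from openff.evaluator.datasets import PhysicalPropertyDataSet
from openff.evaluator.forcefield import SmirnoffForceFieldSource
from openff.evaluator.utils import setup_timestamp_logging  # assumed module path

# Assumed: setup_server, BackendType and _get_fixed_lambda_schema are defined alongside
# this script.
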
def main():

    setup_timestamp_logging()

    # Load in the force field
    force_field_path = "smirnoff99Frosst-1.1.0.offxml"
    force_field_source = SmirnoffForceFieldSource.from_path(force_field_path)

    # Load in the data set containing the three solvation free energies.
    data_set = PhysicalPropertyDataSet.from_json("hydration_data_set.json")
    data_set.json("hydration_data_set.json", format=True)

    # Set up a server object to run the calculations using.
    server = setup_server(backend_type=BackendType.LocalGPU,
                          max_number_of_workers=1,
                          port=8002)

    with server:

        # Request the estimates.
        property_estimator = EvaluatorClient(
            ConnectionOptions(server_port=8002))

        options = RequestOptions()
        options.calculation_layers = ["SimulationLayer"]
        options.add_schema("SimulationLayer", "SolvationFreeEnergy",
                           _get_fixed_lambda_schema())

        request, _ = property_estimator.request_estimate(
            property_set=data_set,
            force_field_source=force_field_source,
            options=options,
        )

        # Wait for the results.
        results, _ = request.results(True, 60)

        # Save the result to file.
        results.json("results.json", True)
Code example #6
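This method is excerpted from a ForceBalance-style fitting target class, so self.tgtdir, self.evaluator_input, self.OptionsFile and self._default_units are set up elsewhere in the class. A plausible import sketch follows; printcool and printcool_dictionary are ForceBalance's pretty-printing helpers, and the module paths given here are assumptions.

import os

import numpy as np

from forcebalance.nifty import printcool, printcool_dictionary  # assumed module path

from openff.evaluator.attributes import UNDEFINED
from openff.evaluator.client import EvaluatorClient
from openff.evaluator.datasets import PhysicalPropertyDataSet
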
    def _initialize(self):
        """Initializes the evaluator target from an input json file.

        1. Reads the user specified input file.
        2. Creates a `evaluator` client object.
        3. Loads in a reference experimental data set.
        4. Assigns and normalises weights for each property.
        """

        # Load in the options from a user provided JSON file.
        options_file_path = os.path.join(self.tgtdir, self.evaluator_input)
        self._options = self.OptionsFile.from_json(options_file_path)

        for property_type, denominator in self._options.denominators.items():
            self._default_units[property_type] = denominator.units

        # Attempt to create an evaluator client object using the specified
        # connection options.
        self._client = EvaluatorClient(self._options.connection_options)

        # Load in the experimental data set.
        data_set_path = os.path.join(self.tgtdir, self._options.data_set_path)
        self._reference_data_set = PhysicalPropertyDataSet.from_json(data_set_path)

        if len(self._reference_data_set) == 0:

            raise ValueError(
                "The physical property data set to optimise against is empty."
            )

        # Print the reference data, and count the number of instances of
        # each property type.
        printcool("Loaded experimental data.")

        property_types = self._reference_data_set.property_types

        number_of_properties = {
            x: sum(1 for y in self._reference_data_set.properties_by_type(x))
            for x in property_types
        }

        for substance in self._reference_data_set.substances:

            dict_for_print = {}

            for physical_property in self._reference_data_set.properties_by_substance(
                substance
            ):

                property_type = physical_property.__class__.__name__

                value = physical_property.value.to(self._default_units[property_type])
                uncertainty = np.nan

                if physical_property.uncertainty != UNDEFINED:

                    uncertainty = physical_property.uncertainty.to(
                        self._default_units[property_type]
                    )

                tuple_key = (
                    property_type,
                    physical_property.thermodynamic_state.temperature,
                    physical_property.thermodynamic_state.pressure,
                )

                dict_for_print["%s %s-%s" % tuple_key] = "%s+/-%s" % (
                    value,
                    uncertainty,
                )

            printcool_dictionary(
                dict_for_print, title="Reference %s data" % substance.identifier,
            )

        # Assign and normalize weights for each phase point (average for now)
        self._normalised_weights = {}

        for property_type in self._reference_data_set.property_types:

            self._normalised_weights[property_type] = (
                self._options.weights[property_type]
                / number_of_properties[property_type]
            )
Code example #7
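This classmethod is excerpted from an analysis factory. Names such as DataSet, Author, DataSetResult, StatisticType, EvaluatorTargetResult, Optimization, EvaluatorTarget, reindex_results and _logger come from the surrounding package and its imports, which the excerpt does not show; only the standard-library pieces are sketched here.

import logging
import os
from typing import Optional

_logger = logging.getLogger(__name__)

# Assumed: the remaining names (DataSet, Author, DataSetResult, StatisticType,
# EvaluatorTargetResult, Optimization, EvaluatorTarget, reindex_results) are imported
# from the surrounding package.
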
    @classmethod
    def analyze(
        cls,
        optimization: Optimization,
        target: EvaluatorTarget,
        target_directory: str,
        result_directory: str,
        reindex: bool = False,
    ) -> Optional[EvaluatorTargetResult]:

        from openff.evaluator.client import RequestResult
        from openff.evaluator.datasets import PhysicalPropertyDataSet

        results_path = os.path.join(result_directory, "results.json")

        if not os.path.isfile(results_path):
            return None

        # Load the reference data set
        reference_data_set: PhysicalPropertyDataSet = PhysicalPropertyDataSet.from_json(
            os.path.join(target_directory, "training-set.json")
        )

        # Check whether any of the ids are strings that cannot be cast to integers
        # and, if so, re-index the properties.
        try:
            {int(entry.id) for entry in reference_data_set.properties}
        except (TypeError, ValueError):

            _logger.warning(
                "The reference data set contains properties with ids that cannot be "
                "cast to integers - attempting to fix. Note this in general is not "
                "recommended and in future it is suggested to use integer ids in "
                "physical property data sets."
            )

            for i, physical_property in enumerate(reference_data_set):
                physical_property.id = str(i + 1)

            reindex = True

        reference_data_set: DataSet = DataSet.from_pandas(
            reference_data_set.to_pandas(),
            identifier="empty",
            description="empty",
            authors=[Author(name="empty", email="*****@*****.**", institute="empty")],
        )

        results = RequestResult.from_json(results_path)

        if reindex:
            results = reindex_results(results, reference_data_set)

        estimated_data_set = results.estimated_properties

        # Generate statistics about each iteration.
        data_set_result = DataSetResult.from_evaluator(
            reference_data_set=reference_data_set,
            estimated_data_set=estimated_data_set,
            analysis_environments=optimization.analysis_environments,
            statistic_types=[StatisticType.RMSE],
        )

        objective_function = cls._read_objective_function(result_directory)

        return EvaluatorTargetResult(
            objective_function=target.weight * objective_function,
            statistic_entries=data_set_result.statistic_entries,
        )