def test_state_equality():
    """Check how closely two states must agree to compare (and hash) equal."""

    def build_state(magnitude):
        # The same magnitude is used for both the temperature and the pressure.
        return ThermodynamicState(
            temperature=magnitude * unit.kelvin, pressure=magnitude * unit.pascals
        )

    reference = build_state(1.0)

    # A deviation of 0.0004 is expected to still compare equal.
    slightly_above = build_state(1.0004)
    assert reference == slightly_above

    # A deviation of 0.001 must give a different state with a different hash.
    clearly_above = build_state(1.001)
    assert reference != clearly_above
    assert hash(reference) != hash(clearly_above)

    # A deviation of 0.0005 is equal to the reference but not to the 1.001 state.
    upper_edge = build_state(1.0005)
    assert reference == upper_edge
    assert clearly_above != upper_edge

    # The same deviation below the reference is also expected to compare equal.
    lower_edge = build_state(0.9995)
    assert reference == lower_edge
Exemple #2
0
def test_validate_data_set():
    """Check that an invalid property is rejected both when it is added to a
    data set and when the data set is validated explicitly."""

    def density_at(temperature):
        # Only the temperature differs between the valid and invalid entries.
        return Density(
            ThermodynamicState(temperature * unit.kelvin, 1 * unit.atmosphere),
            PropertyPhase.Liquid,
            Substance.from_components("O"),
            0.0 * unit.gram / unit.milliliter,
            0.0 * unit.gram / unit.milliliter,
        )

    data_set = PhysicalPropertyDataSet()

    # A property at a positive temperature is accepted and validates cleanly.
    data_set.add_properties(density_at(298))
    data_set.validate()

    negative_temperature_property = density_at(-1)

    # Without extra arguments the property is checked as it is added.
    with pytest.raises(AssertionError):
        data_set.add_properties(negative_temperature_property)

    # Skipping the check on insertion defers the failure to `validate`.
    data_set.add_properties(negative_temperature_property, validate=False)

    with pytest.raises(AssertionError):
        data_set.validate()
Exemple #3
0
def create_filterable_data_set():
    """Build a dummy data set whose three entries differ in property type,
    state, phase and substance, so that each can be singled out by a filter:

        - a liquid density at 298 K and 0.5 atm, with a substance created
          from 1 component and the element list ``["C"]``.
        - a gaseous dielectric constant at 288 K and 1 atm, with a substance
          created from 2 components and the element list ``["N"]``.
        - a solid enthalpy of mixing at 308 K and 1.5 atm, with a substance
          created from 3 components and the element list ``["O"]``.

    Returns
    -------
    PhysicalPropertyDataSet
        The created data set.
    """

    dummy_source = CalculationSource("Dummy", {})

    liquid_density = Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=0.5 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=create_dummy_substance(number_of_components=1, elements=["C"]),
        value=1 * unit.gram / unit.milliliter,
        uncertainty=0.11 * unit.gram / unit.milliliter,
        source=dummy_source,
    )

    gaseous_dielectric = DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=288 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Gas,
        substance=create_dummy_substance(number_of_components=2, elements=["N"]),
        value=1 * unit.dimensionless,
        uncertainty=0.11 * unit.dimensionless,
        source=dummy_source,
    )

    solid_enthalpy = EnthalpyOfMixing(
        thermodynamic_state=ThermodynamicState(
            temperature=308 * unit.kelvin, pressure=1.5 * unit.atmosphere
        ),
        phase=PropertyPhase.Solid,
        substance=create_dummy_substance(number_of_components=3, elements=["O"]),
        value=1 * unit.kilojoules / unit.mole,
        uncertainty=0.11 * unit.kilojoules / unit.mole,
        source=dummy_source,
    )

    filterable_set = PhysicalPropertyDataSet()
    filterable_set.add_properties(
        *[liquid_density, gaseous_dielectric, solid_enthalpy]
    )

    return filterable_set
Exemple #4
0
def test_gradient_reduced_potentials(use_subset):
    """Run ``OpenMMGradientPotentials`` against stored trajectory data and
    check that both the forward and reverse potential files are written.

    Parameters
    ----------
    use_subset
        The value assigned to the protocol's ``use_subset_of_force_field``
        input (presumably a bool supplied by a parametrize decorator which
        is outside of this view - TODO confirm).
    """

    substance = Substance.from_components("O")
    thermodynamic_state = ThermodynamicState(298 * unit.kelvin,
                                             1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        # The protocol reads the force field from disk, so serialize it first.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        # A plain string literal is used for the id: there is nothing to
        # interpolate, so an f-string prefix would be misleading.
        reduced_potentials = OpenMMGradientPotentials("reduced_potentials")
        reduced_potentials.substance = substance
        reduced_potentials.thermodynamic_state = thermodynamic_state
        reduced_potentials.statistics_path = get_data_filename(
            "test/statistics/stats_pandas.csv")
        reduced_potentials.force_field_path = force_field_path
        reduced_potentials.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd")
        reduced_potentials.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb")
        reduced_potentials.use_subset_of_force_field = use_subset
        reduced_potentials.enable_pbc = True
        reduced_potentials.parameter_key = ParameterGradientKey(
            "vdW", "[#1]-[#8X2H2+0:1]-[#1]", "epsilon")

        reduced_potentials.execute(directory, ComputeResources())

        # Both output paths must point to files which now exist.
        assert path.isfile(reduced_potentials.forward_potentials_path)
        assert path.isfile(reduced_potentials.reverse_potentials_path)
Exemple #5
0
def create_dummy_property(property_class):
    """Build a placeholder liquid property of the requested type at
    298 K and 1 atm, for a two component dummy substance.

    The value and uncertainty are 10.0 and 1.0 in the default unit of the
    property class. A dummy calculation source and a dummy receptor metadata
    entry are attached to the returned property.

    Parameters
    ----------
    property_class : type of PhysicalProperty
        The type of property, e.g. Density, DielectricConstant...

    Returns
    -------
    PhysicalProperty
        The created property.
    """
    default_unit = property_class.default_unit

    created_property = property_class(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=create_dummy_substance(number_of_components=2),
        value=10.0 * default_unit(),
        uncertainty=1.0 * default_unit(),
    )

    created_property.source = CalculationSource(fidelity="dummy", provenance={})

    # More involved properties expect this metadata entry to be present.
    created_property.metadata = {"receptor_mol2": "unknown_path.mol2"}

    return created_property
Exemple #6
0
def test_nested_input():
    """Check that a protocol input may reference an attribute of an item
    stored inside of another protocol's dictionary output."""

    # The first protocol holds a dictionary which contains a state.
    producer = DummyInputOutputProtocol("dict_protocol")
    producer.input_value = {"a": ThermodynamicState(1.0 * unit.kelvin)}

    # The second protocol takes the temperature of that nested state.
    nested_path = ProtocolPath("output_value[a].temperature", producer.id)

    consumer = DummyInputOutputProtocol("quantity_protocol")
    consumer.input_value = nested_path

    nested_schema = WorkflowSchema()
    nested_schema.protocol_schemas = [producer.schema, consumer.schema]
    nested_schema.validate()

    nested_workflow = Workflow({})
    nested_workflow.schema = nested_schema

    graph_to_run = nested_workflow.to_graph()

    with tempfile.TemporaryDirectory() as working_directory:

        with DaskLocalCluster() as backend:

            futures = graph_to_run.execute(working_directory, backend)

            assert len(futures) == 1
            result = futures[0].result()

    assert isinstance(result, WorkflowResult)
Exemple #7
0
def test_run_openmm_simulation_checkpoints():
    """Check that ``OpenMMSimulation`` picks up its checkpoint file when it
    is executed again, and that its outputs keep the expected frame count."""

    import mdtraj

    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        coordinate_path, system_path = _setup_dummy_system(directory)

        simulation = OpenMMSimulation("npt_equilibration")
        simulation.total_number_of_iterations = 1
        simulation.steps_per_iteration = 4
        simulation.output_frequency = 1
        simulation.thermodynamic_state = state
        simulation.input_coordinate_file = coordinate_path
        simulation.system_path = system_path

        def assert_four_frames_stored():
            # Both the statistics file and the trajectory must hold 4 frames.
            statistics = StatisticsArray.from_pandas_csv(
                simulation.statistics_file_path
            )
            assert len(statistics) == 4

            trajectory = mdtraj.load(
                simulation.trajectory_file_path, top=coordinate_path
            )
            assert len(trajectory) == 4

        # Check that executing twice doesn't run the simulation twice
        simulation.execute(directory, ComputeResources())
        assert os.path.isfile(simulation._checkpoint_path)
        simulation.execute(directory, ComputeResources())

        assert_four_frames_stored()

        # Make sure that the output files are correctly truncating if more
        # frames than expected are written
        with open(simulation._checkpoint_path, "r") as file:
            checkpoint = json.load(file, cls=TypedJSONDecoder)

        # Fake having saved more frames than expected by changing both the
        # protocol and the stored checkpoint in the same way.
        simulation.steps_per_iteration = 8
        checkpoint.steps_per_iteration = 8
        simulation.output_frequency = 2
        checkpoint.output_frequency = 2

        with open(simulation._checkpoint_path, "w") as file:
            json.dump(checkpoint, file, cls=TypedJSONEncoder)

        simulation.execute(directory, ComputeResources())

        assert_four_frames_stored()
def test_same_component_batching():
    """Check that four properties spread over two distinct sets of
    components are split into two batches of two properties each."""

    state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # A density and an enthalpy of vaporization is created per component set.
    properties_to_batch = []

    for smiles_patterns in (("O", "C"), ("O", "CO")):

        properties_to_batch.append(
            Density(
                thermodynamic_state=state,
                substance=Substance.from_components(*smiles_patterns),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        properties_to_batch.append(
            EnthalpyOfVaporization(
                thermodynamic_state=state,
                substance=Substance.from_components(*smiles_patterns),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties_to_batch)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:

        server = EvaluatorServer(calculation_backend)
        batches = server._batch_by_same_component(submission, "")

    assert len(batches) == 2

    for batch in batches[:2]:
        assert len(batch.queued_properties) == 2
Exemple #9
0
def test_calculate_reduced_potential_openmm():
    """Build and parameterize a small system, run ``OpenMMReducedPotentials``
    on stored trajectory data, and check that the written statistics file
    contains reduced potentials."""

    substance = Substance.from_components("O")
    thermodynamic_state = ThermodynamicState(298 * unit.kelvin,
                                             1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        # The protocols read the force field from disk, so serialize it first.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 10
        build_coordinates.mass_density = 0.05 * unit.grams / unit.milliliters
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        # Plain string literals are used for the protocol ids below: there is
        # nothing to interpolate, so an f-string prefix would be misleading.
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        reduced_potentials = OpenMMReducedPotentials("reduced_potentials")
        reduced_potentials.substance = substance
        reduced_potentials.thermodynamic_state = thermodynamic_state
        reduced_potentials.reference_force_field_paths = [force_field_path]
        reduced_potentials.system_path = assign_parameters.system_path
        reduced_potentials.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd")
        reduced_potentials.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb")
        reduced_potentials.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv")
        reduced_potentials.high_precision = False
        reduced_potentials.execute(directory, ComputeResources())

        assert path.isfile(reduced_potentials.statistics_file_path)

        # The reduced potential observable must be present in the output.
        final_array = StatisticsArray.from_pandas_csv(
            reduced_potentials.statistics_file_path)
        assert ObservableType.ReducedPotential in final_array
Exemple #10
0
def test_run_openmm_simulation():
    """Run a two step ``OpenMMSimulation`` and check that the coordinate,
    trajectory and statistics outputs all exist afterwards."""

    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        coordinate_path, system_path = _setup_dummy_system(directory)

        simulation = OpenMMSimulation("npt_equilibration")
        simulation.steps_per_iteration = 2
        simulation.output_frequency = 1
        simulation.thermodynamic_state = state
        simulation.input_coordinate_file = coordinate_path
        simulation.system_path = system_path
        simulation.execute(directory, ComputeResources())

        # Each output attribute must point to a file which now exists.
        for output_attribute in (
            "output_coordinate_file",
            "trajectory_file_path",
            "statistics_file_path",
        ):
            assert path.isfile(getattr(simulation, output_attribute))
Exemple #11
0
def data_set_from_data_frame(data_frame):
    """Converts a `pandas.DataFrame` to a `PhysicalPropertyDataSet` object.
    See the `PhysicalPropertyDataSet.to_pandas()` function for information
    on the required columns.

    Each row yields one property for every ``"<PropertyType> Value (<unit>)"``
    column which holds a non-NaN entry in that row. Uncertainties are not
    read from the frame: every created property is given an uncertainty of
    zero in the default unit of its type.

    Parameters
    ----------
    data_frame: pandas.DataFrame
        The data frame to convert.

    Returns
    -------
    PhysicalPropertyDataSet
        The converted data set. An empty data set is returned for an
        empty frame.

    Raises
    ------
    AssertionError
        If a required base or component column is missing, if the frame
        contains no property value column, if two value columns resolve to
        the same property type, or if a component has neither a mole
        fraction nor an exact amount.
    """

    return_value = PhysicalPropertyDataSet()

    if len(data_frame) == 0:
        return return_value

    # Make sure the base columns are present.
    required_base_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
    ]

    assert all(x in data_frame for x in required_base_columns)

    # Make sure the substance columns are present.
    max_components = max(int(x) for x in data_frame["N Components"])
    assert max_components > 0

    required_components_columns = [
        x for i in range(max_components) for x in [
            f"Component {i + 1}",
            f"Role {i + 1}",
            f"Mole Fraction {i + 1}",
            f"Exact Amount {i + 1}",
        ]
    ]

    assert all(x in data_frame for x in required_components_columns)

    # Resolve each value column to a property type. The type name is the
    # first space delimited token of the column header.
    property_types = []

    for column_name in data_frame:

        if " Value" not in column_name:
            continue

        column_name_split = column_name.split(" ")

        assert len(column_name_split) >= 2

        property_type = getattr(evaluator.properties, column_name_split[0])
        property_types.append(property_type)

    assert len(property_types) > 0

    # Make sure we don't have duplicate property columns.
    assert len(set(property_types)) == len(property_types)

    properties = []

    for _, row in data_frame.iterrows():

        # Create the substance from the component columns. The count is
        # coerced to `int` (as is done when computing `max_components`) so
        # that a float typed column does not break the `range` call.
        number_of_components = int(row["N Components"])

        substance = Substance()

        for component_index in range(number_of_components):

            smiles = row[f"Component {component_index + 1}"]
            role = Component.Role[row[f"Role {component_index + 1}"]]
            mole_fraction = row[f"Mole Fraction {component_index + 1}"]
            exact_amount = row[f"Exact Amount {component_index + 1}"]

            # At least one of the two amounts must be defined.
            assert not numpy.isnan(mole_fraction) or not numpy.isnan(
                exact_amount)

            component = Component(smiles, role)

            # A component may carry both kinds of amount at the same time.
            if not numpy.isnan(mole_fraction):
                substance.add_component(component, MoleFraction(mole_fraction))
            if not numpy.isnan(exact_amount):
                substance.add_component(component, ExactAmount(exact_amount))

        # Extract the state
        pressure = row["Pressure (kPa)"] * unit.kilopascal
        temperature = row["Temperature (K)"] * unit.kelvin

        thermodynamic_state = ThermodynamicState(temperature, pressure)

        phase = PropertyPhase.from_string(row["Phase"])

        source = MeasurementSource(reference=row["Source"])

        for property_type in property_types:

            default_unit = property_type.default_unit()
            value_header = f"{property_type.__name__} Value ({default_unit:~})"

            # Rows do not need to define a value for every property type.
            if numpy.isnan(row[value_header]):
                continue

            value = row[value_header] * default_unit
            uncertainty = 0.0 * default_unit

            physical_property = property_type(
                thermodynamic_state=thermodynamic_state,
                phase=phase,
                substance=substance,
                value=value,
                uncertainty=uncertainty,
                source=source,
            )

            properties.append(physical_property)

    return_value.add_properties(*properties)
    return return_value
    )

    assert state_a == state_d
    assert state_c != state_d

    state_e = ThermodynamicState(
        temperature=0.9995 * unit.kelvin, pressure=0.9995 * unit.pascals
    )

    assert state_a == state_e


@pytest.mark.parametrize(
    "state",
    (
        ThermodynamicState(temperature=1.0 * unit.kelvin),
        ThermodynamicState(
            temperature=1.0 * unit.kelvin, pressure=1.0 * unit.pascals
        ),
    ),
)
def test_state_valid_checks(state):
    """Check that ``validate`` accepts a state which defines only a
    temperature as well as one which defines a temperature and a pressure."""
    state.validate()


@pytest.mark.parametrize(
    "state",
    [
        ThermodynamicState(),
        ThermodynamicState(temperature=1.0 * unit.pascals),
        ThermodynamicState(temperature=1.0 * unit.pascals, pressure=1.0 * unit.kelvin),
        ThermodynamicState(temperature=-1.0 * unit.kelvin),
    ],
Exemple #13
0
def test_density_dielectric_merging(workflow_merge_function):
    """Merge the default density and dielectric constant simulation
    workflows and compare the resulting protocols pairwise.

    Protocols whose id contains neither ``extract_traj`` nor
    ``extract_stats`` are expected to have identical schemas in both
    workflows; the remaining protocols are expected to differ.

    Parameters
    ----------
    workflow_merge_function
        A callable which is passed the density and dielectric workflows
        (in that order) and merges them.
    """

    substance = Substance.from_components("C")

    density = evaluator.properties.Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    dielectric = evaluator.properties.DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    density_schema = density.default_simulation_schema().workflow_schema
    dielectric_schema = dielectric.default_simulation_schema().workflow_schema

    density_metadata = Workflow.generate_default_metadata(
        density, "smirnoff99Frosst-1.1.0.offxml", []
    )

    # The metadata of each workflow is generated from its own property.
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric, "smirnoff99Frosst-1.1.0.offxml", []
    )

    density_workflow = Workflow(density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_merge_function(density_workflow, dielectric_workflow)

    density_workflow_graph = density_workflow.to_graph()
    dielectric_workflow_graph = dielectric_workflow.to_graph()

    dependants_graph_a = density_workflow_graph._protocol_graph._build_dependants_graph(
        density_workflow_graph.protocols, False, apply_reduction=True
    )
    dependants_graph_b = dielectric_workflow_graph._protocol_graph._build_dependants_graph(
        dielectric_workflow_graph.protocols, False, apply_reduction=True
    )

    # Walk both workflows in topological order so that protocols are
    # compared pairwise, position by position.
    merge_order_a = graph.topological_sort(dependants_graph_a)
    merge_order_b = graph.topological_sort(dependants_graph_b)

    for protocol_id_a, protocol_id_b in zip(merge_order_a, merge_order_b):

        schema_a = density_workflow.protocols[protocol_id_a].schema.json()
        schema_b = dielectric_workflow.protocols[protocol_id_b].schema.json()

        if (
            protocol_id_a.find("extract_traj") < 0
            and protocol_id_a.find("extract_stats") < 0
        ):
            assert schema_a == schema_b
        else:
            assert schema_a != schema_b
Exemple #14
0
def create_dummy_simulation_data(
    directory_path,
    substance,
    force_field_id="dummy_ff_id",
    coordinate_file_name="output.pdb",
    trajectory_file_name="trajectory.dcd",
    statistics_file_name="statistics.csv",
    statistical_inefficiency=1.0,
    phase=PropertyPhase.Liquid,
    number_of_molecules=1,
    calculation_id=None,
):
    """Build a placeholder `StoredSimulationData` object along with the
    data directory which accompanies it.

    The directory is created if it does not exist and is populated with
    empty coordinate, trajectory and statistics files. The thermodynamic
    state of the data is always set to 1 K.

    Parameters
    ----------
    directory_path: str
        The path to the dummy data directory to create.
    substance: Substance
        Stored as the `substance` of the data object.
    force_field_id
        Stored as the `force_field_id` of the data object.
    coordinate_file_name
        The name of the empty coordinate file to create.
    trajectory_file_name
        The name of the empty trajectory file to create.
    statistics_file_name
        The name of the empty statistics file to create.
    statistical_inefficiency
        Stored as the `statistical_inefficiency` of the data object.
    phase
        Stored as the `property_phase` of the data object.
    number_of_molecules
        Stored as the `number_of_molecules` of the data object.
    calculation_id
        Stored as the `source_calculation_id` of the data object. A random
        UUID string is generated when this is `None`.

    Returns
    -------
    StoredSimulationData
        The dummy stored data object.
    """

    os.makedirs(directory_path, exist_ok=True)

    dummy_data = StoredSimulationData()

    dummy_data.substance = substance
    dummy_data.force_field_id = force_field_id
    dummy_data.thermodynamic_state = ThermodynamicState(1.0 * unit.kelvin)
    dummy_data.property_phase = phase

    dummy_data.coordinate_file_name = coordinate_file_name
    dummy_data.trajectory_file_name = trajectory_file_name
    dummy_data.statistics_file_name = statistics_file_name

    # Only the presence of the files matters, so they are left empty.
    for file_name in (
        coordinate_file_name,
        trajectory_file_name,
        statistics_file_name,
    ):
        with open(os.path.join(directory_path, file_name), "w") as file:
            file.write("")

    dummy_data.statistical_inefficiency = statistical_inefficiency

    dummy_data.number_of_molecules = number_of_molecules

    dummy_data.source_calculation_id = (
        str(uuid.uuid4()) if calculation_id is None else calculation_id
    )

    return dummy_data
Exemple #15
0
def test_storage_retrieval():
    """Check that the reweighting layer retrieves exactly the expected
    stored simulation data for a range of property types.

    Dummy data is stored for several substance / phase / molecule count
    combinations (including some which no property should match), and the
    storage keys found in the workflow metadata generated for each property
    are compared against the keys of the data expected for that property.
    """
    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")
    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each entry is a (substance, phase, number of molecules) tuple.
    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Property with a multi-phase query (liquid and gas data).
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query (full system and
        # per component data).
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3 / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
    ]
    # Maps each property type to the data expected under each metadata key.
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )
            storage_key = storage_backend.store_object(data, data_directory)
            # Remember the key so that it can be matched against the metadata.
            storage_keys[(substance, phase, n_mol)] = storage_key

        for physical_property in properties:

            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

            # The registered entry may be a factory rather than a schema.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory, physical_property, "", [], storage_backend, schema,
            )

            assert metadata is not None

            expected_data = expected_data_per_property[physical_property.__class__]

            for data_key, expected_metadata in expected_data.items():

                assert data_key in metadata

                stored_metadata = metadata[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                # The first item of each stored entry is a path whose base
                # name is compared against the storage keys.
                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)
Exemple #16
0
def test_to_pandas():
    """A test to ensure that data sets are convertible to pandas objects.

    Twelve properties are stored (two pure and two binary property types at
    three temperatures each) and the shape and columns of the resulting
    data frame are checked.
    """

    source = CalculationSource("Dummy", {})

    pure_substance = Substance.from_components("C")
    binary_substance = Substance.from_components("C", "O")

    temperatures = [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]

    data_set = PhysicalPropertyDataSet()

    # Add the pure component properties first...
    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        density_property = Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )

        dielectric_property = DielectricConstant(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )

        data_set.add_properties(density_property)
        data_set.add_properties(dielectric_property)

    # ...followed by the binary mixture properties.
    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        enthalpy_property = EnthalpyOfMixing(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )

        excess_property = ExcessMolarVolume(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            source=source,
        )

        data_set.add_properties(enthalpy_property)
        data_set.add_properties(excess_property)

    data_set_pandas = data_set.to_pandas()

    # Check for `None` before the frame is used, so that a missing frame is
    # reported by this assertion rather than by a `TypeError` further down.
    assert data_set_pandas is not None

    required_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
        "Component 1",
        "Role 1",
        "Mole Fraction 1",
        "Exact Amount 1",
        "Component 2",
        "Role 2",
        "Mole Fraction 2",
        "Exact Amount 2",
    ]

    assert all(x in data_set_pandas for x in required_columns)

    assert data_set_pandas.shape == (12, 21)

    # Two of the columns are expected to hold no values at all.
    data_set_without_na = data_set_pandas.dropna(axis=1, how="all")
    assert data_set_without_na.shape == (12, 19)