def test_validate_data_set():
    """Ensure a data set validates its contents on insertion and on demand.

    A density at 298 K is expected to pass validation, while one at -1 K is
    expected to raise an ``AssertionError`` - either when it is added with
    validation enabled, or when ``validate`` is called after it was added
    with ``validate=False``.
    """

    def density_at(temperature):
        # A zero valued liquid density of "O" at 1 atm - only the
        # temperature differs between the valid and invalid properties.
        return Density(
            ThermodynamicState(temperature, 1 * unit.atmosphere),
            PropertyPhase.Liquid,
            Substance.from_components("O"),
            0.0 * unit.gram / unit.milliliter,
            0.0 * unit.gram / unit.milliliter,
        )

    valid_property = density_at(298 * unit.kelvin)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(valid_property)
    data_set.validate()

    invalid_property = density_at(-1 * unit.kelvin)

    # Adding with the default validation should reject the property.
    with pytest.raises(AssertionError):
        data_set.add_properties(invalid_property)

    # Skipping validation on insertion defers the failure to ``validate``.
    data_set.add_properties(invalid_property, validate=False)

    with pytest.raises(AssertionError):
        data_set.validate()
def test_generate_request_options(self):
    """Check the request options generated for an evaluator target.

    With both direct simulation and reweighting allowed, the options are
    expected to list the reweighting layer before the simulation layer, and
    to carry density schemas equal to the defaults built with the target's
    ``n_molecules`` and ``n_effective_samples``.
    """
    training_set = create_data_set("data-set-1", 1)

    target = create_evaluator_target("evaluator-target-1", [training_set.id])
    target.allow_direct_simulation = True
    target.allow_reweighting = True
    target.n_molecules = 512
    target.n_effective_samples = 10

    request_options = OptimizationInputFactory._generate_request_options(
        target, training_set.to_evaluator()
    )

    expected_layers = ["ReweightingLayer", "SimulationLayer"]
    assert request_options.calculation_layers == expected_layers

    assert request_options.calculation_schemas != UNDEFINED

    # Compare the schemas via their JSON representations, layer by layer.
    expected_schemas = {
        "SimulationLayer": Density.default_simulation_schema(n_molecules=512),
        "ReweightingLayer": Density.default_reweighting_schema(
            n_effective_samples=10
        ),
    }

    for layer_name, expected_schema in expected_schemas.items():
        actual_schema = request_options.calculation_schemas["Density"][layer_name]
        assert actual_schema.json() == expected_schema.json()
def _build_entry(*smiles: str) -> Density:
    """Create a liquid density entry at 298.15 K and 101.325 kPa for a
    substance built from the given smiles patterns.

    Parameters
    ----------
    smiles
        The smiles patterns of the components in the substance. At least one
        must be provided.

    Returns
    -------
        The density entry, with a value and uncertainty of one default
        density unit and a blank DOI source.
    """
    assert len(smiles) > 0

    ambient_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=101.325 * unit.kilopascal,
    )

    value = 1.0 * Density.default_unit()
    uncertainty = 1.0 * Density.default_unit()

    source = MeasurementSource(doi=" ")
    substance = Substance.from_components(*smiles)

    return Density(
        thermodynamic_state=ambient_state,
        phase=PropertyPhase.Liquid,
        value=value,
        uncertainty=uncertainty,
        source=source,
        substance=substance,
    )
def estimated_reference_sets():
    """Build a pair of estimated and reference data sets describing the same
    two measurements.

    Each set holds a density (id 1) and an enthalpy of mixing (id 2) of an
    "O" + "CC=O" system. The estimated values are expressed in different
    units (kg / m^3 and kcal / mol) to the plain numbers stored in the
    reference entries.

    Returns
    -------
    tuple
        The estimated ``PhysicalPropertyDataSet`` followed by the reference
        ``DataSet``.
    """

    def liquid_binary_kwargs():
        # Build fresh state / substance objects for each property so that no
        # objects are shared between the two estimated properties.
        return {
            "thermodynamic_state": ThermodynamicState(
                298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
            ),
            "phase": PropertyPhase.Liquid,
            "substance": Substance.from_components("O", "CC=O"),
        }

    def binary_components():
        return [
            Component(smiles="O", mole_fraction=0.5),
            Component(smiles="CC=O", mole_fraction=0.5),
        ]

    # The estimated properties.
    estimated_density = Density(
        **liquid_binary_kwargs(),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
    )
    estimated_density.id = "1"

    estimated_enthalpy = EnthalpyOfMixing(
        **liquid_binary_kwargs(),
        value=1.0 * unit.kilocalorie / unit.mole,
        uncertainty=0.1 * unit.kilojoule / unit.mole,
    )
    estimated_enthalpy.id = "2"

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(estimated_density, estimated_enthalpy)

    # The matching reference entries.
    reference_density = DataSetEntry(
        id=1,
        property_type="Density",
        temperature=298.15,
        pressure=101.325,
        value=0.001,
        std_error=0.0001,
        doi=" ",
        components=binary_components(),
    )
    reference_enthalpy = DataSetEntry(
        id=2,
        property_type="EnthalpyOfMixing",
        temperature=298.15,
        pressure=101.325,
        value=4.184,
        std_error=0.1,
        doi=" ",
        components=binary_components(),
    )

    author = Author(name=" ", email="*****@*****.**", institute=" ")

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[author],
        entries=[reference_density, reference_enthalpy],
    )

    return estimated_data_set, reference_data_set
def test_physical_property_state_methods():
    """Round trip a dummy density through ``__getstate__`` / ``__setstate__``
    and check that the JSON form of its state is unchanged."""
    original_property = create_dummy_property(Density)
    original_state = original_property.__getstate__()

    # Restore the state onto a blank property and read it straight back.
    clone = Density()
    clone.__setstate__(original_state)
    clone_state = clone.__getstate__()

    original_json, clone_json = (
        json.dumps(state, cls=TypedJSONEncoder)
        for state in (original_state, clone_state)
    )

    assert original_json == clone_json
def create_filterable_data_set():
    """Creates a dummy data set whose properties differ in type, phase, state
    and composition so that each can be singled out by a filter. It contains:

    - a liquid density at 298 K and 0.5 atm for a one component substance
      created from the element list ["C"].
    - a gaseous dielectric constant at 288 K and 1 atm for a two component
      substance created from the element list ["N"].
    - a solid enthalpy of mixing at 308 K and 1.5 atm for a three component
      substance created from the element list ["O"].

    Returns
    -------
    PhysicalPropertyDataSet
        The created data set.
    """
    source = CalculationSource("Dummy", {})

    properties = []

    # Liquid density.
    carbon_substance = create_dummy_substance(number_of_components=1, elements=["C"])
    properties.append(
        Density(
            thermodynamic_state=ThermodynamicState(
                temperature=298 * unit.kelvin, pressure=0.5 * unit.atmosphere
            ),
            phase=PropertyPhase.Liquid,
            substance=carbon_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )
    )

    # Gaseous dielectric constant.
    nitrogen_substance = create_dummy_substance(
        number_of_components=2, elements=["N"]
    )
    properties.append(
        DielectricConstant(
            thermodynamic_state=ThermodynamicState(
                temperature=288 * unit.kelvin, pressure=1 * unit.atmosphere
            ),
            phase=PropertyPhase.Gas,
            substance=nitrogen_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )
    )

    # Solid enthalpy of mixing.
    oxygen_substance = create_dummy_substance(number_of_components=3, elements=["O"])
    properties.append(
        EnthalpyOfMixing(
            thermodynamic_state=ThermodynamicState(
                temperature=308 * unit.kelvin, pressure=1.5 * unit.atmosphere
            ),
            phase=PropertyPhase.Solid,
            substance=oxygen_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )
    )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    return data_set
def test_same_component_batching():
    """Check that a server batches queued properties by their substance.

    A density and an enthalpy of vaporization are submitted for each of two
    binary substances, and two batches of two properties are expected back.
    """
    thermodynamic_state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # One density followed by one enthalpy of vaporization per substance.
    properties = []

    for smiles in (("O", "C"), ("O", "CO")):
        properties.append(
            Density(
                thermodynamic_state=thermodynamic_state,
                substance=Substance.from_components(*smiles),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        properties.append(
            EnthalpyOfVaporization(
                thermodynamic_state=thermodynamic_state,
                substance=Substance.from_components(*smiles),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:
        server = EvaluatorServer(calculation_backend)
        batches = server._batch_by_same_component(submission, "")

    assert len(batches) == 2

    for batch_index in range(2):
        assert len(batches[batch_index].queued_properties) == 2
def test_from_pandas():
    """A test to ensure that a data set converted to a pandas object can be
    converted back without losing information."""
    state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # Mix sources with a DOI and with a reference, and properties with and
    # without an uncertainty.
    density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("CO", "O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=1.0 * unit.kilogram / unit.meter**3,
        source=MeasurementSource(doi="10.5281/zenodo.596537"),
    )
    enthalpy = EnthalpyOfVaporization(
        thermodynamic_state=state,
        phase=PropertyPhase.from_string("Liquid + Gas"),
        substance=Substance.from_components("C"),
        value=2.0 * unit.kilojoule / unit.mole,
        source=MeasurementSource(reference="2"),
    )
    dielectric = DielectricConstant(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("C"),
        value=3.0 * unit.dimensionless,
        source=MeasurementSource(reference="3"),
    )

    original_data_set = PhysicalPropertyDataSet()
    original_data_set.add_properties(density, enthalpy, dielectric)

    data_frame = original_data_set.to_pandas()
    recreated_data_set = PhysicalPropertyDataSet.from_pandas(data_frame)

    assert len(original_data_set) == len(recreated_data_set)

    for expected in original_data_set:
        # Pair up the properties by their unique ids.
        actual = next(x for x in recreated_data_set if x.id == expected.id)

        for attribute in ("thermodynamic_state", "phase", "substance"):
            assert getattr(expected, attribute) == getattr(actual, attribute)

        assert numpy.isclose(expected.value, actual.value)

        if expected.uncertainty == UNDEFINED:
            assert expected.uncertainty == actual.uncertainty
        else:
            assert numpy.isclose(expected.uncertainty, actual.uncertainty)

        assert expected.source.doi == actual.source.doi
        assert expected.source.reference == actual.source.reference
def test_filter_ionic_liquid():
    """Apply the ionic liquid filter to two densities - one of "[Na+].[Cl-]"
    and one of "C" - and check that a single row is left."""
    thermodynamic_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=101.325 * unit.kilopascal,
    )

    # The two entries differ only in their substance.
    densities = [
        Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
            source=MeasurementSource(doi=" "),
            substance=Substance.from_components(smiles),
        )
        for smiles in ("[Na+].[Cl-]", "C")
    ]

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*densities)

    filtered_frame = FilterByIonicLiquid.apply(
        data_set.to_pandas(),
        FilterByIonicLiquidSchema(),
    )

    assert len(filtered_frame) == 1
def simple_evaluator_data_set():
    """Create a minimal evaluator `PhysicalPropertyDataSet` holding a single
    liquid density of an "O" + "CC=O" substance with an id of "1".

    Returns
    -------
    PhysicalPropertyDataSet
        The data set containing the single density.
    """
    state = ThermodynamicState(298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere)

    density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=MeasurementSource(doi="10.1000/xyz123"),
    )
    # Pin the id so that it does not depend on any automatically assigned one.
    density.id = "1"

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(density)

    return data_set
def data_frame() -> pandas.DataFrame:
    """Build a data frame of three liquid densities of "C" which differ only
    in the state they were measured at.

    Returns
    -------
        The pandas representation of the data set.
    """
    # (temperature in K, pressure in kPa) - the second entry shifts the
    # temperature and the third shifts the pressure relative to the first.
    conditions = [
        (298.15, 101.325),
        (305.15, 101.325),
        (298.15, 105.325),
    ]

    data_set = PhysicalPropertyDataSet()

    densities = [
        Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=pressure * unit.kilopascal,
            ),
            phase=PropertyPhase.Liquid,
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
            source=MeasurementSource(doi=" "),
            substance=Substance.from_components("C"),
        )
        for temperature, pressure in conditions
    ]
    data_set.add_properties(*densities)

    return data_set.to_pandas()
def define_data_set(reweighting: bool) -> PhysicalPropertyDataSet:
    """Define the set of properties to estimate.

    Excess and pure properties of "CCO" / "CC=O" systems are added at three
    temperatures. Solvation free energies are only included when
    ``reweighting`` is false.

    Parameters
    ----------
    reweighting
        When true, the solvation free energy entries are left out of the set.

    Returns
    -------
        The data set containing the (zero valued) properties.
    """
    # The states to compute estimates at.
    states = [
        ThermodynamicState(
            temperature=temperature * unit.kelvin, pressure=1.0 * unit.atmosphere
        )
        for temperature in (296.15, 298.15, 300.15)
    ]

    data_set = PhysicalPropertyDataSet()

    # Solvation free energies.
    if not reweighting:
        ethanol_substance = Substance.from_components("CCO")
        ethanol_substance.add_component(
            Component("CC=O", Component.Role.Solute), ExactAmount(1)
        )

        ethanal_substance = Substance.from_components("CC=O")
        ethanal_substance.add_component(
            Component("CCO", Component.Role.Solute), ExactAmount(1)
        )

        solvation_properties = [
            SolvationFreeEnergy(
                thermodynamic_state=states[1],
                phase=PropertyPhase.Liquid,
                substance=substance,
                value=0.0 * SolvationFreeEnergy.default_unit(),
            )
            for substance in (ethanol_substance, ethanal_substance)
        ]

        # Append the FreeSolv entries retained by the ("O", "CO") substance
        # filter.
        curation_schema = CurationWorkflowSchema(
            component_schemas=[
                ImportFreeSolvSchema(),
                FilterBySubstancesSchema(substances_to_include=[("O", "CO")]),
            ]
        )
        solvation_properties.extend(
            CurationWorkflow.apply(PhysicalPropertyDataSet(), curation_schema)
        )

        data_set.add_properties(*solvation_properties)

    for state in states:
        # Excess properties.
        excess_properties = [
            property_type(
                thermodynamic_state=state,
                phase=PropertyPhase.Liquid,
                substance=Substance.from_components("CC=O", "CCO"),
                value=0.0 * property_type.default_unit(),
            )
            for property_type in (ExcessMolarVolume, EnthalpyOfMixing)
        ]
        data_set.add_properties(*excess_properties)

        # Pure properties
        pure_density = Density(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CCO"),
            value=0.0 * Density.default_unit(),
        )
        pure_enthalpy = EnthalpyOfVaporization(
            thermodynamic_state=state,
            phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
            substance=Substance.from_components("CCO"),
            value=0.0 * EnthalpyOfVaporization.default_unit(),
        )
        pure_dielectric = DielectricConstant(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CCO"),
            value=0.0 * DielectricConstant.default_unit(),
        )
        data_set.add_properties(pure_density, pure_enthalpy, pure_dielectric)

    return data_set
def test_analysed_result_from_evaluator():
    """Tests the `DataSetResult.from_evaluator` function.

    The estimated values are drawn from a normal distribution centred on the
    reference value, so the RMSE over the whole set should be close to the
    standard deviation of that distribution.
    """
    expected_mean = 0.0
    expected_std = numpy.random.rand() + 1.0

    sampled_values = numpy.random.normal(expected_mean, expected_std, 1000)

    estimated_properties = []
    reference_entries = []

    # Ids are one-based, with the estimated property storing its id as a
    # string and the reference entry storing it as an integer.
    for property_id, sampled_value in enumerate(sampled_values, start=1):
        estimated_density = Density(
            thermodynamic_state=ThermodynamicState(
                298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
            ),
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=sampled_value * Density.default_unit(),
            uncertainty=0.0 * Density.default_unit(),
        )
        estimated_density.id = str(property_id)
        estimated_properties.append(estimated_density)

        reference_entries.append(
            DataSetEntry(
                id=property_id,
                property_type="Density",
                temperature=298.15,
                pressure=101.325,
                value=expected_mean,
                std_error=None,
                doi=" ",
                components=[Component(smiles="O", mole_fraction=1.0)],
            )
        )

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(*estimated_properties)

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=reference_entries,
    )

    analysed_results = DataSetResult.from_evaluator(
        reference_data_set=reference_data_set,
        estimated_data_set=estimated_data_set,
        analysis_environments=[ChemicalEnvironment.Aqueous],
        statistic_types=[StatisticType.RMSE],
        bootstrap_iterations=1000,
    )

    assert len(analysed_results.result_entries) == len(estimated_properties)

    # The statistics without a category are those computed over every entry.
    full_statistics = next(
        x for x in analysed_results.statistic_entries if x.category is None
    )

    assert full_statistics.property_type == "Density"
    assert full_statistics.n_components == 1
    assert full_statistics.statistic_type == StatisticType.RMSE
    assert numpy.isclose(full_statistics.value, expected_std, rtol=0.10)
def test_storage_retrieval():
    """Check that the reweighting layer retrieves the expected stored data
    for each type of property.

    Dummy simulation data is stored for a number of (substance, phase, number
    of molecules) combinations - including some which no property should
    match - and the storage keys found in the workflow metadata generated for
    each property are compared against the keys of the data expected to be
    retrieved.
    """
    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")

    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    # Maps each (substance, phase, n_molecules) tuple to its storage key.
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Property with a multi-phase query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            # An excess molar volume is a volume per amount of substance.
            value=1.0 * unit.meter ** 3 / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
    ]
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            storage_key = storage_backend.store_object(data, data_directory)
            storage_keys[(substance, phase, n_mol)] = storage_key

        for physical_property in properties:

            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

            # The registry may hold a factory rather than a schema instance.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory,
                physical_property,
                "",
                [],
                storage_backend,
                schema,
            )

            assert metadata is not None

            expected_data_list = expected_data_per_property[physical_property.__class__]

            for data_key, expected_metadata in expected_data_list.items():

                assert data_key in metadata

                stored_metadata = metadata[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                # Each stored item is a three-tuple whose first element is a
                # path ending in the storage key.
                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)
def complete_evaluator_data_set():
    """Create a more comprehensive `PhysicalPropertyDataSet` which contains a
    single measurement of each of the following, with ids "1" to "6" in this
    order:

    * pure density
    * binary density
    * pure enthalpy of vaporization
    * binary enthalpy of mixing
    * binary excess molar volume
    * solvation free energy of "CCCO" in "O"

    Returns
    -------
    PhysicalPropertyDataSet
        The data set containing the six properties.
    """
    state = ThermodynamicState(298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere)
    source = MeasurementSource(doi="10.1000/xyz123")

    # A single "CCCO" molecule in an "O" solvent.
    solvation_substance = Substance()
    solvation_substance.add_component(Component("O"), MoleFraction(1.0))
    solvation_substance.add_component(Component("CCCO"), ExactAmount(1))

    pure_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=source,
    )
    binary_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=source,
    )
    enthalpy_of_vaporization = EnthalpyOfVaporization(
        thermodynamic_state=state,
        phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
        substance=Substance.from_components("CCO"),
        value=1.0 * EnthalpyOfVaporization.default_unit(),
        uncertainty=0.1 * EnthalpyOfVaporization.default_unit(),
        source=source,
    )
    enthalpy_of_mixing = EnthalpyOfMixing(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("CCCCO", "CC(C=O)C"),
        value=1.0 * EnthalpyOfMixing.default_unit(),
        uncertainty=0.1 * EnthalpyOfMixing.default_unit(),
        source=source,
    )
    excess_molar_volume = ExcessMolarVolume(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("C(=O)CCCO", "CCCCCC"),
        value=1.0 * ExcessMolarVolume.default_unit(),
        uncertainty=0.1 * ExcessMolarVolume.default_unit(),
        source=source,
    )
    solvation_free_energy = SolvationFreeEnergy(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=solvation_substance,
        value=1.0 * SolvationFreeEnergy.default_unit(),
        uncertainty=0.1 * SolvationFreeEnergy.default_unit(),
        source=source,
    )

    evaluator_properties = [
        pure_density,
        binary_density,
        enthalpy_of_vaporization,
        enthalpy_of_mixing,
        excess_molar_volume,
        solvation_free_energy,
    ]

    # Assign one-based string ids which follow the order of the list.
    for property_id, evaluator_property in enumerate(evaluator_properties, start=1):
        evaluator_property.id = str(property_id)

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(*evaluator_properties)

    return evaluator_data_set
def test_reindex_data_set():
    """Tests that the ``reindex_data_set`` function behaves as expected.

    Evaluator properties which match a reference entry should take on the id
    of that entry, whether the reference is a single data set or a collection
    of them, while a property without a match should keep its original id.
    """
    setup_timestamp_logging(logging.INFO)

    def liquid_density(temperature, *smiles):
        # An evaluator density at 1 atm with a unit value and uncertainty.
        return Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=1.0 * unit.atmosphere,
            ),
            phase=PropertyPhase.Liquid,
            substance=substances.Substance.from_components(*smiles),
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
        )

    def reference_entry(entry_id, *smiles):
        # A reference density entry with the components in equal amounts.
        mole_fraction = 1.0 / len(smiles)

        return DataSetEntry(
            id=entry_id,
            property_type="Density",
            temperature=298.15,
            pressure=101.325,
            value=1.0,
            std_error=1.0,
            doi=" ",
            components=[
                Component(smiles=pattern, mole_fraction=mole_fraction)
                for pattern in smiles
            ],
        )

    def blank_author():
        return Author(name=" ", email="*****@*****.**", institute=" ")

    # The third property is measured at a temperature which no reference
    # entry was recorded at.
    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(
        liquid_density(298.15, "O"),
        liquid_density(298.15, "C", "O"),
        liquid_density(300.0, "C", "O"),
    )

    data_set = DataSet(
        id="data-set",
        description=" ",
        authors=[blank_author()],
        entries=[
            reference_entry(1, "O", "C"),
            reference_entry(2, "O"),
        ],
    )

    un_indexed_id = evaluator_data_set.properties[2].id

    # Re-index against a single data set.
    reindex_data_set(evaluator_data_set, data_set)

    reindexed_ids = [x.id for x in evaluator_data_set.properties[:3]]
    assert reindexed_ids[0] == "2"
    assert reindexed_ids[1] == "1"
    assert reindexed_ids[2] == un_indexed_id

    # Re-index against a collection of data sets.
    data_set_collection = DataSetCollection(
        data_sets=[
            DataSet(
                id="0",
                description=" ",
                authors=[blank_author()],
                entries=[reference_entry(3, "O", "C")],
            ),
            DataSet(
                id="1",
                description=" ",
                authors=[blank_author()],
                entries=[reference_entry(4, "O")],
            ),
        ]
    )

    reindex_data_set(evaluator_data_set, data_set_collection)

    reindexed_ids = [x.id for x in evaluator_data_set.properties[:3]]
    assert reindexed_ids[0] == "4"
    assert reindexed_ids[1] == "3"
    assert reindexed_ids[2] == un_indexed_id
def test_to_pandas():
    """A test to ensure that data sets are convertible to pandas objects.

    A density and a dielectric constant of a pure substance, and an enthalpy
    of mixing and an excess molar volume of a binary substance, are added at
    each of three temperatures (12 properties in total). The resulting frame
    is checked for the required columns and for its expected shape, both
    before and after dropping the columns which are entirely empty.
    """
    source = CalculationSource("Dummy", {})

    pure_substance = Substance.from_components("C")
    binary_substance = Substance.from_components("C", "O")

    data_set = PhysicalPropertyDataSet()

    # Add all of the pure properties before any of the binary ones so that
    # the order of the properties in the set is left unchanged.
    for temperature in [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        density_property = Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )
        dielectric_property = DielectricConstant(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )

        data_set.add_properties(density_property)
        data_set.add_properties(dielectric_property)

    for temperature in [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        enthalpy_property = EnthalpyOfMixing(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )
        excess_property = ExcessMolarVolume(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            source=source,
        )

        data_set.add_properties(enthalpy_property)
        data_set.add_properties(excess_property)

    data_set_pandas = data_set.to_pandas()

    # Make sure a frame was actually returned *before* it is inspected,
    # otherwise this check could never be the one to fail.
    assert data_set_pandas is not None

    required_columns = [
        "Id",
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
        "Component 1",
        "Role 1",
        "Mole Fraction 1",
        "Exact Amount 1",
        "Component 2",
        "Role 2",
        "Mole Fraction 2",
        "Exact Amount 2",
    ]

    assert all(x in data_set_pandas for x in required_columns)

    assert data_set_pandas.shape == (12, 22)

    # Two of the columns are expected to be empty for every row.
    data_set_without_na = data_set_pandas.dropna(axis=1, how="all")
    assert data_set_without_na.shape == (12, 20)