def valid_generalized_mean_property_predictor_data():
    """Return a serialized generalized mean property predictor payload for tests.

    The result is a plain ``dict`` holding module-level metadata and a nested
    ``config`` of type ``'GeneralizedMeanProperty'``. The module ``id`` and the
    first argument of the training data source are freshly generated random
    UUIDs, so the payload differs between calls.
    """
    from citrine.informatics.descriptors import FormulationDescriptor
    from citrine.informatics.data_sources import GemTableDataSource

    mixture = FormulationDescriptor('simple mixture')
    module_id = str(uuid.uuid4())
    training_source = GemTableDataSource(uuid.uuid4(), 0, mixture)

    config = {
        'type': 'GeneralizedMeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': ['density'],
        'p': 2,
        'training_data': [training_source.dump()],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'schema_id': '29e53222-3217-4f81-b3b8-4197a8211ade',
        'id': module_id,
        'config': config,
    }
def valid_mean_property_predictor_data():
    """Return a serialized mean property predictor payload for tests.

    The result is a plain ``dict`` holding module-level metadata and a nested
    ``config`` of type ``'MeanProperty'`` whose single property is a dumped
    ``RealDescriptor`` keyed ``'density'``. The module ``id`` and the first
    argument of the training data source are freshly generated random UUIDs.
    """
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor
    from citrine.informatics.data_sources import GemTableDataSource

    mixture = FormulationDescriptor('simple mixture')
    density_descriptor = RealDescriptor(
        key='density',
        lower_bound=0,
        upper_bound=100,
        units='g/cm^3',
    )
    module_id = str(uuid.uuid4())
    training_source = GemTableDataSource(uuid.uuid4(), 0, mixture)

    config = {
        'type': 'MeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': [density_descriptor.dump()],
        'p': 2,
        'training_data': [training_source.dump()],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'id': module_id,
        'config': config,
    }
def valid_formulation_design_space_data():
    """Return a serialized formulation design space payload for tests.

    The nested ``config`` is of type ``'FormulationDesignSpace'`` and carries
    one dumped ``IngredientCountConstraint`` (min 0, max 1) built on a
    formulation descriptor keyed ``'formulation'``. The module ``id`` is a
    freshly generated random UUID string.
    """
    from citrine.informatics.constraints import IngredientCountConstraint
    from citrine.informatics.descriptors import FormulationDescriptor

    formulation = FormulationDescriptor('formulation')
    count_constraint = IngredientCountConstraint(
        formulation_descriptor=formulation,
        min=0,
        max=1,
    )
    module_id = str(uuid.uuid4())

    config = {
        'type': 'FormulationDesignSpace',
        'name': 'formulation design space',
        'description': 'formulates some things',
        'formulation_descriptor': formulation.dump(),
        'ingredients': ['foo'],
        'labels': {'bar': ['foo']},
        'constraints': [count_constraint.dump()],
        'resolution': 0.1,
    }
    return {
        'module_type': 'DESIGN_SPACE',
        'status': 'VALIDATING',
        'status_info': None,
        'archived': True,
        'display_name': 'formulation design space',
        'id': module_id,
        'config': config,
    }
def valid_ingredient_fractions_predictor_data():
    """Return a serialized ingredient fractions predictor payload for tests.

    The nested ``config`` is of type ``'IngredientFractions'``, takes a dumped
    formulation descriptor keyed ``'ingredients'`` as input, and lists two
    ingredient names. The module ``id`` is a freshly generated random UUID
    string.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    module_id = str(uuid.uuid4())
    config = {
        'type': 'IngredientFractions',
        'name': 'Ingredient fractions predictor',
        'description': 'Computes ingredient fractions',
        'input': FormulationDescriptor('ingredients').dump(),
        'ingredients': ['Blue dye', 'Red dye'],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Ingredient fractions predictor',
        'id': module_id,
        'config': config,
    }
def test_deprecated_ingredients_to_simple_mixture():
    """Check that building an IngredientsToSimpleMixturePredictor warns once.

    The predictor is constructed while every warning is being recorded. The
    test then checks that the constructor arguments were kept and that exactly
    one warning, a ``DeprecationWarning``, was captured.
    """
    with warnings.catch_warnings(record=True) as recorded:
        # "always" defeats the once-per-location filter so the warning is
        # recorded even if it already fired elsewhere in the test session.
        warnings.simplefilter("always")
        predictor = IngredientsToSimpleMixturePredictor(
            name="deprecated",
            description="",
            output=FormulationDescriptor("formulation"),
            id_to_quantity={
                "quantity 1": RealDescriptor("foo", lower_bound=0, upper_bound=1, units=""),
            },
            labels={"label": {"foo"}},
        )

        assert predictor.name == "deprecated"
        assert predictor.labels == {"label": {"foo"}}

        assert len(recorded) == 1
        only_warning = recorded[0]
        assert issubclass(only_warning.category, DeprecationWarning)
def valid_label_fractions_predictor_data():
    """Return a serialized label fractions predictor payload for tests.

    The nested ``config`` is of type ``'LabelFractions'``, takes a dumped
    formulation descriptor keyed ``'simple mixture'`` as input, and names a
    single label. The module ``id`` is a freshly generated random UUID string.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    module_id = str(uuid.uuid4())
    config = {
        'type': 'LabelFractions',
        'name': 'Label fractions predictor',
        'description': 'Computes relative proportions of labeled ingredients',
        'input': FormulationDescriptor('simple mixture').dump(),
        'labels': ['solvent'],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Label fractions predictor',
        'id': module_id,
        'config': config,
    }
def test_deprecated_gmpp():
    """Check that GeneralizedMeanPropertyPredictor emits two deprecation warnings.

    The predictor is built with a float ``p`` while every warning is being
    recorded. The test checks that ``p`` compares equal to 2, that exactly two
    warnings were captured, and that each is a ``DeprecationWarning`` whose
    message starts with one of the two expected prefixes.
    """
    expected_prefixes = (
        'p must be an integer',
        'GeneralizedMeanPropertyPredictor is deprecated',
    )
    with warnings.catch_warnings(record=True) as recorded:
        # "always" defeats the once-per-location filter so both warnings are
        # recorded even if they already fired elsewhere in the test session.
        warnings.simplefilter("always")
        predictor = GeneralizedMeanPropertyPredictor(
            name='deprecated',
            description='p as float',
            input_descriptor=FormulationDescriptor('formulation'),
            properties=['foo'],
            p=2.0,
            impute_properties=False,
        )

        assert predictor.p == 2
        assert len(recorded) == 2
        for entry in recorded:
            assert issubclass(entry.category, DeprecationWarning)
            text = str(entry.message)
            # str.startswith accepts a tuple and matches any of its members.
            assert text.startswith(expected_prefixes)
def valid_ing_to_simple_mixture_predictor_data():
    """Return a serialized ingredients-to-simple-mixture predictor payload for tests.

    The nested ``config`` is of type ``'IngredientsToSimpleMixture'``. It maps
    the ingredient ids ``'water'`` and ``'salt'`` to dumped real descriptors
    and assigns each ingredient to one label. The module ``id`` is a freshly
    generated random UUID string.
    """
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor

    module_id = str(uuid.uuid4())
    quantities = {
        'water': RealDescriptor('water quantity', 0, 1).dump(),
        'salt': RealDescriptor('salt quantity', 0, 1).dump(),
    }
    config = {
        'type': 'IngredientsToSimpleMixture',
        'name': 'Ingredients to simple mixture predictor',
        'description': 'Constructs mixtures from ingredients',
        'output': FormulationDescriptor('simple mixture').dump(),
        'id_to_quantity': quantities,
        'labels': {
            'solvent': ['water'],
            'solute': ['salt'],
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Ingredients to simple mixture predictor',
        'schema_id': '873e4541-da8a-4698-a981-732c0c729c3d',
        'id': module_id,
        'config': config,
    }
def test_formulation_deserialization(valid_formulation_design_space_data):
    """Check that a deserialized FormulationDesignSpace has the expected contents.

    The payload is built twice: polymorphically through ``DesignSpace`` and
    directly through ``FormulationDesignSpace``. Both results must expose the
    same name, description, descriptor key, ingredients, labels, single
    ingredient count constraint and resolution.
    """
    descriptor = FormulationDescriptor('formulation')
    reference_constraint = IngredientCountConstraint(
        formulation_descriptor=descriptor,
        min=0,
        max=1,
    )

    for builder in (DesignSpace, FormulationDesignSpace):
        space: FormulationDesignSpace = builder.build(valid_formulation_design_space_data)

        assert space.name == 'formulation design space'
        assert space.description == 'formulates some things'
        assert space.formulation_descriptor.key == descriptor.key
        assert space.ingredients == {'foo'}
        assert space.labels == {'bar': {'foo'}}

        assert len(space.constraints) == 1
        # Pull the single constraint out via iteration rather than indexing.
        built_constraint: IngredientCountConstraint = next(iter(space.constraints))
        assert built_constraint.formulation_descriptor == descriptor
        assert built_constraint.min == reference_constraint.min
        assert built_constraint.max == reference_constraint.max

        assert space.resolution == 0.1
def valid_simple_mixture_predictor_data():
    """Return a serialized simple mixture predictor payload for tests.

    The nested ``config`` is of type ``'SimpleMixture'`` with separate dumped
    input and output formulation descriptors and one dumped training data
    source built on the input descriptor. The module ``id`` and the first
    argument of the data source are freshly generated random UUIDs.
    """
    from citrine.informatics.data_sources import GemTableDataSource
    from citrine.informatics.descriptors import FormulationDescriptor

    source_formulation = FormulationDescriptor('input formulation')
    target_formulation = FormulationDescriptor('output formulation')
    module_id = str(uuid.uuid4())
    training_source = GemTableDataSource(uuid.uuid4(), 0, source_formulation)

    config = {
        'type': 'SimpleMixture',
        'name': 'Simple mixture predictor',
        'description': 'simple mixture description',
        'input': source_formulation.dump(),
        'output': target_formulation.dump(),
        'training_data': [training_source.dump()],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Simple mixture predictor',
        'id': module_id,
        'config': config,
    }
import pytest

from citrine.informatics.data_sources import DataSource, CSVDataSource, GemTableDataSource
from citrine.informatics.descriptors import RealDescriptor, FormulationDescriptor
from citrine.resources.file_link import FileLink


@pytest.fixture(params=[
    CSVDataSource(
        FileLink("foo.spam", "http://example.com"),
        {"spam": RealDescriptor("eggs", lower_bound=0, upper_bound=1.0, units="")},
        ["identifier"],
    ),
    GemTableDataSource(uuid.uuid4(), 1),
    GemTableDataSource(uuid.uuid4(), "2"),
    GemTableDataSource(uuid.uuid4(), "2", FormulationDescriptor("formulation")),
])
def data_source(request):
    """Yield each parametrized data source in turn: one CSV and three GEM table variants."""
    return request.param


def test_deser_from_parent(data_source):
    """Check that a data source survives a dump/build round trip via the DataSource base class."""
    # Serialize the data source, then rebuild it polymorphically from the dumped form.
    serialized = data_source.dump()
    rebuilt = DataSource.build(serialized)
    assert data_source == rebuilt


def test_invalid_eq(data_source):
    """Check that a data source does not compare equal to None."""
    other = None
    # Deliberately spelled with `==` so that the equality operator is what gets exercised.
    assert not data_source == other
# Real-valued descriptors keyed "x", "y" and "z". The positional arguments are
# presumably (key, lower_bound, upper_bound, units) — TODO confirm against
# the RealDescriptor signature.
x = RealDescriptor("x", 0, 100, "")
y = RealDescriptor("y", 0, 100, "")
z = RealDescriptor("z", 0, 100, "")

# Real-valued descriptors for three mechanical properties, with explicit bounds and units.
shear_modulus = RealDescriptor('Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')
youngs_modulus = RealDescriptor('Property~Young\'s modulus', lower_bound=0, upper_bound=100, units='GPa')
poissons_ratio = RealDescriptor('Property~Poisson\'s ratio', lower_bound=-1, upper_bound=0.5, units='')

# Formulation descriptors and the quantity descriptors for water and salt.
formulation = FormulationDescriptor('formulation')
formulation_output = FormulationDescriptor('output formulation')
water_quantity = RealDescriptor('water quantity', 0, 1)
salt_quantity = RealDescriptor('salt quantity', 0, 1)

# Data sources built from hard-coded UUIDs, so their values are identical on
# every run. The second one additionally carries the formulation descriptor.
data_source = GemTableDataSource(
    uuid.UUID('e5c51369-8e71-4ec6-b027-1f92bdc14762'), 0)
formulation_data_source = GemTableDataSource(
    uuid.UUID('6894a181-81d2-4304-9dfa-a6c5b114d8bc'), 0, formulation)


@pytest.fixture
def simple_predictor() -> SimpleMLPredictor:
    """Build a SimpleMLPredictor for testing."""
    return SimpleMLPredictor(
        name='ML predictor',
        description='Predicts z from input x and latent variable y',
import pytest from citrine.informatics.descriptors import FormulationDescriptor from citrine.informatics.constraints import ScalarRangeConstraint, CategoricalConstraint, \ IngredientCountConstraint, IngredientFractionConstraint, LabelFractionConstraint from citrine.informatics.design_spaces import ProductDesignSpace, EnumeratedDesignSpace, FormulationDesignSpace from citrine.informatics.objectives import ScalarMaxObjective, ScalarMinObjective from citrine.informatics.processors import GridProcessor, EnumeratedProcessor from citrine.informatics.scores import LIScore, EIScore, EVScore from citrine.informatics.reports import ModelSummary, FeatureImportanceReport informatics_string_data = [ (IngredientCountConstraint( formulation_descriptor=FormulationDescriptor('x'), min=0, max=1), "<IngredientCountConstraint 'x'>"), (IngredientFractionConstraint( formulation_descriptor=FormulationDescriptor('x'), ingredient='y', min=0, max=1), "<IngredientFractionConstraint 'x'::'y'>"), (LabelFractionConstraint(formulation_descriptor=FormulationDescriptor('x'), label='y', min=0, max=1), "<LabelFractionConstraint 'x'::'y'>"), (ScalarRangeConstraint('z'), "<ScalarRangeConstraint 'z'>"), (CategoricalConstraint('x', []), "<CategoricalConstraint 'x'>"), (ProductDesignSpace(name='my design space', description='does some things'), "<ProductDesignSpace 'my design space'>"), (EnumeratedDesignSpace('enumerated', 'desc', [], []), "<EnumeratedDesignSpace 'enumerated'>"),
"""Tests for citrine.informatics.constraints."""
import pytest

from citrine.informatics.constraints import *
from citrine.informatics.descriptors import FormulationDescriptor

# Formulation descriptor keyed 'formulation', created once at module level.
formulation_descriptor = FormulationDescriptor('formulation')


@pytest.fixture
def scalar_range_constraint() -> ScalarRangeConstraint:
    """Build a ScalarRangeConstraint on descriptor 'z' from 1.0 to 10.0 with min_inclusive=False."""
    return ScalarRangeConstraint(descriptor_key='z', min=1.0, max=10.0, min_inclusive=False)


@pytest.fixture
def categorical_constraint() -> AcceptableCategoriesConstraint:
    """Build an AcceptableCategoriesConstraint on descriptor 'x' accepting 'y' and 'z'."""
    return AcceptableCategoriesConstraint(descriptor_key='x', acceptable_categories=['y', 'z'])


@pytest.fixture
def ingredient_fraction_constraint() -> IngredientFractionConstraint:
    """Build an IngredientFractionConstraint."""
    return IngredientFractionConstraint(
        formulation_descriptor=formulation_descriptor,
        ingredient='foo',
def test_mean_feature_properties():
    """Exercise build_mean_feature_property_predictors on valid and invalid inputs.

    For both a molecular structure featurizer and a chemical formula
    featurizer, the standard call must yield two models (all ingredients plus
    one label) whose settings match the request. Four misuse cases must each
    raise the expected exception type.
    """
    property_count = 3
    project = FakeProject(FakeDescriptorMethods(num_properties=property_count))
    smiles = MolecularStructureDescriptor("smiles")
    chem = ChemicalFormulaDescriptor("formula")
    formulation = FormulationDescriptor("formulation")
    mol_featurizer = MolecularStructureFeaturizer(name="", description="", descriptor=smiles)
    chem_featurizer = ChemicalFormulaFeaturizer(name="", description="", input_descriptor=chem)

    for candidate in (mol_featurizer, chem_featurizer):
        # A standard case. Here we request one model for all ingredients and one for a label.
        built_models, built_outputs = build_mean_feature_property_predictors(
            project=project,
            featurizer=candidate,
            formulation_descriptor=formulation,
            p=7,
            impute_properties=False,
            make_all_ingredients_model=True,
            labels=["some label"],
        )
        assert len(built_outputs) == property_count * 2
        assert len(built_models) == 2
        for built in built_models:
            assert built.p == 7
            assert built.impute_properties == False
            assert built.input_descriptor == formulation
            assert len(built.properties) == property_count
        # It's not necessary for the models to be returned in this order,
        # but this is how the logic is currently set up.
        all_ingredients_model, labelled_model = built_models[0], built_models[1]
        assert all_ingredients_model.label is None
        assert labelled_model.label == "some label"

    # expect an error if the featurizer model is not of allowed type
    not_featurizer = LabelFractionsPredictor(
        name="",
        description="",
        input_descriptor=formulation,
        labels={"label"},
    )
    with pytest.raises(TypeError):
        build_mean_feature_property_predictors(
            project=project,
            featurizer=not_featurizer,
            formulation_descriptor=formulation,
            p=1,
        )

    # expect an error if there are no mean property models requested
    with pytest.raises(ValueError):
        build_mean_feature_property_predictors(
            project=project,
            featurizer=mol_featurizer,
            formulation_descriptor=formulation,
            p=1,
            make_all_ingredients_model=False,
            labels=None,
        )

    # expect an error if the featurizer model returns no real properties
    no_props_project = FakeProject(FakeDescriptorMethods(num_properties=0))
    with pytest.raises(RuntimeError):
        build_mean_feature_property_predictors(
            project=no_props_project,
            featurizer=mol_featurizer,
            formulation_descriptor=formulation,
            p=1,
        )

    # expect an error if labels is not specified as a list
    with pytest.raises(TypeError):
        build_mean_feature_property_predictors(
            project=no_props_project,
            featurizer=mol_featurizer,
            formulation_descriptor=formulation,
            p=1,
            labels="not inside a list!",
        )
def valid_product_design_space_data():
    """Return a serialized product design space payload for tests.

    The nested ``config`` is of type ``'ProductDesignSpace'`` and combines two
    formulation subspaces (one with an id, one with ``id=None``) with two
    dimensions: a continuous one on a real descriptor and an enumerated one on
    a categorical descriptor. Every id and ``template_id`` that is not ``None``
    is a freshly generated random UUID string.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    module_id = str(uuid.uuid4())

    first_subspace = {
        'module_type': 'DESIGN_SPACE',
        'status': 'READY',
        'id': str(uuid.uuid4()),
        'archived': False,
        'name': 'first subspace',
        'instance': {
            'type': 'FormulationDesignSpace',
            'name': 'first subspace',
            'description': '',
            'formulation_descriptor': FormulationDescriptor('X').dump(),
            'ingredients': ['foo'],
            # NOTE(review): this label value is a set rather than a list, so the
            # payload cannot be passed to json.dumps as-is — confirm intended.
            'labels': {'bar': {'foo'}},
            'constraints': [],
            'resolution': 0.1,
        },
    }
    second_subspace = {
        'module_type': 'DESIGN_SPACE',
        'status': 'CREATED',
        'id': None,
        'archived': False,
        'name': 'second subspace',
        'instance': {
            'type': 'FormulationDesignSpace',
            'name': 'second subspace',
            'description': 'formulates some things',
            'formulation_descriptor': FormulationDescriptor('Y').dump(),
            'ingredients': ['baz'],
            'labels': {},
            'constraints': [],
            'resolution': 0.1,
        },
    }

    continuous_dimension = {
        'type': 'ContinuousDimension',
        'template_id': str(uuid.uuid4()),
        'descriptor': {
            'type': 'Real',
            'descriptor_key': 'alpha',
            'units': '',
            'lower_bound': 5.0,
            'upper_bound': 10.0,
        },
        'lower_bound': 6.0,
        'upper_bound': 7.0,
    }
    enumerated_dimension = {
        'type': 'EnumeratedDimension',
        'template_id': str(uuid.uuid4()),
        'descriptor': {
            'type': 'Categorical',
            'descriptor_key': 'color',
            'descriptor_values': ['blue', 'green', 'red'],
        },
        'list': ['red'],
    }

    config = {
        'type': 'ProductDesignSpace',
        'name': 'my design space',
        'description': 'does some things',
        'subspaces': [first_subspace, second_subspace],
        'dimensions': [continuous_dimension, enumerated_dimension],
    }
    return {
        'module_type': 'DESIGN_SPACE',
        'status': 'VALIDATING',
        'status_info': None,
        'archived': False,
        'display_name': 'my design space',
        'id': module_id,
        'config': config,
    }