def enumerated_design_space() -> EnumeratedDesignSpace:
    """Create a small EnumeratedDesignSpace for use in tests.

    The space is described by a real descriptor 'x' bounded to [0.0, 1.0]
    and a categorical descriptor 'color' with categories 'r', 'g' and 'b'.
    It is populated with two candidate rows.
    """
    descriptors = [
        RealDescriptor('x', lower_bound=0.0, upper_bound=1.0),
        CategoricalDescriptor('color', ['r', 'g', 'b']),
    ]
    candidates = [
        {'x': 0, 'color': 'r'},
        {'x': 1.0, 'color': 'b'},
    ]
    return EnumeratedDesignSpace(
        'enumerated',
        'desc',
        descriptors=descriptors,
        data=candidates,
    )
def product_design_space() -> ProductDesignSpace:
    """Create a ProductDesignSpace with three dimensions for use in tests.

    Two continuous dimensions restrict the real descriptors 'alpha' and
    'beta' (each declared on 0-100) to the range 0-10, and one enumerated
    dimension restricts the categorical descriptor 'gamma' to 'a' and 'c'.
    """
    alpha_descriptor = RealDescriptor('alpha', 0, 100, "")
    beta_descriptor = RealDescriptor('beta', 0, 100, "")
    gamma_descriptor = CategoricalDescriptor('gamma', ['a', 'b', 'c'])

    return ProductDesignSpace(
        name='my design space',
        description='does some things',
        dimensions=[
            ContinuousDimension(alpha_descriptor, 0, 10),
            ContinuousDimension(beta_descriptor, 0, 10),
            EnumeratedDimension(gamma_descriptor, ['a', 'c']),
        ],
    )
def test_joined_oversize_warnings(large_joint_design_space):
    """Check that joining oversized design spaces raises a warning.

    Two enumerated cartesian-product spaces are built and joined with the
    ``large_joint_design_space`` fixture. The join is expected to emit a
    UserWarning whose message matches "239203125".
    """
    # Warnings are escalated to errors so that execution stops at the first
    # warning instead of carrying on with the join.
    with pytest.raises(UserWarning, match="239203125"), warnings.catch_warnings():
        warnings.simplefilter('error')

        # First enumerated subspace
        delta_desc = RealDescriptor('delta', 0, 100)
        epsilon_desc = RealDescriptor('epsilon', 0, 100)
        zeta_desc = CategoricalDescriptor('zeta', ['a', 'b', 'c'])
        second_grid = {
            'delta': [0, 50, 100],
            'epsilon': [0, 25, 50, 75, 100],
            'zeta': ['a', 'b', 'c'],
        }
        second_space = enumerate_cartesian_product(
            design_grid=second_grid,
            descriptors=[delta_desc, epsilon_desc, zeta_desc],
            name='basic space 2',
            description='',
        )

        # Second enumerated subspace
        eta_desc = RealDescriptor('eta', 0, 100)
        theta_desc = RealDescriptor('theta', 0, 100)
        iota_desc = CategoricalDescriptor('iota', ['a', 'b', 'c'])
        third_grid = {
            'eta': [0, 50, 100],
            'theta': [0, 25, 50, 75, 100],
            'iota': ['a', 'b', 'c'],
        }
        third_space = enumerate_cartesian_product(
            design_grid=third_grid,
            descriptors=[eta_desc, theta_desc, iota_desc],
            name='basic space 3',
            description='',
        )

        cartesian_join_design_spaces(
            subspaces=[second_space, third_space, large_joint_design_space],
            name='too big join space',
            description='',
        )
def basic_cartesian_space() -> EnumeratedDesignSpace:
    """Create a basic enumerated cartesian-product space for use in tests.

    The grid supplies three values for 'alpha', five for 'beta' and three
    for 'gamma'; the space is produced by ``enumerate_cartesian_product``.
    """
    descriptors = [
        RealDescriptor('alpha', 0, 100),
        RealDescriptor('beta', 0, 100),
        CategoricalDescriptor('gamma', ['a', 'b', 'c']),
    ]
    grid = {
        'alpha': [0, 50, 100],
        'beta': [0, 25, 50, 75, 100],
        'gamma': ['a', 'b', 'c'],
    }
    return enumerate_cartesian_product(
        design_grid=grid,
        descriptors=descriptors,
        name='basic space',
        description='',
    )
def template_to_descriptor(template: AttributeTemplate) -> Descriptor:
    """
    Convert a GEMD attribute template into an AI Engine Descriptor.

    IntegerBounds cannot be converted because they have no matching
    descriptor type. CompositionBounds can only be converted when every
    component is an element, in which case they are converted to
    ChemicalFormulaDescriptors.

    Parameters
    ----------
    template: AttributeTemplate
        Template to convert into a descriptor

    Returns
    -------
    Descriptor
        Descriptor with a key matching the template name and type
        corresponding to the bounds

    Raises
    ------
    NoEquivalentDescriptorError
        If the bounds are IntegerBounds, or CompositionBounds with at least
        one component that is not an element
    ValueError
        If the bounds are of any other unrecognized type

    """
    key = template.name
    bounds = template.bounds

    if isinstance(bounds, RealBounds):
        return RealDescriptor(
            key=key,
            lower_bound=bounds.lower_bound,
            upper_bound=bounds.upper_bound,
            units=bounds.default_units,
        )

    if isinstance(bounds, CategoricalBounds):
        return CategoricalDescriptor(key=key, categories=bounds.categories)

    if isinstance(bounds, MolecularStructureBounds):
        return MolecularStructureDescriptor(key=key)

    if isinstance(bounds, CompositionBounds):
        # Only purely elemental compositions map onto a chemical formula
        all_atomic = set(bounds.components).issubset(EmpiricalFormula.all_elements())
        if not all_atomic:
            msg = "Cannot create descriptor for CompositionBounds with non-atomic components"
            raise NoEquivalentDescriptorError(msg)
        return ChemicalFormulaDescriptor(key=key)

    if isinstance(bounds, IntegerBounds):
        raise NoEquivalentDescriptorError(
            "Cannot create a descriptor for integer-valued data")

    raise ValueError("Template has unrecognized bounds: {}".format(type(bounds)))
def test_valid_template_conversions():
    """Each supported template converts to the expected descriptor.

    Covers real, categorical, molecular-structure and all-element
    composition bounds, across property, condition and parameter templates.
    """
    real_case = (
        PropertyTemplate(
            name="density",
            bounds=RealBounds(lower_bound=0, upper_bound=100, default_units="g/cm^3"),
        ),
        density_desc,
    )
    categorical_case = (
        ConditionTemplate(
            name="speed",
            bounds=CategoricalBounds(categories=["low", "high"]),
        ),
        CategoricalDescriptor(key="speed", categories=["low", "high"]),
    )
    molecular_case = (
        ParameterTemplate(name="solvent", bounds=MolecularStructureBounds()),
        MolecularStructureDescriptor(key="solvent"),
    )
    formula_case = (
        PropertyTemplate(
            name="formula",
            bounds=CompositionBounds(components=EmpiricalFormula.all_elements()),
        ),
        ChemicalFormulaDescriptor(key="formula"),
    )

    cases = [real_case, categorical_case, molecular_case, formula_case]
    for template, expected_descriptor in cases:
        assert template_to_descriptor(template) == expected_descriptor
def from_predictor_responses(self, predictor: Predictor, inputs: List[Descriptor]):
    """Return fabricated response descriptors for the given predictor.

    For a MolecularStructureFeaturizer or ChemicalFormulaFeaturizer this
    yields ``self.num_properties`` real descriptors followed by a single
    categorical descriptor, all named after the featurizer's input
    descriptor key. For a MeanPropertyPredictor it yields one real
    descriptor per entry in ``predictor.properties``. Any other predictor
    type results in None.

    ``inputs`` is accepted but not used here.
    """
    featurizer_types = (MolecularStructureFeaturizer, ChemicalFormulaFeaturizer)

    if isinstance(predictor, featurizer_types):
        # The two featurizers expose their input descriptor under different
        # attribute names.
        source = (
            predictor.descriptor
            if isinstance(predictor, MolecularStructureFeaturizer)
            else predictor.input_descriptor
        )
        real_outputs = [
            RealDescriptor(
                f"{source.key} real property {index}",
                lower_bound=0,
                upper_bound=1,
                units="",
            )
            for index in range(self.num_properties)
        ]
        categorical_output = CategoricalDescriptor(
            f"{source.key} categorical property",
            ["cat1", "cat2"],
        )
        return real_outputs + [categorical_output]

    if isinstance(predictor, MeanPropertyPredictor):
        # Fall back to a generic phrase when no label is set
        label_text = predictor.label or "all ingredients"
        return [
            RealDescriptor(
                f"mean of {mean_prop.key} for {label_text} in {predictor.input_descriptor.key}",
                lower_bound=0,
                upper_bound=1,
                units="",
            )
            for mean_prop in predictor.properties
        ]

    return None
def enumerated_dimension() -> EnumeratedDimension:
    """Create an EnumeratedDimension over a categorical 'color' descriptor.

    The descriptor's categories are given as a set, and the dimension's
    value list deliberately repeats 'red'.
    """
    categories = {'red', 'green', 'blue'}
    color_descriptor = CategoricalDescriptor('color', categories=categories)
    dimension_values = ['red', 'red', 'blue']
    return EnumeratedDimension(color_descriptor, values=dimension_values)
"""Tests for citrine.informatics.descriptors.""" import pytest from citrine.informatics.descriptors import RealDescriptor, Descriptor, InorganicDescriptor, CategoricalDescriptor @pytest.fixture(params=[ RealDescriptor('alpha', 0, 100), InorganicDescriptor('formula'), CategoricalDescriptor("my categorical", ["a", "b"]), CategoricalDescriptor("categorical", ["*"]) ]) def descriptor(request): return request.param def test_deser_from_parent(descriptor): # Serialize and deserialize the descriptors, making sure they are round-trip serializable descriptor_data = descriptor.dump() descriptor_deserialized = Descriptor.build(descriptor_data) assert descriptor == descriptor_deserialized def test_invalid_eq(descriptor): other = None assert not descriptor == other