def enumerated_design_space() -> EnumeratedDesignSpace:
    """Return a small EnumeratedDesignSpace for use in tests.

    The space is described by a real-valued descriptor ``x`` bounded to
    [0, 1] and a categorical descriptor ``color`` with categories r/g/b,
    and it enumerates exactly two candidates.
    """
    descriptors = [
        RealDescriptor('x', lower_bound=0.0, upper_bound=1.0),
        CategoricalDescriptor('color', ['r', 'g', 'b']),
    ]
    candidates = [
        {'x': 0, 'color': 'r'},
        {'x': 1.0, 'color': 'b'},
    ]
    return EnumeratedDesignSpace(
        'enumerated',
        'desc',
        descriptors=descriptors,
        data=candidates,
    )
Esempio n. 2
0
def product_design_space() -> ProductDesignSpace:
    """Return a three-dimensional ProductDesignSpace for use in tests.

    Two continuous dimensions (``alpha`` and ``beta``) restrict descriptors
    bounded to [0, 100] down to [0, 10]; one enumerated dimension restricts
    the categorical ``gamma`` descriptor to the values 'a' and 'c'.
    """
    alpha_desc = RealDescriptor('alpha', 0, 100, "")
    beta_desc = RealDescriptor('beta', 0, 100, "")
    gamma_desc = CategoricalDescriptor('gamma', ['a', 'b', 'c'])

    # Both real descriptors get the same [0, 10] sub-range.
    dims = [ContinuousDimension(desc, 0, 10) for desc in (alpha_desc, beta_desc)]
    dims.append(EnumeratedDimension(gamma_desc, ['a', 'c']))

    return ProductDesignSpace(
        name='my design space',
        description='does some things',
        dimensions=dims,
    )
def test_joined_oversize_warnings(large_joint_design_space):
    """Check that joining too-large design spaces emits a UserWarning.

    Two small enumerated spaces are built and joined with the
    ``large_joint_design_space`` fixture; the resulting warning message is
    expected to contain "239203125" (presumably the size of the joined
    space -- confirm against the library's warning text).
    """
    # (space name, (first real key, second real key, categorical key))
    space_specs = (
        ('basic space 2', ('delta', 'epsilon', 'zeta')),
        ('basic space 3', ('eta', 'theta', 'iota')),
    )

    with pytest.raises(UserWarning, match="239203125"):
        # Promote warnings to errors so execution stops at the warning
        # instead of continuing with the oversized join.
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            small_spaces = []
            for space_name, (first_key, second_key, cat_key) in space_specs:
                first = RealDescriptor(first_key, 0, 100)
                second = RealDescriptor(second_key, 0, 100)
                category = CategoricalDescriptor(cat_key, ['a', 'b', 'c'])
                grid = {
                    first_key: [0, 50, 100],
                    second_key: [0, 25, 50, 75, 100],
                    cat_key: ['a', 'b', 'c'],
                }
                small_spaces.append(
                    enumerate_cartesian_product(
                        design_grid=grid,
                        descriptors=[first, second, category],
                        name=space_name,
                        description='',
                    )
                )

            cartesian_join_design_spaces(
                subspaces=[*small_spaces, large_joint_design_space],
                name='too big join space',
                description='',
            )
def basic_cartesian_space() -> EnumeratedDesignSpace:
    """Return a small cartesian-product design space for use in tests.

    The grid has 3 values for ``alpha``, 5 for ``beta`` and 3 for ``gamma``,
    and is handed to ``enumerate_cartesian_product`` together with the
    matching descriptors.
    """
    descriptors = [
        RealDescriptor('alpha', 0, 100),
        RealDescriptor('beta', 0, 100),
        CategoricalDescriptor('gamma', ['a', 'b', 'c']),
    ]
    grid = {
        'alpha': [0, 50, 100],
        'beta': [0, 25, 50, 75, 100],
        'gamma': ['a', 'b', 'c'],
    }
    return enumerate_cartesian_product(
        design_grid=grid,
        descriptors=descriptors,
        name='basic space',
        description='',
    )
def template_to_descriptor(template: AttributeTemplate) -> Descriptor:
    """
    Build the AI Engine Descriptor that corresponds to a GEMD attribute template.

    The descriptor type is chosen from the type of the template's bounds:
    RealBounds -> RealDescriptor, CategoricalBounds -> CategoricalDescriptor,
    MolecularStructureBounds -> MolecularStructureDescriptor, and
    CompositionBounds -> ChemicalFormulaDescriptor (only when every component
    is an element). IntegerBounds have no matching descriptor type.

    Parameters
    ----------
    template: AttributeTemplate
        Template to convert into a descriptor

    Returns
    -------
    Descriptor
        Descriptor whose key is the template name and whose type corresponds to the bounds

    Raises
    ------
    NoEquivalentDescriptorError
        If the bounds are IntegerBounds, or CompositionBounds with at least one
        component that is not an element
    ValueError
        If the bounds are of a type not handled here

    """
    bounds = template.bounds

    if isinstance(bounds, RealBounds):
        return RealDescriptor(
            key=template.name,
            lower_bound=bounds.lower_bound,
            upper_bound=bounds.upper_bound,
            units=bounds.default_units,
        )

    if isinstance(bounds, CategoricalBounds):
        return CategoricalDescriptor(key=template.name, categories=bounds.categories)

    if isinstance(bounds, MolecularStructureBounds):
        return MolecularStructureDescriptor(key=template.name)

    if isinstance(bounds, CompositionBounds):
        # Only purely elemental compositions map onto a chemical formula.
        all_atomic = set(bounds.components).issubset(EmpiricalFormula.all_elements())
        if not all_atomic:
            raise NoEquivalentDescriptorError(
                "Cannot create descriptor for CompositionBounds with non-atomic components"
            )
        return ChemicalFormulaDescriptor(key=template.name)

    if isinstance(bounds, IntegerBounds):
        raise NoEquivalentDescriptorError(
            "Cannot create a descriptor for integer-valued data"
        )

    raise ValueError(f"Template has unrecognized bounds: {type(bounds)}")
def test_valid_template_conversions():
    """Check template_to_descriptor for each convertible bounds type.

    Covers real, categorical, molecular-structure and all-element composition
    bounds, across property, condition and parameter templates.
    """
    density_tmpl = PropertyTemplate(
        name="density",
        bounds=RealBounds(lower_bound=0, upper_bound=100, default_units="g/cm^3"),
    )
    speed_tmpl = ConditionTemplate(
        name="speed",
        bounds=CategoricalBounds(categories=["low", "high"]),
    )
    solvent_tmpl = ParameterTemplate(name="solvent", bounds=MolecularStructureBounds())
    formula_tmpl = PropertyTemplate(
        name="formula",
        bounds=CompositionBounds(components=EmpiricalFormula.all_elements()),
    )

    # Each template paired with the descriptor it should convert to.
    cases = [
        (density_tmpl, density_desc),
        (speed_tmpl, CategoricalDescriptor(key="speed", categories=["low", "high"])),
        (solvent_tmpl, MolecularStructureDescriptor(key="solvent")),
        (formula_tmpl, ChemicalFormulaDescriptor(key="formula")),
    ]

    for template, wanted in cases:
        assert template_to_descriptor(template) == wanted
    def from_predictor_responses(self, predictor: Predictor, inputs: List[Descriptor]):
        """Return canned response descriptors for the given predictor.

        For a molecular-structure or chemical-formula featurizer the result is
        ``self.num_properties`` real descriptors followed by one categorical
        descriptor, all keyed off the featurizer's input descriptor. For a
        MeanPropertyPredictor the result is one real descriptor per property.
        Any other predictor type yields None. ``inputs`` is not used.
        """
        featurizer_types = (MolecularStructureFeaturizer, ChemicalFormulaFeaturizer)

        if isinstance(predictor, featurizer_types):
            # The two featurizer classes expose their input under different attribute names.
            source = (
                predictor.descriptor
                if isinstance(predictor, MolecularStructureFeaturizer)
                else predictor.input_descriptor
            )
            responses = []
            for idx in range(self.num_properties):
                responses.append(
                    RealDescriptor(
                        f"{source.key} real property {idx}",
                        lower_bound=0,
                        upper_bound=1,
                        units="",
                    )
                )
            responses.append(
                CategoricalDescriptor(f"{source.key} categorical property", ["cat1", "cat2"])
            )
            return responses

        if isinstance(predictor, MeanPropertyPredictor):
            # A missing/empty label means the mean is taken over every ingredient.
            label_text = predictor.label or "all ingredients"
            responses = []
            for prop in predictor.properties:
                responses.append(
                    RealDescriptor(
                        f"mean of {prop.key} for {label_text} in {predictor.input_descriptor.key}",
                        lower_bound=0,
                        upper_bound=1,
                        units="",
                    )
                )
            return responses

        # Unhandled predictor types produce no descriptors.
        return None
def enumerated_dimension() -> EnumeratedDimension:
    """Return an EnumeratedDimension over a categorical ``color`` descriptor.

    The values list deliberately repeats 'red' and omits 'green'.
    """
    palette = {'red', 'green', 'blue'}
    chosen_values = ['red', 'red', 'blue']
    color_descriptor = CategoricalDescriptor('color', categories=palette)
    return EnumeratedDimension(color_descriptor, values=chosen_values)
"""Tests for citrine.informatics.descriptors."""
import pytest

from citrine.informatics.descriptors import RealDescriptor, Descriptor, InorganicDescriptor, CategoricalDescriptor


@pytest.fixture(
    params=[
        RealDescriptor('alpha', 0, 100),
        InorganicDescriptor('formula'),
        CategoricalDescriptor("my categorical", ["a", "b"]),
        CategoricalDescriptor("categorical", ["*"]),
    ],
)
def descriptor(request):
    """Parametrized fixture: supplies one descriptor per listed param.

    Tests that request this fixture run once for each of the four
    descriptors above.
    """
    return request.param


def test_deser_from_parent(descriptor):
    """A descriptor dumped and rebuilt through the Descriptor base class equals the original."""
    # Round trip: dump to serialized data, then rebuild via the parent class.
    round_tripped = Descriptor.build(descriptor.dump())
    assert descriptor == round_tripped


def test_invalid_eq(descriptor):
    """Comparing a descriptor with None via ``==`` must not be truthy."""
    not_a_descriptor = None
    # Deliberately `not ... ==` (rather than `!=`) so the equality path is exercised.
    assert not descriptor == not_a_descriptor