コード例 #1
0
def valid_generalized_mean_property_predictor_data():
    """Build a serialized generalized mean property predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'GeneralizedMeanProperty'. The config takes a 'simple mixture' formulation
    as input and lists one dumped GemTableDataSource as training data.
    """
    from citrine.informatics.descriptors import FormulationDescriptor
    from citrine.informatics.data_sources import GemTableDataSource

    mixture = FormulationDescriptor('simple mixture')
    # The module id is drawn before the table id, as in the inline form.
    module_id = str(uuid.uuid4())
    config = {
        'type': 'GeneralizedMeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': ['density'],
        'p': 2,
        'training_data': [
            GemTableDataSource(uuid.uuid4(), 0, mixture).dump(),
        ],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'schema_id': '29e53222-3217-4f81-b3b8-4197a8211ade',
        'id': module_id,
        'config': config,
    }
コード例 #2
0
def valid_mean_property_predictor_data():
    """Build a serialized mean property predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'MeanProperty'. The single property is a dumped real-valued 'density'
    descriptor, and the training data is one dumped GemTableDataSource.
    """
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor
    from citrine.informatics.data_sources import GemTableDataSource

    mixture = FormulationDescriptor('simple mixture')
    density_descriptor = RealDescriptor(
        key='density', lower_bound=0, upper_bound=100, units='g/cm^3')
    # The module id is drawn before the table id, as in the inline form.
    module_id = str(uuid.uuid4())
    config = {
        'type': 'MeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': [density_descriptor.dump()],
        'p': 2,
        'training_data': [
            GemTableDataSource(uuid.uuid4(), 0, mixture).dump(),
        ],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'id': module_id,
        'config': config,
    }
コード例 #3
0
def valid_formulation_design_space_data():
    """Build a serialized formulation design space for tests.

    Returns a dictionary describing an archived 'DESIGN_SPACE' module whose
    config has type 'FormulationDesignSpace', with one ingredient, one label
    and one dumped IngredientCountConstraint.
    """
    from citrine.informatics.constraints import IngredientCountConstraint
    from citrine.informatics.descriptors import FormulationDescriptor

    formulation = FormulationDescriptor('formulation')
    count_constraint = IngredientCountConstraint(
        formulation_descriptor=formulation, min=0, max=1)
    module_id = str(uuid.uuid4())
    config = {
        'type': 'FormulationDesignSpace',
        'name': 'formulation design space',
        'description': 'formulates some things',
        'formulation_descriptor': formulation.dump(),
        'ingredients': ['foo'],
        'labels': {'bar': ['foo']},
        'constraints': [count_constraint.dump()],
        'resolution': 0.1,
    }
    return {
        'module_type': 'DESIGN_SPACE',
        'status': 'VALIDATING',
        'status_info': None,
        'archived': True,
        'display_name': 'formulation design space',
        'id': module_id,
        'config': config,
    }
コード例 #4
0
def valid_ingredient_fractions_predictor_data():
    """Build a serialized ingredient fractions predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'IngredientFractions', takes an 'ingredients' formulation as input and
    names two ingredients.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    module_id = str(uuid.uuid4())
    config = {
        'type': 'IngredientFractions',
        'name': 'Ingredient fractions predictor',
        'description': 'Computes ingredient fractions',
        'input': FormulationDescriptor('ingredients').dump(),
        'ingredients': ['Blue dye', 'Red dye'],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Ingredient fractions predictor',
        'id': module_id,
        'config': config,
    }
コード例 #5
0
def test_deprecated_ingredients_to_simple_mixture():
    """Check that IngredientsToSimpleMixturePredictor emits one DeprecationWarning."""
    with warnings.catch_warnings(record=True) as recorded:
        # Record every warning, including ones normally shown only once.
        warnings.simplefilter("always")
        output_descriptor = FormulationDescriptor("formulation")
        quantity = RealDescriptor("foo", lower_bound=0, upper_bound=1, units="")
        predictor = IngredientsToSimpleMixturePredictor(
            name="deprecated",
            description="",
            output=output_descriptor,
            id_to_quantity={"quantity 1": quantity},
            labels={"label": {"foo"}},
        )

        # The deprecated class must still build a usable object.
        assert predictor.name == "deprecated"
        assert predictor.labels == {"label": {"foo"}}

        assert len(recorded) == 1
        only_warning = recorded[0]
        assert issubclass(only_warning.category, DeprecationWarning)
コード例 #6
0
def valid_label_fractions_predictor_data():
    """Build a serialized label fractions predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'LabelFractions', takes a 'simple mixture' formulation as input and lists
    the single label 'solvent'.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    module_id = str(uuid.uuid4())
    config = {
        'type': 'LabelFractions',
        'name': 'Label fractions predictor',
        'description': 'Computes relative proportions of labeled ingredients',
        'input': FormulationDescriptor('simple mixture').dump(),
        'labels': ['solvent'],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Label fractions predictor',
        'id': module_id,
        'config': config,
    }
コード例 #7
0
def test_deprecated_gmpp():
    """Check the two deprecation warnings from GeneralizedMeanPropertyPredictor.

    The predictor is built with a float ``p``. The test expects exactly two
    DeprecationWarnings, each starting with one of the known prefixes, and
    expects ``p`` to compare equal to 2 afterwards.
    """
    expected_prefixes = (
        'p must be an integer',
        'GeneralizedMeanPropertyPredictor is deprecated',
    )
    with warnings.catch_warnings(record=True) as recorded:
        # Record every warning, including ones normally shown only once.
        warnings.simplefilter("always")
        predictor = GeneralizedMeanPropertyPredictor(
            name='deprecated',
            description='p as float',
            input_descriptor=FormulationDescriptor('formulation'),
            properties=['foo'],
            p=2.0,
            impute_properties=False,
        )
        assert predictor.p == 2
        assert len(recorded) == 2
        for warning in recorded:
            assert issubclass(warning.category, DeprecationWarning)
            # str.startswith accepts a tuple and matches any of its prefixes.
            assert str(warning.message).startswith(expected_prefixes)
コード例 #8
0
def valid_ing_to_simple_mixture_predictor_data():
    """Build a serialized ingredients-to-simple-mixture predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'IngredientsToSimpleMixture'. The config maps 'water' and 'salt' to dumped
    quantity descriptors and assigns them the labels 'solvent' and 'solute'.
    """
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor

    module_id = str(uuid.uuid4())
    config = {
        'type': 'IngredientsToSimpleMixture',
        'name': 'Ingredients to simple mixture predictor',
        'description': 'Constructs mixtures from ingredients',
        'output': FormulationDescriptor('simple mixture').dump(),
        'id_to_quantity': {
            'water': RealDescriptor('water quantity', 0, 1).dump(),
            'salt': RealDescriptor('salt quantity', 0, 1).dump(),
        },
        'labels': {
            'solvent': ['water'],
            'solute': ['salt'],
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Ingredients to simple mixture predictor',
        'schema_id': '873e4541-da8a-4698-a981-732c0c729c3d',
        'id': module_id,
        'config': config,
    }
コード例 #9
0
def test_formulation_deserialization(valid_formulation_design_space_data):
    """Check that a deserialized FormulationDesignSpace has the expected fields.

    The payload is built twice: polymorphically through DesignSpace and
    directly through FormulationDesignSpace. Both results must match.
    """
    expected_descriptor = FormulationDescriptor('formulation')
    expected_constraint = IngredientCountConstraint(
        formulation_descriptor=expected_descriptor, min=0, max=1)

    for builder in (DesignSpace, FormulationDesignSpace):
        built: FormulationDesignSpace = builder.build(valid_formulation_design_space_data)

        assert built.name == 'formulation design space'
        assert built.description == 'formulates some things'
        assert built.formulation_descriptor.key == expected_descriptor.key
        assert built.ingredients == {'foo'}
        assert built.labels == {'bar': {'foo'}}

        assert len(built.constraints) == 1
        # Take the single constraint without assuming the container is indexable.
        constraint: IngredientCountConstraint = next(iter(built.constraints))
        assert constraint.formulation_descriptor == expected_descriptor
        assert constraint.min == expected_constraint.min
        assert constraint.max == expected_constraint.max

        assert built.resolution == 0.1
コード例 #10
0
def valid_simple_mixture_predictor_data():
    """Build a serialized simple mixture predictor for tests.

    Returns a dictionary describing a 'PREDICTOR' module whose config has type
    'SimpleMixture', with separate input and output formulation descriptors
    and one dumped GemTableDataSource as training data.
    """
    from citrine.informatics.data_sources import GemTableDataSource
    from citrine.informatics.descriptors import FormulationDescriptor

    mixture_in = FormulationDescriptor('input formulation')
    mixture_out = FormulationDescriptor('output formulation')
    # The module id is drawn before the table id, as in the inline form.
    module_id = str(uuid.uuid4())
    config = {
        'type': 'SimpleMixture',
        'name': 'Simple mixture predictor',
        'description': 'simple mixture description',
        'input': mixture_in.dump(),
        'output': mixture_out.dump(),
        'training_data': [
            GemTableDataSource(uuid.uuid4(), 0, mixture_in).dump(),
        ],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Simple mixture predictor',
        'id': module_id,
        'config': config,
    }
コード例 #11
0
import pytest

from citrine.informatics.data_sources import DataSource, CSVDataSource, GemTableDataSource
from citrine.informatics.descriptors import RealDescriptor, FormulationDescriptor
from citrine.resources.file_link import FileLink


@pytest.fixture(
    params=[
        # A CSV-backed source with one column mapping and one identifier column.
        CSVDataSource(
            FileLink("foo.spam", "http://example.com"),
            {"spam": RealDescriptor("eggs", lower_bound=0, upper_bound=1.0, units="")},
            ["identifier"],
        ),
        # GEM table sources: integer version, string version, and string
        # version combined with a formulation descriptor.
        GemTableDataSource(uuid.uuid4(), 1),
        GemTableDataSource(uuid.uuid4(), "2"),
        GemTableDataSource(uuid.uuid4(), "2", FormulationDescriptor("formulation")),
    ]
)
def data_source(request):
    """Yield each parametrized data source in turn."""
    source = request.param
    return source


def test_deser_from_parent(data_source):
    """Check that a data source survives dump() followed by DataSource.build()."""
    serialized = data_source.dump()
    # Build through the parent class so the concrete type is resolved from the payload.
    rebuilt = DataSource.build(serialized)
    assert data_source == rebuilt


def test_invalid_eq(data_source):
    """Check that a data source does not compare equal to None."""
    nothing = None
    # Use `==` explicitly so the data source's equality method is exercised.
    assert not (data_source == nothing)
コード例 #12
0
# Generic real-valued descriptors shared by the tests.
x = RealDescriptor("x", lower_bound=0, upper_bound=100, units="")
y = RealDescriptor("y", lower_bound=0, upper_bound=100, units="")
z = RealDescriptor("z", lower_bound=0, upper_bound=100, units="")

# Material property descriptors.
shear_modulus = RealDescriptor(
    'Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')
youngs_modulus = RealDescriptor(
    'Property~Young\'s modulus', lower_bound=0, upper_bound=100, units='GPa')
poissons_ratio = RealDescriptor(
    'Property~Poisson\'s ratio', lower_bound=-1, upper_bound=0.5, units='')

# Formulation descriptors and ingredient quantity descriptors.
formulation = FormulationDescriptor('formulation')
formulation_output = FormulationDescriptor('output formulation')
water_quantity = RealDescriptor('water quantity', lower_bound=0, upper_bound=1)
salt_quantity = RealDescriptor('salt quantity', lower_bound=0, upper_bound=1)

# GEM table data sources with fixed table ids, without and with a
# formulation descriptor.
data_source = GemTableDataSource(
    uuid.UUID('e5c51369-8e71-4ec6-b027-1f92bdc14762'),
    0,
)
formulation_data_source = GemTableDataSource(
    uuid.UUID('6894a181-81d2-4304-9dfa-a6c5b114d8bc'),
    0,
    formulation,
)


@pytest.fixture
def simple_predictor() -> SimpleMLPredictor:
    """Build a SimpleMLPredictor for testing."""
    return SimpleMLPredictor(
        name='ML predictor',
        description='Predicts z from input x and latent variable y',
コード例 #13
0
import pytest

from citrine.informatics.descriptors import FormulationDescriptor
from citrine.informatics.constraints import ScalarRangeConstraint, CategoricalConstraint, \
    IngredientCountConstraint, IngredientFractionConstraint, LabelFractionConstraint
from citrine.informatics.design_spaces import ProductDesignSpace, EnumeratedDesignSpace, FormulationDesignSpace
from citrine.informatics.objectives import ScalarMaxObjective, ScalarMinObjective
from citrine.informatics.processors import GridProcessor, EnumeratedProcessor
from citrine.informatics.scores import LIScore, EIScore, EVScore
from citrine.informatics.reports import ModelSummary, FeatureImportanceReport

informatics_string_data = [
    (IngredientCountConstraint(
        formulation_descriptor=FormulationDescriptor('x'), min=0,
        max=1), "<IngredientCountConstraint 'x'>"),
    (IngredientFractionConstraint(
        formulation_descriptor=FormulationDescriptor('x'),
        ingredient='y',
        min=0,
        max=1), "<IngredientFractionConstraint 'x'::'y'>"),
    (LabelFractionConstraint(formulation_descriptor=FormulationDescriptor('x'),
                             label='y',
                             min=0,
                             max=1), "<LabelFractionConstraint 'x'::'y'>"),
    (ScalarRangeConstraint('z'), "<ScalarRangeConstraint 'z'>"),
    (CategoricalConstraint('x', []), "<CategoricalConstraint 'x'>"),
    (ProductDesignSpace(name='my design space',
                        description='does some things'),
     "<ProductDesignSpace 'my design space'>"),
    (EnumeratedDesignSpace('enumerated', 'desc', [],
                           []), "<EnumeratedDesignSpace 'enumerated'>"),
コード例 #14
0
"""Tests for citrine.informatics.constraints."""
import pytest

from citrine.informatics.constraints import *
from citrine.informatics.descriptors import FormulationDescriptor

# Module-level formulation descriptor used by the constraint fixtures in this file.
formulation_descriptor = FormulationDescriptor('formulation')


@pytest.fixture
def scalar_range_constraint() -> ScalarRangeConstraint:
    """Provide a ScalarRangeConstraint on 'z' from 1.0 to 10.0 with ``min_inclusive=False``."""
    constraint = ScalarRangeConstraint(
        descriptor_key='z',
        min=1.0,
        max=10.0,
        min_inclusive=False,
    )
    return constraint


@pytest.fixture
def categorical_constraint() -> AcceptableCategoriesConstraint:
    """Provide an AcceptableCategoriesConstraint on 'x' accepting 'y' and 'z'."""
    constraint = AcceptableCategoriesConstraint(
        descriptor_key='x',
        acceptable_categories=['y', 'z'],
    )
    return constraint


@pytest.fixture
def ingredient_fraction_constraint() -> IngredientFractionConstraint:
    """Build an IngredientFractionConstraint."""
    return IngredientFractionConstraint(
        formulation_descriptor=formulation_descriptor,
        ingredient='foo',
コード例 #15
0
def test_mean_feature_properties():
    """Exercise build_mean_feature_property_predictors on valid and invalid requests.

    The valid case is run for both a molecular-structure featurizer and a
    chemical-formula featurizer. The invalid cases check the exception type
    raised for a non-featurizer model, for a request with no models, for a
    project with zero properties, and for labels passed as a bare string.
    """
    n_props = 3
    project = FakeProject(FakeDescriptorMethods(num_properties=n_props))
    smiles = MolecularStructureDescriptor("smiles")
    chem = ChemicalFormulaDescriptor("formula")
    formulation = FormulationDescriptor("formulation")
    mol_featurizer = MolecularStructureFeaturizer(name="", description="", descriptor=smiles)
    chem_featurizer = ChemicalFormulaFeaturizer(name="", description="", input_descriptor=chem)

    for featurizer in (mol_featurizer, chem_featurizer):
        # Standard case: one model over all ingredients plus one for a label.
        predictors, output_descriptors = build_mean_feature_property_predictors(
            project=project,
            featurizer=featurizer,
            formulation_descriptor=formulation,
            p=7,
            impute_properties=False,
            make_all_ingredients_model=True,
            labels=["some label"],
        )

        assert len(output_descriptors) == n_props * 2
        assert len(predictors) == 2
        for predictor in predictors:
            assert predictor.p == 7
            assert predictor.impute_properties == False
            assert predictor.input_descriptor == formulation
            assert len(predictor.properties) == n_props

    # The return order is not part of the contract, but it is how the logic is
    # currently set up. This runs after the loop, so it inspects the models
    # built for the last featurizer only.
    assert predictors[0].label is None
    assert predictors[1].label == "some label"

    # Arguments common to every failing call below.
    shared_kwargs = dict(formulation_descriptor=formulation, p=1)

    # A model that is not an allowed featurizer type is rejected.
    not_featurizer = LabelFractionsPredictor(
        name="", description="", input_descriptor=formulation, labels={"label"})
    with pytest.raises(TypeError):
        build_mean_feature_property_predictors(
            project=project, featurizer=not_featurizer, **shared_kwargs)

    # Requesting no mean property models at all is rejected.
    with pytest.raises(ValueError):
        build_mean_feature_property_predictors(
            project=project,
            featurizer=mol_featurizer,
            make_all_ingredients_model=False,
            labels=None,
            **shared_kwargs,
        )

    # A featurizer that yields no real properties is rejected.
    no_props_project = FakeProject(FakeDescriptorMethods(num_properties=0))
    with pytest.raises(RuntimeError):
        build_mean_feature_property_predictors(
            project=no_props_project, featurizer=mol_featurizer, **shared_kwargs)

    # Labels passed as a bare string instead of a list are rejected.
    with pytest.raises(TypeError):
        build_mean_feature_property_predictors(
            project=no_props_project,
            featurizer=mol_featurizer,
            labels="not inside a list!",
            **shared_kwargs,
        )
コード例 #16
0
def valid_product_design_space_data():
    """Build a serialized product design space for tests.

    Returns a dictionary describing a 'DESIGN_SPACE' module whose config has
    type 'ProductDesignSpace'. The config holds two formulation subspaces (the
    second has no id) and two dimensions: one continuous, one enumerated.
    """
    from citrine.informatics.descriptors import FormulationDescriptor

    # Ids are drawn in the same order as in a single nested literal:
    # module id, first subspace id, then the two dimension template ids.
    module_id = str(uuid.uuid4())

    first_subspace = {
        'module_type': 'DESIGN_SPACE',
        'status': 'READY',
        'id': str(uuid.uuid4()),
        'archived': False,
        'name': 'first subspace',
        'instance': {
            'type': 'FormulationDesignSpace',
            'name': 'first subspace',
            'description': '',
            'formulation_descriptor': FormulationDescriptor('X').dump(),
            'ingredients': ['foo'],
            'labels': {'bar': {'foo'}},
            'constraints': [],
            'resolution': 0.1,
        },
    }
    second_subspace = {
        'module_type': 'DESIGN_SPACE',
        'status': 'CREATED',
        'id': None,
        'archived': False,
        'name': 'second subspace',
        'instance': {
            'type': 'FormulationDesignSpace',
            'name': 'second subspace',
            'description': 'formulates some things',
            'formulation_descriptor': FormulationDescriptor('Y').dump(),
            'ingredients': ['baz'],
            'labels': {},
            'constraints': [],
            'resolution': 0.1,
        },
    }

    continuous_dimension = {
        'type': 'ContinuousDimension',
        'template_id': str(uuid.uuid4()),
        'descriptor': {
            'type': 'Real',
            'descriptor_key': 'alpha',
            'units': '',
            'lower_bound': 5.0,
            'upper_bound': 10.0,
        },
        'lower_bound': 6.0,
        'upper_bound': 7.0,
    }
    enumerated_dimension = {
        'type': 'EnumeratedDimension',
        'template_id': str(uuid.uuid4()),
        'descriptor': {
            'type': 'Categorical',
            'descriptor_key': 'color',
            'descriptor_values': ['blue', 'green', 'red'],
        },
        'list': ['red'],
    }

    return {
        'module_type': 'DESIGN_SPACE',
        'status': 'VALIDATING',
        'status_info': None,
        'archived': False,
        'display_name': 'my design space',
        'id': module_id,
        'config': {
            'type': 'ProductDesignSpace',
            'name': 'my design space',
            'description': 'does some things',
            'subspaces': [first_subspace, second_subspace],
            'dimensions': [continuous_dimension, enumerated_dimension],
        },
    }