Esempio n. 1
0
def valid_expression_predictor_data():
    """Build the payload of a valid analytic-expression predictor module.

    Returns:
        A dict with the module metadata (``module_type``, ``status``,
        ``status_info``, ``archived``, ``display_name``, ``schema_id``, ``id``)
        and a ``config`` of type ``'AnalyticExpression'``. The ``id`` is a
        newly generated ``uuid.uuid4()`` rendered as a string, so it differs
        between calls.
    """
    from citrine.informatics.descriptors import RealDescriptor

    # Output of the expression.
    output_descriptor = RealDescriptor(
        'Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')
    # Inputs of the expression, referenced through the aliases 'Y' and 'v'.
    alias_y = RealDescriptor(
        "Property~Young's modulus", lower_bound=0, upper_bound=100, units='GPa')
    alias_v = RealDescriptor(
        "Property~Poisson's ratio", lower_bound=-1, upper_bound=0.5, units='')

    config = {
        'type': 'AnalyticExpression',
        'name': 'Expression predictor',
        'description':
            'Computes shear modulus from Youngs modulus and Poissons ratio',
        'expression': 'Y / (2 * (1 + v))',
        'output': output_descriptor.dump(),
        'aliases': {
            'Y': alias_y.dump(),
            'v': alias_v.dump(),
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Expression predictor',
        'schema_id': 'f1601161-bb98-4fa9-bdd2-a2a673547532',
        'id': str(uuid.uuid4()),
        'config': config,
    }
Esempio n. 2
0
def valid_simple_ml_predictor_data():
    """Build the payload of a valid simple ML predictor module.

    Returns:
        A dict with the module metadata and a ``config`` of type ``'Simple'``
        listing one input (``x``), one output (``z``), one latent variable
        (``y``) and a single GEM-table training data source, all in dumped
        form. The ``id`` is a newly generated ``uuid.uuid4()`` string.
    """
    from citrine.informatics.data_sources import GemTableDataSource
    from citrine.informatics.descriptors import RealDescriptor

    input_x = RealDescriptor("x", 0, 100, "")
    latent_y = RealDescriptor("y", 0, 100, "")
    output_z = RealDescriptor("z", 0, 100, "")
    training_table = GemTableDataSource(
        table_id=uuid.UUID('e5c51369-8e71-4ec6-b027-1f92bdc14762'),
        table_version=2,
    )

    config = {
        'type': 'Simple',
        'name': 'ML predictor',
        'description': 'Predicts z from input x and latent variable y',
        'inputs': [input_x.dump()],
        'outputs': [output_z.dump()],
        'latent_variables': [latent_y.dump()],
        'training_data': [training_table.dump()],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'ML predictor',
        'schema_id': '08d20e5f-e329-4de0-a90a-4b5e36b91703',
        'id': str(uuid.uuid4()),
        'config': config,
    }
Esempio n. 3
0
def valid_predictor_report_data():
    """Build the payload of a valid predictor report.

    The report holds two model summaries -- an ML model with inputs ``x`` and
    output ``y``, and an analytic model with inputs ``x``, ``y`` and output
    ``z`` -- followed by the dumped descriptors for ``x``, ``y`` and ``z``.

    Returns:
        A dict with fixed ``id``, ``module_id``, ``status``, ``create_time``
        and ``update_time`` values plus the nested ``report`` dict.
    """
    from citrine.informatics.descriptors import RealDescriptor

    desc_x = RealDescriptor("x", 0, 1, "")
    desc_y = RealDescriptor("y", 0, 100, "")
    desc_z = RealDescriptor("z", 0, 101, "")

    def _setting(name, value, children=()):
        # One node of the model-settings tree; leaves get an empty child list.
        return {'name': name, 'value': value, 'children': list(children)}

    ml_model = {
        'name': 'GeneralLoloModel_1',
        'type': 'ML Model',
        'inputs': [desc_x.key],
        'outputs': [desc_y.key],
        'display_name': 'ML Model',
        'model_settings': [
            _setting(
                'Algorithm',
                'Ensemble of non-linear estimators',
                [
                    _setting('Number of estimators', 64),
                    _setting('Leaf model', 'Mean'),
                    _setting('Use jackknife', True),
                ],
            ),
        ],
        'feature_importances': [
            {
                'response_key': 'y',
                'importances': {'x': 1.00},
                'top_features': 5,
            },
        ],
        'predictor_configuration_name': "Predict y from x with ML",
    }

    analytic_model = {
        'name': 'GeneralLosslessModel_2',
        'type': 'Analytic Model',
        'inputs': [desc_x.key, desc_y.key],
        'outputs': [desc_z.key],
        'display_name': 'GeneralLosslessModel_2',
        'model_settings': [_setting("Expression", "(z) <- (x + y)")],
        'feature_importances': [],
        'predictor_configuration_name': "Expression for z",
        'predictor_configuration_uid': "249bf32c-6f3d-4a93-9387-94cc877f170c",
    }

    return {
        'id': '7c2dda5d-675a-41b6-829c-e485163f0e43',
        'module_id': '31c7f311-6f3d-4a93-9387-94cc877f170c',
        'status': 'OK',
        'create_time': '2020-04-23T15:46:26Z',
        'update_time': '2020-04-23T15:46:26Z',
        'report': {
            'models': [ml_model, analytic_model],
            'descriptors': [desc_x.dump(), desc_y.dump(), desc_z.dump()],
        },
    }
Esempio n. 4
0
def valid_deprecated_expression_predictor_data():
    """Build the payload of a valid ``'Expression'``-type predictor module.

    Here the aliases ``Y`` and ``v`` map to plain descriptor-key strings, and
    only the output descriptor is dumped.

    Returns:
        A dict with the module metadata and the ``config`` dict. The ``id`` is
        a newly generated ``uuid.uuid4()`` string.
    """
    from citrine.informatics.descriptors import RealDescriptor

    output_descriptor = RealDescriptor(
        'Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')

    config = {
        'type': 'Expression',
        'name': 'Expression predictor',
        'description':
            'Computes shear modulus from Youngs modulus and Poissons ratio',
        'expression': 'Y / (2 * (1 + v))',
        'output': output_descriptor.dump(),
        'aliases': {
            'Y': "Property~Young's modulus",
            'v': "Property~Poisson's ratio",
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Expression predictor',
        'schema_id': '866e72a6-0a01-4c5f-8c35-146eb2540166',
        'id': str(uuid.uuid4()),
        'config': config,
    }
def valid_mean_property_predictor_data():
    """Build the payload of a mean-property predictor module.

    Returns:
        A dict with status ``'READY'`` and a ``config`` of type
        ``'MeanProperty'`` whose input is a dumped formulation descriptor,
        whose only property is ``density``, and whose training data is one
        dumped GEM-table data source with a random table id. The module ``id``
        is a newly generated ``uuid.uuid4()`` string.
    """
    from citrine.informatics.data_sources import GemTableDataSource
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor

    mixture = FormulationDescriptor('simple mixture')
    density_descriptor = RealDescriptor(
        key='density', lower_bound=0, upper_bound=100, units='g/cm^3')

    module_id = str(uuid.uuid4())
    training_table = GemTableDataSource(uuid.uuid4(), 0, mixture)

    config = {
        'type': 'MeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': [density_descriptor.dump()],
        'p': 2,
        'training_data': [training_table.dump()],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'id': module_id,
        'config': config,
    }
def invalid_predictor_data():
    """Build the payload of an invalid, archived predictor module.

    The payload carries status ``'INVALID'``, two ``status_info`` messages and
    a ``config`` whose type is the literal string ``'invalid'``.

    Returns:
        A dict with the module metadata and the ``config`` dict. The ``id`` is
        a newly generated ``uuid.uuid4()`` string.
    """
    from citrine.informatics.descriptors import RealDescriptor

    input_x = RealDescriptor("x", 0, 100, "")
    input_y = RealDescriptor("y", 0, 100, "")
    output_z = RealDescriptor("z", 0, 100, "")

    return {
        'module_type': 'PREDICTOR',
        'status': 'INVALID',
        'status_info': ['Something is wrong', 'Very wrong'],
        'archived': True,
        'display_name': 'my predictor',
        'id': str(uuid.uuid4()),
        'config': {
            'type': 'invalid',
            'name': 'my predictor',
            'description': 'does some things',
            'inputs': [input_x.dump(), input_y.dump()],
            'output': output_z.dump(),
        },
    }
def old_auto_ml_predictor_data(valid_gem_data_source_dict):
    """Produce AutoML predictor data used for tests.

    Args:
        valid_gem_data_source_dict: Data source payload; it is placed, as-is,
            as the only element of ``config['training_data']``.

    Returns:
        A dict with status ``'READY'`` and a ``config`` of type ``'AutoML'``
        with one input (``x``) and one response (``z``). The ``id`` is a newly
        generated ``uuid.uuid4()`` string.
    """
    from citrine.informatics.descriptors import RealDescriptor
    # Only x and z appear in the payload; the previously constructed "y"
    # descriptor was never referenced and has been dropped.
    x = RealDescriptor("x", 0, 100, "")
    z = RealDescriptor("z", 0, 100, "")
    return dict(module_type='PREDICTOR',
                status='READY',
                status_info=[],
                archived=False,
                display_name='AutoML predictor',
                id=str(uuid.uuid4()),
                config=dict(type='AutoML',
                            name='AutoML predictor',
                            description='Predicts z from input x',
                            inputs=[x.dump()],
                            responses=[z.dump()],
                            training_data=[valid_gem_data_source_dict]))
Esempio n. 8
0
def test_bad_predictor_report_build(valid_predictor_report_data):
    """Modify the predictor report to be non-ideal and check the behavior.

    Two situations are exercised:

    * a duplicate descriptor key, for which ``Report.build`` is expected to
      emit exactly one ``RuntimeWarning``;
    * missing descriptors, for which ``Report.build`` is expected to raise
      ``RuntimeError``.

    Args:
        valid_predictor_report_data: Report payload dict. It is deep-copied
            before each modification, so the argument itself is not mutated.
    """
    too_many_descriptors = deepcopy(valid_predictor_report_data)
    # Multiple descriptors with the same key
    other_x = RealDescriptor("x", 0, 100, "")
    too_many_descriptors['report']['descriptors'].append(other_x.dump())
    with warnings.catch_warnings(record=True) as w:
        # Make sure RuntimeWarnings are recorded no matter which warning
        # filters are active outside this block (e.g. ``-W ignore`` or
        # ``-W error``); otherwise the count below depends on the environment.
        warnings.simplefilter("always", RuntimeWarning)
        Report.build(too_many_descriptors)
        assert len(w) == 1
        assert issubclass(w[-1].category, RuntimeWarning)

    # A key that appears in inputs and/or outputs, but there is no corresponding descriptor.
    # This is done twice for coverage, once to catch a missing input and once for a missing output.
    too_few_descriptors = deepcopy(valid_predictor_report_data)
    too_few_descriptors['report']['descriptors'].pop()
    with pytest.raises(RuntimeError):
        Report.build(too_few_descriptors)
    too_few_descriptors['report']['descriptors'] = []
    with pytest.raises(RuntimeError):
        Report.build(too_few_descriptors)