def valid_expression_predictor_data():
    """Build the serialized form of a valid analytic-expression predictor.

    The returned module dictionary has status ``VALID`` and a config of type
    ``AnalyticExpression``. Its expression ``Y / (2 * (1 + v))`` outputs the
    shear modulus descriptor, and the aliases ``Y`` and ``v`` map to the dumped
    Young's modulus and Poisson's ratio descriptors. A fresh random ``id`` is
    generated on every call.
    """
    from citrine.informatics.descriptors import RealDescriptor

    output_descriptor = RealDescriptor(
        'Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')
    # Expression symbol -> descriptor the symbol stands for.
    alias_descriptors = {
        'Y': RealDescriptor(
            "Property~Young's modulus", lower_bound=0, upper_bound=100, units='GPa'),
        'v': RealDescriptor(
            "Property~Poisson's ratio", lower_bound=-1, upper_bound=0.5, units=''),
    }

    config = {
        'type': 'AnalyticExpression',
        'name': 'Expression predictor',
        'description': 'Computes shear modulus from Youngs modulus and Poissons ratio',
        'expression': 'Y / (2 * (1 + v))',
        'output': output_descriptor.dump(),
        'aliases': {
            symbol: descriptor.dump()
            for symbol, descriptor in alias_descriptors.items()
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Expression predictor',
        'schema_id': 'f1601161-bb98-4fa9-bdd2-a2a673547532',
        'id': str(uuid.uuid4()),
        'config': config,
    }
def valid_simple_ml_predictor_data():
    """Build the serialized form of a valid simple ML predictor.

    The config (type ``Simple``) takes descriptor ``x`` as its input, ``z`` as
    its output and ``y`` as a latent variable, and is trained on a single
    ``GemTableDataSource`` with a fixed table id and version 2. A fresh random
    ``id`` is generated on every call.
    """
    from citrine.informatics.data_sources import GemTableDataSource
    from citrine.informatics.descriptors import RealDescriptor

    input_descriptor = RealDescriptor("x", 0, 100, "")
    latent_descriptor = RealDescriptor("y", 0, 100, "")
    output_descriptor = RealDescriptor("z", 0, 100, "")
    training_table = GemTableDataSource(
        table_id=uuid.UUID('e5c51369-8e71-4ec6-b027-1f92bdc14762'),
        table_version=2)

    config = {
        'type': 'Simple',
        'name': 'ML predictor',
        'description': 'Predicts z from input x and latent variable y',
        'inputs': [input_descriptor.dump()],
        'outputs': [output_descriptor.dump()],
        'latent_variables': [latent_descriptor.dump()],
        'training_data': [training_table.dump()],
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'ML predictor',
        'schema_id': '08d20e5f-e329-4de0-a90a-4b5e36b91703',
        'id': str(uuid.uuid4()),
        'config': config,
    }
def valid_predictor_report_data():
    """Build the serialized form of a valid predictor report.

    The report describes two models that refer to descriptors by key only:
    an ML model predicting ``y`` from ``x``, and an analytic model computing
    ``z`` from ``x`` and ``y``. The full descriptors for ``x``, ``y`` and ``z``
    are listed once, in that order, under ``report['descriptors']``.
    """
    from citrine.informatics.descriptors import RealDescriptor

    x_desc = RealDescriptor("x", 0, 1, "")
    y_desc = RealDescriptor("y", 0, 100, "")
    z_desc = RealDescriptor("z", 0, 101, "")

    # Settings tree of the ML model: one algorithm node with three leaf settings.
    algorithm_settings = {
        'name': 'Algorithm',
        'value': 'Ensemble of non-linear estimators',
        'children': [
            {'name': 'Number of estimators', 'value': 64, 'children': []},
            {'name': 'Leaf model', 'value': 'Mean', 'children': []},
            {'name': 'Use jackknife', 'value': True, 'children': []},
        ],
    }
    ml_model = {
        'name': 'GeneralLoloModel_1',
        'type': 'ML Model',
        'inputs': [x_desc.key],
        'outputs': [y_desc.key],
        'display_name': 'ML Model',
        'model_settings': [algorithm_settings],
        'feature_importances': [
            {'response_key': 'y', 'importances': {'x': 1.00}, 'top_features': 5},
        ],
        'predictor_configuration_name': "Predict y from x with ML",
    }
    # Only the analytic model carries a predictor_configuration_uid.
    analytic_model = {
        'name': 'GeneralLosslessModel_2',
        'type': 'Analytic Model',
        'inputs': [x_desc.key, y_desc.key],
        'outputs': [z_desc.key],
        'display_name': 'GeneralLosslessModel_2',
        'model_settings': [
            {'name': "Expression", 'value': "(z) <- (x + y)", 'children': []},
        ],
        'feature_importances': [],
        'predictor_configuration_name': "Expression for z",
        'predictor_configuration_uid': "249bf32c-6f3d-4a93-9387-94cc877f170c",
    }

    return {
        'id': '7c2dda5d-675a-41b6-829c-e485163f0e43',
        'module_id': '31c7f311-6f3d-4a93-9387-94cc877f170c',
        'status': 'OK',
        'create_time': '2020-04-23T15:46:26Z',
        'update_time': '2020-04-23T15:46:26Z',
        'report': {
            'models': [ml_model, analytic_model],
            'descriptors': [x_desc.dump(), y_desc.dump(), z_desc.dump()],
        },
    }
def valid_deprecated_expression_predictor_data():
    """Build the serialized form of a valid predictor of config type ``Expression``.

    Here the aliases map each expression symbol to a plain descriptor key
    string rather than to a dumped descriptor. Only the output (shear modulus)
    is given as a full descriptor. A fresh random ``id`` is generated on every
    call.
    """
    from citrine.informatics.descriptors import RealDescriptor

    output_descriptor = RealDescriptor(
        'Property~Shear modulus', lower_bound=0, upper_bound=100, units='GPa')

    config = {
        'type': 'Expression',
        'name': 'Expression predictor',
        'description': 'Computes shear modulus from Youngs modulus and Poissons ratio',
        'expression': 'Y / (2 * (1 + v))',
        'output': output_descriptor.dump(),
        # Aliases are descriptor keys (strings), not serialized descriptors.
        'aliases': {
            'Y': "Property~Young's modulus",
            'v': "Property~Poisson's ratio",
        },
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'VALID',
        'status_info': [],
        'archived': False,
        'display_name': 'Expression predictor',
        'schema_id': '866e72a6-0a01-4c5f-8c35-146eb2540166',
        'id': str(uuid.uuid4()),
        'config': config,
    }
def valid_mean_property_predictor_data():
    """Build the serialized form of a valid mean-property predictor.

    The config (type ``MeanProperty``) takes the ``simple mixture`` formulation
    descriptor as input and lists the ``density`` property, with ``p=2``,
    imputation enabled, a default density of ``1.0`` and the label
    ``solvent``. The training data is one ``GemTableDataSource`` built from a
    random table id, version 0 and the formulation descriptor. The predictor
    ``id`` is also random, so both differ on every call.
    """
    from citrine.informatics.descriptors import FormulationDescriptor, RealDescriptor
    from citrine.informatics.data_sources import GemTableDataSource

    mixture = FormulationDescriptor('simple mixture')
    density_descriptor = RealDescriptor(
        key='density', lower_bound=0, upper_bound=100, units='g/cm^3')

    predictor_id = str(uuid.uuid4())
    training_table = GemTableDataSource(uuid.uuid4(), 0, mixture)

    config = {
        'type': 'MeanProperty',
        'name': 'Mean property predictor',
        'description': 'Computes mean ingredient properties',
        'input': mixture.dump(),
        'properties': [density_descriptor.dump()],
        'p': 2,
        'training_data': [training_table.dump()],
        'impute_properties': True,
        'default_properties': {'density': 1.0},
        'label': 'solvent',
    }
    return {
        'module_type': 'PREDICTOR',
        'status': 'READY',
        'status_info': [],
        'archived': False,
        'display_name': 'Mean property predictor',
        'id': predictor_id,
        'config': config,
    }
def invalid_predictor_data():
    """Produce deliberately invalid predictor data used for tests.

    The returned module dictionary has status ``INVALID``, two ``status_info``
    messages, ``archived=True`` and a config whose ``type`` is the literal
    string ``'invalid'``. The config lists descriptors ``x`` and ``y`` as
    inputs and ``z`` as the output. A fresh random ``id`` is generated on
    every call.
    """
    from citrine.informatics.descriptors import RealDescriptor

    x = RealDescriptor("x", 0, 100, "")
    y = RealDescriptor("y", 0, 100, "")
    z = RealDescriptor("z", 0, 100, "")
    return dict(
        module_type='PREDICTOR',
        status='INVALID',
        status_info=['Something is wrong', 'Very wrong'],
        archived=True,
        display_name='my predictor',
        id=str(uuid.uuid4()),
        config=dict(
            type='invalid',
            name='my predictor',
            description='does some things',
            inputs=[x.dump(), y.dump()],
            output=z.dump(),
        ),
    )
def old_auto_ml_predictor_data(valid_gem_data_source_dict):
    """Produce serialized AutoML predictor data used for tests.

    The config (type ``AutoML``) lists descriptor ``x`` under ``inputs`` and
    descriptor ``z`` under ``responses``. A fresh random ``id`` is generated
    on every call.

    Parameters
    ----------
    valid_gem_data_source_dict
        Already-serialized data source; it is placed, unmodified and uncopied,
        as the only element of the config's ``training_data`` list.

    Returns
    -------
    dict
        Module dictionary with status ``READY`` and the AutoML config.

    """
    from citrine.informatics.descriptors import RealDescriptor

    # Only x and z appear in the payload; the former unused "y" descriptor was dropped.
    x = RealDescriptor("x", 0, 100, "")
    z = RealDescriptor("z", 0, 100, "")
    return dict(
        module_type='PREDICTOR',
        status='READY',
        status_info=[],
        archived=False,
        display_name='AutoML predictor',
        id=str(uuid.uuid4()),
        config=dict(
            type='AutoML',
            name='AutoML predictor',
            description='Predicts z from input x',
            inputs=[x.dump()],
            responses=[z.dump()],
            training_data=[valid_gem_data_source_dict],
        ),
    )
def test_bad_predictor_report_build(valid_predictor_report_data):
    """Check how ``Report.build`` reacts to duplicated and missing descriptors.

    Works on deep copies so the supplied report data is never mutated.
    """
    # Case 1: a second descriptor with the key "x" is added, so one key now has
    # two descriptors. Building must still succeed, emitting exactly one
    # RuntimeWarning.
    duplicated_report = deepcopy(valid_predictor_report_data)
    duplicate_x = RealDescriptor("x", 0, 100, "")
    duplicated_report['report']['descriptors'].append(duplicate_x.dump())
    with warnings.catch_warnings(record=True) as caught:
        Report.build(duplicated_report)
        assert len(caught) == 1
        assert issubclass(caught[-1].category, RuntimeWarning)

    # Case 2: a key is referenced by a model's inputs and/or outputs but has no
    # descriptor. Checked twice for coverage, once for a missing input and once
    # for a missing output: first with only the last descriptor removed, then
    # with the descriptor list emptied.
    incomplete_report = deepcopy(valid_predictor_report_data)
    del incomplete_report['report']['descriptors'][-1]
    with pytest.raises(RuntimeError):
        Report.build(incomplete_report)

    incomplete_report['report']['descriptors'] = []
    with pytest.raises(RuntimeError):
        Report.build(incomplete_report)