def test_simple_deserialization(valid_data):
    """Check each field of a MeasurementRun built from the ``valid_data`` fixture."""
    run: MeasurementRun = MeasurementRun.build(valid_data)
    material_data = valid_data['material']

    # Identifiers and plain fields.
    assert run.uids == {'id': valid_data['uids']['id']}
    assert run.name == 'Taste test'
    assert run.notes is None
    assert run.tags == []
    assert run.conditions == []
    assert run.parameters == []

    # The two measured properties, in the order they were serialized.
    assert run.properties[0] == Property(
        'sweetness', origin="measured", value=NominalInteger(7))
    assert run.properties[1] == Property(
        'fluffiness', origin="measured", value=NominalInteger(10))

    assert run.file_links == []
    assert run.template is None

    # The linked material is rebuilt as a full MaterialRun with its own metadata.
    linked_material = run.material
    assert linked_material == MaterialRun(
        'sponge cake',
        uids={'id': material_data['uids']['id']},
        sample_type='experimental')
    assert linked_material.audit_info == AuditInfo(**material_data['audit_info'])
    assert linked_material.dataset == UUID(material_data['dataset'])

    # Remaining links and metadata on the measurement itself.
    assert run.spec is None
    assert run.typ == 'measurement_run'
    assert run.audit_info == AuditInfo(**valid_data['audit_info'])
    assert run.dataset == UUID(valid_data['dataset'])
def test_invalid_assignment():
    """Verify that each malformed Property construction raises the expected error."""
    # Each construction is wrapped in a lambda so that it only runs inside the
    # matching ``pytest.raises`` block.
    failing_constructions = [
        # No name supplied.
        (ValueError, lambda: Property(value=NominalReal(10, ''))),
        # Value given as a bare int.
        (TypeError, lambda: Property(name="property", value=10)),
        # Template is a ProcessTemplate.
        (TypeError, lambda: Property(name="property",
                                     template=ProcessTemplate("wrong kind of template"))),
        # Origin explicitly set to None.
        (ValueError, lambda: Property(name="property", origin=None)),
    ]
    for expected_error, construct in failing_constructions:
        with pytest.raises(expected_error):
            construct()
def test_material_spec():
    """Check that the ProcessSpec <-> MaterialSpec link round-trips through serialization."""
    # A standalone process spec.
    process_spec = ProcessSpec(name="a process spec", tags=["tag1", "tag2"])

    # A material spec that starts out with no process attached.
    solid_property = Property(
        name="The material is a solid",
        value=DiscreteCategorical(probabilities="solid"),
    )
    material_spec = MaterialSpec(
        name="a material spec",
        properties=PropertyAndConditions(solid_property),
    )
    assert material_spec.process is None, \
        "MaterialSpec should be initialized with no ProcessSpec, by default"

    # Assigning a non-spec must be rejected before the real assignment is made.
    with pytest.raises(TypeError):
        material_spec.process = 17
    material_spec.process = process_spec

    # The link must be visible from both ends.
    assert dumps(process_spec.output_material.process) == dumps(process_spec), \
        "ProcessSpec should link to MaterialSpec that links back to itself"
    assert dumps(material_spec.process.output_material) == dumps(material_spec), \
        "MaterialSpec should link to ProcessSpec that links back to itself"

    # Round-trip each spec and confirm the link is still present on the copies.
    material_spec_copy = loads(dumps(material_spec))
    process_spec_copy = loads(dumps(process_spec))
    assert process_spec_copy.output_material == material_spec, \
        "Serialization should preserve link from ProcessSpec to MaterialSpec"
    assert material_spec_copy.process == process_spec, \
        "Serialization should preserve link from MaterialSpec to ProcessSpec"
def test_serialized_history():
    """Serialize a full material history and check its contents and links."""
    # Build a small linked history: bought dough -> baked cookie -> taste test.
    buy_spec = LinkByUID("id", "pr723")
    dough_spec = MaterialSpec("cookie dough spec", process=buy_spec)
    buy_dough = ProcessRun("Buy cookie dough", uids={'id': '32283'}, spec=buy_spec)
    dough = MaterialRun("cookie dough", process=buy_dough, spec=dough_spec)
    oven_temp = Condition("oven temp", origin='measured', value=NominalReal(357, 'degF'))
    bake = ProcessRun("bake cookie dough", conditions=[oven_temp])
    IngredientRun(material=dough, process=bake, number_fraction=NominalReal(1, ''))
    cookie = MaterialRun("cookie", process=bake, tags=["chocolate chip", "drop"])
    MeasurementRun("taste", material=cookie, properties=[
        Property("taste", value=DiscreteCategorical("scrumptious"))])

    history = complete_material_history(cookie)

    def first_with(key, value):
        """Return the first serialized entity whose ``key`` entry equals ``value``."""
        return next(item for item in history if item.get(key) == value)

    # Seven entities are expected: cookie dough (spec & run), buy cookie dough,
    # cookie dough ingredient, bake cookie dough, cookie, taste.
    assert len(history) == 7
    for item in history:
        assert len(item['uids']) > 0, "Serializing material history should assign uids."

    # The measurement must reference the cookie by one of the cookie's own uids.
    taste_entry = first_with('type', 'measurement_run')
    cookie_entry = first_with('name', 'cookie')
    link_scope = taste_entry.get('material').get('scope')
    assert taste_entry.get('material').get('id') == cookie_entry.get('uids').get(link_scope)

    # The material spec and the process run must point at the same process spec.
    # Because that spec started out as a LinkByUID, this also exercises the
    # method's ability to serialize a LinkByUID.
    dough_spec_entry = first_with('type', 'material_spec')
    buy_dough_entry = first_with('name', 'Buy cookie dough')
    assert dough_spec_entry.get('process') == buy_spec.as_dict()
    assert buy_dough_entry.get('spec') == buy_spec.as_dict()
def test_json_serde():
    """Round-trip a Property carrying an enum through the custom loads/dumps."""
    # An enum is serialized as a plain string, so it can only be restored when
    # it is nested in a class that supplies the context; a Property does that.
    original = Property(name="foo", origin=Origin.MEASURED)
    serialized = dumps(original)
    restored = loads(serialized)
    assert original == restored
def test_attribute_serde():
    """Check that an attribute linked to an attribute template survives a round trip."""
    length_template = PropertyTemplate(name='prop_tmpl', bounds=RealBounds(0, 2, 'm'))
    length = Property(name='prop', template=length_template, value=NominalReal(1, 'm'))
    spec = MeasurementSpec("a spec")
    measurement = MeasurementRun("a measurement", spec=spec, properties=[length])

    # Both the bare attribute and the object holding it must compare equal to
    # their round-tripped copies.
    for subject in (length, measurement):
        assert loads(dumps(subject)) == subject

    # Serializing must leave the original's template as a full template object.
    assert isinstance(length.template, PropertyTemplate)
def test_build():
    """Check that ``MaterialRun.build`` reconstructs a run from its dictionary form."""
    described_property = Property("a property", value=NominalReal(3, ''))
    material_spec = MaterialSpec(
        "A spec",
        properties=PropertyAndConditions(property=described_property),
        tags=["a tag"],
    )
    material = MaterialRun(name="a material", spec=material_spec)

    # Embed the spec's own dictionary so that build has the full spec to work from.
    payload = material.as_dict()
    payload['spec'] = material.spec.as_dict()

    assert MaterialRun.build(payload) == material
def test_soft_measurement_material_attachment():
    """Check that a material lists the measurements that were attached to it."""
    cake = MaterialRun("A cake")

    smell = Property(name="Smell", value=DiscreteCategorical("yummy"))
    by_nose = MeasurementRun("use your nose", material=cake, properties=[smell])
    by_mouth = MeasurementRun("taste", material=cake)

    # Measurements appear on the material in the order they were created.
    assert cake.measurements == [by_nose, by_mouth]
def test_equality():
    """Check equality against a deep copy, a differing run, and a non-entity."""
    shared_spec = MaterialSpec(
        "A spec",
        properties=PropertyAndConditions(
            property=Property("a property", value=NominalReal(3, ''))),
        tags=["a tag"],
    )
    untagged = MaterialRun("A material", spec=shared_spec)
    tagged = MaterialRun("A material", spec=shared_spec, tags=["A tag"])

    assert untagged == deepcopy(untagged)  # a deep copy compares equal
    assert untagged != tagged  # differing tags break equality
    assert untagged != "A material"  # so does comparing against a plain string
def make_flexural_test_measurement(my_id, deflection, extra_tags=frozenset()):
    """
    Compute the stress, strain, and modulus.

    According to https://en.wikipedia.org/wiki/Three-point_flexural_test

    The geometry and load (``applied_force``, ``span``, ``thickness``, ``width``)
    are read from the enclosing scope.  Returns a "3 Point Bend" MeasurementRun
    identified by ``my_id`` that carries the three derived quantities plus the
    deflection itself, all marked as measured.
    """
    stress = 3 * applied_force * span / (2 * thickness * thickness * width)
    strain = 6 * deflection * thickness / (span * span)
    modulus = stress / strain

    def with_one_percent_std(name, mean, units):
        """Wrap ``mean`` as a measured NormalReal property with a 1% std."""
        return Property(
            name=name,
            value=NormalReal(mean, std=(0.01 * mean), units=units),
            origin=Origin.MEASURED
        )

    derived_properties = [
        with_one_percent_std("flexural stress", stress, "MPa"),
        with_one_percent_std("flexural strain", strain, ""),
        with_one_percent_std("flexural modulus", modulus, "MPa"),
    ]
    deflection_property = Property(
        name="deflection",
        value=NominalReal(deflection, units="mm"),
        origin=Origin.MEASURED
    )

    return MeasurementRun(
        name="3 Point Bend",
        uids={"my_id": my_id},
        tags=["3_pt_bend", "mechanical", "flex"] + list(extra_tags),
        properties=derived_properties + [deflection_property]
    )
def test_material_run():
    """
    Test the ability to create a MaterialRun that is linked to a MaterialSpec.

    Enumerated fields must reject unknown values, and the run must be unchanged
    by a serialize/deserialize round trip.
    """
    # An unrecognized origin must be rejected when the property is constructed.
    with pytest.raises(ValueError):
        Property(name="A property", origin="bad origin", value=NominalReal(17, units=''))

    # A material spec carrying one valid property.
    specified = Property(name="A property", origin="specified", value=NominalReal(17, units=''))
    material_spec = MaterialSpec(
        name="a specification for a material",
        properties=PropertyAndConditions(specified),
        notes="Funny lookin'",
    )

    # In the raw JSON, origin (an enumeration) must appear as a plain string.
    raw = json.loads(dumps(material_spec))
    serialized_property = raw["context"][0]["properties"][0]['property']
    assert isinstance(serialized_property['origin'], str)

    # An unrecognized sample_type must be rejected as well.
    with pytest.raises(ValueError):
        MaterialRun(spec=material_spec, sample_type="imaginary")
    material = MaterialRun(spec=material_spec, sample_type="virtual")

    # Round-tripping must leave the serialized form unchanged.
    restored = loads(dumps(material))
    assert dumps(restored) == dumps(material), \
        "Material run is modified by serialization or deserialization"
def test_list_validation():
    """Check that list-valued fields reject elements of the wrong kind."""
    # A labels list mixing a string with an int is refused at construction.
    mixed_labels = ["Label 1", 17]
    with pytest.raises(ValueError):
        IngredientSpec(labels=mixed_labels, name="foo")

    # Appending an int to an existing list of string labels is refused too.
    spec = IngredientSpec(labels=["Label 1", "label 2"], name="foo")
    with pytest.raises(TypeError):
        spec.labels.append(17)

    # A conditions list may not hold a Property.
    with pytest.raises(ValueError):
        MeasurementRun("A measurement", conditions=[Property("not a condition")])
def ingest_table(material_run, table):
    """Attach one measurement per row of ``table`` to ``material_run`` and return the run.

    For every row, each name in ``known_properties`` / ``known_conditions`` that
    is present in the row becomes a dimensionless NominalReal property /
    condition on a fresh MeasurementRun, which is then linked to ``material_run``.
    """
    for _index, record in table.iterrows():
        measurement = MeasurementRun()

        present_properties = (name for name in known_properties if name in record)
        for name in present_properties:
            measurement.properties.append(
                Property(name=name, value=NominalReal(record[name], '')))

        present_conditions = (name for name in known_conditions if name in record)
        for name in present_conditions:
            measurement.conditions.append(
                Condition(name=name, value=NominalReal(record[name], '')))

        measurement.material = material_run

    return material_run
def test_simple_deserialization(valid_data):
    """Check each field of a MaterialSpec built from the ``valid_data`` fixture."""
    spec: MaterialSpec = MaterialSpec.build(valid_data)

    # Identifiers and plain fields.
    assert spec.uids == {'id': valid_data['uids']['id']}
    assert spec.name == 'spec of material'
    assert spec.tags == []
    assert spec.notes is None
    assert spec.process is None

    # The single property arrives together with its condition.
    assert spec.properties[0] == PropertyAndConditions(
        Property("color", origin='specified', value=NominalCategorical("tan")),
        conditions=[
            Condition('temperature', origin='specified',
                      value=NominalReal(300, units='kelvin')),
        ],
    )

    assert spec.template is None
    assert spec.file_links == []
    assert spec.typ == 'material_spec'
def test_recursive_foreach():
    """Check that ``recursive_foreach`` visits every object in a material history."""
    source_material = MaterialRun("foo")
    process = ProcessRun("bar")
    IngredientRun(process=process, material=source_material)
    product = MaterialRun("material", process=process)

    # Property templates are the tricky case: they are only reachable through
    # the attribute that references them.
    bounded_template = PropertyTemplate("prop", bounds=RealBounds(0, 1, ""))
    bounded_property = Property("prop", value=NominalReal(1.0, ""), template=bounded_template)
    MeasurementRun("check", material=product, properties=bounded_property)

    visited_types = []

    def record_type(entity):
        """Note the ``typ`` of each visited object."""
        visited_types.append(entity.typ)

    recursive_foreach(product, record_type)

    expected_types = [
        "ingredient_run",
        "material_run",
        "material_run",
        "process_run",
        "measurement_run",
        "property_template",
    ]
    # Visit order is not part of the contract, so compare as sorted lists.
    assert sorted(visited_types) == sorted(expected_types)
def make_strehlow_objects(table=None, template_scope=DEMO_TEMPLATE_SCOPE):
    """Build one material instance per row of the Strehlow & Cook data.

    For every row a MaterialSpec is created (or reused, keyed by the cleaned
    chemical formula), instantiated with ``make_instance``, and linked to a
    "Band gap" measurement instance whose properties and conditions are filled
    from ``row['properties']``.

    :param table: iterable of row dicts; loaded with ``import_table()`` when None
    :param template_scope: scope handed to ``make_templates``
    :return: list of the objects produced by ``make_instance(spec)``, one per row
    """
    tmpl = make_templates(template_scope)

    if table is None:
        table = import_table()

    # Specs
    msr_spec = MeasurementSpec(name='Band gap',
                               template=tmpl["Band gap measurement"]
                               )

    def real_mapper(prop):
        """Mapping methods for RealBounds."""
        scalar = prop['scalars'][0]
        if 'uncertainty' not in scalar:
            return NominalReal(nominal=float(scalar['value']),
                               units=prop['units']
                               )

        if prop['units'] == 'eV':  # Arbitrarily convert to attojoules
            mean = convert_units(value=float(scalar['value']),
                                 starting_unit=prop['units'],
                                 final_unit='aJ'
                                 )
            # The spread comes from the reported uncertainty, not from the value.
            std = convert_units(value=float(scalar['uncertainty']),
                                starting_unit=prop['units'],
                                final_unit='aJ'
                                )
            return NormalReal(mean=mean,
                              units='aJ',
                              std=std
                              )

        return NormalReal(mean=float(scalar['value']),
                          units=prop['units'],
                          std=float(scalar['uncertainty'])
                          )

    # Dispatch on the type of a template's bounds to build the matching value.
    content_map = {
        RealBounds: real_mapper,
        CategoricalBounds: lambda prop: NominalCategorical(category=prop['scalars'][0]['value']),
        type(None): lambda bnd: 'Label'
    }

    # 2 categories in the PIF need to be split to avoid repeat Attribute Templates in a Run
    name_map = {
        'Phase': 'Crystal system',
        'Transition': 'Bands'
    }
    origin_map = {
        'EXPERIMENTAL': Origin.MEASURED,
        'COMPUTATIONAL': Origin.COMPUTED
    }

    datapoints = []
    compounds = dict()
    for row in table:
        formula = formula_clean(row['chemicalFormula'])
        if formula not in compounds:
            compounds[formula] = MaterialSpec(
                name=formula_latex(formula),
                template=tmpl["Chemical"],
                process=ProcessSpec(name="Sample preparation",
                                    template=tmpl["Sample preparation"]
                                    ))
        spec = compounds[formula]
        run = make_instance(spec)
        datapoints.append(run)

        # The first time a spec is seen, record its formula as its first property.
        if not spec.properties:
            spec.properties.append(
                PropertyAndConditions(
                    property=Property(name=spec.template.properties[0][0].name,
                                      value=EmpiricalFormula(formula=formula),
                                      template=spec.template.properties[0][0])
                ))

        msr = make_instance(msr_spec)
        msr.material = run

        seen = set()  # Some conditions come in from multiple properties on the same object
        for prop in row['properties']:
            origin = origin_map.get(prop.get('dataType', None), Origin.UNKNOWN)
            if 'method' in prop:
                method = 'Method: ' + prop['method']['name']
            else:
                method = 'Method: unreported'

            for attr in [prop] + prop.get('conditions', []):
                if attr['name'] in seen:  # Skip it if it's a repeat
                    continue
                seen.add(attr['name'])

                template = tmpl[attr['name']]

                # Figure out if we need to split this column
                if attr['name'] in name_map:
                    value = attr['scalars'][0]['value']
                    if value not in template.bounds.categories:
                        template = tmpl[name_map[attr['name']]]

                # Move into GEMD structure
                if isinstance(template, PropertyTemplate):
                    msr.properties.append(
                        Property(name=template.name,
                                 template=template,
                                 value=content_map[type(template.bounds)](attr),
                                 origin=origin,
                                 notes=method
                                 ))
                elif isinstance(template, ConditionTemplate):
                    msr.conditions.append(
                        Condition(name=template.name,
                                  template=template,
                                  value=content_map[type(template.bounds)](attr),
                                  origin=origin,
                                  notes=method
                                  ))

    return datapoints
def ingest_material_run(data, material_spec=None, process_run=None):
    """Ingest material run with data, a material spec, and an originating process run.

    :param data: a dict describing one sample, or a list of such dicts.  The
        keys read are ``sample_id``, ``tags`` and ``experiments``; each
        experiment may carry ``scan_id``, ``tags`` and any name found in
        ``known_properties`` / ``known_conditions`` / ``known_parameters``.
    :param material_spec: optional spec to attach to the created material run
    :param process_run: optional process run to attach to the created material run
    :return: the created MaterialRun, or a list of them when ``data`` is a list
    :raises ValueError: if ``data`` is neither a list nor a dict
    """
    if isinstance(data, list):
        # NOTE(review): process_run is not forwarded to the list items; this
        # looks deliberate (one process run per material) -- confirm.
        return [ingest_material_run(x, material_spec) for x in data]

    if not isinstance(data, dict):
        raise ValueError("This ingester operates on dict, but got {}".format(type(data)))

    material = MaterialRun()

    sample_id = data.get("sample_id")
    if sample_id:
        material.add_uid("given_sample_id", sample_id)

    tags = data.get("tags")
    if tags:
        material.tags = tags

    for experiment in data.get("experiments", []):
        measurement = MeasurementRun()

        # Only names that are both known and present in this experiment are ingested.
        for name in set(known_properties.keys()).intersection(experiment.keys()):
            prop = Property(
                name=name,
                template=known_properties[name],
                value=_parse_value(experiment[name])
            )
            measurement.properties.append(prop)

        for name in set(known_conditions.keys()).intersection(experiment.keys()):
            cond = Condition(
                name=name,
                template=known_conditions[name],
                value=_parse_value(experiment[name])
            )
            measurement.conditions.append(cond)

        for name in set(known_parameters.keys()).intersection(experiment.keys()):
            param = Parameter(
                name=name,
                template=known_parameters[name],
                value=_parse_value(experiment[name])
            )
            measurement.parameters.append(param)

        scan_id = experiment.get("scan_id")
        if scan_id:
            measurement.add_uid("given_scan_id", scan_id)

        tags = experiment.get("tags")
        if tags:
            measurement.tags = tags

        measurement.material = material

    if material_spec:
        # A MaterialRun exposes its spec as ``spec``; assigning to any other
        # attribute name would leave the run unlinked from the spec.
        material.spec = material_spec

    if process_run:
        material.process = process_run

    return material
def test_invalid_assignment(caplog):
    """Test that invalid assignments throw the appropriate errors.

    Also checks how an out-of-bounds value is handled at each validation level.
    Every assertion message describes the failure it reports.
    """
    with pytest.raises(TypeError):
        Property(value=NominalReal(10, ''))
    with pytest.raises(TypeError):
        Property(name="property", value=10)
    with pytest.raises(TypeError):
        Property(name="property", template=ProcessTemplate("wrong kind of template"))
    with pytest.raises(ValueError):
        Property(name="property", origin=None)

    # A property whose template bounds its value to [0, 100].
    valid_prop = Property(name="property", value=NominalReal(10, ''),
                          template=PropertyTemplate("template",
                                                    bounds=RealBounds(0, 100, '')))
    good_val = valid_prop.value
    bad_val = NominalReal(-10.0, '')  # below the template's lower bound
    assert len(caplog.records) == 0, "Warning caught before logging tests were reached."

    # IGNORE: the bad value is accepted and nothing is logged.
    with validation_level(WarningLevel.IGNORE):
        valid_prop.value = bad_val
        assert len(caplog.records) == 0, "Validation warned even though level is IGNORE."
        assert valid_prop.value == bad_val, "IGNORE didn't allow the bad value to be set."

        valid_prop.value = good_val
        assert len(caplog.records) == 0, "Validation warned even though level is IGNORE."

    # WARNING: the bad value is accepted and exactly one record is logged.
    with validation_level(WarningLevel.WARNING):
        valid_prop.value = bad_val
        assert len(caplog.records) == 1, "Validation didn't warn on out of bounds value."
        assert valid_prop.value == bad_val, "WARNING didn't allow the bad value to be set."

        valid_prop.value = good_val
        assert len(caplog.records) == 1, "Validation DID warn on a valid value."

    # FATAL: the bad value is rejected and the previous value stays in place.
    with validation_level(WarningLevel.FATAL):
        with pytest.raises(ValueError):
            valid_prop.value = bad_val
        assert valid_prop.value == good_val, "FATAL allowed the bad value to be set."

    # FATAL also rejects a template whose bounds exclude the current value.
    with validation_level(WarningLevel.FATAL):
        with pytest.raises(ValueError):
            valid_prop.template = PropertyTemplate("template", bounds=RealBounds(0, 1, ''))
        assert valid_prop.value == good_val, \
            "Rejected template assignment under FATAL changed the value."
def test_get_object_id_from_base_attribute():
    """Check that ``get_object_id`` raises ValueError when handed a bare attribute."""
    with pytest.raises(ValueError):
        bare_attribute = Property('some property')
        get_object_id(bare_attribute)
def make_data_island(density, bulk_modulus, firing_temperature, binders, powders, tag=None):
    """Build a linked mix -> fire -> measure history and return the final material run.

    ``binders`` and ``powders`` map material names to mass fractions.  The
    templates used here are read from the enclosing module.
    """
    def spec_for(name):
        """Create the MaterialSpec for an input material name."""
        return MaterialSpec(name=name)

    def run_for(spec):
        """Create the MaterialRun that realizes ``spec``."""
        return MaterialRun(name=spec.name, spec=spec)

    def spec_name_of(run):
        """Recover the input material name from its run."""
        return run.spec.name

    # keymap rewrites only the keys, so each mapping keeps its mass fractions
    # while the keys go name -> spec -> run.
    binder_specs = keymap(spec_for, binders)
    powder_specs = keymap(spec_for, powders)
    binder_runs = keymap(run_for, binder_specs)
    powder_runs = keymap(run_for, powder_specs)

    # Mixing step: every input material feeds a single process.
    composition_by_name = keymap(spec_name_of, merge(binder_runs, powder_runs))
    composition = Condition(name="composition",
                            value=NominalComposition(composition_by_name))
    mixing = ProcessRun(name="Mixing", tags=["mixing"], conditions=[composition])

    binder_ingredients = [
        IngredientRun(
            material=binder,
            process=mixing,
            mass_fraction=NominalReal(binders[binder.spec.name], ''),
        )
        for binder in binder_runs
    ]
    powder_ingredients = [
        IngredientRun(
            material=powder,
            process=mixing,
            mass_fraction=NominalReal(powders[powder.spec.name], ''),
        )
        for powder in powder_runs
    ]

    green_body = MaterialRun("Green", process=mixing)

    # Firing step: the temperature is recorded as a +/- 0.5 degC uniform window.
    firing_temperature_condition = Condition(
        name="Firing Temperature",
        value=UniformReal(firing_temperature - 0.5, firing_temperature + 0.5, 'degC'),
        template=firing_temperature_template)
    firing_setting = Parameter(name="Firing setting", value=DiscreteCategorical("hot"))
    firing_spec = ProcessSpec("Firing", template=firing_template)
    firing = ProcessRun(
        name=firing_spec.name,
        conditions=[firing_temperature_condition],
        parameters=[firing_setting],
        spec=firing_spec)

    # The green body is the sole ingredient of the firing step.
    IngredientRun(
        material=green_body,
        process=firing,
        mass_fraction=NormalReal(1.0, 0.0, ''),
        volume_fraction=NormalReal(1.0, 0.0, ''),
        number_fraction=NormalReal(1.0, 0.0, ''))

    # Measurement of the fired coupon.
    density_property = Property(
        name="Density",
        value=NominalReal(density, ''),
        template=density_template)
    modulus_property = Property(
        name="Bulk modulus",
        value=NormalReal(bulk_modulus, bulk_modulus / 100.0, ''))
    mechanical_spec = MeasurementSpec("Mechanical Properties", template=measurement_template)
    mechanical_measurement = MeasurementRun(
        mechanical_spec.name,
        properties=[density_property, modulus_property],
        spec=mechanical_spec)

    if tag:
        coupon_tags = [tag]
    else:
        coupon_tags = []
    coupon_spec = MaterialSpec("Coupon", template=material_template)
    coupon = MaterialRun(
        coupon_spec.name,
        process=firing,
        tags=coupon_tags,
        spec=coupon_spec)

    mechanical_measurement.material = coupon
    return coupon