def test_process_spec():
    """Check that a ProcessRun keeps its ProcessSpec through a dumps/loads round trip."""
    # Spec side: a process spec carrying one general-purpose condition
    spec_condition = Condition(name="a condition on the process in general")
    process_spec = ProcessSpec(conditions=spec_condition)

    # Run side: a process run with its own condition, linked back to the spec above
    run_condition = Condition(name="a condition on this process run in particular")
    process_run = ProcessRun(conditions=run_condition, spec=process_spec)

    round_tripped = loads(dumps(process_run))

    # Compare serialized forms of the recovered spec and the original spec
    recovered_spec_json = dumps(round_tripped.spec)
    original_spec_json = dumps(process_spec)
    assert recovered_spec_json == original_spec_json, \
        "Process spec should be preserved through serialization"
def test_serialize():
    """Compare serializing the root of a linked object graph with serializing its nodes as a batch.

    Both payloads are expected to carry the same number of context entries,
    and every top-level object is expected to be emitted as a LinkByUID.
    """
    cond = Condition(name="A condition", value=NominalReal(7, ''))
    param = Parameter(name="A parameter", value=NormalReal(mean=17, std=1, units=''))
    source_material = MaterialRun(tags="input")
    proc = ProcessRun(tags="A tag on a process run")
    ingr = IngredientRun(material=source_material, process=proc)
    produced_material = MaterialRun(tags=["A tag on a material"], process=proc)
    meas = MeasurementRun(
        tags="A tag on a measurement",
        conditions=cond,
        parameters=param,
        material=produced_material,
    )

    # Serialize only the root of the tree (the measurement).
    # Per the original author's note: ingredients don't get serialized on the process.
    root_payload = json.loads(dumps(meas))
    assert len(root_payload["context"]) == 5
    assert root_payload["object"]["type"] == LinkByUID.typ

    # Serialize the nodes explicitly as a list
    batch_payload = json.loads(dumps([produced_material, proc, meas, ingr]))
    assert len(batch_payload["context"]) == 5
    assert len(batch_payload["object"]) == 4
    for entry in batch_payload["object"]:
        assert entry["type"] == LinkByUID.typ
def test_serialized_history():
    """Exercise complete_material_history on a small, linked cookie-baking history."""
    # The process spec is deliberately only a LinkByUID rather than a full object
    purchase_spec_link = LinkByUID("id", "pr723")
    dough_spec = MaterialSpec("cookie dough spec", process=purchase_spec_link)
    purchase_run = ProcessRun("Buy cookie dough", uids={'id': '32283'},
                              spec=purchase_spec_link)
    dough_run = MaterialRun("cookie dough", process=purchase_run, spec=dough_spec)

    oven_temp = Condition("oven temp", origin='measured', value=NominalReal(357, 'degF'))
    baking_run = ProcessRun("bake cookie dough", conditions=[oven_temp])
    IngredientRun(material=dough_run, process=baking_run,
                  number_fraction=NominalReal(1, ''))
    cookie = MaterialRun("cookie", process=baking_run, tags=["chocolate chip", "drop"])
    taste = Property("taste", value=DiscreteCategorical("scrumptious"))
    MeasurementRun("taste", material=cookie, properties=[taste])

    history = complete_material_history(cookie)

    def first_match(field, expected):
        """Return the first history entry whose ``field`` equals ``expected``."""
        return next(item for item in history if item.get(field) == expected)

    # Seven entities are expected in the serialized list: cookie dough (spec & run),
    # buy cookie dough, cookie dough ingredient, bake cookie dough, cookie, taste
    assert len(history) == 7
    for item in history:
        assert len(item['uids']) > 0, "Serializing material history should assign uids."

    # The measurement must reference the cookie material by one of its uids
    taste_entry = first_match('type', 'measurement_run')
    cookie_entry = first_match('name', 'cookie')
    material_link = taste_entry.get('material')
    link_scope = material_link.get('scope')
    assert material_link.get('id') == cookie_entry.get('uids').get(link_scope)

    # The material spec and the process run must both point at the same process spec.
    # Since that spec started out as a LinkByUID, this also covers the method's
    # ability to serialize a LinkByUID.
    dough_spec_entry = first_match('type', 'material_spec')
    purchase_entry = first_match('name', 'Buy cookie dough')
    assert dough_spec_entry.get('process') == purchase_spec_link.as_dict()
    assert purchase_entry.get('spec') == purchase_spec_link.as_dict()
def test_deserialize():
    """Copy a MeasurementRun via GEMDJson and check attribute values and the auto uid match."""
    cond = Condition(name="A condition", value=NominalReal(7, ''))
    param = Parameter(name="A parameter", value=NormalReal(mean=17, std=1, units=''))
    original = MeasurementRun(
        tags="A tag on a measurement",
        conditions=cond,
        parameters=param,
    )

    duplicate = GEMDJson().copy(original)

    # The first condition/parameter values and the "auto" uid must match the original
    assert duplicate.conditions[0].value == original.conditions[0].value
    assert duplicate.parameters[0].value == original.parameters[0].value
    assert duplicate.uids["auto"] == original.uids["auto"]
def test_repeated_objects():
    """Check that recursive_flatmap does not count a shared object more than once.

    The condition template is referenced twice: once by the process template
    and once by the condition on the spec.
    """
    color_template = ConditionTemplate(
        name="color",
        bounds=CategoricalBounds(categories=["black", "white"]),
    )
    painting_template = ProcessTemplate(name="painting", conditions=[color_template])
    paint_color = Condition(
        name='Paint color',
        value=NominalCategorical("black"),
        template=color_template,
    )
    painting_spec = ProcessSpec(
        name='painting',
        template=painting_template,
        conditions=paint_color,
    )

    # Presumably the spec plus the two templates, with the shared template counted once
    flattened = recursive_flatmap(painting_spec, lambda x: [x])
    assert len(flattened) == 3
def ingest_table(material_run, table):
    """Attach one MeasurementRun per table row to the given material run.

    For every row, each name in ``known_properties`` / ``known_conditions``
    that is present in the row becomes a Property / Condition holding a
    dimensionless NominalReal of the row's value.

    :param material_run: the material run each new measurement is linked to
    :param table: object exposing ``iterrows()`` (presumably a pandas DataFrame)
    :return: the same ``material_run`` that was passed in
    """
    for _, record in table.iterrows():
        measurement = MeasurementRun()

        for label in known_properties:
            if label not in record:
                continue
            reading = NominalReal(record[label], '')
            measurement.properties.append(Property(name=label, value=reading))

        for label in known_conditions:
            if label not in record:
                continue
            reading = NominalReal(record[label], '')
            measurement.conditions.append(Condition(name=label, value=reading))

        # Link the measurement to the material it was performed on
        measurement.material = material_run

    return material_run
def test_simple_deserialization(valid_data):
    """Build a ProcessRun from ``valid_data`` and check each field of the result.

    ``valid_data`` is presumably a pytest fixture holding the dictionary form
    of a process run.
    """
    process_run: ProcessRun = ProcessRun.build(valid_data)

    expected_uids = {'id': valid_data['uids']['id'], 'my_id': 'process1-v1'}
    assert process_run.uids == expected_uids
    assert process_run.tags == ['baking::cakes', 'danger::low']

    expected_condition = Condition(name='oven temp',
                                   value=NominalReal(203.0, ''),
                                   origin='measured')
    assert process_run.conditions[0] == expected_condition

    # Fields expected to be empty or unset
    assert process_run.parameters == []
    assert process_run.file_links == []
    assert process_run.template is None
    assert process_run.output_material is None

    # The nested spec must be rebuilt as a full ProcessSpec
    expected_spec = ProcessSpec(
        name="Spec for proc 1",
        uids={'id': valid_data['spec']['uids']['id']},
        conditions=[
            Condition(name='oven temp',
                      value=UniformReal(175, 225, ''),
                      origin='specified'),
        ],
    )
    assert process_run.spec == expected_spec

    assert process_run.name == 'Process 1'
    assert process_run.notes == 'make sure to use oven mitts'
    assert process_run.typ == 'process_run'
def test_simple_deserialization(valid_data):
    """Build a MaterialSpec from ``valid_data`` and check each field of the result.

    ``valid_data`` is presumably a pytest fixture holding the dictionary form
    of a material spec.
    """
    material_spec: MaterialSpec = MaterialSpec.build(valid_data)

    expected_uids = {'id': valid_data['uids']['id']}
    assert material_spec.uids == expected_uids
    assert material_spec.name == 'spec of material'
    assert material_spec.tags == []
    assert material_spec.notes is None
    assert material_spec.process is None

    # The first property must come back as a property paired with its conditions
    expected_color = PropertyAndConditions(
        Property("color", origin='specified', value=NominalCategorical("tan")),
        conditions=[
            Condition('temperature',
                      origin='specified',
                      value=NominalReal(300, units='kelvin')),
        ],
    )
    assert material_spec.properties[0] == expected_color

    assert material_spec.template is None
    assert material_spec.file_links == []
    assert material_spec.typ == 'material_spec'
def make_strehlow_objects(table=None, template_scope=DEMO_TEMPLATE_SCOPE):
    """Build GEMD objects from the Strehlow & Cook data.

    One MaterialSpec is created per distinct cleaned chemical formula; every
    row of the table yields a new material run (an instance of that spec)
    with an attached 'Band gap' measurement holding the row's properties and
    conditions.

    :param table: iterable of row dicts with 'chemicalFormula' and
        'properties' keys; loaded with ``import_table()`` when None
    :param template_scope: scope passed to ``make_templates``
    :return: list of the material runs created, one per row
    """
    tmpl = make_templates(template_scope)

    if table is None:
        table = import_table()

    # Specs
    msr_spec = MeasurementSpec(name='Band gap',
                               template=tmpl["Band gap measurement"])

    def real_mapper(prop):
        """Mapping methods for RealBounds."""
        scalar = prop['scalars'][0]
        value = float(scalar['value'])
        if 'uncertainty' not in scalar:
            return NominalReal(nominal=value, units=prop['units'])

        uncertainty = float(scalar['uncertainty'])
        if prop['units'] == 'eV':  # Arbitrarily convert to attojoules
            mean = convert_units(value=value,
                                 starting_unit=prop['units'],
                                 final_unit='aJ')
            # The std must come from the reported uncertainty, not from the value
            # itself; converting the value again would make std equal to mean.
            std = convert_units(value=uncertainty,
                                starting_unit=prop['units'],
                                final_unit='aJ')
            return NormalReal(mean=mean, units='aJ', std=std)

        return NormalReal(mean=value, units=prop['units'], std=uncertainty)

    # Dispatch on the type of a template's bounds to build the matching value
    content_map = {
        RealBounds: real_mapper,
        CategoricalBounds: lambda prop: NominalCategorical(category=prop['scalars'][0]['value']),
        type(None): lambda bnd: 'Label'
    }

    # 2 categories in the PIF need to be split to avoid repeat Attribute Templates in a Run
    name_map = {
        'Phase': 'Crystal system',
        'Transition': 'Bands'
    }
    origin_map = {
        'EXPERIMENTAL': Origin.MEASURED,
        'COMPUTATIONAL': Origin.COMPUTED
    }

    datapoints = []
    compounds = dict()
    for row in table:
        formula = formula_clean(row['chemicalFormula'])
        if formula not in compounds:
            compounds[formula] = MaterialSpec(
                name=formula_latex(formula),
                template=tmpl["Chemical"],
                process=ProcessSpec(name="Sample preparation",
                                    template=tmpl["Sample preparation"]))
        spec = compounds[formula]
        run = make_instance(spec)
        datapoints.append(run)

        # Record the formula on the spec the first time the compound is seen
        if not spec.properties:
            formula_template = spec.template.properties[0][0]
            spec.properties.append(
                PropertyAndConditions(
                    property=Property(name=formula_template.name,
                                      value=EmpiricalFormula(formula=formula),
                                      template=formula_template)))

        msr = make_instance(msr_spec)
        msr.material = run

        seen = set()  # Some conditions come in from multiple properties on the same object
        for prop in row['properties']:
            origin = origin_map.get(prop.get('dataType', None), Origin.UNKNOWN)
            if 'method' in prop:
                method = 'Method: ' + prop['method']['name']
            else:
                method = 'Method: unreported'

            for attr in [prop] + prop.get('conditions', []):
                if attr['name'] in seen:  # Skip it if it's a repeat
                    continue
                seen.add(attr['name'])

                template = tmpl[attr['name']]

                # Figure out if we need to split this column
                if attr['name'] in name_map:
                    value = attr['scalars'][0]['value']
                    if value not in template.bounds.categories:
                        template = tmpl[name_map[attr['name']]]

                # Move into GEMD structure
                if isinstance(template, PropertyTemplate):
                    msr.properties.append(
                        Property(name=template.name,
                                 template=template,
                                 value=content_map[type(template.bounds)](attr),
                                 origin=origin,
                                 notes=method))
                elif isinstance(template, ConditionTemplate):
                    msr.conditions.append(
                        Condition(name=template.name,
                                  template=template,
                                  value=content_map[type(template.bounds)](attr),
                                  origin=origin,
                                  notes=method))

    return datapoints
def test_enumeration_serde():
    """Check that an enumeration member stored on an object is copied as its value."""
    original = Condition(name="A condition", notes=Origin.UNKNOWN)
    duplicate = GEMDJson().copy(original)

    # After the copy, the notes field should hold the enumeration's value
    copied_notes = duplicate.notes
    expected_notes = Origin.get_value(original.notes)
    assert copied_notes == expected_notes
def make_data_island(density, bulk_modulus, firing_temperature, binders, powders, tag=None):
    """Assemble a linked set of objects: mixing -> green sample -> firing -> coupon + measurement.

    :param density: value recorded as the measured "Density" property
    :param bulk_modulus: mean of the "Bulk modulus" property (std is 1% of it)
    :param firing_temperature: centre of the +/- 0.5 degC firing temperature range
    :param binders: mapping of binder name to mass fraction
    :param powders: mapping of powder name to mass fraction
    :param tag: optional tag put on the final material run
    :return: the final "Coupon" material run
    """
    def as_spec(name):
        """Create a material spec with the given name."""
        return MaterialSpec(name=name)

    def as_run(spec):
        """Create a material run named after, and linked to, the given spec."""
        return MaterialRun(name=spec.name, spec=spec)

    # keymap rewrites only the keys, so the fractions ride along as values
    binder_specs = keymap(as_spec, binders)
    powder_specs = keymap(as_spec, powders)
    binder_runs = keymap(as_run, binder_specs)
    powder_runs = keymap(as_run, powder_specs)

    # Map spec name -> fraction across every input material
    all_input_materials = keymap(lambda x: x.spec.name, merge(binder_runs, powder_runs))
    mixing_composition = Condition(
        name="composition",
        value=NominalComposition(all_input_materials),
    )
    mixing_process = ProcessRun(
        name="Mixing",
        tags=["mixing"],
        conditions=[mixing_composition],
    )

    # One ingredient per input run, each feeding the mixing process
    binder_ingredients = [
        IngredientRun(
            material=binder_run,
            process=mixing_process,
            mass_fraction=NominalReal(binders[binder_run.spec.name], ''),
        )
        for binder_run in binder_runs
    ]
    powder_ingredients = [
        IngredientRun(
            material=powder_run,
            process=mixing_process,
            mass_fraction=NominalReal(powders[powder_run.spec.name], ''),
        )
        for powder_run in powder_runs
    ]

    green_sample = MaterialRun("Green", process=mixing_process)

    # Firing step: measured temperature range plus a categorical setting
    low_temp = firing_temperature - 0.5
    high_temp = firing_temperature + 0.5
    measured_firing_temperature = Condition(
        name="Firing Temperature",
        value=UniformReal(low_temp, high_temp, 'degC'),
        template=firing_temperature_template,
    )
    specified_firing_setting = Parameter(
        name="Firing setting",
        value=DiscreteCategorical("hot"),
    )
    firing_spec = ProcessSpec("Firing", template=firing_template)
    firing_process = ProcessRun(
        name=firing_spec.name,
        conditions=[measured_firing_temperature],
        parameters=[specified_firing_setting],
        spec=firing_spec,
    )
    # The green sample is the only ingredient of the firing process
    IngredientRun(
        material=green_sample,
        process=firing_process,
        mass_fraction=NormalReal(1.0, 0.0, ''),
        volume_fraction=NormalReal(1.0, 0.0, ''),
        number_fraction=NormalReal(1.0, 0.0, ''),
    )

    # Measurement of the fired coupon
    measured_density = Property(
        name="Density",
        value=NominalReal(density, ''),
        template=density_template,
    )
    measured_modulus = Property(
        name="Bulk modulus",
        value=NormalReal(bulk_modulus, bulk_modulus / 100.0, ''),
    )
    measurement_spec = MeasurementSpec("Mechanical Properties", template=measurement_template)
    measurement = MeasurementRun(
        measurement_spec.name,
        properties=[measured_density, measured_modulus],
        spec=measurement_spec,
    )

    if tag:
        tags = [tag]
    else:
        tags = []

    material_spec = MaterialSpec("Coupon", template=material_template)
    material_run = MaterialRun(
        material_spec.name,
        process=firing_process,
        tags=tags,
        spec=material_spec,
    )
    measurement.material = material_run
    return material_run
def ingest_material_run(data, material_spec=None, process_run=None):
    """Ingest material run with data, a material spec, and an originating process run.

    :param data: a dict describing one material run (optional keys read here:
        "sample_id", "tags", "experiments"), or a list of such dicts
    :param material_spec: optional spec to link to each created material run
    :param process_run: optional process run that produced the material;
        only applied when ``data`` is a single dict
    :return: a MaterialRun for a dict input, or a list of MaterialRuns for a list input
    :raises ValueError: if ``data`` is neither a list nor a dict
    """
    if isinstance(data, list):
        # NOTE(review): process_run is not forwarded to the individual items.
        # This may be deliberate (one process run producing one material) - confirm.
        return [ingest_material_run(x, material_spec) for x in data]

    if not isinstance(data, dict):
        raise ValueError("This ingester operates on dict, but got {}".format(type(data)))

    material = MaterialRun()

    sample_id = data.get("sample_id")
    if sample_id:
        material.add_uid("given_sample_id", sample_id)

    tags = data.get("tags")
    if tags:
        material.tags = tags

    # Each experiment becomes one measurement on the material
    for experiment in data.get("experiments", []):
        measurement = MeasurementRun()

        # Only names that are both known and present in the experiment are ingested
        for name in set(known_properties.keys()).intersection(experiment.keys()):
            prop = Property(
                name=name,
                template=known_properties[name],
                value=_parse_value(experiment[name])
            )
            measurement.properties.append(prop)

        for name in set(known_conditions.keys()).intersection(experiment.keys()):
            cond = Condition(
                name=name,
                template=known_conditions[name],
                value=_parse_value(experiment[name])
            )
            measurement.conditions.append(cond)

        for name in set(known_parameters.keys()).intersection(experiment.keys()):
            param = Parameter(
                name=name,
                template=known_parameters[name],
                value=_parse_value(experiment[name])
            )
            measurement.parameters.append(param)

        scan_id = experiment.get("scan_id")
        if scan_id:
            measurement.add_uid("given_scan_id", scan_id)

        tags = experiment.get("tags")
        if tags:
            measurement.tags = tags

        measurement.material = material

    if material_spec:
        # A material run is linked to its spec through the ``spec`` attribute;
        # assigning to ``material_spec`` only created an unrelated attribute.
        material.spec = material_spec

    if process_run:
        material.process = process_run

    return material
def test_template_assignment():
    """Check that a spec and one of its conditions can each be given a template.

    The test passes as long as construction raises no exception.
    """
    humidity_template = ConditionTemplate("Humidity", bounds=RealBounds(0.5, 0.75, ""))

    # The process template pairs the condition template with its own bounds
    process_bounds = RealBounds(0.5, 0.65, "")
    dry_template = ProcessTemplate("Dry", conditions=[[humidity_template, process_bounds]])

    humidity = Condition(
        "Humidity",
        value=NominalReal(0.6, ""),
        template=humidity_template,
    )
    ProcessSpec("Dry a polymer", template=dry_template, conditions=[humidity])
def test_invalid_instance():
    """Check that make_instance raises TypeError for every input that is not a spec."""
    # A run object, an attribute, a value, a string and an int
    non_spec_inputs = (
        MeasurementRun("meas"),
        Condition("cond"),
        UniformReal(0, 1, ''),
        'foo',
        10,
    )
    for candidate in non_spec_inputs:
        with pytest.raises(TypeError):
            make_instance(candidate)