예제 #1
0
def test_process_spec():
    """Check that a ProcessRun still carries its ProcessSpec after a dumps/loads round trip."""
    # Spec holding a condition that applies to the process in general
    spec = ProcessSpec(
        conditions=Condition(name="a condition on the process in general"))

    # Run with its own condition, linked back to the spec built above
    run_condition = Condition(name="a condition on this process run in particular")
    process = ProcessRun(conditions=run_condition, spec=spec)

    round_tripped = loads(dumps(process))

    # Compare the serialized forms of the recovered spec and the original spec
    actual = dumps(round_tripped.spec)
    expected = dumps(spec)
    assert actual == expected, \
        "Process spec should be preserved through serialization"
예제 #2
0
def test_serialize():
    """Serialize a linked object tree from its root and as a batch; check context and links."""
    input_material = MaterialRun(tags="input")
    process = ProcessRun(tags="A tag on a process run")
    ingredient = IngredientRun(material=input_material, process=process)
    material = MaterialRun(tags=["A tag on a material"], process=process)
    measurement = MeasurementRun(
        tags="A tag on a measurement",
        conditions=Condition(name="A condition", value=NominalReal(7, '')),
        parameters=Parameter(name="A parameter",
                             value=NormalReal(mean=17, std=1, units='')),
        material=material,
    )

    # Serializing only the root of the tree
    from_root = json.loads(dumps(measurement))
    # ingredients don't get serialized on the process
    assert len(from_root["context"]) == 5
    assert from_root["object"]["type"] == LinkByUID.typ

    # Serializing every node explicitly
    nodes = [material, process, measurement, ingredient]
    from_batch = json.loads(dumps(nodes))
    assert len(from_batch["context"]) == 5
    assert len(from_batch["object"]) == 4
    for entry in from_batch["object"]:
        assert entry["type"] == LinkByUID.typ
예제 #3
0
def test_serialized_history():
    """Build a small cookie-baking history and check its serialized form."""
    # The process spec is only ever a link, never a full object
    buy_spec = LinkByUID("id", "pr723")
    dough_spec = MaterialSpec("cookie dough spec", process=buy_spec)
    buy_dough = ProcessRun("Buy cookie dough", uids={'id': '32283'}, spec=buy_spec)
    dough = MaterialRun("cookie dough", process=buy_dough, spec=dough_spec)

    oven_temp = Condition("oven temp", origin='measured', value=NominalReal(357, 'degF'))
    bake = ProcessRun("bake cookie dough", conditions=[oven_temp])
    IngredientRun(material=dough, process=bake, number_fraction=NominalReal(1, ''))

    cookie = MaterialRun("cookie", process=bake, tags=["chocolate chip", "drop"])
    taste = Property("taste", value=DiscreteCategorical("scrumptious"))
    MeasurementRun("taste", material=cookie, properties=[taste])

    history = complete_material_history(cookie)

    def first_where(key, expected):
        """Return the first serialized entity whose ``key`` entry equals ``expected``."""
        return next(x for x in history if x.get(key) == expected)

    # Expected entities: cookie dough (spec & run), buy cookie dough,
    # cookie dough ingredient, bake cookie dough, cookie, taste
    assert len(history) == 7
    for entity in history:
        assert len(entity['uids']) > 0, "Serializing material history should assign uids."

    # The measurement's material link must resolve to the cookie's uid in the same scope
    taste_dict = first_where('type', 'measurement_run')
    cookie_dict = first_where('name', 'cookie')
    material_link = taste_dict.get('material')
    scope = material_link.get('scope')
    assert taste_dict.get('material').get('id') == cookie_dict.get('uids').get(scope)

    # The material spec and the process run must both point at the same process spec.
    # Since that spec was only ever a LinkByUID, this also exercises serializing a LinkByUID.
    dough_spec_dict = first_where('type', 'material_spec')
    buy_dough_dict = first_where('name', 'Buy cookie dough')
    assert dough_spec_dict.get('process') == buy_spec.as_dict()
    assert buy_dough_dict.get('spec') == buy_spec.as_dict()
예제 #4
0
def test_deserialize():
    """Check that GEMDJson().copy keeps attribute values and the auto uid of a measurement."""
    original = MeasurementRun(
        tags="A tag on a measurement",
        conditions=Condition(name="A condition", value=NominalReal(7, '')),
        parameters=Parameter(name="A parameter",
                             value=NormalReal(mean=17, std=1, units='')),
    )
    clone = GEMDJson().copy(original)

    # The first condition and first parameter must carry equal values after the copy
    for attribute in ("conditions", "parameters"):
        assert getattr(clone, attribute)[0].value == getattr(original, attribute)[0].value
    assert clone.uids["auto"] == original.uids["auto"]
예제 #5
0
def test_repeated_objects():
    """Check that flattening a spec yields each reachable object only once."""
    color_bounds = CategoricalBounds(categories=["black", "white"])
    color_template = ConditionTemplate(name="color", bounds=color_bounds)
    painting_template = ProcessTemplate(name="painting", conditions=[color_template])

    # The condition template is reachable both via the process template and via the condition
    paint_color = Condition(name='Paint color',
                            value=NominalCategorical("black"),
                            template=color_template)
    painting_spec = ProcessSpec(name='painting',
                                template=painting_template,
                                conditions=paint_color)

    flattened = recursive_flatmap(painting_spec, lambda x: [x])
    assert len(flattened) == 3
예제 #6
0
def ingest_table(material_run, table):
    """Attach one MeasurementRun per table row to a material run.

    For each row yielded by ``table.iterrows()`` a new MeasurementRun is created.  Every
    name in ``known_properties`` / ``known_conditions`` that is present in the row is added
    to it as a Property / Condition whose value is a NominalReal with an empty units string.
    The measurement's ``material`` is then set to ``material_run``.

    :param material_run: the material run the measurements are attached to
    :param table: object exposing ``iterrows()`` (presumably a pandas DataFrame — confirm)
    :return: the same ``material_run`` that was passed in
    """
    for _index, record in table.iterrows():
        measurement = MeasurementRun()

        for name in known_properties:
            if name not in record:
                continue
            measurement.properties.append(
                Property(name=name, value=NominalReal(record[name], '')))

        for name in known_conditions:
            if name not in record:
                continue
            measurement.conditions.append(
                Condition(name=name, value=NominalReal(record[name], '')))

        measurement.material = material_run

    return material_run
def test_simple_deserialization(valid_data):
    """Build a ProcessRun from ``valid_data`` and check each deserialized field."""
    run: ProcessRun = ProcessRun.build(valid_data)

    expected_uids = {'id': valid_data['uids']['id'], 'my_id': 'process1-v1'}
    assert run.uids == expected_uids
    assert run.tags == ['baking::cakes', 'danger::low']

    expected_condition = Condition(name='oven temp',
                                   value=NominalReal(203.0, ''),
                                   origin='measured')
    assert run.conditions[0] == expected_condition

    assert run.parameters == []
    assert run.file_links == []
    assert run.template is None
    assert run.output_material is None

    # The nested spec must be rebuilt as a full ProcessSpec with its own condition
    spec_condition = Condition(name='oven temp', value=UniformReal(175, 225, ''),
                               origin='specified')
    expected_spec = ProcessSpec(name="Spec for proc 1",
                                uids={'id': valid_data['spec']['uids']['id']},
                                conditions=[spec_condition])
    assert run.spec == expected_spec

    assert run.name == 'Process 1'
    assert run.notes == 'make sure to use oven mitts'
    assert run.typ == 'process_run'
예제 #8
0
def test_simple_deserialization(valid_data):
    """Build a MaterialSpec from ``valid_data`` and check each deserialized field."""
    spec: MaterialSpec = MaterialSpec.build(valid_data)

    assert spec.uids == {'id': valid_data['uids']['id']}
    assert spec.name == 'spec of material'
    assert spec.tags == []
    assert spec.notes is None
    assert spec.process is None

    # The first property must come back paired with its condition
    color = Property("color", origin='specified', value=NominalCategorical("tan"))
    temperature = Condition('temperature', origin='specified',
                            value=NominalReal(300, units='kelvin'))
    expected_property = PropertyAndConditions(color, conditions=[temperature])
    assert spec.properties[0] == expected_property

    assert spec.template is None
    assert spec.file_links == []
    assert spec.typ == 'material_spec'
예제 #9
0
def make_strehlow_objects(table=None, template_scope=DEMO_TEMPLATE_SCOPE):
    """Build GEMD objects from the Strehlow & Cook band gap data.

    Each row of ``table`` yields one material instance (made from a MaterialSpec shared by
    all rows with the same cleaned chemical formula) and one 'Band gap' measurement attached
    to it.  The row's properties, and the conditions listed on those properties, are added
    to the measurement as Property / Condition attributes.

    :param table: iterable of row dicts with 'chemicalFormula' and 'properties' entries;
        when None, the result of ``import_table()`` is used
    :param template_scope: passed through to ``make_templates``
    :return: list of the per-row material instances, in row order
    """
    tmpl = make_templates(template_scope)

    if table is None:
        table = import_table()

    # Specs
    msr_spec = MeasurementSpec(name='Band gap',
                               template=tmpl["Band gap measurement"]
                               )

    def real_mapper(prop):
        """Map a PIF attribute with real bounds onto a NormalReal or NominalReal."""
        scalar = prop['scalars'][0]
        units = prop['units']
        value = float(scalar['value'])
        if 'uncertainty' not in scalar:
            return NominalReal(nominal=value, units=units)

        uncertainty = float(scalar['uncertainty'])
        if units == 'eV':  # Arbitrarily convert to attojoules
            # The std is derived from the reported uncertainty, not from the value itself
            return NormalReal(
                mean=convert_units(value=value, starting_unit=units, final_unit='aJ'),
                units='aJ',
                std=convert_units(value=uncertainty, starting_unit=units, final_unit='aJ')
            )
        return NormalReal(mean=value, units=units, std=uncertainty)

    # Dispatch on the type of the template's bounds to build the attribute value
    content_map = {
        RealBounds: real_mapper,
        CategoricalBounds: lambda prop: NominalCategorical(category=prop['scalars'][0]['value']),
        type(None): lambda bnd: 'Label'
    }

    # 2 categories in the PIF need to be split to avoid repeat Attribute Templates in a Run
    name_map = {
        'Phase': 'Crystal system',
        'Transition': 'Bands'
    }
    origin_map = {
        'EXPERIMENTAL': Origin.MEASURED,
        'COMPUTATIONAL': Origin.COMPUTED
    }

    datapoints = []
    compounds = dict()
    for row in table:
        formula = formula_clean(row['chemicalFormula'])
        if formula not in compounds:
            compounds[formula] = MaterialSpec(
                name=formula_latex(formula),
                template=tmpl["Chemical"],
                process=ProcessSpec(name="Sample preparation",
                                    template=tmpl["Sample preparation"]
                                    ))
        spec = compounds[formula]
        run = make_instance(spec)
        datapoints.append(run)

        # Only the first row seen for a formula adds the formula property to the shared spec
        if not spec.properties:
            spec.properties.append(
                PropertyAndConditions(
                    property=Property(name=spec.template.properties[0][0].name,
                                      value=EmpiricalFormula(formula=formula),
                                      template=spec.template.properties[0][0])
                ))

        msr = make_instance(msr_spec)
        msr.material = run

        seen = set()  # Some conditions come in from multiple properties on the same object
        for prop in row['properties']:
            origin = origin_map.get(prop.get('dataType', None), Origin.UNKNOWN)
            if 'method' in prop:
                method = 'Method: ' + prop['method']['name']
            else:
                method = 'Method: unreported'
            for attr in [prop] + prop.get('conditions', []):
                if attr['name'] in seen:
                    # Skip attributes already recorded for this row
                    continue
                seen.add(attr['name'])

                template = tmpl[attr['name']]
                # Figure out if we need to split this column
                if attr['name'] in name_map:
                    value = attr['scalars'][0]['value']
                    if value not in template.bounds.categories:
                        template = tmpl[name_map[attr['name']]]

                # Move into GEMD structure
                if isinstance(template, PropertyTemplate):
                    msr.properties.append(
                        Property(name=template.name,
                                 template=template,
                                 value=content_map[type(template.bounds)](attr),
                                 origin=origin,
                                 notes=method
                                 ))
                elif isinstance(template, ConditionTemplate):
                    msr.conditions.append(
                        Condition(name=template.name,
                                  template=template,
                                  value=content_map[type(template.bounds)](attr),
                                  origin=origin,
                                  notes=method
                                  ))

    return datapoints
예제 #10
0
def test_enumeration_serde():
    """Check that an Origin member stored in ``notes`` is copied as ``Origin.get_value`` of it."""
    original = Condition(name="A condition", notes=Origin.UNKNOWN)
    clone = GEMDJson().copy(original)

    copied_notes = clone.notes
    expected_notes = Origin.get_value(original.notes)
    assert copied_notes == expected_notes
def make_data_island(density,
                     bulk_modulus,
                     firing_temperature,
                     binders,
                     powders,
                     tag=None):
    """Build a mixing -> firing -> measurement history and return the final material run.

    ``binders`` and ``powders`` are mappings that are looked up by material name to get the
    mass fraction of each ingredient of the mixing process.  The mixed "Green" material is
    the sole ingredient of a firing process, whose output "Coupon" material carries a
    measurement with density and bulk modulus properties.

    :param density: nominal value of the "Density" property
    :param bulk_modulus: mean of the "Bulk modulus" property; its std is 1/100 of this value
    :param firing_temperature: centre of the +/- 0.5 degC firing temperature interval
    :param binders: mapping of binder name to mass fraction
    :param powders: mapping of powder name to mass fraction
    :param tag: optional single tag for the returned material run
    :return: the "Coupon" MaterialRun
    """
    def spec_for(name):
        return MaterialSpec(name=name)

    def run_for(spec):
        return MaterialRun(name=spec.name, spec=spec)

    # Re-key the input mappings first by spec, then by run
    binder_specs = keymap(spec_for, binders)
    powder_specs = keymap(spec_for, powders)
    binder_runs = keymap(run_for, binder_specs)
    powder_runs = keymap(run_for, powder_specs)

    # Mixing step: its composition condition is keyed by the input material names
    all_input_materials = keymap(lambda x: x.spec.name,
                                 merge(binder_runs, powder_runs))
    composition = NominalComposition(all_input_materials)
    mixing_process = ProcessRun(
        name="Mixing",
        tags=["mixing"],
        conditions=[Condition(name="composition", value=composition)])

    # One ingredient per input material, binders first and then powders
    for runs, fractions in ((binder_runs, binders), (powder_runs, powders)):
        for run in runs:
            IngredientRun(
                material=run,
                process=mixing_process,
                mass_fraction=NominalReal(fractions[run.spec.name], ''),
            )

    green_sample = MaterialRun("Green", process=mixing_process)

    # Firing step
    temperature_range = UniformReal(firing_temperature - 0.5,
                                    firing_temperature + 0.5,
                                    'degC')
    measured_firing_temperature = Condition(name="Firing Temperature",
                                            value=temperature_range,
                                            template=firing_temperature_template)
    specified_firing_setting = Parameter(name="Firing setting",
                                         value=DiscreteCategorical("hot"))
    firing_spec = ProcessSpec("Firing", template=firing_template)
    firing_process = ProcessRun(name=firing_spec.name,
                                conditions=[measured_firing_temperature],
                                parameters=[specified_firing_setting],
                                spec=firing_spec)
    # The green sample is the whole input of the firing process
    IngredientRun(material=green_sample,
                  process=firing_process,
                  mass_fraction=NormalReal(1.0, 0.0, ''),
                  volume_fraction=NormalReal(1.0, 0.0, ''),
                  number_fraction=NormalReal(1.0, 0.0, ''))

    # Measurement on the fired material
    measured_density = Property(name="Density",
                                value=NominalReal(density, ''),
                                template=density_template)
    modulus_value = NormalReal(bulk_modulus, bulk_modulus / 100.0, '')
    measured_modulus = Property(name="Bulk modulus", value=modulus_value)
    measurement_spec = MeasurementSpec("Mechanical Properties",
                                       template=measurement_template)
    measurement = MeasurementRun(measurement_spec.name,
                                 properties=[measured_density, measured_modulus],
                                 spec=measurement_spec)

    if tag:
        tags = [tag]
    else:
        tags = []

    material_spec = MaterialSpec("Coupon", template=material_template)
    material_run = MaterialRun(material_spec.name,
                               process=firing_process,
                               tags=tags,
                               spec=material_spec)
    measurement.material = material_run
    return material_run
예제 #12
0
def ingest_material_run(data, material_spec=None, process_run=None):
    """Ingest material run with data, a material spec, and an originating process run.

    A dict is turned into a single MaterialRun.  Its optional "sample_id" becomes the
    "given_sample_id" uid, its optional "tags" become the run's tags, and each entry of its
    optional "experiments" list becomes a MeasurementRun attached to the material.  Within
    an experiment, keys found in ``known_properties`` / ``known_conditions`` /
    ``known_parameters`` become Property / Condition / Parameter attributes, "scan_id"
    becomes the "given_scan_id" uid and "tags" become the measurement's tags.

    :param data: a dict describing one material run, or a list of such dicts
    :param material_spec: optional spec to link to the material run(s)
    :param process_run: optional process to set as the material run's process
    :return: a MaterialRun for dict input, or a list of results for list input
    :raises ValueError: if ``data`` is neither a list nor a dict
    """
    if isinstance(data, list):
        # NOTE(review): process_run is not forwarded to the per-item calls; this matches
        # the previous behavior — confirm whether that is intentional.
        return [ingest_material_run(x, material_spec) for x in data]

    if not isinstance(data, dict):
        raise ValueError("This ingester operates on dict, but got {}".format(type(data)))

    material = MaterialRun()

    sample_id = data.get("sample_id")
    if sample_id:
        material.add_uid("given_sample_id", sample_id)

    tags = data.get("tags")
    if tags:
        material.tags = tags

    for experiment in data.get("experiments", []):
        measurement = MeasurementRun()

        # (known name -> template mapping, attribute class, list the attribute is added to)
        attribute_kinds = (
            (known_properties, Property, measurement.properties),
            (known_conditions, Condition, measurement.conditions),
            (known_parameters, Parameter, measurement.parameters),
        )
        for known, attribute_class, target in attribute_kinds:
            for name in set(known.keys()).intersection(experiment.keys()):
                target.append(attribute_class(
                    name=name,
                    template=known[name],
                    value=_parse_value(experiment[name])
                ))

        scan_id = experiment.get("scan_id")
        if scan_id:
            measurement.add_uid("given_scan_id", scan_id)

        tags = experiment.get("tags")
        if tags:
            measurement.tags = tags

        measurement.material = material

    if material_spec:
        # The spec link on a MaterialRun is named `spec` (cf. MaterialRun(..., spec=...));
        # assigning to `material_spec` would only create an unrelated attribute.
        material.spec = material_spec

    if process_run:
        material.process = process_run

    return material
def test_template_assignment():
    """Check that a spec and one of its conditions can each be given a template."""
    humidity_template = ConditionTemplate("Humidity", bounds=RealBounds(0.5, 0.75, ""))
    # The process template pairs the condition template with a second RealBounds
    dry_template = ProcessTemplate(
        "Dry",
        conditions=[[humidity_template, RealBounds(0.5, 0.65, "")]])
    humidity = Condition("Humidity", value=NominalReal(0.6, ""), template=humidity_template)
    ProcessSpec("Dry a polymer", template=dry_template, conditions=[humidity])
예제 #14
0
def test_invalid_instance():
    """Check that make_instance raises TypeError for each of several non-spec arguments."""
    non_specs = (
        MeasurementRun("meas"),
        Condition("cond"),
        UniformReal(0, 1, ''),
        'foo',
        10,
    )
    for candidate in non_specs:
        with pytest.raises(TypeError):
            make_instance(candidate)