Example 1
    def process(self, item):
        if self.graph_parallel and not self.allow_child_process and \
                current_process().name != "MainProcess":
            logger.warning(
                "It appears derive_quantities() is running "
                "in a child process, possibly in a parallelized "
                "Runner.\nThis is not recommended and will deteriorate "
                "performance.")
        # Define quantities corresponding to materials doc fields
        # Attach quantities to materials
        item = MontyDecoder().process_decoded(item)
        logger.info("Populating material for %s", item['task_id'])
        material = Material()

        date_created = item.get('created_at')

        provenance = ProvenanceElement(
            source={
                "source": self.source_name,
                "source_key": item['task_id'],
                "date_created": date_created
            })

        for mkey, property_name in self.materials_symbol_map.items():
            value = pydash.get(item, mkey)
            if value:
                material.add_quantity(
                    QuantityFactory.create_quantity(
                        property_name,
                        value,
                        units=Registry("units").get(property_name, None),
                        provenance=provenance))

        # Add custom quantities, e.g. the computed entry
        computed_entry = get_entry(item)
        if computed_entry:
            material.add_quantity(
                QuantityFactory.create_quantity("computed_entry",
                                                computed_entry,
                                                provenance=provenance))
        else:
            logger.info("Unable to create computed entry for {}".format(
                item['task_id']))
        material.add_quantity(
            QuantityFactory.create_quantity("external_identifier_mp",
                                            item['task_id'],
                                            provenance=provenance))

        input_quantities = material.symbol_quantities_dict

        # Use graph to generate expanded quantity pool
        logger.info("Evaluating graph for %s", item['task_id'])

        new_material = self._graph_evaluator.evaluate(
            material, timeout=self.graph_timeout)

        # Format document and return
        logger.info("Creating doc for %s", item['task_id'])
        # Gives the initial inputs that were used to derive properties of a
        # certain material.

        doc = {
            "inputs": [
                StorageQuantity.from_quantity(q)
                for q in chain.from_iterable(input_quantities.values())
            ]
        }

        for symbol, quantities in new_material.symbol_quantities_dict.items():
            # If no new quantities of a given symbol were derived (i.e. the
            # initial input quantities are the only ones listed for that symbol
            # in the new material), don't add that symbol to the propnet entry
            # document as a derived quantity.
            if len(quantities) == len(input_quantities[symbol]):
                continue
            sub_doc = {}
            try:
                # Write out all quantities as dicts including the
                # internal ID for provenance tracing
                qs = [
                    jsanitize(StorageQuantity.from_quantity(q), strict=True)
                    for q in quantities
                ]
            except AttributeError as ex:
                # Check to see if this is an error caused by an object
                # that is not JSON serializable
                msg = ex.args[0]
                if "object has no attribute 'as_dict'" in msg:
                    # Write error to db and logger
                    errmsg = "Quantity of Symbol '{}' is not ".format(symbol.name) + \
                        "JSON serializable. Cannot write quantities to database!"
                    logger.error(errmsg)
                    sub_doc['error'] = errmsg
                    qs = []
                else:
                    # If not, re-raise the error
                    raise ex
            sub_doc['quantities'] = qs
            doc[symbol.name] = sub_doc

        aggregated_quantities = new_material.get_aggregated_quantities()

        for symbol, quantity in aggregated_quantities.items():
            if symbol.name not in doc:
                # No new quantities were derived
                continue
            # Store mean and std dev for aggregated quantities
            sub_doc = {
                "mean": unumpy.nominal_values(quantity.value).tolist(),
                "std_dev": unumpy.std_devs(quantity.value).tolist(),
                "units":
                quantity.units.format_babel() if quantity.units else None,
                "title": quantity.symbol.display_names[0]
            }
            # Symbol Name -> Sub_Document, listing all Quantities of that type.
            doc[symbol.name].update(sub_doc)

        doc.update({
            "task_id": item["task_id"],
            "pretty_formula": item.get("pretty_formula"),
            "deprecated": item.get("deprecated", False)
        })

        if self.include_sandboxed:
            doc.update({'sbxn': item.get("sbxn", [])})

        return jsanitize(doc, strict=True)
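
For reference, here is a minimal sketch of the document shape that process() returns. The field names are taken from the code above; the task_id, symbol name, and all values shown are illustrative placeholders only.

    # Hypothetical shape of the returned doc (values are made up):
    example_doc = {
        "inputs": [...],          # StorageQuantity records for the initial input quantities
        "band_gap": {             # one sub-document per symbol with newly derived quantities
            "quantities": [...],  # all quantities of that symbol, with internal IDs for provenance
            "mean": 1.2,          # aggregated nominal value(s)
            "std_dev": 0.1,       # aggregated standard deviation(s)
            "units": "electron_volt",
            "title": "Band gap",
        },
        "task_id": "mp-149",
        "pretty_formula": "Si",
        "deprecated": False,
        # "sbxn": [...]           # included only when include_sandboxed is set
    }
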
Example 2
    def process_item(self, item):
        # Define quantities corresponding to materials doc fields
        # Attach quantities to materials
        item = MontyDecoder().process_decoded(item)
        logger.info("Populating material for %s", item['task_id'])
        material = Material()

        date_created = item.get('created_at', "")

        provenance = ProvenanceElement(
            source={
                "source": self.source_name,
                "source_key": item['task_id'],
                "date_created": date_created
            })

        for mkey, property_name in self.materials_symbol_map.items():
            value = get(item, mkey)
            if value:
                material.add_quantity(
                    QuantityFactory.create_quantity(property_name,
                                                    value,
                                                    provenance=provenance))

        # Add custom quantities, e.g. the computed entry
        computed_entry = get_entry(item)
        material.add_quantity(
            QuantityFactory.create_quantity("computed_entry",
                                            computed_entry,
                                            provenance=provenance))
        material.add_quantity(
            QuantityFactory.create_quantity("external_identifier_mp",
                                            item['task_id'],
                                            provenance=provenance))

        input_quantities = material.get_quantities()

        # Use graph to generate expanded quantity pool
        logger.info("Evaluating graph for %s", item['task_id'])
        graph = Graph()
        graph.remove_models({
            "dimensionality_cheon":
            DEFAULT_MODEL_DICT['dimensionality_cheon'],
            "dimensionality_gorai":
            DEFAULT_MODEL_DICT['dimensionality_gorai']
        })
        new_material = graph.evaluate(material)

        # Format document and return
        logger.info("Creating doc for %s", item['task_id'])
        # Gives the initial inputs that were used to derive properties of a
        # certain material.

        doc = {
            "inputs":
            [StorageQuantity.from_quantity(q) for q in input_quantities]
        }
        for symbol, quantity in new_material.get_aggregated_quantities().items():
            all_qs = new_material._symbol_to_quantity[symbol]
            # Only add new quantities
            # TODO: Condition insufficiently general.
            #       Can end up with initial quantities added as "new quantities"
            if len(all_qs) == 1 and list(all_qs)[0] in input_quantities:
                continue
            # Write out all quantities as dicts including the
            # internal ID for provenance tracing
            qs = [StorageQuantity.from_quantity(q).as_dict() for q in all_qs]
            # The listing of all Quantities of a given symbol.
            sub_doc = {
                "quantities": qs,
                "mean": unumpy.nominal_values(quantity.value).tolist(),
                "std_dev": unumpy.std_devs(quantity.value).tolist(),
                "units":
                quantity.units.format_babel() if quantity.units else None,
                "title": quantity._symbol_type.display_names[0]
            }
            # Symbol Name -> Sub_Document, listing all Quantities of that type.
            doc[symbol.name] = sub_doc

        doc.update({
            "task_id": item["task_id"],
            "pretty_formula": item["pretty_formula"]
        })
        return jsanitize(doc, strict=True)
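
The TODO in Example 2 notes that the skip condition is insufficiently general: a symbol is only skipped when it has exactly one quantity and that quantity was an input, so a symbol whose several input quantities came through evaluation unchanged could still be written out as "new". Example 1 closes this gap by comparing quantity counts per symbol; a minimal sketch of that check, using the same names as Example 1:

    # Skip symbols for which graph evaluation added nothing beyond the inputs.
    for symbol, quantities in new_material.symbol_quantities_dict.items():
        if len(quantities) == len(input_quantities[symbol]):
            continue  # no new quantities derived for this symbol
        ...  # otherwise build the per-symbol sub-document as shown above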