def process(self, item): if self.graph_parallel and not self.allow_child_process and \ current_process().name != "MainProcess": logger.warning( "It appears derive_quantities() is running " "in a child process, possibly in a parallelized " "Runner.\nThis is not recommended and will deteriorate " "performance.") # Define quantities corresponding to materials doc fields # Attach quantities to materials item = MontyDecoder().process_decoded(item) logger.info("Populating material for %s", item['task_id']) material = Material() if 'created_at' in item.keys(): date_created = item['created_at'] else: date_created = None provenance = ProvenanceElement( source={ "source": self.source_name, "source_key": item['task_id'], "date_created": date_created }) for mkey, property_name in self.materials_symbol_map.items(): value = pydash.get(item, mkey) if value: material.add_quantity( QuantityFactory.create_quantity( property_name, value, units=Registry("units").get(property_name, None), provenance=provenance)) # Add custom things, e. g. computed entry computed_entry = get_entry(item) if computed_entry: material.add_quantity( QuantityFactory.create_quantity("computed_entry", computed_entry, provenance=provenance)) else: logger.info("Unable to create computed entry for {}".format( item['task_id'])) material.add_quantity( QuantityFactory.create_quantity("external_identifier_mp", item['task_id'], provenance=provenance)) input_quantities = material.symbol_quantities_dict # Use graph to generate expanded quantity pool logger.info("Evaluating graph for %s", item['task_id']) new_material = self._graph_evaluator.evaluate( material, timeout=self.graph_timeout) # Format document and return logger.info("Creating doc for %s", item['task_id']) # Gives the initial inputs that were used to derive properties of a # certain material. doc = { "inputs": [ StorageQuantity.from_quantity(q) for q in chain.from_iterable(input_quantities.values()) ] } for symbol, quantities in new_material.symbol_quantities_dict.items(): # If no new quantities of a given symbol were derived (i.e. if the initial # input quantity/ies is/are the only one/s listed in the new material) then don't add # that quantity to the propnet entry document as a derived quantity. if len(quantities) == len(input_quantities[symbol]): continue sub_doc = {} try: # Write out all quantities as dicts including the # internal ID for provenance tracing qs = [ jsanitize(StorageQuantity.from_quantity(q), strict=True) for q in quantities ] except AttributeError as ex: # Check to see if this is an error caused by an object # that is not JSON serializable msg = ex.args[0] if "object has no attribute 'as_dict'" in msg: # Write error to db and logger errmsg = "Quantity of Symbol '{}' is not ".format(symbol.name) + \ "JSON serializable. Cannot write quantities to database!" logger.error(errmsg) sub_doc['error'] = errmsg qs = [] else: # If not, re-raise the error raise ex sub_doc['quantities'] = qs doc[symbol.name] = sub_doc aggregated_quantities = new_material.get_aggregated_quantities() for symbol, quantity in aggregated_quantities.items(): if symbol.name not in doc: # No new quantities were derived continue # Store mean and std dev for aggregated quantities sub_doc = { "mean": unumpy.nominal_values(quantity.value).tolist(), "std_dev": unumpy.std_devs(quantity.value).tolist(), "units": quantity.units.format_babel() if quantity.units else None, "title": quantity.symbol.display_names[0] } # Symbol Name -> Sub_Document, listing all Quantities of that type. doc[symbol.name].update(sub_doc) doc.update({ "task_id": item["task_id"], "pretty_formula": item.get("pretty_formula"), "deprecated": item.get("deprecated", False) }) if self.include_sandboxed: doc.update({'sbxn': item.get("sbxn", [])}) return jsanitize(doc, strict=True)
def process_item(self, item): # Define quantities corresponding to materials doc fields # Attach quantities to materials item = MontyDecoder().process_decoded(item) logger.info("Populating material for %s", item['task_id']) material = Material() if 'created_at' in item.keys(): date_created = item['created_at'] else: date_created = "" provenance = ProvenanceElement( source={ "source": self.source_name, "source_key": item['task_id'], "date_created": date_created }) for mkey, property_name in self.materials_symbol_map.items(): value = get(item, mkey) if value: material.add_quantity( QuantityFactory.create_quantity(property_name, value, provenance=provenance)) # Add custom things, e. g. computed entry computed_entry = get_entry(item) material.add_quantity( QuantityFactory.create_quantity("computed_entry", computed_entry, provenance=provenance)) material.add_quantity( QuantityFactory.create_quantity("external_identifier_mp", item['task_id'], provenance=provenance)) input_quantities = material.get_quantities() # Use graph to generate expanded quantity pool logger.info("Evaluating graph for %s", item['task_id']) graph = Graph() graph.remove_models({ "dimensionality_cheon": DEFAULT_MODEL_DICT['dimensionality_cheon'], "dimensionality_gorai": DEFAULT_MODEL_DICT['dimensionality_gorai'] }) new_material = graph.evaluate(material) # Format document and return logger.info("Creating doc for %s", item['task_id']) # Gives the initial inputs that were used to derive properties of a # certain material. doc = { "inputs": [StorageQuantity.from_quantity(q) for q in input_quantities] } for symbol, quantity in new_material.get_aggregated_quantities().items( ): all_qs = new_material._symbol_to_quantity[symbol] # Only add new quantities # TODO: Condition insufficiently general. # Can end up with initial quantities added as "new quantities" if len(all_qs) == 1 and list(all_qs)[0] in input_quantities: continue # Write out all quantities as dicts including the # internal ID for provenance tracing qs = [StorageQuantity.from_quantity(q).as_dict() for q in all_qs] # THE listing of all Quantities of a given symbol. sub_doc = { "quantities": qs, "mean": unumpy.nominal_values(quantity.value).tolist(), "std_dev": unumpy.std_devs(quantity.value).tolist(), "units": quantity.units.format_babel() if quantity.units else None, "title": quantity._symbol_type.display_names[0] } # Symbol Name -> Sub_Document, listing all Quantities of that type. doc[symbol.name] = sub_doc doc.update({ "task_id": item["task_id"], "pretty_formula": item["pretty_formula"] }) return jsanitize(doc, strict=True)