def check_process_nodes_sealed(nodes):
    """Verify that all given ``ProcessNode`` s are sealed.

    Only sealed ``ProcessNode`` s may be exported.

    :param nodes: PK(s) of the :py:class:`~aiida.orm.nodes.process.process.ProcessNode` s to be checked.
        A falsy value (e.g. ``None`` or an empty collection) means there is nothing to check.
    :type nodes: list, set, int

    :raises `~aiida.tools.importexport.common.exceptions.ExportValidationError`:
        if a ``ProcessNode`` is not sealed or `nodes` is not a `list`, `set`, or `int`.
    """
    # Nothing requested, nothing to validate
    if not nodes:
        return

    # Reject unsupported argument types before touching the database
    if not isinstance(nodes, (set, list, int)):
        raise exceptions.ExportValidationError('nodes must be either an int or set/list of ints')

    # Normalise the argument to a set of PKs
    if isinstance(nodes, int):
        requested_pks = {nodes}
    elif isinstance(nodes, list):
        requested_pks = set(nodes)
    else:
        requested_pks = nodes

    # Ask the database which of the requested PKs belong to sealed ProcessNodes
    sealed_filter = {'id': {'in': requested_pks}, 'attributes.sealed': True}
    builder = QueryBuilder().append(ProcessNode, filters=sealed_filter, project=['id'])
    sealed_pks = set(builder.all(flat=True))

    # Any requested PK that did not come back is reported as not sealed
    if sealed_pks != requested_pks:
        unsealed_listing = ', '.join(map(str, requested_pks - sealed_pks))
        raise exceptions.ExportValidationError(
            'All ProcessNodes must be sealed before they can be exported. '
            'Node(s) with PK(s): {} is/are not sealed.'.format(unsealed_listing)
        )
def _retrieve_linked_nodes_query(current_node, input_type, output_type, direction, link_type_value):
    """Helper function for :py:func:`~aiida.tools.importexport.dbexport.utils.retrieve_linked_nodes`

    Runs a single :py:class:`~aiida.orm.querybuilder.QueryBuilder` query for links of one type between
    ``input_type`` and ``output_type`` Nodes that start at (``'forward'``) or end at (``'backward'``) the
    current Node, and collects both the link information and the PKs of the Nodes at the other end.

    :param current_node: The current Node's PK.
    :type current_node: int

    :param input_type: Source Node class for Link
    :type input_type: :py:class:`~aiida.orm.nodes.data.data.Data`,
        :py:class:`~aiida.orm.nodes.process.process.ProcessNode`.

    :param output_type: Target Node class for Link
    :type output_type: :py:class:`~aiida.orm.nodes.data.data.Data`,
        :py:class:`~aiida.orm.nodes.process.process.ProcessNode`.

    :param direction: Link direction, must be either ``'forward'`` or ``'backward'``.
    :type direction: str

    :param link_type_value: A :py:class:`~aiida.common.links.LinkType` value, e.g. ``LinkType.RETURN.value``.
    :type link_type_value: str

    :return: Dictionary of link information to be used for the export archive and set of found Nodes.

    :raises `~aiida.tools.importexport.common.exceptions.ExportValidationError`:
        if ``direction`` is neither ``'forward'`` nor ``'backward'``.
    """
    if direction not in ('forward', 'backward'):
        raise exceptions.ExportValidationError('direction must be either "forward" or "backward"')

    follows_forward = direction == 'forward'

    # Pin the current Node on the side of the link we start from; the other side stays unfiltered
    source_filters = {'id': current_node} if follows_forward else {}
    target_filters = {} if follows_forward else {'id': current_node}

    query = QueryBuilder()
    query.append(input_type, project=['uuid', 'id'], tag='input', filters=source_filters)
    query.append(
        output_type,
        project=['uuid', 'id'],
        with_incoming='input',
        filters=target_filters,
        edge_filters={'type': link_type_value},
        edge_project=['label', 'type'],
    )

    links_by_identity = {}
    linked_pks = set()

    for source_uuid, source_pk, target_uuid, target_pk, label, link_type in query.iterall():
        link_entry = {
            'input': str(source_uuid),
            'output': str(target_uuid),
            'label': str(label),
            'type': str(link_type),
        }
        # Key on the entry's content so that identical links collapse into a single record
        links_by_identity[frozenset(link_entry.items())] = link_entry

        # The Node of interest is the one at the far end of the link
        linked_pks.add(target_pk if follows_forward else source_pk)

    return links_by_identity, linked_pks
def retrieve_linked_nodes(process_nodes, data_nodes, **kwargs):
    """Retrieve linked Nodes and the links, following the export graph traversal rules

    Starting from the given Node PKs, links are followed repeatedly until no unvisited Node is left.

    The rules for following links/edges in the provenance graph are as follows, where the Node types
    in bold symbolize the Node that is currently being exported, i.e., it is this Node onto which the Link
    in question has been found.

    +----------------------+---------------------+---------------------+----------------+---------+
    |**LinkType_Direction**| **From**            | **To**              |Follow (default)|Togglable|
    +======================+=====================+=====================+================+=========+
    | INPUT_CALC_FORWARD   | **Data**            | CalculationNode     | False          | True    |
    +----------------------+---------------------+---------------------+----------------+---------+
    | INPUT_CALC_BACKWARD  | Data                | **CalculationNode** | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CREATE_FORWARD       | **CalculationNode** | Data                | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CREATE_BACKWARD      | CalculationNode     | **Data**            | True           | True    |
    +----------------------+---------------------+---------------------+----------------+---------+
    | RETURN_FORWARD       | **WorkflowNode**    | Data                | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | RETURN_BACKWARD      | WorkflowNode        | **Data**            | False          | True    |
    +----------------------+---------------------+---------------------+----------------+---------+
    | INPUT_WORK_FORWARD   | **Data**            | WorkflowNode        | False          | True    |
    +----------------------+---------------------+---------------------+----------------+---------+
    | INPUT_WORK_BACKWARD  | Data                | **WorkflowNode**    | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CALL_CALC_FORWARD    | **WorkflowNode**    | CalculationNode     | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CALL_CALC_BACKWARD   | WorkflowNode        | **CalculationNode** | False          | True    |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CALL_WORK_FORWARD    | **WorkflowNode**    | WorkflowNode        | True           | False   |
    +----------------------+---------------------+---------------------+----------------+---------+
    | CALL_WORK_BACKWARD   | WorkflowNode        | **WorkflowNode**    | False          | True    |
    +----------------------+---------------------+---------------------+----------------+---------+

    .. note:: ``process_nodes`` and ``data_nodes`` are used as work queues and are modified in place:
        PKs are popped from them and newly found PKs are added to them until both are empty.

    :param process_nodes: Set of :py:class:`~aiida.orm.nodes.process.process.ProcessNode` node PKs.
    :param data_nodes: Set of :py:class:`~aiida.orm.nodes.data.data.Data` node PKs.
    :param input_calc_forward: Follow INPUT_CALC links in the forward direction (recursively).
    :param create_backward: Follow CREATE links in the backward direction (recursively).
    :param return_backward: Follow RETURN links in the backward direction (recursively).
    :param input_work_forward: Follow INPUT_WORK links in the forward direction (recursively).
    :param call_calc_backward: Follow CALL_CALC links in the backward direction (recursively).
    :param call_work_backward: Follow CALL_WORK links in the backward direction (recursively).

    :return: Set of retrieved Node PKs, list of links information, and the dict of traversal rule values
        (rule name to the value that was in effect) used for this retrieval.

    :raises `~aiida.tools.importexport.common.exceptions.ExportValidationError`:
        if a keyword argument names a traversal rule that is not toggleable. Keyword arguments that do
        not match any rule name are not inspected by this function.
    """
    from aiida.common.links import LinkType, GraphTraversalRules
    from aiida.orm import Data

    retrieved_nodes = set()
    links_uuid_dict = {}
    traversal_rules = {}

    # Create the dictionary with graph traversal rules used to determine the complete node set to be exported
    for name, rule in GraphTraversalRules.EXPORT.value.items():

        # Check that rules that are not toggleable are not specified in the keyword arguments
        if not rule.toggleable and name in kwargs:
            raise exceptions.ExportValidationError('traversal rule {} is not toggleable'.format(name))

        # Use the rule value passed in the keyword arguments, or if not the case, use the default
        traversal_rules[name] = kwargs.pop(name, rule.default)

    # Each entry: (rule name, link source class, link target class, direction, link type, queue for found PKs).
    # The order of the entries is the order in which the queries are run, which in turn fixes the order
    # of the returned link list, so it must not be changed casually.
    process_node_rules = (
        ('input_calc_backward', Data, ProcessNode, 'backward', LinkType.INPUT_CALC, data_nodes),
        ('create_forward', ProcessNode, Data, 'forward', LinkType.CREATE, data_nodes),
        ('return_forward', ProcessNode, Data, 'forward', LinkType.RETURN, data_nodes),
        ('input_work_backward', Data, ProcessNode, 'backward', LinkType.INPUT_WORK, data_nodes),
        ('call_calc_forward', ProcessNode, ProcessNode, 'forward', LinkType.CALL_CALC, process_nodes),
        ('call_calc_backward', ProcessNode, ProcessNode, 'backward', LinkType.CALL_CALC, process_nodes),
        ('call_work_forward', ProcessNode, ProcessNode, 'forward', LinkType.CALL_WORK, process_nodes),
        ('call_work_backward', ProcessNode, ProcessNode, 'backward', LinkType.CALL_WORK, process_nodes),
    )
    data_node_rules = (
        ('input_calc_forward', Data, ProcessNode, 'forward', LinkType.INPUT_CALC, process_nodes),
        ('create_backward', ProcessNode, Data, 'backward', LinkType.CREATE, process_nodes),
        ('return_backward', ProcessNode, Data, 'backward', LinkType.RETURN, process_nodes),
        ('input_work_forward', Data, ProcessNode, 'forward', LinkType.INPUT_WORK, process_nodes),
    )

    # We repeat until there are no further nodes to be visited
    while process_nodes or data_nodes:

        # ProcessNodes are handled first; Data nodes only once no ProcessNode is pending
        if process_nodes:
            current_node_pk = process_nodes.pop()
            applicable_rules = process_node_rules
        else:
            current_node_pk = data_nodes.pop()
            applicable_rules = data_node_rules

        # If it is already visited continue to the next node
        if current_node_pk in retrieved_nodes:
            continue

        # Otherwise say that it is a node to be exported
        retrieved_nodes.add(current_node_pk)

        for rule_name, input_type, output_type, direction, link_type, pending_nodes in applicable_rules:
            if not traversal_rules[rule_name]:
                continue

            links_uuids, found_nodes = _retrieve_linked_nodes_query(
                current_node_pk,
                input_type=input_type,
                output_type=output_type,
                direction=direction,
                link_type_value=link_type.value,
            )

            # Only queue nodes that have not been exported yet
            pending_nodes.update(found_nodes - retrieved_nodes)
            links_uuid_dict.update(links_uuids)

    return retrieved_nodes, list(links_uuid_dict.values()), traversal_rules
def _write_entity_data(
    total_entities: int, entity_queries: Dict[str, orm.QueryBuilder], writer: ArchiveWriterAbstract, batch_size: int
) -> Dict[str, Set[int]]:
    """Iterate through data returned from entity queries, serialize the DB fields, then write to the export.

    :param total_entities: total passed to the progress reporter.
    :param entity_queries: mapping of entity name to the query whose results should be written.
        Each query is extended (via ``fill_in_query``) with the models the entity's fields require.
    :param writer: archive writer; ``write_entity_data`` is called once per unique (entity, PK) pair.
    :param batch_size: batch size handed to ``iterdict`` of each query.

    :return: mapping of entity name to the set of PKs that were written.

    :raises `exceptions.ExportValidationError`: once all queries have been processed, if any written
        process node did not have its ``sealed`` attribute set to ``True``.
    """
    all_fields_info, unique_identifiers = get_all_fields_info()
    entity_separator = '_'

    exported_entity_pks: Dict[str, Set[int]] = defaultdict(set)
    unsealed_node_pks: Set[int] = set()

    with get_progress_reporter()(total=total_entities, desc='Writing entity data') as progress:

        for entity_name, entity_query in entity_queries.items():

            # Collect the models referenced by this entity's fields, then pull them into the query
            required_models = [
                field_info['requires']
                for field_info in all_fields_info[entity_name].values()
                if 'requires' in field_info
            ]
            for ref_model_name in required_models:
                fill_in_query(entity_query, entity_name, ref_model_name, [entity_name], entity_separator)

            for row in entity_query.iterdict(batch_size=batch_size):

                progress.update()

                for tag, record in row.items():

                    pk = record['id']

                    # A missing PK is an empty result of an outer join (per the original author's note): skip it
                    if pk is None:
                        continue

                    # The entity name is the last component of the result key
                    current_entity = tag.split(entity_separator)[-1]

                    # Each (entity, PK) pair is written only once
                    already_written = exported_entity_pks[current_entity]
                    if pk in already_written:
                        continue
                    already_written.add(pk)

                    fields = serialize_dict(
                        record,
                        remove_fields=['id'],
                        rename_fields=model_fields_to_file_fields[current_entity],
                    )

                    # Remember unsealed process nodes; they are reported together at the end
                    is_process_node = current_entity == NODE_ENTITY_NAME and fields['node_type'].startswith('process.')
                    if is_process_node and fields['attributes'].get('sealed', False) is not True:
                        unsealed_node_pks.add(pk)

                    writer.write_entity_data(current_entity, pk, unique_identifiers[current_entity], fields)

    if unsealed_node_pks:
        raise exceptions.ExportValidationError(
            'All ProcessNodes must be sealed before they can be exported. '
            f"Node(s) with PK(s): {', '.join(str(pk) for pk in unsealed_node_pks)} is/are not sealed."
        )

    return exported_entity_pks