def test_delete_through_utility_method(self):
    """Test deletion works correctly through the `aiida.backends.utils.delete_nodes_and_connections`."""
    from aiida.common import timezone
    from aiida.backends.utils import delete_nodes_and_connections

    # Build a minimal provenance graph: two stored data nodes used as inputs to one calculation.
    data_one = Data().store()
    data_two = Data().store()
    calculation = CalculationNode()
    calculation.add_incoming(data_one, LinkType.INPUT_CALC, 'input_one')
    calculation.add_incoming(data_two, LinkType.INPUT_CALC, 'input_two')
    calculation.store()

    # Attach exactly one log entry to each data node.
    log_one = Log(timezone.now(), 'test', 'INFO', data_one.pk).store()
    log_two = Log(timezone.now(), 'test', 'INFO', data_two.pk).store()

    # Sanity check: each data node has its own single log before deletion.
    assert len(Log.objects.get_logs_for(data_one)) == 1
    assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
    assert len(Log.objects.get_logs_for(data_two)) == 1
    assert Log.objects.get_logs_for(data_two)[0].pk == log_two.pk

    delete_nodes_and_connections([data_two.pk])

    # Deleting `data_two` must cascade to its logs, while `data_one` and its log stay intact.
    assert len(Log.objects.get_logs_for(data_one)) == 1
    assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
    assert len(Log.objects.get_logs_for(data_two)) == 0
def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
    """Delete nodes by a list of pks.

    This command will delete not only the specified nodes, but also the ones that are linked to these and should be
    also deleted in order to keep a consistent provenance according to the rules explained in the concepts section of
    the documentation. In summary:

    1. If a DATA node is deleted, any process nodes linked to it will also be deleted.

    2. If a CALC node is deleted, any incoming WORK node (callers) will be deleted as well whereas any incoming DATA
       node (inputs) will be kept. Outgoing DATA nodes (outputs) will be deleted by default but this can be disabled.

    3. If a WORK node is deleted, any incoming WORK node (callers) will be deleted as well, but all DATA nodes will be
       kept. Outgoing WORK or CALC nodes will be kept by default, but deletion of either of both kind of connected
       nodes can be enabled.

    These rules are 'recursive', so if a CALC node is deleted, then its output DATA nodes will be deleted as well, and
    then any CALC node that may have those as inputs, and so on.

    :param pks: a list of the PKs of the nodes to delete
    :param int verbosity: 0 prints nothing, 1 prints just sums and total, 2 prints individual nodes.
    :param bool dry_run: Just perform a dry run and do not delete anything. Print statistics according to the
        verbosity level set.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param kwargs: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` what rule names are
        toggleable and what the defaults are.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.common import exceptions
    from aiida.orm import Node, QueryBuilder, load_node
    from aiida.tools.graph.graph_traversers import get_nodes_delete

    # Keep only the pks that correspond to existing nodes, warning about and skipping the rest.
    starting_pks = []
    for pk in pks:
        try:
            load_node(pk)
        except exceptions.NotExistent:
            echo.echo_warning(f'warning: node with pk<{pk}> does not exist, skipping')
        else:
            starting_pks.append(pk)

    # An empty set might be problematic for the queries done below.
    if not starting_pks:
        if verbosity:
            echo.echo('Nothing to delete')
        return

    # Expand the starting set to the full consistent set according to the graph traversal rules.
    pks_set_to_delete = get_nodes_delete(starting_pks, **kwargs)['nodes']

    if verbosity > 0:
        echo.echo(
            'I {} delete {} node{}'.format(
                'would' if dry_run else 'will', len(pks_set_to_delete), 's' if len(pks_set_to_delete) > 1 else ''
            )
        )

    if verbosity > 1:
        builder = QueryBuilder().append(
            Node, filters={'id': {'in': pks_set_to_delete}}, project=('uuid', 'id', 'node_type', 'label')
        )
        echo.echo(f"The nodes I {'would' if dry_run else 'will'} delete:")
        for uuid, pk, type_string, label in builder.iterall():
            try:
                # Second-to-last component of the type string is the short class name, e.g. `data.dict.Dict.` -> `Dict`
                short_type_string = type_string.split('.')[-2]
            except IndexError:
                short_type_string = type_string
            echo.echo(f' {uuid} {pk} {short_type_string} {label}')

    if dry_run:
        if verbosity > 0:
            echo.echo('\nThis was a dry run, exiting without deleting anything')
        return

    # Ask for user confirmation unless it was explicitly bypassed with `force`.
    if not force:
        echo.echo_warning(f'YOU ARE ABOUT TO DELETE {len(pks_set_to_delete)} NODES! THIS CANNOT BE UNDONE!')
        if not click.confirm('Shall I continue?'):
            echo.echo('Exiting without deleting')
            return

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    if verbosity > 0:
        echo.echo('Starting node deletion...')

    delete_nodes_and_connections(pks_set_to_delete)

    if verbosity > 0:
        echo.echo('Nodes deleted from database, deleting files from the repository now...')

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)

    if verbosity > 0:
        echo.echo('Deletion completed.')
def delete_nodes(
    pks: Iterable[int],
    verbosity: Optional[int] = None,
    dry_run: Union[bool, Callable[[Set[int]], bool]] = True,
    force: Optional[bool] = None,
    **traversal_rules: bool
) -> Tuple[Set[int], bool]:
    """Delete nodes given a list of "starting" PKs.

    Starting from the given PKs, the full set of nodes to delete is determined by the graph traversal rules, so that
    the provenance that remains in the database stays consistent (see the Topics - Provenance section of the
    documentation for the rules). Briefly: deleting a DATA node deletes the processes linked to it; deleting a CALC
    node also deletes its callers and, by default, its outputs; deleting a WORK node also deletes its callers while
    keeping all DATA nodes. These rules apply recursively over the whole graph.

    .. deprecated:: 1.6.0
        The `verbosity` keyword will be removed in `v2.0.0`, set the level of `DELETE_LOGGER` instead.

    .. deprecated:: 1.6.0
        The `force` keyword will be removed in `v2.0.0`, use the `dry_run` option instead.

    :param pks: a list of starting PKs of the nodes to delete (the full set will be based on the traversal rules)
    :param dry_run: If True, return the pks to delete without deleting anything.
        If False, delete the pks without confirmation
        If callable, a function that return True/False, based on the pks, e.g. ``dry_run=lambda pks: True``
    :param traversal_rules: graph traversal rules.
        See :const:`aiida.common.links.GraphTraversalRules` for what rule names
        are toggleable and what the defaults are.
    :returns: (pks to delete, whether they were deleted)
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements

    # Both legacy keywords only trigger a deprecation warning; `force=True` is mapped onto `dry_run=False`.
    if verbosity is not None:
        warnings.warn(
            'The verbosity option is deprecated and will be removed in `aiida-core==2.0.0`. '
            'Set the level of DELETE_LOGGER instead', AiidaDeprecationWarning
        )  # pylint: disable=no-member

    if force is not None:
        warnings.warn(
            'The force option is deprecated and will be removed in `aiida-core==2.0.0`. '
            'Use dry_run instead', AiidaDeprecationWarning
        )  # pylint: disable=no-member
        if force is True:
            dry_run = False

    def _warn_missing(missing_pks: Iterable[int]):
        # Report non-existent starting PKs instead of failing; the traverser skips them.
        for missing_pk in missing_pks:
            DELETE_LOGGER.warning(f'warning: node with pk<{missing_pk}> does not exist, skipping')

    to_delete = get_nodes_delete(pks, get_links=False, missing_callback=_warn_missing, **traversal_rules)['nodes']

    DELETE_LOGGER.info('%s Node(s) marked for deletion', len(to_delete))

    # Only at DEBUG level do we pay for the extra query that lists every node individually.
    if to_delete and DELETE_LOGGER.level == logging.DEBUG:
        listing_query = QueryBuilder().append(
            Node, filters={'id': {'in': to_delete}}, project=('uuid', 'id', 'node_type', 'label')
        )
        DELETE_LOGGER.debug('Node(s) to delete:')
        for uuid, pk, type_string, label in listing_query.iterall():
            pieces = type_string.split('.')
            short_type_string = pieces[-2] if len(pieces) >= 2 else type_string
            DELETE_LOGGER.debug(f' {uuid} {pk} {short_type_string} {label}')

    # A callable `dry_run` acts as a confirmation hook: a truthy return aborts the deletion.
    if dry_run is True or (callable(dry_run) and dry_run(to_delete)):
        DELETE_LOGGER.info('This was a dry run, exiting without deleting anything')
        return (to_delete, False)

    if not to_delete:
        return (to_delete, True)

    # Collect the repository folders before touching the database, so that a failure while deleting the
    # database rows does not leave us with already-erased file repositories.
    repositories = [load_node(pk)._repository for pk in to_delete]  # pylint: disable=protected-access

    DELETE_LOGGER.info('Starting node deletion...')
    delete_nodes_and_connections(to_delete)
    DELETE_LOGGER.info('Nodes deleted from database, deleting files from the repository now...')

    # The database rows are gone; now it is safe to erase the corresponding folders.
    for repository in repositories:
        repository.erase(force=True)

    DELETE_LOGGER.info('Deletion of nodes completed.')
    return (to_delete, True)
def delete_nodes(pks, follow_calls=False, follow_returns=False, dry_run=False, force=False, disable_checks=False, verbosity=0):
    """
    Delete nodes by a list of pks

    :note: The script will also delete all children calculations generated from the specified nodes.

    :param pks: a list of the PKs of the nodes to delete
    :param bool follow_calls: Follow calls
    :param bool follow_returns: Follow returns. This is a very dangerous option, since anything returned by a workflow
        might have been used as input in many other calculations. Use with care, and never combine with force.
    :param bool dry_run: Do not delete, a dry run, with statistics printed according to verbosity levels.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param bool disable_checks: If True, will not check whether calculations are losing created data or called
        instances. If checks are disabled, also logging is disabled.
    :param int verbosity: The verbosity levels, 0 prints nothing, 1 prints just sums and total, 2 prints individual
        nodes.
    """
    from aiida.orm.querybuilder import QueryBuilder
    from aiida.common.links import LinkType
    from aiida.orm.node import Node
    from aiida.orm.calculation import Calculation
    from aiida.orm.data import Data
    from aiida.orm import load_node
    from aiida.orm.backend import construct_backend
    from aiida.backends.utils import delete_nodes_and_connections

    # The user email is only needed to attribute the warning log messages written at the end.
    backend = construct_backend()
    user_email = backend.users.get_automatic_user().email

    if not pks:
        # If I was passed an empty list, I don't do anything.
        # I prefer checking explicitly, an empty set might be problematic for the queries done below.
        if verbosity:
            print "Nothing to delete"
        return

    # The following code is just for the querying of downwards provenance.
    # Ideally, there should be a module to interface with, but this is the solution for now.
    # By only dealing with ids, and keeping track of what has been already
    # visited in the query, there's good performance and no infinite loops.
    link_types_to_follow = [LinkType.CREATE.value, LinkType.INPUT.value]
    if follow_calls:
        link_types_to_follow.append(LinkType.CALL.value)
    if follow_returns:
        link_types_to_follow.append(LinkType.RETURN.value)

    edge_filters = {'type': {'in': link_types_to_follow}}

    # Operational set always includes the recently (in the last iteration) added nodes.
    operational_set = set().union(set(pks))  # Union to copy the set!
    pks_set_to_delete = set().union(set(pks))

    while operational_set:
        # new_pks_set are the pks of all nodes that are connected to the operational node set
        # with the links specified.
        new_pks_set = set([
            i for i, in QueryBuilder().append(
                Node, filters={'id': {'in': operational_set}}).append(
                    Node, project='id', edge_filters=edge_filters).iterall()
        ])
        # The operational set is only those pks that haven't been yet put into the pks_set_to_delete.
        operational_set = new_pks_set.difference(pks_set_to_delete)
        # I add these pks in the pks_set_to_delete with a union
        pks_set_to_delete = pks_set_to_delete.union(new_pks_set)

    if verbosity > 0:
        print "I {} delete {} node{}".format(
            'would' if dry_run else 'will', len(pks_set_to_delete),
            's' if len(pks_set_to_delete) > 1 else '')
    if verbosity > 1:
        qb = QueryBuilder().append(
            Node, filters={'id': {'in': pks_set_to_delete}},
            project=('uuid', 'id', 'type', 'label'))
        print "The nodes I {} delete:".format('would' if dry_run else 'will')
        for uuid, pk, type_string, label in qb.iterall():
            try:
                # The second-to-last component of the type string is the short class name.
                short_type_string = type_string.split('.')[-2]
            except IndexError:
                short_type_string = type_string
            print " {} {} {} {}".format(uuid, pk, short_type_string, label)

    # Here I am checking whether I am deleting:
    # - A data instance without also deleting the creator, which breaks the relationship between a calculation
    #   and its data.
    # - A calculation instance that was called, without also deleting the caller.
    if not disable_checks:
        # Callers (kept) that would lose a called calculation (deleted), matched via CALL links.
        called_qb = QueryBuilder()
        called_qb.append(Calculation, filters={'id': {'!in': pks_set_to_delete}}, project='id')
        called_qb.append(
            Calculation, project='type', edge_project='label',
            filters={'id': {'in': pks_set_to_delete}},
            edge_filters={'type': {'==': LinkType.CALL.value}})
        caller_to_called2delete = called_qb.all()

        if verbosity > 0 and caller_to_called2delete:
            calculation_pks_losing_called = set(zip(*caller_to_called2delete)[0])
            print "\n{} calculation{} {} lose at least one called instance".format(
                len(calculation_pks_losing_called),
                's' if len(calculation_pks_losing_called) > 1 else '',
                'would' if dry_run else 'will')
            if verbosity > 1:
                print "These are the calculations that {} lose a called instance:".format(
                    'would' if dry_run else 'will')
                for calc_losing_called_pk in calculation_pks_losing_called:
                    print '  ', load_node(calc_losing_called_pk)

        # Creators (kept) that would lose created data (deleted), matched via CREATE links.
        created_qb = QueryBuilder()
        created_qb.append(Calculation, filters={'id': {'!in': pks_set_to_delete}}, project='id')
        created_qb.append(
            Data, project='type', edge_project='label',
            filters={'id': {'in': pks_set_to_delete}},
            edge_filters={'type': {'==': LinkType.CREATE.value}})
        creator_to_created2delete = created_qb.all()

        if verbosity > 0 and creator_to_created2delete:
            calculation_pks_losing_created = set(zip(*creator_to_created2delete)[0])
            print "\n{} calculation{} {} lose at least one created data-instance".format(
                len(calculation_pks_losing_created),
                's' if len(calculation_pks_losing_created) > 1 else '',
                'would' if dry_run else 'will')
            if verbosity > 1:
                print "These are the calculations that {} lose a created data-instance:".format(
                    'would' if dry_run else 'will')
                for calc_losing_created_pk in calculation_pks_losing_created:
                    print '  ', load_node(calc_losing_created_pk)

    if dry_run:
        if verbosity > 0:
            print "\nThis was a dry run, exiting without deleting anything"
        return

    # Asking for user confirmation here
    if force:
        pass
    else:
        print "YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!".format(
            len(pks_set_to_delete))
        if raw_input("Shall I continue? [Y/N] ").lower() != 'y':
            print "Exiting without deleting"
            return

    # Recover the list of folders to delete before actually deleting
    # the nodes. I will delete the folders only later, so that if
    # there is a problem during the deletion of the nodes in
    # the DB, I don't delete the folders
    folders = [load_node(_).folder for _ in pks_set_to_delete]

    delete_nodes_and_connections(pks_set_to_delete)

    if not disable_checks:
        # I pass now to the log the information for calculations losing created data or called instances
        for calc_pk, calc_type_string, link_label in caller_to_called2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "called with the label {} "
                                "by this calculation".format(
                                    user_email, calc_type_string, link_label))
        for calc_pk, data_type_string, link_label in creator_to_created2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "created with the label {} "
                                "by this calculation".format(
                                    user_email, data_type_string, link_label))

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for f in folders:
        f.erase()
def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
    """Delete nodes by a list of pks.

    This command will delete not only the specified nodes, but also the ones that are linked to these and should be
    also deleted in order to keep a consistent provenance according to the rules explained in the concepts section of
    the documentation. In summary:

    1. If a DATA node is deleted, any process nodes linked to it will also be deleted.

    2. If a CALC node is deleted, any incoming WORK node (callers) will be deleted as well whereas any incoming DATA
       node (inputs) will be kept. Outgoing DATA nodes (outputs) will be deleted by default but this can be disabled.

    3. If a WORK node is deleted, any incoming WORK node (callers) will be deleted as well, but all DATA nodes will be
       kept. Outgoing WORK or CALC nodes will be kept by default, but deletion of either of both kind of connected
       nodes can be enabled.

    These rules are 'recursive', so if a CALC node is deleted, then its output DATA nodes will be deleted as well, and
    then any CALC node that may have those as inputs, and so on.

    :param pks: a list of the PKs of the nodes to delete
    :param int verbosity: 0 prints nothing, 1 prints just sums and total, 2 prints individual nodes.
    :param bool dry_run: Just perform a dry run and do not delete anything. Print statistics according to the
        verbosity level set.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param kwargs: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` what rule names are
        toggleable and what the defaults are.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.common import exceptions
    from aiida.common.links import GraphTraversalRules
    from aiida.orm import Node, QueryBuilder, load_node

    # Keep only the pks of nodes that actually exist, warning about and skipping the rest.
    starting_pks = []
    for pk in pks:
        try:
            load_node(pk)
        except exceptions.NotExistent:
            echo.echo_warning('warning: node with pk<{}> does not exist, skipping'.format(pk))
        else:
            starting_pks.append(pk)

    # An empty set might be problematic for the queries done below.
    if not starting_pks:
        if verbosity:
            echo.echo('Nothing to delete')
        return

    follow_forwards = []
    follow_backwards = []

    # Create the dictionary with graph traversal rules to be used in determining complete node set to be exported
    for name, rule in GraphTraversalRules.DELETE.value.items():

        # Check that rules that are not toggleable are not specified in the keyword arguments
        if not rule.toggleable and name in kwargs:
            raise exceptions.ExportValidationError('traversal rule {} is not toggleable'.format(name))

        follow = kwargs.pop(name, rule.default)

        if follow:
            if rule.direction == 'forward':
                follow_forwards.append(rule.link_type.value)
            elif rule.direction == 'backward':
                follow_backwards.append(rule.link_type.value)
            else:
                raise InternalError('unrecognized direction `{}` for graph traversal rule'.format(rule.direction))

    links_backwards = {'type': {'in': follow_backwards}}
    links_forwards = {'type': {'in': follow_forwards}}

    operational_set = set().union(set(starting_pks))
    accumulator_set = set().union(set(starting_pks))

    # Iteratively expand the node set, following enabled forward links downstream and backward links upstream,
    # until no new nodes are found.
    while operational_set:
        new_pks_set = set()

        query_nodes = QueryBuilder()
        query_nodes.append(Node, filters={'id': {'in': operational_set}}, tag='sources')
        query_nodes.append(
            Node,
            filters={'id': {'!in': accumulator_set}},
            edge_filters=links_forwards,
            with_incoming='sources',
            project='id'
        )
        new_pks_set.update(i for i, in query_nodes.iterall())

        query_nodes = QueryBuilder()
        query_nodes.append(Node, filters={'id': {'in': operational_set}}, tag='sources')
        query_nodes.append(
            Node,
            filters={'id': {'!in': accumulator_set}},
            edge_filters=links_backwards,
            with_outgoing='sources',
            project='id'
        )
        new_pks_set.update(i for i, in query_nodes.iterall())

        operational_set = new_pks_set.difference(accumulator_set)
        accumulator_set.update(new_pks_set)

    pks_set_to_delete = accumulator_set

    if verbosity > 0:
        echo.echo(
            'I {} delete {} node{}'.format(
                'would' if dry_run else 'will', len(pks_set_to_delete), 's' if len(pks_set_to_delete) > 1 else ''
            )
        )

    if verbosity > 1:
        builder = QueryBuilder().append(
            Node, filters={'id': {'in': pks_set_to_delete}}, project=('uuid', 'id', 'node_type', 'label')
        )
        echo.echo('The nodes I {} delete:'.format('would' if dry_run else 'will'))
        for uuid, pk, type_string, label in builder.iterall():
            try:
                # Second-to-last component of the type string is the short class name.
                short_type_string = type_string.split('.')[-2]
            except IndexError:
                short_type_string = type_string
            echo.echo(' {} {} {} {}'.format(uuid, pk, short_type_string, label))

    if dry_run:
        if verbosity > 0:
            echo.echo('\nThis was a dry run, exiting without deleting anything')
        return

    # Ask for user confirmation unless it was explicitly bypassed with `force`.
    if not force:
        echo.echo_warning('YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!'.format(len(pks_set_to_delete)))
        if not click.confirm('Shall I continue?'):
            echo.echo('Exiting without deleting')
            return

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    if verbosity > 0:
        echo.echo('Starting node deletion...')

    delete_nodes_and_connections(pks_set_to_delete)

    if verbosity > 0:
        echo.echo('Nodes deleted from database, deleting files from the repository now...')

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)

    if verbosity > 0:
        echo.echo('Deletion completed.')