Example #1
    def test_delete_through_utility_method(self):
        """Test deletion works correctly through the `aiida.backends.utils.delete_nodes_and_connections`."""
        from aiida.common import timezone
        from aiida.backends.utils import delete_nodes_and_connections

        data_one = Data().store()
        data_two = Data().store()
        calculation = CalculationNode()
        calculation.add_incoming(data_one, LinkType.INPUT_CALC, 'input_one')
        calculation.add_incoming(data_two, LinkType.INPUT_CALC, 'input_two')
        calculation.store()

        log_one = Log(timezone.now(), 'test', 'INFO', data_one.pk).store()
        log_two = Log(timezone.now(), 'test', 'INFO', data_two.pk).store()

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 1
        assert Log.objects.get_logs_for(data_two)[0].pk == log_two.pk

        delete_nodes_and_connections([data_two.pk])

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 0
Example #2
def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
    """Delete nodes by a list of pks.

    This command will delete not only the specified nodes, but also the ones that are
    linked to them and should also be deleted in order to keep a consistent provenance
    according to the rules explained in the concepts section of the documentation.
    In summary:

    1. If a DATA node is deleted, any process nodes linked to it will also be deleted.

    2. If a CALC node is deleted, any incoming WORK node (callers) will be deleted as
    well whereas any incoming DATA node (inputs) will be kept. Outgoing DATA nodes
    (outputs) will be deleted by default but this can be disabled.

    3. If a WORK node is deleted, any incoming WORK node (callers) will be deleted as
    well, but all DATA nodes will be kept. Outgoing WORK or CALC nodes will be kept by
    default, but deletion of either kind of connected node can be enabled.

    These rules are 'recursive', so if a CALC node is deleted, then its output DATA
    nodes will be deleted as well, and then any CALC node that may have those as
    inputs, and so on.

    :param pks: a list of the PKs of the nodes to delete
    :param int verbosity: 0 prints nothing,
                          1 prints just sums and total,
                          2 prints individual nodes.

    :param kwargs: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` for what rule names
        are toggleable and what the defaults are.
    :param bool dry_run:
        Just perform a dry run and do not delete anything. Print statistics according
        to the verbosity level set.
    :param bool force:
        Do not ask for confirmation to delete nodes.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.common import exceptions
    from aiida.orm import Node, QueryBuilder, load_node
    from aiida.tools.graph.graph_traversers import get_nodes_delete

    starting_pks = []
    for pk in pks:
        try:
            load_node(pk)
        except exceptions.NotExistent:
            echo.echo_warning(
                f'warning: node with pk<{pk}> does not exist, skipping')
        else:
            starting_pks.append(pk)

    # An empty set might be problematic for the queries done below.
    if not starting_pks:
        if verbosity:
            echo.echo('Nothing to delete')
        return

    pks_set_to_delete = get_nodes_delete(starting_pks, **kwargs)['nodes']

    if verbosity > 0:
        echo.echo('I {} delete {} node{}'.format(
            'would' if dry_run else 'will', len(pks_set_to_delete),
            's' if len(pks_set_to_delete) > 1 else ''))
        if verbosity > 1:
            builder = QueryBuilder().append(
                Node,
                filters={'id': {
                    'in': pks_set_to_delete
                }},
                project=('uuid', 'id', 'node_type', 'label'))
            echo.echo(f"The nodes I {'would' if dry_run else 'will'} delete:")
            for uuid, pk, type_string, label in builder.iterall():
                try:
                    short_type_string = type_string.split('.')[-2]
                except IndexError:
                    short_type_string = type_string
                echo.echo(f'   {uuid} {pk} {short_type_string} {label}')

    if dry_run:
        if verbosity > 0:
            echo.echo(
                '\nThis was a dry run, exiting without deleting anything')
        return

    # Ask for user confirmation, unless the force flag is set
    if not force:
        echo.echo_warning(
            f'YOU ARE ABOUT TO DELETE {len(pks_set_to_delete)} NODES! THIS CANNOT BE UNDONE!'
        )
        if not click.confirm('Shall I continue?'):
            echo.echo('Exiting without deleting')
            return

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    if verbosity > 0:
        echo.echo('Starting node deletion...')
    delete_nodes_and_connections(pks_set_to_delete)

    if verbosity > 0:
        echo.echo(
            'Nodes deleted from database, deleting files from the repository now...'
        )

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)

    if verbosity > 0:
        echo.echo('Deletion completed.')
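
A minimal usage sketch for this version (not from the source; the pk is illustrative): preview the deletion with full verbosity first, then run it non-interactively. Traversal-rule keyword arguments from :const:`aiida.common.links.GraphTraversalRules` could be passed in the same calls.

# Hypothetical usage: inspect first, then delete without the confirmation prompt.
delete_nodes([1234], verbosity=2, dry_run=True)
delete_nodes([1234], verbosity=1, dry_run=False, force=True)
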
Example #3
def delete_nodes(
    pks: Iterable[int],
    verbosity: Optional[int] = None,
    dry_run: Union[bool, Callable[[Set[int]], bool]] = True,
    force: Optional[bool] = None,
    **traversal_rules: bool
) -> Tuple[Set[int], bool]:
    """Delete nodes given a list of "starting" PKs.

    This command will delete not only the specified nodes, but also the ones that are
    linked to them and should also be deleted in order to keep a consistent provenance
    according to the rules explained in the Topics - Provenance section of the documentation.
    In summary:

    1. If a DATA node is deleted, any process nodes linked to it will also be deleted.

    2. If a CALC node is deleted, any incoming WORK node (callers) will be deleted as
    well whereas any incoming DATA node (inputs) will be kept. Outgoing DATA nodes
    (outputs) will be deleted by default but this can be disabled.

    3. If a WORK node is deleted, any incoming WORK node (callers) will be deleted as
    well, but all DATA nodes will be kept. Outgoing WORK or CALC nodes will be kept by
    default, but deletion of either kind of connected node can be enabled.

    These rules are 'recursive', so if a CALC node is deleted, then its output DATA
    nodes will be deleted as well, and then any CALC node that may have those as
    inputs, and so on.

    .. deprecated:: 1.6.0
        The `verbosity` keyword will be removed in `v2.0.0`, set the level of `DELETE_LOGGER` instead.

    .. deprecated:: 1.6.0
        The `force` keyword will be removed in `v2.0.0`, use the `dry_run` option instead.

    :param pks: a list of starting PKs of the nodes to delete
        (the full set will be based on the traversal rules)

    :param dry_run:
        If True, return the pks to delete without deleting anything.
        If False, delete the pks without confirmation.
        If callable, a function that returns True/False based on the pks, e.g. ``dry_run=lambda pks: True``

    :param traversal_rules: graph traversal rules.
        See :const:`aiida.common.links.GraphTraversalRules` for what rule names
        are toggleable and what the defaults are.

    :returns: (pks to delete, whether they were deleted)

    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements

    if verbosity is not None:
        warnings.warn(
            'The verbosity option is deprecated and will be removed in `aiida-core==2.0.0`. '
            'Set the level of DELETE_LOGGER instead', AiidaDeprecationWarning
        )  # pylint: disable=no-member

    if force is not None:
        warnings.warn(
            'The force option is deprecated and will be removed in `aiida-core==2.0.0`. '
            'Use dry_run instead', AiidaDeprecationWarning
        )  # pylint: disable=no-member
        if force is True:
            dry_run = False

    def _missing_callback(_pks: Iterable[int]):
        for _pk in _pks:
            DELETE_LOGGER.warning(f'warning: node with pk<{_pk}> does not exist, skipping')

    pks_set_to_delete = get_nodes_delete(pks, get_links=False, missing_callback=_missing_callback,
                                         **traversal_rules)['nodes']

    DELETE_LOGGER.info('%s Node(s) marked for deletion', len(pks_set_to_delete))

    if pks_set_to_delete and DELETE_LOGGER.level == logging.DEBUG:
        builder = QueryBuilder().append(
            Node, filters={'id': {
                'in': pks_set_to_delete
            }}, project=('uuid', 'id', 'node_type', 'label')
        )
        DELETE_LOGGER.debug('Node(s) to delete:')
        for uuid, pk, type_string, label in builder.iterall():
            try:
                short_type_string = type_string.split('.')[-2]
            except IndexError:
                short_type_string = type_string
            DELETE_LOGGER.debug(f'   {uuid} {pk} {short_type_string} {label}')

    if dry_run is True:
        DELETE_LOGGER.info('This was a dry run, exiting without deleting anything')
        return (pks_set_to_delete, False)

    # confirm deletion
    if callable(dry_run) and dry_run(pks_set_to_delete):
        DELETE_LOGGER.info('This was a dry run, exiting without deleting anything')
        return (pks_set_to_delete, False)

    if not pks_set_to_delete:
        return (pks_set_to_delete, True)

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    DELETE_LOGGER.info('Starting node deletion...')
    delete_nodes_and_connections(pks_set_to_delete)

    DELETE_LOGGER.info('Nodes deleted from database, deleting files from the repository now...')

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)

    DELETE_LOGGER.info('Deletion of nodes completed.')

    return (pks_set_to_delete, True)
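
A short usage sketch of this newer interface (illustrative, not from the source): the dry_run callable doubles as a confirmation hook, since returning True from it aborts the deletion, and the returned tuple reports what was, or would have been, deleted.

def _confirm_deletion(pks):
    # Return True to treat the call as a dry run (abort), False to proceed.
    return input(f'Delete {len(pks)} node(s)? [y/N] ').lower() != 'y'

deleted_pks, was_deleted = delete_nodes([1234, 5678], dry_run=_confirm_deletion)
if was_deleted:
    print(f'Deleted {len(deleted_pks)} node(s)')
else:
    print(f'Dry run: {len(deleted_pks)} node(s) would have been deleted')
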
Example #4
def delete_nodes(pks,
                 follow_calls=False,
                 follow_returns=False,
                 dry_run=False,
                 force=False,
                 disable_checks=False,
                 verbosity=0):
    """
    Delete nodes by a list of pks

    :note: The script will also delete all child calculations generated from the specified nodes.

    :param pks: a list of the PKs of the nodes to delete
    :param bool follow_calls: Follow calls
    :param bool follow_returns:
        Follow returns. This is a very dangerous option, since anything returned by a workflow might have
        been used as input in many other calculations. Use with care, and never combine with force.
    :param bool dry_run: Do not delete anything, just perform a dry run and print statistics according to the verbosity level.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param bool disable_checks:
        If True, will not check whether calculations are losing created data or called instances.
        If checks are disabled, logging is disabled as well.
    :param int verbosity:
        The verbosity levels, 0 prints nothing, 1 prints just sums and total, 2 prints individual nodes.
    """

    from aiida.orm.querybuilder import QueryBuilder
    from aiida.common.links import LinkType
    from aiida.orm.node import Node
    from aiida.orm.calculation import Calculation
    from aiida.orm.data import Data
    from aiida.orm import load_node
    from aiida.orm.backend import construct_backend
    from aiida.backends.utils import delete_nodes_and_connections

    backend = construct_backend()
    user_email = backend.users.get_automatic_user().email

    if not pks:
        # If I was passed an empty list, I don't do anything
        # I prefer checking explicitly, an empty set might be problematic for the queries done below.
        if verbosity:
            print "Nothing to delete"
        return

    # The following code is just for the querying of downwards provenance.
    # Ideally, there should be a module to interface with, but this is the solution for now.
    # By only dealing with ids, and keeping track of what has been already
    # visited in the query, there's good performance and no infinite loops.
    link_types_to_follow = [LinkType.CREATE.value, LinkType.INPUT.value]
    if follow_calls:
        link_types_to_follow.append(LinkType.CALL.value)
    if follow_returns:
        link_types_to_follow.append(LinkType.RETURN.value)

    edge_filters = {'type': {'in': link_types_to_follow}}

    # Operational set always includes the recently (in the last iteration added) nodes.
    operational_set = set().union(set(pks))  # Union to copy the set!
    pks_set_to_delete = set().union(set(pks))
    while operational_set:
        # new_pks_set are the pks of all nodes that are connected to the operational node set
        # with the links specified.
        new_pks_set = set([
            i for i, in QueryBuilder().
            append(Node, filters={
                'id': {
                    'in': operational_set
                }
            }).append(Node, project='id', edge_filters=edge_filters).iterall()
        ])
        # The operational set is only those pks that haven't been yet put into the pks_set_to_delete.
        operational_set = new_pks_set.difference(pks_set_to_delete)

        # I add these pks in the pks_set_to_delete with a union
        pks_set_to_delete = pks_set_to_delete.union(new_pks_set)

    if verbosity > 0:
        print "I {} delete {} node{}".format(
            'would' if dry_run else 'will', len(pks_set_to_delete),
            's' if len(pks_set_to_delete) > 1 else '')
        if verbosity > 1:
            qb = QueryBuilder().append(
                Node,
                filters={'id': {
                    'in': pks_set_to_delete
                }},
                project=('uuid', 'id', 'type', 'label'))
            print "The nodes I {} delete:".format(
                'would' if dry_run else 'will')
            for uuid, pk, type_string, label in qb.iterall():
                try:
                    short_type_string = type_string.split('.')[-2]
                except IndexError:
                    short_type_string = type_string
                print "   {} {} {} {}".format(uuid, pk, short_type_string,
                                              label)

    # Here I am checking whether I am deleting
    ## A data instance without also deleting the creator, which breaks the relationship between a calculation and its data
    ## A calculation instance that was called, without also deleting the caller.

    if not disable_checks:
        called_qb = QueryBuilder()
        called_qb.append(Calculation,
                         filters={'id': {
                             '!in': pks_set_to_delete
                         }},
                         project='id')
        called_qb.append(Calculation,
                         project='type',
                         edge_project='label',
                         filters={'id': {
                             'in': pks_set_to_delete
                         }},
                         edge_filters={'type': {
                             '==': LinkType.CALL.value
                         }})
        caller_to_called2delete = called_qb.all()

        if verbosity > 0 and caller_to_called2delete:
            calculation_pks_losing_called = set(
                zip(*caller_to_called2delete)[0])
            print "\n{} calculation{} {} lose at least one called instance".format(
                len(calculation_pks_losing_called),
                's' if len(calculation_pks_losing_called) > 1 else '',
                'would' if dry_run else 'will')
            if verbosity > 1:
                print "These are the calculations that {} lose a called instance:".format(
                    'would' if dry_run else 'will')
                for calc_losing_called_pk in calculation_pks_losing_called:
                    print '  ', load_node(calc_losing_called_pk)

        created_qb = QueryBuilder()
        created_qb.append(Calculation,
                          filters={'id': {
                              '!in': pks_set_to_delete
                          }},
                          project='id')
        created_qb.append(Data,
                          project='type',
                          edge_project='label',
                          filters={'id': {
                              'in': pks_set_to_delete
                          }},
                          edge_filters={'type': {
                              '==': LinkType.CREATE.value
                          }})

        creator_to_created2delete = created_qb.all()
        if verbosity > 0 and creator_to_created2delete:
            calculation_pks_losing_created = set(
                zip(*creator_to_created2delete)[0])
            print "\n{} calculation{} {} lose at least one created data-instance".format(
                len(calculation_pks_losing_created),
                's' if len(calculation_pks_losing_created) > 1 else '',
                'would' if dry_run else 'will')
            if verbosity > 1:
                print "These are the calculations that {} lose a created data-instance:".format(
                    'would' if dry_run else 'will')
                for calc_losing_created_pk in calculation_pks_losing_created:
                    print '  ', load_node(calc_losing_created_pk)

    if dry_run:
        if verbosity > 0:
            print "\nThis was a dry run, exiting without deleting anything"
        return

    # Ask for user confirmation, unless the force flag is set
    if not force:
        print "YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!".format(
            len(pks_set_to_delete))
        if raw_input("Shall I continue? [Y/N] ").lower() != 'y':
            print "Exiting without deleting"
            return

    # Recover the list of folders to delete before actually deleting
    # the nodes.  I will delete the folders only later, so that if
    # there is a problem during the deletion of the nodes in
    # the DB, I don't delete the folders
    folders = [load_node(pk).folder for pk in pks_set_to_delete]

    delete_nodes_and_connections(pks_set_to_delete)

    if not disable_checks:
        # I pass now to the log the information for calculations losing created data or called instances
        for calc_pk, calc_type_string, link_label in caller_to_called2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "called with the label {} "
                                "by this calculation".format(
                                    user_email, calc_type_string, link_label))

        for calc_pk, data_type_string, link_label in creator_to_created2delete:
            calc = load_node(calc_pk)
            calc.logger.warning("User {} deleted "
                                "an instance of type {} "
                                "created with the label {} "
                                "by this calculation".format(
                                    user_email, data_type_string, link_label))

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for f in folders:
        f.erase()
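
A brief usage sketch of this legacy signature (illustrative pk, kept to the documented parameters): do a verbose dry run before committing, and enable follow_returns only deliberately, given the warning in the docstring.

delete_nodes([1234], follow_calls=True, dry_run=True, verbosity=2)
delete_nodes([1234], follow_calls=True, verbosity=1)
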
Example #5
def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
    """Delete nodes by a list of pks.

    This command will delete not only the specified nodes, but also the ones that are
    linked to them and should also be deleted in order to keep a consistent provenance
    according to the rules explained in the concepts section of the documentation.
    In summary:

    1. If a DATA node is deleted, any process nodes linked to it will also be deleted.

    2. If a CALC node is deleted, any incoming WORK node (callers) will be deleted as
    well whereas any incoming DATA node (inputs) will be kept. Outgoing DATA nodes
    (outputs) will be deleted by default but this can be disabled.

    3. If a WORK node is deleted, any incoming WORK node (callers) will be deleted as
    well, but all DATA nodes will be kept. Outgoing WORK or CALC nodes will be kept by
    default, but deletion of either kind of connected node can be enabled.

    These rules are 'recursive', so if a CALC node is deleted, then its output DATA
    nodes will be deleted as well, and then any CALC node that may have those as
    inputs, and so on.

    :param pks: a list of the PKs of the nodes to delete
    :param int verbosity: 0 prints nothing,
                          1 prints just sums and total,
                          2 prints individual nodes.

    :param kwargs: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` for what rule names
        are toggleable and what the defaults are.
    :param bool dry_run:
        Just perform a dry run and do not delete anything. Print statistics according
        to the verbosity level set.
    :param bool force:
        Do not ask for confirmation to delete nodes.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.common import exceptions
    from aiida.common.links import GraphTraversalRules
    from aiida.orm import Node, QueryBuilder, load_node

    starting_pks = []
    for pk in pks:
        try:
            load_node(pk)
        except exceptions.NotExistent:
            echo.echo_warning('warning: node with pk<{}> does not exist, skipping'.format(pk))
        else:
            starting_pks.append(pk)

    # An empty set might be problematic for the queries done below.
    if not starting_pks:
        if verbosity:
            echo.echo('Nothing to delete')
        return

    follow_forwards = []
    follow_backwards = []

    # Create the dictionary with graph traversal rules to be used in determining the complete set of nodes to be deleted
    for name, rule in GraphTraversalRules.DELETE.value.items():

        # Check that rules that are not toggleable are not specified in the keyword arguments
        if not rule.toggleable and name in kwargs:
            raise exceptions.ExportValidationError('traversal rule {} is not toggleable'.format(name))

        follow = kwargs.pop(name, rule.default)

        if follow:
            if rule.direction == 'forward':
                follow_forwards.append(rule.link_type.value)
            elif rule.direction == 'backward':
                follow_backwards.append(rule.link_type.value)
            else:
                raise InternalError('unrecognized direction `{}` for graph traversal rule'.format(rule.direction))

    links_backwards = {'type': {'in': follow_backwards}}
    links_forwards = {'type': {'in': follow_forwards}}

    operational_set = set().union(set(starting_pks))
    accumulator_set = set().union(set(starting_pks))

    while operational_set:
        new_pks_set = set()

        query_nodes = QueryBuilder()
        query_nodes.append(Node, filters={'id': {'in': operational_set}}, tag='sources')
        query_nodes.append(
            Node,
            filters={'id': {
                '!in': accumulator_set
            }},
            edge_filters=links_forwards,
            with_incoming='sources',
            project='id'
        )
        new_pks_set.update(i for i, in query_nodes.iterall())

        query_nodes = QueryBuilder()
        query_nodes.append(Node, filters={'id': {'in': operational_set}}, tag='sources')
        query_nodes.append(
            Node,
            filters={'id': {
                '!in': accumulator_set
            }},
            edge_filters=links_backwards,
            with_outgoing='sources',
            project='id'
        )
        new_pks_set.update(i for i, in query_nodes.iterall())

        operational_set = new_pks_set.difference(accumulator_set)
        accumulator_set.update(new_pks_set)

    pks_set_to_delete = accumulator_set

    if verbosity > 0:
        echo.echo(
            'I {} delete {} node{}'.format(
                'would' if dry_run else 'will', len(pks_set_to_delete), 's' if len(pks_set_to_delete) > 1 else ''
            )
        )
        if verbosity > 1:
            builder = QueryBuilder().append(
                Node, filters={'id': {
                    'in': pks_set_to_delete
                }}, project=('uuid', 'id', 'node_type', 'label')
            )
            echo.echo('The nodes I {} delete:'.format('would' if dry_run else 'will'))
            for uuid, pk, type_string, label in builder.iterall():
                try:
                    short_type_string = type_string.split('.')[-2]
                except IndexError:
                    short_type_string = type_string
                echo.echo('   {} {} {} {}'.format(uuid, pk, short_type_string, label))

    if dry_run:
        if verbosity > 0:
            echo.echo('\nThis was a dry run, exiting without deleting anything')
        return

    # Ask for user confirmation, unless the force flag is set
    if not force:
        echo.echo_warning('YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!'.format(len(pks_set_to_delete)))
        if not click.confirm('Shall I continue?'):
            echo.echo('Exiting without deleting')
            return

    # Recover the list of folders to delete before actually deleting the nodes. I will delete the folders only later,
    # so that if there is a problem during the deletion of the nodes in the DB, I don't delete the folders
    repositories = [load_node(pk)._repository for pk in pks_set_to_delete]  # pylint: disable=protected-access

    if verbosity > 0:
        echo.echo('Starting node deletion...')
    delete_nodes_and_connections(pks_set_to_delete)

    if verbosity > 0:
        echo.echo('Nodes deleted from database, deleting files from the repository now...')

    # If we are here, we managed to delete the entries from the DB.
    # I can now delete the folders
    for repository in repositories:
        repository.erase(force=True)

    if verbosity > 0:
        echo.echo('Deletion completed.')
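
The while-loops in the last two examples implement a plain frontier expansion over the provenance graph: the operational set holds the nodes added in the previous round, the accumulator set holds everything marked for deletion so far, and the loop stops when a round adds nothing new. A self-contained sketch of the same idea on an in-memory adjacency mapping (purely illustrative, no AiiDA or database involved):

def expand_deletion_set(starting_pks, forward_links, backward_links):
    # forward_links maps a pk to outgoing neighbours that must follow it into
    # deletion; backward_links maps a pk to such incoming neighbours.
    operational_set = set(starting_pks)   # frontier: nodes added in the last round
    accumulator_set = set(starting_pks)   # everything marked for deletion so far

    while operational_set:
        new_pks = set()
        for pk in operational_set:
            new_pks.update(forward_links.get(pk, ()))
            new_pks.update(backward_links.get(pk, ()))
        # Only nodes not yet accumulated form the next frontier.
        operational_set = new_pks - accumulator_set
        accumulator_set |= new_pks

    return accumulator_set


# Toy graph: deleting data node 1 drags in calculation 10, whose output data
# node 20 then follows.
assert expand_deletion_set({1}, {1: {10}, 10: {20}}, {}) == {1, 10, 20}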