def handle(self, *args, **options):
        cursor = connection.cursor()
        projects = Project.objects.all()
        project_ids = [p.id for p in projects]

        self.stdout.write(
            'Recreating treenode_edge, treenode_connector_edge, connector_geom'
        )
        rebuild_edge_tables(log=lambda msg: self.stdout.write(msg))

        self.stdout.write('Recreating catmaid_stats_summary')
        cursor.execute("TRUNCATE catmaid_stats_summary")
        for p in projects:
            populate_stats_summary(p.id, False, False)

        self.stdout.write('Recreating catmaid_skeleton_summary')
        cursor.execute("""
            TRUNCATE catmaid_skeleton_summary;
            SELECT refresh_skeleton_summary_table();
        """)

        self.stdout.write('Recreating node_query_cache')
        update_node_query_cache(log=lambda x: self.stdout.write(x))

        self.stdout.write('Done')
    def handle(self, *args, **options):
        project_ids = options['project_id']
        if not project_ids:
            self.stdout.write('Since no project IDs were given, all projects will be updated')

        # Check arguments
        dryrun = options['dryrun']

        if dryrun:
            self.stdout.write('DRY RUN - no changes will be made')
        else:
            self.stdout.write('This will make changes to the database')

        run = input('Continue? [y/N]: ')
        if run not in ('Y', 'y'):
            self.stdout.write('Canceled on user request')
            return

        try:
            rebuild_edge_tables(project_ids, log=lambda msg: self.stdout.write(msg))

            if dryrun:
                # For a dry run, cancel the transaction by raising an exception
                raise DryRunRollback()

            self.stdout.write('Successfully rebuilt edge tables')

        except DryRunRollback:
            self.stdout.write('Dry run completed')
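
The dry-run branch above works by aborting a surrounding database transaction. Since DryRunRollback and the transaction handling are not part of the snippet, the following is only a minimal sketch of that pattern, assuming a plain Exception subclass and an explicit atomic block (names other than DryRunRollback are placeholders).

from django.core.management.base import BaseCommand
from django.db import transaction


class DryRunRollback(Exception):
    """Raised only to force a rollback of the surrounding transaction."""


class Command(BaseCommand):
    help = 'Rebuild data, optionally as a dry run (sketch)'

    def add_arguments(self, parser):
        parser.add_argument('--dryrun', action='store_true', default=False,
                help='Roll back all changes at the end of the run')

    def handle(self, *args, **options):
        try:
            with transaction.atomic():
                # ... perform the actual work here ...
                if options['dryrun']:
                    # Raising inside the atomic block discards all changes.
                    raise DryRunRollback()
            self.stdout.write('Changes committed')
        except DryRunRollback:
            self.stdout.write('Dry run completed, no changes were made')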
Example #4
    def import_data(self):
        """ Imports data from a file and overrides its properties, if wanted.
        This method also deactivates auto commit (if it is activated)
        temporary.
        """
        cursor = connection.cursor()
        # Defer all constraint checks
        cursor.execute('SET CONSTRAINTS ALL DEFERRED')

        # Drop summary table trigger to make insertion faster
        cursor.execute("""
            DROP TRIGGER on_edit_treenode_update_summary_and_edges ON treenode;
            DROP TRIGGER on_insert_treenode_update_summary_and_edges ON treenode;
            DROP TRIGGER on_delete_treenode_update_summary_and_edges ON treenode;
        """)

        # Get all existing users so that we can map them based on their username.
        mapped_user_ids = set()
        mapped_user_target_ids = set()

        # Map data types to lists of object of the respective type
        import_data = defaultdict(list)
        n_objects = 0

        # Read the file and sort by type
        logger.info("Loading data from {}".format(self.source))
        with open(self.source, "r") as data:
            loaded_data = serializers.deserialize(self.format, data)
            for deserialized_object in progressbar.progressbar(loaded_data,
                    max_value=progressbar.UnknownLength, redirect_stdout=True):
                obj = deserialized_object.object
                import_data[type(obj)].append(deserialized_object)
                n_objects += 1

        if n_objects == 0:
            raise CommandError("Nothing to import, no importable data found")

        created_users = dict()
        if import_data.get(User):
            import_users = dict((u.object.id, u) for u in import_data.get(User))
            logger.info("Found {} referenceable users in import data".format(len(import_users)))
        else:
            import_users = dict()
            logger.info("Found no referenceable users in import data")

        # Get CATMAID model classes, which are the ones we want to allow
        # optional modification of user, project and ID fields.
        app = apps.get_app_config('catmaid')
        user_updatable_classes = set(app.get_models())

        logger.info("Adjusting {} import objects to target database".format(n_objects))

        # Needed for name uniqueness of classes, class_instances and relations
        existing_classes = dict(Class.objects.filter(project_id=self.target.id) \
                .values_list('class_name', 'id'))
        existing_relations = dict(Relation.objects.filter(project_id=self.target.id) \
                .values_list('relation_name', 'id'))
        existing_class_instances = dict(ClassInstance.objects.filter(project_id=self.target.id) \
                .values_list('name', 'id'))

        existing_concept_ids = set(Concept.objects.all().values_list('id', flat=True))

        # Find classes for neurons and skeletons in import data
        if Class in import_data:
            allowed_duplicate_classes = tuple(c.object.id
                    for c in import_data.get(Class)
                    if c.object.class_name in ('neuron', 'skeleton'))
        else:
            allowed_duplicate_classes = tuple()

        n_reused = 0
        n_moved = 0
        append_only = not self.preserve_ids
        need_separate_import = []
        objects_to_save = defaultdict(list)
        import_objects_by_type_and_id = defaultdict(dict)
        for object_type, import_objects in six.iteritems(import_data):
            # Allow user reference updates in CATMAID objects
            if object_type not in user_updatable_classes:
                need_separate_import.append(object_type)
                continue

            # In append-only mode, store import IDs and link them to their
            # respective objects. This is needed to update foreign keys to an
            # ID once it is replaced with a new one.
            objects_by_id = import_objects_by_type_and_id[object_type]

            is_class = object_type == Class
            is_relation = object_type == Relation
            is_class_instance = object_type == ClassInstance

            # CATMAID model objects are inspected for user fields
            for deserialized_object in import_objects:
                obj = deserialized_object.object

                # Semantic data like classes and class instances are expected
                # to be unique with respect to their names. Imported objects
                # whose ID is already in use will get a new ID even if
                # --preserve-ids is set.
                existing_obj_id = None
                concept_id_exists = obj.id in existing_concept_ids
                if is_class:
                    existing_obj_id = existing_classes.get(obj.class_name)
                if is_relation:
                    existing_obj_id = existing_relations.get(obj.relation_name)
                if is_class_instance:
                    existing_obj_id = existing_class_instances.get(obj.name)

                    # Neurons (class instances of class "neuron" and "skeleton")
                    # are a special case.  There can be multiple neurons with
                    # the same name, something that is not allowed in other
                    # cases. In this particular case, however, class instance
                    # reuse is not wanted.
                    if existing_obj_id and obj.class_column_id in allowed_duplicate_classes:
                        existing_obj_id = None
                        concept_id_exists = False

                if existing_obj_id is not None:
                    # Add mapping so that existing references to it can be
                    # updated. The object itself is not marked for saving,
                    # because it exists already.
                    current_id = obj.id
                    objects_by_id[current_id] = obj
                    obj.id = existing_obj_id
                    n_reused += 1
                    continue

                # If there is already a known object with the ID of the object
                # we are currently importing and the current model is a class,
                # relation or class_instance, then the imported object will get
                # a new ID, even with --preserve-ids set. We reuse these types.
                if concept_id_exists:
                    current_id = obj.id
                    objects_by_id[current_id] = obj
                    obj.id = None
                    n_moved += 1

                # Replace existing data if requested
                self.override_fields(obj)

                # Map users based on username, optionally create unmapped users.
                self.map_or_create_users(obj, import_users, mapped_user_ids,
                            mapped_user_target_ids, created_users)

                # Remove pre-defined ID and keep track of updated IDs in
                # append-only mode (default).
                if append_only:
                    current_id = obj.id
                    objects_by_id[current_id] = obj
                    # By setting id to None, Django will create a new object and
                    # set the new ID.
                    obj.id = None

                # Remember for saving
                objects_to_save[object_type].append(deserialized_object)

        if len(created_users) > 0:
            logger.info("Created {} new users: {}".format(len(created_users),
                    ", ".join(sorted(created_users.keys()))))
        else:
            logger.info("No unmapped users imported")

        # Finally save all objects. Make sure they are saved in order:
        logger.info("Storing {} database objects including {} moved objects, reusing additional {} existing objects" \
                .format(n_objects - n_reused, n_moved, n_reused))

        # In append-only mode, the foreign keys to objects with changed IDs have
        # to be updated. In preserve-ids mode only IDs to classes and relations
        # will be updated. Saving model objects after an update of referenced
        # keys is only needed in append-only mode.
        self.reset_ids(user_updatable_classes, objects_to_save,
                import_objects_by_type_and_id, existing_classes)

        other_tasks = set(objects_to_save.keys()) - set(ordered_save_tasks)
        for object_type in ordered_save_tasks + list(other_tasks):
            objects = objects_to_save.get(object_type)
            if objects:
                logger.info("- Importing objects of type " + object_type.__name__)
                for deserialized_object in progressbar.progressbar(objects,
                        max_value=len(objects), redirect_stdout=True):
                    deserialized_object.save()

        logger.info("- Importing all other objects")
        for other_model in progressbar.progressbar(need_separate_import,
                max_value=len(need_separate_import), redirect_stdout=True):
            other_objects = import_data[other_model]
            if other_model == User:
                # If user model objects are imported and users were mapped,
                # ask the user whether already mapped users should be skipped
                # during import. We don't need to take care of newly created
                # users, because they are only created if no matching model is
                # found. Therefore all other model objects can be imported.
                if mapped_user_target_ids:
                    mapped_usernames = set(self.user_id_map.get(u) for u in mapped_user_target_ids)
                    import_usernames = set(u.object.username for u in import_users.values())
                    not_imported_usernames = import_usernames - mapped_usernames
                    already_imported_usernames = import_usernames - not_imported_usernames

                    if already_imported_usernames:
                        print("The following usernames are mapped to " +
                                "existing users, but the import data " +
                                "also contains objects for these users: " +
                                ", ".join(already_imported_usernames))
                        ignore_users = ask_yes_no("Skip those users in input "
                                "data and don't import them? [y/n]")
                        if ignore_users:
                            logger.info("Won't import mapped users: " +
                                    ", ".join(already_imported_usernames))
                            other_objects = [u for u in other_objects \
                                    if u.object.username not in already_imported_usernames]
                        else:
                            logger.info("Will import all listed users in import data")

            for deserialized_object in other_objects:
                if deserialized_object.object.username in created_users.keys():
                    deserialized_object.save()

        # Reset ID sequences to the current maximum IDs
        cursor.execute('''
            SELECT setval('concept_id_seq', coalesce(max("id"), 1), max("id") IS NOT null)
            FROM concept;
            SELECT setval('location_id_seq', coalesce(max("id"), 1), max("id") IS NOT null)
            FROM location;
            SELECT setval('auth_user_id_seq', coalesce(max("id"), 1), max("id") IS NOT null)
            FROM auth_user;
        ''')

        cursor.execute("""
            CREATE TRIGGER on_insert_treenode_update_summary_and_edges
            AFTER INSERT ON treenode
            REFERENCING NEW TABLE as inserted_treenode
            FOR EACH STATEMENT EXECUTE PROCEDURE on_insert_treenode_update_summary_and_edges();

            CREATE TRIGGER on_edit_treenode_update_summary_and_edges
            AFTER UPDATE ON treenode
            REFERENCING NEW TABLE as new_treenode OLD TABLE as old_treenode
            FOR EACH STATEMENT EXECUTE PROCEDURE on_edit_treenode_update_summary_and_edges();

            CREATE TRIGGER on_delete_treenode_update_summary_and_edges
            AFTER DELETE ON treenode
            REFERENCING OLD TABLE as deleted_treenode
            FOR EACH STATEMENT EXECUTE PROCEDURE on_delete_treenode_update_summary_and_edges();
        """)

        logger.info("Updating edge tables")
        rebuild_edge_tables(log=lambda msg: logger.info(msg))

        logger.info("Updated skeleton summary tables")
        cursor.execute("""
            DELETE FROM catmaid_skeleton_summary;
            SELECT refresh_skeleton_summary_table();
        """)
Example #5
    def transform(self):
        """Iterate over all layers, find all location entries in the database
        on this layer, transform with the layer's transformation and write them
        back.
        """
        start_time = time.time()
        cursor = connection.cursor()
        n_total_reviews_reset = 0
        if self.skeleton_ids:
            join = """
                JOIN (
                    SELECT t.id
                    FROM treenode t
                    JOIN UNNEST(%(skeleton_ids)s::bigint[]) skeleton(id)
                        ON skeleton.id = t.skeleton_id
                ) sub
                    ON sub.id = location.id
            """
        else:
            join = ''

        # Keep track of locations that were already transformed, so that nodes
        # matched by more than one layer are only transformed once.
        seen: Set[int] = set()
        hit = 0

        def take_if_not_seen(entry):
            nonlocal hit
            if entry[3] in seen:
                hit += 1
                return False
            else:
                seen.add(entry[3])
            return True

        # Temporarily disable the row-level review update triggers. This is
        # needed because, even though we are modifying a parent table, row-level
        # triggers on child tables like the review check are still executed.
        # Therefore, we need to prevent the review check and only remove reviews
        # manually, if requested. In order to revert this DDL operation after we
        # transformed the locations, we also need to make sure no trigger
        # operations are pending when we attempt to do this. A simple way of
        # doing this is to enforce immediate trigger execution in this
        # transaction.
        cursor.execute("""
            SET CONSTRAINTS ALL IMMEDIATE;

            ALTER TABLE treenode
            DISABLE TRIGGER on_edit_treenode_check_review;

            ALTER TABLE connector
            DISABLE TRIGGER on_edit_connector_check_review;
        """)

        for n, l in enumerate(self.layers):
            log(f'Transforming layer {n+1}/{len(self.layers)}: [{l.z_start}, {l.z_end})')
            cursor.execute("""
                SELECT location_x, location_y, %(last_editor_id)s, location.id
                FROM location
                {join}
                WHERE project_id = %(project_id)s
                AND location_z >= %(z_start)s
                AND location_z < %(z_end)s
            """.format(join=join), {
                'project_id': self.project_id,
                'z_start': l.z_start,
                'z_end': l.z_end,
                'last_editor_id': self.last_editor.id,
                'skeleton_ids': self.skeleton_ids,
            })

            # Get lists rather than tuples and transform points
            reset_reviews_for = []
            locations = list(filter(take_if_not_seen, map(list, cursor.fetchall())))
            for loc in locations:
                dist = l.transform_point_entry(loc, self.post_transformer)
                if self.review_reset_distance and dist > self.review_reset_distance:
                    reset_reviews_for.append(loc[3])

            log(f'  Found and transformed {len(locations)} locations, considering {len(reset_reviews_for)} locations for review reset, offset: {l.offset_x}, {l.offset_y}')

            # Write points back into database
            execute_batch(cursor, """
                UPDATE location
                SET location_x = %s, location_y = %s, editor_id = %s
                WHERE id = %s
            """, locations, page_size=100)

            n_reset_reviews = 0
            if self.review_reset_distance and reset_reviews_for:
                cursor.execute("""
                    DELETE FROM review
                    WHERE treenode_id = ANY(%(reset_reviews_for)s::bigint[])
                    RETURNING id
                """, {
                    'reset_reviews_for': reset_reviews_for,
                })
                n_reset_reviews = len(list(cursor.fetchall()))
                n_total_reviews_reset += n_reset_reviews

            log(f'  Updated locations in database, reset {n_reset_reviews} reviews')

        log(f'Rebuilding edge table of project {self.project_id}')
        rebuild_edge_tables(project_ids=[self.project_id], log=log)

        log(f'Rebuilding skeleton summary for project {self.project_id}')
        cursor.execute("""
            SELECT refresh_skeleton_summary_table_for_project(%(project_id)s::int);
         """, {
            'project_id': self.project_id,
        })

        end_time = time.time()
        log(f'Transformation complete (took {end_time - start_time:.2f} sec), reset {n_total_reviews_reset} reviews, skipped {hit} locations already handled by an earlier layer')

        # Re-enable review update trigger.
        cursor.execute("""
            ALTER TABLE treenode
            ENABLE TRIGGER on_edit_treenode_check_review;

            ALTER TABLE connector
            ENABLE TRIGGER on_edit_connector_check_review;

            SET CONSTRAINTS ALL DEFERRED;
        """)