def handle(self, *args, **options):
    cursor = connection.cursor()
    projects = Project.objects.all()
    project_ids = [p.id for p in projects]

    self.stdout.write('Recreating treenode_edge, treenode_connector_edge, connector_geom')
    rebuild_edge_tables(log=lambda msg: self.stdout.write(msg))

    self.stdout.write('Recreating catmaid_stats_summary')
    cursor.execute("TRUNCATE catmaid_stats_summary")
    for p in projects:
        populate_stats_summary(p.id, False, False)

    self.stdout.write('Recreating catmaid_skeleton_summary')
    cursor.execute("""
        TRUNCATE catmaid_skeleton_summary;
        SELECT refresh_skeleton_summary_table();
    """)

    self.stdout.write('Recreating node_query_cache')
    update_node_query_cache(log=lambda x: self.stdout.write(x))

    self.stdout.write('Done')
def handle(self, *args, **options):
    project_ids = options['project_id']
    if not project_ids:
        self.stdout.write('Since no project IDs were given, all projects will be updated')

    # Check arguments
    dryrun = options['dryrun']

    if dryrun:
        self.stdout.write('DRY RUN - no changes will be made')
    else:
        self.stdout.write('This will make changes to the database')
        run = input('Continue? [y/N]: ')
        if run not in ('Y', 'y'):
            self.stdout.write('Canceled on user request')
            return

    try:
        rebuild_edge_tables(project_ids, log=lambda msg: self.stdout.write(msg))
        if dryrun:
            # For a dry run, cancel the transaction by raising an exception
            raise DryRunRollback()
        self.stdout.write('Successfully rebuilt edge tables')
    except DryRunRollback:
        self.stdout.write('Dry run completed')
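# The handle() above references a DryRunRollback exception and argparse options
# named 'project_id' and 'dryrun' that are not defined in this excerpt. The
# following is a minimal sketch of what they could look like, inferred from the
# usage above; names and defaults are assumptions, not verbatim CATMAID code.
class DryRunRollback(Exception):
    """Raised to abort the surrounding transaction during a dry run."""
    pass

def add_arguments(self, parser):
    # Zero or more project IDs; an empty list means "update all projects".
    parser.add_argument('--project_id', nargs='*', type=int, default=[],
            help='Limit the rebuild to these project IDs')
    # With --dryrun, all changes are rolled back at the end of handle().
    parser.add_argument('--dryrun', action='store_true', default=False,
            help='Roll back all changes made during the rebuild')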
def import_data(self):
    """ Imports data from a file and overrides its properties, if wanted.
    This method also temporarily deactivates auto commit (if it is activated).
    """
    cursor = connection.cursor()

    # Defer all constraint checks
    cursor.execute('SET CONSTRAINTS ALL DEFERRED')

    # Drop summary table triggers to make insertion faster
    cursor.execute("""
        DROP TRIGGER on_edit_treenode_update_summary_and_edges ON treenode;
        DROP TRIGGER on_insert_treenode_update_summary_and_edges ON treenode;
        DROP TRIGGER on_delete_treenode_update_summary_and_edges ON treenode;
    """)

    # Get all existing users so that we can map them based on their username.
    mapped_user_ids = set()
    mapped_user_target_ids = set()

    # Map data types to lists of objects of the respective type
    import_data = defaultdict(list)
    n_objects = 0

    # Read the file and sort by type
    logger.info("Loading data from {}".format(self.source))
    with open(self.source, "r") as data:
        loaded_data = serializers.deserialize(self.format, data)
        for deserialized_object in progressbar.progressbar(loaded_data,
                max_value=progressbar.UnknownLength, redirect_stdout=True):
            obj = deserialized_object.object
            import_data[type(obj)].append(deserialized_object)
            n_objects += 1

    if n_objects == 0:
        raise CommandError("Nothing to import, no importable data found")

    created_users = dict()
    if import_data.get(User):
        import_users = dict((u.object.id, u) for u in import_data.get(User))
        logger.info("Found {} referenceable users in import data".format(len(import_users)))
    else:
        import_users = dict()
        logger.info("Found no referenceable users in import data")

    # Get CATMAID model classes, which are the ones we want to allow
    # optional modification of user, project and ID fields for.
    app = apps.get_app_config('catmaid')
    user_updatable_classes = set(app.get_models())

    logger.info("Adjusting {} import objects to target database".format(n_objects))

    # Needed for name uniqueness of classes, class_instances and relations
    existing_classes = dict(Class.objects.filter(project_id=self.target.id) \
            .values_list('class_name', 'id'))
    existing_relations = dict(Relation.objects.filter(project_id=self.target.id) \
            .values_list('relation_name', 'id'))
    existing_class_instances = dict(ClassInstance.objects.filter(project_id=self.target.id) \
            .values_list('name', 'id'))

    existing_concept_ids = set(Concept.objects.all().values_list('id', flat=True))

    # Find classes for neurons and skeletons in import data
    if Class in import_data:
        allowed_duplicate_classes = tuple(c.object.id for c in import_data.get(Class)
                if c.object.class_name in ('neuron', 'skeleton'))
    else:
        allowed_duplicate_classes = tuple()

    n_reused = 0
    n_moved = 0
    append_only = not self.preserve_ids
    need_separate_import = []
    objects_to_save = defaultdict(list)
    import_objects_by_type_and_id = defaultdict(dict)
    for object_type, import_objects in six.iteritems(import_data):
        # Allow user reference updates in CATMAID objects
        if object_type not in user_updatable_classes:
            need_separate_import.append(object_type)
            continue

        # In append-only mode, store import IDs and link them to the
        # respective objects. This is needed to update foreign keys to this
        # ID when it is replaced with a new ID.
        objects_by_id = import_objects_by_type_and_id[object_type]

        is_class = object_type == Class
        is_relation = object_type == Relation
        is_class_instance = object_type == ClassInstance

        # CATMAID model objects are inspected for user fields
        for deserialized_object in import_objects:
            obj = deserialized_object.object

            # Semantic data like classes and class instances are expected to
            # be unique with respect to their names. Existing objects with
            # the same ID will get a new ID even if --preserve-ids is set.
            existing_obj_id = None
            concept_id_exists = obj.id in existing_concept_ids
            if is_class:
                existing_obj_id = existing_classes.get(obj.class_name)
            if is_relation:
                existing_obj_id = existing_relations.get(obj.relation_name)
            if is_class_instance:
                existing_obj_id = existing_class_instances.get(obj.name)

            # Neurons (class instances of the classes "neuron" and "skeleton")
            # are a special case. There can be multiple neurons with the same
            # name, something that is not allowed in other cases. In this
            # particular case, however, class instance reuse is not wanted.
            if existing_obj_id and obj.class_column_id in allowed_duplicate_classes:
                existing_obj_id = None
                concept_id_exists = False

            if existing_obj_id is not None:
                # Add a mapping so that existing references to it can be
                # updated. The object itself is not marked for saving,
                # because it exists already.
                current_id = obj.id
                objects_by_id[current_id] = obj
                obj.id = existing_obj_id
                n_reused += 1
                continue

            # If there is already a known object with the ID of the object
            # we are importing at the moment and the current model is a
            # class, relation or class_instance, then the imported object
            # will get a new ID, even with --preserve-ids set. We reuse
            # these types.
            if concept_id_exists:
                current_id = obj.id
                objects_by_id[current_id] = obj
                obj.id = None
                n_moved += 1

            # Replace existing data if requested
            self.override_fields(obj)

            # Map users based on username, optionally create unmapped users.
            self.map_or_create_users(obj, import_users, mapped_user_ids,
                    mapped_user_target_ids, created_users)

            # Remove the pre-defined ID and keep track of updated IDs in
            # append-only mode (default).
            if append_only:
                current_id = obj.id
                objects_by_id[current_id] = obj
                # By setting id to None, Django will create a new object and
                # set the new ID.
                obj.id = None

            # Remember for saving
            objects_to_save[object_type].append(deserialized_object)

    if len(created_users) > 0:
        logger.info("Created {} new users: {}".format(len(created_users),
                ", ".join(sorted(created_users.keys()))))
    else:
        logger.info("No unmapped users imported")

    # Finally save all objects. Make sure they are saved in order:
    logger.info("Storing {} database objects including {} moved objects, "
            "reusing additional {} existing objects".format(
            n_objects - n_reused, n_moved, n_reused))

    # In append-only mode, the foreign keys to objects with changed IDs have
    # to be updated. In preserve-ids mode only IDs to classes and relations
    # will be updated. Saving model objects after an update of referenced
    # keys is only needed in append-only mode.
    self.reset_ids(user_updatable_classes, objects_to_save,
            import_objects_by_type_and_id, existing_classes)

    other_tasks = set(objects_to_save.keys()) - set(ordered_save_tasks)
    for object_type in ordered_save_tasks + list(other_tasks):
        objects = objects_to_save.get(object_type)
        if objects:
            logger.info("- Importing objects of type " + object_type.__name__)
            for deserialized_object in progressbar.progressbar(objects,
                    max_value=len(objects), redirect_stdout=True):
                deserialized_object.save()

    logger.info("- Importing all other objects")
    for other_model in progressbar.progressbar(need_separate_import,
            max_value=len(need_separate_import), redirect_stdout=True):
        other_objects = import_data[other_model]
        if other_model == User:
            # If user model objects are imported and users were mapped, ask
            # the user whether already mapped users should be skipped during
            # import. We don't need to take care of newly created users,
            # because they are only created if no model is found. Therefore
            # all other model objects can be imported.
            if mapped_user_target_ids:
                mapped_usernames = set(self.user_id_map.get(u) for u in mapped_user_target_ids)
                import_usernames = set(import_users.keys())
                not_imported_usernames = import_usernames - mapped_usernames
                already_imported_usernames = import_usernames - not_imported_usernames

                if already_imported_usernames:
                    print("The following usernames are mapped to existing "
                            "users, but the import data also contains "
                            "objects for these users: " +
                            ", ".join(already_imported_usernames))
                    ignore_users = ask_yes_no("Skip those users in input "
                            "data and don't import them? [y/n]")
                    if ignore_users:
                        logger.info("Won't import mapped users: " +
                                ", ".join(already_imported_usernames))
                        other_objects = [u for u in other_objects
                                if u.object.username not in already_imported_usernames]
                    else:
                        logger.info("Will import all listed users in import data")

            for deserialized_object in other_objects:
                if deserialized_object.object.username in created_users.keys():
                    deserialized_object.save()

    # Reset ID sequences to the current maximum IDs
    cursor.execute('''
        SELECT setval('concept_id_seq', coalesce(max("id"), 1), max("id") IS NOT null) FROM concept;
        SELECT setval('location_id_seq', coalesce(max("id"), 1), max("id") IS NOT null) FROM location;
        SELECT setval('auth_user_id_seq', coalesce(max("id"), 1), max("id") IS NOT null) FROM auth_user;
    ''')

    # Recreate the summary table triggers that were dropped above
    cursor.execute("""
        CREATE TRIGGER on_insert_treenode_update_summary_and_edges
        AFTER INSERT ON treenode
        REFERENCING NEW TABLE as inserted_treenode
        FOR EACH STATEMENT EXECUTE PROCEDURE on_insert_treenode_update_summary_and_edges();

        CREATE TRIGGER on_edit_treenode_update_summary_and_edges
        AFTER UPDATE ON treenode
        REFERENCING NEW TABLE as new_treenode OLD TABLE as old_treenode
        FOR EACH STATEMENT EXECUTE PROCEDURE on_edit_treenode_update_summary_and_edges();

        CREATE TRIGGER on_delete_treenode_update_summary_and_edges
        AFTER DELETE ON treenode
        REFERENCING OLD TABLE as deleted_treenode
        FOR EACH STATEMENT EXECUTE PROCEDURE on_delete_treenode_update_summary_and_edges();
    """)

    logger.info("Updating edge tables")
    rebuild_edge_tables(log=lambda msg: logger.info(msg))

    logger.info("Updating skeleton summary tables")
    cursor.execute("""
        DELETE FROM catmaid_skeleton_summary;
        SELECT refresh_skeleton_summary_table();
    """)
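# ask_yes_no() is used by import_data() above but not defined in this excerpt.
# A minimal sketch consistent with that call site, assuming a plain stdin
# prompt (not the verbatim CATMAID helper):
def ask_yes_no(question):
    """Return True if the user answers yes, False otherwise."""
    answer = input(question + ' ').strip().lower()
    return answer in ('y', 'yes')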
def transform(self):
    """Iterate over all layers, find all location entries in the database on
    each layer, transform them with the layer's transformation and write them
    back.
    """
    start_time = time.time()
    cursor = connection.cursor()
    n_total_reviews_reset = 0

    if self.skeleton_ids:
        join = """
            JOIN (
                SELECT t.id
                FROM treenode t
                JOIN UNNEST(%(skeleton_ids)s::bigint[]) skeleton(id)
                    ON skeleton.id = t.skeleton_id
            ) sub
                ON sub.id = location.id
        """
    else:
        join = ''

    # Remove if not needed
    seen: Set[int] = set()
    hit = 0

    def take_if_not_seen(entry):
        nonlocal hit
        if entry[3] in seen:
            hit += 1
            return False
        else:
            seen.add(entry[3])
        return True

    # Temporarily disable the row-level review update trigger. This is needed
    # because even though we are modifying a parent table, row-level triggers
    # on child tables like the review check are executed. Therefore, we need
    # to prevent the review check and only remove reviews manually, if
    # requested. In order to revert this DDL operation after we transformed
    # the locations, we also need to make sure no trigger operations are
    # pending when we attempt to do this. A simple way of doing this is to
    # enforce immediate trigger execution in this transaction.
    cursor.execute("""
        SET CONSTRAINTS ALL IMMEDIATE;
        ALTER TABLE treenode DISABLE TRIGGER on_edit_treenode_check_review;
        ALTER TABLE connector DISABLE TRIGGER on_edit_connector_check_review;
    """)

    for n, l in enumerate(self.layers):
        log(f'Transforming layer {n+1}/{len(self.layers)}: [{l.z_start}, {l.z_end})')
        cursor.execute("""
            SELECT location_x, location_y, %(last_editor_id)s, location.id
            FROM location
            {join}
            WHERE project_id = %(project_id)s
                AND location_z >= %(z_start)s
                AND location_z < %(z_end)s
        """.format(join=join), {
            'project_id': self.project_id,
            'z_start': l.z_start,
            'z_end': l.z_end,
            'last_editor_id': self.last_editor.id,
            'skeleton_ids': self.skeleton_ids,
        })

        # Get lists rather than tuples and transform points
        reset_reviews_for = []
        locations = list(filter(take_if_not_seen, map(list, cursor.fetchall())))
        for loc in locations:
            dist = l.transform_point_entry(loc, self.post_transformer)
            if self.review_reset_distance and dist > self.review_reset_distance:
                reset_reviews_for.append(loc[3])

        log(f' Found and transformed {len(locations)} locations, considering {len(reset_reviews_for)} locations for review reset, offset: {l.offset_x}, {l.offset_y}')

        # Write points back into the database
        execute_batch(cursor, """
            UPDATE location
            SET location_x = %s, location_y = %s, editor_id = %s
            WHERE id = %s
        """, locations, page_size=100)

        n_reset_reviews = 0
        if self.review_reset_distance and reset_reviews_for:
            cursor.execute("""
                DELETE FROM review
                WHERE treenode_id = ANY(%(reset_reviews_for)s::bigint[])
                RETURNING id
            """, {
                'reset_reviews_for': reset_reviews_for,
            })
            n_reset_reviews = len(list(cursor.fetchall()))
            n_total_reviews_reset += n_reset_reviews

        log(f' Updated locations in database, reset {n_reset_reviews} reviews')

    log(f'Rebuilding edge table of project {self.project_id}')
    rebuild_edge_tables(project_ids=[self.project_id], log=log)

    log(f'Rebuilding skeleton summary for project {self.project_id}')
    cursor.execute("""
        SELECT refresh_skeleton_summary_table_for_project(%(project_id)s::int);
    """, {
        'project_id': self.project_id,
    })

    end_time = time.time()
    log(f'Transformation complete (took {end_time - start_time:.2f} sec), reset {n_total_reviews_reset} reviews, {hit} re-checked nodes')

    # Re-enable the review update triggers.
cursor.execute(""" ALTER TABLE treenode ENABLE TRIGGER on_edit_treenode_check_review; ALTER TABLE connector ENABLE TRIGGER on_edit_connector_check_review; SET CONSTRAINTS ALL DEFERRED; """)