def make_flat_model_from_schema(
    table_name: str,
    Schema: type,
    segmentation_source: dict = None,
    table_metadata: dict = None,
):
    if not annotation_models.contains_model(table_name, flat=True):
        # Reference annotations are already flat; everything else is flattened first.
        if issubclass(Schema, ReferenceAnnotation):
            flat_schema = Schema
        else:
            flat_schema = create_flattened_schema(Schema)
        annotation_dict = create_table_dict(
            table_name=table_name,
            Schema=flat_schema,
            segmentation_source=segmentation_source,
            table_metadata=table_metadata,
            with_crud_columns=False,
        )
        FlatAnnotationModel = type(table_name, (FlatBase,), annotation_dict)
        annotation_models.set_model(table_name, FlatAnnotationModel, flat=True)
    return annotation_models.get_model(table_name, flat=True)

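# Usage sketch for make_flat_model_from_schema. The "synapse" schema type and
# "synapse_test" table name are illustrative assumptions, not values defined in
# this module; any schema class registered in type_mapping works the same way.
#
#   Schema = type_mapping["synapse"]
#   SynapseFlatModel = make_flat_model_from_schema("synapse_test", Schema)
#   # A second call with the same table_name returns the cached model:
#   assert make_flat_model_from_schema("synapse_test", Schema) is SynapseFlatModel
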
def get_flat_schema(schema_type: str):
    try:
        Schema = type_mapping[schema_type]
        FlatSchema = create_flattened_schema(Schema)
        return FlatSchema
    except KeyError:
        msg = f"Schema type: {schema_type} is not a known annotation type"
        raise UnknownAnnotationTypeException(msg)

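# Usage sketch for get_flat_schema, with the unknown-type case handled. The
# "synapse" type string is an assumption for illustration.
#
#   try:
#       FlatSchema = get_flat_schema("synapse")
#   except UnknownAnnotationTypeException:
#       FlatSchema = None  # fall back, or surface the error to the caller
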
def split_annotation_schema(Schema):
    """Split an EM Annotation schema into separate annotation (spatial position)
    and segmentation (supervoxel and root_id) schemas.

    Parameters
    ----------
    Schema : EMAnnotation Schema
        A Schema defined by EMAnnotationSchemas

    Returns
    -------
    flat_annotation_schema
        A flattened annotation marshmallow schema
    flat_segmentation_schema
        A flattened segmentation marshmallow schema

    Raises
    ------
    Exception
        Schema is not flattened, i.e. it contains a nested schema type
    """
    flat_schema = create_flattened_schema(Schema)

    annotation_columns = {}
    segmentation_columns = {}

    sv_and_root_id_column_keys = get_flat_sv_and_root_id_columns(Schema())
    for key, field in flat_schema._declared_fields.items():
        if isinstance(field, mm.fields.Nested):
            raise Exception(f"Schema {flat_schema} must be flattened before splitting")
        # Pick off the columns which are the supervoxel and root_id of
        # BoundSpatialPoint fields, and separate them as segmentation columns.
        # TODO: make NumericFields illegal and/or fix logic to pick out
        # actual fields associated with BoundSpatialPoints
        if key not in sv_and_root_id_column_keys:
            annotation_columns[key] = field
        else:
            segmentation_columns[key] = field

    schema_name = Schema.__name__ if hasattr(Schema, "__name__") else Schema

    flat_annotation_schema = convert_dict_to_schema(
        f"{schema_name}_annotation", annotation_columns
    )
    flat_segmentation_schema = convert_dict_to_schema(
        f"{schema_name}_segmentation", segmentation_columns
    )

    return flat_annotation_schema, flat_segmentation_schema

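# Sketch of how the split behaves, assuming a hypothetical flattened schema
# with a BoundSpatialPoint field "pt": the position column stays on the
# annotation side, while the derived supervoxel/root_id columns land on the
# segmentation side. "SynapseSchema" is an illustrative name.
#
#   AnnoSchema, SegSchema = split_annotation_schema(SynapseSchema)
#   # AnnoSchema._declared_fields -> {"pt_position": ..., ...}
#   # SegSchema._declared_fields  -> {"pt_supervoxel_id": ..., "pt_root_id": ...}
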
def get_analysis_table(
    aligned_volume: str, datastack: str, table_name: str, mat_version: int = 1
):
    """Helper method that returns a table model.

    Args:
        aligned_volume (str): aligned_volume name
        datastack (str): datastack name
        table_name (str): table to reflect a model
        mat_version (int, optional): target database version

    Returns:
        SQLAlchemy model: a sqlalchemy model of the target table
    """
    anno_db = dynamic_annotation_cache.get_db(aligned_volume)
    schema_name = anno_db.get_table_schema(table_name)

    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack, mat_version)
    analysis_engine = create_engine(analysis_sql_uri)

    meta = MetaData()
    meta.reflect(bind=analysis_engine)

    anno_schema = get_schema(schema_name)
    flat_schema = create_flattened_schema(anno_schema)

    # Declare the model if the table does not exist yet; otherwise reflect it.
    if not analysis_engine.dialect.has_table(analysis_engine, table_name):
        annotation_dict = create_table_dict(
            table_name=table_name,
            Schema=flat_schema,
            segmentation_source=None,
            table_metadata=None,
            with_crud_columns=False,
        )
        analysis_table = type(table_name, (Base,), annotation_dict)
    else:
        analysis_table = meta.tables[table_name]

    analysis_engine.dispose()
    return analysis_table

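# Usage sketch: reflect (or declare) the model for a materialized table. The
# names below are illustrative, not values defined in this module.
#
#   SynapseTable = get_analysis_table(
#       aligned_volume="my_aligned_volume",
#       datastack="my_datastack",
#       table_name="synapse_test",
#       mat_version=2,
#   )
#   # SynapseTable can then be used in SQLAlchemy queries against the
#   # versioned analysis database.
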
def format_data(data: List, bulk_upload_info: dict):
    schema = bulk_upload_info["schema"]
    upload_creation_time = bulk_upload_info["upload_creation_time"]

    # Column-wise concatenate each chunk of column data into a single dataframe.
    base_df = pd.DataFrame(data[0])
    for chunk in data[1:]:
        temp_df = pd.DataFrame(chunk)
        base_df = pd.concat([base_df, temp_df], axis=1)
    records = base_df.to_dict("records")

    schema = get_schema(schema)
    FlattenedSchema = create_flattened_schema(schema)
    (
        flat_annotation_schema,
        flat_segmentation_schema,
    ) = em_models.split_annotation_schema(FlattenedSchema)

    anno_data = split_annotation_data(records, flat_annotation_schema, upload_creation_time)
    seg_data = split_annotation_data(records, flat_segmentation_schema, upload_creation_time)
    return [anno_data, seg_data]

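# Sketch of the expected input shape, under the assumption that `data` is a
# list of column-wise chunks (each a mapping of column -> values) that together
# describe the same rows; chunks are concatenated along axis=1 before splitting.
# The column names and `bulk_upload_info` contents here are illustrative.
#
#   data = [
#       {"id": [1, 2], "pt_position": [[0, 0, 0], [1, 1, 1]]},
#       {"pt_supervoxel_id": [10, 11], "pt_root_id": [100, 101]},
#   ]
#   anno_rows, seg_rows = format_data(data, bulk_upload_info)
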
def merge_tables(self, mat_metadata: dict):
    """Merge all the annotation and segmentation rows that are flagged as valid
    into a new table. Drop the original split tables after inserting all the
    rows into the new table.

    Args:
        mat_metadata (dict): datastack info for the aligned_volume from the
            infoservice, including the target analysis_version

    Raises:
        e: error during table merging operation

    Returns:
        str: message with the number of rows copied
    """
    analysis_version = mat_metadata["analysis_version"]
    annotation_table_name = mat_metadata["annotation_table_name"]
    segmentation_table_name = mat_metadata["segmentation_table_name"]
    temp_table_name = mat_metadata["temp_mat_table_name"]
    schema = mat_metadata["schema"]
    datastack = mat_metadata["datastack"]

    # create dynamic sql_uri
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack, analysis_version)

    # get schema and match column order for sql query
    anno_schema = get_schema(schema)
    flat_schema = create_flattened_schema(anno_schema)

    ordered_model_columns = create_table_dict(
        table_name=annotation_table_name,
        Schema=flat_schema,
        segmentation_source=None,
        table_metadata=None,
        with_crud_columns=False,
    )

    AnnotationModel = create_annotation_model(mat_metadata, with_crud_columns=False)
    SegmentationModel = create_segmentation_model(mat_metadata)

    # collect the queryable columns, skipping CRUD bookkeeping columns and the
    # segmentation table's duplicate id column
    query_columns = {}
    crud_columns = ["created", "deleted", "superceded_id"]
    for col in AnnotationModel.__table__.columns:
        if col.name not in crud_columns:
            query_columns[col.name] = col
    for col in SegmentationModel.__table__.columns:
        if col.name != "id":
            query_columns[col.name] = col

    sorted_columns = OrderedDict(
        [
            (key, query_columns[key])
            for key in ordered_model_columns
            if key in query_columns
        ]
    )
    sorted_columns_list = list(sorted_columns.values())
    columns = [f'"{col.table}".{col.name}' for col in sorted_columns_list]

    mat_session, mat_engine = create_session(analysis_sql_uri)

    query = f"""
        SELECT {', '.join(columns)}
        FROM {AnnotationModel.__table__.name}
        JOIN "{SegmentationModel.__table__.name}"
            ON {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        WHERE {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        AND {AnnotationModel.valid} = true
    """

    try:
        mat_db_connection = mat_engine.connect()
        with mat_db_connection.begin():
            insert_query = mat_db_connection.execute(
                f"CREATE TABLE {temp_table_name} AS ({query});"
            )
            row_count = insert_query.rowcount
            mat_db_connection.execute(
                f'DROP TABLE {annotation_table_name}, "{segmentation_table_name}" CASCADE;'
            )
            mat_db_connection.execute(
                f"ALTER TABLE {temp_table_name} RENAME TO {annotation_table_name};"
            )
        mat_session.close()
        mat_engine.dispose()
        return f"Number of rows copied: {row_count}"
    except Exception as e:
        celery_logger.error(e)
        raise e

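# For orientation, a sketch of the SQL this method emits, assuming an
# annotation table "synapse_test" and segmentation table "synapse_test__seg"
# (illustrative names): valid rows from the two split tables are joined on id
# into a temp table, the split tables are dropped, and the temp table is
# renamed over the original annotation table name.
#
#   CREATE TABLE temp_mat_table AS (
#       SELECT "synapse_test".id, ..., "synapse_test__seg".pt_root_id
#       FROM synapse_test
#       JOIN "synapse_test__seg" ON synapse_test.id = "synapse_test__seg".id
#       WHERE synapse_test.valid = true
#   );
#   DROP TABLE synapse_test, "synapse_test__seg" CASCADE;
#   ALTER TABLE temp_mat_table RENAME TO synapse_test;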