def _parse_table_metadata_params( self, schema_type: str, table_name: str, table_metadata: dict, existing_tables: list, ): reference_table = None track_updates = None for param, value in table_metadata.items(): if param == "reference_table": Schema = get_schema(schema_type) if not issubclass(Schema, ReferenceAnnotation): raise TypeError( "Reference table must be a ReferenceAnnotation schema type" ) if table_name is value: raise SelfReferenceTableError( f"{reference_table} must target a different table not {table_name}" ) if value not in existing_tables: raise TableNameNotFound( f"Reference table target: '{value}' does not exist") reference_table = value elif param == "track_target_id_updates": track_updates = value return reference_table, track_updates
def get_schema_view(annotation_type):
    """Render an HTML page describing the fields of an annotation schema.

    Args:
        annotation_type: schema type name resolvable by get_schema()

    Returns:
        str: rendered 'schema.html' template with a field-description table
    """
    Schema = get_schema(annotation_type)
    ds = []
    for col, field in Schema._declared_fields.items():
        # Nested fields are reported by the wrapped schema's class name.
        # BUG FIX: removed leftover debug print(dir(field.schema), ...).
        if isinstance(field, mm.fields.Nested):
            schema = field.schema.__class__.__name__
        else:
            schema = ""
        ds.append(
            {
                "field_name": col,
                "description": field.metadata.get("description", ""),
                "type": type(field).__name__,
                "schema": schema,
            }
        )
    df = pd.DataFrame(ds)
    return render_template(
        "schema.html",
        df_table=df[["field_name", "type", "description", "schema"]].to_html(),
        schema_type=annotation_type,
        version=__version__,
    )
def get_type_schema(annotation_type):
    """Return the JSON-schema serialization of an annotation schema type.

    Aborts with HTTP 404 when the type name is unknown.
    """
    try:
        schema_cls = get_schema(annotation_type)
    except UnknownAnnotationTypeException:
        abort(404)
    return JSONSchema().dump(schema_cls())
def make_segmentation_model(table_name: str, schema_type: str, segmentation_source: dict = None):
    """Build a segmentation SQLAlchemy model for the given schema type."""
    schema = get_schema(schema_type)
    return make_segmentation_model_from_schema(
        table_name, segmentation_source, schema
    )
def _get_flattened_schema(self, schema_type: str):
    """Resolve a schema type name and split it into flattened annotation
    and segmentation schemas."""
    # Bind the looked-up class to a new name rather than rebinding the
    # string parameter, purely for readability.
    schema_cls = get_schema(schema_type)
    flat_annotation_schema, flat_segmentation_schema = (
        em_models.split_annotation_schema(schema_cls)
    )
    return flat_annotation_schema, flat_segmentation_schema
def make_flat_model(
    table_name: str,
    schema_type: str,
    segmentation_source: dict = None,
    table_metadata: dict = None,
):
    """Create a flattened SQLAlchemy model for the given schema type."""
    return make_flat_model_from_schema(
        table_name, get_schema(schema_type), segmentation_source, table_metadata
    )
def test_sqlalchemy_orm_bulk_insert(
    sql_uri,
    aligned_volume_name,
    table_name,
    pcg_table_name,
    schema_name,
    dataframe,
    chunksize,
):
    """Bulk-insert a dataframe's rows into the annotation and segmentation
    tables via COPY, timing the insert.

    Args:
        sql_uri: database connection URI
        aligned_volume_name: aligned volume the tables live in
        table_name: target annotation table name
        pcg_table_name: PCG table name for the segmentation model
        schema_name: schema type of the table
        dataframe: pandas DataFrame of rows to insert (column mapping is
            only computed when this is actually a DataFrame)
        chunksize: chunk size passed to process_dataframe
    """
    data_mapping = {}
    client = DynamicAnnotationClient(aligned_volume_name, sql_uri)
    mat_client = DynamicMaterializationClient(aligned_volume_name, sql_uri)
    schema = get_schema(schema_name)
    # BUG FIX: the original referenced the undefined name 'aligned_volume'
    # throughout; the parameter is 'aligned_volume_name' (NameError at runtime).
    AnnotationModel = client.get_annotation_model(aligned_volume_name, table_name)
    SegmentationModel = mat_client.get_segmentation_model(
        aligned_volume_name, table_name, pcg_table_name)
    anno_cols = AnnotationModel.__table__.columns.keys()
    seg_cols = SegmentationModel.__table__.columns.keys()
    if isinstance(dataframe, pd.DataFrame):
        data_cols = dataframe.columns.tolist()
        data_mapping["position_keys"] = common_column_set(anno_cols, data_cols)
        data_mapping["segmentation_keys"] = common_column_set(seg_cols, data_cols)
    data_mapping.update({"anno_model": anno_cols, "seg_model": seg_cols})
    table_id = build_table_id(aligned_volume_name, table_name)
    results = process_dataframe(schema, schema_name, dataframe, data_mapping, chunksize)
    client_engine = client.engine
    insert_start = time.time()
    # data[1] = annotation rows, data[2] = segmentation rows (per chunk)
    for data in results:
        copy_string_iterator(client_engine, str(AnnotationModel.__table__), anno_cols, data[1])
        copy_string_iterator(client_engine, str(SegmentationModel.__table__), seg_cols, data[2])
    insert_end = time.time()
    print(f"INSERT TIME {insert_end-insert_start}")
def _get_flattened_schema_data(self, schema_type: str, data: dict) -> dict:
    """Deserialize ``data`` with the named schema, flatten nested values,
    and split the result into annotation and segmentation mappings."""
    schema_cls = get_schema(schema_type)
    loaded = schema_cls(context={"postgis": True}).load(data, unknown=EXCLUDE)
    # Flatten only when at least one value is itself a dict.
    if any(isinstance(value, dict) for value in loaded.values()):
        loaded = flatten_dict(loaded)
    flat_annotation_schema, flat_segmentation_schema = (
        em_models.split_annotation_schema(schema_cls)
    )
    return (
        self._map_values_to_schema(loaded, flat_annotation_schema),
        self._map_values_to_schema(loaded, flat_segmentation_schema),
    )
def make_annotation_model(table_name: str, schema_type: str, with_crud_columns: bool = True):
    """Make an annotation model.

    Args:
        table_name (str): name of table in database
        schema_type (str): schema type for table
        with_crud_columns (bool, optional): whether to include created,
            deleted columns. Defaults to True.

    Returns:
        SqlAlchemy.Model: a sqlalchemy model
    """
    # DOC FIX: the previous docstring documented a 'version' parameter that
    # does not exist in the signature, and misspelled "columns".
    Schema = get_schema(schema_type)
    return make_annotation_model_from_schema(table_name, Schema, with_crud_columns)
def get_analysis_table(aligned_volume: str, datastack: str, table_name: str, mat_version: int = 1):
    """Helper method that returns a table model.

    Args:
        aligned_volume (str): aligned_volume name
        datastack (str): datastack name
        table_name (str): table to reflect a model
        mat_version (int, optional): target database version

    Returns:
        SQLAlchemy model: returns a sqlalchemy model of a target table
    """
    # Resolve the table's schema name from the annotation database cache.
    anno_db = dynamic_annotation_cache.get_db(aligned_volume)
    schema_name = anno_db.get_table_schema(table_name)
    # Build a URI pointing at the materialized analysis database for this
    # datastack/version and reflect its current tables.
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack, mat_version)
    analysis_engine = create_engine(analysis_sql_uri)
    meta = MetaData()
    meta.reflect(bind=analysis_engine)
    anno_schema = get_schema(schema_name)
    flat_schema = create_flattened_schema(anno_schema)
    if not analysis_engine.dialect.has_table(analysis_engine, table_name):
        # Table missing in the analysis DB: synthesize a declarative model
        # from the flattened schema instead of reflecting it.
        annotation_dict = create_table_dict(
            table_name=table_name,
            Schema=flat_schema,
            segmentation_source=None,
            table_metadata=None,
            with_crud_columns=False,
        )
        analysis_table = type(table_name, (Base, ), annotation_dict)
    else:
        # Table exists: use the reflected table object.
        analysis_table = meta.tables[table_name]
    # Engine was only needed for reflection/existence check; release its pool.
    analysis_engine.dispose()
    return analysis_table
def get_schema_view(annotation_type):
    """Render an HTML page describing the fields of an annotation schema.

    Args:
        annotation_type: schema type name resolvable by get_schema()

    Returns:
        str: rendered 'schema.html' template with a field-description table
    """
    Schema = get_schema(annotation_type)
    ds = []
    for col, field in Schema._declared_fields.items():
        # Nested fields are reported by the wrapped schema's class name.
        # BUG FIX: removed leftover debug print(dir(field.schema), ...).
        if isinstance(field, mm.fields.Nested):
            schema = field.schema.__class__.__name__
        else:
            schema = ''
        ds.append({
            'field_name': col,
            'description': field.metadata.get('description', ''),
            'type': type(field).__name__,
            'schema': schema
        })
    df = pd.DataFrame(ds)
    return render_template('schema.html',
                           df_table=df[['field_name', 'type', 'description', 'schema']].to_html(),
                           schema_type=annotation_type,
                           version=__version__)
def format_data(data: List, bulk_upload_info: dict):
    """Concatenate chunked upload data column-wise, then split each record
    into annotation and segmentation parts.

    Returns:
        list: [annotation_data, segmentation_data]
    """
    schema_name = bulk_upload_info["schema"]
    upload_creation_time = bulk_upload_info["upload_creation_time"]
    # Start from the first chunk and join the remaining chunks column-wise;
    # the loop variable no longer shadows the 'data' parameter.
    base_df = pd.DataFrame(data[0])
    for chunk in data[1:]:
        base_df = pd.concat([base_df, pd.DataFrame(chunk)], axis=1)
    records = base_df.to_dict("records")
    schema = get_schema(schema_name)
    FlattendSchema = create_flattened_schema(schema)
    flat_annotation_schema, flat_segmentation_schema = (
        em_models.split_annotation_schema(FlattendSchema)
    )
    anno_data = split_annotation_data(records, flat_annotation_schema, upload_creation_time)
    seg_data = split_annotation_data(records, flat_segmentation_schema, upload_creation_time)
    return [anno_data, seg_data]
def make_annotation_model(
    table_name: str,
    schema_type: str,
    table_metadata: dict = None,
    with_crud_columns: bool = True,
):
    """make an annotation model

    Args:
        table_name (str): name of table in database
        schema_type (str): schema type for table
        table_metadata (dict, optional): table_metadata dict
        with_crud_columns (bool, optional): whether to include created,
            deleted columns. Defaults to True.

    Returns:
        SqlAlchemy.Model: a sqlalchemy model
    """
    return make_annotation_model_from_schema(
        table_name, get_schema(schema_type), table_metadata, with_crud_columns
    )
def make_annotation_model(dataset, annotation_type):
    """Build an annotation model for the given dataset and annotation type."""
    schema = get_schema(annotation_type)
    return make_annotation_model_from_schema(dataset, annotation_type, schema)
def test_get_types():
    """Every registered type resolves to an AnnotationSchema or SpatialPoint."""
    for annotation_type in get_types():
        schema = get_schema(annotation_type)
        # Tuple form of issubclass is equivalent to the 'or' of both checks.
        assert issubclass(schema, (AnnotationSchema, SpatialPoint))
def test_bad_type():
    """An unknown schema type name raises UnknownAnnotationTypeException."""
    with pytest.raises(UnknownAnnotationTypeException):
        get_schema('NOTAVALIDTYPE')
def merge_tables(self, mat_metadata: dict):
    """Merge all the annotation and segmentation rows into a new table that are
    flagged as valid. Drop the original split tables after inserting all the
    rows into the new table.

    Args:
        mat_metadata (dict): datastack info for the aligned_volume from the infoservice
        analysis_version (int): materialized version number

    Raises:
        e: error during table merging operation

    Returns:
        str: number of rows copied
    """
    analysis_version = mat_metadata["analysis_version"]
    annotation_table_name = mat_metadata["annotation_table_name"]
    segmentation_table_name = mat_metadata["segmentation_table_name"]
    temp_table_name = mat_metadata["temp_mat_table_name"]
    schema = mat_metadata["schema"]
    datastack = mat_metadata["datastack"]
    # create dynamic sql_uri
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack, analysis_version)
    # get schema and match column order for sql query
    anno_schema = get_schema(schema)
    flat_schema = create_flattened_schema(anno_schema)
    ordered_model_columns = create_table_dict(
        table_name=annotation_table_name,
        Schema=flat_schema,
        segmentation_source=None,
        table_metadata=None,
        with_crud_columns=False,
    )
    AnnotationModel = create_annotation_model(mat_metadata, with_crud_columns=False)
    SegmentationModel = create_segmentation_model(mat_metadata)
    query_columns = {}
    # CRUD bookkeeping columns are excluded from the merged table.
    # NOTE(review): "superceded_id" is a misspelling of "superseded_id" —
    # presumably it matches the actual column name; verify against the model.
    crud_columns = ["created", "deleted", "superceded_id"]
    for col in AnnotationModel.__table__.columns:
        if col.name not in crud_columns:
            query_columns[col.name] = col
    # Skip the segmentation 'id' column — it duplicates the annotation id
    # used for the join.
    for col in SegmentationModel.__table__.columns:
        if not col.name == "id":
            query_columns[col.name] = col
    # Reorder the selected columns to match the flattened schema's ordering.
    sorted_columns = OrderedDict([(key, query_columns[key]) for key in ordered_model_columns if key in query_columns.keys()])
    sorted_columns_list = list(sorted_columns.values())
    columns = [f'"{col.table}".{col.name}' for col in sorted_columns_list]
    mat_session, mat_engine = create_session(analysis_sql_uri)
    # Select only rows flagged valid, joining annotation to segmentation on id.
    query = f"""
        SELECT {', '.join(columns)}
        FROM {AnnotationModel.__table__.name}
        JOIN "{SegmentationModel.__table__.name}"
        ON {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        WHERE {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        AND {AnnotationModel.valid} = true
        """
    try:
        mat_db_connection = mat_engine.connect()
        # Run create/drop/rename in one transaction so a failure leaves the
        # original split tables intact.
        with mat_db_connection.begin():
            insert_query = mat_db_connection.execute(
                f"CREATE TABLE {temp_table_name} AS ({query});")
            row_count = insert_query.rowcount
            drop_query = mat_db_connection.execute(
                f'DROP TABLE {annotation_table_name}, "{segmentation_table_name}" CASCADE;'
            )
            alter_query = mat_db_connection.execute(
                f"ALTER TABLE {temp_table_name} RENAME TO {annotation_table_name};"
            )
        mat_session.close()
        mat_engine.dispose()
        return f"Number of rows copied: {row_count}"
    except Exception as e:
        celery_logger.error(e)
        raise (e)
"position": [5, 5, 10], "root_id": 9223372036854775899, "supervoxel_id": 89851029364932800 }, "post_pt": { "position": [10, 5, 10], "root_id": 9223372036854775898, "supervoxel_id": 106205165316472881 }, "ctr_pt": { "position": [7, 5, 10] }, "size": 40.5 } # get the schema to deserialize the test data SynapseSchema = get_schema('synapse') schema = SynapseSchema(context={'postgis': True}) # use the schema to deserialize the schema d = schema.load(synapse_d).data d = flatten_dict(d) # get the appropriate sqlalchemy model # for the annotation type and dataset SynapseModel = model_dict['test']['synapse'] # remove the type field because we don't want it as a column d.pop('type', None) # # create a new model instance with data synapse = SynapseModel(**d)