Beispiel #1
0
def _update_temporal_coverage():
    """Recompute every dataset's temporal coverage from its indexed resources.

    A single UPDATE aggregates ``temporal_coverage_index`` per dataset (joined
    through ``resources``), taking the earliest ``start_time`` and the latest
    ``end_time``. The result is written to ``temporal_coverage_start`` /
    ``temporal_coverage_end`` and merged into ``json_metadata`` under the
    ``temporal_coverage`` key.

    Returns:
        ``{"updated_temporal_coverage": True}`` if the statement executed;
        otherwise ``{"updated_temporal_coverage": False, "error": exc}`` where
        ``exc`` is the exception that was caught.
    """
    coverage_sql = """
                WITH temporal_coverage as (
        	        select resources.dataset_id, 
        	            min(temporal_coverage_index.start_time) as start_time, 
        	            max(temporal_coverage_index.end_time) as end_time 
        	        from temporal_coverage_index 
        	        JOIN resources on resources.id = temporal_coverage_index.indexed_id 
        	        --and resources.is_queryable is TRUE
        	        group by resources.dataset_id
                )
                update datasets 
                SET temporal_coverage_start = tc.start_time,
                    temporal_coverage_end = tc.end_time,
                    json_metadata = datasets.json_metadata ||
        	        jsonb_build_object(
        		        'temporal_coverage',
        			    jsonb_build_object(
        				    'start_time', tc.start_time,
        				    'end_time', tc.end_time
        		        )
        	        )
                from temporal_coverage tc 
                where datasets.id = tc.dataset_id"""

    with session_scope() as db_session:
        try:
            db_session.execute(coverage_sql)
        except Exception as exc:
            # Best-effort maintenance task: report the failure, do not raise.
            return {"updated_temporal_coverage": False, "error": exc}
        else:
            return {"updated_temporal_coverage": True}
Beispiel #2
0
def update_dataset_viz_status(update_definition: Dict) -> Dict:
    """Mark one visualization config of a dataset as visualized.

    Args:
        update_definition: Request payload. Must contain ``dataset_id`` (a
            UUID string) and ``viz_config_id`` (a key expected to exist in the
            dataset's ``json_metadata``).

    Returns:
        ``{"success": True}`` once ``json_metadata[viz_config_id]["visualized"]``
        has been set to ``True``.

    Raises:
        BadRequestException: If the payload is empty, a required key is
            missing, ``dataset_id`` is not a UUID, the dataset does not exist,
            or the dataset's metadata has no entry for ``viz_config_id``.
    """
    if not update_definition:
        raise BadRequestException(
            {'InvalidQueryDefinition': f"Query definition must not be empty; received {update_definition}"})

    for required_key in ("dataset_id", "viz_config_id"):
        if required_key not in update_definition:
            raise BadRequestException({'MissingRequiredParameter': required_key})

    dataset_id = update_definition["dataset_id"]
    viz_config_id = update_definition["viz_config_id"]

    try:
        uuid.UUID(str(dataset_id))
    except ValueError:
        raise BadRequestException({'InvalidParameter': f"'dataset_id' must be proper uuid; received {dataset_id}"})

    with session_scope() as session:
        dataset = Dataset.find_by_record_id(dataset_id, session=session)

        # An unknown dataset is a client error, not an AttributeError further down.
        if dataset is None:
            raise BadRequestException({'InvalidParameter': f"Dataset does not exist: '{dataset_id}'"})

        metadata = dataset.json_metadata
        # Metadata may be absent altogether; treat that like a missing viz config.
        if metadata is None or viz_config_id not in metadata:
            raise BadRequestException({'InvalidParameter': f"Dataset {dataset_id} does not have viz_config with id {viz_config_id}"})

        # The dict is mutated in place, so the attribute is flagged explicitly
        # to make the session aware of the change.
        metadata[viz_config_id]["visualized"] = True
        flag_modified(dataset, "json_metadata")

    return {"success": True}
 def find_by_record_id(
         record_id: str,
         session: session_scope = None) -> Optional[StandardVariableDB]:
     """Return the first ``StandardVariableDB`` row whose ``id`` equals ``record_id``.

     Args:
         record_id: Value compared against ``StandardVariableDB.id``.
         session: Session to run the query on. When ``None``, a fresh
             ``session_scope()`` is opened just for this lookup.

     Returns:
         The first matching row, or ``None`` when nothing matches.
     """
     def _first_match(active_session):
         return active_session.query(StandardVariableDB).filter(
             StandardVariableDB.id == record_id).first()

     if session is not None:
         return _first_match(session)

     with session_scope() as scoped_session:
         return _first_match(scoped_session)
Beispiel #4
0
 def find_by_record_id(
         record_id: str,
         session: session_scope = None) -> Optional[ProvenanceDB]:
     """Return the first ``ProvenanceDB`` row matching the hashed ``record_id``.

     The given ``record_id`` is not used directly: it is first turned into a
     version-3 UUID in the DNS namespace, and that UUID is compared against
     ``ProvenanceDB.id``.

     Args:
         record_id: String that is hashed with ``uuid.uuid3`` before lookup.
         session: Session to run the query on. When ``None``, a fresh
             ``session_scope()`` is opened just for this lookup.

     Returns:
         The first matching row, or ``None`` when nothing matches.
     """
     provenance_uuid = uuid.uuid3(uuid.NAMESPACE_DNS, record_id)

     def _first_match(active_session):
         return active_session.query(ProvenanceDB).filter(
             ProvenanceDB.id == provenance_uuid).first()

     if session is not None:
         return _first_match(session)

     with session_scope() as scoped_session:
         return _first_match(scoped_session)
    def find_by_record_ids(
            record_ids: Iterable[str],
            session: session_scope = None) -> Iterable[StandardVariableDB]:
        """Return all ``StandardVariableDB`` rows whose ``id`` is in ``record_ids``.

        Args:
            record_ids: Ids passed to an ``IN`` filter on ``StandardVariableDB.id``.
            session: Session to run the query on. When ``None``, a fresh
                ``session_scope()`` is opened just for this lookup.

        Returns:
            The list produced by ``Query.all()``.
        """
        def _all_matches(active_session):
            return active_session.query(StandardVariableDB).filter(
                StandardVariableDB.id.in_(record_ids)).all()

        if session is not None:
            return _all_matches(session)

        with session_scope() as scoped_session:
            return _all_matches(scoped_session)
    def find_by_dataset_id_and_name(
            dataset_ids_and_names: Iterable[Tuple[str, str]],
            session: session_scope = None) -> Iterable[VariableDB]:
        """Return all ``VariableDB`` rows matching any ``(dataset_id, name)`` pair.

        Args:
            dataset_ids_and_names: Pairs matched with a composite ``IN`` filter
                on ``(VariableDB.dataset_id, VariableDB.name)``.
            session: Session to run the query on. When ``None``, a fresh
                ``session_scope()`` is opened just for this lookup.

        Returns:
            The list produced by ``Query.all()``.
        """
        def _all_matches(active_session):
            return active_session.query(VariableDB).filter(
                tuple_(VariableDB.dataset_id,
                       VariableDB.name).in_(dataset_ids_and_names)).all()

        if session is not None:
            return _all_matches(session)

        with session_scope() as scoped_session:
            return _all_matches(scoped_session)
Beispiel #7
0
def register_provenance(provenance_definition: Dict) -> Dict:
    """Validate and persist a single provenance record.

    The definition is wrapped in a one-element list and run through
    ``ProvenanceCollectionBuilder``: instantiate, validate the schema, build
    record associations, persist.

    Args:
        provenance_definition: Provenance payload; must not be empty.

    Returns:
        ``{"provenance": <first element returned by builder.persist()>}``.

    Raises:
        BadRequestException: If the definition is empty or schema validation
            reports errors.
    """
    if not len(provenance_definition):
        raise BadRequestException({'Missing parameter or its value is empty': "provenance"})

    with session_scope() as db_session:
        collection_builder = ProvenanceCollectionBuilder(db_session)
        collection_builder.instantiate_provenance_arr([provenance_definition])
        collection_builder.validate_schema()

        schema_errors = collection_builder.schema_validation_errors
        if len(schema_errors) > 0:
            raise BadRequestException({"ProvenanceSchemaValidationError": schema_errors})

        collection_builder.build_record_associations()
        persisted = collection_builder.persist()

    return {"provenance": persisted[0]}
Beispiel #8
0
    def find_by_record_id(
            record_id: str,
            session: session_scope = None) -> Optional[DatasetDB]:
        """Return selected columns of the dataset whose ``id`` equals ``record_id``.

        The query selects individual columns rather than the whole entity:
        id, provenance_id, name, description, json_metadata, created_at, and
        the spatial coverage rendered through ``ST_AsGeoJSON`` under the label
        ``spatial_coverage_geojson``.

        Args:
            record_id: Value compared against ``DatasetDB.id``.
            session: Session to run the query on. When ``None``, a fresh
                ``session_scope()`` is opened just for this lookup.

        Returns:
            The first matching row, or ``None`` when nothing matches.
        """
        geojson_column = func.ST_AsGeoJSON(
            DatasetDB.spatial_coverage).label('spatial_coverage_geojson')
        selected_columns = [
            DatasetDB.id,
            DatasetDB.provenance_id,
            DatasetDB.name,
            DatasetDB.description,
            DatasetDB.json_metadata,
            DatasetDB.created_at,
            geojson_column,
        ]

        def _first_match(active_session):
            return active_session.query(*selected_columns).filter(
                DatasetDB.id == record_id).first()

        if session is not None:
            return _first_match(session)

        with session_scope() as scoped_session:
            return _first_match(scoped_session)
Beispiel #9
0
def _update_variables_list():
    """Refresh ``datasets.variables_list`` from the ``variables`` table.

    For every dataset that has variables, the variable names are joined with
    commas (``string_agg``) and stored in ``variables_list``.

    Returns:
        ``{"updated_variables_list": True}`` if the statement executed;
        otherwise ``{"updated_variables_list": False, "error": exc}`` where
        ``exc`` is the exception that was caught.
    """
    variables_sql = """
                with variables_summary as (
        	        select variables.dataset_id as dataset_id, 
        	            string_agg(variables.name, ',') as variables_list 
        	        from variables 
        	        group by dataset_id
                )
                UPDATE datasets
                set variables_list = variables_summary.variables_list
                from variables_summary 
                where datasets.id = variables_summary.dataset_id"""

    with session_scope() as db_session:
        try:
            db_session.execute(variables_sql)
        except Exception as exc:
            # Best-effort maintenance task: report the failure, do not raise.
            return {"updated_variables_list": False, "error": exc}
        else:
            return {"updated_variables_list": True}
Beispiel #10
0
def register_standard_variables(standard_variable_definitions: list) -> dict:
    """Validate and persist a batch of standard variables.

    Args:
        standard_variable_definitions: Between 1 and 500 definitions.

    Returns:
        ``{"result": "success", "standard_variables": <builder.persist() result>}``.

    Raises:
        BadRequestException: If the batch is empty, holds more than 500
            records, or schema validation reports errors.
    """
    record_count = len(standard_variable_definitions)
    if record_count == 0:
        raise BadRequestException({'Missing parameter or its value is empty': "standard_variables"})
    if record_count > 500:
        raise BadRequestException({
            'NumRecordsExceedsThresholdError':
                f"Maximum number of records per call cannot exceed 500; received {record_count}"
        })

    with session_scope() as db_session:
        collection_builder = StandardVariableCollectionBuilder(db_session)
        collection_builder.instantiate_variables(standard_variable_definitions)
        collection_builder.validate_schema()

        schema_errors = collection_builder.schema_validation_errors
        if len(schema_errors) > 0:
            raise BadRequestException({"StandardVariableSchemaValidationError": schema_errors})

        collection_builder.build_record_associations()
        persisted = collection_builder.persist()

    return {"result": "success", "standard_variables": persisted}
Beispiel #11
0
def _update_resource_summary():
    """Refresh the ``resource_count`` entry in every dataset's ``json_metadata``.

    Counts the resources per dataset and merges ``{"resource_count": n}`` into
    ``datasets.json_metadata``.

    The freshly computed object is the RIGHT operand of ``||``: when both
    sides of a jsonb concatenation contain the same key, the right-hand value
    wins. With the operands the other way round, a ``resource_count`` already
    stored in the metadata would shadow the new count and never be refreshed.

    NOTE(review): rows whose ``json_metadata`` is NULL presumably stay NULL
    (concatenation with NULL); confirm whether they should be initialised.

    Returns:
        ``{"updated_resource_summary": True}`` if the statement executed;
        otherwise ``{"updated_resource_summary": False, "error": e}`` where
        ``e`` is the exception that was caught.
    """
    with session_scope() as session:
        query = """
                WITH resource_summary as (
        	        select dataset_id, 
        	            count(id) as resource_count 
        	        from resources 
        	        group by dataset_id
                )
                update datasets 
                SET json_metadata = datasets.json_metadata || jsonb_build_object(
        	        'resource_count', rs.resource_count
                )
                from resource_summary rs
                where rs.dataset_id = datasets.id"""

        try:
            session.execute(query)
            return {"updated_resource_summary": True}
        except Exception as e:
            # Best-effort maintenance task: report the failure, do not raise.
            return {"updated_resource_summary": False, "error": e}
Beispiel #12
0
def _update_polygon_point_coverage():
    """Rebuild dataset spatial coverage from point-type resource geometries.

    For each dataset, the indexed geometries of its resources whose geometry
    type name ends in ``Point`` are buffered (0.2), simplified (0.2) and
    unioned; the result replaces ``datasets.spatial_coverage``.

    Returns:
        ``{"updated_point_spatial_coverage": True}`` if the statement executed;
        otherwise ``{"updated_point_spatial_coverage": False, "error": exc}``
        where ``exc`` is the exception that was caught.
    """
    coverage_sql = """
                with spatial_coverage as (
        	        select 
        	            dataset_id, 
        	            ST_union(ST_Simplify(st_buffer(spatial_coverage_index.spatial_coverage, 0.2), 0.2)) as dataset_spatial_coverage 
        	        from resources
        	        inner join spatial_coverage_index on resources.id = spatial_coverage_index.indexed_id 
        	        --and resources.is_queryable is TRUE
        	        where st_geometrytype(spatial_coverage_index.spatial_coverage) like '%Point'
        	        group by dataset_id
                )
                update datasets
                SET spatial_coverage = sc.dataset_spatial_coverage
                from spatial_coverage sc 
                where sc.dataset_id = datasets.id"""

    with session_scope() as db_session:
        try:
            db_session.execute(coverage_sql)
        except Exception as exc:
            # Best-effort maintenance task: report the failure, do not raise.
            return {"updated_point_spatial_coverage": False, "error": exc}
        else:
            return {"updated_point_spatial_coverage": True}
Beispiel #13
0
def register_datasets(dataset_definitions: list) -> dict:
    """Validate and persist a batch of datasets.

    Args:
        dataset_definitions: Between 1 and 500 dataset definitions.

    Returns:
        ``{"result": "success", "datasets": <builder.persist() result>}``.

    Raises:
        BadRequestException: If the batch is empty, holds more than 500
            records, schema validation reports errors, or building record
            associations reports data validation errors.
    """
    record_count = len(dataset_definitions)
    if record_count == 0:
        raise BadRequestException('Missing parameter or its value is empty: "datasets"')
    if record_count > 500:
        raise BadRequestException({
            "NumRecordsExceedsThresholdError":
                f"Maximum number of records per call cannot exceed 500; received {record_count}"
        })

    with session_scope() as db_session:
        collection_builder = DatasetCollectionBuilder(db_session)
        collection_builder.instantiate_variables(dataset_definitions)

        # Stage 1: structural validation of the submitted definitions.
        collection_builder.validate_schema()
        schema_errors = collection_builder.schema_validation_errors
        if len(schema_errors) > 0:
            raise BadRequestException({"DatasetSchemaValidationError": schema_errors})

        # Stage 2: data validation errors are checked after associations are built.
        collection_builder.build_record_associations()
        data_errors = collection_builder.data_validation_errors
        if len(data_errors) > 0:
            raise BadRequestException({"DatasetDataValidationError": data_errors})

        persisted = collection_builder.persist()

    return {"result": "success", "datasets": persisted}
Beispiel #14
0
def delete_resource(delete_definition: Dict) -> Dict:
    """Delete a resource together with its index and association rows.

    Rows are removed, in this order, from ``spatial_coverage_index``,
    ``temporal_coverage_index``, ``resources_variables`` and ``resources``.

    Args:
        delete_definition: Request payload with ``provenance_id`` and
            ``resource_id``, both UUIDs. ``provenance_id`` must match the
            provenance recorded on the resource.

    Returns:
        ``{"result": "success"}`` after the delete statements were issued.

    Raises:
        BadRequestException: If the payload is empty, a required key is
            missing, an id is not a UUID, the resource does not exist, or the
            provenance does not match.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``text`` import.
    from sqlalchemy import text

    if not delete_definition:
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"Query definition must not be empty; received {delete_definition}"
        })

    for required_key in ("provenance_id", "resource_id"):
        if required_key not in delete_definition:
            raise BadRequestException({'MissingRequiredParameter': required_key})

    provenance_id = delete_definition["provenance_id"]
    resource_id = delete_definition["resource_id"]

    # uuid.UUID accepts several spellings (upper case, braces, "urn:uuid:").
    # Keep the canonical lower-case form for comparisons and SQL parameters so
    # that a valid but unusual spelling cannot break the statements below.
    try:
        provenance_uuid = str(uuid.UUID(str(provenance_id)))
    except ValueError:
        raise BadRequestException({
            'InvalidParameter':
            f"'provenance_id' must be proper uuid; received {provenance_id}"
        })

    try:
        resource_uuid = str(uuid.UUID(str(resource_id)))
    except ValueError:
        raise BadRequestException({
            'InvalidParameter':
            f"'resource_id' must be proper uuid; received {resource_id}"
        })

    with session_scope() as session:
        resource = Resource.find_by_record_id(resource_id, session)

        if resource is None:
            raise BadRequestException({
                'InvalidParameter':
                f"Resource does not exist: '{resource_id}'"
            })

        if str(resource.provenance_id).lower() != provenance_uuid:
            raise BadRequestException({
                'InvalidParameter':
                f"provenance_id '{provenance_id}' does not match"
            })

        # Dependent rows first, the resource row itself last.
        delete_statements = (
            "DELETE FROM spatial_coverage_index WHERE indexed_id = :resource_id",
            "DELETE FROM temporal_coverage_index WHERE indexed_id = :resource_id",
            "DELETE FROM resources_variables WHERE resource_id = :resource_id",
            "DELETE FROM resources WHERE id = :resource_id",
        )
        for statement in delete_statements:
            session.execute(text(statement), {"resource_id": resource_uuid})

        return {"result": "success"}
Beispiel #15
0
def search_datasets_v2(query_definition: dict) -> list:
    """Search datasets by text, spatial/temporal coverage and/or provenance.

    Args:
        query_definition: Search payload. Supported keys are ``search_query``
            (a list), ``spatial_coverage``, ``temporal_coverage`` and
            ``provenance_id`` (a UUID), plus an optional ``limit`` (default
            500). At least one search key must be present. The dict is not
            modified.

    Returns:
        A list of dicts with ``dataset_id``, ``dataset_name``,
        ``dataset_description``, ``dataset_metadata`` and
        ``dataset_spatial_coverage`` (parsed JSON, or ``None`` when the row has
        no spatial coverage). Each dataset id appears once, in result order.

    Raises:
        BadRequestException: If the payload is empty or contains only
            ``limit``, ``limit`` is not an integer, an unknown search field is
            given, ``search_query`` is not a list, or ``provenance_id`` is not
            a UUID.
        InternalServerException: If building or executing the query fails.
    """
    if not query_definition:
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"Query definition must not be empty; received {query_definition}"
        })

    # Work on a shallow copy so removing "limit" does not alter the caller's dict.
    search_fields = dict(query_definition)

    try:
        limit = int(search_fields.pop("limit", 500))
    except (TypeError, ValueError):
        # A malformed limit is a client error, not a server failure.
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"'limit' must be an integer; received {query_definition.get('limit')}"
        })

    allowed_query_words = frozenset([
        "search_query", "spatial_coverage", "temporal_coverage",
        "provenance_id"
    ])

    # The payload may have contained nothing but "limit".
    if not search_fields:
        raise BadRequestException(
            {'InvalidQueryDefinition': "Query definition must not be empty"})

    if not all(field_name in allowed_query_words for field_name in search_fields):
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"Invalid search field(s); must be either of {allowed_query_words}"
        })

    search_query = search_fields.get("search_query")
    if search_query is not None and not isinstance(search_query, list):
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"Invalid value type for 'search_query': {search_query}; must be an array"
        })

    provenance_id = search_fields.get("provenance_id")
    if provenance_id is not None:
        try:
            uuid.UUID(str(provenance_id))
        except ValueError:
            raise BadRequestException({
                'InvalidQueryDefinition':
                f"'provenance_id' value must be a valid UUID v4; received {provenance_id}"
            })

    spatial_coverage = search_fields.get("spatial_coverage")
    temporal_coverage = search_fields.get("temporal_coverage")

    try:
        with session_scope() as session:
            datasets_query = _generate_select_datasets_query(
                provenance_id=provenance_id,
                search_query=search_query,
                spatial_coverage=spatial_coverage,
                temporal_coverage=temporal_coverage,
                limit=limit)
            print(datasets_query)

            # Keyed by dataset id so that only the first row per dataset is
            # kept; dict insertion order preserves the result order.
            datasets_by_id = {}
            for row in session.execute(datasets_query):
                dataset_id = str(row[0])
                if dataset_id in datasets_by_id:
                    continue

                raw_coverage = row[4]
                datasets_by_id[dataset_id] = {
                    "dataset_id": dataset_id,
                    "dataset_name": str(row[1]),
                    "dataset_description": str(row[2]),
                    "dataset_metadata": row[3] if row[3] is not None else {},
                    # A dataset without spatial coverage must not fail the search.
                    "dataset_spatial_coverage":
                        ujson.loads(raw_coverage) if raw_coverage is not None else None,
                }

            return list(datasets_by_id.values())

    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        raise InternalServerException(e)
Beispiel #16
0
def delete_dataset(delete_definition: Dict) -> Dict:
    """Delete a dataset together with its variables, resources and index rows.

    Two temporary tables collect the ids of the dataset's variables and
    resources; dependent rows are deleted through them before the variables,
    resources and finally the dataset row itself are removed.

    Args:
        delete_definition: Request payload with ``provenance_id`` and
            ``dataset_id``, both UUIDs. ``provenance_id`` must match the
            provenance recorded on the dataset.

    Returns:
        ``{"result": "success"}`` after the delete statements were issued.

    Raises:
        BadRequestException: If the payload is empty, a required key is
            missing, an id is not a UUID, the dataset does not exist, or the
            provenance does not match.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``text`` import.
    from sqlalchemy import text

    if not delete_definition:
        raise BadRequestException({
            'InvalidQueryDefinition':
            f"Query definition must not be empty; received {delete_definition}"
        })

    for required_key in ("provenance_id", "dataset_id"):
        if required_key not in delete_definition:
            raise BadRequestException({'MissingRequiredParameter': required_key})

    provenance_id = delete_definition["provenance_id"]
    dataset_id = delete_definition["dataset_id"]

    # uuid.UUID accepts several spellings (upper case, braces, "urn:uuid:").
    # Keep the canonical lower-case form for comparisons and SQL so that a
    # valid but unusual spelling cannot break the statements below.
    try:
        provenance_uuid = str(uuid.UUID(str(provenance_id)))
    except ValueError:
        raise BadRequestException({
            'InvalidParameter':
            f"'provenance_id' must be proper uuid; received {provenance_id}"
        })

    try:
        dataset_uuid = str(uuid.UUID(str(dataset_id)))
    except ValueError:
        raise BadRequestException({
            'InvalidParameter':
            f"'dataset_id' must be proper uuid; received {dataset_id}"
        })

    with session_scope() as session:
        dataset = Dataset.find_by_record_id(dataset_id, session)

        if dataset is None:
            raise BadRequestException({
                'InvalidParameter':
                f"Dataset does not exist: '{dataset_id}'"
            })

        if str(dataset.provenance_id).lower() != provenance_uuid:
            raise BadRequestException({
                'InvalidParameter':
                f"provenance_id '{provenance_id}' does not match"
            })

        by_dataset = {"dataset_id": dataset_uuid}

        # The two CREATE TABLE ... AS statements inline the canonical UUID.
        # It was produced by uuid.UUID above, so it consists only of hex
        # digits and hyphens.
        # NOTE(review): bind parameters inside CREATE TABLE AS may not be
        # supported by every driver/binding mode - confirm before converting.
        statements = (
            # Temp table of variable ids, used to clean variables_standard_variables
            # and resources_variables.
            ("DROP TABLE IF EXISTS delete_variables", None),
            ("CREATE TEMPORARY TABLE delete_variables AS SELECT variables.id AS variable_id "
             f"FROM variables WHERE dataset_id = '{dataset_uuid}'", None),
            ("CREATE index idx_variable_id ON delete_variables (variable_id)", None),

            # Temp table of resource ids, used to clean spatial_coverage_index
            # and temporal_coverage_index.
            ("DROP TABLE IF EXISTS delete_resources", None),
            ("CREATE TEMPORARY TABLE delete_resources AS SELECT resources.id AS resource_id "
             f"FROM resources WHERE dataset_id = '{dataset_uuid}'", None),
            ("CREATE index idx_resource_id ON delete_resources (resource_id)", None),

            # Rows that reference the dataset's variables, then the variables.
            ("DELETE FROM variables_standard_variables USING delete_variables "
             "WHERE variables_standard_variables.variable_id = delete_variables.variable_id", None),
            ("DELETE FROM resources_variables USING delete_variables "
             "WHERE resources_variables.variable_id = delete_variables.variable_id", None),
            ("DELETE FROM variables WHERE dataset_id = :dataset_id", by_dataset),

            # Rows that reference the dataset's resources, then the resources.
            ("DELETE FROM temporal_coverage_index USING delete_resources "
             "WHERE temporal_coverage_index.indexed_id = delete_resources.resource_id", None),
            ("DELETE FROM spatial_coverage_index USING delete_resources "
             "WHERE spatial_coverage_index.indexed_id = delete_resources.resource_id", None),
            ("DELETE FROM resources WHERE dataset_id = :dataset_id", by_dataset),

            # Finally the dataset row itself.
            ("DELETE FROM datasets WHERE id = :dataset_id", by_dataset),
        )
        for sql, params in statements:
            session.execute(text(sql), params)

        return {"result": "success"}
Beispiel #17
0
def update_dataset(update_definition: Dict) -> Dict:
    """Update a dataset's name, description and/or metadata.

    Args:
        update_definition: Request payload. ``dataset_id`` (UUID) is
            required; ``name``, ``description`` and ``metadata`` (a JSON
            object) are optional. Metadata keys are merged into the stored
            metadata; keys whose merged value is ``None`` are removed.

    Returns:
        ``{"success": True, "dataset_id": ..., "changes": {...}}`` where
        ``changes`` holds one ``_get_change_record`` entry per updated field.

    Raises:
        BadRequestException: If the payload is empty, ``dataset_id`` is
            missing or not a UUID, ``metadata`` is not a dict, or the dataset
            does not exist.
        InternalServerException: If anything else fails while applying the
            update.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``text`` import.
    from sqlalchemy import text

    if not update_definition:
        raise BadRequestException(
            {'InvalidQueryDefinition': f"Query definition must not be empty; received {update_definition}"})

    if "dataset_id" not in update_definition:
        raise BadRequestException({'MissingRequiredParameter': "dataset_id"})

    dataset_id = update_definition["dataset_id"]
    try:
        # Canonical spelling is what gets sent to the database.
        dataset_uuid = str(uuid.UUID(str(dataset_id)))
    except ValueError:
        raise BadRequestException(
            {'InvalidQueryDefinition': f"'dataset_id' value must be a valid UUID v4; received {dataset_id}"})

    name = update_definition.get("name")
    description = update_definition.get("description")
    json_metadata = update_definition.get('metadata')

    if json_metadata is not None and not isinstance(json_metadata, dict):
        raise BadRequestException(
            {'InvalidQueryDefinition': f"'metadata' value must be a JSON object; received {json_metadata}"})

    changes = {}
    try:
        with session_scope() as session:
            dataset = Dataset.find_by_record_id(dataset_id, session)
            if dataset is None:
                raise BadRequestException(
                    {'InvalidParameter': f"Dataset does not exist: '{dataset_id}'"})

            # Column names are fixed here; every user-supplied value travels
            # as a bound parameter, so quotes in free text cannot break or
            # alter the statement.
            assignments = []
            params = {"dataset_id": dataset_uuid}

            if name is not None:
                assignments.append("name = :name")
                params["name"] = name
                changes["name"] = _get_change_record(dataset.name, name)

            if description is not None:
                assignments.append("description = :description")
                params["description"] = description
                changes["description"] = _get_change_record(dataset.description, description)

            if json_metadata is not None:
                # Copy first so the loaded record's dict is not mutated in place.
                current_metadata = dict(dataset.json_metadata or {})
                merged_metadata = {
                    key: value
                    for key, value in {**current_metadata, **json_metadata}.items()
                    if value is not None
                }
                # CAST(...) rather than "::json": the double-colon form is
                # awkward next to a ":name" bind parameter.
                assignments.append("json_metadata = CAST(:json_metadata AS json)")
                params["json_metadata"] = json.dumps(merged_metadata)
                changes["metadata"] = _get_change_record(current_metadata, merged_metadata)

            if assignments:
                update_query = (
                    f"UPDATE datasets SET {', '.join(assignments)} "
                    "WHERE datasets.id = :dataset_id"
                )
                print(update_query)
                session.execute(text(update_query), params)

        return {"success": True, "dataset_id": dataset_id, "changes": changes}

    except BadRequestException:
        # Client errors raised above must not be re-labelled as server errors.
        raise
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        raise InternalServerException(e)
Beispiel #18
0
def update_standard_variable(update_definition: Dict) -> Dict:
    """Update a standard variable's name, ontology, uri and/or description.

    Args:
        update_definition: Request payload. ``standard_variable_id`` (UUID)
            is required; ``name``, ``ontology``, ``uri`` and ``description``
            are optional and only applied when not ``None``.

    Returns:
        ``{"success": True, "standard_variable_id": ..., "changes": {...}}``
        where ``changes`` holds one ``_get_change_record`` entry per updated
        field.

    Raises:
        BadRequestException: If the payload is empty,
            ``standard_variable_id`` is missing or not a UUID, or the
            standard variable does not exist.
        InternalServerException: If anything else fails while applying the
            update.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``text`` import.
    from sqlalchemy import text

    if not update_definition:
        raise BadRequestException(
            {'InvalidQueryDefinition': f"Query definition must not be empty; received {update_definition}"})

    if "standard_variable_id" not in update_definition:
        raise BadRequestException({'MissingRequiredParameter': "standard_variable_id"})

    standard_variable_id = update_definition["standard_variable_id"]
    try:
        # Canonical spelling is what gets sent to the database.
        standard_variable_uuid = str(uuid.UUID(str(standard_variable_id)))
    except ValueError:
        raise BadRequestException(
            {'InvalidQueryDefinition':
                f"'standard_variable_id' value must be a valid UUID v4; received {standard_variable_id}"})

    # Fixed list of updatable columns; the payload key, the column name and
    # the attribute name on the loaded record are the same for each.
    updatable_columns = ("name", "ontology", "uri", "description")

    changes = {}

    try:
        with session_scope() as session:
            standard_variable = StandardVariable.find_by_record_id(standard_variable_id, session)
            if standard_variable is None:
                raise BadRequestException(
                    {'InvalidParameter': f"Standard variable does not exist: '{standard_variable_id}'"})

            # Column names come from the fixed tuple above; every
            # user-supplied value travels as a bound parameter.
            assignments = []
            params = {"standard_variable_id": standard_variable_uuid}

            for column in updatable_columns:
                new_value = update_definition.get(column)
                if new_value is None:
                    continue
                assignments.append(f"{column} = :{column}")
                params[column] = new_value
                # Each change record pairs the stored value with the new
                # value of the SAME field.
                changes[column] = _get_change_record(getattr(standard_variable, column), new_value)

            if assignments:
                update_query = (
                    f"UPDATE standard_variables SET {', '.join(assignments)} "
                    "WHERE standard_variables.id = :standard_variable_id"
                )
                print(update_query)
                session.execute(text(update_query), params)

        return {"success": True, "standard_variable_id": standard_variable_id, "changes": changes}

    except BadRequestException:
        # Client errors raised above must not be re-labelled as server errors.
        raise
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        raise InternalServerException(e)