Ejemplo n.º 1
0
 def validate(self, record: Any, validation_result: ValidationResult):
     """Report an error when the configured attribute is not a parseable UUID.

     The attribute named by ``self.attribute`` is read from ``record``,
     converted with ``str()`` and handed to ``uuid.UUID``.

     NOTE: the UUID *version* is not checked, even though the error message
     mentions "v4"; any value ``uuid.UUID`` can parse is accepted.

     Args:
         record: Object carrying the attribute to validate.
         validation_result: Collector whose ``add_error`` receives the
             message when parsing fails.
     """
     attribute_value = getattr(record, self.attribute)
     try:
         # str() allows both textual values and uuid.UUID instances.
         uuid.UUID(str(attribute_value))
     except ValueError:
         validation_result.add_error(f"{self.attribute} must be a valid UUID v4; received {attribute_value}")
Ejemplo n.º 2
0
    def build_record_associations(self):
        """Check that every dataset references a provenance record that exists.

        Provenance records are looked up via ``Provenance.find_by_record_ids``
        using ``self.db_records_references["provenance_record_ids"]``. For
        each entry of ``self.datasets`` whose ``provenance_id`` is not among
        the stringified ids of the records found, a ``ValidationResult``
        carrying an error is collected.

        The collected results are stored on ``self.data_validation_errors``
        (an empty list when everything is valid).
        """
        provenance_arr = Provenance.find_by_record_ids(
            self.db_records_references["provenance_record_ids"], self.session)
        valid_provenance_record_ids = {
            str(provenance.id) for provenance in provenance_arr
        }

        validation_results_with_errors = []
        for dataset in self.datasets:
            if dataset.provenance_id in valid_provenance_record_ids:
                continue

            validation_result = ValidationResult(record=dataset.to_json())
            # Pass the message as a plain string, as the other add_error
            # call sites do, rather than wrapping it in a list.
            validation_result.add_error(
                f"Invalid value for 'provenance_id': {dataset.provenance_id}")
            validation_results_with_errors.append(validation_result)

        self.data_validation_errors = validation_results_with_errors
Ejemplo n.º 3
0
    def build_record_associations(self):
        """Validate the references held by each resource in this batch.

        Datasets, provenance records and variables are fetched through their
        ``find_by_record_ids`` lookups, and their ids are stringified into
        sets. Each entry of ``self.resources`` is then checked for:

        * a ``record_id`` already seen earlier in ``self.resources``;
        * a ``dataset_id`` that is not among the fetched datasets;
        * a ``provenance_id`` that is not among the fetched provenance records;
        * any ``variable_ids`` that are not among the fetched variables.

        Results that are not valid are stored, in resource order, on
        ``self.data_validation_errors``.
        """
        failed_results = []

        dataset_rows = Dataset.find_by_record_ids(
            self.db_records_references["dataset_ids"], self.session)
        provenance_rows = Provenance.find_by_record_ids(
            self.db_records_references["provenance_ids"], self.session)
        variable_rows = Variable.find_by_record_ids(
            self.db_records_references["variable_ids"], self.session)

        # Only the stringified ids are needed for the membership checks.
        known_dataset_ids = {str(row.id) for row in dataset_rows}
        known_provenance_ids = {str(row.id) for row in provenance_rows}
        known_variable_ids = {str(row.id) for row in variable_rows}

        seen_record_ids = set()
        for resource in self.resources:
            result = ValidationResult(record=resource.to_json())

            record_id = str(resource.record_id)
            if record_id not in seen_record_ids:
                seen_record_ids.add(record_id)
            else:
                result.add_error(
                    f"Duplicate record_id '{record_id}' found in this batch; record_ids must be unique"
                )

            if resource.dataset_id not in known_dataset_ids:
                result.add_error(
                    f"Invalid value for 'dataset_id': {resource.dataset_id}")

            if resource.provenance_id not in known_provenance_ids:
                result.add_error(
                    f"Invalid value for 'provenance_id': {resource.provenance_id}"
                )

            unknown_variable_ids = set(
                resource.variable_ids) - known_variable_ids
            if unknown_variable_ids:
                result.add_error(
                    f"Invalid value for 'variable_ids': {unknown_variable_ids}"
                )

            if result.is_valid():
                continue
            failed_results.append(result)

        # Not implemented here: associating dataset, standard_variables,
        # temporal_index and spatial_index.
        self.data_validation_errors = failed_results
Ejemplo n.º 4
0
    def build_record_associations(self):
        """Validate references and (dataset_id, name) uniqueness of variables.

        First pass, per entry of ``self.variables``:

        * ``dataset_id`` must be among the datasets returned by
          ``Dataset.find_by_record_ids``;
        * every entry of ``standard_variable_ids`` must be among the records
          returned by ``StandardVariable.find_by_record_ids``;
        * the ``(dataset_id, name)`` pair must occur only once in the payload.

        Second pass, only when the first pass produced no errors: variables
        returned by ``Variable.find_by_dataset_id_and_name`` are compared with
        the payload, and an error is recorded when a returned record has the
        same ``(dataset_id, name)`` but a different id than the payload's
        ``record_id``.

        All results with errors are stored on ``self.data_validation_errors``.
        """
        validation_results_with_errors = []

        datasets = Dataset.find_by_record_ids(
            self.db_records_references["dataset_ids"], self.session)
        standard_variables = StandardVariable.find_by_record_ids(
            self.db_records_references["standard_variable_ids"], self.session)

        valid_dataset_ids = {str(dataset.id) for dataset in datasets}
        valid_standard_variable_ids = {
            str(standard_variable.id)
            for standard_variable in standard_variables
        }

        # Count how often each (dataset_id, name) pair appears in the payload
        # so duplicates can be reported on every record involved.
        dataset_id_name_counts = {}
        for variable in self.variables:
            key = (str(variable.dataset_id), str(variable.name))
            dataset_id_name_counts[key] = dataset_id_name_counts.get(key, 0) + 1

        for variable in self.variables:
            validation_result = ValidationResult(record=variable.to_json())
            if variable.dataset_id not in valid_dataset_ids:
                validation_result.add_error(
                    f"Invalid value for 'dataset_id': {variable.dataset_id}")

            invalid_standard_variable_ids = set(
                variable.standard_variable_ids) - valid_standard_variable_ids
            if invalid_standard_variable_ids:
                validation_result.add_error(
                    f"Invalid value for 'standard_variable_ids': {invalid_standard_variable_ids}"
                )

            dataset_id = str(variable.dataset_id)
            name = str(variable.name)
            if dataset_id_name_counts[(dataset_id, name)] > 1:
                validation_result.add_error(
                    f"Duplicate value for (dataset_id, name): ({dataset_id}), ({name})"
                )

            if not validation_result.is_valid():
                validation_results_with_errors.append(validation_result)

        # Validate uniqueness of dataset_id/name against existing records.
        if not validation_results_with_errors:
            prelim_dataset_id_and_name_to_var = {
                (str(v.dataset_id), str(v.name)): v
                for v in self.variables
            }
            existing_variables = Variable.find_by_dataset_id_and_name(
                list(prelim_dataset_id_and_name_to_var), self.session)
            for existing_variable in existing_variables:
                record_id = str(existing_variable.id)
                dataset_id = str(existing_variable.dataset_id)
                # Stringify the name exactly like the lookup keys above so
                # the pair matches regardless of the stored value's type.
                name = str(existing_variable.name)

                variable = prelim_dataset_id_and_name_to_var.get(
                    (dataset_id, name))
                if variable is None:
                    # The lookup returned a pair that is not in the payload;
                    # there is nothing in this batch to compare it with.
                    continue

                # Compare ids as strings, since record_id was stringified.
                if str(variable.record_id) != record_id:
                    validation_result = ValidationResult(
                        record=variable.to_json())

                    msg = f"Record already exists for variable with dataset_id '{dataset_id}' and name '{name}': '{record_id}'"
                    validation_result.add_error(msg)
                    validation_results_with_errors.append(validation_result)

        # Not implemented here: associating dataset, standard_variables,
        # temporal_index and spatial_index.
        self.data_validation_errors = validation_results_with_errors
Ejemplo n.º 5
0
    def validate(self, record: Any, validation_result: ValidationResult):
        """Validate a coverage dict holding 'start_time' and 'end_time'.

        The attribute named by ``self.attribute`` is read from ``record``.

        * Falsy value: returns ``True`` without an error when
          ``self.ignore_empty_values`` is set, otherwise records a
          "must not be empty" error.
        * Non-dict value: records an "Invalid format" error.
        * Dict value: both 'start_time' and 'end_time' must be present and
          must parse with ``datetime.strptime`` using ``self.iso8601_format``;
          each problem is recorded as a separate error.

        Args:
            record: Object carrying the attribute to validate.
            validation_result: Collector whose ``add_error`` receives messages.

        Returns:
            ``True`` when an empty value is ignored, otherwise ``None``.
        """
        from datetime import datetime

        attribute_value = getattr(record, self.attribute)
        if not attribute_value:
            if self.ignore_empty_values:
                return True
            validation_result.add_error(f"{self.attribute} must not be empty; received {attribute_value}")
            return None

        if not isinstance(attribute_value, dict):
            help_msg = "must be a dictionary with keys 'start_time' and 'end_time'"
            validation_result.add_error(f"Invalid format for '{self.attribute}': {attribute_value}; {help_msg}")
            return None

        for key in ("start_time", "end_time"):
            if key not in attribute_value:
                validation_result.add_error(f"{self.attribute} must contain '{key}' key")
                continue

            timestamp = attribute_value[key]
            try:
                datetime.strptime(timestamp, self.iso8601_format)
            except (TypeError, ValueError):
                # strptime raises TypeError for non-string input; report it
                # as a format error instead of letting it propagate.
                validation_result.add_error(f"{timestamp} does not match ISO8601 datetime format '{self.iso8601_format}'")

        return None
Ejemplo n.º 6
0
    def validate(self, record: Any, validation_result: ValidationResult):
        """Report an error when the configured attribute is not a dict.

        The attribute named by ``self.attribute`` is read from ``record``.
        Any ``dict`` instance, including subclasses such as ``OrderedDict``,
        is accepted; everything else is reported.

        Args:
            record: Object carrying the attribute to validate.
            validation_result: Collector whose ``add_error`` receives the
                message when the value is not a dict.
        """
        attribute_value = getattr(record, self.attribute)

        # isinstance (rather than an exact type comparison) keeps dict
        # subclasses valid.
        if not isinstance(attribute_value, dict):
            help_msg = "must be a JSON object"
            validation_result.add_error(f"Invalid format for '{self.attribute}': '{attribute_value}'; {help_msg}")
Ejemplo n.º 7
0
    def validate(self, record: Any, validation_result: ValidationResult):
        """Report an error when the attribute equals a configured empty value.

        The attribute named by ``self.attribute`` is read from ``record`` and
        compared with ``==`` against every entry of ``self.empty_values``.

        Args:
            record: Object carrying the attribute to validate.
            validation_result: Collector whose ``add_error`` receives the
                message when a match is found.
        """
        candidate = getattr(record, self.attribute)

        # Evaluate every comparison first, then test whether any matched.
        comparisons = []
        for empty_marker in self.empty_values:
            comparisons.append(candidate == empty_marker)

        if not any(comparisons):
            return

        validation_result.add_error(f"{self.attribute} must not be empty; received {candidate}")
Ejemplo n.º 8
0
    def validate(self, record: Any, validation_result: ValidationResult):
        """Validate a coverage dict of the form ``{'type': ..., 'value': ...}``.

        The attribute named by ``self.attribute`` is read from ``record``.

        * Falsy value: returns ``True`` without an error when
          ``self.ignore_empty_values`` is set, otherwise records a
          "must not be empty" error.
        * Non-dict value: records an "Invalid format" error.
        * Dict value: 'type' and 'value' must both be present, 'type' must be
          in ``self.supported_types``, and for the types "WKT_POLYGON",
          "BoundingBox" and "Point" the value is checked with the matching
          ``self._is_valid_*`` helper.

        A missing key is reported as an error; the value check is skipped
        when either key is missing.

        Args:
            record: Object carrying the attribute to validate.
            validation_result: Collector whose ``add_error`` receives messages.

        Returns:
            ``True`` when an empty value is ignored, otherwise ``None``.
        """
        attribute_value = getattr(record, self.attribute)
        if not attribute_value:
            if self.ignore_empty_values:
                return True
            validation_result.add_error(f"{self.attribute} must not be empty; received {attribute_value}")
            return None

        if not isinstance(attribute_value, dict):
            help_msg = "must be a dictionary with keys 'type' and 'value'"
            validation_result.add_error(f"Invalid format for 'spatial_coverage': {attribute_value}; {help_msg}")
            return None

        has_type = 'type' in attribute_value
        has_value = 'value' in attribute_value

        if not has_type:
            validation_result.add_error(f"Missing required key 'type' in {self.attribute}")

        if not has_value:
            validation_result.add_error(f"Missing required key 'value' in {self.attribute}")

        spatial_coverage_type = attribute_value.get('type')

        # Only judge the type when one was supplied; a missing key has
        # already been reported above.
        if has_type and spatial_coverage_type not in self.supported_types:
            help_msg = f"must be one of the supported types: {self.supported_types}"
            msg = f"Invalid spatial coverage type: {spatial_coverage_type}; {help_msg}"
            validation_result.add_error(msg)

        if not (has_type and has_value):
            # The value cannot be checked without both keys.
            return None

        spatial_coverage_value = attribute_value['value']

        if spatial_coverage_type == "WKT_POLYGON" and not self._is_valid_wkt_polygon(spatial_coverage_value):
            help_msg = f"must match the following regex: '{self.wkt_polygon_regex.pattern}'"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)

        elif spatial_coverage_type == "BoundingBox" and not self._is_valid_bounding_box(spatial_coverage_value):
            help_msg = "must be a dictionary containing 'xmin', 'ymin', 'xmax', 'ymax' keys with numeric values"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)

        elif spatial_coverage_type == "Point" and not self._is_valid_wkt_point(spatial_coverage_value):
            help_msg = "must be a dictionary containing 'x' and 'y' keys with numeric values"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)

        return None