def validate(self, record: Any, validation_result: ValidationResult):
    """Validate that the record's attribute holds a well-formed UUID v4.

    Adds an error to ``validation_result`` when the value cannot be parsed
    as a UUID at all, or parses but is not version 4.
    """
    attribute_value = getattr(record, self.attribute)
    try:
        uuid_val = uuid.UUID(str(attribute_value))
    except ValueError:
        validation_result.add_error(
            f"{self.attribute} must be a valid UUID v4; received {attribute_value}")
    else:
        # Bug fix: the version check was a commented-out assert, so any UUID
        # version passed even though the error message promises v4. An
        # explicit check also survives `python -O` (asserts are stripped).
        if uuid_val.version != 4:
            validation_result.add_error(
                f"{self.attribute} must be a valid UUID v4; received {attribute_value}")
def build_record_associations(self):
    """Validate that each dataset references an existing provenance record.

    Resolves all referenced provenance records in one bulk query, then flags
    every dataset whose ``provenance_id`` is not among them. Populates
    ``self.data_validation_errors`` with the failing ValidationResults.
    """
    provenance_arr = Provenance.find_by_record_ids(
        self.db_records_references["provenance_record_ids"], self.session)
    # Index resolved records by stringified id for O(1) membership tests.
    valid_provenance_associations = {
        str(provenance.id): provenance for provenance in provenance_arr}
    valid_provenance_record_ids = set(valid_provenance_associations.keys())
    validation_results_with_errors = []
    for dataset in self.datasets:
        if dataset.provenance_id not in valid_provenance_record_ids:
            validation_result = ValidationResult(record=dataset.to_json())
            # Consistency fix: every other call site passes add_error a plain
            # string; this one wrapped the message in a one-element list.
            validation_result.add_error(
                f"Invalid value for 'provenance_id': {dataset.provenance_id}")
            validation_results_with_errors.append(validation_result)
    self.data_validation_errors = validation_results_with_errors
def build_record_associations(self):
    """Validate each resource's references and batch-level uniqueness.

    Checks, per resource: that its ``record_id`` is unique within this batch,
    that ``dataset_id`` and ``provenance_id`` resolve to existing records, and
    that every entry of ``variable_ids`` resolves to an existing Variable.
    Populates ``self.data_validation_errors`` with the failing
    ValidationResults.
    """
    validation_results_with_errors = []

    # Resolve all referenced records in bulk — one query per model.
    datasets = Dataset.find_by_record_ids(
        self.db_records_references["dataset_ids"], self.session)
    provenance_arr = Provenance.find_by_record_ids(
        self.db_records_references["provenance_ids"], self.session)
    variables = Variable.find_by_record_ids(
        self.db_records_references["variable_ids"], self.session)

    # Index resolved records by stringified id for O(1) membership tests.
    valid_dataset_associations = {str(dataset.id): dataset for dataset in datasets}
    valid_dataset_ids = set(valid_dataset_associations.keys())
    valid_provenance_associations = {
        str(provenance.id): provenance for provenance in provenance_arr}
    valid_provenance_ids = set(valid_provenance_associations.keys())
    valid_variable_associations = {str(variable.id): variable for variable in variables}
    valid_variable_ids = set(valid_variable_associations.keys())

    resource_record_ids = set()
    for resource in self.resources:
        validation_result = ValidationResult(record=resource.to_json())
        resource_record_id = str(resource.record_id)
        # Duplicate detection within the incoming batch itself.
        if resource_record_id in resource_record_ids:
            validation_result.add_error(
                f"Duplicate record_id '{resource_record_id}' found in this batch; record_ids must be unique"
            )
        else:
            resource_record_ids.add(resource_record_id)
        if resource.dataset_id not in valid_dataset_ids:
            validation_result.add_error(
                f"Invalid value for 'dataset_id': {resource.dataset_id}")
        if resource.provenance_id not in valid_provenance_ids:
            validation_result.add_error(
                f"Invalid value for 'provenance_id': {resource.provenance_id}"
            )
        # Set difference finds all variable ids that did not resolve.
        invalid_variable_ids = set(resource.variable_ids) - valid_variable_ids
        if invalid_variable_ids:
            validation_result.add_error(
                f"Invalid value for 'variable_ids': {invalid_variable_ids}"
            )
        if not validation_result.is_valid():
            validation_results_with_errors.append(validation_result)
    # Associate dataset
    # Associate standard_variables
    # Associate temporal_index
    # Associate spatial_index
    self.data_validation_errors = validation_results_with_errors
def build_record_associations(self):
    """Validate variables' references and (dataset_id, name) uniqueness.

    Two phases:
    1. Per-variable checks: dataset_id resolves, all standard_variable_ids
       resolve, and (dataset_id, name) is unique within this payload.
    2. Only if phase 1 produced no errors: query the DB for existing
       variables with the same (dataset_id, name) pairs and flag any whose
       stored record id differs from the payload's record_id.

    Populates ``self.data_validation_errors`` with failing ValidationResults.
    """
    validation_results_with_errors = []
    # Resolve all referenced records in bulk — one query per model.
    datasets = Dataset.find_by_record_ids(
        self.db_records_references["dataset_ids"], self.session)
    standard_variables = StandardVariable.find_by_record_ids(
        self.db_records_references["standard_variable_ids"], self.session)
    # Index resolved records by stringified id for O(1) membership tests.
    valid_dataset_associations = {}
    for dataset in datasets:
        valid_dataset_associations[str(dataset.id)] = dataset
    valid_dataset_ids = set(valid_dataset_associations.keys())
    valid_standard_variables_associations = {}
    for standard_variable in standard_variables:
        valid_standard_variables_associations[str(
            standard_variable.id)] = standard_variable
    valid_standard_variable_ids = set(
        valid_standard_variables_associations.keys())
    # make sure that there are no duplicate dataset_id/name in the payload
    dataset_id_name_counts = {}
    for variable in self.variables:
        key = (str(variable.dataset_id), str(variable.name))
        if key not in dataset_id_name_counts:
            dataset_id_name_counts[key] = 1
        else:
            dataset_id_name_counts[key] += 1
    # Phase 1: per-variable reference and in-payload uniqueness checks.
    for variable in self.variables:
        validation_result = ValidationResult(record=variable.to_json())
        if variable.dataset_id not in valid_dataset_ids:
            validation_result.add_error(
                f"Invalid value for 'dataset_id': {variable.dataset_id}")
        # Set difference finds all standard-variable ids that did not resolve.
        invalid_standard_variable_ids = set(
            variable.standard_variable_ids) - valid_standard_variable_ids
        if len(invalid_standard_variable_ids) > 0:
            validation_result.add_error(
                f"Invalid value for 'standard_variable_ids': {invalid_standard_variable_ids}"
            )
        dataset_id = str(variable.dataset_id)
        name = str(variable.name)
        # Count > 1 means this (dataset_id, name) appears more than once in
        # the payload; every occurrence is flagged.
        key_count = dataset_id_name_counts[(dataset_id, name)]
        if key_count > 1:
            validation_result.add_error(
                f"Duplicate value for (dataset_id, name): ({dataset_id}), ({name})"
            )
        if not validation_result.is_valid():
            validation_results_with_errors.append(validation_result)
    # Validate uniqueness of dataset_id/name
    # Phase 2 is skipped if phase 1 found any errors (the map below would be
    # ambiguous for in-payload duplicates).
    if len(validation_results_with_errors) == 0:
        prelim_dataset_id_and_name_to_var = {(str(v.dataset_id), str(v.name)): v for v in
                                             self.variables}
        existing_variables = Variable.find_by_dataset_id_and_name(
            list(prelim_dataset_id_and_name_to_var.keys()), self.session)
        for existing_variable in existing_variables:
            record_id = str(existing_variable.id)
            dataset_id = str(existing_variable.dataset_id)
            name = existing_variable.name
            variable = prelim_dataset_id_and_name_to_var[(dataset_id, name)]
            # A matching DB row with a different record id means the payload
            # would create a conflicting duplicate rather than update it.
            if variable.record_id != record_id:
                validation_result = ValidationResult(
                    record=variable.to_json())
                msg = f"Record already exists for variable with dataset_id '{dataset_id}' and name '{name}': '{record_id}'"
                validation_result.add_error(msg)
                validation_results_with_errors.append(validation_result)
    # Associate dataset
    # Associate standard_variables
    # Associate temporal_index
    # Associate spatial_index
    self.data_validation_errors = validation_results_with_errors
def validate(self, record: Any, validation_result: ValidationResult):
    """Validate a temporal-coverage dict with ISO8601 start/end timestamps.

    Requires ``start_time`` and ``end_time`` keys, each matching
    ``self.iso8601_format``. Empty values are accepted (returning True) when
    ``self.ignore_empty_values`` is set, otherwise flagged as errors.
    """
    from datetime import datetime
    attribute_value = getattr(record, self.attribute)
    if not self.ignore_empty_values and not attribute_value:
        validation_result.add_error(
            f"{self.attribute} must not be empty; received {attribute_value}")
    elif self.ignore_empty_values and not attribute_value:
        return True
    elif not isinstance(attribute_value, dict):
        # Bug fix: message previously said "Invalid format for
        # 'spatial_coverage' ... keys 'type' and 'value'" — copy-pasted from
        # the spatial validator; this validator checks start/end times.
        help_msg = "must be a dictionary with keys 'start_time' and 'end_time'"
        validation_result.add_error(
            f"Invalid format for '{self.attribute}': {attribute_value}; {help_msg}")
    else:
        # start_time and end_time get identical treatment; loop to avoid the
        # duplicated branch bodies.
        for key in ("start_time", "end_time"):
            if key not in attribute_value:
                validation_result.add_error(
                    f"{self.attribute} must contain '{key}' key")
                continue
            timestamp = attribute_value[key]
            try:
                datetime.strptime(timestamp, self.iso8601_format)
            except ValueError:
                validation_result.add_error(
                    f"{timestamp} does not match ISO8601 datetime format '{self.iso8601_format}'")
def validate(self, record: Any, validation_result: ValidationResult):
    """Validate that the record's attribute holds a JSON object (dict)."""
    attribute_value = getattr(record, self.attribute)
    # isinstance instead of `type(x) != dict`: idiomatic, and accepts dict
    # subclasses (e.g. OrderedDict) which serialize as JSON objects too.
    if not isinstance(attribute_value, dict):
        help_msg = "must be a JSON object"
        validation_result.add_error(
            f"Invalid format for '{self.attribute}': '{attribute_value}'; {help_msg}")
def validate(self, record: Any, validation_result: ValidationResult):
    """Report an error when the attribute equals any configured empty value."""
    attribute_value = getattr(record, self.attribute)
    # Compare lazily against each sentinel "empty" value, stopping at the
    # first match instead of materializing a list of comparison results.
    if any(attribute_value == empty_value for empty_value in self.empty_values):
        validation_result.add_error(
            f"{self.attribute} must not be empty; received {attribute_value}")
def validate(self, record: Any, validation_result: ValidationResult):
    """Validate a spatial-coverage dict of the form {'type': ..., 'value': ...}.

    Supported types (``self.supported_types``) each get a shape check on
    'value': WKT_POLYGON against ``self.wkt_polygon_regex``, BoundingBox as a
    dict of xmin/ymin/xmax/ymax, Point as a dict of x/y. Empty values are
    accepted (returning True) when ``self.ignore_empty_values`` is set.
    """
    attribute_value = getattr(record, self.attribute)
    if not self.ignore_empty_values and not attribute_value:
        validation_result.add_error(
            f"{self.attribute} must not be empty; received {attribute_value}")
    elif self.ignore_empty_values and not attribute_value:
        return True
    elif not isinstance(attribute_value, dict):
        help_msg = "must be a dictionary with keys 'type' and 'value'"
        validation_result.add_error(
            f"Invalid format for 'spatial_coverage': {attribute_value}; {help_msg}")
    else:
        missing_key = False
        if 'type' not in attribute_value:
            validation_result.add_error(
                f"Missing required key 'type' in {self.attribute}")
            missing_key = True
        if 'value' not in attribute_value:
            validation_result.add_error(
                f"Missing required key 'value' in {self.attribute}")
            missing_key = True
        # Bug fix: the original fell through to attribute_value['type'] /
        # ['value'] even after flagging a missing key, raising KeyError
        # instead of reporting the validation error.
        if missing_key:
            return
        spatial_coverage_type = attribute_value['type']
        spatial_coverage_value = attribute_value['value']
        if spatial_coverage_type not in self.supported_types:
            help_msg = f"must be one of the supported types: {self.supported_types}"
            msg = f"Invalid spatial coverage type: {spatial_coverage_type}; {help_msg}"
            validation_result.add_error(msg)
        if spatial_coverage_type == "WKT_POLYGON" and not self._is_valid_wkt_polygon(spatial_coverage_value):
            help_msg = f"must match the following regex: '{self.wkt_polygon_regex.pattern}'"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)
        elif spatial_coverage_type == "BoundingBox" and not self._is_valid_bounding_box(spatial_coverage_value):
            help_msg = f"must be a dictionary containing 'xmin', 'ymin', 'xmax', 'ymax' keys with numeric values"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)
        elif spatial_coverage_type == "Point" and not self._is_valid_wkt_point(spatial_coverage_value):
            help_msg = f"must be a dictionary containing 'x' and 'y' keys with numeric values"
            msg = f"Invalid value for {spatial_coverage_type} type: {spatial_coverage_value}; {help_msg}"
            validation_result.add_error(msg)