def _validate_package_fields(
    package: Union[FilePackage, GlobalPackage]
) -> Tuple[List[PanoField], List[ValidationError]]:
    """Parse and validate every field file in *package*.

    Returns a tuple of (successfully parsed fields, validation errors).
    Errors cover invalid YAML, JSON-schema violations, and duplicate
    (data_source, slug) pairs across files.
    """
    parsed_fields: List[PanoField] = []
    problems: List[ValidationError] = []
    # All file paths seen per (data_source, slug) pair, so duplicates can be
    # reported with every offending path.
    locations: Dict[Tuple, List[Path]] = defaultdict(list)

    for raw_field, source_path in package.read_fields():
        try:
            _validate_data(raw_field, JsonSchemas.field())
            parsed = PanoField.from_dict(raw_field)
        except InvalidYamlFile as exc:
            problems.append(exc)
        except JsonSchemaValidationError as exc:
            problems.append(JsonSchemaError(path=source_path, error=exc))
        else:
            parsed_fields.append(parsed)
            locations[(parsed.data_source, parsed.slug)].append(source_path)

    # Check for duplicate field slugs.
    for (dataset_slug, field_slug), seen_paths in locations.items():
        if len(seen_paths) > 1:
            problems.append(
                DuplicateFieldSlugError(
                    field_slug=field_slug, dataset_slug=dataset_slug, paths=seen_paths
                )
            )

    return parsed_fields, problems
def map_error_to_field(error: MissingFieldFileError) -> PanoField:
    """Build a placeholder dimension field for a missing field file.

    The field is a text dimension grouped under 'CLI', named after the
    missing field's slug.
    """
    slug = error.field_slug
    grouping = Aggregation(type='group_by', params=None)
    return PanoField(
        slug=slug,
        field_type='dimension',
        data_source=error.dataset_slug,
        display_name=slug,
        group='CLI',
        data_type='text',
        aggregation=grouping,
    )
def get_state(target_dataset: Optional[str] = None) -> VirtualState:
    """
    Build a representation of what VDS and models are on local filesystem.
    """
    reader = FileReader()
    packages = reader.get_packages()

    data_sources = []
    models = []
    fields = []

    # Company-scoped (global) fields are only included when no dataset
    # filter is requested.
    if target_dataset is None:
        for field_data, field_path in reader.get_global_package().read_fields():
            field_data['file_name'] = field_path.name
            fields.append(PanoField.from_dict(field_data))

    for pkg in packages:
        ds_data = pkg.read_data_source()
        ds_data['package'] = pkg.name
        data_source = PanoVirtualDataSource.from_dict(ds_data)
        # Skip packages that do not match the requested dataset.
        if target_dataset is not None and data_source.dataset_slug != target_dataset:
            continue
        data_sources.append(data_source)

        for model_data, model_path in pkg.read_models():
            model_data['package'] = pkg.name
            model_data['file_name'] = model_path.name
            model_data['virtual_data_source'] = data_source.dataset_slug
            models.append(PanoModel.from_dict(model_data))

        for field_data, field_path in pkg.read_fields():
            field_data['package'] = pkg.name
            field_data['file_name'] = field_path.name
            field_data['data_source'] = data_source.dataset_slug
            fields.append(PanoField.from_dict(field_data))

    return VirtualState(data_sources=data_sources, models=models, fields=fields)
def write_field(self, field: PanoField, *, package: Optional[str] = None, file_name: Optional[str] = None):
    """Write a field to the local filesystem.

    Fixes a copy-pasted docstring that previously said "Write model".

    Args:
        field: Field to serialize to a YAML file.
        package: Dataset package to write into; defaults to the field's
            data source. When both are None the field is company-scoped.
        file_name: Target file name; defaults to '<slug>' plus the
            field YAML extension.
    """
    if file_name is None:
        file_name = f'{field.slug}{FileExtension.FIELD_YAML.value}'
    package = package if package is not None else field.data_source
    if package is not None:
        # dataset-scope field
        path = Paths.fields_dir(self.cwd / package) / file_name
    else:
        # company-scope field
        path = Paths.fields_dir(self.cwd) / file_name
    logger.debug(f'About to write field {field.id}')
    write_yaml(path, field.to_dict())
def map_error_to_field(error: MissingFieldFileError, loaded_models: Dict[str, PanoModel]) -> PanoField:
    """Create a field definition for a missing field file.

    When the owning model appears in *loaded_models* (i.e. it was scanned),
    the field's data type is recovered from the scan; otherwise it defaults
    to text. Metric-typed fields get a 'sum' aggregation, dimensions get
    'group_by'.
    """
    data_type = ValidationType.text

    # Try to find the field in scanned state.
    scanned_model = loaded_models.get(error.model_name)
    if scanned_model:
        # Model with this field was scanned — look for the matching field.
        matches = []
        for candidate in scanned_model.fields:
            if error.field_slug in candidate.field_map:
                matches.append(candidate)
        if len(matches) == 1:
            # Exactly this field was scanned; determine its validation type.
            resolved = EnumHelper.from_value_safe(ValidationType, matches[0].data_type)
            if resolved:
                data_type = resolved

    if data_type in METRIC_VALIDATION_TYPES:
        field_type = TaxonTypeEnum.metric
        aggregation = Aggregation(type='sum', params=None)
    else:
        field_type = TaxonTypeEnum.dimension
        aggregation = Aggregation(type='group_by', params=None)

    return PanoField(
        slug=error.field_slug,
        field_type=field_type.value,
        display_name=error.field_slug,
        data_source=error.dataset_slug,
        group='CLI',
        data_type=data_type.value,
        aggregation=aggregation,
    )
def write_scanned_field(self, field: PanoField):
    """Write scanned field to local filesystem."""
    # Fixed malformed docstring opener: four quotes left a stray leading
    # '"' character inside the docstring text.
    path = Paths.scanned_fields_dir() / f'{field.slug}{FileExtension.FIELD_YAML.value}'
    logger.debug(f'About to write field {field.slug}')
    write_yaml(path, field.to_dict())