예제 #1
0
def _validate_package_fields(
    package: Union[FilePackage, GlobalPackage]
) -> Tuple[List[PanoField], List[ValidationError]]:
    """Parse and validate every field file in *package*.

    Returns the successfully parsed fields together with all validation
    errors found: invalid YAML files, JSON-schema violations, and
    duplicate (data source, slug) pairs pointing at multiple files.
    """
    errors: List[ValidationError] = []
    fields: List[PanoField] = []
    # Collect file paths per (data_source, slug) so duplicates can be reported.
    paths_by_field_id: Dict[Tuple, List[Path]] = defaultdict(list)

    for raw_field, field_path in package.read_fields():
        try:
            _validate_data(raw_field, JsonSchemas.field())
            parsed = PanoField.from_dict(raw_field)
        except InvalidYamlFile as exc:
            errors.append(exc)
        except JsonSchemaValidationError as exc:
            errors.append(JsonSchemaError(path=field_path, error=exc))
        else:
            fields.append(parsed)
            paths_by_field_id[(parsed.data_source, parsed.slug)].append(field_path)

    # check for duplicate field slugs
    for (dataset_slug, field_slug), file_paths in paths_by_field_id.items():
        if len(file_paths) > 1:
            errors.append(
                DuplicateFieldSlugError(
                    field_slug=field_slug,
                    dataset_slug=dataset_slug,
                    paths=file_paths,
                )
            )

    return fields, errors
예제 #2
0
def map_error_to_field(error: MissingFieldFileError) -> PanoField:
    """Build a placeholder text dimension for a field whose file is missing."""
    placeholder_attrs = dict(
        slug=error.field_slug,
        field_type='dimension',
        data_source=error.dataset_slug,
        display_name=error.field_slug,
        group='CLI',
        data_type='text',
        aggregation=Aggregation(type='group_by', params=None),
    )
    return PanoField(**placeholder_attrs)
예제 #3
0
def get_state(target_dataset: Optional[str] = None) -> VirtualState:
    """
    Build a representation of what VDS and models are on local filesystem.

    When *target_dataset* is given, only that dataset's package is read and
    company-scope (global) fields are skipped entirely.
    """
    reader = FileReader()
    packages = reader.get_packages()
    data_sources, models, fields = [], [], []

    # Company-scope fields are only relevant when no dataset filter is set.
    if target_dataset is None:
        for field_data, field_path in reader.get_global_package().read_fields():
            field_data['file_name'] = field_path.name
            fields.append(PanoField.from_dict(field_data))

    for package in packages:
        ds_data = package.read_data_source()
        ds_data['package'] = package.name
        data_source = PanoVirtualDataSource.from_dict(ds_data)

        # Skip packages that don't match the requested dataset (if any).
        if target_dataset not in (None, data_source.dataset_slug):
            continue

        data_sources.append(data_source)

        for model_data, model_path in package.read_models():
            model_data.update(
                package=package.name,
                file_name=model_path.name,
                virtual_data_source=data_source.dataset_slug,
            )
            models.append(PanoModel.from_dict(model_data))

        for field_data, field_path in package.read_fields():
            field_data.update(
                package=package.name,
                file_name=field_path.name,
                data_source=data_source.dataset_slug,
            )
            fields.append(PanoField.from_dict(field_data))

    return VirtualState(data_sources=data_sources, models=models, fields=fields)
예제 #4
0
    def write_field(self,
                    field: PanoField,
                    *,
                    package: Optional[str] = None,
                    file_name: Optional[str] = None):
        """Write a field definition to the local filesystem.

        The file lands in *package*'s fields directory (falling back to the
        field's own data source as the package name); when neither is known,
        it goes to the company-scope fields directory under ``self.cwd``.
        """
        target_name = file_name
        if target_name is None:
            target_name = f'{field.slug}{FileExtension.FIELD_YAML.value}'

        # Prefer the explicit package, else the field's data source.
        scope = field.data_source if package is None else package
        if scope is None:
            # company-scope field
            base_dir = Paths.fields_dir(self.cwd)
        else:
            # dataset-scope field
            base_dir = Paths.fields_dir(self.cwd / scope)

        logger.debug(f'About to write field {field.id}')
        write_yaml(base_dir / target_name, field.to_dict())
예제 #5
0
def map_error_to_field(error: MissingFieldFileError,
                       loaded_models: Dict[str, PanoModel]) -> PanoField:
    """Build a best-guess field definition for a missing field file.

    If the referenced model was scanned and contains exactly one matching
    field, that field's data type determines the generated field's type
    (metric vs. dimension) and aggregation; otherwise the result defaults
    to a text dimension.
    """
    data_type = ValidationType.text

    # try to find the field in scanned state
    scanned_model = loaded_models.get(error.model_name)
    if scanned_model:
        # model with this field was scanned so let's try to find this field
        matches = [
            candidate for candidate in scanned_model.fields
            if error.field_slug in candidate.field_map
        ]
        if len(matches) == 1:
            # exactly this field was scanned so let's determine its correct validation type
            recovered_type = EnumHelper.from_value_safe(
                ValidationType, matches[0].data_type)
            if recovered_type:
                data_type = recovered_type

    # Metric data types aggregate with sum; everything else groups by value.
    if data_type in METRIC_VALIDATION_TYPES:
        field_type = TaxonTypeEnum.metric
        aggregation = Aggregation(type='sum', params=None)
    else:
        field_type = TaxonTypeEnum.dimension
        aggregation = Aggregation(type='group_by', params=None)

    return PanoField(
        slug=error.field_slug,
        field_type=field_type.value,
        display_name=error.field_slug,
        data_source=error.dataset_slug,
        group='CLI',
        data_type=data_type.value,
        aggregation=aggregation,
    )
예제 #6
0
 def write_scanned_field(self, field: PanoField):
     """Write scanned field to local filesystem."""
     # Fix: the original docstring opened with four quotes (""""...),
     # leaving a stray leading '"' inside the docstring text.
     file_name = f'{field.slug}{FileExtension.FIELD_YAML.value}'
     path = Paths.scanned_fields_dir() / file_name
     logger.debug(f'About to write field {field.slug}')
     write_yaml(path, field.to_dict())