Example #1
def build_schema_for_cubes():
    """
    Returns
    -------
    dict
        each element is str -> strictyaml.Map
        where key is name of cube,
        value is a schema used for validation and type-coercion
    """
    schemas = {}
    for class_of_object in SUPPORTED_CUBES:
        res = build_schema_from_signature(class_of_object)

        # "selection" isn't used in __init__, but we will need it later
        res["selection"] = Seq(Str())

        # shortcut for strategy initialization
        if is_key_in_schema("strategy", res):
            signature_validation = {}
            for strategy_class in SUPPORTED_STRATEGIES:
                local_signature_validation = build_schema_from_signature(
                    strategy_class)
                signature_validation.update(local_signature_validation)
            res[Optional("strategy_params")] = Map(signature_validation)

        # we will deal with "values" later, but we can check at least some simple things already
        if class_of_object.__name__ == "CubeCreator":
            element = Map({"name": Str(), "values": Seq(Any())})
            res["parameters"] = Seq(element)
        if class_of_object.__name__ == "RegularizersModifierCube":
            element = Map({
                Optional("name"): Str(),
                Optional("regularizer"): Any(),
                Optional("tau_grid"): Seq(Float())
            })
            res["regularizer_parameters"] = element | Seq(element)

        res = Map(res)

        specific_schema = Map({class_of_object.__name__: res})
        schemas[class_of_object.__name__] = specific_schema
    return schemas
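
To make the shape of these schemas concrete, here is a minimal, hand-written sketch (not taken from the project above) of validating one cube's YAML block against a schema of the kind build_schema_for_cubes() returns; the cube name and parameters are hypothetical:

from strictyaml import Int, Map, Optional, Seq, Str, load

# Hypothetical schema with the same layout: the top-level key is the cube's
# class name, its value validates the cube's parameters.
cube_schema = Map({"CubeCreator": Map({
    "num_iter": Int(),
    "selection": Seq(Str()),
    Optional("strategy_params"): Map({"start_point": Str()}),
})})

yaml_block = """\
CubeCreator:
  num_iter: 5
  selection:
    - perplexity_score < 100
"""
print(load(yaml_block, cube_schema).data)  # num_iter is coerced to int 5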
Example #2
    def load(path: Path, schema_pointer):
        """Load and validate .yaml file."""
        schema = copy.deepcopy(schema_pointer)
        with path.open() as f:
            yaml = f.read()
            data = yaml_load(yaml, Any())
            is_template = path.name == "template.yaml"

            # Replace real Country and Timezone values with fakes
            if is_template:
                schema["woo/woocommerce_default_country"] = Enum(["LL"])
                schema["wp/timezone_string"] = Enum(["Region/Country"])
                schema["wp/DEFAULT_WPLANG"] = Enum(["ll_LL"])
                schema["woo/woocommerce_currency"] = Enum(["LLL"])

            if "woo/woocommerce_tax_classes" in data:
                # Inspect that tax classes and taxes match

                # create enum for taxes from defined tax_classes
                tax_classes = [
                    str(tax).lower().replace(" ", "-")
                    for tax in data["woo/woocommerce_tax_classes"]
                ]
                # +1 is for the standard tax class, which is never listed among the defined tax classes
                for x in range(len(tax_classes) + 1):
                    # start counting with 1
                    schema[f"wootax/{x+1}"] = Map({
                        "country": Enum(["LL"]) if is_template else Enum(COUNTRIES),
                        "state": Str(),
                        "rate": Decimal(),
                        "name": Str(),
                        "priority": Int(),
                        "compound": Int(),
                        "shipping": Int(),
                        "order": Int(),
                        "class": Enum([""]) if x == 0 else Enum(tax_classes),
                        "locations": Map({}),
                    })
            try:
                return yaml_load(yaml, Map(schema), path)
            except YAMLError:
                raise

        return as_document(schema)
Example #3
def choose_validator(param):
    """
    Parameters
    ----------
    param : inspect.Parameter

    Returns
    -------
    instance of strictyaml.Validator
    """
    if param.annotation is int:
        return Int()
    if param.annotation is float:
        return Float()
    if param.annotation is bool:
        return Bool()
    if param.annotation is str:
        return Str()
    if param.name in ARTM_TYPES:
        return ARTM_TYPES[param.name]
    return Any()
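
The helpers named build_schema_from_signature in Example #1 presumably combine a mapper like this with inspect; below is a rough, self-contained sketch of that pattern (the example function and the minimal annotation table are assumptions, not the project's code):

import inspect
from strictyaml import Any, Bool, Float, Int, Map, Optional, Str, load

# Minimal annotation -> validator table; the real code also falls back to ARTM_TYPES by name.
_VALIDATORS = {int: Int(), float: Float(), bool: Bool(), str: Str()}

def build_schema_from_signature_sketch(callable_obj):
    """Map keyword parameters to validators; parameters with defaults become Optional keys."""
    schema = {}
    for name, param in inspect.signature(callable_obj).parameters.items():
        if name == "self":
            continue
        key = name if param.default is inspect.Parameter.empty else Optional(name)
        schema[key] = _VALIDATORS.get(param.annotation, Any())
    return schema

def example_cube(num_iter: int, tau: float = 0.1, verbose: bool = False):
    pass

schema = Map(build_schema_from_signature_sketch(example_cube))
print(load("num_iter: 5\ntau: 0.5\n", schema).data)  # -> {'num_iter': 5, 'tau': 0.5}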
Example #4
from strictyaml import Any, Enum, Int, Map, MapPattern, Optional, Regex, Seq, Str

limits_schema = Map(
    {
        "time": Int(),
        Optional("memory", default=8192): Int(),
        Optional("output"): Int(),
        Optional("cores"): Int(),
    }
)

module_schema = Regex(r"\.?\w+(\.\w+)*")

plugin_schema = Map(
    {"module": module_schema, Optional("config"): MapPattern(Str(), Any())}
)

task_sources = Enum(["local", "url"])

schema = Map(
    {
        "title": Str(),
        Optional("description"): Str(),
        "limits": limits_schema,
        "steps": Map(
            {"run": Seq(plugin_schema), Optional("analysis"): Seq(plugin_schema)}
        ),
        "observers": Seq(plugin_schema),
        "tasks": MapPattern(Str(), MapPattern(Str(), Any())),
        "tools": MapPattern(
            Str(),
Example #5
#!/usr/bin/python3
import os
import sys
cur_dir = os.path.dirname(os.path.abspath(__file__))
# subdir = 'strictyaml-1.0.6'
subdir = 'strictyaml'
sys.path.append(os.path.join(cur_dir, subdir))
# Depends on ruamel: python3-ruamel.yaml
# Depends on dateutil: python3-dateutil
from strictyaml import (load, Map, Str, Int, Seq, Any, Bool, Optional,
                        MapPattern, YAMLError)
from strictyaml.exceptions import InconsistentIndentationDisallowed

schema_ver = MapPattern(Str(), Any())
# fname = 'scanner_error.yaml'
# fname = 'indent.yaml'
fname = 'test.yaml'
with open(fname) as f:
    s = f.read()
try:
    parsed = load(s, schema_ver, label=fname)
except InconsistentIndentationDisallowed as e:
    print('Use the same indentation across the file')
    print(e)
    sys.exit(1)
except YAMLError as e:
    print('YAML parsing error:')
    print(e)
    sys.exit(1)

schema = Map({
Example #6
from strictyaml import Any, Map, MapPattern, Optional, Str

addon = Map({
    "spec": Str(),
    Optional("install"): MapPattern(Str(), Any(), minimum_keys=1),
    "meta": Map({
        "name": Str(),
        "version": Str()
    }),
    Optional("config"): MapPattern(Str(), Any(), minimum_keys=1),
})

application = Map({
    "spec": Str(),
    Optional("meta"): Map({
        Optional("name"): Str(),
        Optional("version"): Str()
    }),
    Optional("services"): MapPattern(Str(), Map({"type": Str()})),
    Optional("addons"): MapPattern(
        Str(),
        Map({
            "manager": Str(),
            "hash": Str(),
            Optional("settings"): MapPattern(Str(), Any(), minimum_keys=1),
        }),
        minimum_keys=1,
Example #7
     Str(),
     Map(
         {
             HTTP: Map(
                 {
                     URL: Str(),
                     Optional(HEADERS, {}): MapPattern(Str(), Str())
                     | EmptyDict(),
                     Optional(TIMEOUT): HumanReadableTimePeriod(),
                 }
             ),
             PARSER: Map(
                 {
                     Optional(BUILT_IN, None): PyEnum(models.Feed.BuiltInParser),
                     Optional(CUSTOM, None): Str(),
                     Optional(OPTIONS): MapPattern(Str(), Any()) | EmptyDict(),
                 }
             ),
             Optional(AUTO_UPDATE, {ENABLED: False, PERIOD: -1}): Map(
                 {
                     Optional(ENABLED, True): Bool(),
                     Optional(PERIOD, -1): HumanReadableTimePeriod(),
                 }
             ),
             Optional(REQUIRED_FOR_INSTALL, False): Bool(),
         }
     ),
 ),
 Optional(SERVICE_MAPS, default_service_map_config): MapPattern(
     Str(),
     Map(
Example #8
    Str(),
    Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
    Optional(ModelMetadataKeys.INFERENCE_MODEL): Map({
        Optional("targetName"): Str(),
        Optional("positiveClassLabel"): Str(),
        Optional("negativeClassLabel"): Str(),
        Optional("classLabels"): Seq(Str()),
        Optional("classLabelsFile"): Str(),
        Optional("predictionThreshold"): Int(),
    }),
    Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
    Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
    Optional(ModelMetadataKeys.VALIDATION_SCHEMA): get_type_schema_yaml_validator(),
    Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
})


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
Example #9
        ),
        Optional(ModelMetadataKeys.MODEL_ID): Str(),
        Optional(ModelMetadataKeys.DESCRIPTION): Str(),
        Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
        Optional(ModelMetadataKeys.INFERENCE_MODEL): Map(
            {
                Optional("targetName"): Str(),
                Optional("positiveClassLabel"): Str(),
                Optional("negativeClassLabel"): Str(),
                Optional("classLabels"): Seq(Str()),
                Optional("classLabelsFile"): Str(),
                Optional("predictionThreshold"): Int(),
            }
        ),
        Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
        Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
        Optional(ModelMetadataKeys.VALIDATION_SCHEMA): Any(),
        Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
    }
)


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
            "The following keys are missing in {} file.\n"
Example #10
def read_and_validate_config(strictyaml_config: str) -> YAML:
    config_schema = Map({"pipelines": Any()})
    return load(strictyaml_config, config_schema)
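
For illustration, calling a loader like this on a small document (the pipeline contents are invented); because "pipelines" is validated with Any(), everything underneath it is returned as plain nested data:

from strictyaml import Any, Map, load

config_schema = Map({"pipelines": Any()})
doc = load("pipelines:\n  build:\n    - step: compile\n", config_schema)
print(doc.data["pipelines"])  # -> {'build': [{'step': 'compile'}]}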
Example #11
class Structure:
    __scheme = Map(
        {
            "app": Str(),
            "layers": Seq(Map({"name": Str(), Optional("children"): Seq(Any())})),
        }
    )

    def __init__(self):
        self.__file = None
        self.__global_layer = None

    @property
    def layers(self):
        return self.__global_layer

    def structure_from_file(self, absolute_file_path: str):
        self.__file_exist(absolute_file_path)
        try:
            with open(absolute_file_path, "r") as file:
                raw_yaml = file.read()
            output_yaml = load(raw_yaml, self.__scheme)
            self.__validate_layer_children(output_yaml.data["layers"], [])
        except YAMLValidationError:
            # TODO log error, handle more error types
            raise BadYAMLError
        except AssertionError:
            raise BadYAMLError

        global_dict = dict()
        global_dict["name"] = "global"
        global_dict["children"] = output_yaml.data["layers"]

        self.__file = absolute_file_path
        self.__global_layer = DictImporter().import_(global_dict)

        return self

    def show_structure(self) -> str:
        if self.__global_layer:
            return str(RenderTree(self.__global_layer))
        return "Structure yet to be defined."

    @classmethod
    def __file_exist(cls, file_path: str):
        if not isinstance(file_path, str) or not file_path.endswith((".yml", ".yaml")):
            # TODO: add exception reason
            raise BadYAMLError()
        if not path.isfile(file_path):
            raise BadYAMLError()

    @classmethod
    def __validate_layer_children(cls, chunk, name_memo):
        for layer in chunk:
            assert "name" in layer
            assert isinstance(layer["name"], str)
            assert layer["name"] not in name_memo
            name_memo.append(layer["name"])

            def _seq_but_not_str(obj):
                return isinstance(obj, Sequence) and not isinstance(
                    obj, (str, bytes, bytearray)
                )

            if "children" in layer:
                assert _seq_but_not_str(layer["children"])
                cls.__validate_layer_children(layer["children"], name_memo)

    def flatten_layers(self) -> list:
        """
        method to flatten the structure as if searched with BFS

        :return: list of flatten nodes by layers
        """
        if self.layers:
            from anytree import LevelOrderIter

            return [node for node in LevelOrderIter(self.layers)]
        raise Exception("No structure defined")
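
For reference, a structure file that __scheme accepts, validated the same way structure_from_file does; the app and layer names below are invented:

from strictyaml import Any, Map, Optional, Seq, Str, load

scheme = Map({
    "app": Str(),
    "layers": Seq(Map({"name": Str(), Optional("children"): Seq(Any())})),
})
raw_yaml = """\
app: my_app
layers:
  - name: presentation
    children:
      - name: views
  - name: domain
"""
print(load(raw_yaml, scheme).data)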
Example #12
 {
     "app_name": Str(
         doc={
             "text": "The name of the application, which is used in various ways to uniquely identify the resources belonging to it",
             "label": "top",
         }),
     "resources": Seq(
         Any(
             doc={
                 "any_options": [
                     "function",
                     "http_api",
                     "publisher",
                     "object_store",
                     "keyvalue_store",
                     "stream_analytics",
                 ]
             }),
         doc={
             "text": "The list of resources that comprise the application",
             "label": "top",
             "title": "resource_types",
         },
     ),
 },
 doc={
     "text":
Example #13
 def _get_route_mapping(cls) -> dict:
     return {"range": Float() | Str(), STEPS_TAG: Seq(Any())}
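
The | operator above chains validators: strictyaml tries each in turn and the first that validates wins. A quick illustration, with an invented "steps" key standing in for STEPS_TAG:

from strictyaml import Any, Float, Map, Seq, Str, load

schema = Map({"range": Float() | Str(), "steps": Seq(Any())})
print(load("range: 0.5\nsteps:\n  - a\n", schema).data)   # range is coerced to the float 0.5
print(load("range: auto\nsteps:\n  - a\n", schema).data)  # Float() fails, so range stays the string "auto"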
Example #14
from typing import Sequence

import strictyaml
from strictyaml import Any, Enum, Map, MapPattern, Seq, Str

from labby.hw.core import Device

SCHEMA = Map({
    "devices": Seq(
        Map({
            "name": Str(),
            "type": Enum(["power_supply"]),
            "driver": Str(),
            "args": MapPattern(Str(), Any()),
        })),
})


class Config:
    config: strictyaml.YAML
    devices: Sequence[Device]

    def __init__(self, yaml_contents: str) -> None:
        self.config = strictyaml.load(yaml_contents, SCHEMA)
        self.devices = [
            Device.create(device["name"], device["driver"].data,
                          device["args"].data)
            for device in self.config["devices"]
        ]
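
A config file of the shape this SCHEMA accepts might look like the following; the device name, driver path, and args are invented. Config validates the document and then builds one Device per "devices" entry:

EXAMPLE_CONFIG = """\
devices:
  - name: psu-1
    type: power_supply
    driver: labby.hw.virtual.power_supply.PowerSupply
    args:
      port: /dev/ttyUSB0
"""
# Config(EXAMPLE_CONFIG) validates the document against SCHEMA before any Device is created.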
Example #15
from typing import Sequence

import strictyaml
from strictyaml import Any, Map, MapPattern, Optional, Seq, Str

from labby.experiment import BaseInputParameters, BaseOutputData, Experiment

SCHEMA = Map({
    "sequence": Seq(
        Map({
            "experiment_type": Str(),
            Optional("params"): MapPattern(Str(), Any()),
        })),
})


class ExperimentSequence:
    filename: str
    sequence_config: strictyaml.YAML
    experiments: Sequence[Experiment[BaseInputParameters, BaseOutputData]]

    def __init__(self, filename: str, yaml_contents: str) -> None:
        self.filename = filename
        self.sequence_config = strictyaml.load(yaml_contents, SCHEMA)
        self.experiments = [
            Experiment.create(
                experiment["experiment_type"],
                f"{index:03d}",
                experiment["params"].data if "params" in experiment else None,
            ) for index, experiment in enumerate(
Example #16
        ),
        Optional(ModelMetadataKeys.MODEL_ID): Str(),
        Optional(ModelMetadataKeys.DESCRIPTION): Str(),
        Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
        Optional(ModelMetadataKeys.INFERENCE_MODEL): Map(
            {
                "targetName": Str(),
                Optional("positiveClassLabel"): Str(),
                Optional("negativeClassLabel"): Str(),
                Optional("classLabels"): Seq(Str()),
                Optional("classLabelsFile"): Str(),
                Optional("predictionThreshold"): Int(),
            }
        ),
        Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
        Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
        Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
    }
)


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
            "The following keys are missing in {} file.\n"
            "Missing keys: {}".format(MODEL_CONFIG_FILENAME, missing_sections)
Example #17
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}


element = Any()
base_schema = Map({
    'regularizers': Seq(element),
    'stages': Seq(element),
    'model': Map({
        "dataset_path": Str(),
        "modalities_to_use": Seq(Str()),
        "main_modality": Str()
    }),
    'topics': Map({
        "background_topics": Seq(Str()),
        "specific_topics": Seq(Str()),
    })
})
Example #18
SUPPORTED_CUBES = [CubeCreator, RegularizersModifierCube]
SUPPORTED_STRATEGIES = [PerplexityStrategy, GreedyStrategy]
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}


element = Any()
base_schema = Map({
    'regularizers': Seq(element),
    Optional('scores'): Seq(element),
    'stages': Seq(element),
    'model': Map({
        "dataset_path": Str(),
        Optional("modalities_to_use"): Seq(Str()),
        Optional("modalities_weights"): Any(),
        "main_modality": Str(),
    }),
    'topics': Map({
        "background_topics": Seq(Str()) | Int() | EmptyList(),
        "specific_topics": Seq(Str()) | Int() | EmptyList(),
    })
})
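
The Seq(Str()) | Int() | EmptyList() chains above let the same key hold an explicit list of topic names, a topic count, or nothing at all; a small self-contained illustration:

from strictyaml import EmptyList, Int, Map, Seq, Str, load

topics = Map({"specific_topics": Seq(Str()) | Int() | EmptyList()})
print(load("specific_topics:\n  - topic_0\n", topics).data)  # -> {'specific_topics': ['topic_0']}
print(load("specific_topics: 20\n", topics).data)            # -> {'specific_topics': 20}
print(load("specific_topics:\n", topics).data)               # -> {'specific_topics': []}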
Example #19
    }) is False)
    assert (config.contains_codecommit_with_event({
        "sources": [
            {
                "from": "SomethingElse",
                "event_for_source_changes": True
            },
            {
                "from": "CodeCommit",
                "event_for_source_changes": False
            },
        ]
    }) is False)


@patch("pipegen.config.generate_schema", return_value=Any())
def test_load_config(patched_generate_schema):
    """Tests load_config()"""
    check_config = """
    key: value
    hello: stuff
    """
    assert config.load_config(check_config, {}) == {
        "key": "value",
        "hello": "stuff"
    }
    patched_generate_schema.assert_called_once()

    check_config = """
    key: value
    hello: {{ vars.my_var }}
Example #20
#  for now just hardcode most common / important types
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}

_ELEMENT = Any()

# TODO: maybe better _DICTIONARY_FILTER_SCHEMA = build_schema_from_function(artm.Dictionary.filter)
# TODO: modalities, filter params - these all are dataset's options, not model's
#  maybe make separate YML block for dataset?

BASE_SCHEMA = Map({
    'regularizers': Seq(_ELEMENT),
    Optional('scores'): Seq(_ELEMENT),
    'stages': Seq(_ELEMENT),
    'model': Map({
        "dataset_path":
Example #21
from strictyaml import Map, MapPattern, Optional
from strictyaml import Str, Int, Seq, Enum, Any, as_document

JSONSCHEMA_TYPE_SNIPPET = {
    "type": Enum(["object", "integer", "string", "array"]),
    Optional("required"): Seq(Str()),
    Optional("properties"): MapPattern(Str(), Any()),
    Optional("items"): Any(),
}

JSONSCHEMA_SCHEMA = Map(JSONSCHEMA_TYPE_SNIPPET)


def get_schema(snippet):
    if snippet['type'] == "integer":
        return Int()
    elif snippet['type'] == "string":
        return Str()
    elif snippet['type'] == "array":
        return Seq(get_schema(snippet["items"]))
    elif snippet['type'] == "object":
        map_schema = {}
        for key, subschema in snippet['properties'].items():
            if key in snippet.get('required', []):
                map_schema[key] = get_schema(subschema)
            else:
                # keys not listed under "required" become Optional keys in the strictyaml Map
                map_schema[Optional(key)] = get_schema(subschema)
        return Map(map_schema)
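
For instance, with the required/optional handling above, a required integer property becomes a mandatory Int() key while the other properties become Optional; the snippet below is invented:

from strictyaml import load

snippet = {
    "type": "object",
    "required": ["count"],
    "properties": {"count": {"type": "integer"}, "label": {"type": "string"}},
}
schema = get_schema(snippet)  # equivalent to Map({"count": Int(), Optional("label"): Str()})
print(load("count: 3\n", schema).data)  # -> {'count': 3}; "label" may be omitted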


def load_schema(json_schema):
Example #22
    "attribute": attribute_selector,
    "text is": text_is_selector,
    "text contains": text_contains_selector,
    "xpath": xpath_selector,
}


ELEMENTS_SCHEMA = MapPattern(
    Str(),
    Str()
    | Map(
        {
            Optional("in iframe"): Str(),
            Optional("which"): Enum(["last"]) | Int(),
            Optional("but parent"): Int(),
            Optional("subelements"): Any(),
            # SELECTORS
            Optional("id"): Str(),
            Optional("class"): Str(),
            Optional("attribute"): Str(),
            Optional("text is"): Str(),
            Optional("text contains"): Str(),
            Optional("xpath"): Str(),
        }
    ),
)
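
For context, here is a document that a schema like ELEMENTS_SCHEMA accepts, shown against a trimmed copy of it: each element is either a bare selector string or a map of options, and anything under subelements is captured by Any() so it can be revalidated recursively (the element names and selectors are invented):

from strictyaml import Any, Enum, Int, Map, MapPattern, Optional, Str, load

elements_schema_sketch = MapPattern(
    Str(),
    Str() | Map({
        Optional("id"): Str(),
        Optional("which"): Enum(["last"]) | Int(),
        Optional("subelements"): Any(),
    }),
)
raw = """\
search box: input-search
login button:
  id: login
  which: last
  subelements:
    icon: svg-icon
"""
print(load(raw, elements_schema_sketch).data)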


def revalidate_subelements(elements):
    for name, options in elements.items():
        if "subelements" in options: