def build_schema_for_cubes():
    """
    Returns
    -------
    dict
        each element is str -> strictyaml.Map
        where key is name of cube,
        value is a schema used for validation and type-coercion
    """
    schemas = {}
    for class_of_object in SUPPORTED_CUBES:
        res = build_schema_from_signature(class_of_object)

        # "selection" isn't used in __init__, but we will need it later
        res["selection"] = Seq(Str())

        # shortcut for strategy initialization
        if is_key_in_schema("strategy", res):
            signature_validation = {}
            for strategy_class in SUPPORTED_STRATEGIES:
                local_signature_validation = build_schema_from_signature(strategy_class)
                signature_validation.update(local_signature_validation)
            res[Optional("strategy_params")] = Map(signature_validation)

        # we will deal with "values" later, but we can check
        # at least some simple things already
        if class_of_object.__name__ == "CubeCreator":
            element = Map({"name": Str(), "values": Seq(Any())})
            res["parameters"] = Seq(element)
        if class_of_object.__name__ == "RegularizersModifierCube":
            element = Map({
                Optional("name"): Str(),
                Optional("regularizer"): Any(),
                Optional("tau_grid"): Seq(Float())
            })
            res["regularizer_parameters"] = element | Seq(element)

        res = Map(res)
        specific_schema = Map({class_of_object.__name__: res})
        schemas[class_of_object.__name__] = specific_schema
    return schemas
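# A minimal usage sketch (hypothetical cube config; field names such as
# "num_iter" come from each cube's __init__ signature, so treat them as
# assumptions rather than the real parameter names):
#
#     import strictyaml
#
#     schemas = build_schema_for_cubes()
#     cube_config = """\
#     CubeCreator:
#         num_iter: 5
#         selection:
#             - some_criterion
#         parameters:
#             - name: seed
#               values:
#                   - 82
#                   - 322
#     """
#     validated = strictyaml.load(cube_config, schemas["CubeCreator"])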
def load(path: Path, schema_pointer):
    """Load and validate .yaml file."""
    schema = copy.deepcopy(schema_pointer)
    with path.open() as f:
        yaml = f.read()
    data = yaml_load(yaml, Any())
    is_template = path.name == "template.yaml"

    # Replace real Country and Timezone values with fakes
    if is_template:
        schema["woo/woocommerce_default_country"] = Enum(["LL"])
        schema["wp/timezone_string"] = Enum(["Region/Country"])
        schema["wp/DEFAULT_WPLANG"] = Enum(["ll_LL"])
        schema["woo/woocommerce_currency"] = Enum(["LLL"])

    if "woo/woocommerce_tax_classes" in data:
        # Check that tax classes and taxes match:
        # create an enum for taxes from the defined tax classes
        tax_classes = [
            str(tax).lower().replace(" ", "-")
            for tax in data["woo/woocommerce_tax_classes"]
        ]
        # +1 is for the standard rate, which is never listed as a tax class
        for x in range(len(tax_classes) + 1):
            # rate keys are numbered starting from 1
            schema[f"wootax/{x + 1}"] = Map({
                "country": Enum(["LL"]) if is_template else Enum(COUNTRIES),
                "state": Str(),
                "rate": Decimal(),
                "name": Str(),
                "priority": Int(),
                "compound": Int(),
                "shipping": Int(),
                "order": Int(),
                "class": Enum([""]) if x == 0 else Enum(tax_classes),
                "locations": Map({}),
            })

    return yaml_load(yaml, Map(schema), path)
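# A minimal usage sketch (hypothetical base schema and path): the schema dict
# maps flat "section/option" keys to validators, and the wootax/N keys are
# injected before validation when tax classes are present.
#
#     base = {
#         "wp/timezone_string": Str(),
#         "woo/woocommerce_default_country": Str(),
#     }
#     document = load(Path("config/template.yaml"), base)
#     print(document.data["wp/timezone_string"])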
def choose_validator(param):
    """
    Parameters
    ----------
    param : inspect.Parameter

    Returns
    -------
    instance of strictyaml.Validator
    """
    if param.annotation is int:
        return Int()
    if param.annotation is float:
        return Float()
    if param.annotation is bool:
        return Bool()
    if param.annotation is str:
        return Str()
    if param.name in ARTM_TYPES:
        return ARTM_TYPES[param.name]
    return Any()
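# A rough sketch of how choose_validator can drive signature-based schema
# building (example_fn is a hypothetical function; strictyaml's Map/Optional
# and ARTM_TYPES are assumed in scope; parameters with defaults become
# Optional keys):
import inspect


def example_fn(tau: float, verbose: bool, topic_names=None):
    pass


example_schema = Map({
    (name if param.default is inspect.Parameter.empty else Optional(name)):
        choose_validator(param)
    for name, param in inspect.signature(example_fn).parameters.items()
})
# tau -> Float() via its annotation, verbose -> Bool(),
# topic_names -> ARTM_TYPES["topic_names"] via its name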
from strictyaml import Any, Enum, Int, Map, MapPattern, Optional, Regex, Seq, Str

limits_schema = Map(
    {
        "time": Int(),
        Optional("memory", default=8192): Int(),
        Optional("output"): Int(),
        Optional("cores"): Int(),
    }
)

module_schema = Regex(r"\.?\w+(\.\w+)*")

plugin_schema = Map(
    {"module": module_schema, Optional("config"): MapPattern(Str(), Any())}
)

task_sources = Enum(["local", "url"])

schema = Map(
    {
        "title": Str(),
        Optional("description"): Str(),
        "limits": limits_schema,
        "steps": Map(
            {"run": Seq(plugin_schema), Optional("analysis"): Seq(plugin_schema)}
        ),
        "observers": Seq(plugin_schema),
        "tasks": MapPattern(Str(), MapPattern(Str(), Any())),
        "tools": MapPattern(
            Str(),
#!/usr/bin/python3
import os
import sys

cur_dir = os.path.dirname(os.path.abspath(__file__))
# subdir = 'strictyaml-1.0.6'
subdir = 'strictyaml'
sys.path.append(os.path.join(cur_dir, subdir))

# Depends on ruamel: python3-ruamel.yaml
# Depends on dateutil: python3-dateutil
from strictyaml import (load, Map, Str, Int, Seq, Any, Bool, Optional,
                        MapPattern, YAMLError)
from strictyaml.exceptions import InconsistentIndentationDisallowed

schema_ver = MapPattern(Str(), Any())

# fname = 'scanner_error.yaml'
# fname = 'indent.yaml'
fname = 'test.yaml'
with open(fname) as f:
    s = f.read()

try:
    parsed = load(s, schema_ver, label=fname)
except InconsistentIndentationDisallowed as e:
    print('Use the same indentation across the file')
    print(e)
    sys.exit(1)
except YAMLError as e:
    print('YAML parsing error:')
    print(e)
    sys.exit(1)

schema = Map({
from strictyaml import Any, Map, MapPattern, Optional, Str

addon = Map({
    "spec": Str(),
    Optional("install"): MapPattern(Str(), Any(), minimum_keys=1),
    "meta": Map({
        "name": Str(),
        "version": Str()
    }),
    Optional("config"): MapPattern(Str(), Any(), minimum_keys=1),
})

application = Map({
    "spec": Str(),
    Optional("meta"): Map({
        Optional("name"): Str(),
        Optional("version"): Str()
    }),
    Optional("services"): MapPattern(Str(), Map({"type": Str()})),
    Optional("addons"): MapPattern(
        Str(),
        Map({
            "manager": Str(),
            "hash": Str(),
            Optional("settings"): MapPattern(Str(), Any(), minimum_keys=1),
        }),
        minimum_keys=1,
    Str(),
    Map(
        {
            HTTP: Map(
                {
                    URL: Str(),
                    Optional(HEADERS, {}): MapPattern(Str(), Str()) | EmptyDict(),
                    Optional(TIMEOUT): HumanReadableTimePeriod(),
                }
            ),
            PARSER: Map(
                {
                    Optional(BUILT_IN, None): PyEnum(models.Feed.BuiltInParser),
                    Optional(CUSTOM, None): Str(),
                    Optional(OPTIONS): MapPattern(Str(), Any()) | EmptyDict(),
                }
            ),
            Optional(AUTO_UPDATE, {ENABLED: False, PERIOD: -1}): Map(
                {
                    Optional(ENABLED, True): Bool(),
                    Optional(PERIOD, -1): HumanReadableTimePeriod(),
                }
            ),
            Optional(REQUIRED_FOR_INSTALL, False): Bool(),
        }
    ),
),
Optional(SERVICE_MAPS, default_service_map_config): MapPattern(
    Str(),
    Map(
        Str(),
    Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
    Optional(ModelMetadataKeys.INFERENCE_MODEL): Map({
        Optional("targetName"): Str(),
        Optional("positiveClassLabel"): Str(),
        Optional("negativeClassLabel"): Str(),
        Optional("classLabels"): Seq(Str()),
        Optional("classLabelsFile"): Str(),
        Optional("predictionThreshold"): Int(),
    }),
    Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
    Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
    Optional(ModelMetadataKeys.VALIDATION_SCHEMA): get_type_schema_yaml_validator(),
    Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
})


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
        ),
        Optional(ModelMetadataKeys.MODEL_ID): Str(),
        Optional(ModelMetadataKeys.DESCRIPTION): Str(),
        Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
        Optional(ModelMetadataKeys.INFERENCE_MODEL): Map(
            {
                Optional("targetName"): Str(),
                Optional("positiveClassLabel"): Str(),
                Optional("negativeClassLabel"): Str(),
                Optional("classLabels"): Seq(Str()),
                Optional("classLabelsFile"): Str(),
                Optional("predictionThreshold"): Int(),
            }
        ),
        Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
        Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
        Optional(ModelMetadataKeys.VALIDATION_SCHEMA): Any(),
        Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
    }
)


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
            "The following keys are missing in {} file.\n"
def read_and_validate_config(strictyaml_config: str) -> YAML:
    config_schema = Map({"pipelines": Any()})
    return load(strictyaml_config, config_schema)
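# A minimal usage sketch; "pipelines" may hold any shape since it is
# validated with Any():
_example = """\
pipelines:
  build:
    - step: compile
"""
assert read_and_validate_config(_example).data == {
    "pipelines": {"build": [{"step": "compile"}]}
}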
class Structure:
    __scheme = Map(
        {
            "app": Str(),
            "layers": Seq(Map({"name": Str(), Optional("children"): Seq(Any())})),
        }
    )

    def __init__(self):
        self.__file = None
        self.__global_layer = None

    @property
    def layers(self):
        return self.__global_layer

    def structure_from_file(self, absolute_file_path: str):
        self.__file_exist(absolute_file_path)
        try:
            with open(absolute_file_path, "r") as file:
                raw_yaml = file.read()
            output_yaml = load(raw_yaml, self.__scheme)
            self.__validate_layer_children(output_yaml.data["layers"], [])
        except YAMLValidationError:
            # TODO log error, handle more error types
            raise BadYAMLError
        except AssertionError:
            raise BadYAMLError

        global_dict = dict()
        global_dict["name"] = "global"
        global_dict["children"] = output_yaml.data["layers"]

        self.__file = absolute_file_path
        self.__global_layer = DictImporter().import_(global_dict)
        return self

    def show_structure(self) -> str:
        if self.__global_layer:
            return str(RenderTree(self.__global_layer))
        return "Structure yet to be defined."

    @classmethod
    def __file_exist(cls, file_path: str):
        if not isinstance(file_path, str) or not file_path.endswith((".yml", ".yaml")):
            # TODO: add exception reason
            raise BadYAMLError()
        if not path.isfile(file_path):
            raise BadYAMLError()

    @classmethod
    def __validate_layer_children(cls, chunk, name_memo):
        def _seq_but_not_str(obj):
            return isinstance(obj, Sequence) and not isinstance(
                obj, (str, bytes, bytearray)
            )

        for layer in chunk:
            assert "name" in layer
            assert isinstance(layer["name"], str)
            assert layer["name"] not in name_memo
            name_memo.append(layer["name"])
            if "children" in layer:
                assert _seq_but_not_str(layer["children"])
                cls.__validate_layer_children(layer["children"], name_memo)

    def flatten_layers(self) -> list:
        """
        method to flatten the structure as if searched with BFS
        :return: list of flattened nodes by layers
        """
        if self.layers:
            from anytree import LevelOrderIter

            return [node for node in LevelOrderIter(self.layers)]
        raise Exception("No structure defined")
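# A minimal usage sketch, assuming a file "structure.yaml" containing:
#
#     app: shop
#     layers:
#       - name: domain
#       - name: infrastructure
#         children:
#           - name: persistence
#
#     structure = Structure().structure_from_file("/abs/path/structure.yaml")
#     print(structure.show_structure())  # rendered anytree hierarchy
#     print([n.name for n in structure.flatten_layers()])
#     # -> ['global', 'domain', 'infrastructure', 'persistence']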
{ "app_name": Str( doc={ "text": "The name of the application, which is used in various ways to uniquely identify the resources belonging to it", "label": "top", }), "resources": Seq( Any( doc={ "any_options": [ "function", "http_api", "publisher", "object_store", "keyvalue_store", "stream_analytics", ] }), doc={ "text": "The list of resources that comprise the application", "label": "top", "title": "resource_types", }, ), }, doc={ "text":
def _get_route_mapping(cls) -> dict:
    return {"range": Float() | Str(), STEPS_TAG: Seq(Any())}
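# A small illustration of the Float() | Str() fallback above: strictyaml
# tries the left validator first, so numeric text coerces to float and
# anything else stays a string.
from strictyaml import Float, Map, Str, load

assert load("range: 0.5", Map({"range": Float() | Str()})).data == {"range": 0.5}
assert load("range: auto", Map({"range": Float() | Str()})).data == {"range": "auto"}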
from typing import Sequence

import strictyaml
from strictyaml import Any, Enum, Map, MapPattern, Seq, Str

from labby.hw.core import Device

SCHEMA = Map({
    "devices": Seq(
        Map({
            "name": Str(),
            # Enum expects a list of allowed values; a bare string would be
            # treated as a substring match
            "type": Enum(["power_supply"]),
            "driver": Str(),
            "args": MapPattern(Str(), Any()),
        })),
})


class Config:
    config: strictyaml.YAML
    devices: Sequence[Device]

    def __init__(self, yaml_contents: str) -> None:
        self.config = strictyaml.load(yaml_contents, SCHEMA)
        self.devices = [
            Device.create(
                device["name"].data, device["driver"].data, device["args"].data
            )
            for device in self.config["devices"]
        ]
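# A minimal config sketch (hypothetical device name, driver path, and args;
# Device.create resolves the driver, so instantiation is left commented):
#
#     devices:
#       - name: bench-psu
#         type: power_supply
#         driver: some_package.drivers.FakePowerSupply
#         args:
#           port: /dev/ttyUSB0
#
#     config = Config(yaml_contents)
#     config.devices  # -> [<Device bench-psu>]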
from typing import Sequence

import strictyaml
from strictyaml import Any, Map, MapPattern, Optional, Seq, Str

from labby.experiment import BaseInputParameters, BaseOutputData, Experiment

SCHEMA = Map({
    "sequence": Seq(
        Map({
            "experiment_type": Str(),
            Optional("params"): MapPattern(Str(), Any()),
        }),
    ),
})


class ExperimentSequence:
    filename: str
    sequence_config: strictyaml.YAML
    experiments: Sequence[Experiment[BaseInputParameters, BaseOutputData]]

    def __init__(self, filename: str, yaml_contents: str) -> None:
        self.filename = filename
        self.sequence_config = strictyaml.load(yaml_contents, SCHEMA)
        self.experiments = [
            Experiment.create(
                experiment["experiment_type"],
                f"{index:03d}",
                experiment["params"].data if "params" in experiment else None,
            )
            for index, experiment in enumerate(
        ),
        Optional(ModelMetadataKeys.MODEL_ID): Str(),
        Optional(ModelMetadataKeys.DESCRIPTION): Str(),
        Optional(ModelMetadataKeys.MAJOR_VERSION): Bool(),
        Optional(ModelMetadataKeys.INFERENCE_MODEL): Map(
            {
                "targetName": Str(),
                Optional("positiveClassLabel"): Str(),
                Optional("negativeClassLabel"): Str(),
                Optional("classLabels"): Seq(Str()),
                Optional("classLabelsFile"): Str(),
                Optional("predictionThreshold"): Int(),
            }
        ),
        Optional(ModelMetadataKeys.TRAINING_MODEL): Map({Optional("trainOnProject"): Str()}),
        Optional(ModelMetadataKeys.HYPERPARAMETERS): Any(),
        Optional(ModelMetadataKeys.CUSTOM_PREDICTOR): Any(),
    }
)


def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
            "The following keys are missing in {} file.\n"
            "Missing keys: {}".format(MODEL_CONFIG_FILENAME, missing_sections)
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}

element = Any()
base_schema = Map({
    'regularizers': Seq(element),
    'stages': Seq(element),
    'model': Map({
        "dataset_path": Str(),
        "modalities_to_use": Seq(Str()),
        "main_modality": Str()
    }),
    'topics': Map({
        "background_topics": Seq(Str()),
        "specific_topics": Seq(Str()),
    })
})

SUPPORTED_CUBES = [CubeCreator, RegularizersModifierCube]
SUPPORTED_STRATEGIES = [PerplexityStrategy, GreedyStrategy]
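# A minimal document sketch accepted by base_schema (hypothetical dataset
# path, modality, and topic names; strictyaml rejects flow style, hence the
# block sequences):
_example = """\
regularizers:
- decorrelator_phi
stages:
- fit_stage
model:
    dataset_path: /data/dataset.csv
    modalities_to_use:
    - "@word"
    main_modality: "@word"
topics:
    background_topics:
    - bcg_0
    specific_topics:
    - topic_0
"""
# strictyaml.load(_example, base_schema) validates it and returns typed data.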
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}

element = Any()
base_schema = Map({
    'regularizers': Seq(element),
    Optional('scores'): Seq(element),
    'stages': Seq(element),
    'model': Map({
        "dataset_path": Str(),
        Optional("modalities_to_use"): Seq(Str()),
        Optional("modalities_weights"): Any(),
        "main_modality": Str(),
    }),
    'topics': Map({
        "background_topics": Seq(Str()) | Int() | EmptyList(),
        "specific_topics": Seq(Str()) | Int() | EmptyList(),
    })
})
        }) is False)

    assert (config.contains_codecommit_with_event({
        "sources": [
            {
                "from": "SomethingElse",
                "event_for_source_changes": True
            },
            {
                "from": "CodeCommit",
                "event_for_source_changes": False
            },
        ]
    }) is False)


@patch("pipegen.config.generate_schema", return_value=Any())
def test_load_config(patched_generate_schema):
    """Tests load_config()"""
    check_config = """
key: value
hello: stuff
"""
    assert config.load_config(check_config, {}) == {
        "key": "value",
        "hello": "stuff"
    }
    patched_generate_schema.assert_called_once()

    check_config = """
key: value
hello: {{ vars.my_var }}
# for now just hardcode most common / important types
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}

_ELEMENT = Any()
# TODO: maybe better
_DICTIONARY_FILTER_SCHEMA = build_schema_from_function(artm.Dictionary.filter)

# TODO: modalities, filter params - these all are dataset's options, not model's
# maybe make separate YML block for dataset?
BASE_SCHEMA = Map({
    'regularizers': Seq(_ELEMENT),
    Optional('scores'): Seq(_ELEMENT),
    'stages': Seq(_ELEMENT),
    'model': Map({
        "dataset_path":
from strictyaml import Map, MapPattern, Optional
from strictyaml import Str, Int, Seq, Enum, Any, as_document

JSONSCHEMA_TYPE_SNIPPET = {
    "type": Enum(["object", "integer", "string", "array"]),
    Optional("required"): Seq(Str()),
    Optional("properties"): MapPattern(Str(), Any()),
    Optional("items"): Any(),
}

JSONSCHEMA_SCHEMA = Map(JSONSCHEMA_TYPE_SNIPPET)


def get_schema(snippet):
    if snippet['type'] == "integer":
        return Int()
    elif snippet['type'] == "string":
        return Str()
    elif snippet['type'] == "array":
        return Seq(get_schema(snippet["items"]))
    elif snippet['type'] == "object":
        map_schema = {}
        for key, subschema in snippet['properties'].items():
            # keys listed in "required" stay mandatory;
            # everything else becomes Optional
            if key in snippet.get('required', []):
                map_schema[key] = get_schema(subschema)
            else:
                map_schema[Optional(key)] = get_schema(subschema)
        return Map(map_schema)


def load_schema(json_schema):
"attribute": attribute_selector, "text is": text_is_selector, "text contains": text_contains_selector, "xpath": xpath_selector, } ELEMENTS_SCHEMA = MapPattern( Str(), Str() | Map( { Optional("in iframe"): Str(), Optional("which"): Enum(["last"]) | Int(), Optional("but parent"): Int(), Optional("subelements"): Any(), # SELECTORS Optional("id"): Str(), Optional("class"): Str(), Optional("attribute"): Str(), Optional("text is"): Str(), Optional("text contains"): Str(), Optional("xpath"): Str(), } ), ) def revalidate_subelements(elements): for name, options in elements.items(): if "subelements" in options: