Any(None, object), Required('credentials'): object, }) postgres_credentials_contract = Schema({ Required('dbname'): basestring, Required('host'): basestring, Required('user'): basestring, Required('pass'): basestring, Required('port'): Any(All(int, Range(min=0, max=65535)), basestring), Required('schema'): basestring, }) snowflake_credentials_contract = Schema({ Required('account'): basestring, Required('user'): basestring, Required('password'): basestring, Required('database'): basestring, Required('schema'): basestring, Required('warehouse'): basestring, Optional('role'): basestring, }) bigquery_auth_methods = ['oauth', 'service-account', 'service-account-json']
return x raise Invalid(f"invalid mode {x}") def lb_dev_state(x): if x in ["normal"]: return x raise Invalid(f"Invalid dev_state {x}") TZ_SCHEMA = Schema({ "zone_str": str, "dst_offset": int, "index": All(int, Range(min=0)), "tz_str": str }) CURRENT_CONSUMPTION_SCHEMA = Schema( Any( { "voltage": Any(All(float, Range(min=0, max=300)), None), "power": Any(Coerce(float, Range(min=0)), None), "total": Any(Coerce(float, Range(min=0)), None), "current": Any(All(float, Range(min=0)), None), "voltage_mv": Any(All(float, Range(min=0, max=300000)), int, None), # TODO can this be int? "power_mw": Any(Coerce(float, Range(min=0)), None), "total_wh": Any(Coerce(float, Range(min=0)), None), "current_ma": Any(All(float, Range(min=0)), int,
# Schema for a location-based reminder.
#
# BUGFIX(review): the original used Any(...) for these validators, but in
# voluptuous Any() is a logical OR — e.g. Any(float, Range(min=-90, max=90))
# accepts *any* float regardless of range, and Any(str, Length(min=1))
# accepts any string including "". The inline comments make the intent a
# conjunction, so these are now All(...).
_location_reminder_schema = {
    # There must be a WHERE object
    Required('where'): {
        # Must be a string w/ at least one character
        Required('name'): All(str, Length(min=1)),

        # Must be a lat/long pair.
        # Note, there's a lot of other info that can be properly encoded in a
        # lat/lon pair, but the todoist API does not use any of that... so all
        # we need to do is make sure the user has provided two numbers within
        # the correct ranges:
        #
        # The latitude must be a number between -90 and 90.
        # The longitude must be a number between -180 and 180.
        ##
        Required('latitude'): All(float, Range(min=-90, max=90)),
        Required('longitude'): All(float, Range(min=-180, max=180)),

        # We have a central point, but now we need to define a radius to
        # define our bounding box. We default to a 10 meter circle.
        # NOTE(review): the original comment said "> 0" but Range(min=0) also
        # accepts 0; kept min=0 so no previously-valid payload is rejected.
        Optional('radius', default=10,
                 msg="radius must be a positive whole number of meters"):
            All(int, Range(min=0)),

        # Now that we have our bounding box defined, we now set the trigger.
        # Ingress or egress?
        Optional('trigger', default=_reminder_triggers[0]):
            In(_reminder_triggers,
               msg="Location based reminder can be trigger on ONE OF {}".format(
                   _reminder_triggers))
    }
}
def _tab_types_response(team):
    """Return a JSON array of the team's tab types, ordered by name.

    Price is serialized as a string to avoid float formatting issues in the
    JSON payload.
    """
    tab_types = (TabType.query.filter(TabType.team == team).order_by(
        TabType.name).all())
    return jsonify([{
        'id': tab_type.id,
        'name': tab_type.name,
        'price': str(tab_type.price),
    } for tab_type in tab_types])


# Validation schema for tab-type create payloads: short non-empty name and a
# price coercible to float within (0.01, 999).
tab_type_schema = Schema({
    Required('name'): All(str, Length(min=1, max=20)),
    Required('price'): All(Coerce(float), Range(min=0.01, max=999))
})


@api.route('/teams/<team_slug>/tab-types')
@jwt_required()
@team_view
def tab_types(team):
    # GET: any authenticated team member may list tab types.
    return _tab_types_response(team)


# POST: creation additionally requires team-admin rights.
# NOTE(review): the body of create_tab_type is outside this view of the file.
@api.route('/teams/<team_slug>/tab-types', methods=['POST'])
@jwt_required()
@team_view
@team_admin_required
def create_tab_type(team):
# Loxone operating-mode value -> Home Assistant HVAC mode.
OPMODES = {
    None: HVAC_MODE_OFF,
    0: HVAC_MODE_AUTO,
    1: HVAC_MODE_AUTO,
    2: HVAC_MODE_AUTO,
    3: HVAC_MODE_HEAT_COOL,
    4: HVAC_MODE_HEAT,
    5: HVAC_MODE_COOL
}

# Reverse mapping for the HVAC modes a user can actively select.
OPMODETOLOXONE = {HVAC_MODE_HEAT_COOL: 3, HVAC_MODE_HEAT: 4, HVAC_MODE_COOL: 5}

PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({
    Optional(CONF_HVAC_AUTO_MODE, default=0): All(int, Range(min=0, max=2)),
})


# noinspection PyUnusedLocal
async def async_setup_platform(hass, config, async_add_devices,
                               discovery_info=None):
    """Set up the platform; currently a no-op that reports success.

    BUGFIX(review): the original declared ``discovery_info={}`` — a mutable
    default argument. The parameter is never read, so ``None`` is a safe,
    idiomatic replacement. Dead commented-out setup code removed.
    """
    return True
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os

from taskgraph.parameters import extend_parameters_schema
from voluptuous import All, Any, Range, Required

# Register the extra decision parameters this project adds on top of
# taskgraph's defaults: the PR number (positive int, or None outside of
# pull requests) and the version string derived from the head tag.
extend_parameters_schema(
    {
        Required("pull_request_number"): Any(All(int, Range(min=1)), None),
        Required("version"): str,
    }
)


def get_decision_parameters(graph_config, parameters):
    """Populate ``version`` and ``pull_request_number`` on *parameters*.

    The version is the head tag with its first character (the leading "v")
    stripped, or "" when there is no head tag. The PR number is read from
    the MOZILLAVPN_PULL_REQUEST_NUMBER environment variable when present.
    """
    tag = parameters["head_tag"]
    if tag:
        parameters["version"] = tag[1:]
    else:
        parameters["version"] = ""

    raw_pr = os.environ.get("MOZILLAVPN_PULL_REQUEST_NUMBER")
    parameters["pull_request_number"] = int(raw_pr) if raw_pr is not None else None
raise Invalid(msg or ('expected %s' % _type.__name__)) else: return value return validator codedict_schema = Schema({Extra: dict}) provider_schema = Schema( { 'enable': typecheck(bool), 'lock': typecheck(bool), 'name': All(str, Length(min=1)), 'long_name': All(str, Length(min=1)), 'version': All(int, Range(min=1)), 'slug': All(str, Length(min=1)), 'region': All(str, Length(min=1)), 'website': All(str, Length(min=9)), 'metadata': Any(None, dict), Optional('terms_of_use'): Any(None, str), }, required=True) data_tree_datasets_schema = Schema( { 'dataset_code': All(str, Length(min=1)), 'name': All(str, Length(min=1)), 'last_update': Any(None, typecheck(datetime)), 'metadata': Any(None, dict), },
Required('submission_due'): Any(datetime_validator, None), Required('text_response', default='required'): Any(All(utf8_validator, In(NECESSITY_OPTIONS)), None), Required('file_upload_response', default=None): Any(All(utf8_validator, In(NECESSITY_OPTIONS)), None), 'allow_file_upload': bool, # Backwards compatibility. Required('file_upload_type', default=None): Any(All(utf8_validator, In(VALID_UPLOAD_FILE_TYPES)), None), 'white_listed_file_types': utf8_validator, Required('allow_latex'): bool, Required('leaderboard_show'): int, Optional('teams_enabled'): bool, Required('assessments'): [ Schema({ Required('name'): All(utf8_validator, In(VALID_ASSESSMENT_TYPES)), Required('start', default=None): Any(datetime_validator, None), Required('due', default=None): Any(datetime_validator, None), 'required': bool, 'must_grade': All(int, Range(min=0)), 'must_be_graded_by': All(int, Range(min=0)), 'examples': [ Schema({ Required('answer'): [utf8_validator], Required('options_selected'): [ Schema({ Required('criterion'): utf8_validator, Required('option'): utf8_validator }) ] }) ], 'examples_xml': utf8_validator, }) ],
def test_new_required_test():
    """A Schema constructed with required=True exposes the flag as .required."""
    validator = Schema({'my_key': All(int, Range(1, 20))}, required=True)
    assert_true(validator.required)
from voluptuous import Schema, Length, All, Required, REMOVE_EXTRA, Range

# Request-body validators keyed by URL prefix, then HTTP method.
# extra=REMOVE_EXTRA silently drops unknown keys instead of rejecting them.

# Board coordinates are zero-based indices 0..9.
_coordinate = All(int, Range(min=0, max=9))

_create_game = Schema(
    {
        Required('name'): All(str, Length(min=1, max=50)),
        # The grid is a flat string of exactly 100 characters
        # (presumably a 10x10 board — confirm with the game logic).
        Required('grid'): All(str, Length(min=100, max=100)),
    },
    extra=REMOVE_EXTRA)

_move = Schema(
    {
        Required('x'): _coordinate,
        Required('y'): _coordinate,
    },
    extra=REMOVE_EXTRA)

schemas = {
    '/games/': {
        'POST': _create_game,
        'PATCH': _move,
    }
}
import os

from flask import jsonify, request
from sqlalchemy import inspect, select
from sqlalchemy.exc import IntegrityError, SQLAlchemyError, DataError
from voluptuous import All, Coerce, MultipleInvalid, Range, Schema
from werkzeug.exceptions import BadRequest

from app.exceptions import InvalidUsageError
from app.models import FeedbackResponse, SurveyResponse
from app import app, db, logger
import settings

# Query-string validation for the response-listing endpoints.
schema = Schema({
    'added_ms': Coerce(int),
    'form': str,
    'page': All(Coerce(int), Range(min=1)),
    'period': str,
    'per_page': All(Coerce(int), Range(min=1, max=100)),
    'ru_ref': str,
    'survey_id': str,
})


def create_tables():
    """Create all database tables defined on the SQLAlchemy metadata."""
    logger.info("Creating tables")
    db.create_all()


# BUGFIX(review): `os` was used here but never imported; `import os` added
# above. NOTE(review): os.getenv returns the raw string when the variable is
# set, so any non-empty value (even "false") triggers table creation —
# confirm whether a stricter check (== "true") is intended before changing.
if os.getenv("CREATE_TABLES", False):
    create_tables()
from voluptuous import All, Optional, Range, Schema

from .config_logging import LoggingConfig, SCHEMA as LOGGING_SCHEMA

# Recorder configuration schema. Every key is optional and falls back to the
# default below. The numeric durations appear to be seconds for lookback and
# timeout and a retention count for retain — TODO confirm units with callers.
SCHEMA = Schema({
    Optional("lookback", default=5): All(int, Range(min=0)),
    Optional("timeout", default=10): All(int, Range(min=0)),
    Optional("retain", default=7): All(int, Range(min=1)),
    Optional("folder", default="/recordings"): str,
    Optional("extension", default="mp4"): str,
    Optional("hwaccel_args", default=[]): [str],
    Optional("codec", default="copy"): str,
    Optional("filter_args", default=[]): [str],
    Optional("segments_folder", default="/segments"): str,
    Optional("thumbnail", default={}): {
        Optional("save_to_disk", default=False): bool,
        Optional("send_to_mqtt", default=False): bool,
    },
    Optional("logging"): LOGGING_SCHEMA,
})


class Thumbnail:
    """Read-only view over a validated ``thumbnail`` config section."""

    def __init__(self, thumbnail):
        # Expects a dict already validated by SCHEMA, so both keys exist.
        self._save_to_disk = thumbnail["save_to_disk"]
        self._send_to_mqtt = thumbnail["send_to_mqtt"]

    @property
    def save_to_disk(self):
        """Whether thumbnails should be written to disk."""
        return self._save_to_disk
Optional('teams_enabled'): bool, Optional('selected_teamset_id'): utf8_validator, Required('assessments'): [ Schema({ Required('name'): All(utf8_validator, In(VALID_ASSESSMENT_TYPES)), Required('start', default=None): Any(datetime_validator, None), Required('due', default=None): Any(datetime_validator, None), 'required': bool, 'must_grade': All(int, Range(min=0)), 'must_be_graded_by': All(int, Range(min=0)), 'examples': [ Schema({ Required('answer'): [utf8_validator], Required('options_selected'): [ Schema({ Required('criterion'): utf8_validator, Required('option'): utf8_validator }) ] }) ], 'examples_xml': utf8_validator,
@truth def index_route_requirement(task_routes): rel_routes = [r.startswith("index.releases.") for r in task_routes] return len(rel_routes) >= 2 COMMON_TASK_SCHEMA = Schema( All( task_signature_test, { # Must pass task signature test, and the below Schema 'requires': Any([Match(TASKCLUSTER_ID_REGEX)], None), Required('reruns', msg="Required for releasetasks schema."): Range(min=0, max=100), Required('taskId', msg="Required for TaskCluster schema."): Match(TASKCLUSTER_ID_REGEX), Required('task', msg="Required for TaskCluster schema."): All( task_provisionerId_test, Schema( { Required('created', msg="Required for TaskCluster schema."): str, Required('deadline', msg="Required for TaskCluster schema."): str, Required('extra', msg="Required for releasetasks schema."): {
class Config(object):  # pylint: disable=too-many-instance-attributes
    """Class that manages configuration files for a dvc repo.

    Args:
        dvc_dir (str): optional path to `.dvc` directory, that is used to
            access repo-specific configs like .dvc/config and
            .dvc/config.local.
        validate (bool): optional flag to tell dvc if it should validate the
            config or just load it as is. 'True' by default.

    Raises:
        ConfigError: thrown when config has an invalid format.
    """

    APPNAME = "dvc"
    APPAUTHOR = "iterative"

    # NOTE: used internally in RemoteLOCAL to know config
    # location, that url should resolved relative to.
    PRIVATE_CWD = "_cwd"

    CONFIG = "config"
    CONFIG_LOCAL = "config.local"

    CREDENTIALPATH = "credentialpath"

    # Config levels, lowest (most specific) to highest.
    LEVEL_LOCAL = 0
    LEVEL_REPO = 1
    LEVEL_GLOBAL = 2
    LEVEL_SYSTEM = 3

    SECTION_CORE = "core"
    SECTION_CORE_LOGLEVEL = "loglevel"
    SECTION_CORE_LOGLEVEL_SCHEMA = All(
        Lower, Choices("info", "debug", "warning", "error")
    )
    SECTION_CORE_REMOTE = "remote"
    SECTION_CORE_INTERACTIVE = "interactive"
    SECTION_CORE_ANALYTICS = "analytics"
    SECTION_CORE_CHECKSUM_JOBS = "checksum_jobs"

    SECTION_CACHE = "cache"
    SECTION_CACHE_DIR = "dir"
    SECTION_CACHE_TYPE = "type"
    SECTION_CACHE_PROTECTED = "protected"
    SECTION_CACHE_SHARED = "shared"
    SECTION_CACHE_SHARED_SCHEMA = All(Lower, Choices("group"))
    SECTION_CACHE_LOCAL = "local"
    SECTION_CACHE_S3 = "s3"
    SECTION_CACHE_GS = "gs"
    SECTION_CACHE_SSH = "ssh"
    SECTION_CACHE_HDFS = "hdfs"
    SECTION_CACHE_AZURE = "azure"
    SECTION_CACHE_SLOW_LINK_WARNING = "slow_link_warning"
    SECTION_CACHE_SCHEMA = {
        SECTION_CACHE_LOCAL: str,
        SECTION_CACHE_S3: str,
        SECTION_CACHE_GS: str,
        SECTION_CACHE_HDFS: str,
        SECTION_CACHE_SSH: str,
        SECTION_CACHE_AZURE: str,
        SECTION_CACHE_DIR: str,
        SECTION_CACHE_TYPE: supported_cache_type,
        Optional(SECTION_CACHE_PROTECTED, default=False): Bool,
        SECTION_CACHE_SHARED: SECTION_CACHE_SHARED_SCHEMA,
        PRIVATE_CWD: str,
        Optional(SECTION_CACHE_SLOW_LINK_WARNING, default=True): Bool,
    }
    SECTION_CORE_SCHEMA = {
        SECTION_CORE_LOGLEVEL: SECTION_CORE_LOGLEVEL_SCHEMA,
        SECTION_CORE_REMOTE: Lower,
        Optional(SECTION_CORE_INTERACTIVE, default=False): Bool,
        Optional(SECTION_CORE_ANALYTICS, default=True): Bool,
        SECTION_CORE_CHECKSUM_JOBS: All(Coerce(int), Range(1)),
    }

    # aws specific options
    SECTION_AWS_CREDENTIALPATH = CREDENTIALPATH
    SECTION_AWS_ENDPOINT_URL = "endpointurl"
    SECTION_AWS_LIST_OBJECTS = "listobjects"
    SECTION_AWS_REGION = "region"
    SECTION_AWS_PROFILE = "profile"
    SECTION_AWS_USE_SSL = "use_ssl"
    SECTION_AWS_SSE = "sse"
    SECTION_AWS_ACL = "acl"

    # gcp specific options
    SECTION_GCP_CREDENTIALPATH = CREDENTIALPATH
    SECTION_GCP_PROJECTNAME = "projectname"

    # azure specific option
    SECTION_AZURE_CONNECTION_STRING = "connection_string"

    # Alibabacloud oss options
    SECTION_OSS_ACCESS_KEY_ID = "oss_key_id"
    SECTION_OSS_ACCESS_KEY_SECRET = "oss_key_secret"
    SECTION_OSS_ENDPOINT = "oss_endpoint"

    # GDrive options
    SECTION_GDRIVE_CLIENT_ID = "gdrive_client_id"
    SECTION_GDRIVE_CLIENT_SECRET = "gdrive_client_secret"
    SECTION_GDRIVE_USER_CREDENTIALS_FILE = "gdrive_user_credentials_file"

    SECTION_REMOTE_REGEX = r'^\s*remote\s*"(?P<name>.*)"\s*$'
    SECTION_REMOTE_FMT = 'remote "{}"'
    SECTION_REMOTE_URL = "url"
    # BUGFIX(review): in the copy under review, the user/password/ask_password
    # option names were redacted to "******", which made all three constants
    # collide as dict keys in SECTION_REMOTE_SCHEMA (only the last entry
    # survived). Restored to the documented dvc remote option names.
    SECTION_REMOTE_USER = "user"
    SECTION_REMOTE_PORT = "port"
    SECTION_REMOTE_KEY_FILE = "keyfile"
    SECTION_REMOTE_TIMEOUT = "timeout"
    SECTION_REMOTE_PASSWORD = "password"
    SECTION_REMOTE_ASK_PASSWORD = "ask_password"
    SECTION_REMOTE_GSS_AUTH = "gss_auth"
    SECTION_REMOTE_NO_TRAVERSE = "no_traverse"
    SECTION_REMOTE_SCHEMA = {
        Required(SECTION_REMOTE_URL): str,
        SECTION_AWS_REGION: str,
        SECTION_AWS_PROFILE: str,
        SECTION_AWS_CREDENTIALPATH: str,
        SECTION_AWS_ENDPOINT_URL: str,
        Optional(SECTION_AWS_LIST_OBJECTS, default=False): Bool,
        Optional(SECTION_AWS_USE_SSL, default=True): Bool,
        SECTION_AWS_SSE: str,
        SECTION_AWS_ACL: str,
        SECTION_GCP_PROJECTNAME: str,
        SECTION_CACHE_TYPE: supported_cache_type,
        Optional(SECTION_CACHE_PROTECTED, default=False): Bool,
        SECTION_REMOTE_USER: str,
        SECTION_REMOTE_PORT: Coerce(int),
        SECTION_REMOTE_KEY_FILE: str,
        SECTION_REMOTE_TIMEOUT: Coerce(int),
        SECTION_REMOTE_PASSWORD: str,
        SECTION_REMOTE_ASK_PASSWORD: Bool,
        SECTION_REMOTE_GSS_AUTH: Bool,
        SECTION_AZURE_CONNECTION_STRING: str,
        SECTION_OSS_ACCESS_KEY_ID: str,
        SECTION_OSS_ACCESS_KEY_SECRET: str,
        SECTION_OSS_ENDPOINT: str,
        SECTION_GDRIVE_CLIENT_ID: str,
        SECTION_GDRIVE_CLIENT_SECRET: str,
        SECTION_GDRIVE_USER_CREDENTIALS_FILE: str,
        PRIVATE_CWD: str,
        Optional(SECTION_REMOTE_NO_TRAVERSE, default=True): Bool,
    }

    SECTION_STATE = "state"
    SECTION_STATE_ROW_LIMIT = "row_limit"
    SECTION_STATE_ROW_CLEANUP_QUOTA = "row_cleanup_quota"
    SECTION_STATE_SCHEMA = {
        SECTION_STATE_ROW_LIMIT: All(Coerce(int), Range(1)),
        SECTION_STATE_ROW_CLEANUP_QUOTA: All(Coerce(int), Range(0, 100)),
    }

    SCHEMA = {
        Optional(SECTION_CORE, default={}): SECTION_CORE_SCHEMA,
        Match(SECTION_REMOTE_REGEX): SECTION_REMOTE_SCHEMA,
        Optional(SECTION_CACHE, default={}): SECTION_CACHE_SCHEMA,
        Optional(SECTION_STATE, default={}): SECTION_STATE_SCHEMA,
    }
    COMPILED_SCHEMA = Schema(SCHEMA)

    def __init__(self, dvc_dir=None, validate=True):
        self.dvc_dir = dvc_dir
        self.validate = validate

        if not dvc_dir:
            # No explicit dir given: discover the enclosing repo's .dvc dir,
            # falling back to repo-less (global/system configs only).
            try:
                from dvc.repo import Repo

                self.dvc_dir = os.path.join(Repo.find_dvc_dir())
            except NotDvcRepoError:
                self.dvc_dir = None
        else:
            self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))

        self.load()

    @staticmethod
    def get_global_config_dir():
        """Returns global config location. E.g. ~/.config/dvc/config.

        Returns:
            str: path to the global config directory.
        """
        from appdirs import user_config_dir

        return user_config_dir(
            appname=Config.APPNAME, appauthor=Config.APPAUTHOR
        )

    @staticmethod
    def get_system_config_dir():
        """Returns system config location. E.g. /etc/dvc.conf.

        Returns:
            str: path to the system config directory.
        """
        from appdirs import site_config_dir

        return site_config_dir(
            appname=Config.APPNAME, appauthor=Config.APPAUTHOR
        )

    @staticmethod
    def init(dvc_dir):
        """Initializes dvc config.

        Args:
            dvc_dir (str): path to .dvc directory.

        Returns:
            dvc.config.Config: config object.
        """
        config_file = os.path.join(dvc_dir, Config.CONFIG)
        open(config_file, "w+").close()
        return Config(dvc_dir)

    def _resolve_cache_path(self, config):
        # Record the config file's directory so a relative cache dir can be
        # resolved against it later.
        cache = config.get(self.SECTION_CACHE)
        if cache is None:
            return

        cache_dir = cache.get(self.SECTION_CACHE_DIR)
        if cache_dir is None:
            return

        cache[self.PRIVATE_CWD] = os.path.dirname(config.filename)

    def _resolve_paths(self, config):
        # Return a deep copy with PRIVATE_CWD annotations for cache and
        # remote sections; in-memory configs (no filename) pass through.
        if config.filename is None:
            return config

        ret = copy.deepcopy(config)
        self._resolve_cache_path(ret)

        for section in ret.values():
            if self.SECTION_REMOTE_URL not in section.keys():
                continue
            section[self.PRIVATE_CWD] = os.path.dirname(ret.filename)

        return ret

    def _load_configs(self):
        # Read all four config levels; repo/local levels stay empty when
        # there is no .dvc dir.
        system_config_file = os.path.join(
            self.get_system_config_dir(), self.CONFIG
        )
        global_config_file = os.path.join(
            self.get_global_config_dir(), self.CONFIG
        )

        self._system_config = configobj.ConfigObj(system_config_file)
        self._global_config = configobj.ConfigObj(global_config_file)
        self._repo_config = configobj.ConfigObj()
        self._local_config = configobj.ConfigObj()

        if not self.dvc_dir:
            return

        config_file = os.path.join(self.dvc_dir, self.CONFIG)
        config_local_file = os.path.join(self.dvc_dir, self.CONFIG_LOCAL)

        self._repo_config = configobj.ConfigObj(config_file)
        self._local_config = configobj.ConfigObj(config_local_file)

    @property
    def config_local_file(self):
        """Path of the repo-local (.dvc/config.local) config file."""
        return self._local_config.filename

    @property
    def config_file(self):
        """Path of the repo (.dvc/config) config file."""
        return self._repo_config.filename

    def load(self):
        """Loads config from all the config files.

        Raises:
            dvc.config.ConfigError: thrown if config has invalid format.
        """
        self._load_configs()

        # Merge from the least specific (system) to the most specific
        # (local), so more specific levels win.
        self.config = configobj.ConfigObj()
        for c in [
            self._system_config,
            self._global_config,
            self._repo_config,
            self._local_config,
        ]:
            c = self._resolve_paths(c)
            c = self._lower(c)
            self.config.merge(c)

        if not self.validate:
            return

        d = self.config.dict()

        try:
            d = self.COMPILED_SCHEMA(d)
        except Invalid as exc:
            raise ConfigError(str(exc), cause=exc)

        self.config = configobj.ConfigObj(d, write_empty_values=True)

    def save(self, config=None):
        """Saves config to config files.

        Raises:
            dvc.config.ConfigError: thrown if failed to write config file.
        """
        if config is not None:
            clist = [config]
        else:
            clist = [
                self._system_config,
                self._global_config,
                self._repo_config,
                self._local_config,
            ]

        for conf in clist:
            self._save(conf)

        self.load()

    @staticmethod
    def _save(config):
        # Write a single configobj to disk, creating its directory if needed;
        # in-memory configs (no filename) are skipped.
        if config.filename is None:
            return

        logger.debug("Writing '{}'.".format(config.filename))
        dname = os.path.dirname(os.path.abspath(config.filename))
        try:
            os.makedirs(dname)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
        config.write()

    def unset(self, section, opt=None, level=None, force=False):
        """Unsets specified option and/or section in the config.

        Args:
            section (str): section name.
            opt (str): optional option name.
            level (int): config level to use.
            force (bool): don't error-out even if section doesn't exist.
                False by default.

        Raises:
            dvc.config.ConfigError: thrown if section doesn't exist and
                `force != True`.
        """
        config = self.get_configobj(level)

        if section not in config.keys():
            if force:
                return
            raise ConfigError("section '{}' doesn't exist".format(section))

        if opt:
            if opt not in config[section].keys():
                if force:
                    return
                raise ConfigError(
                    "option '{}.{}' doesn't exist".format(section, opt)
                )
            del config[section][opt]

            # Drop the section entirely once its last option is removed.
            if not config[section]:
                del config[section]
        else:
            del config[section]

        self.save(config)

    def set(self, section, opt, value, level=None, force=True):
        """Sets specified option in the config.

        Args:
            section (str): section name.
            opt (str): option name.
            value: value to set option to.
            level (int): config level to use.
            force (bool): set option even if section already exists. True by
                default.

        Raises:
            dvc.config.ConfigError: thrown if section already exists and
                `force != True`.
        """
        config = self.get_configobj(level)

        if section not in config.keys():
            config[section] = {}
        elif not force:
            raise ConfigError(
                "Section '{}' already exists. Use `-f|--force` to overwrite "
                "section with new value.".format(section)
            )

        config[section][opt] = value

        self.save(config)

    def get(self, section, opt=None, level=None):
        """Return option value from the config.

        Args:
            section (str): section name.
            opt (str): option name.
            level (int): config level to use.

        Returns:
            value (str, int): option value.
        """
        config = self.get_configobj(level)

        if section not in config.keys():
            raise ConfigError("section '{}' doesn't exist".format(section))

        if opt not in config[section].keys():
            raise ConfigError(
                "option '{}.{}' doesn't exist".format(section, opt)
            )

        return config[section][opt]

    @staticmethod
    def _lower(config):
        # Normalize all section and option names to lowercase; values are
        # stringified as configobj stores plain strings.
        new_config = configobj.ConfigObj()
        for s_key, s_value in config.items():
            new_s = {}
            for key, value in s_value.items():
                new_s[key.lower()] = str(value)
            new_config[s_key.lower()] = new_s
        return new_config

    def get_configobj(self, level):
        """Return the configobj for *level*, defaulting to the repo level."""
        configs = {
            self.LEVEL_LOCAL: self._local_config,
            self.LEVEL_REPO: self._repo_config,
            self.LEVEL_GLOBAL: self._global_config,
            self.LEVEL_SYSTEM: self._system_config,
        }
        return configs.get(level, self._repo_config)

    def list_options(self, section_regex, option, level=None):
        """Map the regex "name" group of matching sections to *option* values."""
        ret = {}
        config = self.get_configobj(level)
        for section in config.keys():
            r = re.match(section_regex, section)
            if r:
                name = r.group("name")
                value = config[section].get(option, "")
                ret[name] = value
        return ret
def test_range_inside():
    """A value strictly between the bounds validates and is returned as-is."""
    validator = Schema(Range(min=0, max=10))
    assert_equal(5, validator(5))
grader_base_path = "./graders" submission_schema = Schema({ Required("tid"): check( ("This does not look like a valid tid.", [str, Length(max=100)])), Required("pid"): check( ("This does not look like a valid pid.", [str, Length(max=100)])), Required("key"): check( ("This does not look like a valid key.", [str, Length(max=100)])) }) problem_schema = Schema({ Required("name"): check( ("The problem's display name must be a string.", [str])), Required("score"): check( ("Score must be a positive integer.", [int, Range(min=0)])), Required("category"): check( ("Category must be a string.", [str])), Required("grader"): check( ("The grader path must be a string.", [str])), Required("description"): check( ("The problem description must be a string.", [str])), Required("threshold"): check( ("Threshold must be a positive integer.", [int, Range(min=0)])), "disabled": check( ("A problem's disabled state is either True or False.", [ lambda disabled: type(disabled) == bool])), "autogen": check( ("A problem should either be autogenerated or not, True/False", [ lambda autogen: type(autogen) == bool])),
def test_range_outside():
    """Values beyond either bound are rejected."""
    validator = Schema(Range(min=0, max=10))
    assert_raises(MultipleInvalid, validator, 12)
    assert_raises(MultipleInvalid, validator, -1)
if os.getenv(ENV_RASPBERRYPI3) == "true": return HWACCEL_RPI3_DECODER_CODEC return codec SCHEMA = Schema( All( [{ Required("name"): All(str, Length(min=1)), Optional("mqtt_name", default=None): Any(All(str, Length(min=1)), None), Required("host"): All(str, Length(min=1)), Required("port"): All(int, Range(min=1)), Optional("username", default=None): Any(All(str, Length(min=1)), None), Optional("password", default=None): Any(All(str, Length(min=1)), None), Required("path"): All(str, Length(min=1)), Optional("width", default=None): Any(int, None), Optional("height", default=None): Any(int, None), Optional("fps", default=None): Any(All(int, Range(min=1)), None), Optional("global_args", default=CAMERA_GLOBAL_ARGS): list, Optional("input_args", default=CAMERA_INPUT_ARGS):
def test_range_no_upper_limit():
    """With only min set, large values pass while min is still enforced."""
    validator = Schema(Range(min=0))
    assert_equal(123, validator(123))
    assert_raises(MultipleInvalid, validator, -1)
def job(extra_context_variables=None):
    """Build the voluptuous validator for a LAVA job definition.

    Args:
        extra_context_variables: optional list of additional context variable
            names accepted on top of CONTEXT_VARIABLES.

    BUGFIX(review): the original used a mutable default argument
    (``extra_context_variables=[]``); replaced with the ``None`` sentinel —
    identical behavior without the shared-default pitfall.
    """
    if extra_context_variables is None:
        extra_context_variables = []
    context_variables = CONTEXT_VARIABLES + extra_context_variables

    # The same strict "context" sub-schema is needed at the job level and
    # inside each multinode role; build each instance through one helper.
    def context_schema():
        return Schema(
            {In(context_variables): Any(int, str, [int, str])}, extra=False
        )

    lava_lxc = {
        Required("name"): str,
        Required("distribution"): str,
        Required("release"): str,
        Optional("arch"): str,
        Optional("mirror"): str,
        Optional("persist"): bool,
        Optional("security_mirror"): str,
        Optional("template"): str,
        Optional("timeout"): timeout(),
        Optional("verbose"): bool,
    }
    return All(
        {
            Required("job_name"): All(str, Length(min=1, max=200)),
            Optional("device_type"): All(str, Length(min=1, max=200)),
            Required("timeouts"): {
                Required("job"): timeout(),
                Optional("action"): timeout(),
                Optional("actions"): {str: timeout()},
                Optional("connection"): timeout(),
                Optional("connections"): {str: timeout()},
            },
            Required("visibility"): Any("public", "personal", {"group": [str]}),
            Optional("context"): context_schema(),
            Optional("metadata"): {str: object},
            Optional("priority"): Any("high", "medium", "low", Range(min=0, max=100)),
            Optional("tags"): [str],
            Optional("secrets"): dict,
            Optional("environment"): dict,
            Optional("protocols"): {
                Optional("lava-lxc"): Any(lava_lxc, {str: lava_lxc}),
                Optional("lava-multinode"): {
                    Required("roles"): {
                        str: Any(
                            {
                                Required("device_type"): str,
                                Required("count"): Range(min=0),
                                Optional("context"): context_schema(),
                                Optional("tags"): [str],
                                Optional("environment"): dict,
                                Optional("essential"): bool,
                                Optional("timeout"): timeout(),
                            },
                            {
                                Required("connection"): str,
                                Required("count"): Range(min=0),
                                Required("expect_role"): str,
                                Required("host_role"): str,
                                Optional("essential"): bool,
                                Optional("request"): str,
                                Optional("tags"): [str],
                                Optional("timeout"): timeout(),
                                Optional("context"): context_schema(),
                            },
                        )
                    },
                    Optional("timeout"): timeout(),
                },
                Optional("lava-vland"): Any(
                    {str: {str: {Required("tags"): [str]}}},
                    {str: {Required("tags"): [str]}},
                ),
                Optional("lava-xnbd"): {
                    Required("port"): Any("auto", int),
                    Optional("timeout"): timeout(),
                },
            },
            Optional("notify"): notify(),
            Optional("reboot_to_fastboot"): bool,
            Required("actions"): [{Any("boot", "command", "deploy", "test"): dict}],
        },
        extra_checks,
    )
def test_range_no_lower_limit():
    """With only max set, small values pass while max is still enforced."""
    validator = Schema(Range(max=10))
    assert_equal(-1, validator(-1))
    assert_raises(MultipleInvalid, validator, 123)
submission_schema = Schema({ Required("tid"): check(("This does not look like a valid tid.", [str, Length(max=100)])), Required("pid"): check(("This does not look like a valid pid.", [str, Length(max=100)])), Required("key"): check(("This does not look like a valid key.", [str, Length(max=100)])) }) problem_schema = Schema({ Required("name"): check(("The problem's display name must be a string.", [str])), Required("sanitized_name"): check(("The problems's sanitized name must be a string.", [str])), Required("score"): check(("Score must be a positive integer.", [int, Range(min=0)])), Required("author"): check(("Author must be a string.", [str])), Required("category"): check(("Category must be a string.", [str])), Required("instances"): check(("The instances must be a list.", [list])), Required("hints"): check(("Hints must be a list.", [list])), "description": check(("The problem description must be a string.", [str])), "version": check(("A version must be a string.", [str])), "tags": check(("Tags must be described as a list.", [list])), "organization":
def test_range_excludes_none():
    """None cannot be compared against the bounds, so it must be rejected."""
    validator = Schema(Range(min=0, max=10))
    assert_raises(MultipleInvalid, validator, None)
return schemas[""](data) if parsed.scheme not in schemas: raise Invalid(f"Unsupported URL type {parsed.scheme}://") return schemas[parsed.scheme](data) return validate class RelPath(str): pass REMOTE_COMMON = { "url": str, "checksum_jobs": All(Coerce(int), Range(1)), Optional("no_traverse"): Bool, # obsoleted "verify": Bool, } LOCAL_COMMON = { "type": supported_cache_type, Optional("protected", default=False): Bool, # obsoleted "shared": All(Lower, Choices("group")), Optional("slow_link_warning", default=True): Bool, } HTTP_COMMON = { "auth": All(Lower, Choices("basic", "digest", "custom")), "custom_auth_header": str, "user": str, "password": str, "ask_password": Bool,
def test_range_excludes_string():
    """Strings are not comparable to numeric bounds and must be rejected."""
    validator = Schema(Range(min=0, max=10))
    assert_raises(MultipleInvalid, validator, "abc")
def test_range_exlcudes_nan():
    """NaN compares false against every bound, so Range must reject it.

    NOTE(review): the function name misspells "excludes"; kept as-is so any
    external references to this test id keep working.
    """
    validator = Schema(Range(min=0, max=10))
    assert_raises(MultipleInvalid, validator, float('nan'))
def test_range_excludes_unordered_object():
    """Objects with no ordering relative to the bounds are rejected."""
    class Unordered(object):
        pass

    validator = Schema(Range(min=0, max=10))
    assert_raises(MultipleInvalid, validator, Unordered())
class DonutModel(Model): """ Time-series VAE model, "Donut" """ TYPE = 'donut' SCHEMA = Model.SCHEMA.extend({ Required('bucket_interval'): schemas.TimeDelta( min=0, min_included=False, ), Required('interval'): schemas.TimeDelta(min=0, min_included=False), Required('offset'): schemas.TimeDelta(min=0), Required('span'): Any(None, "auto", All(int, Range(min=1))), Optional('min_span'): All(int, Range(min=1)), Optional('max_span'): All(int, Range(min=1)), Optional('seasonality', default=DEFAULT_SEASONALITY): schemas.seasonality, Optional('forecast'): Any(None, "auto", All(int, Range(min=1))), Optional('grace_period', default=0): schemas.TimeDelta(min=0, min_included=True), 'default_datasink': schemas.key, }) def __init__(self, settings, state=None): global _hp_span_min, _hp_span_max super().__init__(settings, state) settings = self.validate(settings) self.bucket_interval = parse_timedelta( settings.get('bucket_interval')).total_seconds() self.interval = parse_timedelta( settings.get('interval')).total_seconds() self.offset = parse_timedelta(settings.get('offset')).total_seconds() self.span = settings.get('span') self.means = None self.stds = None self.scores = None self._keras_model = None self._encoder_model = None self._decoder_model = None if self.span is None or self.span == "auto": self.min_span = settings.get('min_span') or _hp_span_min self.max_span = settings.get('max_span') or _hp_span_max else: self.min_span = self.span self.max_span = self.span self.grace_period = parse_timedelta( settings['grace_period']).total_seconds() self.current_eval = None if len(self.features) > 1: raise errors.LoudMLException( "This model type supports one unique feature") def enum_features(self, is_input=None, is_output=None): j = 0 for i, feature in enumerate(self.features): if feature.is_input == is_input or feature.is_output == is_output: yield i, j, feature j += 1 @property def type(self): return self.TYPE @property def W(self): return self.span def get_hp_span(self, label): if 
(self.max_span - self.min_span) <= 0: space = self.span else: space = self.min_span + \ hp.randint(label, (self.max_span - self.min_span)) return space def set_run_params(self, params=None): """ Set running parameters to make them persistent """ if params is None: self._settings.pop('run', None) else: self._settings['run'] = params def set_run_state(self, params=None): """ Set running forecast parameters to make them persistent """ if params is None: self._state.pop('run', None) else: self._state['run'] = params def get_run_state(self): return self._state.get('run') or {} def compute_nb_buckets(self, from_ts, to_ts): """ Compute the number of bucket between `from_ts` and `to_ts` """ return int((to_ts - from_ts) / self.bucket_interval) + 2 def apply_defaults(self, x): """ Apply default feature value to np array """ feature = self.features[0] if feature.default == "previous": previous = None for j, value in enumerate(x): if np.isnan(value): x[j] = previous else: previous = x[j] elif not np.isnan(feature.default): x[np.isnan(x)] = feature.default def scale_dataset( self, dataset, ): """ Scale dataset values """ out = _get_scores( dataset, _mean=self.means[0], _std=self.stds[0], ) return out def unscale_dataset( self, dataset, ): """ Revert scaling dataset values """ out = _revert_scores( dataset, _mean=self.means[0], _std=self.stds[0], ) return out def stat_dataset(self, dataset): """ Compute dataset sets and keep them as reference """ self.means = np.array([np.nanmean(dataset, axis=0)]) self.stds = np.array([np.nanstd(dataset, axis=0)]) self.stds[self.stds == 0] = 1.0 def set_auto_threshold(self): """ Compute best threshold values automatically """ # 68–95–99.7 three-sigma rule self.min_threshold = 68 self.max_threshold = 99.7 def _set_xpu_config(self, num_cpus, num_gpus): config = tf.ConfigProto( allow_soft_placement=True, device_count={ 'CPU': num_cpus, 'GPU': num_gpus }, ) config.gpu_options.allow_growth = True # config.log_device_placement = True # 
config.intra_op_parallelism_threads=num_cores # config.inter_op_parallelism_threads=num_cores sess = tf.Session(config=config) K.set_session(sess) def _train_on_dataset( self, dataset, train_size=0.67, batch_size=64, num_epochs=100, num_cpus=1, num_gpus=0, max_evals=None, progress_cb=None, abnormal=None, ): if max_evals is None: # latent_dim*intermediate_dim max_evals = self.settings.get('max_evals', 21) self.current_eval = 0 self.stat_dataset(dataset) dataset = self.scale_dataset(dataset) def cross_val_model(params): keras_model = None # Destroys the current TF graph and creates a new one. # Useful to avoid clutter from old models / layers. K.clear_session() self._set_xpu_config(num_cpus, num_gpus) self.span = W = params.span (X_miss, X_train), (X_miss_val, X_test) = self.train_test_split( dataset, train_size=train_size, abnormal=abnormal, ) if len(X_train) == 0: raise errors.NoData("insufficient training data") if len(X_test) == 0: raise errors.NoData("insufficient validation data") # expected input data shape: (batch_size, timesteps,) # network parameters input_shape = (W, ) intermediate_dim = params.intermediate_dim latent_dim = params.latent_dim # VAE model = encoder + decoder # build encoder model main_input = Input(shape=input_shape) # bool vector to flag missing data points aux_input = Input(shape=input_shape) aux_output = Lambda(lambda x: x)(aux_input) x = Dense(intermediate_dim, kernel_regularizer=regularizers.l2(0.01), activation='relu')(main_input) z_mean = Dense(latent_dim, name='z_mean')(x) z_log_var = Dense(latent_dim, name='z_log_var')(x) # use reparameterization trick to push the sampling out as input # note that "output_shape" isn't necessary with the TensorFlow backend z = Lambda(sampling, output_shape=(latent_dim, ), name='z')([z_mean, z_log_var]) # build decoder model x = Dense(intermediate_dim, kernel_regularizer=regularizers.l2(0.01), activation='relu', name='dense_1')(z) main_output = Dense(W, activation='linear', name='dense_2')(x) # 
instantiate Donut model keras_model = _Model([main_input, aux_input], [main_output, aux_output], name='donut') add_loss(keras_model, W) optimizer_cls = None if params.optimizer == 'adam': optimizer_cls = tf.keras.optimizers.Adam() keras_model.compile(optimizer=optimizer_cls, ) _stop = EarlyStopping( monitor='val_loss', patience=5, verbose=_verbose, mode='auto', ) keras_model.fit_generator( generator(X_train, X_miss, batch_size, keras_model), epochs=num_epochs, steps_per_epoch=len(X_train) / batch_size, verbose=_verbose, validation_data=([X_test, X_miss_val], None), callbacks=[_stop], workers=0, # https://github.com/keras-team/keras/issues/5511 ) # How well did it do? score = keras_model.evaluate( [X_test, X_miss_val], batch_size=batch_size, verbose=_verbose, ) self.current_eval += 1 if progress_cb is not None: progress_cb(self.current_eval, max_evals) return score, keras_model hyperparameters = HyperParameters() # Parameter search space def objective(args): hyperparameters.assign(args) try: score, _ = cross_val_model(hyperparameters) return {'loss': score, 'status': STATUS_OK} except Exception as exn: logging.warning("iteration failed: %s", exn) return {'loss': None, 'status': STATUS_FAIL} space = hp.choice('case', [{ 'span': self.get_hp_span('span'), 'latent_dim': hp.choice('latent_dim', [3, 5, 8]), 'intermediate_dim': hp.choice('i1', [21, 34, 55, 89, 144, 233, 377]), 'optimizer': hp.choice('optimizer', ['adam']), }]) # The Trials object will store details of each iteration trials = Trials() # Run the hyperparameter search using the tpe algorithm try: best = fmin( objective, space, algo=tpe.suggest, max_evals=max_evals, trials=trials, ) except ValueError: raise errors.NoData( "training failed, try to increase the time range") # Get the values of the optimal parameters best_params = space_eval(space, best) score, self._keras_model = cross_val_model( HyperParameters(best_params)) self.span = best_params['span'] return (best_params, score) def _train_ckpt_on_dataset( 
        self,
        dataset,
        train_size=0.67,
        batch_size=64,
        num_epochs=100,
        progress_cb=None,
        abnormal=None,
    ):
        # Incremental (checkpoint) training: reuses the already-loaded Keras
        # model instead of running a fresh hyper-parameter search.
        self.current_eval = 0
        self.stat_dataset(dataset)
        dataset = self.scale_dataset(dataset)
        (X_miss, X_train), (X_miss_val, X_test) = self.train_test_split(
            dataset,
            train_size=train_size,
        )
        # Stop once validation loss plateaus for 5 epochs.
        _stop = EarlyStopping(
            monitor='val_loss',
            patience=5,
            verbose=_verbose,
            mode='auto',
        )
        self._keras_model.fit(
            [X_train, X_miss],
            epochs=num_epochs,
            batch_size=batch_size,
            verbose=_verbose,
            validation_data=([X_test, X_miss_val], None),
            callbacks=[_stop],
        )

        # How well did it do?
        score = self._keras_model.evaluate(
            [X_test, X_miss_val],
            batch_size=batch_size,
            verbose=_verbose,
        )
        return score

    def compute_bucket_scores(self, y_true, y_pred, y_low, y_high):
        """
        Compute the anomaly score and mean squared error for one bucket.

        Returns a (score, mse) tuple where score is a percentage in [0, 100]
        and mse is the squared prediction error for the bucket.
        """
        feature = self.features[0]
        diff = y_true - y_pred
        ano_type = feature.anomaly_type
        # Treat [y_low, y_high] as a +/- 3-sigma confidence band: mu is the
        # band centre and std is one sixth of the band width.
        mu = (y_low + y_high) / 2.0
        std = (y_high - mu) / 3.0
        # Two-sided tail probability mapped to [0, 1].
        score = 2 * norm.cdf(abs(y_true - mu), loc=0, scale=std) - 1
        # Required to handle the 'low' condition
        if diff < 0:
            score *= -1

        # Keep only the direction the feature cares about.
        if ano_type == 'low':
            score = -min(score, 0)
        elif ano_type == 'high':
            score = max(score, 0)
        else:
            score = abs(score)

        # Clamp into [0, 1] and express as a percentage.
        score = 100 * max(0, min(1, score))
        mse = np.nanmean((diff**2), axis=None)
        return score, mse

    def compute_scores(self, observed, predicted, low, high):
        """
        Compute per-bucket anomaly scores and MSE for a whole timeseries.

        All four arguments are sequences of equal length; returns the
        (scores, mses) arrays produced by compute_bucket_scores.
        """
        nb_buckets = len(observed)
        scores = np.empty((nb_buckets, ), dtype=float)
        mses = np.empty((nb_buckets), dtype=float)

        for i in range(nb_buckets):
            scores[i], mses[i] = self.compute_bucket_scores(
                observed[i],
                predicted[i],
                low[i],
                high[i],
            )

        return scores, mses

    def _format_dataset(self, x, accept_missing=True, abnormal=None):
        """
        Format dataset for time-series training & inference.

        input:
          [v0, v1, v2, v3, v4 ..., vn]

        len: W

        output:
          missing = [0, 0, 1..., 0]
          X = [
            [v0, v1, v2],  # span = W
            [v1, v2, v3],
            [v2, v3, v4],
            ...
            [..., .., vn],
          ]

        Buckets with missing values are flagged in the missing array.
        """
        missing = []
        data_x = []
        # Slide a window of width W over x, one bucket at a time.
        for i in range(len(x) - self.W + 1):
            j = i + self.W
            # When accept_missing is False, windows containing NaN are dropped.
            if accept_missing or not np.isnan(x[i:j]).any():
                # arxiv.org/abs/1802.03903
                # set user defined abnormal data points to zero
                if abnormal is None:
                    is_nan = np.isnan(x[i:j])
                else:
                    is_nan = np.logical_or(
                        np.isnan(x[i:j]),
                        abnormal[i:j],
                    )

                missing.append(is_nan)
                _x = np.copy(x[i:j])
                # set missing points to zero
                _x[is_nan] = 0.0
                data_x.append(_x)

        return np.array(missing), np.array(data_x)

    def train_test_split(self, dataset, abnormal=None, train_size=0.67):
        """
        Splits data to training and testing parts.

        Returns ((train_missing, train_X), (test_missing, test_X)); the
        abnormal mask is only applied to the training slice.
        """
        ntrn = round(len(dataset) * train_size)
        X_train_missing, X_train = self._format_dataset(dataset[0:ntrn],
                                                        abnormal=abnormal)
        X_test_missing, X_test = self._format_dataset(dataset[ntrn:])
        return (X_train_missing, X_train), (X_test_missing, X_test)

    def train(
        self,
        datasource,
        from_date,
        to_date="now",
        train_size=0.67,
        batch_size=256,
        num_epochs=100,
        num_cpus=1,
        num_gpus=0,
        max_evals=None,
        progress_cb=None,
        incremental=False,
        # NOTE(review): mutable default argument; safe only if callers never
        # mutate it — consider windows=None with a local default.
        windows=[],
    ):
        """
        Train the model on data pulled from datasource over the date range.
        """
        set_seed()
        # Reset any statistics kept from a previous training run.
        self.means, self.stds = None, None
        self.scores = None
        period = self.build_date_range(from_date, to_date)
        logging.info(
            "train(%s) range=%s train_size=%f batch_size=%d epochs=%d)",
            self.name,
            period,
            train_size,
            batch_size,
            num_epochs,
        )

        # Prepare dataset
        nb_buckets = self.compute_nb_buckets(period.from_ts, period.to_ts)
        dataset = np.full((nb_buckets, ), np.nan, dtype=float)
        abnormal = _format_windows(
            period.from_ts,
            period.to_ts,
            self.bucket_interval,
            windows,
        )

        # Fill dataset
        data = datasource.get_times_data(self, period.from_ts, period.to_ts)
        # FIXME: query abnormal points flagged

        # i stays None when the datasource yields nothing at all.
        i = None
        for i, (_, val, timeval) in enumerate(data):
            dataset[i] = val

        if i is None:
            raise errors.NoData(
                "no data found for time range {}".format(period))

        self.apply_defaults(dataset)
        nb_buckets_found = i + 1
        if nb_buckets_found < nb_buckets:
            # Trim the preallocated array down to the buckets actually seen.
            dataset = np.resize(dataset, (nb_buckets_found, ))
        logging.info("found %d time periods",
nb_buckets_found) if incremental: best_params = self._state.get('best_params', dict()) # Destroys the current TF graph and creates a new one. # Useful to avoid clutter from old models / layers. self.load(num_cpus, num_gpus) score = self._train_ckpt_on_dataset( dataset, train_size, batch_size, num_epochs, progress_cb=progress_cb, abnormal=abnormal, ) else: best_params, score = self._train_on_dataset( dataset, train_size, batch_size, num_epochs, num_cpus, num_gpus, max_evals, progress_cb=progress_cb, abnormal=abnormal, ) self.current_eval = None for key, val in best_params.items(): if not isinstance(val, str) and \ not isinstance(val, int) and \ not isinstance(val, float): best_params[key] = np.asscalar(val) model_b64 = _serialize_keras_model(self._keras_model) self._state = { 'h5py': model_b64, 'best_params': best_params, 'means': self.means.tolist(), 'stds': self.stds.tolist(), 'loss': score, } self.unload() # prediction = self.predict( # datasource, # from_date, # to_date, # num_cpus=num_cpus, # num_gpus=num_gpus, # ) # prediction.stat() return { 'loss': score, } def unload(self): """ Unload current model """ self._keras_model = None self._encoder_model = None self._decoder_model = None K.clear_session() def load(self, num_cpus, num_gpus): """ Load current model """ if not self.is_trained: raise errors.ModelNotTrained() if self._keras_model: # Already loaded return set_seed() K.clear_session() self._set_xpu_config(num_cpus, num_gpus) if self._state.get('h5py', None) is not None: self._keras_model = _load_keras_model(self._state.get('h5py')) # instantiate encoder model self._encoder_model = _get_encoder(self._keras_model) # instantiate decoder model self._decoder_model = _get_decoder(self._keras_model) else: raise errors.ModelNotTrained() if 'means' in self._state: self.means = np.array(self._state['means']) if 'stds' in self._state: self.stds = np.array(self._state['stds']) if 'scores' in self._state: self.scores = np.array(self._state['scores']) if 
self.min_threshold == 0 and self.max_threshold == 0: self.set_auto_threshold() logging.info( "setting threshold range min=%f max=%f", self.min_threshold, self.max_threshold, ) @property def is_trained(self): """ Tells if model is trained """ return self._state is not None and ('weights' in self._state or 'h5py' in self._state) @property def _span(self): if self._state and 'span' in self._state['best_params']: return self._state['best_params']['span'] else: return self.span @property def _window(self): return self._span def predict( self, datasource, from_date, to_date, num_cpus=1, num_gpus=0, ): global g_mcmc_count global g_mc_count global g_mc_batch_size period = self.build_date_range(from_date, to_date) # This is the number of buckets that the function MUST return predict_len = int( (period.to_ts - period.from_ts) / self.bucket_interval) logging.info("predict(%s) range=%s", self.name, period) self.load(num_cpus, num_gpus) # Build history time range # Extra data are required to predict first buckets _window = self._window - 1 hist = DateRange( period.from_ts - _window * self.bucket_interval, period.to_ts, ) # Prepare dataset nb_buckets = int((hist.to_ts - hist.from_ts) / self.bucket_interval) dataset = np.full((nb_buckets, ), np.nan, dtype=float) X = [] # Fill dataset logging.info("extracting data for range=%s", hist) data = datasource.get_times_data(self, hist.from_ts, hist.to_ts) # Only a subset of history will be used for computing the prediction X_until = None # right bound for prediction i = None for i, (_, val, timeval) in enumerate(data): dataset[i] = val dt = make_datetime(timeval) ts = dt.timestamp() if ts < period.to_ts: X.append(make_ts(timeval)) X_until = i + 1 if i is None: raise errors.NoData("no data found for time range {}".format(hist)) self.apply_defaults(dataset) nb_buckets_found = i + 1 if nb_buckets_found < nb_buckets: dataset = np.resize(dataset, (nb_buckets_found, )) logging.info("found %d time periods", nb_buckets_found) real = 
np.copy(dataset) norm_dataset = self.scale_dataset(dataset) missing, X_test = self._format_dataset(norm_dataset[:X_until]) if len(X_test) == 0: raise errors.LoudMLException("not enough data for prediction") # force last col to missing missing[:, -1] = True logging.info("generating prediction") x_ = X_test.copy() # MCMC for _ in range(g_mcmc_count): z_mean, _, _ = self._encoder_model.predict( [x_, missing], batch_size=g_mc_batch_size) x_decoded = self._decoder_model.predict(z_mean, batch_size=g_mc_batch_size) x_[missing] = x_decoded[missing] y = np.full((predict_len, ), np.nan, dtype=float) y_low = np.full((predict_len, ), np.nan, dtype=float) y_high = np.full((predict_len, ), np.nan, dtype=float) no_missing_point = np.full((g_mc_count, self.W), False, dtype=bool) for j, x in enumerate(x_): y[j] = x[-1] # MC integration _, _, Z = self._encoder_model.predict( [np.tile(x, [g_mc_count, 1]), no_missing_point], batch_size=g_mc_batch_size, ) x_decoded = self._decoder_model.predict(Z, batch_size=g_mc_batch_size) std = np.std(x_decoded[:, -1]) y_low[j] = x[-1] - 3 * std y_high[j] = x[-1] + 3 * std y = self.unscale_dataset(y) y_low = self.unscale_dataset(y_low) y_high = self.unscale_dataset(y_high) # Build final result timestamps = X[_window:] shape = (predict_len, len(self.features)) observed = np.full(shape, np.nan, dtype=float) observed = real[_window:] self.apply_defaults(observed) self.apply_defaults(y) return TimeSeriesPrediction( self, timestamps=timestamps, observed=observed, predicted=y, lower=y_low, upper=y_high, ) def generate_fake_prediction(self): now_ts = datetime.datetime.now().timestamp() timestamps = [ now_ts - 2 * self.bucket_interval, now_ts - self.bucket_interval, now_ts, ] normal = [0.0] * len(self.features) anomaly = [sys.float_info.max] * len(self.features) return TimeSeriesPrediction( self, timestamps=timestamps, observed=np.array([normal, anomaly, normal]), predicted=np.array([normal, normal, normal]), ) def forecast( self, datasource, from_date, 
to_date, percent_interval=0.68, percent_noise=0, num_cpus=1, num_gpus=0, ): global g_mcmc_count global g_mc_count global g_mc_batch_size period = self.build_date_range(from_date, to_date) # This is the number of buckets that the function MUST return forecast_len = int( (period.to_ts - period.from_ts) / self.bucket_interval) logging.info("forecast(%s) range=%s", self.name, period) self.load(num_cpus, num_gpus) # Build history time range # Extra data are required to predict first buckets _window = self._window - 1 hist = DateRange( period.from_ts - _window * self.bucket_interval, period.to_ts, ) # Prepare dataset nb_buckets = int((hist.to_ts - hist.from_ts) / self.bucket_interval) dataset = np.full((nb_buckets, ), np.nan, dtype=float) X = [] # Fill dataset logging.info("extracting data for range=%s", hist) data = datasource.get_times_data(self, hist.from_ts, hist.to_ts) # Only a subset of history will be used for computing the prediction X_until = None # right bound for prediction i = None for i, (_, val, timeval) in enumerate(data): dataset[i] = val dt = make_datetime(timeval) ts = dt.timestamp() if ts < period.to_ts: X.append(make_ts(timeval)) X_until = i + 1 if i is None: raise errors.NoData("no data found for time range {}".format(hist)) self.apply_defaults(dataset) nb_buckets_found = i + 1 if nb_buckets_found < nb_buckets: dataset = np.resize(dataset, (nb_buckets_found, )) logging.info("found %d time periods", nb_buckets_found) real = np.copy(dataset) norm_dataset = self.scale_dataset(dataset) _, X_test = self._format_dataset(norm_dataset[:X_until]) if len(X_test) == 0: raise errors.LoudMLException("not enough data for prediction") logging.info("generating prediction") x_ = X_test.copy() p = norm().ppf(1 - (1 - percent_interval) / 2) missing = np.full((self._window, ), False, dtype=bool) # force last col to missing missing[-1] = True y = np.full((forecast_len, ), np.nan, dtype=float) y_low = np.full((forecast_len, ), np.nan, dtype=float) y_high = 
np.full((forecast_len, ), np.nan, dtype=float) x = x_[0] noise = percent_noise * float(self.bucket_interval) / (24 * 3600) for j, _ in enumerate(x_): # MCMC for _ in range(g_mcmc_count): z_mean, _, _ = self._encoder_model.predict( [np.array([x]), np.array([missing])], batch_size=g_mc_batch_size, ) x_decoded = self._decoder_model.predict( z_mean, batch_size=g_mc_batch_size) x[missing] = x_decoded[0][missing] # uncertainty is modeled using a random uniform noise distribution # that increases over time expand = np.random.uniform(-noise * j, noise * j, len(x)) x *= 1 + expand # MC integration _, _, Z = self._encoder_model.predict( [ np.tile(x, [g_mc_count, 1]), np.tile(missing, [g_mc_count, 1]) ], batch_size=g_mc_batch_size, ) x_decoded = self._decoder_model.predict(Z, batch_size=g_mc_batch_size) std = np.std(x_decoded[:, -1]) y_low[j] = x[-1] - p * std y_high[j] = x[-1] + p * std y[j] = x[-1] x = np.roll(x, -1) # set missing point to zero x[-1] = 0 y = self.unscale_dataset(y) y_low = self.unscale_dataset(y_low) y_high = self.unscale_dataset(y_high) # Build final result timestamps = X[_window:] shape = (forecast_len, len(self.features)) observed = np.full(shape, np.nan, dtype=float) observed = real[_window:] self.apply_defaults(observed) self.apply_defaults(y) return TimeSeriesPrediction( self, timestamps=timestamps, observed=observed, predicted=y, lower=y_low, upper=y_high, ) def detect_anomalies(self, prediction, hooks=[]): """ Detect anomalies on observed data by comparing them to the values predicted by the model """ prediction.stat() stats = [] anomaly_indices = [] for i, ts in enumerate(prediction.timestamps): last_anomaly_ts = self._state.get('last_anomaly_ts', 0) in_grace_period = (ts - last_anomaly_ts) < self.grace_period dt = ts_to_datetime(ts) date_str = datetime_to_str(dt) is_anomaly = False anomalies = {} predicted = prediction.predicted[i] observed = prediction.observed[i] score = prediction.scores[i] mse = prediction.mses[i] max_score = 0 feature = 
self.features[0] max_score = max(max_score, score) if (not in_grace_period) and score >= self.max_threshold: anomalies[feature.name] = { 'type': 'low' if observed < predicted else 'high', 'score': score, } if len(anomalies): is_anomaly = True anomaly_indices.append(i) anomaly = self._state.get('anomaly') if anomaly is None: if is_anomaly: # This is a new anomaly # TODO have a Model.logger to prefix all logs with model name logging.warning( "detected anomaly for model '%s' at %s (score = %.1f)", self.name, date_str, max_score, ) self._state['anomaly'] = { 'start_ts': ts, 'max_score': max_score, } for hook in hooks: logging.debug("notifying '%s' hook", hook.name) data = prediction.format_bucket_data(i) try: hook.on_anomaly_start( dt=dt, score=max_score, predicted=data['predicted'], observed=data['observed'], anomalies=anomalies, ) except Exception as exn: # XXX: catch all the exception to avoid # interruption logging.exception(exn) else: if is_anomaly: anomaly['max_score'] = max(anomaly['max_score'], max_score) logging.warning( "anomaly still in progress for model '%s' at %s (score = %.1f)", self.name, date_str, max_score, ) elif score < self.min_threshold: logging.info( "anomaly ended for model '%s' at %s (score = %.1f)", self.name, date_str, max_score, ) for hook in hooks: logging.debug("notifying '%s' hook", hook.name) hook.on_anomaly_end(dt, max_score) self._state['anomaly'] = None self._state['last_anomaly_ts'] = ts stats.append({ 'mse': nan_to_none(mse), 'score': max_score, 'anomaly': is_anomaly, 'anomalies': anomalies, }) prediction.stats = stats prediction.anomaly_indices = anomaly_indices def predict2( self, datasource, from_date, to_date, mse_rtol, _state={}, num_cpus=1, num_gpus=0, ): return self.predict( datasource, from_date, to_date, num_cpus=num_cpus, num_gpus=num_gpus, ) def plot_results( self, datasource, from_date, to_date, num_cpus=1, num_gpus=0, x_dim=-1, y_dim=-1, output=None, ): """ # Arguments: models (tuple): encoder and decoder models data 
(tuple): test data and label model_name (string): which model is using this function """ global g_mc_batch_size # Agg = Anti-grain geometry engine # running inside a Docker image. No Xwindow import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt period = self.build_date_range(from_date, to_date) logging.info("plot_results(%s) range=%s", self.name, period) self.load(num_cpus, num_gpus) _, latent_dim = self._encoder_model.outputs[0].get_shape() # Build history time range # Extra data are required to predict first buckets _window = self._window - 1 hist = DateRange( period.from_ts - _window * self.bucket_interval, period.to_ts, ) # Prepare dataset nb_buckets = int((hist.to_ts - hist.from_ts) / self.bucket_interval) dataset = np.full((nb_buckets, ), np.nan, dtype=float) # Fill dataset logging.info("extracting data for range=%s", hist) data = datasource.get_times_data(self, hist.from_ts, hist.to_ts) # Only a subset of history will be used for computing the prediction X_until = None # right bound for prediction i = None for i, (_, val, timeval) in enumerate(data): dataset[i] = val dt = make_datetime(timeval) ts = dt.timestamp() if ts < period.to_ts: X_until = i + 1 if i is None: raise errors.NoData("no data found for time range {}".format(hist)) self.apply_defaults(dataset) nb_buckets_found = i + 1 if nb_buckets_found < nb_buckets: dataset = np.resize(dataset, (nb_buckets_found, )) logging.info("found %d time periods", nb_buckets_found) norm_dataset = self.scale_dataset(dataset) X_miss_val, X_test = self._format_dataset(norm_dataset[:X_until]) if len(X_test) == 0: raise errors.LoudMLException("not enough data for prediction") # display a 2D plot of the digit classes in the latent space z_mean, _, _ = self._encoder_model.predict([X_test, X_miss_val], batch_size=g_mc_batch_size) if x_dim < 0 or y_dim < 0: mses = [] for (x, y) in itertools.combinations(range(0, latent_dim), 2): _mean = np.mean(z_mean, axis=0)[[x, y]] mse = ((z_mean[:, [x, y]] - 
_mean)**2).mean(axis=0) mses.append([x, y, mse[0] + mse[1]]) mses = sorted(mses, key=lambda x: x[2]) x_dim = mses[0][0] y_dim = mses[0][1] excl = [x for x in range(latent_dim) if x != x_dim and x != y_dim] plt.figure(figsize=(12, 10)) if latent_dim > 3: ax = plt.axes(projection='3d') ax.set_zticks([]) else: ax = plt.axes() # Hide grid lines ax.grid(False) # Hide axes ticks ax.set_xticks([]) ax.set_yticks([]) if latent_dim > 3: zc = np.array( [[z_mean[i, excl[0]], z_mean[i, excl[1]], z_mean[i, excl[2]]] for i, _ in enumerate(z_mean)]) # (x-min(x))/(max(x)-min(x)). RGBA values should be within 0-1 range zc = (zc - np.min(zc, axis=0)) / \ (np.max(zc, axis=0) - np.min(zc, axis=0)) if latent_dim > 5: ax.set_zlabel("z[{}]".format(excl[3])) ax.scatter(z_mean[:, x_dim], z_mean[:, y_dim], z_mean[:, excl[3]], c=zc) else: zc[:, 0] = 0 ax.set_zlabel("z[{}]".format(excl[0])) ax.scatter(z_mean[:, x_dim], z_mean[:, y_dim], z_mean[:, excl[0]], c=zc) else: plt.scatter(z_mean[:, x_dim], z_mean[:, y_dim], c=z_mean[:, excl[0]]) plt.colorbar() plt.xlabel("z[{}]".format(x_dim)) plt.ylabel("z[{}]".format(y_dim)) if output is None: plt.show() else: plt.savefig(output)
class TestSmartBulb(TestCase): # these schemas should go to the mainlib as # they can be useful when adding support for new features/devices # as well as to check that faked devices are operating properly. sysinfo_schema = Schema({ 'active_mode': check_mode, 'alias': basestring, 'ctrl_protocols': { 'name': basestring, 'version': basestring, }, 'description': basestring, 'dev_state': basestring, 'deviceId': basestring, 'disco_ver': basestring, 'heapsize': int, 'hwId': basestring, 'hw_ver': basestring, 'is_color': check_int_bool, 'is_dimmable': check_int_bool, 'is_factory': bool, 'is_variable_color_temp': check_int_bool, 'light_state': { 'brightness': All(int, Range(min=0, max=100)), 'color_temp': int, 'hue': All(int, Range(min=0, max=255)), 'mode': basestring, 'on_off': check_int_bool, 'saturation': All(int, Range(min=0, max=255)), }, 'mic_mac': basestring, 'mic_type': basestring, 'model': basestring, 'oemId': basestring, 'preferred_state': [{ 'brightness': All(int, Range(min=0, max=100)), 'color_temp': int, 'hue': All(int, Range(min=0, max=255)), 'index': int, 'saturation': All(int, Range(min=0, max=255)), }], 'rssi': All(int, Range(max=0)), 'sw_ver': basestring, }) current_consumption_schema = Schema({ 'power_mw': int, }) tz_schema = Schema({ 'zone_str': basestring, 'dst_offset': int, 'index': All(int, Range(min=0)), 'tz_str': basestring, }) def setUp(self): self.bulb = SmartBulb(BULB_IP, protocol=FakeTransportProtocol(sysinfo_lb130)) def tearDown(self): self.bulb = None def test_initialize(self): self.assertIsNotNone(self.bulb.sys_info) self.sysinfo_schema(self.bulb.sys_info) def test_initialize_invalid_connection(self): bulb = SmartBulb('127.0.0.1', protocol=FakeTransportProtocol(sysinfo_lb130, invalid=True)) with self.assertRaises(SmartPlugException): bulb.sys_info['model'] def test_query_helper(self): with self.assertRaises(SmartPlugException): self.bulb._query_helper("test", "testcmd", {}) # TODO check for unwrapping? 
@skipIf(SKIP_STATE_TESTS, "SKIP_STATE_TESTS is True, skipping") def test_state(self): def set_invalid(x): self.bulb.state = x set_invalid_int = partial(set_invalid, 1234) self.assertRaises(ValueError, set_invalid_int) set_invalid_str = partial(set_invalid, "1234") self.assertRaises(ValueError, set_invalid_str) set_invalid_bool = partial(set_invalid, True) self.assertRaises(ValueError, set_invalid_bool) orig_state = self.bulb.state if orig_state == SmartBulb.BULB_STATE_OFF: self.bulb.state = SmartBulb.BULB_STATE_ON self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_ON) self.bulb.state = SmartBulb.BULB_STATE_OFF self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_OFF) elif orig_state == SmartBulb.BULB_STATE_ON: self.bulb.state = SmartBulb.BULB_STATE_OFF self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_OFF) self.bulb.state = SmartBulb.BULB_STATE_ON self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_ON) def test_get_sysinfo(self): # initialize checks for this already, but just to be sure self.sysinfo_schema(self.bulb.get_sysinfo()) @skipIf(SKIP_STATE_TESTS, "SKIP_STATE_TESTS is True, skipping") def test_turns_and_isses(self): orig_state = self.bulb.state if orig_state == SmartBulb.BULB_STATE_ON: self.bulb.state = SmartBulb.BULB_STATE_OFF self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_OFF) self.bulb.state = SmartBulb.BULB_STATE_ON self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_ON) else: self.bulb.state = SmartBulb.BULB_STATE_ON self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_ON) self.bulb.state = SmartBulb.BULB_STATE_OFF self.assertTrue(self.bulb.state == SmartBulb.BULB_STATE_OFF) def test_get_emeter_realtime(self): self.current_consumption_schema((self.bulb.get_emeter_realtime())) def test_get_emeter_daily(self): self.assertEqual(self.bulb.get_emeter_daily(year=1900, month=1), {}) k, v = self.bulb.get_emeter_daily().popitem() self.assertTrue(isinstance(k, int)) self.assertTrue(isinstance(v, int)) def 
test_get_emeter_monthly(self): self.assertEqual(self.bulb.get_emeter_monthly(year=1900), {}) d = self.bulb.get_emeter_monthly() k, v = d.popitem() self.assertTrue(isinstance(k, int)) self.assertTrue(isinstance(v, int)) @skip("not clearing your stats..") def test_erase_emeter_stats(self): self.fail() def test_current_consumption(self): x = self.bulb.current_consumption() self.assertTrue(isinstance(x, int)) self.assertTrue(x >= 0.0) def test_alias(self): test_alias = "TEST1234" original = self.bulb.alias self.assertTrue(isinstance(original, basestring)) self.bulb.alias = test_alias self.assertEqual(self.bulb.alias, test_alias) self.bulb.alias = original self.assertEqual(self.bulb.alias, original) def test_icon(self): self.assertEqual(set(self.bulb.icon.keys()), {'icon', 'hash'}) def test_rssi(self): self.sysinfo_schema({'rssi': self.bulb.rssi}) # wrapping for vol