def expand_bot_path(filename):
    '''Resolve *filename* relative to this package: look under "core/" first,
    then under "core/..", and raise IOError if neither location has the file.'''
    # try "core/"
    first_try = os_path_join(os_path_dirname(__file__), filename)
    if os_path_isfile(first_try):
        return first_try
    # try "core/.."
    second_try = os_path_join(os_path_dirname(__file__), '..', filename)
    if os_path_isfile(second_try):
        return second_try
    raise IOError('File "{0}" not found under "{1}" or "{2}"'.format(
        filename, first_try, second_try))
def enable_units(path=None):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those
    created before.

    If gpkit is imported multiple times, this needs to be run each time."""
    # pylint: disable=invalid-name,global-statement
    global DimensionalityError, UNIT_REGISTRY, ureg, units
    try:
        import pint
        if path:  # let user load their own unit definitions
            UNIT_REGISTRY = pint.UnitRegistry(path)
        if UNIT_REGISTRY is None:
            UNIT_REGISTRY = pint.UnitRegistry()  # use pint default
            path = os_sep.join([os_path_dirname(__file__), "pint"])
        UNIT_REGISTRY.load_definitions(os_sep.join([path, "usd_cpi.txt"]))
        # next line patches https://github.com/hgrecco/pint/issues/366
        UNIT_REGISTRY.define("nautical_mile = 1852 m = nmi")
        ureg = UNIT_REGISTRY
        DimensionalityError = pint.DimensionalityError
        units = GPkitUnits()
    except ImportError:
        print("Optional Python units library (Pint) not installed;"
              " unit support disabled.")
        disable_units()
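# A brief usage sketch (hypothetical, assuming pint is installed): once
# enable_units() has run, the module-level UNIT_REGISTRY is a pint.UnitRegistry,
# so ordinary pint quantities and conversions are available.
enable_units()
if UNIT_REGISTRY is not None:
    distance = 3 * UNIT_REGISTRY.nautical_mile  # unit added by the patch above
    print(distance.to(UNIT_REGISTRY.m))         # 5556 meter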
def _loosedb_raw_object_write(loosedb, presumedhex: shahex, objloose: bytes):
    # assert not loosedb.has_object(_hex2bin(presumedhex))
    objpath = loosedb.db_path(loosedb.object_path(presumedhex))
    # assert not os_path_exists(objpath)
    os_makedirs(os_path_dirname(objpath), exist_ok=True)
    with _file_open_mkdirp(objpath) as f:
        f.write(objloose)
    # FIXME:
    #loosedb.update_cache(force=True)
    assert loosedb.has_object(_hex2bin(presumedhex))
def goBlue(self):
    filename = self.SOURCELIST.getFilename()
    if not filename:
        return
    sourceDir = self.SOURCELIST.getCurrentDirectory()
    if os_path_isdir(filename):
        text = _("Rename directory")
        filename = os_path_basename(os_path_dirname(filename))
    else:
        text = _("Rename file")
    self.session.openWithCallback(self.doRename, VirtualKeyBoard, title=text, text=filename)
def monitorsPath(self, path):
    if path == self.path:
        return True
    if self.watch_type == WATCH_FILE:
        return False
    elif self.watch_type == WATCH_DIR:
        if os_path_dirname(path) == self.path:
            return True
    elif len(path) > len(self.path):  # and self.watch_type == WATCH_DIR_RECURSIVE
        if path[:len(self.path)] == self.path:
            return True
    return False
def move_files_to_folder(*args, **kwargs):
    # Maximum backup count allowed by the user
    BACKUP_COUNT = bpy.context.user_preferences.filepaths.save_version
    # If the saving-backups option is 'ON'
    if BACKUP_COUNT:
        # Function level constants
        PATH = bpy.data.filepath                           # Full path
        FILE = bpy.path.display_name_from_filepath(PATH)   # File name
        CWD = os_path_dirname(PATH)                        # Current Working Directory
        CBD = os_path_join(CWD, BACKUP_FOLDER_NAME)        # Current Backup Directory
        REXT = r"{}\.blend(\d+)$".format(FILE)             # Regex to catch backups
        EXT = "{}.blend{}"                                 # Extension placeholder
        OLD = EXT.format(FILE, BACKUP_COUNT)               # Oldest backup name

        # Create the backup directory if it does not exist
        try:
            os_makedirs(CBD)
        except OSError as e:
            # If an error other than "dir already exists" appears, re-raise
            # the caught error and print out the traceback
            if e.errno != EEXIST:
                raise OSError("\n".join(traceback_extract_stack())) from None

        # Get all files in the current directory, move them to the backup
        # folder if they are backup files, and maintain the backup folder's
        # instances
        for filename in reversed(sorted(os_listdir(CWD))):
            # If the file is a backup file
            try:
                index = int(re_findall(REXT, filename)[-1])
                # If the file's index is greater than the current number of
                # backups allowed, the full path of the file will be returned
                # and deleted; otherwise os.remove will raise FileNotFoundError
                os_remove(
                    increase_index_and_move(
                        src_folder=CWD,
                        dst_folder=CBD,
                        file=FILE,
                        extension=EXT,
                        src_index=index,
                        dst_index=index,
                        max_index=BACKUP_COUNT,
                    )
                )
            # If the file is not a backup file
            except (IndexError, FileNotFoundError):
                pass

        # If everything went fine, print out information
        if PRINT_INFO:
            print(INFO_TEXT.format(CWD, CBD))
def save_obj(obj, name):
    """
    This function saves an object as a pickle.

    :param obj: object to save
    :param name: name of the pickle file.
    :return: -
    """
    # if any directory on the path doesn't exist - create it
    os_makedirs(os_path_dirname(name), exist_ok=True)
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
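# Usage sketch; load_obj is a hypothetical counterpart added here only to
# illustrate the round trip, it is not part of the original module.
def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

save_obj({'answer': 42}, 'cache/result')
assert load_obj('cache/result') == {'answer': 42}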
def monitorsPath(self, path):
    if path == self.path:
        return True
    if self.watch_type == WATCH_FILE:
        return False
    elif self.watch_type == WATCH_DIR:
        if os_path_dirname(path) == self.path:
            return True
    elif len(path) > len(self.path):  # and self.watch_type == WATCH_DIR_RECURSIVE
        if path[:len(self.path)] == self.path:
            return True
    return False
def tmp_file_path(path: str, suffix: str) -> str:
    """
    Get the path to a temporary file.

    Parameters
    ----------
    path
    suffix

    Returns
    -------
    Path to some otherwise unused file.
    """
    in_dir = os_path_dirname(path)
    fname = f"{os_path_basename(path)}."
    tf = tempfile.NamedTemporaryFile(dir=in_dir, prefix=fname, suffix=suffix,
                                     delete=False)
    fname = tf.name
    tf.close()
    return fname
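# Usage sketch (illustrative only): write new output next to the target file,
# then swap it into place. The os.replace step is an assumption about the
# intended use, not part of the original helper.
import os

target = "data/output.csv"
tmp = tmp_file_path(target, suffix=".csv")
with open(tmp, "w") as fh:
    fh.write("col_a,col_b\n")
os.replace(tmp, target)  # atomic rename on the same filesystem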
    (('div',), {'class': 'nm_supported__wrapper'}),
]
# <div class=miniapp socialbox id=@STYLE>

MEDIA_LIST = []


def decompose_spec(article_dec):
    decompose_listed_subtrees_and_mark_media_descendants(article_dec, DECOMP, MEDIA_LIST)
    # 'mindeközben' titles
    if article_dec.find('div', class_='mindenkozben_post_content content'):
        article_dec.find('h3', class_='title').decompose()
    return article_dec


BLACKLIST_SPEC = \
    [url.strip() for url in open(os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                              'index_koronavirus_BLACKLIST.txt')).readlines()] + \
    [url.strip() for url in open(os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                              'index_BLACKLIST.txt')).readlines()]

LINK_FILTER_SUBSTRINGS_SPEC = re.compile('|'.join(['LINK_FILTER_DUMMY_STRING']))

MULTIPAGE_URL_END = re.compile(r'^\b$')  # (r'.*/\?p=.*')
# https://index.hu/belfold/2020/02/29/eloben_kozvetitjuk_az_eddigi_legnagyobb_magyar_lottonyeremeny_kihuzasa/?p=1


def next_page_of_article_spec(curr_html):
    """ """
    bs = BeautifulSoup(curr_html, 'lxml')
    pages = bs.find('div', class_='pagination clearfix')
    if pages is not None:
        for p in pages.find_all('a', class_='next'):
            if 'rel' not in p.attrs.keys():
def m66(current_skyline_app, parent_pid, timeseries, algorithm_parameters):
    """
    A time series data point is anomalous if the 6th median is 6 standard
    deviations (six-sigma) from the time series 6th median standard deviation
    and persists for x_windows, where `x_windows = int(window / 2)`.
    This algorithm finds SIGNIFICANT changepoints in a time series, similar to
    PELT and Bayesian Online Changepoint Detection, however it is more robust
    to instantaneous outliers and more conditionally selective of changepoints.

    :param current_skyline_app: the Skyline app executing the algorithm.  This
        will be passed to the algorithm by Skyline.  This is **required** for
        error handling and logging.  You do not have to worry about handling
        the argument in the scope of the custom algorithm itself, but the
        algorithm must accept it as the first argument.
    :param parent_pid: the parent pid which is executing the algorithm, this is
        **required** for error handling and logging.  You do not have to worry
        about handling this argument in the scope of the algorithm, but the
        algorithm must accept it as the second argument.
    :param timeseries: the time series as a list e.g. ``[[1578916800.0, 29.0],
        [1578920400.0, 55.0], ... [1580353200.0, 55.0]]``
    :param algorithm_parameters: a dictionary of any required parameters for
        the custom_algorithm and algorithm itself, for example:
        ``algorithm_parameters={
            'nth_median': 6,
            'sigma': 6,
            'window': 5,
            'return_anomalies': True,
        }``
    :type current_skyline_app: str
    :type parent_pid: int
    :type timeseries: list
    :type algorithm_parameters: dict
    :return: True, False or None
    :rtype: boolean

    Example CUSTOM_ALGORITHMS configuration:

    'm66': {
        'namespaces': [
            'skyline.analyzer.run_time', 'skyline.analyzer.total_metrics',
            'skyline.analyzer.exceptions'
        ],
        'algorithm_source': '/opt/skyline/github/skyline/skyline/custom_algorithms/m66.py',
        'algorithm_parameters': {
            'nth_median': 6, 'sigma': 6, 'window': 5, 'resolution': 60,
            'minimum_sparsity': 0, 'determine_duration': False,
            'return_anomalies': True, 'save_plots_to': False,
            'save_plots_to_absolute_dir': False, 'filename_prefix': False
        },
        'max_execution_time': 1.0,
        'consensus': 1,
        'algorithms_allowed_in_consensus': ['m66'],
        'run_3sigma_algorithms': False,
        'run_before_3sigma': False,
        'run_only_if_consensus': False,
        'use_with': ['crucible', 'luminosity'],
        'debug_logging': False,
    },

    """

    # You MUST define the algorithm_name
    algorithm_name = 'm66'

    # Define the default state of None and None, anomalous does not default to
    # False as that is not correct, False is only correct if the algorithm
    # determines the data point is not anomalous.  The same is true for the
    # anomalyScore.
    anomalous = None
    anomalyScore = None
    return_anomalies = False
    anomalies = []
    anomalies_dict = {}
    anomalies_dict['algorithm'] = algorithm_name
    realtime_analysis = False

    current_logger = None
    dev_null = None

    # If you wanted to log, you can but this should only be done during
    # testing and development
    def get_log(current_skyline_app):
        current_skyline_app_logger = current_skyline_app + 'Log'
        current_logger = logging.getLogger(current_skyline_app_logger)
        return current_logger

    start = timer()

    # Use the algorithm_parameters to determine the sample_period
    debug_logging = None
    try:
        debug_logging = algorithm_parameters['debug_logging']
    except:
        debug_logging = False
    if debug_logging:
        try:
            current_logger = get_log(current_skyline_app)
            current_logger.debug(
                'debug :: %s :: debug_logging enabled with algorithm_parameters - %s' % (
                    algorithm_name, str(algorithm_parameters)))
        except Exception as e:
            # This except pattern MUST be used in ALL custom algorithms to
            # facilitate the traceback from any errors.  We want the algorithm
            # to run super fast and without spamming the log with lots of
            # errors.  But we do not want the function returning and not
            # reporting anything to the log, so the pythonic except is used to
            # "sample" any algorithm errors to a tmp file and report once per
            # run rather than spewing tons of errors into the log e.g.
            # analyzer.log
            dev_null = e
            record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
            # Return None and None as the algorithm could not determine True or False
            del dev_null
            if current_skyline_app == 'webapp':
                return (anomalous, anomalyScore, anomalies, anomalies_dict)
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            return (anomalous, anomalyScore)

    # Allow the m66 parameters to be passed in the algorithm_parameters
    window = 6
    try:
        window = algorithm_parameters['window']
    except KeyError:
        window = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    nth_median = 6
    try:
        nth_median = algorithm_parameters['nth_median']
    except KeyError:
        nth_median = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    n_sigma = 6
    try:
        n_sigma = algorithm_parameters['sigma']
    except KeyError:
        n_sigma = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    resolution = 0
    try:
        resolution = algorithm_parameters['resolution']
    except KeyError:
        resolution = 0
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    determine_duration = False
    try:
        determine_duration = algorithm_parameters['determine_duration']
    except KeyError:
        determine_duration = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    minimum_sparsity = 0
    try:
        minimum_sparsity = algorithm_parameters['minimum_sparsity']
    except KeyError:
        minimum_sparsity = 0
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    shift_to_start_of_window = True
    try:
        shift_to_start_of_window = algorithm_parameters['shift_to_start_of_window']
    except KeyError:
        shift_to_start_of_window = True
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        dev_null = e

    save_plots_to = False
try: save_plots_to = algorithm_parameters['save_plots_to'] except KeyError: save_plots_to = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e save_plots_to_absolute_dir = False try: save_plots_to_absolute_dir = algorithm_parameters[ 'save_plots_to_absolute_dir'] except KeyError: save_plots_to_absolute_dir = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e filename_prefix = False try: filename_prefix = algorithm_parameters['filename_prefix'] except KeyError: filename_prefix = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e if debug_logging: current_logger.debug('debug :: algorithm_parameters :: %s' % (str(algorithm_parameters))) return_anomalies = False try: return_anomalies = algorithm_parameters['return_anomalies'] except KeyError: return_anomalies = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e try: realtime_analysis = algorithm_parameters['realtime_analysis'] except KeyError: realtime_analysis = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e save_plots_to = False try: save_plots_to = algorithm_parameters['save_plots_to'] except KeyError: save_plots_to = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e save_plots_to_absolute_dir = False try: save_plots_to_absolute_dir = algorithm_parameters[ 'save_plots_to_absolute_dir'] except KeyError: save_plots_to = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e filename_prefix = False try: filename_prefix = algorithm_parameters['filename_prefix'] except KeyError: filename_prefix = False except Exception as e: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) dev_null = e try: base_name = algorithm_parameters['base_name'] except Exception as e: # This except pattern MUST be used in ALL custom algortihms to # facilitate the traceback from any errors. The algorithm we want to # run super fast and without spamming the log with lots of errors. # But we do not want the function returning and not reporting # anything to the log, so the pythonic except is used to "sample" any # algorithm errors to a tmp file and report once per run rather than # spewing tons of errors into the log e.g. 
analyzer.log record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) # Return None and None as the algorithm could not determine True or False dev_null = e del dev_null if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (False, None, anomalies) return (False, None) if debug_logging: current_logger.debug('debug :: %s :: base_name - %s' % (algorithm_name, str(base_name))) anomalies_dict['metric'] = base_name anomalies_dict['anomalies'] = {} use_bottleneck = True if save_plots_to: use_bottleneck = False if use_bottleneck: import bottleneck as bn # ALWAYS WRAP YOUR ALGORITHM IN try and the BELOW except try: start_preprocessing = timer() # INFO: Sorting time series of 10079 data points took 0.002215 seconds timeseries = sorted(timeseries, key=lambda x: x[0]) if debug_logging: current_logger.debug('debug :: %s :: time series of length - %s' % (algorithm_name, str(len(timeseries)))) # Testing the data to ensure it meets minimum requirements, in the case # of Skyline's use of the m66 algorithm this means that: # - the time series must have at least 75% of its full_duration do_not_use_sparse_data = False if current_skyline_app == 'luminosity': do_not_use_sparse_data = True if minimum_sparsity == 0: do_not_use_sparse_data = False total_period = 0 total_datapoints = 0 calculate_variables = False if do_not_use_sparse_data: calculate_variables = True if determine_duration: calculate_variables = True if calculate_variables: try: start_timestamp = int(timeseries[0][0]) end_timestamp = int(timeseries[-1][0]) total_period = end_timestamp - start_timestamp total_datapoints = len(timeseries) except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called, exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error( 'error :: debug_logging :: %s :: failed to determine total_period and total_datapoints' % (algorithm_name)) timeseries = [] if not timeseries: if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) if current_skyline_app == 'analyzer': # Default for analyzer at required period to 18 hours period_required = int(FULL_DURATION * 0.75) else: # Determine from timeseries if total_period < FULL_DURATION: period_required = int(FULL_DURATION * 0.75) else: period_required = int(total_period * 0.75) if determine_duration: period_required = int(total_period * 0.75) if do_not_use_sparse_data: # If the time series does not have 75% of its full_duration it does # not have sufficient data to sample try: if total_period < period_required: if debug_logging: current_logger.debug( 'debug :: %s :: time series does not have sufficient data' % (algorithm_name)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called, 
exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error( 'error :: debug_logging :: %s :: falied to determine if time series has sufficient data' % (algorithm_name)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) # If the time series does not have 75% of its full_duration # datapoints it does not have sufficient data to sample # Determine resolution from the last 30 data points # INFO took 0.002060 seconds if not resolution: resolution_timestamps = [] metric_resolution = False for metric_datapoint in timeseries[-30:]: timestamp = int(metric_datapoint[0]) resolution_timestamps.append(timestamp) timestamp_resolutions = [] if resolution_timestamps: last_timestamp = None for timestamp in resolution_timestamps: if last_timestamp: resolution = timestamp - last_timestamp timestamp_resolutions.append(resolution) last_timestamp = timestamp else: last_timestamp = timestamp try: del resolution_timestamps except: pass if timestamp_resolutions: try: timestamp_resolutions_count = Counter( timestamp_resolutions) ordered_timestamp_resolutions_count = timestamp_resolutions_count.most_common( ) metric_resolution = int( ordered_timestamp_resolutions_count[0][0]) except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called, exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error( 'error :: debug_logging :: %s :: failed to determine if time series has sufficient data' % (algorithm_name)) try: del timestamp_resolutions except: pass else: metric_resolution = resolution minimum_datapoints = None if metric_resolution: minimum_datapoints = int(period_required / metric_resolution) if minimum_datapoints: if total_datapoints < minimum_datapoints: if debug_logging: current_logger.debug( 'debug :: %s :: time series does not have sufficient data, minimum_datapoints required is %s and time series has %s' % (algorithm_name, str(minimum_datapoints), str(total_datapoints))) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) # Is the time series fully populated? 
# full_duration_datapoints = int(full_duration / metric_resolution) total_period_datapoints = int(total_period / metric_resolution) # minimum_percentage_sparsity = 95 minimum_percentage_sparsity = 90 sparsity = int(total_datapoints / (total_period_datapoints / 100)) if sparsity < minimum_percentage_sparsity: if debug_logging: current_logger.debug( 'debug :: %s :: time series does not have sufficient data, minimum_percentage_sparsity required is %s and time series has %s' % (algorithm_name, str(minimum_percentage_sparsity), str(sparsity))) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) if len(set(item[1] for item in timeseries)) == 1: if debug_logging: current_logger.debug( 'debug :: %s :: time series does not have sufficient variability, all the values are the same' % algorithm_name) anomalous = False anomalyScore = 0.0 if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) end_preprocessing = timer() preprocessing_runtime = end_preprocessing - start_preprocessing if debug_logging: current_logger.debug( 'debug :: %s :: preprocessing took %.6f seconds' % (algorithm_name, preprocessing_runtime)) if not timeseries: if debug_logging: current_logger.debug('debug :: %s :: m66 not run as no data' % (algorithm_name)) anomalies = [] if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) if debug_logging: current_logger.debug('debug :: %s :: timeseries length: %s' % (algorithm_name, str(len(timeseries)))) anomalies_dict['timestamp'] = int(timeseries[-1][0]) anomalies_dict['from_timestamp'] = int(timeseries[0][0]) start_analysis = timer() try: # bottleneck is used because it is much faster # pd dataframe method (1445 data point - 24hrs): took 0.077915 seconds # bottleneck method (1445 data point - 24hrs): took 0.005692 seconds # numpy and pandas rolling # 2021-07-30 12:37:31 :: 2827897 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 136.93 seconds # 2021-07-30 12:44:53 :: 2855884 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 148.82 seconds # 2021-07-30 12:48:41 :: 2870822 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 145.62 seconds # 2021-07-30 12:55:00 :: 2893634 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 139.00 seconds # 2021-07-30 12:59:31 :: 2910443 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 144.80 seconds # 2021-07-30 13:02:31 :: 2922928 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 143.35 seconds # 2021-07-30 14:12:56 :: 3132457 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 129.25 seconds # 2021-07-30 14:22:35 :: 3164370 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 125.72 seconds # 2021-07-30 14:28:24 :: 3179687 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 222.43 seconds # 2021-07-30 14:33:45 :: 3179687 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 244.00 seconds # 2021-07-30 14:36:27 :: 3214047 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 141.10 seconds # numpy and bottleneck # 2021-07-30 16:41:52 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 
73.92 seconds # 2021-07-30 16:46:46 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 68.84 seconds # 2021-07-30 16:51:48 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 70.55 seconds # numpy and bottleneck (passing resolution and not calculating in m66) # 2021-07-30 16:57:46 :: 3643253 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 65.59 seconds if use_bottleneck: if len(timeseries) < 10: if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) x_np = np.asarray([x[1] for x in timeseries]) # Fast Min-Max scaling data = (x_np - x_np.min()) / (x_np.max() - x_np.min()) # m66 - calculate to nth_median median_count = 0 while median_count < nth_median: median_count += 1 rolling_median_s = bn.move_median(data, window=window) median = rolling_median_s.tolist() data = median if median_count == nth_median: break # m66 - calculate the moving standard deviation for the # nth_median array rolling_std_s = bn.move_std(data, window=window) std_nth_median_array = np.nan_to_num(rolling_std_s, copy=False, nan=0.0, posinf=None, neginf=None) std_nth_median = std_nth_median_array.tolist() if debug_logging: current_logger.debug( 'debug :: %s :: std_nth_median calculated with bn' % (algorithm_name)) else: df = pd.DataFrame(timeseries, columns=['date', 'value']) df['date'] = pd.to_datetime(df['date'], unit='s') datetime_index = pd.DatetimeIndex(df['date'].values) df = df.set_index(datetime_index) df.drop('date', axis=1, inplace=True) original_df = df.copy() # MinMax scale df = (df - df.min()) / (df.max() - df.min()) # window = 6 data = df['value'].tolist() if len(data) < 10: if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) # m66 - calculate to nth_median median_count = 0 while median_count < nth_median: median_count += 1 s = pd.Series(data) rolling_median_s = s.rolling(window).median() median = rolling_median_s.tolist() data = median if median_count == nth_median: break # m66 - calculate the moving standard deviation for the # nth_median array s = pd.Series(data) rolling_std_s = s.rolling(window).std() nth_median_column = 'std_nth_median_%s' % str(nth_median) df[nth_median_column] = rolling_std_s.tolist() std_nth_median = df[nth_median_column].fillna(0).tolist() # m66 - calculate the standard deviation for the entire nth_median # array metric_stddev = np.std(std_nth_median) std_nth_median_n_sigma = [] anomalies_found = False for value in std_nth_median: # m66 - if the value in the 6th median array is > six-sigma of # the metric_stddev the datapoint is anomalous if value > (metric_stddev * n_sigma): std_nth_median_n_sigma.append(1) anomalies_found = True else: std_nth_median_n_sigma.append(0) std_nth_median_n_sigma_column = 'std_median_%s_%s_sigma' % ( str(nth_median), str(n_sigma)) if not use_bottleneck: df[std_nth_median_n_sigma_column] = std_nth_median_n_sigma anomalies = [] # m66 - only label anomalous if the n_sigma triggers are persisted # for (window / 2) if anomalies_found: current_triggers = [] for index, item in enumerate(timeseries): if std_nth_median_n_sigma[index] == 1: current_triggers.append(index) else: if len(current_triggers) > int(window / 2): for trigger_index in current_triggers: # Shift the anomaly back to the beginning of the # window if 
shift_to_start_of_window: anomalies.append( timeseries[(trigger_index - (window * int( (nth_median / 2))))]) else: anomalies.append(timeseries[trigger_index]) current_triggers = [] # Process any remaining current_triggers if len(current_triggers) > int(window / 2): for trigger_index in current_triggers: # Shift the anomaly back to the beginning of the # window if shift_to_start_of_window: anomalies.append( timeseries[(trigger_index - (window * int( (nth_median / 2))))]) else: anomalies.append(timeseries[trigger_index]) if not anomalies: anomalous = False if anomalies: anomalous = True anomalies_data = [] anomaly_timestamps = [int(item[0]) for item in anomalies] for item in timeseries: if int(item[0]) in anomaly_timestamps: anomalies_data.append(1) else: anomalies_data.append(0) if not use_bottleneck: df['anomalies'] = anomalies_data anomalies_list = [] for ts, value in timeseries: if int(ts) in anomaly_timestamps: anomalies_list.append([int(ts), value]) anomalies_dict['anomalies'][int(ts)] = value if anomalies and save_plots_to: try: from adtk.visualization import plot metric_dir = base_name.replace('.', '/') timestamp_dir = str(int(timeseries[-1][0])) save_path = '%s/%s/%s/%s' % (save_plots_to, algorithm_name, metric_dir, timestamp_dir) if save_plots_to_absolute_dir: save_path = '%s' % save_plots_to anomalies_dict['file_path'] = save_path save_to_file = '%s/%s.%s.png' % (save_path, algorithm_name, base_name) if filename_prefix: save_to_file = '%s/%s.%s.%s.png' % ( save_path, filename_prefix, algorithm_name, base_name) save_to_path = os_path_dirname(save_to_file) title = '%s\n%s - median %s %s-sigma persisted (window=%s)' % ( base_name, algorithm_name, str(nth_median), str(n_sigma), str(window)) if not os_path_exists(save_to_path): try: mkdir_p(save_to_path) except Exception as e: current_logger.error( 'error :: %s :: failed to create dir - %s - %s' % (algorithm_name, save_to_path, e)) if os_path_exists(save_to_path): try: plot(original_df['value'], anomaly=df['anomalies'], anomaly_color='red', title=title, save_to_file=save_to_file) if debug_logging: current_logger.debug( 'debug :: %s :: plot saved to - %s' % (algorithm_name, save_to_file)) anomalies_dict['image'] = save_to_file except Exception as e: current_logger.error( 'error :: %s :: failed to plot - %s - %s' % (algorithm_name, base_name, e)) anomalies_file = '%s/%s.%s.anomalies_list.txt' % ( save_path, algorithm_name, base_name) with open(anomalies_file, 'w') as fh: fh.write(str(anomalies_list)) # os.chmod(anomalies_file, mode=0o644) data_file = '%s/data.txt' % (save_path) with open(data_file, 'w') as fh: fh.write(str(anomalies_dict)) except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called during save plot, exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except Exception as e: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error( 'error :: %s :: failed to plot or save anomalies file - %s - %s' % (algorithm_name, base_name, e)) try: del df except: pass except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called, during analysis, exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return 
(anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error( 'error :: debug_logging :: %s :: failed to run on ts' % (algorithm_name)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) end_analysis = timer() analysis_runtime = end_analysis - start_analysis if debug_logging: current_logger.debug( 'debug :: analysis with %s took %.6f seconds' % (algorithm_name, analysis_runtime)) if anomalous: anomalyScore = 1.0 else: anomalyScore = 0.0 if debug_logging: current_logger.info( '%s :: anomalous - %s, anomalyScore - %s' % (algorithm_name, str(anomalous), str(anomalyScore))) if debug_logging: end = timer() processing_runtime = end - start current_logger.info('%s :: completed in %.6f seconds' % (algorithm_name, processing_runtime)) try: del timeseries except: pass if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except SystemExit as e: if debug_logging: current_logger.debug( 'debug_logging :: %s :: SystemExit called (before StopIteration), exiting - %s' % (algorithm_name, e)) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore) except StopIteration: # This except pattern MUST be used in ALL custom algortihms to # facilitate the traceback from any errors. The algorithm we want to # run super fast and without spamming the log with lots of errors. # But we do not want the function returning and not reporting # anything to the log, so the pythonic except is used to "sample" any # algorithm errors to a tmp file and report once per run rather than # spewing tons of errors into the log e.g. analyzer.log if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (False, None, anomalies) return (False, None) except: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) # Return None and None as the algorithm could not determine True or False if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (False, None, anomalies) return (False, None) if current_skyline_app == 'webapp': return (anomalous, anomalyScore, anomalies, anomalies_dict) if return_anomalies: return (anomalous, anomalyScore, anomalies) return (anomalous, anomalyScore)
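# A minimal, self-contained sketch of the m66 core idea described in the
# docstring above (not the Skyline implementation itself): take the nth
# rolling median, then its rolling standard deviation, and flag points more
# than n_sigma times the overall standard deviation of that series.  The
# persistence check over int(window / 2) consecutive triggers is omitted here
# for brevity.
import numpy as np
import pandas as pd

def m66_sketch(values, nth_median=6, n_sigma=6, window=6):
    data = pd.Series(values, dtype=float)
    data = (data - data.min()) / (data.max() - data.min())  # min-max scale
    for _ in range(nth_median):
        data = data.rolling(window).median()                # nth rolling median
    rolling_std = data.rolling(window).std().fillna(0.0)    # moving stddev of it
    threshold = np.std(rolling_std) * n_sigma               # six-sigma style limit
    return (rolling_std > threshold).tolist()                # True where exceeded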
from unittest import TestCase, main as unittest_main

from os import pardir as os_pardir
from os.path import (
    join as os_path_join,
    dirname as os_path_dirname,
    abspath as os_path_abspath)
from sys import path as sys_path

sys_path.append(os_path_join(
    os_path_dirname(os_path_abspath(__file__)),
    os_pardir))

from vorbis.helper_funcs import (
    ilog, float32_unpack, lookup1_values, bit_reverse)
from .test_decoders import hex_str_to_bin_str


# noinspection PyMethodMayBeStatic
class HelperFunctionsTests(TestCase):
    def test_ilog_zero(self):
        self.assertEqual(ilog(0), 0)

    def test_ilog_positive_number(self):
        self.assertEqual(ilog(1), 1)
        self.assertEqual(ilog(7), 3)

    def test_ilog_negative_number(self):
        self.assertEqual(ilog(-1111), 0)
        self.assertEqual(ilog(-112312), 0)

    def test_lookup1_values(self):
Requirements
------------
numpy
MOSEK or CVXOPT
scipy(optional): for complete sparse matrix support
sympy(optional): for latex printing in iPython Notebook

Attributes
----------
settings : dict
    Contains settings loaded from ``./env/settings``

"""

from os import sep as os_sep
from os.path import dirname as os_path_dirname
SETTINGS_PATH = os_sep.join([os_path_dirname(__file__), "env", "settings"])

__version__ = "0.4.0"
UNIT_REGISTRY = None
SIGNOMIALS_ENABLED = False

# global variable initializations
DimensionalityError = ValueError
units = None


def enable_units(path=None):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those
    created before.
scipy(optional): for complete sparse matrix support
sympy(optional): for latex printing in iPython Notebook

Attributes
----------
settings : dict
    Contains settings loaded from ``./env/settings``

"""

__version__ = "0.3.4"
UNIT_REGISTRY = None
SIGNOMIALS_ENABLED = False

from os import sep as os_sep
from os.path import dirname as os_path_dirname
UNITDEF_PATH = os_sep.join([os_path_dirname(__file__), "gpkit_units.txt"])
SETTINGS_PATH = os_sep.join([os_path_dirname(__file__), "env", "settings"])


def enable_units(path=UNITDEF_PATH):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those
    created before.

    If gpkit is imported multiple times, this needs to be run each time."""
    global units, DimensionalityError, UNIT_REGISTRY
    try:
        import pint
        if UNIT_REGISTRY is None:
            UNIT_REGISTRY = pint.UnitRegistry(path)
        else:
            return getattr(self, fallback)(other)
    return newfn


for op in "eq ge le add mul div truediv floordiv".split():
    dunder = "__%s__" % op
    trunder = "___%s___" % op
    original = getattr(units.Quantity, dunder)
    setattr(units.Quantity, trunder, original)
    newfn = skip_if_gpkit_objects(fallback=trunder)
    setattr(units.Quantity, dunder, newfn)


# Load settings
from os import sep as os_sep
from os.path import dirname as os_path_dirname
settings_path = os_sep.join([os_path_dirname(__file__), "env", "settings"])
try:
    with open(settings_path) as settingsfile:
        lines = [line[:-1].split(" : ") for line in settingsfile
                 if len(line.split(" : ")) == 2]
        settings = {name: value.split(", ") for name, value in lines}
        for name, value in settings.items():
            # hack to flatten 1-element lists, unless they're the solver list
            if len(value) == 1 and name != "installed_solvers":
                settings[name] = value[0]
    try:
        del lines
        del line
    except NameError:
        pass
except IOError:
from typing import List, TextIO, Tuple
from unittest import TestCase, main as unittest_main
from os import (pardir as os_pardir,
                mkdir as os_mkdir,
                remove as os_remove)
from os.path import (join as os_path_join,
                     dirname as os_path_dirname,
                     abspath as os_path_abspath,
                     exists as os_path_exists)
from sys import path as sys_path
from shutil import rmtree as shutil_rmtree

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from cvs.changes_codec import (ChangesCodec,
                               ProgramDataFormatError,
                               CodecException)


class GetLastCommitNumber(TestCase):
    _changes_codec: ChangesCodec = ChangesCodec()

    def setUp(self) -> None:
        os_mkdir(self._changes_codec._CVS_DIR_PATH)
        os_mkdir(self._changes_codec._COMMITTED_PATH)

    def tearDown(self) -> None:
        if os_path_exists(self._changes_codec._CVS_DIR_PATH):
            shutil_rmtree(self._changes_codec._CVS_DIR_PATH)

    def test_no_commits(self):
        self.assertEqual(self._changes_codec._get_last_commit_number(), 0)

    def test_only_commit_files(self):
        self._create_files_in_committed_dir(['3', '5', '7'])
def run_launcher_logic():
    parser = ArgumentParser(
        description='Videogame-platformer where squares fight for the win!')
    parser.add_argument('--version',
                        help="print program's current version number and exit",
                        action='version',
                        version=get_current_version())
    parser.add_argument('-d', '--debug',
                        help="turn on debug info printing",
                        action='store_true')
    arguments: Namespace = parser.parse_args()

    # Improvement: make GUI version of launcher. For now launcher just loads
    # some map
    map_name: str = input(
        'Enter map name. Raw maps names have format: "raw <name>". Non raw '
        'maps will be loaded from "maps" folder.\n')

    game_map: Optional[GameMap] = None
    if map_name.startswith('raw '):
        try:
            game_map = getattr(RawMapsContainer,
                               'get_map_' + map_name.split(' ')[1])()
        except AttributeError:
            exit_with_exception(
                'Wrong raw map name',
                LauncherException('Wrong raw map name: ' + map_name),
                arguments.debug)
    else:
        try:
            map_path: str = os_path_join(
                os_path_dirname(os_path_abspath(__file__)),
                os_pardir,
                'maps',
                map_name)
            with open(map_path, 'rb') as map_file_handle:
                game_map = pickle_load(map_file_handle)
        except OSError as occurred_err:
            exit_with_exception('Cannot open file: ' + map_name,
                                LauncherException(*occurred_err.args),
                                arguments.debug)

    if game_map is None:
        exit_with_exception("Something broke inside game",
                            ApplicationException('[game_map] is [None]'),
                            arguments.debug)

    game_engine: GameEngine = GameEngine(game_map)
    gui: GameGUI = GameGUI()
    try:
        gui.init(game_map, game_engine.get_event_listeners())

        def game_loop(game_engine_: GameEngine, gui_: GameGUI):
            def time_alignment():
                """Time alignment for CPU power saving

                The game runs at 60 iterations of the game loop (level update
                AND render in a single iteration) per second.  In essence,
                each second is split into 60 parts.  The alignment works so
                that EVERY iteration of the game loop starts at the beginning
                of a 1/60th part of a second.  There is NO guarantee that one
                of those 1/60th parts of a second will not be lost with this
                approach.

                Thus, every level update is computed ONLY for the current
                1/60th of a second.  This avoids fractional values when
                modifying the positions of moving objects.
                """
                # All time below in milliseconds
                #
                # one_iteration_time = 1000 / 60 = 16.666666666666668
                # millis_in_current_second = (
                #     current_time_in_seconds() * 1000 % 1000)
                time_sleep((16.666666666666668
                            - ((current_time_in_seconds() * 1000 % 1000)
                               % 16.666666666666668)) / 1000)

            # The game loop lives in a daemon thread so it will run until the
            # user interface thread is closed
            while True:
                game_engine_.update_map()
                gui_.render()
                time_alignment()

        Thread(target=game_loop, args=(game_engine, gui), daemon=True).start()

        # Right here several renderings CANNOT be lost
        gui.run_gui_loop()
    except ApplicationException as occurred_exc:
        # Improvement: Different messages for user. Switch only message!
        exit_with_exception("Some exception occurred",
                            occurred_exc,
                            arguments.debug)
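# A standalone sketch of the frame-alignment arithmetic described in the
# time_alignment docstring above: with 60 iterations per second each frame is
# 1000 / 60 ~ 16.67 ms, so the loop sleeps until the next 1/60th boundary.
# Names below are illustrative, not part of the original launcher.
import time

FRAME_MS = 1000 / 60  # 16.666... milliseconds per game-loop iteration

def sleep_until_next_frame():
    millis_into_second = (time.time() * 1000) % 1000
    time.sleep((FRAME_MS - millis_into_second % FRAME_MS) / 1000)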
def adtk_level_shift(current_skyline_app, parent_pid, timeseries, algorithm_parameters):
    """
    A timeseries is anomalous if a level shift occurs in a 5 window period
    bound by a factor of 9 of the normal range based on historical
    interquartile range.

    :param current_skyline_app: the Skyline app executing the algorithm.  This
        will be passed to the algorithm by Skyline.  This is **required** for
        error handling and logging.  You do not have to worry about handling
        the argument in the scope of the custom algorithm itself, but the
        algorithm must accept it as the first argument.
    :param parent_pid: the parent pid which is executing the algorithm, this is
        **required** for error handling and logging.  You do not have to worry
        about handling this argument in the scope of the algorithm, but the
        algorithm must accept it as the second argument.
    :param timeseries: the time series as a list e.g. ``[[1578916800.0, 29.0],
        [1578920400.0, 55.0], ... [1580353200.0, 55.0]]``
    :param algorithm_parameters: a dictionary of any required parameters for
        the custom_algorithm and algorithm itself.  For the adtk_level_shift
        custom algorithm the following parameters are required, example:
        ``algorithm_parameters={
            'c': 9.0,
            'run_every': 5,
            'side': 'both',
            'window': 5
        }``
    :type current_skyline_app: str
    :type parent_pid: int
    :type timeseries: list
    :type algorithm_parameters: dict
    :return: True, False or None
    :rtype: boolean

    Performance is of paramount importance in Skyline, especially in terms of
    computational complexity, along with execution time and CPU usage.  The
    adtk LevelShiftAD algorithm is not O(n) and it is not fast either, not
    when compared to the normal three-sigma triggered algorithms.  However it
    is useful if you care about detecting all level shifts.  The normal
    three-sigma triggered algorithms do not always detect a level shift,
    especially if the level shift does not breach the three-sigma limits.
    Therefore you may find over time that you encounter alerts that contain
    level shifts that you thought should have been detected.  On these types
    of metrics and events, the adtk LevelShiftAD algorithm can be implemented
    to detect and alert on these.  It is not recommended to run on all your
    metrics as it would immediately triple the analyzer runtime, even if only
    run every 5 windows/minutes.

    Due to the computational complexity and long run time of the adtk
    LevelShiftAD algorithm on the size of timeseries data used by Skyline,
    consider the following timings of all three-sigma triggered algorithms and
    compare them to the adtk_level_shift results in the last 2 rows of the
    below log: it is clear that running adtk_level_shift on all metrics is
    probably not desirable; even if it is possible to do, it is very noisy.
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - histogram_bins run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - histogram_bins has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - histogram_bins - total: 1.051136 - median: 0.001430
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - first_hour_average run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - first_hour_average has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - first_hour_average - total: 1.322432 - median: 0.001835
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - stddev_from_average run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - stddev_from_average has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - stddev_from_average - total: 1.097290 - median: 0.001641
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - grubbs run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - grubbs has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - grubbs - total: 1.742929 - median: 0.002438
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - ks_test run 147 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - ks_test has 147 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - ks_test - total: 0.127648 - median: 0.000529
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - mean_subtraction_cumulation run 40 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - mean_subtraction_cumulation has 40 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - mean_subtraction_cumulation - total: 0.152515 - median: 0.003152
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - median_absolute_deviation run 35 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - median_absolute_deviation has 35 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - median_absolute_deviation - total: 0.143770 - median: 0.003248
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - stddev_from_moving_average run 30 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - stddev_from_moving_average has 30 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - stddev_from_moving_average - total: 0.125173 - median: 0.003092
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - least_squares run 16 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - least_squares has 16 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - least_squares - total: 0.089108 - median: 0.005538
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - abs_stddev_from_median run 1 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - abs_stddev_from_median has 1 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - abs_stddev_from_median - total: 0.036797 - median: 0.036797
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - adtk_level_shift run 271 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - adtk_level_shift has 271 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - adtk_level_shift - total: 13.729565 - median: 0.035791
    ...
    ...
    2021-03-06 10:46:39 :: 1582754 :: seconds to run :: 27.93  # THE TOTAL ANALYZER RUNTIME

    Therefore the analysis methodology implemented for the adtk_level_shift
    custom_algorithm is as follows:

    - When new metrics are added, either to the configuration or by actual new
      metrics coming online that match the ``algorithm_parameters['namespace']``,
      Skyline implements sharding of new metrics into time slots to prevent a
      thundering herd situation from developing.  A newly added metric will
      eventually be assigned into a time shard and be added, and the last
      analysed timestamp will be added to the ``analyzer.last.adtk_level_shift``
      Redis hash key to determine the next scheduled run with
      ``algorithm_parameters['namespace']``
    - A ``run_every`` parameter is implemented so that the algorithm can be
      configured to run on a metric once every ``run_every`` minutes.  The
      default is to run it every 5 minutes using window 5 (rolling) and trigger
      as anomalous if the algorithm labels any of the last 5 datapoints as
      anomalous.  This means that there could be up to a 5 minute delay on an
      alert on the 60 second, 168 SECOND_ORDER_RESOLUTION_HOURS metrics in the
      example, but a ``c=9.0`` level shift would be detected and would be
      alerted on (if both analyzer and mirage triggered on it).  This periodic
      running of the algorithm is a tradeoff so that the adtk_level_shift load
      and runtime can be spread over ``run_every`` minutes.
    - The algorithm is not run against metrics that are sparsely populated.
      When the algorithm is run on sparsely populated metrics it results in
      lots of false positives and noise.

    The Skyline CUSTOM_ALGORITHMS implementation of the adtk LevelShiftAD
    algorithm is configured as in the example shown below.  However please
    note that the algorithm_parameters shown in this example configuration are
    suitable for metrics that have a 60 second resolution and have a
    :mod:`settings.ALERTS` Mirage SECOND_ORDER_RESOLUTION_HOURS of 168 (7
    days).  Metrics with a different resolution/frequency may require
    different values appropriate for the metric resolution.

    Example CUSTOM_ALGORITHMS configuration:

    'adtk_level_shift': {
        'namespaces': [
            'skyline.analyzer.run_time', 'skyline.analyzer.total_metrics',
            'skyline.analyzer.exceptions'
        ],
        'algorithm_source': '/opt/skyline/github/skyline/skyline/custom_algorithms/adtk_level_shift.py',
        'algorithm_parameters': {'c': 9.0, 'run_every': 5, 'side': 'both', 'window': 5},
        'max_execution_time': 0.5,
        'consensus': 1,
        'algorithms_allowed_in_consensus': ['adtk_level_shift'],
        'run_3sigma_algorithms': True,
        'run_before_3sigma': True,
        'run_only_if_consensus': False,
        'use_with': ["analyzer", "mirage"],
        'debug_logging': False,
    },

    """

    # You MUST define the algorithm_name
    algorithm_name = 'adtk_level_shift'

    # Define the default state of None and None, anomalous does not default to
    # False as that is not correct, False is only correct if the algorithm
    # determines the data point is not anomalous.  The same is true for the
    # anomalyScore.
anomalous = None anomalyScore = None # @aded 20210308 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification return_anomalies = False anomalies = [] realtime_analysis = True current_logger = None # If you wanted to log, you can but this should only be done during # testing and development def get_log(current_skyline_app): current_skyline_app_logger = current_skyline_app + 'Log' current_logger = logging.getLogger(current_skyline_app_logger) return current_logger start = timer() # Use the algorithm_parameters to determine the sample_period debug_logging = None try: debug_logging = algorithm_parameters['debug_logging'] except: debug_logging = False if debug_logging: try: current_logger = get_log(current_skyline_app) current_logger.debug('debug :: %s :: debug_logging enabled with algorithm_parameters - %s' % ( algorithm_name, str(algorithm_parameters))) except: # This except pattern MUST be used in ALL custom algortihms to # facilitate the traceback from any errors. The algorithm we want to # run super fast and without spamming the log with lots of errors. # But we do not want the function returning and not reporting # anything to the log, so the pythonic except is used to "sample" any # algorithm errors to a tmp file and report once per run rather than # spewing tons of errors into the log e.g. analyzer.log record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) # Return None and None as the algorithm could not determine True or False return (False, None) # Allow the LevelShiftAD window parameter to be passed in the # algorithm_parameters window = 5 try: window = algorithm_parameters['window'] except: pass # Allow the LevelShiftAD c parameter to be passed in the # algorithm_parameters c = 9.0 try: c = algorithm_parameters['c'] except: pass run_every = window try: run_every = algorithm_parameters['run_every'] except: pass side = 'both' try: side = algorithm_parameters['side'] except: pass if debug_logging: current_logger.debug('debug :: algorithm_parameters :: %s' % ( str(algorithm_parameters))) # @added 20210308 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification try: return_anomalies = algorithm_parameters['return_anomalies'] except: return_anomalies = False try: realtime_analysis = algorithm_parameters['realtime_analysis'] except: realtime_analysis = True # @added 20210316 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification save_plots_to = False try: save_plots_to = algorithm_parameters['save_plots_to'] except: pass # @added 20210323 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification save_plots_to_absolute_dir = False try: save_plots_to_absolute_dir = algorithm_parameters['save_plots_to_absolute_dir'] except: pass filename_prefix = False try: filename_prefix = algorithm_parameters['filename_prefix'] except: pass # @added 20210318 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification run_PersistAD = False try: run_PersistAD = algorithm_parameters['run_PersistAD'] except: pass if debug_logging: current_logger.debug('debug :: algorithm_parameters :: %s' % ( str(algorithm_parameters))) try: base_name = algorithm_parameters['base_name'] except: # This except pattern MUST be used in ALL custom algortihms to # facilitate the traceback from any errors. The algorithm we want to # run super fast and without spamming the log with lots of errors. 
# But we do not want the function returning and not reporting # anything to the log, so the pythonic except is used to "sample" any # algorithm errors to a tmp file and report once per run rather than # spewing tons of errors into the log e.g. analyzer.log record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) # Return None and None as the algorithm could not determine True or False if return_anomalies: return (False, None, anomalies) else: return (False, None) if debug_logging: current_logger.debug('debug :: %s :: base_name - %s' % ( algorithm_name, str(base_name))) # Due to the load and runtime of LevelShiftAD it is only run in analyzer # periodically if current_skyline_app == 'analyzer': redis_conn_decoded = get_redis_conn_decoded(current_skyline_app) last_hash_key = 'analyzer.last.%s' % algorithm_name last_check = None try: raw_last_check = redis_conn_decoded.hget(last_hash_key, base_name) last_check = int(raw_last_check) except: last_check = None last_window_timestamps = [int(item[0]) for item in timeseries[-run_every:]] if last_check in last_window_timestamps: if debug_logging: current_logger.debug('debug :: %s :: run_every period is not over yet, skipping base_name - %s' % ( algorithm_name, str(base_name))) if return_anomalies: return (False, None, anomalies) else: return (False, None) # If there is no last timestamp, shard the metric, it will eventually # be added. if not last_check: now = datetime.datetime.now() now_seconds = int(now.second) if now_seconds == 0: now_seconds = 1 period_seconds = int(60 / run_every) shard = int(period_seconds) last_shard = 60 shard = int(period_seconds) shards = [shard] while shard < last_shard: shard = shard + period_seconds shards.append((shard)) shard_value = round(now_seconds / shards[0]) * shards[0] if shard_value <= shards[0]: shard_value = shards[0] metric_as_bytes = str(base_name).encode() value = zlib.adler32(metric_as_bytes) shard_index = [(index + 1) for index, s_value in enumerate(shards) if s_value == shard_value][0] modulo_result = value % shard_index if modulo_result == 0: if debug_logging: current_logger.debug('debug :: %s :: skipping as not sharded into this run - %s' % ( algorithm_name, str(base_name))) if return_anomalies: return (False, None, anomalies) else: return (False, None) if debug_logging: current_logger.debug('debug :: %s :: analysing %s' % ( algorithm_name, str(base_name))) try: int_metric_timestamp = int(timeseries[-1][0]) except: int_metric_timestamp = 0 if int_metric_timestamp: try: redis_conn_decoded.hset( last_hash_key, base_name, int_metric_timestamp) except: pass # ALWAYS WRAP YOUR ALGORITHM IN try and the BELOW except try: start_preprocessing = timer() # INFO: Sorting time series of 10079 data points took 0.002215 seconds timeseries = sorted(timeseries, key=lambda x: x[0]) if debug_logging: current_logger.debug('debug :: %s :: time series of length - %s' % ( algorithm_name, str(len(timeseries)))) # Testing the data to ensure it meets minimum requirements, in the case # of Skyline's use of the LevelShiftAD algorithm this means that: # - the time series must have at least 75% of its full_duration # - the time series must have at least 99% of the data points for the # in the sample being analysed. 
do_not_use_sparse_data = False if current_skyline_app == 'analyzer': do_not_use_sparse_data = True # @added 20210305 - Feature #3970: custom_algorithm - adtk_level_shift # Task #3664:: POC with adtk # With mirage also do not run LevelShiftAD on sparsely populated data if current_skyline_app == 'mirage': do_not_use_sparse_data = True # @aded 20210309 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification if current_skyline_app == 'luminosity': do_not_use_sparse_data = True if do_not_use_sparse_data: total_period = 0 total_datapoints = 0 try: start_timestamp = int(timeseries[0][0]) end_timestamp = int(timeseries[-1][0]) total_period = end_timestamp - start_timestamp total_datapoints = len(timeseries) except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: debug_logging :: %s :: failed to determine total_period and total_datapoints' % ( algorithm_name)) timeseries = [] if not timeseries: if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) if current_skyline_app == 'analyzer': # Default for analyzer at required period to 18 hours period_required = int(FULL_DURATION * 0.75) else: # Determine from timeseries if total_period < FULL_DURATION: period_required = int(FULL_DURATION * 0.75) else: period_required = int(total_period * 0.75) # If the time series does not have 75% of its full_duration it does not # have sufficient data to sample try: if total_period < period_required: if debug_logging: current_logger.debug('debug :: %s :: time series does not have sufficient data' % ( algorithm_name)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: debug_logging :: %s :: falied to determine if time series has sufficient data' % ( algorithm_name)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) # If the time series does not have 75% of its full_duration data points # it does not have sufficient data to sample # Determine resolution from the last 30 data points # INFO took 0.002060 seconds resolution_timestamps = [] metric_resolution = False for metric_datapoint in timeseries[-30:]: timestamp = int(metric_datapoint[0]) resolution_timestamps.append(timestamp) timestamp_resolutions = [] if resolution_timestamps: last_timestamp = None for timestamp in resolution_timestamps: if last_timestamp: resolution = timestamp - last_timestamp timestamp_resolutions.append(resolution) last_timestamp = timestamp else: last_timestamp = timestamp try: del resolution_timestamps except: pass if timestamp_resolutions: try: timestamp_resolutions_count = 
Counter(timestamp_resolutions) ordered_timestamp_resolutions_count = timestamp_resolutions_count.most_common() metric_resolution = int(ordered_timestamp_resolutions_count[0][0]) except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: debug_logging :: %s :: failed to determine if time series has sufficient data' % ( algorithm_name)) try: del timestamp_resolutions except: pass minimum_datapoints = None if metric_resolution: minimum_datapoints = int(period_required / metric_resolution) if minimum_datapoints: if total_datapoints < minimum_datapoints: if debug_logging: current_logger.debug('debug :: %s :: time series does not have sufficient data, minimum_datapoints required is %s and time series has %s' % ( algorithm_name, str(minimum_datapoints), str(total_datapoints))) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) # Is the time series fully populated? # full_duration_datapoints = int(full_duration / metric_resolution) total_period_datapoints = int(total_period / metric_resolution) # minimum_percentage_sparsity = 95 minimum_percentage_sparsity = 90 sparsity = int(total_datapoints / (total_period_datapoints / 100)) if sparsity < minimum_percentage_sparsity: if debug_logging: current_logger.debug('debug :: %s :: time series does not have sufficient data, minimum_percentage_sparsity required is %s and time series has %s' % ( algorithm_name, str(minimum_percentage_sparsity), str(sparsity))) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) if len(set(item[1] for item in timeseries)) == 1: if debug_logging: current_logger.debug('debug :: %s :: time series does not have sufficient variability, all the values are the same' % algorithm_name) anomalous = False anomalyScore = 0.0 if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) end_preprocessing = timer() preprocessing_runtime = end_preprocessing - start_preprocessing if debug_logging: current_logger.debug('debug :: %s :: preprocessing took %.6f seconds' % ( algorithm_name, preprocessing_runtime)) if not timeseries: if debug_logging: current_logger.debug('debug :: %s :: LevelShiftAD not run as no data' % ( algorithm_name)) anomalies = [] if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) else: if debug_logging: current_logger.debug('debug :: %s :: timeseries length: %s' % ( algorithm_name, str(len(timeseries)))) if len(timeseries) < 100: if debug_logging: current_logger.debug('debug :: %s :: time series does not have sufficient data' % ( algorithm_name)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) start_analysis = timer() try: df = pd.DataFrame(timeseries, columns=['date', 'value']) df['date'] = pd.to_datetime(df['date'], unit='s') datetime_index = pd.DatetimeIndex(df['date'].values) df = df.set_index(datetime_index) df.drop('date', axis=1, inplace=True) s = validate_series(df) level_shift_ad = LevelShiftAD(c=c, side=side, window=window) anomaly_df = 
level_shift_ad.fit_detect(s) anomalies = anomaly_df.loc[anomaly_df['value'] > 0] anomalous = False if len(anomalies) > 0: anomaly_timestamps = list(anomalies.index.astype(np.int64) // 10**9) if realtime_analysis: last_window_timestamps = [int(item[0]) for item in timeseries[-window:]] # if timeseries[-1][0] in anomaly_timestamps: for timestamp in last_window_timestamps: if timestamp in anomaly_timestamps: anomalous = True break else: anomalous = True # Convert anomalies dataframe to anomalies_list anomalies_list = [] # @added 20210316 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification # Convert anomalies dataframe to anomalies_dict anomalies_dict = {} anomalies_dict['metric'] = base_name anomalies_dict['timestamp'] = int(timeseries[-1][0]) anomalies_dict['from_timestamp'] = int(timeseries[0][0]) anomalies_dict['algorithm'] = algorithm_name anomalies_dict['anomalies'] = {} for ts, value in timeseries: if int(ts) in anomaly_timestamps: anomalies_list.append([int(ts), value]) anomalies_dict['anomalies'][int(ts)] = value anomalies = list(anomalies_list) # @added 20210316 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification if save_plots_to: try: from adtk.visualization import plot metric_dir = base_name.replace('.', '/') timestamp_dir = str(int(timeseries[-1][0])) save_path = '%s/%s/%s/%s' % ( save_plots_to, algorithm_name, metric_dir, timestamp_dir) if save_plots_to_absolute_dir: save_path = '%s' % save_plots_to anomalies_dict['file_path'] = save_path save_to_file = '%s/%s.%s.png' % ( save_path, algorithm_name, base_name) if filename_prefix: save_to_file = '%s/%s.%s.%s.png' % ( save_path, filename_prefix, algorithm_name, base_name) save_to_path = os_path_dirname(save_to_file) title = '%s\n%s' % (algorithm_name, base_name) if not os_path_exists(save_to_path): try: mkdir_p(save_to_path) except Exception as e: current_logger.error('error :: %s :: failed to create dir - %s - %s' % ( algorithm_name, save_to_path, e)) if os_path_exists(save_to_path): try: plot(s, anomaly=anomaly_df, anomaly_color='red', title=title, save_to_file=save_to_file) if debug_logging: current_logger.debug('debug :: %s :: plot saved to - %s' % ( algorithm_name, save_to_file)) except Exception as e: current_logger.error('error :: %s :: failed to plot - %s - %s' % ( algorithm_name, base_name, e)) anomalies_file = '%s/%s.%s.anomalies_list.txt' % ( save_path, algorithm_name, base_name) with open(anomalies_file, 'w') as fh: fh.write(str(anomalies_list)) # os.chmod(anomalies_file, mode=0o644) data_file = '%s/data.txt' % (save_path) with open(data_file, 'w') as fh: fh.write(str(anomalies_dict)) except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called during save plot, exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except Exception as e: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: %s :: failed to plot or save anomalies file - %s - %s' % ( algorithm_name, base_name, e)) else: anomalies = [] # @added 20210318 - Feature #3978: luminosity - classify_metrics # Feature #3642: Anomaly type classification if anomalies and run_PersistAD and not realtime_analysis: persist_ad_algorithm_parameters = {} try: persist_ad_algorithm_parameters = 
algorithm_parameters['persist_ad_algorithm_parameters'] except: pass persist_ad_window = 20 try: persist_ad_window = persist_ad_algorithm_parameters['window'] except: pass persist_ad_c = 9.9 try: persist_ad_c = persist_ad_algorithm_parameters['c'] except: pass try: from adtk.detector import PersistAD persist_ad = PersistAD(c=persist_ad_c, side='both', window=persist_ad_window) persist_ad_anomaly_df = persist_ad.fit_detect(s) persist_ad_anomalies = persist_ad_anomaly_df.loc[persist_ad_anomaly_df['value'] > 0] if len(persist_ad_anomalies) > 0: current_logger.info('%s :: %s anomalies found with PersistAD on %s' % ( algorithm_name, str(len(persist_ad_anomalies)), base_name)) persist_ad_anomaly_timestamps = list(persist_ad_anomalies.index.astype(np.int64) // 10**9) # Convert persist_ad_anomalies dataframe to persist_ad_anomalies_list persist_ad_anomalies_list = [] persist_ad_anomalies_dict = {} persist_ad_anomalies_dict['metric'] = base_name persist_ad_anomalies_dict['timestamp'] = int(timeseries[-1][0]) persist_ad_anomalies_dict['from_timestamp'] = int(timeseries[0][0]) persist_ad_anomalies_dict['algorithm'] = 'adtk_PersistAD' persist_ad_anomalies_dict['anomalies'] = {} for ts, value in timeseries: if int(ts) in persist_ad_anomaly_timestamps: persist_ad_anomalies_list.append([int(ts), value]) persist_ad_anomalies_dict['anomalies'][int(ts)] = value persist_ad_anomalies = list(persist_ad_anomalies_list) if save_plots_to: try: from adtk.visualization import plot metric_dir = base_name.replace('.', '/') timestamp_dir = str(int(timeseries[-1][0])) save_path = '%s/%s/%s/%s' % ( save_plots_to, algorithm_name, metric_dir, timestamp_dir) if save_plots_to_absolute_dir: save_path = '%s' % save_plots_to persist_ad_anomalies_dict['file_path'] = save_path save_to_file = '%s/%s.PersistAD.%s.png' % ( save_path, algorithm_name, base_name) if filename_prefix: save_to_file = '%s/%s.%s.%s.png' % ( save_path, filename_prefix, algorithm_name, base_name) save_to_path = os_path_dirname(save_to_file) title = '%s - PersistAD verification\n%s' % (algorithm_name, base_name) if not os_path_exists(save_to_path): try: mkdir_p(save_to_path) except Exception as e: current_logger.error('error :: %s :: failed to create dir - %s - %s' % ( algorithm_name, save_to_path, e)) if os_path_exists(save_to_path): try: plot(s, anomaly=persist_ad_anomaly_df, anomaly_color='red', title=title, save_to_file=save_to_file) if debug_logging: current_logger.debug('debug :: %s :: plot saved to - %s' % ( algorithm_name, save_to_file)) except Exception as e: current_logger.error('error :: %s :: failed to plot - %s - %s' % ( algorithm_name, base_name, e)) anomalies_file = '%s/%s.%s.PersistAD.anomalies_list.txt' % ( save_path, algorithm_name, base_name) with open(anomalies_file, 'w') as fh: fh.write(str(persist_ad_anomalies)) # os.chmod(anomalies_file, mode=0o644) data_file = '%s/PersistAD.data.txt' % (save_path) with open(data_file, 'w') as fh: fh.write(str(persist_ad_anomalies_dict)) except Exception as e: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: %s :: failed to plot or save PersistAD anomalies file - %s - %s' % ( algorithm_name, base_name, e)) except Exception as e: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: %s :: 
failed to analysis with PersistAD anomalies file - %s - %s' % ( algorithm_name, base_name, e)) try: del df except: pass except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called, during analysis, exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except: traceback_msg = traceback.format_exc() record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg) if debug_logging: current_logger.error(traceback_msg) current_logger.error('error :: debug_logging :: %s :: failed to run on ts' % ( algorithm_name)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) end_analysis = timer() analysis_runtime = end_analysis - start_analysis if debug_logging: current_logger.debug('debug :: %s :: LevelShiftAD took %.6f seconds' % ( algorithm_name, analysis_runtime)) if anomalous: anomalyScore = 1.0 else: anomalyScore = 0.0 if debug_logging: current_logger.info('%s :: anomalous - %s, anomalyScore - %s' % ( algorithm_name, str(anomalous), str(anomalyScore))) if debug_logging: end = timer() processing_runtime = end - start current_logger.info('%s :: completed analysis in %.6f seconds' % ( algorithm_name, processing_runtime)) try: del timeseries except: pass if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except SystemExit as e: if debug_logging: current_logger.debug('debug_logging :: %s :: SystemExit called (before StopIteration), exiting - %s' % ( algorithm_name, e)) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore) except StopIteration: # This except pattern MUST be used in ALL custom algortihms to # facilitate the traceback from any errors. The algorithm we want to # run super fast and without spamming the log with lots of errors. # But we do not want the function returning and not reporting # anything to the log, so the pythonic except is used to "sample" any # algorithm errors to a tmp file and report once per run rather than # spewing tons of errors into the log e.g. analyzer.log if return_anomalies: return (False, None, anomalies) else: return (False, None) except: record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc()) # Return None and None as the algorithm could not determine True or False if return_anomalies: return (False, None, anomalies) else: return (False, None) if return_anomalies: return (anomalous, anomalyScore, anomalies) else: return (anomalous, anomalyScore)
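A minimal invocation sketch for the LevelShiftAD custom algorithm above. The function name adtk_level_shift, the metric name and the timeseries values are assumptions; the parameter keys mirror the algorithm_parameters lookups in the body, and current_skyline_app, parent_pid, timeseries and algorithm_parameters are taken to be the function's arguments since they are referenced but never assigned.

import os
import time

# Hypothetical call into the custom algorithm defined above; every literal
# here (metric name, timestamps, values) is illustrative only.
current_skyline_app = 'analyzer'
parent_pid = os.getpid()
now = int(time.time())
# list of [timestamp, value] pairs, oldest first, one point per minute
timeseries = [[now - (60 * i), float(i % 10)] for i in range(1440, 0, -1)]
algorithm_parameters = {
    'base_name': 'stats.web01.cpu.user',  # assumed metric name
    'window': 5,
    'c': 9.0,
    'side': 'both',
    'run_every': 5,
    'debug_logging': False,
    'return_anomalies': True,
    'realtime_analysis': True,
}
# With return_anomalies=True the function returns a 3-tuple, otherwise a 2-tuple
anomalous, anomalyScore, anomalies = adtk_level_shift(
    current_skyline_app, parent_pid, timeseries, algorithm_parameters)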
from unittest import TestCase, main as unittest_main
from os import pardir as os_pardir, remove as os_remove
from os.path import (join as os_path_join, dirname as os_path_dirname,
                     abspath as os_path_abspath, exists as os_path_exists)
from sys import path as sys_path
from urllib.request import urlopen
from shutil import copyfileobj as shutil_copyfileobj

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from vorbis.vorbis_main import PacketsProcessor, CorruptedFileDataError

TEST_FILE_1_PATH = os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                'test_audiofiles', 'test_1.ogg')

TEST_FILE_NOT_OGG_PATH = os_path_join(
    os_path_dirname(os_path_abspath(__file__)),
    'test_audiofiles', 'test_wrong_ogg_file.ogg')

TEST_FILE_NOT_VORBIS_PATH = os_path_join(
    os_path_dirname(os_path_abspath(__file__)),
    'test_audiofiles', 'test_wrong_vorbis_file.ogg')

TEST_FILE_NOT_OGG_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/tests'
    r'/test_audiofiles/test_wrong_ogg_file.ogg')

TEST_FILE_NOT_VORBIS_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/tests'
    r'/test_audiofiles/test_wrong_vorbis_file.ogg')
def r3_dnn_apply_keras(target_dirname, old_stft_obj=None, cuda=False, saving_to_disk=True): LOGGER.info( '{}: r3: Denoising original stft with neural network model...'.format( target_dirname)) ''' r3_dnn_apply takes an old_stft object (or side effect load from disk) and saves a new_stft object ''' scan_battery_dirname = os_path_dirname(target_dirname) model_dirname = os_path_dirname(os_path_dirname(scan_battery_dirname)) # load stft data if old_stft_obj is None: old_stft_fpath = os_path_join(target_dirname, 'old_stft.mat') with h5py_File(old_stft_fpath, 'r') as f: stft = np_concatenate( [f['old_stft_real'][:], f['old_stft_imag'][:]], axis=1) else: stft = np_concatenate( [old_stft_obj['old_stft_real'], old_stft_obj['old_stft_imag']], axis=1) N_beams, N_elements_2, N_segments, N_fft = stft.shape N_elements = N_elements_2 // 2 # combine stft_real and stft_imag # move element position axis stft = np_moveaxis(stft, 1, 2) # TODO: Duplicate? # reshape the to flatten first two axes stft = np_reshape( stft, [N_beams * N_segments, N_elements_2, N_fft]) # TODO: Duplicate? # process stft with networks k_mask = list(range(3, 6)) for frequency in k_mask: process_each_frequency_keras(model_dirname, stft, frequency) # reshape the stft data stft = np_reshape( stft, [N_beams, N_segments, N_elements_2, N_fft]) # TODO: Duplicate? # set zero outside analysis frequency range discard_mask = np_ones_like(stft, dtype=bool) discard_mask[:, :, :, k_mask] = False # pylint: disable=E1137 stft[discard_mask] = 0 del discard_mask # mirror data to negative frequencies using conjugate symmetry end_index = N_fft // 2 stft[:, :, :, end_index + 1:] = np_flip(stft[:, :, :, 1:end_index], axis=3) stft[:, :, N_elements:2 * N_elements, end_index + 1:] = -1 * stft[:, :, N_elements:2 * N_elements, end_index + 1:] # move element position axis stft = np_moveaxis(stft, 1, 2) # TODO: Duplicate? # change variable names # new_stft_real = stft[:, :N_elements, :, :] new_stft_real = stft[:, :N_elements, :, :].transpose() # new_stft_imag = stft[:, N_elements:, :, :] new_stft_imag = stft[:, N_elements:, :, :].transpose() del stft # change dimensions # new_stft_real = new_stft_real.transpose() # new_stft_imag = new_stft_imag.transpose() # save new stft data new_stft_obj = { 'new_stft_real': new_stft_real, 'new_stft_imag': new_stft_imag } if saving_to_disk is True: new_stft_fname = os_path_join(target_dirname, 'new_stft.mat') savemat(new_stft_fname, new_stft_obj) LOGGER.info('{}: r3 Done.'.format(target_dirname)) return new_stft_obj
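A short usage sketch for the denoising entry point above; the directory path and the array shapes are assumptions inferred from the concatenation and reshape steps in the function body, not values taken from the project.

import numpy as np

# Hypothetical call: when old_stft_obj is supplied, no old_stft.mat is read
# from disk. Shapes follow (N_beams, N_elements, N_segments, N_fft) as implied
# by the reshapes above and are illustrative only.
old_stft_obj = {
    'old_stft_real': np.zeros((96, 64, 10, 16)),
    'old_stft_imag': np.zeros((96, 64, 10, 16)),
}
new_stft_obj = r3_dnn_apply_keras('DNNs/model_1/scan_batteries/target_1',
                                  old_stft_obj=old_stft_obj,
                                  saving_to_disk=False)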
def _file_open_mkdirp(path: str):
    os_makedirs(os_path_dirname(path), exist_ok=True)
    return open(path, "wb")
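A small usage sketch for the helper above; the path is illustrative. Missing parent directories are created before the binary file handle is returned.

# Hypothetical usage: parent directories are created on demand.
with _file_open_mkdirp('objects/ab/cdef0123') as f:
    f.write(b'loose object payload')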
## Copyright 2019 Dynatrace LLC
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
##     http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## ---------------------------------------------------------------------------
"""Filenames used by compose module tests
"""
from os.path import dirname as os_path_dirname
from pathlib import Path as pathlib_Path

CONFIG_SAMPLE_DIR = pathlib_Path(os_path_dirname(__file__)) / "config-samples"

COMPOSE_SMALL = CONFIG_SAMPLE_DIR / "compose-small.yaml"
COMPOSE_BIG = CONFIG_SAMPLE_DIR / "compose-big.yaml"
COMPOSE_CYCLE = CONFIG_SAMPLE_DIR / "compose-cycle.yaml"
COMPOSE_TRICKY = CONFIG_SAMPLE_DIR / "compose-tricky.yaml"

TEA_TASKS_DIR = pathlib_Path(os_path_dirname(__file__)) / "tea-tasks"

BOIL_WATER = TEA_TASKS_DIR / "boil_water.py"
POUR_WATER = TEA_TASKS_DIR / "pour_water.py"
PREP_INFUSER = TEA_TASKS_DIR / "prep_infuser.py"
STEEP_TEA = TEA_TASKS_DIR / "steep_tea.py"
def main(argv: list) -> int: """Passphrase CLI interface.""" passphrase = Passphrase() # Set defaults passphrase.entropy_bits_req = ENTROPY_BITS_MIN passwordlen_default = passphrase.password_length_needed() amount_n_default = 0 passphrase.amount_n = amount_n_default # To avoid loading the wordlist unnecessarily, I'm hardcoding this value # It's ok, it's only used to show help information amount_w_default = 6 parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, description='{version_string}\n\n' 'Generates a cryptographically secure passphrase, based on ' 'a wordlist, or a\npassword, and prints it to standard output.\n' 'By default, it uses an embedded EFF Large Wordlist for passphrases.\n' 'Passphrases with less than {wordsamountmin} words are considered ' 'insecure. A safe bet is \nbetween {wordsamountmin} and 7 words, ' 'plus at least a number.\n' 'For passwords, use at least {passwdmin} characters, but prefer ' '{passwdpref} or more, using the\ncomplete characters set.\n\n' 'Instead of words and numbers, a password (random string of ' 'printable\ncharacters from Python String standard) can be generated ' 'by\n-p | --password, specifying the length. It uses uppercase, ' 'lowercase, digits\nand punctuation characters unless otherwise ' 'specified.\n' 'Also, a UUID v4 string can be generated by --uuid4 or a coin can be ' 'thrown\nwith --coin.\n' 'A custom wordlist can be specified by -i | --input, the format must ' 'be: \nsingle column, one word per line. If -d | --diceware is used, ' 'the input\nfile is treated as a diceware wordlist (two columns).' '\nOptionally, -o | --output can be used to specify an output file ' '(existing \nfile is overwritten).\n' 'The number of words is {wordsamountmin} by default, but it ' 'can be changed by -w | --words.\n' 'The number of numbers is {numsamountmin} by default, but it can be ' 'changed by\n-n | --numbers. 
The generated numbers are between ' '{minnum} and {maxnum}.\n' 'The default separator is a blank space, but any character or ' 'character\nsequence can be specified by -s | --separator.\n' '\nExample output:\n' '\tDefault parameters:\tchalice sheath postcard modular cider size\n' '\tWords=3, Numbers=2:\tdepraved widow office 184022 320264\n' '\tPassword, 20 chars:\tsF#s@B+iR#ZIL-yUWKPR'.format( version_string=__version_string__, minnum=passphrase.randnum_min, maxnum=passphrase.randnum_max, wordsamountmin=amount_w_default, numsamountmin=amount_n_default, passwdmin=passwordlen_default, passwdpref=passwordlen_default + 4)) parser.add_argument( '--version', action='store_true', help='print program version and licensing information and exit') parser.add_argument( '--insecure', action='store_true', default=False, help="force password/passphrase generation even if the system's " "entropy is too low") parser.add_argument( '--no-newline', action='store_true', default=False, help="don't print newline at the end of the passphrase/password") parser.add_argument( '-m', '--mute', action='store_true', default=False, help="muted mode: it won't print output, only informational, warning " "or error messages (usefull with -o | --output)") parser.add_argument( '-v', '--verbose', action='store_true', default=False, help='print additional information (can coexist with -m | --mute)') parser.add_argument( '-e', '--entropybits', type=_bigger_than_zero, default=ENTROPY_BITS_MIN, help='specify the number of bits to use for entropy calculations ' '(defaults to {})'.format(ENTROPY_BITS_MIN)) parser.add_argument('--uuid4', action='store_true', default=False, help='generate an UUID v4 string') parser.add_argument('--coin', action='store_true', default=False, help='generate a random coin throw: heads or tails') parser.add_argument( '-p', '--password', type=_bigger_than_zero, const=-1, nargs='?', help='generate a password of the specified length from all printable ' 'or selected characters') parser.add_argument( '--use-uppercase', type=_bigger_than_zero, const=0, nargs='?', help='use uppercase characters for password generation or give the ' 'amount of uppercase characters in the passphrase: zero or no ' 'input for all uppercase or any number of uppercase ' 'characters wanted (the rest are lowercase)') parser.add_argument( '--use-lowercase', type=_bigger_than_zero, const=0, nargs='?', help='use lowercase characters for password generation or give the ' 'amount of lowercase characters in the passphrase: zero or no ' 'input for all lowercase (default) or any number of lowercase ' 'characters wanted (the rest are uppercase)') parser.add_argument('--use-digits', action='store_true', default=False, help='use digits for password generation') parser.add_argument( '--use-alphanumeric', action='store_true', default=False, help='use lowercase and uppercase characters, and digits for password ' 'generation (equivalent to --use-lowercase --use-uppercase ' '--use-digits)') parser.add_argument( '--use-punctuation', action='store_true', default=False, help='use punctuation characters for password generation') parser.add_argument('-w', '--words', type=_bigger_than_zero, help='specify the amount of words (0 or more)') parser.add_argument('-n', '--numbers', type=_bigger_than_zero, default=amount_n_default, help='specify the amount of numbers (0 or more)') parser.add_argument( '-s', '--separator', type=str, default=' ', help='specify a separator character (space by default)') parser.add_argument( '-o', '--output', type=str, help='specify an 
output file (existing file is overwritten)') parser.add_argument( '-i', '--input', type=str, help='specify an input file (it must have the following format: ' 'single column, one word per line)') parser.add_argument( '-d', '--diceware', action='store_true', default=False, help='specify input file as a diceware list (format: two colums)') args = parser.parse_args(argv) inputfile = args.input outputfile = args.output separator = args.separator is_diceware = args.diceware passwordlen = args.password amount_w = args.words amount_n = args.numbers show_version = args.version mute = args.mute verbose = args.verbose no_newline = args.no_newline gen_uuid4 = args.uuid4 gen_coin = args.coin p_uppercase = args.use_uppercase p_lowercase = args.use_lowercase p_digits = args.use_digits p_punctuation = args.use_punctuation p_alphanumeric = args.use_alphanumeric entropy_bits = args.entropybits gen_insecure = args.insecure if show_version: print(__version_string__) return 0 if verbose: Aux.print_stderr(__version_string__) # Check system entropy system_entropy = Aux.system_entropy() if system_entropy < SYSTEM_ENTROPY_BITS_MIN: Aux.print_stderr( 'Warning: the system has too few entropy: {} bits; randomness ' 'quality could be poor'.format(system_entropy)) if not gen_insecure: Aux.print_stderr('Error: system entropy too low: {system_entropy} ' '< {system_entropy_min}'.format( system_entropy=system_entropy, system_entropy_min=SYSTEM_ENTROPY_BITS_MIN)) return 1 if verbose: Aux.print_stderr( 'Using {} bits of entropy for calculations (if any). The minimum ' 'recommended is {}'.format(entropy_bits, ENTROPY_BITS_MIN)) # Check selected entropy check_chosen_entropy = False if gen_uuid4 or gen_coin else not ( amount_n and amount_w and passwordlen is None) if check_chosen_entropy and entropy_bits < ENTROPY_BITS_MIN: Aux.print_stderr( 'Warning: insecure number of bits for entropy calculations ' 'chosen! Should be bigger than {}'.format(ENTROPY_BITS_MIN)) passphrase.entropy_bits_req = entropy_bits # Generate whatever is requested if gen_uuid4: # Generate uuid4 if verbose: Aux.print_stderr('Generating UUID v4') gen_what = 'UUID v4' gen_ent = 120 passphrase.generate_uuid4() passphrase.separator = '-' elif gen_coin: # Generate a coin throw if verbose: Aux.print_stderr('Throwing a coin') gen_what = 'coin' gen_ent = 1 passphrase = 'Heads' if randbool() else 'Tails' elif passwordlen is not None: # Generate a password gen_what = 'password' p_uppercase = True if p_uppercase is not None else False p_lowercase = True if p_lowercase is not None else False if (p_uppercase or p_lowercase or p_digits or p_punctuation or p_alphanumeric): passphrase.password_use_uppercase = (p_uppercase or p_alphanumeric) passphrase.password_use_lowercase = (p_lowercase or p_alphanumeric) passphrase.password_use_digits = (p_digits or p_alphanumeric) passphrase.password_use_punctuation = p_punctuation min_len = passphrase.password_length_needed() if passwordlen < 1: passwordlen = min_len elif passwordlen < min_len: Aux.print_stderr( 'Warning: insecure password length chosen! 
Should be bigger ' 'than or equal to {}'.format(min_len)) passphrase.passwordlen = passwordlen gen_ent = passphrase.generated_password_entropy() if verbose: verbose_string = ('Generating password of {} characters long ' 'using '.format(passwordlen)) verbose_string += ('uppercase characters, ' if (passphrase.password_use_uppercase or p_alphanumeric) else '') verbose_string += ('lowercase characters, ' if (passphrase.password_use_lowercase or p_alphanumeric) else '') verbose_string += ('digits, ' if (passphrase.password_use_digits or p_alphanumeric) else '') verbose_string += ('punctuation characters, ' if (passphrase.password_use_punctuation) else '') Aux.print_stderr(verbose_string[:-2] if ( verbose_string[-2:] == ', ') else verbose_string) passphrase.generate_password() passphrase.separator = '' else: # Generate a passphrase gen_what = 'passphrase' # Read wordlist if indicated if inputfile is None: passphrase.load_internal_wordlist() else: try: passphrase.import_words_from_file(inputfile, is_diceware) except IOError: Aux.print_stderr( "Error: input file {} is empty or it can't be opened or " "read".format(inputfile)) return 1 passphrase.amount_n = amount_n amount_w_good = passphrase.words_amount_needed() if amount_w is None: amount_w = amount_w_good elif amount_w < amount_w_good: Aux.print_stderr( 'Warning: insecure amount of words chosen! Should be ' 'bigger than or equal to {}'.format(amount_w_good)) passphrase.amount_w = amount_w gen_ent = passphrase.generated_passphrase_entropy() if verbose: Aux.print_stderr( 'Generating a passphrase of {} words and {} ' 'numbers using {}'.format( amount_w, amount_n, 'internal wordlist' if inputfile is None else ('external wordlist: ' + inputfile + (' (diceware-like)' if is_diceware else '')))) case = (-1 * p_lowercase) if p_lowercase else p_uppercase passphrase.generate(case) passphrase.separator = separator if verbose: Aux.print_stderr('The entropy of this {what} is {ent:.2f} bits'.format( what=gen_what, ent=gen_ent)) if not gen_coin and gen_ent < ENTROPY_BITS_MIN: Aux.print_stderr('Warning: the {} is too short!'.format(gen_what)) if not mute: if no_newline: print(passphrase, end='') else: print(passphrase) if outputfile is not None: # ensure path to file exists or create dir_ = os_path_dirname(outputfile) if dir_: try: os_makedirs(dir_, exist_ok=True) except PermissionError: Aux.print_stderr( 'Error: permission denied to create directory {}'.format( dir_, )) return 1 try: with open(outputfile, mode='wt', encoding='utf-8') as outfile: linefeed = '' if no_newline else '\n' outfile.write(str(passphrase) + linefeed) except IOError: Aux.print_stderr( "Error: file {} can't be opened or written".format( outputfile, )) return 1 return 0
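A brief sketch of driving the CLI entry point above from Python rather than the shell; the flag spellings come from the argparse definitions in main(), while the output path is an illustrative assumption.

import sys

# Equivalent of: passphrase -w 6 -n 1 -o out/secret.txt
exit_code = main(['-w', '6', '-n', '1', '-o', 'out/secret.txt'])

# Password mode: 20 characters, uppercase + lowercase + digits, no newline
exit_code = main(['-p', '20', '--use-alphanumeric', '--no-newline'])

sys.exit(exit_code)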
from unittest import TestCase, main as unittest_main
from os import pardir as os_pardir, remove as os_remove
from os.path import (join as os_path_join, dirname as os_path_dirname,
                     abspath as os_path_abspath, exists as os_path_exists)
from typing import List
from sys import path as sys_path
from urllib.request import urlopen
from shutil import copyfileobj as shutil_copyfileobj

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from vorbis.decoders import (DataReader, SetupHeaderDecoder,
                             EndOfPacketException)
from vorbis.helper_funcs import float32_unpack

TEST_FILE_1_PATH = os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                'test_audiofiles', 'test_1.ogg')

TEST_FILE_1_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/'
    r'tests/test_audiofiles/test_1.ogg')

test_file_1_was_downloaded: bool = False


# noinspection PyPep8Naming
def setUpModule():
    global TEST_FILE_1_PATH

    if not os_path_exists(TEST_FILE_1_PATH):
# -*- coding: utf-8 -*-
import os
from datetime import timedelta
from os.path import abspath as os_path_abspath
from os.path import dirname as os_path_dirname
from os.path import join as os_path_join

current_dir = os_path_abspath(os_path_dirname(__file__))

# BASIC
DEBUG = True
SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os_path_join(current_dir, '../',
                                                      'data.sqlite')
USE_TOKEN_AUTH = True

# EMAIL
MAIL_SERVER = 'smtp.googlemail.com'
MAIL_PORT = 465
MAIL_USE_TLS = False
MAIL_USE_SSL = True
MAIL_USERNAME = os.getenv('GMAIL_USERNAME')
MAIL_PASSWORD = os.getenv('GMAIL_PASSWORD')

# SECURITY
SECRET_KEY = os.getenv('SECRET_KEY') or 'secret_secret_secret'
SECURITY_REGISTERABLE = True
SECURITY_REGISTER_URL = '/auth/register'
SECURITY_PASSWORD_HASH = os.getenv('SECURITY_PASSWORD_HASH') or 'sha512_crypt'
SECURITY_PASSWORD_SALT = os.getenv(
    'SECURITY_PASSWORD_SALT') or 'salt_salt_salt'
JWT_EXPIRATION_DELTA = timedelta(days=10)
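This settings module is presumably consumed by a Flask-style application; a minimal sketch, assuming the module is importable as "config" (the module name and the application wiring are assumptions, not taken from the project).

from flask import Flask

app = Flask(__name__)
# Hypothetical: load the uppercase constants above into the app config.
app.config.from_object('config')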
from logging import basicConfig as logging_basicConfig, \
    DEBUG as logging_DEBUG, \
    INFO as logging_INFO, \
    getLogger as logging_getLogger
from os.path import basename as os_path_basename, \
    dirname as os_path_dirname, \
    join as os_path_join
from shutil import move as shutil_move
from time import time as time_time

from lib.process_single_scan_battery_keras import process_single_scan_battery_keras
from lib.utils import copy_anything

# SCAN_BATTERIES_TARGETS_GLOB_STRING = 'data/BEAM_Reverb_20181004_L74_70mm/target_*_SCR_*_0dB'
# SCAN_BATTERIES_DIRNAME = 'data/BEAM_Reverb_20181004_L74_70mm_selected'
SCAN_BATTERIES_DIRNAME = 'scan_batteries'
MODEL_SAVE_FNAME = 'model.joblib'
MODELS_DIRNAME = 'DNNs'

SCRIPT_FNAME = os_path_basename(__file__)
PROJECT_DIRNAME = os_path_dirname(__file__)
LIB_DIRNAME = os_path_join(PROJECT_DIRNAME, 'lib')


def evaluate_one_model_keras(model_dirpath):
    # rename _trained as _evaluating
    new_folder_name = model_dirpath.replace('_trained', '_evaluating')
    shutil_move(model_dirpath, new_folder_name)

    model_name = os_path_basename(new_folder_name)

    copied_scan_battery_dirname = os_path_join(
        new_folder_name, os_path_basename(SCAN_BATTERIES_DIRNAME))
    copy_anything(SCAN_BATTERIES_DIRNAME, copied_scan_battery_dirname)

    time_start = time_time()
    # with Pool() as pool:
def decompose_spec(article_dec): decompose_listed_subtrees_and_mark_media_descendants(article_dec, DECOMP, MEDIA_LIST) for c in article_dec.children: if isinstance(c, Tag) and c.name == 'h2': c.decompose() publi = article_dec.find('div', class_='head') if publi is not None and publi.find('h3') is not None: publi.decompose() return article_dec LINK_FILTER_SUBSTRINGS_SPEC = re.compile('|'.join(['LINK_FILTER_DUMMY_STRING'])) BLACKLIST_SPEC = [url.strip() for url in open(os_path_join(os_path_dirname(os_path_abspath(__file__)), 'valasz_BLACKLIST.txt')).readlines()] MULTIPAGE_URL_END = re.compile(r'.*?page=.') def next_page_of_article_spec(curr_html): bs = BeautifulSoup(curr_html, 'lxml') if bs.find('article', class_='percro-percre-lista') is not None: next_tag = bs.find('a', rel='next') if next_tag is not None and 'href' in next_tag.attrs.keys(): next_link = next_tag.attrs['href'] link = f'http://valasz.hu{next_link}' return link return None
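A sketch of how the pagination helper above might be driven; the requests-based downloader is a hypothetical stand-in for whatever fetcher the crawler actually uses.

import requests  # hypothetical downloader; the real crawler may differ


def iterate_article_pages(first_url):
    """Yield (url, html) pairs, following links from next_page_of_article_spec."""
    url = first_url
    while url is not None:
        html = requests.get(url).text
        yield url, html
        url = next_page_of_article_spec(html)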
def process_each_frequency(model_dirname, stft, frequency, using_cuda=True): ''' Setter method on stft. ''' is_using_cuda = using_cuda and torch_cuda_is_cuda_available() my_device = torch_device('cuda:0' if is_using_cuda else 'cpu') # 1. Instantiate Neural Network Model model_params_fname = os_path_join( os_path_join(model_dirname, 'k_' + str(frequency)), MODEL_PARAMS_FNAME) model_save_fpath = os_path_join(model_dirname, 'k_' + str(frequency), MODEL_SAVE_FNAME) model = get_which_model_from_params_fname(model_params_fname) model.load_state_dict(torch_load(os_path_join( os_path_dirname(model_save_fpath), 'model.dat'), map_location=my_device), strict=True) model.eval() model = model.to(my_device) if False: model.printing = True from lib.print_layer import PrintLayer new_model_net = [] for layer in model.net: new_model_net.append(layer) new_model_net.append(PrintLayer(layer)) from torch.nn import Sequential model.net = Sequential(*new_model_net) # 2. Get X_test LOGGER.debug('r3.process_each_frequency: stft.shape = {}'.format( stft.shape)) aperture_data = stft[:, :, frequency] # or stft_frequency # 2.1. normalize by L1 norm aperture_data_norm = np_linalg_norm(aperture_data, ord=np_inf, axis=1) aperture_data /= aperture_data_norm[:, np_newaxis] # load into torch and onto gpu aperture_dataset_eval = ApertureDatasetEval(aperture_data) aperture_dataset_loader = DataLoader(aperture_dataset_eval, batch_size=EVAL_BATCH_SIZE, shuffle=False, num_workers=DATALOADER_NUM_WORKERS, pin_memory=using_cuda) # 3. Predict if is_using_cuda is True: torch_cuda_empty_cache() aperture_data_new = predict(model, aperture_dataset_loader, my_device) del aperture_data, model, aperture_dataset_eval, aperture_dataset_loader, my_device if is_using_cuda is True: torch_cuda_empty_cache() # 4. Postprocess on y_hat # rescale the data and store new data in stft stft[:, :, frequency] = aperture_data_new * aperture_data_norm[:, np_newaxis] del aperture_data_new, aperture_data_norm
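For symmetry with the keras pipeline shown earlier, a hedged sketch of the per-frequency loop that would drive this PyTorch variant; the frequency range mirrors the k_mask used above, and model_dirname and stft are assumed to come from the surrounding pipeline.

# Hypothetical driver loop over the analysis frequencies; stft is assumed to
# be shaped (N_beams * N_segments, 2 * N_elements, N_fft) as in r3_dnn_apply_keras.
k_mask = list(range(3, 6))
for frequency in k_mask:
    process_each_frequency(model_dirname, stft, frequency, using_cuda=True)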