Example #1
File: bot.py Project: pageflt/tatianna
def expand_bot_path(filename):
    '''
    Return the absolute path of `filename`, looking first in this module's
    directory ("core/") and then in its parent directory.

    Raises IOError if the file is not found in either location.
    '''
    # try "core/"
    first_try = os_path_join(os_path_dirname(__file__), filename)
    if os_path_isfile(first_try):
        return first_try

    # try "core/.."
    second_try = os_path_join(os_path_dirname(__file__), '..', filename)
    if os_path_isfile(second_try):
        return second_try

    raise IOError('File "{0}" not found under "{1}" or "{2}"'.format(
        filename, first_try, second_try))
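All of the snippets on this page import os.path helpers under flat aliases (os_path_join, os_path_dirname, ...). A minimal, hedged sketch of that import convention and a call to expand_bot_path; the "config.json" file name is illustrative, not part of the original project:

# Aliased imports assumed by the snippet above (a sketch, not the original bot.py header).
from os.path import (
    join as os_path_join,
    dirname as os_path_dirname,
    isfile as os_path_isfile,
)

# Hypothetical usage: resolve "config.json" next to this module, or one directory up;
# IOError is raised if the file exists in neither location.
config_path = expand_bot_path('config.json')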
Example #2
File: bot.py Project: apalos/tatianna
def expand_bot_path(filename):
    '''
    Return the absolute path of `filename`, looking first in this module's
    directory ("core/") and then in its parent directory.

    Raises IOError if the file is not found in either location.
    '''
    # try "core/"
    first_try = os_path_join(os_path_dirname(__file__), filename)
    if os_path_isfile(first_try):
        return first_try

    # try "core/.."
    second_try = os_path_join(os_path_dirname(__file__), '..', filename)
    if os_path_isfile(second_try):
        return second_try

    raise IOError('File "{0}" not found under "{1}" or "{2}"'.format(
                  filename, first_try, second_try))
Example #3
def enable_units(path=None):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those created
    before.

    If gpkit is imported multiple times, this needs to be run each time."""
    # pylint: disable=invalid-name,global-statement
    global DimensionalityError, UNIT_REGISTRY, ureg, units
    try:
        import pint
        if path:
            # let user load their own unit definitions
            UNIT_REGISTRY = pint.UnitRegistry(path)
        if UNIT_REGISTRY is None:
            UNIT_REGISTRY = pint.UnitRegistry()  # use pint default
            path = os_sep.join([os_path_dirname(__file__), "pint"])
            UNIT_REGISTRY.load_definitions(os_sep.join([path, "usd_cpi.txt"]))
            # next line patches https://github.com/hgrecco/pint/issues/366
            UNIT_REGISTRY.define("nautical_mile = 1852 m = nmi")

        ureg = UNIT_REGISTRY
        DimensionalityError = pint.DimensionalityError
        units = GPkitUnits()
    except ImportError:
        print("Optional Python units library (Pint) not installed;"
              " unit support disabled.")
        disable_units()
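The docstring above notes that unit support must be re-enabled after every import of gpkit. A hedged usage sketch, assuming the gpkit package shown in the later __init__.py examples; the custom-definitions path and attribute-style unit access are illustrative assumptions:

import gpkit  # assumes the package that defines enable_units() above

# Use pint's bundled unit definitions (plus gpkit's usd_cpi.txt additions).
gpkit.enable_units()

# Or point pint at a custom definitions file (path is illustrative).
# gpkit.enable_units(path="/path/to/my_units.txt")

if gpkit.units is not None:      # units stays None when Pint is not installed
    span = 3 * gpkit.units.m     # a pint Quantity once unit support is enabled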
Example #5
def _loosedb_raw_object_write(loosedb, presumedhex: shahex, objloose: bytes):
    # assert not loosedb.has_object(_hex2bin(presumedhex))
    objpath = loosedb.db_path(loosedb.object_path(presumedhex))
    # assert not os_path_exists(objpath)
    os_makedirs(os_path_dirname(objpath), exist_ok=True)
    with _file_open_mkdirp(objpath) as f:
        f.write(objloose)
    # FIXME:
    #loosedb.update_cache(force=True)
    assert loosedb.has_object(_hex2bin(presumedhex))
Example #6
def goBlue(self):
    filename = self.SOURCELIST.getFilename()
    if not filename:
        return
    sourceDir = self.SOURCELIST.getCurrentDirectory()
    if os_path_isdir(filename):
        text = _("Rename directory")
        filename = os_path_basename(os_path_dirname(filename))
    else:
        text = _("Rename file")
    self.session.openWithCallback(self.doRename, VirtualKeyBoard, title=text, text=filename)
Example #7
def monitorsPath(self, path):
    if path == self.path:
        return True
    if self.watch_type == WATCH_FILE:
        return False
    elif self.watch_type == WATCH_DIR:
        if os_path_dirname(path) == self.path:
            return True
    elif len(path) > len(self.path):  # and self.watch_type == WATCH_DIR_RECURSIVE
        if path[:len(self.path)] == self.path:
            return True
    return False
Example #8
def move_files_to_folder(*args, **kwargs):
    # Maximum number of backups allowed by the user
    BACKUP_COUNT = bpy.context.user_preferences.filepaths.save_version
    # If saving backups option is 'ON'
    if BACKUP_COUNT:
        # Function level constants
        PATH = bpy.data.filepath  # Full path
        FILE = bpy.path.display_name_from_filepath(PATH)  # File name
        CWD = os_path_dirname(PATH)  # Current Working Directory
        CBD = os_path_join(CWD, BACKUP_FOLDER_NAME)  # Current Backup Directory
        REXT = r"{}\.blend(\d+)$".format(FILE)  # Regex to catch backups
        EXT = "{}.blend{}"  # Extension placeholder
        OLD = EXT.format(FILE, BACKUP_COUNT)  # Oldest backup name

        # Create backup directory if not exists
        try:
            os_makedirs(CBD)
        except OSError as e:
            if e.errno != EEXIST:
                # If other error appears then "dir already exists" reraise
                # the caught error again and print out the traceback
                raise OSError("\n".join(traceback_extract_stack())) from None

        # Walk the files in the current directory, move the ones that are
        # backup files into the backup folder, and keep the number of
        # backups in that folder within the allowed limit
        for filename in reversed(sorted(os_listdir(CWD))):
            # If file is a backup file
            try:
                index = int(re_findall(REXT, filename)[-1])
                # If the file's index exceeds the number of backups allowed,
                # increase_index_and_move returns the file's full path and it
                # is deleted here; otherwise os.remove raises
                # FileNotFoundError, which is handled below
                os_remove(
                    increase_index_and_move(
                        src_folder=CWD,
                        dst_folder=CBD,
                        file=FILE,
                        extension=EXT,
                        src_index=index,
                        dst_index=index,
                        max_index=BACKUP_COUNT,
                    )
                )
            # If file is not a backup file
            except (IndexError, FileNotFoundError):
                pass

        # If everything went fine, print out information
        if PRINT_INFO:
            print(INFO_TEXT.format(CWD, CBD))
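move_files_to_folder relies on several module-level names that are not part of this excerpt (BACKUP_FOLDER_NAME, PRINT_INFO, INFO_TEXT, the aliased os/re/traceback helpers and increase_index_and_move). A hedged sketch of that surrounding setup; the constant values and the helper's behaviour are illustrative assumptions, not the original add-on:

# Hedged sketch of the module-level setup assumed by move_files_to_folder.
from errno import EEXIST
from os import makedirs as os_makedirs, listdir as os_listdir, remove as os_remove
from os.path import join as os_path_join, dirname as os_path_dirname
from re import findall as re_findall
from traceback import extract_stack as traceback_extract_stack

BACKUP_FOLDER_NAME = "backup"    # illustrative: sub-folder collecting .blend1, .blend2, ...
PRINT_INFO = True
INFO_TEXT = "Backups moved from {} to {}"   # illustrative message template


def increase_index_and_move(src_folder, dst_folder, file, extension,
                            src_index, dst_index, max_index):
    """Illustrative stand-in: move file.blend<src_index> into dst_folder as
    file.blend<dst_index> and return the path of whichever backup now exceeds
    max_index, so the caller can delete it."""
    ...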
Example #9
def save_obj(obj, name):
    """
    Save an object as a pickle.
    :param obj: object to save
    :param name: path of the pickle file, without the ``.pkl`` extension
    :return: None
    """

    # if any directory on path doesn't exist - create it
    os_makedirs(os_path_dirname(name), exist_ok=True)

    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
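A hedged usage sketch for save_obj, including the aliased imports the snippet assumes and a matching load helper; load_obj and the example path are illustrative, not part of the original module:

# Imports assumed by save_obj above, plus a hypothetical load counterpart.
import pickle
from os import makedirs as os_makedirs
from os.path import dirname as os_path_dirname


def load_obj(name):
    """Illustrative counterpart: load a pickle written by save_obj."""
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)


save_obj({'epochs': 10, 'lr': 0.001}, 'results/run_01/params')  # writes results/run_01/params.pkl
params = load_obj('results/run_01/params')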
Example #10
def monitorsPath(self, path):
    if path == self.path:
        return True
    if self.watch_type == WATCH_FILE:
        return False
    elif self.watch_type == WATCH_DIR:
        if os_path_dirname(path) == self.path:
            return True
    elif len(path) > len(self.path):  # and self.watch_type == WATCH_DIR_RECURSIVE
        if path[:len(self.path)] == self.path:
            return True
    return False
Example #11
def tmp_file_path(path: str, suffix: str) -> str:
    """ Get path to a temporary file

    Parameters
    ----------
    path
        Path to an existing file; the temporary file is created in the same
        directory, and its name starts with this file's basename.
    suffix
        Suffix appended to the temporary file's name.

    Returns
    -------
        Path to some otherwise unused file.
    """
    in_dir = os_path_dirname(path)
    fname = f"{os_path_basename(path)}."

    tf = tempfile.NamedTemporaryFile(dir=in_dir,
                                     prefix=fname,
                                     suffix=suffix,
                                     delete=False)
    fname = tf.name
    tf.close()
    return fname
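A short hedged usage sketch for tmp_file_path; the dataset path is illustrative. Creating the scratch file in the same directory keeps a later os.replace() onto the original on the same filesystem:

import os
import tempfile
from os.path import basename as os_path_basename, dirname as os_path_dirname

# Hypothetical usage: "/data/samples/reads.fastq" is an illustrative existing file.
scratch = tmp_file_path("/data/samples/reads.fastq", suffix=".tmp")
# e.g. "/data/samples/reads.fastq.XXXXXXXX.tmp"
# ... write the new contents to `scratch`, then atomically swap it in:
# os.replace(scratch, "/data/samples/reads.fastq")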
Example #12
          (('div',), {'class': 'nm_supported__wrapper'}),
          ]
# <div class=miniapp socialbox id=@STYLE>
MEDIA_LIST = []


def decompose_spec(article_dec):
    decompose_listed_subtrees_and_mark_media_descendants(article_dec, DECOMP, MEDIA_LIST)
    # 'mindeközben' titles
    if article_dec.find('div', class_='mindenkozben_post_content content'):
        article_dec.find('h3', class_='title').decompose()
    return article_dec


BLACKLIST_SPEC = [url.strip() for url in
                  open(os_path_join(os_path_dirname(os_path_abspath(__file__)), 'index_koronavirus_BLACKLIST.txt')).readlines()] + \
                 [url.strip() for url in
                  open(os_path_join(os_path_dirname(os_path_abspath(__file__)), 'index_BLACKLIST.txt')).readlines()]
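# Note: the two module-level open() calls above never close their file handles.
# A hedged alternative that reads the same blacklists through a context manager;
# the helper name and the utf-8 encoding are illustrative assumptions.
def _read_blacklist(filename):
    path = os_path_join(os_path_dirname(os_path_abspath(__file__)), filename)
    with open(path, encoding='utf-8') as fh:
        return [url.strip() for url in fh]
# BLACKLIST_SPEC = (_read_blacklist('index_koronavirus_BLACKLIST.txt') +
#                   _read_blacklist('index_BLACKLIST.txt'))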

LINK_FILTER_SUBSTRINGS_SPEC = re.compile('|'.join(['LINK_FILTER_DUMMY_STRING']))

MULTIPAGE_URL_END = re.compile(r'^\b$')  # (r'.*/\?p=.*')
# https://index.hu/belfold/2020/02/29/eloben_kozvetitjuk_az_eddigi_legnagyobb_magyar_lottonyeremeny_kihuzasa/?p=1


def next_page_of_article_spec(curr_html):
    """    bs = BeautifulSoup(curr_html, 'lxml')
        pages = bs.find('div', class_='pagination clearfix')
        if pages is not None:
            for p in pages.find_all('a', class_='next'):
                if 'rel' not in p.attrs.keys():
Example #13
def m66(current_skyline_app, parent_pid, timeseries, algorithm_parameters):
    """
    A time series' data points are anomalous if the 6th median is 6 standard
    deviations (six-sigma) from the time series' 6th-median standard deviation
    and this persists for x_windows, where `x_windows = int(window / 2)`.
    This algorithm finds SIGNIFICANT changepoints in a time series, similar to
    PELT and Bayesian Online Changepoint Detection, however it is more robust to
    instantaneous outliers and more conditionally selective of changepoints.

    :param current_skyline_app: the Skyline app executing the algorithm.  This
        will be passed to the algorithm by Skyline.  This is **required** for
        error handling and logging.  You do not have to worry about handling the
        argument in the scope of the custom algorithm itself, but the algorithm
        must accept it as the first argument.
    :param parent_pid: the parent pid which is executing the algorithm, this is
        **required** for error handling and logging.  You do not have to worry
        about handling this argument in the scope of algorithm, but the
        algorithm must accept it as the second argument.
    :param timeseries: the time series as a list e.g. ``[[1578916800.0, 29.0],
        [1578920400.0, 55.0], ... [1580353200.0, 55.0]]``
    :param algorithm_parameters: a dictionary of any required parameters for the
        custom_algorithm and algorithm itself for example:
        ``algorithm_parameters={
            'nth_median': 6,
            'sigma': 6,
            'window': 5,
            'return_anomalies': True,
        }``
    :type current_skyline_app: str
    :type parent_pid: int
    :type timeseries: list
    :type algorithm_parameters: dict
    :return: True, False or None
    :rtype: boolean

    Example CUSTOM_ALGORITHMS configuration:

    'm66': {
        'namespaces': [
            'skyline.analyzer.run_time', 'skyline.analyzer.total_metrics',
            'skyline.analyzer.exceptions'
        ],
        'algorithm_source': '/opt/skyline/github/skyline/skyline/custom_algorithms/m66.py',
        'algorithm_parameters': {
            'nth_median': 6, 'sigma': 6, 'window': 5, 'resolution': 60,
            'minimum_sparsity': 0, 'determine_duration': False,
            'return_anomalies': True, 'save_plots_to': False,
            'save_plots_to_absolute_dir': False, 'filename_prefix': False
        },
        'max_execution_time': 1.0,
        'consensus': 1,
        'algorithms_allowed_in_consensus': ['m66'],
        'run_3sigma_algorithms': False,
        'run_before_3sigma': False,
        'run_only_if_consensus': False,
        'use_with': ['crucible', 'luminosity'],
        'debug_logging': False,
    },

    """

    # You MUST define the algorithm_name
    algorithm_name = 'm66'

    # Define the default state as None and None.  anomalous does not default to
    # False because that would be incorrect; False is only correct if the
    # algorithm determines the data point is not anomalous.  The same is true
    # for anomalyScore.
    anomalous = None
    anomalyScore = None

    return_anomalies = False
    anomalies = []
    anomalies_dict = {}
    anomalies_dict['algorithm'] = algorithm_name

    realtime_analysis = False

    current_logger = None
    dev_null = None

    # If you want to log you can, but this should only be done during
    # testing and development
    def get_log(current_skyline_app):
        current_skyline_app_logger = current_skyline_app + 'Log'
        current_logger = logging.getLogger(current_skyline_app_logger)
        return current_logger

    start = timer()

    # Use the algorithm_parameters to determine whether debug_logging is enabled
    debug_logging = None
    try:
        debug_logging = algorithm_parameters['debug_logging']
    except:
        debug_logging = False
    if debug_logging:
        try:
            current_logger = get_log(current_skyline_app)
            current_logger.debug(
                'debug :: %s :: debug_logging enabled with algorithm_parameters - %s'
                % (algorithm_name, str(algorithm_parameters)))
        except Exception as e:
            # This except pattern MUST be used in ALL custom algorithms to
            # facilitate the traceback from any errors.  We want the algorithm to
            # run super fast and without spamming the log with lots of errors.
            # But we do not want the function returning without reporting
            # anything to the log, so the pythonic except is used to "sample" any
            # algorithm errors to a tmp file and report once per run rather than
            # spewing tons of errors into the log e.g. analyzer.log
            dev_null = e
            record_algorithm_error(current_skyline_app, parent_pid,
                                   algorithm_name, traceback.format_exc())
            # Return None and None as the algorithm could not determine True or False
            del dev_null
            if current_skyline_app == 'webapp':
                return (anomalous, anomalyScore, anomalies, anomalies_dict)
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            return (anomalous, anomalyScore)

    # Allow the m66 parameters to be passed in the algorithm_parameters
    window = 6
    try:
        window = algorithm_parameters['window']
    except KeyError:
        window = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    nth_median = 6
    try:
        nth_median = algorithm_parameters['nth_median']
    except KeyError:
        nth_median = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    n_sigma = 6
    try:
        n_sigma = algorithm_parameters['sigma']
    except KeyError:
        n_sigma = 6
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    resolution = 0
    try:
        resolution = algorithm_parameters['resolution']
    except KeyError:
        resolution = 0
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    determine_duration = False
    try:
        determine_duration = algorithm_parameters['determine_duration']
    except KeyError:
        determine_duration = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    minimum_sparsity = 0
    try:
        minimum_sparsity = algorithm_parameters['minimum_sparsity']
    except KeyError:
        minimum_sparsity = 0
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    shift_to_start_of_window = True
    try:
        shift_to_start_of_window = algorithm_parameters[
            'shift_to_start_of_window']
    except KeyError:
        shift_to_start_of_window = True
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    save_plots_to = False
    try:
        save_plots_to = algorithm_parameters['save_plots_to']
    except KeyError:
        save_plots_to = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    save_plots_to_absolute_dir = False
    try:
        save_plots_to_absolute_dir = algorithm_parameters[
            'save_plots_to_absolute_dir']
    except KeyError:
        save_plots_to_absolute_dir = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e
    filename_prefix = False
    try:
        filename_prefix = algorithm_parameters['filename_prefix']
    except KeyError:
        filename_prefix = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    if debug_logging:
        current_logger.debug('debug :: algorithm_parameters :: %s' %
                             (str(algorithm_parameters)))

    return_anomalies = False
    try:
        return_anomalies = algorithm_parameters['return_anomalies']
    except KeyError:
        return_anomalies = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    try:
        realtime_analysis = algorithm_parameters['realtime_analysis']
    except KeyError:
        realtime_analysis = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    save_plots_to = False
    try:
        save_plots_to = algorithm_parameters['save_plots_to']
    except KeyError:
        save_plots_to = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    save_plots_to_absolute_dir = False
    try:
        save_plots_to_absolute_dir = algorithm_parameters[
            'save_plots_to_absolute_dir']
    except KeyError:
        save_plots_to_absolute_dir = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e
    filename_prefix = False
    try:
        filename_prefix = algorithm_parameters['filename_prefix']
    except KeyError:
        filename_prefix = False
    except Exception as e:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        dev_null = e

    try:
        base_name = algorithm_parameters['base_name']
    except Exception as e:
        # This except pattern MUST be used in ALL custom algorithms to
        # facilitate the traceback from any errors.  We want the algorithm to
        # run super fast and without spamming the log with lots of errors.
        # But we do not want the function returning without reporting
        # anything to the log, so the pythonic except is used to "sample" any
        # algorithm errors to a tmp file and report once per run rather than
        # spewing tons of errors into the log e.g. analyzer.log
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        # Return None and None as the algorithm could not determine True or False
        dev_null = e
        del dev_null
        if current_skyline_app == 'webapp':
            return (anomalous, anomalyScore, anomalies, anomalies_dict)
        if return_anomalies:
            return (False, None, anomalies)
        return (False, None)
    if debug_logging:
        current_logger.debug('debug :: %s :: base_name - %s' %
                             (algorithm_name, str(base_name)))

    anomalies_dict['metric'] = base_name
    anomalies_dict['anomalies'] = {}

    use_bottleneck = True
    if save_plots_to:
        use_bottleneck = False
    if use_bottleneck:
        import bottleneck as bn

    # ALWAYS WRAP YOUR ALGORITHM IN try and the BELOW except
    try:
        start_preprocessing = timer()

        # INFO: Sorting time series of 10079 data points took 0.002215 seconds
        timeseries = sorted(timeseries, key=lambda x: x[0])
        if debug_logging:
            current_logger.debug('debug :: %s :: time series of length - %s' %
                                 (algorithm_name, str(len(timeseries))))

        # Testing the data to ensure it meets minimum requirements, in the case
        # of Skyline's use of the m66 algorithm this means that:
        # - the time series must have at least 75% of its full_duration
        do_not_use_sparse_data = False
        if current_skyline_app == 'luminosity':
            do_not_use_sparse_data = True

        if minimum_sparsity == 0:
            do_not_use_sparse_data = False

        total_period = 0
        total_datapoints = 0

        calculate_variables = False
        if do_not_use_sparse_data:
            calculate_variables = True
        if determine_duration:
            calculate_variables = True

        if calculate_variables:
            try:
                start_timestamp = int(timeseries[0][0])
                end_timestamp = int(timeseries[-1][0])
                total_period = end_timestamp - start_timestamp
                total_datapoints = len(timeseries)
            except SystemExit as e:
                if debug_logging:
                    current_logger.debug(
                        'debug_logging :: %s :: SystemExit called, exiting - %s'
                        % (algorithm_name, e))
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)
            except:
                traceback_msg = traceback.format_exc()
                record_algorithm_error(current_skyline_app, parent_pid,
                                       algorithm_name, traceback_msg)
                if debug_logging:
                    current_logger.error(traceback_msg)
                    current_logger.error(
                        'error :: debug_logging :: %s :: failed to determine total_period and total_datapoints'
                        % (algorithm_name))
                timeseries = []
            if not timeseries:
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)

            if current_skyline_app == 'analyzer':
                # For analyzer, default the required period to 75% of FULL_DURATION (18 hours)
                period_required = int(FULL_DURATION * 0.75)
            else:
                # Determine from timeseries
                if total_period < FULL_DURATION:
                    period_required = int(FULL_DURATION * 0.75)
                else:
                    period_required = int(total_period * 0.75)

            if determine_duration:
                period_required = int(total_period * 0.75)

        if do_not_use_sparse_data:
            # If the time series does not have 75% of its full_duration it does
            # not have sufficient data to sample
            try:
                if total_period < period_required:
                    if debug_logging:
                        current_logger.debug(
                            'debug :: %s :: time series does not have sufficient data'
                            % (algorithm_name))
                    if current_skyline_app == 'webapp':
                        return (anomalous, anomalyScore, anomalies,
                                anomalies_dict)
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    return (anomalous, anomalyScore)
            except SystemExit as e:
                if debug_logging:
                    current_logger.debug(
                        'debug_logging :: %s :: SystemExit called, exiting - %s'
                        % (algorithm_name, e))
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)
            except:
                traceback_msg = traceback.format_exc()
                record_algorithm_error(current_skyline_app, parent_pid,
                                       algorithm_name, traceback_msg)
                if debug_logging:
                    current_logger.error(traceback_msg)
                    current_logger.error(
                        'error :: debug_logging :: %s :: failed to determine if time series has sufficient data'
                        % (algorithm_name))
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)

            # If the time series does not have 75% of its full_duration
            # datapoints it does not have sufficient data to sample

            # Determine resolution from the last 30 data points
            # INFO took 0.002060 seconds
            if not resolution:
                resolution_timestamps = []
                metric_resolution = False
                for metric_datapoint in timeseries[-30:]:
                    timestamp = int(metric_datapoint[0])
                    resolution_timestamps.append(timestamp)
                timestamp_resolutions = []
                if resolution_timestamps:
                    last_timestamp = None
                    for timestamp in resolution_timestamps:
                        if last_timestamp:
                            resolution = timestamp - last_timestamp
                            timestamp_resolutions.append(resolution)
                            last_timestamp = timestamp
                        else:
                            last_timestamp = timestamp
                    try:
                        del resolution_timestamps
                    except:
                        pass
                if timestamp_resolutions:
                    try:
                        timestamp_resolutions_count = Counter(
                            timestamp_resolutions)
                        ordered_timestamp_resolutions_count = timestamp_resolutions_count.most_common(
                        )
                        metric_resolution = int(
                            ordered_timestamp_resolutions_count[0][0])
                    except SystemExit as e:
                        if debug_logging:
                            current_logger.debug(
                                'debug_logging :: %s :: SystemExit called, exiting - %s'
                                % (algorithm_name, e))
                        if current_skyline_app == 'webapp':
                            return (anomalous, anomalyScore, anomalies,
                                    anomalies_dict)
                        if return_anomalies:
                            return (anomalous, anomalyScore, anomalies)
                        return (anomalous, anomalyScore)
                    except:
                        traceback_msg = traceback.format_exc()
                        record_algorithm_error(current_skyline_app, parent_pid,
                                               algorithm_name, traceback_msg)
                        if debug_logging:
                            current_logger.error(traceback_msg)
                            current_logger.error(
                                'error :: debug_logging :: %s :: failed to determine if time series has sufficient data'
                                % (algorithm_name))
                    try:
                        del timestamp_resolutions
                    except:
                        pass
            else:
                metric_resolution = resolution

            minimum_datapoints = None
            if metric_resolution:
                minimum_datapoints = int(period_required / metric_resolution)
            if minimum_datapoints:
                if total_datapoints < minimum_datapoints:
                    if debug_logging:
                        current_logger.debug(
                            'debug :: %s :: time series does not have sufficient data, minimum_datapoints required is %s and time series has %s'
                            % (algorithm_name, str(minimum_datapoints),
                               str(total_datapoints)))
                    if current_skyline_app == 'webapp':
                        return (anomalous, anomalyScore, anomalies,
                                anomalies_dict)
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    return (anomalous, anomalyScore)

            # Is the time series fully populated?
            # full_duration_datapoints = int(full_duration / metric_resolution)
            total_period_datapoints = int(total_period / metric_resolution)
            # minimum_percentage_sparsity = 95
            minimum_percentage_sparsity = 90
            sparsity = int(total_datapoints / (total_period_datapoints / 100))
            if sparsity < minimum_percentage_sparsity:
                if debug_logging:
                    current_logger.debug(
                        'debug :: %s :: time series does not have sufficient data, minimum_percentage_sparsity required is %s and time series has %s'
                        % (algorithm_name, str(minimum_percentage_sparsity),
                           str(sparsity)))
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)
            if len(set(item[1] for item in timeseries)) == 1:
                if debug_logging:
                    current_logger.debug(
                        'debug :: %s :: time series does not have sufficient variability, all the values are the same'
                        % algorithm_name)
                anomalous = False
                anomalyScore = 0.0
                if current_skyline_app == 'webapp':
                    return (anomalous, anomalyScore, anomalies, anomalies_dict)
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                return (anomalous, anomalyScore)

        end_preprocessing = timer()
        preprocessing_runtime = end_preprocessing - start_preprocessing
        if debug_logging:
            current_logger.debug(
                'debug :: %s :: preprocessing took %.6f seconds' %
                (algorithm_name, preprocessing_runtime))

        if not timeseries:
            if debug_logging:
                current_logger.debug('debug :: %s :: m66 not run as no data' %
                                     (algorithm_name))
            anomalies = []
            if current_skyline_app == 'webapp':
                return (anomalous, anomalyScore, anomalies, anomalies_dict)
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            return (anomalous, anomalyScore)
        if debug_logging:
            current_logger.debug('debug :: %s :: timeseries length: %s' %
                                 (algorithm_name, str(len(timeseries))))

        anomalies_dict['timestamp'] = int(timeseries[-1][0])
        anomalies_dict['from_timestamp'] = int(timeseries[0][0])

        start_analysis = timer()
        try:
            # bottleneck is used because it is much faster
            # pd dataframe method (1445 data point - 24hrs): took 0.077915 seconds
            # bottleneck method (1445 data point - 24hrs): took 0.005692 seconds
            # numpy and pandas rolling
            # 2021-07-30 12:37:31 :: 2827897 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 136.93 seconds
            # 2021-07-30 12:44:53 :: 2855884 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 148.82 seconds
            # 2021-07-30 12:48:41 :: 2870822 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 145.62 seconds
            # 2021-07-30 12:55:00 :: 2893634 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 139.00 seconds
            # 2021-07-30 12:59:31 :: 2910443 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 144.80 seconds
            # 2021-07-30 13:02:31 :: 2922928 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 143.35 seconds
            # 2021-07-30 14:12:56 :: 3132457 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 129.25 seconds
            # 2021-07-30 14:22:35 :: 3164370 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 125.72 seconds
            # 2021-07-30 14:28:24 :: 3179687 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 222.43 seconds
            # 2021-07-30 14:33:45 :: 3179687 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 244.00 seconds
            # 2021-07-30 14:36:27 :: 3214047 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 141.10 seconds
            # numpy and bottleneck
            # 2021-07-30 16:41:52 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 73.92 seconds
            # 2021-07-30 16:46:46 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 68.84 seconds
            # 2021-07-30 16:51:48 :: 3585162 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 70.55 seconds
            # numpy and bottleneck (passing resolution and not calculating in m66)
            # 2021-07-30 16:57:46 :: 3643253 :: cloudbursts :: find_cloudbursts completed on 1530 metrics in 65.59 seconds

            if use_bottleneck:
                if len(timeseries) < 10:
                    if current_skyline_app == 'webapp':
                        return (anomalous, anomalyScore, anomalies,
                                anomalies_dict)
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    return (anomalous, anomalyScore)

                x_np = np.asarray([x[1] for x in timeseries])
                # Fast Min-Max scaling
                data = (x_np - x_np.min()) / (x_np.max() - x_np.min())

                # m66 - calculate to nth_median
                median_count = 0
                while median_count < nth_median:
                    median_count += 1
                    rolling_median_s = bn.move_median(data, window=window)
                    median = rolling_median_s.tolist()
                    data = median
                    if median_count == nth_median:
                        break

                # m66 - calculate the moving standard deviation for the
                # nth_median array
                rolling_std_s = bn.move_std(data, window=window)
                std_nth_median_array = np.nan_to_num(rolling_std_s,
                                                     copy=False,
                                                     nan=0.0,
                                                     posinf=None,
                                                     neginf=None)
                std_nth_median = std_nth_median_array.tolist()
                if debug_logging:
                    current_logger.debug(
                        'debug :: %s :: std_nth_median calculated with bn' %
                        (algorithm_name))
            else:
                df = pd.DataFrame(timeseries, columns=['date', 'value'])
                df['date'] = pd.to_datetime(df['date'], unit='s')
                datetime_index = pd.DatetimeIndex(df['date'].values)
                df = df.set_index(datetime_index)
                df.drop('date', axis=1, inplace=True)
                original_df = df.copy()
                # MinMax scale
                df = (df - df.min()) / (df.max() - df.min())
                # window = 6
                data = df['value'].tolist()

                if len(data) < 10:
                    if current_skyline_app == 'webapp':
                        return (anomalous, anomalyScore, anomalies,
                                anomalies_dict)
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    return (anomalous, anomalyScore)

                # m66 - calculate to nth_median
                median_count = 0
                while median_count < nth_median:
                    median_count += 1
                    s = pd.Series(data)
                    rolling_median_s = s.rolling(window).median()
                    median = rolling_median_s.tolist()
                    data = median
                    if median_count == nth_median:
                        break

                # m66 - calculate the moving standard deviation for the
                # nth_median array
                s = pd.Series(data)
                rolling_std_s = s.rolling(window).std()

                nth_median_column = 'std_nth_median_%s' % str(nth_median)
                df[nth_median_column] = rolling_std_s.tolist()
                std_nth_median = df[nth_median_column].fillna(0).tolist()

            # m66 - calculate the standard deviation for the entire nth_median
            # array
            metric_stddev = np.std(std_nth_median)
            std_nth_median_n_sigma = []
            anomalies_found = False

            for value in std_nth_median:
                # m66 - if the value in the 6th median array is > six-sigma of
                # the metric_stddev the datapoint is anomalous
                if value > (metric_stddev * n_sigma):
                    std_nth_median_n_sigma.append(1)
                    anomalies_found = True
                else:
                    std_nth_median_n_sigma.append(0)
            std_nth_median_n_sigma_column = 'std_median_%s_%s_sigma' % (
                str(nth_median), str(n_sigma))
            if not use_bottleneck:
                df[std_nth_median_n_sigma_column] = std_nth_median_n_sigma

            anomalies = []
            # m66 - only label anomalous if the n_sigma triggers are persisted
            # for (window / 2)
            if anomalies_found:
                current_triggers = []
                for index, item in enumerate(timeseries):
                    if std_nth_median_n_sigma[index] == 1:
                        current_triggers.append(index)
                    else:
                        if len(current_triggers) > int(window / 2):
                            for trigger_index in current_triggers:
                                # Shift the anomaly back to the beginning of the
                                # window
                                if shift_to_start_of_window:
                                    anomalies.append(
                                        timeseries[(trigger_index -
                                                    (window * int(
                                                        (nth_median / 2))))])
                                else:
                                    anomalies.append(timeseries[trigger_index])
                        current_triggers = []
                # Process any remaining current_triggers
                if len(current_triggers) > int(window / 2):
                    for trigger_index in current_triggers:
                        # Shift the anomaly back to the beginning of the
                        # window
                        if shift_to_start_of_window:
                            anomalies.append(
                                timeseries[(trigger_index - (window * int(
                                    (nth_median / 2))))])
                        else:
                            anomalies.append(timeseries[trigger_index])
            if not anomalies:
                anomalous = False

            if anomalies:
                anomalous = True
                anomalies_data = []
                anomaly_timestamps = [int(item[0]) for item in anomalies]
                for item in timeseries:
                    if int(item[0]) in anomaly_timestamps:
                        anomalies_data.append(1)
                    else:
                        anomalies_data.append(0)
                if not use_bottleneck:
                    df['anomalies'] = anomalies_data
                anomalies_list = []
                for ts, value in timeseries:
                    if int(ts) in anomaly_timestamps:
                        anomalies_list.append([int(ts), value])
                        anomalies_dict['anomalies'][int(ts)] = value

            if anomalies and save_plots_to:
                try:
                    from adtk.visualization import plot
                    metric_dir = base_name.replace('.', '/')
                    timestamp_dir = str(int(timeseries[-1][0]))
                    save_path = '%s/%s/%s/%s' % (save_plots_to, algorithm_name,
                                                 metric_dir, timestamp_dir)
                    if save_plots_to_absolute_dir:
                        save_path = '%s' % save_plots_to
                    anomalies_dict['file_path'] = save_path
                    save_to_file = '%s/%s.%s.png' % (save_path, algorithm_name,
                                                     base_name)
                    if filename_prefix:
                        save_to_file = '%s/%s.%s.%s.png' % (
                            save_path, filename_prefix, algorithm_name,
                            base_name)
                    save_to_path = os_path_dirname(save_to_file)
                    title = '%s\n%s - median %s %s-sigma persisted (window=%s)' % (
                        base_name, algorithm_name, str(nth_median),
                        str(n_sigma), str(window))

                    if not os_path_exists(save_to_path):
                        try:
                            mkdir_p(save_to_path)
                        except Exception as e:
                            current_logger.error(
                                'error :: %s :: failed to create dir - %s - %s'
                                % (algorithm_name, save_to_path, e))
                    if os_path_exists(save_to_path):
                        try:
                            plot(original_df['value'],
                                 anomaly=df['anomalies'],
                                 anomaly_color='red',
                                 title=title,
                                 save_to_file=save_to_file)
                            if debug_logging:
                                current_logger.debug(
                                    'debug :: %s :: plot saved to - %s' %
                                    (algorithm_name, save_to_file))
                            anomalies_dict['image'] = save_to_file
                        except Exception as e:
                            current_logger.error(
                                'error :: %s :: failed to plot - %s - %s' %
                                (algorithm_name, base_name, e))
                    anomalies_file = '%s/%s.%s.anomalies_list.txt' % (
                        save_path, algorithm_name, base_name)
                    with open(anomalies_file, 'w') as fh:
                        fh.write(str(anomalies_list))
                        # os.chmod(anomalies_file, mode=0o644)
                    data_file = '%s/data.txt' % (save_path)
                    with open(data_file, 'w') as fh:
                        fh.write(str(anomalies_dict))
                except SystemExit as e:
                    if debug_logging:
                        current_logger.debug(
                            'debug_logging :: %s :: SystemExit called during save plot, exiting - %s'
                            % (algorithm_name, e))
                    if current_skyline_app == 'webapp':
                        return (anomalous, anomalyScore, anomalies,
                                anomalies_dict)
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    return (anomalous, anomalyScore)
                except Exception as e:
                    traceback_msg = traceback.format_exc()
                    record_algorithm_error(current_skyline_app, parent_pid,
                                           algorithm_name, traceback_msg)
                    if debug_logging:
                        current_logger.error(traceback_msg)
                        current_logger.error(
                            'error :: %s :: failed to plot or save anomalies file - %s - %s'
                            % (algorithm_name, base_name, e))

            try:
                del df
            except:
                pass
        except SystemExit as e:
            if debug_logging:
                current_logger.debug(
                    'debug_logging :: %s :: SystemExit called, during analysis, exiting - %s'
                    % (algorithm_name, e))
            if current_skyline_app == 'webapp':
                return (anomalous, anomalyScore, anomalies, anomalies_dict)
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            return (anomalous, anomalyScore)
        except:
            traceback_msg = traceback.format_exc()
            record_algorithm_error(current_skyline_app, parent_pid,
                                   algorithm_name, traceback_msg)
            if debug_logging:
                current_logger.error(traceback_msg)
                current_logger.error(
                    'error :: debug_logging :: %s :: failed to run on ts' %
                    (algorithm_name))
            if current_skyline_app == 'webapp':
                return (anomalous, anomalyScore, anomalies, anomalies_dict)
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            return (anomalous, anomalyScore)

        end_analysis = timer()
        analysis_runtime = end_analysis - start_analysis

        if debug_logging:
            current_logger.debug(
                'debug :: analysis with %s took %.6f seconds' %
                (algorithm_name, analysis_runtime))

        if anomalous:
            anomalyScore = 1.0
        else:
            anomalyScore = 0.0

        if debug_logging:
            current_logger.info(
                '%s :: anomalous - %s, anomalyScore - %s' %
                (algorithm_name, str(anomalous), str(anomalyScore)))

        if debug_logging:
            end = timer()
            processing_runtime = end - start
            current_logger.info('%s :: completed in %.6f seconds' %
                                (algorithm_name, processing_runtime))
        try:
            del timeseries
        except:
            pass
        if current_skyline_app == 'webapp':
            return (anomalous, anomalyScore, anomalies, anomalies_dict)
        if return_anomalies:
            return (anomalous, anomalyScore, anomalies)
        return (anomalous, anomalyScore)
    except SystemExit as e:
        if debug_logging:
            current_logger.debug(
                'debug_logging :: %s :: SystemExit called (before StopIteration), exiting - %s'
                % (algorithm_name, e))
        if current_skyline_app == 'webapp':
            return (anomalous, anomalyScore, anomalies, anomalies_dict)
        if return_anomalies:
            return (anomalous, anomalyScore, anomalies)
        return (anomalous, anomalyScore)
    except StopIteration:
        # This except pattern MUST be used in ALL custom algorithms to
        # facilitate the traceback from any errors.  We want the algorithm to
        # run super fast and without spamming the log with lots of errors.
        # But we do not want the function returning without reporting
        # anything to the log, so the pythonic except is used to "sample" any
        # algorithm errors to a tmp file and report once per run rather than
        # spewing tons of errors into the log e.g. analyzer.log
        if current_skyline_app == 'webapp':
            return (anomalous, anomalyScore, anomalies, anomalies_dict)
        if return_anomalies:
            return (False, None, anomalies)
        return (False, None)
    except:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name,
                               traceback.format_exc())
        # Return None and None as the algorithm could not determine True or False
        if current_skyline_app == 'webapp':
            return (anomalous, anomalyScore, anomalies, anomalies_dict)
        if return_anomalies:
            return (False, None, anomalies)
        return (False, None)

    if current_skyline_app == 'webapp':
        return (anomalous, anomalyScore, anomalies, anomalies_dict)
    if return_anomalies:
        return (anomalous, anomalyScore, anomalies)
    return (anomalous, anomalyScore)
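A minimal, hedged invocation sketch for m66. It assumes the Skyline module-level imports the function relies on (numpy, bottleneck, timer, Counter, record_algorithm_error, ...) are in place; the synthetic series, metric name and parameter values are illustrative:

import os
import time

# Illustrative synthetic series: 24 hours at 60 s resolution with a sustained level shift.
now = int(time.time())
timeseries = [[float(now - (1440 - i) * 60), 10.0 if i < 1000 else 100.0]
              for i in range(1440)]

algorithm_parameters = {
    'nth_median': 6, 'sigma': 6, 'window': 5, 'resolution': 60,
    'minimum_sparsity': 0, 'determine_duration': False,
    'return_anomalies': True, 'base_name': 'test.metric',
    'save_plots_to': False, 'debug_logging': False,
}

# With return_anomalies=True and a non-webapp app name, m66 returns a 3-tuple.
anomalous, anomaly_score, anomalies = m66(
    'analyzer', os.getpid(), timeseries, algorithm_parameters)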
Example #14
from unittest import TestCase, main as unittest_main
from os import pardir as os_pardir
from os.path import (
    join as os_path_join,
    dirname as os_path_dirname,
    abspath as os_path_abspath)
from sys import path as sys_path

sys_path.append(os_path_join(
    os_path_dirname(os_path_abspath(__file__)),
    os_pardir))

from vorbis.helper_funcs import (
    ilog, float32_unpack, lookup1_values, bit_reverse)
from .test_decoders import hex_str_to_bin_str


# noinspection PyMethodMayBeStatic
class HelperFunctionsTests(TestCase):
    def test_ilog_zero(self):
        self.assertEqual(ilog(0), 0)

    def test_ilog_positive_number(self):
        self.assertEqual(ilog(1), 1)
        self.assertEqual(ilog(7), 3)

    def test_ilog_negative_number(self):
        self.assertEqual(ilog(-1111), 0)
        self.assertEqual(ilog(-112312), 0)

    def test_lookup1_values(self):
Example #15
File: __init__.py Project: bscohen1/gpkit
    Requirements
    ------------
    numpy
    MOSEK or CVXOPT
    scipy (optional): for complete sparse matrix support
    sympy (optional): for LaTeX printing in IPython Notebook

    Attributes
    ----------
    settings : dict
        Contains settings loaded from ``./env/settings``
"""
from os import sep as os_sep
from os.path import dirname as os_path_dirname
SETTINGS_PATH = os_sep.join([os_path_dirname(__file__), "env", "settings"])

__version__ = "0.4.0"
UNIT_REGISTRY = None
SIGNOMIALS_ENABLED = False

# global variable initializations
DimensionalityError = ValueError
units = None


def enable_units(path=None):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those created
    before.
Example #16
File: __init__.py Project: galbramc/gpkit
    scipy (optional): for complete sparse matrix support
    sympy (optional): for LaTeX printing in IPython Notebook

    Attributes
    ----------
    settings : dict
        Contains settings loaded from ``./env/settings``
"""

__version__ = "0.3.4"
UNIT_REGISTRY = None
SIGNOMIALS_ENABLED = False

from os import sep as os_sep
from os.path import dirname as os_path_dirname
UNITDEF_PATH = os_sep.join([os_path_dirname(__file__), "gpkit_units.txt"])
SETTINGS_PATH = os_sep.join([os_path_dirname(__file__), "env", "settings"])


def enable_units(path=UNITDEF_PATH):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those created
    before.

    If gpkit is imported multiple times, this needs to be run each time."""
    global units, DimensionalityError, UNIT_REGISTRY
    try:
        import pint
        if UNIT_REGISTRY is None:
            UNIT_REGISTRY = pint.UnitRegistry(path)
Example #17
            else:
                return getattr(self, fallback)(other)
        return newfn

    for op in "eq ge le add mul div truediv floordiv".split():
        dunder = "__%s__" % op
        trunder = "___%s___" % op
        original = getattr(units.Quantity, dunder)
        setattr(units.Quantity, trunder, original)
        newfn = skip_if_gpkit_objects(fallback=trunder)
        setattr(units.Quantity, dunder, newfn)

# Load settings
from os import sep as os_sep
from os.path import dirname as os_path_dirname
settings_path = os_sep.join([os_path_dirname(__file__), "env", "settings"])
try:
    with open(settings_path) as settingsfile:
        lines = [line[:-1].split(" : ") for line in settingsfile
                 if len(line.split(" : ")) == 2]
        settings = {name: value.split(", ") for name, value in lines}
        for name, value in settings.items():
            # hack to flatten 1-element lists, unless they're the solver list
            if len(value) == 1 and name != "installed_solvers":
                settings[name] = value[0]
    try:
        del lines
        del line
    except NameError:
        pass
except IOError:
Example #18
from typing import List, TextIO, Tuple
from unittest import TestCase, main as unittest_main
from os import (pardir as os_pardir, mkdir as os_mkdir, remove as os_remove)
from os.path import (join as os_path_join, dirname as os_path_dirname, abspath
                     as os_path_abspath, exists as os_path_exists)
from sys import path as sys_path
from shutil import rmtree as shutil_rmtree

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from cvs.changes_codec import (ChangesCodec, ProgramDataFormatError,
                               CodecException)


class GetLastCommitNumber(TestCase):
    _changes_codec: ChangesCodec = ChangesCodec()

    def setUp(self) -> None:
        os_mkdir(self._changes_codec._CVS_DIR_PATH)
        os_mkdir(self._changes_codec._COMMITTED_PATH)

    def tearDown(self) -> None:
        if os_path_exists(self._changes_codec._CVS_DIR_PATH):
            shutil_rmtree(self._changes_codec._CVS_DIR_PATH)

    def test_no_commits(self):
        self.assertEqual(self._changes_codec._get_last_commit_number(), 0)

    def test_only_commit_files(self):
        self._create_files_in_committed_dir(['3', '5', '7'])
Example #19
def run_launcher_logic():
    parser = ArgumentParser(
        description='Videogame-platformer where squares fight for the win!')

    parser.add_argument('--version',
                        help="print program's current version number and exit",
                        action='version',
                        version=get_current_version())

    parser.add_argument('-d',
                        '--debug',
                        help="turn on debug info printing",
                        action='store_true')

    arguments: Namespace = parser.parse_args()

    # Improvement: make GUI version of launcher. For now launcher just loads
    #  some map

    map_name: str = input(
        'Enter map name. Raw map names have the format: "raw <name>". Non-raw '
        'maps will be loaded from the "maps" folder.\n')

    game_map: Optional[GameMap] = None

    if map_name.startswith('raw '):
        try:
            game_map = getattr(RawMapsContainer,
                               'get_map_' + map_name.split(' ')[1])()

        except AttributeError:
            exit_with_exception(
                'Wrong raw map name',
                LauncherException('Wrong raw map name: ' + map_name),
                arguments.debug)
    else:
        try:
            map_path: str = os_path_join(
                os_path_dirname(os_path_abspath(__file__)), os_pardir, 'maps',
                map_name)

            with open(map_path, 'rb') as map_file_handle:
                game_map = pickle_load(map_file_handle)

        except OSError as occurred_err:
            exit_with_exception('Cannot open file: ' + map_name,
                                LauncherException(*occurred_err.args),
                                arguments.debug)

    if game_map is None:
        exit_with_exception("Something broke inside game",
                            ApplicationException('[game_map] is [None]'),
                            arguments.debug)

    game_engine: GameEngine = GameEngine(game_map)
    gui: GameGUI = GameGUI()

    try:
        gui.init(game_map, game_engine.get_event_listeners())

        def game_loop(game_engine_: GameEngine, gui_: GameGUI):
            def time_alignment():
                """Time alignment for CPU power saving

                The game runs at 60 game-loop iterations per second (a level
                update AND a render in each iteration).

                In essence, each second is split into 60 parts. The alignment
                is done so that EVERY iteration of the game loop starts at the
                beginning of one of these 1/60-second parts. There is NO
                guarantee that one of the 1/60-second parts will not be lost
                with this approach.

                Thus, every level update is calculated ONLY for the current
                1/60 of a second. This makes it possible to avoid fractional
                values when modifying the positions of moving objects.
                """
                # All time below in milliseconds
                #
                # one_iteration_time = 1000 / 60 = 16.666666666666668
                # millis_in_current_second = (
                #     current_time_in_seconds() * 1000 % 1000)
                time_sleep((16.666666666666668 -
                            ((current_time_in_seconds() * 1000 % 1000) %
                             16.666666666666668)) / 1000)

            # The game loop runs in a daemon thread, so it proceeds only until
            # the user interface thread is closed
            while True:
                game_engine_.update_map()

                gui_.render()

                time_alignment()

        Thread(target=game_loop, args=(game_engine, gui), daemon=True).start()
        # Right here several renderings CANNOT be lost
        gui.run_gui_loop()

    except ApplicationException as occurred_exc:
        # Improvement: Different messages for user. Switch only message!
        exit_with_exception("Some exception occurred", occurred_exc,
                            arguments.debug)
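
The time-alignment arithmetic described in the docstring above can be read in isolation. The following is a minimal sketch of the same calculation, assuming nothing beyond the standard library; the names FRAME_MS and align_to_next_frame are illustrative and not part of the launcher code.

from time import sleep, time

FRAME_MS = 1000 / 60  # length of one game-loop slot, ~16.667 ms


def align_to_next_frame() -> None:
    """Sleep until the start of the next 1/60-second slot."""
    millis_into_second = (time() * 1000) % 1000
    remaining_ms = FRAME_MS - (millis_into_second % FRAME_MS)
    sleep(remaining_ms / 1000)
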
예제 #20
0
def adtk_level_shift(current_skyline_app, parent_pid, timeseries, algorithm_parameters):
    """
    A timeseries is anomalous if a level shift occurs in a 5 window period bound
    by a factor of 9 of the normal range based on historical interquartile range.

    :param current_skyline_app: the Skyline app executing the algorithm.  This
        will be passed to the algorithm by Skyline.  This is **required** for
        error handling and logging.  You do not have to worry about handling the
        argument in the scope of the custom algorithm itself,  but the algorithm
        must accept it as the first argument.
    :param parent_pid: the parent pid which is executing the algorithm, this is
        **required** for error handling and logging.  You do not have to worry
        about handling this argument in the scope of algorithm, but the
        algorithm must accept it as the second argument.
    :param timeseries: the time series as a list e.g. ``[[1578916800.0, 29.0],
        [1578920400.0, 55.0], ... [1580353200.0, 55.0]]``
    :param algorithm_parameters: a dictionary of any required parameters for the
        custom_algorithm and algorithm itself.  For the adtk_level_shift custom
        algorithm the following parameters are required, example:
        ``algorithm_parameters={
            'c': 9.0,
            'run_every': 5,
            'side': 'both',
            'window': 5
        }``
    :type current_skyline_app: str
    :type parent_pid: int
    :type timeseries: list
    :type algorithm_parameters: dict
    :return: True, False or None
    :rtype: boolean

    Performance is of paramount importance in Skyline, especially in terms of
    computational complexity, along with execution time and CPU usage. The
    adtk LevelShiftAD algorithm is not O(n) and it is not fast either, not when
    compared to the normal three-sigma triggered algorithms.  However it is
    useful if you care about detecting all level shifts.  The normal three-sigma
    triggered algorithms do not always detect a level shift, especially if the
    level shift does not breach the three-sigma limits.  Therefore you may find
    over time that you encounter alerts that contain level shifts that you
    thought should have been detected.  On these types of metrics and events,
    the adtk LevelShiftAD algorithm can be implemented to detect and alert on
    these.  It is not recommended to run it on all your metrics as it would
    immediately triple the analyzer runtime even if it is only run every 5
    windows/minutes.

    Due to the computational complexity and long run time of the adtk
    LevelShiftAD algorithm on the size of timeseries data used by Skyline, if
    you consider the following timings of all three-sigma triggered algorithms
    and compare them to the adtk_level_shift results in the last 2 rows
    of the below log, it is clear that running adtk_level_shift on all
    metrics is probably not desirable; even though it is possible to do, it is
    very noisy.

    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - histogram_bins run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - histogram_bins has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - histogram_bins - total: 1.051136 - median: 0.001430
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - first_hour_average run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - first_hour_average has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - first_hour_average - total: 1.322432 - median: 0.001835
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - stddev_from_average run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - stddev_from_average has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - stddev_from_average - total: 1.097290 - median: 0.001641
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - grubbs run 567 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - grubbs has 567 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - grubbs - total: 1.742929 - median: 0.002438
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - ks_test run 147 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - ks_test has 147 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - ks_test - total: 0.127648 - median: 0.000529
    2021-03-06 10:46:38 :: 1582754 :: algorithm run count - mean_subtraction_cumulation run 40 times
    2021-03-06 10:46:38 :: 1582754 :: algorithm timings count - mean_subtraction_cumulation has 40 timings
    2021-03-06 10:46:38 :: 1582754 :: algorithm timing - mean_subtraction_cumulation - total: 0.152515 - median: 0.003152
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - median_absolute_deviation run 35 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - median_absolute_deviation has 35 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - median_absolute_deviation - total: 0.143770 - median: 0.003248
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - stddev_from_moving_average run 30 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - stddev_from_moving_average has 30 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - stddev_from_moving_average - total: 0.125173 - median: 0.003092
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - least_squares run 16 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - least_squares has 16 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - least_squares - total: 0.089108 - median: 0.005538
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - abs_stddev_from_median run 1 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - abs_stddev_from_median has 1 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - abs_stddev_from_median - total: 0.036797 - median: 0.036797
    2021-03-06 10:46:39 :: 1582754 :: algorithm run count - adtk_level_shift run 271 times
    2021-03-06 10:46:39 :: 1582754 :: algorithm timings count - adtk_level_shift has 271 timings
    2021-03-06 10:46:39 :: 1582754 :: algorithm timing - adtk_level_shift - total: 13.729565 - median: 0.035791
    ...
    ...
    2021-03-06 10:46:39 :: 1582754 :: seconds to run     :: 27.93  # THE TOTAL ANALYZER RUNTIME

    Therefore the analysis methodology implemented for the adtk_level_shift
    custom_algorithm is as follows:

    - When new metrics are added either to the configuration or by actual new
    metrics coming online that match the ``algorithm_parameters['namespace']``,
    Skyline implements sharding on new metrics into time slots to prevent a
    thundering herd situation from developing.  A newly added metric will
    eventually be assigned to a time shard, and its last analysed
    timestamp will be added to the ``analyzer.last.adtk_level_shift`` Redis hash
    key to determine the next scheduled run with
    ``algorithm_parameters['namespace']``

    - A ``run_every`` parameter is implemented so that the algorithm can be
    configured to run on a metric once every ``run_every`` minutes.  The default
    is to run it every 5 minutes using window 5 (rolling) and trigger as
    anomalous if the algorithm labels any of the last 5 datapoints as anomalous.
    This means that there could be up to a 5 minute delay on an alert on the
    60 second, 168 SECOND_ORDER_RESOLUTION_HOURS metrics in the example, but a
    ``c=9.0`` level shift would be detected and would be alerted on (if both
    analyzer and mirage triggered on it).  This periodic running of the
    algorithm is a tradeoff so that the adtk_level_shift load and runtime can be
    spread over ``run_every`` minutes.

    - The algorithm is not run against metrics that are sparsely populated.
    When the algorithm is run on sparsely populated metrics it results in lots
    of false positives and noise.

    The Skyline CUSTOM_ALGORITHMS implementation of the adtk LevelShiftAD
    algorithm is configured as the example shown below.  However please note
    that the algorithm_parameters shown in this example configuration are
    suitable for metrics that have a 60 second resolution and have a
    :mod:`settings.ALERTS` Mirage SECOND_ORDER_RESOLUTION_HOURS of 168 (7 days).
    Metrics with a different resolution/frequency may require different
    values appropriate for that resolution.

    Example CUSTOM_ALGORITHMS configuration:

    'adtk_level_shift': {
        'namespaces': [
            'skyline.analyzer.run_time', 'skyline.analyzer.total_metrics',
            'skyline.analyzer.exceptions'
        ],
        'algorithm_source': '/opt/skyline/github/skyline/skyline/custom_algorithms/adtk_level_shift.py',
        'algorithm_parameters': {'c': 9.0, 'run_every': 5, 'side': 'both', 'window': 5},
        'max_execution_time': 0.5,
        'consensus': 1,
        'algorithms_allowed_in_consensus': ['adtk_level_shift'],
        'run_3sigma_algorithms': True,
        'run_before_3sigma': True,
        'run_only_if_consensus': False,
        'use_with': ["analyzer", "mirage"],
        'debug_logging': False,
    },

    """

    # You MUST define the algorithm_name
    algorithm_name = 'adtk_level_shift'

    # Define the default state of None and None.  anomalous does not default to
    # False as that is not correct: False is only correct if the algorithm
    # determines the data point is not anomalous.  The same is true for the
    # anomalyScore.
    anomalous = None
    anomalyScore = None

    # @added 20210308 - Feature #3978: luminosity - classify_metrics
    #                  Feature #3642: Anomaly type classification
    return_anomalies = False
    anomalies = []
    realtime_analysis = True

    current_logger = None

    # If you want to log, you can, but this should only be done during
    # testing and development
    def get_log(current_skyline_app):
        current_skyline_app_logger = current_skyline_app + 'Log'
        current_logger = logging.getLogger(current_skyline_app_logger)
        return current_logger

    start = timer()

    # Use the algorithm_parameters to determine the sample_period
    debug_logging = None
    try:
        debug_logging = algorithm_parameters['debug_logging']
    except:
        debug_logging = False
    if debug_logging:
        try:
            current_logger = get_log(current_skyline_app)
            current_logger.debug('debug :: %s :: debug_logging enabled with algorithm_parameters - %s' % (
                algorithm_name, str(algorithm_parameters)))
        except:
            # This except pattern MUST be used in ALL custom algorithms to
            # facilitate the traceback from any errors.  We want the algorithm
            # to run super fast and without spamming the log with lots of
            # errors.  But we do not want the function returning and not
            # reporting anything to the log, so the pythonic except is used to
            # "sample" any algorithm errors to a tmp file and report once per
            # run rather than spewing tons of errors into the log e.g.
            # analyzer.log
            record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
            # Return False and None as the algorithm could not determine True or False
            return (False, None)

    # Allow the LevelShiftAD window parameter to be passed in the
    # algorithm_parameters
    window = 5
    try:
        window = algorithm_parameters['window']
    except:
        pass

    # Allow the LevelShiftAD c parameter to be passed in the
    # algorithm_parameters
    c = 9.0
    try:
        c = algorithm_parameters['c']
    except:
        pass

    run_every = window
    try:
        run_every = algorithm_parameters['run_every']
    except:
        pass

    side = 'both'
    try:
        side = algorithm_parameters['side']
    except:
        pass

    if debug_logging:
        current_logger.debug('debug :: algorithm_parameters :: %s' % (
            str(algorithm_parameters)))

    # @added 20210308 - Feature #3978: luminosity - classify_metrics
    #                   Feature #3642: Anomaly type classification
    try:
        return_anomalies = algorithm_parameters['return_anomalies']
    except:
        return_anomalies = False
    try:
        realtime_analysis = algorithm_parameters['realtime_analysis']
    except:
        realtime_analysis = True

    # @added 20210316 - Feature #3978: luminosity - classify_metrics
    #                   Feature #3642: Anomaly type classification
    save_plots_to = False
    try:
        save_plots_to = algorithm_parameters['save_plots_to']
    except:
        pass

    # @added 20210323 - Feature #3978: luminosity - classify_metrics
    #                   Feature #3642: Anomaly type classification
    save_plots_to_absolute_dir = False
    try:
        save_plots_to_absolute_dir = algorithm_parameters['save_plots_to_absolute_dir']
    except:
        pass
    filename_prefix = False
    try:
        filename_prefix = algorithm_parameters['filename_prefix']
    except:
        pass

    # @added 20210318 - Feature #3978: luminosity - classify_metrics
    #                   Feature #3642: Anomaly type classification
    run_PersistAD = False
    try:
        run_PersistAD = algorithm_parameters['run_PersistAD']
    except:
        pass

    if debug_logging:
        current_logger.debug('debug :: algorithm_parameters :: %s' % (
            str(algorithm_parameters)))

    try:
        base_name = algorithm_parameters['base_name']
    except:
        # This except pattern MUST be used in ALL custom algorithms to
        # facilitate the traceback from any errors.  We want the algorithm to
        # run super fast and without spamming the log with lots of errors.
        # But we do not want the function returning and not reporting
        # anything to the log, so the pythonic except is used to "sample" any
        # algorithm errors to a tmp file and report once per run rather than
        # spewing tons of errors into the log e.g. analyzer.log
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        # Return False and None as the algorithm could not determine True or False
        if return_anomalies:
            return (False, None, anomalies)
        else:
            return (False, None)
    if debug_logging:
        current_logger.debug('debug :: %s :: base_name - %s' % (
            algorithm_name, str(base_name)))

    # Due to the load and runtime of LevelShiftAD it is only run in analyzer
    # periodically
    if current_skyline_app == 'analyzer':
        redis_conn_decoded = get_redis_conn_decoded(current_skyline_app)
        last_hash_key = 'analyzer.last.%s' % algorithm_name
        last_check = None
        try:
            raw_last_check = redis_conn_decoded.hget(last_hash_key, base_name)
            last_check = int(raw_last_check)
        except:
            last_check = None
        last_window_timestamps = [int(item[0]) for item in timeseries[-run_every:]]
        if last_check in last_window_timestamps:
            if debug_logging:
                current_logger.debug('debug :: %s :: run_every period is not over yet, skipping base_name - %s' % (
                    algorithm_name, str(base_name)))
            if return_anomalies:
                return (False, None, anomalies)
            else:
                return (False, None)

        # If there is no last timestamp, shard the metric, it will eventually
        # be added.
        if not last_check:
            now = datetime.datetime.now()
            now_seconds = int(now.second)
            if now_seconds == 0:
                now_seconds = 1
            period_seconds = int(60 / run_every)
            shard = int(period_seconds)
            last_shard = 60
            shards = [shard]
            while shard < last_shard:
                shard = shard + period_seconds
                shards.append((shard))
            shard_value = round(now_seconds / shards[0]) * shards[0]
            if shard_value <= shards[0]:
                shard_value = shards[0]
            metric_as_bytes = str(base_name).encode()
            value = zlib.adler32(metric_as_bytes)
            shard_index = [(index + 1) for index, s_value in enumerate(shards) if s_value == shard_value][0]
            modulo_result = value % shard_index
            if modulo_result == 0:
                if debug_logging:
                    current_logger.debug('debug :: %s :: skipping as not sharded into this run - %s' % (
                        algorithm_name, str(base_name)))
                if return_anomalies:
                    return (False, None, anomalies)
                else:
                    return (False, None)
        if debug_logging:
            current_logger.debug('debug :: %s :: analysing %s' % (
                algorithm_name, str(base_name)))

        try:
            int_metric_timestamp = int(timeseries[-1][0])
        except:
            int_metric_timestamp = 0
        if int_metric_timestamp:
            try:
                redis_conn_decoded.hset(
                    last_hash_key, base_name,
                    int_metric_timestamp)
            except:
                pass

    # ALWAYS WRAP YOUR ALGORITHM IN try and the BELOW except
    try:
        start_preprocessing = timer()

        # INFO: Sorting time series of 10079 data points took 0.002215 seconds
        timeseries = sorted(timeseries, key=lambda x: x[0])
        if debug_logging:
            current_logger.debug('debug :: %s :: time series of length - %s' % (
                algorithm_name, str(len(timeseries))))

        # Testing the data to ensure it meets minimum requirements, in the case
        # of Skyline's use of the LevelShiftAD algorithm this means that:
        # - the time series must have at least 75% of its full_duration
        # - the time series must have at least 99% of the data points expected
        #   in the sample being analysed.
        do_not_use_sparse_data = False
        if current_skyline_app == 'analyzer':
            do_not_use_sparse_data = True

        # @added 20210305 - Feature #3970: custom_algorithm - adtk_level_shift
        #                   Task #3664:: POC with adtk
        # With mirage also do not run LevelShiftAD on sparsely populated data
        if current_skyline_app == 'mirage':
            do_not_use_sparse_data = True

        # @added 20210309 - Feature #3978: luminosity - classify_metrics
        #                  Feature #3642: Anomaly type classification
        if current_skyline_app == 'luminosity':
            do_not_use_sparse_data = True

        if do_not_use_sparse_data:

            total_period = 0
            total_datapoints = 0
            try:
                start_timestamp = int(timeseries[0][0])
                end_timestamp = int(timeseries[-1][0])
                total_period = end_timestamp - start_timestamp
                total_datapoints = len(timeseries)
            except SystemExit as e:
                if debug_logging:
                    current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % (
                        algorithm_name, e))
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)
            except:
                traceback_msg = traceback.format_exc()
                record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                if debug_logging:
                    current_logger.error(traceback_msg)
                    current_logger.error('error :: debug_logging :: %s :: failed to determine total_period and total_datapoints' % (
                        algorithm_name))
                timeseries = []
            if not timeseries:
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)

            if current_skyline_app == 'analyzer':
                # For analyzer, default the required period to 18 hours
                period_required = int(FULL_DURATION * 0.75)
            else:
                # Determine from timeseries
                if total_period < FULL_DURATION:
                    period_required = int(FULL_DURATION * 0.75)
                else:
                    period_required = int(total_period * 0.75)

            # If the time series does not have 75% of its full_duration it does not
            # have sufficient data to sample
            try:
                if total_period < period_required:
                    if debug_logging:
                        current_logger.debug('debug :: %s :: time series does not have sufficient data' % (
                            algorithm_name))
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    else:
                        return (anomalous, anomalyScore)
            except SystemExit as e:
                if debug_logging:
                    current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % (
                        algorithm_name, e))
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)
            except:
                traceback_msg = traceback.format_exc()
                record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                if debug_logging:
                    current_logger.error(traceback_msg)
                    current_logger.error('error :: debug_logging :: %s :: failed to determine if time series has sufficient data' % (
                        algorithm_name))
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)

            # If the time series does not have 75% of its full_duration data points
            # it does not have sufficient data to sample

            # Determine resolution from the last 30 data points
            # INFO took 0.002060 seconds
            resolution_timestamps = []
            metric_resolution = False
            for metric_datapoint in timeseries[-30:]:
                timestamp = int(metric_datapoint[0])
                resolution_timestamps.append(timestamp)
            timestamp_resolutions = []
            if resolution_timestamps:
                last_timestamp = None
                for timestamp in resolution_timestamps:
                    if last_timestamp:
                        resolution = timestamp - last_timestamp
                        timestamp_resolutions.append(resolution)
                        last_timestamp = timestamp
                    else:
                        last_timestamp = timestamp
                try:
                    del resolution_timestamps
                except:
                    pass
            if timestamp_resolutions:
                try:
                    timestamp_resolutions_count = Counter(timestamp_resolutions)
                    ordered_timestamp_resolutions_count = timestamp_resolutions_count.most_common()
                    metric_resolution = int(ordered_timestamp_resolutions_count[0][0])
                except SystemExit as e:
                    if debug_logging:
                        current_logger.debug('debug_logging :: %s :: SystemExit called, exiting - %s' % (
                            algorithm_name, e))
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    else:
                        return (anomalous, anomalyScore)
                except:
                    traceback_msg = traceback.format_exc()
                    record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                    if debug_logging:
                        current_logger.error(traceback_msg)
                        current_logger.error('error :: debug_logging :: %s :: failed to determine the metric resolution' % (
                            algorithm_name))
                try:
                    del timestamp_resolutions
                except:
                    pass
            minimum_datapoints = None
            if metric_resolution:
                minimum_datapoints = int(period_required / metric_resolution)
            if minimum_datapoints:
                if total_datapoints < minimum_datapoints:
                    if debug_logging:
                        current_logger.debug('debug :: %s :: time series does not have sufficient data, minimum_datapoints required is %s and time series has %s' % (
                            algorithm_name, str(minimum_datapoints),
                            str(total_datapoints)))
                    if return_anomalies:
                        return (anomalous, anomalyScore, anomalies)
                    else:
                        return (anomalous, anomalyScore)

            # Is the time series fully populated?
            # full_duration_datapoints = int(full_duration / metric_resolution)
            total_period_datapoints = int(total_period / metric_resolution)
            # minimum_percentage_sparsity = 95
            minimum_percentage_sparsity = 90
            sparsity = int(total_datapoints / (total_period_datapoints / 100))
            if sparsity < minimum_percentage_sparsity:
                if debug_logging:
                    current_logger.debug('debug :: %s :: time series does not have sufficient data, minimum_percentage_sparsity required is %s and time series has %s' % (
                        algorithm_name, str(minimum_percentage_sparsity),
                        str(sparsity)))
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)
            if len(set(item[1] for item in timeseries)) == 1:
                if debug_logging:
                    current_logger.debug('debug :: %s :: time series does not have sufficient variability, all the values are the same' % algorithm_name)
                anomalous = False
                anomalyScore = 0.0
                if return_anomalies:
                    return (anomalous, anomalyScore, anomalies)
                else:
                    return (anomalous, anomalyScore)

        end_preprocessing = timer()
        preprocessing_runtime = end_preprocessing - start_preprocessing
        if debug_logging:
            current_logger.debug('debug :: %s :: preprocessing took %.6f seconds' % (
                algorithm_name, preprocessing_runtime))

        if not timeseries:
            if debug_logging:
                current_logger.debug('debug :: %s :: LevelShiftAD not run as no data' % (
                    algorithm_name))
            anomalies = []
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            else:
                return (anomalous, anomalyScore)
        else:
            if debug_logging:
                current_logger.debug('debug :: %s :: timeseries length: %s' % (
                    algorithm_name, str(len(timeseries))))

        if len(timeseries) < 100:
            if debug_logging:
                current_logger.debug('debug :: %s :: time series does not have sufficient data' % (
                    algorithm_name))
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            else:
                return (anomalous, anomalyScore)

        start_analysis = timer()
        try:
            df = pd.DataFrame(timeseries, columns=['date', 'value'])
            df['date'] = pd.to_datetime(df['date'], unit='s')
            datetime_index = pd.DatetimeIndex(df['date'].values)
            df = df.set_index(datetime_index)
            df.drop('date', axis=1, inplace=True)
            s = validate_series(df)
            level_shift_ad = LevelShiftAD(c=c, side=side, window=window)
            anomaly_df = level_shift_ad.fit_detect(s)
            anomalies = anomaly_df.loc[anomaly_df['value'] > 0]
            anomalous = False
            if len(anomalies) > 0:
                anomaly_timestamps = list(anomalies.index.astype(np.int64) // 10**9)
                if realtime_analysis:
                    last_window_timestamps = [int(item[0]) for item in timeseries[-window:]]
                    # if timeseries[-1][0] in anomaly_timestamps:
                    for timestamp in last_window_timestamps:
                        if timestamp in anomaly_timestamps:
                            anomalous = True
                            break
                else:
                    anomalous = True
                    # Convert anomalies dataframe to anomalies_list
                    anomalies_list = []

                    # @added 20210316 - Feature #3978: luminosity - classify_metrics
                    #                   Feature #3642: Anomaly type classification
                    # Convert anomalies dataframe to anomalies_dict
                    anomalies_dict = {}
                    anomalies_dict['metric'] = base_name
                    anomalies_dict['timestamp'] = int(timeseries[-1][0])
                    anomalies_dict['from_timestamp'] = int(timeseries[0][0])
                    anomalies_dict['algorithm'] = algorithm_name
                    anomalies_dict['anomalies'] = {}

                    for ts, value in timeseries:
                        if int(ts) in anomaly_timestamps:
                            anomalies_list.append([int(ts), value])
                            anomalies_dict['anomalies'][int(ts)] = value
                    anomalies = list(anomalies_list)

                    # @added 20210316 - Feature #3978: luminosity - classify_metrics
                    #                   Feature #3642: Anomaly type classification
                    if save_plots_to:
                        try:
                            from adtk.visualization import plot
                            metric_dir = base_name.replace('.', '/')
                            timestamp_dir = str(int(timeseries[-1][0]))
                            save_path = '%s/%s/%s/%s' % (
                                save_plots_to, algorithm_name, metric_dir,
                                timestamp_dir)
                            if save_plots_to_absolute_dir:
                                save_path = '%s' % save_plots_to
                            anomalies_dict['file_path'] = save_path
                            save_to_file = '%s/%s.%s.png' % (
                                save_path, algorithm_name, base_name)
                            if filename_prefix:
                                save_to_file = '%s/%s.%s.%s.png' % (
                                    save_path, filename_prefix, algorithm_name,
                                    base_name)
                            save_to_path = os_path_dirname(save_to_file)
                            title = '%s\n%s' % (algorithm_name, base_name)
                            if not os_path_exists(save_to_path):
                                try:
                                    mkdir_p(save_to_path)
                                except Exception as e:
                                    current_logger.error('error :: %s :: failed to create dir - %s - %s' % (
                                        algorithm_name, save_to_path, e))
                            if os_path_exists(save_to_path):
                                try:
                                    plot(s, anomaly=anomaly_df, anomaly_color='red', title=title, save_to_file=save_to_file)
                                    if debug_logging:
                                        current_logger.debug('debug :: %s :: plot saved to - %s' % (
                                            algorithm_name, save_to_file))
                                except Exception as e:
                                    current_logger.error('error :: %s :: failed to plot - %s - %s' % (
                                        algorithm_name, base_name, e))
                            anomalies_file = '%s/%s.%s.anomalies_list.txt' % (
                                save_path, algorithm_name, base_name)
                            with open(anomalies_file, 'w') as fh:
                                fh.write(str(anomalies_list))
                                # os.chmod(anomalies_file, mode=0o644)
                            data_file = '%s/data.txt' % (save_path)
                            with open(data_file, 'w') as fh:
                                fh.write(str(anomalies_dict))
                        except SystemExit as e:
                            if debug_logging:
                                current_logger.debug('debug_logging :: %s :: SystemExit called during save plot, exiting - %s' % (
                                    algorithm_name, e))
                            if return_anomalies:
                                return (anomalous, anomalyScore, anomalies)
                            else:
                                return (anomalous, anomalyScore)
                        except Exception as e:
                            traceback_msg = traceback.format_exc()
                            record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                            if debug_logging:
                                current_logger.error(traceback_msg)
                                current_logger.error('error :: %s :: failed to plot or save anomalies file - %s - %s' % (
                                    algorithm_name, base_name, e))
            else:
                anomalies = []

            # @added 20210318 - Feature #3978: luminosity - classify_metrics
            #                   Feature #3642: Anomaly type classification
            if anomalies and run_PersistAD and not realtime_analysis:
                persist_ad_algorithm_parameters = {}
                try:
                    persist_ad_algorithm_parameters = algorithm_parameters['persist_ad_algorithm_parameters']
                except:
                    pass
                persist_ad_window = 20
                try:
                    persist_ad_window = persist_ad_algorithm_parameters['window']
                except:
                    pass
                persist_ad_c = 9.9
                try:
                    persist_ad_c = persist_ad_algorithm_parameters['c']
                except:
                    pass
                try:
                    from adtk.detector import PersistAD
                    persist_ad = PersistAD(c=persist_ad_c, side='both', window=persist_ad_window)
                    persist_ad_anomaly_df = persist_ad.fit_detect(s)
                    persist_ad_anomalies = persist_ad_anomaly_df.loc[persist_ad_anomaly_df['value'] > 0]
                    if len(persist_ad_anomalies) > 0:
                        current_logger.info('%s :: %s anomalies found with PersistAD on %s' % (
                            algorithm_name, str(len(persist_ad_anomalies)),
                            base_name))
                        persist_ad_anomaly_timestamps = list(persist_ad_anomalies.index.astype(np.int64) // 10**9)
                        # Convert persist_ad_anomalies dataframe to persist_ad_anomalies_list
                        persist_ad_anomalies_list = []
                        persist_ad_anomalies_dict = {}
                        persist_ad_anomalies_dict['metric'] = base_name
                        persist_ad_anomalies_dict['timestamp'] = int(timeseries[-1][0])
                        persist_ad_anomalies_dict['from_timestamp'] = int(timeseries[0][0])
                        persist_ad_anomalies_dict['algorithm'] = 'adtk_PersistAD'
                        persist_ad_anomalies_dict['anomalies'] = {}

                        for ts, value in timeseries:
                            if int(ts) in persist_ad_anomaly_timestamps:
                                persist_ad_anomalies_list.append([int(ts), value])
                                persist_ad_anomalies_dict['anomalies'][int(ts)] = value
                        persist_ad_anomalies = list(persist_ad_anomalies_list)
                        if save_plots_to:
                            try:
                                from adtk.visualization import plot
                                metric_dir = base_name.replace('.', '/')
                                timestamp_dir = str(int(timeseries[-1][0]))
                                save_path = '%s/%s/%s/%s' % (
                                    save_plots_to, algorithm_name, metric_dir,
                                    timestamp_dir)
                                if save_plots_to_absolute_dir:
                                    save_path = '%s' % save_plots_to
                                persist_ad_anomalies_dict['file_path'] = save_path
                                save_to_file = '%s/%s.PersistAD.%s.png' % (
                                    save_path, algorithm_name, base_name)
                                if filename_prefix:
                                    save_to_file = '%s/%s.%s.%s.png' % (
                                        save_path, filename_prefix, algorithm_name,
                                        base_name)
                                save_to_path = os_path_dirname(save_to_file)
                                title = '%s - PersistAD verification\n%s' % (algorithm_name, base_name)
                                if not os_path_exists(save_to_path):
                                    try:
                                        mkdir_p(save_to_path)
                                    except Exception as e:
                                        current_logger.error('error :: %s :: failed to create dir - %s - %s' % (
                                            algorithm_name, save_to_path, e))
                                if os_path_exists(save_to_path):
                                    try:
                                        plot(s, anomaly=persist_ad_anomaly_df, anomaly_color='red', title=title, save_to_file=save_to_file)
                                        if debug_logging:
                                            current_logger.debug('debug :: %s :: plot saved to - %s' % (
                                                algorithm_name, save_to_file))
                                    except Exception as e:
                                        current_logger.error('error :: %s :: failed to plot - %s - %s' % (
                                            algorithm_name, base_name, e))
                                anomalies_file = '%s/%s.%s.PersistAD.anomalies_list.txt' % (
                                    save_path, algorithm_name, base_name)
                                with open(anomalies_file, 'w') as fh:
                                    fh.write(str(persist_ad_anomalies))
                                    # os.chmod(anomalies_file, mode=0o644)
                                data_file = '%s/PersistAD.data.txt' % (save_path)
                                with open(data_file, 'w') as fh:
                                    fh.write(str(persist_ad_anomalies_dict))
                            except Exception as e:
                                traceback_msg = traceback.format_exc()
                                record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                                if debug_logging:
                                    current_logger.error(traceback_msg)
                                    current_logger.error('error :: %s :: failed to plot or save PersistAD anomalies file - %s - %s' % (
                                        algorithm_name, base_name, e))
                except Exception as e:
                    traceback_msg = traceback.format_exc()
                    record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
                    if debug_logging:
                        current_logger.error(traceback_msg)
                        current_logger.error('error :: %s :: failed to run analysis with PersistAD - %s - %s' % (
                            algorithm_name, base_name, e))
            try:
                del df
            except:
                pass
        except SystemExit as e:
            if debug_logging:
                current_logger.debug('debug_logging :: %s :: SystemExit called, during analysis, exiting - %s' % (
                    algorithm_name, e))
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            else:
                return (anomalous, anomalyScore)
        except:
            traceback_msg = traceback.format_exc()
            record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback_msg)
            if debug_logging:
                current_logger.error(traceback_msg)
                current_logger.error('error :: debug_logging :: %s :: failed to run on ts' % (
                    algorithm_name))
            if return_anomalies:
                return (anomalous, anomalyScore, anomalies)
            else:
                return (anomalous, anomalyScore)

        end_analysis = timer()
        analysis_runtime = end_analysis - start_analysis

        if debug_logging:
            current_logger.debug('debug :: %s :: LevelShiftAD took %.6f seconds' % (
                algorithm_name, analysis_runtime))

        if anomalous:
            anomalyScore = 1.0
        else:
            anomalyScore = 0.0

        if debug_logging:
            current_logger.info('%s :: anomalous - %s, anomalyScore - %s' % (
                algorithm_name, str(anomalous), str(anomalyScore)))

        if debug_logging:
            end = timer()
            processing_runtime = end - start
            current_logger.info('%s :: completed analysis in %.6f seconds' % (
                algorithm_name, processing_runtime))
        try:
            del timeseries
        except:
            pass
        if return_anomalies:
            return (anomalous, anomalyScore, anomalies)
        else:
            return (anomalous, anomalyScore)

    except SystemExit as e:
        if debug_logging:
            current_logger.debug('debug_logging :: %s :: SystemExit called (before StopIteration), exiting - %s' % (
                algorithm_name, e))
        if return_anomalies:
            return (anomalous, anomalyScore, anomalies)
        else:
            return (anomalous, anomalyScore)
    except StopIteration:
        # This except pattern MUST be used in ALL custom algorithms to
        # facilitate the traceback from any errors.  We want the algorithm to
        # run super fast and without spamming the log with lots of errors.
        # But we do not want the function returning and not reporting
        # anything to the log, so the pythonic except is used to "sample" any
        # algorithm errors to a tmp file and report once per run rather than
        # spewing tons of errors into the log e.g. analyzer.log
        if return_anomalies:
            return (False, None, anomalies)
        else:
            return (False, None)
    except:
        record_algorithm_error(current_skyline_app, parent_pid, algorithm_name, traceback.format_exc())
        # Return False and None as the algorithm could not determine True or False
        if return_anomalies:
            return (False, None, anomalies)
        else:
            return (False, None)

    if return_anomalies:
        return (anomalous, anomalyScore, anomalies)
    else:
        return (anomalous, anomalyScore)
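
For reference, here is a minimal standalone sketch of the core adtk steps that adtk_level_shift wraps, using the same pandas and adtk calls as the function above on a toy series containing an obvious level shift. The parameters mirror the example configuration; the toy data and variable names are illustrative and this sketch is not part of Skyline.

import pandas as pd
from adtk.data import validate_series
from adtk.detector import LevelShiftAD

# Toy 60-second-resolution series: roughly flat around 10, then a shift to ~20.
timeseries = [[1578916800 + i * 60,
               10.0 + (i % 3) * 0.1 + (10.0 if i >= 60 else 0.0)]
              for i in range(120)]

df = pd.DataFrame(timeseries, columns=['date', 'value'])
df['date'] = pd.to_datetime(df['date'], unit='s')
df = df.set_index(pd.DatetimeIndex(df['date'].values))
df.drop('date', axis=1, inplace=True)

s = validate_series(df)
level_shift_ad = LevelShiftAD(c=9.0, side='both', window=5)
anomaly_df = level_shift_ad.fit_detect(s)

# Epoch timestamps adtk labelled as anomalous (around the shift boundary).
anomaly_timestamps = list(
    anomaly_df.loc[anomaly_df['value'] > 0].index.astype('int64') // 10 ** 9)
print(anomaly_timestamps)
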
예제 #21
0
from unittest import TestCase, main as unittest_main
from os import pardir as os_pardir, remove as os_remove
from os.path import (join as os_path_join, dirname as os_path_dirname, abspath
                     as os_path_abspath, exists as os_path_exists)
from sys import path as sys_path
from urllib.request import urlopen
from shutil import copyfileobj as shutil_copyfileobj

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from vorbis.vorbis_main import PacketsProcessor, CorruptedFileDataError

TEST_FILE_1_PATH = os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                'test_audiofiles', 'test_1.ogg')

TEST_FILE_NOT_OGG_PATH = os_path_join(
    os_path_dirname(os_path_abspath(__file__)), 'test_audiofiles',
    'test_wrong_ogg_file.ogg')

TEST_FILE_NOT_VORBIS_PATH = os_path_join(
    os_path_dirname(os_path_abspath(__file__)), 'test_audiofiles',
    'test_wrong_vorbis_file.ogg')

TEST_FILE_NOT_OGG_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/tests'
    r'/test_audiofiles/test_wrong_ogg_file.ogg')

TEST_FILE_NOT_VORBIS_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/tests'
    r'/test_audiofiles/test_wrong_vorbis_file.ogg')
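
The urlopen and shutil_copyfileobj imports above suggest the test audio files are fetched on demand when they are missing. A hedged sketch of that pattern follows, reusing only the names already imported and defined in this example; the helper name _ensure_test_file is illustrative.

def _ensure_test_file(local_path: str, url: str) -> None:
    # Download the test file once if it is not already present locally.
    if not os_path_exists(local_path):
        with urlopen(url) as response, open(local_path, 'wb') as out_file:
            shutil_copyfileobj(response, out_file)


_ensure_test_file(TEST_FILE_NOT_OGG_PATH, TEST_FILE_NOT_OGG_URL)
_ensure_test_file(TEST_FILE_NOT_VORBIS_PATH, TEST_FILE_NOT_VORBIS_URL)
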
예제 #22
0
def r3_dnn_apply_keras(target_dirname,
                       old_stft_obj=None,
                       cuda=False,
                       saving_to_disk=True):
    '''
    r3_dnn_apply takes an old_stft object (or loads it from disk as a side
    effect) and saves a new_stft object.
    '''
    LOGGER.info(
        '{}: r3: Denoising original stft with neural network model...'.format(
            target_dirname))
    scan_battery_dirname = os_path_dirname(target_dirname)
    model_dirname = os_path_dirname(os_path_dirname(scan_battery_dirname))

    # load stft data
    if old_stft_obj is None:
        old_stft_fpath = os_path_join(target_dirname, 'old_stft.mat')
        with h5py_File(old_stft_fpath, 'r') as f:
            stft = np_concatenate(
                [f['old_stft_real'][:], f['old_stft_imag'][:]], axis=1)
    else:
        stft = np_concatenate(
            [old_stft_obj['old_stft_real'], old_stft_obj['old_stft_imag']],
            axis=1)

    N_beams, N_elements_2, N_segments, N_fft = stft.shape
    N_elements = N_elements_2 // 2

    # combine stft_real and stft_imag

    # move element position axis
    stft = np_moveaxis(stft, 1, 2)  # TODO: Duplicate?

    # reshape the to flatten first two axes
    stft = np_reshape(
        stft, [N_beams * N_segments, N_elements_2, N_fft])  # TODO: Duplicate?

    # process stft with networks
    k_mask = list(range(3, 6))
    for frequency in k_mask:
        process_each_frequency_keras(model_dirname, stft, frequency)

    # reshape the stft data
    stft = np_reshape(
        stft, [N_beams, N_segments, N_elements_2, N_fft])  # TODO: Duplicate?

    # set zero outside analysis frequency range
    discard_mask = np_ones_like(stft, dtype=bool)
    discard_mask[:, :, :, k_mask] = False  # pylint: disable=E1137
    stft[discard_mask] = 0
    del discard_mask

    # mirror data to negative frequencies using conjugate symmetry
    end_index = N_fft // 2
    stft[:, :, :, end_index + 1:] = np_flip(stft[:, :, :, 1:end_index], axis=3)
    stft[:, :, N_elements:2 * N_elements, end_index +
         1:] = -1 * stft[:, :, N_elements:2 * N_elements, end_index + 1:]

    # move element position axis
    stft = np_moveaxis(stft, 1, 2)  # TODO: Duplicate?

    # change variable names
    # new_stft_real = stft[:, :N_elements, :, :]
    new_stft_real = stft[:, :N_elements, :, :].transpose()
    # new_stft_imag = stft[:, N_elements:, :, :]
    new_stft_imag = stft[:, N_elements:, :, :].transpose()

    del stft

    # change dimensions
    # new_stft_real = new_stft_real.transpose()
    # new_stft_imag = new_stft_imag.transpose()

    # save new stft data
    new_stft_obj = {
        'new_stft_real': new_stft_real,
        'new_stft_imag': new_stft_imag
    }
    if saving_to_disk is True:
        new_stft_fname = os_path_join(target_dirname, 'new_stft.mat')
        savemat(new_stft_fname, new_stft_obj)
    LOGGER.info('{}: r3 Done.'.format(target_dirname))
    return new_stft_obj
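
The conjugate-symmetry mirroring above is easier to see on a single 1-D spectrum. The toy sketch below applies the same slicing on stand-in arrays; the array contents and names are arbitrary illustrations, not data from the model.

import numpy as np

n_fft = 8
real = np.arange(n_fft, dtype=float)          # stand-in "real" half of one spectrum
imag = np.arange(n_fft, dtype=float) + 100.0  # stand-in "imaginary" half

end_index = n_fft // 2
# Mirror positive-frequency bins 1..end_index-1 onto bins end_index+1.. and
# negate the imaginary part, i.e. fill the negative frequencies with the
# complex conjugate of the positive ones.
real[end_index + 1:] = np.flip(real[1:end_index])
imag[end_index + 1:] = -1 * np.flip(imag[1:end_index])
print(real)
print(imag)
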
예제 #23
0
def _file_open_mkdirp(path: str):
    os_makedirs(os_path_dirname(path), exist_ok=True)
    return open(path, "wb")
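
A brief usage sketch, assuming the helper and its imports above are in scope; the output path is illustrative. Parent directories are created on demand before the file is opened for binary writing.

with _file_open_mkdirp('out/nested/dir/blob.bin') as file_handle:
    file_handle.write(b'\x00\x01')
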
예제 #24
0
## Copyright 2019 Dynatrace LLC
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
##     http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## ---------------------------------------------------------------------------
"""Filenames used by compose module tests
"""
from os.path import dirname as os_path_dirname
from pathlib import Path as pathlib_Path

CONFIG_SAMPLE_DIR = pathlib_Path(os_path_dirname(__file__)) / "config-samples"
COMPOSE_SMALL = CONFIG_SAMPLE_DIR / "compose-small.yaml"
COMPOSE_BIG = CONFIG_SAMPLE_DIR / "compose-big.yaml"
COMPOSE_CYCLE = CONFIG_SAMPLE_DIR / "compose-cycle.yaml"
COMPOSE_TRICKY = CONFIG_SAMPLE_DIR / "compose-tricky.yaml"

TEA_TASKS_DIR = pathlib_Path(os_path_dirname(__file__)) / "tea-tasks"
BOIL_WATER = TEA_TASKS_DIR / "boil_water.py"
POUR_WATER = TEA_TASKS_DIR / "pour_water.py"
PREP_INFUSER = TEA_TASKS_DIR / "prep_infuser.py"
STEEP_TEA = TEA_TASKS_DIR / "steep_tea.py"
예제 #25
0
def main(argv: list) -> int:
    """Passphrase CLI interface."""
    passphrase = Passphrase()

    # Set defaults
    passphrase.entropy_bits_req = ENTROPY_BITS_MIN
    passwordlen_default = passphrase.password_length_needed()
    amount_n_default = 0
    passphrase.amount_n = amount_n_default
    # To avoid loading the wordlist unnecessarily, I'm hardcoding this value
    # It's ok, it's only used to show help information
    amount_w_default = 6

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description='{version_string}\n\n'
        'Generates a cryptographically secure passphrase, based on '
        'a wordlist, or a\npassword, and prints it to standard output.\n'
        'By default, it uses an embedded EFF Large Wordlist for passphrases.\n'
        'Passphrases with less than {wordsamountmin} words are considered '
        'insecure. A safe bet is \nbetween {wordsamountmin} and 7 words, '
        'plus at least a number.\n'
        'For passwords, use at least {passwdmin} characters, but prefer '
        '{passwdpref} or more, using the\ncomplete characters set.\n\n'
        'Instead of words and numbers, a password (random string of '
        'printable\ncharacters from Python String standard) can be generated '
        'by\n-p | --password, specifying the length. It uses uppercase, '
        'lowercase, digits\nand punctuation characters unless otherwise '
        'specified.\n'
        'Also, a UUID v4 string can be generated by --uuid4 or a coin can be '
        'thrown\nwith --coin.\n'
        'A custom wordlist can be specified by -i | --input, the format must '
        'be: \nsingle column, one word per line. If -d | --diceware is used, '
        'the input\nfile is treated as a diceware wordlist (two columns).'
        '\nOptionally, -o | --output can be used to specify an output file '
        '(existing \nfile is overwritten).\n'
        'The number of words is {wordsamountmin} by default, but it '
        'can be changed by -w | --words.\n'
        'The number of numbers is {numsamountmin} by default, but it can be '
        'changed by\n-n | --numbers. The generated numbers are between '
        '{minnum} and {maxnum}.\n'
        'The default separator is a blank space, but any character or '
        'character\nsequence can be specified by -s | --separator.\n'
        '\nExample output:\n'
        '\tDefault parameters:\tchalice sheath postcard modular cider size\n'
        '\tWords=3, Numbers=2:\tdepraved widow office 184022 320264\n'
        '\tPassword, 20 chars:\tsF#s@B+iR#ZIL-yUWKPR'.format(
            version_string=__version_string__,
            minnum=passphrase.randnum_min,
            maxnum=passphrase.randnum_max,
            wordsamountmin=amount_w_default,
            numsamountmin=amount_n_default,
            passwdmin=passwordlen_default,
            passwdpref=passwordlen_default + 4))

    parser.add_argument(
        '--version',
        action='store_true',
        help='print program version and licensing information and exit')
    parser.add_argument(
        '--insecure',
        action='store_true',
        default=False,
        help="force password/passphrase generation even if the system's "
        "entropy is too low")
    parser.add_argument(
        '--no-newline',
        action='store_true',
        default=False,
        help="don't print newline at the end of the passphrase/password")
    parser.add_argument(
        '-m',
        '--mute',
        action='store_true',
        default=False,
        help="muted mode: it won't print output, only informational, warning "
        "or error messages (usefull with -o | --output)")
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        default=False,
        help='print additional information (can coexist with -m | --mute)')
    parser.add_argument(
        '-e',
        '--entropybits',
        type=_bigger_than_zero,
        default=ENTROPY_BITS_MIN,
        help='specify the number of bits to use for entropy calculations '
        '(defaults to {})'.format(ENTROPY_BITS_MIN))
    parser.add_argument('--uuid4',
                        action='store_true',
                        default=False,
                        help='generate a UUID v4 string')
    parser.add_argument('--coin',
                        action='store_true',
                        default=False,
                        help='generate a random coin throw: heads or tails')
    parser.add_argument(
        '-p',
        '--password',
        type=_bigger_than_zero,
        const=-1,
        nargs='?',
        help='generate a password of the specified length from all printable '
        'or selected characters')
    parser.add_argument(
        '--use-uppercase',
        type=_bigger_than_zero,
        const=0,
        nargs='?',
        help='use uppercase characters for password generation or give the '
        'amount of uppercase characters in the passphrase: zero or no '
        'input for all uppercase or any number of uppercase '
        'characters wanted (the rest are lowercase)')
    parser.add_argument(
        '--use-lowercase',
        type=_bigger_than_zero,
        const=0,
        nargs='?',
        help='use lowercase characters for password generation or give the '
        'amount of lowercase characters in the passphrase: zero or no '
        'input for all lowercase (default) or any number of lowercase '
        'characters wanted (the rest are uppercase)')
    parser.add_argument('--use-digits',
                        action='store_true',
                        default=False,
                        help='use digits for password generation')
    parser.add_argument(
        '--use-alphanumeric',
        action='store_true',
        default=False,
        help='use lowercase and uppercase characters, and digits for password '
        'generation (equivalent to --use-lowercase --use-uppercase '
        '--use-digits)')
    parser.add_argument(
        '--use-punctuation',
        action='store_true',
        default=False,
        help='use punctuation characters for password generation')
    parser.add_argument('-w',
                        '--words',
                        type=_bigger_than_zero,
                        help='specify the amount of words (0 or more)')
    parser.add_argument('-n',
                        '--numbers',
                        type=_bigger_than_zero,
                        default=amount_n_default,
                        help='specify the amount of numbers (0 or more)')
    parser.add_argument(
        '-s',
        '--separator',
        type=str,
        default=' ',
        help='specify a separator character (space by default)')
    parser.add_argument(
        '-o',
        '--output',
        type=str,
        help='specify an output file (existing file is overwritten)')
    parser.add_argument(
        '-i',
        '--input',
        type=str,
        help='specify an input file (it must have the following format: '
        'single column, one word per line)')
    parser.add_argument(
        '-d',
        '--diceware',
        action='store_true',
        default=False,
        help='specify input file as a diceware list (format: two columns)')

    args = parser.parse_args(argv)

    inputfile = args.input
    outputfile = args.output
    separator = args.separator
    is_diceware = args.diceware
    passwordlen = args.password
    amount_w = args.words
    amount_n = args.numbers
    show_version = args.version
    mute = args.mute
    verbose = args.verbose
    no_newline = args.no_newline
    gen_uuid4 = args.uuid4
    gen_coin = args.coin
    p_uppercase = args.use_uppercase
    p_lowercase = args.use_lowercase
    p_digits = args.use_digits
    p_punctuation = args.use_punctuation
    p_alphanumeric = args.use_alphanumeric
    entropy_bits = args.entropybits
    gen_insecure = args.insecure

    if show_version:
        print(__version_string__)
        return 0

    if verbose:
        Aux.print_stderr(__version_string__)

    # Check system entropy
    system_entropy = Aux.system_entropy()
    if system_entropy < SYSTEM_ENTROPY_BITS_MIN:
        Aux.print_stderr(
            'Warning: the system has too little entropy: {} bits; randomness '
            'quality could be poor'.format(system_entropy))
        if not gen_insecure:
            Aux.print_stderr('Error: system entropy too low: {system_entropy} '
                             '< {system_entropy_min}'.format(
                                 system_entropy=system_entropy,
                                 system_entropy_min=SYSTEM_ENTROPY_BITS_MIN))
            return 1

    if verbose:
        Aux.print_stderr(
            'Using {} bits of entropy for calculations (if any). The minimum '
            'recommended is {}'.format(entropy_bits, ENTROPY_BITS_MIN))

    # Check selected entropy
    check_chosen_entropy = False if gen_uuid4 or gen_coin else not (
        amount_n and amount_w and passwordlen is None)
    if check_chosen_entropy and entropy_bits < ENTROPY_BITS_MIN:
        Aux.print_stderr(
            'Warning: insecure number of bits for entropy calculations '
            'chosen! Should be bigger than {}'.format(ENTROPY_BITS_MIN))
    passphrase.entropy_bits_req = entropy_bits

    # Generate whatever is requested
    if gen_uuid4:
        # Generate uuid4
        if verbose:
            Aux.print_stderr('Generating UUID v4')
        gen_what = 'UUID v4'
        gen_ent = 120

        passphrase.generate_uuid4()
        passphrase.separator = '-'
    elif gen_coin:
        # Generate a coin throw
        if verbose:
            Aux.print_stderr('Throwing a coin')
        gen_what = 'coin'
        gen_ent = 1

        passphrase = 'Heads' if randbool() else 'Tails'
    elif passwordlen is not None:
        # Generate a password
        gen_what = 'password'

        p_uppercase = p_uppercase is not None
        p_lowercase = p_lowercase is not None
        if (p_uppercase or p_lowercase or p_digits or p_punctuation
                or p_alphanumeric):
            passphrase.password_use_uppercase = (p_uppercase or p_alphanumeric)
            passphrase.password_use_lowercase = (p_lowercase or p_alphanumeric)
            passphrase.password_use_digits = (p_digits or p_alphanumeric)
            passphrase.password_use_punctuation = p_punctuation

        min_len = passphrase.password_length_needed()
        if passwordlen < 1:
            passwordlen = min_len
        elif passwordlen < min_len:
            Aux.print_stderr(
                'Warning: insecure password length chosen! Should be bigger '
                'than or equal to {}'.format(min_len))

        passphrase.passwordlen = passwordlen
        gen_ent = passphrase.generated_password_entropy()

        if verbose:
            verbose_string = ('Generating password of {} characters long '
                              'using '.format(passwordlen))
            verbose_string += ('uppercase characters, ' if
                               (passphrase.password_use_uppercase
                                or p_alphanumeric) else '')
            verbose_string += ('lowercase characters, ' if
                               (passphrase.password_use_lowercase
                                or p_alphanumeric) else '')
            verbose_string += ('digits, ' if (passphrase.password_use_digits
                                              or p_alphanumeric) else '')
            verbose_string += ('punctuation characters, ' if
                               (passphrase.password_use_punctuation) else '')
            Aux.print_stderr(verbose_string[:-2] if (
                verbose_string[-2:] == ', ') else verbose_string)

        passphrase.generate_password()
        passphrase.separator = ''
    else:
        # Generate a passphrase
        gen_what = 'passphrase'

        # Read wordlist if indicated
        if inputfile is None:
            passphrase.load_internal_wordlist()
        else:
            try:
                passphrase.import_words_from_file(inputfile, is_diceware)
            except IOError:
                Aux.print_stderr(
                    "Error: input file {} is empty or it can't be opened or "
                    "read".format(inputfile))
                return 1

        passphrase.amount_n = amount_n
        amount_w_good = passphrase.words_amount_needed()
        if amount_w is None:
            amount_w = amount_w_good
        elif amount_w < amount_w_good:
            Aux.print_stderr(
                'Warning: insecure amount of words chosen! Should be '
                'bigger than or equal to {}'.format(amount_w_good))

        passphrase.amount_w = amount_w
        gen_ent = passphrase.generated_passphrase_entropy()

        if verbose:
            Aux.print_stderr(
                'Generating a passphrase of {} words and {} '
                'numbers using {}'.format(
                    amount_w, amount_n,
                    'internal wordlist' if inputfile is None else
                    ('external wordlist: ' + inputfile +
                     (' (diceware-like)' if is_diceware else ''))))

        case = (-1 * p_lowercase) if p_lowercase else p_uppercase
        passphrase.generate(case)
        passphrase.separator = separator

    if verbose:
        Aux.print_stderr('The entropy of this {what} is {ent:.2f} bits'.format(
            what=gen_what, ent=gen_ent))

    if not gen_coin and gen_ent < ENTROPY_BITS_MIN:
        Aux.print_stderr('Warning: the {} is too short!'.format(gen_what))

    if not mute:
        if no_newline:
            print(passphrase, end='')
        else:
            print(passphrase)

    if outputfile is not None:
        # ensure path to file exists or create
        dir_ = os_path_dirname(outputfile)
        if dir_:
            try:
                os_makedirs(dir_, exist_ok=True)
            except PermissionError:
                Aux.print_stderr(
                    'Error: permission denied to create directory {}'.format(
                        dir_, ))
                return 1
        try:
            with open(outputfile, mode='wt', encoding='utf-8') as outfile:
                linefeed = '' if no_newline else '\n'
                outfile.write(str(passphrase) + linefeed)

        except IOError:
            Aux.print_stderr(
                "Error: file {} can't be opened or written".format(
                    outputfile, ))
            return 1

    return 0
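
The excerpt stops at the end of main(); a minimal sketch of the usual entry-point wiring for such a CLI (the real package may do this differently):

if __name__ == '__main__':
    from sys import argv as sys_argv, exit as sys_exit

    # Pass everything after the program name to main() and use its return
    # value as the process exit code.
    sys_exit(main(sys_argv[1:]))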
Example #26
0
from unittest import TestCase, main as unittest_main
from os import pardir as os_pardir, remove as os_remove
from os.path import (join as os_path_join, dirname as os_path_dirname, abspath
                     as os_path_abspath, exists as os_path_exists)
from typing import List
from sys import path as sys_path
from urllib.request import urlopen
from shutil import copyfileobj as shutil_copyfileobj

sys_path.append(
    os_path_join(os_path_dirname(os_path_abspath(__file__)), os_pardir))

from vorbis.decoders import (DataReader, SetupHeaderDecoder,
                             EndOfPacketException)
from vorbis.helper_funcs import float32_unpack

TEST_FILE_1_PATH = os_path_join(os_path_dirname(os_path_abspath(__file__)),
                                'test_audiofiles', 'test_1.ogg')

TEST_FILE_1_URL: str = (
    r'https://raw.githubusercontent.com/susimus/ogg_vorbis/master/'
    r'tests/test_audiofiles/test_1.ogg')

test_file_1_was_downloaded: bool = False


# noinspection PyPep8Naming
def setUpModule():
    global TEST_FILE_1_PATH

    if not os_path_exists(TEST_FILE_1_PATH):
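
The excerpt is cut off here. Judging from the urlopen, shutil_copyfileobj and os_remove imports and the test_file_1_was_downloaded flag, the missing code presumably downloads the fixture when it is absent and removes it afterwards; a hedged sketch of that idea (not the original body):

def _download_test_file_1():
    # Fetch the fixture from GitHub and remember that we created it.
    global test_file_1_was_downloaded

    with urlopen(TEST_FILE_1_URL) as response, \
            open(TEST_FILE_1_PATH, 'wb') as out_file:
        shutil_copyfileobj(response, out_file)

    test_file_1_was_downloaded = True


# noinspection PyPep8Naming
def tearDownModule():
    # Remove the fixture only if setUpModule had to download it.
    if test_file_1_was_downloaded and os_path_exists(TEST_FILE_1_PATH):
        os_remove(TEST_FILE_1_PATH)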
Example #27
0
File: __init__.py Project: bscohen1/gpkit
    Requirements
    ------------
    numpy
    MOSEK or CVXOPT
    scipy(optional): for complete sparse matrix support
    sympy(optional): for latex printing in iPython Notebook

    Attributes
    ----------
    settings : dict
        Contains settings loaded from ``./env/settings``
"""
from os import sep as os_sep
from os.path import dirname as os_path_dirname
SETTINGS_PATH = os_sep.join([os_path_dirname(__file__), "env", "settings"])

__version__ = "0.4.0"
UNIT_REGISTRY = None
SIGNOMIALS_ENABLED = False

# global variable initializations
DimensionalityError = ValueError
units = None


def enable_units(path=None):
    """Enables units support in a particular instance of GPkit.

    Posynomials created after calling this are incompatible with those created
    before.
Example #28
0
# -*- coding: utf-8 -*-
import os
from datetime import timedelta
from os.path import abspath as os_path_abspath
from os.path import dirname as os_path_dirname
from os.path import join as os_path_join

current_dir = os_path_abspath(os_path_dirname(__file__))

# BASIC
DEBUG = True
SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os_path_join(current_dir, '../',
                                                      'data.sqlite')
USE_TOKEN_AUTH = True

# EMAIL
MAIL_SERVER = 'smtp.googlemail.com'
MAIL_PORT = 465
MAIL_USE_TLS = False
MAIL_USE_SSL = True
MAIL_USERNAME = os.getenv('GMAIL_USERNAME')
MAIL_PASSWORD = os.getenv('GMAIL_PASSWORD')

# SECURITY
SECRET_KEY = os.getenv('SECRET_KEY') or 'secret_secret_secret'
SECURITY_REGISTERABLE = True
SECURITY_REGISTER_URL = '/auth/register'
SECURITY_PASSWORD_HASH = os.getenv('SECURITY_PASSWORD_HASH') or 'sha512_crypt'
SECURITY_PASSWORD_SALT = os.getenv(
    'SECURITY_PASSWORD_SALT') or 'salt_salt_salt'
JWT_EXPIRATION_DELTA = timedelta(days=10)
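
A configuration module like this is typically handed to Flask through its config API; a minimal sketch, assuming the module is importable as config (the app object below is not part of the snippet):

from flask import Flask

app = Flask(__name__)
app.config.from_object('config')  # or: app.config.from_pyfile('config.py')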
Example #29
0
from logging import basicConfig as logging_basicConfig, \
                    DEBUG as logging_DEBUG, \
                    INFO as logging_INFO, \
                    getLogger as logging_getLogger

from lib.process_single_scan_battery_keras import process_single_scan_battery_keras
from lib.utils import copy_anything

# Additional imports used later in this snippet (not shown in the original
# excerpt):
from os.path import basename as os_path_basename, \
                    dirname as os_path_dirname, \
                    join as os_path_join
from shutil import move as shutil_move
from time import time as time_time

# SCAN_BATTERIES_TARGETS_GLOB_STRING = 'data/BEAM_Reverb_20181004_L74_70mm/target_*_SCR_*_0dB'

# SCAN_BATTERIES_DIRNAME = 'data/BEAM_Reverb_20181004_L74_70mm_selected'
SCAN_BATTERIES_DIRNAME = 'scan_batteries'
MODEL_SAVE_FNAME = 'model.joblib'
MODELS_DIRNAME = 'DNNs'
SCRIPT_FNAME = os_path_basename(__file__)
PROJECT_DIRNAME = os_path_dirname(__file__)
LIB_DIRNAME = os_path_join(PROJECT_DIRNAME, 'lib')


def evaluate_one_model_keras(model_dirpath):
    # rename _trained as _evaluating
    new_folder_name = model_dirpath.replace('_trained', '_evaluating')
    shutil_move(model_dirpath, new_folder_name)
    model_name = os_path_basename(new_folder_name)
    copied_scan_battery_dirname = os_path_join(
        new_folder_name, os_path_basename(SCAN_BATTERIES_DIRNAME))
    copy_anything(SCAN_BATTERIES_DIRNAME, copied_scan_battery_dirname)

    time_start = time_time()

    # with Pool() as pool:
Example #30
0

# Imports assumed by this snippet (not shown in the original excerpt); the
# names DECOMP, MEDIA_LIST and
# decompose_listed_subtrees_and_mark_media_descendants are assumed to come
# from the host crawler project.
import re
from os.path import join as os_path_join, dirname as os_path_dirname, \
                    abspath as os_path_abspath
from bs4 import BeautifulSoup, Tag

def decompose_spec(article_dec):
    decompose_listed_subtrees_and_mark_media_descendants(article_dec, DECOMP, MEDIA_LIST)
    for c in article_dec.children:
        if isinstance(c, Tag) and c.name == 'h2':
            c.decompose()
    publi = article_dec.find('div', class_='head')
    if publi is not None and publi.find('h3') is not None:
        publi.decompose()
    return article_dec


LINK_FILTER_SUBSTRINGS_SPEC = re.compile('|'.join(['LINK_FILTER_DUMMY_STRING']))

BLACKLIST_SPEC = [url.strip() for url in
                  open(os_path_join(os_path_dirname(os_path_abspath(__file__)), 'valasz_BLACKLIST.txt')).readlines()]

MULTIPAGE_URL_END = re.compile(r'.*?page=.')


def next_page_of_article_spec(curr_html):
    bs = BeautifulSoup(curr_html, 'lxml')
    if bs.find('article', class_='percro-percre-lista') is not None:
        next_tag = bs.find('a', rel='next')
        if next_tag is not None and 'href' in next_tag.attrs.keys():
            next_link = next_tag.attrs['href']
            link = f'http://valasz.hu{next_link}'
            return link
    return None
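
For illustration, a minimal sketch that exercises next_page_of_article_spec on a hand-made fragment (the markup is an assumption, shaped only after the selectors the function queries):

sample_html = '''
<article class="percro-percre-lista">
  <a rel="next" href="/cikk?page=2">2</a>
</article>
'''

# Expected to print: http://valasz.hu/cikk?page=2
print(next_page_of_article_spec(sample_html))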
Example #31
0
# Imports assumed by this snippet (not shown in the original excerpt); the
# project-level names (ApertureDatasetEval, get_which_model_from_params_fname,
# predict, LOGGER, MODEL_PARAMS_FNAME, MODEL_SAVE_FNAME, EVAL_BATCH_SIZE,
# DATALOADER_NUM_WORKERS) are defined elsewhere in the project.
from os.path import join as os_path_join, dirname as os_path_dirname
from numpy import inf as np_inf, newaxis as np_newaxis
from numpy.linalg import norm as np_linalg_norm
from torch import device as torch_device, load as torch_load
from torch.cuda import is_available as torch_cuda_is_cuda_available, \
                       empty_cache as torch_cuda_empty_cache
from torch.utils.data import DataLoader


def process_each_frequency(model_dirname, stft, frequency, using_cuda=True):
    '''
    Process one frequency bin of stft in place: load the model trained for
    this bin, run it on the normalized aperture data and write the rescaled
    prediction back into stft.
    '''
    is_using_cuda = using_cuda and torch_cuda_is_cuda_available()
    my_device = torch_device('cuda:0' if is_using_cuda else 'cpu')

    # 1. Instantiate Neural Network Model
    model_params_fname = os_path_join(
        os_path_join(model_dirname, 'k_' + str(frequency)), MODEL_PARAMS_FNAME)

    model_save_fpath = os_path_join(model_dirname, 'k_' + str(frequency),
                                    MODEL_SAVE_FNAME)
    model = get_which_model_from_params_fname(model_params_fname)
    model.load_state_dict(torch_load(os_path_join(
        os_path_dirname(model_save_fpath), 'model.dat'),
                                     map_location=my_device),
                          strict=True)
    model.eval()
    model = model.to(my_device)

    if False:
        model.printing = True
        from lib.print_layer import PrintLayer
        new_model_net = []
        for layer in model.net:
            new_model_net.append(layer)
            new_model_net.append(PrintLayer(layer))

        from torch.nn import Sequential
        model.net = Sequential(*new_model_net)
    # 2. Get X_test
    LOGGER.debug('r3.process_each_frequency: stft.shape = {}'.format(
        stft.shape))

    aperture_data = stft[:, :, frequency]  # or stft_frequency

    # 2.1. normalize each row by its maximum absolute value (infinity norm)
    aperture_data_norm = np_linalg_norm(aperture_data, ord=np_inf, axis=1)
    aperture_data /= aperture_data_norm[:, np_newaxis]

    # load into torch and onto gpu
    aperture_dataset_eval = ApertureDatasetEval(aperture_data)
    aperture_dataset_loader = DataLoader(aperture_dataset_eval,
                                         batch_size=EVAL_BATCH_SIZE,
                                         shuffle=False,
                                         num_workers=DATALOADER_NUM_WORKERS,
                                         pin_memory=using_cuda)

    # 3. Predict
    if is_using_cuda is True:
        torch_cuda_empty_cache()

    aperture_data_new = predict(model, aperture_dataset_loader, my_device)

    del aperture_data, model, aperture_dataset_eval, aperture_dataset_loader, my_device
    if is_using_cuda is True:
        torch_cuda_empty_cache()
    # 4. Postprocess on y_hat
    # rescale the data and store new data in stft
    stft[:, :,
         frequency] = aperture_data_new * aperture_data_norm[:, np_newaxis]
    del aperture_data_new, aperture_data_norm
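
process_each_frequency mutates stft in place, one frequency bin at a time; a minimal sketch of the surrounding driver loop (hypothetical, not shown in the excerpt):

# stft is indexed as stft[:, :, frequency]; iterate over the last axis.
for frequency in range(stft.shape[2]):
    process_each_frequency(model_dirname, stft, frequency, using_cuda=True)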