Example #1
def user_requested_anomaly7():
    """ Checks if the user requested an anomaly, and returns True/False accordingly. """
    digit = 0
    res = False
    if is_nonzero_file7(summon_filename):
        lines = []
        with open(get_full_path(summon_filename)) as f:
            lines = f.readlines()
        if len(lines) > 0:
            try:
                digit = int(lines[0])
                if digit > 0:
                    res = True
            except Exception as e:
                res = False
                append_logs("ERROR:" + str(e), name4logs, "always")
        else:
            res = False
    else:
        res = False

    # Disable summoning of anomalies after the requested number of anomalies were added
    if res:
        with open(get_full_path(summon_filename), "w") as f:
            if digit > 0:
                f.write(str(digit - 1))
            else:
                f.write("0")

    return res
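
A minimal usage sketch, assuming summon_filename points at state_controls/summonAnomaly.txt (the file 0launcher resets to "0") and that get_full_path resolves project-relative paths:

with open(get_full_path(summon_filename), "w") as f:
    f.write("2")  # the user requests two anomalies

user_requested_anomaly7()  # -> True, the counter is decremented to 1
user_requested_anomaly7()  # -> True, the counter is decremented to 0
user_requested_anomaly7()  # -> False, no more anomalies requested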
Example #2
def ask_model(lmodel, observations_df,
              scaling):  # TODO: use scaling for KitNET too
    datapoint = None
    try:
        datapoint = observations_df.to_numpy()[-1]
        rmse_score = lmodel.execute(datapoint)
    except Exception as e:
        rmse_score = 0
        append_logs(
            "ERROR: KitNET ask_model failed. datapoint: " + str(datapoint) +
            " . Exception: " + str(e), name4logs, "always")
    return rmse_score
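
A hedged usage sketch, assuming model was returned by get_model and df was built by fetched_data_to_dataframe (both shown in later examples):

rmse = ask_model(model, df, scaling=None)  # scaling is not yet used for KitNET
print("anomaly score (RMSE) for the latest observation:", rmse)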
Example #3
def launch_scripts():
    """ Launches all the python scripts necessary for Thio to run.

    If you want to disable Telemanom and/or KitNET, just comment them out here and restart 0launcher.
    """
    with open("state_controls/scripts_to_run.txt", "w") as scripts_f:
        scripts_f.write("")
    try:
        run('lib_telemanom_train.py', "thio_telemanom")
        run('lib_telemanom_infer.py', "thio_telemanom")
        run('lib_KitNET_train.py', "thio_kitnet")
        run('lib_KitNET_infer.py', "thio_kitnet")
        run('gui.py', "thio_kitnet")
    except Exception as e:
        helper_funcs.append_logs(str(e), name4logs, "always")
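
The run helper is not shown in these examples. A purely hypothetical sketch of what it might do, starting each script as a detached process inside the named conda environment (the actual implementation may differ):

import subprocess

def run(script_name, conda_env):
    # Hypothetical sketch: launch the script in the given conda environment and
    # return immediately, so all Thio scripts run in parallel.
    subprocess.Popen(["conda", "run", "-n", conda_env, "python", script_name])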
Example #4
 def execute(self, x):
     if self.v is None:
         e_msg = "Can't execute x, because a feature mapping has not been learned or provided. Try running process(x) instead."
         append_logs(e_msg, "KitNET lib", "always")
         raise RuntimeError(e_msg)
     else:
         self.n_executed += 1
         # Ensemble Layer
         s_l1 = np.zeros(len(self.ensembleLayer))
         for a in range(len(self.ensembleLayer)):
             # make sub inst
             xi = x[self.v[a]]
             s_l1[a] = self.ensembleLayer[a].execute(xi)
         # OutputLayer
         return self.outputLayer.execute(s_l1)
Example #5
def python_script_running7(script_filename):
    """ Returns True if the script with the given filename is currently running, False otherwise.

    Args:
        script_filename (str): e.g. "lib_telemanom_train.py"
    """
    res = False
    try:
        for p in psutil.process_iter():
            if len(p.cmdline()) > 1:
                if script_filename in p.cmdline()[1]:
                    res = True
                    break
    except Exception as e:
        append_logs("Exception: " + str(e), name4logs, "always")
    return res
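
A small usage sketch, e.g. to check whether a training script still needs to be (re)launched (run is the helper used in launch_scripts above):

if not python_script_running7("lib_telemanom_train.py"):
    run('lib_telemanom_train.py', "thio_telemanom")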
Example #6
def fetched_data_to_dataframe(filename, last_n_values=-1):
    """ Reads the dataset file and converts it into a pandas dataframe, with columns representing channels.

    Args:
       filename (str): the dataset filename (e.g. "syntheticData.txt")
       last_n_values (int): number of the latest datapoints to read
    """
    cols_number = 3 * len(channels)

    my_cols = [str(i) for i in range(cols_number)]  # create placeholder column names

    cols2delete = []
    for c in range(cols_number):
        if (c + 1) % 3 != 0:
            cols2delete.append(c)

    df = pd.DataFrame()
    try:

        # TODO: use is_nonzero_file7 to check if the file is non-zero

        # import tailer as tl
        import io
        source = helper_funcs.get_full_path(filename)
        if last_n_values != -1:
            last_lines = helper_funcs.read_last_lines(source, last_n_values)
            source = io.StringIO('\n'.join(last_lines))

        df = pd.read_csv(source,
                         sep=";|§",
                         names=my_cols,
                         header=None,
                         engine="python")
    except Exception as e:
        helper_funcs.append_logs(
            "ERROR in fetchedData_to_DataFrame upon trying to open " +
            filename + " : " + str(e), "parser", "always", "print")

    if not df.empty:
        df = df.drop(df.columns[cols2delete], axis=1)

        # the columns in the fetched file are sorted alphabetically.
        # We sort it here too - to make them be the same columns
        df.columns = sorted(channels)
    return df
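
A short illustration of what the parsing above does, assuming the line format produced by fetch_and_save_datapoint (shown in a later example):

# A dataset line is assumed to look like:
#   1582830400.15; bitcoin; eur; 8080.99 § litecoin; eur; 58.08
# Splitting on ";" or "§" yields [timestamp, name, unit, price, name, unit, price, ...].
# The surplus leading field (the timestamp) becomes the dataframe index, and the
# (c + 1) % 3 != 0 filter above drops everything except the price columns.
df = fetched_data_to_dataframe("syntheticData.txt", last_n_values=100)
print(df.tail(3))  # one price column per channel, columns sorted alphabetically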
Example #7
    def __init__(self,
                 n,
                 max_autoencoder_size=10,
                 fm_grace_period=None,
                 ad_grace_period=10000,
                 learning_rate=0.1,
                 hidden_ratio=0.75,
                 feature_map=None):
        # Parameters:
        self.AD_grace_period = ad_grace_period
        if fm_grace_period is None:
            self.FM_grace_period = ad_grace_period
        else:
            self.FM_grace_period = fm_grace_period
        if max_autoencoder_size <= 0:
            self.m = 1
        else:
            self.m = max_autoencoder_size
        self.lr = learning_rate
        self.hr = hidden_ratio
        self.n = n

        # Variables
        self.n_trained = 0  # the number of training instances so far
        self.n_executed = 0  # the number of executed instances so far
        self.v = feature_map
        if self.v is None:
            append_logs(
                "Feature-Mapper: train-mode, Anomaly-Detector: off-mode",
                "KitNET lib", "verbose")
        else:
            self.__createAD__()
            append_logs(
                "Feature-Mapper: execute-mode, Anomaly-Detector: train-mode",
                "KitNET lib", "verbose")
        self.FM = CorClust(self.n)  # incremental feature clustering for the feature mapping process
        self.ensembleLayer = []
        self.outputLayer = None
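
A minimal construction sketch (the values are illustrative, not taken from the project configs):

detector = KitNET(n=10, max_autoencoder_size=10, fm_grace_period=1000, ad_grace_period=9000)
# With feature_map=None the Feature-Mapper starts in train-mode; passing a precomputed
# mapping (a list of feature-index lists) creates the anomaly detector immediately.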
Example #8
def get_model(input_dataframe):
    input_arr = input_dataframe.to_numpy()

    dataset_size = len(input_dataframe.index)

    # KitNET params:
    max_ae = 10  # maximum size for any autoencoder in the ensemble layer

    fm_grace = int(
        dataset_size * 0.1
    )  # the number of instances taken to learn the feature mapping (the ensemble's architecture)
    ad_grace = dataset_size - fm_grace  # the number of instances used to train the anomaly detector (ensemble itself)

    append_logs(
        "Dataset_size: " + str(dataset_size) + " . FMgrace: " + str(fm_grace) +
        " . ADgrace: " + str(ad_grace), name4logs, "verbose")

    append_logs("numpy.ndarray tail my input_arr:\n" + str(input_arr[-3:]),
                name4logs, "verbose")

    # Build KitNET
    kit_net_obj = KitNET(input_arr.shape[1], max_ae, fm_grace, ad_grace)

    model = None
    for i in range(input_arr.shape[0]):
        if i % 1000 == 0:
            g_msg = "progress: " + str(i)
            # save_model_to_pickle(model, -1, "pickled_models/kitnet_test_" + str(i) + ".pkl")
            append_logs(g_msg, name4logs, "verbose")
        model = kit_net_obj.train(input_arr[i, ])

    return model, None, True
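
A hedged end-to-end sketch tying get_model to ask_model, assuming fetched_data_to_dataframe from the parser example above:

df = fetched_data_to_dataframe("syntheticData.txt")
model, _, ok = get_model(df)               # the second and third return values are None and True here
latest_score = ask_model(model, df, None)  # RMSE of the most recent observation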
Example #9
 def train(self, x):
     # If the FM is in train-mode, and the user has not supplied a feature mapping
     if self.n_trained <= self.FM_grace_period and self.v is None:
         # update the incremental correlation matrix
         self.FM.update(x)
         if self.n_trained == self.FM_grace_period:  # If the feature mapping should be instantiated
             self.v = self.FM.cluster(self.m)
             self.__createAD__()
             t_msg = "The Feature-Mapper found a mapping: " + str(
                 self.n) + " features to " + str(len(
                     self.v)) + " autoencoders."
             append_logs(t_msg, "KitNET lib", "verbose")
             t_msg = "Feature-Mapper: execute-mode, Anomaly-Detector: train-mode"
             append_logs(t_msg, "KitNET lib", "verbose")
     else:  # train
         # Ensemble Layer
         s_l1 = np.zeros(len(self.ensembleLayer))
         for a in range(len(self.ensembleLayer)):
             # make sub instance for autoencoder 'a'
             xi = x[self.v[a]]
             s_l1[a] = self.ensembleLayer[a].train(xi)
         # OutputLayer
         self.outputLayer.train(s_l1)
         if self.n_trained == self.AD_grace_period + self.FM_grace_period:
             t_msg = "Feature-Mapper: execute-mode, Anomaly-Detector: exeute-mode"
             append_logs(t_msg, "KitNET lib", "verbose")
     self.n_trained += 1
     return self
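
Putting __init__, train and execute together, a minimal offline walk-through of the phases (feature-mapping grace period, anomaly-detector grace period, then scoring), assuming X is a 2-D numpy array with one observation per row (e.g. the input_arr built in get_model above):

k = KitNET(X.shape[1], max_autoencoder_size=10, fm_grace_period=1000, ad_grace_period=9000)
for row in X:
    k.train(row)   # the first 1000 rows feed the Feature-Mapper, the rest train the ensemble
scores = [k.execute(row) for row in X]  # per-observation RMSE once the mapping is learned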
Example #10
def fetch_and_save_datapoint(data_channels, use_synthetic_data7):
    """ Returns a string that looks like this: 1582830400.15; bitcoin; eur; 8080.99 § litecoin; eur; 58.08

    Also saves the string to the latest_datapoint file.
    Args:
        data_channels (list of strings): names of channels
        use_synthetic_data7 (bool): True if synthetic data is used, False otherwise
    """
    try:
        price_dic, ts = recieve_datapoint(data_channels, use_synthetic_data7)
        data_point_str = price_dict_to_str(price_dic, "eur", ts)

        if use_synthetic_data7:
            data_filename_for_saving = "dataset/latest_datapoint_synthetic.txt"
        else:
            data_filename_for_saving = "dataset/latest_datapoint_fetched.txt"

        # TODO: move it to 0launcher
        list_to_file(data_filename_for_saving, [data_point_str], "w")
    except Exception as fetch_e:
        data_point_str = None
        f_msg = "failed to get a datapoint: " + str(fetch_e)
        append_logs(f_msg, "0launcher", "always", "print")
    return data_point_str
Example #11
def data_sanity_check(use_synthetic_data7, data_channels):
    """ Checks if the dataset contains corrupted data.

    The checks cover the case where the user has changed the number of channels, but forgot to delete the old data that
    still has the old number of channels.

    Args:
        use_synthetic_data7 (bool): if True, the synthetic data is used
        data_channels (list of strings): channel names

    """
    # TODO: check if there are at least 2 channels in configs, otherwise KitNET will not work

    if use_synthetic_data7:
        dataset_filename = "dataset/syntheticData.txt"
    else:
        dataset_filename = "dataset/fetchedData.txt"

    bad_shape_msg = dataset_filename + "seems to be in a bad shape, as reading it into a dataframe causes an error" \
                                       " or meaningless output. If you changed the number of channels, deleting   " \
                                       "the data that has the previous number of channels could help "

    # TODO: remove code duplication, as a similar code is used in fetched_data_to_dataframe
    if is_nonzero_file7(dataset_filename):
        cols_number = 3 * len(data_channels)
        my_cols = [str(i) for i in range(cols_number)]  # create placeholder column names
        print("checking...", dataset_filename)
        df = pd.DataFrame()
        try:
            df = pd.read_csv(get_full_path(dataset_filename),
                             sep=";|§",
                             names=my_cols,
                             header=None,
                             engine="python")
        except Exception as e:
            append_logs(bad_shape_msg + " " + str(e), name4logs, "always",
                        "print")
            exit()

        timestamps = pd.DataFrame(df.index).to_numpy()
        latest_timestamp = timestamps[-1]
        if "nan" in str(latest_timestamp):
            append_logs(bad_shape_msg, name4logs, "always", "print")
            exit()
    else:
        append_logs(
            dataset_filename + " doesn't exist or of zero size. First launch?",
            name4logs, "always", "print")
Example #12
def get_price_from_substr(istr, original_str):
    """Extracts the float value (e.g. 7.5) and the channel_name from a string like this: "channel_name; unit; 7.5".

    Args:
        istr (str): a string like this: "bitcoin; eur; 7.5"
        original_str (str): at the very first stage of parsing, before this func is called, we receive a string like:
            1585474566.27; bitcoin; eur; 3.664121010741326 § ethereum; eur; 1.0710547987175814 ...
            We pass it here for debug purposes.
    """

    positions_list = find_all_positions_of_character(istr, ';')
    if len(positions_list) > 0:
        temp_list = positions_list[
            -1:]  # get the position of last ";" as a list of 1 element
    else:
        temp_list = []
    if len(temp_list) > 0:
        position = temp_list[0]
        position += 2  # skip "; "
        price_str = istr[position:]  # get the string from this position
        if ("None" in price_str) or ("invalid" in price_str):
            price = -1
            helper_funcs.append_logs(
                "get_price_from_substr: -None- or -invalid- in the input string. Could be just a missing data, "
                "or a sign of something bad. Input: " + str(istr) +
                " . Original str: " + original_str, "parser", "always")
        else:
            try:
                price = float(price_str)  # try to convert it into float
            except Exception as e:
                price = -1  # if can't parse the price, return "-1"
                msg = "ERROR: get_price_from_substr: price = float(price_str) caused an arror: " + str(
                    e) + " . Inputs: istr = " + str(istr)
                helper_funcs.append_logs(msg, "parser", "always", "print")

        position = positions_list[
            0]  # get the position of the first ";" to remove the timestamp
        name_str = istr[:position]

    else:
        price = -1
        name_str = ""
        msg = "get_price_from_substr: len(temp_list) is zero. Caused by this istr: " + istr
        helper_funcs.append_logs(msg, "parser", "always", "print")

    return price, name_str
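
Illustrative calls, with inputs following the format described in the docstring:

price, name = get_price_from_substr("bitcoin; eur; 7.5", "1582830400.15; bitcoin; eur; 7.5")
# -> price == 7.5, name == "bitcoin"
price, name = get_price_from_substr("bitcoin; eur; None", "1582830400.15; bitcoin; eur; None")
# -> price == -1, name == "bitcoin"; a warning is appended to the logs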
Example #13
from dataset_preprocessing import cleanup_dataset, data_sanity_check

time_between_fetches = 1.0  # how often should the data be fetched from the data provider, in seconds
this_many_last_observations = 500  # how many of the latest observations to save to a separate file

# Reset states
delete_logs()  # delete logs from the previous sessions
with open(get_full_path("state_controls/summonAnomaly.txt"), "w") as f:
    f.write("0")
with open(get_full_path("state_controls/exit7.txt"), "w") as f:
    f.write("0")

data_channels = read_configs()["data_channels"]

use_synthetic_data7 = synthetic_data7()
append_logs("use_synthetic_data7 : " + str(use_synthetic_data7), "0launcher",
            "always", "print")
append_logs("data_channels : " + str(data_channels), "0launcher", "always",
            "print")

cleanup_dataset(use_synthetic_data7)
data_sanity_check(use_synthetic_data7, data_channels)

launch_scripts()

if use_synthetic_data7:
    data_filename = "dataset/syntheticData.txt"
    last_n_filename = "dataset/lastNpoints_synthetic.txt"
else:
    data_filename = "dataset/fetchedData.txt"
    last_n_filename = "dataset/lastNpoints_fetched.txt"
Example #14
import time

from helper_funcs import append_logs, infer_and_save_results, exit7, synthetic_data7
import lib_KitNET_calc

ask_model_func = lib_KitNET_calc.ask_model

useOnlyThisManyLatestOfLastN = 50000
filename2write = "risk_scores/kitnet_anomaly.txt"
name4logs = "lib_KitNET_infer"
method_name = "kitnet"

if synthetic_data7():
    output_postfix = "_synthetic"
else:
    output_postfix = "_fetched"

modelpath = "pickled_models/" + method_name + output_postfix + ".pkl"

append_logs("Starting the main circle", name4logs, "always")

old_modification_ts = -1
old_meta_model_dic = None
scales_dic = None

while True:
    exit7()
    old_modification_ts, old_meta_model_dic, scales_dic = infer_and_save_results(
        ask_model_func, modelpath, old_modification_ts, old_meta_model_dic,
        useOnlyThisManyLatestOfLastN, method_name, scales_dic)
    time.sleep(1.0)
Example #15
def data_send_loop(add_data_callback_func):
    """Regularly reads the data to plot, and emits it."""

    # Setup the signal-slot mechanism.
    source = Communicate()
    source.data_signal.connect(add_data_callback_func)

    loop_counter = 0

    if use_synthetic_data7:
        data_filename = "dataset/latest_datapoint_synthetic.txt"
    else:
        data_filename = "dataset/latest_datapoint_fetched.txt"

    # TODO: generate this dict automatically
    pause_fetching = {
        "prices_dic": False,
        "kitnet_risk": False,
        "telemanom_risk": False
    }

    while True:
        # TODO: generate this list and the dicts automatically
        list2emit = [None, None, None]
        try:
            prices_dic = dict()
            kitnet_risk = dict()
            telemanom_risk = dict()

            # to prevent flooding the log with entries about non-existent files during the first start
            if not pause_fetching["prices_dic"]:
                prices_dic = read_prediction_for_aggregation(
                    data_filename, "realtime_graph")
                if prices_dic is None:
                    pause_fetching["prices_dic"] = True

            if not pause_fetching["kitnet_risk"]:
                # TODO: calculate the number from configs
                kitnet_risk = get_max_anomaly_from_latest(
                    "risk_scores/kitnet_anomaly" + anomaly_file_postfix +
                    ".txt", 10)
                if kitnet_risk is None:
                    pause_fetching["kitnet_risk"] = True

            if not pause_fetching["telemanom_risk"]:
                # TODO: calculate the number from configs
                telemanom_risk = get_max_anomaly_from_latest(
                    "risk_scores/telemanom_anomaly" + anomaly_file_postfix +
                    ".txt", 10)
                if telemanom_risk is None:
                    pause_fetching["telemanom_risk"] = True

            list2emit = [prices_dic, kitnet_risk, telemanom_risk]

            append_logs(str(prices_dic), name4logs, "verbose")
        except Exception as e:
            append_logs(str(e), name4logs, "always")

        time.sleep(sleep_time)  # in seconds
        source.data_signal.emit(list2emit)  # <- Here you emit a signal!

        loop_counter += 1

        if loop_counter % 60 == 0:
            for channel_key, value in pause_fetching.items():
                pause_fetching[channel_key] = False
Example #16
""" Creates/updates the Telemanom model, by regularly training it on the latest N datapoints.

It runs in parallel with the lib_telemanom_infer.py, to make training and inference work as separate processes.
"""

from helper_funcs import append_logs, train_and_save_model, exit7
import lib_telemanom_calc

get_model_func = lib_telemanom_calc.get_model
name4logs = "lib_telemanom_train"
method_name = "telemanom"
use_this_many_latest_dp = 30000  # bigger number means better AI but more compute required

# main loop
append_logs("starting the training loop", name4logs, "always", "print")
while True:
    exit7()
    train_and_save_model(use_this_many_latest_dp, get_model_func, method_name)
Example #17
The corresponding license texts are at the end of this file.
"""

import numpy as np
import time
from scipy.cluster.hierarchy import linkage, to_tree
import pickle

from helper_funcs import append_logs, get_full_path, synthetic_data7

use_synthetic_data7 = synthetic_data7()

name4logs = "lib_KitNET_calc"

msg = "Reading Sample dataset..."
append_logs(msg, name4logs, "verbose")

if use_synthetic_data7:
    filename = "dataset/syntheticData.txt"
else:
    filename = "dataset/fetchedData.txt"

np.seterr(all='ignore')


def sigmoid(x):
    return 1. / (1 + np.exp(-x))
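
# Note: np.seterr(all='ignore') above also silences the overflow warnings that
# np.exp raises in sigmoid() for large negative inputs. If silencing warnings
# globally is not desired, a numerically stable variant could be used instead
# (a sketch, not part of the original code; SciPy is already a dependency here):
from scipy.special import expit  # numerically stable logistic function


def sigmoid_stable(x):
    """Equivalent to 1. / (1 + np.exp(-x)), but without overflow warnings for large |x|."""
    return expit(x)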


class DenoisingAutoencoderParams:
    """A data class for storing the Denoising Autoencoder params."""