Code example #1
File: edeconnector.py Project: DIPET-UVT/EDE-Dipet
 def getDmonStatus(self):
     nUrl = "http://%s:%s/dmon/v1/overlord/core/status" % (self.esEndpoint,
                                                           self.dmonPort)
     logger.info(
         '[%s] : [INFO] dmon get core status url -> %s',
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         nUrl)
     try:
         rdmonStatus = requests.get(nUrl)
     except Exception as inst:
         logger.error(
             '[%s] : [ERROR] Exception has occurred while connecting to dmon with type %s at arguments %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
             inst.args)
         sys.exit(2)
     return rdmonStatus.json()
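
Examples #1, #3 and #4 all follow the same pattern: build a dmon REST URL, issue a GET and return the parsed JSON. A standalone sketch of that pattern, with a placeholder endpoint/port and an added request timeout (not present in the original), could look like:

    import requests

    def get_dmon_json(endpoint, port, path="core/status", timeout=5):
        # Build the overlord URL, fetch it and raise on HTTP errors.
        url = "http://%s:%s/dmon/v1/overlord/%s" % (endpoint, port, path)
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp.json()

    # e.g. get_dmon_json("127.0.0.1", 5001)  # placeholder endpoint and port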
Code example #2
File: edeconnector.py Project: DIPET-UVT/EDE-Dipet
 def localData(self, data):
     data_loc = os.path.join(self.dataDir, data)
     try:
         df = pd.read_csv(data_loc)
     except Exception as inst:
         logger.error(
             '[{}] : [ERROR] Cannot load local data with  {} and {}'.format(
                 datetime.fromtimestamp(
                     time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
                 inst.args))
         sys.exit(2)
     logger.info(
         '[{}] : [INFO] Loading local data from {} with shape {}'.format(
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), data_loc,
             df.shape))
     return df
Code example #3
 def roles(self):
     # self.__check_valid_es()
     nUrl = "http://%s:%s/dmon/v1/overlord/nodes/roles" % (self.esEndpoint,
                                                           self.dmonPort)
     logger.info(
         '[%s] : [INFO] dmon get roles url -> %s',
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         nUrl)
     try:
         rRoles = requests.get(nUrl)
     except Exception as inst:
         logger.error(
             '[%s] : [ERROR] Exception has occurred while connecting to dmon with type %s at arguments %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
             inst.args)
         sys.exit(2)
     rData = rRoles.json()
     return rData
Code example #4
 def getStormTopology(self):
     nUrl = "http://%s:%s/dmon/v1/overlord/detect/storm" % (self.esEndpoint,
                                                            self.dmonPort)
     logger.info(
         '[%s] : [INFO] dmon get storm topology url -> %s',
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         nUrl)
     try:
         rStormTopology = requests.get(nUrl)
     except Exception as inst:
         logger.error(
             '[%s] : [ERROR] Exception has occurred while connecting to dmon with type %s at arguments %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
             inst.args)
         print("Can't connect to dmon at %s port %s" %
               (self.esEndpoint, self.dmonPort))
         sys.exit(2)
     rData = rStormTopology.json()
     return rData
Code example #5
 def __loadClusterModel(self, method, model):
     '''
     :param method: -> method name
     :param model: -> model name
     :return: -> instance of serialized object
     '''
     lmodel = glob.glob(
         os.path.join(self.modelDir, ("%s_%s.pkl" % (method, model))))
     if not lmodel:
         logger.warning(
             '[%s] : [WARN] No %s model with the name %s found',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), method, model)
         return 0
     else:
         smodel = pickle.load(open(lmodel[0], "rb"))
         logger.info(
             '[%s] : [INFO] Successfully loaded %s model with the name %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), method, model)
         return smodel
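
A minimal standalone sketch of the same lookup-and-unpickle step (glob for "<method>_<model>.pkl" under a model directory, then load it); the model_dir argument is illustrative:

    import glob
    import os
    import pickle

    def load_model(model_dir, method, model):
        # Find a serialized model named "<method>_<model>.pkl" in model_dir.
        matches = glob.glob(os.path.join(model_dir, "%s_%s.pkl" % (method, model)))
        if not matches:
            return None  # the original returns 0 when no model is found
        with open(matches[0], "rb") as fh:
            return pickle.load(fh)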
Code example #6
 def dask_clusterMethod(self, cluster_method,
                        mname,
                        data
                        ):
     try:
         logger.info('[{}] : [INFO] Loading Clustering method {}'.format(
             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(cluster_method)))
         # delattr(cluster_method, 'behaviour')
         # del cluster_method.__dict__['behaviour']
         for k, v in cluster_method.get_params().items():
             logger.info('[{}] : [INFO] Method parameter {} set to {}'.format(
                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
         try:
             with joblib.parallel_backend('dask'):
                 logger.info('[{}] : [INFO] Using Dask backend for user defined method'.format(
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                 clf = cluster_method.fit(data)
         except Exception as inst:
             logger.error('[{}] : [ERROR] Failed to fit user defined method with dask backend with {} and {}'.format(
                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
             logger.warning('[{}] : [WARN] Using default process based backend for user defined method'.format(
                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
             clf = cluster_method.fit(data)
     except Exception as inst:
         logger.error('[{}] : [ERROR] Failed to fit {} with {} and {}'.format(
             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(cluster_method),
             type(inst), inst.args))
         sys.exit(1)
     predictions = clf.predict(data)
     logger.debug('[{}] : [DEBUG] Predicted Anomaly Array {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), predictions))
     fname = str(clf).split('(')[0]
     self.__serializemodel(clf, fname, mname)
     return clf
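
The heart of this method is fitting a user-supplied scikit-learn style estimator under joblib's Dask backend, with a fallback to the default backend. A reduced sketch with a placeholder estimator and random data (a running dask.distributed client is assumed for the Dask branch):

    import joblib
    import numpy as np
    from sklearn.cluster import KMeans

    data = np.random.rand(100, 4)        # placeholder data
    estimator = KMeans(n_clusters=3)     # any estimator exposing fit/predict

    try:
        with joblib.parallel_backend('dask'):   # needs a registered Dask client
            clf = estimator.fit(data)
    except Exception:
        clf = estimator.fit(data)                # fall back to the default backend
    predictions = clf.predict(data)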
Code example #7
 def scale(self, data,
           scaler_type=None,
           rindex='time'):  # todo, integrate
     if not scaler_type:
         logger.warning('[{}] : [WARN] No data scaling used!'.format(
             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
         return data
     if scaler_type is None:
         scaler_type = {"StandardScaler": {"copy": True, "with_mean": True, "with_std": True}}
         logger.warning('[{}] : [WARN] No user defined scaler, using default {}'.format(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), scaler_type))
     scaler_name = list(scaler_type.keys())[-1]
     scaler_attr = list(scaler_type.values())[-1]
     logger.info('[{}] : [INFO] Scaler set to {} with parameters {}.'.format(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), scaler_name, scaler_attr))
     try:
         sc_mod = importlib.import_module(self.scaler_mod)
         scaler_instance = getattr(sc_mod, scaler_name)
         scaler = scaler_instance(**scaler_attr)
     except Exception as inst:
         logger.error('[{}] : [ERROR] Error while initializing scaler {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), scaler_name))
         sys.exit(2)
     # Fit and transform data
     logger.info('[{}] : [INFO] Scaling data ...'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
     scaled_data = scaler.fit_transform(data)
     # Transform numpy array into dataframe, re-add columns to scaled numpyarray
     df_scaled = pd.DataFrame(scaled_data, columns=data.columns)
     df_scaled[rindex] = list(data.index)
     df_scaled.set_index(rindex, inplace=True)
     scaler_file = '{}.scaler'.format(scaler_name)
     logger.info('[{}] : [INFO] Saving scaler instance {} ...'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), scaler_file))
     scale_file_location = os.path.join(self.dataDir, scaler_file)
     joblib.dump(scaler, filename=scale_file_location)
     return df_scaled
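
A standalone sketch of the scaler-by-name idea used above: resolve the class from sklearn.preprocessing via importlib/getattr, fit-transform, and rebuild a DataFrame with the original index. The module path and defaults here are assumptions (the original reads them from self.scaler_mod and the config):

    import importlib
    import pandas as pd

    def scale_frame(df, scaler_name="StandardScaler", scaler_params=None):
        mod = importlib.import_module("sklearn.preprocessing")
        scaler = getattr(mod, scaler_name)(**(scaler_params or {}))
        scaled = scaler.fit_transform(df)
        # Re-attach column names and the original index to the numpy output.
        return pd.DataFrame(scaled, columns=df.columns, index=df.index), scaler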
Code example #8
 def sdbscanTrain(self, settings, mname, data):
     '''
     :param data: -> dataframe with data
     :param settings: -> settings dictionary
     :param mname: -> name of serialized clusterer
     :return: -> clusterer
     :example settings: -> {eps:0.9, min_samples:10, metric:'euclidean' ,
     algorithm:'auto', leaf_size:30, p:0.2, n_jobs:1}
     '''
     for k, v in settings.items():
         logger.info(
             '[%s] : [INFO] SDBSCAN %s set to %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v)
     sdata = StandardScaler().fit_transform(data)
     try:
         db = DBSCAN(eps=float(settings['eps']),
                     min_samples=int(settings['min_samples']),
                     metric=settings['metric'],
                     algorithm=settings['algorithm'],
                     leaf_size=int(settings['leaf_size']),
                     p=float(settings['p']),
                     n_jobs=int(settings['n_jobs'])).fit(sdata)
     except Exception as inst:
         logger.error(
             '[%s] : [ERROR] Cannot instantiate sDBSCAN with %s and %s',
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
             inst.args)
         print("Error while  instanciating sDBSCAN with %s and %s" %
               (type(inst), inst.args))
         sys.exit(1)
     labels = db.labels_
     print(labels)
     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
     print('Estimated number of clusters: %d' % n_clusters_)
     self.__serializemodel(db, 'sdbscan', mname)
     return db
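
Reduced to its scikit-learn essentials, the training step standardizes the data, fits DBSCAN and counts clusters while excluding the noise label -1. The parameter values below are just the docstring example:

    import numpy as np
    from sklearn.cluster import DBSCAN
    from sklearn.preprocessing import StandardScaler

    data = np.random.rand(200, 3)                  # placeholder data
    sdata = StandardScaler().fit_transform(data)
    db = DBSCAN(eps=0.9, min_samples=10, metric='euclidean',
                algorithm='auto', leaf_size=30, n_jobs=1).fit(sdata)
    labels = db.labels_
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters)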
Code example #9
    def dask_isolationForest(self, settings,
                             mname,
                             data
                             ):
        '''
        :param settings: -> settings dictionary
        :param mname: -> name of serialized clusterer
        :return: -> isolation forest instance
        :example settings: -> {n_estimators:100, max_samples:100, contamination:0.1, bootstrap:False,
                        max_features:1.0, n_jobs:1, random_state:None, verbose:0}
        '''
        if not settings or settings is None:
            logger.warning('[{}] : [WARN] No IsolationForest parameters defined, using defaults'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            # print(settings)
            settings = {}
        else:
            for k, v in settings.items():
                logger.info('[{}] : [INFO] IsolationForest parameter {} set to {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
        try:

            clf = IsolationForest(**settings)
            # print(clf)
        except Exception as inst:
            logger.error('[{}] : [ERROR] Failed to instantiate IsolationForest with {} and {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
            sys.exit(1)

        try:
            with joblib.parallel_backend('dask'):
                logger.info('[{}] : [INFO] Using Dask backend for IsolationForest'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                clf.fit(data)
        except Exception as inst:
            logger.error('[{}] : [ERROR] Failed to fit IsolationForest with {} and {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
            sys.exit(1)

        predict = clf.predict(data)
        anoOnly = np.argwhere(predict == -1)
        logger.info('[{}] : [INFO] Found {} anomalies in training dataset of shape {}.'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), len(anoOnly), data.shape))
        logger.debug('[{}] : [DEBUG] Predicted Anomaly Array {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), predict))
        self.__serializemodel(clf, 'isoforest', mname)
        self.__appendPredictions(method='isoforest', mname=mname, data=data, pred=predict)
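
A reduced sketch of the same flow: fit an IsolationForest (under the Dask joblib backend when a client is available), then count the samples predicted as -1. The parameters are a few of the docstring defaults:

    import joblib
    import numpy as np
    from sklearn.ensemble import IsolationForest

    data = np.random.rand(500, 5)                     # placeholder training data
    clf = IsolationForest(n_estimators=100, contamination=0.1, n_jobs=1)
    try:
        with joblib.parallel_backend('dask'):         # needs a registered Dask client
            clf.fit(data)
    except Exception:
        clf.fit(data)                                 # default backend fallback
    pred = clf.predict(data)                          # -1 = anomaly, 1 = inlier
    print("anomalies found:", len(np.argwhere(pred == -1)))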
Code example #10
    def filterWildcard(self, df, wild_card, keep=False):
        """
        :param df: dataframe to filter
        :param wild_card: str wildcard of columns to be filtered
        :param keep: if keep True, only cols with wildcard are kept, if False they will be deleted
        :return: filtered dataframe
        """
        filtr_list = []
        mask = df.columns.str.contains(wild_card)
        filtr_list.extend(list(df.loc[:, mask].columns.values))

        logger.info('[%s] : [INFO] Columns to be filtered based on wildcard: %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), filtr_list)
        if keep:
            df_wild = df[filtr_list]
        else:
            df_wild = df.drop(filtr_list, axis=1)

        logger.info('[%s] : [INFO] Filtered shape:  %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), df_wild.shape)
        # print("Columns of filtered data:")
        # print(df_concat_filtered.columns)
        return df_wild
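
The wildcard filter is essentially a mask built from pandas' str.contains; a standalone sketch with a tiny illustrative frame:

    import pandas as pd

    def filter_wildcard(df, wild_card, keep=False):
        mask = df.columns.str.contains(wild_card)     # the wildcard is treated as a regex
        cols = list(df.columns[mask])
        return df[cols] if keep else df.drop(columns=cols)

    # df = pd.DataFrame({"cpu_user": [1], "cpu_sys": [2], "mem": [3]})
    # filter_wildcard(df, "cpu", keep=True)           # keeps cpu_user and cpu_sys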
Code example #11
 def ohEncoding(self, data,
                cols=None,
                replace=True):
     if cols is None:
         cols = []
         for el, v in data.dtypes.items():
             if v == 'object':
                 if el == 'time':
                     pass
                 else:
                     cols.append(el)
         logger.info('[%s] : [INFO] Categorical features not set, detected as categorical: %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(cols))
     logger.info('[{}] : [INFO] Categorical features now set to {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(cols)))
     vec = DictVectorizer()
     mkdict = lambda row: dict((col, row[col]) for col in cols)
     vecData = pd.DataFrame(vec.fit_transform(data[cols].apply(mkdict, axis=1)).toarray())
     vecData.columns = vec.get_feature_names()
     vecData.index = data.index
     if replace is True:
         data = data.drop(cols, axis=1)
         data = data.join(vecData)
     return data, vecData, vec
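
A minimal sketch of the same one-hot step with DictVectorizer, on a small frame with one categorical column (note that recent scikit-learn versions expose get_feature_names_out() where older ones used get_feature_names()):

    import pandas as pd
    from sklearn.feature_extraction import DictVectorizer

    data = pd.DataFrame({"time": [1, 2, 3], "state": ["ok", "warn", "ok"]})
    cols = ["state"]                                   # categorical columns

    vec = DictVectorizer()
    records = data[cols].apply(lambda row: {c: row[c] for c in cols}, axis=1)
    vec_data = pd.DataFrame(vec.fit_transform(records).toarray(),
                            columns=vec.get_feature_names_out(),
                            index=data.index)
    encoded = data.drop(cols, axis=1).join(vec_data)   # the replace=True path above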
Code example #12
    def dask_sdbscanTrain(self, settings,
                          mname,
                          data,
                          scaler=None):
        '''
        :param data: -> dataframe with data
        :param settings: -> settings dictionary
        :param mname: -> name of serialized clusterer
        :param scaler: -> scaler to use on data
        :return: -> clusterer
        :example settings: -> {eps:0.9, min_samples:10, metric:'euclidean' ,
        algorithm:'auto', leaf_size:30, p:0.2, n_jobs:1}
        '''

        if scaler is None:
            logger.warning('[{}] : [WARN] Scaler not defined'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        else:
            logger.info('[{}] : [INFO] Scaling data ...'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            data = scaler.fit_transform(data)

        if not settings or settings is None:
            logger.warning('[{}] : [WARN] No DBSCAN parameters defined, using defaults'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            settings = {}
        else:
            for k, v in settings.items():
                logger.info('[{}] : [INFO] DBSCAN parameter {} set to {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))

        try:
            db = DBSCAN(**settings).fit(data)
        except Exception as inst:
            logger.error('[{}] : [ERROR] Failed to instantiate DBSCAN with {} and {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
            sys.exit(1)
        labels = db.labels_
        logger.info('[{}] : [INFO] DBScan labels: {} '.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), labels))
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        logger.info('[{}] : [INFO] DBScan estimated number of clusters {} '.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), n_clusters_))
        self.__serializemodel(db, 'sdbscan', mname)
        return db
Code example #13
 def dask_detect(self,
                 method,
                 model,
                 data
                 ):
     smodel = self.__loadClusterModel(method, model)
     anomaliesList = []
     if not smodel:
         dpredict = 0
     else:
         if data.shape[0]:
             try:
                 logger.info('[{}] : [INFO] Loading predictive model {} '.format(
                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(smodel).split('(')[0]))
                 for k, v in smodel.get_params().items():
                     logger.info('[{}] : [INFO] Predict model parameter {} set to {}'.format(
                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
                 dpredict = smodel.predict(data)
             except Exception as inst:
                 logger.error('[{}] : [ERROR] Failed to load predictive model with {} and {}'.format(
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
                 dpredict = 0
         else:
             dpredict = 0
             logger.warning('[{}] : [WARN] DataFrame is empty with shape {} '.format(
             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(data.shape)))
     if type(dpredict) is not int:
         anomalyArray = np.argwhere(dpredict == -1)
         for an in anomalyArray:
             anomalies = {}
             anomalies['utc'] = int(data.iloc[an[0]].name)
             anomalies['hutc'] = ut2hum(int(data.iloc[an[0]].name))
             anomaliesList.append(anomalies)
     anomaliesDict = {}
     anomaliesDict['anomalies'] = anomaliesList
     logger.info('[{}] : [INFO] Detected {} anomalies with model {} using method {} '.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), len(anomaliesList), model,
         str(smodel).split('(')[0]))
     return anomaliesDict
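
Detection itself reduces to predicting on the frame and collecting the rows flagged -1; a sketch assuming a fitted model and a DataFrame indexed by UTC timestamps (the original also adds a human-readable time via the project's ut2hum helper):

    import numpy as np

    def collect_anomalies(model, data):
        pred = model.predict(data)                 # -1 marks anomalous samples
        anomalies = []
        for an in np.argwhere(pred == -1):
            anomalies.append({'utc': int(data.iloc[an[0]].name)})
        return {'anomalies': anomalies}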
Code example #14
def main(argv,
         cluster,
         client):
    dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    modelsDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')
    queryDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'queries')

    settings = Dict()
    settings.esendpoint = None
    settings.prendpoint = None
    settings.Dask.SchedulerEndpoint = None  # "local"
    settings.Dask.SchedulerPort = 8787
    settings.Dask.EnforceCheck = False
    settings.prkafkaendpoint = None
    settings.prkafkaport = 9092
    settings.prkafkatopic = "edetopic"
    settings.augmentation = None  # augmentation including scaler and user defined methods
    settings.detectionscaler = None
    settings.MPort = 9090
    settings.dmonPort = 5001
    settings.index = "logstash-*"
    settings["from"] = None
    settings.to = None
    settings.query = None
    settings.nodes = None
    settings.qsize = None
    settings.qinterval = None
    settings.fillna = None
    settings.dropna = None
    settings.local = None
    settings.train = None
    settings.hpomethod = None
    settings.tpot = None
    settings.ParamDistribution = None
    settings.detecttype = None # TODO
    settings.traintype = None
    settings.validationtype = None # Todo
    settings.target = None
    settings.load = None
    settings.file = None
    settings.method = None
    settings.detectMethod = None
    settings.trainMethod = None
    settings.cv = None
    settings.trainscore = None
    settings.scorer = None
    settings.returnestimators = None
    settings.analysis = None
    settings.validate = None
    settings.export = None
    settings.trainexport = None
    settings.detect = None  # Bool default None
    settings.cfilter = None
    settings.rfilter = None
    settings.dfilter = None
    settings.sload = None
    settings.smemory = None
    settings.snetwork = None
    settings.heap = None
    settings.checkpoint = None
    settings.delay = None
    settings.interval = None
    settings.resetindex = None
    settings.training = None
    settings.validation = None
    settings.validratio = 0.2
    settings.compare = False
    settings.anomalyOnly = False
    settings.categorical = None
    settings.point = False

    # Only for testing
    settings['validate'] = False
    dask_backend = False

    try:
        opts, args = getopt.getopt(argv, "he:tf:m:vx:d:lq:", ["endpoint=", "file=", "method=", "export=", "detect=", "query="])  # todo: expand command line options
    except getopt.GetoptError:
        logger.warning('[%s] : [WARN] Invalid argument received, exiting', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
        print("ede.py -f <filelocation>, -t -m <method> -v -x <modelname>")
        sys.exit(0)
    for opt, arg in opts:
        if opt == '-h':
            print("#" * 100)
            print("H2020 ASPIDE")
            print('Event Detection Engine')
            print("-" * 100)
            print('Utilisation:')
            print('-f -> configuration file location')
            print('-t -> activate training mode')
            print('-m -> methods')
            print('   -> allowed methods: skm, em, dbscan, sdbscan, isoforest')
            print('-x -> export model name')
            print('-v -> validation')
            print('-q -> query string for anomaly/event detection')
            print("#" * 100)
            sys.exit(0)
        elif opt in ("-e", "--endpoint"):
            settings['esendpoint'] = arg
        elif opt in ("-t"):
            settings["train"] = True
        elif opt in ("-f", "--file"):
            settings["file"] = arg
        elif opt in ("-m", "--method"):
            settings["method"] = arg
        elif opt in ("-v"):
            settings["validate"] = True
        elif opt in ("-x", "--export"):
            settings["export"] = arg
        elif opt in ("-d", "--detect"):
            settings["detect"] = arg
        elif opt in ("-l", "--list-models"):
            print ("Current saved models are:\n")
            print((getModelList()))
            sys.exit(0)
        elif opt in ("-q", "--query"):
            settings["query"] = arg

    # print("#" * 100)
    # print(queryDir)
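    # Typical invocation, based on the help text above (file and model names are placeholders):
    #   python ede.py -f ede_config.yaml -t -m isoforest -x mymodel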
    logger.info('[{}] : [INFO] Starting EDE framework ...'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
    logger.info('[{}] : [INFO] Trying to read configuration file ...'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    if settings["file"] is None:
        file_conf = 'ede_config.yaml'
        logger.info('[%s] : [INFO] Settings file set to %s',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), file_conf)
    else:
        if os.path.isfile(settings["file"]):
            file_conf = settings["file"]
            logger.info('[%s] : [INFO] Settings file set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), file_conf)
        else:
            logger.error('[%s] : [ERROR] Settings file not found at locations %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["file"])
            sys.exit(1)

    readCnf = readConf(file_conf)
    logger.info('[{}] : [INFO] Reading configuration file ...'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    # TODO: create def dls(file_conf)
    # Connector
    try:
        logger.info('[{}] : [INFO] Index Name set to : {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
            readCnf['Connector']['indexname']))
    except:
        logger.warning('[%s] : [WARN] Index not set in conf, setting to default value %s',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['index'])

    if settings['esendpoint'] is None:
        try:
            logger.info('[{}] : [INFO] Monitoring ES Backend endpoint in config {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                readCnf['Connector']['ESEndpoint']))
            settings['esendpoint'] = readCnf['Connector']['ESEndpoint']
        except:
            if readCnf['Connector']['PREndpoint'] is None:  # todo; now only available in config file not in commandline
                logger.error('[%s] : [ERROR] ES and PR backend Endpoints not set in conf or commandline!',
                                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                sys.exit(1)
            else:
                settings['prendpoint'] = readCnf['Connector']['PREndpoint']
                logger.info('[{}] : [INFO] Monitoring PR Endpoint set to {}'.format(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                            settings["prendpoint"]))
    else:
        logger.info('[%s] : [INFO] ES Backend Endpoint set to %s',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['esendpoint'])
    if settings["from"] is None:
        try:
            settings["from"] = readCnf['Connector']['From']
            logger.info('[%s] : [INFO] From timestamp set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                        settings["from"])
        except:
            if settings['prendpoint'] is not None:
                logger.info('[{}] : [INFO] PR Backend endpoint set to {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['prendpoint']))
            else:
                logger.error('[%s] : [ERROR] From timestamp not set in conf or commandline!',
                             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                sys.exit(1)
    else:
        logger.info('[%s] : [INFO] From timestamp set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['from'])

    if settings["to"] is None:
        try:
            settings["to"] = readCnf['Connector']['to']
            logger.info('[%s] : [INFO] To timestamp set to %s',
                                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                                settings["to"])
        except:
            if settings['prendpoint'] is not None:
                pass
            else:
                logger.error('[%s] : [ERROR] To timestamp not set in conf or commandline!',
                                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                sys.exit(1)
    else:
        logger.info('[%s] : [INFO] To timestamp set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['to'])

    if settings['query'] is None:
        try:
            settings['query'] = readCnf['Connector']['Query']
            logger.info('[%s] : [INFO] Query set to %s',
                                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                                settings['query'])
        except:
            if settings['prendpoint'] is not None:
                pass
            else:
                logger.error('[%s] : [ERROR] Query not set in conf or commandline!',
                             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Query set to %s',
                           datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['query'])

    if settings.prkafkaendpoint is None:
        try:
            settings.prkafkaendpoint = readCnf['Connector']['KafkaEndpoint']
            if settings.prkafkaendpoint == 'None':
                settings.prkafkaendpoint = None
            else:
                settings.prkafkatopic = readCnf['Connector']['KafkaTopic']
                settings.prkafkaport = readCnf['Connector']['KafkaPort']
            logger.info('[{}] : [INFO] Kafka Endpoint set to  {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.prkafkaendpoint))
        except:
            logger.warning('[{}] : [WARN] Kafka Endpoint not set.'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    if settings["nodes"] is None:
        try:
            if not readCnf['Connector']['nodes']:
                readCnf['Connector']['nodes'] = 0
            settings["nodes"] = readCnf['Connector']['nodes']
            logger.info('[%s] : [INFO] Desired nodes set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                    settings['nodes'])
        except:
            logger.warning('[%s] : [WARN] No nodes selected from config file or commandline, querying all',
                           datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            settings["nodes"] = 0
    else:
        logger.info('[%s] : [INFO] Desired nodes set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["nodes"])

    if settings["qsize"] is None:
        try:
            settings["qsize"] = readCnf['Connector']['QSize']
            logger.info('[%s] : [INFO] Query size set to %s',
                                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                                settings['qsize'])
        except:
            logger.warning('[%s] : [WARN] Query size not set in conf or commandline, setting to default',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            settings["qsize"] = 'default'
    else:
        logger.info('[%s] : [INFO] Query size set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["qsize"])

    if settings["qinterval"] is None:
        try:
            settings["qinterval"] = readCnf['Connector']['MetricsInterval']
            logger.info('[%s] : [INFO] Metric Interval set to %s',
                                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                                settings['qinterval'])
        except:
            logger.warning('[%s] : [WARN] Metric Interval not set in conf or commandline, setting to default',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            settings["qinterval"] = "default"
    else:
        logger.info('[%s] : [INFO] Metric interval set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["qinterval"])
    if readCnf['Connector']['Dask']:
        try:
            settings['Dask']['SchedulerEndpoint'] = readCnf['Connector']['Dask']['SchedulerEndpoint']
            settings['Dask']['SchedulerPort'] = readCnf['Connector']['Dask']['SchedulerPort']
            settings['Dask']['EnforceCheck'] = readCnf['Connector']['Dask']['EnforceCheck']
            logger.info('[{}] : [INFO] Dask scheduler  set to: endpoint {}, port {}, check {}'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['Dask']['SchedulerEndpoint'],
                settings['Dask']['SchedulerPort'], settings['Dask']['EnforceCheck']))
            dask_backend = True
        except:
            logger.warning('[{}] : [WARN] Dask scheduler  set to default values'.format(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            dask_backend = False
    if settings['local'] is None:
        try:
            settings['local'] = readCnf['Connector']['Local']
            logger.info('[{}] : [INFO] Local datasource set to {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['local']))
        except:
            logger.info('[{}] : [INFO] Local datasource set to default'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            settings['local'] = None
    else:
        logger.info('[{}] : [INFO] Local datasource set to {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['local']))
    # Mode
    if settings["train"] is None:
        try:
            settings["train"] = readCnf['Mode']['Training']
            logger.info('[%s] : [INFO] Train is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['train'])
        except:
            logger.error('[%s] : [ERROR] Train is not set in conf or commandline!',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Train is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['train'])

    # Analysis
    if settings.analysis is None:
        try:
            logger.info('[{}] : [INFO] Loading user defined analysis'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            settings.analysis = readCnf['Analysis']
        except:
            logger.info('[{}] : [INFO] No user defined analysis detected'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    # Validate
    if settings["validate"] is None:
        try:
            settings["validate"] = readCnf['Mode']['Validate']
            logger.info('[%s] : [INFO] Validate is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['validate'])
        except:
            logger.error('[%s] : [ERROR] Validate is not set in conf or commandline!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Validate is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['validate'])

    # Detect
    if settings["detect"] is None:
        try:
            settings["detect"] = readCnf['Mode']['Detect']
            logger.info('[%s] : [INFO] Detect is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['detect'])
        except:
            logger.error('[%s] : [ERROR] Detect is not set in conf or commandline!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Detect is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['detect'])

    if settings["detectMethod"] is None:
        try:
            settings["detectMethod"] = readCnf['Detect']['Method']
            logger.info('[%s] : [INFO] Detect Method is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["detectMethod"])
        except:
            logger.error('[%s] : [ERROR] Detect Method is not set in conf or commandline!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Detect Method is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["detectMethod"])

    if settings["detecttype"] is None:
        try:
            settings["detecttype"] = readCnf['Detect']['Type']
            logger.info('[{}] : [INFO] Detect Type is set to {} from conf'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["detecttype"]))
        except:
            logger.error('[%s] : [ERROR] Detect Type is not set in conf or command line!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Detect Type is set to %s from command line',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["detecttype"])

    if settings["trainMethod"] is None:
        try:
            settings["trainMethod"] = readCnf['Training']['Method']
            logger.info('[%s] : [INFO] Train Method is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["trainMethod"])
        except:
            try:
                readCnf['Training']['TPOTParam']
            except:
                logger.error('[%s] : [ERROR] Train Method is not set in conf or commandline!',
                                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Train Method is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["trainMethod"])

    if settings["traintype"] is None:
        try:
            settings["traintype"] = readCnf['Training']['Type']
            logger.info('[%s] : [INFO] Train Type is set to %s from conf',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["traintype"])
        except:
            logger.error('[%s] : [ERROR] Train Type is not set in conf or command line!',
                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Train Type is set to %s from command line',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["traintype"])
    if settings.target is None:
        try:
            settings.target = readCnf['Training']['Target']
            logger.info('[{}] : [INFO] Classification Target set to {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.target))
        except:
            if settings['traintype'] == 'classification':
                logger.warning('[{}] : [WARN] Classification Target not set in config'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.target))
            else:
                pass

    if settings.hpomethod is None:
        try:
            settings.hpomethod = readCnf['Training']['HPOMethod']
            logger.info('[{}] : [INFO] HPO method set to {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.hpomethod))
            try:
                settings.hpoparam = readCnf['Training']['HPOParam']
                for k, v in readCnf['Training']['HPOParam'].items():
                    logger.info('[{}] : [INFO] HPO Method {}  Param {} set to {}'.format(
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.hpomethod, k, v))
            except:
                logger.warning('[{}] : [WARN] HPO Method Params set to default!'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                settings.hpoparam = {}
        except:
            if readCnf['Training']['Type'] == 'hpo':
                logger.error('[{}] : [ERROR] HPO invoked without method! Exiting'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.hpomethod))
                sys.exit(1)
            else:
                pass

    if settings.ParamDistribution is None:
        try:
            settings.ParamDistribution = readCnf['Training']['ParamDistribution']
            logger.info('[{}] : [INFO] HPO Parameter Distribution found.'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        except:
            if readCnf['Training']['Type'] == 'hpo':
                logger.error('[{}] : [ERROR] HPO invoked without Parameter distribution! Exiting'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.hpomethod))
                sys.exit(1)
            else:
                pass
    if settings.tpot is None:
        try:
            settings.tpot = readCnf['Training']['TPOTParam']
            logger.info('[{}] : [INFO] TPO Parameters  found.'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        except:
            try:
                if readCnf['Training']['Type'] == 'tpot':
                    settings.tpot = {}
                    logger.warning('[{}] : [WARN] TPO Parameters not found. Using defaults'.format(
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                else:
                    pass
            except:
                pass

    if settings["export"] is None:
        try:
            settings["export"] = readCnf['Training']['Export']
            logger.info('[%s] : [INFO] Export is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["export"])
        except:
            logger.error('[%s] : [ERROR] Export is not set in conf or commandline!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Export is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["export"])

    if settings.cv is None:
        try:
            settings.cv = readCnf['Training']['CV']
            try:
                logger.info('[{}] : [INFO] Cross Validation set to {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['cv']['Type']))
            except:
                logger.info('[{}] : [INFO] Cross Validation set to {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['cv']))
                try:
                    settings['cv'] = int(settings['cv'])
                except:
                    logger.error('[{}] : [ERROR] Issues with CV definition in Training!'.format(
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                    sys.exit(1)
        except:
            logger.info('[{}] : [INFO] Cross Validation not defined'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    if settings.trainscore is None:
        try:
            settings.trainscore = readCnf['Training']['TrainScore']
            logger.info('[{}] : [INFO] Cross Validation set to include training scores'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        except:
            settings.trainscore = False

    if settings.scorer is None:
        try:
            settings.scorer = readCnf['Training']['Scorers']
            logger.info('[{}] : [INFO] Training scorers defined'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        except:
            logger.info('[{}] : [INFO] No Training scorers defined'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    if settings.returnestimators is None:
        try:
            settings.returnestimators = readCnf['Training']['ReturnEstimators']
            logger.info('[{}] : [INFO] CV Estimators will be saved'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        except:
            settings.returnestimators = False

    if settings["load"] is None:
        try:
            settings["load"] = readCnf['Detect']['Load']
            logger.info('[%s] : [INFO] Load is set to %s from conf',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["load"])
        except:
            logger.error('[%s] : [ERROR] Load is not set in conf or commandline!',
                                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            sys.exit(1)
    else:
        logger.info('[%s] : [INFO] Load is set to %s from commandline',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["load"])

    if settings.detectionscaler is None:
        try:
            settings.detectionscaler = readCnf['Detect']['Scaler']
            logger.info('[{}] : [INFO] Detection Scaler set to {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings.detectionscaler))
        except:
            settings.detectionscaler = None
            logger.warning('[{}] : [WARN] Detection scaler not specified'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))

    try:
        settings['MethodSettings'] = {}   #todo read settings from commandline ?
        for name, value in readCnf['Training']['MethodSettings'].items():
            # print("%s -> %s" % (name, value))
            settings['MethodSettings'][name] = value
    except:
        settings['MethodSettings'] = None
        logger.warning('[%s] : [WARN] No Method settings detected, using defaults for %s!',
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["method"])

    # Augmentation
    try:
        settings['augmentation'] = readCnf['Augmentation']
        logger.info('[%s] : [INFO] Augmentations loaded',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    except:
        settings['augmentation'] = None
        logger.info('[%s] : [INFO] Augmentations not defined',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    # Point anomaly settings
    try:
        settings["smemory"] = readCnf['Point']['memory']
        logger.info('[%s] : [INFO] System memory is set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["smemory"])
    except:
        settings["smemory"] = "default"
        logger.warning('[%s] : [WARN] System memory is not set, using default!',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    try:
        settings["sload"] = readCnf['Point']['load']
        logger.info('[%s] : [INFO] System load is  set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["sload"])
    except:
        settings["sload"] = "default"
        logger.warning('[%s] : [WARN] System load is not set, using default!',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    try:
        settings["snetwork"] = readCnf['Point']['network']
        logger.info('[%s] : [INFO] System network is set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["snetwork"])
    except:
        settings["snetwork"] = "default"
        logger.warning('[%s] : [WARN] System network is not set, using default!',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    try:
        settings['heap'] = readCnf['Misc']['heap']
        logger.info('[%s] : [INFO] Heap size set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['heap'])
    except:
        settings['heap'] = '512m'
        logger.info('[%s] : [INFO] Heap size set to default %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['heap'])

    # Filter
    try:
        if readCnf['Filter']['Columns']:
            logger.info('[{}] : [INFO] Filter columns set in config as {}.'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Filter']['Columns']))
            settings["cfilter"] = readCnf['Filter']['columns']
        else:
            logger.info('[{}] : [INFO] Filter columns set in config as {}.'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["cfilter"]))
    except:
        pass
    finally:
        logger.info('[%s] : [INFO] Filter column set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['cfilter'])

    try:
        # logger.info('[%s] : [INFO] Filter rows set to %s',
        #             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Filter']['Rows'])
        settings["rfilter"] = readCnf['Filter']['Rows']
    except:
        pass
        # logger.info('[%s] : [INFO] Filter rows  %s',
        #             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["rfilter"])
    finally:
        logger.info('[%s] : [INFO] Filter rows set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['rfilter'])

    try:
        if readCnf['Filter']['DColumns']:
            # print("Filter drop columns -> %s" % readCnf['Filter']['DColumns'])
            settings["dfilter"] = readCnf['Filter']['DColumns']
        else:
            # print("Filter drop columns -> %s" % settings["dfilter"])
            pass
    except:
        # print("Filter drop columns -> %s" % settings["dfilter"])
        pass
    finally:
        logger.info('[%s] : [INFO] Filter drop column set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['dfilter'])

    try:
        if readCnf['Filter']['Fillna']:
            settings['fillna'] = readCnf['Filter']['Fillna']
        else:
            settings['fillna'] = False
        logger.info('[{}] : [INFO] Fill None values set to {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Filter']['Fillna']))
    except:
        logger.info('[{}] : [INFO] Fill None not set, skipping ...'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        settings['fillna'] = False

    try:
        if readCnf['Filter']['Dropna']:
            settings['dropna'] = readCnf['Filter']['Dropna']
        else:
            settings['dropna'] = False
        logger.info('[{}] : [INFO] Drop None values set to {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Filter']['Dropna']))
    except:
        logger.info('[{}] : [INFO] Drop None not set, skipping ...'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        settings['dropna'] = False

    if settings["checkpoint"] is None:
        try:

            settings["checkpoint"] = readCnf['Misc']['checkpoint']
            logger.info('[%s] : [INFO] Checkpointing is  set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['checkpoint'])
        except:
            settings["checkpoint"] = "True"
            logger.info('[%s] : [INFO] Checkpointing is  set to True',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    else:
        logger.info('[%s] : [INFO] Checkpointing is  set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['checkpoint'])

    if settings["delay"] is None:
        try:

            settings["delay"] = readCnf['Misc']['delay']
            # logger.info('[%s] : [INFO] Delay is  set to %s',
            #         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['delay'])
        except:
            settings["delay"] = "2m"
        logger.info('[%s] : [INFO] Delay is  set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['delay'])
    else:
        logger.info('[%s] : [INFO] Delay is  set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['delay'])

    if settings["interval"] is None:
        try:

            settings["interval"] = readCnf['Misc']['interval']
            logger.info('[%s] : [INFO] Interval is  set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['interval'])
        except:

            settings["interval"] = "15m"
            logger.info('[%s] : [INFO] Interval is  set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['interval'])
    else:
        logger.info('[%s] : [INFO] Interval is  set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['interval'])

    if settings["resetindex"] is None:
        try:

            settings["resetindex"] = readCnf['Misc']['resetindex']
        except:

            settings["resetindex"] = False
    else:
        logger.info('[%s] : [INFO] Reset index set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['resetindex'])

    try:
        settings['dmonPort'] = readCnf['Connector']['dmonport']
        logger.info('[{}] : [INFO] DMon Port is set to {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
            settings['dmonPort']))
    except:
        logger.info('[%s] : [INFO] DMon Port is set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(settings['dmonPort']))

    try:
        settings['training'] = readCnf['Detect']['training']
        logger.info('[{}] : [INFO] Classification Training set is {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
            readCnf['Detect']['training']))
    except:
        logger.info('[%s] : [INFO] Classification Training set is %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(settings['training']))

    # try:
    #     print("Classification Validation set is %s" % readCnf['Detect']['validation'])
    #     settings['validation'] = readCnf['Detect']['validation']
    # except:
    #     print("Classification Validation set is default")
    # logger.info('[%s] : [INFO] Classification Validation set is %s',
    #             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(settings['validation']))


    try:
        # print("Classification validation ratio is set to %d" % int(readCnf['Training']['ValidRatio']))
        logger.info('[{}] : [INFO] Classification validation ratio is set to {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Training']['ValidRatio']))
        if float(readCnf['Training']['ValidRatio']) > 1.0:
            # print("Validation ratio is out of range, must be between 1.0 and 0.1")
            settings['validratio'] = 0.0
            logger.warning('[{}] : [WARN] Validation ratio is out of range, must be between 1.0 and 0.1, overwriting'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), readCnf['Training']['ValidRatio']))
        settings['validratio'] = float(readCnf['Detect']['validratio'])
    except:
        logger.warning('[{}] : [WARN] Validation ratio is set to default'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
    logger.info('[%s] : [INFO] Classification Validation ratio is %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(settings['validratio']))

    # try:
    #     print("Classification comparison is set to %s" % readCnf['Detect']['compare'])
    #     settings['compare'] = readCnf['Detect']['compare']
    # except:
    #     print("Classification comparison is default")
    # logger.info('[%s] : [INFO] Classification comparison is %s',
    #             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['compare'])

    try:
        # print("Classification data generation using only anomalies set to %s" % readCnf['Detect']['anomalyOnly'])
        settings['anomalyOnly'] = readCnf['Detect']['anomalyOnly']
    except:
        # print("Classification data generation using only anomalies set to False")
        pass
    logger.info('[%s] : [INFO] Classification data generation using only anomalies set to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(settings['anomalyOnly']))

    if settings["categorical"] is None:
        try:
            if not readCnf['Augmentation']['Categorical']:
                readCnf['Augmentation']['Categorical'] = None
                logger.info('[{}] : [INFO] Categorical columns defined as: {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                    readCnf['Augmentation']['Categorical']))
            if readCnf['Augmentation']['Categorical'] == '0':
                settings["categorical"] = None
            else:
                settings["categorical"] = readCnf['Augmentation']['Categorical']
            logger.info('[%s] : [INFO] Categorical Features ->  %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                    settings['categorical'])
        except:
            logger.warning('[%s] : [WARN] No Categorical Features selected from config file or comandline! Skipping encoding',
                           datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            settings["categorical"] = None
    else:
        logger.info('[%s] : [INFO] Categorical Features ->  %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings["categorical"])

    if not settings["point"]:
        try:
            settings['point'] = readCnf['Misc']['point']
            logger.info('[%s] : [INFO] Point set to %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['point'])
        except:
            settings['point'] = 'False'
            logger.info('[%s] : [INFO] Point detection set to default %s',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), settings['point'])

    #print dmonC
    # sys.exit()
    # print("Conf file -> %s" %readCnf)
    # print("Settings  -> %s" %settings)

    engine = aspideedengine.EDEngine(settings,
                                     dataDir=dataDir,
                                     modelsDir=modelsDir,
                                     queryDir=queryDir)
    #engine.printTest()
    engine.initConnector()
    if dask_backend:
        engine.runDask(engine)
    else:
        try:
            engine.runProcess(engine)
        except Exception as inst:
            logger.error('[{}] : [ERROR] Failed Process backend initialization with {} and {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args))
            logger.warning('[{}] : [WARN] Initializing default threaded engine, limited performance to be expected!'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            engine.run(engine)

    logger.info('[{}] : [INFO] Exiting EDE framework'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
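For reference, the lookups above imply a nested configuration dictionary roughly shaped as follows; this is a minimal illustrative sketch only, with section and key names taken from the code and placeholder values that are assumptions, not project defaults:

# Illustrative readCnf sketch (values are assumptions)
readCnf = {
    'Connector': {'dmonport': 5001},                                      # DMon endpoint port
    'Misc': {'interval': '15m', 'resetindex': False, 'point': 'False'},
    'Detect': {'training': 'True', 'validratio': 0.2, 'anomalyOnly': 'False'},
    'Training': {'ValidRatio': 0.2},
    'Augmentation': {'Categorical': '0'},                                 # '0' disables categorical encoding
}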
コード例 #15
0
    logger.info('[{}] : [INFO] Exiting EDE framework'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))


if __name__ == "__main__":
    def handler(signal_received, frame):
        logger.info('[{}] : [INFO] User break detected. Exiting EDE framework'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        sys.exit(0)
    signal(SIGINT, handler)
    SchedulerEndpoint, Scale, SchedulerPort, EnforceCheck = check_dask_settings()  # Todo Better solution
    if SchedulerEndpoint:
        if SchedulerEndpoint == "local":
            cluster = LocalCluster(n_workers=int(Scale))
            logger.info('[{}] : [INFO] Starting Dask local Cluster Backend with: {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), cluster))
            client = Client(cluster)
            logger.info('[{}] : [INFO] Dask Client started with: {}'.format(
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), client))
        else:
            scheduler_address = "{}:{}".format(SchedulerEndpoint, SchedulerPort)
            client = Client(address=scheduler_address)
            client.get_versions(check=EnforceCheck)
    else:
        cluster = 0
        client = 0
    main(sys.argv[1:],
         cluster,
         client)

コード例 #16
0
    def dict2csv(self, response, query, filename, df=False):
        '''
        :param response: elasticsearch response
        :param query: elasticsearch query
        :param filename: name of file
        :param df: if set to true method returns dataframe and doesn't save to file.
        :return: 0 if saved to file and dataframe if not
        '''
        requiredMetrics = []
        logger.info('[%s] : [INFO] Started response to csv conversion',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
        # dict views are not indexable in Python 3, so resolve the aggregation field targeted by the query once here
        qAggField = list(list(list(list(list(query['aggs'].values())[0].values())[1].values())[0].values())[0].values())[0]
        # print "This is the query _------------_-> %s" %query
        # print "This is the response _------------_-> %s" %response
        for key, value in response['aggregations'].items():
            for k, v in value.items():
                for r in v:
                    dictMetrics = {}
                    # print "This is the dictionary ---------> %s " % str(r)
                    for rKey, rValue in r.items():
                        if rKey == 'doc_count' or rKey == 'key_as_string':
                            pass
                        elif rKey == 'key':
                            logger.debug('[%s] : [DEBUG] Request has keys %s and  values %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), rKey, rValue)
                            # print "%s -> %s"% (rKey, rValue)
                            dictMetrics['key'] = rValue
                        elif qAggField == 'type_instance.raw' or qAggField == 'type_instance':
                            logger.debug('[%s] : [DEBUG] Detected Memory type aggregation', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                            # print "This is  rValue ________________> %s" % str(rValue)
                            # print "Keys of rValue ________________> %s" % str(rValue.keys())
                            try:
                                for val in rValue['buckets']:
                                        dictMetrics[val['key']] = val['1']['value']
                            except Exception as inst:
                                logger.error('[%s] : [ERROR] Failed to find key with %s and %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
                                sys.exit(1)
                        else:
                            # print "Values -> %s" % rValue
                            # print "rKey -> %s" % rKey
                            # print "This is the rValue ___________> %s " % str(rValue)
                            logger.debug('[%s] : [DEBUG] Request has keys %s and flattened values %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), rKey, rValue['value'])
                            dictMetrics[rKey] = rValue['value']
                    requiredMetrics.append(dictMetrics)
        # print "Required Metrics -> %s" % requiredMetrics
        csvOut = os.path.join(self.dataDir, filename)
        cheaders = []
        if qAggField == "type_instance.raw" or qAggField == 'type_instance':
            logger.debug('[%s] : [DEBUG] Detected Memory type query', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            try:
                cheaders = list(requiredMetrics[0].keys())
            except IndexError:
                logger.error('[%s] : [ERROR] Empty response detected from DMon, stopping detection, check DMon.', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
                print("Empty response detected from DMon, stopping detection, check DMon")
                sys.exit(1)
        else:
            kvImp = {}

            for qKey, qValue in query['aggs'].items():
                logger.info('[%s] : [INFO] Value aggs from query %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), qValue['aggs'])
                for v, t in qValue['aggs'].items():
                    kvImp[v] = t['avg']['field']
                    cheaders.append(v)

            cheaders.append('key')
            for key, value in kvImp.items():
                cheaders[cheaders.index(key)] = value
            for e in requiredMetrics:
                for krep, vrep in kvImp.items():
                    e[vrep] = e.pop(krep)
            logger.info('[%s] : [INFO] Dict translator %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(kvImp))
        logger.info('[%s] : [INFO] Headers detected %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(cheaders))
        if not df:
            try:
                with open(csvOut, 'w', newline='') as csvfile:
                    w = csv.DictWriter(csvfile, cheaders)
                    w.writeheader()
                    for metrics in requiredMetrics:
                        if set(cheaders) != set(metrics.keys()):
                            logger.error('[%s] : [ERROR] Headers different from required metrics: headers -> %s, metrics ->%s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(cheaders),
                                         str(list(metrics.keys())))
                            diff = list(set(metrics.keys()) - set(cheaders))
                            print("Headers different from required metrics with %s " % diff)
                            print("Check qInterval setting for all metrics. Try increasing it!")
                            sys.exit(1)
                        w.writerow(metrics)
            except EnvironmentError:
                logger.error('[%s] : [ERROR] File %s could not be created', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), csvOut)
                sys.exit(1)
            logger.info('[%s] : [INFO] Finished csv %s',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), filename)
            return 0
        else:
            df = pd.DataFrame(requiredMetrics)
            # df.set_index('key', inplace=True)
            logger.info('[%s] : [INFO] Created dataframe',
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            return df
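
A minimal usage sketch, assuming `connector` is an instance of the class that defines dict2csv, `response` is the raw Elasticsearch aggregation result and `query` is the aggregation query that produced it; all three names are placeholders for illustration:

# Hypothetical usage of dict2csv
df = connector.dict2csv(response, query, 'System.csv', df=True)   # return a DataFrame instead of writing a file
connector.dict2csv(response, query, 'System.csv')                 # or write <dataDir>/System.csv and return 0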
コード例 #17
0
    def computeOnColumns(self, df,
                         operations,
                         remove_filtered=True):
        if operations:
            if 'STD' in list(operations.keys()):
                std = operations['STD']
            else:
                std = None

            if 'Mean' in list(operations.keys()):
                mean = operations['Mean']
            else:
                mean = None

            if 'Median' in list(operations.keys()):
                median = operations['Median']
            else:
                median = None
            all_processed_columns = []
            if std is not None:
                for cl_std in std:
                    for ncol_n, fcol_n in cl_std.items():
                        df_std = self.filterColumns(df, lColumns=fcol_n)
                        logger.info('[{}] : [INFO] Computing standard deviation {} on columns {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), ncol_n, fcol_n))
                        std_df = df_std.std(axis=1, skipna=True)
                        df[ncol_n] = std_df
                        for c in fcol_n:
                            all_processed_columns.append(c)
            if mean is not None:
                for cl_mean in mean:
                    for ncol_n, fcol_n in cl_mean.items():
                        df_mean = self.filterColumns(df, lColumns=fcol_n)
                        logger.info('[{}] : [INFO] Computing mean {} on columns {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), ncol_n, fcol_n))
                        mean_df = df_mean.mean(axis=1, skipna=True)
                        df[ncol_n] = mean_df
                        for c in fcol_n:
                            all_processed_columns.append(c)
            if median is not None:
                for cl_median in median:
                    for ncol_n, fcol_n in cl_median.items():
                        df_median = self.filterColumns(df, lColumns=fcol_n)
                        logger.info('[{}] : [INFO] Computing median {} on columns {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), ncol_n, fcol_n))
                        median_df = df_median.median(axis=1, skipna=True)
                        df[ncol_n] = median_df
                        for c in fcol_n:
                            all_processed_columns.append(c)
            if "Method" in list(operations.keys()):
                df = self.__operationMethod(operations['Method'], data=df)
            if remove_filtered:
                unique_all_processed_columns = list(set(all_processed_columns))
                logger.warning('[{}] : [WARN] Dropping columns used for computation: {}'.format(
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), unique_all_processed_columns))
                self.dropColumns(df, unique_all_processed_columns, cp=False)
        else:
            logger.info('[{}] : [INFO] No data operations/augmentations defined'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        logger.info('[{}] : [INFO] Augmented data shape {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), df.shape))
        return df
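
Based on the key lookups above, the operations argument is a dict with optional 'STD', 'Mean', 'Median' and 'Method' entries, each list entry mapping a new column name to the source columns it aggregates. A hedged sketch follows; the column names and the `engine` instance are placeholders, not values from the repository:

# Illustrative operations dict for computeOnColumns
operations = {
    'STD':    [{'node_cpu_std':     ['node1_cpu', 'node2_cpu']}],
    'Mean':   [{'node_mem_mean':    ['node1_mem', 'node2_mem']}],
    'Median': [{'node_load_median': ['node1_load', 'node2_load']}],
    # 'Method': <user defined operation handled by __operationMethod>   # optional
}
df_aug = engine.computeOnColumns(df, operations, remove_filtered=True)  # source columns are dropped afterwards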
コード例 #18
0
 def dropMissing(self, df):
     logger.warning('[{}] : [WARN] Dropping columns with all missing values'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
     df.dropna(axis=1, how='all', inplace=True)
コード例 #19
0
    def chainMergeSystem(self, linterface=None, lload=None, lmemory=None, lpack=None):
        logger.info('[%s] : [INFO] Starting system metrics merge .......',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
        # Read files

        if linterface is None and lload is None and lmemory is None and lpack is None:
            allIterface = glob.glob(os.path.join(self.dataDir, "Interface_*.csv"))
            allLoad = glob.glob(os.path.join(self.dataDir, "Load_*.csv"))
            allMemory = glob.glob(os.path.join(self.dataDir, "Memory_*.csv"))
            allPackets = glob.glob(os.path.join(self.dataDir, "Packets_*.csv"))

            # Name of merged files
            mergedInterface = os.path.join(self.dataDir, "Interface.csv")
            mergedLoad = os.path.join(self.dataDir, "Load.csv")
            mergedMemory = os.path.join(self.dataDir, "Memory.csv")
            mergedPacket = os.path.join(self.dataDir, "Packets.csv")
            ftd = 1
        else:
            allIterface = linterface
            allLoad = lload
            allMemory = lmemory
            allPackets = lpack
            ftd = 0

        colNamesInterface = {'rx': 'rx_master', 'tx': 'tx_master'}
        df_interface = self.chainMerge(allIterface, colNamesInterface)

        logger.info('[%s] : [INFO] Interface metrics merge complete',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

        colNamesPacket = {'rx': 'rx_master', 'tx': 'tx_master'}
        df_packet = self.chainMerge(allPackets, colNamesPacket)

        logger.info('[%s] : [INFO] Packet metrics merge complete',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

        colNamesLoad = {'shortterm': 'shortterm_master', 'midterm': 'midterm_master', 'longterm': 'longterm_master'}
        df_load = self.chainMerge(allLoad, colNamesLoad)

        logger.info('[%s] : [INFO] Load metrics merge complete',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

        colNamesMemory = {'cached': 'cached_master', 'buffered': 'buffered_master',
                          'used': 'used_master', 'free': 'free_master'}
        df_memory = self.chainMerge(allMemory, colNamesMemory)
        logger.info('[%s] : [INFO] Memory metrics merge complete',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

        logger.info('[%s] : [INFO] System metrics merge complete',
                                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
        if ftd:
            self.df2csv(df_interface, mergedInterface)
            self.df2csv(df_packet, mergedPacket)
            self.df2csv(df_load, mergedLoad)
            self.df2csv(df_memory, mergedMemory)
            return 0
        else:
            return df_interface, df_load, df_memory, df_packet
コード例 #20
0
 def handler(signal_received, frame):
     logger.info('[{}] : [INFO] User break detected. Exiting EDE framework'.format(
     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
     sys.exit(0)
コード例 #21
0
    def detect(self, method,
               model,
               data):
        '''
        :param method: -> method name
        :param model: -> trained clusterer
        :param data: -> dataframe with data
        :return: -> dictionary that contains the list of anomalous timestamps
        '''
        smodel = self.__loadClusterModel(method, model)
        anomalieslist = []
        if not smodel:
            dpredict = 0
        else:
            if data.shape[0]:
                if isinstance(smodel, IsolationForest):
                    logger.info('[{}] : [INFO] Loading predictive model IsolationForest'.format(
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                    for k, v in smodel.get_params().items():
                        logger.info('[{}] : [INFO] Predict model parameter {} set to {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
                    # print("Contamination -> %s" % smodel.contamination)
                    # print("Max_Features -> %s" % smodel.max_features)
                    # print("Max_Samples -> %s" % smodel.max_samples_)
                    # print("Threashold -> %s " % smodel.threshold_)
                    try:
                        dpredict = smodel.predict(data)
                        logger.debug('[{}] : [DEBUG] IsolationForest prediction array: {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(dpredict)))
                    except Exception as inst:
                        logger.error('[%s] : [ERROR] Error while fitting isolationforest model to event with %s and %s',
                             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
                        dpredict = 0

                elif isinstance(smodel, DBSCAN):
                    logger.info('[{}] : [INFO] Loading predictive model DBSCAN'.format(
                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                    for k, v in smodel.get_params().items():
                        logger.info('[{}] : [INFO] Predict model parameter {} set to {}'.format(
                            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
                    # print("Leaf_zise -> %s" % smodel.leaf_size)
                    # print("Algorithm -> %s" % smodel.algorithm)
                    # print("EPS -> %s" % smodel.eps)
                    # print("Min_Samples -> %s" % smodel.min_samples)
                    # print("N_jobs -> %s" % smodel.n_jobs)
                    try:
                        dpredict = smodel.fit_predict(data)
                    except Exception as inst:
                        logger.error('[%s] : [ERROR] Error while fitting DBSCAN model to event with %s and %s',
                                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst),
                                     inst.args)
                        dpredict = 0
            else:
                dpredict = 0
                logger.warning('[%s] : [WARN] Dataframe empty with shape (%s,%s)',
                             datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), str(data.shape[0]),
                             str(data.shape[1]))
                print("Empty dataframe received with shape (%s,%s)" % (str(data.shape[0]),
                             str(data.shape[1])))
            print("dpredict type is %s" % (type(dpredict)))
        if type(dpredict) is not int:
            anomalyarray = np.argwhere(dpredict == -1)
            for an in anomalyarray:
                anomalies = {}
                anomalies['utc'] = int(data.iloc[an[0]].name)
                anomalies['hutc'] = ut2hum(int(data.iloc[an[0]].name))
                anomalieslist.append(anomalies)
        anomaliesDict = {}
        anomaliesDict['anomalies'] = anomalieslist
        logger.info('[%s] : [INFO] Detected anomalies with model %s using method %s are -> %s',
                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), model, method, str(anomaliesDict))
        return anomaliesDict
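
A hedged usage sketch, assuming a model was previously serialized as <method>_<model>.pkl under the model directory (see __loadClusterModel) and that `engine` is an instance of the class defining detect; the method and model names below are placeholders:

# Hypothetical call to detect
anomalies = engine.detect(method='isoforest', model='cluster_1', data=df)
for a in anomalies['anomalies']:
    print(a['utc'], a['hutc'])   # anomalous timestamps as unix epoch and human readable string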
コード例 #22
0
ファイル: multiprocRun.py プロジェクト: DIPET-UVT/EDE-Dipet
 def run(self):
     logger.info('[{}] : [INFO] Starting engine Detect process  {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         self.processID))
     p = multiprocessing.Process(target=self.engine.detectAnomalies)
     return p
コード例 #23
0
 def __init__(self,
              prEndpoint=None,
              esEndpoint=None,
              dmonPort=5001,
              MInstancePort=9200,
              index="logstash-*",
              prKafkaEndpoint=None,
              prKafkaPort=9092,
              prKafkaTopic='edetopic'):
     self.dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'data')
     if esEndpoint is None:
         self.esInstance = None
     else:
         self.esInstance = Elasticsearch(esEndpoint)
         self.esEndpoint = esEndpoint
         self.dmonPort = dmonPort
         self.esInstanceEndpoint = MInstancePort
         self.myIndex = index
         logger.info(
             '[{}] : [INFO] EDE ES backend Defined at: {} with port {}'.
             format(
                 datetime.fromtimestamp(
                     time.time()).strftime('%Y-%m-%d %H:%M:%S'), esEndpoint,
                 MInstancePort))
     if prEndpoint is None:
         pass
     else:
         self.prEndpoint = prEndpoint
         self.MInstancePort = MInstancePort
         logger.info(
             '[{}] : [INFO] EDE PR backend Defined at: {} with port {}'.
             format(
                 datetime.fromtimestamp(
                     time.time()).strftime('%Y-%m-%d %H:%M:%S'), prEndpoint,
                 MInstancePort))
         self.dataDir = os.path.join(
             os.path.dirname(os.path.abspath(__file__)), 'data')
     if prKafkaEndpoint is None:
         self.producer = None
         logger.warning('[{}] : [WARN] EDE Kafka reporter not set'.format(
             datetime.fromtimestamp(
                 time.time()).strftime('%Y-%m-%d %H:%M:%S')))
     else:
         self.prKafkaTopic = prKafkaTopic
         try:
             self.producer = KafkaProducer(
                 value_serializer=lambda v: json.dumps(v).encode('utf-8'),
                 bootstrap_servers=[
                     "{}:{}".format(prKafkaEndpoint, prKafkaPort)
                 ],
                 retries=5)
             logger.info(
                 '[{}] : [INFO] EDE Kafka reporter initialized to server {}:{}'
                 .format(
                     datetime.fromtimestamp(
                         time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                     prKafkaEndpoint, prKafkaPort))
         except Exception as inst:
             logger.error(
                 '[{}] : [ERROR] EDE Kafka reporter failed with {} and {}'.
                 format(
                     datetime.fromtimestamp(
                         time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                     type(inst), inst.args))
             self.producer = None
コード例 #24
0
    def dask_detect(
            self,
            method,
            model,
            data,
            anomaly_label=-1  # Todo make anomaly label user definable
    ):
        smodel = self.__loadClusterModel(method, model)
        anomaliesList = []
        anomaliesDict = {}
        shap_values_p = 0
        if not smodel:
            dpredict = 0
        else:
            if data.shape[0]:
                try:
                    logger.info(
                        '[{}] : [INFO] Loading predictive model {} '.format(
                            datetime.fromtimestamp(
                                time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                            str(smodel).split('(')[0]))
                    for k, v in smodel.get_params().items():
                        logger.info(
                            '[{}] : [INFO] Predict model parameter {} set to {}'
                            .format(
                                datetime.fromtimestamp(
                                    time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                                k, v))
                    dpredict = smodel.predict(data)
                except Exception as inst:
                    logger.error(
                        '[{}] : [ERROR] Failed to load predictive model with {} and {}'
                        .format(
                            datetime.fromtimestamp(
                                time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                            type(inst), inst.args))
                    dpredict = 0
            else:
                dpredict = 0
                logger.warning(
                    '[{}] : [WARN] DataFrame is empty with shape {} '.format(
                        datetime.fromtimestamp(
                            time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                        str(data.shape)))
        if list(np.unique(dpredict)) == [0, 1] or isinstance(
                smodel, pyod.models.iforest.IForest):
            anomaly_label = 1
        else:
            anomaly_label = -1

        if type(dpredict) is not int:
            anomalyArray = np.argwhere(dpredict == anomaly_label)
            if self.pred_analysis and anomalyArray.shape[0]:
                try:
                    plot = self.pred_analysis['Plot']
                    # print(self.pred_analysis['Plot'])
                except Exception:
                    plot = False
                feature_importance, shap_values = self.__shap_analysis(
                    model=smodel, data=data, plot=plot)
                anomaliesDict['complete_shap_analysis'] = feature_importance
                shap_values_p = 1
            count = 0
            for an in anomalyArray:
                anomalies = {}
                anomalies['utc'] = int(data.iloc[an[0]].name)
                anomalies['hutc'] = ut2hum(int(data.iloc[an[0]].name))
                if shap_values_p:
                    anomalies['analysis'] = self.__shap_force_layout(
                        shap_values=shap_values, instance=count)
                anomaliesList.append(anomalies)
                count += 1

        anomaliesDict['anomalies'] = anomaliesList
        logger.info(
            '[{}] : [INFO] Detected {} anomalies with model {} using method {} '
            .format(
                datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                len(anomaliesList), model,
                str(smodel).split('(')[0]))
        return anomaliesDict
コード例 #25
0
    def dask_clusterMethod(self, cluster_method, mname, data):
        try:
            logger.info('[{}] : [INFO] Loading Clustering method {}'.format(
                datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                type(cluster_method)))
            # delattr(cluster_method, 'behaviour')
            # del cluster_method.__dict__['behaviour']
            for k, v in cluster_method.get_params().items():
                logger.info(
                    '[{}] : [INFO] Method parameter {} set to {}'.format(
                        datetime.fromtimestamp(
                            time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v))
            try:
                with joblib.parallel_backend('dask'):
                    logger.info(
                        '[{}] : [INFO] Using Dask backend for user defined method'
                        .format(
                            datetime.fromtimestamp(
                                time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                    clf = cluster_method.fit(data)
            except Exception as inst:
                logger.error(
                    '[{}] : [ERROR] Failed to fit user defined method with dask backend with {} and {}'
                    .format(
                        datetime.fromtimestamp(
                            time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                        type(inst), inst.args))
                logger.warning(
                    '[{}] : [WARN] using default process based backend for user defined method'
                    .format(
                        datetime.fromtimestamp(
                            time.time()).strftime('%Y-%m-%d %H:%M:%S')))
                clf = cluster_method.fit(data)
        except Exception as inst:
            logger.error(
                '[{}] : [ERROR] Failed to fit {} with {} and {}'.format(
                    datetime.fromtimestamp(
                        time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                    type(cluster_method), type(inst), inst.args))
            sys.exit(1)
        predictions = clf.predict(data)
        if list(np.unique(predictions)) == [0, 1]:
            anomaly_marker = 1
            normal_marker = 0
        else:
            anomaly_marker = -1
            normal_marker = 1
        logger.info(
            '[{}] : [INFO] Number of Predicted Anomalies {} from a total of {} datapoints.'
            .format(
                datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                list(predictions).count(anomaly_marker),
                len(list(predictions))))
        logger.debug('[{}] : [DEBUG] Predicted Anomaly Array {}'.format(
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
            predictions))
        fname = str(clf).split('(')[0]
        self.__serializemodel(clf, fname, mname)
        self.__plot_feature_sep(data,
                                predictions,
                                method=fname,
                                mname=mname,
                                anomaly_label=anomaly_marker,
                                normal_label=normal_marker)
        self.__decision_boundary(clf,
                                 data,
                                 method=fname,
                                 mname=mname,
                                 anomaly_label=anomaly_marker)

        return clf
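
dask_clusterMethod accepts any unfitted estimator exposing get_params, fit and predict; a hedged sketch using scikit-learn's IsolationForest (the estimator choice, model name and training frame are assumptions for illustration):

# Hypothetical invocation of dask_clusterMethod
from sklearn.ensemble import IsolationForest

clf = engine.dask_clusterMethod(cluster_method=IsolationForest(contamination=0.1),
                                mname='iforest_test',
                                data=df_train)
# The fitted model is serialized under its class name (here presumably IsolationForest_iforest_test)
# and the feature separation / decision boundary plots are written to the model directory.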
コード例 #26
0
ファイル: multiprocRun.py プロジェクト: DIPET-UVT/EDE-Dipet
def test(times, processID):
    logger.info('[{}] : [INFO] Starting Engine Point process {}'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), processID))
    time.sleep(times)
    logger.info('[{}] : [INFO] Exit Engine Point process {}'.format(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), processID))
コード例 #27
0
ファイル: threadRun.py プロジェクト: DIPET-UVT/EDE-Dipet
 def run(self):
     logger.info('[{}] : [INFO] Starting Engine Train thread {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         self.threadID))
     self.engine.trainMethod()
コード例 #28
0
 def fillMissing(self, df):
     logger.warning('[{}] : [WARN] Filling in missing values with 0'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
     df.fillna(0, inplace=True)
コード例 #29
0
ファイル: threadRun.py プロジェクト: DIPET-UVT/EDE-Dipet
 def run(self):
     logger.info('[{}] : [INFO] Starting Engine Detect thread {}'.format(
         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
         self.threadID))
     self.engine.detectAnomalies()
コード例 #30
0
    def __decision_boundary(
        self,
        model,
        data,
        method,
        mname,
        anomaly_label=-1,
    ):
        """
        :param model: model to be refitted with 2 features (PCA)
        :param data: dataset after PCA
        :param method: method used for plotting decision boundary
        :param mname: name of the model to be displayed
        :param anomaly_label: label for anomaly instances (differs from method to method)
        """
        logger.info(
            '[{}] : [INFO] Computing PCA with 2 components for decision boundary ...'
            .format(
                datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S')))
        transformer = PCA(n_components=2)
        transformer.fit(data)
        data = transformer.transform(data)
        # print("PCA data shape: {}".format(data.shape))
        # fit model
        try:
            model.set_params(
                max_features=data.shape[-1]
            )  # because we have only two features we must override the previous setting
        except ValueError:
            logger.debug(
                '[{}] : [DEBUG] Model not affected by max feature parameter, setting encoding and decoding size'
                .format(
                    datetime.fromtimestamp(
                        time.time()).strftime('%Y-%m-%d %H:%M:%S')))
            model.set_params(encoder_neurons=[2, 64, 32],
                             decoder_neurons=[32, 64, 2])

        model.fit(data)
        y_pred_outliers = model.predict(data)

        # get anomaly index
        anomaly_index_rf = np.where(y_pred_outliers == anomaly_label)

        # Get anomalies based on index
        ano_rf = data[anomaly_index_rf]
        # plot the line, the samples, and the nearest vectors to the plane
        xx, yy = np.meshgrid(np.linspace(-15, 25, 80), np.linspace(-5, 20, 80))
        Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        plt.title(f"Decision Boundary for {method} with name {mname}")
        plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)
        plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')
        b1 = plt.scatter(data[:, 0],
                         data[:, 1],
                         c='white',
                         s=20,
                         edgecolor='k')
        c = plt.scatter(ano_rf[:, 0],
                        ano_rf[:, 1],
                        c='red',
                        s=20,
                        edgecolor='k')
        plt.axis('tight')
        plt.xlim((-15, 25))
        plt.ylim((-5, 20))
        plt.legend([b1, c], [
            "normal",
            "anomaly",
        ], loc="upper left")
        plot_name = f"Decision_Boundary_{method}_{mname}.png"
        plt.savefig(os.path.join(self.modelDir, plot_name))
        plt.close()
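
__decision_boundary is normally invoked from dask_clusterMethod right after fitting (see above); an equivalent direct call from inside the owning class would look roughly like this, with all argument values being placeholders:

# Hypothetical direct call to the private helper from within the class
self.__decision_boundary(model=clf, data=X, method='IsolationForest', mname='iforest_test', anomaly_label=-1)
# Writes Decision_Boundary_IsolationForest_iforest_test.png to self.modelDir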