Example #1
    def __init__(self):

        ###############################################################
        #
        # Sets up all default requirements and placeholders
        # needed for the NLU engine to run.
        #
        # - Helpers: Useful global functions
        # - JumpWay/jumpWayClient: iotJumpWay class and connection
        # - Logging: Logging class
        #
        ###############################################################

        self.Helpers = Helpers()
        self._confs = self.Helpers.loadConfigs()

        self.JumpWay = JumpWay()

        self.MySql = MySql()
        self.MySql.setMysqlCursorRows()

        self.Logging = Logging()
        self.LogFile = self.Logging.setLogFile(self._confs["aiCore"]["Logs"] +
                                               "Client/")
Example #2
    def _scale(self, train_df):
        ''' Centers the data around 0 and scales it.
        :param train_df: DataFrame that contains the training data
        :return: train_df: DataFrame with an additional column scaled_FEATURE_X per feature,
                 containing the scaled values
        :return: trained_scalers: dictionary keyed by column name, storing the fitted scaler per
                 feature, needed in the testing phase to perform identical scaling
        '''

        Logging().log("Scaling Features...")
        trained_scalers = {}
        for col in train_df.columns:

            # 1. consider only relevant columns
            if not col.startswith("FEATURE"):
                continue

            # 2. standard scaler (fit/transform expect 2D input, hence the one-column frame)
            scaler = preprocessing.StandardScaler()
            scaler = scaler.fit(train_df[[col]])

            train_df['scaled_' + col] = scaler.transform(train_df[[col]]).ravel()
            trained_scalers[col] = copy.deepcopy(scaler)

        return train_df, trained_scalers
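
A test-time counterpart is not shown in the example, but the docstring's intent (identical scaling in the testing phase) can be sketched as follows; apply_scalers and test_df are hypothetical names, not part of the original class:

def apply_scalers(test_df, trained_scalers):
    ''' Applies the scalers fitted on the training data to a test dataframe,
        so the test phase uses exactly the same centering and scaling. '''
    for col, scaler in trained_scalers.items():
        # transform expects 2D input, hence the one-column frame; ravel() flattens the result
        test_df['scaled_' + col] = scaler.transform(test_df[[col]]).ravel()
    return test_df
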
Example #3
class gHumans():
    def __init__(self):

        ###############################################################
        #
        # Sets up all default requirements and placeholders
        # needed for the NLU engine to run.
        #
        # - Helpers: Useful global functions
        # - JumpWay/jumpWayClient: iotJumpWay class and connection
        # - Logging: Logging class
        #
        ###############################################################

        self.Helpers = Helpers()
        self._confs = self.Helpers.loadConfigs()

        self.JumpWay = JumpWay()

        self.MySql = MySql()
        self.MySql.setMysqlCursorRows()

        self.Logging = Logging()
        self.LogFile = self.Logging.setLogFile(self._confs["aiCore"]["Logs"] +
                                               "Client/")

    def getHumanByFace(self, response, entities=None):

        ###############################################################
        #
        # Checks to see who was seen in the system camera within the
        # last few seconds
        #
        ###############################################################

        results = None
        resultsLength = None

        try:
            self.MySql.mysqlDbCur.execute(
                "SELECT users.id, users.name, users.zone FROM a7fh46_users_logs logs INNER JOIN a7fh46_users users ON logs.uid = users.id  WHERE logs.timeSeen > (NOW() - INTERVAL 10 SECOND) "
            )

            results = self.MySql.mysqlDbCur.fetchall()
            resultsLength = len(results)

            if resultsLength > 0:
                if resultsLength == 1:
                    message = "I detected " + str(
                        responseLength) + " human, #" + str(
                            results[0]["id"]) + " " + results[0]["name"]
                else:
                    message = "I detected " + str(responseLength) + " humans"
            else:
                message = "I didn't detect any humans in the system camera feed, please stand in front of the camera"

            return message

        except Exception as errorz:
            print('FAILED1')
            print(errorz)
            return results

    def getCurrentHuman(self, responses, entities=None):

        ###############################################################
        #
        # Checks to see who was seen in the system camera within the
        # last few seconds
        #
        ###############################################################

        results = None
        resultsLength = None

        try:
            self.MySql.mysqlDbCur.execute(
                "SELECT users.id, users.name, users.zone FROM a7fh46_user_current currentH INNER JOIN a7fh46_users users ON currentH.uid = users.id  WHERE currentH.timeSeen > (NOW() - INTERVAL 1 MINUTE) ORDER BY id DESC LIMIT 1"
            )

            results = self.MySql.mysqlDbCur.fetchone()

            if results != None:
                return random.choice(responses).replace(
                    "%%HUMAN%%", results["name"])
            else:
                return "Sorry I could not identify you, this system will now self destruct! You have 5 seconds..."

        except Exception as errorz:
            print('FAILED1')
            print(errorz)
            return results

    def updateHuman(self, responses, entities):

        results = None

        try:
            self.MySql.mysqlDbCur.execute(
                "SELECT id, name FROM a7fh46_users users WHERE name = %s",
                (entities[0], ))
            results = self.MySql.mysqlDbCur.fetchone()

            if results != None:
                timeSeen = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
                self.MySql.mysqlDbCur.execute(
                    """
                    INSERT INTO a7fh46_user_current 
                        (uid, lid, fid, zid, did, timeSeen)
                    VALUES 
                        (%s, %s, %s, %s, %s, %s); """,
                    (results["id"], self._confs["iotJumpWay"]["Location"], 0,
                     self._confs["iotJumpWay"]["Zone"],
                     self._confs["iotJumpWay"]["Device"], timeSeen[:-3]))
                self.MySql.mysqlDbConn.commit()
                return random.choice(responses).replace(
                    "%%HUMAN%%", results["name"])

            else:
                return "Sorry I could not identify you, this system will now self destruct! You have 5 seconds..."

        except Exception as errorz:
            print('FAILED2')
            print(errorz)
            return results
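
A minimal usage sketch for the handlers above: the responses passed in are plain templates, and the only contract visible in the code is the %%HUMAN%% placeholder, which gets replaced with the matched user's name. The template strings below are illustrative, and the class still needs its MySQL and config dependencies in place:

Humans = gHumans()

responses = [
    "Welcome back, %%HUMAN%%!",
    "Good to see you again, %%HUMAN%%."
]

print(Humans.getCurrentHuman(responses))
print(Humans.getHumanByFace(""))
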
Example #4
class NLU():
    def __init__(self):

        self.Helpers = Helpers()
        self.Logging = Logging()
        self._confs = self.Helpers.loadConfigs()
        self.LogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] +
                                               "NLU/")
        self.ChatLogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] +
                                                   "Chat/")

        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "NLU Classifier LogFile Set")

        self.startMQTT()

    def commandsCallback(self, topic, payload):

        self.Logging.logMessage(
            self.LogFile, "iotJumpWay", "INFO",
            "Recieved iotJumpWay Command Data : " + str(payload))

        commandData = json.loads(payload.decode("utf-8"))

    def startMQTT(self):

        try:
            self.jumpwayClient = jumpWayDevice.DeviceConnection({
                "locationID":
                self._confs["iotJumpWay"]["Location"],
                "zoneID":
                self._confs["iotJumpWay"]["Zone"],
                "deviceId":
                self._confs["iotJumpWay"]["Device"],
                "deviceName":
                self._confs["iotJumpWay"]["DeviceName"],
                "username":
                self._confs["iotJumpWay"]["MQTT"]["Username"],
                "password":
                self._confs["iotJumpWay"]["MQTT"]["Password"]
            })

            self.jumpwayClient.connectToDevice()
            self.jumpwayClient.subscribeToDeviceChannel("Commands")
            self.jumpwayClient.deviceCommandsCallback = self.commandsCallback

            self.Logging.logMessage(self.LogFile, "iotJumpWay", "INFO",
                                    "iotJumpWay Client Ready")

        except Exception as e:

            self.Logging.logMessage(self.LogFile, "iotJumpWay", "INFO",
                                    "iotJumpWay Client Initiation Failed")

            print(str(e))
            sys.exit()

    def setup(self):

        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "NLU Classifier Initiating")

        self.Data = Data(self.Logging, self.LogFile)
        self.Model = Model()
        self.Context = Context()

        self.user = {}
        self.ner = None
        self.trainingData = self.Data.loadTrainingData()
        self.trainedData = self.Data.loadTrainedData()

        self.trainedWords = self.trainedData["words"]
        self.trainedClasses = self.trainedData["classes"]
        self.x = self.trainedData["x"]
        self.y = self.trainedData["y"]
        self.intentMap = self.trainedData["iMap"][0]

        self.restoreEntitiesModel()
        self.restoreModel()

        self.Logging.logMessage(self.LogFile, "NLU", "INFO", "NLU Ready")

    def restoreEntitiesModel(self):

        if os.path.exists(self._confs["ClassifierSettings"]["EntitiesDat"]):
            self.ner = named_entity_extractor(
                self._confs["ClassifierSettings"]["EntitiesDat"])

            self.Logging.logMessage(self.LogFile, "NER", "OK",
                                    "Restored NLU NER Model")

    def restoreModel(self):

        self.tmodel = self.Model.buildDNN(self.x, self.y)

        self.Logging.logMessage(self.LogFile, "NLU", "INFO",
                                "Restored NLU Model")

    def setupEntities(self):

        if self._confs["ClassifierSettings"]["Entities"] == "Mitie":
            self.entityExtractor = Entities()

        self.Logging.logMessage(self.LogFile, "NER", "INFO",
                                "NLU Entity Extractor Initiated")

    def initiateSession(self, userID):

        self.userID = userID
        if not self.userID in self.user:
            self.user[self.userID] = {}
            self.user[self.userID]["history"] = {}

        self.Logging.logMessage(self.LogFile, "Session", "INFO",
                                "NLU Session Ready For User #" + self.userID)

    def setThresholds(self, threshold):

        self.threshold = float(threshold)
        self.entityThrshld = self._confs["ClassifierSettings"]["Mitie"][
            "Threshold"]

    def predict(self, parsedSentence):

        predictions = [[index, confidence] for index, confidence in enumerate(
            self.tmodel.predict([
                self.Data.makeInferenceBag(parsedSentence, self.trainedWords)
            ])[0]) if confidence > self.threshold]
        predictions.sort(key=lambda x: x[1], reverse=True)

        classification = []
        for prediction in predictions:
            classification.append(
                (self.trainedClasses[prediction[0]], prediction[1]))

        return classification

    def talk(self, sentence, debug=False):

        self.Logging.logMessage(self.LogFile, "GeniSys", "STATUS",
                                "Processing")

        parsed, fallback, entityHolder, parsedSentence = self.entityExtractor.parseEntities(
            sentence, self.ner, self.trainingData)

        classification = self.predict(parsedSentence)

        if len(classification) > 0:

            clearEntities = False
            theIntent = self.trainingData["intents"][self.intentMap[
                classification[0][0]]]

            if len(entityHolder) and not len(theIntent["entities"]):
                clearEntities = True

            if (self.Context.checkSessionContext(self.user[self.userID],
                                                 theIntent)):

                if self.Context.checkClearContext(theIntent, 0):
                    self.user[self.userID]["context"] = ""

                contextIn, contextOut, contextCurrent = self.Context.setContexts(
                    theIntent, self.user[self.userID])

                if fallback and "fallbacks" in theIntent and len(
                        theIntent["fallbacks"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif "entityType" in theIntent and theIntent[
                        "entityType"] == "Numbers":
                    response = random.choice(theIntent["responses"])
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif not len(entityHolder) and len(theIntent["entities"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                elif clearEntities:
                    entityHolder = []
                    response = random.choice(theIntent["responses"])
                    action, actionResponses = self.Helpers.setAction(theIntent)

                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                if action != None:

                    classParts = action.split(".")
                    classFolder = classParts[0]
                    className = classParts[1]

                    module = __import__(classParts[0] + "." + classParts[1],
                                        globals(), locals(), [className])
                    actionClass = getattr(module, className)()
                    response = getattr(actionClass, classParts[2])(
                        random.choice(actionResponses))

                return {
                    "Response":
                    "OK",
                    "ResponseData": [{
                        "Received": sentence,
                        "Intent": classification[0][0],
                        "Confidence": str(classification[0][1]),
                        "Response": response,
                        "ContextIn": contextIn,
                        "ContextOut": contextOut,
                        "Context": contextCurrent,
                        "Action": action,
                        "Entities": entityHolder
                    }]
                }

            else:

                self.user[self.userID]["context"] = ""
                contextIn, contextOut, contextCurrent = self.Context.setContexts(
                    theIntent, self.user[self.userID])

                if fallback and "fallbacks" in theIntent and len(
                        theIntent["fallbacks"]):
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["fallbacks"]), entityHolder)
                    action, actionResponses = None, []

                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)
                    action, actionResponses = self.Helpers.setAction(theIntent)

                if action != None:

                    classParts = action.split(".")
                    classFolder = classParts[0]
                    className = classParts[1]

                    module = __import__(classParts[0] + "." + classParts[1],
                                        globals(), locals(), [className])
                    actionClass = getattr(module, className)()
                    response = getattr(actionClass, classParts[2])(
                        random.choice(actionResponses))

                else:
                    response = self.entityExtractor.replaceResponseEntities(
                        random.choice(theIntent["responses"]), entityHolder)

                return {
                    "Response":
                    "OK",
                    "ResponseData": [{
                        "Received": sentence,
                        "Intent": classification[0][0],
                        "Confidence": str(classification[0][1]),
                        "Response": response,
                        "ContextIn": contextIn,
                        "ContextOut": contextOut,
                        "ContextCurrent": contextCurrent,
                        "Action": action,
                        "Entities": entityHolder
                    }]
                }

        else:

            contextCurrent = self.Context.getCurrentContext(
                self.user[self.userID])

            return {
                "Response":
                "FAILED",
                "ResponseData": [{
                    "Received":
                    sentence,
                    "Intent":
                    "UNKNOWN",
                    "Confidence":
                    "NA",
                    "Responses": [],
                    "Response":
                    random.choice(
                        self._confs["ClassifierSettings"]["defaultResponses"]),
                    "ContextIn":
                    "NA",
                    "ContextOut":
                    "NA",
                    "ContextCurrent":
                    contextCurrent,
                    "Action":
                    "NA",
                    "Entities":
                    entityHolder
                }]
            }
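
Based only on the methods shown above, a minimal sketch of driving the classifier end to end would look like this; the threshold and user ID values are illustrative, and the configuration files and trained data referenced by the class must already exist:

nlu = NLU()
nlu.setup()
nlu.setupEntities()
nlu.setThresholds(0.5)
nlu.initiateSession("1")

result = nlu.talk("Hello, who am I?")
print(result["Response"], result["ResponseData"][0]["Intent"])
print(result["ResponseData"][0]["Response"])
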
Example #5
class Trainer():
    def __init__(self, jumpWay):

        self.Helpers = Helpers()
        self.Logging = Logging()
        self.jumpwayCl = jumpWay

        self._confs = self.Helpers.loadConfigs()
        self.LogFile = self.Logging.setLogFile(self._confs["AI"]["Logs"] +
                                               "Train/")

        self.Logging.logMessage(self.LogFile, "LogFile", "INFO",
                                "NLU Trainer LogFile Set")

        self.Model = Model()
        self.Data = Data(self.Logging, self.LogFile)
        self.intentMap = {}
        self.words = []
        self.classes = []
        self.dataCorpus = []

        self.setupData()
        self.setupEntities()

    def setupData(self):

        self.trainingData = self.Data.loadTrainingData()

        self.Logging.logMessage(self.LogFile, "Trainer", "INFO",
                                "Loaded NLU Training Data")

        self.words, self.classes, self.dataCorpus, self.intentMap = self.Data.prepareData(
            self.trainingData)
        self.x, self.y = self.Data.finaliseData(self.classes, self.dataCorpus,
                                                self.words)

        self.Logging.logMessage(self.LogFile, "TRAIN", "INFO",
                                "NLU Trainer Data Ready")

    def setupEntities(self):

        if self._confs["ClassifierSettings"]["Entities"] == "Mitie":

            self.entityExtractor = Entities()

            self.Logging.logMessage(self.LogFile, "TRAIN", "OK",
                                    "NLU Trainer Entity Extractor Ready")

            self.entityExtractor.trainEntities(
                self._confs["ClassifierSettings"]["Mitie"]["ModelLocation"],
                self.trainingData)

    def trainModel(self):

        while True:

            self.Logging.logMessage(self.LogFile, "TRAIN", "ACTION",
                                    "Ready To Begin Training ? (Yes/No)")

            userInput = input(">")

            if userInput == 'Yes': break
            if userInput == 'No': exit()

        humanStart, trainingStart = self.Helpers.timerStart()

        self.Logging.logMessage(self.LogFile, "TRAIN", "INFO",
                                "NLU Model Training At " + humanStart)

        self.jumpwayCl.publishToDeviceChannel(
            "Training", {
                "NeuralNet": "NLU",
                "Start": trainingStart,
                "End": "In Progress",
                "Total": "In Progress",
                "Message": "NLU Model Training At " + humanStart
            })
        self.Model.trainDNN(self.x, self.y, self.words, self.classes,
                            self.intentMap)

        trainingEnd, trainingTime, humanEnd = self.Helpers.timerEnd(
            trainingStart)

        self.Logging.logMessage(
            self.LogFile, "TRAIN", "OK", "NLU Model Trained At " + humanEnd +
            " In " + str(trainingEnd) + " Seconds")

        self.jumpwayCl.publishToDeviceChannel(
            "Training", {
                "NeuralNet":
                "NLU",
                "Start":
                trainingStart,
                "End":
                trainingEnd,
                "Total":
                trainingTime,
                "Message":
                "NLU Model Trained At " + humanEnd + " In " +
                str(trainingEnd) + " Seconds"
            })
    def _outlier_removal(self, train_df, remove_empty, nr_iterations,
                         split_windows, std_threshold):
        ''' Removes outliers from the training dataframe per feature by windowing the data and
            dropping, within each window, all values that lie further than std_threshold times
            the standard deviation from the window mean.
        :param train_df: DataFrame that contains the training data
        :param remove_empty: Boolean - if True, features without distinct values are removed
        :param nr_iterations: number of outlier-removal passes per window
        :param split_windows: the data is split into split_windows equal-length windows between the minimal risk and 1
        :param std_threshold: values further away than std_threshold * std of the feature are removed
        :return: output_dfs: list of DataFrames, each with a now outlier-free column scaled_FEATURE_X and a
                             RISK column holding the risk for that feature at its row
        '''
        if not self._remove_outliers:
            print("Outlier removal disabled!")
        # 1. Initialize
        output_dfs = []
        iteration = range(nr_iterations)

        first = True

        # Per feature and window
        for col in train_df.columns:

            # 2. only scaled features are considered
            if not col.startswith("scaled_FEATURE"): continue
            #Logging().log("CURRENT -> "+ col)
            result_df = train_df.sort_values("RISK")

            # 3. iterate multiple times over window
            #   on each iteration remove outliers
            for i in iteration:
                sub_dfs = []
                indices = []
                rs = 0
                # 4. iterate over split_windows equal-length windows between minimal risk and 1
                for r in np.linspace(result_df["RISK"].min(), 1, split_windows):

                    sub_df = result_df[(rs <= result_df["RISK"])
                                       & (r > result_df["RISK"])]
                    if self._remove_outliers:
                        sub_df = sub_df[(
                            (sub_df[col] - sub_df[col].mean()) /
                            sub_df[col].std()).abs() < std_threshold]
                    sub_dfs.append(sub_df)
                    rs = r
                result_df = pd.concat(sub_dfs)

            # 5. Remove empty features (fewer than two distinct values)
            if remove_empty and len(result_df[col].unique()) < 2:
                continue

            # 6. Merge result to common dataframe
            output_dfs.append(result_df[["RISK", col]])

            # 7. Plot results
            if self._visualize_outlier:
                Logging().log("Pre - Standard Deviation vorher: " +
                              str(train_df[col].std()))
                Visual().plot_scatter(train_df["RISK"],
                                      train_df[col])  #, "RISK", "feature")
                Logging().log("Post - Standard Deviation nachher: " +
                              str(result_df[col].std()))
                Visual().plot_scatter(result_df["RISK"], result_df[col])

        return output_dfs
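
The windowing above is easier to see in isolation. The following self-contained sketch applies the same idea (risk-ordered windows, values beyond std_threshold standard deviations dropped per window) to a toy dataframe; the function and variable names are illustrative and not part of the class:

import numpy as np
import pandas as pd

def window_outlier_removal(df, col, split_windows=10, std_threshold=2.5):
    ''' Drops rows whose value in col lies more than std_threshold standard
        deviations from the window mean, windowed along the RISK axis. '''
    df = df.sort_values("RISK")
    kept = []
    lower = df["RISK"].min()
    for upper in np.linspace(lower, 1, split_windows):
        window = df[(lower <= df["RISK"]) & (df["RISK"] < upper)]
        if len(window) > 1:
            z = ((window[col] - window[col].mean()) / window[col].std()).abs()
            window = window[z < std_threshold]
        kept.append(window)
        lower = upper
    return pd.concat(kept)

# toy data: one scaled feature with a few injected outliers
toy = pd.DataFrame({"RISK": np.linspace(0.1, 0.99, 200),
                    "scaled_FEATURE_1": np.random.randn(200)})
toy.loc[::50, "scaled_FEATURE_1"] = 10.0
print(len(toy), "->", len(window_outlier_removal(toy, "scaled_FEATURE_1")))
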
    def _build_heat_map(self, output_dfs):
        ''' Generates a heat map per feature by interpolating the points of a 2D grid
            (risk vs. feature value) and convolving them with a Gaussian kernel.
            :param output_dfs: list of DataFrames, each with a column scaled_FEATURE_X (outlier-free
                               and scaled) and a RISK column holding the risk for that feature at its row
            :return: dimensions: dictionary with the feature name as key and its 2D heat map as value
            :return: xi: the common risk-axis grid used for the heat maps
        '''
        dimensions = {}
        for feature_df in output_dfs:  # each output_df has one risk and value
            Logging().log("Processing Feature: " + feature_df.columns[1])

            # Testmode
            if self._test_mode and (feature_df.columns[1]
                                    == "scaled_FEATURE_5"):
                print("Testing thus, break now!")
                break

            try:
                values = np.empty(len(feature_df))
                values.fill(1)

                # Assign X Y Z
                X = feature_df.RISK.as_matrix()
                Y = feature_df[feature_df.columns[1]].as_matrix()
                Z = values

                # create x-y points to be used in heatmap of identical size
                risk_min = min([
                    rm for rm in [df.RISK.min() for df in output_dfs]
                    if not math.isnan(rm)
                ])
                risk_max = 1
                feature_min = min([
                    rm
                    for rm in [df[df.columns[1]].min() for df in output_dfs]
                    if not math.isnan(rm)
                ])
                feature_max = max([
                    rm
                    for rm in [df[df.columns[1]].max() for df in output_dfs]
                    if not math.isnan(rm)
                ])

                xi = np.linspace(risk_min, risk_max, self._grid_area)
                yi = np.linspace(feature_min, feature_max, self._grid_area)

                # Z is a matrix of x-y values interpolated (!)
                zi = griddata((X, Y),
                              Z, (xi[None, :], yi[:, None]),
                              method=self._interpol_method)
                zmin = 0
                zmax = 1
                zi[(zi < zmin) | (zi > zmax)] = None

                # Convolve each point with a Gaussian kernel, giving the heat value at point (xi, yi)
                # Advantage: keeps horizontal and vertical influence
                grid_cur = np.nan_to_num(zi)

                # Smooth with a Gaussian kernel
                kernel = Gaussian2DKernel(stddev=self._std_gaus,
                                          x_size=self._kernel_size,
                                          y_size=self._kernel_size)
                grad = scipy_convolve(grid_cur,
                                      kernel,
                                      mode='same',
                                      method='direct')

                # Store the model in memory
                dimensions[feature_df.columns[1]] = [
                    copy.deepcopy(np.absolute(grad)),
                    copy.deepcopy(xi),
                    copy.deepcopy(yi)
                ]
                #print("GRAD")
                #print(str(grad))

                if self._visualize_heatmap:
                    fig, (ax_orig, ax_mag) = plt.subplots(1, 2)
                    ax_orig.imshow(grid_cur[::-1, ::-1], cmap='RdYlGn')
                    ax_orig.set_title('Original')
                    ax_mag.imshow(
                        np.absolute(grad)[::-1, ::-1], cmap='RdYlGn'
                    )  # https://matplotlib.org/examples/color/colormaps_reference.html
                    ax_mag.set_title('Heat')
                    fig.show()
                    plt.show()

            except:
                Logging().log("Heat map creation failed for " +
                              str(feature_df.columns[1]))
                #traceback.print_exc()
                dimensions[feature_df.columns[1]] = None

        return dimensions, xi
    def _build_one_heat_map(self, feature_df, risk_min, feature_min,
                            feature_max):
        Logging().log("Processing Feature: " + feature_df.columns[1])

        try:
            values = np.empty(len(feature_df))
            values.fill(1)

            # Assign X Y Z
            X = feature_df.RISK.as_matrix()
            Y = feature_df[feature_df.columns[1]].as_matrix()
            Z = values

            # create x-y points to be used in heatmap of identical size
            #risk_min = min([rm for rm in [df.RISK.min() for df in output_dfs] if not math.isnan(rm)])
            risk_max = 1

            xi = np.linspace(risk_min, risk_max, self._grid_area)
            yi = np.linspace(feature_min, feature_max, self._grid_area)

            # Z is a matrix of x-y values interpolated (!)
            zi = griddata((X, Y),
                          Z, (xi[None, :], yi[:, None]),
                          method=self._interpol_method)
            zmin = 0
            zmax = 1
            zi[(zi < zmin) | (zi > zmax)] = None

            # Convolve each point with a Gaussian kernel, giving the heat value at point (xi, yi)
            # Advantage: keeps horizontal and vertical influence
            grid_cur = np.nan_to_num(zi)

            # Smooth with a Gaussian kernel
            kernel = Gaussian2DKernel(stddev=self._std_gaus,
                                      x_size=self._kernel_size,
                                      y_size=self._kernel_size)
            grad = scipy_convolve(grid_cur,
                                  kernel,
                                  mode='same',
                                  method='direct')

            # Store the model in memory
            feature_name = feature_df.columns[1]
            result = [
                feature_name,
                [
                    copy.deepcopy(np.absolute(grad)),
                    copy.deepcopy(xi),
                    copy.deepcopy(yi)
                ], grid_cur
            ]

        except:
            #traceback.print_exc()
            feature_name = feature_df.columns[1]
            Logging().log(
                str(feature_df.columns[1]) +
                ": No heat map created due to error")
            result = [feature_name, None, None]

        return result
    def _score_feature_quality(self, test_df, whole_df, model_per_feature,
                               cluster_id, data_in, r_min, r_max,
                               finetuner_index):

        # once computed, store the scores and reload them later if needed
        print("_________SCORING: " + str(r_min) + " to " + str(r_max))

        # each model is one heat map
        tester = HeatmapConvolutionTester(smooth_per_feature=True,
                                          enable_all_print=False,
                                          visualize_summed_curve=False,
                                          visualize_per_feature_curve=False)
        abs_max_rul = whole_df["RUL"].max()  # 217
        segment_thrshld = 0.33 * abs_max_rul
        distances = {}  # key: feature name, value: list of distance lists
        phm_scores = {}
        rmse_scores = {}
        tot = len(list(test_df["id"].unique()))
        oo = 0
        for object_id in list(test_df["id"].unique()):
            oo += 1
            Logging.log(
                str(oo) + " of " + str(tot) +
                " - Optimizing based on - OBJECT ID: " + str(object_id))
            cur_df1 = test_df[test_df['id'] == object_id]

            # predict once at a random point within the first 33%, once between 33% and 66%, and once between 66% and 100%
            le = int(numpy.ceil(len(cur_df1) / 3))
            z_to_33 = list(range(le))
            random.shuffle(z_to_33)
            if 2 * le > len(cur_df1):
                t_to_66 = []
                s_to_100 = []
                thrshlds = [z_to_33[0]]
            else:
                t_to_66 = list(range(le, 2 * le))
                s_to_100 = list(range(2 * le, len(cur_df1)))
                random.shuffle(t_to_66)
                random.shuffle(s_to_100)
                thrshlds = [z_to_33[0], t_to_66[0], s_to_100[0]]

            cur_df3 = cur_df1.sort_values("RUL", ascending=False)
            for thrshld in thrshlds:
                current_test_df = cur_df3.iloc[:thrshld]

                dist = current_test_df["RUL"].max(
                ) - current_test_df["RUL"].min()
                if dist > segment_thrshld:
                    print(
                        "SHORTENED RUL AREA !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! "
                    )
                    thrs = current_test_df["RUL"].min() + segment_thrshld
                    current_test_df = current_test_df[
                        current_test_df["RUL"] < thrs]

                # do prediction
                try:
                    predicted_risk, predicted_rul, m, all_feature_sum, per_feature_sum, feature_favorites = tester._predict_RUL(
                        data_in,
                        current_test_df,
                        cluster_id,
                        None, [],
                        0,
                        current_test_df["RUL"].min(),
                        test=True,
                        fine=finetuner_index)
                except:
                    print("No prediction to short shit")
                    continue
                true_RUL = current_test_df.iloc[-1]["RUL"]
                true_risk = 1 + m * true_RUL

                # post process

                # asses per_feature_sum
                for col_name in per_feature_sum.keys():
                    #print("--- Feature: " + col_name)
                    cur = per_feature_sum[col_name]
                    cur[1] = cur[1][0]

                    #m_idx = numpy.argmax(cur[1])
                    #da_risk_found = cur[0][m_idx]
                    #predicted_rul_feature = (da_risk_found - 1)/m

                    if numpy.count_nonzero(cur[1]) == 0:
                        if col_name in distances:
                            distances[col_name].append(
                                [1])  # this curve did not help at all
                            phm_scores[col_name].append(["None"])
                            rmse_scores[col_name].append(["None"])
                        else:
                            distances[col_name] = [[1]]
                            phm_scores[col_name] = [["None"]]
                            rmse_scores[col_name] = [["None"]]
                        #print(" - Distance - 1")
                        continue

                    ten_perc = math.ceil(0.05 * len(cur[1]))
                    subs = int(0.1 * len(cur[1]))
                    ind = sorted(
                        numpy.argpartition(cur[1], -ten_perc)
                        [-ten_perc:])  # indices of x percent of highest values

                    # if a gap is bigger than subs indices (== 0.1 risk) -> split into separate regions
                    gaps = numpy.where(numpy.diff(ind) > subs)[0]
                    if len(gaps) == 0: runner = [None]
                    else: runner = sorted(list(gaps))
                    prev = 0
                    multi_dist = []
                    phm_scores_lst = []
                    rmse_scores_lst = []
                    for gap_idx in runner:
                        if gap_idx == None:
                            cur_subset_selection = ind
                        else:
                            gap_idx += 1
                            cur_subset_selection = ind[int(prev):int(gap_idx)]

                        values = cur[0][cur_subset_selection]

                        avg = numpy.average(values)
                        dist = avg - true_risk

                        # in practice, prefer earlier regions, as that does not fragment the index as much
                        multi_dist.append(dist)
                        #print(" - Distance - " + str(dist))

                        # analog find phm and risk
                        da_risk_found = avg
                        predicted_rul_feature = (da_risk_found - 1) / m

                        phmScore = self.score_phm(
                            pd.DataFrame(
                                [[true_RUL, predicted_rul_feature, -1]],
                                columns=["RUL", "predicted_RUL", "object_id"]))
                        rmse = self.score_rmse(
                            numpy.array([true_RUL]),
                            numpy.array([predicted_rul_feature]))
                        phm_scores_lst.append(phmScore)
                        rmse_scores_lst.append(rmse)

                        prev = gap_idx
                        '''
                        print("\nFeature: "+ str(col_name)+ "\nplot all - true risk: " + str(true_risk))
                        plt.plot(cur[0], cur[1])
                        plt.plot(cur[0][cur_subset_selection], cur[1][cur_subset_selection], color='red')
                        #plt.plot(cur[0], medfilt(cur[1], 61), color = "green") # KERNEL MUST BE ODD 
                        plt.xlabel("risk - true risk = " + str(true_risk))
                        plt.ylabel("heat - "+str(col_name))
                        plt.show()
                        '''

                    # use this for evaluation
                    #phmScore = self.score_phm(pd.DataFrame([[true_RUL, predicted_rul_feature, -1]], columns = ["RUL", "predicted_RUL", "object_id"]))
                    #rmse = self.score_rmse(numpy.array([true_RUL]), numpy.array([predicted_rul_feature]))

                    if col_name in distances:
                        distances[col_name].append(multi_dist)
                        phm_scores[col_name].append(phm_scores_lst)
                        rmse_scores[col_name].append(rmse_scores_lst)
                    else:
                        distances[col_name] = [multi_dist]
                        phm_scores[col_name] = [phm_scores_lst]
                        rmse_scores[col_name] = [rmse_scores_lst]

        return distances, phm_scores, rmse_scores
    def run(self, data_in):
        super().run(data_in)  # do not remove this!
        Logging.log("Training da heat...")

        # cross validation params
        iter_idx = 2  # Monte Carlo cross-validation - randomly assign training and test set x times
        percentage_train = 0.8  # percentage of data being trainingset

        # 1. transform to df and keep critical
        whole_df = self._extract_critical_data_frame(data_in)
        whole_df[self._field_in_train_cluster_id] = data_in[
            self._field_in_train_cluster_id]
        lst_of_train_n_test = self._split_to_subsets(
            whole_df, percentage_train, iter_idx)  # split frame multiple times
        # each element being [train_df, test_df]

        # 2. distance - scoring quality of a feature
        # key: cluster id as "c<id>", value: list of dicts mapping feature_id to its score
        dist_score = {}

        for train_test in lst_of_train_n_test:
            train_df = train_test[0]
            test_df = train_test[1]
            test_df["cluster_id"] = test_df["train_cluster_id"]

            # one model per cluster
            for cluster_id in list(train_df["train_cluster_id"].unique()):
                #if cluster_id in [1, 5, 4, 0, 3]:  # already trained
                #    continue

                if self._test_mode and not (cluster_id == 3
                                            or cluster_id == 1):
                    continue

                print("\n\n TRAINING CLUSTER: " + str(cluster_id))
                cur_train_df = train_df[train_df[
                    self._field_in_train_cluster_id] == cluster_id]
                cur_test_df = test_df[test_df["cluster_id"] == cluster_id]

                # 2. scale data and remove outliers
                output_dfs, trained_scalers = self._preprocess_data(
                    cur_train_df, self._remove_empty_features,
                    self._nr_outlier_iterations, self._outlier_window_size,
                    self._outlier_std_threshold)
                data_in[
                    "CL_" + str(cluster_id) + "_" + self.
                    _field_out_train_model_trained_scalers] = trained_scalers

                # 3. Train the model
                model_per_feature = self._build_heat_map_parallel(output_dfs)
                data_in[
                    "CL_" + str(cluster_id) + "_" +
                    self._field_out_train_model_grid_area] = self._grid_area

                # 4. Store the models
                data_in["CL_" + str(cluster_id) + "_" +
                        self._field_out_train_model] = model_per_feature

                # 5. score the feature quality for cross validation
                score_dict = self._score_feature_quality(
                    cur_test_df, whole_df, model_per_feature, cluster_id,
                    data_in)
                print("Found scores: " + str(score_dict))
                idfr = "c" + str(cluster_id)
                if idfr not in dist_score:
                    dist_score[idfr] = [score_dict]
                else:
                    dist_score[idfr].append(score_dict)

                try:
                    pathh = os.path.join(r"C:\Users\q416435\Desktop\scores",
                                         "cluster_" + str(cluster_id) + ".csv")
                    print("Writing file to " + pathh)
                    self._csv_file = open(pathh, 'w')
                    for ke in score_dict.keys():
                        self._csv_writer = csv.writer(self._csv_file,
                                                      delimiter=';')
                        self._csv_writer.writerow([ke, str(score_dict[ke])])
                    self._csv_file.close()
                except:
                    pass

        # 3. Perform training for whole model now
        # get final model

        # 4. keep only optimal models now based on dist_score
        #T.B.D.
        # 5. empty metrics
        metrics = dict()

        return data_in, metrics
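
The repeated random train/test assignment referred to above ("Monte Carlo cross-validation") is done by _split_to_subsets, which is not part of this example. A sketch of the idea, splitting by object id so that a whole object lands either in the training or the test set, could look like this (all names here are assumptions, not the class's actual implementation):

import numpy as np

def monte_carlo_splits(df, percentage_train=0.8, iterations=2, id_col="id", seed=None):
    ''' Randomly assigns whole objects (grouped by id_col) to a training and a
        test set, repeated `iterations` times; returns [train_df, test_df] pairs. '''
    rng = np.random.default_rng(seed)
    ids = df[id_col].unique()
    splits = []
    for _ in range(iterations):
        train_ids = rng.choice(ids, size=int(len(ids) * percentage_train), replace=False)
        mask = df[id_col].isin(train_ids)
        splits.append([df[mask], df[~mask]])
    return splits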