Example #1
0
    def calculate_emr(learners, error_weight=1, memory_weight=1, runtime_weight=1, lb=1, ub=10):
        """Score a set of learners on Error, Memory and Runtime (EMR).

        Each learner's error rate, total runtime and (optionally) memory
        footprint are measured, min-max scaled into [lb, ub] across all
        learners, and combined into one weighted EMR score per learner.

        :param learners: learner objects exposing ``LEARNER_NAME``,
            ``get_confusion_matrix()`` and ``get_total_running_time()``.
        :param error_weight: weight of the error-rate term.
        :param memory_weight: weight of the memory term; pass ``-1`` to skip
            the (expensive) ``asizeof`` memory measurement entirely.
        :param runtime_weight: weight of the runtime term.
        :param lb: lower bound of the scaling interval.
        :param ub: upper bound of the scaling interval.
        :return: tuple ``(emr_scores, errors, memory_usages, runtimes)``.
            ``emr_scores`` is ``None`` when exactly one learner is given,
            because min-max scaling is undefined for a single sample.
        """

        # learners_names is built for parity with the other metrics but is
        # not part of the return value.
        learners_names = []
        learners_errors = []
        learners_runtime = []
        learners_memory_usages = []
        learners_emr_scores = []

        for learner in learners:
            learners_names.append(learner.LEARNER_NAME.title())
            learners_errors.append(PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, learner.get_confusion_matrix()))
            learners_runtime.append(learner.get_total_running_time())
            # memory_weight == -1 is a sentinel meaning "do not measure memory".
            if memory_weight != -1:
                learners_memory_usages.append(asizeof.asizeof(learner))
            else:
                learners_memory_usages.append(0)

        if len(learners) != 1:
            err_min, err_max = LearnersScoreCalculator.get_min_max(learners_errors)
            run_min, run_max = LearnersScoreCalculator.get_min_max(learners_runtime)
            mem_min, mem_max = LearnersScoreCalculator.get_min_max(learners_memory_usages)

            # Scale each metric into [lb, ub] before combining so the three
            # terms are comparable regardless of their raw magnitudes.
            for err, run, mem in zip(learners_errors, learners_runtime, learners_memory_usages):
                error = LearnersScoreCalculator.scale(err, err_min, err_max, lb, ub)
                runtime = LearnersScoreCalculator.scale(run, run_min, run_max, lb, ub)
                memory_usage = LearnersScoreCalculator.scale(mem, mem_min, mem_max, lb, ub)
                learners_emr_scores.append(LearnersScoreCalculator.__cal_emr(ub, error, runtime, memory_usage, error_weight, runtime_weight, memory_weight))

            return learners_emr_scores, learners_errors, learners_memory_usages, learners_runtime
        else:
            return None, learners_errors, learners_memory_usages, learners_runtime
    def run(self, stream, random_seed=1):
        """Prequentially evaluate the single configured learner on *stream*.

        Each record is transformed to fit the learner's category
        (nominal/numeric), then processed test-then-train; the running error
        rate is appended to an internal array after every record. When the
        stream is exhausted, stats are stored and plotted.

        :param stream: sequence of records, each a sequence of attribute
            values with the class label in the last position; must support
            ``len()`` for the progress display.
        :param random_seed: seed passed to ``random.seed()`` (default 1).
        """

        random.seed(random_seed)

        for record in stream:

            self.__instance_counter += 1

            percentage = (self.__instance_counter / len(stream)) * 100
            print("%0.2f" % percentage + "% of instances are prequentially processed!", end="\r")

            # Records containing missing values ("?") are counted as rubbish
            # and skipped entirely.
            if record.__contains__("?"):
                self.__num_rubbish += 1
                continue

            # ---------------------
            #  Data Transformation
            # ---------------------
            # Work on a shallow copy so the original stream record stays intact.
            r = copy.copy(record)
            for k in range(0, len(r) - 1):
                # Discretize numeric attributes for nominal classifiers; map
                # nominal attributes to numbers for numeric classifiers.
                if self.learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                    r[k] = Discretizer.find_bin(r[k], self.__nominal_attribute_scheme[k])
                elif self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                    r[k] = NominalToNumericTransformer.map_attribute_value(r[k], self.__numeric_attribute_scheme[k])
            # NORMALIZING NUMERIC DATA
            if self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
                r[0:len(r) - 1] = Normalizer.normalize(r[0:len(r) - 1], self.__numeric_attribute_scheme)

            # ----------------------
            #  Prequential Learning
            # ----------------------
            if self.learner.is_ready():
                # Test first, then train (prequential protocol).
                real_class = r[len(r) - 1]
                predicted_class = self.learner.do_testing(r)
                if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    self.learner.do_training(r)
                else:
                    self.learner.do_loading(r)
            else:
                # Warm-up: train on the record before any testing happens.
                if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    self.learner.do_training(r)
                else:
                    self.learner.do_loading(r)

                self.learner.set_ready()
                # Seed the confusion matrix with a trivially correct prediction
                # (actual == predicted) so the error rate is defined from the start.
                self.learner.update_confusion_matrix(r[len(r) - 1], r[len(r) - 1])

            learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                               self.learner.get_confusion_matrix())
            learner_error_rate = round(learner_error_rate, 4)
            self.__learner_error_rate_array.append(learner_error_rate)

        print("\n" + "The stream is completely processed.")
        self.__store_stats()
        self.__plot()
        print("THE END!")
        print("\a")
    def run(self, stream_records, random_seed=1):
        """Prequentially evaluate every (learner, detector) pair on *stream_records*.

        Every record is tested/trained by each pair; when a pair's detector
        signals a drift, that pair's stats are archived and the pair is reset.
        Every ``score_interval`` records the pairs' latest stats are
        normalized and weighted (``self.w_vec``) to pick the currently
        optimal pair, with ties broken at random.

        :param stream_records: sequence of records (class label last); must
            support ``len()`` for the progress display and end-of-stream checks.
        :param random_seed: seed for ``random.seed()``; affects the random
            tie-break between equally scored pairs (default 1).
        """

        random.seed(random_seed)

        for record in stream_records:

            self.__instance_counter += 1

            # Advance the "current actual drift" index once the acceptance
            # window around that drift point has fully passed.
            if self.drift_loc_index < len(self.actual_drift_points) - 1:
                if self.__instance_counter > self.actual_drift_points[
                        self.drift_loc_index] + self.drift_acceptance_interval:
                    self.drift_loc_index += 1

            # Track which concept/context the stream is currently in.
            if self.drift_current_context < len(self.actual_drift_points):
                if self.__instance_counter > self.actual_drift_points[
                        self.drift_current_context]:
                    self.drift_current_context += 1

            percentage = (self.__instance_counter / len(stream_records)) * 100
            print("%0.2f" % percentage + "% of instances are processed!",
                  end="\r")

            # Skip records with missing values ("?").
            if record.__contains__("?"):
                self.__num_rubbish += 1
                continue

            for pair in self.pairs:
                learner = pair[0]
                detector = pair[1]
                # NOTE(review): list.index() is an O(n) equality scan per pair
                # per record; enumerate(self.pairs) would be cheaper.
                index = self.pairs.index(pair)

                # ---------------------
                #  DATA TRANSFORMATION
                # ---------------------
                # Shallow copy so each pair transforms its own view of the record.
                r = copy.copy(record)
                for k in range(0, len(r) - 1):
                    if learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.attributes[
                            k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                        r[k] = Discretizer.find_bin(
                            r[k], self.nominal_attribute_scheme[k])
                    elif learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.attributes[
                            k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                        r[k] = NominalToNumericTransformer.map_attribute_value(
                            r[k], self.numeric_attribute_scheme[k])
                # NORMALIZING NUMERIC DATA
                if learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
                    r[0:len(r) - 1] = Normalizer.normalize(
                        r[0:len(r) - 1], self.numeric_attribute_scheme)

                # ----------------------
                #  PREQUENTIAL LEARNING
                # ----------------------
                if learner.is_ready():
                    real_class = r[len(r) - 1]
                    predicted_class = learner.do_testing(r)

                    prediction_status = True
                    if real_class != predicted_class:
                        prediction_status = False

                    # -----------------------
                    #  ANY DRIFTS DETECTED?
                    # -----------------------
                    warning_status, drift_status = detector.detect(
                        prediction_status)
                    if drift_status:

                        # APPEND 1 INTO LOCATED DRIFT POINTS
                        self.pair_located_drift_points[index].append(1)

                        # APPENDING ERROR-RATE, MEMORY USAGE, AND RUNTIME OF CLASSIFIER
                        learner_error_rate = PredictionEvaluator.calculate(
                            TornadoDic.ERROR_RATE,
                            learner.get_confusion_matrix())
                        learner_error_rate = round(learner_error_rate, 4)
                        learner_runtime = learner.get_running_time()
                        learner_mem_use = asizeof.asizeof(learner,
                                                          limit=20) / 1000
                        self.learners_stats[index].append([
                            learner_error_rate, learner_mem_use,
                            learner_runtime
                        ])

                        # APPENDING FP, FN, MEMORY USAGE, AND RUNTIME OF DETECTOR
                        # Start from the detector's most recent stats entry.
                        delay, [
                            tp_loc, tp
                        ], fp, fn, mem, runtime = self.detectors_stats[index][
                            len(self.detectors_stats[index]) - 1]
                        actual_drift_loc = self.actual_drift_points[
                            self.drift_loc_index]
                        # Inside the acceptance window the detection counts as
                        # a new TP only if the previous TP is older than the
                        # acceptance interval; otherwise it is an extra FP.
                        if actual_drift_loc <= self.__instance_counter <= actual_drift_loc + self.drift_acceptance_interval:
                            if self.__instance_counter - tp_loc < self.drift_acceptance_interval:
                                fp += 1
                            else:
                                tp += 1
                                tp_loc = self.__instance_counter
                        else:
                            # Detection outside any acceptance window: false positive.
                            fp += 1
                        # Refresh memory/runtime measurements at drift time.
                        mem = asizeof.asizeof(detector) / 1000
                        runtime = detector.RUNTIME
                        self.detectors_stats[index].append(
                            [delay, [tp_loc, tp], fp, fn, mem, runtime])

                        # Reset the pair and skip training on this record.
                        learner.reset()
                        detector.reset()
                        continue

                    if learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                        learner.do_training(r)
                    else:
                        learner.do_loading(r)
                else:
                    # Warm-up path: train once, mark ready, and seed the
                    # confusion matrix with a trivially correct prediction.
                    if learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                        learner.do_training(r)
                    else:
                        learner.do_loading(r)

                    learner.set_ready()
                    learner.update_confusion_matrix(r[len(r) - 1],
                                                    r[len(r) - 1])

                # No drift for this pair on this record.
                self.pair_located_drift_points[index].append(0)

                # APPENDING ERROR-RATE, MEMORY USAGE, AND RUNTIME OF CLASSIFIERS
                learner_error_rate = PredictionEvaluator.calculate(
                    TornadoDic.ERROR_RATE, learner.get_confusion_matrix())
                learner_error_rate = round(learner_error_rate, 4)
                # asizeof is expensive: re-measure memory only every
                # feedback_interval records (or at end of stream); otherwise
                # carry the previous measurement forward.
                if self.feedback_counter % self.feedback_interval == 0 or self.__instance_counter == len(
                        stream_records):
                    learner_mem_use = asizeof.asizeof(learner, limit=20) / 1000
                else:
                    learner_mem_use = self.learners_stats[index][
                        len(self.learners_stats[index]) - 1][1]
                learner_runtime = learner.get_running_time()
                self.learners_stats[index].append(
                    [learner_error_rate, learner_mem_use, learner_runtime])

                # APPENDING FP, FN, MEMORY USAGE, AND RUNTIME OF DRIFT DETECTORS
                if self.__instance_counter == 1:
                    # First record: initialize detector stats to zeros.
                    delay, [tp_loc, tp
                            ], fp, fn, mem, runtime = [0, [0, 0], 0, 0, 0, 0]
                else:
                    delay, [
                        tp_loc, tp
                    ], fp, fn, mem, runtime = self.detectors_stats[index][
                        len(self.detectors_stats[index]) - 1]
                    runtime = detector.RUNTIME
                    # print(runtime)
                    if self.feedback_counter % self.feedback_interval == 0 or self.__instance_counter == len(
                            stream_records):
                        mem = asizeof.asizeof(detector) / 1000
                    # Once at least one actual drift has passed: FN is the
                    # number of passed drifts minus TPs so far, and delay
                    # grows while we are inside the current drift's acceptance
                    # window with no TP located in it yet.
                    if self.drift_current_context >= 1:
                        if self.__instance_counter >= self.actual_drift_points[
                                self.drift_current_context - 1]:
                            fn = self.drift_current_context - tp
                            if self.__instance_counter <= self.actual_drift_points[
                                    self.drift_current_context -
                                    1] + self.drift_acceptance_interval:
                                if tp_loc < self.actual_drift_points[
                                        self.drift_current_context -
                                        1] or tp_loc > self.actual_drift_points[
                                            self.drift_current_context -
                                            1] + self.drift_acceptance_interval:
                                    delay += 1
                self.detectors_stats[index].append(
                    [delay, [tp_loc, tp], fp, fn, mem, runtime])
                # print(instance_counter, detectors_stats[index][len(detectors_stats[index]) - 1])

            # CALCULATE SCORES & OPTIMAL CHOICE
            # Every score_interval records: gather each pair's latest stats,
            # normalize across pairs, apply the weight vector, and pick the
            # best-scoring pair (random tie-break).
            if self.score_counter % self.score_interval == 0:

                current_stats = []
                for i in range(0, len(self.pairs)):
                    ce, cm, cr = self.learners_stats[i][
                        len(self.learners_stats[i]) - 1]
                    dd, [dtp_loc,
                         dtp], dfp, dfn, dm, dr = self.detectors_stats[i][
                             len(self.detectors_stats[i]) - 1]
                    current_stats.append([ce, dd, dfp, dfn, cm + dm, cr + dr])

                # current_stats = ScoreProcessor.penalize_high_dfp(fp_level, 2, 1, current_stats)
                # ranked_current_stats = ScoreProcessor.rank_matrix(current_stats)
                scaled_current_stats = ScoreProcessor.normalize_matrix(
                    current_stats)
                scaled_current_scores = ScoreProcessor.calculate_weighted_scores(
                    scaled_current_stats, self.w_vec)
                self.pairs_scores.append(scaled_current_scores)
                # print(scaled_current_scores)
                max_score = max(scaled_current_scores)
                indexes = numpy.argwhere(
                    numpy.array(scaled_current_scores) ==
                    max_score).flatten().tolist()
                optimal_index = random.choice(indexes)
                # index = scaled_current_scores.index(max(scaled_current_scores))
                learner_name = self.pairs[optimal_index][0].LEARNER_NAME.upper(
                )
                detector_name = self.pairs[optimal_index][
                    1].DETECTOR_NAME.upper()
                optimal = learner_name + " + " + detector_name
                self.optimal_pair.append([optimal_index, optimal])
                # print(optimal)
                # for i in range(0, len(learners_detectors)):
                #    ce, cm, cr = learners_stats[i][len(learners_stats[i]) - 1]
                #    dd, [dtp_loc, dtp], dfp, dfn, dm, dr = detectors_stats[i][len(detectors_stats[i]) - 1]
                #    print("\t", learners_detectors_names[i], [ce, dd, dfp, dfn, cm + dm, cr + dr])

            self.feedback_counter += 1
            self.score_counter += 1

        self.store_stats()
        self.plot()
        self.archive()
        self.print_stats()

        print("THE END")
        print("\a")
    def run(self, stream, random_seed=1):
        """Serve drift detection over TCP instead of reading *stream* directly.

        Listens on 0.0.0.0:8000. Each accepted connection is expected to send
        one comma-separated record (read as a single recv of up to 1024
        bytes — records longer than that would be truncated). Once the
        learner is ready, the record is tested; the client receives 'true'
        if the drift detector fired (learner and detector are then reset)
        or 'false' otherwise.

        NOTE(review): the *stream* parameter is unused (records arrive via
        the socket); the warm-up (not-ready) branch never replies to or
        closes the connection; and the ``while True`` loop has no exit, so
        the stats/plot code after it is unreachable and the server socket
        is never closed. Confirm this prototype behavior is intended.

        :param stream: unused; kept for signature parity with the
            stream-based run() variants.
        :param random_seed: seed for ``random.seed()`` (default 1).
        """

        random.seed(random_seed)
        import socket
        HOST = '0.0.0.0'
        PORT = 8000
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.bind((HOST, PORT))
        server.listen(10)
        # for record in stream:
        while True:

            # One record per client connection.
            conn, addr = server.accept()
            clientMessage = str(conn.recv(1024), encoding='utf-8')
            self.__instance_counter += 1
            record = clientMessage.split(',')

            # percentage = (self.__instance_counter / len(stream)) * 100
            # print("%0.2f" % percentage + "% of instances are prequentially processed!", end="\r")

            # if record.__contains__("?"):
            #     self.__num_rubbish += 1
            #     continue

            # print(record)
            # break
            # ---------------------
            #  Data Transformation
            # ---------------------
            # Transformation/normalization steps are disabled in this variant;
            # the raw string fields of the record are used as-is.
            r = copy.copy(record)
            print(r)
            # for k in range(0, len(r) - 1):
            #     if self.learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
            #         r[k] = Discretizer.find_bin(r[k], self.__nominal_attribute_scheme[k])
            #     elif self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
            #         r[k] = NominalToNumericTransformer.map_attribute_value(r[k], self.__numeric_attribute_scheme[k])
            # # NORMALIZING NUMERIC DATA
            # if self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
            #     r[0:len(r) - 1] = Normalizer.normalize(r[0:len(r) - 1], self.__numeric_attribute_scheme)

            # ----------------------
            #  Prequential Learning
            # ----------------------
            if self.learner.is_ready():

                real_class = r[-1]
                predicted_class = self.learner.do_testing(r)

                prediction_status = True
                if real_class != predicted_class:
                    prediction_status = False

                # -----------------------
                #  Drift Detected?
                # -----------------------
                warning_status, drift_status = self.drift_detector.detect(
                    prediction_status)
                if drift_status:
                    self.__drift_points_boolean.append(1)
                    self.__located_drift_points.append(self.__instance_counter)
                    print("\n ->>> " + self.learner.LEARNER_NAME.title() +
                          " faced a drift at instance " +
                          str(self.__instance_counter) + ".")
                    # print("%0.2f" % percentage, " of instances are prequentially processed!", end="\r")

                    # Archive learner/detector stats at the drift point.
                    learner_error_rate = PredictionEvaluator.calculate(
                        TornadoDic.ERROR_RATE,
                        self.learner.get_global_confusion_matrix())
                    self.__learner_error_rate_array.append(
                        round(learner_error_rate, 4))
                    self.__learner_memory_usage.append(
                        asizeof.asizeof(self.learner, limit=20))
                    self.__learner_runtime.append(
                        self.learner.get_running_time())

                    self.__drift_detection_memory_usage.append(
                        asizeof.asizeof(self.drift_detector, limit=20))
                    self.__drift_detection_runtime.append(
                        self.drift_detector.RUNTIME)

                    self.learner.reset()
                    self.drift_detector.reset()

                    # Tell the client a drift was detected, then wait for the
                    # next connection.
                    serverMessage = 'true'
                    conn.sendall(serverMessage.encode())
                    conn.close()

                    continue

                # if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                #     self.learner.do_training(r)
                # else:
                #     self.learner.do_loading(r)
                # No drift: reply 'false'. Note that training is disabled above.
                serverMessage = 'false'
                conn.sendall(serverMessage.encode())
                conn.close()
            else:
                # Warm-up: train once, mark ready, and seed the confusion
                # matrix with a trivially correct prediction.
                # NOTE(review): this branch sends no reply and never closes
                # the connection.
                if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    self.learner.do_training(r)
                else:
                    self.learner.do_loading(r)

                self.learner.set_ready()
                self.learner.update_confusion_matrix(r[len(r) - 1],
                                                     r[len(r) - 1])

            learner_error_rate = PredictionEvaluator.calculate(
                TornadoDic.ERROR_RATE, self.learner.get_confusion_matrix())
            learner_error_rate = round(learner_error_rate, 4)
            self.__learner_error_rate_array.append(learner_error_rate)

            # Optional periodic memory sampling (-1 disables it).
            if self.__memory_check_step != -1:
                if self.__instance_counter % self.__memory_check_step == 0:
                    self.__drift_detection_memory_usage.append(
                        asizeof.asizeof(self.drift_detector, limit=20))

            self.__drift_points_boolean.append(0)

        # NOTE(review): unreachable — the while True loop above never breaks.
        print("\n" + "The stream is completely processed.")
        self.__store_stats()
        self.__plot()
        print("\n\r" + "THE END!")
        print("\a")
Example #5
0
    def run(self, stream, random_seed=1):
        """Prequentially evaluate the learner while monitoring for drift.

        Like the plain prequential runner, but after each test the prediction
        outcome is fed to ``self.drift_detector``; when a drift is signalled,
        learner/detector stats are archived and both are reset. The CDDM
        detector is special-cased: it consumes the prediction probability and
        the real class instead of a boolean prediction status.

        :param stream: sequence of records (class label last); must support
            ``len()`` for the progress display.
        :param random_seed: seed for ``random.seed()`` (default 1).
        """

        random.seed(random_seed)

        for record in stream:

            self.__instance_counter += 1

            percentage = (self.__instance_counter / len(stream)) * 100
            print("%0.2f" % percentage +
                  "% of instances are prequentially processed!",
                  end="\r")

            # Skip records with missing values ("?").
            if record.__contains__("?"):
                self.__num_rubbish += 1
                continue

            # ---------------------
            #  Data Transformation
            # ---------------------
            # Shallow copy so the original stream record stays intact.
            r = copy.copy(record)
            for k in range(0, len(r) - 1):
                # Discretize numeric attributes for nominal classifiers; map
                # nominal attributes to numbers for numeric classifiers.
                if self.learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.__attributes[
                        k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                    r[k] = Discretizer.find_bin(
                        r[k], self.__nominal_attribute_scheme[k])
                elif self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.__attributes[
                        k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                    r[k] = NominalToNumericTransformer.map_attribute_value(
                        r[k], self.__numeric_attribute_scheme[k])
            # NORMALIZING NUMERIC DATA
            if self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
                r[0:len(r) - 1] = Normalizer.normalize(
                    r[0:len(r) - 1], self.__numeric_attribute_scheme)

            # ----------------------
            #  Prequential Learning
            # ----------------------
            if self.learner.is_ready():

                real_class = r[len(r) - 1]
                predicted_class = self.learner.do_testing(r)

                # CDDM consumes the prediction probability and real class;
                # every other detector consumes a boolean prediction status.
                if self.drift_detector.DETECTOR_NAME == "CDDM":

                    proba = self.learner.get_prediction_prob(r)
                    warning_status, drift_status = self.drift_detector.detect(
                        proba, real_class)

                else:

                    prediction_status = True
                    if real_class != predicted_class:
                        prediction_status = False

                    warning_status, drift_status = self.drift_detector.detect(
                        prediction_status)

                # -----------------------
                #  Drift Detected?
                # -----------------------
                if drift_status:
                    self.__drift_points_boolean.append(1)
                    self.__located_drift_points.append(self.__instance_counter)
                    print("\n ->>> " + self.learner.LEARNER_NAME.title() +
                          " faced a drift at instance " +
                          str(self.__instance_counter) + ".")
                    print("%0.2f" % percentage,
                          " of instances are prequentially processed!",
                          end="\r")

                    # Archive learner/detector stats at the drift point.
                    learner_error_rate = PredictionEvaluator.calculate(
                        TornadoDic.ERROR_RATE,
                        self.learner.get_global_confusion_matrix())
                    self.__learner_error_rate_array.append(
                        round(learner_error_rate, 4))
                    self.__learner_memory_usage.append(
                        asizeof.asizeof(self.learner, limit=20))
                    self.__learner_runtime.append(
                        self.learner.get_running_time())

                    self.__drift_detection_memory_usage.append(
                        asizeof.asizeof(self.drift_detector, limit=20))
                    self.__drift_detection_runtime.append(
                        self.drift_detector.RUNTIME)

                    # Restart both components and skip training on this record.
                    self.learner.reset()
                    self.drift_detector.reset()

                    continue

                if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    self.learner.do_training(r)
                else:
                    self.learner.do_loading(r)
            else:
                # Warm-up: train once, mark ready, and seed the confusion
                # matrix with a trivially correct prediction.
                if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    self.learner.do_training(r)
                else:
                    self.learner.do_loading(r)

                self.learner.set_ready()
                self.learner.update_confusion_matrix(r[len(r) - 1],
                                                     r[len(r) - 1])

            learner_error_rate = PredictionEvaluator.calculate(
                TornadoDic.ERROR_RATE, self.learner.get_confusion_matrix())
            learner_error_rate = round(learner_error_rate, 4)
            self.__learner_error_rate_array.append(learner_error_rate)

            # Optional periodic memory sampling (-1 disables it).
            if self.__memory_check_step != -1:
                if self.__instance_counter % self.__memory_check_step == 0:
                    self.__drift_detection_memory_usage.append(
                        asizeof.asizeof(self.drift_detector, limit=20))

            self.__drift_points_boolean.append(0)

        print("\n" + "The stream is completely processed.")
        self.__store_stats()
        self.__plot()
        print("\n\r" + "THE END!")
        print("\a")
Example #6
0
 def get_error(self):
     """Return this learner's current error rate, derived from its confusion matrix."""
     confusion_matrix = self.get_confusion_matrix()
     return PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, confusion_matrix)