Example #1
    def _update_plots(self, sample_id, data_buffer):
        self._sample_ids.append(sample_id)
        memory_time = {}
        for metric_id, data_ids in data_buffer.data_dict.items():
            update_xy_limits = metric_id not in [
                constants.RUNNING_TIME, constants.MODEL_SIZE
            ]
            y_min = 0.0
            y_max = 1.0
            pad = 0.1  # Default padding above and below the plotted lines
            plot_tracker = self._plot_trackers[metric_id]
            if metric_id == constants.TRUE_VS_PREDICTED:
                # Process true values
                data_id = constants.Y_TRUE
                plot_tracker.data[data_id].append(
                    data_buffer.get_data(metric_id=metric_id, data_id=data_id))
                plot_tracker.line_objs[data_id].set_data(
                    self._sample_ids, plot_tracker.data[data_id])
                # Process predicted values
                data_id = constants.Y_PRED
                data = data_buffer.get_data(metric_id=metric_id,
                                            data_id=data_id)
                for i in range(self.n_models):
                    plot_tracker.data[data_id][i].append(data[i])
                    plot_tracker.line_objs[data_id][i].set_data(
                        self._sample_ids, plot_tracker.data[data_id][i])
                    y_min = min([
                        plot_tracker.data[data_id][i][-1],
                        plot_tracker.data[constants.Y_TRUE][-1], y_min
                    ])
                    y_max = max([
                        plot_tracker.data[data_id][i][-1],
                        plot_tracker.data[constants.Y_TRUE][-1], y_max
                    ])
            elif metric_id == constants.DATA_POINTS:
                update_xy_limits = False
                # Process features
                data_id = 'X'
                features_dict = data_buffer.get_data(metric_id=metric_id,
                                                     data_id=data_id)
                feature_indices = list(features_dict.keys())
                feature_indices.sort()
                # Store tuple of feature values into the buffer, sorted by index
                plot_tracker.data[data_id].add_element([[
                    features_dict[feature_indices[0]],
                    features_dict[feature_indices[1]]
                ]])

                # Redraw from scratch: clear() must run before the labels are
                # set, otherwise it would erase them
                # TODO consider a fading/update strategy instead
                plot_tracker.sub_plot_obj.clear()
                plot_tracker.sub_plot_obj.set_xlabel('Feature {}'.format(
                    feature_indices[0]))
                plot_tracker.sub_plot_obj.set_ylabel('Feature {}'.format(
                    feature_indices[1]))
                X1 = plot_tracker.data[data_id].get_queue()[-1][0]
                X2 = plot_tracker.data[data_id].get_queue()[-1][1]

                # Process target values
                data_id = 'target_values'
                plot_tracker.data[data_id] = data_buffer.get_data(
                    metric_id=metric_id, data_id=data_id)
                if not plot_tracker.data['clusters_initialized']:
                    for j in range(len(plot_tracker.data[data_id])):
                        plot_tracker.data['clusters'].append(
                            FastBuffer(plot_tracker.data['buffer_size']))
                    # Mark as initialized so the buffers are created only once
                    plot_tracker.data['clusters_initialized'] = True

                # Process predictions
                data_id = 'predictions'
                plot_tracker.data[data_id].add_element([
                    data_buffer.get_data(metric_id=metric_id, data_id=data_id)
                ])

                for k, cluster in enumerate(plot_tracker.data['clusters']):
                    if plot_tracker.data[data_id].get_queue()[-1] == k:
                        plot_tracker.data['clusters'][k].add_element(
                            [(X1, X2)])
                        # TODO confirm buffer update inside the loop
                    if cluster.get_queue():
                        temp = cluster.get_queue()
                        plot_tracker.sub_plot_obj.scatter(
                            *zip(*temp), label="Class {k}".format(k=k))
                plot_tracker.sub_plot_obj.legend(loc=2,
                                                 bbox_to_anchor=(1.01, 1.))
            elif metric_id == constants.RUNNING_TIME:
                # Only the current time measurement must be saved
                for data_id in data_ids:
                    plot_tracker.data[data_id] = data_buffer.get_data(
                        metric_id=metric_id, data_id=data_id)
                memory_time.update(plot_tracker.data)

            elif metric_id == constants.MODEL_SIZE:
                plot_tracker.data['model_size'] = data_buffer.get_data(
                    metric_id=metric_id, data_id='model_size')
                memory_time['model_size'] = plot_tracker.data['model_size']
            else:
                # Default case, 'mean' and 'current' performance
                for data_id in data_ids:
                    # Buffer data
                    data = data_buffer.get_data(metric_id=metric_id,
                                                data_id=data_id)
                    for i in range(self.n_models):
                        plot_tracker.data[data_id][i].append(data[i])
                        plot_tracker.line_objs[data_id][i].set_data(
                            self._sample_ids, plot_tracker.data[data_id][i])
                # Process data
                for i in range(self.n_models):
                    # Update annotations
                    self._update_annotations(
                        i, plot_tracker.sub_plot_obj, self.model_names[i],
                        plot_tracker.data[constants.MEAN][i][-1],
                        plot_tracker.data[constants.CURRENT][i][-1])
                    # Update plot limits
                    if metric_id in [constants.KAPPA_T, constants.KAPPA_M]:
                        y_min = min([
                            plot_tracker.data[constants.MEAN][i][-1],
                            plot_tracker.data[constants.CURRENT][i][-1], y_min
                        ])
                    if metric_id in [
                            constants.MSE, constants.MAE, constants.AMSE,
                            constants.AMAE, constants.ARMSE
                    ]:
                        y_min = -1
                        y_max = max([
                            plot_tracker.data[constants.MEAN][i][-1],
                            plot_tracker.data[constants.CURRENT][i][-1], y_max
                        ])
                        pad = 0.5 * y_max  # Padding below and above thresholds
            if update_xy_limits:
                plot_tracker.sub_plot_obj.set_ylim((y_min - pad, y_max + pad))
                plot_tracker.sub_plot_obj.set_xlim(0, self._sample_ids[-1])
        if constants.RUNNING_TIME in self.metrics or \
                constants.MODEL_SIZE in self.metrics:
            self._update_time_and_memory_annotations(memory_time)
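
The axis-limit handling above follows one pattern throughout: y_min/y_max only ever widen to cover the newest points, and the axes are re-padded on every refresh. A minimal, self-contained sketch of that pattern (plain matplotlib, with hypothetical stand-in data):

import matplotlib.pyplot as plt

# Sketch of the dynamic-limits pattern used in _update_plots: widen the
# y-limits to cover new data, then pad above and below before redrawing.
fig, ax = plt.subplots()
line, = ax.plot([], [])
xs, ys = [], []
y_min, y_max, pad = 0.0, 1.0, 0.1

for x, y in [(1, 0.5), (2, 1.4), (3, -0.3)]:  # stand-in data stream
    xs.append(x)
    ys.append(y)
    line.set_data(xs, ys)
    y_min = min(y_min, y)  # limits only ever widen
    y_max = max(y_max, y)
    ax.set_ylim(y_min - pad, y_max + pad)
    ax.set_xlim(0, xs[-1])
    fig.canvas.draw_idle()
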
Example #2
class MyKNNClassifier(KNNClassifier): # ... 
    def __init__(self, n_neighbors=5, max_window_size=1000, leaf_size=30,
                 metric='euclidean', weighted_vote=False, standardize=False):
        self.weighted_vote = weighted_vote
        self.standardize = standardize
        super().__init__(n_neighbors=n_neighbors, max_window_size=max_window_size, leaf_size=leaf_size, metric=metric)
        self.window_size = max_window_size
        self.window = None

        self.__configure()

    def __configure(self):
        self.window = FastBuffer(max_size=self.window_size)
        
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        if self.standardize:
            # transform_vector standardizes X in place and already buffers
            # the raw row, so no extra window update is needed here
            X = self.transform_vector(np.array(X))
        r, c = get_dimensions(X)

        if classes is not None:
            self.classes = list(set().union(self.classes, classes))

        for i in range(r):
            self.data_window.add_sample(X[i], y[i])

        return self
    
    
    def standardization(self, X):
        # Standardize the features of X to zero mean and unit variance
        scaler = StandardScaler()
        return scaler.fit_transform(X)
    

    def predict_proba(self, X):
        # Standardize the query the same way as the training samples
        if self.standardize:
            X = self.transform_vector(np.array(X))

        r, c = get_dimensions(X)

        if self.data_window is None or self.data_window.size < self.n_neighbors:
            # The model is empty, defaulting to zero
            return np.zeros(shape=(r, 1))
        proba = []

        self.classes = list(set().union(
            self.classes,
            np.unique(self.data_window.targets_buffer.astype(int))))
        new_dist, new_ind = self._get_neighbors(X)

        #print("new_dist: ", new_dist)
        #print("new_ind: ", new_ind)
        ###################################### Weighting that I've added #######################################################
        #if(self.weighted_vote == True):
            #votes = self.vote(new_ind)
        #  self.classes = int(self.data_window.get_targets_matrix()[new_ind]) #Class of our index
        
        if(self.weighted_vote == False):
            #print("Not Weighted")
            for i in range(r):
                votes = [0.0 for _ in range(int(max(self.classes) + 1))]
                for index in new_ind[i]:
                    votes[int(self.data_window.targets_buffer[index])] += 1. / len(new_ind[i])

                proba.append(votes)
                
        else:
            #print("Weighted")
            position = 0
            for i in range(r):
                votes = [0.0 for _ in range(int(max(self.classes) + 1))]
                for index in new_ind[i]:
                    votes[int(self.data_window.targets_buffer[index])] += np.sum((1. / new_dist[i][position])) / len(new_ind[i])
                    position = position + 1
                proba.append(votes)

        return np.asarray(proba)

    
    def calculate_mean(self, column_index):
        mean = 0.
        if not self.window.is_empty():
            mean = np.nanmean(np.array(self.window.get_queue())[:, column_index])
        return mean

    def calculate_stddev(self, column_index):
        std = 1.
        if not self.window.is_empty():
            std = np.nanstd(np.array(self.window.get_queue())[:, column_index])
        if std == 0.:
            std = 1.
        return std
    
    def transform_vector(self, X):
        r, c = get_dimensions(X)
        for i in range(r):
            # Buffer the raw (pre-standardization) row, so that future
            # mean/stddev estimates are computed on the original scale
            row = np.copy([X[i][:]])
            for j in range(c):
                value = X[i][j]
                mean = self.calculate_mean(j)
                standard_deviation = self.calculate_stddev(j)
                X[i][j] = (value - mean) / standard_deviation
            self.window.add_element(row)
        return X
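
A usage sketch for the class above, assuming scikit-multiflow's SEAGenerator stream and the base KNNClassifier behave as in the library; the prequential (test-then-train) loop and sample counts are illustrative only:

from skmultiflow.data import SEAGenerator

stream = SEAGenerator(random_state=1)
knn = MyKNNClassifier(n_neighbors=5, max_window_size=1000,
                      weighted_vote=True, standardize=True)

n_correct, n_samples = 0, 200
for _ in range(n_samples):
    X, y = stream.next_sample()
    y_pred = knn.predict(X)  # test first...
    n_correct += int(y_pred[0] == y[0])
    knn.partial_fit(X, y, classes=stream.target_values)  # ...then train
print('Prequential accuracy: {:.3f}'.format(n_correct / n_samples))
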
Example #3
    def __configure(self):
        """ Determines which subplots should be created and initializes all
        relevant objects to keep track of the plotting points.

        The basic structures needed to keep track of plot values (for each
        subplot) are lists of values and matplotlib line objects.

        This function also initializes each subplot with the correct name
        and sets up the axes.

        The subplot size self-adjusts to the screen size, so that data can
        be viewed comfortably in different contexts.

        """
        font_size_small = 8
        font_size_medium = 10
        font_size_large = 12

        plt.rc('font', size=font_size_small)  # controls default text sizes
        plt.rc('axes',
               titlesize=font_size_medium)  # font size of the axes title
        plt.rc('axes',
               labelsize=font_size_small)  # font size of the x and y labels
        plt.rc('xtick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('ytick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('legend', fontsize=font_size_small)  # legend font size
        plt.rc('figure',
               titlesize=font_size_large)  # font size of the figure title

        warnings.filterwarnings("ignore", ".*GUI is implemented.*")
        warnings.filterwarnings("ignore", ".*left==right.*")
        warnings.filterwarnings("ignore", ".*Passing 1d.*")

        self._sample_ids = []
        memory_time = {}

        plt.ion()
        self.fig = plt.figure(figsize=(9, 5))
        self.fig.suptitle(self.dataset_name)
        plot_metrics = [
            m for m in self.metrics
            if m not in [constants.RUNNING_TIME, constants.MODEL_SIZE]
        ]
        # 3-digit integer describing the position of the subplot
        base = 11 + len(plot_metrics) * 100
        self.fig.canvas.set_window_title('scikit-multiflow')

        # Subplots handler
        for metric_id in self.metrics:
            data_ids = self._data_dict[metric_id]
            self._plot_trackers[metric_id] = PlotDataTracker(data_ids)
            plot_tracker = self._plot_trackers[metric_id]
            if metric_id not in [constants.RUNNING_TIME, constants.MODEL_SIZE]:
                plot_tracker.sub_plot_obj = self.fig.add_subplot(base)
            base += 1
            if metric_id == constants.TRUE_VS_PREDICTED:
                handle = []
                plot_tracker.sub_plot_obj.set_prop_cycle(
                    cycler('color', ['c', 'm', 'y', 'k']))
                for data_id in data_ids:
                    if data_id == constants.Y_TRUE:
                        # True data
                        plot_tracker.data[data_id] = []
                        label = 'True value'
                        line_style = '--'
                        line_obj = plot_tracker.line_objs
                        if self.task_type == constants.CLASSIFICATION:
                            line_obj[
                                data_id], = plot_tracker.sub_plot_obj.step(
                                    self._sample_ids,
                                    plot_tracker.data[data_id],
                                    label=label,
                                    linestyle=line_style)
                        else:
                            line_obj[
                                data_id], = plot_tracker.sub_plot_obj.plot(
                                    self._sample_ids,
                                    plot_tracker.data[data_id],
                                    label=label,
                                    linestyle=line_style)
                        handle.append(line_obj[data_id])
                    else:
                        # Predicted data
                        plot_tracker.data[data_id] = [
                            [] for _ in range(self.n_models)
                        ]
                        plot_tracker.line_objs[data_id] = [
                            None for _ in range(self.n_models)
                        ]
                        line_obj = plot_tracker.line_objs[data_id]
                        for i in range(self.n_models):
                            label = 'Predicted {}'.format(self.model_names[i])
                            line_style = '--'
                            if self.task_type == constants.CLASSIFICATION:
                                line_obj[i], = plot_tracker.sub_plot_obj.step(
                                    self._sample_ids,
                                    plot_tracker.data[data_id][i],
                                    label=label,
                                    linestyle=line_style)
                            else:
                                line_obj[i], = plot_tracker.sub_plot_obj.plot(
                                    self._sample_ids,
                                    plot_tracker.data[data_id][i],
                                    label=label,
                                    linestyle=line_style)
                            handle.append(line_obj[i])
                plot_tracker.sub_plot_obj.legend(handles=handle,
                                                 loc=2,
                                                 bbox_to_anchor=(1.01, 1.))
                plot_tracker.sub_plot_obj.set_title('True vs Predicted')
                plot_tracker.sub_plot_obj.set_ylabel('y')

            elif metric_id == constants.DATA_POINTS:
                plot_tracker.data['buffer_size'] = 100
                plot_tracker.data['X'] = FastBuffer(
                    plot_tracker.data['buffer_size'])
                plot_tracker.data['target_values'] = None
                plot_tracker.data['predictions'] = FastBuffer(
                    plot_tracker.data['buffer_size'])
                plot_tracker.data['clusters'] = []
                plot_tracker.data['clusters_initialized'] = False

            elif metric_id == constants.RUNNING_TIME:
                # Only the current time measurement must be saved
                for data_id in data_ids:
                    plot_tracker.data[data_id] = [
                        0.0 for _ in range(self.n_models)
                    ]
                # To make the annotations
                memory_time.update(plot_tracker.data)

            elif metric_id == constants.MODEL_SIZE:
                plot_tracker.data['model_size'] = [
                    0.0 for _ in range(self.n_models)
                ]

                memory_time['model_size'] = plot_tracker.data['model_size']
            else:
                # Default case, 'mean' and 'current' performance
                handle = []
                sorted_data_ids = data_ids.copy()
                # For better usage of the color cycle, start with 'current' data
                sorted_data_ids.sort()
                for data_id in sorted_data_ids:
                    plot_tracker.data[data_id] = [
                        [] for _ in range(self.n_models)
                    ]
                    plot_tracker.line_objs[data_id] = [
                        None for _ in range(self.n_models)
                    ]
                    line_obj = plot_tracker.line_objs[data_id]
                    for i in range(self.n_models):
                        if data_id == constants.CURRENT:
                            label = '{}  (current, {} samples)'.format(
                                self.model_names[i], self.n_wait)
                            line_style = '-'
                        else:
                            label = '{} (mean)'.format(self.model_names[i])
                            line_style = ':'
                        line_obj[i], = plot_tracker.sub_plot_obj.plot(
                            self._sample_ids,
                            plot_tracker.data[data_id][i],
                            label=label,
                            linestyle=line_style)
                        handle.append(line_obj[i])
                self._set_fig_legend(handle)

                if metric_id == constants.ACCURACY:
                    plot_tracker.sub_plot_obj.set_title('Accuracy')
                    plot_tracker.sub_plot_obj.set_ylabel('acc')
                elif metric_id == constants.KAPPA:
                    plot_tracker.sub_plot_obj.set_title('Kappa')
                    plot_tracker.sub_plot_obj.set_ylabel('kappa')
                elif metric_id == constants.KAPPA_T:
                    plot_tracker.sub_plot_obj.set_title('Kappa T')
                    plot_tracker.sub_plot_obj.set_ylabel('kappa t')
                elif metric_id == constants.KAPPA_M:
                    plot_tracker.sub_plot_obj.set_title('Kappa M')
                    plot_tracker.sub_plot_obj.set_ylabel('kappa m')
                elif metric_id == constants.HAMMING_SCORE:
                    plot_tracker.sub_plot_obj.set_title('Hamming score')
                    plot_tracker.sub_plot_obj.set_ylabel('hamming score')
                elif metric_id == constants.HAMMING_LOSS:
                    plot_tracker.sub_plot_obj.set_title('Hamming loss')
                    plot_tracker.sub_plot_obj.set_ylabel('hamming loss')
                elif metric_id == constants.EXACT_MATCH:
                    plot_tracker.sub_plot_obj.set_title('Exact Match')
                    plot_tracker.sub_plot_obj.set_ylabel('exact match')
                elif metric_id == constants.J_INDEX:
                    plot_tracker.sub_plot_obj.set_title('Jaccard Index')
                    plot_tracker.sub_plot_obj.set_ylabel('j-index')
                elif metric_id == constants.MSE:
                    plot_tracker.sub_plot_obj.set_title('Mean Squared Error')
                    plot_tracker.sub_plot_obj.set_ylabel('mse')
                elif metric_id == constants.MAE:
                    plot_tracker.sub_plot_obj.set_title('Mean Absolute Error')
                    plot_tracker.sub_plot_obj.set_ylabel('mae')
                elif metric_id == constants.AMSE:
                    plot_tracker.sub_plot_obj.set_title(
                        'Average Mean Squared Error')
                    plot_tracker.sub_plot_obj.set_ylabel('amse')
                elif metric_id == constants.AMAE:
                    plot_tracker.sub_plot_obj.set_title(
                        'Average Mean Absolute Error')
                    plot_tracker.sub_plot_obj.set_ylabel('amae')
                elif metric_id == constants.ARMSE:
                    plot_tracker.sub_plot_obj.set_title(
                        'Average Root Mean Squared Error')
                    plot_tracker.sub_plot_obj.set_ylabel('armse')
                elif metric_id == constants.DATA_POINTS:
                    plot_tracker.sub_plot_obj.set_title('')
                    plot_tracker.sub_plot_obj.set_xlabel('Feature x')
                    plot_tracker.sub_plot_obj.set_ylabel('Feature y')
                else:
                    plot_tracker.sub_plot_obj.set_title('Unknown metric')
                    plot_tracker.sub_plot_obj.set_ylabel('')

        if constants.DATA_POINTS not in self.metrics:
            plt.xlabel('Samples')
        if constants.RUNNING_TIME in self.metrics or \
                constants.MODEL_SIZE in self.metrics:
            self._update_time_and_memory_annotations(memory_time)

        self.fig.subplots_adjust(hspace=.5)
        self.fig.tight_layout(rect=[0, .04, 1, 0.98],
                              pad=2.6,
                              w_pad=0.4,
                              h_pad=1.0)
    def _update_metrics(self):
        """ _update_metrics

        Updates the metrics of interest. This function creates a metrics
        dictionary, which is sent to _update_outputs in order to save the
        data (if configured to do so).

        Creates/updates a dictionary of new evaluation points. The keys of
        this dictionary are the metrics to keep track of, and the values are
        two-element lists, or tuples, containing each metric's global value
        and its partial value (measured over the last n_wait samples).

        If more than one learner is evaluated at once, each value in the
        dictionary is a list of lists, or tuples, containing the global and
        partial metric values for each of the learners.

        """
        new_points_dict = {}
        if self.PERFORMANCE in self.metrics:
            new_points_dict[self.PERFORMANCE] = [[self.global_classification_metrics[i].get_performance(),
                                                  self.partial_classification_metrics[i].get_performance()]
                                                 for i in range(self.n_models)]

        if self.KAPPA in self.metrics:
            new_points_dict[self.KAPPA] = [[self.global_classification_metrics[i].get_kappa(),
                                            self.partial_classification_metrics[i].get_kappa()]
                                           for i in range(self.n_models)]

        if self.KAPPA_T in self.metrics:
            new_points_dict[self.KAPPA_T] = [[self.global_classification_metrics[i].get_kappa_t(),
                                              self.partial_classification_metrics[i].get_kappa_t()]
                                             for i in range(self.n_models)]

        if self.KAPPA_M in self.metrics:
            new_points_dict[self.KAPPA_M] = [[self.global_classification_metrics[i].get_kappa_m(),
                                              self.partial_classification_metrics[i].get_kappa_m()]
                                             for i in range(self.n_models)]

        if self.HAMMING_SCORE in self.metrics:
            new_points_dict[self.HAMMING_SCORE] = [[self.global_classification_metrics[i].get_hamming_score(),
                                                    self.partial_classification_metrics[i].get_hamming_score()]
                                                   for i in range(self.n_models)]

        if self.HAMMING_LOSS in self.metrics:
            new_points_dict[self.HAMMING_LOSS] = [[self.global_classification_metrics[i].get_hamming_loss(),
                                                   self.partial_classification_metrics[i].get_hamming_loss()]
                                                  for i in range(self.n_models)]

        if self.EXACT_MATCH in self.metrics:
            new_points_dict[self.EXACT_MATCH] = [[self.global_classification_metrics[i].get_exact_match(),
                                                  self.partial_classification_metrics[i].get_exact_match()]
                                                 for i in range(self.n_models)]

        if self.J_INDEX in self.metrics:
            new_points_dict[self.J_INDEX] = [[self.global_classification_metrics[i].get_j_index(),
                                              self.partial_classification_metrics[i].get_j_index()]
                                             for i in range(self.n_models)]

        if self.MSE in self.metrics:
            new_points_dict[self.MSE] = [[self.global_classification_metrics[i].get_mean_square_error(),
                                          self.partial_classification_metrics[i].get_mean_square_error()]
                                         for i in range(self.n_models)]

        if self.MAE in self.metrics:
            new_points_dict[self.MAE] = [[self.global_classification_metrics[i].get_average_error(),
                                          self.partial_classification_metrics[i].get_average_error()]
                                         for i in range(self.n_models)]

        if self.TRUE_VS_PREDICTED in self.metrics:
            true, pred = [], []
            for i in range(self.n_models):
                t, p = self.global_classification_metrics[i].get_last()
                true.append(t)
                pred.append(p)
            new_points_dict[self.TRUE_VS_PREDICTED] = [[true[i], pred[i]] for i in range(self.n_models)]

        if self.DATA_POINTS in self.metrics:

            targets = self.stream.target_values
            pred = []
            samples = FastBuffer(5000)

            for i in range(self.n_models):
                _, p = self.global_classification_metrics[i].get_last()
                X = self.global_classification_metrics[i].get_last_sample()

                pred.append(p)
                samples.add_element([X])

            new_points_dict[self.DATA_POINTS] = [[[samples.get_queue()[i]], targets, pred[i]]
                                                 for i in range(self.n_models)]

        shift = 0
        if self._method == 'prequential':
            shift = -self.batch_size   # Adjust index due to training after testing
        self._update_outputs(self.global_sample_count + shift, new_points_dict)
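
The base = 11 + len(plot_metrics) * 100 arithmetic in __configure relies on matplotlib's 3-digit subplot code, where RCI means R rows, C columns, plot index I; incrementing base moves to the next stacked subplot. A standalone illustration (metric names hypothetical):

import matplotlib.pyplot as plt

# For n stacked metrics, base starts at n*100 + 11, i.e. n rows, one
# column, first index; base += 1 then advances to the next row.
n_metrics = 3
fig = plt.figure()
base = 11 + n_metrics * 100  # 311 for three metrics
for name in ['accuracy', 'kappa', 'running_time']:
    ax = fig.add_subplot(base)  # 311, 312, 313
    ax.set_title(name)
    base += 1
fig.tight_layout()
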
Example #5
class MissingValuesCleaner(StreamTransform):
    """ Fill missing values with some defined value.

    Provides a simple way to replace missing values in data samples with some value. The imputation value
    can be set via a set of imputation strategies.
    
    Parameters
    ----------
    missing_value: int, float or list (Default: numpy.nan)
        Missing value to replace
    
    strategy: string (Default: 'zero')
        The strategy adopted to find the missing value replacement. It can 
        be one of the following: 'zero', 'mean', 'median', 'mode', 'custom'.
    
    window_size: int (Default: 200)
        Defines the window size for the 'mean', 'median' and 'mode' strategies.
    
    new_value: int (Default: 1)
        This is the replacement value in case the chosen strategy is 'custom'.
        
    Examples
    --------
    >>> # Imports
    >>> import numpy as np
    >>> from skmultiflow.data.file_stream import FileStream
    >>> from skmultiflow.transform.missing_values_cleaner import MissingValuesCleaner
    >>> # Setting up a stream
    >>> stream = FileStream('skmultiflow/data/datasets/covtype.csv', -1, 1)
    >>> # Setting up the filter to substitute values -47 by the median of the
    >>> # last 10 samples
    >>> cleaner = MissingValuesCleaner(-47, 'median', 10)
    >>> X, y = stream.next_sample(10)
    >>> X[9, 0] = -47
    >>> # We will use this list to keep track of values
    >>> data = []
    >>> # Iterate over the first 9 samples, to build a sample window
    >>> for i in range(9):
    >>>     X_transf = cleaner.partial_fit_transform([X[i].tolist()])
    >>>     data.append(X_transf[0][0])
    >>>
    >>> # Transform last sample. The first feature should be replaced by the list's 
    >>> # median value
    >>> X_transf = cleaner.partial_fit_transform([X[9].tolist()])
    >>> np.median(data)

    Notes
    -----
    A missing value in a sample can be coded in many different ways, but the
    most common one is numpy's NaN, which is why it is the default value of
    the missing_value parameter.

    The user should choose the substitution strategy best suited to their
    use case, as each strategy has its pros and cons. The strategy can be
    chosen from a set of predefined strategies, which are: 'zero', 'mean',
    'median', 'mode', 'custom'.

    Notice that `MissingValuesCleaner` can actually be used to replace arbitrary
    values.

    """
    def __init__(self,
                 missing_value=np.nan,
                 strategy='zero',
                 window_size=200,
                 new_value=1):
        super().__init__()
        if isinstance(missing_value, list):
            self.missing_value = missing_value
        else:
            self.missing_value = [missing_value]
        self.strategy = strategy
        self.window_size = window_size
        self.window = None
        self.new_value = new_value

        self.__configure()

    def __configure(self):
        if self.strategy in ['mean', 'median', 'mode']:
            self.window = FastBuffer(max_size=self.window_size)

    def transform(self, X):
        """ transform
        
        Does the transformation process in the samples in X.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
        
        """
        r, c = get_dimensions(X)
        for i in range(r):
            if self.strategy in ['mean', 'median', 'mode']:
                self.window.add_element([X[i][:]])
            for j in range(c):
                if X[i][j] in self.missing_value or np.isnan(X[i][j]):
                    X[i][j] = self._get_substitute(j)

        return X

    def _get_substitute(self, column_index):
        """ _get_substitute
        
        Computes the replacement for a missing value.
        
        Parameters
        ----------
        column_index: int
            The index from the column where the missing value was found.
            
        Returns
        -------
        int or float
            The replacement.
        
        """
        if self.strategy == 'zero':
            return 0
        elif self.strategy == 'mean':
            if not self.window.is_empty():
                return np.nanmean(
                    np.array(self.window.get_queue())[:, column_index])
            else:
                return self.new_value
        elif self.strategy == 'median':
            if not self.window.is_empty():
                return np.nanmedian(
                    np.array(self.window.get_queue())[:, column_index])
            else:
                return self.new_value
        elif self.strategy == 'mode':
            if not self.window.is_empty():
                return stats.mode(np.array(
                    self.window.get_queue())[:, column_index],
                                  nan_policy='omit')[0]
            else:
                return self.new_value
        elif self.strategy == 'custom':
            return self.new_value

    def partial_fit_transform(self, X, y=None):
        """ partial_fit_transform
        
        Partially fits the model and then apply the transform to the data.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
            
        y: Array-like
            The true labels.
         
        Returns
        -------
        numpy.ndarray of shape (n_samples, n_features)
            The transformed data.
        
        """
        X = self.transform(X)

        return X

    def partial_fit(self, X, y=None):
        """ partial_fit
        
        Partial fits the model.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
            
        y: Array-like
            The true labels.
        
        Returns
        -------
        MissingValuesCleaner
            self
        
        """
        X = np.asarray(X)
        if self.strategy in ['mean', 'median', 'mode']:
            self.window.add_element(X)
        return self
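
Besides the 'median' example in the docstring above, the 'custom' strategy replaces an arbitrary sentinel with a fixed value and keeps no window at all. A minimal sketch (sentinel and values hypothetical):

import numpy as np

# Replace the sentinel -999 with 0 using the 'custom' strategy; no
# window is configured for this strategy, so nothing is buffered.
cleaner = MissingValuesCleaner(missing_value=-999, strategy='custom',
                               new_value=0)
X = np.array([[1.0, -999.0], [2.0, 4.0]])
print(cleaner.partial_fit_transform(X))  # the -999.0 entry becomes 0
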
class WindowedMinmaxScaler(StreamTransform):
    """ Transform features by scaling each feature to a given range.
    This estimator scales and translates each feature individually such
    that it is in the given range on the training set, e.g. between zero and one.
    For the training set we consider a window of a given length.

    Parameters
    ----------
    window_size: int (Default: 200)
        Defines the window size to compute min and max values.

    Examples
    --------
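    >>> # A minimal usage sketch; feature values are hypothetical
    >>> import numpy as np
    >>> from skmultiflow.transform import WindowedMinmaxScaler
    >>> scaler = WindowedMinmaxScaler(window_size=3)
    >>> # Each row is scaled against the min/max of the current window;
    >>> # the raw rows are then added to the window
    >>> X_transf = scaler.partial_fit_transform(np.array([[1., 10.], [2., 20.]]))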
    """

    def __init__(self, window_size=200):
        super().__init__()
        self.window_size = window_size
        self.window = None

        self.__configure()

    def __configure(self):
        self.window = FastBuffer(max_size=self.window_size)

    def transform(self, X):
        """ Does the transformation process in the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.

        """
        r, c = get_dimensions(X)
        for i in range(r):
            # Buffer the raw row, so future min/max are computed on the
            # original scale
            row = np.copy([X[i][:]])
            for j in range(c):
                value = X[i][j]
                min_val = self._get_min(j)
                max_val = self._get_max(j)
                if (max_val - min_val) == 0:
                    transformed = 0
                else:
                    # Scale into [0, 1]. The original code rescaled the
                    # standardized value back to [min_val, max_val], which
                    # returned it unchanged
                    transformed = (value - min_val) / (max_val - min_val)
                X[i][j] = transformed
            self.window.add_element(row)
        return X

    def _get_min(self, column_index):
        min_val = 0.
        if not self.window.is_empty():
            min_val = np.nanmin(np.array(self.window.get_queue())[:, column_index])
        return min_val

    def _get_max(self, column_index):
        max_val = 1.
        if not self.window.is_empty():
            max_val = np.nanmax(np.array(self.window.get_queue())[:, column_index])
        return max_val

    def partial_fit_transform(self, X, y=None):
        """ Partially fits the model and then apply the transform to the data.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.

        y: numpy.ndarray (optional, default=None)
            The target values.

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_features)
            The transformed data.

        """
        X = self.transform(X)

        return X

    def partial_fit(self, X, y=None):
        """ Partial fits the model.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.

        y: numpy.ndarray (optional, default=None)
            The target values.

        Returns
        -------
        WindowedMinmaxScaler
            self

        """
        self.window.add_element(X)
        return self
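
All of these examples lean on FastBuffer as a fixed-size sliding window, and only through the calls seen above: add_element to append, get_queue to read the contents, is_empty to test for data. A short sketch of that contract, assuming scikit-multiflow's FastBuffer:

from skmultiflow.utils.data_structures import FastBuffer

buf = FastBuffer(max_size=3)
for v in [1, 2, 3, 4]:
    buf.add_element([v])  # appends; the oldest entry is evicted at capacity
print(buf.get_queue())    # expected: [2, 3, 4]
print(buf.is_empty())     # False
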
Example #9
class EvaluationVisualizer(BaseListener):
    """ EvaluationVisualizer
    
    This class is responsible for maintaining and updating the plot modules 
    for all the evaluators in scikit-multiflow. 
    
    It uses matplotlib's pyplot module to create the main plot, which,
    depending on the options passed as parameters, will contain multiple
    subplots to better display all requested metrics.

    The plots are updated on the go, every n_wait samples. The plot is
    redrawn at each step, which may cause a significant slowdown, depending
    on the processor used and the plotting options.
    
    Line objects are used to describe performance measurements and scatter 
    instances will represent true labels and predictions, when requested.
    
    It supports the visualization of multiple learners per subplot as a way 
    of comparing the performance of different learning algorithms facing the 
    same data stream.
    
    Parameters
    ----------
    n_sliding: int
        The number of samples in the sliding window to track recent performance.
    
    dataset_name: string (Default: 'Unnamed graph')
        The title of the plot. Algorithmically it's not important.
    
    plots: list
        A list containing all the subplots to plot. Can be any of: 
        'accuracy', 'kappa', 'scatter', 'hamming_score', 'hamming_loss',
        'exact_match', 'j_index', 'mean_square_error', 'mean_absolute_error', 
        'true_vs_predicted', 'kappa_t', 'kappa_m'
    
    n_learners: int
        The number of learners to compare.
    
    Raises
    ------
    ValueError: Raised if no plots are passed to the constructor, or if a
    parameter of the wrong type is passed to on_new_train_step.
    
    Notes
    -----
    Using more than 3 plot types at a time is not recommended, as it will
    significantly slow down processing. For the same reason, comparing more
    than 3 learners at a time is not recommended.
    
    """
    def __init__(self,
                 task_type=None,
                 n_sliding=0,
                 dataset_name='Unnamed graph',
                 plots=None,
                 n_learners=1,
                 learner_name=None):
        super().__init__()

        # Default values
        self.sample_id = None
        self.scatter_x = None
        self._is_legend_set = False
        self._draw_cnt = 0

        self.text_annotations = []

        self.clusters = None
        self.X = None
        self.targets = None
        self.Flag = None

        self.true_values = None
        self.pred_values = None

        self.current_accuracy = None
        self.mean_accuracy = None

        self.mean_kappa = None
        self.current_kappa = None

        self.mean_kappa_t = None
        self.current_kappa_t = None

        self.mean_kappa_m = None
        self.current_kappa_m = None

        self.mean_hamming_score = None
        self.current_hamming_score = None

        self.mean_hamming_loss = None
        self.current_hamming_loss = None

        self.mean_exact_match = None
        self.current_exact_match = None

        self.mean_j_index = None
        self.current_j_index = None

        self.mean_mse = None
        self.current_mse = None

        self.mean_mae = None
        self.current_mae = None

        self.regression_true = None
        self.regression_pred = None

        # Configuration
        self.n_sliding = None
        self.dataset_name = None
        self.n_learners = None
        self.model_names = None
        self.num_plots = 0

        # Lines
        self.line_mean_accuracy = None
        self.line_current_accuracy = None

        self.line_mean_kappa = None
        self.line_current_kappa = None

        self.line_mean_kappa_t = None
        self.line_current_kappa_t = None

        self.line_mean_kappa_m = None
        self.line_current_kappa_m = None

        self.line_mean_hamming_score = None
        self.line_current_hamming_score = None

        self.line_mean_hamming_loss = None
        self.line_current_hamming_loss = None

        self.line_mean_exact_match = None
        self.line_current_exact_match = None

        self.line_mean_j_index = None
        self.line_current_j_index = None

        self.line_mean_mse = None
        self.line_current_mse = None

        self.line_mean_mae = None
        self.line_current_mae = None

        self.line_true = None
        self.line_pred = None

        self.line_prediction = None

        # Subplot default
        self.subplot_accuracy = None
        self.subplot_kappa = None
        self.subplot_kappa_t = None
        self.subplot_kappa_m = None
        self.subplot_scatter_points = None
        self.subplot_hamming_score = None
        self.subplot_hamming_loss = None
        self.subplot_exact_match = None
        self.subplot_j_index = None
        self.subplot_mse = None
        self.subplot_mae = None
        self.subplot_true_vs_predicted = None
        self.subplot_prediction = None

        if task_type is None or task_type == constants.UNDEFINED:
            raise ValueError('Task type for visualizer object is undefined.')
        else:
            if task_type in [
                    constants.CLASSIFICATION, constants.REGRESSION,
                    constants.MULTI_OUTPUT
            ]:
                self.task_type = task_type
            else:
                raise ValueError('Invalid task type: {}'.format(task_type))

        if learner_name is None:
            self.model_names = ['M{}'.format(i) for i in range(n_learners)]
        else:
            if isinstance(learner_name, list):
                if len(learner_name) != n_learners:
                    raise ValueError(
                        "Number of model names {} does not match the number of models {}."
                        .format(len(learner_name), n_learners))
                else:
                    self.model_names = learner_name
            else:
                raise ValueError("model_names must be a list.")

        if plots is not None:
            if len(plots) < 1:
                raise ValueError('No metrics were given.')
            else:
                self.__configure(n_sliding, dataset_name, plots, n_learners)
        else:
            raise ValueError('No metrics were given.')

    def on_new_train_step(self, train_step, metrics_dict):
        """ on_new_train_step
        
        This is the listener's main function, which gives it the ability to
        'listen' to the caller. Whenever the EvaluationVisualizer should be
        made aware of some new data, the caller will call this function,
        passing the new data as parameters.
        
        Parameters
        ----------
        train_step: int
            The number of samples processed to this moment.
        
        metrics_dict: dictionary
            A dictionary containing metric measurements, where the key is
            the metric name and the value its corresponding measurement.
            
        Raises
        ------
        ValueError: Raised if data in the wrong format is passed as a
        parameter.
         
        """
        try:
            self.draw(train_step, metrics_dict)
        except BaseException as exc:
            raise ValueError('Failed when trying to draw plot.') from exc

    def on_new_scatter_data(self, X, y, prediction):
        pass

    def __configure(self, n_sliding, dataset_name, plots, n_learners):
        """ __configure
        
        This function will verify which subplots it should create. For each one 
        of those, it will initialize all relevant objects to keep track of the 
        plotting points.
        
        Basic structures needed to keep track of plot values (for each subplot)
        are lists of values and matplotlib line objects.

        The __configure function will also initialize each subplot with the
        correct name and set up the axes.

        The subplot size self-adjusts to the screen size, so that data can
        be viewed comfortably in different contexts.
        
        Parameters
        ----------
        n_sliding: int
            The number of samples in the sliding window to track recent performance.
    
        dataset_name: string (Default: 'Unnamed graph')
            The title of the plot. Algorithmically it's not important.
    
        plots: list
            A list containing all the subplots to plot. Can be any of: 
            'accuracy', 'kappa', 'scatter', 'hamming_score', 'hamming_loss',
            'exact_match', 'j_index', 'mean_square_error', 'mean_absolute_error', 
            'true_vs_predicted', 'kappa_t', 'kappa_m'
        
        n_learners: int
            The number of learners to compare.
         
        """
        data_points = False
        font_size_small = 8
        font_size_medium = 10
        font_size_large = 12

        plt.rc('font', size=font_size_small)  # controls default text sizes
        plt.rc('axes',
               titlesize=font_size_medium)  # font size of the axes title
        plt.rc('axes',
               labelsize=font_size_small)  # font size of the x and y labels
        plt.rc('xtick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('ytick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('legend', fontsize=font_size_small)  # legend font size
        plt.rc('figure',
               titlesize=font_size_large)  # font size of the figure title

        warnings.filterwarnings("ignore", ".*GUI is implemented.*")
        warnings.filterwarnings("ignore", ".*left==right.*")
        warnings.filterwarnings("ignore", ".*Passing 1d.*")

        self.n_sliding = n_sliding
        self.dataset_name = dataset_name
        self.plots = plots
        self.n_learners = n_learners
        self.sample_id = []

        plt.ion()
        self.fig = plt.figure(figsize=(9, 5))
        self.fig.suptitle(dataset_name)
        self.num_plots = len(self.plots)
        base = 11 + self.num_plots * 100  # 3-digit integer describing the position of the subplot.
        self.fig.canvas.set_window_title('scikit-multiflow')

        if constants.ACCURACY in self.plots:
            self.current_accuracy = [[] for _ in range(self.n_learners)]
            self.mean_accuracy = [[] for _ in range(self.n_learners)]

            self.subplot_accuracy = self.fig.add_subplot(base)
            self.subplot_accuracy.set_title('Accuracy')
            self.subplot_accuracy.set_ylabel('Accuracy')
            base += 1

            self.line_current_accuracy = [None for _ in range(self.n_learners)]
            self.line_mean_accuracy = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_accuracy[i], = self.subplot_accuracy.plot(
                    self.sample_id,
                    self.current_accuracy[i],
                    label='{}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_accuracy[i], = self.subplot_accuracy.plot(
                    self.sample_id,
                    self.mean_accuracy[i],
                    label='{} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_accuracy[i])
                handle.append(self.line_mean_accuracy[i])

            self._set_fig_legend(handle)
            self.subplot_accuracy.set_ylim(0, 1)

        if constants.KAPPA in self.plots:
            self.current_kappa = [[] for _ in range(self.n_learners)]
            self.mean_kappa = [[] for _ in range(self.n_learners)]

            self.subplot_kappa = self.fig.add_subplot(base)
            self.subplot_kappa.set_title('Kappa')
            self.subplot_kappa.set_ylabel('Kappa')
            base += 1

            self.line_current_kappa = [None for _ in range(self.n_learners)]
            self.line_mean_kappa = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa[i], = self.subplot_kappa.plot(
                    self.sample_id,
                    self.current_kappa[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa[i], = self.subplot_kappa.plot(
                    self.sample_id,
                    self.mean_kappa[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa[i])
                handle.append(self.line_mean_kappa[i])

            self._set_fig_legend(handle)
            self.subplot_kappa.set_ylim(-1, 1)

        if constants.KAPPA_T in self.plots:
            self.current_kappa_t = [[] for _ in range(self.n_learners)]
            self.mean_kappa_t = [[] for _ in range(self.n_learners)]

            self.subplot_kappa_t = self.fig.add_subplot(base)
            self.subplot_kappa_t.set_title('Kappa T')
            self.subplot_kappa_t.set_ylabel('Kappa T')
            base += 1

            self.line_current_kappa_t = [None for _ in range(self.n_learners)]
            self.line_mean_kappa_t = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa_t[i], = self.subplot_kappa_t.plot(
                    self.sample_id,
                    self.current_kappa_t[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa_t[i], = self.subplot_kappa_t.plot(
                    self.sample_id,
                    self.mean_kappa_t[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa_t[i])
                handle.append(self.line_mean_kappa_t[i])

            self._set_fig_legend(handle)
            self.subplot_kappa_t.set_ylim(-1, 1)

        if constants.KAPPA_M in self.plots:
            self.current_kappa_m = [[] for _ in range(self.n_learners)]
            self.mean_kappa_m = [[] for _ in range(self.n_learners)]

            self.subplot_kappa_m = self.fig.add_subplot(base)
            self.subplot_kappa_m.set_title('Kappa M')
            self.subplot_kappa_m.set_ylabel('Kappa M')
            base += 1

            self.line_current_kappa_m = [None for _ in range(self.n_learners)]
            self.line_mean_kappa_m = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa_m[i], = self.subplot_kappa_m.plot(
                    self.sample_id,
                    self.current_kappa_m[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa_m[i], = self.subplot_kappa_m.plot(
                    self.sample_id,
                    self.mean_kappa_m[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa_m[i])
                handle.append(self.line_mean_kappa_m[i])

            self._set_fig_legend(handle)
            self.subplot_kappa_m.set_ylim(-1, 1)

        if constants.HAMMING_SCORE in self.plots:
            self.mean_hamming_score = [[] for _ in range(self.n_learners)]
            self.current_hamming_score = [[] for _ in range(self.n_learners)]

            self.subplot_hamming_score = self.fig.add_subplot(base)
            self.subplot_hamming_score.set_title('Hamming score')
            self.subplot_hamming_score.set_ylabel('Hamming score')
            base += 1

            self.line_current_hamming_score = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_hamming_score = [
                None for _ in range(self.n_learners)
            ]
            handle = []

            for i in range(self.n_learners):
                self.line_current_hamming_score[
                    i], = self.subplot_hamming_score.plot(
                        self.sample_id,
                        self.current_hamming_score[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_hamming_score[
                    i], = self.subplot_hamming_score.plot(
                        self.sample_id,
                        self.mean_hamming_score[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_current_hamming_score[i])
                handle.append(self.line_mean_hamming_score[i])

            self._set_fig_legend(handle)
            self.subplot_hamming_score.set_ylim(0, 1)

        if constants.HAMMING_LOSS in self.plots:
            self.mean_hamming_loss = [[] for _ in range(self.n_learners)]
            self.current_hamming_loss = [[] for _ in range(self.n_learners)]

            self.subplot_hamming_loss = self.fig.add_subplot(base)
            self.subplot_hamming_loss.set_title('Hamming loss')
            self.subplot_hamming_loss.set_ylabel('Hamming loss')
            base += 1

            self.line_current_hamming_loss = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_hamming_loss = [
                None for _ in range(self.n_learners)
            ]
            handle = []

            for i in range(self.n_learners):
                self.line_current_hamming_loss[
                    i], = self.subplot_hamming_loss.plot(
                        self.sample_id,
                        self.current_hamming_loss[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_hamming_loss[
                    i], = self.subplot_hamming_loss.plot(
                        self.sample_id,
                        self.mean_hamming_loss[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_current_hamming_loss[i])
                handle.append(self.line_mean_hamming_loss[i])

            self._set_fig_legend(handle)
            self.subplot_hamming_loss.set_ylim(0, 1)

        if constants.EXACT_MATCH in self.plots:
            self.mean_exact_match = [[] for _ in range(self.n_learners)]
            self.current_exact_match = [[] for _ in range(self.n_learners)]

            self.subplot_exact_match = self.fig.add_subplot(base)
            self.subplot_exact_match.set_title('Exact matches')
            self.subplot_exact_match.set_ylabel('Exact matches')
            base += 1

            self.line_current_exact_match = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_exact_match = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_exact_match[
                    i], = self.subplot_exact_match.plot(
                        self.sample_id,
                        self.current_exact_match[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_exact_match[i], = self.subplot_exact_match.plot(
                    self.sample_id,
                    self.mean_exact_match[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_exact_match[i])
                handle.append(self.line_mean_exact_match[i])

            self._set_fig_legend(handle)
            self.subplot_exact_match.set_ylim(0, 1)

        if constants.J_INDEX in self.plots:
            self.mean_j_index = [[] for _ in range(self.n_learners)]
            self.current_j_index = [[] for _ in range(self.n_learners)]

            self.subplot_j_index = self.fig.add_subplot(base)
            self.subplot_j_index.set_title('Jaccard index')
            self.subplot_j_index.set_ylabel('Jaccard index')
            base += 1

            self.line_current_j_index = [None for _ in range(self.n_learners)]
            self.line_mean_j_index = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_j_index[i], = self.subplot_j_index.plot(
                    self.sample_id,
                    self.current_j_index[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_j_index[i], = self.subplot_j_index.plot(
                    self.sample_id,
                    self.mean_j_index[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_j_index[i])
                handle.append(self.line_mean_j_index[i])

            self._set_fig_legend(handle)
            self.subplot_j_index.set_ylim(0, 1)

        if constants.MSE in self.plots:
            self.mean_mse = [[] for _ in range(self.n_learners)]
            self.current_mse = [[] for _ in range(self.n_learners)]

            self.subplot_mse = self.fig.add_subplot(base)
            self.subplot_mse.set_title('Mean Squared Error')
            self.subplot_mse.set_ylabel('MSE')
            base += 1

            self.line_current_mse = [None for _ in range(self.n_learners)]
            self.line_mean_mse = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_mse[i], = self.subplot_mse.plot(
                    self.sample_id,
                    self.current_mse[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_mse[i], = self.subplot_mse.plot(
                    self.sample_id,
                    self.mean_mse[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_mse[i])
                handle.append(self.line_mean_mse[i])

            self._set_fig_legend(handle)
            self.subplot_mse.set_ylim(0, 1)

        if constants.MAE in self.plots:
            self.mean_mae = [[] for _ in range(self.n_learners)]
            self.current_mae = [[] for _ in range(self.n_learners)]

            self.subplot_mae = self.fig.add_subplot(base)
            self.subplot_mae.set_title('Mean Absolute Error')
            self.subplot_mae.set_ylabel('MAE')
            base += 1

            self.line_current_mae = [None for _ in range(self.n_learners)]
            self.line_mean_mae = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_mae[i], = self.subplot_mae.plot(
                    self.sample_id,
                    self.current_mae[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_mae[i], = self.subplot_mae.plot(
                    self.sample_id,
                    self.mean_mae[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_mae[i])
                handle.append(self.line_mean_mae[i])

            self._set_fig_legend(handle)
            self.subplot_mae.set_ylim(0, 1)

        if constants.TRUE_VS_PREDICTED in self.plots:
            self.true_values = []
            self.pred_values = [[] for _ in range(self.n_learners)]

            self.subplot_true_vs_predicted = self.fig.add_subplot(base)
            self.subplot_true_vs_predicted.set_title('True vs Predicted')
            self.subplot_true_vs_predicted.set_ylabel('y')
            self.subplot_true_vs_predicted.set_prop_cycle(
                cycler('color', ['c', 'm', 'y', 'k']))
            base += 1

            if self.task_type == constants.CLASSIFICATION:
                self.line_true, = self.subplot_true_vs_predicted.step(
                    self.sample_id, self.true_values, label='True value')
            else:
                self.line_true, = self.subplot_true_vs_predicted.plot(
                    self.sample_id, self.true_values, label='True value')
            handle = [self.line_true]

            self.line_pred = [None for _ in range(self.n_learners)]

            for i in range(self.n_learners):
                if self.task_type == constants.CLASSIFICATION:
                    self.line_pred[i], = self.subplot_true_vs_predicted.step(
                        self.sample_id,
                        self.pred_values[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                else:
                    self.line_pred[i], = self.subplot_true_vs_predicted.plot(
                        self.sample_id,
                        self.pred_values[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_pred[i])

            self.subplot_true_vs_predicted.legend(handles=handle)
            self.subplot_true_vs_predicted.set_ylim(0, 1)

        if constants.DATA_POINTS in self.plots:
            data_points = True
            self.Flag = True
            self.X = FastBuffer(5000)
            self.targets = []
            self.prediction = []
            self.clusters = []
            self.subplot_scatter_points = self.fig.add_subplot(base)
            base += 1

        if data_points:
            plt.xlabel('X1')
        else:
            plt.xlabel('Samples')

        self.fig.subplots_adjust(hspace=.5)
        self.fig.tight_layout(rect=[0, .04, 1, 0.98],
                              pad=2.6,
                              w_pad=0.5,
                              h_pad=1.0)

    def _set_fig_legend(self, handles=None):
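        # Create the figure-level legend only once: every subplot tracks the
        # same models, so the first configured subplot's handles are enough
        # to describe them all.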
        if not self._is_legend_set:
            self.fig.legend(handles=handles,
                            ncol=self.n_learners,
                            bbox_to_anchor=(0.02, 0.0),
                            loc="lower left")
            self._is_legend_set = True

    def draw(self, train_step, metrics_dict):
        """ draw
        
        Updates and redraws the plot.
        
        Parameters
        ----------
        train_step: int
            The number of samples processed up to this moment.

        metrics_dict: dictionary
            A dictionary keyed by the metric identifiers of the requested
            subplots. For each metric, the value holds one tuple per model:
            the first element is the global (mean) measurement and the
            second is the sliding-window (current) measurement.
             
        """

        self.sample_id.append(train_step)

        self._clear_annotations()

        if constants.ACCURACY in self.plots:
            for i in range(self.n_learners):
                self.mean_accuracy[i].append(
                    metrics_dict[constants.ACCURACY][i][0])
                self.current_accuracy[i].append(
                    metrics_dict[constants.ACCURACY][i][1])
                self.line_mean_accuracy[i].set_data(self.sample_id,
                                                    self.mean_accuracy[i])
                self.line_current_accuracy[i].set_data(
                    self.sample_id, self.current_accuracy[i])

                self._update_annotations(i, self.subplot_accuracy,
                                         self.model_names[i],
                                         self.mean_accuracy[i][-1],
                                         self.current_accuracy[i][-1])

            self.subplot_accuracy.set_xlim(0, self.sample_id[-1])
            self.subplot_accuracy.set_ylim(0, 1)

        if constants.KAPPA in self.plots:
            for i in range(self.n_learners):
                self.mean_kappa[i].append(metrics_dict[constants.KAPPA][i][0])
                self.current_kappa[i].append(
                    metrics_dict[constants.KAPPA][i][1])
                self.line_mean_kappa[i].set_data(self.sample_id,
                                                 self.mean_kappa[i])
                self.line_current_kappa[i].set_data(self.sample_id,
                                                    self.current_kappa[i])

                self._update_annotations(i, self.subplot_kappa,
                                         self.model_names[i],
                                         self.mean_kappa[i][-1],
                                         self.current_kappa[i][-1])

            self.subplot_kappa.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa.set_ylim(0, 1)

        if constants.KAPPA_T in self.plots:
            minimum = -1.
            for i in range(self.n_learners):
                self.mean_kappa_t[i].append(
                    metrics_dict[constants.KAPPA_T][i][0])
                self.current_kappa_t[i].append(
                    metrics_dict[constants.KAPPA_T][i][1])
                self.line_mean_kappa_t[i].set_data(self.sample_id,
                                                   self.mean_kappa_t[i])
                self.line_current_kappa_t[i].set_data(self.sample_id,
                                                      self.current_kappa_t[i])

                self._update_annotations(i, self.subplot_kappa_t,
                                         self.model_names[i],
                                         self.mean_kappa_t[i][-1],
                                         self.current_kappa_t[i][-1])

                minimum = min(min(minimum, min(self.mean_kappa_t[i])),
                              min(minimum, min(self.current_kappa_t[i])))

            self.subplot_kappa_t.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_t.set_ylim([minimum, 1.])

        if constants.KAPPA_M in self.plots:
            minimum = -1.
            for i in range(self.n_learners):
                self.mean_kappa_m[i].append(
                    metrics_dict[constants.KAPPA_M][i][0])
                self.current_kappa_m[i].append(
                    metrics_dict[constants.KAPPA_M][i][1])
                self.line_mean_kappa_m[i].set_data(self.sample_id,
                                                   self.mean_kappa_m[i])
                self.line_current_kappa_m[i].set_data(self.sample_id,
                                                      self.current_kappa_m[i])

                self._update_annotations(i, self.subplot_kappa_m,
                                         self.model_names[i],
                                         self.mean_kappa_m[i][-1],
                                         self.current_kappa_m[i][-1])

                minimum = min(min(minimum, min(self.mean_kappa_m[i])),
                              min(minimum, min(self.current_kappa_m[i])))

            self.subplot_kappa_m.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_m.set_ylim(minimum, 1.)

        if constants.HAMMING_SCORE in self.plots:
            for i in range(self.n_learners):
                self.mean_hamming_score[i].append(
                    metrics_dict[constants.HAMMING_SCORE][i][0])
                self.current_hamming_score[i].append(
                    metrics_dict[constants.HAMMING_SCORE][i][1])
                self.line_mean_hamming_score[i].set_data(
                    self.sample_id, self.mean_hamming_score[i])
                self.line_current_hamming_score[i].set_data(
                    self.sample_id, self.current_hamming_score[i])

                self._update_annotations(i, self.subplot_hamming_score,
                                         self.model_names[i],
                                         self.mean_hamming_score[i][-1],
                                         self.current_hamming_score[i][-1])

            self.subplot_hamming_score.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_score.set_ylim(0, 1)

        if constants.HAMMING_LOSS in self.plots:
            for i in range(self.n_learners):
                self.mean_hamming_loss[i].append(
                    metrics_dict[constants.HAMMING_LOSS][i][0])
                self.current_hamming_loss[i].append(
                    metrics_dict[constants.HAMMING_LOSS][i][1])
                self.line_mean_hamming_loss[i].set_data(
                    self.sample_id, self.mean_hamming_loss[i])
                self.line_current_hamming_loss[i].set_data(
                    self.sample_id, self.current_hamming_loss[i])

                self._update_annotations(i, self.subplot_hamming_loss,
                                         self.model_names[i],
                                         self.mean_hamming_loss[i][-1],
                                         self.current_hamming_loss[i][-1])

            self.subplot_hamming_loss.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_loss.set_ylim(0, 1)

        if constants.EXACT_MATCH in self.plots:
            for i in range(self.n_learners):
                self.mean_exact_match[i].append(
                    metrics_dict[constants.EXACT_MATCH][i][0])
                self.current_exact_match[i].append(
                    metrics_dict[constants.EXACT_MATCH][i][1])
                self.line_mean_exact_match[i].set_data(
                    self.sample_id, self.mean_exact_match[i])
                self.line_current_exact_match[i].set_data(
                    self.sample_id, self.current_exact_match[i])

                self._update_annotations(i, self.subplot_exact_match,
                                         self.model_names[i],
                                         self.mean_exact_match[i][-1],
                                         self.current_exact_match[i][-1])

            self.subplot_exact_match.set_xlim(0, self.sample_id[-1])
            self.subplot_exact_match.set_ylim(0, 1)

        if constants.J_INDEX in self.plots:
            for i in range(self.n_learners):
                self.mean_j_index[i].append(
                    metrics_dict[constants.J_INDEX][i][0])
                self.current_j_index[i].append(
                    metrics_dict[constants.J_INDEX][i][1])
                self.line_mean_j_index[i].set_data(self.sample_id,
                                                   self.mean_j_index[i])
                self.line_current_j_index[i].set_data(self.sample_id,
                                                      self.current_j_index[i])

                self._update_annotations(i, self.subplot_j_index,
                                         self.model_names[i],
                                         self.mean_j_index[i][-1],
                                         self.current_j_index[i][-1])

            self.subplot_j_index.set_xlim(0, self.sample_id[-1])
            self.subplot_j_index.set_ylim(0, 1)

        if constants.MSE in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.mean_mse[i].append(metrics_dict[constants.MSE][i][0])
                self.current_mse[i].append(metrics_dict[constants.MSE][i][1])
                self.line_mean_mse[i].set_data(self.sample_id,
                                               self.mean_mse[i])
                self.line_current_mse[i].set_data(self.sample_id,
                                                  self.current_mse[i])

                self._update_annotations(i, self.subplot_mse,
                                         self.model_names[i],
                                         self.mean_mse[i][-1],
                                         self.current_mse[i][-1])

                # minimum = min([min(self.mean_mse[i]), min(self.current_mse[i]), minimum])
                maximum = max(
                    [max(self.mean_mse[i]),
                     max(self.current_mse[i]), maximum])

            self.subplot_mse.set_xlim(0, self.sample_id[-1])
            self.subplot_mse.set_ylim(minimum, 1.2 * maximum)

        if constants.MAE in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.mean_mae[i].append(metrics_dict[constants.MAE][i][0])
                self.current_mae[i].append(metrics_dict[constants.MAE][i][1])
                self.line_mean_mae[i].set_data(self.sample_id,
                                               self.mean_mae[i])
                self.line_current_mae[i].set_data(self.sample_id,
                                                  self.current_mae[i])

                self._update_annotations(i, self.subplot_mae,
                                         self.model_names[i],
                                         self.mean_mae[i][-1],
                                         self.current_mae[i][-1])

                # minimum = min([min(self.mean_mae[i]), min(self.current_mae[i]), minimum])
                maximum = max(
                    [max(self.mean_mae[i]),
                     max(self.current_mae[i]), maximum])

            self.subplot_mae.set_xlim(0, self.sample_id[-1])
            self.subplot_mae.set_ylim(minimum, 1.2 * maximum)

        if constants.TRUE_VS_PREDICTED in self.plots:
            self.true_values.append(
                metrics_dict[constants.TRUE_VS_PREDICTED][0][0])
            self.line_true.set_data(self.sample_id, self.true_values)
            minimum = 0
            maximum = 0
            for i in range(self.n_learners):
                self.pred_values[i].append(
                    metrics_dict[constants.TRUE_VS_PREDICTED][i][1])
                self.line_pred[i].set_data(self.sample_id, self.pred_values[i])
                minimum = min(
                    [min(self.pred_values[i]),
                     min(self.true_values), minimum])
                maximum = max(
                    [max(self.pred_values[i]),
                     max(self.true_values), maximum])

            self.subplot_true_vs_predicted.set_xlim(0, self.sample_id[-1])
            self.subplot_true_vs_predicted.set_ylim(minimum - 1, maximum + 1)

            self.subplot_true_vs_predicted.legend(loc=2,
                                                  bbox_to_anchor=(1.01, 1.))

        if constants.DATA_POINTS in self.plots:
            self.X.add_element(metrics_dict[constants.DATA_POINTS][0][0])

            self.targets = metrics_dict[constants.DATA_POINTS][0][1]
            if self.n_learners > 1:
                raise ValueError(
                    "You cannot compare classifiers in this type of plot.")
            else:
                self.prediction.append(
                    metrics_dict[constants.DATA_POINTS][0][2])
                if self.Flag is True:
                    for j in range(len(self.targets)):
                        self.clusters.append(FastBuffer(100))
                self.Flag = False

                self.subplot_scatter_points.clear()

                self.subplot_scatter_points.set_ylabel('X2')
                self.subplot_scatter_points.set_xlabel('X1')
                X1 = self.X.get_queue()[-1][0]
                X2 = self.X.get_queue()[-1][1]

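                # Each class k keeps its own bounded FastBuffer of recent
                # (X1, X2) points; the scatter is rebuilt from these buffers
                # every step, so old samples drop out once a buffer is full.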
                for k, cluster in enumerate(self.clusters):
                    if self.prediction[-1] == k:
                        self.clusters[k].add_element([(X1, X2)])
                    if cluster.get_queue():
                        temp = cluster.get_queue()
                        self.subplot_scatter_points.scatter(
                            *zip(*temp), label="class {k}".format(k=k))
                        self.subplot_scatter_points.legend(loc="best")

        if self._draw_cnt == 4:  # Refresh rate to mitigate re-drawing overhead for small changes
            plt.subplots_adjust(right=0.72)  # Adjust subplots to include metrics
            self.fig.canvas.draw()
            plt.pause(1e-9)
            self._draw_cnt = 0
        else:
            self._draw_cnt += 1

    def _clear_annotations(self):
        """ Clear annotations, so next frame is correctly rendered. """
        for i in range(len(self.text_annotations)):
            self.text_annotations[i].remove()
        self.text_annotations = []

    def _update_annotations(self, idx, subplot, model_name, global_value,
                            partial_value):
        xy_pos_default = (1.02, .90)  # Default xy position for metric annotations
        shift_y = 10 * (idx + 1)  # y axis shift for plot annotations
        xy_pos = xy_pos_default
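        # The annotations form a small text table to the right of the axes
        # (x > 1 in axes-fraction coordinates): a header row is written once,
        # for the first model, and each model then gets its own row, shifted
        # a further 10 points downward per model index.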
        if idx == 0:
            self.text_annotations.append(
                subplot.annotate('{: <12} | {: ^16} | {: ^16}'.format(
                    'Model', 'Mean', 'Current'),
                                 xy=xy_pos,
                                 xycoords='axes fraction'))
        self.text_annotations.append(
            subplot.annotate('{: <10.10s}'.format(model_name[:6]),
                             xy=xy_pos,
                             xycoords='axes fraction',
                             xytext=(0, -shift_y),
                             textcoords='offset points'))
        self.text_annotations.append(
            subplot.annotate('{: ^16.4f}  {: ^16.4f}'.format(
                global_value, partial_value),
                             xy=xy_pos,
                             xycoords='axes fraction',
                             xytext=(50, -shift_y),
                             textcoords='offset points'))

    def draw_scatter_points(self, X, y, predict):
        pass

    @staticmethod
    def hold():
        plt.show(block=True)

    def get_info(self):
        pass
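
The draw method above expects metrics_dict to map each configured metric
identifier to a per-model sequence of (global, sliding-window) value pairs.
A minimal usage sketch for one update step, assuming "visualizer" is an
already configured instance tracking a single model with the accuracy plot,
and assuming the metric identifiers live in scikit-multiflow's
skmultiflow.utils constants module (the numbers are made up):

    from skmultiflow.utils import constants

    # One (global mean, sliding-window) pair per model, keyed by metric id.
    metrics_dict = {
        constants.ACCURACY: [(0.874, 0.910)],  # model 0
    }
    visualizer.draw(train_step=1000, metrics_dict=metrics_dict)
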
Example #10
    def draw(self, train_step, metrics_dict):
        """ draw
        
        Updates and redraws the plot.
        
        Parameters
        ----------
        train_step: int
            The number of samples processed up to this moment.

        metrics_dict: dictionary
            A dictionary keyed by the metric identifiers of the requested
            subplots. For each metric, the value holds one tuple per model:
            the first element is the global (mean) measurement and the
            second is the sliding-window (current) measurement.
             
        """

        self.sample_id.append(train_step)

        self._clear_annotations()

        if constants.ACCURACY in self.plots:
            for i in range(self.n_learners):
                self.mean_accuracy[i].append(
                    metrics_dict[constants.ACCURACY][i][0])
                self.current_accuracy[i].append(
                    metrics_dict[constants.ACCURACY][i][1])
                self.line_mean_accuracy[i].set_data(self.sample_id,
                                                    self.mean_accuracy[i])
                self.line_current_accuracy[i].set_data(
                    self.sample_id, self.current_accuracy[i])

                self._update_annotations(i, self.subplot_accuracy,
                                         self.model_names[i],
                                         self.mean_accuracy[i][-1],
                                         self.current_accuracy[i][-1])

            self.subplot_accuracy.set_xlim(0, self.sample_id[-1])
            self.subplot_accuracy.set_ylim(0, 1)

        if constants.KAPPA in self.plots:
            for i in range(self.n_learners):
                self.mean_kappa[i].append(metrics_dict[constants.KAPPA][i][0])
                self.current_kappa[i].append(
                    metrics_dict[constants.KAPPA][i][1])
                self.line_mean_kappa[i].set_data(self.sample_id,
                                                 self.mean_kappa[i])
                self.line_current_kappa[i].set_data(self.sample_id,
                                                    self.current_kappa[i])

                self._update_annotations(i, self.subplot_kappa,
                                         self.model_names[i],
                                         self.mean_kappa[i][-1],
                                         self.current_kappa[i][-1])

            self.subplot_kappa.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa.set_ylim(0, 1)

        if constants.KAPPA_T in self.plots:
            minimum = -1.
            for i in range(self.n_learners):
                self.mean_kappa_t[i].append(
                    metrics_dict[constants.KAPPA_T][i][0])
                self.current_kappa_t[i].append(
                    metrics_dict[constants.KAPPA_T][i][1])
                self.line_mean_kappa_t[i].set_data(self.sample_id,
                                                   self.mean_kappa_t[i])
                self.line_current_kappa_t[i].set_data(self.sample_id,
                                                      self.current_kappa_t[i])

                self._update_annotations(i, self.subplot_kappa_t,
                                         self.model_names[i],
                                         self.mean_kappa_t[i][-1],
                                         self.current_kappa_t[i][-1])

                minimum = min(min(minimum, min(self.mean_kappa_t[i])),
                              min(minimum, min(self.current_kappa_t[i])))

            self.subplot_kappa_t.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_t.set_ylim([minimum, 1.])

        if constants.KAPPA_M in self.plots:
            minimum = -1.
            for i in range(self.n_learners):
                self.mean_kappa_m[i].append(
                    metrics_dict[constants.KAPPA_M][i][0])
                self.current_kappa_m[i].append(
                    metrics_dict[constants.KAPPA_M][i][1])
                self.line_mean_kappa_m[i].set_data(self.sample_id,
                                                   self.mean_kappa_m[i])
                self.line_current_kappa_m[i].set_data(self.sample_id,
                                                      self.current_kappa_m[i])

                self._update_annotations(i, self.subplot_kappa_m,
                                         self.model_names[i],
                                         self.mean_kappa_m[i][-1],
                                         self.current_kappa_m[i][-1])

                minimum = min(min(minimum, min(self.mean_kappa_m[i])),
                              min(minimum, min(self.current_kappa_m[i])))

            self.subplot_kappa_m.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_m.set_ylim(minimum, 1.)

        if constants.HAMMING_SCORE in self.plots:
            for i in range(self.n_learners):
                self.mean_hamming_score[i].append(
                    metrics_dict[constants.HAMMING_SCORE][i][0])
                self.current_hamming_score[i].append(
                    metrics_dict[constants.HAMMING_SCORE][i][1])
                self.line_mean_hamming_score[i].set_data(
                    self.sample_id, self.mean_hamming_score[i])
                self.line_current_hamming_score[i].set_data(
                    self.sample_id, self.current_hamming_score[i])

                self._update_annotations(i, self.subplot_hamming_score,
                                         self.model_names[i],
                                         self.mean_hamming_score[i][-1],
                                         self.current_hamming_score[i][-1])

            self.subplot_hamming_score.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_score.set_ylim(0, 1)

        if constants.HAMMING_LOSS in self.plots:
            for i in range(self.n_learners):
                self.mean_hamming_loss[i].append(
                    metrics_dict[constants.HAMMING_LOSS][i][0])
                self.current_hamming_loss[i].append(
                    metrics_dict[constants.HAMMING_LOSS][i][1])
                self.line_mean_hamming_loss[i].set_data(
                    self.sample_id, self.mean_hamming_loss[i])
                self.line_current_hamming_loss[i].set_data(
                    self.sample_id, self.current_hamming_loss[i])

                self._update_annotations(i, self.subplot_hamming_loss,
                                         self.model_names[i],
                                         self.mean_hamming_loss[i][-1],
                                         self.current_hamming_loss[i][-1])

            self.subplot_hamming_loss.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_loss.set_ylim(0, 1)

        if constants.EXACT_MATCH in self.plots:
            for i in range(self.n_learners):
                self.mean_exact_match[i].append(
                    metrics_dict[constants.EXACT_MATCH][i][0])
                self.current_exact_match[i].append(
                    metrics_dict[constants.EXACT_MATCH][i][1])
                self.line_mean_exact_match[i].set_data(
                    self.sample_id, self.mean_exact_match[i])
                self.line_current_exact_match[i].set_data(
                    self.sample_id, self.current_exact_match[i])

                self._update_annotations(i, self.subplot_exact_match,
                                         self.model_names[i],
                                         self.mean_exact_match[i][-1],
                                         self.current_exact_match[i][-1])

            self.subplot_exact_match.set_xlim(0, self.sample_id[-1])
            self.subplot_exact_match.set_ylim(0, 1)

        if constants.J_INDEX in self.plots:
            for i in range(self.n_learners):
                self.mean_j_index[i].append(
                    metrics_dict[constants.J_INDEX][i][0])
                self.current_j_index[i].append(
                    metrics_dict[constants.J_INDEX][i][1])
                self.line_mean_j_index[i].set_data(self.sample_id,
                                                   self.mean_j_index[i])
                self.line_current_j_index[i].set_data(self.sample_id,
                                                      self.current_j_index[i])

                self._update_annotations(i, self.subplot_j_index,
                                         self.model_names[i],
                                         self.mean_j_index[i][-1],
                                         self.current_j_index[i][-1])

            self.subplot_j_index.set_xlim(0, self.sample_id[-1])
            self.subplot_j_index.set_ylim(0, 1)

        if constants.MSE in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.mean_mse[i].append(metrics_dict[constants.MSE][i][0])
                self.current_mse[i].append(metrics_dict[constants.MSE][i][1])
                self.line_mean_mse[i].set_data(self.sample_id,
                                               self.mean_mse[i])
                self.line_current_mse[i].set_data(self.sample_id,
                                                  self.current_mse[i])

                self._update_annotations(i, self.subplot_mse,
                                         self.model_names[i],
                                         self.mean_mse[i][-1],
                                         self.current_mse[i][-1])

                # minimum = min([min(self.mean_mse[i]), min(self.current_mse[i]), minimum])
                maximum = max(
                    [max(self.mean_mse[i]),
                     max(self.current_mse[i]), maximum])

            self.subplot_mse.set_xlim(0, self.sample_id[-1])
            self.subplot_mse.set_ylim(minimum, 1.2 * maximum)

        if constants.MAE in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.mean_mae[i].append(metrics_dict[constants.MAE][i][0])
                self.current_mae[i].append(metrics_dict[constants.MAE][i][1])
                self.line_mean_mae[i].set_data(self.sample_id,
                                               self.mean_mae[i])
                self.line_current_mae[i].set_data(self.sample_id,
                                                  self.current_mae[i])

                self._update_annotations(i, self.subplot_mae,
                                         self.model_names[i],
                                         self.mean_mae[i][-1],
                                         self.current_mae[i][-1])

                # minimum = min([min(self.mean_mae[i]), min(self.current_mae[i]), minimum])
                maximum = max(
                    [max(self.mean_mae[i]),
                     max(self.current_mae[i]), maximum])

            self.subplot_mae.set_xlim(0, self.sample_id[-1])
            self.subplot_mae.set_ylim(minimum, 1.2 * maximum)

        if constants.TRUE_VS_PREDICTED in self.plots:
            self.true_values.append(
                metrics_dict[constants.TRUE_VS_PREDICTED][0][0])
            self.line_true.set_data(self.sample_id, self.true_values)
            minimum = 0
            maximum = 0
            for i in range(self.n_learners):
                self.pred_values[i].append(
                    metrics_dict[constants.TRUE_VS_PREDICTED][i][1])
                self.line_pred[i].set_data(self.sample_id, self.pred_values[i])
                minimum = min(
                    [min(self.pred_values[i]),
                     min(self.true_values), minimum])
                maximum = max(
                    [max(self.pred_values[i]),
                     max(self.true_values), maximum])

            self.subplot_true_vs_predicted.set_xlim(0, self.sample_id[-1])
            self.subplot_true_vs_predicted.set_ylim(minimum - 1, maximum + 1)

            self.subplot_true_vs_predicted.legend(loc=2,
                                                  bbox_to_anchor=(1.01, 1.))

        if constants.DATA_POINTS in self.plots:
            self.X.add_element(metrics_dict[constants.DATA_POINTS][0][0])

            self.targets = metrics_dict[constants.DATA_POINTS][0][1]
            if self.n_learners > 1:
                raise ValueError(
                    "You cannot compare classifiers in this type of plot.")
            else:
                self.prediction.append(
                    metrics_dict[constants.DATA_POINTS][0][2])
                if self.Flag is True:
                    for j in range(len(self.targets)):
                        self.clusters.append(FastBuffer(100))
                self.Flag = False

                self.subplot_scatter_points.clear()

                self.subplot_scatter_points.set_ylabel('X2')
                self.subplot_scatter_points.set_xlabel('X1')
                X1 = self.X.get_queue()[-1][0]
                X2 = self.X.get_queue()[-1][1]

                for k, cluster in enumerate(self.clusters):
                    if self.prediction[-1] == k:
                        self.clusters[k].add_element([(X1, X2)])
                    if cluster.get_queue():
                        temp = cluster.get_queue()
                        self.subplot_scatter_points.scatter(
                            *zip(*temp), label="class {k}".format(k=k))
                        self.subplot_scatter_points.legend(loc="best")

        if self._draw_cnt == 4:  # Refresh rate to mitigate re-drawing overhead for small changes
            plt.subplots_adjust(right=0.72)  # Adjust subplots to include metrics
            self.fig.canvas.draw()
            plt.pause(1e-9)
            self._draw_cnt = 0
        else:
            self._draw_cnt += 1
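
The tail of draw shows a cheap way to throttle canvas redraws: the line data
is updated on every call, but the figure is only flushed to the screen every
fifth call. A standalone sketch of the same pattern, independent of this
class (all names here are illustrative):

    import matplotlib.pyplot as plt

    REDRAW_EVERY = 5  # flush the canvas once per five data updates

    class ThrottledPlot:
        def __init__(self):
            plt.ion()  # interactive mode, as in the visualizer above
            self.fig, self.ax = plt.subplots()
            self.line, = self.ax.plot([], [])
            self._draw_cnt = 0

        def update(self, xs, ys):
            self.line.set_data(xs, ys)  # cheap: only mutates the artist
            self.ax.relim()             # recompute data limits
            self.ax.autoscale_view()    # rescale axes to the new data
            self._draw_cnt += 1
            if self._draw_cnt >= REDRAW_EVERY:
                self.fig.canvas.draw()  # expensive: full re-render
                plt.pause(1e-9)         # let the GUI event loop run
                self._draw_cnt = 0
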
Example #11
    def __configure(self, n_sliding, dataset_name, plots, n_learners):
        """ __configure
        
        This function determines which subplots to create. For each of
        those, it initializes all the objects required to keep track of
        the plotted points.

        The basic structures needed to track the plot values (for each
        subplot) are lists of values and matplotlib line objects.

        The __configure function also initializes each subplot with the
        correct name and sets up the axes.

        The subplot layout self-adjusts to the screen size, so that the
        data can be viewed comfortably in different contexts.
        
        Parameters
        ----------
        n_sliding: int
            The number of samples in the sliding window to track recent performance.
    
        dataset_name: string (Default: 'Unnamed graph')
            The title of the plot. It has no effect on the evaluation itself.
    
        plots: list
            A list containing all the subplots to plot. Can be any of: 
            'accuracy', 'kappa', 'scatter', 'hamming_score', 'hamming_loss',
            'exact_match', 'j_index', 'mean_square_error', 'mean_absolute_error', 
            'true_vs_predicted', 'kappa_t', 'kappa_m'
        
        n_learners: int
            The number of learners to compare.
         
        """
        data_points = False
        font_size_small = 8
        font_size_medium = 10
        font_size_large = 12

        plt.rc('font', size=font_size_small)  # controls default text sizes
        plt.rc('axes',
               titlesize=font_size_medium)  # font size of the axes title
        plt.rc('axes',
               labelsize=font_size_small)  # font size of the x and y labels
        plt.rc('xtick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('ytick',
               labelsize=font_size_small)  # font size of the tick labels
        plt.rc('legend', fontsize=font_size_small)  # legend font size
        plt.rc('figure',
               titlesize=font_size_large)  # font size of the figure title

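        # Silence noisy matplotlib warnings raised during interactive updates.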
        warnings.filterwarnings("ignore", ".*GUI is implemented.*")
        warnings.filterwarnings("ignore", ".*left==right.*")
        warnings.filterwarnings("ignore", ".*Passing 1d.*")

        self.n_sliding = n_sliding
        self.dataset_name = dataset_name
        self.plots = plots
        self.n_learners = n_learners
        self.sample_id = []

        plt.ion()
        self.fig = plt.figure(figsize=(9, 5))
        self.fig.suptitle(dataset_name)
        self.num_plots = len(self.plots)
        base = 11 + self.num_plots * 100  # 3-digit integer describing the position of the subplot.
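        # e.g. with 3 plots, base starts at 311 (3 rows, 1 column, slot 1);
        # each configured subplot then claims the next slot via base += 1.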
        self.fig.canvas.set_window_title('scikit-multiflow')

        if constants.ACCURACY in self.plots:
            self.current_accuracy = [[] for _ in range(self.n_learners)]
            self.mean_accuracy = [[] for _ in range(self.n_learners)]

            self.subplot_accuracy = self.fig.add_subplot(base)
            self.subplot_accuracy.set_title('Accuracy')
            self.subplot_accuracy.set_ylabel('Accuracy')
            base += 1

            self.line_current_accuracy = [None for _ in range(self.n_learners)]
            self.line_mean_accuracy = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_accuracy[i], = self.subplot_accuracy.plot(
                    self.sample_id,
                    self.current_accuracy[i],
                    label='{}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_accuracy[i], = self.subplot_accuracy.plot(
                    self.sample_id,
                    self.mean_accuracy[i],
                    label='{} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_accuracy[i])
                handle.append(self.line_mean_accuracy[i])

            self._set_fig_legend(handle)
            self.subplot_accuracy.set_ylim(0, 1)

        if constants.KAPPA in self.plots:
            self.current_kappa = [[] for _ in range(self.n_learners)]
            self.mean_kappa = [[] for _ in range(self.n_learners)]

            self.subplot_kappa = self.fig.add_subplot(base)
            self.subplot_kappa.set_title('Kappa')
            self.subplot_kappa.set_ylabel('Kappa')
            base += 1

            self.line_current_kappa = [None for _ in range(self.n_learners)]
            self.line_mean_kappa = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa[i], = self.subplot_kappa.plot(
                    self.sample_id,
                    self.current_kappa[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa[i], = self.subplot_kappa.plot(
                    self.sample_id,
                    self.mean_kappa[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa[i])
                handle.append(self.line_mean_kappa[i])

            self._set_fig_legend(handle)
            self.subplot_kappa.set_ylim(-1, 1)

        if constants.KAPPA_T in self.plots:
            self.current_kappa_t = [[] for _ in range(self.n_learners)]
            self.mean_kappa_t = [[] for _ in range(self.n_learners)]

            self.subplot_kappa_t = self.fig.add_subplot(base)
            self.subplot_kappa_t.set_title('Kappa T')
            self.subplot_kappa_t.set_ylabel('Kappa T')
            base += 1

            self.line_current_kappa_t = [None for _ in range(self.n_learners)]
            self.line_mean_kappa_t = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa_t[i], = self.subplot_kappa_t.plot(
                    self.sample_id,
                    self.current_kappa_t[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa_t[i], = self.subplot_kappa_t.plot(
                    self.sample_id,
                    self.mean_kappa_t[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa_t[i])
                handle.append(self.line_mean_kappa_t[i])

            self._set_fig_legend(handle)
            self.subplot_kappa_t.set_ylim(-1, 1)

        if constants.KAPPA_M in self.plots:
            self.current_kappa_m = [[] for _ in range(self.n_learners)]
            self.mean_kappa_m = [[] for _ in range(self.n_learners)]

            self.subplot_kappa_m = self.fig.add_subplot(base)
            self.subplot_kappa_m.set_title('Kappa M')
            self.subplot_kappa_m.set_ylabel('Kappa M')
            base += 1

            self.line_current_kappa_m = [None for _ in range(self.n_learners)]
            self.line_mean_kappa_m = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_kappa_m[i], = self.subplot_kappa_m.plot(
                    self.sample_id,
                    self.current_kappa_m[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_kappa_m[i], = self.subplot_kappa_m.plot(
                    self.sample_id,
                    self.mean_kappa_m[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_kappa_m[i])
                handle.append(self.line_mean_kappa_m[i])

            self._set_fig_legend(handle)
            self.subplot_kappa_m.set_ylim(-1, 1)

        if constants.HAMMING_SCORE in self.plots:
            self.mean_hamming_score = [[] for _ in range(self.n_learners)]
            self.current_hamming_score = [[] for _ in range(self.n_learners)]

            self.subplot_hamming_score = self.fig.add_subplot(base)
            self.subplot_hamming_score.set_title('Hamming score')
            self.subplot_hamming_score.set_ylabel('Hamming score')
            base += 1

            self.line_current_hamming_score = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_hamming_score = [
                None for _ in range(self.n_learners)
            ]
            handle = []

            for i in range(self.n_learners):
                self.line_current_hamming_score[
                    i], = self.subplot_hamming_score.plot(
                        self.sample_id,
                        self.current_hamming_score[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_hamming_score[
                    i], = self.subplot_hamming_score.plot(
                        self.sample_id,
                        self.mean_hamming_score[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_current_hamming_score[i])
                handle.append(self.line_mean_hamming_score[i])

            self._set_fig_legend(handle)
            self.subplot_hamming_score.set_ylim(0, 1)

        if constants.HAMMING_LOSS in self.plots:
            self.mean_hamming_loss = [[] for _ in range(self.n_learners)]
            self.current_hamming_loss = [[] for _ in range(self.n_learners)]

            self.subplot_hamming_loss = self.fig.add_subplot(base)
            self.subplot_hamming_loss.set_title('Hamming loss')
            self.subplot_hamming_loss.set_ylabel('Hamming loss')
            base += 1

            self.line_current_hamming_loss = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_hamming_loss = [
                None for _ in range(self.n_learners)
            ]
            handle = []

            for i in range(self.n_learners):
                self.line_current_hamming_loss[
                    i], = self.subplot_hamming_loss.plot(
                        self.sample_id,
                        self.current_hamming_loss[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_hamming_loss[
                    i], = self.subplot_hamming_loss.plot(
                        self.sample_id,
                        self.mean_hamming_loss[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_current_hamming_loss[i])
                handle.append(self.line_mean_hamming_loss[i])

            self._set_fig_legend(handle)
            self.subplot_hamming_loss.set_ylim(0, 1)

        if constants.EXACT_MATCH in self.plots:
            self.mean_exact_match = [[] for _ in range(self.n_learners)]
            self.current_exact_match = [[] for _ in range(self.n_learners)]

            self.subplot_exact_match = self.fig.add_subplot(base)
            self.subplot_exact_match.set_title('Exact matches')
            self.subplot_exact_match.set_ylabel('Exact matches')
            base += 1

            self.line_current_exact_match = [
                None for _ in range(self.n_learners)
            ]
            self.line_mean_exact_match = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_exact_match[
                    i], = self.subplot_exact_match.plot(
                        self.sample_id,
                        self.current_exact_match[i],
                        label='Model {}  (sliding {} samples)'.format(
                            self.model_names[i], self.n_sliding))
                self.line_mean_exact_match[i], = self.subplot_exact_match.plot(
                    self.sample_id,
                    self.mean_exact_match[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_exact_match[i])
                handle.append(self.line_mean_exact_match[i])

            self._set_fig_legend(handle)
            self.subplot_exact_match.set_ylim(0, 1)

        if constants.J_INDEX in self.plots:
            self.mean_j_index = [[] for _ in range(self.n_learners)]
            self.current_j_index = [[] for _ in range(self.n_learners)]

            self.subplot_j_index = self.fig.add_subplot(base)
            self.subplot_j_index.set_title('Jaccard index')
            self.subplot_j_index.set_ylabel('Jaccard index')
            base += 1

            self.line_current_j_index = [None for _ in range(self.n_learners)]
            self.line_mean_j_index = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_j_index[i], = self.subplot_j_index.plot(
                    self.sample_id,
                    self.current_j_index[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_j_index[i], = self.subplot_j_index.plot(
                    self.sample_id,
                    self.mean_j_index[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_j_index[i])
                handle.append(self.line_mean_j_index[i])

            self._set_fig_legend(handle)
            self.subplot_j_index.set_ylim(0, 1)

        if constants.MSE in self.plots:
            self.mean_mse = [[] for _ in range(self.n_learners)]
            self.current_mse = [[] for _ in range(self.n_learners)]

            self.subplot_mse = self.fig.add_subplot(base)
            self.subplot_mse.set_title('Mean Squared Error')
            self.subplot_mse.set_ylabel('MSE')
            base += 1

            self.line_current_mse = [None for _ in range(self.n_learners)]
            self.line_mean_mse = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_mse[i], = self.subplot_mse.plot(
                    self.sample_id,
                    self.current_mse[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_mse[i], = self.subplot_mse.plot(
                    self.sample_id,
                    self.mean_mse[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_mse[i])
                handle.append(self.line_mean_mse[i])

            self._set_fig_legend(handle)
            self.subplot_mse.set_ylim(0, 1)

        if constants.MAE in self.plots:
            self.mean_mae = [[] for _ in range(self.n_learners)]
            self.current_mae = [[] for _ in range(self.n_learners)]

            self.subplot_mae = self.fig.add_subplot(base)
            self.subplot_mae.set_title('Mean Absolute Error')
            self.subplot_mae.set_ylabel('MAE')
            base += 1

            self.line_current_mae = [None for _ in range(self.n_learners)]
            self.line_mean_mae = [None for _ in range(self.n_learners)]
            handle = []

            for i in range(self.n_learners):
                self.line_current_mae[i], = self.subplot_mae.plot(
                    self.sample_id,
                    self.current_mae[i],
                    label='Model {}  (sliding {} samples)'.format(
                        self.model_names[i], self.n_sliding))
                self.line_mean_mae[i], = self.subplot_mae.plot(
                    self.sample_id,
                    self.mean_mae[i],
                    label='Model {} (global)'.format(self.model_names[i]),
                    linestyle='dotted')
                handle.append(self.line_current_mae[i])
                handle.append(self.line_mean_mae[i])

            self._set_fig_legend(handle)
            self.subplot_mae.set_ylim(0, 1)

        if constants.TRUE_VS_PREDICTED in self.plots:
            self.true_values = []
            self.pred_values = [[] for _ in range(self.n_learners)]

            self.subplot_true_vs_predicted = self.fig.add_subplot(base)
            self.subplot_true_vs_predicted.set_title('True vs Predicted')
            self.subplot_true_vs_predicted.set_ylabel('y')
            self.subplot_true_vs_predicted.set_prop_cycle(
                cycler('color', ['c', 'm', 'y', 'k']))
            base += 1

            if self.task_type == constants.CLASSIFICATION:
                self.line_true, = self.subplot_true_vs_predicted.step(
                    self.sample_id, self.true_values, label='True value')
            else:
                self.line_true, = self.subplot_true_vs_predicted.plot(
                    self.sample_id, self.true_values, label='True value')
            handle = [self.line_true]

            self.line_pred = [None for _ in range(self.n_learners)]

            for i in range(self.n_learners):
                if self.task_type == constants.CLASSIFICATION:
                    self.line_pred[i], = self.subplot_true_vs_predicted.step(
                        self.sample_id,
                        self.pred_values[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                else:
                    self.line_pred[i], = self.subplot_true_vs_predicted.plot(
                        self.sample_id,
                        self.pred_values[i],
                        label='Model {} (global)'.format(self.model_names[i]),
                        linestyle='dotted')
                handle.append(self.line_pred[i])

            self.subplot_true_vs_predicted.legend(handles=handle)
            self.subplot_true_vs_predicted.set_ylim(0, 1)

        if constants.DATA_POINTS in self.plots:
            data_points = True
            self.Flag = True
            self.X = FastBuffer(5000)
            self.targets = []
            self.prediction = []
            self.clusters = []
            self.subplot_scatter_points = self.fig.add_subplot(base)
            base += 1

        if data_points:
            plt.xlabel('X1')
        else:
            plt.xlabel('Samples')

        self.fig.subplots_adjust(hspace=.5)
        self.fig.tight_layout(rect=[0, .04, 1, 0.98],
                              pad=2.6,
                              w_pad=0.5,
                              h_pad=1.0)

    def draw(self, train_step, metrics_dict):
        """ Update the plotted data and redraw the figure.

        Parameters
        ----------
        train_step: int
            The number of samples processed up to this point.

        metrics_dict: dict
            Maps a plot identifier (e.g. 'performance') to a list of
            per-learner tuples carrying that metric's latest values. For
            most metrics the tuple is (global value, sliding-window value).

        """

        self.sample_id.append(train_step)

        self._clear_annotations()

        if 'performance' in self.plots:
            for i in range(self.n_learners):
                self.global_performance[i].append(
                    metrics_dict['performance'][i][0])
                self.partial_performance[i].append(
                    metrics_dict['performance'][i][1])
                self.line_global_performance[i].set_data(
                    self.sample_id, self.global_performance[i])
                self.line_partial_performance[i].set_data(
                    self.sample_id, self.partial_performance[i])

                self._update_annotations(i, self.subplot_performance,
                                         self.model_names[i],
                                         self.global_performance[i][-1],
                                         self.partial_performance[i][-1])

            self.subplot_performance.set_xlim(0, self.sample_id[-1])
            self.subplot_performance.set_ylim(0, 1)

        if 'kappa' in self.plots:
            for i in range(self.n_learners):
                self.global_kappa[i].append(metrics_dict['kappa'][i][0])
                self.partial_kappa[i].append(metrics_dict['kappa'][i][1])
                self.line_global_kappa[i].set_data(self.sample_id,
                                                   self.global_kappa[i])
                self.line_partial_kappa[i].set_data(self.sample_id,
                                                    self.partial_kappa[i])

                self._update_annotations(i, self.subplot_kappa,
                                         self.model_names[i],
                                         self.global_kappa[i][-1],
                                         self.partial_kappa[i][-1])

            self.subplot_kappa.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa.set_ylim(0, 1)

        if 'kappa_t' in self.plots:
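            # Kappa statistics can be negative, so track the observed minimum
            # instead of clamping the y-axis at zero.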
            minimum = -1.
            for i in range(self.n_learners):
                self.global_kappa_t[i].append(metrics_dict['kappa_t'][i][0])
                self.partial_kappa_t[i].append(metrics_dict['kappa_t'][i][1])
                self.line_global_kappa_t[i].set_data(self.sample_id,
                                                     self.global_kappa_t[i])
                self.line_partial_kappa_t[i].set_data(self.sample_id,
                                                      self.partial_kappa_t[i])

                self._update_annotations(i, self.subplot_kappa_t,
                                         self.model_names[i],
                                         self.global_kappa_t[i][-1],
                                         self.partial_kappa_t[i][-1])

                minimum = min(minimum, min(self.global_kappa_t[i]),
                              min(self.partial_kappa_t[i]))

            self.subplot_kappa_t.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_t.set_ylim(minimum, 1.)

        if 'kappa_m' in self.plots:
            minimum = -1.
            for i in range(self.n_learners):
                self.global_kappa_m[i].append(metrics_dict['kappa_m'][i][0])
                self.partial_kappa_m[i].append(metrics_dict['kappa_m'][i][1])
                self.line_global_kappa_m[i].set_data(self.sample_id,
                                                     self.global_kappa_m[i])
                self.line_partial_kappa_m[i].set_data(self.sample_id,
                                                      self.partial_kappa_m[i])

                self._update_annotations(i, self.subplot_kappa_m,
                                         self.model_names[i],
                                         self.global_kappa_m[i][-1],
                                         self.partial_kappa_m[i][-1])

                minimum = min(minimum, min(self.global_kappa_m[i]),
                              min(self.partial_kappa_m[i]))

            self.subplot_kappa_m.set_xlim(0, self.sample_id[-1])
            self.subplot_kappa_m.set_ylim(minimum, 1.)

        if 'hamming_score' in self.plots:
            for i in range(self.n_learners):
                self.global_hamming_score[i].append(
                    metrics_dict['hamming_score'][i][0])
                self.partial_hamming_score[i].append(
                    metrics_dict['hamming_score'][i][1])
                self.line_global_hamming_score[i].set_data(
                    self.sample_id, self.global_hamming_score[i])
                self.line_partial_hamming_score[i].set_data(
                    self.sample_id, self.partial_hamming_score[i])

                self._update_annotations(i, self.subplot_hamming_score,
                                         self.model_names[i],
                                         self.global_hamming_score[i][-1],
                                         self.partial_hamming_score[i][-1])

            self.subplot_hamming_score.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_score.set_ylim(0, 1)

        if 'hamming_loss' in self.plots:
            for i in range(self.n_learners):
                self.global_hamming_loss[i].append(
                    metrics_dict['hamming_loss'][i][0])
                self.partial_hamming_loss[i].append(
                    metrics_dict['hamming_loss'][i][1])
                self.line_global_hamming_loss[i].set_data(
                    self.sample_id, self.global_hamming_loss[i])
                self.line_partial_hamming_loss[i].set_data(
                    self.sample_id, self.partial_hamming_loss[i])

                self._update_annotations(i, self.subplot_hamming_loss,
                                         self.model_names[i],
                                         self.global_hamming_loss[i][-1],
                                         self.partial_hamming_loss[i][-1])

            self.subplot_hamming_loss.set_xlim(0, self.sample_id[-1])
            self.subplot_hamming_loss.set_ylim(0, 1)

        if 'exact_match' in self.plots:
            for i in range(self.n_learners):
                self.global_exact_match[i].append(
                    metrics_dict['exact_match'][i][0])
                self.partial_exact_match[i].append(
                    metrics_dict['exact_match'][i][1])
                self.line_global_exact_match[i].set_data(
                    self.sample_id, self.global_exact_match[i])
                self.line_partial_exact_match[i].set_data(
                    self.sample_id, self.partial_exact_match[i])

                self._update_annotations(i, self.subplot_exact_match,
                                         self.model_names[i],
                                         self.global_exact_match[i][-1],
                                         self.partial_exact_match[i][-1])

            self.subplot_exact_match.set_xlim(0, self.sample_id[-1])
            self.subplot_exact_match.set_ylim(0, 1)

        if 'j_index' in self.plots:
            for i in range(self.n_learners):
                self.global_j_index[i].append(metrics_dict['j_index'][i][0])
                self.partial_j_index[i].append(metrics_dict['j_index'][i][1])
                self.line_global_j_index[i].set_data(self.sample_id,
                                                     self.global_j_index[i])
                self.line_partial_j_index[i].set_data(self.sample_id,
                                                      self.partial_j_index[i])

                self._update_annotations(i, self.subplot_j_index,
                                         self.model_names[i],
                                         self.global_j_index[i][-1],
                                         self.partial_j_index[i][-1])

            self.subplot_j_index.set_xlim(0, self.sample_id[-1])
            self.subplot_j_index.set_ylim(0, 1)

        if 'mean_square_error' in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.global_mse[i].append(
                    metrics_dict['mean_square_error'][i][0])
                self.partial_mse[i].append(
                    metrics_dict['mean_square_error'][i][1])
                self.line_global_mse[i].set_data(self.sample_id,
                                                 self.global_mse[i])
                self.line_partial_mse[i].set_data(self.sample_id,
                                                  self.partial_mse[i])

                self._update_annotations(i, self.subplot_mse,
                                         self.model_names[i],
                                         self.global_mse[i][-1],
                                         self.partial_mse[i][-1])

                # minimum = min([min(self.global_mse[i]), min(self.partial_mse[i]), minimum])
                maximum = max([
                    max(self.global_mse[i]),
                    max(self.partial_mse[i]), maximum
                ])

            self.subplot_mse.set_xlim(0, self.sample_id[-1])
            self.subplot_mse.set_ylim(minimum, 1.2 * maximum)
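            # The factor 1.2 leaves 20% headroom above the largest observed
            # error; the lower bound stays pinned at -1.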

        if 'mean_absolute_error' in self.plots:
            minimum = -1
            maximum = 0
            for i in range(self.n_learners):
                self.global_mae[i].append(
                    metrics_dict['mean_absolute_error'][i][0])
                self.partial_mae[i].append(
                    metrics_dict['mean_absolute_error'][i][1])
                self.line_global_mae[i].set_data(self.sample_id,
                                                 self.global_mae[i])
                self.line_partial_mae[i].set_data(self.sample_id,
                                                  self.partial_mae[i])

                self._update_annotations(i, self.subplot_mae,
                                         self.model_names[i],
                                         self.global_mae[i][-1],
                                         self.partial_mae[i][-1])

                # minimum = min([min(self.global_mae[i]), min(self.partial_mae[i]), minimum])
                maximum = max([
                    max(self.global_mae[i]),
                    max(self.partial_mae[i]), maximum
                ])

            self.subplot_mae.set_xlim(0, self.sample_id[-1])
            self.subplot_mae.set_ylim(minimum, 1.2 * maximum)

        if 'true_vs_predicted' in self.plots:
            self.true_values.append(metrics_dict['true_vs_predicted'][0][0])
            self.line_true.set_data(self.sample_id, self.true_values)
            minimum = 0
            maximum = 0
            for i in range(self.n_learners):
                self.pred_values[i].append(
                    metrics_dict['true_vs_predicted'][i][1])
                self.line_pred[i].set_data(self.sample_id, self.pred_values[i])
                minimum = min(
                    [min(self.pred_values[i]),
                     min(self.true_values), minimum])
                maximum = max(
                    [max(self.pred_values[i]),
                     max(self.true_values), maximum])

            self.subplot_true_vs_predicted.set_xlim(0, self.sample_id[-1])
            self.subplot_true_vs_predicted.set_ylim(minimum - 1, maximum + 1)

            self.subplot_true_vs_predicted.legend(loc=2,
                                                  bbox_to_anchor=(1.01, 1.))

        if 'data_points' in self.plots:
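            # Scatter of the two monitored features, grouped by the model's
            # predicted class; only single-model setups are supported here.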
            self.X.add_element(metrics_dict['data_points'][0][0])

            self.targets = metrics_dict['data_points'][0][1]
            if self.n_learners > 1:
                raise ValueError(
                    "Cannot compare multiple models in the 'data_points' "
                    "plot.")
            else:
                self.prediction.append(metrics_dict['data_points'][0][2])
                if self.Flag:
                    # First call: create one cluster buffer per target class.
                    for _ in range(len(self.targets)):
                        self.clusters.append(FastBuffer(100))
                    self.Flag = False

                # Rebuild the scatter from scratch on every call; each
                # per-class buffer keeps only its most recent points.
                self.subplot_scatter_points.clear()

                self.subplot_scatter_points.set_ylabel('X2')
                self.subplot_scatter_points.set_xlabel('X1')
                X1 = self.X.get_queue()[-1][0]
                X2 = self.X.get_queue()[-1][1]

                for k, cluster in enumerate(self.clusters):
                    if self.prediction[-1] == k:
                        cluster.add_element([(X1, X2)])
                    if cluster.get_queue():
                        self.subplot_scatter_points.scatter(
                            *zip(*cluster.get_queue()),
                            label="class {k}".format(k=k))
                        self.subplot_scatter_points.legend(loc="best")

        if self._draw_cnt == 4:  # Refresh rate to mitigate re-drawing overhead for small changes
            plt.subplots_adjust(
                right=0.72)  # Adjust subplots to include metrics
            self.fig.canvas.draw()
            plt.pause(1e-9)
            self._draw_cnt = 0
        else:
            self._draw_cnt += 1