def _init_metrics(self):
    """ Starts up the metrics and statistics watchers. One watcher is
    created for each of the learners to be evaluated.
    """
    self.mean_eval_measurements = []
    self.current_eval_measurements = []

    if self._task_type == constants.CLASSIFICATION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(
                ClassificationMeasurements())
            self.current_eval_measurements.append(
                WindowClassificationMeasurements(
                    window_size=self.n_sliding))
    elif self._task_type == constants.MULTI_TARGET_CLASSIFICATION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(
                MultiTargetClassificationMeasurements())
            self.current_eval_measurements.append(
                WindowMultiTargetClassificationMeasurements(
                    window_size=self.n_sliding))
    elif self._task_type == constants.REGRESSION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(RegressionMeasurements())
            self.current_eval_measurements.append(
                WindowRegressionMeasurements(window_size=self.n_sliding))
    elif self._task_type == constants.MULTI_TARGET_REGRESSION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(
                MultiTargetRegressionMeasurements())
            self.current_eval_measurements.append(
                WindowMultiTargetRegressionMeasurements(
                    window_size=self.n_sliding))

    # Running time
    self.running_time_measurements = []
    for i in range(self.n_models):
        self.running_time_measurements.append(RunningTimeMeasurements())

    # Evaluation data buffer
    self._data_dict = {}
    for metric in self.metrics:
        data_ids = [constants.MEAN, constants.CURRENT]
        if metric == constants.TRUE_VS_PREDICTED:
            data_ids = [constants.Y_TRUE, constants.Y_PRED]
        elif metric == constants.DATA_POINTS:
            data_ids = ['X', 'target_values', 'prediction']
        elif metric == constants.RUNNING_TIME:
            data_ids = ['training_time', 'testing_time', 'total_running_time']
        elif metric == constants.MODEL_SIZE:
            data_ids = ['model_size']
        self._data_dict[metric] = data_ids

    self._data_buffer = EvaluationDataBuffer(data_dict=self._data_dict)
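
# The loop above maps every requested metric to the data series the buffer
# will track for it. The minimal sketch below reproduces that mapping outside
# the evaluator for a hypothetical metrics list; the literal string values
# standing in for the constants module are assumptions for illustration only.
MEAN, CURRENT = 'mean', 'current'
Y_TRUE, Y_PRED = 'y_true', 'y_pred'
TRUE_VS_PREDICTED, DATA_POINTS = 'true_vs_predicted', 'data_points'
RUNNING_TIME, MODEL_SIZE = 'running_time', 'model_size'


def build_data_dict(metrics):
    """Replicate the buffer layout: each metric maps to its data ids."""
    data_dict = {}
    for metric in metrics:
        data_ids = [MEAN, CURRENT]  # default: cumulative + sliding-window values
        if metric == TRUE_VS_PREDICTED:
            data_ids = [Y_TRUE, Y_PRED]
        elif metric == DATA_POINTS:
            data_ids = ['X', 'target_values', 'prediction']
        elif metric == RUNNING_TIME:
            data_ids = ['training_time', 'testing_time', 'total_running_time']
        elif metric == MODEL_SIZE:
            data_ids = ['model_size']
        data_dict[metric] = data_ids
    return data_dict


print(build_data_dict(['accuracy', 'kappa', 'running_time']))
# {'accuracy': ['mean', 'current'], 'kappa': ['mean', 'current'],
#  'running_time': ['training_time', 'testing_time', 'total_running_time']}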
def _init_metrics(self):
    """ Starts up the metrics and statistics watchers. One watcher is
    created for each of the learners to be evaluated.
    """
    self.mean_eval_measurements = []
    self.current_eval_measurements = []

    if self._task_type == constants.CLASSIFICATION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(
                ClassificationMeasurements())
            self.current_eval_measurements.append(
                WindowClassificationMeasurements(
                    window_size=self.n_sliding))
    elif self._task_type == constants.MULTI_OUTPUT:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(MultiOutputMeasurements())
            self.current_eval_measurements.append(
                WindowMultiOutputMeasurements(window_size=self.n_sliding))
    elif self._task_type == constants.REGRESSION:
        for i in range(self.n_models):
            self.mean_eval_measurements.append(RegressionMeasurements())
            self.current_eval_measurements.append(
                WindowRegressionMeasurements(window_size=self.n_sliding))
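
# The two lists built above pair a cumulative ("mean") tracker with a
# sliding-window ("current") tracker for each model. The sketch below is a
# simplified, hypothetical analogue of that pairing for plain accuracy; it is
# not the library's ClassificationMeasurements / WindowClassificationMeasurements
# implementation.
from collections import deque


class _CumulativeAccuracy:
    """Accuracy over every (y_true, y_pred) pair seen so far."""

    def __init__(self):
        self.correct = 0
        self.total = 0

    def add_result(self, y_true, y_pred):
        self.correct += int(y_true == y_pred)
        self.total += 1

    def get_accuracy(self):
        return self.correct / self.total if self.total else 0.0


class _WindowAccuracy:
    """Accuracy over only the most recent `window_size` pairs."""

    def __init__(self, window_size=200):
        self.window = deque(maxlen=window_size)

    def add_result(self, y_true, y_pred):
        self.window.append(int(y_true == y_pred))

    def get_accuracy(self):
        return sum(self.window) / len(self.window) if self.window else 0.0


# After a shift in the stream, the windowed tracker reacts faster than the
# cumulative one.
mean_acc, current_acc = _CumulativeAccuracy(), _WindowAccuracy(window_size=3)
for yt, yp in [(1, 1), (1, 1), (0, 1), (0, 1), (0, 1)]:
    mean_acc.add_result(yt, yp)
    current_acc.add_result(yt, yp)
print(mean_acc.get_accuracy(), current_acc.get_accuracy())  # 0.4 vs 0.0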
def test_classification_measurements():
    y_true = np.concatenate((np.ones(85), np.zeros(10), np.ones(5)))
    y_pred = np.concatenate((np.ones(90), np.zeros(10)))

    measurements = ClassificationMeasurements()
    for i in range(len(y_true)):
        measurements.add_result(y_true[i], y_pred[i])

    expected_acc = 90 / 100
    assert expected_acc == measurements.get_accuracy()

    expected_incorrectly_classified_ratio = 1 - expected_acc
    assert expected_incorrectly_classified_ratio == \
        measurements.get_incorrectly_classified_ratio()

    expected_kappa = (expected_acc - 0.82) / (1 - 0.82)
    assert np.isclose(expected_kappa, measurements.get_kappa())

    expected_kappa_m = (expected_acc - 0.9) / (1 - 0.9)
    assert np.isclose(expected_kappa_m, measurements.get_kappa_m())

    expected_kappa_t = (expected_acc - 0.97) / (1 - 0.97)
    assert expected_kappa_t == measurements.get_kappa_t()

    expected_precision = 85 / (85 + 5)
    assert np.isclose(expected_precision, measurements.get_precision())

    expected_recall = 85 / (85 + 5)
    assert np.isclose(expected_recall, measurements.get_recall())

    expected_f1_score = 2 * ((expected_precision * expected_recall) /
                             (expected_precision + expected_recall))
    assert np.isclose(expected_f1_score, measurements.get_f1_score())

    expected_g_mean = np.sqrt((5 / (5 + 5)) * expected_recall)
    assert np.isclose(expected_g_mean, measurements.get_g_mean())

    expected_info = 'ClassificationMeasurements: - sample_count: 100 - accuracy: 0.900000 - kappa: 0.444444 ' \
                    '- kappa_t: -2.333333 - kappa_m: 0.000000 - f1-score: 0.944444 - precision: 0.944444 ' \
                    '- recall: 0.944444 - g-mean: 0.687184 - majority_class: 1'
    assert expected_info == measurements.get_info()

    expected_last = (1.0, 0.0)
    assert expected_last == measurements.get_last()

    expected_majority_class = 1
    assert expected_majority_class == measurements.get_majority_class()

    measurements.reset()
    assert measurements.sample_count == 0
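
# Independent sanity check (not part of the original test): the hand-derived
# expectations above match scikit-learn's implementations for the same arrays.
# With 85 true positives, 5 false positives, 5 false negatives and 5 true
# negatives, precision and recall are both 85/90 and chance agreement is 0.82.
import numpy as np
from sklearn.metrics import (accuracy_score, cohen_kappa_score, f1_score,
                             precision_score, recall_score)

y_true = np.concatenate((np.ones(85), np.zeros(10), np.ones(5)))
y_pred = np.concatenate((np.ones(90), np.zeros(10)))

p = r = 85 / 90
assert np.isclose(accuracy_score(y_true, y_pred), 0.9)
assert np.isclose(cohen_kappa_score(y_true, y_pred), (0.9 - 0.82) / (1 - 0.82))
assert np.isclose(precision_score(y_true, y_pred), p)
assert np.isclose(recall_score(y_true, y_pred), r)
assert np.isclose(f1_score(y_true, y_pred), 2 * p * r / (p + r))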
def test_classification_measurements():
    y_true = np.concatenate((np.ones(85), np.zeros(10), np.ones(5)))
    y_pred = np.concatenate((np.ones(90), np.zeros(10)))

    measurements = ClassificationMeasurements()
    for i in range(len(y_true)):
        measurements.add_result(y_true[i], y_pred[i])

    expected_acc = 0.9
    assert expected_acc == measurements.get_accuracy()

    expected_incorrectly_classified_ratio = 1 - expected_acc
    assert expected_incorrectly_classified_ratio == \
        measurements.get_incorrectly_classified_ratio()

    expected_kappa = (expected_acc - 0.82) / (1 - 0.82)
    assert np.isclose(expected_kappa, measurements.get_kappa())

    # The majority class (1) covers 90 of the 100 samples, so the
    # majority-vote baseline accuracy used by kappa_m is 0.9.
    expected_kappa_m = (expected_acc - 0.9) / (1 - 0.9)
    assert np.isclose(expected_kappa_m, measurements.get_kappa_m())

    expected_kappa_t = (expected_acc - 0.97) / (1 - 0.97)
    assert expected_kappa_t == measurements.get_kappa_t()

    expected_info = 'ClassificationMeasurements: - sample_count: 100 - accuracy: 0.900000 - kappa: 0.444444 ' \
                    '- kappa_t: -2.333333 - kappa_m: 0.000000 - majority_class: 1'
    assert expected_info == measurements.get_info()

    expected_last = (1.0, 0.0)
    assert expected_last == measurements.get_last()

    expected_majority_class = 1
    assert expected_majority_class == measurements.get_majority_class()

    measurements.reset()
    assert measurements.sample_count == 0

    assert measurements.get_class_type() == 'measurement'
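
# Where the kappa baselines above come from (illustrative sketch, not part of
# the original test): kappa_m compares against a majority-class predictor and
# kappa_t against a no-change predictor that repeats the previous true label.
# Counting the first sample (which has no predecessor) as incorrect reproduces
# the 0.97 value used in the test; that convention is an assumption here.
import numpy as np

y_true = np.concatenate((np.ones(85), np.zeros(10), np.ones(5)))

# Majority-class baseline: always predict the most frequent true label (1),
# which is correct for 90 of the 100 samples.
values, counts = np.unique(y_true, return_counts=True)
majority_acc = counts.max() / counts.sum()
assert np.isclose(majority_acc, 0.9)

# No-change baseline: predict the previous true label; it fails only at the
# two label switches (1 -> 0 at index 85 and 0 -> 1 at index 95) and on the
# first sample, giving 97 correct out of 100.
no_change_correct = np.sum(y_true[1:] == y_true[:-1])
no_change_acc = no_change_correct / len(y_true)
assert np.isclose(no_change_acc, 0.97)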