Example #1
def test_calculate_object_size():
    elems = []
    array_length = 10

    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    if sys.platform == 'linux' and sys.version_info[:2] >= (3, 6):
        # object sizes vary across architectures and OSs
        # following are "expected" sizes for Python 3.6+ on linux systems
        expected_size_in_bytes_1 = 37335
        expected_size_in_bytes_2 = 37343
        expected_size_in_bytes_3 = 37327
        byte_size = calculate_object_size(elems, 'byte')
        assert np.isclose(byte_size, expected_size_in_bytes_1) or \
               np.isclose(byte_size, expected_size_in_bytes_2) or \
               np.isclose(byte_size, expected_size_in_bytes_3)
    else:
        # only run for coverage
        calculate_object_size(elems, 'byte')

    # Run the 'kB' and 'MB' variants for coverage.
    # No assert is needed since they are based on the 'byte' size.
    calculate_object_size(elems, 'kB')
    calculate_object_size(elems, 'MB')
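For context, a minimal sketch of what a helper like calculate_object_size could look like: recursively sum sys.getsizeof over the object graph and convert the byte total into the requested unit. The traversal below is an assumption for illustration only, not the skmultiflow implementation, which may count memory differently.

import sys

def approx_object_size(obj, unit='byte', _seen=None):
    # Recursively sum sys.getsizeof over containers (illustrative traversal only).
    if _seen is None:
        _seen = set()
    if id(obj) in _seen:
        return 0
    _seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(approx_object_size(k, 'byte', _seen) + approx_object_size(v, 'byte', _seen)
                    for k, v in obj.items())
    elif isinstance(obj, (list, tuple, set, frozenset)):
        size += sum(approx_object_size(item, 'byte', _seen) for item in obj)
    if unit == 'kB':
        return size / 1024
    if unit == 'MB':
        return size / (1024 ** 2)
    return size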
Example #2
def test_calculate_object_size():
    elems = []
    array_length = 10

    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    assert calculate_object_size(elems, 'byte') == 37335
    assert calculate_object_size(elems, 'kB') == 36.4599609375
    assert calculate_object_size(elems, 'MB') == 0.035605430603027344
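The kB and MB figures are the byte count rescaled by powers of 1024 (37335 / 1024 = 36.4599609375 and 37335 / 1024 ** 2 = 0.035605430603027344), so the three assertions check one measurement in three units. A quick, self-contained check of that arithmetic:

byte_size = 37335
assert byte_size / 1024 == 36.4599609375              # 'kB'
assert byte_size / 1024 ** 2 == 0.035605430603027344  # 'MB'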
Example #3
    def measure_byte_size(self):
        """ Calculate the size of the tree.

        Returns
        -------
        int
            Size of the tree in bytes.

        """
        return calculate_object_size(self)
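A hedged usage sketch of the pattern above: the method delegates to calculate_object_size with the default 'byte' unit, so the object reports its own in-memory footprint. ToyTree and the import path are assumptions for illustration; only the measure_byte_size pattern is taken from the snippet.

from skmultiflow.utils import calculate_object_size  # assumed import path

class ToyTree:
    def __init__(self):
        # Arbitrary payload so the measured size is non-trivial.
        self.nodes = [list(range(100)) for _ in range(10)]

    def measure_byte_size(self):
        # Same pattern as above: report the whole object's size in bytes.
        return calculate_object_size(self)

print(ToyTree().measure_byte_size())  # size in bytes, platform dependent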
Example #4
    def estimate_model_byte_size(self):
        """ Calculate the size of the model and trigger the tracker function if the
        actual model size exceeds the max size set in the configuration.
        """
        learning_nodes = self._find_learning_nodes()
        total_active_size = 0
        total_inactive_size = 0
        for found_node in learning_nodes:
            if isinstance(found_node.node, self.AnyTimeActiveLearningNode):
                total_active_size += calculate_object_size(found_node.node)
            else:
                total_inactive_size += calculate_object_size(found_node.node)
        if total_active_size > 0:
            self._active_leaf_byte_size_estimate = total_active_size / self._active_leaf_node_cnt
        if total_inactive_size > 0:
            self._inactive_leaf_byte_size_estimate = total_inactive_size / self._inactive_leaf_node_cnt
        actual_model_size = calculate_object_size(self)
        estimated_model_size = (self._active_leaf_node_cnt * self._active_leaf_byte_size_estimate
                                + self._inactive_leaf_node_cnt * self._inactive_leaf_byte_size_estimate)
        self._byte_size_estimate_overhead_fraction = actual_model_size / estimated_model_size
        if actual_model_size > self.max_byte_size:
            self.enforce_tracker_limit()
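The bookkeeping above boils down to simple arithmetic: average the measured bytes per active and per inactive leaf, scale those averages back up by the leaf counts to get an estimated model size, and keep the ratio of actual to estimated size as an overhead fraction. A standalone sketch with invented numbers (all figures are illustrative, not measurements):

active_leaf_cnt, inactive_leaf_cnt = 8, 2
total_active_size, total_inactive_size = 64000, 4000  # bytes, invented
actual_model_size = 90000                             # bytes, invented

active_leaf_estimate = total_active_size / active_leaf_cnt        # 8000.0 bytes per active leaf
inactive_leaf_estimate = total_inactive_size / inactive_leaf_cnt  # 2000.0 bytes per inactive leaf
estimated_model_size = (active_leaf_cnt * active_leaf_estimate
                        + inactive_leaf_cnt * inactive_leaf_estimate)  # 68000.0
overhead_fraction = actual_model_size / estimated_model_size           # ~1.32

max_byte_size = 80000
if actual_model_size > max_byte_size:
    print('actual size exceeds the configured limit; the tracker would be enforced')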
Example #5
def test_calculate_object_size():
    elems = []
    array_length = 10

    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    if sys.platform == 'linux':
        # assert sizes based on a linux system
        assert calculate_object_size(elems, 'byte') == 37335
        assert calculate_object_size(elems, 'kB') == 36.4599609375
        assert calculate_object_size(elems, 'MB') == 0.035605430603027344
    else:
        # run for coverage
        calculate_object_size(elems, 'byte')
        calculate_object_size(elems, 'kB')
        calculate_object_size(elems, 'MB')
Example #6
    def _update_metrics(self):
        """ Updates the metrics of interest. This function updates the evaluation data buffer
        which is used to track performance during evaluation.

        The content of the buffer depends on the evaluation task type and metrics selected.

        If more than one model/learner is evaluated at once, data is stored as lists inside
        the buffer.

        """
        shift = 0
        if self._method == 'prequential':
            shift = -self.batch_size  # Adjust index due to training after testing
        sample_id = self.global_sample_count + shift

        for metric in self.metrics:
            values = [[], []]
            if metric == constants.ACCURACY:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_accuracy())
                    values[1].append(self.current_eval_measurements[i].get_accuracy())

            elif metric == constants.KAPPA:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_kappa())
                    values[1].append(self.current_eval_measurements[i].get_kappa())

            elif metric == constants.KAPPA_T:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_kappa_t())
                    values[1].append(self.current_eval_measurements[i].get_kappa_t())

            elif metric == constants.KAPPA_M:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_kappa_m())
                    values[1].append(self.current_eval_measurements[i].get_kappa_m())

            elif metric == constants.HAMMING_SCORE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_hamming_score())
                    values[1].append(self.current_eval_measurements[i].get_hamming_score())

            elif metric == constants.HAMMING_LOSS:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_hamming_loss())
                    values[1].append(self.current_eval_measurements[i].get_hamming_loss())

            elif metric == constants.EXACT_MATCH:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_exact_match())
                    values[1].append(self.current_eval_measurements[i].get_exact_match())

            elif metric == constants.J_INDEX:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_j_index())
                    values[1].append(self.current_eval_measurements[i].get_j_index())

            elif metric == constants.MSE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_mean_square_error())
                    values[1].append(self.current_eval_measurements[i].get_mean_square_error())

            elif metric == constants.MAE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_average_error())
                    values[1].append(self.current_eval_measurements[i].get_average_error())

            elif metric == constants.AMSE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_average_mean_square_error())
                    values[1].append(self.current_eval_measurements[i].get_average_mean_square_error())

            elif metric == constants.AMAE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_average_absolute_error())
                    values[1].append(self.current_eval_measurements[i].get_average_absolute_error())

            elif metric == constants.ARMSE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_average_root_mean_square_error())
                    values[1].append(self.current_eval_measurements[i].get_average_root_mean_square_error())

            elif metric == constants.F1_SCORE:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_f1_score())
                    values[1].append(self.current_eval_measurements[i].get_f1_score())

            elif metric == constants.PRECISION:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_precision())
                    values[1].append(self.current_eval_measurements[i].get_precision())

            elif metric == constants.RECALL:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_recall())
                    values[1].append(self.current_eval_measurements[i].get_recall())

            elif metric == constants.GMEAN:
                for i in range(self.n_models):
                    values[0].append(self.mean_eval_measurements[i].get_g_mean())
                    values[1].append(self.current_eval_measurements[i].get_g_mean())

            elif metric == constants.TRUE_VS_PREDICTED:
                y_true = -1
                y_pred = []
                for i in range(self.n_models):
                    t, p = self.mean_eval_measurements[i].get_last()
                    y_true = t  # We only need to keep one true value
                    y_pred.append(p)
                values[0] = y_true
                for i in range(self.n_models):
                    values[1].append(y_pred[i])

            elif metric == constants.DATA_POINTS:
                target_values = self.stream.target_values
                features = {}  # Dictionary containing feature values, using index as key

                y_pred, p = self.mean_eval_measurements[0].get_last()  # Only track one model (first) by default

                X, _ = self.stream.last_sample()
                idx_1 = 0  # TODO let the user choose the feature indices of interest
                idx_2 = 1
                features[idx_1] = X[0][idx_1]
                features[idx_2] = X[0][idx_2]

                values = [None, None, None]
                values[0] = features
                values[1] = target_values
                values[2] = y_pred

            elif metric == constants.RUNNING_TIME:
                values = [[], [], []]
                for i in range(self.n_models):
                    values[0].append(self.running_time_measurements[i].get_current_training_time())
                    values[1].append(self.running_time_measurements[i].get_current_testing_time())
                    values[2].append(self.running_time_measurements[i].get_current_total_running_time())

            elif metric == constants.MODEL_SIZE:
                values = []
                for i in range(self.n_models):
                    values.append(calculate_object_size(self.model[i], 'kB'))

            else:
                raise ValueError('Unknown metric {}'.format(metric))

            # Update buffer
            if metric == constants.TRUE_VS_PREDICTED:
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id=constants.Y_TRUE,
                                              value=values[0])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id=constants.Y_PRED,
                                              value=values[1])
            elif metric == constants.DATA_POINTS:
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='X',
                                              value=values[0])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='target_values',
                                              value=values[1])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='predictions',
                                              value=values[2])
            elif metric == constants.RUNNING_TIME:
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='training_time',
                                              value=values[0])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='testing_time',
                                              value=values[1])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='total_running_time',
                                              value=values[2])
            elif metric == constants.MODEL_SIZE:
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id='model_size',
                                              value=values)
            else:
                # Default case, 'mean' and 'current' performance
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id=constants.MEAN,
                                              value=values[0])
                self._data_buffer.update_data(sample_id=sample_id, metric_id=metric, data_id=constants.CURRENT,
                                              value=values[1])

        # sample_id already includes the prequential shift computed at the top of this method
        self._update_outputs(sample_id)
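Most branches of the elif chain above share one shape: call the same getter on the mean and on the current measurement objects for every model. A hedged sketch of how that common case could be collapsed into a dispatch table; the getter names are copied from the branches above, while SIMPLE_METRIC_GETTERS and _collect_simple_metric are hypothetical names, not part of the library:

# Metric id -> name of the getter shared by mean and current measurements (sketch only).
SIMPLE_METRIC_GETTERS = {
    constants.ACCURACY: 'get_accuracy',
    constants.KAPPA: 'get_kappa',
    constants.KAPPA_T: 'get_kappa_t',
    constants.KAPPA_M: 'get_kappa_m',
    constants.PRECISION: 'get_precision',
    constants.RECALL: 'get_recall',
    constants.F1_SCORE: 'get_f1_score',
    constants.GMEAN: 'get_g_mean',
}

def _collect_simple_metric(evaluator, metric):
    # Returns ([mean value per model], [current value per model]) for one metric.
    getter = SIMPLE_METRIC_GETTERS[metric]
    mean_values = [getattr(m, getter)() for m in evaluator.mean_eval_measurements]
    current_values = [getattr(m, getter)() for m in evaluator.current_eval_measurements]
    return mean_values, current_values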