import sys

import numpy as np

from skmultiflow.utils import calculate_object_size


def test_calculate_object_size():
    elems = []
    array_length = 10
    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    if sys.platform == 'linux' and sys.version_info[:2] >= (3, 6):
        # Object sizes vary across architectures and OSs.
        # The following are "expected" sizes for Python 3.6+ on linux systems.
        expected_size_in_bytes_1 = 37335
        expected_size_in_bytes_2 = 37343
        expected_size_in_bytes_3 = 37327
        assert np.isclose(calculate_object_size(elems, 'byte'), expected_size_in_bytes_1) or \
            np.isclose(calculate_object_size(elems, 'byte'), expected_size_in_bytes_2) or \
            np.isclose(calculate_object_size(elems, 'byte'), expected_size_in_bytes_3)
    else:
        # Only run for coverage
        calculate_object_size(elems, 'byte')

    # Run the 'kB' and 'MB' variants for coverage.
    # No assert is needed since they are derived from the 'byte' size.
    calculate_object_size(elems, 'kB')
    calculate_object_size(elems, 'MB')
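For orientation, the behaviour exercised by this test can be approximated by a recursive traversal built on sys.getsizeof. The sketch below only illustrates the idea and is not the library's actual implementation; recursive_size is a hypothetical helper, and the 'kB'/'MB' values are simply the byte total divided by 1024 and 1024**2.

import sys
import numpy as np

def recursive_size(obj, seen=None):
    # Hypothetical sketch: sum sys.getsizeof over the object graph,
    # remembering ids already visited to avoid double counting.
    if seen is None:
        seen = set()
    if id(obj) in seen:
        return 0
    seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(recursive_size(k, seen) + recursive_size(v, seen)
                    for k, v in obj.items())
    elif isinstance(obj, (list, tuple, set)):
        size += sum(recursive_size(item, seen) for item in obj)
    return size

elems = []
for _ in range(100):
    elems.append(np.ones(10, np.int8))
    elems.append('testing_string')
print(recursive_size(elems))              # 'byte'
print(recursive_size(elems) / 1024)       # 'kB'
print(recursive_size(elems) / 1024 ** 2)  # 'MB'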
def test_calculate_object_size():
    elems = []
    array_length = 10
    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    assert calculate_object_size(elems, 'byte') == 37335
    assert calculate_object_size(elems, 'kB') == 36.4599609375
    assert calculate_object_size(elems, 'MB') == 0.035605430603027344
def measure_byte_size(self):
    """ Calculate the size of the tree.

    Returns
    -------
    int
        Size of the tree in bytes.

    """
    return calculate_object_size(self)
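A minimal usage sketch for this method, assuming it belongs to a scikit-multiflow Hoeffding tree (HoeffdingTreeClassifier in recent releases; older releases name it HoeffdingTree). The synthetic data and the side-by-side call to calculate_object_size are illustrative only.

import numpy as np
from skmultiflow.trees import HoeffdingTreeClassifier   # assumed class name
from skmultiflow.utils import calculate_object_size

rng = np.random.RandomState(42)
X = rng.rand(500, 5)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

tree = HoeffdingTreeClassifier()
tree.partial_fit(X, y, classes=[0, 1])

print(tree.measure_byte_size())           # size of the fitted tree in bytes
print(calculate_object_size(tree, 'kB'))  # same measurement, converted to kB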
def estimate_model_byte_size(self):
    """ Calculate the size of the model and trigger the tracker function
    if the actual model size exceeds the maximum size set in the configuration.
    """
    learning_nodes = self._find_learning_nodes()
    total_active_size = 0
    total_inactive_size = 0
    for found_node in learning_nodes:
        if isinstance(found_node.node, self.AnyTimeActiveLearningNode):
            total_active_size += calculate_object_size(found_node.node)
        else:
            total_inactive_size += calculate_object_size(found_node.node)
    if total_active_size > 0:
        self._active_leaf_byte_size_estimate = total_active_size / self._active_leaf_node_cnt
    if total_inactive_size > 0:
        self._inactive_leaf_byte_size_estimate = total_inactive_size / self._inactive_leaf_node_cnt
    actual_model_size = calculate_object_size(self)
    estimated_model_size = (self._active_leaf_node_cnt * self._active_leaf_byte_size_estimate
                            + self._inactive_leaf_node_cnt * self._inactive_leaf_byte_size_estimate)
    self._byte_size_estimate_overhead_fraction = actual_model_size / estimated_model_size
    if actual_model_size > self.max_byte_size:
        self.enforce_tracker_limit()
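A small worked example of the bookkeeping above, with made-up numbers: the per-leaf averages give an estimated model size built from leaves alone, and the overhead fraction records how much larger the actual model (splits, attribute observers, etc.) is than that estimate.

# Hypothetical numbers, for illustration only.
active_leaf_cnt, inactive_leaf_cnt = 40, 10
total_active_size, total_inactive_size = 48_000, 4_000   # bytes summed over the leaves

active_leaf_estimate = total_active_size / active_leaf_cnt        # 1200 bytes per active leaf
inactive_leaf_estimate = total_inactive_size / inactive_leaf_cnt  # 400 bytes per inactive leaf

estimated_model_size = (active_leaf_cnt * active_leaf_estimate
                        + inactive_leaf_cnt * inactive_leaf_estimate)   # 52 000 bytes
actual_model_size = 65_000   # e.g. calculate_object_size(tree)

overhead_fraction = actual_model_size / estimated_model_size   # 1.25: the model is 25% larger
max_byte_size = 60_000
if actual_model_size > max_byte_size:
    print('size limit exceeded -> enforce_tracker_limit() would be called')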
def test_calculate_object_size():
    elems = []
    array_length = 10
    for i in range(100):
        elems.append(np.ones((array_length), np.int8))
        elems.append('testing_string')

    if sys.platform == 'linux':
        # Assert sizes based on a linux system
        assert calculate_object_size(elems, 'byte') == 37335
        assert calculate_object_size(elems, 'kB') == 36.4599609375
        assert calculate_object_size(elems, 'MB') == 0.035605430603027344
    else:
        # Run for coverage
        calculate_object_size(elems, 'byte')
        calculate_object_size(elems, 'kB')
        calculate_object_size(elems, 'MB')
def _update_metrics(self):
    """ Updates the metrics of interest.

    This function updates the evaluation data buffer which is used to track
    performance during evaluation.

    The content of the buffer depends on the evaluation task type and metrics selected.

    If more than one model/learner is evaluated at once, data is stored as
    lists inside the buffer.

    """
    shift = 0
    if self._method == 'prequential':
        shift = -self.batch_size   # Adjust index due to training after testing
    sample_id = self.global_sample_count + shift

    for metric in self.metrics:
        values = [[], []]
        if metric == constants.ACCURACY:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_accuracy())
                values[1].append(self.current_eval_measurements[i].get_accuracy())
        elif metric == constants.KAPPA:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_kappa())
                values[1].append(self.current_eval_measurements[i].get_kappa())
        elif metric == constants.KAPPA_T:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_kappa_t())
                values[1].append(self.current_eval_measurements[i].get_kappa_t())
        elif metric == constants.KAPPA_M:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_kappa_m())
                values[1].append(self.current_eval_measurements[i].get_kappa_m())
        elif metric == constants.HAMMING_SCORE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_hamming_score())
                values[1].append(self.current_eval_measurements[i].get_hamming_score())
        elif metric == constants.HAMMING_LOSS:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_hamming_loss())
                values[1].append(self.current_eval_measurements[i].get_hamming_loss())
        elif metric == constants.EXACT_MATCH:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_exact_match())
                values[1].append(self.current_eval_measurements[i].get_exact_match())
        elif metric == constants.J_INDEX:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_j_index())
                values[1].append(self.current_eval_measurements[i].get_j_index())
        elif metric == constants.MSE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_mean_square_error())
                values[1].append(self.current_eval_measurements[i].get_mean_square_error())
        elif metric == constants.MAE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_average_error())
                values[1].append(self.current_eval_measurements[i].get_average_error())
        elif metric == constants.AMSE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_average_mean_square_error())
                values[1].append(self.current_eval_measurements[i].get_average_mean_square_error())
        elif metric == constants.AMAE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_average_absolute_error())
                values[1].append(self.current_eval_measurements[i].get_average_absolute_error())
        elif metric == constants.ARMSE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_average_root_mean_square_error())
                values[1].append(self.current_eval_measurements[i].get_average_root_mean_square_error())
        elif metric == constants.F1_SCORE:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_f1_score())
                values[1].append(self.current_eval_measurements[i].get_f1_score())
        elif metric == constants.PRECISION:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_precision())
                values[1].append(self.current_eval_measurements[i].get_precision())
        elif metric == constants.RECALL:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_recall())
                values[1].append(self.current_eval_measurements[i].get_recall())
        elif metric == constants.GMEAN:
            for i in range(self.n_models):
                values[0].append(self.mean_eval_measurements[i].get_g_mean())
                values[1].append(self.current_eval_measurements[i].get_g_mean())
        elif metric == constants.TRUE_VS_PREDICTED:
            y_true = -1
            y_pred = []
            for i in range(self.n_models):
                t, p = self.mean_eval_measurements[i].get_last()
                y_true = t   # We only need to keep one true value
                y_pred.append(p)
            values[0] = y_true
            for i in range(self.n_models):
                values[1].append(y_pred[i])
        elif metric == constants.DATA_POINTS:
            target_values = self.stream.target_values
            features = {}   # Dictionary containing feature values, using index as key
            y_pred, p = self.mean_eval_measurements[0].get_last()   # Only track one model (first) by default
            X, _ = self.stream.last_sample()
            idx_1 = 0   # TODO let the user choose the feature indices of interest
            idx_2 = 1
            features[idx_1] = X[0][idx_1]
            features[idx_2] = X[0][idx_2]
            values = [None, None, None]
            values[0] = features
            values[1] = target_values
            values[2] = y_pred
        elif metric == constants.RUNNING_TIME:
            values = [[], [], []]
            for i in range(self.n_models):
                values[0].append(self.running_time_measurements[i].get_current_training_time())
                values[1].append(self.running_time_measurements[i].get_current_testing_time())
                values[2].append(self.running_time_measurements[i].get_current_total_running_time())
        elif metric == constants.MODEL_SIZE:
            values = []
            for i in range(self.n_models):
                values.append(calculate_object_size(self.model[i], 'kB'))
        else:
            raise ValueError('Unknown metric {}'.format(metric))

        # Update buffer
        if metric == constants.TRUE_VS_PREDICTED:
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id=constants.Y_TRUE, value=values[0])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id=constants.Y_PRED, value=values[1])
        elif metric == constants.DATA_POINTS:
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='X', value=values[0])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='target_values', value=values[1])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='predictions', value=values[2])
        elif metric == constants.RUNNING_TIME:
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='training_time', value=values[0])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='testing_time', value=values[1])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='total_running_time', value=values[2])
        elif metric == constants.MODEL_SIZE:
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id='model_size', value=values)
        else:
            # Default case, 'mean' and 'current' performance
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id=constants.MEAN, value=values[0])
            self._data_buffer.update_data(sample_id=sample_id, metric_id=metric,
                                          data_id=constants.CURRENT, value=values[1])

    shift = 0
    if self._method == 'prequential':
        shift = -self.batch_size   # Adjust index due to training after testing
    self._update_outputs(self.global_sample_count + shift)
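The MODEL_SIZE branch above is what records calculate_object_size(model, 'kB') at each reporting point. A minimal way to exercise it, assuming a recent scikit-multiflow API, is to request 'model_size' among the metrics of a prequential evaluation; the guard around prepare_for_use only matters for older releases.

from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTreeClassifier   # assumed class name

stream = SEAGenerator(random_state=1)
if hasattr(stream, 'prepare_for_use'):   # only needed by older scikit-multiflow releases
    stream.prepare_for_use()

model = HoeffdingTreeClassifier()
evaluator = EvaluatePrequential(max_samples=5000,
                                batch_size=1,
                                metrics=['accuracy', 'model_size', 'running_time'],
                                show_plot=False)
evaluator.evaluate(stream=stream, model=model)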