def _classify(self, sample):
    """
    Pick the classification whose computed probability is highest.

    Args:
        sample: dict or pandas.Series
            the sample or observation to be classified.

    Returns:
        The key with the highest value among the classification
        probabilities obtained for the sample.
    """
    probabilities_by_class = self.get_classification_probabilities(sample)
    return collection_utils.get_key_with_highest_value(
        probabilities_by_class)
def _classify(self, sample):
    """
    Pick the classification whose computed probability is highest.

    Args:
        sample: dict or pandas.Series
            the sample or observation to be classified.

    Returns:
        The key with the highest value among the classification
        probabilities obtained for the sample.
    """
    probabilities_by_class = self.get_classification_probabilities(sample)
    return collection_utils.get_key_with_highest_value(
        probabilities_by_class)
def choose_feature_to_split(dataset):
    """
    Select the feature with the highest information gain as the root.

    Args:
        dataset: model.DataSet
            The data set being used to build the decision tree.

    Returns:
        feature: string
            The feature which should be the root.
    """
    # Map every feature of the data set to its information gain.
    gain_by_feature = {
        candidate: info_gain(candidate, dataset)
        for candidate in dataset.feature_list()
    }
    return get_key_with_highest_value(gain_by_feature)
def choose_feature_to_split(dataset):
    """
    Select the feature with the highest information gain as the root.

    Args:
        dataset: model.DataSet
            The data set being used to build the decision tree.

    Returns:
        feature: string
            The feature which should be the root.
    """
    # Map every feature of the data set to its information gain.
    gain_by_feature = {
        candidate: info_gain(candidate, dataset)
        for candidate in dataset.feature_list()
    }
    return get_key_with_highest_value(gain_by_feature)
def _classify(self, sample):
    """
    Classify a sample by tallying votes from the training set.

    Args:
        sample: the sample or observation to be classified.

    Returns:
        The sample's classification: the key with the highest value
        among the tallied votes.
    """
    # Reduce every training row to its euclidean distance from the sample.
    distances = self.training_set.reduce_rows(
        lambda row: distance_utils.euclidean(row, sample))
    labels = self.training_set.get_labels()
    votes = self._tally_votes(labels, distances)
    return collection_utils.get_key_with_highest_value(votes)
def _classify(self, sample):
    """
    Classify a sample by tallying votes from the training set.

    Args:
        sample: the sample or observation to be classified.

    Returns:
        The sample's classification: the key with the highest value
        among the tallied votes.
    """
    # Reduce every training row to its euclidean distance from the sample.
    distances = self.training_set.reduce_rows(
        lambda row: distance_utils.euclidean(row, sample))
    labels = self.training_set.get_labels()
    votes = self._tally_votes(labels, distances)
    return collection_utils.get_key_with_highest_value(votes)
def test_get_key_with_highest_value_empty(self):
    # With no entries to choose from, the lookup is expected to give None.
    no_entries = {}
    best_key = collection_utils.get_key_with_highest_value(no_entries)
    self.assertIsNone(best_key)
def test_get_key_with_highest_value_float(self):
    # Float values: key 2 holds the largest value (0.72).
    scores = {0: 0.10, 1: 0.0567, 2: 0.72}
    self.assertEqual(
        collection_utils.get_key_with_highest_value(scores), 2)
def test_get_key_with_highest_value(self):
    # Integer values with string keys: "cat" holds the largest value (10).
    counts = {"dog": 5, "cat": 10, "bird": 7}
    self.assertEqual(
        collection_utils.get_key_with_highest_value(counts), "cat")
def test_get_key_with_highest_value_empty(self):
    # With no entries to choose from, the lookup is expected to give None.
    no_entries = {}
    best_key = collection_utils.get_key_with_highest_value(no_entries)
    self.assertIsNone(best_key)
def test_get_key_with_highest_value_float(self):
    # Float values: key 2 holds the largest value (0.72).
    scores = {0: 0.10, 1: 0.0567, 2: 0.72}
    self.assertEqual(
        collection_utils.get_key_with_highest_value(scores), 2)
def test_get_key_with_highest_value(self):
    # Integer values with string keys: "cat" holds the largest value (10).
    counts = {"dog": 5, "cat": 10, "bird": 7}
    self.assertEqual(
        collection_utils.get_key_with_highest_value(counts), "cat")