def choose_feature_to_split(dataset):
    """
    Pick the feature with the highest information gain as the split root.

    Args:
        dataset: model.DataSet
            The data set being used to build the decision tree.

    Returns:
        feature: string
            The feature which should be the root.
    """
    # Score every candidate feature once, then let the helper pick the
    # key whose score is largest.
    gain_by_feature = {
        candidate: info_gain(candidate, dataset)
        for candidate in dataset.feature_list()
    }
    return get_key_with_highest_value(gain_by_feature)
def test_info_gain(self):
    """Check that info_gain for feature A on the example data set is
    0.311 to three decimal places."""
    example = self.create_example_dataset()
    gain = info_gain("A", example)
    self.assertAlmostEqual(gain, 0.311, places=3)