def __init__(self, *args, lmbda_1=0., lmbda_2=0.):
    """Initialize, deriving defaults for the lambda penalties from entropy.

    Args:
        *args: positional arguments forwarded to the parent initializer.
        lmbda_1: first penalty coefficient; if <= 0, defaults to
            H(y) / LAMBDA_1_ADJUSTMENT.
        lmbda_2: second penalty coefficient; if <= 0, defaults to
            H(y) / LAMBDA_2_ADJUSTMENT.
    """
    super().__init__(*args)
    self.y = asarray2d(self.y)
    # Hoist the entropy estimate: the original computed it twice when
    # both lambdas were defaulted, and estimation is not free.
    if lmbda_1 <= 0 or lmbda_2 <= 0:
        h = estimate_entropy(self.y)
        if lmbda_1 <= 0:
            lmbda_1 = h / LAMBDA_1_ADJUSTMENT
        if lmbda_2 <= 0:
            lmbda_2 = h / LAMBDA_2_ADJUSTMENT
    self.lmbda_1 = lmbda_1
    self.lmbda_2 = lmbda_2
def test_entropy_multiple_disc():
    """Adding a continuous column to discrete columns should raise entropy."""
    zeros_col = np.zeros((50, 1))
    ones_col = np.ones((50, 1))
    # Offsetting by 0.5 yields floats, so the column is treated as continuous.
    continuous_col = asarray2d(np.arange(50) + 0.5)

    discrete_only = np.concatenate((ones_col, zeros_col), axis=1)
    with_continuous = np.concatenate((discrete_only, continuous_col), axis=1)

    h_discrete = estimate_entropy(discrete_only)
    h_mixed = estimate_entropy(with_continuous)

    assert h_mixed > h_discrete, \
        'Expected adding continuous column increases entropy'
def __init__(self, *args, lmbda_1: float = 0.0, lmbda_2: float = 0.0,
             lambda_1_adjustment: float = LAMBDA_1_ADJUSTMENT,
             lambda_2_adjustment: float = LAMBDA_2_ADJUSTMENT):
    """Initialize, deriving defaults for the lambda penalties from entropy.

    Args:
        *args: positional arguments forwarded to the parent initializer.
        lmbda_1: first penalty coefficient; if <= 0, defaults to
            H(y_val) / lambda_1_adjustment.
        lmbda_2: second penalty coefficient; if <= 0, defaults to
            H(y_val) / lambda_2_adjustment.
        lambda_1_adjustment: divisor applied to the entropy estimate when
            defaulting lmbda_1.
        lambda_2_adjustment: divisor applied to the entropy estimate when
            defaulting lmbda_2.
    """
    super().__init__(*args)
    self.y_val = asarray2d(self.y_val)
    # Hoist the entropy estimate: the original computed it twice when
    # both lambdas were defaulted, and estimation is not free.
    if lmbda_1 <= 0 or lmbda_2 <= 0:
        h = estimate_entropy(self.y_val)
        if lmbda_1 <= 0:
            lmbda_1 = h / lambda_1_adjustment
        if lmbda_2 <= 0:
            lmbda_2 = h / lambda_2_adjustment
    self.lmbda_1 = lmbda_1
    self.lmbda_2 = lmbda_2
def test_mi_informative():
    """An exact copy of y should carry substantial mutual information."""
    feature = np.arange(1, 101).reshape(-1, 1)
    target = np.arange(1, 101).reshape(-1, 1)

    mi = estimate_mutual_information(feature, target)
    entropy_of_target = estimate_entropy(target)

    assert mi > entropy_of_target / 4, \
        'exact copy columns should have high information'
def test_mi_uninformative():
    """A constant target should share almost no information with x."""
    feature = np.arange(1, 101).reshape(-1, 1)
    constant_target = np.ones((100, 1))

    mi = estimate_mutual_information(feature, constant_target)
    entropy_of_feature = estimate_entropy(feature)

    assert entropy_of_feature / 4 > mi, \
        'uninformative column should have no information'
def test_mi_informative(self):
    """An exact copy of y should carry substantial mutual information."""
    feature = np.arange(1, 101).reshape(-1, 1)
    target = np.arange(1, 101).reshape(-1, 1)

    mi = estimate_mutual_information(feature, target)
    entropy_of_target = estimate_entropy(target)

    self.assertGreater(mi, entropy_of_target / 4,
                       'exact copy columns should have high information')
def test_mi_uninformative(self):
    """A constant target should share almost no information with x."""
    feature = np.arange(1, 101).reshape(-1, 1)
    constant_target = np.ones((100, 1))

    mi = estimate_mutual_information(feature, constant_target)
    entropy_of_feature = estimate_entropy(feature)

    self.assertGreater(entropy_of_feature / 4, mi,
                       'uninformative column should have no information')
def test_cmi_redundant_info():
    """Conditioning on an exact copy of y should leave x little information."""
    feature = np.arange(1, 101).reshape(-1, 1)
    target = np.arange(1, 101).reshape(-1, 1)
    redundant_cond = np.arange(1, 101).reshape(-1, 1)

    entropy_of_target = estimate_entropy(target)
    cmi = estimate_conditional_information(feature, target, redundant_cond)

    assert entropy_of_target / 4 > cmi, \
        'redundant copies should have little information'
def H(a):  # noqa
    """Shorthand: entropy estimate of *a* coerced to a 2-d array."""
    arr = asarray2d(a)
    return estimate_entropy(arr)