def test_cost_count():
    '''
    Check the per-cluster observation counts returned by sa._cost()
    for a state that places three observations in each of two clusters.
    '''
    cooling = csa.ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2,
                   cooling_schedule=cooling,
                   dist_metric='euclidean')

    # 6 observations on 2 variables
    observations = np.arange(12).reshape((6, 2))

    # assignment = [0, 0, 0, 1, 1, 1]
    assignment = np.zeros(6)
    assignment[3:] = 1

    # only the counts matter for this test; the energy is discarded
    _, counts = sa._cost(assignment, observations)

    three_in_each = np.array([3, 3])
    assert np.array_equal(three_in_each, counts)
def test_copy_cluster_metadata_count():
    '''
    Check that the counts returned by sa._copy_cluster_metadata()
    are equal, element for element, to the counts passed in.
    '''
    n_clusters = 6

    # the choice of cooling schedule is irrelevant to this test
    sa = SACluster(n_clusters=n_clusters,
                   cooling_schedule=ExponentialCoolingSchedule(100))

    energy = np.arange(n_clusters)
    count = np.arange(10, 10 + n_clusters)

    # the returned energy is not inspected here
    _, copied_count = sa._copy_cluster_metadata(energy, count)

    assert np.array_equal(copied_count, count)
def test_random_cluster_shift_2():
    '''
    Check sa._random_cluster_shift() against a value recomputed from
    the same pseudo-random draw: the generator is seeded, the method is
    called, then the generator is re-seeded and the draw is repeated by
    hand.
    '''
    seed = 101
    n_clusters = 10
    start_cluster = 3

    # the choice of cooling schedule is irrelevant to this test
    sa = SACluster(n_clusters=n_clusters,
                   cooling_schedule=ExponentialCoolingSchedule(100))

    # fix the random stream, then let the method consume from it
    np.random.seed(seed)
    shifted_cluster = sa._random_cluster_shift(start_cluster)

    # rewind the stream and reproduce the draw
    np.random.seed(seed)
    offset = np.random.randint(n_clusters)

    assert (start_cluster + offset - 1) % n_clusters == shifted_cluster
def test_energy_delta():
    '''
    Tests sa._delta_cluster_energy()

    The expected delta is the absolute difference between the summed
    pairwise euclidean distances of a cluster with and without one of
    its observations.

    NOTE: the original author flagged that they were not 100% convinced
    this expected value is correct.
    '''
    # cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2, cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    # state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    # The return values are not needed, but the call is kept because it
    # may prime internal state - TODO confirm whether it can be dropped.
    sa._cost(state, data)

    cluster_index = 0
    observation_index = 1
    actual = sa._delta_cluster_energy(state, data, cluster_index,
                                      observation_index)
    print('delta {}'.format(actual))

    # energy of the cluster with every assigned observation
    assigned_to_cluster = (state == cluster_index)
    cluster_energy = pdist(data[assigned_to_cluster, :], 'euclidean').sum()

    # energy of the same cluster once the chosen observation is removed;
    # derived from the mask so it stays in step with the indices above
    remaining = assigned_to_cluster.copy()
    remaining[observation_index] = False
    cluster_energy2 = pdist(data[remaining, :], 'euclidean').sum()

    expected_delta = abs(cluster_energy2 - cluster_energy)

    # Both sides are sums of square roots, so compare with a tolerance
    # rather than demanding bit-identical floats.
    assert np.isclose(actual, expected_delta)
def test_sample_observation():
    '''
    Check that the (index, value) pair returned by
    sa._sample_observation() is self-consistent: the value must be the
    cluster label stored at that index in the state.
    '''
    n_clusters = 3

    # the choice of cooling schedule is irrelevant to this test
    sa = SACluster(n_clusters=n_clusters,
                   cooling_schedule=ExponentialCoolingSchedule(100))

    # labels = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    labels = np.zeros(10)
    labels[2:6] = 1
    labels[6:] = 2

    # make the sampled observation reproducible
    np.random.seed(seed=101)
    sampled_index, sampled_value = sa._sample_observation(labels)

    assert labels[sampled_index] == sampled_value
def test_generate_neighbour_state():
    '''
    Check that sa._generate_neighbour_state() returns a state equal to
    the input state with a single element moved to the new cluster.
    '''
    n_clusters = 3
    sa = SACluster(n_clusters=n_clusters,
                   cooling_schedule=ExponentialCoolingSchedule(100))

    # current = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    current = np.zeros(10)
    current[2:6] = 1
    current[6:] = 2

    i_to_change, new_cluster = 3, 0

    # build the expectation up front, before the method sees the state
    wanted = current.copy()
    wanted[i_to_change] = new_cluster

    neighbour = sa._generate_neighbour_state(current, i_to_change,
                                             new_cluster)

    assert np.array_equal(wanted, neighbour)
def test_cost_euclidean():
    '''
    Tests that the cost function returns, for each cluster, the sum of
    pairwise euclidean distances between its observations.
    '''
    # cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2, cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    # state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    # calculate energy for state and data (counts are not checked here)
    actual_energy, _ = sa._cost(state, data)

    # calculate expected energy based on pairwise euclidean distances
    expected_energy = np.zeros(2)

    # Cluster 0 holds observations 0..2. The inner range starts at i, so
    # self-pairs are included; presumably euclidean_distance returns 0
    # for identical points - verify against its definition.
    expected = 0
    for i in range(3):
        for j in range(i, 3):
            expected += euclidean_distance(data[i, :], data[j, :])
    expected_energy[0] = expected

    # Cluster 1 holds observations 3 and 4: a single pair.
    expected_energy[1] = euclidean_distance(data[3, :], data[4, :])

    print('expected {}'.format(expected_energy))

    # The expected sums are accumulated in a different order from
    # whatever _cost does internally, so compare with a tolerance.
    # The explicit shape check keeps the strictness np.array_equal had.
    assert np.shape(actual_energy) == expected_energy.shape
    assert np.allclose(actual_energy, expected_energy)