Beispiel #1
0
def test_cost_count():
    '''
    Check the per-cluster counts returned by the cost function:
    with three observations labelled 0 and three labelled 1 the
    returned counts must be [3, 3].
    '''
    cooling = csa.ExponentialCoolingSchedule(100)
    clusterer = SACluster(n_clusters=2,
                          cooling_schedule=cooling,
                          dist_metric='euclidean')

    # 6 observations on 2 variables
    observations = np.arange(12).reshape((6, 2))

    # labels = [0, 0, 0, 1, 1, 1]
    labels = np.zeros(6)
    labels[3:] = 1

    # only the counts are under test; the energies are ignored
    _, counts = clusterer._cost(labels, observations)

    assert np.array_equal(np.array([3, 3]), counts)
Beispiel #2
0
def test_copy_cluster_metadata_count():
    '''
    Copying cluster meta data must hand back counts that are
    equal to the counts that were passed in.
    '''
    k = 6

    # the particular cooling schedule is irrelevant to this test
    clusterer = SACluster(n_clusters=k,
                          cooling_schedule=ExponentialCoolingSchedule(100))

    energies = np.arange(k)
    counts = np.arange(10, 10 + k)

    # the copied energies are not inspected here
    _, copied_counts = clusterer._copy_cluster_metadata(energies, counts)

    assert np.array_equal(copied_counts, counts)
Beispiel #3
0
def test_random_cluster_shift_2():
    '''
    Compare the cluster returned by _random_cluster_shift with a
    value recomputed from the same seeded NumPy random draw.
    '''
    k = 10
    start_cluster = 3
    seed = 101

    # the particular cooling schedule is irrelevant to this test
    clusterer = SACluster(n_clusters=k,
                          cooling_schedule=ExponentialCoolingSchedule(100))

    # seed so that the draw made inside the method is reproducible
    np.random.seed(seed)
    shifted_cluster = clusterer._random_cluster_shift(start_cluster)

    # re-seed and replay the draw to build the expected result
    np.random.seed(seed)
    draw = np.random.randint(k)
    expected_cluster = (start_cluster + draw - 1) % k

    assert expected_cluster == shifted_cluster
Beispiel #4
0
def test_energy_delta():
    '''
    Tests SACluster._delta_cluster_energy()

    The expected delta is the absolute difference between the summed
    pairwise euclidean distances of cluster 0 with and without the
    observation [2, 3].

    NOTE(review): the original author was not 100% convinced that this
    expectation is correct - confirm against the implementation.
    '''

    #cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2,
                   cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    #state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    #call kept from the original test; its return values are not used.
    #NOTE(review): unclear whether _cost has side effects that
    #_delta_cluster_energy relies on - confirm before removing.
    sa._cost(state, data)

    cluster_index = 0
    observation_index = 1

    actual = sa._delta_cluster_energy(state, data, cluster_index,
                                      observation_index)

    print('delta {}'.format(actual))

    assigned_to_cluster = (state == cluster_index)

    #energy of cluster 0 with all three of its observations
    cluster_energy = pdist(data[assigned_to_cluster, :], 'euclidean').sum()

    #cluster 0 with the observation [2, 3] (row 1 of data) removed
    minus_obs = np.array([[0, 1], [4, 5]])
    cluster_energy2 = pdist(minus_obs, 'euclidean').sum()

    expected_delta = abs(cluster_energy2 - cluster_energy)

    #floating point results: compare with a tolerance rather than
    #requiring bit-identical values
    assert np.isclose(actual, expected_delta)
Beispiel #5
0
def test_sample_observation():
    '''
    The (index, value) pair returned by _sample_observation must be
    consistent: the value has to be the state entry at that index.
    '''
    k = 3

    # the particular cooling schedule is irrelevant to this test
    clusterer = SACluster(n_clusters=k,
                          cooling_schedule=ExponentialCoolingSchedule(100))

    # labels = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    labels = np.zeros(10)
    labels[2:6] = 1
    labels[6:] = 2

    # fix the seed so the sampled observation is reproducible
    np.random.seed(seed=101)
    sampled_index, sampled_value = clusterer._sample_observation(labels)

    assert labels[sampled_index] == sampled_value
Beispiel #6
0
def test_generate_neighbour_state():
    '''
    The neighbour state returned for (index 3, cluster 0) must equal
    the input state with only that element changed.
    '''
    target_index = 3
    target_cluster = 0

    # labels = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    labels = np.zeros(10)
    labels[2:6] = 1
    labels[6:] = 2

    # build the expectation from a copy taken before the call
    expected = labels.copy()
    expected[target_index] = target_cluster

    clusterer = SACluster(n_clusters=3,
                          cooling_schedule=ExponentialCoolingSchedule(100))
    neighbour = clusterer._generate_neighbour_state(labels,
                                                    target_index,
                                                    target_cluster)

    assert np.array_equal(expected, neighbour)
Beispiel #7
0
def test_cost_euclidean():
    '''
    Tests that the cost function returns, for each cluster, the
    sum of the pairwise euclidean distances between the
    observations assigned to that cluster.
    '''

    #cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2,
                   cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    #state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    #calculate energy for state and data (counts are not under test)
    actual_energy, _ = sa._cost(state, data)

    #calculate expected energy based on pairwise euclidean distances
    expected_energy = np.zeros(2)
    expected = 0

    #cluster 0 holds rows 0-2; the loop also visits i == j pairs
    for i in range(3):
        for j in range(i, 3):
            expected += euclidean_distance(data[i, :], data[j, :])

    expected_energy[0] = expected
    #cluster 1 holds rows 3 and 4, i.e. a single pair
    expected_energy[1] = euclidean_distance(data[3, :], data[4, :])

    print('expected {}'.format(expected_energy))

    #the energies are floats accumulated in a test-specific order, so
    #compare with a tolerance; the shape check keeps the strictness
    #that np.array_equal had (np.allclose would otherwise broadcast)
    assert np.shape(actual_energy) == expected_energy.shape
    assert np.allclose(actual_energy, expected_energy)