Python SACluster Examples

Programming Language: Python

Namespace/Package Name: clustering.sa

Class/Type: SACluster

Examples at hotexamples.com: 7

Python SACluster - 7 examples found. These are the top rated real world Python examples of clustering.sa.SACluster extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SACluster(7)

_cost(3)

_copy_cluster_metadata(1)

_delta_cluster_energy(1)

_generate_neighbour_state(1)

_random_cluster_shift(1)

_sample_observation(1)

Example #1

Show file

def test_cost_count():
    '''
    Check the per-cluster observation counts returned by _cost()
    when six observations are split evenly across two clusters.
    '''
    cooling = csa.ExponentialCoolingSchedule(100)
    clusterer = SACluster(dist_metric='euclidean',
                          cooling_schedule=cooling,
                          n_clusters=2)

    # six observations on two variables
    observations = np.arange(12).reshape(6, 2)

    # first three observations -> cluster 0, last three -> cluster 1
    labels = np.zeros(6)
    labels[3:] = 1

    # only the counts are of interest here; the energies are discarded
    _, counts = clusterer._cost(labels, observations)

    assert np.array_equal(np.array([3, 3]), counts)

Example #2

Show file

def test_copy_cluster_metadata_count():
    '''
    Verify that _copy_cluster_metadata() hands back counts that
    are equal to the counts passed in.
    '''
    n_clusters = 6

    # the choice of cooling schedule is irrelevant to this test
    cooling = ExponentialCoolingSchedule(100)
    clusterer = SACluster(cooling_schedule=cooling, n_clusters=n_clusters)

    # distinct values per cluster so a mix-up would be detected
    energies = np.arange(n_clusters)
    counts = np.arange(10, 10 + n_clusters)

    copied = clusterer._copy_cluster_metadata(energies, counts)
    _, copied_counts = copied

    assert np.array_equal(copied_counts, counts)

Example #3

Show file

def test_random_cluster_shift_2():
    '''
    Check _random_cluster_shift() against a replay of the same
    seeded random draw performed directly in the test.
    '''
    seed = 101
    n_clusters = 10
    start_cluster = 3

    # the choice of cooling schedule is irrelevant to this test
    cooling = ExponentialCoolingSchedule(100)
    clusterer = SACluster(cooling_schedule=cooling, n_clusters=n_clusters)

    # fix the pseudo-random stream before the call under test
    np.random.seed(seed)
    shifted = clusterer._random_cluster_shift(start_cluster)

    # rewind the stream and reproduce the draw by hand
    np.random.seed(seed)
    offset = np.random.randint(n_clusters)
    predicted = (start_cluster + offset - 1) % n_clusters

    assert predicted == shifted

Example #4

Show file

def test_energy_delta():
    '''
    Tests SACluster._delta_cluster_energy()

    The expected delta is the absolute difference between the summed
    pairwise euclidean distances (scipy pdist) of the observations
    assigned to a cluster, computed with and without one observation.

    NOTE(review): the original author flagged this expectation as
    "NOT 100% convinced this is correct" - confirm the reference
    calculation against the intended definition of the delta.
    '''

    #cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2,
                   cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    #state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    # The returned energy/count are not used by this test. The call is
    # retained from the original in case _cost() prepares internal state
    # that _delta_cluster_energy() depends on - TODO confirm.
    sa._cost(state, data)

    cluster_index = 0
    observation_index = 1

    actual = sa._delta_cluster_energy(state, data, cluster_index,
                                      observation_index)

    print('delta {}'.format(actual))

    # energy of the cluster with every assigned observation
    assigned_to_cluster = (state == cluster_index)
    cluster_energy = pdist(data[assigned_to_cluster, :], 'euclidean').sum()

    # energy of the same cluster with the chosen observation removed;
    # derived from the data rather than hard-coded so the reference
    # stays in step with data / cluster_index / observation_index
    without_observation = assigned_to_cluster.copy()
    without_observation[observation_index] = False
    cluster_energy2 = pdist(data[without_observation, :], 'euclidean').sum()

    expected_delta = abs(cluster_energy2 - cluster_energy)

    # floating point results: compare with a tolerance, not exact equality
    assert np.allclose(actual, expected_delta)

Example #5

Show file

def test_sample_observation():
    '''
    Check that the value returned by _sample_observation() is the
    entry of the state array found at the returned index.
    '''
    # the choice of cooling schedule is irrelevant to this test
    cooling = ExponentialCoolingSchedule(100)
    clusterer = SACluster(cooling_schedule=cooling, n_clusters=3)

    # labels = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    labels = np.zeros(10)
    labels[6:] = 2
    labels[2:6] = 1

    # fixed seed so the sampled index is reproducible
    np.random.seed(101)
    idx, sampled = clusterer._sample_observation(labels)

    assert labels[idx] == sampled

Example #6

Show file

def test_generate_neighbour_state():
    '''
    Check that _generate_neighbour_state() returns the input state
    with exactly the requested element moved to the new cluster.
    '''
    target_index, target_cluster = 3, 0

    # labels = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    labels = np.zeros(10)
    labels[6:] = 2
    labels[2:6] = 1

    # build the expectation from a copy before the method is called
    wanted = labels.copy()
    wanted[target_index] = target_cluster

    cooling = ExponentialCoolingSchedule(100)
    clusterer = SACluster(cooling_schedule=cooling, n_clusters=3)
    neighbour = clusterer._generate_neighbour_state(labels,
                                                    target_index,
                                                    target_cluster)

    assert np.array_equal(wanted, neighbour)

Example #7

Show file

def test_cost_euclidean():
    '''
    Tests that the cost function returns, for each cluster, the sum
    of pairwise euclidean distances between the observations
    assigned to that cluster.
    '''

    #cooling schedule selected does not matter for the test
    schedule = ExponentialCoolingSchedule(100)
    sa = SACluster(n_clusters=2,
                   cooling_schedule=schedule,
                   dist_metric='euclidean')

    # 5 observations on 2 variables
    data = np.arange(10).reshape((5, 2))

    #state = [0, 0, 0, 1, 1]
    state = np.zeros(5)
    state[3:] = 1

    #calculate energy for state and data (counts are not checked here)
    actual_energy, _ = sa._cost(state, data)

    #calculate expected energy based on pairwise euclidean distances
    expected_energy = np.zeros(2)
    expected = 0

    # cluster 0 holds observations 0..2. The j == i self-pairs are kept
    # from the original; for a true euclidean metric they add 0.
    for i in range(3):
        for j in range(i, 3):
            expected += euclidean_distance(data[i, :], data[j, :])

    expected_energy[0] = expected
    # cluster 1 holds observations 3 and 4: a single pair
    expected_energy[1] = euclidean_distance(data[3, :], data[4, :])

    print('expected {}'.format(expected_energy))

    # Energies are floating point sums: compare with a tolerance rather
    # than exact equality. The explicit shape check keeps the shape
    # strictness that np.array_equal provided (allclose would broadcast).
    assert np.shape(actual_energy) == expected_energy.shape
    assert np.allclose(actual_energy, expected_energy)