def update_for_cols(membership, cd_scores, multiprocessing):
    """Update column memberships according to cd_scores.

    Every row name of cd_scores is treated as a column name of the
    membership.  A column is only touched when seeing_change() succeeds for
    the membership's column change probability; in that case up to
    membership.max_changes_per_col() changes are attempted:

    * if the column has a free slot, one of its best clusters is added;
    * otherwise, if the column holds some cluster more than once, the first
      entry holding such a cluster is replaced by one of the best clusters;
    * otherwise the change is delegated to replace_delta_column_member().

    Args:
        membership: membership object, modified in place.
        cd_scores: score matrix providing row_names and num_rows; it is also
            handed to get_best_clusters() and replace_delta_column_member().
        multiprocessing: not used by this function; kept so that existing
            callers keep working.
    """
    colnames = cd_scores.row_names
    best_clusters = get_best_clusters(cd_scores,
                                      membership.num_clusters_per_column())
    max_changes = membership.max_changes_per_col()
    change_prob = membership.probability_seeing_col_change()

    for index in range(cd_scores.num_rows):
        col = colnames[index]
        clusters = best_clusters[col]
        if not seeing_change(change_prob):
            continue

        for _ in range(max_changes):
            if len(clusters) == 0:
                continue

            # An index can be out of bounds for the clusters sequence when
            # the setting for clusters_per_row/clusters_per_col is too
            # large; in that case pick the last valid position instead of
            # raising an IndexError.
            last = len(clusters) - 1

            free_slots = membership.free_slots_for_column(col)
            if len(free_slots) > 0:
                slot = min(free_slots[0], last)
                membership.add_cluster_to_column(col, clusters[slot])
                continue

            col_clusters = membership.col_membs[membership.colidx[col]]
            multi = util.which_multiple(col_clusters)
            if len(multi) == 0:
                replace_delta_column_member(membership, col, clusters,
                                            cd_scores)
                continue

            # Replace only the first entry whose cluster occurs repeatedly.
            for i, cluster in enumerate(col_clusters):
                if cluster in multi:
                    membership.replace_column_cluster(
                        col, i, clusters[min(i, last)])
                    break
def update_for_cols(membership, cd_scores, multiprocessing):
    """Update column memberships according to cd_scores.

    Every row name of cd_scores is treated as a column name of the
    membership.  A column is only touched when seeing_change() succeeds for
    the membership's column change probability; in that case up to
    membership.max_changes_per_col() changes are attempted:

    * if the column has a free slot, one of its best clusters is added;
    * otherwise, if the column holds some cluster more than once, the first
      entry holding such a cluster is replaced by one of the best clusters;
    * otherwise the change is delegated to replace_delta_column_member().

    Args:
        membership: membership object, modified in place.
        cd_scores: score matrix providing row_names and num_rows; it is also
            handed to get_best_clusters() and replace_delta_column_member().
        multiprocessing: not used by this function; kept so that existing
            callers keep working.
    """
    colnames = cd_scores.row_names
    best_clusters = get_best_clusters(cd_scores,
                                      membership.num_clusters_per_column())
    max_changes = membership.max_changes_per_col()
    change_prob = membership.probability_seeing_col_change()

    for index in range(cd_scores.num_rows):
        col = colnames[index]
        clusters = best_clusters[col]
        if not seeing_change(change_prob):
            continue

        for _ in range(max_changes):
            if len(clusters) == 0:
                continue

            # An index can be out of bounds for the clusters sequence when
            # the setting for clusters_per_row/clusters_per_col is too
            # large; in that case pick the last valid position instead of
            # raising an IndexError.
            last = len(clusters) - 1

            free_slots = membership.free_slots_for_column(col)
            if len(free_slots) > 0:
                slot = min(free_slots[0], last)
                membership.add_cluster_to_column(col, clusters[slot])
                continue

            col_clusters = membership.col_membs[membership.colidx[col]]
            multi = util.which_multiple(col_clusters)
            if len(multi) == 0:
                replace_delta_column_member(membership, col, clusters,
                                            cd_scores)
                continue

            # Replace only the first entry whose cluster occurs repeatedly.
            for i, cluster in enumerate(col_clusters):
                if cluster in multi:
                    membership.replace_column_cluster(
                        col, i, clusters[min(i, last)])
                    break
def test_which_multiple_some_multiple(self):
    """checks which_multiple() on a list in which 1 and 2 are repeated"""
    multiple = util.which_multiple([1, 2, 3, 1, 2])
    # 1 and 2 occur twice in the input, 3 only once
    self.assertEqual(2, len(multiple))
    self.assertTrue(1 in multiple)
    self.assertTrue(2 in multiple)
def test_which_multiple_all_unique(self):
    """checks that which_multiple() yields an empty result when no value
    is repeated"""
    multiple = util.which_multiple([1, 2, 3])
    self.assertEqual(0, len(multiple))
def test_which_multiple_none(self):
    """checks that which_multiple() yields an empty result for an empty
    input list"""
    multiple = util.which_multiple([])
    self.assertEqual(0, len(multiple))