Exemple #1
0
def update_for_cols(membership, cd_scores, multiprocessing):
    """Update the column memberships according to cd_scores.

    Every row of cd_scores is handled as one column of the membership.
    A column is only modified when seeing_change() succeeds for it, and it
    then receives up to max_changes_per_col() modifications, each of which
    does one of the following:

    - the column has a free slot: one of the clusters that
      get_best_clusters() returned for the column is added
    - no free slot, but a cluster is held more than once: the first
      duplicated entry is replaced by the candidate at the same position
    - otherwise: replace_delta_column_member() takes over

    The multiprocessing argument is accepted but not used here.
    """
    global UPDATE_MEMBERSHIP  # declared only; not read or assigned below

    colnames = cd_scores.row_names
    clusters_per_col = membership.num_clusters_per_column()
    best_clusters = get_best_clusters(cd_scores, clusters_per_col)
    max_changes = membership.max_changes_per_col()
    change_prob = membership.probability_seeing_col_change()

    for row in xrange(cd_scores.num_rows):
        col = colnames[row]
        candidates = best_clusters[col]
        if not seeing_change(change_prob):
            continue

        for _ in range(max_changes):
            if len(candidates) == 0:
                # no candidate clusters for this column
                continue

            free_slots = membership.free_slots_for_column(col)
            if len(free_slots) > 0:
                # The slot can be out of bounds for the candidates when the
                # setting for clusters_per_row/clusters_per_col is too
                # large, so it is clamped to the last valid position to
                # avoid the exception.
                slot = min(free_slots[0], len(candidates) - 1)
                membership.add_cluster_to_column(col, candidates[slot])
                continue

            col_clusters = membership.col_membs[membership.colidx[col]]
            multi = util.which_multiple(col_clusters)
            if len(multi) == 0:
                replace_delta_column_member(
                    membership, col, candidates, cd_scores)
                continue

            # position of the first entry whose cluster occurs repeatedly
            # NOTE(review): candidates[pos] is not clamped the way the free
            # slot above is - confirm it can never run out of range.
            pos = next((i for i, cluster in enumerate(col_clusters)
                        if cluster in multi), None)
            if pos is not None:
                membership.replace_column_cluster(col, pos, candidates[pos])
Exemple #2
0
def update_for_cols(membership, cd_scores, multiprocessing):
    """Adjust which clusters each column belongs to, driven by cd_scores.

    The rows of cd_scores are visited in order; the row name identifies the
    column to adjust. For a column that passes the seeing_change() check,
    a single-change step is run max_changes_per_col() times, provided the
    column has candidate clusters from get_best_clusters().

    The multiprocessing parameter is not used by this function.
    """
    global UPDATE_MEMBERSHIP  # not referenced anywhere in this function

    def change_once(column, candidates):
        """performs one membership modification for the given column"""
        open_slots = membership.free_slots_for_column(column)
        if len(open_slots) > 0:
            # A free slot index may exceed the candidate list when
            # clusters_per_row/clusters_per_col is configured too large;
            # fall back to the last candidate instead of raising.
            last = len(candidates) - 1
            slot = open_slots[0]
            if slot > last:
                slot = last
            membership.add_cluster_to_column(column, candidates[slot])
            return

        current = membership.col_membs[membership.colidx[column]]
        repeated = util.which_multiple(current)
        if len(repeated) > 0:
            # swap out the first entry that holds a repeated cluster
            # NOTE(review): candidates[position] has no bounds guard,
            # unlike the free-slot case above - verify this is safe.
            for position, cluster in enumerate(current):
                if cluster in repeated:
                    membership.replace_column_cluster(
                        column, position, candidates[position])
                    return
        else:
            replace_delta_column_member(
                membership, column, candidates, cd_scores)

    names = cd_scores.row_names
    best_clusters = get_best_clusters(
        cd_scores, membership.num_clusters_per_column())
    max_changes = membership.max_changes_per_col()
    change_prob = membership.probability_seeing_col_change()

    for index in xrange(cd_scores.num_rows):
        col = names[index]
        candidates = best_clusters[col]
        if seeing_change(change_prob):
            for _ in range(max_changes):
                if len(candidates) > 0:
                    change_once(col, candidates)
Exemple #3
0
 def test_which_multiple_some_multiple(self):
     """which_multiple() reports each value that occurs more than once"""
     multiple = util.which_multiple([1, 2, 3, 1, 2])
     # 1 and 2 appear twice in the input, 3 only once
     self.assertEqual(2, len(multiple))
     self.assertIn(1, multiple)
     self.assertIn(2, multiple)
Exemple #4
0
 def test_which_multiple_all_unique(self):
     """which_multiple() reports nothing when every value occurs once"""
     self.assertEqual(0, len(util.which_multiple([1, 2, 3])))
Exemple #5
0
 def test_which_multiple_none(self):
     """which_multiple() reports nothing for an empty input"""
     self.assertEqual(0, len(util.which_multiple([])))
Exemple #6
0
 def test_which_multiple_some_multiple(self):
     """which_multiple() returns the values found more than once"""
     multiple = util.which_multiple([1, 2, 3, 1, 2])
     # the input repeats 1 and 2, while 3 is unique
     self.assertEqual(2, len(multiple))
     self.assertIn(1, multiple)
     self.assertIn(2, multiple)
Exemple #7
0
 def test_which_multiple_all_unique(self):
     """which_multiple() returns an empty result if no value repeats"""
     self.assertEqual(0, len(util.which_multiple([1, 2, 3])))
Exemple #8
0
 def test_which_multiple_none(self):
     """which_multiple() returns an empty result for an empty list"""
     self.assertEqual(0, len(util.which_multiple([])))