Ejemplo n.º 1
0
 def test_density(self):
     """Check util.density against hard-coded expected values.

     Calls util.density with six k-values, four cluster values, a fixed
     bandwidth and a [dmin, dmax] range, then compares the first six
     entries of the result with the expected numbers using unittest's
     default "almost equal" tolerance.
     """
     kvalues = [3.4268700450682301, 3.3655160468930152, -8.0654569044842539,
                2.0762815314005487, 4.8537715329554203, 1.2374476248622075]
     cluster_values = [-3.5923001345962162, 0.77069901513184735,
                       -4.942909785931378, -3.1580950032999096]
     bandwidth = 2.69474878768
     dmin = -13.8848342423
     dmax = 12.6744452247
     expected = [0.08663036966690765, 0.08809242907902183,
                 0.49712338305039777, 0.12248549621579163,
                 0.05708884005243133, 0.14857948193544993]

     result = util.density(kvalues, cluster_values, bandwidth, dmin, dmax)

     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
     # which no longer exists in recent Python versions.
     for index, expected_value in enumerate(expected):
         self.assertAlmostEqual(expected_value, result[index])
Ejemplo n.º 2
0
def get_cc_scores(membership, scores, bandwidth, cluster):
    """Calculate the density scores for the column score values of a cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        scores: score matrix providing column_values(index), num_rows and
            row_indexes_for(names).
        bandwidth: bandwidth handed through unchanged to util.density.
        cluster: cluster number; column ``cluster - 1`` of ``scores`` is
            read (presumably clusters are numbered from 1 - confirm
            against callers).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, at most one column, or no finite score.
        Otherwise the result of util.density evaluated over all scores of
        the cluster's column, using the scores at the indexes of the
        cluster's columns as the sample and the finite score range widened
        by one on each side as the bounds.
    """
    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = scores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) <= 1):
        # This is a little weird, but is here to at least attempt to simulate
        # what the original cMonkey is doing
        num_rows = scores.num_rows
        # range instead of xrange: same result on Python 2, and it does not
        # raise NameError on Python 3.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = scores.row_indexes_for(cluster_columns)
    cluster_scores = [kscores[index] for index in score_indexes]
    return util.density(kscores, cluster_scores, bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)
Ejemplo n.º 3
0
def get_rr_scores(membership, rowscores, bandwidth, cluster):
    """Calculate the density scores for the row score values of a cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        rowscores: score matrix providing column_values(index), num_rows
            and row_indexes_for(names).
        bandwidth: base bandwidth; it is multiplied by a factor derived
            from the number of rows in the cluster before being passed to
            util.density.
        cluster: cluster number; column ``cluster - 1`` of ``rowscores`` is
            read (presumably clusters are numbered from 1 - confirm
            against callers).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, no columns, or no finite score. Otherwise the
        result of util.density evaluated over all scores of the cluster's
        column, using the scores of the cluster's rows as the sample and
        the finite score range widened by one on each side as the bounds.
    """
    def bwscale(value):
        """Standard bandwidth scaling function for row scores."""
        return math.exp(-value / 10.0) * 10.0

    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = rowscores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) == 0):
        num_rows = rowscores.num_rows
        # range instead of xrange: same result on Python 2, and it does not
        # raise NameError on Python 3.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = rowscores.row_indexes_for(cluster_rows)
    cluster_scores = [kscores[index] for index in score_indexes]
    # The effective bandwidth depends on how many rows the cluster has.
    cluster_bandwidth = bandwidth * bwscale(len(cluster_rows))
    return util.density(kscores, cluster_scores, cluster_bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)