def test_density(self):
    """Check util.density() against fixed reference values.

    The expected numbers are regression values recorded in this test;
    their provenance is not visible here (presumably taken from the
    reference implementation -- confirm before changing them).
    """
    kvalues = [
        3.4268700450682301, 3.3655160468930152, -8.0654569044842539,
        2.0762815314005487, 4.8537715329554203, 1.2374476248622075,
    ]
    cluster_values = [
        -3.5923001345962162, 0.77069901513184735, -4.942909785931378,
        -3.1580950032999096,
    ]
    bandwidth = 2.69474878768
    dmin = -13.8848342423
    dmax = 12.6744452247
    expected = [
        0.08663036966690765, 0.08809242907902183, 0.49712338305039777,
        0.12248549621579163, 0.05708884005243133, 0.14857948193544993,
    ]

    result = util.density(kvalues, cluster_values, bandwidth, dmin, dmax)

    # assertAlmostEqual is the supported spelling; the assertAlmostEquals
    # alias is deprecated and no longer exists in Python 3.12+.
    # Index explicitly so that a too-short result fails the test.
    for index, expected_value in enumerate(expected):
        self.assertAlmostEqual(expected_value, result[index])
def get_cc_scores(membership, scores, bandwidth, cluster):
    """Calculate the density scores for the given column score values
    in the specified cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        scores: object providing column_values(index), num_rows and
            row_indexes_for(names).
        bandwidth: bandwidth handed through to util.density().
        cluster: cluster number; column ``cluster - 1`` of ``scores`` is
            read, so cluster numbers appear to be 1-based (confirm
            against the caller).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, at most one column, or no finite scores;
        otherwise whatever util.density() returns.
    """
    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = scores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) <= 1):
        # This is a little weird, but is here to at least attempt to simulate
        # what the original cMonkey is doing
        num_rows = scores.num_rows
        # range() instead of xrange() so this branch also runs on Python 3;
        # the comprehension keeps the "empty list for zero rows" behavior
        # without ever dividing by zero.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = scores.row_indexes_for(cluster_columns)
    cluster_scores = [kscores[index] for index in score_indexes]
    # The density range is the finite score range widened by 1 on each side.
    return util.density(kscores, cluster_scores, bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)
def test_density(self):
    """Check util.density() against fixed reference values.

    The expected numbers are regression values recorded in this test;
    their provenance is not visible here (presumably taken from the
    reference implementation -- confirm before changing them).
    """
    kvalues = [
        3.4268700450682301, 3.3655160468930152, -8.0654569044842539,
        2.0762815314005487, 4.8537715329554203, 1.2374476248622075,
    ]
    cluster_values = [
        -3.5923001345962162, 0.77069901513184735, -4.942909785931378,
        -3.1580950032999096,
    ]
    bandwidth = 2.69474878768
    dmin = -13.8848342423
    dmax = 12.6744452247
    expected = [
        0.08663036966690765, 0.08809242907902183, 0.49712338305039777,
        0.12248549621579163, 0.05708884005243133, 0.14857948193544993,
    ]

    result = util.density(kvalues, cluster_values, bandwidth, dmin, dmax)

    # assertAlmostEqual is the supported spelling; the assertAlmostEquals
    # alias is deprecated and no longer exists in Python 3.12+.
    # Index explicitly so that a too-short result fails the test.
    for index, expected_value in enumerate(expected):
        self.assertAlmostEqual(expected_value, result[index])
def get_cc_scores(membership, scores, bandwidth, cluster):
    """Calculate the density scores for the given column score values
    in the specified cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        scores: object providing column_values(index), num_rows and
            row_indexes_for(names).
        bandwidth: bandwidth handed through to util.density().
        cluster: cluster number; column ``cluster - 1`` of ``scores`` is
            read, so cluster numbers appear to be 1-based (confirm
            against the caller).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, at most one column, or no finite scores;
        otherwise whatever util.density() returns.
    """
    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = scores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) <= 1):
        # This is a little weird, but is here to at least attempt to simulate
        # what the original cMonkey is doing
        num_rows = scores.num_rows
        # range() instead of xrange() so this branch also runs on Python 3;
        # the comprehension keeps the "empty list for zero rows" behavior
        # without ever dividing by zero.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = scores.row_indexes_for(cluster_columns)
    cluster_scores = [kscores[index] for index in score_indexes]
    # The density range is the finite score range widened by 1 on each side.
    return util.density(kscores, cluster_scores, bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)
def get_rr_scores(membership, rowscores, bandwidth, cluster):
    """Calculate the density scores for the given row score values
    in the specified cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        rowscores: object providing column_values(index), num_rows and
            row_indexes_for(names).
        bandwidth: base bandwidth; it is multiplied by a factor derived
            from the number of rows in the cluster before being handed to
            util.density().
        cluster: cluster number; column ``cluster - 1`` of ``rowscores``
            is read, so cluster numbers appear to be 1-based (confirm
            against the caller).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, no columns, or no finite scores; otherwise
        whatever util.density() returns.
    """
    def bwscale(value):
        """standard bandwidth scaling function for row scores"""
        return math.exp(-value / 10.0) * 10.0

    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = rowscores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) == 0):
        num_rows = rowscores.num_rows
        # range() instead of xrange() so this branch also runs on Python 3;
        # the comprehension keeps the "empty list for zero rows" behavior
        # without ever dividing by zero.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = rowscores.row_indexes_for(cluster_rows)
    cluster_scores = [kscores[index] for index in score_indexes]
    # The scaling factor decays exponentially with the cluster's row count.
    cluster_bandwidth = bandwidth * bwscale(len(cluster_rows))
    # The density range is the finite score range widened by 1 on each side.
    return util.density(kscores, cluster_scores, cluster_bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)
def get_rr_scores(membership, rowscores, bandwidth, cluster):
    """Calculate the density scores for the given row score values
    in the specified cluster.

    Args:
        membership: object providing rows_for_cluster(cluster) and
            columns_for_cluster(cluster).
        rowscores: object providing column_values(index), num_rows and
            row_indexes_for(names).
        bandwidth: base bandwidth; it is multiplied by a factor derived
            from the number of rows in the cluster before being handed to
            util.density().
        cluster: cluster number; column ``cluster - 1`` of ``rowscores``
            is read, so cluster numbers appear to be 1-based (confirm
            against the caller).

    Returns:
        A list of ``num_rows`` entries, each ``1.0 / num_rows``, when the
        cluster has no rows, no columns, or no finite scores; otherwise
        whatever util.density() returns.
    """
    def bwscale(value):
        """standard bandwidth scaling function for row scores"""
        return math.exp(-value / 10.0) * 10.0

    cluster_rows = membership.rows_for_cluster(cluster)
    cluster_columns = membership.columns_for_cluster(cluster)
    kscores = rowscores.column_values(cluster - 1)
    kscores_finite = kscores[np.isfinite(kscores)]

    if (len(cluster_rows) == 0 or len(kscores_finite) == 0
            or len(cluster_columns) == 0):
        num_rows = rowscores.num_rows
        # range() instead of xrange() so this branch also runs on Python 3;
        # the comprehension keeps the "empty list for zero rows" behavior
        # without ever dividing by zero.
        return [1.0 / num_rows for _ in range(num_rows)]

    score_indexes = rowscores.row_indexes_for(cluster_rows)
    cluster_scores = [kscores[index] for index in score_indexes]
    # The scaling factor decays exponentially with the cluster's row count.
    cluster_bandwidth = bandwidth * bwscale(len(cluster_rows))
    # The density range is the finite score range widened by 1 on each side.
    return util.density(kscores, cluster_scores, cluster_bandwidth,
                        np.amin(kscores_finite) - 1,
                        np.amax(kscores_finite) + 1)