def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For every cluster number in 1..num_clusters the rows whose first
    membership entry equals that cluster are collected into a submatrix,
    the column scores of that submatrix are computed, and their negation
    is stored as column (cluster number - 1) of a score table with
    num_columns rows and num_clusters columns. Each row of that table is
    then handed to util.rorder together with num_clusters_per_column.

    :param data_matrix: matrix exposing num_rows, num_columns, row_names
                        and submatrix_by_name(row_names=...)
    :param row_membership: indexable by row index; element [0] of each
                           entry is compared against the cluster number
    :param num_clusters: number of clusters; clusters are numbered from 1
    :param num_clusters_per_column: second argument passed to util.rorder
                                    (presumably how many clusters to keep
                                    per column - confirm against util)
    :return: list with one util.rorder result per matrix column
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns

    # one row per matrix column, one column per cluster
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # names of the rows seeded into this cluster
        current_cluster_rows = [data_matrix.row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        # writing through the transposed view fills one column of cscores;
        # scores are negated before they are ranked by util.rorder
        cscores.T[cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # The message reports seconds; current_millis presumably yields
    # milliseconds, so divide by 1000. The previous "% 1000.0" logged only
    # the sub-second millisecond remainder.
    logging.info("seed column members in %f s.", elapsed / 1000.0)
    return column_members
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    When several input ratio matrices are in play, they are assumed to
    have been merged into data_matrix already.

    Builds, for each cluster 1..num_clusters, the submatrix of rows whose
    first membership entry is that cluster, takes element [0] of the
    negated column scores of that submatrix, and finally, per column,
    keeps the first num_clusters_per_column entries of order() applied to
    that column's scores across all clusters.
    """
    row_count = data_matrix.num_rows()
    col_count = data_matrix.num_columns()

    # negated column scores, one entry per cluster
    per_cluster_scores = []
    for cluster in xrange(1, num_clusters + 1):
        member_names = [data_matrix.row_name(row)
                        for row in xrange(row_count)
                        if row_membership[row][0] == cluster]
        cluster_matrix = data_matrix.submatrix_by_name(row_names=member_names)
        negated = -scoring.compute_column_scores_submatrix(cluster_matrix)
        per_cluster_scores.append(negated[0])

    # for each column: gather its score in every cluster, rank, truncate
    return [
        order([per_cluster_scores[cluster_index][column]
               for cluster_index in xrange(num_clusters)]
              )[:num_clusters_per_column]
        for column in xrange(col_count)
    ]
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For every cluster number in 1..num_clusters the rows whose first
    membership entry equals that cluster are collected into a submatrix,
    the column scores of that submatrix are computed, and their negation
    is stored as column (cluster number - 1) of a score table with
    num_columns rows and num_clusters columns. Each row of that table is
    then handed to util.rorder together with num_clusters_per_column.

    :param data_matrix: matrix exposing num_rows, num_columns, row_names
                        and submatrix_by_name(row_names=...)
    :param row_membership: indexable by row index; element [0] of each
                           entry is compared against the cluster number
    :param num_clusters: number of clusters; clusters are numbered from 1
    :param num_clusters_per_column: second argument passed to util.rorder
                                    (presumably how many clusters to keep
                                    per column - confirm against util)
    :return: list with one util.rorder result per matrix column
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns

    # one row per matrix column, one column per cluster
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # names of the rows seeded into this cluster
        current_cluster_rows = [data_matrix.row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        # writing through the transposed view fills one column of cscores;
        # scores are negated before they are ranked by util.rorder
        cscores.T[cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # The message reports seconds; current_millis presumably yields
    # milliseconds, so divide by 1000. The previous "% 1000.0" logged only
    # the sub-second millisecond remainder.
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For every cluster number in 1..num_clusters the rows whose first
    membership entry equals that cluster are collected into a submatrix
    and the negated first row of its column-score values is recorded.
    Then, for each matrix column, the scores of that column across all
    clusters are passed to order() and the first num_clusters_per_column
    entries of the result are kept.

    :param data_matrix: matrix exposing num_rows, num_columns, row_names
                        and submatrix_by_name(row_names=...)
    :param row_membership: indexable by row index; element [0] of each
                           entry is compared against the cluster number
    :param num_clusters: number of clusters; clusters are numbered from 1
    :param num_clusters_per_column: number of leading order() entries
                                    kept for each column
    :return: list with one truncated order() result per matrix column
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns

    # column_scores[k] holds the negated column scores of cluster k + 1
    column_scores = []
    for cluster_num in xrange(1, num_clusters + 1):
        # names of the rows seeded into this cluster
        current_cluster_rows = [data_matrix.row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        score_matrix = scoring.compute_column_scores_submatrix(submatrix)
        # subscript binds tighter than unary minus: negate row 0 of values
        column_scores.append(-score_matrix.values[0])

    start_time = util.current_millis()
    column_members = []
    for column_index in xrange(num_cols):
        # this column's score in every cluster, in cluster order
        scores_to_order = [column_scores[cluster_index][column_index]
                           for cluster_index in xrange(num_clusters)]
        column_members.append(
            order(scores_to_order)[:num_clusters_per_column])
    elapsed = util.current_millis() - start_time
    # The message reports seconds; current_millis presumably yields
    # milliseconds, so divide by 1000. The previous "% 1000.0" logged only
    # the sub-second millisecond remainder.
    logging.info("seed column members in %f s.", elapsed / 1000.0)
    return column_members
def test_compute_column_scores_submatrix(self):
    """Checks compute_column_scores_submatrix against known scores.

    Scores the 10x5 MATRIX1 fixture and compares the first row of the
    resulting values with five reference numbers, one per column.
    """
    row_labels = ['R%d' % number for number in range(1, 11)]
    column_labels = ['C%d' % number for number in range(1, 6)]
    expected_scores = [0.03085775, 0.05290099, 0.05277032,
                       0.00358045, 0.03948821]

    matrix = dm.DataMatrix(10, 5, row_labels, column_labels, MATRIX1)
    scores = scoring.compute_column_scores_submatrix(matrix).values[0]

    self.assertEqual(5, len(scores))
    # same comparisons, in the same column order, as a table-driven loop
    for column, expected in enumerate(expected_scores):
        self.assertAlmostEqual(expected, scores[column])