def get_best_clusters(scores, n, sort=False):
    """Retrieve the n best scored clusters for every row of a score matrix.

    scores -- row/column score matrix; this function reads its
              num_rows, row_names and row_values(row) members
    n      -- number of clusters to keep per row (forwarded to util.rorder)
    sort   -- when true, each row's result is passed through sorted()

    Returns a dictionary mapping each row name to the value produced by
    util.rorder() for that row (a sorted list if sort is true).
    """
    best_by_row = {}
    for index in xrange(scores.num_rows):
        picked = util.rorder(scores.row_values(index), n)
        if sort:
            picked = sorted(picked)
        best_by_row[scores.row_names[index]] = picked
    return best_by_row
def get_best_clusters(scores, n, sort=False):
    """Build a {row name: best clusters} dictionary from a score matrix.

    For each row index in 0..scores.num_rows - 1 the row's values are
    ranked with util.rorder(values, n), which yields the n best scored
    clusters for that row.  If sort is true the ranked result is run
    through sorted() before it is stored.
    """
    def best_for(row):
        # rank one row; optionally normalize the order of the result
        ranked = util.rorder(scores.row_values(row), n)
        return sorted(ranked) if sort else ranked

    return {scores.row_names[row]: best_for(row)
            for row in xrange(scores.num_rows)}
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best')

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For each cluster number 1..num_clusters, the rows whose first
    membership entry (row_membership[row_index][0]) equals that cluster
    are gathered into a submatrix, column scores are computed for it and
    their negation is stored as that cluster's column of a
    (num_columns x num_clusters) score array.  Every column of
    data_matrix is then assigned
    util.rorder(<its score row>, num_clusters_per_column).

    Returns a list with one util.rorder() result per column of
    data_matrix, in column order.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns

    # cscores[column, cluster - 1] holds the negated column score of
    # `column` within the submatrix of `cluster`
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # only the first membership entry of a row decides its seed cluster
        current_cluster_rows = [
            data_matrix.row_names[row_index]
            for row_index in xrange(num_rows)
            if row_membership[row_index][0] == cluster_num
        ]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        cscores[:, cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # The message is labelled in seconds; elapsed is presumably in
    # milliseconds (util.current_millis), so convert by dividing.  The
    # previous `elapsed % 1000.0` dropped whole seconds instead.
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best')

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For each cluster number 1..num_clusters, the rows whose first
    membership entry (row_membership[row_index][0]) equals that cluster
    are gathered into a submatrix, column scores are computed for it and
    their negation is stored as that cluster's column of a
    (num_columns x num_clusters) score array.  Every column of
    data_matrix is then assigned
    util.rorder(<its score row>, num_clusters_per_column).

    Returns a list with one util.rorder() result per column of
    data_matrix, in column order.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns

    # cscores[column, cluster - 1] holds the negated column score of
    # `column` within the submatrix of `cluster`
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # only the first membership entry of a row decides its seed cluster
        current_cluster_rows = [
            data_matrix.row_names[row_index]
            for row_index in xrange(num_rows)
            if row_membership[row_index][0] == cluster_num
        ]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        cscores[:, cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # The message is labelled in seconds; elapsed is presumably in
    # milliseconds (util.current_millis), so convert by dividing.  The
    # previous `elapsed % 1000.0` dropped whole seconds instead.
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members