コード例 #1
0
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    For every cluster number 1..num_clusters, the rows whose first
    membership entry equals that cluster are gathered into a submatrix,
    which is scored with scoring.compute_column_scores_submatrix(). The
    negated scores are stored per (column, cluster) and each column's
    scores are then handed to util.rorder().

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    Args:
        data_matrix: matrix exposing num_rows, num_columns, row_names and
            submatrix_by_name(row_names=...).
        row_membership: indexable by row index; only element [0] of each
            entry is consulted when assigning rows to clusters.
        num_clusters: number of clusters; cluster numbers are 1-based.
        num_clusters_per_column: forwarded unchanged to util.rorder() for
            every column (presumably the number of clusters kept per
            column -- confirm against util.rorder).

    Returns:
        A list with one util.rorder() result per column of data_matrix.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns
    row_names = data_matrix.row_names

    # cscores[column, cluster_num - 1] holds the negated column score of
    # that cluster's submatrix
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        current_cluster_rows = [row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        cscores[:, cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [
        util.rorder(cscores[column_index], num_clusters_per_column)
        for column_index in xrange(num_cols)
    ]
    elapsed = util.current_millis() - start_time
    # elapsed is a millisecond delta: divide (the original took it modulo
    # 1000) so the logged value really is in seconds, as the message says
    logging.info("seed column members in %f s.", elapsed / 1000.0)
    return column_members
コード例 #2
0
ファイル: microarray.py プロジェクト: fschmitz/cmonkey-python
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Seed the column memberships with the default ('best') strategy.

    If several input ratio matrices are used, they are expected to have
    been merged into data_matrix beforehand.

    For each cluster number 1..num_clusters the rows whose first
    membership entry equals that cluster form a submatrix; element [0] of
    its negated column score result is kept. For every column the
    per-cluster scores are passed to order() and the first
    num_clusters_per_column entries of the result are retained.

    Returns a list with one such truncated ordering per column.
    """
    row_count = data_matrix.num_rows()
    column_count = data_matrix.num_columns()

    # negated column scores, one entry per cluster (index = cluster - 1)
    scores_per_cluster = []
    for cluster in xrange(1, num_clusters + 1):
        member_names = [data_matrix.row_name(row)
                        for row in xrange(row_count)
                        if row_membership[row][0] == cluster]
        cluster_matrix = data_matrix.submatrix_by_name(row_names=member_names)
        negated = -scoring.compute_column_scores_submatrix(cluster_matrix)
        scores_per_cluster.append(negated[0])

    members = []
    for column in xrange(column_count):
        # collect this column's score from every cluster, then rank them
        candidates = [scores_per_cluster[cluster_index][column]
                      for cluster_index in xrange(num_clusters)]
        ranked = order(candidates)
        members.append(ranked[:num_clusters_per_column])
    return members
コード例 #3
0
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    Builds one submatrix per cluster from the rows whose first membership
    entry equals the (1-based) cluster number, scores its columns with
    scoring.compute_column_scores_submatrix() and stores the negated
    scores. Each column's scores across all clusters are then passed to
    util.rorder().

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    Args:
        data_matrix: matrix exposing num_rows, num_columns, row_names and
            submatrix_by_name(row_names=...).
        row_membership: indexable by row index; only element [0] of each
            entry is used to decide the row's cluster.
        num_clusters: number of clusters; cluster numbers start at 1.
        num_clusters_per_column: forwarded unchanged to util.rorder()
            (presumably how many clusters each column is seeded with --
            confirm against util.rorder).

    Returns:
        A list holding one util.rorder() result for each column.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns
    row_names = data_matrix.row_names

    # rows: columns of data_matrix, columns: clusters (0-based)
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        current_cluster_rows = [
            row_names[row_index] for row_index in xrange(num_rows)
            if row_membership[row_index][0] == cluster_num
        ]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        cscores[:, cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # convert the millisecond delta to seconds; the previous modulo only
    # reported the sub-second remainder while labelling it as seconds
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members
コード例 #4
0
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best').

    For each cluster number 1..num_clusters, the rows whose first
    membership entry equals that cluster are collected into a submatrix.
    The first row of the values returned by
    scoring.compute_column_scores_submatrix() is negated and kept. For
    every column, the per-cluster scores are passed to order() and the
    first num_clusters_per_column entries of the result are retained.

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    Args:
        data_matrix: matrix exposing num_rows, num_columns, row_names and
            submatrix_by_name(row_names=...).
        row_membership: indexable by row index; only element [0] of each
            entry is used to decide the row's cluster.
        num_clusters: number of clusters; cluster numbers start at 1.
        num_clusters_per_column: number of leading entries of each
            column's ordering that are kept.

    Returns:
        A list with one truncated order() result per column.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns
    row_names = data_matrix.row_names

    # column_scores[cluster_num - 1][column_index] is the negated score
    column_scores = []
    for cluster_num in xrange(1, num_clusters + 1):
        current_cluster_rows = [row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        result = scoring.compute_column_scores_submatrix(submatrix)
        column_scores.append(-result.values[0])

    column_members = []
    start_time = util.current_millis()
    for column_index in xrange(num_cols):
        scores_to_order = [cluster_scores[column_index]
                           for cluster_scores in column_scores]
        column_members.append(order(scores_to_order)[:num_clusters_per_column])
    elapsed = util.current_millis() - start_time
    # elapsed is in milliseconds: divide rather than take the remainder so
    # the value matches the "s." unit in the message
    logging.info("seed column members in %f s.", elapsed / 1000.0)
    return column_members
コード例 #5
0
 def test_compute_column_scores_submatrix(self):
     """Checks the per-column scores computed for the MATRIX1 fixture."""
     row_names = ['R1', 'R2', 'R3', 'R4', 'R5',
                  'R6', 'R7', 'R8', 'R9', 'R10']
     column_names = ['C1', 'C2', 'C3', 'C4', 'C5']
     matrix = dm.DataMatrix(10, 5, row_names, column_names, MATRIX1)
     scores = scoring.compute_column_scores_submatrix(matrix).values[0]

     # one expected score per column, in column order C1..C5
     expected_scores = (0.03085775, 0.05290099, 0.05277032,
                        0.00358045, 0.03948821)
     self.assertEqual(5, len(scores))
     for column_index, expected in enumerate(expected_scores):
         self.assertAlmostEqual(expected, scores[column_index])