def test_qm_result_matrices(self):
     """qm_result_matrices maps each input matrix's ranks back onto tmp_mean"""
     m1 = dm.DataMatrix(2, 2, values=[[2, 1], [3, 4]])
     m2 = dm.DataMatrix(2, 2, values=[[6, 5], [4, 3]])
     tmp_mean = np.array([1.0, 2.0, 3.0, 4.0])
     result = dm.qm_result_matrices([m1, m2], tmp_mean)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(2, len(result))
     qm1, qm2 = result
     self.assertTrue((qm1.values == [[2, 1], [3, 4]]).all())
     self.assertTrue((qm2.values == [[4, 3], [2, 1]]).all())
 def test_as_sorted_flat_values(self):
     """tests that the flat values of the input matrices are
     all put in one big numpy array (sorted row-wise, NaN entries last)"""
     m1 = dm.DataMatrix(2, 2, values=[[2, np.nan], [3, 4]])
     m2 = dm.DataMatrix(2, 2, values=[[6, 5], [4, 3]])
     flat_values = as_sorted_flat_values([m1, m2])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(4, len(flat_values))
     self.assertTrue((flat_values[0] == [2, 3]).all())
     self.assertTrue((flat_values[1] == [3, 4]).all())
     self.assertTrue((flat_values[2] == [4, 5]).all())
     self.assertTrue(np.isnan(flat_values[3][0]))
     self.assertEqual(6, flat_values[3][1])
Esempio n. 3
0
def compute_row_scores(membership, matrix, num_clusters, use_multiprocessing):
    """For each cluster 1..num_clusters, compute the row scores for every
    row in the input matrix and return them as a DataMatrix whose rows are
    the input matrix's rows (genes) and whose columns are clusters.

    membership -- cluster membership object (project type)
    matrix -- dm.DataMatrix with the input values
    num_clusters -- number of clusters to score
    use_multiprocessing -- forwarded to the per-cluster computation
    """
    start_time = util.current_millis()
    # delegate per-cluster scoring; result is indexable 0-based by cluster
    cluster_row_scores = __compute_row_scores_for_clusters(
        membership, matrix, num_clusters, use_multiprocessing)
    # TODO: replace the nan/inf-Values with the quantile-thingy in the R-version

    logging.info("__compute_row_scores_for_clusters() in %f s.",
                 (util.current_millis() - start_time) / 1000.0)

    # rearrange result into a DataMatrix, where rows are indexed by gene
    # and columns represent clusters
    start_time = util.current_millis()
    values = np.zeros((matrix.num_rows, num_clusters))

    # note that cluster is 0 based on a matrix
    for cluster in xrange(num_clusters):
        row_scores = cluster_row_scores[cluster]
        # assumes each score vector has length matrix.num_rows -- TODO confirm
        values[:, cluster] = row_scores
    result = dm.DataMatrix(matrix.num_rows,
                           num_clusters,
                           row_names=matrix.row_names,
                           values=values)
    logging.info("made result matrix in %f s.",
                 (util.current_millis() - start_time) / 1000.0)
    return result
    def test_quantile_normalize_scores_with_no_weights(self):
        """no weights -> fall back to row means"""
        matrices = [dm.DataMatrix(2, 2, values=[[1, 3], [2, 4]]),
                    dm.DataMatrix(2, 2, values=[[2.3, 2.5], [2.1, 2.31]])]
        result = dm.quantile_normalize_scores(matrices, None)

        # expected values per output matrix, compared entry by entry
        expected = ([[1.55, 2.655], [2.15, 3.25]],
                    [[2.15, 3.25], [1.55, 2.655]])
        for idx, exp in enumerate(expected):
            out = result[idx].values
            for row in (0, 1):
                for col in (0, 1):
                    self.assertAlmostEqual(exp[row][col], out[row][col])
 def test_residual2(self):
     """checks residual() against a precomputed reference value"""
     values = [[1000, -4000, 7000],
               [-2000, 5000, -8000],
               [3000, -6000, 9000]]
     matrix = dm.DataMatrix(3, 3, values=values)
     self.assertAlmostEqual(4049.38271604938, matrix.residual())
    def test_quantile_normalize_scores_with_undefined_weight(self):
        """one undefined weight"""
        matrices = [dm.DataMatrix(2, 2, values=[[1, 3], [2, 4]]),
                    dm.DataMatrix(2, 2, values=[[2.3, 2.5], [2.1, 2.31]])]
        result = dm.quantile_normalize_scores(matrices, [6.0, np.nan])

        # expected values per output matrix, compared entry by entry
        expected = ([[1.0, 3.0], [2.0, 4.0]],
                    [[2.0, 4.0], [1.0, 3.0]])
        for idx, exp in enumerate(expected):
            out = result[idx].values
            for row in (0, 1):
                for col in (0, 1):
                    self.assertAlmostEqual(exp[row][col], out[row][col])
 def test_min(self):
     """tests the min() method: NaN and -inf entries do not win"""
     matrix = dm.DataMatrix(2,
                            2,
                            row_names=['R0', 'R1'],
                            col_names=['C0', 'C1'],
                            values=[[1, -np.inf], [np.nan, 4]])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(1, matrix.min())
    def test_quantile_normalize_scores_with_all_defined_weights(self):
        """happy path for quantile normalization"""
        matrices = [dm.DataMatrix(2, 2, values=[[1, 3], [2, 4]]),
                    dm.DataMatrix(2, 2, values=[[2.3, 2.5], [2.1, 2.31]])]
        result = dm.quantile_normalize_scores(matrices, [6.0, 1.0])

        # expected values per output matrix, compared entry by entry
        expected = ([[0.5785714, 1.45071428], [1.02142857, 1.89285714]],
                    [[1.02142857, 1.89285714], [0.5785714, 1.45071428]])
        for idx, exp in enumerate(expected):
            out = result[idx].values
            for row in (0, 1):
                for col in (0, 1):
                    self.assertAlmostEqual(exp[row][col], out[row][col])
Esempio n. 9
0
def compute_column_scores(membership, matrix, num_clusters,
                          use_multiprocessing=False):
    """Computes the column scores for the specified number of clusters.

    Returns a DataMatrix whose rows are the input matrix's columns
    (conditions) and whose columns are clusters; NaN/missing scores are
    replaced with the 0.95 quantile of the scores of actual cluster members.
    """

    def compute_substitution(cluster_column_scores):
        """calculate substitution value for missing column scores: the 0.95
        quantile of all scores whose column is a member of its cluster"""
        membership_values = []
        for cluster in xrange(1, num_clusters + 1):
            columns = membership.columns_for_cluster(cluster)
            column_scores = cluster_column_scores[cluster - 1]
            if column_scores is not None:
                colnames, scores = column_scores
                for col in xrange(len(colnames)):
                    if colnames[col] in columns:
                        membership_values.append(scores[col])
        return util.quantile(membership_values, 0.95)

    def make_submatrix(cluster):
        """submatrix of the cluster's rows; None for clusters with < 2 rows"""
        row_names = membership.rows_for_cluster(cluster)
        if len(row_names) > 1:
            return matrix.submatrix_by_name(row_names=row_names)
        else:
            return None

    if use_multiprocessing:
        pool = mp.Pool()
        cluster_column_scores = pool.map(compute_column_scores_submatrix,
                                         map(make_submatrix, xrange(1, num_clusters + 1)))
        pool.close()
        pool.join()
    else:
        cluster_column_scores = []
        for cluster in xrange(1, num_clusters + 1):
            cluster_column_scores.append(compute_column_scores_submatrix(
                make_submatrix(cluster)))

    substitution = compute_substitution(cluster_column_scores)

    # Convert scores into a matrix that have the clusters as columns
    # and conditions in the rows
    result = dm.DataMatrix(matrix.num_columns, num_clusters,
                           row_names=matrix.column_names)
    rvalues = result.values
    for cluster in xrange(num_clusters):
        column_scores = cluster_column_scores[cluster]
        if column_scores is None:
            # no scores for this cluster -> fill the whole column
            rvalues[:, cluster] = substitution
        else:
            # fix: unpack and NaN-patch the scores once per cluster instead
            # of re-unpacking inside a per-row Python loop, then assign the
            # whole column in a single vectorized step
            _, scores = column_scores
            scores[np.isnan(scores)] = substitution
            rvalues[:, cluster] = scores
    result.fix_extreme_values()
    return result
Esempio n. 10
0
 def test_row_values(self):
     """row_values() returns a copy that is detached from the matrix"""
     matrix = dm.DataMatrix(2, 3, values=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
     rowvals = matrix.row_values(0)
     self.assertTrue((rowvals == [1.0, 2.0, 3.0]).all())
     # mutating the returned array must leave the matrix untouched
     rowvals[0] = 42.0
     self.assertTrue((rowvals == [42.0, 2.0, 3.0]).all())
     original = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
     self.assertTrue((matrix.values == original).all())
Esempio n. 11
0
 def test_column_values(self):
     """column_values() returns a copy that is detached from the matrix"""
     matrix = dm.DataMatrix(2, 3, values=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
     colvals = matrix.column_values(1)
     self.assertTrue((colvals == [2.0, 5.0]).all())
     # mutating the returned array must leave the matrix untouched
     colvals[1] = 42.0
     self.assertTrue((colvals == [2.0, 42.0]).all())
     original = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
     self.assertTrue((matrix.values == original).all())
Esempio n. 12
0
 def test_remove_column(self):
     """remove one column: the column containing the NaN value is dropped"""
     matrix = dm.DataMatrix(2,
                            2, ['R1', 'R2'], ['C1', 'C2'],
                            values=[[0.001, -0.35], [np.nan, 0.42]])
     filtered = dm.nochange_filter(matrix)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(2, filtered.num_rows)
     self.assertEqual(1, filtered.num_columns)
     self.assertTrue((filtered.values == [[-0.35], [0.42]]).all())
Esempio n. 13
0
 def test_filter(self):
     """test the centering"""
     matrix = dm.DataMatrix(2,
                            2, ['R1', 'R2'], ['C1', 'C2'],
                            values=[[2, 3], [3, 4]])
     filtered = dm.center_scale_filter(matrix).values
     # both rows center/scale to (-1/sqrt(2), +1/sqrt(2))
     expected = 0.70710678237309499
     for row in (0, 1):
         self.assertAlmostEqual(-expected, filtered[row][0])
         self.assertAlmostEqual(expected, filtered[row][1])
Esempio n. 14
0
 def test_simple(self):
     """simplest test case: everything kept"""
     matrix = dm.DataMatrix(2,
                            2, ['R1', 'R2'], ['C1', 'C2'],
                            values=[[0.24, -0.35], [-0.42, 0.42]])
     filtered = dm.nochange_filter(matrix)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(2, filtered.num_rows)
     self.assertEqual(2, filtered.num_columns)
     self.assertTrue((filtered.values == [[0.24, -0.35], [-0.42,
                                                          0.42]]).all())
Esempio n. 15
0
 def test_sorted_by_rowname(self):
     """sorted_by_row_name() orders rows by name and keeps values aligned"""
     matrix = dm.DataMatrix(3,
                            3,
                            row_names=['R0', 'R2', 'R1'],
                            col_names=['C0', 'C1', 'C2'],
                            values=[[1, 2, 3], [4, 5, 6], [8, 9, 10]])
     sorted_matrix = matrix.sorted_by_row_name()
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(sorted_matrix.row_names, ['R0', 'R1', 'R2'])
     self.assertTrue((sorted_matrix.values == [[1, 2, 3], [8, 9, 10],
                                               [4, 5, 6]]).all())
Esempio n. 16
0
 def test_create_without_names(self):
     """create DataMatrix without row and column names: default names and
     zero values are generated"""
     matrix = dm.DataMatrix(3, 4)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(3, matrix.num_rows)
     self.assertEqual(4, matrix.num_columns)
     self.assertEqual(0.0, matrix.values[0][0])
     self.assertEqual("Row 0", matrix.row_names[0])
     self.assertEqual("Row 1", matrix.row_names[1])
     self.assertEqual("Col 0", matrix.column_names[0])
     self.assertEqual("Col 1", matrix.column_names[1])
Esempio n. 17
0
 def test_fix_extreme_values(self):
     """fix_extreme_values() replaces NaN/inf entries and clamps extremes
     in place"""
     values = [[-1.01, np.nan],
               [np.inf, -22.0],
               [-19.9, -25.3]]
     matrix = dm.DataMatrix(3, 2,
                            row_names=['R0', 'R1', 'R3'],
                            col_names=['C0', 'C1'],
                            values=values)
     matrix.fix_extreme_values()
     expected = [[-1.01, -1.01], [-1.01, -19.9], [-19.9, -19.9]]
     self.assertTrue((matrix.values == expected).all())
Esempio n. 18
0
 def test_submatrix_by_rows(self):
     """test creating sub matrices by providing row indexes"""
     matrix = dm.DataMatrix(4,
                            2,
                            row_names=['R0', 'R1', 'R2', 'R3'],
                            col_names=['C0', 'C1'],
                            values=[[1, 2], [3, 4], [5, 6], [7, 8]])
     submatrix = matrix.submatrix_by_rows([1, 3])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(submatrix.row_names, ['R1', 'R3'])
     self.assertEqual(submatrix.column_names, ['C0', 'C1'])
     self.assertTrue((submatrix.values == [[3, 4], [7, 8]]).all())
Esempio n. 19
0
 def test_submatrix_by_name_rows_only(self):
     """test creating sub matrices by row/column names"""
     matrix = dm.DataMatrix(4,
                            4,
                            row_names=['R0', 'R1', 'R2', 'R3'],
                            col_names=['C0', 'C1', 'C2', 'C3'],
                            values=[[1, 2, 3, 4], [4, 5, 6, 7],
                                    [8, 9, 10, 11], [12, 13, 14, 15]])
     submatrix = matrix.submatrix_by_name(row_names=['R0', 'R2'])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(submatrix.row_names, ['R0', 'R2'])
     self.assertTrue((submatrix.values == [[1, 2, 3, 4], [8, 9, 10,
                                                          11]]).all())
Esempio n. 20
0
 def test_create_with_names(self):
     """create DataMatrix with row and column names"""
     matrix = dm.DataMatrix(3, 2, ["MyRow1", "MyRow2", "MyRow3"],
                            ["MyCol1", "MyCol2"])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(3, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(0.0, matrix.values[0][0])
     self.assertEqual("MyRow1", matrix.row_names[0])
     self.assertEqual("MyRow2", matrix.row_names[1])
     self.assertEqual("MyCol1", matrix.column_names[0])
     self.assertEqual("MyCol2", matrix.column_names[1])
     self.assertIsNotNone(str(matrix))
Esempio n. 21
0
 def test_residual_var_normalize(self):
     """tests the residual() method. Note that this method
     seems to make rounding errors in the 5th place"""
     values = [[1000, -4000, 7000],
               [-2000, 5000, -8000],
               [3000, -6000, 9000]]
     matrix = dm.DataMatrix(3, 3, values=values)
     max_row_var = matrix.row_variance()
     self.assertAlmostEqual(0.000105128205128205,
                            matrix.residual(max_row_variance=max_row_var),
                            places=4)
Esempio n. 22
0
 def test_multiply_column_by(self):
     """tests the multiply_column_by method"""
     matrix = dm.DataMatrix(2,
                            2,
                            row_names=['R0', 'R1'],
                            col_names=['C0', 'C1'],
                            values=[[1, 2], [3, 4]])
     multiplied = matrix.multiply_column_by(1, 2)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(multiplied.row_names, ['R0', 'R1'])
     self.assertEqual(multiplied.column_names, ['C0', 'C1'])
     self.assertEqual(matrix, multiplied)
     self.assertTrue((multiplied.values == [[1, 4], [3, 8]]).all())
Esempio n. 23
0
 def test_submatrix_by_name_rows_and_cols_with_nonexisting(self):
     """test creating sub matrices by row/column name selection
     using non-existing names: unknown names are silently ignored"""
     matrix = dm.DataMatrix(4,
                            4,
                            row_names=['R0', 'R1', 'R2', 'R3'],
                            col_names=['C0', 'C1', 'C2', 'C3'],
                            values=[[1, 2, 3, 4], [4, 5, 6, 7],
                                    [8, 9, 10, 11], [12, 13, 14, 15]])
     submatrix = matrix.submatrix_by_name(row_names=['R0', 'R2', 'R5'],
                                          column_names=['C1', 'C3', 'C5'])
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(submatrix.row_names, ['R0', 'R2'])
     self.assertEqual(submatrix.column_names, ['C1', 'C3'])
     self.assertTrue((submatrix.values == [[2, 4], [9, 11]]).all())
Esempio n. 24
0
def pvalues2matrix(all_pvalues, num_clusters, gene_names, reverse_map):
    """Convert a map of {cluster: {feature: pvalue}} into a log-transformed
    scoring DataMatrix with one row per gene and one column per cluster."""
    gene_index = {gene: i for i, gene in enumerate(gene_names)}

    result = dm.DataMatrix(len(gene_names), num_clusters, gene_names)
    values = result.values
    for cluster, pvals in all_pvalues.items():
        col = cluster - 1  # clusters are 1-based, columns 0-based
        for feature_id, pval in pvals.items():
            # reverse_map translates feature ids back to gene names
            values[gene_index[reverse_map[feature_id]]][col] = pval

    result.apply_log()
    return result
Esempio n. 25
0
def get_col_density_scores(membership, col_scores):
    """Compute per-cluster column density scores and return them as a
    DataMatrix shaped like col_scores."""
    num_clusters = membership.num_clusters()
    # bandwidth is 1% of the score range, with a small lower bound
    bandwidth = max(abs(col_scores.max() - col_scores.min()) / 100.0, 0.001)
    cd_scores = dm.DataMatrix(col_scores.num_rows, col_scores.num_columns,
                              col_scores.row_names, col_scores.column_names)
    # assigning through the transpose fills one column per cluster in a
    # single numpy assignment instead of a per-row loop
    target = cd_scores.values.T

    started = util.current_millis()
    for cluster in xrange(1, num_clusters + 1):
        target[cluster - 1] = get_cc_scores(membership, col_scores,
                                            bandwidth, cluster)

    logging.info("CC_SCORES IN %f s.",
                 (util.current_millis() - started) / 1000.0)
    return cd_scores
Esempio n. 26
0
def get_row_density_scores(membership, row_scores):
    """getting density scores improves small clusters"""
    num_clusters = membership.num_clusters()
    # bandwidth is 1% of the score range, with a small lower bound
    bandwidth = max(abs(row_scores.max() - row_scores.min()) / 100.0, 0.001)
    rd_scores = dm.DataMatrix(row_scores.num_rows, row_scores.num_columns,
                              row_scores.row_names, row_scores.column_names)
    # assigning through the transpose fills one column per cluster in a
    # single numpy assignment instead of a per-row loop
    target = rd_scores.values.T

    started = util.current_millis()
    for cluster in xrange(1, num_clusters + 1):
        target[cluster - 1] = get_rr_scores(membership, row_scores,
                                            bandwidth, cluster)

    logging.info("RR_SCORES IN %f s.",
                 (util.current_millis() - started) / 1000.0)
    return rd_scores
Esempio n. 27
0
    def do_compute(self, iteration_result, ref_matrix=None):
        """compute method, iteration is the 0-based iteration number"""
        matrix = dm.DataMatrix(len(self.gene_names()), self.num_clusters(),
                               self.gene_names())
        network_scores = {}
        for network in self.networks():
            logging.debug("Compute scores for network '%s', WEIGHT: %f",
                          network.name, network.weight)
            started = util.current_millis()
            score = self.__compute_network_cluster_scores(network)
            network_scores[network.name] = score
            # fold this network's weighted scores into the result matrix
            self.__update_score_matrix(matrix, score, network.weight)
            logging.debug("NETWORK '%s' SCORING TIME: %f s.", network.name,
                          ((util.current_millis() - started) / 1000.0))

        # compute and store score means
        self.score_means = self.__update_score_means(network_scores)
        return matrix
Esempio n. 28
0
def combine(result_matrices, score_scalings, membership, iteration,
            config_params):
    """Combine n result matrices into one weighted score matrix.

    result_matrices -- list of dm.DataMatrix, one per scoring function;
                       matrix 0 is assumed to be the gene expression score
    score_scalings -- per-matrix scaling weights, applied in the final sum
    membership -- cluster membership object (non-quantile path only)
    iteration -- current iteration number (used for debug dumps)
    config_params -- global configuration dictionary

    Returns a dm.DataMatrix of the weighted sum of the (normalized) input
    matrices, or None when result_matrices is empty.
    """
    quantile_normalize = config_params['quantile_normalize']

    for i, m in enumerate(result_matrices):
        m.fix_extreme_values()
        m.subtract_with_quantile(0.99)

        # debug mode: print scoring matrices before combining
        if ('dump_scores' in config_params['debug']
                and (iteration == 1 or
                     (iteration % config_params['debug_freq'] == 0))):
            funs = config_params['pipeline']['row-scoring']['args'][
                'functions']
            m.write_tsv_file(os.path.join(
                config_params['output_dir'],
                'score-%s-%04d.tsv' % (funs[i]['id'], iteration)),
                             compressed=False)

    if quantile_normalize:
        # normalization is only meaningful with more than one matrix
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(
                result_matrices, score_scalings)
            elapsed = util.current_millis() - start_time
            logging.debug("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        # non-quantile path: standardize matrix 0 by the median/MAD of the
        # scores of actual cluster members, then rescale the remaining
        # matrices onto a comparable range
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([
                mat.values[index_map[row], cluster - 1] for row in row_members
            ])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows,
                                    num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            # both MAD and stddev are 0 -> leave matrix 0 unscaled
            logging.warn("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            # match the other matrices' 1% quantile to matrix 0's
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.debug("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    # fallback 1 for very sparse score matrices
                    logging.debug(
                        'SPARSE SCORES - %d attempt 1: pick from sorted values',
                        i)
                    qqq = sorted(values.ravel())[9]
                if qqq == 0:
                    # fallback 2: use the minimum value as scale
                    logging.debug(
                        'SPARSE SCORES - %d attempt 2: pick minimum value', i)
                    qqq = abs(values.min())
                if qqq != 0:
                    values = values / qqq * abs(rs_quant)
                else:
                    logging.debug('SPARSE SCORES - %d not normalizing!', i)
                in_matrices.append(values)

    if len(result_matrices) > 0:
        start_time = util.current_millis()
        # assuming same format of all matrices
        combined_score = np.zeros(in_matrices[0].shape)
        for i in xrange(len(in_matrices)):
            combined_score += in_matrices[i] * score_scalings[i]

        elapsed = util.current_millis() - start_time
        logging.debug("combined score in %f s.", elapsed / 1000.0)
        matrix0 = result_matrices[0]  # as reference for names
        return dm.DataMatrix(matrix0.num_rows,
                             matrix0.num_columns,
                             matrix0.row_names,
                             matrix0.column_names,
                             values=combined_score)
    else:
        return None
Esempio n. 29
0
def compute_column_scores(membership,
                          matrix,
                          num_clusters,
                          config_params,
                          BSCM_obj=None):
    """Computes the column scores for the specified number of clusters.

    Scores come from BSCM_obj.getPvals() when a BSCM object is given,
    otherwise from compute_column_scores_submatrix(). Returns a DataMatrix
    with the input matrix's columns (conditions) as rows and the clusters
    as columns; NaN/missing scores are replaced with the 0.95 quantile of
    the scores of actual cluster members.
    """
    def compute_substitution(cluster_column_scores):
        """calculate substitution value for missing column scores"""
        membership_values = []
        for cluster in xrange(1, num_clusters + 1):
            columns = membership.columns_for_cluster(cluster)
            # cluster_column_scores is 0-based, clusters are 1-based
            column_scores = cluster_column_scores[cluster - 1]
            if column_scores is not None:
                colnames, scores = column_scores
                for col in xrange(len(colnames)):
                    if colnames[col] in columns:
                        membership_values.append(scores[col])
        return util.quantile(membership_values, 0.95)

    def make_submatrix(cluster):
        """submatrix of the cluster's rows; None for clusters with < 2 rows"""
        row_names = membership.rows_for_cluster(cluster)
        if len(row_names) > 1:
            return matrix.submatrix_by_name(row_names=row_names)
        else:
            return None

    cluster_column_scores = []  #To be filled or overwritten
    if BSCM_obj is None:
        if config_params['multiprocessing']:
            with util.get_mp_pool(config_params) as pool:
                cluster_column_scores = pool.map(
                    compute_column_scores_submatrix,
                    map(make_submatrix, xrange(1, num_clusters + 1)))
        else:
            for cluster in xrange(1, num_clusters + 1):
                cluster_column_scores.append(
                    compute_column_scores_submatrix(make_submatrix(cluster)))
    else:  #if BSCM_obj exists
        num_cores = 1
        if not config_params['num_cores'] is None:
            num_cores = config_params['num_cores']

        for cluster in xrange(1, num_clusters + 1):
            if make_submatrix(cluster) is None:
                cluster_column_scores.append(None)
            else:
                # NOTE(review): getPvals presumably returns a {name: pval}
                # dict; under Python 3, keys() is a view and
                # np.array(dict.values()) does not yield a numeric array --
                # verify when porting
                cur_column_scores = BSCM_obj.getPvals(
                    make_submatrix(cluster).row_names, num_cores=num_cores)
                exp_names = cur_column_scores.keys()
                exp_scores = np.array(cur_column_scores.values())
                cluster_column_scores.append((exp_names, exp_scores))

    substitution = compute_substitution(cluster_column_scores)

    # Convert scores into a matrix that have the clusters as columns
    # and conditions in the rows
    result = dm.DataMatrix(matrix.num_columns,
                           num_clusters,
                           row_names=matrix.column_names)
    rvalues = result.values
    for cluster in xrange(num_clusters):
        column_scores = cluster_column_scores[cluster]

        if column_scores is not None:
            _, scores = column_scores
            # patch NaN scores with the substitution value
            scores[np.isnan(scores)] = substitution

        for row_index in xrange(matrix.num_columns):
            if column_scores is None:
                rvalues[row_index, cluster] = substitution
            else:
                _, scores = column_scores
                rvalues[row_index, cluster] = scores[row_index]
    result.fix_extreme_values()
    return result
    def do_compute(self, iteration_result, ref_matrix):
        """compute method
        Note: will return None if not computed yet and the result of a previous
        scoring if the function is not supposed to actually run in this iteration

        Accumulates weighted per-gene set enrichment scores over all set
        types into one DataMatrix, and appends each cluster's best set and
        p-value to CSV files in the output directory.
        """
        # module-level globals share state with multiprocessing workers
        global SET_MATRIX, SET_MEMBERSHIP, SET_SET_TYPE, SET_SYNONYMS, CANONICAL_ROWNAMES, CANONICAL_ROW_INDEXES
        logging.info("Compute scores for set enrichment...")
        start_time = util.current_millis()
        matrix = dm.DataMatrix(len(self.gene_names()), self.num_clusters(),
                               self.gene_names())
        use_multiprocessing = self.config_params[scoring.KEY_MULTIPROCESSING]
        SET_MATRIX = self.ratios
        SET_MEMBERSHIP = self.membership
        SET_SYNONYMS = self.organism.thesaurus()

        # canonical (thesaurus) names for all ratio rows, computed once
        if CANONICAL_ROWNAMES is None:
            CANONICAL_ROWNAMES = set(
                map(lambda n: SET_SYNONYMS[n]
                    if n in SET_SYNONYMS else n, self.ratios.row_names))

        # canonical name -> row index lookup, computed once
        if CANONICAL_ROW_INDEXES is None:
            CANONICAL_ROW_INDEXES = {}
            for index, row in enumerate(self.ratios.row_names):
                if row in SET_SYNONYMS:
                    CANONICAL_ROW_INDEXES[SET_SYNONYMS[row]] = index
                else:
                    CANONICAL_ROW_INDEXES[row] = index

        # NOTE(review): assumes ref_matrix is never None here -- confirm callers
        ref_min_score = ref_matrix.min()
        logging.info('REF_MIN_SCORE: %f', ref_min_score)

        set_filepath = os.path.join(self.config_params['output_dir'],
                                    'setEnrichment_set.csv')
        pval_filepath = os.path.join(self.config_params['output_dir'],
                                     'setEnrichment_pvalue.csv')

        for set_type in self.__set_types:
            SET_SET_TYPE = set_type
            logging.info("PROCESSING SET TYPE '%s'", set_type.name)
            start1 = util.current_millis()
            if use_multiprocessing:
                with util.get_mp_pool(self.config_params) as pool:
                    results = pool.map(
                        compute_cluster_score,
                        [(cluster, self.bonferroni_cutoff(), ref_min_score)
                         for cluster in xrange(1,
                                               self.num_clusters() + 1)])
            else:
                results = []
                for cluster in xrange(1, self.num_clusters() + 1):
                    results.append(
                        compute_cluster_score(
                            (cluster, self.bonferroni_cutoff(),
                             ref_min_score)))

            elapsed1 = util.current_millis() - start1
            logging.info("ENRICHMENT SCORES COMPUTED in %f s, STORING...",
                         elapsed1 / 1000.0)

            # write CSV headers on first use, append afterwards
            # NOTE(review): plain open()/close() without try/finally; an
            # exception below would leak the handles -- consider 'with'
            if not os.path.exists(set_filepath):
                setFile = open(set_filepath, 'w')
                setFile.write(',' + ','.join(
                    [str(i) for i in xrange(1,
                                            self.num_clusters() + 1)]))
                pvFile = open(pval_filepath, 'w')
                pvFile.write(',' + ','.join(
                    [str(i) for i in xrange(1,
                                            self.num_clusters() + 1)]))
            else:
                setFile = open(set_filepath, 'a')
                pvFile = open(pval_filepath, 'a')

            minSets = []
            pValues = []
            for cluster in xrange(1, self.num_clusters() + 1):
                # store the best enriched set determined
                scores, min_set, min_pvalue = results[cluster - 1]
                minSets.append(min_set)
                pValues.append(min_pvalue)

                # accumulate this set type's weighted scores per gene
                for row in xrange(len(self.gene_names())):
                    matrix.values[row][cluster -
                                       1] += scores[row] * set_type.weight
            setFile.write('\n' + str(iteration_result['iteration']) + ',' +
                          ','.join([str(i) for i in minSets]))
            pvFile.write('\n' + str(iteration_result['iteration']) + ',' +
                         ','.join([str(i) for i in pValues]))
            setFile.close()
            pvFile.close()

        logging.info("SET ENRICHMENT FINISHED IN %f s.\n",
                     (util.current_millis() - start_time) / 1000.0)
        # cleanup: release shared globals so workers don't pin large objects
        SET_SET_TYPE = None
        SET_MATRIX = None
        SET_MEMBERSHIP = None
        SET_SYNONYMS = None

        return matrix