コード例 #1
0
ファイル: datamatrix.py プロジェクト: sdanzige/cmonkey-python
def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize the given score matrices against each other.

    The `.values` of every matrix in `matrices` are flattened and sorted;
    the sorted vectors become the columns of a single array. The row means
    of that array are then computed (when `weights` is given, the columns
    are first multiplied by the weights and the means are divided by the
    NaN-masked sum of the weights). Those means and the input matrices are
    passed to qm_result_matrices(), whose result is returned.

    The duration of each phase is reported through logging.info().
    """
    logging.info("COMPUTING WEIGHTED MEANS...")

    # phase 1: build an array with one sorted column per input matrix
    began = util.current_millis()
    sorted_vectors = [np.sort(m.values.flatten()) for m in matrices]
    columns = np.asarray(sorted_vectors).T
    took = util.current_millis() - began
    logging.info("flattened/sorted score matrices in %f s.", took / 1000.0)

    # phase 2: (weighted) mean of every row
    began = util.current_millis()
    if weights is None:
        means = util.row_means(columns)
    else:
        # broadcasted product of the weight vector with the columns; the
        # original author measured this at 0.125 s versus 125 s for
        # apply_along_axis()
        weighted = weights * columns
        # NaN weights are masked out so they do not poison the sum
        weight_total = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
        means = util.row_means(weighted) / weight_total
    took = util.current_millis() - began
    logging.info("weighted means in %f s.", took / 1000.0)

    # phase 3: hand the means to the result builder
    began = util.current_millis()
    normalized = qm_result_matrices(matrices, means)
    took = util.current_millis() - began
    logging.info("result matrices built in %f s.", took / 1000.0)
    return normalized
コード例 #2
0
def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize several score matrices against each other.

    Steps, each timed and logged via logging.info():
      1. flatten and sort the `.values` of each input matrix and arrange
         the sorted vectors as the columns of one array
      2. take the row means of that array; if `weights` is not None the
         columns are multiplied by `weights` first and the means are
         divided by the sum of the weights (NaN weights masked out)
      3. return qm_result_matrices(matrices, <row means>)
    """
    logging.info("COMPUTING WEIGHTED MEANS...")

    # step 1
    t_start = util.current_millis()
    per_matrix_sorted = np.asarray(
        [np.sort(mat.values.flatten()) for mat in matrices])
    value_columns = np.transpose(per_matrix_sorted)
    millis = util.current_millis() - t_start
    logging.info("flattened/sorted score matrices in %f s.", millis / 1000.0)

    # step 2
    t_start = util.current_millis()
    if weights is None:
        row_avgs = util.row_means(value_columns)
    else:
        # element-wise multiplication with broadcasting; noted by the
        # original author as roughly 1000x faster than apply_along_axis()
        weighted_columns = weights * value_columns
        nan_masked_weights = np.ma.masked_array(weights, np.isnan(weights))
        normalizer = np.sum(nan_masked_weights)
        row_avgs = util.row_means(weighted_columns) / normalizer
    millis = util.current_millis() - t_start
    logging.info("weighted means in %f s.", millis / 1000.0)

    # step 3
    t_start = util.current_millis()
    out = qm_result_matrices(matrices, row_avgs)
    millis = util.current_millis() - t_start
    logging.info("result matrices built in %f s.", millis / 1000.0)
    return out
コード例 #3
0
ファイル: util_test.py プロジェクト: baliga-lab/cmonkey2
 def test_row_means_with_nans(self):
     """checks util.row_means() on a matrix that contains NaN entries;
     the expected values are the means of the non-NaN entries per row"""
     nan = np.nan
     data = [[0.0010, nan, 0.21370, 0.0342],
             [0.2123, -0.2135, -0.99980, -0.0213],
             [-0.4534, 0.5546, 0.79123, nan]]
     means = util.row_means(data)
     expected = (0.08296666, -0.255575, 0.297476666)
     for row, want in enumerate(expected):
         self.assertAlmostEqual(want, means[row])
コード例 #4
0
ファイル: util_test.py プロジェクト: baliga-lab/cmonkey2
 def test_row_means(self):
     """checks util.row_means() on a 3x4 matrix without missing values"""
     data = [[0.0010, 0.1234, 0.21370, 0.0342],
             [0.2123, -0.2135, -0.99980, -0.0213],
             [-0.4534, 0.5546, 0.79123, 0.00312321]]
     means = util.row_means(data)
     # one expected mean per row, compared in row order
     expected = (0.0930750, -0.255575, 0.2238883025)
     for row, want in enumerate(expected):
         self.assertAlmostEqual(want, means[row])
コード例 #5
0
 def test_row_means_with_nans(self):
     """util.row_means() must yield the per-row mean of the non-NaN
     entries when the input rows contain np.nan"""
     rows = [[0.0010, np.nan, 0.21370, 0.0342],
             [0.2123, -0.2135, -0.99980, -0.0213],
             [-0.4534, 0.5546, 0.79123, np.nan]]
     computed = util.row_means(rows)
     # (expected mean, row index) pairs, asserted in row order
     checks = [(0.08296666, 0), (-0.255575, 1), (0.297476666, 2)]
     for expected_mean, index in checks:
         self.assertAlmostEqual(expected_mean, computed[index])
コード例 #6
0
 def test_row_means(self):
     """util.row_means() must yield the arithmetic mean of each row of a
     fully populated matrix"""
     rows = [[0.0010, 0.1234, 0.21370, 0.0342],
             [0.2123, -0.2135, -0.99980, -0.0213],
             [-0.4534, 0.5546, 0.79123, 0.00312321]]
     computed = util.row_means(rows)
     # (expected mean, row index) pairs, asserted in row order
     checks = [(0.0930750, 0), (-0.255575, 1), (0.2238883025, 2)]
     for expected_mean, index in checks:
         self.assertAlmostEqual(expected_mean, computed[index])
コード例 #7
0
ファイル: microarray.py プロジェクト: sdanzige/cmonkey-python
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Compute the row scores of `matrix` relative to `submatrix`.

    The column means are taken from `submatrix.values`; per the original
    author, `submatrix` should be the data matrix filtered by the row and
    column names of a specific cluster, and `matrix` should be filtered by
    that cluster's columns so the column means line up.

    The score of a row is the natural logarithm of the mean squared
    deviation of its values from those column means. The value returned is
    whatever np.log() produces for the clipped row means.
    """
    deviations = matrix.values - util.column_means(submatrix.values)
    mean_squared = util.row_means(np.square(deviations))
    # keep the argument of log within [1e-20, 1000] (plus a tiny offset) so
    # it is always sufficiently above 0
    bounded = np.clip(mean_squared, 1e-20, 1000.0) + 1e-99
    return np.log(bounded)
コード例 #8
0
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Row scores of `matrix`, using the column means of `submatrix`.

    According to the original author, `submatrix` should be derived from
    the data matrix filtered by the row and column names of a specific
    cluster, while `matrix` should be filtered by the columns of that
    cluster so the column means can be applied properly.

    Returns np.log() of the clipped per-row means of the squared
    differences between `matrix.values` and those column means.
    """
    squared_diff = np.square(
        matrix.values - util.column_means(submatrix.values))
    per_row = util.row_means(squared_diff)
    # clipping (and the small additive offset) guarantees a strictly
    # positive argument for the logarithm
    log_argument = np.clip(per_row, 1e-20, 1000.0) + 1e-99
    return np.log(log_argument)
コード例 #9
0
 def residual(self, max_row_variance=None):
     """Return the residual of this matrix: the mean absolute value of
     values + overall mean - (row mean + column mean).

     If max_row_variance is given, the residual is divided by the row
     variance; max_row_variance is used as the divisor instead whenever
     the row variance is NaN or exceeds it."""
     row_avgs = util.row_means(self.values)
     col_avgs = util.column_means(self.values)
     overall = util.mean(row_avgs)
     shifted = self.values + overall
     deviation = shifted - util.r_outer(row_avgs, col_avgs, operator.add)
     resid = util.mean(np.abs(deviation))

     # no normalization requested
     if max_row_variance is None:
         return resid

     variance = self.row_variance()
     if np.isnan(variance) or variance > max_row_variance:
         variance = max_row_variance
     return resid / variance
コード例 #10
0
ファイル: datamatrix.py プロジェクト: sdanzige/cmonkey-python
 def residual(self, max_row_variance=None):
     """Compute the residual of this matrix.

     The residual is the mean of |values + m_all - (m_row + m_col)|,
     where m_row / m_col are the row / column means of self.values and
     m_all is the mean of the row means. When max_row_variance is not
     None the residual is divided by self.row_variance(), with
     max_row_variance substituted if that variance is NaN or larger."""
     m_row = util.row_means(self.values)
     m_col = util.column_means(self.values)
     m_all = util.mean(m_row)
     centered = self.values + m_all - util.r_outer(m_row, m_col,
                                                   operator.add)
     mean_abs = util.mean(np.abs(centered))
     if max_row_variance is None:
         return mean_abs
     observed = self.row_variance()
     # cap the divisor at max_row_variance; NaN also falls back to the cap
     use_cap = np.isnan(observed) or observed > max_row_variance
     divisor = max_row_variance if use_cap else observed
     return mean_abs / divisor