def quantile_normalize_scores(matrices, weights=None):
    """quantile normalize scores against each other"""

    logging.info("COMPUTING WEIGHTED MEANS...")
    start_time = util.current_millis()

    # rearranges the scores in the input matrices into a matrix
    # with |matrices| columns where the columns contain the values
    # of each matrix in sorted order
    flat_values = np.transpose(np.asarray([np.sort(matrix.values.flatten())
                                           for matrix in matrices]))

    elapsed = util.current_millis() - start_time
    logging.info("flattened/sorted score matrices in %f s.", elapsed / 1000.0)

    start_time = util.current_millis()
    if weights is not None:
        # multiply each column of matrix with each component of the
        # weight vector: Using matrix multiplication resulted in speedup
        # from 125 s. to 0.125 seconds over apply_along_axis() (1000x faster)!
        scaled = weights * flat_values
        scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
        tmp_mean = util.row_means(scaled) / scale
    else:
        tmp_mean = util.row_means(flat_values)
    elapsed = util.current_millis() - start_time
    logging.info("weighted means in %f s.", elapsed / 1000.0)
    start_time = util.current_millis()

    result = qm_result_matrices(matrices, tmp_mean)

    elapsed = util.current_millis() - start_time
    logging.info("result matrices built in %f s.", elapsed / 1000.0)
    return result
Exemple #2
0
def quantile_normalize_scores(matrices, weights=None):
    """quantile normalize scores against each other"""

    logging.info("COMPUTING WEIGHTED MEANS...")
    start_time = util.current_millis()

    # rearranges the scores in the input matrices into a matrix
    # with |matrices| columns where the columns contain the values
    # of each matrix in sorted order
    flat_values = np.transpose(
        np.asarray([np.sort(matrix.values.flatten()) for matrix in matrices]))

    elapsed = util.current_millis() - start_time
    logging.info("flattened/sorted score matrices in %f s.", elapsed / 1000.0)

    start_time = util.current_millis()
    if weights is not None:
        # multiply each column of matrix with each component of the
        # weight vector: Using matrix multiplication resulted in speedup
        # from 125 s. to 0.125 seconds over apply_along_axis() (1000x faster)!
        scaled = weights * flat_values
        scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
        tmp_mean = util.row_means(scaled) / scale
    else:
        tmp_mean = util.row_means(flat_values)
    elapsed = util.current_millis() - start_time
    logging.info("weighted means in %f s.", elapsed / 1000.0)
    start_time = util.current_millis()

    result = qm_result_matrices(matrices, tmp_mean)

    elapsed = util.current_millis() - start_time
    logging.info("result matrices built in %f s.", elapsed / 1000.0)
    return result
Exemple #3
0
 def test_row_means_with_nans(self):
     """tests the row_means() function"""
     matrix = [[0.0010, np.nan, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, np.nan]]
     result = util.row_means(matrix)
     self.assertAlmostEqual(0.08296666, result[0])
     self.assertAlmostEqual(-0.255575, result[1])
     self.assertAlmostEqual(0.297476666, result[2])
Exemple #4
0
 def test_row_means(self):
     """tests the row_means() function"""
     matrix = [[0.0010, 0.1234, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, 0.00312321]]
     result = util.row_means(matrix)
     self.assertAlmostEqual(0.0930750, result[0])
     self.assertAlmostEqual(-0.255575, result[1])
     self.assertAlmostEqual(0.2238883025, result[2])
Exemple #5
0
 def test_row_means_with_nans(self):
     """tests the row_means() function"""
     matrix = [[0.0010, np.nan, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, np.nan]]
     result = util.row_means(matrix)
     self.assertAlmostEqual(0.08296666, result[0])
     self.assertAlmostEqual(-0.255575, result[1])
     self.assertAlmostEqual(0.297476666, result[2])
Exemple #6
0
 def test_row_means(self):
     """tests the row_means() function"""
     matrix = [[0.0010, 0.1234, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, 0.00312321]]
     result = util.row_means(matrix)
     self.assertAlmostEqual(0.0930750, result[0])
     self.assertAlmostEqual(-0.255575, result[1])
     self.assertAlmostEqual(0.2238883025, result[2])
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """For a given matrix, compute the row scores. The second submatrix is
    used to calculate the column means on and should be derived from
    datamatrix filtered by the row names and column names of a specific
    cluster.
    matrix should be filtered by the columns of a specific cluster in
    order for the column means to be applied properly.
    The result is a DataMatrix with one row containing all the row scores"""
    rm = util.row_means(np.square(matrix.values - util.column_means(submatrix.values)))
    # we clip the values to make sure the argument to log will be
    # sufficiently above 0 to avoid errors
    return np.log(np.clip(rm, 1e-20, 1000.0) + 1e-99)
Exemple #8
0
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """For a given matrix, compute the row scores. The second submatrix is
    used to calculate the column means on and should be derived from
    datamatrix filtered by the row names and column names of a specific
    cluster.
    matrix should be filtered by the columns of a specific cluster in
    order for the column means to be applied properly.
    The result is a DataMatrix with one row containing all the row scores"""
    rm = util.row_means(
        np.square(matrix.values - util.column_means(submatrix.values)))
    # we clip the values to make sure the argument to log will be
    # sufficiently above 0 to avoid errors
    return np.log(np.clip(rm, 1e-20, 1000.0) + 1e-99)
Exemple #9
0
 def residual(self, max_row_variance=None):
     """computes the residual for this matrix, if max_row_variance is given,
     result is normalized by the row variance"""
     d_rows = util.row_means(self.values)
     d_cols = util.column_means(self.values)
     d_all = util.mean(d_rows)
     tmp = self.values + d_all - util.r_outer(d_rows, d_cols, operator.add)
     average = util.mean(np.abs(tmp))
     if max_row_variance is not None:
         row_var = self.row_variance()
         if np.isnan(row_var) or row_var > max_row_variance:
             row_var = max_row_variance
         average = average / row_var
     return average
Exemple #10
0
 def residual(self, max_row_variance=None):
     """computes the residual for this matrix, if max_row_variance is given,
     result is normalized by the row variance"""
     d_rows = util.row_means(self.values)
     d_cols = util.column_means(self.values)
     d_all = util.mean(d_rows)
     tmp = self.values + d_all - util.r_outer(d_rows, d_cols, operator.add)
     average = util.mean(np.abs(tmp))
     if max_row_variance is not None:
         row_var = self.row_variance()
         if np.isnan(row_var) or row_var > max_row_variance:
             row_var = max_row_variance
         average = average / row_var
     return average