def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize a list of score matrices against each other.

    Every matrix is flattened and sorted; the sorted vectors are placed
    side by side (one column per input matrix) and reduced to a single
    vector with util.row_means(). When weights are given, each column is
    multiplied by its weight first and the result is divided by the sum
    of the non-NaN weights.

    matrices -- objects exposing a `.values` array
    weights  -- optional weights, multiplied against the columns
                (presumably one weight per matrix -- confirm with callers)

    Returns the value of qm_result_matrices(matrices, means).
    Timing of each phase is reported through logging.info().
    """
    logging.info("COMPUTING WEIGHTED MEANS...")

    # Phase 1: one sorted, flattened vector per matrix; after the
    # transpose, column j holds the sorted values of matrices[j].
    phase_start = util.current_millis()
    sorted_vectors = [np.sort(m.values.flatten()) for m in matrices]
    flat_values = np.asarray(sorted_vectors).T
    duration = util.current_millis() - phase_start
    logging.info("flattened/sorted score matrices in %f s.",
                 duration / 1000.0)

    # Phase 2: (optionally weighted) means across the columns.
    phase_start = util.current_millis()
    if weights is None:
        tmp_mean = util.row_means(flat_values)
    else:
        # Plain `*` applies the weights to all columns in one vectorized
        # step, which is far faster than apply_along_axis().
        weighted = weights * flat_values
        # NaN weights are masked out so they do not poison the divisor.
        usable_weights = np.ma.masked_array(weights, mask=np.isnan(weights))
        weight_total = np.sum(usable_weights)
        tmp_mean = util.row_means(weighted) / weight_total
    duration = util.current_millis() - phase_start
    logging.info("weighted means in %f s.", duration / 1000.0)

    # Phase 3: build the result matrices from the means.
    phase_start = util.current_millis()
    normalized = qm_result_matrices(matrices, tmp_mean)
    duration = util.current_millis() - phase_start
    logging.info("result matrices built in %f s.", duration / 1000.0)
    return normalized
def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize the given score matrices against each other.

    The values of each matrix are flattened and sorted, arranged as the
    columns of one array, and collapsed with util.row_means(). If
    `weights` is supplied, the columns are scaled by the weights before
    averaging and the means are divided by the sum of all non-NaN
    weights.

    matrices -- objects exposing a `.values` array
    weights  -- optional weights applied to the columns (presumably one
                per matrix -- confirm with callers)

    Returns the result of qm_result_matrices(matrices, means); the
    duration of each step is logged at INFO level.
    """
    logging.info("COMPUTING WEIGHTED MEANS...")

    began = util.current_millis()
    # Rows of `per_matrix` are the sorted values of one matrix each;
    # transposing turns them into columns.
    per_matrix = np.asarray(
        [np.sort(matrix.values.flatten()) for matrix in matrices])
    flat_values = per_matrix.T
    took = util.current_millis() - began
    logging.info("flattened/sorted score matrices in %f s.", took / 1000.0)

    began = util.current_millis()
    if weights is None:
        tmp_mean = util.row_means(flat_values)
    else:
        # Vectorized multiplication instead of apply_along_axis():
        # reported in the original comment as roughly 1000x faster.
        scaled_values = weights * flat_values
        # Only non-NaN weights contribute to the normalizing sum.
        nan_mask = np.isnan(weights)
        divisor = np.sum(np.ma.masked_array(weights, mask=nan_mask))
        tmp_mean = util.row_means(scaled_values) / divisor
    took = util.current_millis() - began
    logging.info("weighted means in %f s.", took / 1000.0)

    began = util.current_millis()
    outcome = qm_result_matrices(matrices, tmp_mean)
    took = util.current_millis() - began
    logging.info("result matrices built in %f s.", took / 1000.0)
    return outcome
def test_row_means_with_nans(self):
    """util.row_means() must average each row over its non-NaN entries"""
    rows = [
        [0.0010, np.nan, 0.21370, 0.0342],
        [0.2123, -0.2135, -0.99980, -0.0213],
        [-0.4534, 0.5546, 0.79123, np.nan],
    ]
    # Expected values are the means of the remaining (non-NaN) numbers.
    expected_means = (0.08296666, -0.255575, 0.297476666)

    result = util.row_means(rows)

    for index, expected in enumerate(expected_means):
        self.assertAlmostEqual(expected, result[index])
def test_row_means(self):
    """util.row_means() must return the arithmetic mean of every row"""
    rows = [
        [0.0010, 0.1234, 0.21370, 0.0342],
        [0.2123, -0.2135, -0.99980, -0.0213],
        [-0.4534, 0.5546, 0.79123, 0.00312321],
    ]
    expected_means = (0.0930750, -0.255575, 0.2238883025)

    result = util.row_means(rows)

    for index, expected in enumerate(expected_means):
        self.assertAlmostEqual(expected, result[index])
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Compute the row scores of `matrix` relative to `submatrix`.

    The column means are taken from `submatrix`, which should be the data
    matrix restricted to the rows and columns of one cluster. `matrix`
    should be restricted to the same cluster columns so that the column
    means line up with its columns.

    The score of a row is the log of the mean squared deviation of its
    values from those column means.
    NOTE(review): the original docstring describes the result as a
    one-row DataMatrix, but the code returns the output of np.log()
    directly -- confirm against callers.
    """
    deviations = matrix.values - submatrix.column_means()
    mean_squared = util.row_means(np.square(deviations))
    # the tiny offset keeps the log argument away from exactly 0
    return np.log(mean_squared + 1e-99)
def weighted_row_means(matrix, weights):
    """Compute weighted row means.

    matrix  -- array whose columns are scaled by the weights
    weights -- weights multiplied against `matrix` (presumably one per
               column -- confirm with callers); NaN weights are left out
               of the normalizing sum

    Returns util.row_means() of the weighted matrix divided by the sum of
    the non-NaN weights.
    """
    # Apply the weights with one vectorized multiplication; this was
    # measured as roughly 1000x faster than apply_along_axis()
    # (125 s. -> 0.125 s.).
    scaled = weights * matrix
    # Mask NaN weights so they do not turn the divisor into NaN.
    scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
    return util.row_means(scaled) / scale
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Compute the row scores of `matrix` relative to `submatrix`.

    The column means are computed on `submatrix`, which should be the
    data matrix restricted to the rows and columns of one cluster.
    `matrix` should be restricted to the same cluster columns so that
    the column means line up with its columns.

    The score of a row is the log of the mean squared deviation of its
    values from those column means, with the mean limited to the range
    [1e-20, 1000.0] before the log is taken.
    NOTE(review): the original docstring describes the result as a
    one-row DataMatrix, but the code returns the output of np.log()
    directly -- confirm against callers.
    """
    column_centers = util.column_means(submatrix.values)
    squared_deviation = np.square(matrix.values - column_centers)
    mean_squared = util.row_means(squared_deviation)
    # clipping guarantees the log argument stays safely above 0
    bounded = np.clip(mean_squared, 1e-20, 1000.0)
    return np.log(bounded + 1e-99)
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Return the log mean-squared-deviation score for each row.

    `submatrix` supplies the column means and should be the data matrix
    filtered by the row and column names of a specific cluster; `matrix`
    should be filtered by that cluster's columns so the means apply to
    the matching columns.

    Each row's squared deviations from the column means are averaged
    with util.row_means(), limited to [1e-20, 1000.0] and passed to
    np.log().
    NOTE(review): the original docstring calls the result a one-row
    DataMatrix; the code returns the np.log() output as is -- confirm.
    """
    centered = matrix.values - util.column_means(submatrix.values)
    row_msd = util.row_means(np.square(centered))
    # keep the argument of log sufficiently above 0 to avoid errors
    safe_msd = np.clip(row_msd, 1e-20, 1000.0) + 1e-99
    return np.log(safe_msd)
def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize score matrices against each other.

    matrices -- the score matrices to normalize
    weights  -- optional weights multiplied against the columns of the
                sorted values (presumably one per matrix -- confirm with
                callers); NaN weights are left out of the normalizing sum

    The sorted, flattened values come from as_sorted_flat_values(); their
    (optionally weighted) means from util.row_means() are handed to
    qm_result_matrices(), whose result is returned.
    """
    flat_values = as_sorted_flat_values(matrices)
    # Identity test on purpose: `weights != None` on a NumPy array is an
    # elementwise comparison, and using that array as an `if` condition
    # raises ValueError for more than one element.
    if weights is not None:
        # Apply the weights with one vectorized multiplication; this was
        # measured as roughly 1000x faster than apply_along_axis()
        # (125 s. -> 0.125 s.).
        scaled = weights * flat_values
        # Mask NaN weights so they do not turn the divisor into NaN.
        scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
        tmp_mean = util.row_means(scaled) / scale
    else:
        tmp_mean = util.row_means(flat_values)
    return qm_result_matrices(matrices, tmp_mean)
def residual(self, max_row_variance=None):
    """Compute the residual of this matrix.

    The residual is the mean absolute value of
    values + overall_mean - (row_mean + column_mean), where the
    row/column term is built with util.r_outer(..., operator.add).

    If max_row_variance is given, the residual is divided by the row
    variance; a row variance that is NaN or larger than max_row_variance
    is replaced by max_row_variance first.
    """
    row_avgs = util.row_means(self.values)
    col_avgs = util.column_means(self.values)
    overall_avg = util.mean(row_avgs)
    additive_part = util.r_outer(row_avgs, col_avgs, operator.add)
    deviations = self.values + overall_avg - additive_part
    mean_abs_dev = util.mean(np.abs(deviations))

    if max_row_variance is None:
        return mean_abs_dev

    # cap the normalizer so an undefined or very large variance cannot
    # distort the normalized residual
    variance = self.row_variance()
    if np.isnan(variance) or variance > max_row_variance:
        variance = max_row_variance
    return mean_abs_dev / variance
def quantile_normalize_scores(matrices, weights=None):
    """Quantile-normalize score matrices against each other.

    matrices -- the score matrices to normalize
    weights  -- optional weights; when given, the means are computed with
                weighted_row_means(), otherwise with util.row_means()

    The sorted, flattened values come from as_sorted_flat_values(); the
    means are handed to qm_result_matrices(), whose result is returned.
    """
    flat_values = as_sorted_flat_values(matrices)
    # Identity test on purpose: `weights != None` on a NumPy array is an
    # elementwise comparison, and using that array as an `if` condition
    # raises ValueError for more than one element.
    if weights is not None:
        tmp_mean = weighted_row_means(flat_values, weights)
    else:
        tmp_mean = util.row_means(flat_values)
    return qm_result_matrices(matrices, tmp_mean)
def row_means(self):
    """Returns the row means of this matrix's values, as computed by
    util.row_means()"""
    return util.row_means(self.values)