def save_corr_matrix(self, file_handle):
    """
    Write the correlation matrix to ``file_handle`` as tab-separated text.

    The first line is a header: an empty cell followed by every label
    wrapped in single quotes. Each following line holds one single-quoted
    row label and that row's values formatted with four decimals.

    Side effects: when ``self.column_order`` is set and non-empty,
    ``self.corr_matrix`` (rows and columns) and ``self.labels`` are
    reordered in place before writing, so the instance keeps that order
    after the call. ``self.labels`` is also replaced by its
    ``toString``-converted form.

    :param file_handle: object exposing ``write``; it receives text.
    """
    # Test for None/length explicitly instead of relying on truthiness,
    # which raises "truth value is ambiguous" for a numpy index array.
    if self.column_order is not None and len(self.column_order) > 0:
        # A list is always taken by numpy as an index array, whereas a
        # tuple would be read as a multi-dimensional index.
        order = list(self.column_order)
        self.corr_matrix = self.corr_matrix[:, order][order]
        self.labels = [self.labels[idx] for idx in order]

    self.labels = [toString(x) for x in self.labels]

    file_handle.write("\t'" + "'\t'".join(self.labels) + "'\n")

    # One '%.4f' slot per matrix column.
    row_fmt = "\t".join(["%.4f"] * self.corr_matrix.shape[1]) + "\n"
    for idx, row in enumerate(self.corr_matrix):
        # Index the labels (rather than zip) so a missing label still
        # raises instead of silently dropping rows.
        file_handle.write("'%s'\t" % self.labels[idx] + row_fmt % tuple(row))
def __init__(self, matrix_file, corr_method=None, labels=None,
             remove_outliers=False, skip_zeros=False, log1p=False):
    """
    Load a matrix and optionally pre-process it and compute correlations.

    :param matrix_file: passed unchanged to ``self.load_matrix``.
    :param corr_method: stored as ``self.corr_method``; when truthy,
        ``self.compute_correlation()`` is called at the end.
    :param labels: optional sample labels replacing the current
        ``self.labels``; there must be one per matrix column.
    :param remove_outliers: when ``True``, ``self.remove_outliers()`` is
        called.
    :param skip_zeros: when ``True``, ``self.remove_rows_of_zeros()`` is
        called.
    :param log1p: when ``True``, the matrix is replaced by
        ``np.log1p(matrix)``.

    Exits through ``sys.exit`` when the matrix has a single sample
    (column) or when the number of labels differs from the number of
    samples.
    """
    # load_matrix must provide self.matrix, which is read below.
    self.load_matrix(matrix_file)
    self.skip_zeros = skip_zeros
    self.corr_method = corr_method
    self.corr_matrix = None  # correlation matrix
    self.column_order = None
    self.rowCenter = False

    if labels is not None:
        labels = list(labels)
        # Columns of the matrix are the samples, so every column needs
        # exactly one label; fail early instead of producing mislabelled
        # output later.
        if len(labels) != self.matrix.shape[1]:
            sys.exit("\nThe number of labels ({}) does not match the "
                     "number of samples ({})\n".format(
                         len(labels), self.matrix.shape[1]))
        self.labels = labels

    # NOTE(review): assumes load_matrix has set self.labels when no labels
    # are passed in -- confirm against load_matrix.
    self.labels = [toString(x) for x in self.labels]

    if self.matrix.shape[1] == 1:
        # There's nothing that can be done with a single sample
        sys.exit("\nPlease use a matrix with more than one sample\n")

    if skip_zeros is True:
        # remove rows containing only nans or zeros
        # that could be unmappable regions.
        self.remove_rows_of_zeros()

    if remove_outliers is True:
        # remove outliers, otherwise outliers will produce a very
        # high pearson correlation. Unnecessary for spearman correlation
        self.remove_outliers()

    if log1p is True:
        self.matrix = np.log1p(self.matrix)

    if corr_method:
        self.compute_correlation()