method = 'ward' # 'complete', 'weighted'
metric = 'euclidean'


''' Load in factor matrices. '''
folder_matrices = project_location+'HMF/methylation/bicluster_analysis/matrices/'
F_genes = numpy.loadtxt(folder_matrices+'F_genes')
F_samples = numpy.loadtxt(folder_matrices+'F_samples')
S_ge = numpy.loadtxt(folder_matrices+'S_ge')
S_pm = numpy.loadtxt(folder_matrices+'S_pm')
S_gm = numpy.loadtxt(folder_matrices+'S_gm')


''' Also load in list of genes, samples, sample labels, and gene GO ids. '''
_, _, _, genes, samples = filter_driver_genes_std()
labels_tumour = load_tumor_label_list()
top_n_go = 5 # number of GO term classes to use, +1 for 'other'
genes_go = load_top_n_GO_terms(n=top_n_go, genes=genes)
genes_go_rank = load_top_n_GO_terms_as_rank(n=top_n_go, genes=genes)


''' Method for computing dendrogram. Return order of indices. '''
def compute_dendrogram(R, method='centroid', metric='euclidean'):
    """Cluster R hierarchically and return the dendrogram leaf order.

    R is handed to scipy's `linkage` as its `y` argument; the resulting
    linkage is turned into a dendrogram without drawing anything, and the
    'leaves' entry of that dendrogram is returned.

    Args:
        R: Data to cluster, passed unchanged to `linkage`
            (presumably a 2-D array with one row per item to be
            reordered -- confirm against the callers).
        method: Linkage method. Defaults to 'centroid', the value this
            function previously hard-coded. Note that the parameter
            shadows the module-level `method` setting, which this
            function does not read.
            Options: single (Nearest Point), complete (Von Hees),
            average (UPGMA), weighted (WPGMA), centroid (UPGMC),
            median (WPGMC), ward (incremental).
        metric: Distance metric forwarded to `linkage`. Defaults to
            'euclidean', the previously hard-coded value. Per the scipy
            documentation, 'centroid', 'median' and 'ward' are only
            well defined for the Euclidean metric.

    Returns:
        The list stored under 'leaves' in the dendrogram result, i.e.
        the reordered indices.
    """
    Y = linkage(y=R, method=method, metric=metric)
    # no_plot=True: only the leaf ordering is needed, not a figure.
    Z = dendrogram(Z=Y, orientation='top', no_plot=True)
    reordered_indices = Z['leaves']
    return reordered_indices
settings = { 'priorF': 'exponential', 'priorG': 'normal', 'priorSn': 'normal', 'priorSm': 'normal', 'orderF': 'columns', 'orderG': 'rows', 'orderSn': 'rows', 'orderSm': 'rows', 'ARD': True } ''' Load in data ''' #(R_ge, R_pm, genes, samples) = load_ge_pm_top_n_genes(no_genes) #R_ge, R_pm, R_gm, genes, samples = filter_driver_genes() R_ge, R_pm, R_gm, genes, samples = filter_driver_genes_std() X1, X2, Y = R_ge.T, R_pm.T, R_gm.T # The different R, C, D values values_factorisation = [ { 'X1': 'R', 'X2': 'R', 'Y': 'R' }, { 'X1': 'R', 'X2': 'R', 'Y': 'D' },
''' import sys, os project_location = os.path.dirname(__file__)+"/../../../../" sys.path.append(project_location) from HMF.methylation.load_methylation import filter_driver_genes_std import numpy import matplotlib.pyplot as plt ''' Method for plotting the distributions ''' def plot_distribution(matrix, plot_location, binsize=0.2, dpi=600): values = matrix[~numpy.isnan(matrix)] fig = plt.figure(figsize=(2, 1.8)) fig.subplots_adjust(left=0.065, right=0.935, bottom=0.11, top=0.99) plt.hist(values,bins=numpy.arange(-5, 5.01, binsize)) plt.xlim(-5, 5.0) plt.xticks(fontsize=8) plt.yticks([],fontsize=8) plt.show() plt.savefig(plot_location, dpi=dpi, bbox_inches='tight') ''' Load the data ''' (R_ge_std, R_pm_std, R_gm_std, genes_std, samples_std) = filter_driver_genes_std() ''' Plot the data ''' plot_distribution(R_ge_std, "pretty_ge_std") plot_distribution(R_pm_std, "pretty_pm_std") plot_distribution(R_gm_std, "pretty_gm_std")