コード例 #1
0
# Linkage method and distance metric settings.
# Other linkage methods noted by the author: 'complete', 'weighted'.
method = 'ward'
metric = 'euclidean'


# Load in the factor matrices. Each one is read with numpy.loadtxt from a file
# of the same name inside folder_matrices, in the order listed below.
folder_matrices = project_location + 'HMF/methylation/bicluster_analysis/matrices/'

F_genes, F_samples, S_ge, S_pm, S_gm = (
    numpy.loadtxt(folder_matrices + matrix_name)
    for matrix_name in ('F_genes', 'F_samples', 'S_ge', 'S_pm', 'S_gm')
)


# Also load in the genes, samples, sample labels, and gene GO ids.
# filter_driver_genes_std() returns five values; only the last two are kept.
(_, _, _, genes, samples) = filter_driver_genes_std()
labels_tumour = load_tumor_label_list()

# Number of GO term classes to use, +1 for 'other'.
top_n_go = 5
genes_go = load_top_n_GO_terms(genes=genes, n=top_n_go)
genes_go_rank = load_top_n_GO_terms_as_rank(genes=genes, n=top_n_go)


def compute_dendrogram(R, method='centroid', metric='euclidean'):
    ''' Hierarchically cluster R and return the dendrogram's leaf order.

    Arguments:
    - R: passed to scipy's linkage() as `y`, i.e. a 2-D array with one
      observation vector per row (or a condensed distance matrix).
    - method: linkage method. One of: single (Nearest Point), complete
      (Von Hees), average (UPGMA), weighted (WPGMA), centroid (UPGMC),
      median (WPGMC), ward (incremental). Defaults to 'centroid', the value
      that was previously hard-coded.
    - metric: distance metric used by linkage(). Defaults to 'euclidean'.

    Returns the list of observation indices in the order in which they appear
    as leaves of the dendrogram (left to right).

    NOTE(review): the module-level `method` / `metric` settings are not read
    here; pass them explicitly if they are meant to control this clustering.
    '''
    linkage_matrix = linkage(y=R, method=method, metric=metric)
    # no_plot=True: only the leaf ordering is needed, nothing is drawn.
    tree = dendrogram(Z=linkage_matrix, orientation='top', no_plot=True)
    return tree['leaves']
コード例 #2
0
# Model settings: one prior name per factor (F, G, Sn, Sm), an ordering
# ('rows' or 'columns') per factor, and an ARD on/off flag.
# NOTE(review): exact meaning of each option is defined by the consumer of
# this dictionary, which is not visible here - confirm before changing values.
settings = dict(
    priorF='exponential',
    priorG='normal',
    priorSn='normal',
    priorSm='normal',
    orderF='columns',
    orderG='rows',
    orderSn='rows',
    orderSm='rows',
    ARD=True,
)
# Load in the data with filter_driver_genes_std(). Earlier alternatives, kept
# for reference: load_ge_pm_top_n_genes(no_genes) and filter_driver_genes().
(R_ge, R_pm, R_gm, genes, samples) = filter_driver_genes_std()

# The model inputs are the transposes of the three loaded matrices.
X1 = R_ge.T
X2 = R_pm.T
Y = R_gm.T

# The different R, C, D values
values_factorisation = [
    {
        'X1': 'R',
        'X2': 'R',
        'Y': 'R'
    },
    {
        'X1': 'R',
        'X2': 'R',
        'Y': 'D'
    },
コード例 #3
0
ファイル: plot_distributions.py プロジェクト: rintukutum/HMF
'''

import sys, os
project_location = os.path.dirname(__file__)+"/../../../../"
sys.path.append(project_location)

from HMF.methylation.load_methylation import filter_driver_genes_std

import numpy
import matplotlib.pyplot as plt

def plot_distribution(matrix, plot_location, binsize=0.2, dpi=600):
    ''' Plot a histogram of the non-NaN entries of a matrix, save it, show it.

    Arguments:
    - matrix: numpy array of values; NaN entries are left out of the plot.
    - plot_location: output path handed to savefig().
    - binsize: width of the histogram bins. The bins cover the fixed range
      [-5, 5], so values outside that range are not counted.
    - dpi: resolution of the saved image.

    The figure is written to disk BEFORE plt.show() is called. With a blocking
    interactive backend the figure no longer exists once show() returns, so
    saving afterwards would write an empty image.
    '''
    values = matrix[~numpy.isnan(matrix)]

    fig = plt.figure(figsize=(2, 1.8))
    fig.subplots_adjust(left=0.065, right=0.935, bottom=0.11, top=0.99)

    plt.hist(values, bins=numpy.arange(-5, 5.01, binsize))
    plt.xlim(-5, 5.0)
    plt.xticks(fontsize=8)
    plt.yticks([], fontsize=8)  # hide the y-axis ticks entirely

    # Save through the figure handle so the right figure is written even if
    # another figure has become "current" in the meantime.
    fig.savefig(plot_location, dpi=dpi, bbox_inches='tight')
    plt.show()

# Load the data: three matrices plus the gene and sample names.
R_ge_std, R_pm_std, R_gm_std, genes_std, samples_std = filter_driver_genes_std()

# Plot the value distribution of each matrix to its own output file.
plot_distribution(matrix=R_ge_std, plot_location="pretty_ge_std")
plot_distribution(matrix=R_pm_std, plot_location="pretty_pm_std")
plot_distribution(matrix=R_gm_std, plot_location="pretty_gm_std")