init_UV = 'random' ARD = False lambdaU, lambdaV = 0.1, 0.1 alphatau, betatau = 1., 1. alpha0, beta0 = 1., 1. hyperparams = { 'alphatau': alphatau, 'betatau': betatau, 'alpha0': alpha0, 'beta0': beta0, 'lambdaU': lambdaU, 'lambdaV': lambdaV } ''' Load in data. ''' R, M = load_gdsc_ic50() I, J = M.shape ''' Generate matrices M - one list of M's for each value of K. ''' M_attempts = 1000 all_Ms_training_and_test = [ compute_folds_attempts(I=I, J=J, no_folds=no_folds, attempts=M_attempts, M=M) for K in values_K ] ''' We now run the Gibbs sampler on each of the M's for each fraction. ''' all_performances = {metric: [] for metric in metrics} average_performances = {metric: [] for metric in metrics} # averaged over repeats for K, (Ms_train, Ms_test) in zip(values_K, all_Ms_training_and_test):
Methods for plotting the distribution of the drug sensitivity datasets. ''' project_location = "/home/tab43/Documents/Projects/libraries/" # "/Users/thomasbrouwer/Documents/Projects/libraries/" import sys sys.path.append(project_location) from BNMTF_ARD.data.drug_sensitivity.load_data import load_gdsc_ic50 from BNMTF_ARD.data.drug_sensitivity.load_data import load_ctrp_ec50 from BNMTF_ARD.data.drug_sensitivity.load_data import load_ccle_ic50 from BNMTF_ARD.data.drug_sensitivity.load_data import load_ccle_ec50 import itertools import matplotlib.pyplot as plt ''' Load in the data. ''' R_gdsc, M_gdsc = load_gdsc_ic50() R_ctrp, M_ctrp = load_ctrp_ec50() R_ccle_ic, M_ccle_ic = load_ccle_ic50() R_ccle_ec, M_ccle_ec = load_ccle_ec50() def extract_values(R, M): I, J = R.shape return [ R[i, j] for i, j in itertools.product(range(I), range(J)) if M[i, j] ] values_plotnames_bins = [ (extract_values(R_gdsc, M_gdsc), 'distribution_gdsc_ic50.pdf', [v - 0.5 for v in range(0, 100 + 10, 5)]),