# Performance measures reported for each method.
metrics = ['MSE', 'R^2', 'Rp']

# Load data: only the CTRP EC50 dataset is used in this script.
location = project_location + "DI_MMTF/data/datasets_drug_sensitivity/overlap/"
location_data = location + "data_row_01/"
R, M_original, _, _ = load_data_without_empty(
    location_data + "ctrp_ec50_row_01.txt")

# Seeding is currently disabled (kept here for reference).
# NOTE(review): presumably this means the generated splits differ between
# runs - confirm against try_generate_M_from_M.
#numpy.random.seed(0)
#random.seed(0)

# Generate matrices M - one list of (M_train, M_test) pairs per fraction,
# with `repeats` pairs in each list.
M_attempts = 10000
all_Ms_train_test = [
    [
        try_generate_M_from_M(
            M=M_original, fraction=fraction, attempts=M_attempts)
        for _ in range(repeats)
    ]
    for fraction in fractions_unknown
]


def check_empty_rows_columns(M, fraction):
    """Make sure M has no empty rows or columns.

    M must provide ``sum(axis=...)``; a row or column counts as empty when
    its sum equals zero. Rows are checked before columns and the first
    offending index is reported.

    ``fraction`` is only used to label the error message.

    Raises:
        AssertionError: if a row or column of M sums to zero. The error is
            raised explicitly (not through an ``assert`` statement) so the
            check still runs when Python is started with ``-O``.
    """
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, total in enumerate(sums_rows):
        if total == 0:
            raise AssertionError(
                "Fully unobserved row in M, row %s. Fraction %s."
                % (i, fraction))
    for j, total in enumerate(sums_columns):
        if total == 0:
            raise AssertionError(
                "Fully unobserved column in M, column %s. Fraction %s."
                % (j, fraction))


# Validate every training mask; the test masks are not checked.
for Ms_train_test, fraction in zip(all_Ms_train_test, fractions_unknown):
    for (M_train, M_test) in Ms_train_test:
        check_empty_rows_columns(M_train, fraction)
# Load the CTRP EC50 data first; the cell lines and drugs it returns are
# passed to the loader of the three other datasets.
# NOTE(review): presumably load_data_filter restricts each dataset to those
# cell lines and drugs - confirm against its definition.
R_ctrp, M_ctrp, cell_lines, drugs = load_data_without_empty(
    location_data + "ctrp_ec50_row_01.txt")
R_ccle_ec, M_ccle_ec = load_data_filter(
    location_data + "ccle_ec50_row_01.txt", cell_lines, drugs)
R_gdsc, M_gdsc = load_data_filter(
    location_data + "gdsc_ic50_row_01.txt", cell_lines, drugs)
R_ccle_ic, M_ccle_ic = load_data_filter(
    location_data + "ccle_ic50_row_01.txt", cell_lines, drugs)

# Seeding is currently disabled (kept here for reference).
# NOTE(review): presumably this means the generated splits differ between
# runs - confirm against try_generate_M_from_M.
#numpy.random.seed(0)
#random.seed(0)

# Generate matrices M - one list of (M_train, M_test) pairs per fraction,
# with `repeats` pairs in each list. Splits are derived from the CTRP mask.
M_attempts = 10000
all_Ms_train_test = [
    [
        try_generate_M_from_M(
            M=M_ctrp, fraction=fraction, attempts=M_attempts)
        for _ in range(repeats)
    ]
    for fraction in fractions_unknown
]


def check_empty_rows_columns(M, fraction):
    """Make sure M has no empty rows or columns.

    M must provide ``sum(axis=...)``; a row or column counts as empty when
    its sum equals zero. Rows are checked before columns and the first
    offending index is reported.

    ``fraction`` is only used to label the error message.

    Raises:
        AssertionError: if a row or column of M sums to zero. The error is
            raised explicitly (not through an ``assert`` statement) so the
            check still runs when Python is started with ``-O``.
    """
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, total in enumerate(sums_rows):
        if total == 0:
            raise AssertionError(
                "Fully unobserved row in M, row %s. Fraction %s."
                % (i, fraction))
    for j, total in enumerate(sums_columns):
        if total == 0:
            raise AssertionError(
                "Fully unobserved column in M, column %s. Fraction %s."
                % (j, fraction))
# Hyperparameters collected into one dict; the values come from names
# defined outside this fragment.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaU': lambdaU,
    'lambdaV': lambdaV,
}
# Performance measures reported for each method.
metrics = ['MSE', 'R^2', 'Rp']

# Load data: only the CTRP EC50 dataset is used in this script.
location = project_location + "DI_MMTF/data/datasets_drug_sensitivity/overlap/"
location_data = location + "data_row_01/"
R, M_original, _, _ = load_data_without_empty(
    location_data + "ctrp_ec50_row_01.txt")

# Seeding is currently disabled (kept here for reference).
# NOTE(review): presumably this means the generated splits differ between
# runs - confirm against try_generate_M_from_M.
#numpy.random.seed(0)
#random.seed(0)

# Generate matrices M - one list of (M_train, M_test) pairs per fraction,
# with `repeats` pairs in each list.
M_attempts = 10000
all_Ms_train_test = [
    [
        try_generate_M_from_M(
            M=M_original, fraction=fraction, attempts=M_attempts)
        for _ in range(repeats)
    ]
    for fraction in fractions_unknown
]


def check_empty_rows_columns(M, fraction):
    """Make sure M has no empty rows or columns.

    M must provide ``sum(axis=...)``; a row or column counts as empty when
    its sum equals zero. Rows are checked before columns and the first
    offending index is reported.

    ``fraction`` is only used to label the error message.

    Raises:
        AssertionError: if a row or column of M sums to zero. The error is
            raised explicitly (not through an ``assert`` statement) so the
            check still runs when Python is started with ``-O``.
    """
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, total in enumerate(sums_rows):
        if total == 0:
            raise AssertionError(
                "Fully unobserved row in M, row %s. Fraction %s."
                % (i, fraction))
    for j, total in enumerate(sums_columns):
        if total == 0:
            raise AssertionError(
                "Fully unobserved column in M, column %s. Fraction %s."
                % (j, fraction))