import numpy as np

# `pp` / `pnp` are the privacy-preserving runtime and its NumPy-like module
# used throughout this file (PrivPy-style names; the exact import paths are
# an assumption and may need adjusting to the actual framework):
# import privpy as pp
# import pnumpy as pnp


def fdot(A, B):
    # Flatten dot: treat A and B as long vectors. Both operands are
    # pre-scaled by 1/shape[0], presumably to keep the fixed-point product
    # in range, so the result is the plain dot product divided by shape[0]**2.
    A = pp.farr(A)
    B = pp.farr(B)
    A = A * (1.0 / A.shape[0])
    B = B * (1.0 / B.shape[0])
    return pnp.sum(A * B)
def predict(network, inputs, hypers):
    # Forward pass over `inputs` in fixed-size batches; hypers[6] is the batch size.
    n_layer = len(network)
    batch_size = hypers[6]
    up = inputs.shape[0] - batch_size + 1
    for s in range(0, up, batch_size):
        temp = pp.farr(inputs[s: s + batch_size, :])
        for i in range(n_layer):
            layer = network[i]
            # affine step: X W + 1 b (the ones column broadcasts the bias row)
            temp = pnp.dot(temp, layer['weight']) + pnp.dot(pnp.ones((temp.shape[0], 1)), layer['bias'])
            if layer['activation'] == 'relu':
                temp = relu(temp)
            else:  # 'linear'
                pass
        if s == 0:
            pred = temp
        else:
            pred = pnp.concatenate((pred, temp), axis=0)
    # Handle the remainder: either the whole input is smaller than one batch
    # (up <= 0) or some tail rows were not covered by the batched loop above.
    if up <= 0 or pred.shape[0] < inputs.shape[0]:
        if up <= 0:
            temp = inputs
        else:
            temp = inputs[pred.shape[0]:, :]
        for i in range(n_layer):
            layer = network[i]
            temp = pnp.dot(temp, layer['weight']) + pnp.dot(pnp.ones((temp.shape[0], 1)), layer['bias'])
            if layer['activation'] == 'relu':
                temp = relu(temp)
            else:  # 'linear'
                pass
        if up <= 0:
            pred = temp
        else:
            pred = pnp.concatenate((pred, temp), axis=0)
    return pred
def add_layer(n_in, n_out, activation, p):
    # Weights are initialized uniformly in [-0.05, 0.05); biases start at zero.
    # 'v' and 'bias_v' are zero-initialized buffers (e.g., for momentum
    # updates); 'p' is stored as passed in.
    weight = 0.1 * np.random.random((n_in, n_out)) - 0.05
    weight = pp.farr(weight)
    v = pnp.zeros((n_in, n_out))
    bias = pnp.zeros((1, n_out))
    bias_v = pnp.zeros((1, n_out))
    return {'weight': weight, 'bias': bias, 'activation': activation,
            'p': p, 'v': v, 'bias_v': bias_v}
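
# Plaintext mirror of one forward pass through a `network` built from
# `add_layer`, kept as a sketch for testing on revealed values. It assumes
# layer['weight'] / layer['bias'] are plain ndarrays here; `_np_relu` and
# `_np_forward` are hypothetical helper names, not part of the module API.
def _np_relu(x):
    import numpy as np
    return np.maximum(x, 0.0)


def _np_forward(network, inputs):
    import numpy as np
    temp = np.asarray(inputs, dtype='float64')
    for layer in network:
        # same affine step as `predict`: X W + b (the bias row broadcasts)
        temp = temp.dot(layer['weight']) + layer['bias']
        if layer['activation'] == 'relu':
            temp = _np_relu(temp)
    return temp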
def demo(hyper_param):
    data_dir = hyper_param['data_dir']
    no = hyper_param['no']
    seed = hyper_param['seed']
    suffix = ''
    dir_trial = data_dir + 'trial' + str(seed) + suffix + '/'
    metrics = []

    DTItrain = read_ndarray(dir_trial, 'fold' + str(no) + '_train_dense', ' ', 'int32')
    DTItrain = pp.farr(DTItrain)
    Re = train(data_dir, no + seed, DTItrain,
               hyper_param['maxiterpr'], hyper_param['maxiterd'],
               hyper_param['restartProb'], hyper_param['dim_drug'],
               hyper_param['dim_prot'], hyper_param['imc_k'],
               hyper_param['imc_iter'], hyper_param['log_iter'],
               hyper_param['pmiter'], hyper_param['explicit'],
               hyper_param['gciter'], hyper_param['lamb'])

    # Evaluate on the basic test fold, then with the 'extra' pairs stacked on.
    test1 = read_ndarray(dir_trial, 'fold' + str(no) + '_test1basic', ' ', 'int32')
    auroc, aupr = evaluate(Re, test1)
    metrics.append(auroc)
    metrics.append(aupr)

    test9 = read_ndarray(dir_trial, 'fold' + str(no) + '_test9extra', ' ', 'int32')
    test_10 = pnp.vstack((test1, test9))
    auroc, aupr = evaluate(Re, test_10)
    metrics.append(auroc)
    metrics.append(aupr)

    testall = read_ndarray(dir_trial, 'fold' + str(no) + '_testallextra', ' ', 'int32')
    test_all = pnp.vstack((test1, testall))
    auroc, aupr = evaluate(Re, test_all)
    metrics.append(auroc)
    metrics.append(aupr)

    Re = pp.back2plain(Re)
    metrics = np.array(metrics)
    return Re, metrics
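
# For reference, a hypothetical `hyper_param` dict covering every key `demo`
# reads above; the values are placeholders for illustration, not tuned
# defaults shipped with this repo:
# hyper_param = {
#     'data_dir': './data/', 'no': 0, 'seed': 1,
#     'maxiterpr': 20, 'maxiterd': 20, 'restartProb': 0.5,
#     'dim_drug': 100, 'dim_prot': 400, 'imc_k': 50,
#     'imc_iter': 20, 'log_iter': 10, 'pmiter': 50,
#     'explicit': 1024, 'gciter': 5, 'lamb': 0.1,
# }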
def maxabsscaler(x):
    # Column-wise max of |x|, computed in row batches of 20.
    batch = 20
    max_abs = pp.farr([pnp.max(pnp.abs(x[i:i + batch, :]), axis=0)
                       for i in range(0, x.shape[0], batch)])
    if len(max_abs) == 1:
        max_abs = pp.farr(max_abs)
    else:
        max_abs = pnp.vstack(pp.farr(max_abs))
    max_abs = pnp.max(max_abs, axis=0)
    m_tmp = pnp.ravel(max_abs)
    # Oblivious version of `max_abs[max_abs == 0.0] = 1.0`: boolean indexing
    # would branch on secret data, so blend with a secret flag instead.
    for i in range(len(m_tmp)):
        flag_zero = (m_tmp[i] < 1e-8)
        m_tmp[i] = m_tmp[i] * (1 - flag_zero) + 1.0 * flag_zero
    max_abs = pnp.reshape(m_tmp, (1, -1))
    return max_abs
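
# Plaintext reference for `maxabsscaler` (a sketch, assuming a revealed NumPy
# input): per-column max of |x|, with near-zero columns clamped to 1.0 so the
# later division in `scale` leaves them unchanged. `_np_maxabs` is a
# hypothetical test helper, not part of the module API.
def _np_maxabs(x):
    import numpy as np
    m = np.abs(np.asarray(x, dtype='float64')).max(axis=0)
    m[m < 1e-8] = 1.0  # plaintext may branch on data; the MPC code blends obliviously
    return m.reshape(1, -1)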
def scale(x, max_abs):
    # plaintext intent: x = x / np.dot(np.ones((x.shape[0], 1)), max_abs)
    x_ones = pnp.ones((x.shape[0], 1))
    sca = pnp.dot(x_ones, max_abs)
    tar_shape = x.shape
    # unbatched alternative: x = x * pnp.reciprocal(sca)
    # Divide in row batches to keep the reciprocal's working set small.
    batch_size = 20
    tmp_x = [(x[i:i + batch_size, :] * pp.reciprocal(sca[i:i + batch_size, :]))
             for i in range(0, x.shape[0], batch_size)]
    if len(tmp_x) == 1:
        x = pp.farr(tmp_x)
    else:
        x = pnp.vstack(tmp_x)
    x = pnp.reshape(x, tar_shape)
    print('scale: output shape', x.shape)
    return x
def train(data_dir, seed_IMC, DTItrain, maxiterpr, maxiterd, restartProb, dim_drug,
          dim_prot, imc_k, imc_iter, log_iter, pmiter, explicit, gciter, lamb):
    # (maxiterpr is accepted for interface compatibility but unused here.)
    ### input and output dir names for compact feature learning
    dir_inter = data_dir + 'data_prep/'
    dir_DTIMPC = data_dir + 'data_luo/'

    ### get drug networks, require MPC version <-
    # structure similarity from RDKit fingerprints
    dpsM = np.load(dir_inter + 'finger_rdkit_' + str(explicit) + '.npy')
    dpsM = pp.farr(dpsM)
    drugSim = dice_sim_matrix_po(dpsM)
    drugNets = [drugSim]

    # interaction / association networks
    drugNetworks = ['mat_drug_disease']
    for idrug in drugNetworks:
        idr = read_ndarray(dir_DTIMPC, idrug, ' ', 'int32')
        idrSim = jaccard_sim_po(idr)
        drugNets.append(idrSim)

    # DCA: compact drug features; protein features are precomputed and public
    protein_feature = np.load(dir_inter + 'public_protein_feature_' + str(dim_prot) + '.npy')
    drug_feature = DCA_po(drugNets, dim_drug, restartProb, maxiterd, pmiter, log_iter)

    np.random.seed(seed_IMC)  # <--- random seed to initialize IMC
    Re = IMC_po(DTItrain, drug_feature, protein_feature, k=imc_k, lamb=lamb,
                maxiter=imc_iter, gciter=gciter)
    return Re
def run(config):
    data_set = config[1][0]
    data_dir = config[1][1]
    ilist = [data_set]
    hyper = [
        int(config[1][2]),
        int(config[1][3]),
        float(config[1][4]),
        float(config[1][5]),
        float(config[1][6]),
        int(config[1][7]),   # hyper[5] - n_epoch
        int(config[1][8]),   # hyper[6] - batch_size
    ]
    ensemble = int(config[1][9])
    seed = int(config[1][10])

    # ilist and hyper are public
    for i in ilist:
        X_train = pp.farr(np.load(data_dir + i + "_Xtrain.npy"))
        X_test = pp.farr(np.load(data_dir + i + "_Xtest.npy"))
        y_train = pp.farr(np.load(data_dir + i + "_ytrain.npy"))
        y_test = pp.farr(np.load(data_dir + i + "_ytest.npy"))
        # Alternative input path: read secret shares instead of local .npy files.
        # X_train = pp.ss("X_train")
        # X_test = pp.ss("X_test")
        # y_train = pp.ss("y_train")
        # y_test = pp.ss("y_test")
        # The re-wrap below keeps both input paths yielding farr values.
        X_train = pp.farr(X_train)
        X_test = pp.farr(X_test)
        y_train = pp.farr(y_train)
        y_test = pp.farr(y_test)
        ## X_train, X_test, y_train, y_test should all be secretly shared <-
        print('**************', i, X_train.shape, X_test.shape,
              y_train.shape, y_test.shape, '***************')

        # val, num_train, ran, the random seed, and index_perm are all public
        val = float(config[1][11])
        num_train = X_train.shape[0]
        ran = int(config[1][12])
        np.random.seed(seed)
        index_perm = np.random.permutation(num_train)
        X_train_c = X_train[index_perm, :]  # shuffled train data
        y_train_c = y_train[index_perm, :]  # keep labels aligned with the shuffle
        X_test_c = X_test
        y_test_c = y_test

        # max-abs scaling, fitted on the training split only
        maxabs_x = maxabsscaler(X_train_c)
        X_train_c = scale(X_train_c, maxabs_x)
        X_test_c = scale(X_test_c, maxabs_x)
        maxabs_y = maxabsscaler(y_train_c)
        y_train_c = scale(y_train_c, maxabs_y)
        y_test_c = scale(y_test_c, maxabs_y)

        # split train set and validation set
        valid_num = int(val * X_train_c.shape[0])
        X_valid_c = X_train_c[:valid_num, :]
        y_valid_c = y_train_c[:valid_num, :]
        X_train_c = X_train_c[valid_num:, :]
        y_train_c = y_train_c[valid_num:, :]

        # train and test; the ensemble size (e.g., 8) is public
        r2test, y_test_pred = nns(X_train_c, y_train_c, X_valid_c, y_valid_c,
                                  X_test_c, y_test_c, maxabs_y, ensemble, hyper, ran)
        print('important log')
        print('seed:', ran)
        print('hyper:', hyper)
        print('dataset:', i)
    return r2test, y_test_pred
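
# For reference, the `config` layout `run` expects, inferred from the indexing
# above (the example values are hypothetical placeholders, not repo defaults):
#
#   config[1] = [data_set, data_dir,
#                hyper0, hyper1, hyper2, hyper3, hyper4,   # config[1][2..6]
#                n_epoch, batch_size,                      # config[1][7..8]
#                ensemble, seed,                           # config[1][9..10]
#                val, ran]                                 # config[1][11..12]
#
# e.g. run((None, ['demo', './data/', '4', '64', '0.01', '0.9', '0.2',
#                  '50', '128', '8', '0', '0.1', '7']))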
def DCA_po(networks, dim, rsp, maxiter, pmiter, log_iter):
    def log_po(x, times):
        # Taylor expansion of ln(x) around 1: sum_k (-1)^(k+1) (x-1)^k / k
        tmp = x - 1
        sgn = 1
        result = 0
        for k in range(times):
            result += 1. / (k + 1) * sgn * tmp
            tmp *= x - 1
            sgn *= -1
        return result

    def power(A, v, it):
        # Power iteration: returns the dominant eigenvector u and eigenvalue l.
        u = v
        for i in range(it):
            v = pnp.dot(A, u)
            l = pnp.dot(pnp.transpose(v), u) * myreciprocal(pnp.dot(pnp.transpose(u), u))
            u = v * myreciprocal(l.flatten()[0])
        return u, l

    def hhmul(v, w):
        # Apply the Householder reflection H = I - 2ww^T to v.
        return v - 2 * pnp.transpose(w).dot(v).flatten()[0] * w

    def hhupdate(A, w):
        # Householder deflation: compute H A H, then drop the first row/column.
        wA = 2 * pnp.dot(w, pnp.dot(pnp.transpose(w), A))
        wAw = 2 * pnp.dot(pnp.dot(wA, w), pnp.transpose(w))
        A = A - wA - pnp.transpose(wA) + wAw
        return A[1:, 1:]

    def pmPCA_po(A, dim, it):
        results = []
        ws = []
        ls = []
        for i in range(dim):
            v = pnp.ones((A.shape[0], 1))
            v, l = power(A, v, it)
            # Prepare a vector w
            w = pnp.zeros(v.shape)
            w[0] = pnp.norm(v)
            w += v
            w = w * myreciprocal(pnp.norm(w))
            # Reduce the matrix dimension
            A = hhupdate(A, w)
            # Reconstruct the eigenvector of the original matrix from the current one
            for wp in ws:
                v = pnp.concatenate((pp.farr([[0]]), v))
                v = hhmul(v, wp)
                v = v * myreciprocal(pnp.norm(v))
            results.append(v)
            ws.insert(0, w)
            ls.append(pp.sfixed(l.flatten()[0]))
        return pnp.concatenate(results, axis=1), pp.farr(ls)

    # Run RWR on each network and concatenate the diffusion states column-wise.
    P = pp.farr([])
    for net in networks:
        tQ = RWR_po(net, maxiter, rsp)
        if P.shape[0] == 0:
            P = pnp.zeros((tQ.shape[0], 0))
        # concatenate network
        P = pnp.hstack((P, tQ))
    alpha = 0.01
    P = log_po(P + alpha, log_iter) - pnp.log(alpha)  # 0 < p < ln(n+1)
    P = pnp.dot(P, pnp.transpose(P))                  # 0 < p < n * ln^2(n+1)
    vecs, lambdas = pmPCA_po(P, dim, pmiter)
    # X = vecs * (lambdas)^(1/4): the fourth root of P P^T's eigenvalues is
    # the square root of P's singular values; zero eigenvalues are masked
    # obliviously to avoid sqrt-of-zero noise.
    sigd = pnp.dot(pnp.eye(dim), pnp.diag(lambdas))
    sigd_sqsq = pnp.sqrt(pnp.sqrt(sigd))
    flag = pnp.abs(sigd) < 1e-6
    sigd_sqsq = flag * pnp.zeros(sigd.shape) + (1 - flag) * sigd_sqsq
    X = pnp.dot(vecs, sigd_sqsq)
    return X
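
# Plaintext cross-check for `pmPCA_po`: the same power-iteration plus
# Householder-deflation scheme on plain NumPy arrays, so revealed results can
# be compared against np.linalg.eigh. A sketch only: `_np_top_eigs` is a
# hypothetical test helper; it assumes a symmetric input (as produced by
# P @ P.T above) and mirrors, not replaces, the MPC version.
def _np_top_eigs(A, dim, it=100):
    import numpy as np
    A = np.array(A, dtype='float64')
    vecs, vals, ws = [], [], []
    for _ in range(dim):
        v = np.ones((A.shape[0], 1))
        for _ in range(it):              # power iteration
            v_new = A.dot(v)
            l = float(v_new.T.dot(v) / v.T.dot(v))
            v = v_new / l
        w = np.zeros_like(v)
        w[0] = np.linalg.norm(v)
        w = w + v
        w = w / np.linalg.norm(w)        # Householder direction
        A = A - 2.0 * w.dot(w.T.dot(A))  # H A
        A = A - 2.0 * A.dot(w).dot(w.T)  # (H A) H
        A = A[1:, 1:]                    # deflate
        for wp in ws:                    # lift v back to the original basis
            v = np.vstack(([[0.0]], v))
            v = v - 2.0 * float(wp.T.dot(v)) * wp
            v = v / np.linalg.norm(v)
        vecs.append(v / np.linalg.norm(v))
        ws.insert(0, w)
        vals.append(l)
    return np.hstack(vecs), np.array(vals)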
def read_ndarray(dirn, net, sepa, dtype='float64'):
    inputID = dirn + net + '.txt'
    print(inputID)
    M = np.loadtxt(inputID, delimiter=sepa, dtype=dtype)
    return pp.farr(M)
def multi_dot(arr):
    # Chained matrix product, folded right-to-left:
    # multi_dot([A, B, C]) == A.dot(B.dot(C))
    ans = arr[-1]
    for ii in range(len(arr) - 2, -1, -1):
        ans = pp.farr(arr[ii]).dot(ans)
    return ans
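
# Plaintext mirror of `multi_dot` for testing on revealed values, using NumPy
# stand-ins for the secret-shared arrays. `_multi_dot_plain` is a hypothetical
# helper, not part of the module API.
def _multi_dot_plain(arr):
    import numpy as np
    ans = np.asarray(arr[-1])
    for ii in range(len(arr) - 2, -1, -1):
        # same right-to-left fold as multi_dot above
        ans = np.asarray(arr[ii]).dot(ans)
    return ans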