def setUpClass(cls):
    """Read every fixture matrix once and cache it on the class.

    The matrices are stored in ``cls.data``, a dict keyed by file name.
    """
    matrix_names = (
        "train.sdm",
        "test.sdm",
        "side_c2v.ddm",
        "side_ecfp6_counts_var005.sdm",
        "side_ecfp6_folded_dense.ddm",
    )
    loaded = {}
    for name in matrix_names:
        loaded[name] = mio.read_matrix(name)
    cls.data = loaded
def test_matrix_ddm(self):
    """Write a random NumPy array to a ``.ddm`` file and read it back.

    The matrix read back must be exactly equal to the one written.
    """
    path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_ddt.ddm")
    written = numpy.random.randn(10, 20)

    matrix_io.write_matrix(path, written)
    read_back = matrix_io.read_matrix(path)

    self.assertTrue(numpy.array_equal(read_back, written))
def test_matrix_sdm(self):
    """Write a random sparse matrix to a ``.sdm`` file and read it back.

    The test passes when the element-wise inequality of the written and
    re-read matrices has no stored entries.
    """
    path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sdm.sdm")
    written = scipy.sparse.rand(10, 20, 0.5)

    matrix_io.write_matrix(path, written)
    read_back = matrix_io.read_matrix(path)

    mismatches = (written != read_back).nnz
    self.assertTrue(mismatches == 0)
def test_dense_matrix_csv(self):
    """Write a random NumPy array to a ``.csv`` file and read it back.

    Values are compared with ``numpy.allclose`` rather than exact equality.
    """
    path = "{}/{}".format(self.TEMP_DIR_NAME, "test_dense_matrix_csv.csv")
    written = numpy.random.randn(10, 20)

    matrix_io.write_matrix(path, written)
    read_back = matrix_io.read_matrix(path)

    self.assertTrue(numpy.allclose(read_back, written))
def test_matrix_sbm(self):
    """Write a random 0/1 sparse matrix to a ``.sbm`` file and read it back.

    The test passes when the element-wise inequality of the written and
    re-read matrices has no stored entries.
    """
    path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sbm.sbm")

    # Random integers drawn from {0, 1}, converted to COO form for writing.
    dense_bits = numpy.random.randint(0, 2, size=(10, 20))
    written = scipy.sparse.coo_matrix(dense_bits)

    matrix_io.write_matrix(path, written)
    read_back = matrix_io.read_matrix(path)

    mismatches = (written != read_back).nnz
    self.assertTrue(mismatches == 0)
def test_matrix_sparse_mtx(self):
    """Write a random sparse matrix to a ``.mtx`` file and read it back.

    Both matrices are densified and compared with ``numpy.allclose``.
    """
    path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sparse_mtx.mtx")
    written = scipy.sparse.rand(10, 20, 0.5)

    matrix_io.write_matrix(path, written)
    read_back = matrix_io.read_matrix(path)

    dense_actual = read_back.todense()
    dense_expected = written.todense()
    self.assertTrue(numpy.allclose(dense_actual, dense_expected))
def fromStepFile(cls, file_name, iter):
    """Build a sample from the config file of one saved step.

    Args:
        file_name: Path of the step config file, parsed with
            ``read_config_file``.
        iter: Iteration number forwarded to the ``cls`` constructor.

    Returns:
        A ``cls`` instance with one latent matrix and one link matrix
        (beta) added per mode.
    """
    cp = read_config_file(file_name)
    models = cp["models"]
    sample = cls(int(models["num_models"]), iter)

    # latent matrices
    for mode in range(sample.nmodes):
        latent_path = models["model_" + str(mode)]
        sample.add_latent(mio.read_matrix(latent_path))

    # link matrices (beta); a missing file is recorded as an empty (0, 0) array
    for mode in range(sample.nmodes):
        beta_path = cp["priors"]["prior_" + str(mode)]
        try:
            beta = mio.read_matrix(beta_path)
            sample.add_beta(beta)
        except FileNotFoundError:
            sample.add_beta(np.ndarray((0, 0)))

    return sample
def calc_rmse(predfile, test):
    """Compute the RMSE of stored predictions at the entries present in ``test``.

    Args:
        predfile: Path of the prediction matrix, read with ``mio.read_matrix``.
        test: Sparse test matrix; only the entries reported by
            ``sparse.find`` are scored.

    Returns:
        Whatever ``smurff.calc_rmse`` returns for the selected predictions.
    """
    predictions = mio.read_matrix(predfile)

    # Pair each entry found in the test matrix with the prediction at the same cell.
    rows, cols, vals = sparse.find(test)
    selected_predictions = []
    for i, j, v in zip(rows, cols, vals):
        selected_predictions.append(
            smurff.Prediction((i, j), v, pred_avg=predictions[i, j])
        )

    return smurff.calc_rmse(selected_predictions)
def fromStepFile(cls, file_name, dir_name):
    """Build a sample from a step config file stored under ``dir_name``.

    Args:
        file_name: Step config file name, passed to ``read_config_file``
            together with ``dir_name``.
        dir_name: Directory that the matrix paths in the config are
            joined onto.

    Returns:
        A ``cls`` instance with one latent matrix and one link matrix
        (beta) added per mode.
    """
    cp = read_config_file(file_name, dir_name)
    header = cp["global"]
    nmodes = int(header["num_modes"])
    step_number = int(header["number"])
    sample = cls(nmodes, step_number)

    # latent matrices
    for mode in range(sample.nmodes):
        latent_path = os.path.join(dir_name, cp["latents"]["latents_" + str(mode)])
        sample.add_latent(mio.read_matrix(latent_path))

    # link matrices (beta); the literal 'none' marks a mode without one
    for mode in range(sample.nmodes):
        link_name = cp["link_matrices"]["link_matrix_" + str(mode)]
        if link_name == 'none':
            sample.add_beta(np.ndarray((0, 0)))
        else:
            sample.add_beta(mio.read_matrix(os.path.join(dir_name, link_name)))

    return sample
def fromStepFile(cls, file_name, iter):
    """Build a sample, including its predictions table, from a step file.

    Args:
        file_name: Path of the step file, parsed with
            ``HeadlessConfigParser``.
        iter: Iteration number forwarded to the ``cls`` constructor.

    Returns:
        A ``cls`` instance with ``predictions`` set from the ';'-separated
        CSV named by the "pred" key, plus one latent matrix and one link
        matrix (beta) per mode.
    """
    cp = HeadlessConfigParser(file_name)
    sample = cls(int(cp["num_models"]), iter)
    sample.predictions = pd.read_csv(cp["pred"], sep=";")

    # latent matrices
    for mode in range(sample.nmodes):
        latent_path = cp["model_" + str(mode)]
        sample.add_latent(mio.read_matrix(latent_path))

    # link matrices (beta); a missing file is recorded as an empty (0, 0) array
    for mode in range(sample.nmodes):
        beta_path = cp["prior_" + str(mode)]
        try:
            beta = mio.read_matrix(beta_path)
            sample.add_beta(beta)
        except FileNotFoundError:
            sample.add_beta(np.ndarray((0, 0)))

    return sample
def test_read_cpp_generated_sparse_matrix_mtx(self):
    """Read the checked-in sparse ``.mtx`` fixture and compare it to a known matrix.

    The expected 3x4 matrix has rows 0 and 2 filled and row 1 empty.
    """
    rows = numpy.array([0, 0, 0, 0, 2, 2, 2, 2])
    cols = numpy.array([0, 1, 2, 3, 0, 1, 2, 3])
    vals = numpy.array([1, 2, 3, 4, 9, 10, 11, 12])
    expected = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=(3, 4))

    actual = matrix_io.read_matrix("test_data/cpp_generated_sparse_matrix.mtx")

    dense_actual = actual.todense()
    dense_expected = expected.todense()
    self.assertTrue(numpy.allclose(dense_actual, dense_expected))
def test_macauoom(self):
    """Train with and without side info, then print in- and out-of-matrix RMSEs.

    Two sessions are run through ``train_session``: one without side
    information (reported as "bpmf") and one with it (reported as "macau").
    The saved macau run is then used to predict the test rows that have no
    stored entries in the training matrix, via several prediction helpers.
    This test only prints the RMSE values; it asserts nothing.
    """
    train = mio.read_matrix("train.mm").tocsr()
    test = mio.read_matrix("test.mm").tocsr()
    sideinfo = mio.read_matrix("sideinfo.mm").tocsr()

    bpmf_rmse = train_session(mkdtemp(), train, test)

    rootdir = mkdtemp()
    macau_rmse = train_session(rootdir, train, test, sideinfo)

    # make out-of-matrix predictions for rows not in train:
    # consecutive differences of the CSR indptr give the stored-entry count per row
    train_row_counts = np.diff(train.indptr)
    test_empty = test[train_row_counts == 0]

    rootfile = join(rootdir, "root.ini")
    predict_session = smurff.PredictSession(rootfile)

    rmse_im = im_prediction(predict_session, test_empty)
    rmse_oom_py = smurff_py_oom_prediction(predict_session, sideinfo, test_empty)
    rmse_oom_cmd = smurff_cmd_oom_prediction(rootfile, "sideinfo.mm", test_empty)
    rmse_oom_tf = tf_cmd_oom_prediction(rootdir, "sideinfo.mm", test_empty)
    rmse_oom_af = af_cmd_oom_prediction(rootdir, sideinfo, test_empty)

    report = (
        ("bpmf full test : %.2f", bpmf_rmse),
        ("macau full test: %.2f", macau_rmse),
        ("in-matrix: %.2f", rmse_im),
        ("out-of-matrix smurff python: %.2f", rmse_oom_py),
        ("out-of-matrix smurff cmd: %.2f", rmse_oom_cmd),
        ("out-of-matrix tf (floats): %.2f", rmse_oom_tf),
        ("out-of-matrix af (floats): %.2f", rmse_oom_af),
    )
    for template, rmse in report:
        print(template % rmse)
def fromStepFile(cls, file_name, dir_name):
    """Build a sample, with prediction statistics and posteriors, from a step file.

    Args:
        file_name: Step config file name, passed to ``read_config_file``
            together with ``dir_name``.
        dir_name: Directory that the matrix paths in the config are
            joined onto.

    Returns:
        A ``cls`` instance with ``pred_stats``, ``pred_avg`` and ``pred_var``
        set, and with a latent matrix and a link matrix added per mode.
    """
    # NOTE(review): the original indentation of this function was lost. The
    # link-matrix section is laid out inside the per-mode loop because it
    # indexes by ``i`` -- confirm against the upstream file.
    cp = read_config_file(file_name, dir_name)
    nmodes = int(cp["global"]["num_modes"])
    iter = int(cp["global"]["number"])
    sample = cls(nmodes, iter)

    # predictions, rmse
    # pred_stats is the "global" section of a second config file named by pred_state
    sample.pred_stats = dict(
        read_config_file(cp["predictions"]["pred_state"], dir_name)["global"].items())
    sample.pred_avg = mio.read_matrix(
        os.path.join(dir_name, cp["predictions"]["pred_avg"]))
    sample.pred_var = mio.read_matrix(
        os.path.join(dir_name, cp["predictions"]["pred_var"]))

    # latent matrices
    for i in range(sample.nmodes):
        file_name = os.path.join(dir_name, cp["latents"]["latents_" + str(i)])
        U = mio.read_matrix(file_name)

        # Posterior mean / lambda are optional: the literal 'none' means
        # "not stored", in which case None is passed to add_latent.
        postMu = None
        postLambda = None
        file_name = cp["latents"]["post_mu_" + str(i)]
        if (file_name != 'none'):
            postMu = mio.read_matrix(os.path.join(dir_name, file_name))
        file_name = cp["latents"]["post_lambda_" + str(i)]
        if (file_name != 'none'):
            postLambda = mio.read_matrix(os.path.join(dir_name, file_name))
        sample.add_latent(U, postMu, postLambda)

        # link matrices (beta) and hyper mus
        # Both default to an empty (0, 0) array when the config says 'none'.
        beta = np.ndarray((0, 0))
        mu = np.ndarray((0, 0))
        file_name = cp["link_matrices"]["link_matrix_" + str(i)]
        if (file_name != 'none'):
            beta = mio.read_matrix(os.path.join(dir_name, file_name))
        file_name = cp["link_matrices"]["mu_" + str(i)]
        if (file_name != 'none'):
            mu = mio.read_matrix(os.path.join(dir_name, file_name))
            # Drop length-1 axes from the matrix that was read.
            # NOTE(review): squeeze may originally have been outside this
            # ``if``; it leaves a (0, 0) array unchanged either way.
            mu = np.squeeze(mu)
        sample.add_beta(beta, mu)
    return sample
#!/usr/bin/env python3
"""Print the shape, and nnz when available, of each matrix file given as an argument."""
import matrix_io as mio
import sys

for path in sys.argv[1:]:
    matrix = mio.read_matrix(path)
    summary = [path, ":", matrix.shape]
    try:
        summary.append(matrix.nnz)
    except AttributeError:
        # No nnz attribute on this matrix: report the shape only.
        pass
    print(*summary)
def test_read_cpp_generated_dense_matrix_csv(self):
    """Read the checked-in dense ``.csv`` fixture and compare it to the known 3x4 matrix."""
    expected = numpy.array([
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ])

    actual = matrix_io.read_matrix("test_data/cpp_generated_dense_matrix.csv")

    self.assertTrue(numpy.allclose(actual, expected))
else: if (args.mode == "cols"): m = m - np.broadcast_to(np.expand_dims(mean, 0), m.shape) elif (args.mode == "rows"): m = m - np.broadcast_to(np.expand_dims(mean, 1), m.shape) elif (args.mode == "global"): m = m - mean elif (args.mode == "none"): pass elif (args.mode != "none"): raise ValueError("Unknown centering mode: %s" % (args.mode)) return m train = mio.read_matrix(args.train) test = mio.read_matrix(args.test) assert train.shape == test.shape mean_train = mean(train, args.mode) centered_train = center(train, args.mode, mean_train) centered_test = center(test, args.mode, mean_train) mio.write_matrix(os.path.join(args.output, os.path.basename(args.train)), centered_train) mio.write_matrix(os.path.join(args.output, os.path.basename(args.test)), centered_test) features = [] if args.col_features: features += args.col_features if args.row_features: features += args.row_features for fname in features:
#!/usr/bin/python
import matrix_io as mio
import numpy as np
import math
from glob import glob

# Gather the U matrix of every single-digit sample file
sample_files = glob("output/U-[0123456789].ddm")
Us = [mio.read_matrix(sample_file) for sample_file in sample_files]
print("samples:\n", Us)

# Stack the samples along a new leading axis and average over it
Ustacked = np.stack(Us)
mu1 = np.mean(Ustacked, axis=0)
print("python mu:\n", mu1)

# Reference mean read from the output directory
mu2 = mio.read_matrix("output/U-mu.ddm")
print("bpmf mu:\n", mu2)
print("norm mu1 - mu2: %.4f" % np.linalg.norm(mu1 - mu2))

# Covariance and precision: first split along the last axis, one slice per index
Uunstacked = np.squeeze(np.split(Ustacked, Ustacked.shape[2], axis=2))
Ucov = []
Uprec = []
for u in Uunstacked:
    cov_u = np.cov(u, rowvar=False)
    Ucov.append(cov_u)
    Uprec.append(np.linalg.inv(cov_u))

# Put the per-slice results back together along a trailing axis
Ucovstacked = np.stack(Ucov, axis=2)
Lambda1 = np.stack(Uprec, axis=2)

# reshape correctly
print("python: precision user 0\n", Lambda1[:, :, 0])
Lambda2_flat = mio.read_matrix("output/U-Lambda.ddm")
#!/usr/bin/python
import matrix_io as mio
import numpy as np
import math
from glob import glob

# Read the U matrix from each single-digit sample file
Us = []
for sample_file in glob("output/U-[0123456789].ddm"):
    Us.append(mio.read_matrix(sample_file))
print("samples:\n", Us)

# Stack the samples on a new first axis, then take the mean across samples
Ustacked = np.stack(Us)
mu1 = np.mean(Ustacked, axis=0)
print("python mu:\n", mu1)

# Mean stored in the output directory, for comparison
mu2 = mio.read_matrix("output/U-mu.ddm")
print("bpmf mu:\n", mu2)
mean_gap = np.linalg.norm(mu1 - mu2)
print("norm mu1 - mu2: %.4f" % mean_gap)

# Covariance and precision per slice of the last axis
n_slices = Ustacked.shape[2]
Uunstacked = np.squeeze(np.split(Ustacked, n_slices, axis=2))
Ucov = []
Uprec = []
for u in Uunstacked:
    cov_u = np.cov(u, rowvar=False)
    Ucov.append(cov_u)
    Uprec.append(np.linalg.inv(cov_u))

# Restack the per-slice matrices along a third axis
Ucovstacked = np.stack(Ucov, axis=2)
Lambda1 = np.stack(Uprec, axis=2)

# reshape correctly
print("python: precision user 0\n", Lambda1[:, :, 0])
Lambda2_flat = mio.read_matrix("output/U-Lambda.ddm")
#!/usr/bin/env python
import smurff
import matrix_io as mio

# Load the IC50 matrix and split it into train and test parts
ic50 = mio.read_matrix("chembl-IC50-346targets.mm")
ic50_train, ic50_test = smurff.make_train_test(ic50, 0.2)
ic50_threshold = 6.

# The same threshold of 6. is used to calculate AUC on the test data
train_options = dict(
    priors=['normal', 'normal'],
    num_latent=32,
    burnin=10,
    nsamples=10,
    threshold=ic50_threshold,
)
session = smurff.TrainSession(**train_options)

# Probit noise with the activity threshold pIC50 > 6. binarizes the train data
probit_noise = smurff.ProbitNoise(ic50_threshold)
session.addTrainAndTest(ic50_train, ic50_test, probit_noise)

predictions = session.run()

rmse = smurff.calc_rmse(predictions)
print("RMSE = %.2f" % rmse)
auc = smurff.calc_auc(predictions, ic50_threshold)
print("AUC = %.2f" % auc)