Exemple #1
0
    def run_predict_session(self, nmodes, sparse):
        """Train a model and cross-check a predict session derived from it.

        Calls ``self.run_train_session(nmodes, sparse)``, builds a predict
        session from the result, and asserts (to 2 decimal places) that:

        * the first sorted test prediction of the train session matches the
          first sorted ``predict_some(self.Ytest)`` result and the
          ``predict_one`` result for the same coordinates;
        * every ``pred_all`` entry returned by ``predict_some`` matches the
          corresponding slice of ``predict_all()``;
        * the RMSE computed from either prediction list equals
          ``train_session.getRmseAvg()``.

        Both arguments are forwarded unchanged to ``run_train_session``.
        """
        trained = self.run_train_session(nmodes, sparse)
        predictor = trained.makePredictSession()

        from_train = sorted(trained.getTestPredictions())
        from_some = sorted(predictor.predict_some(self.Ytest))
        reference = from_train[0]
        from_one = predictor.predict_one(reference.coords, reference.val)
        from_all = predictor.predict_all()

        self.assertEqual(len(from_train), len(from_some))

        # The reference prediction (first one from the train session) must
        # agree first with predict_some, then with predict_one.
        for candidate in (from_some[0], from_one):
            self.assertEqual(reference.coords, candidate.coords)
            for field in ("val", "pred_1sample", "pred_avg"):
                self.assertAlmostEqual(getattr(reference, field),
                                       getattr(candidate, field),
                                       places=2)

        # Each predict_some entry must agree with predict_all at its coords;
        # the leading Ellipsis keeps every axis that precedes the coordinates.
        for entry in from_some:
            index = (Ellipsis,) + entry.coords
            for from_entry, from_full in zip(entry.pred_all, from_all[index]):
                self.assertAlmostEqual(from_entry, from_full, places=2)

        rmse_from_train = smurff.calc_rmse(from_train)
        rmse_from_some = smurff.calc_rmse(from_some)

        self.assertAlmostEqual(trained.getRmseAvg(), rmse_from_some, places=2)
        self.assertAlmostEqual(trained.getRmseAvg(), rmse_from_train, places=2)
Exemple #2
0
    def test_macau_dense_probit(self):
        """Probit Macau with dense side info must reach an AUC above 0.55.

        Builds a fully observed 25 x 3 binary matrix (1.0 where the inner
        product of two random factor rows is positive, else 0.0), splits it
        with ``smurff.make_train_test_df(df, 0.2)``, trains with a
        ``['macau', 'normal']`` prior pair plus the dense factor matrix as
        side information on mode 0, and checks the AUC of the predictions.
        """
        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        idx = list(
            itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
        df["value"] = (np.array([np.sum(A[i, :] * B[j, :])
                                 for i, j in idx]) > 0.0).astype(np.float64)
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        # The values are 0.0 / 1.0, so 0.5 separates the two classes.
        threshold = 0.5

        trainSession = smurff.TrainSession(priors=['macau', 'normal'],
                                           num_latent=4,
                                           threshold=threshold,
                                           burnin=20,
                                           nsamples=20,
                                           verbose=False)

        trainSession.addTrainAndTest(Ytrain, Ytest,
                                     smurff.ProbitNoise(threshold))
        trainSession.addSideInfo(0, A, direct=True)

        predictions = trainSession.run()

        # The bound and the failure message are about AUC, so score with AUC.
        # Asserting "RMSE > 0.55" would pass more easily the worse the model
        # gets.
        auc = smurff.calc_auc(predictions, threshold)
        self.assertTrue(
            auc > 0.55,
            msg=
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % auc)
Exemple #3
0
    def test_macau_tensor_univariate(self):
        """Univariate ``MacauSession`` on a 15 x 3 x 2 tensor gives RMSE < 0.5.

        Every cell of the tensor is the sum over the two factor columns of
        the element-wise product of one row from each random factor matrix.
        The first factor matrix is also passed, as a COO matrix, as side
        information for the first mode.
        """
        factors_a = np.random.randn(15, 2)
        factors_b = np.random.randn(3, 2)
        factors_c = np.random.randn(2, 2)

        # Enumerate every (a, b, c) cell of the dense tensor.
        cells = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        cell_values = [
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in cells
        ]

        frame = pd.DataFrame(np.asarray(cells), columns=["A", "B", "C"])
        frame["value"] = np.array(cell_values)
        Ytrain, Ytest = smurff.make_train_test_df(frame, 0.2)

        side_a = scipy.sparse.coo_matrix(factors_a)

        session = smurff.MacauSession(
            Ytrain=Ytrain,
            Ytest=Ytest,
            side_info=[side_a, None, None],
            univariate=True,
            num_latent=4,
            verbose=0,
            burnin=200,
            nsamples=400,
        )
        rmse = smurff.calc_rmse(session.run())

        failure = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure)
Exemple #4
0
    def test_bpmf_tensor2(self):
        """``smurff.smurff`` with three normal priors gives RMSE < 0.5.

        The data is a fully observed 15 x 20 x 3 tensor whose cells are the
        sum over the two factor columns of the element-wise product of one
        row from each random factor matrix.
        """
        factors_a = np.random.randn(15, 2)
        factors_b = np.random.randn(20, 2)
        factors_c = np.random.randn(3, 2)

        # Enumerate every (a, b, c) cell of the dense tensor.
        cells = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        cell_values = [
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in cells
        ]

        frame = pd.DataFrame(np.asarray(cells), columns=["A", "B", "C"])
        frame["value"] = np.array(cell_values)
        Ytrain, Ytest = smurff.make_train_test_df(frame, 0.2)

        predictions = smurff.smurff(
            Ytrain,
            Ytest=Ytest,
            priors=['normal', 'normal', 'normal'],
            num_latent=4,
            verbose=False,
            burnin=20,
            nsamples=20,
        )
        rmse = smurff.calc_rmse(predictions)

        failure = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure)
Exemple #5
0
    def test_macau_tensor(self):
        """``smurff.macau`` on a 30 x 4 x 2 tensor with side info: RMSE < 0.5.

        Every cell is the sum over the two factor columns of the element-wise
        product of one row from each random factor matrix. The explicit
        ``shape`` is forwarded to ``smurff.make_train_test_df`` and the first
        factor matrix is passed, as a COO matrix, as side information for the
        first mode.
        """
        shape = [30, 4, 2]
        dim_a, dim_b, dim_c = shape

        factors_a = np.random.randn(dim_a, 2)
        factors_b = np.random.randn(dim_b, 2)
        factors_c = np.random.randn(dim_c, 2)

        # Enumerate every (a, b, c) cell of the dense tensor.
        cells = list(itertools.product(
            np.arange(dim_a),
            np.arange(dim_b),
            np.arange(dim_c),
        ))
        cell_values = [
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in cells
        ]

        frame = pd.DataFrame(np.asarray(cells), columns=["A", "B", "C"])
        frame["value"] = np.array(cell_values)
        Ytrain, Ytest = smurff.make_train_test_df(frame, 0.2, shape=shape)

        side_a = scipy.sparse.coo_matrix(factors_a)

        predictions = smurff.macau(
            Ytrain=Ytrain,
            Ytest=Ytest,
            side_info=[side_a, None, None],
            num_latent=4,
            verbose=verbose,
            burnin=200,
            nsamples=200,
        )
        rmse = smurff.calc_rmse(predictions)

        failure = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure)
Exemple #6
0
    def test_bpmf_tensor3(self):
        """Run ``smurff.bpmf`` on a 15 x 20 x 1 tensor and on its 2-D form.

        The tensor run must reach RMSE < 0.5. The same train/test entries are
        then rebuilt as COO matrices from the ``A`` and ``B`` columns (the
        third mode has a single index) and ``smurff.bpmf`` is run again.
        """
        factors_a = np.random.randn(15, 2)
        factors_b = np.random.randn(20, 2)
        factors_c = np.random.randn(1, 2)

        # Enumerate every (a, b, c) cell of the dense tensor.
        cells = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        cell_values = [
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in cells
        ]

        frame = pd.DataFrame(np.asarray(cells), columns=["A", "B", "C"])
        frame["value"] = np.array(cell_values)
        Ytrain, Ytest = smurff.make_train_test_df(frame, 0.2)

        # Both runs use the same sampler settings.
        bpmf_options = dict(num_latent=4, verbose=verbose, burnin=20, nsamples=20)

        predictions = smurff.bpmf(Ytrain, Ytest=Ytest, **bpmf_options)
        rmse = smurff.calc_rmse(predictions)

        self.assertTrue(rmse < 0.5,
                        msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

        train_frame = Ytrain.data
        test_frame = Ytest.data
        Ytrain_sp = sp.coo_matrix(
            (train_frame.value, (train_frame.A, train_frame.B)))
        Ytest_sp = sp.coo_matrix(
            (test_frame.value, (test_frame.A, test_frame.B)))

        # NOTE(review): the matrix result is not compared with anything here;
        # confirm whether an assertion is missing.
        results_mat = smurff.bpmf(Ytrain_sp, Ytest=Ytest_sp, **bpmf_options)
Exemple #7
0
def train_session(root, train, test, sideinfo=None):
    """Train a smurff model that saves into ``root`` and return its RMSE.

    Args:
        root: Output directory. It is removed if present, re-created, and
            passed to ``smurff.TrainSession`` as ``save_prefix``.
        train: Training data handed to ``addTrainAndTest``.
        test: Test data handed to ``addTrainAndTest``.
        sideinfo: Optional side information for mode 0. When not ``None`` it
            is added with ``smurff.FixedNoise(10.)`` and ``direct=True``.

    Returns:
        ``smurff.calc_rmse`` of the predictions returned by the run.
    """
    import shutil

    # Start from an empty output directory.
    shutil.rmtree(root, ignore_errors=True)
    os.makedirs(root)
    print("save prefix = ", root)

    session_options = dict(
        num_latent=4,
        burnin=800,
        nsamples=100,
        verbose=global_verbose,
        save_freq=1,
        save_prefix=root,
    )
    session = smurff.TrainSession(**session_options)
    session.addTrainAndTest(train, test, smurff.FixedNoise(1.0))

    if sideinfo is not None:
        side_noise = smurff.FixedNoise(10.)
        session.addSideInfo(0, sideinfo, side_noise, direct=True)

    return smurff.calc_rmse(session.run())
Exemple #8
0
def im_prediction(predict_session, test):
    """Return the RMSE of ``predict_session.predict_sparse(test)``.

    Args:
        predict_session: Object exposing ``predict_sparse``.
        test: Test data forwarded unchanged to ``predict_sparse``.

    Returns:
        ``smurff.calc_rmse`` of the resulting predictions.
    """
    return smurff.calc_rmse(predict_session.predict_sparse(test))
Exemple #9
0
def calc_rmse(predfile, test):
    """Compute the RMSE of stored predictions at the entries of ``test``.

    Args:
        predfile: Passed to ``mio.read_matrix``; the result is indexed as
            ``[i, j]``.
        test: Matrix passed to ``sparse.find`` to obtain the
            ``(row, column, value)`` triplets to score.

    Returns:
        ``smurff.calc_rmse`` over one ``smurff.Prediction`` per triplet, each
        carrying the test value and the stored prediction as ``pred_avg``.
    """
    stored = mio.read_matrix(predfile)
    rows, cols, values = sparse.find(test)

    # Keep only the stored predictions at positions listed in the test matrix.
    selected = []
    for row, col, value in zip(rows, cols, values):
        selected.append(
            smurff.Prediction((row, col), value, pred_avg=stored[row, col]))

    return smurff.calc_rmse(selected)
    def test_simple(self):
        """Cross-check a predict session against the train session it came from.

        Asserts (to 2 decimal places) that the first sorted test prediction of
        the train session matches ``predict_some`` and ``predict_one``, that
        every ``pred_all`` entry matches the corresponding slice of
        ``predict_all()``, and that the RMSE from either prediction list
        equals ``getRmseAvg()``. It also calls ``predict`` with a row of
        ``self.side_info`` in place of the first coordinate and compares the
        mean of that result with the RMSE using ``places=-1``.
        """
        trained = self.run_train_session()
        predictor = trained.makePredictSession()

        from_train = sorted(trained.getTestPredictions())
        from_some = sorted(predictor.predict_some(self.Ytest))

        reference = from_train[0]

        from_one = predictor.predict_one(reference.coords, reference.val)
        from_all = predictor.predict_all()

        self.assertEqual(len(from_train), len(from_some))

        # The reference prediction (first one from the train session) must
        # agree first with predict_some, then with predict_one.
        for candidate in (from_some[0], from_one):
            self.assertEqual(reference.coords, candidate.coords)
            for field in ("val", "pred_1sample", "pred_avg"):
                self.assertAlmostEqual(getattr(reference, field),
                                       getattr(candidate, field),
                                       places=2)

        # Each predict_some entry must agree with predict_all at its coords;
        # the leading Ellipsis keeps every axis that precedes the coordinates.
        for entry in from_some:
            index = (Ellipsis,) + entry.coords
            for from_entry, from_full in zip(entry.pred_all, from_all[index]):
                self.assertAlmostEqual(from_entry, from_full, places=2)

        # Same cell as the reference, with the first coordinate replaced by
        # the matching row of side information.
        side_row = self.side_info[reference.coords[0]]
        from_side = predictor.predict([side_row, reference.coords[1]])

        rmse_from_train = smurff.calc_rmse(from_train)
        rmse_from_some = smurff.calc_rmse(from_some)

        self.assertAlmostEqual(trained.getRmseAvg(), rmse_from_some, places=2)
        self.assertAlmostEqual(trained.getRmseAvg(), rmse_from_train, places=2)
        # places=-1 makes this only a very coarse comparison.
        self.assertAlmostEqual(np.mean(from_side), rmse_from_some, places=-1)
Exemple #11
0
def smurff_py_oom_prediction(predict_session, sideinfo, test):
    """Return the RMSE of predictions made from side-information rows.

    For each ``(i, j, v)`` triplet from ``sparse.find(test)``, calls
    ``predict_session.predict_one`` with row ``i`` of ``sideinfo`` in place
    of the first coordinate, ``j`` as the second, and ``v`` as the value.

    Args:
        predict_session: Object exposing ``predict_one``.
        sideinfo: Matrix with a ``tocsr()`` method; converted once so rows
            can be sliced.
        test: Matrix passed to ``sparse.find``.

    Returns:
        ``smurff.calc_rmse`` of the collected predictions.
    """
    side_rows = sideinfo.tocsr()

    oom_predictions = []
    for row, col, value in zip(*sparse.find(test)):
        features = side_rows[row, :]
        oom_predictions.append(
            predict_session.predict_one((features, col), value))

    return smurff.calc_rmse(oom_predictions)
Exemple #12
0
#!/usr/bin/env python

import smurff
import matrix_io as mio

# Load the IC50 matrix and hold out a test split (second argument: 0.2).
ic50 = mio.read_matrix("chembl-IC50-346targets.mm")
ic50_train, ic50_test = smurff.make_train_test(ic50, 0.2)

# The same value is passed as the session threshold, the ProbitNoise
# argument and the AUC threshold.
ic50_threshold = 6.

session = smurff.TrainSession(
    priors=['normal', 'normal'],
    num_latent=32,
    burnin=10,
    nsamples=10,
    threshold=ic50_threshold,
)

probit_noise = smurff.ProbitNoise(ic50_threshold)
session.addTrainAndTest(ic50_train, ic50_test, probit_noise)

predictions = session.run()

rmse = smurff.calc_rmse(predictions)
print("RMSE = %.2f" % rmse)
auc = smurff.calc_auc(predictions, ic50_threshold)
print("AUC = %.2f" % auc)