Exemple #1
0
    def test_macau_tensor_univariate(self):
        # Fits a univariate MacauSession on a dense synthetic 3-way tensor and
        # checks that the RMSE of the returned predictions stays below 0.5.

        # Random factor matrices, two columns each (one per tensor mode).
        A = np.random.randn(15, 2)
        B = np.random.randn(3, 2)
        C = np.random.randn(2, 2)

        # Enumerate every (a, b, c) coordinate of the 15 x 3 x 2 tensor.
        axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*axes))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])

        # Cell value = sum over the two columns of the element-wise product
        # of the three selected factor rows.
        cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
        df["value"] = np.array(cell_values)

        # Split the cells into train and test sets using a ratio of 0.2.
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        # A itself, in sparse COO form, is the side information for the first
        # mode; the other two modes get none.
        Acoo = scipy.sparse.coo_matrix(A)

        session = smurff.MacauSession(
            Ytrain=Ytrain,
            Ytest=Ytest,
            side_info=[Acoo, None, None],
            univariate=True,
            num_latent=4,
            verbose=0,
            burnin=200,
            nsamples=400,
        )
        predictions = session.run()

        rmse = smurff.calc_rmse(predictions)
        failure_msg = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_msg)
Exemple #2
0
    def test_macau_dense_probit(self):
        # Trains a TrainSession with a Macau prior on the first mode (dense
        # side information) and probit noise on a synthetic binary matrix,
        # then checks the metric computed from the predictions against 0.55.

        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        # All (row, column) coordinates of the 25 x 3 matrix.
        row_ids = np.arange(A.shape[0])
        col_ids = np.arange(B.shape[0])
        idx = list(itertools.product(row_ids, col_ids))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])

        # Binary target: 1.0 where the dot product of the two factor rows is
        # strictly positive, 0.0 otherwise.
        dot_products = np.array([np.sum(A[a, :] * B[b, :]) for a, b in idx])
        df["value"] = (dot_products > 0.0).astype(np.float64)

        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        threshold = 0.5  # since we sample from mu(0,1)

        trainSession = smurff.TrainSession(
            priors=['macau', 'normal'],
            num_latent=4,
            threshold=threshold,
            burnin=20,
            nsamples=20,
            verbose=False,
        )

        noise = smurff.ProbitNoise(threshold)
        trainSession.addTrainAndTest(Ytrain, Ytest, noise)
        # The dense factor matrix A is the side information for mode 0.
        trainSession.addSideInfo(0, A, direct=True)

        predictions = trainSession.run()

        # NOTE(review): the value checked here is an RMSE, yet the assertion
        # requires it to be *above* 0.55 and the message talks about AUC.
        # This looks like it was meant to use an AUC metric - confirm which
        # helper the installed smurff version provides before changing it.
        rmse = smurff.calc_rmse(predictions)
        failure_msg = (
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % rmse)
        self.assertTrue(rmse > 0.55, msg=failure_msg)
Exemple #3
0
    def test_bpmf_tensor2(self):
        # Runs smurff.smurff with a 'normal' prior on each of the three modes
        # of a dense synthetic tensor and checks that the RMSE of the
        # returned predictions is below 0.5.

        A = np.random.randn(15, 2)
        B = np.random.randn(20, 2)
        C = np.random.randn(3, 2)

        # Every (a, b, c) coordinate of the 15 x 20 x 3 tensor.
        axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*axes))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])

        # Cell value = sum over columns of the product of the factor rows.
        cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
        df["value"] = np.array(cell_values)

        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        predictions = smurff.smurff(
            Ytrain,
            Ytest=Ytest,
            priors=['normal', 'normal', 'normal'],
            num_latent=4,
            verbose=False,
            burnin=20,
            nsamples=20,
        )

        rmse = smurff.calc_rmse(predictions)
        failure_msg = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_msg)
Exemple #4
0
    def test_macau_tensor(self):
        # Runs smurff.macau on a dense synthetic 30 x 4 x 2 tensor, with the
        # first factor matrix as sparse side information for the first mode,
        # and checks that the RMSE of the returned predictions is below 0.5.

        shape = [30, 4, 2]
        n_a, n_b, n_c = shape

        A = np.random.randn(n_a, 2)
        B = np.random.randn(n_b, 2)
        C = np.random.randn(n_c, 2)

        # Every coordinate of the tensor, in row-major order.
        idx = list(itertools.product(*(np.arange(n) for n in shape)))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])

        # Cell value = sum over columns of the product of the factor rows.
        cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
        df["value"] = np.array(cell_values)

        # The tensor shape is passed explicitly to the splitter here.
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2, shape=shape)

        Acoo = scipy.sparse.coo_matrix(A)

        # `verbose` is not defined in this method; presumably a module-level
        # setting of the test file - verify against the full file.
        predictions = smurff.macau(
            Ytrain=Ytrain,
            Ytest=Ytest,
            side_info=[Acoo, None, None],
            num_latent=4,
            verbose=verbose,
            burnin=200,
            nsamples=200,
        )

        rmse = smurff.calc_rmse(predictions)
        failure_msg = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_msg)
Exemple #5
0
    def test_bpmf_tensor3(self):
        # Runs smurff.bpmf on a 15 x 20 x 1 synthetic tensor and checks the
        # RMSE, then runs it again on the same train/test cells repackaged as
        # sparse 2-D matrices (the third mode has a single index).

        A = np.random.randn(15, 2)
        B = np.random.randn(20, 2)
        C = np.random.randn(1, 2)

        axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*axes))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])

        # Cell value = sum over columns of the product of the factor rows.
        cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
        df["value"] = np.array(cell_values)

        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        # `verbose` is not defined in this method; presumably a module-level
        # setting of the test file - verify against the full file.
        predictions = smurff.bpmf(
            Ytrain,
            Ytest=Ytest,
            num_latent=4,
            verbose=verbose,
            burnin=20,
            nsamples=20,
        )

        rmse = smurff.calc_rmse(predictions)
        failure_msg = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_msg)

        # Rebuild the same train/test cells as COO matrices indexed by the
        # A and B columns only.
        Ytrain_df = Ytrain.data
        Ytest_df = Ytest.data
        train_coords = (Ytrain_df.A, Ytrain_df.B)
        Ytrain_sp = sp.coo_matrix((Ytrain_df.value, train_coords))
        test_coords = (Ytest_df.A, Ytest_df.B)
        Ytest_sp = sp.coo_matrix((Ytest_df.value, test_coords))

        # NOTE(review): the matrix-form result is not checked in the visible
        # code; the call only verifies that this path runs without raising.
        results_mat = smurff.bpmf(
            Ytrain_sp,
            Ytest=Ytest_sp,
            num_latent=4,
            verbose=verbose,
            burnin=20,
            nsamples=20,
        )
Exemple #6
0
    def test_macau_dense_probit(self):
        # Runs smurff.smurff with a Macau prior on the first mode (dense side
        # information) on a synthetic binary matrix, then checks the metric
        # computed from the predictions against 0.55.

        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        # All (row, column) coordinates of the 25 x 3 matrix.
        idx = list(
            itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
        # Binary target: 1.0 where the dot product of the two factor rows is
        # strictly positive, 0.0 otherwise.
        df["value"] = (np.array([np.sum(A[i[0], :] * B[i[1], :])
                                 for i in idx]) > 0.0).astype(np.float64)
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        predictions = smurff.smurff(
            Ytrain,
            Ytest=Ytest,
            priors=['macau', 'normal'],
            #prior_noises=[('probit', None, None, None, 0.5), ('fixed', 1.0, None, None, None)],
            side_info=[A, None],
            num_latent=4,
            burnin=20,
            nsamples=20,
            verbose=False)

        # `rmse` used to be read below without ever being assigned, so the
        # test died with a NameError before asserting anything. Compute it
        # from the predictions the same way the sibling tests do.
        rmse = smurff.calc_rmse(predictions)

        # NOTE(review): the value checked is an RMSE, yet the assertion
        # requires it to be *above* 0.55 and the message talks about AUC;
        # the probit noise configuration above is also commented out.
        # Confirm the intended metric and noise model.
        self.assertTrue(
            rmse > 0.55,
            msg=
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % rmse)
Exemple #7
0
    def test_make_train_test_df(self):
        # Checks that make_train_test_df with ratio 0.4 puts 60% of the rows
        # in the train set and 40% in the test set, and that the two sets
        # together reproduce every cell of the original tensor.

        dims = (10, 8, 3)

        # One row per tensor cell, each with a distinct value 0.0 .. 239.0.
        idx = list(itertools.product(*(np.arange(n) for n in dims)))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
        df["value"] = np.arange(10.0 * 8.0 * 3.0)

        Ytr, Yte = smurff.make_train_test_df(df, 0.4)

        total_rows = df.shape[0]
        self.assertEqual(Ytr.data.shape[0], total_rows * 0.6)
        self.assertEqual(Yte.data.shape[0], total_rows * 0.4)

        # Dense copy of the original tensor ...
        A1 = np.zeros(dims)
        A1[df.A, df.B, df.C] = df.value

        # ... and a dense tensor rebuilt from the train cells followed by
        # the test cells.
        A2 = np.zeros(dims)
        for part in (Ytr.data, Yte.data):
            A2[part.A, part.B, part.C] = part.value

        self.assertTrue(np.allclose(A1, A2))