def test_macau_tensor_univariate(self):
    """Run a univariate MacauSession on a dense 15x3x2 tensor and check the RMSE.

    The tensor values are sums over the element-wise product of rows taken
    from three random two-column factor matrices.  Mode ``A`` additionally
    receives its own factor matrix, as a sparse COO matrix, as side info.
    The test passes when the RMSE of the returned predictions is below 0.5.
    """
    # The draw order (A, B, C) determines the random stream; keep it fixed.
    A = np.random.randn(15, 2)
    B = np.random.randn(3, 2)
    C = np.random.randn(2, 2)

    # Every (a, b, c) coordinate of the full tensor, in row-major order.
    axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)
    Acoo = scipy.sparse.coo_matrix(A)

    session_args = dict(
        Ytrain=Ytrain,
        Ytest=Ytest,
        side_info=[Acoo, None, None],  # side info on the first mode only
        univariate=True,
        num_latent=4,
        verbose=0,
        burnin=200,
        nsamples=400,
    )
    predictions = smurff.MacauSession(**session_args).run()

    rmse = smurff.calc_rmse(predictions)
    failure_text = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
    self.assertTrue(rmse < 0.5, msg=failure_text)
def test_macau_dense_probit(self):
    """Train a probit Macau model with dense side info and check its AUC.

    A 25x3 binary matrix is generated by thresholding the inner products of
    two random two-column factor matrices at 0.  A ``TrainSession`` with
    priors ``['macau', 'normal']`` and ``ProbitNoise`` is trained on it, with
    the dense factor matrix ``A`` attached as side info for mode 0.

    The test passes when the AUC of the predictions exceeds 0.55.  Earlier
    this method computed the RMSE and asserted that it was *above* 0.55 while
    reporting it as an AUC, so a poor model could pass; it now measures the
    AUC the assertion message talks about.

    NOTE(review): another ``test_macau_dense_probit`` definition exists
    further down this file; if both live in the same class the later one
    shadows this one -- confirm and rename one of them.
    """
    A = np.random.randn(25, 2)
    B = np.random.randn(3, 2)

    idx = list(itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])

    # Binary labels: 1.0 where the inner product of the two rows is positive.
    inner_products = np.array([np.sum(A[i, :] * B[j, :]) for i, j in idx])
    df["value"] = (inner_products > 0.0).astype(np.float64)

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

    # Labels are 0.0 / 1.0, so 0.5 separates the two classes.
    threshold = 0.5

    trainSession = smurff.TrainSession(priors=['macau', 'normal'],
                                       num_latent=4,
                                       threshold=threshold,
                                       burnin=20,
                                       nsamples=20,
                                       verbose=False)
    trainSession.addTrainAndTest(Ytrain, Ytest, smurff.ProbitNoise(threshold))
    trainSession.addSideInfo(0, A, direct=True)

    predictions = trainSession.run()

    # Measure what the assertion message claims to measure: the AUC.
    auc = smurff.calc_auc(predictions, threshold)
    self.assertTrue(
        auc > 0.55,
        msg="Probit factorization (with dense side) gave AUC below 0.55 (%f)." % auc)
def test_bpmf_tensor2(self):
    """Factorize a dense 15x20x3 tensor with normal priors and check the RMSE.

    The tensor values are sums over the element-wise product of rows taken
    from three random two-column factor matrices.  ``smurff.smurff`` is run
    with a ``'normal'`` prior on every mode, and the test passes when the
    RMSE of the returned predictions is below 0.5.
    """
    # The draw order (A, B, C) determines the random stream; keep it fixed.
    A = np.random.randn(15, 2)
    B = np.random.randn(20, 2)
    C = np.random.randn(3, 2)

    # Every (a, b, c) coordinate of the full tensor.
    axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

    options = dict(
        Ytest=Ytest,
        priors=['normal', 'normal', 'normal'],
        num_latent=4,
        verbose=False,
        burnin=20,
        nsamples=20,
    )
    predictions = smurff.smurff(Ytrain, **options)

    rmse = smurff.calc_rmse(predictions)
    failure_text = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
    self.assertTrue(rmse < 0.5, msg=failure_text)
def test_macau_tensor(self):
    """Run ``smurff.macau`` on a dense 30x4x2 tensor and check the RMSE.

    The tensor values are sums over the element-wise product of rows taken
    from three random two-column factor matrices.  The train/test split is
    given the explicit tensor shape, and mode ``A`` receives its own factor
    matrix, as a sparse COO matrix, as side info.  The test passes when the
    RMSE of the returned predictions is below 0.5.

    ``verbose`` is read from a name defined outside this method.
    """
    shape = [30, 4, 2]

    # The draw order (A, B, C) determines the random stream; keep it fixed.
    A = np.random.randn(shape[0], 2)
    B = np.random.randn(shape[1], 2)
    C = np.random.randn(shape[2], 2)

    # Every (a, b, c) coordinate of the full tensor.
    idx = list(itertools.product(*(np.arange(extent) for extent in shape)))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2, shape=shape)
    Acoo = scipy.sparse.coo_matrix(A)

    macau_args = dict(
        Ytrain=Ytrain,
        Ytest=Ytest,
        side_info=[Acoo, None, None],  # side info on the first mode only
        num_latent=4,
        verbose=verbose,
        burnin=200,
        nsamples=200,
    )
    predictions = smurff.macau(**macau_args)

    rmse = smurff.calc_rmse(predictions)
    failure_text = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
    self.assertTrue(rmse < 0.5, msg=failure_text)
def test_bpmf_tensor3(self):
    """Run BPMF on a 15x20x1 tensor, then on the same data as a sparse matrix.

    The tensor values are sums over the element-wise product of rows taken
    from three random two-column factor matrices; the third mode has a single
    entry.  First ``smurff.bpmf`` is run on the tensor split and the RMSE of
    its predictions must be below 0.5.  Then the ``A``/``B``/``value`` columns
    of the same train and test splits are packed into COO matrices and
    ``smurff.bpmf`` is run again; that second run is only checked for
    completing without raising.

    ``verbose`` is read from a name defined outside this method.
    """
    A = np.random.randn(15, 2)
    B = np.random.randn(20, 2)
    C = np.random.randn(1, 2)

    idx = list(itertools.product(np.arange(A.shape[0]),
                                 np.arange(B.shape[0]),
                                 np.arange(C.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    df["value"] = np.array(
        [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx])

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

    predictions = smurff.bpmf(Ytrain,
                              Ytest=Ytest,
                              num_latent=4,
                              verbose=verbose,
                              burnin=20,
                              nsamples=20)

    rmse = smurff.calc_rmse(predictions)
    self.assertTrue(rmse < 0.5,
                    msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

    Ytrain_df = Ytrain.data
    Ytest_df = Ytest.data

    # Pass the shape explicitly.  Without it coo_matrix infers the shape from
    # the largest index present, so a random split that happens to leave the
    # last row or column out of one part would yield train and test matrices
    # of different shapes and make this test fail intermittently.
    matrix_shape = (A.shape[0], B.shape[0])
    Ytrain_sp = sp.coo_matrix(
        (Ytrain_df.value, (Ytrain_df.A, Ytrain_df.B)), shape=matrix_shape)
    Ytest_sp = sp.coo_matrix(
        (Ytest_df.value, (Ytest_df.A, Ytest_df.B)), shape=matrix_shape)

    # Smoke test of the sparse-matrix input path: no assertion on the result.
    results_mat = smurff.bpmf(Ytrain_sp,
                              Ytest=Ytest_sp,
                              num_latent=4,
                              verbose=verbose,
                              burnin=20,
                              nsamples=20)
def test_macau_dense_probit(self):
    """Run ``smurff.smurff`` with dense side info on binary data and check the AUC.

    A 25x3 binary matrix is generated by thresholding the inner products of
    two random two-column factor matrices at 0.  The model uses priors
    ``['macau', 'normal']`` with the dense factor matrix ``A`` as side info
    for the first mode.

    The final assertion previously referenced ``rmse``, which was never
    assigned, so the test always died with a ``NameError``.  The assertion
    message speaks of an AUC, so the AUC of the predictions is now computed
    and checked against 0.55.

    NOTE(review): another ``test_macau_dense_probit`` definition exists
    earlier in this file; if both live in the same class this one shadows
    it -- confirm and rename one of them.
    """
    A = np.random.randn(25, 2)
    B = np.random.randn(3, 2)

    idx = list(itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])

    # Binary labels: 1.0 where the inner product of the two rows is positive.
    inner_products = np.array([np.sum(A[i, :] * B[j, :]) for i, j in idx])
    df["value"] = (inner_products > 0.0).astype(np.float64)

    Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

    predictions = smurff.smurff(
        Ytrain,
        Ytest=Ytest,
        priors=['macau', 'normal'],
        # NOTE(review): the probit noise configuration below was already
        # disabled; no noise model is passed explicitly -- confirm intent.
        #prior_noises=[('probit', None, None, None, 0.5), ('fixed', 1.0, None, None, None)],
        side_info=[A, None],
        num_latent=4,
        burnin=20,
        nsamples=20,
        verbose=False)

    # Labels are 0.0 / 1.0, so 0.5 separates the two classes.
    auc = smurff.calc_auc(predictions, 0.5)
    self.assertTrue(
        auc > 0.55,
        msg="Probit factorization (with dense side) gave AUC below 0.55 (%f)." % auc)
def test_make_train_test_df(self):
    """Check that ``smurff.make_train_test_df`` splits a table without losing cells.

    A full 10x8x3 coordinate table with distinct values is split with a
    fraction of 0.4.  The test asserts that the train part holds 0.6 and the
    test part 0.4 of the rows, and that scattering both parts back into a
    dense array reproduces the array built from the unsplit table.
    """
    dims = (10, 8, 3)

    # Every (a, b, c) coordinate of the full tensor, each with a unique value.
    idx = list(itertools.product(*(np.arange(extent) for extent in dims)))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    df["value"] = np.arange(10.0 * 8.0 * 3.0)

    Ytr, Yte = smurff.make_train_test_df(df, 0.4)

    total_rows = df.shape[0]
    self.assertEqual(Ytr.data.shape[0], total_rows * 0.6)
    self.assertEqual(Yte.data.shape[0], total_rows * 0.4)

    # Dense reference built from the unsplit table.
    A1 = np.zeros(dims)
    A1[df.A, df.B, df.C] = df.value

    # Dense reconstruction from the two parts; together they must fill
    # exactly the same cells with the same values.
    A2 = np.zeros(dims)
    for part in (Ytr.data, Yte.data):
        A2[part.A, part.B, part.C] = part.value

    self.assertTrue(np.allclose(A1, A2))