def test_bpmf_tensor(self):
    """Smoke-test ``smurff.smurff`` on a random 5 x 4 x 3 sparse tensor.

    The run only has to complete; the returned predictions are not
    inspected.
    """
    np.random.seed(1234)
    dims = [5, 4, 3]

    def random_frame(count):
        # Draw order (A, B, C, value) is kept fixed so the seeded random
        # stream yields the same data on every run.
        return pd.DataFrame({
            "A": np.random.randint(0, 5, count),
            "B": np.random.randint(0, 4, count),
            "C": np.random.randint(0, 3, count),
            "value": np.random.randn(count),
        })

    # Train tensor is drawn first (7 entries), then the test tensor (5 entries).
    Y = smurff.SparseTensor(random_frame(7), dims)
    Ytest = smurff.SparseTensor(random_frame(5), dims)

    predictions = smurff.smurff(
        Y,
        Ytest=Ytest,
        priors=['normal', 'normal', 'normal'],
        num_latent=4,
        verbose=False,
        burnin=50,
        nsamples=50,
    )
def test_bpmf_dense_matrix_sparse_2d_tensor(self):
    """Compare a dense-matrix run against a 2D sparse-tensor run.

    The same fully populated 5 x 5 training data is passed to
    ``smurff.smurff`` once as a dense matrix and once as a
    ``smurff.SparseTensor``; with identical seeds, both runs are expected
    to produce predictions for the same coordinates with (almost) equal
    values.
    """
    np.random.seed(1234)

    # Generate train dense matrix (density=1.0 populates every cell)
    train_shape = (5, 5)
    train_sparse_matrix = scipy.sparse.random(5, 5, density=1.0)
    train_dense_matrix = train_sparse_matrix.todense()

    # Generate test sparse matrix
    test_shape = (5, 5)
    test_rows = np.random.randint(0, 5, 5)
    test_cols = np.random.randint(0, 4, 5)
    test_vals = np.random.randn(5)
    test_sparse_matrix = scipy.sparse.coo_matrix(
        (test_vals, (test_rows, test_cols)), test_shape)

    # Create train and test sparse tensors from the COO coordinates
    train_sparse_tensor = smurff.SparseTensor(
        pd.DataFrame({
            '0': train_sparse_matrix.row,
            '1': train_sparse_matrix.col,
            'v': train_sparse_matrix.data,
        }),
        train_shape)
    test_sparse_tensor = smurff.SparseTensor(
        pd.DataFrame({
            '0': test_sparse_matrix.row,
            '1': test_sparse_matrix.col,
            'v': test_sparse_matrix.data,
        }),
        train_shape)

    # Run SMURFF on both representations with identical settings
    sparse_matrix_predictions = smurff.smurff(
        train_dense_matrix,
        Ytest=test_sparse_matrix,
        priors=['normal', 'normal'],
        num_latent=4,
        verbose=False,
        burnin=50,
        nsamples=50,
        seed=1234)
    sparse_tensor_predictions = smurff.smurff(
        train_sparse_tensor,
        Ytest=test_sparse_tensor,
        priors=['normal', 'normal'],
        num_latent=4,
        verbose=False,
        burnin=50,
        nsamples=50,
        seed=1234)

    # Transform SMURFF results to dictionaries of coords -> predicted value
    sparse_matrix_results_dict = collections.OrderedDict(
        (p.coords, p.pred_1sample) for p in sparse_matrix_predictions)
    sparse_tensor_results_dict = collections.OrderedDict(
        (p.coords, p.pred_1sample) for p in sparse_tensor_predictions)

    self.assertEqual(len(sparse_matrix_results_dict),
                     len(sparse_tensor_results_dict))
    # Compare the matrix keys against the tensor keys. The previous version
    # compared the tensor keys with themselves, which could never fail.
    # Sets are used so the order in which predictions are returned is
    # irrelevant.
    self.assertEqual(set(sparse_matrix_results_dict),
                     set(sparse_tensor_results_dict))

    for coords, matrix_pred_1sample in sparse_matrix_results_dict.items():
        tensor_pred_1sample = sparse_tensor_results_dict[coords]
        self.assertAlmostEqual(matrix_pred_1sample, tensor_pred_1sample)
def test_bpmf_dense_matrix_dense_2d_tensor(self):
    """Compare ``smurff.bpmf`` on a dense matrix and on its tensor form.

    A fully populated 5 x 4 random matrix is fed to ``smurff.bpmf`` once as
    a dense matrix and once as a ``smurff.SparseTensor`` built from the same
    coordinates. After sorting, both prediction lists must agree on
    coordinates and (almost) on predicted values.
    """
    np.random.seed(1234)

    dims = (5, 4)
    full_random = sp.random(5, 4, density=1.0)
    dense_train = full_random.todense()
    # Only the test half of the split is used; the train half is discarded.
    _, held_out = smurff.make_train_test(full_random, 0.2)

    # COO views give direct access to row / col / data arrays. The train
    # matrix is sparse in format only: every cell is populated.
    train_coo = sp.coo_matrix(dense_train)
    test_coo = held_out.tocoo()

    def as_tensor(coo):
        # Both tensors are declared with the training shape.
        frame = pd.DataFrame({
            '0': coo.row,
            '1': coo.col,
            'v': coo.data,
        })
        return smurff.SparseTensor(frame, dims)

    train_tensor = as_tensor(train_coo)
    test_tensor = as_tensor(test_coo)

    # Identical settings (including the seed) for both runs.
    run_options = dict(
        num_latent=4,
        num_threads=1,
        verbose=verbose,
        burnin=50,
        nsamples=50,
        seed=1234,
    )
    matrix_predictions = smurff.bpmf(dense_train, Ytest=test_coo, **run_options)
    tensor_predictions = smurff.bpmf(train_tensor, Ytest=test_tensor, **run_options)

    # Sort in place so the two result lists can be compared pairwise.
    matrix_predictions.sort()
    tensor_predictions.sort()

    self.assertEqual(len(matrix_predictions), len(tensor_predictions))
    for from_matrix, from_tensor in zip(matrix_predictions, tensor_predictions):
        self.assertEqual(from_matrix.coords, from_tensor.coords)
        self.assertAlmostEqual(from_matrix.pred_1sample, from_tensor.pred_1sample)
def train_test(self):
    """Build a seeded random train/test pair of 15 x 4 x 3 sparse tensors.

    Returns:
        A ``(Ytrain, Ytest)`` tuple of ``smurff.SparseTensor`` objects with
        7 and 5 randomly placed entries respectively.
    """
    np.random.seed(1234)
    dims = [15, 4, 3]

    def random_frame(count):
        # Column draw order (A, B, C, value) is fixed so the seeded random
        # stream always produces the same data.
        return pd.DataFrame({
            "A": np.random.randint(0, 15, count),
            "B": np.random.randint(0, 4, count),
            "C": np.random.randint(0, 3, count),
            "value": np.random.randn(count),
        })

    # The train frame must be drawn before the test frame.
    train_frame = random_frame(7)
    test_frame = random_frame(5)

    return (
        smurff.SparseTensor(train_frame, shape=dims),
        smurff.SparseTensor(test_frame, shape=dims),
    )
def test_bpmf_tensor(self):
    """Smoke-test ``smurff.bpmf`` on random three-way sparse tensors.

    No explicit shape is passed to ``smurff.SparseTensor``. The run only has
    to complete; the returned predictions are not inspected.
    """
    np.random.seed(1234)

    def random_frame(count):
        # Draw order (A, B, C, value) is kept fixed for reproducibility
        # under the seed set above.
        return pd.DataFrame({
            "A": np.random.randint(0, 5, count),
            "B": np.random.randint(0, 4, count),
            "C": np.random.randint(0, 3, count),
            "value": np.random.randn(count),
        })

    # Train data (7 entries) is drawn before test data (5 entries).
    train_frame = random_frame(7)
    test_frame = random_frame(5)
    Y = smurff.SparseTensor(train_frame)
    Ytest = smurff.SparseTensor(test_frame)

    predictions = smurff.bpmf(
        Y,
        Ytest=Ytest,
        num_latent=4,
        verbose=verbose,
        burnin=50,
        nsamples=50,
    )
def test_macau_tensor_empty(self):
    """Run ``smurff.smurff`` without a test set and expect falsy predictions.

    The training tensor holds every cell of a 30 x 4 x 2 grid, with values
    computed from three random factor matrices of width 2.
    """
    A = np.random.randn(30, 2)
    B = np.random.randn(4, 2)
    C = np.random.randn(2, 2)

    # Every (a, b, c) index combination of the three modes.
    axes = [np.arange(factor.shape[0]) for factor in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    # Each cell value is the sum over the latent dimension of the
    # element-wise product of the three factor rows.
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    # NOTE(review): Acoo is built but never used below.
    Acoo = scipy.sparse.coo_matrix(A)

    predictions = smurff.smurff(
        smurff.SparseTensor(df),
        priors=['normal', 'normal', 'normal'],
        num_latent=2,
        burnin=5,
        nsamples=5,
        verbose=False,
    )

    self.assertFalse(predictions)
def test_bpmf_sparse_matrix_sparse_2d_tensor(self):
    """Compare ``smurff.bpmf`` on a sparse matrix and on its 2D tensor form.

    Random 5 x 5 sparse train and test matrices are converted to
    ``smurff.SparseTensor`` objects with the same coordinates. With
    identical seeds, both runs are expected to produce predictions for the
    same coordinates with (almost) equal values.
    """
    np.random.seed(1234)

    # Generate train matrix rows, cols and vals
    train_shape = (5, 5)
    train_rows = np.random.randint(0, 5, 7)
    train_cols = np.random.randint(0, 4, 7)
    train_vals = np.random.randn(7)

    # Generate test matrix rows, cols and vals
    test_shape = (5, 5)
    test_rows = np.random.randint(0, 5, 5)
    test_cols = np.random.randint(0, 4, 5)
    test_vals = np.random.randn(5)

    # Create train and test sparse matrices
    train_sparse_matrix = scipy.sparse.coo_matrix(
        (train_vals, (train_rows, train_cols)), train_shape)
    test_sparse_matrix = scipy.sparse.coo_matrix(
        (test_vals, (test_rows, test_cols)), test_shape)

    # Force NNZ recalculation to remove duplicate coordinates because of
    # random generation. The return values are intentionally ignored.
    # NOTE(review): this relies on count_nonzero() merging duplicates in
    # place - confirm against the scipy version in use.
    train_sparse_matrix.count_nonzero()
    test_sparse_matrix.count_nonzero()

    # Create train and test sparse tensors from the COO coordinates
    train_sparse_tensor = smurff.SparseTensor(
        pd.DataFrame({
            '0': train_sparse_matrix.row,
            '1': train_sparse_matrix.col,
            'v': train_sparse_matrix.data,
        }),
        train_shape)
    test_sparse_tensor = smurff.SparseTensor(
        pd.DataFrame({
            '0': test_sparse_matrix.row,
            '1': test_sparse_matrix.col,
            'v': test_sparse_matrix.data,
        }),
        train_shape)

    # Run SMURFF on both representations with identical settings
    sparse_matrix_predictions = smurff.bpmf(
        train_sparse_matrix,
        Ytest=test_sparse_matrix,
        num_latent=4,
        num_threads=1,
        verbose=verbose,
        burnin=50,
        nsamples=50,
        seed=1234)
    sparse_tensor_predictions = smurff.bpmf(
        train_sparse_tensor,
        Ytest=test_sparse_tensor,
        num_latent=4,
        num_threads=1,
        verbose=verbose,
        burnin=50,
        nsamples=50,
        seed=1234)

    # Transform SMURFF results to dictionaries of coords -> predicted value
    sparse_matrix_predictions_dict = collections.OrderedDict(
        (p.coords, p.pred_1sample) for p in sparse_matrix_predictions)
    sparse_tensor_predictions_dict = collections.OrderedDict(
        (p.coords, p.pred_1sample) for p in sparse_tensor_predictions)

    self.assertEqual(len(sparse_matrix_predictions_dict),
                     len(sparse_tensor_predictions_dict))
    # Compare the matrix keys against the tensor keys. The previous version
    # compared the tensor keys with themselves, which could never fail.
    # Sets are used so the order in which predictions are returned is
    # irrelevant.
    self.assertEqual(set(sparse_matrix_predictions_dict),
                     set(sparse_tensor_predictions_dict))

    for coords, matrix_pred_1sample in sparse_matrix_predictions_dict.items():
        tensor_pred_1sample = sparse_tensor_predictions_dict[coords]
        self.assertAlmostEqual(matrix_pred_1sample, tensor_pred_1sample)