Python make_train_test_df 예제들, macau.make_train_test_df Python 예제들

예제 #1

0

파일 보기

파일: test_macau.py 프로젝트: edebrouwer/macau

    def test_macau_dense_probit(self):
        """Probit Macau with a dense side matrix on binary data.

        Builds the full 25 x 3 grid of (A, B) index pairs, labels each pair
        1.0 when the inner product of the matching random latent rows is
        positive (0.0 otherwise), splits it with
        ``macau.make_train_test_df(df, 0.2)`` and fits ``macau.macau`` with
        ``precision="probit"`` and ``A`` as dense side information for the
        first mode.

        Checks that the first two prediction columns keep the names
        ``"A"``/``"B"`` and that the test AUC exceeds 0.55.
        """
        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        idx = list(
            itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
        # Binary labels: sign of the latent inner product.
        df["value"] = (np.array([np.sum(A[i[0], :] * B[i[1], :])
                                 for i in idx]) > 0.0).astype(np.float64)
        Ytrain, Ytest = macau.make_train_test_df(df, 0.2)

        results = macau.macau(Y=Ytrain,
                              Ytest=Ytest,
                              side=[A, None],
                              num_latent=4,
                              verbose=False,
                              burnin=20,
                              nsamples=20,
                              univariate=False,
                              precision="probit")

        self.assertTrue((results.prediction.columns[0:2] == ["A", "B"]).all())
        # The labels are binary, so the metric under test is AUC (as the
        # message says); both the check and the reported value use auc_test.
        self.assertTrue(
            results.auc_test > 0.55,
            msg=
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % results.auc_test)

예제 #2

0

파일 보기

파일: test_macau.py 프로젝트: edebrouwer/macau

    def test_macau_tensor_univariate(self):
        """Macau on a 3-way tensor with ``univariate=True`` and sparse side info.

        The tensor values are the sum over the latent dimension of the
        element-wise product of three random factor matrices. The first
        factor matrix, converted to COO format, is passed as side information
        for the first mode. The test RMSE must be below 0.5.
        """
        factors_a = np.random.randn(30, 2)
        factors_b = np.random.randn(4, 2)
        factors_c = np.random.randn(2, 2)

        # Every (a, b, c) index combination of the full tensor.
        coords = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.array([
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in coords
        ])
        train, test = macau.make_train_test_df(frame, 0.2)

        sparse_side = scipy.sparse.coo_matrix(factors_a)

        options = dict(
            num_latent=4,
            verbose=False,
            burnin=20,
            nsamples=20,
            univariate=True,
            precision=50,
        )
        results = macau.macau(
            Y=train, Ytest=test, side=[sparse_side, None, None], **options)

        rmse = results.rmse_test
        self.assertTrue(
            rmse < 0.5,
            msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

예제 #3

0

파일 보기

파일: test_macau.py 프로젝트: tvandera/macau

    def test_bpmf_tensor2(self):
        """BPMF on a 15 x 20 x 3 tensor built from random rank-2 factors.

        Each value is the sum over the latent dimension of the element-wise
        product of one row from each factor matrix. After splitting with
        ``macau.make_train_test_df(df, 0.2)`` the test RMSE must be below 0.5.
        """
        factors_a = np.random.randn(15, 2)
        factors_b = np.random.randn(20, 2)
        factors_c = np.random.randn(3, 2)

        # Every (a, b, c) index combination of the full tensor.
        coords = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.array([
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in coords
        ])
        train, test = macau.make_train_test_df(frame, 0.2)

        results = macau.bpmf(
            Y=train,
            Ytest=test,
            num_latent=4,
            verbose=False,
            burnin=20,
            nsamples=20,
            univariate=False,
            precision=50,
        )

        rmse = results.rmse_test
        self.assertTrue(
            rmse < 0.5,
            msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

예제 #4

0

파일 보기

파일: test_macau.py 프로젝트: wangwangzhang/macau

    def test_make_train_test_df(self):
        """``make_train_test_df`` splits rows 60/40 without losing any cell.

        A full 10 x 8 x 3 index grid with distinct values is split with a
        test fraction of 0.4. The row counts of both parts are checked, and
        scattering the two parts back into a dense array must reproduce the
        array built from the unsplit frame.
        """
        coords = list(
            itertools.product(np.arange(10), np.arange(8), np.arange(3)))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.arange(10.0 * 8.0 * 3.0)

        train, test = macau.make_train_test_df(frame, 0.4)
        n_rows = frame.shape[0]
        self.assertEqual(train.shape[0], n_rows * 0.6)
        self.assertEqual(test.shape[0], n_rows * 0.4)

        # Dense tensor from the original frame ...
        expected = np.zeros((10, 8, 3))
        expected[frame.A, frame.B, frame.C] = frame.value

        # ... and the same tensor reassembled from the two parts.
        rebuilt = np.zeros((10, 8, 3))
        for part in (train, test):
            rebuilt[part.A, part.B, part.C] = part.value

        self.assertTrue(np.allclose(expected, rebuilt))

예제 #5

0

파일 보기

파일: test_macau.py 프로젝트: tvandera/macau

    def test_macau_dense_probit(self):
        """Probit Macau with a dense side matrix on binary data.

        Builds the full 25 x 3 grid of (A, B) index pairs, labels each pair
        1.0 when the inner product of the matching random latent rows is
        positive (0.0 otherwise), splits it with
        ``macau.make_train_test_df(df, 0.2)`` and fits ``macau.macau`` with
        ``precision="probit"`` and ``A`` as dense side information for the
        first mode.

        Checks that the first two prediction columns keep the names
        ``"A"``/``"B"`` and that the test AUC exceeds 0.55.
        """
        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        idx = list( itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])) )
        df  = pd.DataFrame( np.asarray(idx), columns=["A", "B"])
        # Binary labels: sign of the latent inner product.
        df["value"] = (np.array([ np.sum(A[i[0], :] * B[i[1], :]) for i in idx ]) > 0.0).astype(np.float64)
        Ytrain, Ytest = macau.make_train_test_df(df, 0.2)

        results = macau.macau(Y = Ytrain, Ytest = Ytest, side=[A, None], num_latent = 4,
                             verbose = False, burnin = 20, nsamples = 20,
                             univariate = False, precision = "probit")

        self.assertTrue( (results.prediction.columns[0:2] == ["A", "B"]).all() )
        # Report the metric that is actually asserted on (AUC, not RMSE).
        self.assertTrue(results.auc_test > 0.55,
                        msg="Probit factorization (with dense side) gave AUC below 0.55 (%f)." % results.auc_test)

예제 #6

0

파일 보기

파일: test_macau.py 프로젝트: tvandera/macau

    def test_macau_tensor(self):
        """Macau on a 15 x 3 x 2 tensor with sparse side info on the first mode.

        Tensor values are the sum over the latent dimension of the
        element-wise product of three random factor rows. The first factor
        matrix, converted to COO format, is passed as side information.
        The first three prediction columns must be named ``"A"``, ``"B"``,
        ``"C"`` and the test RMSE must be below 0.5.
        """
        factors_a = np.random.randn(15, 2)
        factors_b = np.random.randn(3, 2)
        factors_c = np.random.randn(2, 2)

        # Every (a, b, c) index combination of the full tensor.
        coords = list(itertools.product(
            np.arange(factors_a.shape[0]),
            np.arange(factors_b.shape[0]),
            np.arange(factors_c.shape[0]),
        ))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.array([
            np.sum(factors_a[a, :] * factors_b[b, :] * factors_c[c, :])
            for a, b, c in coords
        ])
        train, test = macau.make_train_test_df(frame, 0.2)

        sparse_side = scipy.sparse.coo_matrix(factors_a)

        results = macau.macau(
            Y=train,
            Ytest=test,
            side=[sparse_side, None, None],
            num_latent=4,
            verbose=False,
            burnin=20,
            nsamples=20,
            univariate=False,
            precision=50,
        )

        leading_columns = results.prediction.columns[0:3]
        self.assertTrue((leading_columns == ["A", "B", "C"]).all())

        rmse = results.rmse_test
        self.assertTrue(
            rmse < 0.5,
            msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

예제 #7

0

파일 보기

import unittest
import numpy as np
import pandas as pd
import scipy.sparse
import macau
import itertools

# Random rank-2 factors; the third mode has a single slice, so the tensor
# can also be viewed as a 15 x 20 matrix further down.
A, B, C = (np.random.randn(rows, 2) for rows in (15, 20, 1))

# Full index grid over all three modes and the matching tensor values.
idx = list(itertools.product(
    np.arange(A.shape[0]),
    np.arange(B.shape[0]),
    np.arange(C.shape[0]),
))
df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
df["value"] = np.array(
    [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx])
Ytrain, Ytest = macau.make_train_test_df(df, 0.2)

# Settings shared by both BPMF runs below.
bpmf_options = dict(
    num_latent=4,
    verbose=True,
    burnin=20,
    nsamples=2,
    univariate=False,
    precision=50,
)

# Run 1: train/test given as data frames.
results = macau.bpmf(Y=Ytrain, Ytest=Ytest, **bpmf_options)

# Run 2: the same data as sparse matrices over the (A, B) indices.
Ytrain_sp = scipy.sparse.coo_matrix((Ytrain.value, (Ytrain.A, Ytrain.B)))
Ytest_sp = scipy.sparse.coo_matrix((Ytest.value, (Ytest.A, Ytest.B)))

results = macau.bpmf(Y=Ytrain_sp, Ytest=Ytest_sp, **bpmf_options)

예제 #8

0

파일 보기

파일: unit_tests.py 프로젝트: edebrouwer/Tensor_Fact

def macau_test():
    """Fit Macau on a synthetic 3-way tensor and print recovered latents.

    Steps:
      1. Draw three random factor matrices (15, 3 and 5 rows, ``num_latents``
         columns) and build the full tensor whose entries are the sum over
         the latent dimension of the element-wise product of one row from
         each matrix.
      2. Split it with ``macau.make_train_test_df(df, 0.05)`` and run
         ``macau.macau`` with no side information, ``precision="adaptive"``
         and ``save_prefix`` set.
      3. Recreate the ``results_unittests/`` directory (an existing one is
         deleted) and move every file in the current directory whose name
         starts with ``save_prefix`` into it.
      4. Average the per-sample ``U1``/``U2``/``U3`` latent CSV files over
         all samples, save each mean to its own ``.npy`` file and print the
         means next to the true factor matrices.

    Side effects: deletes and recreates ``results_unittests/``, moves files
    out of the current working directory, writes three ``.npy`` files and
    prints to stdout. Returns ``None``.
    """
    num_latents = 2
    n_samples = 800

    save_prefix = "macau_unit_test"

    # Generate toy data from random latent factors.
    A = np.random.randn(15, num_latents)
    B = np.random.randn(3, num_latents)
    C = np.random.randn(5, num_latents)

    idx = list(
        itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]),
                          np.arange(C.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])

    df["value"] = np.array(
        [np.sum(A[i[0], :] * B[i[1], :] * C[i[2], :]) for i in idx])

    df_train, df_val = macau.make_train_test_df(df, 0.05)

    # No side information for any mode. The return value is not needed:
    # the latents are read back from the files saved under save_prefix
    # (presumably written by macau.macau itself -- confirm against its docs).
    macau.macau(Y=df_train,
                Ytest=df_val,
                side=[None, None, None],
                num_latent=num_latents,
                verbose=True,
                burnin=400,
                nsamples=n_samples,
                precision="adaptive",
                save_prefix=save_prefix)

    # Always start from an empty output directory; anything left over from
    # a previous run is discarded.
    str_dir = "results_unittests/"
    if os.path.exists(str_dir):
        shutil.rmtree(str_dir)
    os.makedirs(str_dir)

    for f in os.listdir("./"):
        if f.startswith(save_prefix):
            shutil.move(f, str_dir)

    # Average the sampled latents of each mode over all saved samples.
    mean_lat_pat = 0
    mean_lat_meas = 0
    mean_lat_time = 0
    for n in range(1, n_samples + 1):
        mean_lat_pat += np.loadtxt(str_dir + save_prefix +
                                   "-sample%d-U1-latents.csv" % n,
                                   delimiter=",")
        mean_lat_meas += np.loadtxt(str_dir + save_prefix +
                                    "-sample%d-U2-latents.csv" % n,
                                    delimiter=",")
        mean_lat_time += np.loadtxt(str_dir + save_prefix +
                                    "-sample%d-U3-latents.csv" % n,
                                    delimiter=",")

    mean_lat_pat /= n_samples
    mean_lat_meas /= n_samples
    mean_lat_time /= n_samples

    # One file per mode. Previously all three were written to
    # "mean_pat_latent.npy", so only the last array (time) survived.
    np.save(str_dir + "mean_pat_latent.npy", mean_lat_pat)
    np.save(str_dir + "mean_meas_latent.npy", mean_lat_meas)
    np.save(str_dir + "mean_time_latent.npy", mean_lat_time)

    # The means are printed transposed next to the true factors; presumably
    # the CSV files store latents as (num_latent, n_rows) -- TODO confirm.
    print("Loaded")
    print("Patients Latents")
    print(mean_lat_pat.T)
    print("True Latents")
    print(A)
    print("Features Latents")
    print(mean_lat_meas.T)
    print("True Latents")
    print(B)
    print("Time Latents")
    print(mean_lat_time.T)
    print("True Latents")
    print(C)