Ejemplo n.º 1
0
 def __init__(
         self,
         data,
         prior_eta   = GammaPrior(2., 0.5),
         prior_mu    = (0., 4.),
         prior_Sigma = (10, 0.5),
         prior_xi    = GammaPrior(1., 1.),
         prior_tau   = GammaPrior(2., 2.),
         p           = 10,
         max_clust_count = 300,
         ntemps      = 5,
         stepping    = 1.3,
         ):
     """
     Store the data and sampler settings, build the priors, and set up
     the temperature ladder.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_eta, prior_xi, prior_tau: prior objects handed to
             ``Prior`` unchanged.
         prior_mu: pair whose first entry fills the NormalPrior location
             vector and whose second entry (square-rooted) scales the
             identity matrices passed alongside it.
         prior_Sigma: pair; the first entry is added to ``nCol`` and the
             second scales an identity matrix for ``InvWishartPrior``.
         p: stored as ``self.p``.
         max_clust_count: stored as ``self.max_clust_count``.
         ntemps: number of entries in the ladder.
         stepping: ladder base; entry ``k`` of ``self.itl`` is
             ``1 / stepping**k``.
     """
     # Data and sampler settings.
     self.data = data
     self.nCol = self.data.nCol
     self.nDat = self.data.nDat
     self.p = p
     self.max_clust_count = max_clust_count

     # Expand the scalar hyper-parameters into nCol-dimensional arrays.
     identity = np.eye(self.nCol)
     mu_centre = np.ones(self.nCol) * prior_mu[0]
     mu_root = np.sqrt(prior_mu[1])
     mu_prior = NormalPrior(mu_centre, identity * mu_root, identity / mu_root)
     Sigma_prior = InvWishartPrior(
         self.nCol + prior_Sigma[0],
         identity * prior_Sigma[1],
         )
     self.priors = Prior(prior_eta, mu_prior, Sigma_prior, prior_xi, prior_tau)
     self.set_projection()

     # Ladder: one entry per temperature, and a temperature index
     # repeated nDat times for each temperature.
     self.nTemp = ntemps
     ladder_idx = np.arange(ntemps)
     self.itl = 1 / stepping**ladder_idx
     self.temp_unravel = np.repeat(ladder_idx, self.nDat)
     self.nSwap_per = self.nTemp // 2
     self.swap_start = 100
Ejemplo n.º 2
0
    def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_alpha=GammaPrior(1., 1.),
        prior_beta=GammaPrior(1., 1.),
        p=10,
        max_clust_count=300,
        ntemps=3,
        stepping=1.05,
    ):
        """
        Store the mixed data and sampler settings, build the priors, and
        set up the temperature ladder.

        Args:
            data: a ``MixedData`` instance; its ``nCat``, ``nCol``,
                ``nDat`` and ``Cats`` attributes are read here.
            prior_eta, prior_alpha, prior_beta: prior objects handed to
                ``Prior`` unchanged.
            p: stored as ``self.p``.
            max_clust_count: stored as ``self.max_clust_count``.
            ntemps: number of entries in the ladder.
            stepping: ladder base; entry ``k`` of ``self.itl`` is
                ``1 / stepping**k``.

        Raises:
            TypeError: if ``data`` is not a ``MixedData`` instance.
        """
        # Explicit check rather than ``assert``: asserts disappear under
        # ``python -O``, and isinstance also accepts MixedData subclasses.
        if not isinstance(data, MixedData):
            raise TypeError(
                'data must be a MixedData instance, got {}'.format(
                    type(data).__name__))
        self.data = data
        self.max_clust_count = max_clust_count
        self.p = p
        self.nCat = self.data.nCat
        self.nCol = self.data.nCol
        # Total column count: nCol plus nCat.
        self.tCol = self.nCol + self.nCat
        self.nDat = self.data.nDat
        self.nCats = self.data.Cats.shape[0]
        self.priors = Prior(prior_eta, prior_alpha, prior_beta)
        self.set_projection()
        self.categorical_considerations()
        # self.pool = Pool(processes = 8, initializer = limit_cpu())

        # Ladder: one entry per temperature, and a temperature index
        # repeated nDat times for each temperature.
        self.nTemp = ntemps
        self.itl = 1 / stepping**np.arange(ntemps)
        self.temp_unravel = np.repeat(np.arange(self.nTemp), self.nDat)
        self.nSwap_per = self.nTemp // 2
        self.swap_start = 100
        return
Ejemplo n.º 3
0
 def __init__(
         self,
         data,
         prior_eta=GammaPrior(2., 0.5),
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(0.5, 0.5),
         prior_xi=GammaPrior(0.5, 0.5),
         prior_tau=GammaPrior(0.5, 0.5),
         p=10,
         max_clust_count=300,
 ):
     """
     Store the mixed data and sampler settings and build the priors.

     Args:
         data: a ``MixedData`` instance; its ``nCat``, ``nCol``,
             ``nDat`` and ``Cats`` attributes are read here.
         prior_eta, prior_alpha, prior_beta, prior_xi, prior_tau: prior
             objects handed to ``Prior`` unchanged, in that order.
         p: stored as ``self.p``.
         max_clust_count: stored as ``self.max_clust_count``.

     Raises:
         TypeError: if ``data`` is not a ``MixedData`` instance.
     """
     # Explicit check rather than ``assert``: asserts disappear under
     # ``python -O``, and isinstance also accepts MixedData subclasses.
     if not isinstance(data, MixedData):
         raise TypeError(
             'data must be a MixedData instance, got {}'.format(
                 type(data).__name__))
     self.data = data
     self.max_clust_count = max_clust_count
     self.p = p
     self.nCat = self.data.nCat
     self.nCol = self.data.nCol
     self.nDat = self.data.nDat
     self.nCats = self.data.Cats.shape[0]
     self.priors = Prior(prior_eta, prior_alpha, prior_beta, prior_xi,
                         prior_tau)
     # Derived structures are built only after the attributes above are
     # set; the original call order is kept.
     self.set_projection()
     self.categorical_considerations()
     self.build_sigma_unity()
     # self.pool = Pool(processes = 8, initializer = limit_cpu())
     return
Ejemplo n.º 4
0
 def __init__(
         self,
         data,
         prior_zeta=GammaPrior(0.5, 0.5),
         prior_sigma=GammaPrior(2., 2.),
 ):
     """
     Keep a reference to the data, cache its dimensions, and bundle the
     two priors.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_zeta, prior_sigma: prior objects handed to ``Prior``
             unchanged, in that order.
     """
     self.data = data
     self.nCol, self.nDat = self.data.nCol, self.data.nDat
     self.priors = Prior(prior_zeta, prior_sigma)
Ejemplo n.º 5
0
 def __init__(
         self,
         data,
         nMix,
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(2., 2.),
         prior_pi=DirichletPrior(1.),
 ):
     """
     Keep a reference to the data, cache its dimensions, record the
     number of mixture components, and bundle the priors.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         nMix: stored as ``self.nMix``.
         prior_alpha, prior_beta, prior_pi: prior objects handed to
             ``Prior`` unchanged, in that order.
     """
     self.data = data
     self.nCol, self.nDat = self.data.nCol, self.data.nDat
     self.nMix = nMix
     self.priors = Prior(prior_alpha, prior_beta, prior_pi)
     # self.pool = Pool(processes = 8)
Ejemplo n.º 6
0
 def __init__(
         self,
         data,
         prior_alpha=GammaPrior(1., 1.),
         prior_beta=GammaPrior(1., 1.),
         prior_eta=GammaPrior(2., 0.5),
         m=20,
 ):
     """
     Record ``m``, keep a reference to the data, cache its dimensions,
     and bundle the priors.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_alpha, prior_beta, prior_eta: prior objects handed to
             ``Prior`` unchanged, in that order.
         m: stored as ``self.m``.
     """
     self.m = m
     self.data = data
     self.nCol, self.nDat = self.data.nCol, self.data.nDat
     self.priors = Prior(prior_alpha, prior_beta, prior_eta)
     # self.pool = Pool(8)
Ejemplo n.º 7
0
 def __init__(
         self,
         data,
         prior_eta=GammaPrior(2., 0.5),
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(2., 2.),
         prior_xi=GammaPrior(0.5, 0.5),
         prior_tau=GammaPrior(2., 2.),
         m=20,
 ):
     """
     Keep a reference to the data, record ``m``, cache the data
     dimensions, and bundle the five priors.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_eta, prior_alpha, prior_beta, prior_xi, prior_tau: prior
             objects handed to ``Prior`` unchanged, in that order.
         m: stored as ``self.m``.
     """
     self.data = data
     self.m = m
     self.nCol, self.nDat = self.data.nCol, self.data.nDat
     prior_parts = (prior_eta, prior_alpha, prior_beta, prior_xi, prior_tau)
     self.priors = Prior(*prior_parts)
Ejemplo n.º 8
0
 def __init__(
         self,
         data,
         nMix,
         prior_pi=DirichletPrior(0.5,),
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(2., 2.),
         prior_xi=GammaPrior(0.5, 0.5),
         prior_tau=GammaPrior(2., 2.),
 ):
     """
     Keep a reference to the data, record the number of mixture
     components, cache the data dimensions, and bundle the five priors.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         nMix: stored as ``self.nMix``.
         prior_pi, prior_alpha, prior_beta, prior_xi, prior_tau: prior
             objects handed to ``Prior`` unchanged, in that order.
     """
     self.data = data
     self.nMix = nMix
     self.nCol, self.nDat = self.data.nCol, self.data.nDat
     prior_parts = (prior_pi, prior_alpha, prior_beta, prior_xi, prior_tau)
     self.priors = Prior(*prior_parts)
Ejemplo n.º 9
0
 def __init__(
     self,
     data,
     prior_mu=(0., 1.),  # NormalPrior(np.zeros(7), np.eye(7) * 1.),
     prior_mu0=(0., 1.),  # NormalPrior(np.zeros(7), np.eye(7) * 0.125),
     prior_Sigma=(5, 1.),  # InvWishartPrior(10, np.eye(7) * 1),
     prior_Sigma0=(5, 1.),  #InvWishartPrior(10, np.eye(7) * 1),
     prior_eta=GammaPrior(2, 10),
     m=20,
 ):
     """
     Record ``m`` and the data, expand the scalar hyper-parameters into
     (nCol - 1)-dimensional priors, and start a worker pool.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_mu: pair; entry 0 fills the location vector and entry 1
             scales the identity matrix of the first ``NormalPrior``.
         prior_mu0: same layout as ``prior_mu``, for the second
             ``NormalPrior``.
         prior_Sigma: pair; entry 0 is added to ``nCol`` and entry 1
             scales the identity matrix of the first
             ``InvWishartPrior``.
         prior_Sigma0: same layout as ``prior_Sigma``, for the second
             ``InvWishartPrior``.
         prior_eta: prior object handed to ``Prior`` unchanged.
         m: stored as ``self.m``.
     """
     self.m = m
     self.data = data
     self.nCol = self.data.nCol
     self.nDat = self.data.nDat
     mu_actual = NormalPrior(
         np.ones(self.nCol - 1) * prior_mu[0],
         np.eye(self.nCol - 1) * prior_mu[1],
     )
     # Fixed: this matrix was scaled by prior_mu[1], so the scale given
     # in prior_mu0 was silently ignored.
     mu0_actual = NormalPrior(
         np.ones(self.nCol - 1) * prior_mu0[0],
         np.eye(self.nCol - 1) * prior_mu0[1],
     )
     # NOTE(review): the first argument below uses nCol while the matrix
     # is (nCol - 1) x (nCol - 1) -- confirm this offset is intended.
     Sigma_actual = InvWishartPrior(
         self.nCol + prior_Sigma[0],
         np.eye(self.nCol - 1) * prior_Sigma[1],
     )
     Sigma0_actual = InvWishartPrior(
         self.nCol + prior_Sigma0[0],
         np.eye(self.nCol - 1) * prior_Sigma0[1],
     )
     self.priors = Prior(mu_actual, Sigma_actual, mu0_actual, Sigma0_actual,
                         prior_eta)
     # Pool of 8 worker processes, each initialised with limit_cpu.
     self.pool = mp.Pool(processes=8, initializer=limit_cpu)
     return
Ejemplo n.º 10
0
 def __init__(
         self,
         data,
         prior_eta=GammaPrior(2., 0.5),
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(2., 2.),
         p=10,
         max_clust_count=300,
 ):
     """
     Store the data and sampler settings, bundle the priors, then call
     ``set_projection``.

     Args:
         data: object exposing ``nCol`` and ``nDat``.
         prior_eta, prior_alpha, prior_beta: prior objects handed to
             ``Prior`` unchanged, in that order.
         p: stored as ``self.p``.
         max_clust_count: stored as ``self.max_clust_count``.
     """
     # Plain settings first.
     self.data = data
     self.p = p
     self.max_clust_count = max_clust_count

     # Dimensions cached from the data object.
     self.nCol, self.nDat = self.data.nCol, self.data.nDat

     self.priors = Prior(prior_eta, prior_alpha, prior_beta)
     self.set_projection()
Ejemplo n.º 11
0
 def __init__(
         self,
         data,
         prior_eta=GammaPrior(2., 1.),
         prior_alpha=GammaPrior(0.5, 0.5),
         prior_beta=GammaPrior(2., 2.),
         p=10,
         max_clust_count=300,
 ):
     """
     Store the data and sampler settings, build the sphere membership
     matrix, bundle the priors, and start a worker pool.

     Args:
         data: object exposing ``nCat``, ``nDat`` and ``spheres``.
         prior_eta, prior_alpha, prior_beta: prior objects handed to
             ``Prior`` unchanged, in that order.
         p: stored as ``self.p``.
         max_clust_count: stored as ``self.max_clust_count``.
     """
     self.data = data
     self.max_clust_count = max_clust_count
     self.p = p
     self.nCat = self.data.nCat
     self.nDat = self.data.nDat
     self.spheres = self.data.spheres

     # One row per sphere, one column per category; the entries indexed
     # by each sphere are set in its row (True is stored as 1.0).
     self.sphere_mat = np.zeros((len(self.spheres), self.nCat))
     for row, members in zip(self.sphere_mat, self.spheres):
         row[members] = True

     self.priors = Prior(prior_eta, prior_alpha, prior_beta)
     # Pool of 8 worker processes, each initialised with limit_cpu.
     self.pool = Pool(processes=8, initializer=limit_cpu)
Ejemplo n.º 12
0
    def __init__(
        self,
        data,
        prior_eta=GammaPrior(2., 0.5),
        prior_mu=(0, 3.),
        prior_Sigma=(10, 0.5),
        p=10,
        max_clust_count=300,
        ntemps=3,
        stepping=1.05,
    ):
        """
        Store the mixed data and sampler settings, build the priors, and
        set up parallel tempering and adaptive Metropolis state.

        Args:
            data: a ``MixedData`` instance; its ``nCat``, ``nCol``,
                ``nDat`` and ``Cats`` attributes are read here.
            prior_eta: prior object handed to ``Prior`` unchanged.
            prior_mu: pair; entry 0 fills the NormalPrior location
                vector and entry 1 (square-rooted) scales the identity
                matrices passed alongside it.
            prior_Sigma: pair; entry 0 is added to ``tCol`` and entry 1
                scales an identity matrix for ``InvWishartPrior``.
            p: stored as ``self.p``.
            max_clust_count: stored as ``self.max_clust_count``.
            ntemps: number of temperatures.
            stepping: ladder base; entry ``k`` of ``self.itl`` is
                ``1 / stepping**k``.

        Raises:
            TypeError: if ``data`` is not a ``MixedData`` instance.
        """
        # Explicit check rather than ``assert``: asserts disappear under
        # ``python -O``, and isinstance also accepts MixedData subclasses.
        if not isinstance(data, MixedData):
            raise TypeError(
                'data must be a MixedData instance, got {}'.format(
                    type(data).__name__))
        self.data = data
        self.max_clust_count = max_clust_count
        self.p = p
        self.nCat = self.data.nCat
        self.nCol = self.data.nCol
        # Total column count: nCol plus nCat.
        self.tCol = self.nCol + self.nCat
        self.nDat = self.data.nDat
        self.nCats = self.data.Cats.shape[0]

        # Setting Priors
        _prior_mu = NormalPrior(
            np.ones(self.tCol) * prior_mu[0],
            np.eye(self.tCol) * np.sqrt(prior_mu[1]),
            np.eye(self.tCol) / np.sqrt(prior_mu[1]),
        )
        _prior_Sigma = InvWishartPrior(
            self.tCol + prior_Sigma[0],
            np.eye(self.tCol) * prior_Sigma[1],
        )
        self.priors = Prior(prior_eta, _prior_mu, _prior_Sigma)
        self.set_projection()
        self.categorical_considerations()

        # Parallel Tempering
        self.nTemp = ntemps
        self.itl = 1 / stepping**np.arange(ntemps)
        # Temperature index repeated nDat times for each temperature.
        self.temp_unravel = np.repeat(np.arange(self.nTemp), self.nDat)
        self.nSwap_per = self.nTemp // 2
        self.swap_start = 100

        # Adaptive Metropolis
        self.am_Sigma = PerObsTemperedOnlineCovariance(self.nTemp, self.nDat,
                                                       self.tCol,
                                                       self.max_clust_count)
        self.am_scale = 2.38**2 / self.tCol
        return
Ejemplo n.º 13
0
        'in_outcome_path': './ad/mammography/outcome.csv',
        'out_path': './ad/mammography/results_mdppprgln_test.pkl',
        'cat_vars': '[5,6,7,8]',
        'decluster': 'False',
        'quantile': 0.95,
        'nSamp': 5000,
        'nKeep': 2000,
        'nThin': 3,
        'eta_alpha': 2.,
        'eta_beta': 1.,
    }
    # Wrap the settings dict; the code below reads its entries as
    # attributes (p.in_data_path, p.cat_vars, ...).
    p = Heap(**d)

    # Load the raw observations and outcomes as plain arrays.
    raw = read_csv(p.in_data_path).values
    out = read_csv(p.in_outcome_path).values
    # NOTE(review): eval() is applied to the 'cat_vars' and 'decluster'
    # setting strings. That is only safe while they come from the
    # hard-coded dict above; ast.literal_eval would parse the same
    # values ('[5,6,7,8]', 'False') without executing code.
    data = MixedData(
        raw,
        cat_vars=np.array(eval(p.cat_vars), dtype=int),
        decluster=eval(p.decluster),
        quantile=float(p.quantile),
        outcome=out,
    )
    # NOTE(review): `out` was already passed as `outcome=` above --
    # confirm this second call is still required.
    data.fill_outcome(out)
    # NOTE(review): the eta prior is hard-coded here; the 'eta_alpha' and
    # 'eta_beta' settings are not read anywhere in the visible code.
    model = Chain(data, prior_eta=GammaPrior(2, 1), p=10, ntemps=3)
    # Run the sampler, write the output file, then build a Result from it.
    model.sample(p.nSamp)
    model.write_to_disk(p.out_path, p.nKeep, p.nThin)
    res = Result(p.out_path)
    # res.write_posterior_predictive('./test/postpred.csv')

# EOF
Ejemplo n.º 14
0
pt.MPI_MESSAGE_SIZE = 2**20

# Non-zero ranks run a PTSlave and watch; rank 0 loads the data and
# drives the PTMaster.
if rank > 0:
    chain = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    chain.watch()
elif rank == 0:
    empirical_csv = './output/dppgln2/empirical.csv'
    results_db = './output/dppgln2/results_2_1e-1.db'

    raw = read_csv('./datasets/ivt_nov_mar.csv')
    # Move the last two columns to the front.
    tail_cols = raw.T[-2:].T
    head_cols = raw.T[:-2].T
    raw2 = np.hstack((tail_cols, head_cols))
    data = Data_From_Raw(raw2, True)
    data.write_empirical(empirical_csv)

    # One ladder entry per non-master rank.
    ladder = 1.05**np.array(range(size - 1))
    model = pt.PTMaster(
        comm,
        temperature_ladder=ladder,
        data=data,
        prior_eta=GammaPrior(2., .1),
    )
    model.sample(10000)
    model.write_to_disk(results_db, 5000, 1)
    model.complete()

    # res = DPMPG_Result('./output/dppgln/results_2_1e-1.db') # not working yet.
    # res.write_posterior_predictive('./output/dppgln/postpred_2_1e-1.csv')

## relevant to non-MPI version
# if __name__ == '__main__':
#     raw  = read_csv('./datasets/ivt_nov_mar.csv')
#     data = Data_From_Raw(raw, True)
#     data.write_empirical('./output/dppgln2/empirical.csv')
#
#     model = pt.PTMaster(
#         statmodel = DPMPG_Chain,
Ejemplo n.º 15
0
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle


path = './datasets/ivt_nov_mar.csv'


if __name__ == '__main__':
    # Output locations for this run.
    empirical_csv = './output/dpmpg/empirical.csv'
    results_db = './output/dpmpg/results_2_1e1.db'
    postpred_csv = './output/dpmpg/postpred_2_1e1.csv'

    raw = read_csv(path)
    # Optional column shuffle, currently disabled:
    # cols = raw.columns.values.tolist()
    # shuffle(cols)
    # raw  = raw.reindex(columns = cols)
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    # Fit the model, then keep 25000 draws thinned by 5.
    eta_prior = GammaPrior(2., 10.)
    dpmpg = DPMPG(data, prior_eta=eta_prior)
    dpmpg.initialize_sampler(50000)
    dpmpg.sample(50000)
    dpmpg.write_to_disk(results_db, 25000, 5)

    # Build a result object from the written file and export the
    # posterior predictive.
    res = ResultDPMPG(results_db)
    res.write_posterior_predictive(postpred_csv)

# EOF
Ejemplo n.º 16
0
            p.out_folder,
            p.model,
            'empirical.csv',
        )
        out_path = os.path.join(
            p.out_folder,
            p.model,
            'results_{}_{}.db'.format(p.eta_shape, p.eta_rate),
        )
        pp_path = os.path.join(
            p.out_folder,
            p.model,
            'postpred_{}_{}.csv'.format(p.eta_shape, p.eta_rate),
        )
        model = Chain(data,
                      prior_eta=GammaPrior(float(p.eta_shape),
                                           float(p.eta_rate)))

    elif p.model.startswith('m'):
        emp_path = os.path.join(
            p.out_folder,
            p.model,
            'empirical.csv',
        )
        out_path = os.path.join(
            p.out_folder,
            p.model,
            'results_{}.db'.format(p.nMix),
        )
        pp_path = os.path.join(
            p.out_folder,
            p.model,
Ejemplo n.º 17
0
        'in_outcome_path': './ad/cover/outcome.csv',
        'out_path': './ad/cover/results_mdppprg.pkl',
        'cat_vars': '[9,10,11,12]',
        'decluster': 'False',
        'quantile': 0.998,
        'nSamp': 50000,
        'nKeep': 20000,
        'nThin': 30,
        'eta_alpha': 2.,
        'eta_beta': 1.,
    }
    # Wrap the settings dict; the code below reads its entries as
    # attributes (p.in_data_path, p.cat_vars, ...).
    p = Heap(**d)

    # Load the raw observations and outcomes as plain arrays.
    raw = read_csv(p.in_data_path).values
    out = read_csv(p.in_outcome_path).values
    # NOTE(review): eval() is applied to the 'cat_vars' and 'decluster'
    # setting strings. That is only safe while they come from the
    # hard-coded dict above; ast.literal_eval would parse the same
    # values ('[9,10,11,12]', 'False') without executing code.
    data = MixedData(
        raw,
        cat_vars=np.array(eval(p.cat_vars), dtype=int),
        decluster=eval(p.decluster),
        quantile=float(p.quantile),
        outcome=out,
    )
    # NOTE(review): `out` was already passed as `outcome=` above --
    # confirm this second call is still required.
    data.fill_outcome(out)
    # NOTE(review): the eta prior is hard-coded here; the 'eta_alpha' and
    # 'eta_beta' settings are not read anywhere in the visible code.
    model = Chain(data, prior_eta=GammaPrior(2, 1), p=10)
    # Run the sampler, write the output file, then build a Result from it.
    model.sample(p.nSamp)
    model.write_to_disk(p.out_path, p.nKeep, p.nThin)
    res = Result(p.out_path)
    # res.write_posterior_predictive('./test/postpred.csv')

# EOF
Ejemplo n.º 18
0
from dp_rprojgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle


path = './datasets/ivt_nov_mar.csv'


if __name__ == '__main__':
    # Output locations for this run.
    empirical_csv = './output/dpmrpg/empirical.csv'
    results_db = './output/dpmrpg/results_2_1e0.db'
    postpred_csv = './output/dpmrpg/postpred_2_1e0.csv'

    raw = read_csv(path)
    # Optional column shuffle, currently disabled:
    # cols = raw.columns.values.tolist()
    # shuffle(cols)
    # raw  = raw.reindex(columns = cols)
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    # Fit the model, then keep 10000 draws thinned by 2.
    eta_prior = GammaPrior(2., 1e0)
    dpmpg = DPMPG(data, prior_eta=eta_prior)
    dpmpg.initialize_sampler(20000)
    dpmpg.sample(20000)
    dpmpg.write_to_disk(results_db, 10000, 2)

    # Build a result object from the written file and export the
    # posterior predictive.
    res = ResultDPMPG(results_db)
    res.write_posterior_predictive(postpred_csv)

# EOF
Ejemplo n.º 19
0
    # Look up the chain and result callables registered under the
    # requested model name.
    Chain  = models.Chains[p.model]
    Result = models.Results[p.model]

    data = Data(os.path.join(p.in_path, 'data.db'))

    # Only model names starting with 'dp' are handled; anything else
    # raises ValueError (with no message).
    if p.model.startswith('dp'):
        # All outputs go under <in_path>/<model>/; the results and
        # posterior-predictive file names are tagged with p.p.
        emp_path = os.path.join(p.in_path, p.model, 'empirical.csv')
        out_path = os.path.join(
            p.in_path, p.model, 'results_{}.db'.format(p.p),
            )
        pp_path = os.path.join(
            p.in_path, p.model, 'postpred_{}.csv'.format(p.p),
            )
        # Settings are converted explicitly (float here, int below).
        # presumably they arrive as strings from the command line --
        # TODO confirm against the argument parser.
        model = Chain(
            data, p = float(p.p), prior_eta = GammaPrior(float(p.eta_shape), float(p.eta_rate)),
            )
    else:
        raise ValueError

    data.write_empirical(emp_path)

    model.sample(int(p.nSamp))

    model.write_to_disk(out_path, int(p.nKeep), int(p.nThin))

    # Build a result object from the file just written.
    res = Result(out_path)

    res.write_posterior_predictive(pp_path)

# EOF
Ejemplo n.º 20
0
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle


path = './datasets/ivt_nov_mar.csv'


if __name__ == '__main__':
    # Output locations for this run.
    empirical_csv = './output/dpmpg/empirical.csv'
    results_db = './output/dpmpg/results_2_1e-1.db'
    postpred_csv = './output/dpmpg/postpred_2_1e-1.csv'

    raw = read_csv(path)
    # Optional column shuffle, currently disabled:
    # cols = raw.columns.values.tolist()
    # shuffle(cols)
    # raw  = raw.reindex(columns = cols)
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    # Fit the model, then keep 25000 draws thinned by 5.
    eta_prior = GammaPrior(2., 0.1)
    dpmpg = DPMPG(data, prior_eta=eta_prior)
    dpmpg.initialize_sampler(50000)
    dpmpg.sample(50000)
    dpmpg.write_to_disk(results_db, 25000, 5)

    # Build a result object from the written file and export the
    # posterior predictive.
    res = ResultDPMPG(results_db)
    res.write_posterior_predictive(postpred_csv)

# EOF
Ejemplo n.º 21
0
# Non-zero ranks run a PTSlave and watch; rank 0 loads the data and
# drives the PTMaster.
if rank > 0:
    chain = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    chain.watch()
elif rank == 0:
    empirical_csv = './output/dppgln/empirical.csv'
    results_db = './output/dppgln/results_2_1e0.db'
    postpred_csv = './output/dppgln/postpred_2_1e0.csv'

    raw = read_csv('./datasets/ivt_nov_mar.csv')
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    # One ladder entry per non-master rank.
    ladder = 1.05**np.array(range(size - 1))
    model = pt.PTMaster(
        comm,
        # statmodel = DPMPG_Chain,
        temperature_ladder=ladder,
        data=data,
        prior_eta=GammaPrior(2., 1.),
    )
    model.sample(50000)
    model.write_to_disk(results_db, 25000, 5)
    model.complete()

    # Build a result object from the written file and export the
    # posterior predictive.
    res = DPMPG_Result(results_db)
    res.write_posterior_predictive(postpred_csv)

#
# if __name__ == '__main__':
#     raw  = read_csv('./datasets/ivt_nov_mar.csv')
#     data = Data_From_Raw(raw, True)
#     data.write_empirical('./output/dpmpg_empirical_decluster.csv')
#
#     model = pt.PTMaster(
#         statmodel = DPMPG_Chain,
Ejemplo n.º 22
0
from dp_projgamma import DPMPG, ResultDPMPG
from projgamma import GammaPrior
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # Output locations for this run.
    empirical_csv = './output/dpmpg/empirical.csv'
    results_db = './output/dpmpg/results_2_1e0.db'
    postpred_csv = './output/dpmpg/postpred_2_1e0.csv'

    raw = read_csv(path)
    # Optional column shuffle, currently disabled:
    # cols = raw.columns.values.tolist()
    # shuffle(cols)
    # raw  = raw.reindex(columns = cols)
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    # Fit the model, then keep 25000 draws thinned by 5.
    eta_prior = GammaPrior(2., 1.)
    dpmpg = DPMPG(data, prior_eta=eta_prior)
    dpmpg.initialize_sampler(50000)
    dpmpg.sample(50000)
    dpmpg.write_to_disk(results_db, 25000, 5)

    # Build a result object from the written file and export the
    # posterior predictive.
    res = ResultDPMPG(results_db)
    res.write_posterior_predictive(postpred_csv)

# EOF