import sys
from simplex import *
from data import Data_From_Raw
from pandas import read_csv

# Input data set read by this script.
path = './datasets/ivt_nov_mar.csv'

# Column positions come from the command line, one integer per argument.
cli_args = sys.argv[1:]
col_idx = list(map(int, cli_args))

# Echo the raw arguments, one per line.
for arg in cli_args:
    print(arg)

# Output locations; each file name embeds the first three column indices.
emp_path = './output/fmix_3d/empirical_{}_{}_{}.csv'.format(*col_idx)
res_path = './output/fmix_3d/results_{}_{}_{}.db'.format(*col_idx)
out_path = './output/fmix_3d/postpred_{}_{}_{}.csv'.format(*col_idx)

# Keep only the requested columns before building the data object.
frame = read_csv(path)
raw = frame.iloc[:, col_idx]
data = Data_From_Raw(raw, True)
data.write_empirical(emp_path)

# Sample, persist the chain, then reload it to export the posterior predictive.
# NOTE(review): the trailing write_to_disk arguments look like burn-in and
# thinning -- confirm against FMIX_Chain.
fmix = FMIX_Chain(data, 10, GammaPrior(0.1, 0.1), DirichletPrior(1.))
fmix.sample(50000)
fmix.write_to_disk(res_path, 25000, 5)
res = FMIX_Result(res_path)
res.write_posterior_predictive(out_path)

# EOF
Beispiel #2
0
from dp_projgamma import DPMPG, ResultDPMPG
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

# Input data set read by this script.
path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # The results database is written by the sampler and read back below.
    results_db = './output/dpmpg_results_decluster.db'

    # Columns are used in file order; an earlier experiment shuffled the
    # column list (random.shuffle) and reindexed `raw` before this point.
    raw = read_csv(path)
    data = Data_From_Raw(raw, True)
    data.write_empirical('./output/dpmpg_empirical_decluster.csv')

    # NOTE(review): the trailing write_to_disk arguments look like burn-in and
    # thinning -- confirm against DPMPG.
    dpmpg = DPMPG(data)
    dpmpg.initialize_sampler(10000)
    dpmpg.sample(10000)
    dpmpg.write_to_disk(results_db, 5000, 1)

    res = ResultDPMPG(results_db)
    res.write_posterior_predictive('./output/dpmpg_postpred_decluster.csv')

# EOF
Beispiel #3
0
from simplex import *
from data import Data_From_Raw
from pandas import read_csv

# Input data set read by this script.
path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # The results database is written by the sampler and read back below.
    results_db = './output/dpmix/results_2_1e-1.db'

    raw = read_csv(path)
    data = Data_From_Raw(raw, True)
    data.write_empirical('./output/dpmix/empirical.csv')

    # Prior objects, in the positional order DPSimplex_Chain receives them.
    priors = (
        GammaPrior(0.5, 0.5),
        GammaPrior(2., 2.),
        GammaPrior(2., 1e-1),
    )
    model = DPSimplex_Chain(data, *priors)
    model.sample(20000)
    model.write_to_disk(results_db, 10000, 2)

    res = DPSimplex_Result(results_db)
    res.write_posterior_predictive('./output/dpmix/postpred_2_1e-1.csv')

# EOF
Beispiel #4
0
 def instantiate_data(self, path, decluster=True):
     """
     Read the raw CSV at `path` and store the resulting data object
     on the instance as `self.data`.

     path      : raw data path (CSV file)
     decluster : passed through as the second positional argument
                 of Data_From_Raw
     """
     self.data = Data_From_Raw(pd.read_csv(path), decluster)
from data import Data_From_Raw
from pandas import read_csv
from mpi4py import MPI
# Communicator handles; the rank of this process selects its role below.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
pt.MPI_MESSAGE_SIZE = 2**20

if rank > 0:
    # Every non-root rank wraps DPMPG_Chain in a PTSlave and calls watch().
    # NOTE(review): presumably watch() blocks while serving the rank-0
    # master -- confirm in `pt`.
    chain = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    chain.watch()
elif rank == 0:
    raw = read_csv('./datasets/ivt_nov_mar.csv')
    # Move the last two columns to the front; np.hstack yields a plain array.
    trailing = raw.T[-2:].T
    leading = raw.T[:-2].T
    raw2 = np.hstack((trailing, leading))
    data = Data_From_Raw(raw2, True)
    data.write_empirical('./output/dppgln2/empirical.csv')

    # Geometric ladder with ratio 1.05 and size - 1 entries.
    ladder = 1.05 ** np.arange(size - 1)
    model = pt.PTMaster(
        comm,
        temperature_ladder=ladder,
        data=data,
        prior_eta=GammaPrior(2., .1),
    )
    model.sample(10000)
    model.write_to_disk('./output/dppgln2/results_2_1e-1.db', 5000, 1)
    model.complete()

    # res = DPMPG_Result('./output/dppgln/results_2_1e-1.db') # not working yet.
    # res.write_posterior_predictive('./output/dppgln/postpred_2_1e-1.csv')

## relevant to non-MPI version
# if __name__ == '__main__':
Beispiel #6
0
                )
            except:
                data = MixedData(
                    raw,
                    eval(p.cats),
                    decluster=eval(p.decluster),
                    quantile=float(p.quantile),
                )
    else:
        if eval(p.sphere):
            data = Data_From_Sphere(raw)
        else:
            try:
                data = Data_From_Raw(
                    raw,
                    decluster=eval(p.decluster),
                    quantile=float(p.quantile),
                )
            except:
                data = Data_From_Raw(
                    raw,
                    decluster=eval(p.decluster),
                    quantile=float(p.quantile),
                )

    ## If there's a supplied outcome, initialize it
    if os.path.exists(p.outcome):
        outcome = read_csv(p.outcome).values
        data.fill_outcome(outcome)

    ## Initialize Chain
 def load_raw(self, path):
     """
     Read the CSV at `path` and store the resulting data object on the
     instance as `self.data`; Data_From_Raw is always called with True
     as its second positional argument.
     """
     self.data = Data_From_Raw(pd.read_csv(path), True)
Beispiel #8
0
# `comm` and `rank` are defined above this excerpt.
# NOTE(review): presumably an mpi4py communicator and this process's rank -- confirm.
size = comm.Get_size()

# Override the message-size setting exposed by the `pt` module.
# NOTE(review): units are defined in `pt` (presumably bytes) -- confirm.
pt.MPI_MESSAGE_SIZE = 2**24
# Run configuration; `argparser` is defined outside this excerpt.
args = argparser()

# Look up the chain and result classes registered under the requested model name.
Chain = models.Chains[args.model]
Result = models.Results[args.model]

# Ranks above zero build a PTSlave around the selected chain class and call watch().
# NOTE(review): presumably watch() blocks while serving the rank-0 master -- confirm in `pt`.
if rank > 0:
    chain = pt.PTSlave(comm=comm, statmodel=Chain)
    chain.watch()

if rank == 0:
    raw = read_csv(args.in_path)
    data = Data_From_Raw(raw,
                         decluster=eval(args.decluster),
                         quantile=float(args.quantile))

    if args.model.startswith('dp'):
        emp_path = os.path.join(
            args.out_folder,
            args.model,
            'empirical.csv',
        )
        out_path = os.path.join(
            args.out_folder,
            args.model,
            'results_{}_{}.db'.format(args.eta_shape, args.eta_rate),
        )
        pp_path = os.path.join(
            args.out_folder,
Beispiel #9
0
from gendirichlet import *
from data import Data_From_Raw
from pandas import read_csv

# Input data set read by this script.
path = './datasets/ivt_nov_mar.csv'

if __name__ == '__main__':
    # The results database is written by the sampler and read back below.
    results_db = './output/mgd/results_test.db'

    raw = read_csv(path)
    data = Data_From_Raw(raw, True)

    # Prior objects, in the positional order MGD_Chain receives them.
    priors = (
        DirichletPrior(0.5),
        GammaPrior(0.5, 0.5),
        GammaPrior(2.0, 2.0),
        GammaPrior(0.5, 0.5),
        GammaPrior(2.0, 2.0),
    )
    model = MGD_Chain(data, 10, *priors)
    model.sample(20000)
    model.write_to_disk(results_db, 10000, 2)

    res = MGD_Result(results_db)
    res.write_posterior_predictive('./output/mgd/postpred_test.csv')
    # model = DPPG_Chain(
    #         data,
    #         GammaPrior(2., 0.5),
    #         )
    # model.sample(20000)
    # model.write_to_disk('./output/dpmpg/results_test.db', 10000, 2)
    # res = DPPG_Result('./output/dpmpg/results_test.db')
    # res.write_posterior_predictive('./output/dpmpg/postpred_test.csv')
Beispiel #10
0
            sigmas[np.where(sigmas.T[0] == i)[0], 1:]
            for i in range(self.nSamp)
        ]
        self.samples.mu = mus
        self.samples.Sigma = Sigmas.reshape(self.nSamp, self.nCol, self.nCol)
        self.samples.r = rs
        return

    def __init__(self, path):
        """Initialise the instance by delegating to `self.load_data(path)`."""
        self.load_data(path)


if __name__ == '__main__':
    # Smoke run: sample on the bundled data set, write the chain to disk,
    # read it back, and export the posterior predictive.
    import os
    import time

    from pandas import read_csv

    from data import Data_From_Raw
    from projgamma import GammaPrior

    source_csv = './datasets/ivt_nov_mar.csv'
    results_file = './test/results.pkl'

    raw = read_csv(source_csv)
    data = Data_From_Raw(raw, decluster=True, quantile=0.95)
    data.write_empirical('./test/empirical.csv')

    model = Chain(data, prior_eta=GammaPrior(2, 1), p=10)
    model.sample(10000)
    model.write_to_disk(results_file, 5000, 5)

    res = Result(results_file)
    res.write_posterior_predictive('./test/postpred.csv')

# EOF
Beispiel #11
0
 def instantiate_data(self, path, quantile=0.95, decluster=True):
     """
     Read the raw CSV at `path`, store the resulting data object as
     `self.data`, then store the output of
     `generate_posterior_predictive_hypercube(10)` as `self.postpred`.

     path      : raw data path (CSV file)
     quantile  : forwarded to Data_From_Raw as `quantile`
     decluster : forwarded to Data_From_Raw as `decluster`
     """
     frame = pd.read_csv(path)
     self.data = Data_From_Raw(frame, quantile=quantile, decluster=decluster)
     # Runs after self.data is assigned, as in the original statement order.
     self.postpred = self.generate_posterior_predictive_hypercube(10)
Beispiel #12
0
        self.samples.alpha = alphas
        self.samples.beta = betas
        self.samples.zeta = [
            zetas[np.where(zetas.T[0] == i)[0], 1:] for i in range(self.nSamp)
        ]
        self.samples.r = rs
        return

    def __init__(self, path):
        """Initialise the instance by delegating to `self.load_data(path)`."""
        self.load_data(path)


if __name__ == '__main__':
    # Smoke run: sample on the bundled data set, write the chain to disk,
    # and read it back.
    import os

    from pandas import read_csv

    from data import Data_From_Raw
    from projgamma import GammaPrior

    source_csv = './datasets/ivt_nov_mar.csv'
    results_file = './test/results.pickle'

    raw = read_csv(source_csv)
    data = Data_From_Raw(raw, decluster=True, quantile=0.95)

    model = Chain(data, prior_eta=GammaPrior(2, 1), p=10)
    model.sample(5000)
    model.write_to_disk(results_file, 3000, 2)

    res = Result(results_file)
    # res.write_posterior_predictive('./test/postpred.csv')

# EOF