def fit(self):
        # Observation dimension and weak-limit truncation level, taken from
        # the first demonstration's recorded size.
        p = self._demonstration_sizes[0][1]

        Nmax = self._demonstration_sizes[0][0]
        affine = True
        nlags = self.lag  # autoregressive order
        # One AR observation distribution (MNIW prior) per candidate state.
        obs_distns = [
            di.AutoRegression(nu_0=self.nu,
                              S_0=np.eye(p),
                              M_0=np.zeros((p, nlags * p + affine)),
                              K_0=np.eye(nlags * p + affine),
                              affine=affine) for state in range(Nmax)
        ]

        # Negative-binomial duration distributions; np.ones(10) puts a
        # uniform prior on the r parameter over {1, ..., 10}.
        dur_distns = [
            NegativeBinomialIntegerR2Duration(r_discrete_distn=np.ones(10),
                                              alpha_0=1.,
                                              beta_0=1.)
            for state in range(Nmax)
        ]

        model = m.ARWeakLimitHDPHSMMIntNegBin(
            alpha=self.alpha,
            gamma=self.gamma,
            init_state_concentration=self.init_state_concentration,
            obs_distns=obs_distns,
            dur_distns=dur_distns,
        )

        for d in self._demonstrations:
            model.add_data(d, trunc=60)  # cap duration support at 60 steps


        # Gibbs sampling
        for itr in progprint_xrange(20):
            model.resample_model()

        new_segments = []
        for i in range(len(self._demonstrations)):
            new_segments.append(
                self.findTransitions(model.states_list[i].stateseq))

        self.segmentation = new_segments
        self.model = model
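
The method above delegates segmentation to a findTransitions helper that is not shown. A minimal sketch of what it could look like, assuming it simply returns the indices where the sampled state sequence changes label (the name is from the call above, but this body is inferred, not the original):

def findTransitions(self, stateseq):
        # Hypothetical helper (not in the original class).
        stateseq = np.asarray(stateseq)
        # np.diff is nonzero exactly at a label change; +1 shifts each hit
        # to the first timestep of the new segment
        return (np.where(np.diff(stateseq) != 0)[0] + 1).tolist()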
import numpy as np
import matplotlib.pyplot as plt

import pyhsmm
from pyhsmm.util.stats import whiten, cov

import autoregressive.models as m
import autoregressive.distributions as d

###################
#  generate data  #
###################

# Three AR(2) regimes: each matrix stacks the lag-2 and lag-1 coefficient
# blocks side by side, scaled by 0.99 to keep the dynamics stable.
As = [
    0.99 * np.hstack((-np.eye(2), 2 * np.eye(2))),
    0.99 * np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
                     [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
                         np.hstack((-np.eye(2), np.eye(2)))) +
    np.hstack((np.zeros((2, 2)), np.eye(2))),
    0.99 * np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
                     [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
                         np.hstack((-np.eye(2), np.eye(2)))) +
    np.hstack((np.zeros((2, 2)), np.eye(2))),
]

truemodel = m.ARHSMM(
    alpha=2.,
    init_state_distn='uniform',
    obs_distns=[d.AutoRegression(A=A, sigma=np.eye(2)) for A in As],
    dur_distns=[
        pyhsmm.basic.distributions.PoissonDuration(alpha_0=3 * 50, beta_0=3)
        for state in range(len(As))
    ],
)

data, labels = truemodel.generate(1000)
data += np.random.normal(size=data.shape) # some extra noise

fig, spa = plt.subplots(2, 1)
spa[0].plot(data[:, 0], data[:, 1], 'bx-')  # trajectory in the plane
spa[1].plot(data, 'bx-')                    # both coordinates against time
spa[1].set_xlim(0, data.shape[0])
fig.suptitle('data')

truemodel.plot()
plt.gcf().suptitle('truth')
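
Each matrix in As stacks the lag-2 and lag-1 coefficient blocks side by side (oldest lag first, matching the ordering AR_striding uses), so regime 0 implements x_t = 0.99 * (2*x_{t-1} - x_{t-2}). A standalone rollout to sanity-check one regime, using only the arrays defined above; this check is an addition, not part of the original example:

# Noiseless rollout of regime 0; assumes lag order [x_{t-2} | x_{t-1}]
A0 = As[0]
x = np.zeros((200, 2))
x[1] = [1., 0.]  # small kick to start the dynamics
for t in range(2, 200):
    x[t] = A0.dot(np.concatenate((x[t - 2], x[t - 1])))
plt.figure()
plt.plot(x[:, 0], x[:, 1], 'g.-')
plt.title('regime 0, noiseless rollout')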
Example #3
### model

import autoregressive.models as models
import autoregressive.distributions as distributions

Nmax = 20        # weak-limit truncation on the number of states
affine = True
nlags = 1
ndim = data.shape[1]  # observation dimension (assumed; `data` as generated above)

model = models.FastARWeakLimitStickyHDPHMM(
    alpha=10.,
    gamma=10.,
    kappa=1e4,  # stickiness: extra prior mass on self-transitions
    init_state_distn='uniform',
    obs_distns=[
        distributions.AutoRegression(nu_0=ndim + 1,
                                     S_0=np.eye(ndim),
                                     M_0=np.zeros(
                                         (ndim, ndim * nlags + affine)),
                                     K_0=np.eye(ndim * nlags + affine),
                                     affine=affine) for state in range(Nmax)
    ],
)

model.add_data(data)

### inference

# for itr in progprint_xrange(500):
#     model.resample_model()

# model.plot_stateseq(model.states_list[0], plot_slice=slice(6000,8000))
# plt.show()
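
If the commented-out loop above is run, the sampled labels live on the states object that add_data attached to the model; a short sketch (iteration count reduced from the 500 in the comment):

# Sketch: a short Gibbs pass, then pull out the sampled labels
from pyhsmm.util.text import progprint_xrange

for itr in progprint_xrange(100):
    model.resample_model()
z = model.states_list[0].stateseq   # one label per (strided) timestep
print('states visited:', np.unique(z))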
Example #4
import numpy as np
import matplotlib.pyplot as plt

import pyhsmm
from pyhsmm.basic.distributions import NegativeBinomialIntegerR2Duration

import autoregressive.models as m
import autoregressive.distributions as d

###################
#  generate data  #
###################

# Same three AR(2) regimes as in the previous example, without the 0.99
# contraction factor.
As = [
    np.hstack((-np.eye(2), 2 * np.eye(2))),
    np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
              [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
                  np.hstack((-np.eye(2), np.eye(2)))) +
    np.hstack((np.zeros((2, 2)), np.eye(2))),
    np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
              [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
                  np.hstack((-np.eye(2), np.eye(2)))) +
    np.hstack((np.zeros((2, 2)), np.eye(2))),
]

truemodel = m.ARHSMM(
    alpha=4.,
    init_state_concentration=4.,
    obs_distns=[d.AutoRegression(A=A, sigma=0.1 * np.eye(2)) for A in As],
    dur_distns=[
        pyhsmm.basic.distributions.PoissonDuration(alpha_0=4 * 25, beta_0=4)
        for state in range(len(As))
    ],
)

data, _ = truemodel.generate(1000)  # generate returns (observations, labels)

plt.figure()
plt.plot(data[:, 0], data[:, 1], 'bx-')

##################
#  create model  #
##################

Nmax = 20
affine = True
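
The example is truncated at this point. Judging from the NegativeBinomialIntegerR2Duration import above and the analogous constructions in the surrounding examples, the model block plausibly continues along these lines (a sketch, not the original code):

# Plausible continuation (sketch): AR HDP-HSMM with negative-binomial durations
nlags = 2  # assumed lag order, matching the AR(2) regimes above

obs_distns = [
    d.AutoRegression(nu_0=4,
                     S_0=np.eye(2),
                     M_0=np.zeros((2, 2 * nlags + affine)),
                     K_0=np.eye(2 * nlags + affine),
                     affine=affine) for state in range(Nmax)
]
dur_distns = [
    NegativeBinomialIntegerR2Duration(r_discrete_distn=np.ones(10),
                                      alpha_0=1., beta_0=1.)
    for state in range(Nmax)
]
model = m.ARWeakLimitHDPHSMMIntNegBin(
    alpha=4., gamma=4., init_state_concentration=4.,
    obs_distns=obs_distns, dur_distns=dur_distns)
model.add_data(data, trunc=60)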
Example #5
import time

import numpy as np
import numpy.random as npr
from tqdm import trange

import autoregressive.models as armodels
import autoregressive.distributions as ardistributions
# get_empirical_ar_params is a utility from the surrounding project that
# tunes the AR hyperparameters to the data's empirical statistics.


def fit_ar_pyhsmm_models(
    train_datas,
    val_datas,
    test_datas,
    K=2,
    N_iters=2000,
    seed=1,
    lags=1,
    affine=True,
    alpha=10,
    gamma=10,
    kappa=100,
    init_state_distn="uniform",
    observations="ar",
):
    """
    Fit a sticky HDP-HMM with AR observations to a list of training
    sequences via Gibbs sampling, tracking train/validation/test
    log-likelihoods at every iteration.
    """
    npr.seed(seed)
    assert (len(train_datas) > 0) and (type(train_datas) is list)
    assert (len(val_datas) > 0) and (type(val_datas) is list)
    assert (len(test_datas) > 0) and (type(test_datas) is list)

    # Standard AR model (Scale resampling)
    D_obs = train_datas[0].shape[1]

    def evaluate_model(model):
        # training log-likelihood (over the data already added to the model)
        train_ll = model.log_likelihood()

        # validation log-likelihood
        val_pll = 0
        for data in val_datas:
            val_pll += model.log_likelihood(data)

        # test log-likelihood
        test_pll = 0
        for data in test_datas:
            test_pll += model.log_likelihood(data)

        return train_ll, val_pll, test_pll

    # Construct a standard AR-HMM: matrix-normal-inverse-Wishart prior on
    # (A, Sigma), with the mean dynamics initialized near persistence.
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )

    obs_hypers = get_empirical_ar_params(train_datas, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # ----------------
    # Init Model Param
    # ----------------
    model = armodels.ARWeakLimitStickyHDPHMM(
        # sampled from 1d finite pmf
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # create A, Sigma
        obs_distns=obs_distns,
        kappa=kappa,
    )

    # ----------------
    # Add datasets
    # ----------------

    for data in train_datas:
        model.add_data(data)

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # -------------------------------
    #  Record the initial likelihoods
    # -------------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        model.resample_model()  # one full Gibbs sweep
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # ----------------------
    # Run for each iteration
    # ----------------------

    # values at each timestep
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])

    lls_train, lls_val, lls_test = \
            zip(*((init_val,) + vals))

    timestamps = np.cumsum((0., ) + timesteps)

    # calculate the states after N_iters
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
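
A hypothetical invocation with placeholder arrays (each of shape (T, D_obs)); the names and sizes here are illustrative only:

# Hypothetical usage of fit_ar_pyhsmm_models with synthetic splits
train = [np.random.randn(500, 2) for _ in range(3)]
val = [np.random.randn(200, 2)]
test = [np.random.randn(200, 2)]
model, lls_train, lls_val, lls_test, timestamps, z = fit_ar_pyhsmm_models(
    train, val, test, K=4, N_iters=100)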
Example #6
def fit_ar_separate_trans_pyhsmm_models(
    train_datas,
    val_datas,
    test_datas,
    K=2,
    N_iters=2000,
    seed=1,
    lags=1,
    affine=True,
    alpha=10,
    gamma=10,
    kappa=100,
    init_state_distn="uniform",
    observations="ar",
):
    """
    Fit model using separate transition matrices per
    element in dictionary.
    """
    npr.seed(seed)
    assert type(train_datas) is defaultdict

    datas_all = []
    for _, datalist in train_datas.items():
        print(len(datalist))  # number of sequences in this group
        datas_all.extend(datalist)

    print("Running for {} data chunks".format(len(datas_all)))
    # Standard AR model (Scale resampling)
    D_obs = datas_all[0].shape[1]

    def evaluate_model(model):
        # training log-likelihood
        ll = model.log_likelihood()

        # validation log-likelihood, computed per group since each group
        # has its own transition matrix
        val_pll = 0
        for data_id, data in val_datas.items():
            val_pll += model.log_likelihood(group_id=data_id, data=data)

        # test log-likelihood
        test_pll = 0
        for data_id, data in test_datas.items():
            test_pll += model.log_likelihood(group_id=data_id, data=data)

        return ll, val_pll, test_pll

    # Construct a standard AR-HMM
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )

    obs_hypers = get_empirical_ar_params(datas_all, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # free space
    del datas_all

    # Init Model Param
    model = armodels.ARWeakLimitStickyHDPHMMSeparateTrans(
        # sampled from 1d finite pmf
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # create A, Sigma
        obs_distns=obs_distns,
        kappa=kappa,
    )

    # drop our reference; the model now owns the observation distributions
    del obs_distns

    # --------------
    # Add datasets
    # --------------
    for group_id, datalist in train_datas.items():
        for data in datalist:
            model.add_data(group_id=group_id, data=data)

    # free space for very large datasets
    del train_datas

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # -------------------------------
    #  Record the initial likelihoods
    # -------------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        model.resample_model()  # one full Gibbs sweep
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # ----------------------
    # Run for each iteration
    # ----------------------

    # values at each timestep
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])
    lls_train, lls_val, lls_test = \
            zip(*((init_val,) + vals))

    timestamps = np.cumsum((0., ) + timesteps)

    # calculate the states after N_iters
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
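
A hypothetical invocation: the training data must be a defaultdict of lists keyed by group id, while the validation and test sets are plain dicts mapping the same group ids to single arrays (placeholder data below):

# Hypothetical usage of fit_ar_separate_trans_pyhsmm_models
from collections import defaultdict

train = defaultdict(list)
train['subj0'].append(np.random.randn(500, 2))
train['subj1'].append(np.random.randn(500, 2))
val = {'subj0': np.random.randn(200, 2), 'subj1': np.random.randn(200, 2)}
test = {'subj0': np.random.randn(200, 2), 'subj1': np.random.randn(200, 2)}
results = fit_ar_separate_trans_pyhsmm_models(train, val, test, K=4,
                                              N_iters=50)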
As = [
    0.99 * np.hstack((-np.eye(2), 2 * np.eye(2))),
    np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
              [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
                  np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros(
                      (2, 2)), np.eye(2))),
    np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
              [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
                  np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros(
                      (2, 2)), np.eye(2)))
]

truemodel = m.ARHSMM(
    alpha=4.,
    init_state_concentration=4.,
    obs_distns=[d.AutoRegression(A=A, sigma=np.eye(2)) for A in As],
    dur_distns=[
        pyhsmm.basic.distributions.PoissonDuration(alpha_0=4 * 25, beta_0=4)
        for state in range(len(As))
    ],
)

datas = []
labels = []
T = 2000  # assumed: total sequence length to generate, in 500-step chunks
for _ in range(0, T, 500):
    data, label = truemodel.generate(500, keep=True)
    datas.append(data)
    labels.append(label)

plt.figure()
plt.plot(data[:, 0], data[:, 1], 'bx-')  # plot the last generated chunk

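
Because generate is called with keep=True, each chunk's states object stays attached to truemodel, so the ground-truth labels can also be read back off the model itself:

# keep=True leaves the generated states on the model
true_zs = [s.stateseq for s in truemodel.states_list]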
def make_joint_models(train_datas, Nmax=10):
    # Define a sequence of models
    if isinstance(train_datas, list) and len(train_datas) > 0:
        data = train_datas
        num_worms = len(train_datas)
    else:
        data = [train_datas]
        num_worms = 1
    print('Making models')
    names_list = []
    fnames_list = []
    hmm_list = []
    color_list = []
    method_list = []
    # Standard AR model (Scale resampling)
    D_obs = data[0].shape[1]
    print('D_obs = {}'.format(D_obs))
    affine = True
    nlags = 1
    init_state_distn = 'uniform'

    # Construct a standard AR-HMM for fitting
    # with just one worm
    obs_hypers = dict(nu_0=D_obs + 2,
                      S_0=np.eye(D_obs),
                      M_0=np.hstack(
                          (np.eye(D_obs),
                           np.zeros((D_obs, D_obs * (nlags - 1) + affine)))),
                      K_0=np.eye(D_obs * nlags + affine),
                      affine=affine)

    # Joint model: fit all worms at once, sweeping a grid of hyperparameters
    # for each candidate number of states.
    # (Note: the grid below, not the Nmax argument, sets the model sizes.)
    state_array = [1, 2, 4, 6, 8, 10, 12, 15]
    alpha_array = [10.0]
    gamma_array = [10.0]
    kappa_array = 10**np.arange(2, 11)[::2]  # kappa in {1e2, 1e4, ..., 1e10}

    # Vary the hyperparameters of the scale resampling model
    for num_states, alpha_a_0, gamma_a_0, kappa_a_0 in itertools.product(
            state_array, alpha_array, gamma_array, kappa_array):
        # using data of all worms
        obs_hypers = get_empirical_ar_params(data, obs_hypers)
        obs_distns = [
            d.AutoRegression(**obs_hypers) for state in range(num_states)
        ]
        names_list.append("AR-HMM (Scale)")
        fnames_list.append(
            "ar_scale_wormall_states%d_alpha%.1f_gamma%.1f_kappa%.1f" %
            (num_states, alpha_a_0, gamma_a_0, kappa_a_0))
        color_list.append(allcolors[1])  # `allcolors` is a palette defined elsewhere
        # Init Model Param
        hmm = m.ARWeakLimitStickyHDPHMM(
            # sampled from 1d finite pmf
            alpha=alpha_a_0,
            gamma=gamma_a_0,
            init_state_distn=init_state_distn,
            # create A, Sigma
            obs_distns=obs_distns,
            kappa=kappa_a_0  # kappa
        )
        # Add data of each worm
        for cworm in np.arange(num_worms):
            hmm.add_data(data[cworm])
        # Store the model and its fitting routine
        hmm_list.append(hmm)
        method_list.append(fit)  # `fit` is the project's fitting routine, defined elsewhere
    print('Finished making baseline models')
    return names_list, fnames_list, color_list, hmm_list, method_list
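
A hypothetical call that builds the model grid for three placeholder worms (arrays of shape (T, D)):

# Hypothetical usage of make_joint_models
worms = [np.random.randn(1000, 4) for _ in range(3)]
names, fnames, colors, hmms, methods = make_joint_models(worms, Nmax=10)
print('{} candidate models in the grid'.format(len(hmms)))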