Example #1
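The listing omits the script header. A minimal set of assumed imports, matching the pyhsmm / pyhsmm-autoregressive stack this code targets (`get_empirical_ar_params`, `D_latent`, and `args` are module-level names defined elsewhere in the original script):

import numpy as np
from tqdm import tqdm
from pybasicbayes.distributions import Gaussian
from autoregressive.distributions import AutoRegression
from autoregressive.models import ARWeakLimitStickyHDPHMM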
def fit_arhmm(x, affine=True):
    print("Fitting Sticky ARHMM")
    # MNIW prior on the per-state dynamics (A, Sigma); the extra column in
    # M_0 / K_0 accounts for the affine (bias) term when affine=True
    dynamics_hypparams = \
        dict(nu_0=D_latent + 2,
             S_0=np.eye(D_latent),
             M_0=np.hstack((np.eye(D_latent), np.zeros((D_latent, int(affine))))),
             K_0=np.eye(D_latent + affine),
             affine=affine)
    dynamics_hypparams = get_empirical_ar_params([x], dynamics_hypparams)

    # One AR observation distribution per state, initialized near the
    # identity (slow, stable dynamics)
    dynamics_distns = [
        AutoRegression(
            A=np.column_stack((0.99 * np.eye(D_latent),
                               np.zeros((D_latent, int(affine))))),
            sigma=np.eye(D_latent),
            **dynamics_hypparams)
        for _ in range(args.K)]

    # Gaussian initial-state distribution with a NIW prior
    init_distn = Gaussian(nu_0=D_latent + 2,
                          sigma_0=np.eye(D_latent),
                          mu_0=np.zeros(D_latent),
                          kappa_0=1.0)

    # Sticky HDP-HMM over the AR states; kappa adds extra prior mass on
    # self-transitions
    arhmm = ARWeakLimitStickyHDPHMM(
        init_state_distn='uniform',
        init_emission_distn=init_distn,
        obs_distns=dynamics_distns,
        alpha=3.0, kappa=10.0, gamma=3.0)

    arhmm.add_data(x)

    # Gibbs sampling; track the joint log likelihood after each sweep
    lps = []
    for _ in tqdm(range(args.N_samples)):
        arhmm.resample_model()
        lps.append(arhmm.log_likelihood())

    # Prepend a state for the first frame consumed by the AR lag, so that
    # z_init aligns with x
    z_init = arhmm.states_list[0].stateseq
    z_init = np.concatenate(([0], z_init))

    return arhmm, z_init
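Once fit, the per-state dynamics can be read back off the observation distributions. A sketch assuming `x` is a `(T, D_latent)` array and that `A` and `sigma` are the parameter attributes pybasicbayes regression objects expose:

arhmm, z_init = fit_arhmm(x)
As = [d.A for d in arhmm.obs_distns]          # per-state dynamics (plus bias column if affine)
sigmas = [d.sigma for d in arhmm.obs_distns]  # per-state noise covariances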
Example #2
File: models.py Project: ekellbuch/behdata
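As above, the listing omits the file header. Assumed imports for this snippet (the `armodels` / `ardistributions` aliases match how the body refers to the pyhsmm-autoregressive modules; where `get_empirical_ar_params` lives is project-specific):

import time
import numpy as np
import numpy.random as npr
from tqdm import trange
from autoregressive import models as armodels
from autoregressive import distributions as ardistributions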
def fit_ar_pyhsmm_models(
    train_datas,
    val_datas,
    test_datas,
    K=2,
    N_iters=2000,
    seed=1,
    lags=1,
    affine=True,
    alpha=10,
    gamma=10,
    kappa=100,
    init_state_distn="uniform",
    observations="ar",
):
    """
    Fit datasets for multiple values
    """
    npr.seed(seed)
    assert (len(train_datas) > 0) and (type(train_datas) is list)
    assert (len(val_datas) > 0) and (type(val_datas) is list)
    assert (len(test_datas) > 0) and (type(test_datas) is list)

    # Standard AR model; all sequences share the observation dimension
    D_obs = train_datas[0].shape[1]

    def evaluate_model(model):
        # training log likelihood
        train_ll = model.log_likelihood()

        # validation predictive log likelihood
        val_pll = 0
        for data in val_datas:
            val_pll += model.log_likelihood(data)

        # test predictive log likelihood
        test_pll = 0
        for data in test_datas:
            test_pll += model.log_likelihood(data)

        return train_ll, val_pll, test_pll

    # Construct a standard AR-HMM: an MNIW prior on the per-state dynamics,
    # with extra columns in M_0 / K_0 for the higher lags and the affine term
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )

    obs_hypers = get_empirical_ar_params(train_datas, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # ----------------
    # Init Model Param
    # ----------------
    model = armodels.ARWeakLimitStickyHDPHMM(
        # transition concentration (alpha) and top-level DP concentration (gamma)
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # per-state AR dynamics (A, Sigma)
        obs_distns=obs_distns,
        # stickiness: extra prior mass on self-transitions
        kappa=kappa,
    )

    # ----------------
    # Add datasets
    # ----------------

    for data in train_datas:
        model.add_data(data)

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # ------------------------------
    # Log likelihoods at initialization
    # ------------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        # one full Gibbs sweep over states and parameters
        model.resample_model()
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # ----------------------
    # Run for each iteration
    # ----------------------

    # one (train, val, test) evaluation and one wall-clock time per sweep
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])

    lls_train, lls_val, lls_test = \
        zip(*((init_val,) + vals))

    timestamps = np.cumsum((0.,) + timesteps)

    # state sequences after N_iters sweeps
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
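A hypothetical smoke test, using synthetic sequences only to exercise the expected shapes and the return signature (not a meaningful fit):

train = [np.random.randn(200, 2) for _ in range(3)]
val = [np.random.randn(100, 2)]
test = [np.random.randn(100, 2)]
model, lls_train, lls_val, lls_test, timestamps, z = fit_ar_pyhsmm_models(
    train, val, test, K=2, N_iters=10)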
Example #3
File: models.py Project: ekellbuch/behdata
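This variant expects dictionaries keyed by group. Assumed imports, mirroring Example #2 plus `defaultdict`:

import time
import numpy as np
import numpy.random as npr
from collections import defaultdict
from tqdm import trange
from autoregressive import models as armodels
from autoregressive import distributions as ardistributions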
def fit_ar_separate_trans_pyhsmm_models(
    train_datas,
    val_datas,
    test_datas,
    K=2,
    N_iters=2000,
    seed=1,
    lags=1,
    affine=True,
    alpha=10,
    gamma=10,
    kappa=100,
    init_state_distn="uniform",
    observations="ar",
):
    """
    Fit model using separate transition matrices per
    element in dictionary.
    """
    npr.seed(seed)
    assert type(train_datas) is defaultdict

    # flatten all groups into one list for the empirical prior below
    datas_all = []
    for _, datalist in train_datas.items():
        print(len(datalist))
        datas_all.extend(datalist)

    print("Running for {} data chunks".format(len(datas_all)))
    # Standard AR model; all sequences share the observation dimension
    D_obs = datas_all[0].shape[1]

    def evaluate_model(model):
        # training log likelihood
        ll = model.log_likelihood()

        # validation predictive log likelihood (per-group transitions)
        val_pll = 0
        for data_id, data in val_datas.items():
            val_pll += model.log_likelihood(group_id=data_id, data=data)

        # test predictive log likelihood
        test_pll = 0
        for data_id, data in test_datas.items():
            test_pll += model.log_likelihood(group_id=data_id, data=data)

        return ll, val_pll, test_pll

    # Construct a standard AR-HMM: an MNIW prior on the per-state dynamics,
    # with extra columns in M_0 / K_0 for the higher lags and the affine term
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )

    obs_hypers = get_empirical_ar_params(datas_all, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # free space
    del datas_all

    # Init model parameters
    model = armodels.ARWeakLimitStickyHDPHMMSeparateTrans(
        # transition concentration (alpha) and top-level DP concentration (gamma)
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # per-state AR dynamics (A, Sigma), shared across groups
        obs_distns=obs_distns,
        # stickiness: extra prior mass on self-transitions
        kappa=kappa,
    )

    # drop the local reference for very large datasets (the model keeps
    # its own references to these objects)
    del obs_distns

    # --------------
    # Add datasets
    # --------------
    for group_id, datalist in train_datas.items():
        for data in datalist:
            model.add_data(group_id=group_id, data=data)

    # drop the local reference to the training dict for very large datasets
    del train_datas

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # ------------------------------
    # Log likelihoods at initialization
    # ------------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        # one full Gibbs sweep over states and parameters
        model.resample_model()
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # ----------------------
    # Run for each iteration
    # ----------------------

    # one (train, val, test) evaluation and one wall-clock time per sweep
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])
    lls_train, lls_val, lls_test = \
        zip(*((init_val,) + vals))

    timestamps = np.cumsum((0.,) + timesteps)

    # state sequences after N_iters sweeps
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
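A hypothetical call illustrating the container shapes the function expects: `train_datas` maps each group id to a list of sequences, while `val_datas` and `test_datas` map each group id to a single array:

train = defaultdict(list)
train["session_a"].append(np.random.randn(200, 2))
train["session_b"].append(np.random.randn(150, 2))
val = {"session_a": np.random.randn(100, 2), "session_b": np.random.randn(100, 2)}
test = {"session_a": np.random.randn(100, 2)}
model, lls_train, lls_val, lls_test, timestamps, z = \
    fit_ar_separate_trans_pyhsmm_models(train, val, test, K=2, N_iters=10)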