import numpy as np

import autoregressive.models as m
import autoregressive.distributions as di
from pyhsmm.basic.distributions import NegativeBinomialIntegerR2Duration
from pyhsmm.util.text import progprint_xrange


def fit(self):
    # Observation dimension and truncation level come from the first demonstration.
    p = self._demonstration_sizes[0][1]
    Nmax = self._demonstration_sizes[0][0]
    affine = True
    nlags = self.lag

    # AR observation distributions: one matrix-normal/inverse-Wishart prior per state.
    obs_distns = [
        di.AutoRegression(
            nu_0=self.nu,
            S_0=np.eye(p),
            M_0=np.zeros((p, nlags * p + affine)),
            K_0=np.eye(nlags * p + affine),
            affine=affine)
        for state in range(Nmax)
    ]

    # Negative-binomial durations with a uniform prior over r in {1, ..., 10}.
    dur_distns = [
        NegativeBinomialIntegerR2Duration(
            r_discrete_distn=np.ones(10), alpha_0=1., beta_0=1.)
        for state in range(Nmax)
    ]

    model = m.ARWeakLimitHDPHSMMIntNegBin(
        alpha=self.alpha,
        gamma=self.gamma,
        init_state_concentration=self.init_state_concentration,
        obs_distns=obs_distns,
        dur_distns=dur_distns,
    )

    for d in self._demonstrations:
        model.add_data(d, trunc=60)

    # Gibbs sampling.
    for itr in progprint_xrange(20):
        model.resample_model()

    # Convert each inferred state sequence into segment boundaries.
    new_segments = []
    for i in range(len(self._demonstrations)):
        new_segments.append(self.findTransitions(model.states_list[i].stateseq))

    self.segmentation = new_segments
    self.model = model
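
# `findTransitions` is not defined in this section; a minimal sketch of what
# it presumably does: return the indices at which the sampled state sequence
# changes value, i.e. the segment boundaries. The exact signature and return
# format are assumptions.
def findTransitions(self, stateseq):
    stateseq = np.asarray(stateseq)
    return np.where(stateseq[1:] != stateseq[:-1])[0] + 1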
import numpy as np
import matplotlib.pyplot as plt

import pyhsmm
from pyhsmm.util.stats import whiten, cov
import autoregressive.models as m
import autoregressive.distributions as d

###################
#  generate data  #
###################

# Three near-stable two-lag AR processes: a contraction and two rotations.
As = [
    0.99 * np.hstack((-np.eye(2), 2 * np.eye(2))),
    0.99 * np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
                     [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
    0.99 * np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
                     [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
]

truemodel = m.ARHSMM(
    alpha=2., init_state_distn='uniform',
    obs_distns=[d.AutoRegression(A=A, sigma=np.eye(2)) for A in As],
    dur_distns=[pyhsmm.basic.distributions.PoissonDuration(alpha_0=3 * 50, beta_0=3)
                for state in range(len(As))],
)

data, labels = truemodel.generate(1000)
data += np.random.normal(size=data.shape)  # some extra noise

fig, spa = plt.subplots(2, 1)
spa[0].plot(data[:, 0], data[:, 1], 'bx-')
spa[1].plot(data, 'bx-')
spa[1].set_xlim(0, data.shape[0])
fig.suptitle('data')

truemodel.plot()
plt.gcf().suptitle('truth')
### model

import numpy as np

import autoregressive.models as models
import autoregressive.distributions as distributions
from pyhsmm.util.text import progprint_xrange

Nmax = 20
affine = True
nlags = 1
ndim = data.shape[1]  # observation dimension of the data generated above

model = models.FastARWeakLimitStickyHDPHMM(
    alpha=10., gamma=10., kappa=1e4,
    init_state_distn='uniform',
    obs_distns=[
        distributions.AutoRegression(
            nu_0=ndim + 1,
            S_0=np.eye(ndim),
            M_0=np.zeros((ndim, ndim * nlags + affine)),
            K_0=np.eye(ndim * nlags + affine),
            affine=affine)
        for state in range(Nmax)
    ],
)

model.add_data(data)

### inference

# for itr in progprint_xrange(500):
#     model.resample_model()

# model.plot_stateseq(model.states_list[0], plot_slice=slice(6000, 8000))
# plt.show()
import numpy as np
import matplotlib.pyplot as plt

import pyhsmm
from pyhsmm.basic.distributions import NegativeBinomialIntegerR2Duration
import autoregressive.models as m
import autoregressive.distributions as d

###################
#  generate data  #
###################

As = [
    np.hstack((-np.eye(2), 2 * np.eye(2))),
    np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
              [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
    np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
              [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
]

truemodel = m.ARHSMM(
    alpha=4., init_state_concentration=4.,
    obs_distns=[d.AutoRegression(A=A, sigma=0.1 * np.eye(2)) for A in As],
    dur_distns=[pyhsmm.basic.distributions.PoissonDuration(alpha_0=4 * 25, beta_0=4)
                for state in range(len(As))],
)

data, _ = truemodel.generate(1000)

plt.figure()
plt.plot(data[:, 0], data[:, 1], 'bx-')

##################
#  create model  #
##################

Nmax = 20
affine = True
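
# The snippet above breaks off after setting Nmax and affine; a minimal
# sketch of how it presumably continues, mirroring the fit() method earlier
# in this section (nu_0 and the concentration values are illustrative
# assumptions, not taken from the original).
nlags = 2  # the true model above is a two-lag AR process
obs_distns = [
    d.AutoRegression(
        nu_0=4., S_0=np.eye(2),
        M_0=np.zeros((2, nlags * 2 + affine)),
        K_0=np.eye(nlags * 2 + affine),
        affine=affine)
    for state in range(Nmax)
]
dur_distns = [
    NegativeBinomialIntegerR2Duration(
        r_discrete_distn=np.ones(10), alpha_0=1., beta_0=1.)
    for state in range(Nmax)
]
model = m.ARWeakLimitHDPHSMMIntNegBin(
    alpha=4., gamma=4., init_state_concentration=4.,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
)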
import time

import numpy as np
import numpy.random as npr
from tqdm import trange

import autoregressive.models as armodels
import autoregressive.distributions as ardistributions


def fit_ar_pyhsmm_models(
        train_datas,
        val_datas,
        test_datas,
        K=2,
        N_iters=2000,
        seed=1,
        lags=1,
        affine=True,
        alpha=10,
        gamma=10,
        kappa=100,
        init_state_distn="uniform",
        observations="ar",
):
    """
    Fit a sticky HDP-HMM with AR observations to the training data,
    tracking train/validation/test log-likelihoods over Gibbs iterations.
    """
    npr.seed(seed)
    assert (len(train_datas) > 0) and (type(train_datas) is list)
    assert (len(val_datas) > 0) and (type(val_datas) is list)
    assert (len(test_datas) > 0) and (type(test_datas) is list)

    # Standard AR model (scale resampling)
    D_obs = train_datas[0].shape[1]

    def evaluate_model(model):
        # Training log-likelihood
        train_ll = model.log_likelihood()

        # Validation log-likelihood
        val_pll = 0
        for data in val_datas:
            val_pll += model.log_likelihood(data)

        # Test log-likelihood
        test_pll = 0
        for data in test_datas:
            test_pll += model.log_likelihood(data)

        return train_ll, val_pll, test_pll

    # Construct a standard AR-HMM
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )
    obs_hypers = get_empirical_ar_params(train_datas, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # ------------------
    # Init model params
    # ------------------
    model = armodels.ARWeakLimitStickyHDPHMM(
        # sampled from 1d finite pmf
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # create A, Sigma
        obs_distns=obs_distns,
        kappa=kappa,
    )

    # --------------
    # Add datasets
    # --------------
    for data in train_datas:
        model.add_data(data)

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # ------------------------
    # Initial log-likelihoods
    # ------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        model.resample_model()
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # Run the sampler, recording log-likelihoods and wall-clock time per iteration.
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])
    lls_train, lls_val, lls_test = zip(*((init_val,) + vals))
    timestamps = np.cumsum((0.,) + timesteps)

    # State sequences after N_iters
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
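
# `get_empirical_ar_params` is not defined in this section. A sketch of what
# such a helper typically does, assuming pybasicbayes-style max_likelihood
# fitting: fit one AR model to the pooled data, then center the
# inverse-Wishart prior on the fitted noise covariance, using
# E_IW[Sigma] = S_0 / (nu_0 - D - 1). The exact behavior is an assumption.
def get_empirical_ar_params(train_datas, params):
    assert isinstance(train_datas, list) and len(train_datas) > 0
    D = train_datas[0].shape[1]
    assert params["nu_0"] > D + 1

    # Fit a single AR model to all of the data by maximum likelihood.
    obs_distn = ardistributions.AutoRegression(**params)
    obs_distn.max_likelihood(train_datas)

    # Rescale S_0 so the prior mean of Sigma matches the empirical fit.
    params = dict(params)
    params["S_0"] = obs_distn.sigma * (params["nu_0"] - D - 1)
    return params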
from collections import defaultdict


def fit_ar_separate_trans_pyhsmm_models(
        train_datas,
        val_datas,
        test_datas,
        K=2,
        N_iters=2000,
        seed=1,
        lags=1,
        affine=True,
        alpha=10,
        gamma=10,
        kappa=100,
        init_state_distn="uniform",
        observations="ar",
):
    """
    Fit a sticky HDP-AR-HMM with a separate transition matrix per group,
    where `train_datas` is a defaultdict mapping group ids to lists of
    data arrays.
    """
    npr.seed(seed)
    assert type(train_datas) is defaultdict

    datas_all = []
    for _, datalist in train_datas.items():
        print(len(datalist))
        datas_all.extend(datalist)
    print("Running for {} data chunks".format(len(datas_all)))

    # Standard AR model (scale resampling)
    D_obs = datas_all[0].shape[1]

    def evaluate_model(model):
        # Training log-likelihood
        ll = model.log_likelihood()

        # Validation log-likelihood, accumulated per group
        val_pll = 0
        for data_id, data in val_datas.items():
            val_pll += model.log_likelihood(group_id=data_id, data=data)

        # Test log-likelihood, accumulated per group
        test_pll = 0
        for data_id, data in test_datas.items():
            test_pll += model.log_likelihood(group_id=data_id, data=data)

        return ll, val_pll, test_pll

    # Construct a standard AR-HMM
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (lags - 1) + affine)))),
        K_0=np.eye(D_obs * lags + affine),
        affine=affine,
    )
    obs_hypers = get_empirical_ar_params(datas_all, obs_hypers)
    obs_distns = [
        ardistributions.AutoRegression(**obs_hypers) for _ in range(K)
    ]

    # Free memory
    del datas_all

    # Init model params
    model = armodels.ARWeakLimitStickyHDPHMMSeparateTrans(
        # sampled from 1d finite pmf
        alpha=alpha,
        gamma=gamma,
        init_state_distn=init_state_distn,
        # create A, Sigma
        obs_distns=obs_distns,
        kappa=kappa,
    )

    # Free memory for very large datasets
    del obs_distns

    # --------------
    # Add datasets
    # --------------
    for group_id, datalist in train_datas.items():
        for data in datalist:
            model.add_data(group_id=group_id, data=data)

    # Free memory for very large datasets
    del train_datas

    # ---------------------
    # Initialize the states
    # ---------------------
    model.resample_states()

    # ------------------------
    # Initial log-likelihoods
    # ------------------------
    init_val = evaluate_model(model)

    # -----------------------
    # Fit with Gibbs sampling
    # -----------------------
    def sample(model):
        tic = time.time()
        model.resample_model()
        timestep = time.time() - tic
        return evaluate_model(model), timestep

    # Run the sampler, recording log-likelihoods and wall-clock time per iteration.
    vals, timesteps = zip(*[sample(model) for _ in trange(N_iters)])
    lls_train, lls_val, lls_test = zip(*((init_val,) + vals))
    timestamps = np.cumsum((0.,) + timesteps)

    # State sequences after N_iters
    z = [mm.stateseq for mm in model.states_list]

    return model, lls_train, lls_val, lls_test, timestamps, z
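
# Hypothetical usage sketch showing the expected input structure: a
# defaultdict mapping each group id to a list of training arrays, and plain
# dicts of one held-out array per group. Group names and shapes here are
# illustrative assumptions.
train_datas = defaultdict(list)
train_datas["worm0"].append(np.random.randn(500, 4))
train_datas["worm1"].append(np.random.randn(500, 4))
val_datas = {"worm0": np.random.randn(100, 4), "worm1": np.random.randn(100, 4)}
test_datas = {"worm0": np.random.randn(100, 4), "worm1": np.random.randn(100, 4)}

model, lls_train, lls_val, lls_test, timestamps, z = \
    fit_ar_separate_trans_pyhsmm_models(
        train_datas, val_datas, test_datas, K=4, N_iters=100)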
import numpy as np
import matplotlib.pyplot as plt

import pyhsmm
import autoregressive.models as m
import autoregressive.distributions as d

As = [
    0.99 * np.hstack((-np.eye(2), 2 * np.eye(2))),
    np.array([[np.cos(np.pi / 6), -np.sin(np.pi / 6)],
              [np.sin(np.pi / 6), np.cos(np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
    np.array([[np.cos(-np.pi / 6), -np.sin(-np.pi / 6)],
              [np.sin(-np.pi / 6), np.cos(-np.pi / 6)]]).dot(
        np.hstack((-np.eye(2), np.eye(2)))) + np.hstack((np.zeros((2, 2)), np.eye(2))),
]

truemodel = m.ARHSMM(
    alpha=4., init_state_concentration=4.,
    obs_distns=[d.AutoRegression(A=A, sigma=np.eye(2)) for A in As],
    dur_distns=[
        pyhsmm.basic.distributions.PoissonDuration(alpha_0=4 * 25, beta_0=4)
        for state in range(len(As))
    ],
)

# Generate the data in 500-step chunks; T (the total number of timesteps)
# is assumed to be defined elsewhere.
datas = []
labels = []
for t in range(0, T, 500):
    data, label = truemodel.generate(500, keep=True)
    datas.append(data)
    labels.append(label)

# Plot the last generated chunk.
plt.figure()
plt.plot(data[:, 0], data[:, 1], 'bx-')
import itertools

import numpy as np

import autoregressive.models as m
import autoregressive.distributions as d


def make_joint_models(train_datas, Nmax=10):
    # Define a sequence of models; normalize the input to a list of
    # per-worm data arrays.
    if isinstance(train_datas, list) and len(train_datas) > 0:
        data = train_datas
        num_worms = len(train_datas)
    else:
        data = [train_datas]
        num_worms = 1

    print('Making models')
    names_list = []
    fnames_list = []
    hmm_list = []
    color_list = []
    method_list = []

    # Standard AR model (scale resampling)
    D_obs = data[0].shape[1]
    print('D_obs shape {}'.format(data[0].shape[1]))
    affine = True
    nlags = 1
    init_state_distn = 'uniform'

    # Construct a standard AR-HMM prior, shared across the model grid below.
    obs_hypers = dict(
        nu_0=D_obs + 2,
        S_0=np.eye(D_obs),
        M_0=np.hstack(
            (np.eye(D_obs), np.zeros((D_obs, D_obs * (nlags - 1) + affine)))),
        K_0=np.eye(D_obs * nlags + affine),
        affine=affine)

    # Joint model: fit all worms at once, sweeping a grid of hyperparameters
    # (number of states and stickiness kappa) for the scale-resampling model.
    state_array = [1, 2, 4, 6, 8, 10, 12, 15]
    alpha_array = [10.0]
    gamma_array = [10.0]
    kappa_array = 10 ** np.arange(2, 11)[::2]

    for num_states, alpha_a_0, gamma_a_0, kappa_a_0 in itertools.product(
            state_array, alpha_array, gamma_array, kappa_array):

        # Center the prior on empirical AR statistics from all worms.
        obs_hypers = get_empirical_ar_params(data, obs_hypers)
        obs_distns = [
            d.AutoRegression(**obs_hypers) for state in range(num_states)
        ]

        names_list.append("AR-HMM (Scale)")
        fnames_list.append(
            "ar_scale_wormall_states%d_alpha%.1f_gamma%.1f_kappa%.1f" %
            (num_states, alpha_a_0, gamma_a_0, kappa_a_0))
        color_list.append(allcolors[1])  # allcolors is assumed defined elsewhere

        # Init model params
        hmm = m.ARWeakLimitStickyHDPHMM(
            # sampled from 1d finite pmf
            alpha=alpha_a_0,
            gamma=gamma_a_0,
            init_state_distn=init_state_distn,
            # create A, Sigma
            obs_distns=obs_distns,
            kappa=kappa_a_0,
        )

        # Add the data of each worm
        for cworm in np.arange(num_worms):
            hmm.add_data(data[cworm])

        # Append model and fitting method (`fit` is assumed defined elsewhere)
        hmm_list.append(hmm)
        method_list.append(fit)

    print('Finished making baseline models')
    return names_list, fnames_list, color_list, hmm_list, method_list
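
# Hypothetical usage sketch: build the model grid and fit each candidate.
# `worm_datas` (a list of (T_i, D_obs) arrays) and the `fit` routine
# collected in method_list are assumed to be defined elsewhere.
names, fnames, colors, hmms, methods = make_joint_models(worm_datas)
for name, fname, hmm, fit_fn in zip(names, fnames, hmms, methods):
    print("Fitting {} ({})".format(name, fname))
    fit_fn(hmm)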