def __init__(self, K, D, x_grids, y_grids, device=torch.device('cpu')):
    """
    Set up the grid and the per-state dynamics for a single 2-D trajectory.

    :param K: number of discrete states (forwarded to the parent constructor)
    :param D: observation dimension; must equal 2
    :param x_grids: grid edges along x, [x_0, x_1, ..., x_m]
    :param y_grids: grid edges along y, [y_0, y_1, ..., y_n]
    :param device: stored as ``self.device``; it is not used when the
        tensors below are created
    """
    super(SingleLinearGridTransformation, self).__init__(K, D)
    assert D == 2, D
    self.device = device

    # Grid edges are held as frozen (non-trainable) float64 parameters.
    x_edges = check_and_convert_to_tensor(x_grids, dtype=torch.float64)
    self.x_grids = nn.Parameter(x_edges, requires_grad=False)
    y_edges = check_and_convert_to_tensor(y_grids, dtype=torch.float64)
    self.y_grids = nn.Parameter(y_edges, requires_grad=False)

    # Number of cells per axis, plus the number of edges along y.
    self.n_x = len(x_grids) - 1
    self.n_y = len(y_grids) - 1
    self.ly = len(y_grids)

    # Every (x, y) edge combination, x-major; shape (n_gps, D).
    corner_list = []
    for edge_x in self.x_grids:
        for edge_y in self.y_grids:
            corner_list.append((edge_x, edge_y))
    self.gridpoints = np.array(corner_list)

    # Number of basis grid points.
    self.n_gps = self.gridpoints.shape[0]

    # Trainable dynamics attached to each grid point, one set per state.
    initial_us = torch.rand((self.K, self.n_gps, self.D), dtype=torch.float64)
    self.us = nn.Parameter(initial_us, requires_grad=True)
def most_likely_states(self, data, input=None, transition_mkwargs=None, **memory_kwargs):
    """
    Return the Viterbi (most likely) discrete-state sequence for ``data``.

    :param data: observations; an empty sequence yields an empty array
    :param input: optional inputs; required when the transition is an
        ``InputDrivenTransition``
    :param transition_mkwargs: optional dict of extra keyword arguments
        forwarded to ``self.transition.log_transition_matrix``
    :param memory_kwargs: forwarded to ``self.observation.log_prob``
    :return: whatever ``viterbi`` returns for the computed log-probabilities
    :raises ValueError: if the transition is input driven and ``input`` is None
    """
    if len(data) == 0:
        return np.array([])

    if isinstance(self.transition, InputDrivenTransition) and input is None:
        raise ValueError("Please provide input.")

    if input is not None:
        input = check_and_convert_to_tensor(input, device=self.device)
    data = check_and_convert_to_tensor(data, device=self.device)
    T = data.shape[0]

    log_pi0 = get_np(self.init_state_distn.log_probs)

    if isinstance(self.transition, StationaryTransition):
        # A single (K, K) matrix with a leading axis of length one.
        log_Ps = get_np(self.transition.log_stationary_transition_matrix)
        log_Ps = log_Ps[None, ]
    else:
        assert isinstance(self.transition, GridTransition), type(self.transition)
        transition_mkwargs = transition_mkwargs if transition_mkwargs else {}
        # ``input`` is a tensor here, so its truth value must not be used:
        # bool() of a multi-element tensor raises.  Compare against None.
        if input is not None:
            input = input[:-1]
        log_Ps = self.transition.log_transition_matrix(
            data[:-1], input, **transition_mkwargs)
        log_Ps = get_np(log_Ps)
        assert log_Ps.shape == (T - 1, self.K, self.K), log_Ps.shape

    log_likes = get_np(self.observation.log_prob(data, **memory_kwargs))
    return viterbi(log_pi0, log_Ps, log_likes)
def get_lp_coefficients(points, Q11s, Q22s, device=torch.device('cpu')):
    """
    Compute, for each point, the weights of the four corners
    (Q11, Q12, Q21, Q22) of its enclosing rectangle.

    Each weight is the product of the point's distances to the opposite
    edges, divided by the rectangle's area.

    :param points: (T, 2)
    :param Q11s: (T, 2), the (x1, y1) corner for each point
    :param Q22s: (T, 2), the (x2, y2) corner for each point
    :param device: device passed on when converting Q11s and Q22s
    :return: coefficients of shape (T, 4), ordered (Q11, Q12, Q21, Q22)
    """
    lower = check_and_convert_to_tensor(Q11s, device=device)
    upper = check_and_convert_to_tensor(Q22s, device=device)
    n_points = points.shape[0]

    cell_area = (upper[:, 0] - lower[:, 0]) * (upper[:, 1] - lower[:, 1])

    # Distances from each point to the four edges of its rectangle.
    to_x2 = upper[:, 0] - points[:, 0]
    to_y2 = upper[:, 1] - points[:, 1]
    from_x1 = points[:, 0] - lower[:, 0]
    from_y1 = points[:, 1] - lower[:, 1]

    unnormalized = torch.stack(
        (to_x2 * to_y2,        # Q11
         to_x2 * from_y1,      # Q12
         from_x1 * to_y2,      # Q21
         from_x1 * from_y1),   # Q22
        dim=-1)  # (T, 4)

    coeffs = unnormalized / (cell_area[:, None])
    assert coeffs.shape == (n_points, 4)
    return coeffs
def sample_condition_on_zs(self, zs, x0=None, transformation=False, return_np=True, **kwargs):
    """
    Given a z sequence, generate samples conditioned on this sequence.

    :param zs: (T, ) discrete states, T > 0
    :param x0: optional first observation, shape (D,); sampled when omitted
    :param transformation: forwarded to ``sample_x`` as ``with_noise``
    :param return_np: return np.ndarray (True) or torch.tensor (False)
    :param kwargs: forwarded to ``sample_x`` for every step after the first
    :return: generated samples (T, D).  For T == 1 the single draw from
        ``sample_x`` is returned as is, or None when ``x0`` was supplied
        (there is nothing left to sample).
    """
    zs = check_and_convert_to_tensor(zs, dtype=torch.int, device=self.device)
    T = zs.shape[0]
    assert T > 0
    dtype = torch.float64

    if T == 1:
        if x0 is not None:
            print("Nothing to sample")
            return
        # Honour the caller's requested return type on this path as well.
        return self.observation.sample_x(zs[0], with_noise=transformation,
                                         return_np=return_np)

    # Allocate on the model's device so the history slices handed to
    # sample_x live on the same device as zs and x0.
    xs = torch.zeros((T, self.D), dtype=dtype, device=self.device)

    if x0 is None:
        x0 = self.observation.sample_x(zs[0], with_noise=transformation,
                                       return_np=False)
    else:
        x0 = check_and_convert_to_tensor(x0, dtype=dtype, device=self.device)
    assert x0.shape == (self.D, )
    xs[0] = x0

    for t in range(1, T):
        # The history keyword is ``xhist``.  The previous spelling ``xihst``
        # was swallowed as an extra keyword, so no history was conditioned on.
        xs[t] = self.observation.sample_x(zs[t], xhist=xs[:t],
                                          with_noise=transformation,
                                          return_np=False, **kwargs)

    if return_np:
        return get_np(xs)
    return xs
def most_likely_states(self, data, input=None, cache=None, transition_mkwargs=None, **memory_kwargs):
    """
    Return the most likely state sequence via ``hsmm_viterbi``.

    :param data: observations; an empty sequence yields an empty array
    :param input: optional inputs, converted to a tensor when given
    :param cache: optional dict with precomputed entries under the keys
        "log_pi0", "log_Ps" and "bwd_obs_log_probs"; missing entries are
        computed here
    :param transition_mkwargs: optional dict of extra keyword arguments
        forwarded to ``self.transition.log_transition_matrix``
    :param memory_kwargs: forwarded to ``self.observation.log_prob``
    :return: the result of ``hsmm_viterbi``
    """
    with torch.no_grad():
        if len(data) == 0:
            return np.array([])

        cache = cache if cache else {}
        log_pi0 = cache.get("log_pi0", None)
        log_Ps = cache.get("log_Ps", None)
        bwd_obs_logprobs = cache.get("bwd_obs_log_probs", None)

        if input is not None:
            input = check_and_convert_to_tensor(input, device=self.device)
        data = check_and_convert_to_tensor(data, device=self.device)
        T = data.shape[0]

        if log_pi0 is None:
            log_pi0 = self.init_state_distn.log_probs  # (K, )

        if log_Ps is None:
            if isinstance(self.transition, StationaryTransition):
                # (K, K) tiled along a new leading axis to (T-1, K, K).
                log_Ps = self.transition.log_stationary_transition_matrix
                log_Ps = log_Ps[None, ].repeat(T - 1, 1, 1)
            else:
                assert isinstance(self.transition, GridTransition), type(self.transition)
                transition_mkwargs = transition_mkwargs if transition_mkwargs else {}
                # ``input`` is a tensor here; testing its truth value raises
                # for more than one element, so compare against None.
                if input is not None:
                    input = input[:-1]
                log_Ps = self.transition.log_transition_matrix(
                    data[:-1], input, **transition_mkwargs)
                assert log_Ps.shape == (T - 1, self.K, self.K), log_Ps.shape

        if bwd_obs_logprobs is None:
            log_likes = self.observation.log_prob(data, **memory_kwargs)  # (T, K)
            bwd_obs_logprobs = self.stacked_bw_log_likes_helper(log_likes, self.L)

        return hsmm_viterbi(log_pi0,
                            trans_logprobs=log_Ps,
                            bwd_obs_logprobs=bwd_obs_logprobs,
                            len_logprobs=self.len_logprobs)
def get_masks_for_single_animal(data_a, x_grids, y_grids):
    """
    Build one membership mask per grid cell for a 2-D trajectory.

    Cells are enumerated x-major (all y cells of the first x bin, then the
    next x bin, ...).  The first bin along each axis is closed on both
    sides; every later bin excludes its lower edge.

    :param data_a: (T, 2)
    :param x_grids: grid edges along x
    :param y_grids: grid edges along y
    :return: tensor of G stacked masks, each a 0/1-valued double vector of
        length T
    """
    positions = check_and_convert_to_tensor(data_a)

    def _within(values, edges, k):
        # Lower edge is inclusive only for the very first bin.
        if k == 0:
            return (edges[k] <= values) & (values <= edges[k + 1])
        return (edges[k] < values) & (values <= edges[k + 1])

    cell_masks = [
        (_within(positions[:, 0], x_grids, ix)
         & _within(positions[:, 1], y_grids, iy)).double()
        for ix in range(len(x_grids) - 1)
        for iy in range(len(y_grids) - 1)
    ]
    stacked = torch.stack(cell_masks, dim=0)
    # TODO: some model may generate out of box samples, so the check that
    # every time step falls in exactly one cell is intentionally not made.
    return stacked
def transform_condition_on_z(self, z, inputs_self, inputs_other, **memory_kwargs):
    """
    One-step transformation of the last observation under state ``z``.

    :param z: an integer
    :param inputs_self: (T_pre, d)
    :param inputs_other: (T_pre, d); not used by this method
    :param memory_kwargs: may carry a precomputed "feature_vec" of shape
        (Df, d); otherwise it is computed from the last row of inputs_self
    :return: (d, ) the last row of inputs_self plus acc_factor times the
        Ws[z]-weighted combination of the feature vectors
    """
    supplied = memory_kwargs.get("feature_vec", None)
    if supplied is not None:
        feature_vec = check_and_convert_to_tensor(supplied)
    else:
        computed = self.feature_vec_func(inputs_self[-1:])
        assert computed.shape == (1, self.Df, 2)
        feature_vec = torch.squeeze(computed, dim=0)
    assert feature_vec.shape == (self.Df, self.d)

    # The asserts imply (1, Df) * (Df, d) -> (1, d); the row of weights is Ws[z].
    delta = torch.matmul(self.Ws[z][None], feature_vec)
    assert delta.shape == (1, self.d)
    delta = torch.squeeze(delta, dim=0)
    assert delta.shape == (self.d, )

    result = inputs_self[-1] + self.acc_factor * delta
    assert result.shape == (self.d, )
    return result
def get_masks(self, data):
    """
    Build one membership mask per grid cell using the instance's grid edges.

    Cells are enumerated x-major.  The first bin along each axis is closed
    on both sides; every later bin excludes its lower edge.

    :param data: (T, 2)
    :return: tensor of G stacked masks, each a 0/1-valued double vector of
        length T.  Asserts that every time step lies in exactly one cell.
    """
    points = check_and_convert_to_tensor(data)
    x_edges = self.x_grids
    y_edges = self.y_grids

    def _within(values, edges, k):
        # Lower edge is inclusive only for the very first bin.
        if k == 0:
            return (edges[k] <= values) & (values <= edges[k + 1])
        return (edges[k] < values) & (values <= edges[k + 1])

    cell_masks = [
        (_within(points[:, 0], x_edges, ix)
         & _within(points[:, 1], y_edges, iy)).double()
        for ix in range(len(x_edges) - 1)
        for iy in range(len(y_edges) - 1)
    ]
    stacked = torch.stack(cell_masks, dim=0)
    assert torch.all(stacked.sum(dim=0) == 1)
    return stacked
def k_step_prediction_for_lineargrid_model(model, model_z, data, **kwargs):
    """
    One-step-ahead predictions for a linear-grid model using cached memory.

    For D == 4 the cache entries "feature_vecs", "gridpoints" and
    "gridpoints_idx" are each expected as an (a, b) pair; for D == 2 the
    entries "coeffs" and "gridpoints_idx" are used.  When a required entry
    is missing, the generic ``k_step_prediction`` is used instead.

    :param model: model exposing ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: (T, D) observations with D in {2, 4}
    :return: None for empty data; otherwise an array with one prediction
        per time step (both the D == 4 and the D == 2 paths)
    """
    if len(data) == 0:
        return None

    data = check_and_convert_to_tensor(data)
    _, D = data.shape
    assert D == 2 or D == 4

    gridpoints_idx = kwargs.get("gridpoints_idx", None)

    if D == 4:
        feature_vecs = kwargs.get("feature_vecs", None)
        gridpoints = kwargs.get("gridpoints", None)
        if feature_vecs is None or gridpoints_idx is None or gridpoints is None:
            print("Did not provide memory information")
            return k_step_prediction(model, model_z, data)

        grid_points_idx_a, grid_points_idx_b = gridpoints_idx
        gridpoints_a, gridpoints_b = gridpoints
        feature_vecs_a, feature_vecs_b = feature_vecs

        def step_memory(t):
            # Memory describing time step t - 1, used to predict step t.
            return dict(
                gridpoints=(gridpoints_a[t - 1], gridpoints_b[t - 1]),
                gridpoints_idx=(grid_points_idx_a[t - 1], grid_points_idx_b[t - 1]),
                feature_vec=(feature_vecs_a[t - 1:t], feature_vecs_b[t - 1:t]))
    else:
        coeffs = kwargs.get("coeffs", None)
        if coeffs is None or gridpoints_idx is None:
            print("Did not provide memory ")
            return k_step_prediction(model, model_z, data)

        def step_memory(t):
            return dict(coeffs=coeffs[t - 1:t],
                        gridpoints_idx=gridpoints_idx[t - 1])

    x_predict_arr = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True,
                                   with_noise=True)
    ]
    for t in range(1, data.shape[0]):
        x_predict = model.observation.sample_x(model_z[t], data[t - 1:t],
                                               return_np=True, with_noise=True,
                                               **step_memory(t))
        # The D == 2 path used to drop these predictions and return a
        # one-element list; both paths now collect every step.
        x_predict_arr.append(x_predict)

    return np.array(x_predict_arr)
def k_step_prediction_for_lstm_model(model, model_z, data, feature_vecs=None):
    """
    One-step-ahead predictions that reuse precomputed feature vectors.

    :param model: model exposing ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: observations, converted to a tensor
    :param feature_vecs: pair (feature_vecs_a, feature_vecs_b); when None the
        generic ``k_step_prediction`` is used instead
    :return: array of per-step predictions
    """
    data = check_and_convert_to_tensor(data)

    if feature_vecs is None:
        print("Did not provide memory information")
        return k_step_prediction(model, model_z, data)

    vecs_a, vecs_b = feature_vecs

    # The first step has no history and is sampled with sample_x defaults.
    predictions = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True)
    ]
    for step in range(1, data.shape[0]):
        previous = slice(step - 1, step)
        predictions.append(
            model.observation.sample_x(
                model_z[step], data[:step], return_np=True, with_noise=True,
                feature_vec=(vecs_a[previous], vecs_b[previous])))

    return np.array(predictions)
def k_step_prediction_for_momentum_feature_model(model, model_z, data, momentum_vecs=None, features=None):
    """
    One-step-ahead predictions that reuse precomputed momentum and features.

    :param model: model exposing ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: observations, converted to a tensor
    :param momentum_vecs: per-step momentum vectors
    :param features: pair of per-step feature sequences
    :return: array of per-step predictions; falls back to the generic
        ``k_step_prediction`` when either cache is missing
    """
    data = check_and_convert_to_tensor(data)

    cache_missing = momentum_vecs is None or features is None
    if cache_missing:
        return k_step_prediction(model, model_z, data)

    # The first step has no history and is sampled with sample_x defaults.
    predictions = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True)
    ]
    for step in range(1, data.shape[0]):
        last = step - 1
        predictions.append(
            model.observation.sample_x(
                model_z[step], data[:step], return_np=True, with_noise=True,
                momentum_vec=momentum_vecs[last],
                features=(features[0][last], features[1][last])))

    return np.array(predictions)
def __init__(self, mus, log_sigmas, bounds, device=torch.device('cpu')):
    """
    Store the parameters of a truncated normal distribution.

    :param mus: stored unchanged as ``self.mus``
    :param log_sigmas: stored unchanged as ``self.log_sigmas``
    :param bounds: converted to a float64 tensor on ``device``; per the
        original note its shape is mus.shape + (2, )
    :param device: device used for the converted bounds
    """
    super(TruncatedNormal, self).__init__()
    self.mus, self.log_sigmas = mus, log_sigmas

    bounds_tensor = check_and_convert_to_tensor(bounds,
                                                dtype=torch.float64,
                                                device=device)
    self.bounds = bounds_tensor  # mus.shape + (2, )
def __init__(self, K, D, M=0, logits=None, dtype=torch.float64):
    """
    Create the trainable logits of the initial-state distribution.

    :param K: number of discrete states
    :param D: stored as ``self.D``
    :param M: stored as ``self.M``
    :param logits: optional initial logits; a vector of K ones when omitted
    :param dtype: dtype of the logits tensor
    """
    super(BaseInitStateDistn, self).__init__()
    self.K = K
    self.D = D
    self.M = M

    initial_logits = (torch.ones(self.K, dtype=dtype)
                      if logits is None
                      else check_and_convert_to_tensor(logits, dtype=dtype))
    self.logits = nn.Parameter(initial_logits, requires_grad=True)
def k_step_prediction_for_gpgrid_model(model, model_z, data, **memory_kwargs):
    """
    One-step-ahead predictions for a GP-grid model using cached memory.

    The cache is read from the keys feature_vecs_a/b, gpt_idx_a/b,
    grid_idx_a/b and either coeff_a/b (when "dist_sq_a" is absent) or
    dist_sq_a/b.  With no memory kwargs at all, the generic
    ``k_step_prediction`` is used.

    :param model: model exposing ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: observations, converted to a tensor
    :return: array of per-step predictions
    """
    data = check_and_convert_to_tensor(data)

    if memory_kwargs == {}:
        print("Did not provide memory information")
        return k_step_prediction(model, model_z, data)

    feature_vecs_a = memory_kwargs.get("feature_vecs_a", None)
    feature_vecs_b = memory_kwargs.get("feature_vecs_b", None)
    gpt_idx_a = memory_kwargs.get("gpt_idx_a", None)
    gpt_idx_b = memory_kwargs.get("gpt_idx_b", None)
    grid_idx_a = memory_kwargs.get("grid_idx_a", None)
    grid_idx_b = memory_kwargs.get("grid_idx_b")
    coeff_a = memory_kwargs.get("coeff_a", None)
    coeff_b = memory_kwargs.get("coeff_b", None)
    dist_sq_a = memory_kwargs.get("dist_sq_a", None)
    dist_sq_b = memory_kwargs.get("dist_sq_b", None)

    # Which pair of extras accompanies the shared entries is decided by
    # whether squared distances for animal a were supplied.
    use_coeffs = dist_sq_a is None

    # The first step has no history and is sampled with sample_x defaults.
    predictions = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True)
    ]
    for step in range(1, data.shape[0]):
        prev = slice(step - 1, step)
        step_memory = dict(feature_vec_a=feature_vecs_a[prev],
                           feature_vec_b=feature_vecs_b[prev],
                           gpt_idx_a=gpt_idx_a[prev],
                           gpt_idx_b=gpt_idx_b[prev],
                           grid_idx_a=grid_idx_a[prev],
                           grid_idx_b=grid_idx_b[prev])
        if use_coeffs:
            step_memory.update(coeff_a=coeff_a[prev], coeff_b=coeff_b[prev])
        else:
            step_memory.update(dist_sq_a=dist_sq_a[prev],
                               dist_sq_b=dist_sq_b[prev])
        predictions.append(
            model.observation.sample_x(model_z[step], data[:step],
                                       return_np=True, with_noise=True,
                                       **step_memory))

    return np.array(predictions)
def __init__(self, K, D, x_grids, y_grids, Df, feature_vec_func, tran=None, acc_factor=2, lags=1,
             use_log_prior=False, no_boundary_prior=False, add_log_diagonal_prior=False,
             log_prior_sigma_sq=-np.log(1e3), device=torch.device('cpu'), version=1):
    """
    Set up the grid, the feature function and the weights Ws.

    :param K: number of discrete states
    :param D: observation dimension; ``self.d`` is int(D / 2)
    :param x_grids: grid edges along x, [x_0, x_1, ..., x_m]
    :param y_grids: grid edges along y, [y_0, y_1, ..., y_n]
    :param Df: number of features
    :param feature_vec_func: callable stored as ``self.feature_vec_func``
    :param tran: optional LinearGridTransformation to copy prior settings,
        acc_factor and Ws from; when given, the corresponding keyword
        arguments of this constructor are ignored
    :param lags: must be 1
    :param device: device for the tensors created here
    :param version: stored as ``self.version``
    """
    assert lags == 1, "lags should be 1 for lineargrid with_noise."
    super(LinearGridTransformation, self).__init__(K, D)

    self.version = version
    self.d = int(self.D / 2)
    self.device = device

    self.x_grids = check_and_convert_to_tensor(
        x_grids, dtype=torch.float64, device=self.device)  # [x_0, x_1, ..., x_m]
    self.y_grids = check_and_convert_to_tensor(
        y_grids, dtype=torch.float64, device=self.device)  # [y_0, y_1, ..., y_n]
    self.n_x = len(x_grids) - 1
    self.n_y = len(y_grids) - 1

    # Every (x, y) edge combination, x-major, transposed to (d, n_gps).
    corner_pairs = [(edge_x, edge_y)
                    for edge_x in self.x_grids
                    for edge_y in self.y_grids]
    self.gridpoints = torch.transpose(
        torch.tensor(corner_pairs, device=device), 0, 1)
    # Number of basis grid points.
    self.GP = self.gridpoints.shape[1]

    self.Df = Df
    self.feature_vec_func = feature_vec_func

    if tran is None:
        # Fresh model: take settings from the arguments, random weights.
        self.use_log_prior = use_log_prior
        self.add_log_diagonal_prior = add_log_diagonal_prior
        self.no_boundary_prior = no_boundary_prior
        self.log_prior_sigma_sq = torch.tensor(
            log_prior_sigma_sq, dtype=torch.float64, device=device)
        self.acc_factor = acc_factor
        self.Ws = torch.rand(self.K, 2, self.GP, self.Df,
                             dtype=torch.float64, requires_grad=True,
                             device=self.device)
    else:
        # Copy settings and weight values from an existing transformation.
        assert isinstance(tran, LinearGridTransformation)
        self.use_log_prior = tran.use_log_prior
        self.add_log_diagonal_prior = tran.add_log_diagonal_prior
        self.no_boundary_prior = tran.no_boundary_prior
        self.log_prior_sigma_sq = torch.tensor(
            get_np(tran.log_prior_sigma_sq), dtype=torch.float64,
            device=self.device)
        self.acc_factor = tran.acc_factor
        self.Ws = torch.tensor(get_np(tran.Ws), dtype=torch.float64,
                               requires_grad=True, device=self.device)
def transform_condition_on_z(self, z, inputs_self, inputs_other, **memory_kwargs):
    """
    One-step transformation of the last observation under state ``z``,
    with feature weights that depend on the joint (self, other) input.

    :param z: an integer
    :param inputs_self: (T_pre, d)
    :param inputs_other: (T_pre, d); required, same shape as inputs_self
    :param memory_kwargs: may carry a precomputed "feature_vec" of shape
        (Df, d); otherwise it is computed from the last row of inputs_self
    :return: (d, ) the last row of inputs_self plus acc_factor times the
        sigmoid-weighted combination of the feature vectors
    """
    assert inputs_other is not None
    inputs_other = check_and_convert_to_tensor(inputs_other)
    assert inputs_other.shape == inputs_self.shape

    feature_vec = memory_kwargs.get("feature_vec", None)
    if feature_vec is None:
        feature_vec = self.feature_vec_func(inputs_self[-1:])
        assert feature_vec.shape == (1, self.Df, 2)
        feature_vec = torch.squeeze(feature_vec, dim=0)
    else:
        feature_vec = check_and_convert_to_tensor(feature_vec)
    assert feature_vec.shape == (self.Df, self.d)

    # (T_pre, D)
    inputs = torch.cat((inputs_self, inputs_other), dim=-1)

    # (Df, D) * (D, 1) --> (Df, 1).  The last input row must be a column
    # vector: the previous ``inputs[-1:]`` had shape (1, D), which cannot be
    # right-multiplied by a (Df, D) matrix.
    # NOTE(review): assumes self.As[z] is (Df, D), as the shape assert
    # below implies -- confirm against where As is created.
    last_input = inputs[-1][:, None]
    weights = torch.matmul(self.As[z], last_input)
    assert weights.shape == (self.Df, 1)
    weights = torch.squeeze(weights, dim=-1) + self.bs[z]
    assert weights.shape == (self.Df, )

    # (1, Df) * (Df, d) -> (1, d)
    out = torch.matmul(torch.sigmoid(weights[None, ]), feature_vec)
    assert out.shape == (1, self.d)
    out = torch.squeeze(out, dim=0)
    assert out.shape == (self.d,)

    out = inputs_self[-1] + self.acc_factor * out
    assert out.shape == (self.d,)
    return out
def transform_condition_on_z(self, z, inputs_self, inputs_other, **memory_kwargs):
    """
    One-step transformation of the last observation under state ``z``,
    combining a momentum vector with the feature vectors.

    :param z: an integer
    :param inputs_self: (T_pre, d)
    :param inputs_other: (T_pre, d)
    :param memory_kwargs: may carry precomputed "momentum_vec" (d, ) and
        "feature_vec" (Df, d); missing entries are computed from the inputs
    :return: (d, ) the last row of inputs_self plus acc_factor times the
        sigmoid(Ws[z])-weighted combination of momentum and features
    """
    cached_momentum = memory_kwargs.get("momentum_vec", None)
    cached_feature = memory_kwargs.get("feature_vec", None)

    if cached_momentum is not None:
        momentum_vec = check_and_convert_to_tensor(cached_momentum)
    else:
        momentum_vec = get_momentum(inputs_self,
                                    lags=self.momentum_lags,
                                    weights=self.momentum_weights)  # (d, )
    assert momentum_vec.shape == (self.d,)

    if cached_feature is not None:
        feature_vec = check_and_convert_to_tensor(cached_feature)
    else:
        computed = self.feature_vec_func(inputs_self[-1:], inputs_other[-1:])
        assert computed.shape == (1, self.Df, 2)
        feature_vec = torch.squeeze(computed, dim=0)
    assert feature_vec.shape == (self.Df, self.d)

    # Momentum goes first, followed by the Df feature rows: (1+Df, d).
    stacked = torch.cat((momentum_vec[None, ], feature_vec), dim=0)
    assert stacked.shape == (1 + self.Df, self.d)

    # (1, 1+Df) * (1+Df, d) -> (1, d)
    gate = torch.sigmoid(self.Ws[z][None])
    delta = torch.matmul(gate, stacked)
    assert delta.shape == (1, self.d)
    delta = torch.squeeze(delta, dim=0)
    assert delta.shape == (self.d,)

    result = inputs_self[-1] + self.acc_factor * delta
    assert result.shape == (self.d,)
    return result
def __init__(self, mus, log_sigmas, bounds, alpha=1.0):
    """
    Store the parameters of a logit-normal distribution.

    :param mus: stored unchanged as ``self.mus``
    :param log_sigmas: stored unchanged as ``self.log_sigmas``
    :param bounds: converted to a float64 tensor; per the original note its
        shape is mus.shape + (2, ), which is not enforced here
    :param alpha: scalar, stored as a 0-dimensional float64 tensor
    """
    super(LogitNormal, self).__init__()
    self.mus, self.log_sigmas = mus, log_sigmas

    self.bounds = check_and_convert_to_tensor(
        bounds, dtype=torch.float64)  # mus.shape + (2, )

    alpha_tensor = torch.tensor(alpha, dtype=torch.float64)
    self.alpha = alpha_tensor
    assert self.alpha.shape == ()
def get_mu_and_cov_for_single_animal(self, inputs, animal_idx, mu_only=False, **kwargs):
    """
    Predictive mean (and covariance) for one of the two animals.

    :param inputs: (T, 2) positions of the selected animal
    :param animal_idx: 0 for animal a, 1 for animal b
    :param mu_only: when True, skip the covariance and return ``(mu, 0)``
    :param kwargs: may carry a precomputed cache under "Sigma_a"/"A_a" (or
        "Sigma_b"/"A_b"); when A is absent both are computed through
        ``self.get_gp_cache``
    :return: mu (T, K, 2) and cov, or (mu, 0) when mu_only is set
    """
    assert animal_idx == 0 or animal_idx == 1, animal_idx

    inputs = check_and_convert_to_tensor(inputs, dtype=torch.float64,
                                         device=self.device)
    T, _ = inputs.shape

    # Cache keys and parameter columns belonging to the selected animal.
    if animal_idx == 0:
        sigma_key, a_key, cols = "Sigma_a", "A_a", slice(0, 2)
    else:
        sigma_key, a_key, cols = "Sigma_b", "A_b", slice(2, 4)

    # A supplied cache is useful when train_rs = False and train_vs = False.
    Sigma = kwargs.get(sigma_key, None)
    A = kwargs.get(a_key, None)
    if A is None:
        Sigma, A = self.get_gp_cache(inputs, animal_idx, A_only=mu_only,
                                     **kwargs)
        assert A.shape == (T, self.K, 2, 2 * self.n_gps), A.shape

    # Dynamics at the grid points for this animal.
    us = self.us[..., cols]
    assert us.shape == (self.K, self.n_gps, 2), \
        "the correct shape is {}, instead we got {}".format((self.K, self.n_gps, 2), us.shape)
    us = torch.reshape(us, (self.K, -1))  # (K, n_gps*2)

    # (T, K, 2, 2*n_gps) * (K, 2*n_gps, 1) -> (T, K, 2, 1)
    mu = torch.squeeze(torch.matmul(A, us[..., None]), dim=-1)
    mu = mu + inputs[:, None]
    assert mu.shape == (T, self.K, 2)

    if mu_only:
        return mu, 0

    assert Sigma.shape == (T, self.K, 2, 2), Sigma.shape

    # (K, 2) -> diagonal matrices (K, 2, 2)
    # NOTE(review): exp(log_sigmas) is added here unsquared, whereas the
    # single-animal variant adds sigma**2 -- confirm which is intended.
    sigma = torch.diag_embed(torch.exp(self.log_sigmas[:, cols]))
    cov = Sigma + sigma
    return mu, cov
def get_masks_for_two_animals(data, x_grids, y_grids):
    """
    Build grid-cell masks separately for each of two animals.

    Columns 0:2 of ``data`` are passed to ``get_masks_for_single_animal``
    for animal a, columns 2:4 for animal b.

    :param data: (T, 4)
    :param x_grids: grid edges along x
    :param y_grids: grid edges along y
    :return: (masks_a, masks_b), each the result of
        ``get_masks_for_single_animal`` for that animal
    """
    data = check_and_convert_to_tensor(data)
    _, n_cols = data.shape
    assert n_cols == 4

    animal_a = data[:, 0:2]
    masks_a = get_masks_for_single_animal(animal_a, x_grids, y_grids)
    animal_b = data[:, 2:4]
    masks_b = get_masks_for_single_animal(animal_b, x_grids, y_grids)
    return masks_a, masks_b
def get_mu_and_cov_for_single_animal(self, inputs, mu_only=False, dtype=torch.float64, **kwargs):
    """
    Predictive mean (and diagonal covariance) for a single animal.

    :param inputs: (T, 2)
    :param mu_only: when True, skip the covariance and return ``(mu, 0)``
    :param dtype: dtype used when converting ``inputs``
    :param kwargs: may carry a precomputed cache under "Sigma" and "A";
        when "A" is absent both are computed through ``self.get_gp_cache``
    :return: mu: (T, K, 2), cov (T, K, 2)
    """
    inputs = check_and_convert_to_tensor(inputs, dtype=dtype,
                                         device=self.device)
    T, _ = inputs.shape

    # A supplied cache is useful when train_rs = False and train_vs = False.
    Sigma = kwargs.get("Sigma", None)
    A = kwargs.get("A", None)
    if A is None:
        Sigma, A = self.get_gp_cache(inputs, A_only=mu_only, **kwargs)

    A_x, A_y = A

    # Per axis: (K, T, n_gps) * (K, n_gps, 1) -> (K, T, 1)
    per_axis = []
    for axis, A_axis in enumerate((A_x, A_y)):
        mu_axis = torch.matmul(A_axis, self.us[:, :, axis:axis + 1])
        assert mu_axis.shape == (self.K, T, 1)
        per_axis.append(mu_axis)

    mu = torch.cat(per_axis, dim=-1)  # (K, T, 2)
    mu = torch.transpose(mu, 0, 1)  # (T, K, 2)
    mu = mu + inputs[:, None]

    if mu_only:
        return mu, 0

    assert Sigma.shape == (self.K, T, 2)
    Sigma = torch.transpose(Sigma, 0, 1)  # (T, K, 2)

    noise_std = torch.exp(self.log_sigmas)  # (K, 2)
    cov = Sigma + noise_std**2  # (T, K, 2)
    return mu, cov
def transform(self, inputs_self, **memory_kwargs):
    r"""
    Apply the transformation to every time step for all K states.

    x^{self}_t \sim x^{self}_{t-1}
        + acc_factor * [ \sum_{i=1}^{Df} sigmoid(w_i) f_i (self, other) ]

    where the weights w come from ``As`` applied to the concatenated
    (self, other) input plus ``bs``.

    :param inputs_self: (T, d)
    :param memory_kwargs: must contain "inputs_other" of shape (T, d); may
        contain precomputed "feature_vecs" of shape (T, Df, d), otherwise
        they are computed from inputs_self
    :return: outputs_self: (T, K, d)
    """
    # The docstring is a raw string: the formula contains backslashes that
    # are not valid escape sequences in an ordinary string literal.
    T = inputs_self.shape[0]

    inputs_other = memory_kwargs.get("inputs_other", None)
    assert inputs_other is not None
    inputs_other = check_and_convert_to_tensor(inputs_other)
    assert inputs_self.shape == inputs_other.shape, "inputs_self and inputs_other must have the same shape!"

    feature_vecs = memory_kwargs.get("feature_vecs", None)
    if feature_vecs is None:
        feature_vecs = self.feature_vec_func(inputs_self)
    assert feature_vecs.shape == (T, self.Df, self.d), \
        "Feature vec shape is " + str(feature_vecs.shape) \
        + ". It should have shape ({}, {}, {}).".format(T, self.Df, self.d)

    inputs = torch.cat((inputs_self, inputs_other), dim=-1)
    assert inputs.shape == (T, 4)

    # (K, Df, D) * (T, 1, D, 1) --> (T, K, Df, 1)
    weights = torch.matmul(self.As, inputs[:, None, :, None])
    assert weights.shape == (T, self.K, self.Df, 1)
    weights = torch.squeeze(weights, dim=-1) + self.bs

    # (T, K, Df) * (T, Df, d) -> (T, K, d)
    out = torch.matmul(torch.sigmoid(weights), feature_vecs)
    assert out.shape == (T, self.K, 2)

    out = inputs_self[:, None, ] + self.acc_factor * out
    assert out.shape == (T, self.K, self.d)
    return out
def __init__(self, K, D, M=0, transformation='linear', mus_init=None, sigmas=None, lags=1, train_sigma=True):
    """
    Set up an autoregressive Gaussian observation model with diagonal
    covariance.

    :param K: number of discrete states
    :param D: observation dimension
    :param M: forwarded to the parent constructor
    :param transformation: the string 'linear', or a BaseTransformation
        instance (whose ``lags`` then overrides the ``lags`` argument)
    :param mus_init: optional initial means; zeros of shape (K, D) if omitted
    :param sigmas: optional (K, D) array of strictly positive standard
        deviations; 5 everywhere if omitted
    :param lags: number of lags for the built-in linear transformation
    :param train_sigma: whether ``log_sigmas`` is trainable
    :raises ValueError: for an unknown transformation name or non-positive
        sigmas
    """
    super(ARGaussianObservation, self).__init__(K, D, M)

    if mus_init is None:
        self.mus_init = torch.zeros(self.K, self.D, dtype=torch.float64)
    else:
        self.mus_init = check_and_convert_to_tensor(mus_init)

    # consider diagonal covariance; log(1) = 0 for the initial step
    self.log_sigmas_init = torch.tensor(np.log(np.ones((K, D))),
                                        dtype=torch.float64)

    if sigmas is None:
        log_sigmas = torch.tensor(np.log(5 * np.ones((K, D))),
                                  dtype=torch.float64)
    else:
        assert sigmas.shape == (self.K, self.D)
        # log() of a non-positive value would silently give -inf / nan.
        if np.any(np.asarray(sigmas) <= 0):
            raise ValueError("sigmas must be strictly positive.")
        log_sigmas = torch.tensor(np.log(sigmas), dtype=torch.float64)
    # train_sigma is honoured for user-supplied sigmas too; previously they
    # were always trainable regardless of the flag.
    self.log_sigmas = nn.Parameter(log_sigmas, requires_grad=train_sigma)

    self.lags = lags

    if isinstance(transformation, str):
        if transformation != 'linear':
            # Fail here instead of leaving self.transformation undefined.
            raise ValueError(
                "Unknown transformation '{}'; expected 'linear' or a "
                "BaseTransformation instance.".format(transformation))
        self.transformation = LinearTransformation(K=self.K, D=self.D,
                                                   lags=self.lags)
    else:
        assert isinstance(transformation, BaseTransformation)
        self.transformation = transformation
        self.lags = self.transformation.lags
def k_step_prediction_for_grid_model(model, model_z, data, **memory_kwargs):
    """
    One-step-ahead predictions for a grid model using cached memory.

    :param model: model exposing ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: observations
    :param memory_kwargs: expects "memory_kwargs_a" and "memory_kwargs_b",
        each a dict with "feature_vecs" and optionally "momentum_vecs";
        without both dicts the generic ``k_step_prediction`` is used
    :return: None for empty data, otherwise an array of per-step predictions
    """
    if len(data) == 0:
        return None

    data = check_and_convert_to_tensor(data)

    cache_a = memory_kwargs.get("memory_kwargs_a", None)
    cache_b = memory_kwargs.get("memory_kwargs_b", None)
    if cache_a is None or cache_b is None:
        print("Did not provide memory information")
        return k_step_prediction(model, model_z, data)

    momentum_a = cache_a.get("momentum_vecs", None)
    features_a = cache_a.get("feature_vecs", None)
    momentum_b = cache_b.get("momentum_vecs", None)
    features_b = cache_b.get("feature_vecs", None)

    # Whether momentum is passed along is decided by animal a's cache alone.
    with_momentum = momentum_a is not None

    def _memory_at(momentum, features, idx):
        if with_momentum:
            return dict(momentum_vec=momentum[idx], feature_vec=features[idx])
        return dict(feature_vec=features[idx])

    # The first step has no history and is sampled with sample_x defaults.
    predictions = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True)
    ]
    for step in range(1, data.shape[0]):
        step_a = _memory_at(momentum_a, features_a, step - 1)
        step_b = _memory_at(momentum_b, features_b, step - 1)
        predictions.append(
            model.observation.sample_x(model_z[step], data[:step],
                                       return_np=True, with_noise=True,
                                       memory_kwargs_a=step_a,
                                       memory_kwargs_b=step_b))

    return np.array(predictions)
def __init__(self, K, D, Df, lags=2, momentum_weights=None, feature_vec_func=None, acc_factor=2):
    """
    Set up a transformation that mixes a momentum term with Df features.

    :param K: number of discrete states
    :param D: observation dimension
    :param Df: number of features (required)
    :param lags: number of momentum lags
    :param momentum_weights: optional per-lag weights; ones when omitted
    :param feature_vec_func: callable producing the feature vectors (required)
    :param acc_factor: scale applied to the combined update
    :raises ValueError: if Df or feature_vec_func is None
    """
    super(UnitMomentumDirectionTransformation, self).__init__(K, D)
    # d = int(D/2)

    if Df is None:
        raise ValueError("Please provide number of features")
    self.Df = Df

    self.momentum_lags = lags
    self.momentum_weights = (
        torch.ones(lags, dtype=torch.float64)
        if momentum_weights is None
        else check_and_convert_to_tensor(momentum_weights))

    if feature_vec_func is None:
        raise ValueError("Must provide feature funcs.")
    self.feature_vec_func = feature_vec_func

    self.acc_factor = acc_factor  # int

    # One weight for the momentum term plus one per feature, for each state.
    n_terms = 1 + self.Df
    self.Ws = torch.rand(self.K, n_terms, dtype=torch.float64,
                         requires_grad=True)
def transform(self, inputs_self, masks_a=None, memory_kwargs_a=None):
    """
    Transform each time step with the transformation of the grid cell it
    falls in, selected through 0/1 masks.

    :param inputs_self: (T, 2)
    :param masks_a: per-cell masks; computed with ``self.get_masks`` when
        omitted
    :param memory_kwargs_a: optional dict; its "inputs_other" entry is
        passed to each per-cell transform, and the whole dict is forwarded
        as keyword arguments
    :return: outputs (T, K, 2)
    """
    inputs_self = check_and_convert_to_tensor(inputs_self)

    inputs_other = (None if memory_kwargs_a is None
                    else memory_kwargs_a.get("inputs_other", None))

    T, _ = inputs_self.shape

    if masks_a is None:
        masks_a = self.get_masks(inputs_self)

    memory_kwargs_a = memory_kwargs_a or {}

    # Sum of every cell's output, each zeroed outside its own cell.
    output_a = sum(
        self.transformations_a[g].transform(inputs_self, inputs_other,
                                            **memory_kwargs_a)
        * masks_a[g][:, None, None]
        for g in range(self.G))

    assert output_a.shape == (T, self.K, 2)
    return output_a
def sample(self, T, prefix=None, input=None, with_noise=True, return_np=False):
    """
    Sample synthetic data from the model. Optionally, condition on a given
    prefix (preceding discrete states and data).

    Parameters
    ----------
    T : int
        number of time steps to sample
    prefix : (zpre, xpre)
        Optional prefix of discrete states (zpre) and continuous states (xpre)
        zpre must be an array of integers taking values 0...num_states-1.
        xpre must be an array of the same length that has preceding observations.
    input : (T, input_dim) array_like
        Optional inputs to specify for sampling
    with_noise : bool
        Whether or not to sample data with noise.
    return_np : bool
        Return numpy arrays instead of tensors.

    Returns
    -------
    z_sample : array_like of type int
        Sequence of sampled discrete states
    x_sample : (T x observation_dim) array_like
        Array of sampled data
    """
    with torch.no_grad():
        if isinstance(self.transition, InputDrivenTransition) and input is None:
            raise ValueError("Please provide input.")

        if input is not None:
            input = check_and_convert_to_tensor(input, device=self.device)

        K = self.K
        D = self.D
        dtype = torch.float64

        if prefix is None:
            # No prefix is given. Sample the initial state as the prefix.
            z = torch.empty(T, dtype=torch.int, device=self.device)
            data = torch.empty((T, D), dtype=dtype, device=self.device)

            # sample the first state from the initial distribution
            z_0 = self.init_state_distn.sample()

            # sample duration, capped at the number of requested steps
            L_0 = torch.randint(low=1, high=self.L + 1, size=[])
            L_0 = min(int(L_0), T)
            # Plain assignment fills the slice with z_0 and is also valid
            # for an empty slice (torch.stack of an empty list is not).
            z[0:L_0] = z_0

            # forward sample L_0 steps
            for t in range(L_0):
                data[t] = self.observation.sample_x(z=z_0, xhist=data[:t],
                                                    with_noise=with_noise,
                                                    return_np=False)

            # We only need to sample T - L_0 datapoints now
            T = T - L_0
            T_pre = L_0
        else:
            # check that the prefix is of the right shape
            z_pre, x_pre = prefix
            assert len(z_pre.shape) == 1
            T_pre = z_pre.shape[0]
            assert x_pre.shape == (T_pre, self.D), \
                "should be {}, but got {}.".format((T_pre, self.D), x_pre.shape)

            z_pre = check_and_convert_to_tensor(z_pre, dtype=torch.int,
                                                device=self.device)
            x_pre = check_and_convert_to_tensor(x_pre, dtype=dtype,
                                                device=self.device)

            # construct the states and data
            z = torch.cat(
                (z_pre, torch.empty(T, dtype=torch.int, device=self.device)))
            assert z.shape == (T_pre + T, )
            data = torch.cat(
                (x_pre, torch.empty((T, D), dtype=dtype, device=self.device)))

        t_end = T_pre + T

        if isinstance(self.transition, StationaryTransition):
            P = get_np(self.transition.stationary_transition_matrix)  # (K, K)

            t = T_pre
            # Check the bound before sampling: when the first duration
            # already covers every requested step, nothing is left to do.
            while t < t_end:
                z_t = torch.tensor(npr.choice(K, p=P[z[t - 1]]))
                L_t = torch.randint(low=1, high=self.L + 1, size=[])
                L_t = min(int(L_t), t_end - t)

                z[t:t + L_t] = z_t
                for t_forward in range(t, t + L_t):
                    data[t_forward] = self.observation.sample_x(
                        z_t, data[:t_forward], with_noise=with_noise,
                        return_np=False)
                t = t + L_t
        else:
            # TODO: not yet modified
            for t in range(T_pre, t_end):
                # ``input`` is a tensor here; its truth value must not be
                # tested, so compare against None.
                input_t = input[t - 1:t] if input is not None else input
                P = self.transition.transition_matrix(data[t - 1:t], input_t)
                assert P.shape == (1, self.K, self.K)
                P = torch.squeeze(P, dim=0)
                P = get_np(P)

                z[t] = npr.choice(K, p=P[z[t - 1]])
                data[t] = self.observation.sample_x(z[t], data[:t],
                                                    with_noise=with_noise,
                                                    return_np=False)

        if prefix is None:
            if return_np:
                return get_np(z), get_np(data)
            return z, data

        # With a prefix, only the newly sampled part is returned.
        if return_np:
            return get_np(z[T_pre:]), get_np(data[T_pre:])
        return z[T_pre:], data[T_pre:]
def k_step_prediction_for_lstm_based_model(model, model_z, data, k=0, feature_vecs=None):
    """
    k-step-ahead predictions for an LSTM-based model.

    With k == 0 each step is predicted from the true history, reusing the
    supplied feature vectors.  With k > 0 the first step ahead starts from
    the real value and the remaining k - 1 steps from sampled values.

    :param model: model exposing ``observation.sample_x`` and ``sample``
    :param model_z: discrete state per time step
    :param data: (T, D) observations
    :param k: number of steps to look ahead; must satisfy k < T when k > 0
    :param feature_vecs: pair (feature_vecs_a, feature_vecs_b), used only
        when k == 0; without it the generic ``k_step_prediction`` is used
    :return: array of predictions with shape (T - k, D)
    :raises ValueError: if k > 0 and T <= k
    """
    data = check_and_convert_to_tensor(data)
    T, D = data.shape

    # Passed to every sampling call after the first k == 0 step.
    lstm_states = {}
    predictions = []

    if k == 0:
        if feature_vecs is None:
            print("Did not provide memory information")
            return k_step_prediction(model, model_z, data)

        vecs_a, vecs_b = feature_vecs
        predictions.append(
            model.observation.sample_x(model_z[0], data[:0], return_np=True))
        for step in range(1, data.shape[0]):
            prev = slice(step - 1, step)
            predictions.append(
                model.observation.sample_x(
                    model_z[step], data[:step], return_np=True,
                    with_noise=True,
                    feature_vec=(vecs_a[prev], vecs_b[prev]),
                    lstm_states=lstm_states))
    else:
        assert k > 0
        # neglects t = 0 since there is no history
        if T <= k:
            raise ValueError("Please input k such that k < {}.".format(T))

        for step in range(1, T - k + 1):
            prev = slice(step - 1, step)
            # First step ahead uses the real value as prefix.
            z, x = model.sample(1, prefix=(model_z[prev], data[prev]),
                                return_np=False, with_noise=True,
                                lstm_states=lstm_states)
            # Remaining k - 1 steps use sampled values and their own copy
            # of the state dict, so lstm_states itself is not rebound.
            if k >= 1:
                sampled_lstm_states = dict(h_t=lstm_states["h_t"],
                                           c_t=lstm_states["c_t"])
                for _ in range(k - 1):
                    z, x = model.sample(1, prefix=(z, x), return_np=False,
                                        with_noise=True,
                                        lstm_states=sampled_lstm_states)
            assert x.shape == (1, D)
            predictions.append(get_np(x[0]))

    predictions = np.array(predictions)
    assert predictions.shape == (T - k, D)
    return predictions
def k_step_prediction_for_gpmodel(model, model_z, data, **memory_kwargs):
    """
    One-step-ahead predictions for a GP model, computing the A cache once
    for all time steps.

    :param model: model exposing ``observation.get_gp_cache`` and
        ``observation.sample_x``
    :param model_z: discrete state per time step
    :param data: (T, D) observations with D in {2, 4}
    :param memory_kwargs: forwarded to ``get_gp_cache``; when empty, the
        generic ``k_step_prediction`` is used
    :return: array of per-step predictions
    """
    data = check_and_convert_to_tensor(data)
    T, D = data.shape
    assert D == 4 or D == 2, D
    K = model.observation.K

    if memory_kwargs == {}:
        print("Did not provide memory information")
        return k_step_prediction(model, model_z, data)

    # compute As, then describe how to slice them for a given step
    if D == 4:
        _, A_a = model.observation.get_gp_cache(data[:-1, 0:2], 0,
                                                A_only=True, **memory_kwargs)
        _, A_b = model.observation.get_gp_cache(data[:-1, 2:4], 1,
                                                A_only=True, **memory_kwargs)
        assert A_a.shape == A_b.shape == (
            T - 1, K, 2, model.observation.n_gps * 2), "{}, {}".format(
                A_a.shape, A_b.shape)

        def cache_at(t):
            state = model_z[t]
            return dict(A_a=A_a[t - 1:t, state], A_b=A_b[t - 1:t, state])
    else:
        _, A = model.observation.get_gp_cache(data[:-1], A_only=True,
                                              **memory_kwargs)

        def cache_at(t):
            return dict(A=(A[0][model_z[t], t - 1:t],
                           A[1][model_z[t], t - 1:t]))

    # The first step has no history and is sampled with sample_x defaults.
    predictions = [
        model.observation.sample_x(model_z[0], data[:0], return_np=True)
    ]
    for step in range(1, data.shape[0]):
        predictions.append(
            model.observation.sample_x(model_z[step], data[:step],
                                       return_np=True, with_noise=True,
                                       **cache_at(step)))

    return np.array(predictions)
def log_likelihood(self, datas, inputs=None, transition_memory_kwargs=None, **memory_kwargs):
    """
    Compute the log probability of the data under the current model
    parameters.

    :param datas: list of arrays of data, one per sequence
    :param inputs: optional list with one input per sequence; None means
        no sequence has an input
    :param transition_memory_kwargs: optional dict mapping keyword names to
        per-sequence lists, forwarded to the transition
    :param memory_kwargs: keyword names mapped to per-sequence lists (a
        non-list value is treated as belonging to a single sequence),
        forwarded to ``self.observation.log_prob``
    :return: total log probability of the data
    """
    batch_size = len(datas)

    # zip() below needs one entry per sequence; None itself is not iterable.
    if inputs is None:
        inputs = [None] * batch_size

    list_of_transition_mkwargs = [{} for _ in range(batch_size)]
    if transition_memory_kwargs:
        assert isinstance(transition_memory_kwargs, dict), type(transition_memory_kwargs)
        for key, val in transition_memory_kwargs.items():
            # TODO: some ad-hoc fix -- a flat list is taken to belong to a
            # single sequence
            if isinstance(val, list) and not isinstance(val[0], list):
                val = [val]
            assert len(val) == batch_size, \
                key + " must be a list of length {}".format(batch_size)
            for i in range(batch_size):
                list_of_transition_mkwargs[i][key] = val[i]

    list_of_memory_kwargs = [{} for _ in range(batch_size)]
    if memory_kwargs != {}:
        for key, val in memory_kwargs.items():
            val = [val] if not isinstance(val, list) else val
            assert len(val) == batch_size, \
                key + " must be a list of length {}".format(batch_size)
            for i in range(batch_size):
                list_of_memory_kwargs[i][key] = val[i]

    ll = 0
    for data, input, transition_mkwargs, m_kwargs \
            in zip(datas, inputs, list_of_transition_mkwargs, list_of_memory_kwargs):
        if len(data) == 0:
            continue

        data = check_and_convert_to_tensor(data, torch.float64, device=self.device)
        T = data.shape[0]

        log_pi0 = self.init_state_distn.log_probs

        if isinstance(self.transition, StationaryTransition):
            log_P = self.transition.log_stationary_transition_matrix
            assert log_P.shape == (self.K, self.K)
            if T == 1:  # TODO: check this
                # No transitions: keep an empty leading axis.
                log_Ps = log_P[None, ][:0]
            else:
                log_Ps = log_P.expand((T - 1, self.K, self.K))  # (T-1, K, K)
        else:
            assert isinstance(self.transition, GridTransition)
            # Compare against None: the truth value of an array or tensor
            # with more than one element is ambiguous and raises.
            if input is not None:
                input = input[:-1]
            log_Ps = self.transition.log_transition_matrix(
                data[:-1], input, **transition_mkwargs)
            assert log_Ps.shape == (T - 1, self.K, self.K), \
                "correct shape is {}, but got {}".format((T - 1, self.K, self.K), log_Ps.shape)

        log_likes = self.observation.log_prob(data, **m_kwargs)  # (T, K)
        assert log_likes.shape == (T, self.K)

        fwd_obs_logprobs = self.stacked_fw_log_likes_helper(log_likes, self.L)

        ll = ll + hsmm_normalizer(log_pi=log_pi0,
                                  tran_logprobs=log_Ps,
                                  len_logprobs=self.len_logprobs,
                                  fwd_obs_logprobs=fwd_obs_logprobs)
    return ll