Example 1
    def __init__(self, nb_states, obs_dim, act_dim, obs_lag=1,
                 algo_type='MAP', init_obs_type='full',
                 trans_type='neural', obs_type='full', ctl_type='full',
                 init_state_prior=None, init_obs_prior=None,
                 trans_prior=None, obs_prior=None, ctl_prior=None,
                 init_state_kwargs={}, init_obs_kwargs={},
                 trans_kwargs={}, obs_kwargs={}, ctl_kwargs={}):

        self.nb_states = nb_states
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.obs_lag = obs_lag

        self.algo_type = algo_type

        self.dynamics = RecurrentAutoRegressiveHiddenMarkovModel(nb_states, obs_dim, act_dim, obs_lag,
                                                                 algo_type=algo_type, init_obs_type=init_obs_type,
                                                                 trans_type=trans_type, obs_type=obs_type,
                                                                 init_state_prior=init_state_prior,
                                                                 init_obs_prior=init_obs_prior,
                                                                 trans_prior=trans_prior,
                                                                 obs_prior=obs_prior,
                                                                 init_state_kwargs=init_state_kwargs,
                                                                 init_obs_kwargs=init_obs_kwargs,
                                                                 trans_kwargs=trans_kwargs,
                                                                 obs_kwargs=obs_kwargs)

        self.ctl_type = ctl_type
        self.ctl_prior = ctl_prior
        self.ctl_kwargs = ctl_kwargs

        if self.algo_type == 'ML':
            self.controls = GaussianControl(self.nb_states, self.obs_dim, self.act_dim, **ctl_kwargs)
        else:
            if self.ctl_type == 'full':
                self.controls = BayesianGaussianControl(self.nb_states, self.obs_dim, self.act_dim,
                                                        prior=ctl_prior, **ctl_kwargs)
            elif self.ctl_type == 'ard':
                self.controls = BayesianGaussianControlWithAutomaticRelevance(self.nb_states, self.obs_dim, self.act_dim,
                                                                              prior=ctl_prior, **ctl_kwargs)
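This fragment is the constructor of the ClosedLoopRecurrentAutoRegressiveHiddenMarkovModel class listed in full in Example 3. A minimal instantiation sketch, assuming the class and its dependencies are importable; the dimensions below are placeholders:

model = ClosedLoopRecurrentAutoRegressiveHiddenMarkovModel(
    nb_states=5, obs_dim=2, act_dim=1,
    algo_type='MAP', trans_type='neural', ctl_type='full')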
Example 2
def make_nascar_model():
    As = [random_rotation(2, np.pi/24.),
          random_rotation(2, np.pi/48.)]

    # Set the center points for each system
    centers = [np.array([+2.0, 0.]),
               np.array([-2.0, 0.])]
    cs = [-(A - np.eye(2)).dot(center) for A, center in zip(As, centers)]

    # Add a "right" state
    As.append(np.eye(2))
    cs.append(np.array([+0.1, 0.]))

    # Add a "right" state
    As.append(np.eye(2))
    cs.append(np.array([-0.25, 0.]))

    # Construct multinomial regression to divvy up the space
    w1, b1 = 100 * np.array([+1.0, 0.0]), 100 * np.array([-2.0])   # x + b > 0 -> x > -b
    w2, b2 = 100 * np.array([-1.0, 0.0]), 100 * np.array([-2.0])   # -x + b > 0 -> x < b
    w3, b3 = 10 * np.array([0.0, +1.0]), 10 * np.array([0.0])    # y > 0
    w4, b4 = 10 * np.array([0.0, -1.0]), 10 * np.array([0.0])    # y < 0

    weights = np.vstack((w1, w2, w3, w4))
    biases = np.hstack((b1, b2, b3, b4))
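    # The large weight scales (100 and 10) sharpen the softmax over states,
    # making the transitions near-deterministic at these decision boundaries.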

    true_rarhmm = RecurrentAutoRegressiveHiddenMarkovModel(nb_states=4, obs_dim=2,
                                                           trans_type='poly-only')

    true_rarhmm.init_observation.mu = np.tile(np.array([[0, 1]]), (4, 1))
    true_rarhmm.init_observation.sigma = np.array([1e0 * np.eye(2) for _ in range(4)])
    true_rarhmm.observations.A = np.array(As)
    true_rarhmm.observations.c = np.array(cs)
    true_rarhmm.observations.sigma = np.array([1e-4 * np.eye(2) for _ in range(4)])

    true_rarhmm.transitions.params = [weights, biases]

    return true_rarhmm
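Example 6 below compares learned models against a ground-truth log-likelihood true_ll on trajectories x. A sketch of how these could be produced from the NASCAR model, assuming the model exposes sample and log_normalizer methods (the sample signature here is an assumption):

true_rarhmm = make_nascar_model()
# assumed API: draw a few trajectories, then score them under the true model
_, x = true_rarhmm.sample(horizon=[1000] * 5)
true_ll = true_rarhmm.log_normalizer(x)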
Example 3
class ClosedLoopRecurrentAutoRegressiveHiddenMarkovModel:

    def __init__(self, nb_states, obs_dim, act_dim, obs_lag=1,
                 algo_type='MAP', init_obs_type='full',
                 trans_type='neural', obs_type='full', ctl_type='full',
                 init_state_prior=None, init_obs_prior=None,
                 trans_prior=None, obs_prior=None, ctl_prior=None,
                 init_state_kwargs={}, init_obs_kwargs={},
                 trans_kwargs={}, obs_kwargs={}, ctl_kwargs={}):

        self.nb_states = nb_states
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.obs_lag = obs_lag

        self.algo_type = algo_type

        self.dynamics = RecurrentAutoRegressiveHiddenMarkovModel(nb_states, obs_dim, act_dim, obs_lag,
                                                                 algo_type=algo_type, init_obs_type=init_obs_type,
                                                                 trans_type=trans_type, obs_type=obs_type,
                                                                 init_state_prior=init_state_prior,
                                                                 init_obs_prior=init_obs_prior,
                                                                 trans_prior=trans_prior,
                                                                 obs_prior=obs_prior,
                                                                 init_state_kwargs=init_state_kwargs,
                                                                 init_obs_kwargs=init_obs_kwargs,
                                                                 trans_kwargs=trans_kwargs,
                                                                 obs_kwargs=obs_kwargs)

        self.ctl_type = ctl_type
        self.ctl_prior = ctl_prior
        self.ctl_kwargs = ctl_kwargs

        if self.algo_type == 'ML':
            self.controls = GaussianControl(self.nb_states, self.obs_dim, self.act_dim, **ctl_kwargs)
        else:
            if self.ctl_type == 'full':
                self.controls = BayesianGaussianControl(self.nb_states, self.obs_dim, self.act_dim,
                                                        prior=ctl_prior, **ctl_kwargs)
            elif self.ctl_type == 'ard':
                self.controls = BayesianGaussianControlWithAutomaticRelevance(self.nb_states, self.obs_dim, self.act_dim,
                                                                              prior=ctl_prior, **ctl_kwargs)

    @property
    def params(self):
        # flatten so the getter mirrors the setter below, which expects
        # four dynamics parameter groups followed by the control parameters
        return (*self.dynamics.params, self.controls.params)

    @params.setter
    def params(self, value):
        self.dynamics.params = value[:4]
        self.controls.params = value[4]

    def permute(self, perm):
        self.dynamics.permute(perm)
        self.controls.permute(perm)

    @ensure_args_are_viable
    def initialize(self, obs, act, **kwargs):
        self.dynamics.initialize(obs, act, **kwargs)
        self.controls.initialize(obs, act, **kwargs)

    @ensure_args_are_viable
    def log_likelihoods(self, obs, act):
        if isinstance(obs, np.ndarray) and isinstance(act, np.ndarray):
            loginit, logtrans, logobs = self.dynamics.log_likelihoods(obs, act)
            logact = self.controls.log_likelihood(obs, act)
            return loginit, logtrans, logobs, logact
        else:
            def inner(obs, act):
                return self.log_likelihoods.__wrapped__(self, obs, act)
            result = map(inner, obs, act)
            return list(map(list, zip(*result)))

    def log_normalizer(self, obs, act):
        loglikhds = self.log_likelihoods(obs, act)
        _, norm = self.forward(*loglikhds)
        return np.sum(np.hstack(norm))

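    # The forward and backward passes below implement the standard scaled
    # forward-backward recursions, with the control log-likelihoods (logact)
    # folded into the per-step emission terms; the numerical work is done by
    # the Cython kernels forward_cy and backward_cy.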
    def forward(self, loginit, logtrans, logobs, logact):
        if isinstance(loginit, np.ndarray) \
                and isinstance(logtrans, np.ndarray) \
                and isinstance(logobs, np.ndarray) \
                and isinstance(logact, np.ndarray):

            nb_steps = logobs.shape[0]
            alpha = np.zeros((nb_steps, self.nb_states))
            norm = np.zeros((nb_steps,))

            forward_cy(to_c(loginit), to_c(logtrans),
                       to_c(logobs), to_c(logact),
                       to_c(alpha), to_c(norm))

            return alpha, norm
        else:
            def partial(loginit, logtrans, logobs, logact):
                return self.forward(loginit, logtrans, logobs, logact)
            result = map(partial, loginit, logtrans, logobs, logact)
            return list(map(list, zip(*result)))

    def backward(self, loginit, logtrans, logobs, logact, scale=None):
        if isinstance(loginit, np.ndarray) \
                and isinstance(logtrans, np.ndarray) \
                and isinstance(logobs, np.ndarray) \
                and isinstance(logact, np.ndarray) \
                and isinstance(scale, np.ndarray):

            nb_steps = logobs.shape[0]
            beta = np.zeros((nb_steps, self.nb_states))

            backward_cy(to_c(loginit), to_c(logtrans),
                        to_c(logobs), to_c(logact),
                        to_c(beta), to_c(scale))

            return beta
        else:
            def partial(loginit, logtrans, logobs, logact, scale):
                return self.backward(loginit, logtrans, logobs, logact, scale)
            return list(map(partial, loginit, logtrans, logobs, logact, scale))

    def smoothed_posterior(self, alpha, beta, temperature=1.):
        if isinstance(alpha, np.ndarray) and isinstance(beta, np.ndarray):
            return np.exp(temperature * (alpha + beta)
                          - logsumexp(temperature * (alpha + beta), axis=1, keepdims=True))
        else:
            def partial(alpha, beta):
                return self.smoothed_posterior(alpha, beta, temperature)
            return list(map(partial, alpha, beta))

    def smoothed_joint(self, alpha, beta, loginit, logtrans, logobs, logact, temperature=1.):
        if isinstance(loginit, np.ndarray) \
                and isinstance(logtrans, np.ndarray) \
                and isinstance(logobs, np.ndarray) \
                and isinstance(logact, np.ndarray) \
                and isinstance(alpha, np.ndarray) \
                and isinstance(beta, np.ndarray):

            zeta = temperature * (alpha[:-1, :, None] + beta[1:, None, :]) + logtrans \
                   + logobs[1:][:, None, :] + logact[1:][:, None, :]

            return np.exp(zeta - logsumexp(zeta, axis=(1, 2), keepdims=True))
        else:
            def partial(alpha, beta, loginit, logtrans, logobs, logact):
                return self.smoothed_joint(alpha, beta, loginit, logtrans, logobs, logact, temperature)
            return list(map(partial, alpha, beta, loginit, logtrans, logobs, logact))

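    # E-step: smoothed state marginals (gamma) and pairwise transition
    # marginals (zeta) under the current parameters; temperature anneals
    # the posteriors.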
    def estep(self, obs, act, temperature=1.):
        loglikhds = self.log_likelihoods(obs, act)
        alpha, norm = self.forward(*loglikhds)
        beta = self.backward(*loglikhds, scale=norm)
        gamma = self.smoothed_posterior(alpha, beta, temperature=temperature)
        zeta = self.smoothed_joint(alpha, beta, *loglikhds, temperature=temperature)
        return gamma, zeta

    def mstep(self, gamma, zeta, obs, act,
              init_state_mstep_kwargs,
              init_obs_mstep_kwargs,
              trans_mstep_kwargs,
              obs_mstep_kwargs,
              ctl_mstep_kwargs, **kwargs):

        self.dynamics.mstep(gamma, zeta, obs, act,
                            init_state_mstep_kwargs,
                            trans_mstep_kwargs,
                            obs_mstep_kwargs,
                            init_obs_mstep_kwargs)

        self.controls.mstep(gamma, obs, act, **ctl_mstep_kwargs)

    @ensure_args_are_viable
    def em(self, train_obs, train_act=None,
           nb_iter=50, tol=1e-4, initialize=True,
           init_state_mstep_kwargs={},
           init_obs_mstep_kwargs={},
           trans_mstep_kwargs={},
           obs_mstep_kwargs={},
           ctl_mstep_kwargs={}, **kwargs):

        process_id = kwargs.pop('process_id', 0)

        if initialize:
            self.initialize(train_obs, train_act)

        train_lls = []
        train_ll = self.log_normalizer(train_obs, train_act)
        train_lls.append(train_ll)
        last_train_ll = train_ll

        pbar = trange(nb_iter, position=process_id)
        pbar.set_description("#{}, ll: {:.5f}".format(process_id, train_lls[-1]))

        for _ in pbar:
            gamma, zeta = self.estep(train_obs, train_act)
            self.mstep(gamma, zeta,
                       train_obs, train_act,
                       init_state_mstep_kwargs,
                       init_obs_mstep_kwargs,
                       trans_mstep_kwargs,
                       obs_mstep_kwargs,
                       ctl_mstep_kwargs)

            train_ll = self.log_normalizer(train_obs, train_act)
            train_lls.append(train_ll)

            pbar.set_description("#{}, ll: {:.5f}".format(process_id, train_lls[-1]))

            if abs(train_ll - last_train_ll) < tol:
                break
            else:
                last_train_ll = train_ll

        return train_lls

    @ensure_args_are_viable
    def smoothed_control(self, obs, act):
        if isinstance(obs, np.ndarray) and isinstance(act, np.ndarray):
            loglikhds = self.log_likelihoods(obs, act)
            alpha, norm = self.forward(*loglikhds)
            beta = self.backward(*loglikhds, scale=norm)
            gamma = self.smoothed_posterior(alpha, beta)
            return self.controls.smooth(gamma, obs, act)
        else:
            def inner(obs, act):
                return self.smoothed_control.__wrapped__(self, obs, act)
            return list(map(inner, obs, act))

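    # Unlike smoothed_control, this conditions only on observations up to
    # the current step (forward pass only), so it can be used online.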
    @ensure_args_are_viable
    def filtered_control(self, obs, act, stoch=False):
        if isinstance(obs, np.ndarray) and isinstance(act, np.ndarray):
            loglikhds = self.dynamics.log_likelihoods(obs, act)
            alpha, _ = self.dynamics.forward(*loglikhds)

            w = np.exp(alpha - logsumexp(alpha, axis=-1, keepdims=True))
            z = np.zeros((len(obs),), dtype=np.int64)
            u = np.zeros((len(act), self.act_dim))

            for t in range(len(obs)):
                z[t] = npr.choice(self.nb_states, p=w[t, :]) if stoch\
                       else np.argmax(w[t, :])
                u[t] = self.controls.sample(z[t], obs[t, :]) if stoch\
                       else self.controls.mean(z[t], obs[t, :])
            return z, u
        else:
            def partial(obs, act):
                return self.filtered_control.__wrapped__(self, obs, act, stoch)
            result = map(partial, obs, act)
            return list(map(list, zip(*result)))

    def action(self, hist_obs, hist_act, stoch=False, average=False):
        obs = hist_obs[-1]
        belief = self.dynamics.filtered_posterior(hist_obs, hist_act)[-1]
        state = npr.choice(self.nb_states, p=belief) if stoch else np.argmax(belief)

        nxt_act = np.zeros((self.act_dim,))
        if average:
            for k in range(self.nb_states):
                # parenthesize so the belief weight applies in both branches
                nxt_act += belief[k] * (self.controls.sample(k, obs) if stoch
                                        else self.controls.mean(k, obs))
        else:
            nxt_act = self.controls.sample(state, obs) if stoch\
                      else self.controls.mean(state, obs)

        return belief, state, nxt_act
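A minimal end-to-end sketch of how the closed-loop model might be used, assuming obs and act are lists of (time, dim) trajectory arrays; all shapes and values below are placeholders:

model = ClosedLoopRecurrentAutoRegressiveHiddenMarkovModel(
    nb_states=5, obs_dim=2, act_dim=1, algo_type='MAP')
lls = model.em(obs, act, nb_iter=50, tol=1e-4, initialize=True)

# query the policy online from a short observation/action history
belief, state, nxt_act = model.action(obs[0][:10], act[0][:10])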
Example 4
    trans_mstep_kwargs = {
        'nb_iter': 25,
        'batch_size': 256,
        'lr': 1e-3,
        'l2': 1e-32
    }
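    # note: l2 = 1e-32 amounts to effectively zero weight decay on the transition network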

    rarhmm = RecurrentAutoRegressiveHiddenMarkovModel(
        nb_states=nb_states,
        obs_dim=obs_dim,
        act_dim=act_dim,
        obs_lag=nb_lags,
        algo_type=algo_type,
        init_obs_type=init_obs_type,
        obs_type=obs_type,
        trans_type=trans_type,
        init_state_prior=init_state_prior,
        init_obs_prior=init_obs_prior,
        trans_prior=trans_prior,
        obs_prior=obs_prior,
        init_state_kwargs=init_state_kwargs,
        init_obs_kwargs=init_obs_kwargs,
        trans_kwargs=trans_kwargs,
        obs_kwargs=obs_kwargs)

    rarhmm.em(obs,
              act,
              nb_iter=100,
              tol=1e-4,
              initialize=True,
              init_state_mstep_kwargs=init_state_mstep_kwargs,
              init_obs_mstep_kwargs=init_obs_mstep_kwargs,
              trans_mstep_kwargs=trans_mstep_kwargs,
              obs_mstep_kwargs=obs_mstep_kwargs)
Example 5
def create_job(train_obs, train_act, kwargs, seed):

    random.seed(seed)
    npr.seed(seed)
    torch.manual_seed(seed)

    # model arguments
    nb_states = kwargs.pop('nb_states')
    obs_dim = kwargs.pop('obs_dim')
    act_dim = kwargs.pop('act_dim')
    obs_lag = kwargs.pop('obs_lag')

    algo_type = kwargs.pop('algo_type')
    init_obs_type = kwargs.pop('init_obs_type')
    trans_type = kwargs.pop('trans_type')
    obs_type = kwargs.pop('obs_type')

    # model priors
    init_state_prior = kwargs.pop('init_state_prior')
    init_obs_prior = kwargs.pop('init_obs_prior')
    trans_prior = kwargs.pop('trans_prior')
    obs_prior = kwargs.pop('obs_prior')

    # model kwargs
    init_state_kwargs = kwargs.pop('init_state_kwargs')
    init_obs_kwargs = kwargs.pop('init_obs_kwargs')
    trans_kwargs = kwargs.pop('trans_kwargs')
    obs_kwargs = kwargs.pop('obs_kwargs')

    # em arguments
    nb_iter = kwargs.pop('nb_iter')
    tol = kwargs.pop('tol')
    process_id = seed

    init_state_mstep_kwargs = kwargs.pop('init_state_mstep_kwargs')
    init_obs_mstep_kwargs = kwargs.pop('init_obs_mstep_kwargs')
    trans_mstep_kwargs = kwargs.pop('trans_mstep_kwargs')
    obs_mstep_kwargs = kwargs.pop('obs_mstep_kwargs')

    rarhmm = RecurrentAutoRegressiveHiddenMarkovModel(
        nb_states=nb_states,
        obs_dim=obs_dim,
        act_dim=act_dim,
        obs_lag=obs_lag,
        algo_type=algo_type,
        init_obs_type=init_obs_type,
        trans_type=trans_type,
        obs_type=obs_type,
        init_state_prior=init_state_prior,
        init_obs_prior=init_obs_prior,
        trans_prior=trans_prior,
        obs_prior=obs_prior,
        init_state_kwargs=init_state_kwargs,
        init_obs_kwargs=init_obs_kwargs,
        trans_kwargs=trans_kwargs,
        obs_kwargs=obs_kwargs)

    rarhmm.em(train_obs,
              train_act,
              nb_iter=nb_iter,
              tol=tol,
              initialize=True,
              process_id=process_id,
              init_state_mstep_kwargs=init_state_mstep_kwargs,
              init_obs_mstep_kwargs=init_obs_mstep_kwargs,
              trans_mstep_kwargs=trans_mstep_kwargs,
              obs_mstep_kwargs=obs_mstep_kwargs)

    return rarhmm
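create_job seeds every relevant generator and keys the progress bar to the seed, so several random restarts can run side by side. A sketch of launching it across seeds and keeping the best model, assuming the returned model exposes log_normalizer as in Example 3 (the pool setup is an assumption):

from multiprocessing import Pool

# copy kwargs per job, since create_job pops entries from it
jobs = [(train_obs, train_act, dict(kwargs), seed) for seed in range(4)]
with Pool(4) as pool:
    models = pool.starmap(create_job, jobs)
best = max(models, key=lambda m: m.log_normalizer(train_obs, train_act))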
Example 6
# neural transition (trans_type 'neural' here is an assumption;
# cf. the commented 'neural-only' variant below)
trans_type = 'neural'
trans_kwargs = {'hidden_sizes': (16, ), 'activation': 'relu',
                'norm': {'mean': np.mean(np.vstack(x), axis=0),
                         'std': np.std(np.vstack(x), axis=0)},
                'device': 'cpu'}

# # neural transition
# trans_type = 'neural-only'
# trans_kwargs = {'hidden_sizes': (16, ), 'activation': 'relu',
#                 'norm': {'mean': np.mean(np.vstack(x), axis=0),
#                          'std': np.std(np.vstack(x), axis=0)},
#                 'device': 'cpu'}

trans_mstep_kwargs = {'nb_iter': 25, 'batch_size': 256,
                      'lr': 1e-3, 'l2': 1e-32}

# npr.seed(1337)
std_rarhmm = RecurrentAutoRegressiveHiddenMarkovModel(nb_states=4, obs_dim=2,
                                                      trans_type=trans_type,
                                                      trans_kwargs=trans_kwargs)

std_lls = std_rarhmm.em(x, nb_iter=5,
                        tol=0., initialize=True,
                        trans_mstep_kwargs=trans_mstep_kwargs)

print("true_ll=", true_ll, "std_ll=", std_lls[-1])

plt.figure(figsize=(7, 7))
plt.axhline(y=true_ll, color='r')
plt.plot(std_lls)
plt.xscale('symlog')
plt.yscale('symlog')
plt.show()