model.fit(states, actions) print ("Model trained in %.03f sec." % tm.time) failed = 0 sampled = np.zeros((ntrials, d, n)) ncases = model._cb_t._counter - 1 for j in xrange(ntrials): with Timer() as tm: # Test model iter_ = 0 nfails = 0 while True: try: sampled[j, :, 0] = model.sample() for iter_, a in enumerate(actions.T): # sample next state resulting from executing action `a` in state `state` next_state = model.sample(MDPState(sampled[j, :, iter_]), a) if next_state is None: raise TypeError sampled[j, :, iter_ + 1] = next_state except: # plot_sampled(obs[:j], sampled[i, j, :, :iter_]) sampled[j, :].fill(0) nfails += 1 print "{0}:{1} Failed to infer next state distribution at step {2}.".format( j, nfails, iter_
# For each element of `obs`: fit the model, roll out one sampled trajectory
# driven by `actions` (retrying a bounded number of times on failure), and
# score the trajectory with `evaluate_action` / `evaluate_delta`.
n = obs.shape[0]

# One error entry per element of `obs`; an entry stays at -inf when every
# sampling attempt for that element fails.
action_error = -np.inf * np.ones(n)
delta_error = -np.inf * np.ones(n)

# Upper bound on sampling attempts per fitted model.
max_attempts = 10

for i, states in enumerate(obs):
    # Train CASML's case base and hmm with states and actions
    model.fit(states, actions)

    # Test model: sample a full trajectory, retrying when sampling fails.
    cntr = 0  # number of failed attempts so far for this element
    iter_ = 0
    while cntr < max_attempts:
        sampled = None
        # Reset per attempt so a failure on the initial sample is not
        # reported with the step index left over from a previous attempt.
        iter_ = 0
        try:
            # Initial state stored as the first column of `sampled`
            # (presumably shape (d, 1) -- confirm against model.sample()).
            sampled = np.array([model.sample()]).T
            for iter_, a in enumerate(actions.T):
                # Sample the next state resulting from executing action `a`
                # in the most recently sampled state (last column).
                next_state = model.sample(MDPState(sampled[:, -1]), a)[:, np.newaxis]
                sampled = np.hstack([sampled, next_state])
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit still abort the experiment.
            print("{0}:{1} Failed to infer next state distribution at step {2}.".format(
                i + 1, cntr + 1, iter_ + 1))
            # Debug aid: plot_sampled(obs[0:i+1], sampled)
            cntr += 1
        else:
            # Complete trajectory sampled; stop retrying.
            break

    # Only score when an attempt succeeded; otherwise the -inf markers remain.
    if cntr < max_attempts:
        action_error[i] = evaluate_action(actions, obs[0:i + 1], sampled, plot=True)[0]
        delta_error[i] = evaluate_delta(obs[0:i + 1], sampled, plot=True)