def run_value_iteration(self, solver, epoch):
    run_start_time = time.time()

    reward = 0
    discounted_reward = 0
    discount = 1.0

    solver.value_iteration(self.model.get_transition_matrix(),
                           self.model.get_observation_matrix(),
                           self.model.get_reward_matrix(),
                           self.model.planning_horizon)

    b = self.model.get_initial_belief_state()

    for i in range(self.model.max_steps):
        # TODO: record average V(b) per epoch
        action, v_b = solver.select_action(b, solver.gamma)
        step_result = self.model.generate_step(action)

        if not step_result.is_terminal:
            b = self.model.belief_update(b, action, step_result.observation)

        reward += step_result.reward
        discounted_reward += discount * step_result.reward
        discount *= self.model.discount

        # show the step result
        self.display_step_result(i, step_result)

        if step_result.is_terminal:
            console(3, module, 'Terminated after episode step ' + str(i + 1))
            break

        # TODO: add belief state History sequence

    self.results.time.add(time.time() - run_start_time)
    self.results.update_reward_results(reward, discounted_reward)

    # Pretty print results
    self.results.show(epoch)
    console(3, module, 'Total possible undiscounted return: ' +
            str(self.model.get_max_undiscounted_return()))
    print_divider('medium')

    self.experiment_results.time.add(self.results.time.running_total)
    self.experiment_results.undiscounted_return.count += (
        self.results.undiscounted_return.count - 1)
    self.experiment_results.undiscounted_return.add(
        self.results.undiscounted_return.running_total)
    self.experiment_results.discounted_return.count += (
        self.results.discounted_return.count - 1)
    self.experiment_results.discounted_return.add(
        self.results.discounted_return.running_total)
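A minimal sketch of the discrete Bayes belief update that model.belief_update(b, action, observation) above is assumed to perform. T and O here are hypothetical numpy arrays indexed T[a][s][s_next] and O[a][s_next][o], in the spirit of the matrices handed to solver.value_iteration(); this is illustrative, not the project's implementation.

import numpy as np

def belief_update_sketch(b, a, o, T, O):
    # b'(s') is proportional to O[a][s'][o] * sum_s T[a][s][s'] * b(s)
    predicted = T[a].T.dot(b)          # prediction step: marginalize over current states s
    updated = O[a][:, o] * predicted   # correction step: weight by observation likelihood
    total = updated.sum()
    if total == 0.0:
        # the observation has zero probability under the model; fall back to the prediction
        return predicted / predicted.sum()
    return updated / total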
def prune(self, belief_node):
    """
    Prune the siblings of the chosen belief node and set that node as the new "root"
    :return:
    """
    start_time = time.time()
    self.belief_tree.prune_siblings(belief_node)
    elapsed = time.time() - start_time
    console(2, module, "Time spent pruning = " + str(elapsed) + " seconds")
def show(self, epoch):
    print_divider('large')
    print('\tEpoch #' + str(epoch) + ' RESULTS')
    print_divider('large')
    console(2, module, 'discounted return statistics')
    print_divider('medium')
    self.discounted_return.show()
    print_divider('medium')
    console(2, module, 'undiscounted return statistics')
    print_divider('medium')
    self.undiscounted_return.show()
def display_step_result(step_num, step_result):
    """
    Pretty prints step result information
    :param step_num:
    :param step_result:
    :return:
    """
    console(3, module, 'Step Number = ' + str(step_num))
    console(3, module, 'Step Result.Action = ' + step_result.action.to_string())
    console(3, module, 'Step Result.Observation = ' + step_result.observation.to_string())
    console(3, module, 'Step Result.Next_State = ' + step_result.next_state.to_string())
    console(3, module, 'Step Result.Reward = ' + str(step_result.reward))
def discounted_return(self):
    if self.model.solver == 'ValueIteration':
        solver = self.solver_factory(self)
        self.run_value_iteration(solver, 1)

        if self.model.save:
            save_pkl(solver.gamma,
                     os.path.join(self.model.weight_dir,
                                  'VI_planning_horizon_{}.pkl'.format(self.model.planning_horizon)))
    elif not self.model.use_tf:
        self.multi_epoch()
    else:
        self.multi_epoch_tf()

    print('\n')
    console(2, module, 'epochs: ' + str(self.model.n_epochs))
    console(2, module, 'ave undiscounted return/step: ' +
            str(self.experiment_results.undiscounted_return.mean) + ' +- ' +
            str(self.experiment_results.undiscounted_return.std_err()))
    console(2, module, 'ave discounted return/step: ' +
            str(self.experiment_results.discounted_return.mean) + ' +- ' +
            str(self.experiment_results.discounted_return.std_err()))
    console(2, module, 'ave time/epoch: ' + str(self.experiment_results.time.mean))

    self.logger.info('env: ' + self.model.env + '\t' +
                     'epochs: ' + str(self.model.n_epochs) + '\t' +
                     'ave undiscounted return: ' +
                     str(self.experiment_results.undiscounted_return.mean) + ' +- ' +
                     str(self.experiment_results.undiscounted_return.std_err()) + '\t' +
                     'ave discounted return: ' +
                     str(self.experiment_results.discounted_return.mean) + ' +- ' +
                     str(self.experiment_results.discounted_return.std_err()) + '\t' +
                     'ave time/epoch: ' + str(self.experiment_results.time.mean))
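The "mean +- std_err()" values reported above are ordinary sample statistics. A small self-contained sketch of what the statistics objects are assumed to compute (standard error = sample standard deviation divided by the square root of the number of epochs); the function name is illustrative only.

import math

def mean_and_std_err(values):
    # mean and standard error of a list of per-epoch returns
    n = len(values)
    mean = sum(values) / n
    variance = sum((v - mean) ** 2 for v in values) / (n - 1) if n > 1 else 0.0
    return mean, math.sqrt(variance) / math.sqrt(n)

# e.g. mean_and_std_err([10.0, 12.0, 8.0]) -> (10.0, ~1.155)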
def display_step_result(step_num, step_result):
    """
    Pretty prints step result information
    :param step_num:
    :param step_result:
    :return:
    """
    print_divider("large")
    console(2, module, "Step Number = " + str(step_num))
    console(2, module, "Step Result.Action = " + step_result.action.to_string())
    console(2, module, "Step Result.Observation = " + step_result.observation.to_string())
    console(2, module, "Step Result.Next_State = " + step_result.next_state.to_string())
    console(2, module, "Step Result.Reward = " + str(step_result.reward))
def multi_run(self):
    num_runs = self.model.sys_cfg["num_runs"]

    for i in range(num_runs):
        console(2, module, "Starting run " + str(i + 1) + " with " +
                str(self.model.sys_cfg["num_sims"]) + " simulations")

        self.run()
        total_time = self.results.time.mean * self.results.time.count

        if total_time > self.model.sys_cfg["max_time_out"]:
            console(2, module, "Timed out after " + str(i) + " runs in " +
                    str(total_time) + " seconds")
def display_step_result(step_num, step_result):
    """
    Pretty prints step result information
    :param step_num:
    :param step_result:
    :return:
    """
    console(3, module, 'Step Number = ' + str(step_num))
    if step_result.action.to_string() == 'CHECK':
        action_string = step_result.action.to_string() + ' rock ' + str(step_result.action.rock_no)
    else:
        action_string = step_result.action.to_string()
    console(3, module, 'Step Result.Action = ' + action_string)
    console(3, module, 'Step Result.Observation = ' + step_result.observation.to_string())
    console(3, module, 'Step Result.Next_State = ' + step_result.next_state.to_string())
    console(3, module, 'Step Result.Reward = ' + str(step_result.reward))
def multi_epoch(self):
    eps = self.model.epsilon_start

    self.model.reset_for_epoch()

    for i in range(self.model.n_epochs):
        # Reset the epoch stats
        self.results = Results()

        if self.model.solver == 'POMCP':
            eps = self.run_pomcp(i + 1, eps)
            self.model.reset_for_epoch()

        if self.experiment_results.time.running_total > self.model.timeout:
            console(2, module, 'Timed out after ' + str(i) + ' epochs in ' +
                    str(self.experiment_results.time.running_total) + ' seconds')
            break
def policy_iteration(self):
    """
    Template-method pattern.

    For on-policy learning algorithms such as SARSA, this method carries out the policy
    iteration. Afterwards, the learned policy can be evaluated by consecutive calls to
    select_action(), which specifies the action selection rule.

    For off-policy learning algorithms such as Q-learning, this method is called repeatedly
    at each step of the policy traversal. The policy iterator does not advance.
    :return:
    """
    start_time = time.time()
    self.total_reward_stats.clear()

    # Save the state of the current belief
    # (only passing a reference to the action map)
    current_belief = self.policy_iterator.copy()

    for i in range(self.model.sys_cfg["num_sims"]):
        # Reset the simulator
        self.model.reset_for_simulation()
        state = self.policy_iterator.sample_particle()

        console(3, module, "Starting simulation at random state = " + state.to_string())

        approx_value = self.simulate(state, start_time, i)
        self.total_reward_stats.add(approx_value)

        console(3, module, "Approximation of the value function = " + str(approx_value))

    # Reset the policy iterator
    self.policy_iterator = current_belief
def multi_epoch(self):
    eps = self.model.epsilon_start

    self.model.reset_for_epoch()

    # Number of epochs of the experiment to conduct (default 100)
    for i in range(self.model.n_epochs):
        # Reset the epoch stats
        self.results = Results()

        if self.model.solver == 'POMCP':
            # run_pomcp repeatedly samples states s from the belief B(h) while planning
            eps = self.run_pomcp(i + 1, eps)
            self.model.reset_for_epoch()

        print("##########################")

        if self.experiment_results.time.running_total > self.model.timeout:
            console(2, module, 'Timed out after ' + str(i) + ' epochs in ' +
                    str(self.experiment_results.time.running_total) + ' seconds')
            break
def traverse(self, belief_node, tree_depth, start_time):
    delayed_reward = 0

    state = belief_node.sample_particle()

    # Time expired
    if time.time() - start_time > self.model.action_selection_timeout:
        console(4, module, "action selection timeout")
        return 0

    action = ucb_action(self, belief_node, False)

    # Search horizon reached
    if tree_depth >= self.model.max_depth:
        console(4, module, "Search horizon reached")
        return 0

    step_result, is_legal = self.model.generate_step(state, action)

    child_belief_node = belief_node.child(action, step_result.observation)
    if child_belief_node is None and not step_result.is_terminal \
            and belief_node.action_map.total_visit_count > 0:
        child_belief_node, added = belief_node.create_or_get_child(action, step_result.observation)

    if not step_result.is_terminal or not is_legal:
        tree_depth += 1
        if child_belief_node is not None:
            # Add S' to the new belief node
            # Add a state particle with the new state
            if len(child_belief_node.state_particles) < self.model.max_particle_count:
                child_belief_node.state_particles.append(step_result.next_state)
            delayed_reward = self.traverse(child_belief_node, tree_depth, start_time)
        else:
            delayed_reward = self.rollout(belief_node)
        tree_depth -= 1
    else:
        console(4, module, "Reached terminal state.")

    # delayed_reward is "Q maximal"
    # current_q_value is the Q value of the current belief-action pair
    action_mapping_entry = belief_node.action_map.get_entry(action.bin_number)
    q_value = action_mapping_entry.mean_q_value
    # STEP: update the p value as well
    p_value = action_mapping_entry.mean_p_value

    # off-policy Q-learning update rule
    q_value += (step_result.reward + (self.model.discount * delayed_reward) - q_value)
    p_value += 1 if step_result.observation.is_obstacle or not is_legal else 0

    action_mapping_entry.update_visit_count(1)
    action_mapping_entry.update_q_value(q_value)
    action_mapping_entry.update_p_value(p_value)

    # Add RAVE?
    return q_value
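The ucb_action(self, belief_node, False) helper used in traverse() is not shown in this listing. Below is a hedged sketch of the standard UCB1 rule it is assumed to implement; the names (entries standing in for the belief node's action-mapping entries) are illustrative, and the real helper may handle legality and tie-breaking differently.

import math
import random

def ucb_action_sketch(entries, exploration_const=1.0, greedy=False):
    # entries: iterable of objects exposing mean_q_value and visit_count
    total_visits = sum(e.visit_count for e in entries)
    best_value, best = -float('inf'), []
    for e in entries:
        if not greedy and e.visit_count == 0:
            bonus = float('inf')  # always try untried actions first
        elif greedy:
            bonus = 0.0           # pure exploitation (used when acting, not planning)
        else:
            bonus = exploration_const * math.sqrt(math.log(total_visits) / e.visit_count)
        value = e.mean_q_value + bonus
        if value > best_value:
            best_value, best = value, [e]
        elif value == best_value:
            best.append(e)
    return random.choice(best)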
def show():
    print_divider("large")
    print("\tRUN RESULTS")
    print_divider("large")
    console(2, module, "Discounted Return statistics")
    print_divider("medium")
    Results.discounted_return.show()
    print_divider("medium")
    console(2, module, "Un-discounted Return statistics")
    print_divider("medium")
    Results.undiscounted_return.show()
    print_divider("medium")
    console(2, module, "Time")
    print_divider("medium")
    Results.time.show()
    print_divider("medium")
def discounted_return(self):
    self.multi_epoch()

    print('\n')
    console(2, module, 'epochs: ' + str(self.model.n_epochs))
    console(2, module, 'ave undiscounted return/step: ' +
            str(self.experiment_results.undiscounted_return.mean) + ' +- ' +
            str(self.experiment_results.undiscounted_return.std_err()))
    console(2, module, 'ave discounted return/step: ' +
            str(self.experiment_results.discounted_return.mean) + ' +- ' +
            str(self.experiment_results.discounted_return.std_err()))
    # console(2, module, 'ave time/epoch: ' + str(self.experiment_results.time.mean))

    self.logger.info('env: ' + self.model.env + '\t' +
                     'epochs: ' + str(self.model.n_epochs) + '\t' +
                     'ave undiscounted return: ' +
                     str(self.experiment_results.undiscounted_return.mean) + ' +- ' +
                     str(self.experiment_results.undiscounted_return.std_err()) + '\t' +
                     'ave discounted return: ' +
                     str(self.experiment_results.discounted_return.mean) + ' +- ' +
                     str(self.experiment_results.discounted_return.std_err()) + '\t' +
                     'ave time/epoch: ' + str(self.experiment_results.time.mean))

    return self.policy['optimal_traj']
def reset_for_epoch(self):
    self.actual_rock_states = self.sample_rocks()
    console(2, module, "Actual rock states = " + str(self.actual_rock_states))
def run(self, num_steps=None):
    run_start_time = time.time()
    discount = 1.0

    if num_steps is None:
        num_steps = self.model.sys_cfg["num_steps"]

    # Reset the running total for each statistic for this run
    self.results.reset_running_totals()

    # Create a new solver
    solver = self.solver_factory(self, self.model)

    # Perform simulator behaviors that must be done for each run
    self.model.reset_for_run()

    console(2, module, "num of particles generated = " +
            str(len(solver.belief_tree.root.state_particles)))

    if solver.on_policy:
        solver.policy_iteration()

    # Monte-Carlo start state
    state = self.model.sample_an_init_state()
    console(2, module, "Initial search state: " + state.to_string())

    for i in range(num_steps):
        start_time = time.time()

        # action will be of type Discrete Action
        action = solver.select_action()

        step_result, is_legal = self.model.generate_step(state, action)

        self.results.reward.add(step_result.reward)
        self.results.undiscounted_return.running_total += step_result.reward
        self.results.discounted_return.running_total += (step_result.reward * discount)
        discount *= self.model.sys_cfg["discount"]
        state = step_result.next_state

        # show the step result
        display_step_result(i, step_result)

        if not step_result.is_terminal:
            solver.update(step_result)

        # Extend the history sequence
        new_hist_entry = solver.history.add_entry()
        new_hist_entry.reward = step_result.reward
        new_hist_entry.action = step_result.action
        new_hist_entry.observation = step_result.observation
        new_hist_entry.register_entry(new_hist_entry, None, step_result.next_state)

        if step_result.is_terminal:
            console(2, module, "Terminated after episode step " + str(i))
            break

        console(2, module, "MCTS step forward took " +
                str(time.time() - start_time) + " seconds")

    self.results.time.add(time.time() - run_start_time)
    self.results.discounted_return.add(self.results.discounted_return.running_total)
    self.results.undiscounted_return.add(self.results.undiscounted_return.running_total)

    # Pretty print results
    print_divider("large")
    solver.history.show()
    self.results.show()
    console(2, module, "Max possible total Un-discounted Return: " +
            str(self.model.get_max_undiscounted_return()))
    print_divider("medium")
def traverse(self, belief_node, tree_depth, start_time):
    delayed_reward = 0

    # s ~ B(h); the belief tree does not change here, we only sample a state particle
    state = belief_node.sample_particle()

    # Sample an action: argmax of the UCB values; inside the simulation the Q-values
    # are updated to expand the tree
    action = ucb_action(self, belief_node, False)

    # Search horizon reached
    if tree_depth >= self.model.max_depth:
        console(4, module, "Search horizon reached")
        return 0

    # black-box generative step
    step_result, is_legal = self.model.generate_step(state, action)

    # h' <- (h, a, o)
    # get the child belief node, which could be None
    child_belief_node = belief_node.child(action, step_result.observation)
    if child_belief_node is None and not step_result.is_terminal \
            and belief_node.action_map.total_visit_count > 0:
        child_belief_node, added = belief_node.create_or_get_child(action, step_result.observation)

    if not step_result.is_terminal or not is_legal:
        tree_depth += 1
        if child_belief_node is not None:
            # Add S' to the new belief node
            # Add a state particle with the new state
            if len(child_belief_node.state_particles) < self.model.max_particle_count:
                child_belief_node.state_particles.append(step_result.next_state)
            # recurse down the tree
            delayed_reward = self.traverse(child_belief_node, tree_depth, start_time)
        else:
            # if child_belief_node is None, estimate the value with a rollout
            delayed_reward = self.rollout(belief_node)
        tree_depth -= 1
    else:
        console(4, module, "Reached terminal state.")

    # delayed_reward is "Q maximal"
    # current_q_value is the Q value of the current belief-action pair
    action_mapping_entry = belief_node.action_map.get_entry(action.bin_number)
    q_value = action_mapping_entry.mean_q_value

    # off-policy Q-learning update rule
    q_value += (step_result.reward + (self.model.discount * delayed_reward) - q_value)

    action_mapping_entry.update_visit_count(1)
    action_mapping_entry.update_q_value(q_value)

    # Add RAVE?
    return q_value
def run_pomcp(self, epoch, eps):
    epoch_start = time.time()

    # Create a new solver
    solver = self.solver_factory(self)

    # Monte-Carlo start state
    state = solver.belief_tree_index.sample_particle()

    reward = 0
    discounted_reward = 0
    discount = 1.0

    for i in range(self.model.max_steps):
        start_time = time.time()

        # action will be of type Discrete Action
        action = solver.select_eps_greedy_action(eps, start_time)

        # update epsilon
        if eps > self.model.epsilon_minimum:
            eps *= self.model.epsilon_decay

        step_result, is_legal = self.model.generate_step(state, action)

        reward += step_result.reward
        discounted_reward += discount * step_result.reward
        discount *= self.model.discount
        state = step_result.next_state

        # show the step result
        self.display_step_result(i, step_result)

        if not step_result.is_terminal or not is_legal:
            solver.update(step_result)

        # Extend the history sequence
        new_hist_entry = solver.history.add_entry()
        HistoryEntry.update_history_entry(new_hist_entry, step_result.reward,
                                          step_result.action, step_result.observation,
                                          step_result.next_state)

        if step_result.is_terminal or not is_legal:
            console(3, module, 'Terminated after episode step ' + str(i + 1))
            break

    self.results.time.add(time.time() - epoch_start)
    self.results.update_reward_results(reward, discounted_reward)

    # Pretty print results
    # print_divider('large')
    solver.history.show()
    self.results.show(epoch)
    console(3, module, 'Total possible undiscounted return: ' +
            str(self.model.get_max_undiscounted_return()))
    print_divider('medium')

    self.experiment_results.time.add(self.results.time.running_total)
    self.experiment_results.undiscounted_return.count += (
        self.results.undiscounted_return.count - 1)
    self.experiment_results.undiscounted_return.add(
        self.results.undiscounted_return.running_total)
    self.experiment_results.discounted_return.count += (
        self.results.discounted_return.count - 1)
    self.experiment_results.discounted_return.add(
        self.results.discounted_return.running_total)

    return eps
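For reference, a worked example of the epsilon schedule driven inside run_pomcp(): epsilon decays geometrically once per step until it reaches epsilon_minimum, and the final value is returned so the next epoch resumes from it. The constants below are made-up illustration values, not the project's defaults.

def decay_epsilon(eps, minimum=0.05, decay=0.99, steps=200):
    # mirrors the per-step update in run_pomcp(): decay only while above the floor
    for _ in range(steps):
        if eps > minimum:
            eps *= decay
    return eps

# Starting from eps = 1.0, one 200-step episode leaves eps ~= 1.0 * 0.99 ** 200 ~= 0.13,
# and later epochs keep shrinking it until it settles at (or just below) the 0.05 floor.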
def reset_for_epoch(self):
    self.actual_cell_states = self.sample_cells()
    console(2, module, "Actual cell states = " + str(self.actual_cell_states))
def update(self, step_result, prune=True):
    """
    Feed back the step result, updating the belief_tree, extending the history,
    updating particle sets, etc.

    Advance the policy index to point to the next belief node in the episode.
    :return:
    """
    # Update the simulator with the step result.
    # This is important in case there are certain actions that change the state of the simulator.
    self.model.update(step_result)

    child_belief_node = self.belief_tree_index.get_child(step_result.action, step_result.observation)

    # If the child_belief_node is None because the step result randomly produced a different
    # observation, grab any of the beliefs extending from the belief node's action node
    if child_belief_node is None:
        action_node = self.belief_tree_index.action_map.get_action_node(step_result.action)
        if action_node is None:
            # I grabbed a child belief node that doesn't have an action node.
            # Use rollout from here on out.
            console(2, module, "Reached branch with no leaf nodes, using random rollout to finish the episode")
            print("Should not get here!")
            exit()
            self.disable_tree = True
            return

        obs_mapping_entries = list(action_node.observation_map.child_map.values())
        for entry in obs_mapping_entries:
            if entry.child_node is not None:
                child_belief_node = entry.child_node
                console(2, module, "Had to grab nearest belief node...variance added")
                print("If we get here, we need to think about this case!")
                exit()
                break

    # If the new root does not yet have the max possible number of particles, add some more
    if len(child_belief_node.state_particles) < self.model.max_particle_count:
        num_to_add = self.model.max_particle_count - len(child_belief_node.state_particles)

        # Generate particles for the new root node
        child_belief_node.state_particles += self.model.generate_particles(
            self.belief_tree_index, step_result.action, step_result.observation,
            num_to_add, self.belief_tree_index.state_particles)

        # If that failed, attempt to create a new state particle set
        if len(child_belief_node.state_particles) == 0:
            print("You will not believe this ever becoming zero!")
            exit()
            child_belief_node.state_particles += self.model.generate_particles_uninformed(
                self.belief_tree_index, step_result.action, step_result.observation,
                self.model.min_particle_count)

    # Failed to continue search - ran out of particles
    if child_belief_node is None or len(child_belief_node.state_particles) == 0:
        console(1, module, "Couldn't refill particles, must use random rollout to finish episode")
        exit()
        self.disable_tree = True
        return

    self.belief_tree_index = child_belief_node
    if prune:
        self.prune(self.belief_tree_index)
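A hedged sketch of the particle refill that model.generate_particles() is assumed to perform in update() above: rejection sampling against the executed action and the received observation. The function name, signature, and the observation-equality test are illustrative; the project's model may compare observations and cap attempts differently.

import random

def generate_particles_sketch(model, prev_particles, action, observation, num_to_add,
                              max_attempts=10000):
    # Sample states from the previous belief, simulate one step with the executed action,
    # and keep next states whose simulated observation matches the observation received.
    particles = []
    attempts = 0
    while len(particles) < num_to_add and attempts < max_attempts:
        attempts += 1
        state = random.choice(prev_particles)
        step_result, _ = model.generate_step(state, action)
        if step_result.observation == observation:
            particles.append(step_result.next_state)
    return particles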
def run_pomcp(self, epoch, eps):
    epoch_start = time.time()

    # Create a new solver (includes UCB and the belief tree).
    # The solver first builds a belief tree whose nodes hold many state particles.
    solver = self.solver_factory(self)

    # Monte-Carlo start state: randomly sample a start state from the root belief
    state = solver.belief_tree_index.sample_particle()

    reward = 0
    discounted_reward = 0
    discount = 1.0

    # Each episode advances the root of the belief tree; this can be thought of as
    # building up the history (the tree grows as actions are chosen).
    # max_steps is the max number of steps per trial/episode/trajectory/epoch (e.g. 200).
    for i in range(self.model.max_steps):
        start_time = time.time()

        # Planning stage: the POMCP planner runs many simulations (e.g. 500) from the
        # current belief, then an action of type Discrete Action is chosen epsilon-greedily.
        action = solver.select_eps_greedy_action(eps, start_time)

        # update epsilon
        if eps > self.model.epsilon_minimum:
            eps *= self.model.epsilon_decay

        # Execution stage (act and sense): actually execute the chosen action and
        # obtain the next state and observation.
        step_result, is_legal = self.model.generate_step(state, action)

        reward += step_result.reward
        discounted_reward += discount * step_result.reward
        discount *= self.model.discount
        state = step_result.next_state

        print("inside step loop: {}".format(i))

        # show the step result
        self.display_step_result(i, step_result)

        if not step_result.is_terminal or not is_legal:
            # Update the belief state and prune; the belief tree changes here.
            solver.update(step_result)

        # Extend the history sequence
        new_hist_entry = solver.history.add_entry()
        HistoryEntry.update_history_entry(new_hist_entry, step_result.reward,
                                          step_result.action, step_result.observation,
                                          step_result.next_state)

        if step_result.is_terminal or not is_legal:
            console(3, module, 'Terminated after episode step ' + str(i + 1))
            break

    self.results.time.add(time.time() - epoch_start)
    self.results.update_reward_results(reward, discounted_reward)

    # Pretty print results
    # print_divider('large')
    solver.history.show()
    self.results.show(epoch)
    console(3, module, 'Total possible undiscounted return: ' +
            str(self.model.get_max_undiscounted_return()))
    print_divider('medium')

    self.experiment_results.time.add(self.results.time.running_total)
    self.experiment_results.undiscounted_return.count += (
        self.results.undiscounted_return.count - 1)
    self.experiment_results.undiscounted_return.add(
        self.results.undiscounted_return.running_total)
    self.experiment_results.discounted_return.count += (
        self.results.discounted_return.count - 1)
    self.experiment_results.discounted_return.add(
        self.results.discounted_return.running_total)

    return eps
def traverse(self, belief_node, tree_depth, start_time):
    delayed_reward = 0

    state = belief_node.sample_particle()

    # Time expired
    if time.time() - start_time > self.model.action_selection_timeout:
        console(4, module, "action selection timeout")
        return 0

    action = ucb_action(self, belief_node, False)

    # Search horizon reached
    if tree_depth >= self.model.max_depth:
        console(4, module, "Search horizon reached")
        return 0

    step_result, is_legal = self.model.generate_step(state, action)

    # If the belief_node -> action node -> child belief node already exists,
    # copy all the data from belief_node to the (a, o) child belief node
    # print "simulate: action=", action.bin_number, " obs=", step_result.observation.is_good,
    #       "total visit=", belief_node.action_map.total_visit_count, "depth=", belief_node.depth
    child_belief_node = belief_node.child(action, step_result.observation)

    # grow the belief tree by constructing the new child_belief_node
    if child_belief_node is None and not step_result.is_terminal and belief_node.visited:
        child_belief_node, added = belief_node.create_or_get_child(action, step_result.observation)

    if not step_result.is_terminal or not is_legal:
        if child_belief_node is not None:
            tree_depth += 1
            # Add S' to the new belief node
            # Add a state particle with the new state
            if len(child_belief_node.state_particles) < self.model.max_particle_count:
                child_belief_node.state_particles.append(step_result.next_state)
            delayed_reward = self.traverse(child_belief_node, tree_depth, start_time)
        else:
            delayed_reward = self.rollout_from_state(state)
            belief_node.visited = True
        # total_reward = step_result.reward + (self.model.discount * delayed_reward)
        # return total_reward
    else:
        console(4, module, "Reached terminal state.")

    # delayed_reward is "Q maximal"
    # current_q_value is the Q value of the current belief-action pair
    action_mapping_entry = belief_node.action_map.get_entry(action.bin_number)
    q_value = action_mapping_entry.mean_q_value

    # off-policy Q-learning update rule
    q_value += (step_result.reward + (self.model.discount * delayed_reward) - q_value)

    action_mapping_entry.update_visit_count(1)
    action_mapping_entry.update_q_value(q_value)

    # off-policy Q-learning: back up the maximum Q value over actions at this belief node
    max_q_value = -np.inf
    for action_entry in belief_node.action_map.entries.values():
        if action_entry.mean_q_value > max_q_value:
            max_q_value = action_entry.mean_q_value

    # Add RAVE?
    return max_q_value
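The traverse() variants in this listing differ mainly in what they back up to the parent call: the earlier versions return the updated Q(b, a) of the sampled action, while this one returns the maximum of Q(b, a) over actions, i.e. an off-policy (Q-learning style) backup. A minimal sketch of that distinction, with q_values standing in for the entries' mean_q_value; the helper name is illustrative.

def backup_value(q_values, sampled_action, off_policy=True):
    # q_values: dict mapping action -> current mean Q estimate at this belief node
    if off_policy:
        return max(q_values.values())    # Q-learning style: value of the best action
    return q_values[sampled_action]      # on-policy / Monte-Carlo style: the action simulated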
def run_pomcp(self, epoch, eps):
    epoch_start = time.time()

    # Create a new solver
    solver = self.solver_factory(self)

    # Monte-Carlo start state
    state = solver.belief_tree_index.sample_particle()

    # NOTE: rock example specific
    self.model.set_rock_states(state)

    reward = 0
    discounted_reward = 0
    discount = 1.0

    solver.show_current_belief()

    for i in range(self.model.max_steps):
        start_time = time.time()

        # action will be of type Discrete Action
        action = solver.select_eps_greedy_action(eps, start_time)
        # print("selected action : " + str(action.bin_number))
        # raw_input("Press Enter to continue...")

        # update epsilon
        if eps > self.model.epsilon_minimum:
            eps *= self.model.epsilon_decay

        step_result, is_legal = self.model.generate_step(state, action)

        reward += step_result.reward
        discounted_reward += discount * step_result.reward
        discount *= self.model.discount
        state = step_result.next_state

        # show the step result
        self.display_step_result(i, step_result)

        if not step_result.is_terminal or not is_legal:
            # prune the tree and augment the child belief node to proceed with
            # enough particles that match the current (a, o)
            solver.update(step_result)
            solver.show_current_belief()

        # Extend the history sequence
        new_hist_entry = solver.history.add_entry()
        HistoryEntry.update_history_entry(new_hist_entry, step_result.reward,
                                          step_result.action, step_result.observation,
                                          step_result.next_state)

        if step_result.is_terminal or not is_legal:
            console(3, module, 'Terminated after episode step ' + str(i + 1))
            break

    self.results.time.add(time.time() - epoch_start)
    self.results.update_reward_results(reward, discounted_reward)

    # Pretty print results
    # print_divider('large')
    solver.history.show()
    self.results.show(epoch)
    console(3, module, 'Total possible undiscounted return: ' +
            str(self.model.get_max_undiscounted_return()))
    print_divider('medium')

    self.experiment_results.time.add(self.results.time.running_total)
    self.experiment_results.undiscounted_return.count += (
        self.results.undiscounted_return.count - 1)
    self.experiment_results.undiscounted_return.add(
        self.results.undiscounted_return.running_total)
    self.experiment_results.discounted_return.count += (
        self.results.discounted_return.count - 1)
    self.experiment_results.discounted_return.add(
        self.results.discounted_return.running_total)

    return eps
def discounted_return(self):
    """
    Encapsulates logging and begins the runs
    :return:
    """
    console(2, module, "Main runs")

    self.logger.info("Simulations\tRuns\tUndiscounted Return\tUndiscounted Error\t" +
                     "\tDiscounted Return\tDiscounted Error\tTime")

    self.multi_run()

    console(2, module, "Simulations = " + str(self.model.sys_cfg["num_sims"]))
    console(2, module, "Runs = " + str(self.results.time.count))
    console(2, module, "Undiscounted Return = " +
            str(self.results.undiscounted_return.mean) + " +- " +
            str(self.results.undiscounted_return.std_err()))
    console(2, module, "Discounted Return = " +
            str(self.results.discounted_return.mean) + " +- " +
            str(self.results.discounted_return.std_err()))
    console(2, module, "Time = " + str(self.results.time.mean))

    self.logger.info(str(self.model.sys_cfg["num_sims"]) + '\t' +
                     str(self.results.time.count) + '\t' + '\t' +
                     str(self.results.undiscounted_return.mean) + '\t' +
                     str(self.results.undiscounted_return.std_err()) + '\t' + '\t' +
                     str(self.results.discounted_return.mean) + '\t' +
                     str(self.results.discounted_return.std_err()) + '\t' + '\t' +
                     str(self.results.time.mean))
def set_rock_states(self, state):
    self.actual_rock_states = state.rock_states
    console(2, module, "Actual rock states = " + str(self.actual_rock_states))