def __init__(self, rollout_actions_length, environment, mutation_probability, num_pop,
             use_shift_buffer=False, flip_at_least_one=True, discount_factor=1,
             ignore_frames=0, do_ns=False, path=''):
    self._logger = logging.getLogger('RHEA')
    self._rollout_actions_length = rollout_actions_length
    # Private simulator copy used for rollouts; mirror the real robot state.
    self._environment = MazeSimulator(render=True, xml_file=path)
    self.path = path
    self._environment.env.robot = copy.deepcopy(environment.env.robot)
    self.environment = environment
    self._use_shift_buffer = use_shift_buffer
    self._flip_at_least_one = flip_at_least_one
    self._mutation_probability = mutation_probability
    self._discount_factor = discount_factor
    self.num_pop = num_pop
    self._ignore_frames = ignore_frames
    self.best_solution_val = -99999
    self.best_solution = None
    self.cur_best_solution_val = -99999
    self.cur_best_solution = None
    self.cur_best_novelty = -99999
    self.cur_best_novelty_sol = None
    self.history = []
    self.old_pop = []
    # Novelty search with the 'ad_hoc' behavior descriptor; the alternatives are
    # kept commented out for experimentation.
    self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
    # self.ns = ns.NoveltySearch(behavior_type='trajectory')
    # self.ns = ns.NoveltySearch(behavior_type='hamming')
    # self.ns = ns.NoveltySearch(behavior_type='entropy')
    # self.ns = ns.NoveltySearch(behavior_switch=True)
    self.ea = ea.EA('default')
    self.do_ns = do_ns
    # self.behv_state = copy.deepcopy(environment)
    # self.behv_last_visit = copy.deepcopy(environment)
    # self.behv_rewards = copy.deepcopy(environment)
    self.playout_count = 0
    # Initialize the solution to a random sequence
    if self._use_shift_buffer:
        self._solution = self._random_solution()
    self.tree = {}
    self.store_tree = False
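# A minimal, self-contained sketch of the shift-buffer idea referenced by
# `_use_shift_buffer` above: the previous action sequence is shifted one step
# to the left and padded with a fresh random action, so the next planning
# iteration is warm-started. The function name and the fixed action count are
# illustrative assumptions, not code from this class.
import numpy as np

def shift_and_append(solution, num_actions, rng=None):
    """Drop the action that was just executed and append a random one."""
    rng = rng or np.random.default_rng()
    shifted = np.roll(np.asarray(solution), -1)
    shifted[-1] = rng.integers(num_actions)
    return shifted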
def __init__(self, max_depth=1000, playouts=10000, do_ns=False):
    self.max_depth = max_depth
    self.playouts = playouts
    self.ns = ns.NoveltySearch()
    self.do_ns = do_ns
    self.cycle = 0
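# `ns.NoveltySearch` is used throughout these classes; below is a minimal
# sketch of the usual novelty score from the literature (an assumption about
# the general technique, not this module's implementation): the mean distance
# from a behavior descriptor to its k nearest neighbours in the archive.
import numpy as np

def novelty_score(behavior, archive, k=15):
    """Mean Euclidean distance to the k nearest archived behaviors (larger = more novel)."""
    if not archive:
        return 0.0
    dists = np.sort(np.linalg.norm(np.asarray(archive) - np.asarray(behavior), axis=1))
    return float(dists[:k].mean())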
def __init__(self, max_depth=1000, playouts=10000, do_ns=False):
    self.max_depth = max_depth
    self.playouts = playouts
    self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
    # self.ns = ns.NoveltySearch(behavior_type='trajectory')
    # self.ns = ns.NoveltySearch(behavior_type='hamming')
    # self.ns = ns.NoveltySearch(behavior_type='entropy')
    # self.ns = ns.NoveltySearch(behavior_switch=True)
    self.do_ns = do_ns
    self.cycle = 0
    self.e = 1
    self.decay = 1
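# The `e` and `decay` fields above suggest an epsilon-greedy action choice
# whose exploration rate is multiplied by a decay factor after each decision
# (with decay = 1 it stays constant). A sketch under that assumption, not
# taken from this class:
import random

def epsilon_greedy(actions, values, epsilon, decay):
    """Explore with probability epsilon, otherwise exploit; return the decayed epsilon."""
    if random.random() < epsilon:
        choice = random.choice(actions)
    else:
        choice = max(actions, key=lambda a: values[a])
    return choice, epsilon * decay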
def __init__(self, max_depth=1000, playouts=10000, do_ns=False, sel_type='ucb', update_type='amaf'):
    self.max_depth = max_depth
    self.playouts = playouts
    # self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
    self.ns = ns.NoveltySearch(behavior_type='trajectory')
    # self.ns = ns.NoveltySearch(behavior_type='hamming')
    # self.ns = ns.NoveltySearch(behavior_type='entropy')
    # self.ns = ns.NoveltySearch(behavior_switch=True)
    self.do_ns = do_ns
    self.cycle = 0
    self.e = 1
    self.decay = 0.99995
    self.sel_type = sel_type
    self.update_type = update_type
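# `sel_type='ucb'` points at UCB1-style child selection and `update_type='amaf'`
# at an All-Moves-As-First style backup. Below is a sketch of the standard UCB1
# score only (an illustration, not this repository's tree code):
import math

def ucb1(value_sum, visits, parent_visits, c=math.sqrt(2)):
    """UCB1 score for a child node; unvisited children are tried first."""
    if visits == 0:
        return math.inf
    return value_sum / visits + c * math.sqrt(math.log(parent_visits) / visits)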
import copy
import random

import numpy as np
from deap import creator, base, tools, algorithms

import maze
import ns

env = maze.Maze(1)
n_s = ns.NoveltySearch(behavior_type='ad_hoc')
playout_count = 0
dummy_count = 0

# Debug copies of the maze used only to visualize where playouts end up.
behv_state = copy.deepcopy(env)       # end positions of the current generation
behv_last_visit = copy.deepcopy(env)  # last visited places
behv_rewards = copy.deepcopy(env)     # reward bookkeeping grid (not printed below)
behv_acc_visit = copy.deepcopy(env)   # end positions accumulated over all generations


def print_debug_states():
    global behv_state
    global behv_last_visit
    global behv_rewards
    global behv_acc_visit
    print('accumulative end positions of generation:')
    print(behv_state.render())
    print('accumulative end positions overall')
    print(behv_acc_visit.render())
    print('last visited places')
    print(behv_last_visit.render())
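# The deap imports above are normally wired to a toolbox along these lines: a
# maximizing fitness, list-based individuals, and registered variation and
# selection operators. This is the standard DEAP pattern, not this module's
# actual registration; the 4-action genome and length 50 are assumptions.
creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Individual', list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register('attr_action', random.randint, 0, 3)
toolbox.register('individual', tools.initRepeat, creator.Individual,
                 toolbox.attr_action, n=50)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('mate', tools.cxTwoPoint)
toolbox.register('mutate', tools.mutUniformInt, low=0, up=3, indpb=0.05)
toolbox.register('select', tools.selTournament, tournsize=3)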