Example #1
File: rhea.py Project: abcp4/SFPNovelty
    def __init__(self,
                 rollout_actions_length,
                 environment,
                 mutation_probability,
                 num_pop,
                 use_shift_buffer=False,
                 flip_at_least_one=True,
                 discount_factor=1,
                 ignore_frames=0,
                 do_ns=False,
                 path=''):

        self._logger = logging.getLogger('RHEA')

        self._rollout_actions_length = rollout_actions_length
        # Build a private simulator for rollouts and copy the robot state
        # over from the environment that was passed in.
        self._environment = MazeSimulator(render=True, xml_file=path)
        self.path = path
        self._environment.env.robot = copy.deepcopy(environment.env.robot)
        self.environment = environment
        self._use_shift_buffer = use_shift_buffer
        self._flip_at_least_one = flip_at_least_one
        self._mutation_probability = mutation_probability
        self._discount_factor = discount_factor
        self.num_pop = num_pop
        self._ignore_frames = ignore_frames
        self.best_solution_val = -99999
        self.best_solution = None
        self.cur_best_solution_val = -99999
        self.cur_best_solution = None
        self.cur_best_novelty = -99999
        self.cur_best_novelty_sol = None

        self.history = []
        self.old_pop = []
        self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
        #self.ns = ns.NoveltySearch(behavior_type='trajectory')
        #self.ns = ns.NoveltySearch(behavior_type='hamming')
        #self.ns = ns.NoveltySearch(behavior_type='entropy')
        #self.ns = ns.NoveltySearch(behavior_switch=True)
        self.ea = ea.EA('default')
        self.do_ns = do_ns
        #self.behv_state =copy.deepcopy(environment)
        #self.behv_last_visit =copy.deepcopy(environment)
        #self.behv_rewards =copy.deepcopy(environment)
        self.playout_count = 0

        # Initialize the solution to a random sequence
        if self._use_shift_buffer:
            self._solution = self._random_solution()

        self.tree = {}
        self.store_tree = False
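
A minimal usage sketch (not part of rhea.py) of how the constructor above might be called. The class name RHEA, the object some_env, and the XML file name are assumptions; only the keyword arguments mirror the signature, and whatever environment is passed in must expose env.robot, since the constructor deep-copies that attribute into its private MazeSimulator.

agent = RHEA(rollout_actions_length=10,   # length of each evolved action sequence
             environment=some_env,        # assumed to provide .env.robot
             mutation_probability=0.2,
             num_pop=8,
             use_shift_buffer=True,
             do_ns=True,
             path='maze_layout.xml')      # hypothetical maze XML for MazeSimulator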
Example #2
File: mo_mcts.py Project: abcp4/SFPNovelty
    def __init__(self, max_depth=1000, playouts=10000, do_ns=False):

        self.max_depth = max_depth
        self.playouts = playouts
        self.ns = ns.NoveltySearch()
        self.do_ns = do_ns
        self.cycle = 0
Example #3
    def __init__(self, max_depth=1000, playouts=10000, do_ns=False):

        self.max_depth = max_depth
        self.playouts = playouts
        self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
        #self.ns = ns.NoveltySearch(behavior_type='trajectory')
        #self.ns = ns.NoveltySearch(behavior_type='hamming')
        #self.ns = ns.NoveltySearch(behavior_type='entropy')
        #self.ns = ns.NoveltySearch(behavior_switch=True)
        self.do_ns = do_ns
        self.cycle = 0
        self.e = 1
        self.decay = 1
Example #4
    def __init__(self,
                 max_depth=1000,
                 playouts=10000,
                 do_ns=False,
                 sel_type='ucb',
                 update_type='amaf'):

        self.max_depth = max_depth
        self.playouts = playouts
        #self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
        self.ns = ns.NoveltySearch(behavior_type='trajectory')
        #self.ns = ns.NoveltySearch(behavior_type='hamming')
        #self.ns = ns.NoveltySearch(behavior_type='entropy')
        #self.ns = ns.NoveltySearch(behavior_switch=True)
        self.do_ns = do_ns
        self.cycle = 0
        self.e = 1
        self.decay = 0.99995
        self.sel_type = sel_type
        self.update_type = update_type
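
For context, a hedged illustration (not taken from the repository) of how an e/decay pair like the one above is typically used: an epsilon-greedy exploration rate that shrinks multiplicatively after every playout, so with decay = 0.99995 it drops to roughly 0.61 after the default 10,000 playouts.

e, decay = 1.0, 0.99995
for playout in range(10000):
    # ... run one playout, taking a random action with probability e ...
    e *= decay
# e is now approximately 0.61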
Example #5
import random
from deap import creator, base, tools, algorithms
import ns
import maze
import copy
import numpy as np

env = maze.Maze(1)
n_s = ns.NoveltySearch(behavior_type='ad_hoc')
playout_count = 0
dummy_count = 0
behv_state = copy.deepcopy(env)
behv_last_visit = copy.deepcopy(env)
behv_rewards = copy.deepcopy(env)
behv_acc_visit = copy.deepcopy(env)


def print_debug_states():

    global behv_state
    global behv_last_visit
    global behv_rewards
    global behv_acc_visit
    print('cumulative end positions of this generation:')
    print(behv_state.render())

    print('cumulative end positions overall:')
    print(behv_acc_visit.render())

    print('last visited places:')
    print(behv_last_visit.render())
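
Example #5 imports DEAP's creator, base, and tools, but the snippet ends before any evolutionary setup appears. Below is a hedged sketch of the boilerplate those imports usually precede; the fitness weights, the 4-action encoding, and the genome length are assumptions, not values taken from the project.

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_action", random.randint, 0, 3)  # assumed 4 maze actions
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_action, n=20)             # assumed genome length
toolbox.register("population", tools.initRepeat, list, toolbox.individual)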