Example #1
    def gather(self):
        for i in range(self.levelCount):
            level_name = self.levels[i]

            print('gathering from ' + level_name)

            delay = self.gather_delay[i]
            args = copy.deepcopy(self.args)
            args.memory_delay = delay
            args.directory = 'gather_' + level_name

            agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
                args, agent_a3c.Agent, self.state_dim, self.action_dim)

            while True:
                hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                    agent, self.env, self.state_dim, self.action_dim,
                    hasSavedMemory, max_frame_saved)
                if hasSavedMemory:
                    break

            agent.metrics.save(agent.results_location,
                               'metrics')  # Save metrics
            agent.metrics.runs.graph(agent.results_location, 'runs')
            agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
            agent.brain.metrics = agent.metrics

            print('switching levels')
            # Switch to next level
            self.env.env.env.press('right_arrow')
            time.sleep(0.1)
            self.env.env.env.release('right_arrow')
            time.sleep(0.1)

        print('all done')
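The press/sleep/release/sleep sequence above recurs throughout these examples. A minimal sketch of that key-tap pattern, assuming any object exposing press()/release() (the helper name tap and its delays are illustrative, not from the source):

import time

def tap(kbd, key, hold=0.1, settle=0.1):
    # Hold the key long enough for the game to register it, then let the
    # menu settle before the next input
    kbd.press(key)
    time.sleep(hold)
    kbd.release(key)
    time.sleep(settle)

# e.g. tap(env.env.env, 'right_arrow') would advance one entry in the level menu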
Example #2
    def incremental_learn(self, args, levels_list):
        args = copy.deepcopy(args)
        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
            args, agent_a3c.Agent, self.state_dim, self.action_dim)
        length = len(levels_list)
        # Map each requested level to its menu index, then compute how many
        # right-arrow presses separate consecutive targets on the circular menu
        idxs = []
        idx_diff = []
        for i in range(length):
            idxs.append(self.levels.index(levels_list[i]))
        idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
        for i in range(1, length):
            idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)

        print(idxs)
        print(idx_diff)

        print('Switching Levels Initial')
        for i in range(idx_diff[0]):
            self.env.env.env.press('right_arrow')
            time.sleep(0.1)
            self.env.env.env.release('right_arrow')
            time.sleep(0.1)
        self.curlevel = idxs[0]

        for i in range(length):
            time_start = time.time()
            while True:
                hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                    agent, self.env, self.state_dim, self.action_dim,
                    hasSavedMemory, max_frame_saved)
                # break_time is presumably a module-level constant here;
                # compare self.break_time in run_job() (Example #3)
                if time.time() - time_start > break_time:
                    break
            if i != length - 1:
                print('Switching Levels')
                for j in range(idx_diff[i + 1]):
                    self.env.env.env.press('right_arrow')
                    time.sleep(0.1)
                    self.env.env.env.release('right_arrow')
                    time.sleep(0.1)
                self.curlevel = idxs[i + 1]

            save_weights(agent, 'id_' + str(i))  # Save weights
            agent.metrics.save(agent.results_location,
                               'metrics_id_' + str(i))  # Save metrics
            agent.metrics.runs.graph(agent.results_location,
                                     'runs_id_' + str(i))
            agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
            agent.brain.metrics = agent.metrics

        agent.brain.init_vars()  # Reset network
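The modular arithmetic that fills idx_diff generalizes to any circular menu: each entry is the number of right-arrow presses needed to reach the next target level. A standalone sketch of the same computation (the function name arrow_presses is hypothetical):

def arrow_presses(cur_idx, target_idxs, level_count):
    # Presses from the current level to the first target, then between
    # consecutive targets, wrapping around the circular level menu
    presses = [(target_idxs[0] - cur_idx) % level_count]
    for prev, nxt in zip(target_idxs, target_idxs[1:]):
        presses.append((nxt - prev) % level_count)
    return presses

print(arrow_presses(3, [1, 0], 4))  # -> [2, 3]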
Example #3
    def run_job(self, job):
        args = job.args
        level = job.level

        self.controller.switch(level)

        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
            args, agent_a3c.Agent, self.state_dim, self.action_dim)

        time_start = time.time()
        while True:
            hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                agent, self.env, self.state_dim, self.action_dim,
                hasSavedMemory, max_frame_saved)
            if agent.args.mode == 'gather':
                # Gather mode ends once the memory snapshot is written
                if hasSavedMemory:
                    break
            elif time.time() - time_start > self.break_time:
                break

        save_weights(agent, 'end') # Save weights
        agent.metrics.save(agent.results_location, 'metrics_end') # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs_end')
        agent.metrics = Metrics(agent.metrics.type) # Reset metrics
        agent.brain.metrics = agent.metrics
        agent.brain.init_vars() # Reset network

        if agent.args.mode != 'gather':
            # Find the checkpoint with the highest frame number
            # ('model_frame_<N>.h5'); fall back to 'model_end.h5'
            directory = agent.results_location
            allfiles = listdir(directory)
            onlyfiles = [f for f in allfiles if isfile(join(directory, f))]
            frame = [f for f in onlyfiles if 'model_frame' in f and f.endswith('.h5')]
            frame_time = [int(f[12:-3]) for f in frame]
            if not frame_time:
                max_file_name = 'model_end.h5'
            else:
                max_file_name = frame[np.argmax(frame_time)]

            max_file = join(directory, max_file_name)

            # Copy the best checkpoint to a stable name
            src = max_file
            dst = join(directory, 'model_max.h5')
            shutil.copyfile(src, dst)
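The checkpoint selection at the end of run_job is a reusable pattern: pick the model_frame_<N>.h5 file with the largest frame number, falling back to model_end.h5. A standalone sketch under that naming assumption:

from os import listdir
from os.path import isfile, join

import numpy as np

def newest_checkpoint(directory):
    # List plain files, keep frame checkpoints, and parse their frame numbers
    files = [f for f in listdir(directory) if isfile(join(directory, f))]
    frames = [f for f in files if 'model_frame' in f and f.endswith('.h5')]
    frame_times = [int(f[12:-3]) for f in frames]  # strip 'model_frame_' and '.h5'
    if not frame_times:
        return join(directory, 'model_end.h5')
    return join(directory, frames[np.argmax(frame_times)])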
Example #4
    def __init__(self, args, state_dim, action_dim, modelFunc=None):
        print(state_dim)  # Debug output: input state shape
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.h = args.hyper
        self.metrics = Metrics()
        self.memory = Memory(self.h.memory_size, self.state_dim, 1)
        self.brain = Brain(self, modelFunc)
        self.args = args
        self.epsilon = self.h.epsilon_init

        self.run_count = -1
        self.replay_count = -1
        self.save_iterator = -1
        self.update_iterator = -1
        self.mode = 'observe'

        load_weights(self)  # Restore saved weights, if any
        self.brain.update_target_model()  # Sync target network with online network
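update_target_model() here is the standard DQN-style target-network refresh: after loading, the online network's weights are copied into the frozen target network. A generic sketch of that step, assuming Keras-style models (the actual Brain implementation may differ):

def update_target_model(model, target_model):
    # Overwrite the target network with the online network's current weights
    target_model.set_weights(model.get_weights())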
Example #5
    def __init__(self, args, state_dim, action_dim):
        self.h = args.hyper
        self.mode = 'observe'
        self.args = args
        self.metrics = Metrics()
        self.action_dim = action_dim
        self.state_dim = state_dim
        self.memory = Memory(self.h.memory_size)
        self.run_count = -1
        self.replay_count = -1
        self.save_iterator = -1
        self.update_iterator = -1

        if self.args.directory == 'default':
            self.args.directory = G.CUR_FOLDER

        results_location = G.RESULT_FOLDER_FULL + '/' + self.args.directory
        data_location = G.DATA_FOLDER_FULL + '/' + self.args.directory
        os.makedirs(results_location, exist_ok=True)  # Generates results folder
        os.makedirs(data_location, exist_ok=True)  # Generates data folder
        self.results_location = results_location + '/'
        self.data_location = data_location + '/'
Example #6
    def __init__(self,
                 args,
                 state_dim,
                 action_dim,
                 model_func=None,
                 visualization=False,
                 brain=None,
                 idx=0):
        self.idx = idx
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.args = args
        self.h = self.args.hyper
        self.epsilon = self.h.epsilon_init
        self.h.gamma_n = self.h.gamma**self.h.memory_size  # Discount for n-step returns
        self.run_count = -1
        self.replay_count = -1
        self.save_iterator = -1
        self.update_iterator = -1
        self.mode = 'train'
        self.R = 0
        self.visualization = visualization

        self.metrics = Metrics()
        self.memory = Memory(self.h.memory_size, self.state_dim, 1)
        if not brain:
            self.brain = Brain(self, model_func)
        else:
            self.brain = brain

        self.brain.init_model()
        load_weights(self)
        if self.args.env.problem != 'Hexagon':
            self.brain.finalize_model()

        save_class(self.args, self.data_location + 'args')
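The brain parameter enables the usual A3C weight-sharing setup: the first agent builds the shared network and later workers reuse it, so their asynchronous updates train one model. A usage sketch with the Agent class above (args, state_dim, action_dim, and build_model are assumed to come from the surrounding application):

# Hypothetical wiring, not from the source
master = Agent(args, state_dim, action_dim, model_func=build_model, idx=0)
workers = [Agent(args, state_dim, action_dim, brain=master.brain, idx=i)
           for i in range(1, 4)]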
Example #7
    def gather(self):
        # Map each training level to its menu index, then compute how many
        # right-arrow presses separate consecutive levels on the circular menu
        idxs = []
        idx_diff = []
        for i in range(self.levelCount):
            idxs.append(self.levels.index(self.levelpairs[i][1]))
        idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
        for i in range(1, self.levelCount):
            idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)

        print(idxs)
        print(idx_diff)

        print('Switching Levels Initial')
        for i in range(idx_diff[0]):
            self.env.env.env.press('right_arrow')
            time.sleep(0.1)
            self.env.env.env.release('right_arrow')
            time.sleep(0.1)
        self.curlevel = idxs[0]

        for i in range(self.levelCount):
            trained_index = self.levels.index(self.levelpairs[i][0])
            training_index = self.levels.index(self.levelpairs[i][1])
            level_name = self.levels[training_index]
            print('gathering from ' + level_name)

            delay = self.gather_delay[training_index]
            args = copy.deepcopy(self.args)
            args.memory_delay = delay
            args.directory = self.directory[0]  #self.directory[trained_index]
            args.weight_override = self.weights[i]

            agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
                args, agent_a3c.Agent, self.state_dim, self.action_dim)

            while True:
                hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                    agent, self.env, self.state_dim, self.action_dim,
                    hasSavedMemory, max_frame_saved)
                if hasSavedMemory:
                    break

            agent.metrics.save(agent.results_location,
                               'metrics')  # Save metrics
            agent.metrics.runs.graph(agent.results_location, 'runs')
            agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
            agent.brain.metrics = agent.metrics

            if i != self.levelCount - 1:
                print('Switching Levels')
                for j in range(idx_diff[i + 1]):
                    self.env.env.env.press('right_arrow')
                    time.sleep(0.1)
                    self.env.env.env.release('right_arrow')
                    time.sleep(0.1)
                self.curlevel = idxs[i + 1]

        print('all done')