Example #1
def play_game_gym_ddqn(args, agent_func):
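    """DDQN loop for a Gym-style environment: pick the RGB wrapper when more
    than one image channel is configured, build the agent, then run episodes
    forever, switching from 'observe' to 'train' once enough transitions are
    stored and saving weights on the save-rate counter."""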
    if args.hyper.img_channels > 1:
        env = EnvironmentGymRgb(args.env)
        state_dim = env.env.state_dim + [args.hyper.img_channels]
    else:
        env = EnvironmentGym(args.env)
        state_dim = env.env.state_dim
    action_dim = env.env.action_dim

    agent = agent_func(args, state_dim, action_dim,
                       getattr(models, args.model))

    iteration = 0
    while True:
        iteration += 1

        R, use_rate = env.run(agent)

        if agent.memory.total_saved > agent.h.extra.observe:
            if agent.mode == 'observe':
                agent.mode = 'train'
                print('Training...')
                time.sleep(0.5)

        if agent.mode == 'train':
            if iteration % 10 == 0:
                print("Step:", agent.memory.total_saved, ", Total reward:", R)

        # agent.metrics.display_metrics(frame, use_rate, agent.memory.total_saved, agent.epsilon)

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent)
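
Every example on this page gates checkpointing on the same counter pattern: the environment presumably advances agent.save_iterator as it runs, and once it exceeds agent.h.save_rate the weights are written and the counter is reduced by the save rate (rather than reset) so surplus frames carry over. Below is a minimal, self-contained sketch of that pattern; PeriodicSaver and its methods are hypothetical names, not part of the codebase.

# Minimal sketch of the save-rate counter used in these examples (hypothetical
# names): frames accumulate in `save_iterator`; once they exceed `save_rate`
# a checkpoint is written and the surplus carries over to the next interval.
class PeriodicSaver:
    def __init__(self, save_rate):
        self.save_rate = save_rate    # frames between checkpoints
        self.save_iterator = 0        # frames accumulated since the last save

    def step(self, frames_ran, save_fn):
        self.save_iterator += frames_ran
        if self.save_rate < self.save_iterator:
            self.save_iterator -= self.save_rate
            save_fn()


if __name__ == '__main__':
    saver = PeriodicSaver(save_rate=1000)
    for episode_frames in (400, 350, 500, 900):
        saver.step(episode_frames, lambda: print('checkpoint saved'))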
Example #2
def play_game_real_ddqn(args, agent_func, screen_number=0, screen_id=-1):
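    """DDQN loop for a real-time environment: optionally pre-train on a replay
    memory loaded from ``args.data``, then run episodes forever, replaying
    experience in 'train' mode and periodically saving weights and metrics."""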

    img_channels = args.hyper.img_channels
    env = EnvironmentRealtime(args.env)
    action_dim = env.env.action_dim()
    state_dim = list(env.env.state_dim()) + [img_channels]

    agent = agent_func(args, state_dim, action_dim,
                       getattr(models, args.hyper.model))

    if args.data:
        # Load Memory
        load_memory_v2(agent, args.data)

        agent.mode = 'train'
        loaded_replays = int(agent.memory.size)
        print('Running', loaded_replays, 'replays')
        # Train on loaded memory
        for i in range(loaded_replays):
            agent.update_agent()
            if i % 1000 == 0:
                print(i, '/', loaded_replays, 'replays learned')
            if i % 100 == 0:
                agent.replay(debug=True)
            else:
                agent.replay(debug=False)

        agent.save_weights()

    time.sleep(1)

    while True:
        frame, use_rate, frame_saved = env.run(agent)

        agent.metrics.display_metrics(frame, use_rate,
                                      agent.memory.total_saved, agent.epsilon)

        if agent.mode == 'train':
            print('Running', frame_saved, 'replays')
            for i in range(frame_saved):
                if i % 100 == 0:
                    agent.replay(debug=True)
                else:
                    agent.replay(debug=False)

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent)
            if agent.mode == 'train':  # Fix this later, not correct
                agent.metrics.save(agent.results_location, 'metrics')
                agent.metrics.runs.graph(agent.results_location)
                agent.metrics.save_metrics_training(agent.results_location)
Example #3
    def incremental_learn(self, args, levels_list):
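        """Train incrementally across ``levels_list``: step the environment to
        each requested level with right-arrow presses, train on it until
        ``break_time`` expires, then save that level's weights and metrics and
        reset the metrics; the network is re-initialised only at the end."""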
        args = copy.deepcopy(args)
        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
            args, agent_a3c.Agent, self.state_dim, self.action_dim)
        length = len(levels_list)
        idxs = []
        idx_diff = []
        for i in range(length):
            idxs.append(self.levels.index(levels_list[i]))
        idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
        for i in range(1, length):
            idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)

        print(idxs)
        print(idx_diff)

        print('Switching Levels Initial')
        for i in range(idx_diff[0]):
            self.env.env.env.press('right_arrow')
            time.sleep(0.1)
            self.env.env.env.release('right_arrow')
            time.sleep(0.1)
        self.curlevel = idxs[0]

        for i in range(length):
            time_start = time.time()
            while True:
                hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                    agent, self.env, self.state_dim, self.action_dim,
                    hasSavedMemory, max_frame_saved)
                if time.time() - time_start > break_time:
                    break
            if i != length - 1:
                print('Switching Levels')
                for j in range(idx_diff[i + 1]):
                    self.env.env.env.press('right_arrow')
                    time.sleep(0.1)
                    self.env.env.env.release('right_arrow')
                    time.sleep(0.1)
                self.curlevel = idxs[i + 1]

            save_weights(agent, 'id_' + str(i))  # Save weights
            agent.metrics.save(agent.results_location,
                               'metrics_id_' + str(i))  # Save metrics
            agent.metrics.runs.graph(agent.results_location,
                                     'runs_id_' + str(i))
            agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
            agent.brain.metrics = agent.metrics

        agent.brain.init_vars()  # Reset network
Example #4
def play_game_real_a3c_incremental(agent, env, state_dim, action_dim,
                                   has_saved_memory, max_frame_saved):
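    """Run one incremental-training iteration: play an episode, checkpoint on a
    new longest run, dump the replay memory once it is full, and (outside the
    'gather'/'run' modes) optimize a fixed number of batches."""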

    pointer_start = agent.brain.brain_memory.cur_index + 0
    frame, use_rate, frame_saved = env.run(agent)
    pointer_end = agent.brain.brain_memory.cur_index + 0
    agent.metrics.display_metrics(frame, use_rate, agent.memory.total_saved,
                                  agent.epsilon)

    if frame_saved > max_frame_saved:
        print('New max time!')
        max_frame_saved = frame_saved

        save_memory_subset(agent,
                           pointer_start,
                           pointer_end,
                           frame_saved,
                           skip=1)
        save_weights(agent, 'frame_' + str(frame_saved))

    if agent.h.save_rate < agent.save_iterator:
        agent.save_iterator -= agent.h.save_rate
        if agent.args.mode != 'gather':
            save_weights(agent, agent.run_count)
            agent.metrics.save(agent.results_location, 'metrics')
            agent.metrics.runs.graph(agent.results_location)

        # agent.metrics.save_metrics_v(agent.results_location)
        # agent.metrics.a3c.graph_all(agent.results_location)
        # agent.metrics.save_metrics_training(agent.results_location)

    if agent.brain.brain_memory.isFull and has_saved_memory is False:
        has_saved_memory = True
        save_memory_v2(agent)

    frame_saved = int(frame_saved)
    if frame_saved > 3000:
        frame_saved = 3000
    if frame_saved < 300:
        frame_saved = 300
    batch_count = int(90000 / frame_saved)
    batch_count = 15  # 75
    if agent.brain.brain_memory.isFull:
        if agent.args.mode != 'gather' and agent.args.mode != 'run':
            agent.brain.optimize_batch(batch_count)

    return has_saved_memory, max_frame_saved
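
The tail of this example clamps frame_saved to the range [300, 3000] and derives a batch count from a fixed training budget before hard-coding batch_count = 15. A small, hypothetical helper showing just the derivation (the names are illustrative, not the repository's):

# Hypothetical helper mirroring the clamping above: the number of optimization
# batches comes from a fixed frame budget divided by the (bounded) number of
# frames the last run saved.
def batches_for_run(frame_saved, lo=300, hi=3000, budget=90000):
    frame_saved = max(lo, min(hi, int(frame_saved)))
    return budget // frame_saved


assert batches_for_run(150) == 300    # short run is clamped up to 300 frames
assert batches_for_run(4500) == 30    # long run is clamped down to 3000 frames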
Example #5
def play_game_gym_a3c_multithread(agent, env):
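    """Worker loop for multi-threaded A3C on a Gym environment: run episodes
    forever, log progress every 10 iterations while training, and let worker 0
    handle the periodic weight saves."""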
    iteration = 0
    while True:
        iteration += 1

        R, use_rate = env.run(agent)

        if agent.mode == 'train':
            if iteration % 10 == 0:
                print("Step:", agent.memory.total_saved, ", Total reward:", R,
                      "idx:", agent.idx)

        # agent.metrics.display_metrics(frame, use_rate, agent.memory.total_saved, agent.epsilon)

        if agent.idx == 0:
            if agent.h.save_rate < agent.save_iterator:
                agent.save_iterator -= agent.h.save_rate
                save_weights(agent, agent.run_count)
Example #6
    def run_job(self, job):
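        """Run a single job: switch to its level, train (or gather memory)
        until the time budget expires, save final weights and metrics, and,
        outside 'gather' mode, promote the best per-frame checkpoint to
        model_max.h5."""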
        args = job.args
        level = job.level

        self.controller.switch(level)

        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(args, agent_a3c.Agent,
                                                                                     self.state_dim, self.action_dim)


        time_start = time.time()
        while True:
            hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(agent, self.env, self.state_dim,
                                                                             self.action_dim, hasSavedMemory,
                                                                             max_frame_saved)
            if agent.args.mode == 'gather':
                if hasSavedMemory:
                    break
            elif time.time() - time_start > self.break_time:
                break

        save_weights(agent, 'end') # Save weights
        agent.metrics.save(agent.results_location, 'metrics_end') # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs_end')
        agent.metrics = Metrics(agent.metrics.type) # Reset metrics
        agent.brain.metrics = agent.metrics
        agent.brain.init_vars() # Reset network

        if agent.args.mode != 'gather':
            directory = agent.results_location
            allfiles = listdir(directory)
            onlyfiles = [f for f in allfiles if isfile(join(directory, f))]
            frame = [f for f in onlyfiles if 'model_frame' in f and '.h5' in f]
            frame_time = [int(f[12:-3]) for f in frame]
            if not frame_time:
                max_file_name = 'model_end.h5'
            else:
                max_file_name = frame[np.argmax(frame_time)]

            max_file = join(directory, max_file_name)

            src = max_file
            dst = join(directory, 'model_max.h5')
            shutil.copyfile(src, dst)
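
The checkpoint-promotion step at the end assumes per-frame weight files named like model_frame_<frames>.h5 (the original extracts the frame count with a fixed slice, f[12:-3]). A hedged, self-contained version of the same step, with hypothetical names:

# Self-contained sketch of the promotion step above. The file-name convention
# 'model_frame_<frames>.h5' is assumed from the slicing in the original code;
# when no per-frame checkpoint exists, 'model_end.h5' is promoted instead.
import os
import re
import shutil

def promote_best_checkpoint(directory):
    pattern = re.compile(r'model_frame_(\d+)\.h5$')
    best_frames, best_name = -1, 'model_end.h5'
    for name in os.listdir(directory):
        match = pattern.match(name)
        if match and int(match.group(1)) > best_frames:
            best_frames, best_name = int(match.group(1)), name
    shutil.copyfile(os.path.join(directory, best_name),
                    os.path.join(directory, 'model_max.h5'))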
Example #7
    def run(self):
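        """Thread body for an A3C worker: run episodes until the stop signal is
        set, log progress every 10 iterations while training, and let worker 0
        save weights periodically."""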
        iteration = 0
        while not self.stop_signal:
            iteration += 1

            R, use_rate = self.env.run(self.agent)

            if self.agent.mode == 'train':
                if iteration % 10 == 0:
                    print("Step:", self.agent.memory.total_saved,
                          ", Total reward:", R, "idx:", self.agent.idx)

            # agent.metrics.display_metrics(frame, use_rate, agent.memory.total_saved, agent.epsilon)

            if self.agent.idx == 0:
                if self.agent.h.save_rate < self.agent.save_iterator:
                    self.agent.save_iterator -= self.agent.h.save_rate
                    save_weights(self.agent,
                                 self.agent.brain.brain_memory.total_saved)
Example #8
def play_game_real_a3c(args, agent_func, screen_number=0, screen_id=-1):
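    """A3C loop for a real-time environment: run episodes forever, checkpoint
    on new longest runs, periodically save weights and metrics, dump the replay
    memory once it is full (stopping early in 'gather' mode), and optimize on a
    clamped batch count each iteration."""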

    img_channels = args.hyper.img_channels
    env = EnvironmentRealtimeA3C(args.env)
    action_dim = env.env.action_dim()
    state_dim = list(env.env.state_dim()) + [img_channels]
    # env = Environment_realtime_a3c(emulator, img_channels)

    print(state_dim)
    print(action_dim)

    agent = agent_func(args, state_dim, action_dim,
                       getattr(models, args.model))

    has_saved_memory = False

    max_frame_saved = 300
    total_saved = 0
    while True:
        pointer_start = agent.brain.brain_memory.cur_index + 0
        frame, use_rate, frame_saved = env.run(agent)
        pointer_end = agent.brain.brain_memory.cur_index + 0
        agent.metrics.display_metrics(frame, use_rate,
                                      agent.memory.total_saved, agent.epsilon)

        if frame_saved > max_frame_saved:
            print('New max time!')
            max_frame_saved = frame_saved

            save_memory_subset(agent,
                               pointer_start,
                               pointer_end,
                               frame_saved,
                               skip=8)
            save_weights(agent, 'frame_' + str(frame_saved))

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent, agent.run_count)
            agent.metrics.save(agent.results_location, 'metrics')
            agent.metrics.runs.graph(agent.results_location)
            # agent.metrics.save_metrics_v(agent.results_location)
            # agent.metrics.a3c.graph_all(agent.results_location)
            # agent.metrics.save_metrics_training(agent.results_location)

        if agent.brain.brain_memory.is_full and has_saved_memory is False:
            has_saved_memory = True
            save_memory_v2(agent)
            if agent.args.mode == 'gather':
                print('Finished Gathering Data')
                break
        frame_saved = int(frame_saved)
        if frame_saved > 1000:
            frame_saved = 1000
        if frame_saved < 300:
            frame_saved = 300
        if total_saved > 100000:
            frame_saved = int(frame_saved / 4)
        if agent.brain.brain_memory.is_full:
            total_saved += frame_saved
            agent.brain.optimize_batch(frame_saved)