class AgentController:
    def __init__(self):
        self.agent = CnnDqnAgent()
        self.agent_initialized = False
        self.cycle_counter = 0
        self.log_file = 'reward.log'
        self.reward_sum = 0
        # one command per pad; each entry is one of the candidates in
        # set_commands_from_action ("press", "up", "down", "right", "left",
        # "none"); start with no-ops
        self.commands = ["none", "none", "none", "none", "none"]

    def get_commands(self):
        return self.commands

    def set_commands_from_action(self, action):
        # map each action index onto its command string
        command_candidate = ["press", "up", "down", "right", "left", "none"]
        self.commands = [command_candidate[a] for a in action]

    def update(self, message):
        image = message["image"]
        pad_states = message["pad_states"]
        end_episode = message['end_episode']
        observation = {"image": image, "pad_states": pad_states}
        reward = message['reward']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent......")
            self.agent.agent_init(use_gpu=args.gpu,  # `args` comes from the enclosing script
                                  pad_states_dim=len(pad_states))

            action = self.agent.agent_start(observation)
            self.set_commands_from_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' + str(self.reward_sum) +
                        '\n')
                self.reward_sum = 0
            else:
                action, eps, q_now, obs_array = self.agent.agent_step(
                    reward, observation)
                self.set_commands_from_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now,
                                             obs_array)
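A minimal sketch of driving the class above. The message keys mirror what update() reads; agent_init() references a module-level args, so that namespace is stubbed with argparse here, and get_message_from_env / send_commands_to_env are hypothetical transport hooks, not part of the original code:

import argparse

args = argparse.Namespace(gpu=-1)  # AgentController.update() expects this module-level name

controller = AgentController()
while True:
    message = get_message_from_env()  # hypothetical: yields a dict shaped like the one below
    # {"image": ..., "pad_states": [...], "reward": 0.0, "end_episode": False}
    controller.update(message)
    send_commands_to_env(controller.get_commands())  # hypothetical transport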
Example #2
agent = CnnDqnAgent()
agent_initialized = False
cycle_counter = 0
thread_event = threading.Event()
log_file = args.log_file
reward_sum = 0
depth_image_dim = 32 * 32
depth_image_count = 1
total_episode = 10000
episode_count = 0

while episode_count <= total_episode:
    if not agent_initialized:
        agent_initialized = True
        print("initializing agent...")
        agent.agent_init(use_gpu=args.gpu,
                         depth_image_dim=depth_image_dim * depth_image_count)

        env = gym.make('Lis-v2')

        observation, _, _ = env.step(
            env.action_space.sample())  # no image at first, so send a random action ①
        action = agent.agent_start(observation)  # got an observation, so start the agent
        observation, reward, end_episode = env.step(action)  # decide and send the action ⑴, ②

        with open(log_file, 'w') as the_file:
            the_file.write('cycle, episode_reward_sum \n')
    else:
        thread_event.wait()
        cycle_counter += 1  # reward bookkeeping
        reward_sum += reward
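The gallery cuts this example off mid-loop: as written, episode_count is never incremented and nothing sets thread_event, so the loop cannot advance. A hedged sketch of how the body presumably continues, modeled on the step/update pattern of Examples #1 and #3; the agent_step / agent_step_update signatures are borrowed from Example #1 and may differ in this variant:

        if end_episode:
            agent.agent_end(reward)
            with open(log_file, 'a') as the_file:
                the_file.write('%d, %f\n' % (cycle_counter, reward_sum))
            reward_sum = 0
            episode_count += 1  # without this, the while condition never advances
            action = agent.agent_start(observation)
        else:
            action, eps, q_now, obs_array = agent.agent_step(reward, observation)
            agent.agent_step_update(reward, action, eps, q_now, obs_array)
        observation, reward, end_episode = env.step(action)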
Example #3
def agent_process(gpu_id, log_file, q_from_parent, q_to_parent):
    # initialization
    depth_image_dim = 32 * 32
    depth_image_count = 1

    has_started = False
    cycle_counter = 0
    reward_sum = 0
    agent = CnnDqnAgent()

    print("initializing agent...")
    agent.agent_init(
        use_gpu=gpu_id,
        depth_image_dim=depth_image_dim * depth_image_count,
    )

    with open(log_file, 'w') as the_file:
        the_file.write('cycle, episode_reward_sum \n')

    # step
    byte_data = q_from_parent.get()
    while byte_data is not None:
        # data extraction
        dat = msgpack.unpackb(byte_data)
        image = [
            Image.open(io.BytesIO(bytearray(dat[b'image'][i])))
            for i in range(depth_image_count)
        ]
        depth = [
            np.array(
                ImageOps.grayscale(
                    Image.open(io.BytesIO(bytearray(
                        dat[b'depth'][i]))))).reshape(depth_image_dim)
            for i in range(depth_image_count)
        ]
        observation = {"image": image, "depth": depth}
        reward = dat[b'reward']
        end_episode = dat[b'endEpisode']

        # action-making
        ret = None
        if not has_started:
            has_started = True
            ret = agent.agent_start(observation)
        else:
            cycle_counter += 1
            reward_sum += reward

            if end_episode:
                agent.agent_end(reward)
                with open(log_file, 'a') as the_file:
                    the_file.write('%d, %f\n' % (cycle_counter, reward_sum))
                reward_sum = 0

                ret = agent.agent_start(observation)
            else:
                action, eps, q_now, new_feature_vec, deg_interest = agent.agent_step(
                    reward, observation)
                agent.agent_step_update(reward, action, eps, q_now,
                                        new_feature_vec, deg_interest)
                ret = (action, deg_interest)

        q_to_parent.put(ret)
        byte_data = q_from_parent.get()
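agent_process is designed to run in a child process: it reads msgpack-encoded frames from one queue, answers on the other, and treats None as a shutdown sentinel. A minimal parent-side sketch under those assumptions; png_bytes / depth_bytes stand in for real encoded frames, and gpu_id=-1 is assumed to select the CPU:

import multiprocessing as mp
import msgpack

q_to_child = mp.Queue()
q_from_child = mp.Queue()
child = mp.Process(target=agent_process,
                   args=(-1, 'reward.log', q_to_child, q_from_child))
child.start()

# one frame: lists of encoded image bytes, matching the unpacking code above
frame = {b'image': [png_bytes], b'depth': [depth_bytes],
         b'reward': 0.0, b'endEpisode': False}
q_to_child.put(msgpack.packb(frame))
ret = q_from_child.get()  # agent_start's action for the first frame

q_to_child.put(None)  # sentinel: ends the child's while loop
child.join()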
Example #4
class Agent:
    agent_initialized = False
    ga = GeneGenerator()  # add Naka
    agent_id = -1  # add Naka
    cycle_counter = 0
    thread_event = threading.Event()
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1
    gene_count = 3  # Number of gene (add Naka)
    scale_x = 1
    scale_y = 1
    scale_z = 1

    def __init__(self, args):
        print("start to load cnn model")
        self.args = args
        self.cnnDqnAgent = CnnDqnAgent(use_gpu=self.args.gpu,
                                       depth_image_dim=self.depth_image_dim *
                                       self.depth_image_count,
                                       agent_id=self.agent_id)
        print("finish loading cnn model")
        self.cnnDqnAgent.agent_init()
        print("finish init cnn dqn agent")

    def received_message(self, agentServer, dat):
        image = []
        for i in range(self.depth_image_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
        depth = []
        for i in range(self.depth_image_count):
            d = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
            depth.append(
                np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {
            "image": image,
            "depth": depth,
            "scale": [dat['x_s'], dat['y_s'], dat['z_s']]
        }

        self.scale_x = dat['x_s']
        self.scale_y = dat['y_s']
        self.scale_z = dat['z_s']

        # print 'scale'
        # print observation['scale']

        gene = list(dat['gene'])  # add Naka
        reward = dat['reward']
        rewards = dat['rewards']  # add Naka
        self.agent_id = dat['agent_id']  # add Naka
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            print('connected and agent started..')
            self.agent_initialized = True
            action = self.cnnDqnAgent.agent_start(observation)
            agentServer.send_action(action)
            if not os.path.exists(self.args.log_file):
                with open(self.args.log_file, 'w') as the_file:
                    the_file.write('cycle, episode_reward_sum \n')
        else:
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.cnnDqnAgent.agent_end(reward, self.agent_id)
                action = self.cnnDqnAgent.agent_start(observation)  # TODO
                self.gene = self.ga.gene_updater(gene, rewards)  # add Naka
                print(self.agent_id, self.gene)
                agentServer.send_actionAndgene(
                    action, self.gene[self.agent_id])  # add Naka
                with open(self.args.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' + str(self.reward_sum) +
                        ',' + str(self.scale_x) + ',' + str(self.scale_y) +
                        ',' + str(self.scale_z) + '\n')
                self.reward_sum = 0
            else:
                action, eps, obs_array = self.cnnDqnAgent.agent_step(
                    reward, observation)
                agentServer.send_action(action)
                self.cnnDqnAgent.agent_step_update(reward, action, eps,
                                                   obs_array, self.agent_id)

        self.thread_event.set()
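received_message is written as a server callback: it answers through agentServer.send_action / send_actionAndgene rather than returning. A stub server like the following can exercise the handler offline; every name here is a stand-in for the project's real transport, and dat must be shaped like the dict decoded above:

class FakeAgentServer:
    def send_action(self, action):
        print("action ->", action)

    def send_actionAndgene(self, action, gene):
        print("action ->", action, "gene ->", gene)

agent = Agent(args)  # args needs .gpu and .log_file attributes
agent.received_message(FakeAgentServer(), dat)  # dat: a decoded message dict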