class AgentController:
    """Drives a CnnDqnAgent from incoming environment messages and keeps
    the resulting controller commands plus per-episode reward bookkeeping."""

    def __init__(self):
        # Underlying DQN agent; real initialization is deferred to the
        # first update() call (needs pad_states to size the network).
        self.agent = CnnDqnAgent()
        self.agent_initialized = False
        self.cycle_counter = 0
        self.log_file = 'reward.log'
        self.reward_sum = 0
        # Five command slots, all idle until the agent picks actions.
        # Valid values: press, up, down, right, left, none.
        self.commands = ["none"] * 5

    def get_commands(self):
        """Return the most recently computed command list."""
        return self.commands

    def set_commands_from_action(self, action):
        """Translate a sequence of action indices into command strings."""
        names = ["press", "up", "down", "right", "left", "none"]
        self.commands = [names[idx] for idx in action]

    def update(self, message):
        """Feed one environment message to the agent and refresh commands.

        ``message`` must provide "image", "pad_states", "end_episode" and
        "reward" entries.  The first call initializes the agent and starts
        the episode; later calls either step the agent or close the
        episode and append a ``cycle,reward`` row to the log file.
        """
        pad_states = message["pad_states"]
        observation = {"image": message["image"], "pad_states": pad_states}
        reward = message['reward']

        if not self.agent_initialized:
            # First message ever: build the agent and kick off the episode.
            self.agent_initialized = True
            print("initializing agent......")
            self.agent.agent_init(use_gpu=args.gpu,
                                  pad_states_dim=len(pad_states))
            self.set_commands_from_action(self.agent.agent_start(observation))
            # Fresh log file with a CSV-style header.
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
            return

        self.cycle_counter += 1
        self.reward_sum += reward
        if message['end_episode']:
            # Close the episode, log the accumulated reward, reset the sum.
            self.agent.agent_end(reward)
            with open(self.log_file, 'a') as the_file:
                the_file.write('{},{}\n'.format(self.cycle_counter,
                                                self.reward_sum))
            self.reward_sum = 0
        else:
            # Ordinary step: pick the next action and learn from this one.
            action, eps, q_now, obs_array = self.agent.agent_step(
                reward, observation)
            self.set_commands_from_action(action)
            self.agent.agent_step_update(reward, action, eps, q_now,
                                         obs_array)
# Script-level driver: runs a CnnDqnAgent against the 'Lis-v2' gym env.
# NOTE(review): this fragment looks truncated — episode_count is never
# incremented (the while condition can never become false in the visible
# code) and thread_event is never set here; confirm against the full file.
agent = CnnDqnAgent()
agent_initialized = False       # flips to True after first-pass setup
cycle_counter = 0
thread_event = threading.Event()
log_file = args.log_file        # CSV log path taken from CLI args
reward_sum = 0                  # accumulated reward for the current episode
depth_image_dim = 32 * 32       # depth frames flattened to 1024 values
depth_image_count = 1
total_episode = 10000
episode_count = 0
while episode_count <= total_episode:
    if not agent_initialized:
        # One-time setup: init agent, create env, write log header.
        agent_initialized = True
        print("initializing agent...")
        agent.agent_init(use_gpu=args.gpu,
                         depth_image_dim=depth_image_dim * depth_image_count)
        env = gym.make('Lis-v2')
        # No image exists yet, so send a random action first.
        observation, _, _ = env.step(env.action_space.sample())
        # Got an observation, so start the agent.
        action = agent.agent_start(observation)
        # Decide and send the action.
        observation, reward, end_episode = env.step(action)
        with open(log_file, 'w') as the_file:
            the_file.write('cycle, episode_reward_sum \n')
    else:
        # presumably another thread sets this event when new data arrives
        # — TODO confirm; nothing in this fragment ever calls .set().
        thread_event.wait()
        cycle_counter += 1
        # reward accumulation ("報酬計算" in the original)
        reward_sum += reward
def agent_process(gpu_id, log_file, q_from_parent, q_to_parent):
    """Worker-process loop that drives a CnnDqnAgent over message queues.

    Pulls msgpack-encoded payloads from ``q_from_parent`` (each carrying
    b'image', b'depth', b'reward' and b'endEpisode' entries), feeds them to
    the agent, and pushes the agent's reply onto ``q_to_parent`` — either a
    start action or an ``(action, deg_interest)`` tuple for ordinary steps.
    A ``None`` payload is the shutdown sentinel.

    Args:
        gpu_id: GPU index forwarded to ``agent_init`` as ``use_gpu``.
        log_file: path of a CSV log receiving one ``cycle, reward`` row
            per finished episode.
        q_from_parent: queue of msgpack byte payloads (or None to stop).
        q_to_parent: queue for the agent's responses.
    """
    # --- initialization -------------------------------------------------
    depth_image_dim = 32 * 32   # depth frames flattened to 1024 values
    depth_image_count = 1
    has_started = False
    cycle_counter = 0
    reward_sum = 0

    agent = CnnDqnAgent()
    print("initializing agent...")
    agent.agent_init(
        use_gpu=gpu_id,
        depth_image_dim=depth_image_dim * depth_image_count,
    )
    with open(log_file, 'w') as the_file:
        the_file.write('cycle, episode_reward_sum \n')

    # --- message loop ---------------------------------------------------
    while True:
        byte_data = q_from_parent.get()
        if byte_data is None:
            # Sentinel from the parent: shut the worker down.
            break

        # Decode the observation payload.
        dat = msgpack.unpackb(byte_data)
        image = [
            Image.open(io.BytesIO(bytearray(dat[b'image'][i])))
            for i in range(depth_image_count)
        ]
        depth = [
            np.array(
                ImageOps.grayscale(
                    Image.open(io.BytesIO(bytearray(
                        dat[b'depth'][i]))))).reshape(depth_image_dim)
            for i in range(depth_image_count)
        ]
        observation = {"image": image, "depth": depth}
        reward = dat[b'reward']
        end_episode = dat[b'endEpisode']

        # Pick the reply for this message.
        if not has_started:
            # Very first message: no previous action to learn from.
            has_started = True
            ret = agent.agent_start(observation)
        else:
            cycle_counter += 1
            reward_sum += reward
            if end_episode:
                # Close the episode, log its reward, restart the agent.
                agent.agent_end(reward)
                with open(log_file, 'a') as the_file:
                    the_file.write('%d, %f\n' % (cycle_counter, reward_sum))
                reward_sum = 0
                ret = agent.agent_start(observation)
            else:
                # Ordinary step: choose next action, learn from this one.
                action, eps, q_now, new_feature_vec, deg_interest = \
                    agent.agent_step(reward, observation)
                agent.agent_step_update(reward, action, eps, q_now,
                                        new_feature_vec, deg_interest)
                ret = (action, deg_interest)

        q_to_parent.put(ret)
class Agent:
    """Server-side agent wrapper: decodes incoming observation messages,
    steps a CnnDqnAgent, and sends actions (and updated genes) back through
    the agent server.

    Fix: the original used Python 2 ``print`` statements and ``xrange``,
    which are syntax/name errors under Python 3 and inconsistent with the
    rest of this file; converted to ``print()`` and ``range``.
    """

    # NOTE(review): these are class-level attributes mutated via ``self``,
    # so ``ga``/``thread_event`` are shared across instances — confirm
    # only one Agent instance is ever created.
    agent_initialized = False
    ga = GeneGenerator()  # add Naka
    agent_id = -1  # add Naka
    cycle_counter = 0
    thread_event = threading.Event()
    reward_sum = 0
    depth_image_dim = 32 * 32  # depth frames flattened to 1024 values
    depth_image_count = 1
    gene_count = 3  # Number of gene (add Naka)
    scale_x = 1
    scale_y = 1
    scale_z = 1

    def __init__(self, args):
        """Load the CNN-DQN model immediately (can be slow)."""
        print("start to load cnn model")
        self.args = args
        self.cnnDqnAgent = CnnDqnAgent(
            use_gpu=self.args.gpu,
            depth_image_dim=self.depth_image_dim * self.depth_image_count,
            agent_id=self.agent_id)
        print("finish loading cnn model")
        self.cnnDqnAgent.agent_init()
        print("finish init cnn dqn agent")

    def received_message(self, agentServer, dat):
        """Handle one observation message from the server.

        ``dat`` is expected to carry 'image', 'depth', 'x_s'/'y_s'/'z_s'
        scales, 'gene', 'reward', 'rewards', 'agent_id' and 'endEpisode'.
        Sends the chosen action back via ``agentServer``.
        """
        # Decode RGB and depth frames from the raw byte payloads.
        image = [
            Image.open(io.BytesIO(bytearray(dat['image'][i])))
            for i in range(self.depth_image_count)
        ]
        depth = [
            np.array(ImageOps.grayscale(
                Image.open(io.BytesIO(bytearray(dat['depth'][i])))
            )).reshape(self.depth_image_dim)
            for i in range(self.depth_image_count)
        ]
        observation = {
            "image": image,
            "depth": depth,
            "scale": [dat['x_s'], dat['y_s'], dat['z_s']]
        }
        # Remember the latest scales so episode-end logging can report them.
        self.scale_x = dat['x_s']
        self.scale_y = dat['y_s']
        self.scale_z = dat['z_s']

        gene = list(dat['gene'])  # add Naka
        reward = dat['reward']
        rewards = dat['rewards']  # add Naka
        self.agent_id = dat['agent_id']  # add Naka
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            # First message: start the agent and create the log header
            # (only if the log file does not already exist).
            print('connected and agent started..')
            self.agent_initialized = True
            action = self.cnnDqnAgent.agent_start(observation)
            agentServer.send_action(action)
            if not os.path.exists(self.args.log_file):
                with open(self.args.log_file, 'w') as the_file:
                    the_file.write('cycle, episode_reward_sum \n')
        else:
            # Wait until the previous message has been fully processed.
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward
            if end_episode:
                # Episode over: finalize learning, restart, evolve genes,
                # and log cycle/reward/scales as one CSV row.
                self.cnnDqnAgent.agent_end(reward, self.agent_id)
                action = self.cnnDqnAgent.agent_start(observation)  # TODO
                self.gene = self.ga.gene_updater(gene, rewards)  # add Naka
                print(self.agent_id, self.gene)
                agentServer.send_actionAndgene(
                    action, self.gene[self.agent_id])  # add Naka
                with open(self.args.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' +
                        str(self.reward_sum) + ',' + str(self.scale_x) +
                        ',' + str(self.scale_y) + ',' +
                        str(self.scale_z) + '\n')
                self.reward_sum = 0
            else:
                # Ordinary step: choose next action, learn from this one.
                action, eps, obs_array = self.cnnDqnAgent.agent_step(
                    reward, observation)
                agentServer.send_action(action)
                self.cnnDqnAgent.agent_step_update(reward, action, eps,
                                                   obs_array, self.agent_id)
        # Release the next message handler.
        self.thread_event.set()