def __init__(self):
    self.agent = CnnDqnAgent()
    self.agent_initialized = False
    self.cycle_counter = 0
    self.log_file = 'reward.log'
    self.reward_sum = 0
    # five command slots; each holds one of: press, up, down, left, right, none
    self.commands = ["none", "none", "none", "none", "none"]
Example #2
def __init__(self, args):
    print "start to load cnn model"
    self.args = args
    # depth_image_dim, depth_image_count and agent_id are class attributes
    # (see the full Agent class in Example #10)
    self.cnnDqnAgent = CnnDqnAgent(use_gpu=self.args.gpu,
                                   depth_image_dim=self.depth_image_dim *
                                   self.depth_image_count,
                                   agent_id=self.agent_id)
    print 'finish loading cnn model'
    self.cnnDqnAgent.agent_init()
    print 'finish init cnn dqn agent'
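
For context, this constructor belongs to the Agent wrapper shown in full in Example #10; a minimal, hypothetical instantiation, assuming the argparse setup from Example #5, would be:

# Hypothetical usage; assumes the argparse namespace from Example #5 and the
# class attributes (depth_image_dim, depth_image_count, agent_id) defined on
# the Agent class in Example #10.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', '-g', default=-1, type=int)
args = parser.parse_args()

agent = Agent(args)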
Example #3
class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 0
    thread_event = threading.Event()
    log_file = args.log_file
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload)

        image = []
        for i in xrange(self.depth_image_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
        depth = []
        for i in xrange(self.depth_image_count):
            d = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
            depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {"image": image, "depth": depth}
        reward = dat['reward']
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            self.agent_initialized = True
            print ("initializing agent...")
            self.agent.agent_init(
                use_gpu=args.gpu,
                depth_image_dim=self.depth_image_dim * self.depth_image_count)

            action = self.agent.agent_start(observation)
            self.send_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                action = self.agent.agent_start(observation)  # TODO
                self.send_action(action)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(str(self.cycle_counter) +
                                   ',' + str(self.reward_sum) + '\n')
                self.reward_sum = 0
            else:
                action, eps, q_now, obs_array = self.agent.agent_step(reward, observation)
                self.send_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now, obs_array)

        self.thread_event.set()
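
A minimal sketch of how such a handler is typically served, assuming the WebSocket base class is ws4py's (the received_message/send signatures above are consistent with that API); host and port are illustrative:

# Hypothetical bootstrap; assumes AgentServer's WebSocket base class is
# ws4py's. Host and port are placeholders.
from wsgiref.simple_server import make_server
from ws4py.server.wsgirefserver import WSGIServer, WebSocketWSGIRequestHandler
from ws4py.server.wsgiutils import WebSocketWSGIApplication

server = make_server('127.0.0.1', 8765, server_class=WSGIServer,
                     handler_class=WebSocketWSGIRequestHandler,
                     app=WebSocketWSGIApplication(handler_cls=AgentServer))
server.initialize_websockets_manager()
server.serve_forever()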
Example #4
class AgentController:
    def __init__(self):
        self.agent = CnnDqnAgent()
        self.agent_initialized = False
        self.cycle_counter = 0
        self.log_file = 'reward.log'
        self.reward_sum = 0
        # five command slots; each holds one of: press, up, down, left, right, none
        self.commands = ["none", "none", "none", "none", "none"]

    def get_commands(self):
        return self.commands

    def set_commands_from_action(self, action):
        command_candidate = ["press", "up", "down", "right", "left", "none"]
        self.commands = [command_candidate[a] for a in action]

    def update(self, message):
        image = message["image"]
        pad_states = message["pad_states"]
        end_episode = message['end_episode']
        observation = {"image": image, "pad_states": pad_states}
        reward = message['reward']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent......")
            self.agent.agent_init(use_gpu=args.gpu,
                                  pad_states_dim=len(pad_states))

            action = self.agent.agent_start(observation)
            self.set_commands_from_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' + str(self.reward_sum) +
                        '\n')
                self.reward_sum = 0
            else:
                action, eps, q_now, obs_array = self.agent.agent_step(
                    reward, observation)
                self.set_commands_from_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now,
                                             obs_array)
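
For context, a hypothetical driver loop for AgentController; the message keys mirror exactly what update() reads, and the module-level args namespace (with a gpu field) is assumed to be parsed already, as in Example #5:

# Hypothetical driver; payload values are placeholders. The 227x227 frame
# size follows the frames noted in Example #9, and the pad-state vector
# length here is an arbitrary assumption.
from PIL import Image

controller = AgentController()
for step in range(3):
    message = {
        "image": [Image.new("RGB", (227, 227))],  # dummy RGB frame
        "pad_states": [0.0] * 6,                  # dummy pad-state vector
        "reward": 0.0,
        "end_episode": step == 2,
    }
    controller.update(message)
    print(controller.get_commands())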
Example #5
import argparse
import threading

import numpy as np

from cnn_dqn_agent import CnnDqnAgent  # module name per the comment in Example #7

parser = argparse.ArgumentParser(description='CnnDqnAgent training options.')
parser.add_argument('--gpu',
                    '-g',
                    default=-1,
                    type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--log-file',
                    '-l',
                    default='reward.log',
                    type=str,
                    help='reward log file name')
args = parser.parse_args()

agent = CnnDqnAgent()
agent_initialized = False
cycle_counter = 0
thread_event = threading.Event()
log_file = args.log_file
reward_sum = 0
depth_image_dim = 32 * 32
depth_image_count = 1
total_episode = 10000
episode_count = 0

while episode_count <= total_episode:
    if not agent_initialized:
        agent_initialized = True
        print("initializing agent...")
        agent.agent_init(use_gpu=args.gpu,
                         depth_image_dim=depth_image_dim * depth_image_count)
        # (the original snippet is truncated mid-call; completed to match the
        # identical initialization in Example #6)
Example #6
def agent_process(gpu_id, log_file, q_from_parent, q_to_parent):
    # initialization
    depth_image_dim = 32 * 32
    depth_image_count = 1

    has_started = False
    cycle_counter = 0
    reward_sum = 0
    agent = CnnDqnAgent()

    print("initializing agent...")
    agent.agent_init(
        use_gpu=gpu_id,
        depth_image_dim=depth_image_dim * depth_image_count,
    )

    with open(log_file, 'w') as the_file:
        the_file.write('cycle, episode_reward_sum \n')

    # step
    byte_data = q_from_parent.get()
    while byte_data is not None:
        # data extraction
        dat = msgpack.unpackb(byte_data)
        image = [
            Image.open(io.BytesIO(bytearray(dat[b'image'][i])))
            for i in range(depth_image_count)
        ]
        depth = [
            np.array(
                ImageOps.grayscale(
                    Image.open(io.BytesIO(bytearray(
                        dat[b'depth'][i]))))).reshape(depth_image_dim)
            for i in range(depth_image_count)
        ]
        observation = {"image": image, "depth": depth}
        reward = dat[b'reward']
        end_episode = dat[b'endEpisode']

        # action-making
        ret = None
        if not has_started:
            has_started = True
            ret = agent.agent_start(observation)
        else:
            cycle_counter += 1
            reward_sum += reward

            if end_episode:
                agent.agent_end(reward)
                with open(log_file, 'a') as the_file:
                    the_file.write('%d, %f\n' % (cycle_counter, reward_sum))
                reward_sum = 0

                ret = agent.agent_start(observation)
            else:
                action, eps, q_now, new_feature_vec, deg_interest = agent.agent_step(
                    reward, observation)
                agent.agent_step_update(reward, action, eps, q_now,
                                        new_feature_vec, deg_interest)
                ret = (action, deg_interest)

        q_to_parent.put(ret)
        byte_data = q_from_parent.get()
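
A sketch of parent-side wiring for agent_process, assuming the parent relays raw msgpack payloads it receives from the environment; the GPU id and log path are illustrative:

# Hypothetical parent-side wiring. GPU id -1 selects the CPU, as in the
# argparse help text of Example #5; the relay loop is sketched in comments.
from multiprocessing import Process, Queue

q_to_child = Queue()    # parent -> agent_process (its q_from_parent)
q_from_child = Queue()  # agent_process -> parent (its q_to_parent)

worker = Process(target=agent_process,
                 args=(-1, 'reward.log', q_to_child, q_from_child))
worker.start()

# relay loop sketch:
# q_to_child.put(byte_data)   # raw msgpack bytes from the environment
# ret = q_from_child.get()    # a start action, or (action, deg_interest)

q_to_child.put(None)  # sentinel: agent_process exits when it reads None
worker.join()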
Example #7
class AgentServer(WebSocket):
    test_num = 5000  # number of test episodes to run per model

    agent = CnnDqnAgent()  # instance of the CnnDqnAgent class in cnn_dqn_agent.py
    agent_initialized = False

    thread_event = threading.Event()  # threading.Event provides wait() and set()
    reward_sum = 0
    depth_image_dim = 32 * 32  # defined here because it is used to reshape received depth images
    image_count = 1  # number of images the Unity agent sends per cycle

    log_file = args.log_file
    gpu = args.gpu
    draw = args.draw
    test = args.test
    episode_num = args.episode  # number of episodes completed so far
    folder = args.folder
    model_num = args.model_num

    cycle_counter = 0

    print "------------------------------------------------"
    print "check that the ./%s directory exists" % (folder)
    print "check that the log file is correct"
    print "------------------------------------------------"

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        try:
            payload = m.data
            dat = msgpack.unpackb(payload)

            image = []
            for i in xrange(self.image_count):
                image.append(Image.open(io.BytesIO(bytearray(
                    dat['image'][i]))))
            depth = []
            for i in xrange(self.image_count):
                d = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
                # reshape each depth image into a 1-D vector
                depth.append(
                    np.array(ImageOps.grayscale(d)).reshape(
                        self.depth_image_dim))

            observation = {"image": image, "depth": depth}

            reward = dat['reward']
            end_episode = dat['endEpisode']
            lastZ = dat['score']

            if not self.agent_initialized:
                self.agent_initialized = True
                print("initializing agent...")
                # this is the only place depth_image_dim is passed as an argument
                self.agent.agent_init(depth_image_dim=self.depth_image_dim,
                                      image_count=self.image_count,
                                      use_gpu=self.gpu,
                                      test=self.test,
                                      folder=self.folder,
                                      model_num=self.model_num)

                action = self.agent.agent_start(observation)
                self.send_action(action)
                print "send"

                # write to the log file
                # (note: self.succeed is not defined anywhere in this snippet)
                if not self.succeed:
                    with open(self.log_file, 'w') as the_file:
                        the_file.write('Cycle,Score,Episode \n')

                if (args.draw):
                    self.fig, self.ax1 = plt.subplots(1, 1)

            else:
                self.thread_event.wait()
                self.cycle_counter += 1
                self.reward_sum += reward

                if end_episode:
                    self.agent.agent_end(reward)
                    # write to the log file
                    with open(self.log_file, 'a') as the_file:
                        the_file.write(
                            str(self.cycle_counter) + ',' + str(lastZ) + ',' +
                            str(self.episode_num) + '\n')
                    print "Last Player's Z is %d" % (lastZ)
                    self.reward_sum = 0

                    if (args.test and self.episode_num % self.test_num == 0):
                        self.model_num += 10000
                        self.agent.q_net.load_model(self.model_num,
                                                    self.velocity)

                    self.episode_num += 1

                    print "----------------------------------"
                    print "Episode %d Start" % (self.episode_num)
                    print "----------------------------------"

                    action = self.agent.agent_start(observation)  # TODO
                    self.send_action(action)

                else:
                    action, eps, q_now, obs_array = self.agent.agent_step(
                        observation)
                    self.send_action(action)
                    self.agent.agent_step_update(reward, action, eps, q_now)

                    if args.draw:
                        self.pause_Q_plot(q_now.ravel())

            self.thread_event.set()
        except:
            import traceback
            import sys
            traceback.print_exc()
            sys.exit()

    def pause_Image_plot(self, img):
        plt.cla()
        plt.imshow(img)
        plt.pause(1.0 / 10**10)  # the argument is the sleep time

    # plot of the Q-function
    def pause_Q_plot(self, q):
        self.ax1.cla()
        actions = range(3)
        q = q[:3]
        max_q_abs = max(abs(q))
        if max_q_abs != 0:
            q = q / float(max_q_abs)

        self.ax1.set_xticks(actions)
        self.ax1.set_xticklabels(['Left', 'Forward', 'Right'],
                                 rotation=0,
                                 fontsize='small')
        self.ax1.set_xlabel("Action")  # x-axis label
        self.ax1.set_ylabel("Q_Value")  # y-axis label
        self.ax1.set_ylim(-1.1, 1.1)  # limit y to the range [-1.1, 1.1]
        self.ax1.set_xlim(-1, 4)
        self.ax1.hlines(y=0, xmin=-1, xmax=4, colors='r',
                        linewidths=2)  # horizontal line at y = 0

        self.ax1.bar(actions, q, align="center")
        plt.pause(1.0 / 10**10)  # the argument is the sleep time
Example #8
class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 1
    rgb_image_count = 1
    depth_image_count = 0
    depth_image_dim = 0
    ir_count = 1
    ground_count = 0
    compass_count = 1
    target_count = 1

    if args.mode_distribute:
        thread_event = threading.Event()

    def send_action(self, action):
        dat = msgpack.packb({"command": "".join(map(str, action))})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload, encoding='utf-8')

        image = []
        depth = []
        agent_count = len(dat['image'])

        for i in range(agent_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
            if (self.depth_image_count == 1):
                depth_dim = len(dat['depth'][0])
                temp = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
                depth.append(
                    np.array(ImageOps.grayscale(temp)).reshape(
                        self.depth_image_dim))

        if (self.ir_count == 1):
            ir = dat['ir']
            ir_dim = len(ir[0])
        else:
            ir = []
            ir_dim = 0

        if (self.ground_count == 1):
            ground = dat['ground']
            ground_dim = len(ground[0])
        else:
            ground = []
            ground_dim = 0

        if (self.compass_count == 1):
            compass = dat['compass']
            compass_dim = len(compass[0])
        else:
            compass = []
            compass_dim = 0

        if (self.target_count == 1):
            target = dat['target']
            target_dim = len(target[0])
        else:
            target = []
            target_dim = 0

        observation = {
            "image": image,
            "depth": depth,
            "ir": ir,
            "ground": ground,
            "compass": compass,
            "target": target
        }
        reward = np.array(dat['reward'], dtype=np.float32)
        end_episode = np.array(dat['endEpisode'], dtype=np.bool)

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(use_gpu=args.gpu,
                                  agent_count=agent_count,
                                  rgb_image_count=self.rgb_image_count,
                                  depth_image_dim=self.depth_image_count *
                                  self.depth_image_dim,
                                  ir_dim=self.ir_count * ir_dim,
                                  ground_dim=self.ground_count * ground_dim,
                                  compass_dim=self.compass_count * compass_dim,
                                  target_dim=self.target_count * target_dim,
                                  model=args.model)
            self.reward_sum = np.zeros((agent_count), dtype=np.float32)
            dateinfo = datetime.datetime.now()
            self.logDirPath = args.log_file + dateinfo.strftime(
                "%Y%m%d%H%M%S") + "/"
            os.makedirs(self.logDirPath)
            self.log_file = self.logDirPath + "reward.log"

            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')

            self.agent.q_net.model.to_cpu()
            self.model_log = self.logDirPath + "model_" + str(self.agent.time -
                                                              1) + ".pkl"
            pickle.dump(self.agent.q_net.model, open(self.model_log, "wb"), -1)
            self.agent.q_net.model.to_gpu()
            self.agent.q_net.optimizer.setup(self.agent.q_net.model)

            action, q_now = self.agent.agent_start(observation, reward)
            self.send_action(action)

            self.q_log = self.logDirPath + "q.pkl"
            pickle.dump(q_now, open(self.q_log, "wb"), -1)

        else:
            if args.mode_distribute:
                self.thread_event.wait()

            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.agent.time - 1) + ',' + str(self.reward_sum) +
                        '\n')

                self.agent.q_net.model.to_cpu()
                self.model_log = self.logDirPath + "model_" + str(
                    self.agent.time - 1) + ".pkl"
                pickle.dump(self.agent.q_net.model, open(self.model_log, "wb"),
                            -1)
                self.agent.q_net.model.to_gpu()
                self.agent.q_net.optimizer.setup(self.agent.q_net.model)

                self.reward_sum = np.zeros((agent_count), dtype=np.float32)
                action = self.agent.agent_start(observation, reward)  # TODO
                self.send_action(action)
            else:
                action, eps, q_now = self.agent.agent_step(reward, observation)
                self.send_action(action)
                self.agent.agent_step_update(reward, action, eps)

                pickle.dump(q_now, open(self.q_log, "ab"), -1)

        if args.mode_distribute:
            self.thread_event.set()
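
For reference, a hypothetical client-side payload matching the keys this handler unpacks; depth and ground are omitted because their counts are 0 above, and the sensor widths are placeholders:

# Hypothetical client-side payload for the handler above. Key names match
# what received_message reads; all values are stand-ins.
import io
import msgpack
from PIL import Image

buf = io.BytesIO()
Image.new("RGB", (227, 227)).save(buf, format="PNG")
png_bytes = buf.getvalue()

payload = msgpack.packb({
    "image": [png_bytes],      # one PNG per agent; agent_count = len(image)
    "ir": [[0.0] * 8],         # ir_dim is inferred as len(ir[0])
    "compass": [[0.0]],        # compass_dim is inferred as len(compass[0])
    "target": [[0.0, 0.0]],    # target_dim is inferred as len(target[0])
    "reward": [0.0],           # one reward per agent
    "endEpisode": [False],     # one flag per agent
})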
Example #9
class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 0
    thread_event = threading.Event()
    log_file = args.log_file
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1
    now = datetime.datetime.now()
    otherStyleTime = now.strftime("%Y-%m-%d %H_%M_%S")

    cur_dir = r'C:\Users\hosilab\Desktop\ls\ls\python-agent'  # raw string so the backslashes survive
    folder_name = 'RGB'
    directory = cur_dir + '\\' + folder_name
    if os.path.isdir(cur_dir) and not os.path.exists(directory):
        os.makedirs(directory)

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload)

        image = []
        for i in xrange(self.depth_image_count):
            image_ = Image.open(io.BytesIO(bytearray(dat['image'][i])))
            #image_.save("./RGB/" + "img_" + str(self.cycle_counter) + ".png")
            image.append(image_)
        #depth = []
        # for i in xrange(self.depth_image_count):
        #     d = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
        #     depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {"image": image}  #, "depth": depth}
        reward = np.asanyarray(dat['reward'], dtype=np.float32)
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(use_gpu=args.gpu)  #,
            #depth_image_dim=self.depth_image_dim * self.depth_image_count)

            action = self.agent.agent_start(observation)  # the very first action comes for free
            self.send_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:  # the agent is already initialized; the main branch
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward  # TODO: the reward here still needs to be changed

            if end_episode:  # episode over: take an action from the starting state and begin the next episode
                self.agent.agent_end(reward)
                action = self.agent.agent_start(observation)  # TODO
                self.send_action(action)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' + str(self.reward_sum) +
                        '\n')
                self.reward_sum = 0
            else:  # the episode has not ended this cycle
                action, eps, q_now, obs_array = self.agent.agent_step(
                    reward, observation)
                if action != self.agent.actions.index(2) and reward != 1.:
                    reward -= 0.1  # apply this penalty first
                self.send_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now,
                                             obs_array)
                # agent_step_update stocks the experience (q_net.stock_experience) and runs q_net.experience_replay
        self.thread_event.set()
Example #10
class Agent:
    agent_initialized = False
    ga = GeneGenerator()  # add Naka
    agent_id = -1  # add Naka
    cycle_counter = 0
    thread_event = threading.Event()
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1
    gene_count = 3  # number of genes (add Naka)
    scale_x = 1
    scale_y = 1
    scale_z = 1

    def __init__(self, args):
        print "start to load cnn model"
        self.args = args
        self.cnnDqnAgent = CnnDqnAgent(use_gpu=self.args.gpu,
                                       depth_image_dim=self.depth_image_dim *
                                       self.depth_image_count,
                                       agent_id=self.agent_id)
        print 'finish loading cnn model'
        self.cnnDqnAgent.agent_init()
        print 'finish init cnn dqn agent'

    def received_message(self, agentServer, dat):
        image = []
        for i in xrange(self.depth_image_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
        depth = []
        for i in xrange(self.depth_image_count):
            d = (Image.open(io.BytesIO(bytearray(dat['depth'][i]))))
            depth.append(
                np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {
            "image": image,
            "depth": depth,
            "scale": [dat['x_s'], dat['y_s'], dat['z_s']]
        }

        self.scale_x = dat['x_s']
        self.scale_y = dat['y_s']
        self.scale_z = dat['z_s']

        # print 'scale'
        # print observation['scale']

        gene = []  # add Naka
        for i in xrange(len(dat['gene'])):
            gene.append(dat['gene'][i])
        reward = dat['reward']
        rewards = dat['rewards']  # add Naka
        self.agent_id = dat['agent_id']  # add Naka
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            print 'connected and agent started..'
            self.agent_initialized = True
            action = self.cnnDqnAgent.agent_start(observation)
            agentServer.send_action(action)
            if not os.path.exists(self.args.log_file):
                with open(self.args.log_file, 'w') as the_file:
                    the_file.write('cycle, episode_reward_sum \n')
        else:
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.cnnDqnAgent.agent_end(reward, self.agent_id)
                action = self.cnnDqnAgent.agent_start(observation)  # TODO
                self.gene = self.ga.gene_updater(gene, rewards)  # add Naka
                print self.agent_id, self.gene
                agentServer.send_actionAndgene(
                    action, self.gene[self.agent_id])  # add Naka
                with open(self.args.log_file, 'a') as the_file:
                    the_file.write(
                        str(self.cycle_counter) + ',' + str(self.reward_sum) +
                        ',' + str(self.scale_x) + ',' + str(self.scale_y) +
                        ',' + str(self.scale_z) + '\n')
                self.reward_sum = 0
            else:
                action, eps, obs_array = self.cnnDqnAgent.agent_step(
                    reward, observation)
                agentServer.send_action(action)
                self.cnnDqnAgent.agent_step_update(reward, action, eps,
                                                   obs_array, self.agent_id)

        self.thread_event.set()