# Inferred imports: DQN_Agent and the global ref_cache are project-local and
# assumed to be defined elsewhere in this repo; the Keras imports assume the
# standalone Keras API (use tensorflow.keras if that is what the repo targets).
from collections import defaultdict

import numpy as np
from PIL import Image
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from keras.layers import AveragePooling2D
from keras.models import Model, load_model


class RunningAgent(object):

    def __init__(self,
                 dqn_path,
                 banchmark_q,
                 cloud_backend,
                 recent_zone=10,
                 explor_rate=0.2,
                 acc_threshold=0.8,
                 reward_threshold=0.45):
        # MobileNetV2 backbone pooled down to a 1280-d feature vector.
        feature_extractor = MobileNetV2(include_top=False)
        x = feature_extractor.output
        x = AveragePooling2D(pool_size=(4, 4))(x)
        self.feature_extractor = Model(inputs=feature_extractor.input,
                                       outputs=x)

        self.agent = DQN_Agent(s_dim=1280,
                               a_dim=10,
                               epsilon_decay=0.99,
                               epsilon_min=0.02,
                               gamma=0.95,
                               replay_batchsize=256)
        self.agent.model = load_model(dqn_path)
        self.dqn_path = dqn_path
        self.agent.curr_exploration_rate = 0

        # Valid statuses: INITIAL_TRAIN, INFERENCE, ESTIMATE, RETRAIN.
        # This experimental variant starts directly in RETRAIN.
        self.STATUS = "RETRAIN"

        self.cloud_backend = cloud_backend
        self.banchmark_q = banchmark_q
        self.explor_rate = explor_rate
        self.recent_zone = recent_zone
        self.reward_threshold = reward_threshold
        self.acc_threshold = acc_threshold
        self.grad_scale_factor = 3
        self.agent_memory = defaultdict(list)
        self.running_log = defaultdict(list)
        self.last_env_step = None
        self.step_count = 0
        self.train_count = 0
        self.load_model = 1  # 1: swap in a pre-trained model instead of retraining
        self.flag = 'FLIR'   # current dataset phase: 'FLIR', 'DNIM', 'imagenet', ...

    def infer(self, image):
        """Extract features and map the DQN action to a JPEG quality in {5, 15, ..., 95}."""
        image_data = preprocess_input(
            np.expand_dims(np.asarray(image.resize((224, 224)),
                                      dtype=np.float32),
                           axis=0))
        features = self.feature_extractor.predict(image_data)[0][0][0]
        state_action, action_id = self.agent.choose_action(features)
        return state_action, features, action_id, int(
            np.arange(5, 105, 10)[action_id])

    def remember(self, status, action, accuracy, ref_size, comp_size,
                 upload_size, size_reward, reward, image_path, recent_reward,
                 recent_accuracy):
        self.running_log['status'].append(status)
        self.running_log['action'].append(action)
        self.running_log['accuracy'].append(
            1 if self.STATUS != "INFERENCE" else accuracy)
        self.running_log['agent_accuracy'].append(accuracy)
        self.running_log['ref_size'].append(ref_size)
        self.running_log['comp_size'].append(comp_size)
        self.running_log['upload_size'].append(upload_size)
        self.running_log['size_reward'].append(size_reward)
        self.running_log['reward'].append(reward)
        self.running_log['image_path'].append(image_path)
        self.running_log['step_count'].append(self.step_count)
        self.running_log['recent_reward'].append(recent_reward)
        self.running_log['recent_accuracy'].append(recent_accuracy)
        self.running_log['explor_rate'].append(self.explor_rate)
        self.running_log['agent_epsilon'].append(
            self.agent.curr_exploration_rate)
        if self.STATUS != "INFERENCE":
            # In ESTIMATE, RETRAIN and INITIAL_TRAIN the agent can log everything.
            self.agent_memory['image_path'].append(image_path)
            self.agent_memory['step_count'].append(self.step_count)
            self.agent_memory['accuracy'].append(accuracy)
            self.agent_memory['ref_size'].append(ref_size)
            self.agent_memory['comp_size'].append(comp_size)
            self.agent_memory['size_reward'].append(size_reward)
            self.agent_memory['reward'].append(reward)
            self.agent_memory['action'].append(action)
            self.agent_memory['recent_accuracy'].append(recent_accuracy)

    def agent_upload(self, image_path):
        image = Image.open(image_path).convert("RGB")
        self.step_count += 1
        state_action, features, action_id, action = self.infer(image)

        # Recognition at the chosen quality, served from ref_cache when possible.
        if ref_cache["%s##%s" % (image_path, action)] == {}:
            error_code, results, size = self.cloud_backend.recognize(
                image, action)
            ref_cache["%s##%s" % (image_path, action)] = {
                "error_code": error_code,
                "results": results,
                "size": size,
                "banchmark_q": action
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, action)]
            error_code = cache['error_code']
            results = cache['results']
            size = cache['size']
        if error_code > 0:
            return 1, results

        # Reference recognition at the benchmark quality.
        if ref_cache["%s##%s" % (image_path, self.banchmark_q)] == {}:
            error_code, ref_results, ref_size = self.cloud_backend.recognize(
                image, self.banchmark_q)
            ref_cache["%s##%s" % (image_path, self.banchmark_q)] = {
                "error_code": error_code,
                "results": ref_results,
                "size": ref_size,
                "banchmark_q": self.banchmark_q
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, self.banchmark_q)]
            error_code = cache['error_code']
            ref_results = cache['results']
            ref_size = cache['size']
        if error_code > 0:
            return 2, ref_results

        # Accuracy is 1 if the top-1 reference label appears in the compressed results.
        ref_labels = np.array([line['keyword'] for line in ref_results])[
            np.argsort([line['score'] for line in ref_results])[::-1]][:1]
        accuracy = 1 if len(
            set(ref_labels)
            & set(line['keyword'] for line in results)) >= 1 else 0
        size_reward = size / ref_size
        reward = accuracy - size_reward
        recent_acc, recent_reward = self.estimate()

        # Remember current behavior.
        log_dict = {
            "status":
            ["INITIAL_TRAIN", "INFERENCE", "ESTIMATE", "RETRAIN"].index(self.STATUS),
            "accuracy": accuracy,
            "ref_size": ref_size,
            "comp_size": size,
            "upload_size": ref_size + size if self.STATUS != "INFERENCE" else size,
            "size_reward": size_reward,
            "reward": reward,
            "image_path": image_path,
            "action": action,
            "recent_accuracy": recent_acc,
            "recent_reward": recent_reward
        }
        self.remember(**log_dict)

        # Status drift.
        if self.STATUS == "INFERENCE":
            self.explor_rate = np.clip(self.explor_rate, 0.2, 0.95)
            self.STATUS = "ESTIMATE" if np.random.uniform(
                low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "ESTIMATE":
            # Adapt the exploration rate against the slope of the recent-accuracy curve.
            if len(self.agent_memory['recent_accuracy']) > self.recent_zone:
                grad = np.gradient(self.agent_memory['recent_accuracy'])[-1]
                if not np.isnan(grad):
                    self.explor_rate -= self.grad_scale_factor * grad
                self.explor_rate = np.clip(self.explor_rate, 0.2, 0.95)
            if len(self.agent_memory['recent_accuracy']) > self.recent_zone \
                    and recent_acc < self.acc_threshold:
                print("try retrain", self.step_count)
                print("try retrain", self.flag)
                print("\n")
                if (self.flag == 'DNIM' and self.step_count > 720) or (
                        self.flag == 'imagenet' and self.step_count > 2720):
                    print("yes")
                    if self.load_model == 1 and self.step_count > 2720:
                        # Swap in the pre-trained DNIM model instead of retraining.
                        print("load model\n")
                        self.STATUS = "INFERENCE"
                        self.explor_rate = 0.2
                        self.agent_memory = defaultdict(list)
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_all_DNIM.h5")
                        self.agent.curr_exploration_rate = 0
                        self.flag = 'DNIM_load'
                    else:
                        print("inference to retrain\n")
                        self.agent.memory.clear()
                        self.agent.curr_exploration_rate = 1
                        self.STATUS = "RETRAIN"
                else:
                    self.STATUS = "ESTIMATE" if np.random.uniform(
                        low=0, high=1) < self.explor_rate else "INFERENCE"
            else:
                self.STATUS = "ESTIMATE" if np.random.uniform(
                    low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "RETRAIN":
            # Store the delayed transition (s, a, r, s').
            if self.last_env_step is not None:
                self.agent.remember(self.last_env_step['features'],
                                    self.last_env_step['action_id'],
                                    self.last_env_step['reward'], features)
            self.last_env_step = {
                "features": features,
                "action_id": action_id,
                "reward": reward
            }
            if self.step_count > 1998:
                # Snapshot the model after roughly 2,000 FLIR steps.
                self.agent.model.save("evaluation_results/baidu_2k_FLIR.h5")
                self.agent.model = load_model(
                    "evaluation_results/baidu_2k_FLIR.h5")
                return 0, log_dict
            if recent_reward > self.reward_threshold \
                    and recent_acc > self.acc_threshold \
                    and self.agent.curr_exploration_rate < 0.4 \
                    and self.step_count > 2720:
                print("train done", self.step_count)
                print("\n")
                if self.step_count > 720:
                    # Enter the ImageNet retrain phase (2,000 images).
                    print("retrain to inference\n")
                    self.STATUS = "INFERENCE"
                    self.explor_rate = 0.2
                    self.agent_memory = defaultdict(list)
                    if self.step_count < 2720:
                        # Initial training done: snapshot the FLIR model.
                        self.agent.model.save(
                            "evaluation_results/agent_DQN_baidu_FLIR.h5")
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_FLIR.h5")
                        self.flag = 'FLIR'
                    else:
                        print("when loaded, skip retrain\n")
                        self.agent.model.save(
                            "evaluation_results/agent_DQN_baidu_all_DNIM_retrain.h5")
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_all_DNIM_retrain.h5")
                        self.flag = 'DNIM_retrain'
                    self.agent.curr_exploration_rate = 0
                else:
                    self.train_count += 1
                    if self.train_count > 128 and self.train_count % 5 == 0:
                        self.agent.learn()
                    if self.train_count <= 128:
                        # Pure exploration during the first steps.
                        self.agent.curr_exploration_rate = 1
            else:
                self.train_count += 1
                if self.train_count > 128 and self.train_count % 5 == 0:
                    self.agent.learn()
                if self.train_count <= 128:
                    # Pure exploration during the first steps.
                    self.agent.curr_exploration_rate = 1
        return 0, log_dict

    def estimate(self):
        """Mean accuracy and reward over the most recent `recent_zone` entries."""
        if len(self.agent_memory['reward']) < self.recent_zone:
            recent_reward = np.mean(self.agent_memory['reward'])
            recent_acc = np.mean(self.agent_memory['accuracy'])
        else:
            recent_reward = np.mean(
                self.agent_memory['reward'][-self.recent_zone:])
            recent_acc = np.mean(
                self.agent_memory['accuracy'][-self.recent_zone:])
        return recent_acc, recent_reward
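# agent_upload() above compares ref_cache["<image_path>##<quality>"] against {}
# before hitting the backend, which implies a module-level defaultdict(dict)
# keyed by image path and upload quality. A minimal sketch of that cache (any
# on-disk persistence is an assumption and is not shown in the source):
from collections import defaultdict

# "image_path##quality" -> {"error_code", "results", "size", "banchmark_q"}
ref_cache = defaultdict(dict)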
class RunningAgent(object):
    """Cleaner variant of the running agent, without the dataset-specific
    experiment branches above; adds a Kalman filter member."""

    def __init__(self,
                 dqn_path,
                 banchmark_q,
                 cloud_backend,
                 recent_zone=40,
                 explor_rate=0.5,
                 acc_threshold=0.85,
                 reward_threshold=0.45):
        feature_extractor = MobileNetV2(include_top=False)
        x = feature_extractor.output
        x = AveragePooling2D(pool_size=(4, 4))(x)
        self.feature_extractor = Model(inputs=feature_extractor.input,
                                       outputs=x)

        self.agent = DQN_Agent(s_dim=1280,
                               a_dim=10,
                               epsilon_decay=0.99,
                               epsilon_min=0.02,
                               gamma=0.95,
                               replay_batchsize=256)
        self.agent.model = load_model(dqn_path)
        self.dqn_path = dqn_path
        self.agent.curr_exploration_rate = 0
        self.STATUS = "INFERENCE"  # INITIAL_TRAIN, INFERENCE, ESTIMATE, RETRAIN
        self.cloud_backend = cloud_backend
        self.banchmark_q = banchmark_q
        self.explor_rate = explor_rate
        self.recent_zone = recent_zone
        self.reward_threshold = reward_threshold
        self.acc_threshold = acc_threshold
        self.agent_memory = defaultdict(list)
        self.running_log = defaultdict(list)
        self.Kfilter = KalmanFilter()
        self.last_env_step = None
        self.step_count = 0
        self.train_count = 0

    def infer(self, image):
        """Extract features and map the DQN action to a JPEG quality in {5, 15, ..., 95}."""
        image_data = preprocess_input(
            np.expand_dims(np.asarray(image.resize((224, 224)),
                                      dtype=np.float32),
                           axis=0))
        features = self.feature_extractor.predict(image_data)[0][0][0]
        state_action, action_id = self.agent.choose_action(features)
        return state_action, features, action_id, int(
            np.arange(5, 105, 10)[action_id])

    def remember(self, status, action, accuracy, ref_size, comp_size,
                 upload_size, size_reward, reward, image_path, recent_reward,
                 recent_accuracy):
        self.running_log['status'].append(status)
        self.running_log['action'].append(action)
        self.running_log['accuracy'].append(
            1 if self.STATUS != "INFERENCE" else accuracy)
        self.running_log['agent_accuracy'].append(accuracy)
        self.running_log['ref_size'].append(ref_size)
        self.running_log['comp_size'].append(comp_size)
        self.running_log['upload_size'].append(upload_size)
        self.running_log['size_reward'].append(size_reward)
        self.running_log['reward'].append(reward)
        self.running_log['image_path'].append(image_path)
        self.running_log['step_count'].append(self.step_count)
        self.running_log['recent_reward'].append(recent_reward)
        self.running_log['recent_accuracy'].append(recent_accuracy)
        self.running_log['agent_epsilon'].append(
            self.agent.curr_exploration_rate)
        if self.STATUS != "INFERENCE":
            # In ESTIMATE, RETRAIN and INITIAL_TRAIN the agent can log everything.
            self.agent_memory['image_path'].append(image_path)
            self.agent_memory['step_count'].append(self.step_count)
            self.agent_memory['accuracy'].append(accuracy)
            self.agent_memory['ref_size'].append(ref_size)
            self.agent_memory['comp_size'].append(comp_size)
            self.agent_memory['size_reward'].append(size_reward)
            self.agent_memory['reward'].append(reward)
            self.agent_memory['action'].append(action)
            self.agent_memory['recent_accuracy'].append(recent_accuracy)

    def agent_upload(self, image_path):
        image = Image.open(image_path).convert("RGB")
        self.step_count += 1
        state_action, features, action_id, action = self.infer(image)

        # Recognition at the chosen quality, served from ref_cache when possible.
        if ref_cache["%s##%s" % (image_path, action)] == {}:
            error_code, results, size = self.cloud_backend.recognize(
                image, action)
            ref_cache["%s##%s" % (image_path, action)] = {
                "error_code": error_code,
                "results": results,
                "size": size,
                "banchmark_q": action
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, action)]
            error_code = cache['error_code']
            results = cache['results']
            size = cache['size']
        if error_code > 0:
            return 1, results

        # Reference recognition at the benchmark quality.
        if ref_cache["%s##%s" % (image_path, self.banchmark_q)] == {}:
            error_code, ref_results, ref_size = self.cloud_backend.recognize(
                image, self.banchmark_q)
            ref_cache["%s##%s" % (image_path, self.banchmark_q)] = {
                "error_code": error_code,
                "results": ref_results,
                "size": ref_size,
                "banchmark_q": self.banchmark_q
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, self.banchmark_q)]
            error_code = cache['error_code']
            ref_results = cache['results']
            ref_size = cache['size']
        if error_code > 0:
            return 2, ref_results

        # Accuracy is 1 if the top-1 reference label appears in the compressed results.
        ref_labels = np.array([line['keyword'] for line in ref_results])[
            np.argsort([line['score'] for line in ref_results])[::-1]][:1]
        accuracy = 1 if len(
            set(ref_labels)
            & set(line['keyword'] for line in results)) >= 1 else 0
        size_reward = size / ref_size
        reward = accuracy - size_reward
        recent_acc, recent_reward = self.estimate()

        # Status drift.
        if self.STATUS == "INFERENCE":
            self.STATUS = "ESTIMATE" if np.random.uniform(
                low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "ESTIMATE":
            if len(self.agent_memory['accuracy']) > self.recent_zone \
                    and recent_acc < self.acc_threshold:
                # Retrain trigger disabled in this variant; it would set
                # self.STATUS = "RETRAIN" here.
                pass
            else:
                self.STATUS = "ESTIMATE" if np.random.uniform(
                    low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "RETRAIN":
            if recent_reward > self.reward_threshold \
                    and recent_acc > self.acc_threshold \
                    and self.agent.curr_exploration_rate < 0.2:
                self.STATUS = "INFERENCE"
                self.agent.model.save(self.dqn_path + ".retrain")
                self.agent.model = load_model(self.dqn_path + ".retrain")
                self.agent.curr_exploration_rate = 0
            else:
                self.train_count += 1
                if self.train_count > 128 and self.train_count % 5 == 0:
                    self.agent.learn()
                if self.train_count <= 128:
                    # Pure exploration during the first steps.
                    self.agent.curr_exploration_rate = 1

        if self.STATUS != "INFERENCE":
            # Remember transitions (s, a, r, s').
            if self.last_env_step is not None:
                self.agent.remember(self.last_env_step['features'],
                                    self.last_env_step['action_id'],
                                    self.last_env_step['reward'], features)
            self.last_env_step = {
                "features": features,
                "action_id": action_id,
                "reward": reward
            }

        # Remember current behavior.
        log_dict = {
            "status":
            ["INITIAL_TRAIN", "INFERENCE", "ESTIMATE", "RETRAIN"].index(self.STATUS),
            "accuracy": accuracy,
            "ref_size": ref_size,
            "comp_size": size,
            "upload_size": ref_size + size if self.STATUS != "INFERENCE" else size,
            "size_reward": size_reward,
            "reward": reward,
            "image_path": image_path,
            "action": action,
            "recent_accuracy": recent_acc,
            "recent_reward": recent_reward
        }
        self.remember(**log_dict)
        return 0, log_dict

    def estimate(self):
        """Mean accuracy and reward over the most recent `recent_zone` entries."""
        if len(self.agent_memory['reward']) < self.recent_zone:
            recent_reward = np.mean(self.agent_memory['reward'])
            recent_acc = np.mean(self.agent_memory['accuracy'])
        else:
            recent_reward = np.mean(
                self.agent_memory['reward'][-self.recent_zone:])
            recent_acc = np.mean(
                self.agent_memory['accuracy'][-self.recent_zone:])
        return recent_acc, recent_reward
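# DQN_Agent is imported from elsewhere in this repo; the sketch below is only
# a minimal, assumed reconstruction of the interface RunningAgent relies on
# (model, memory, curr_exploration_rate, choose_action, remember, learn). The
# network width and replay details are guesses, not the repo's implementation.
import random
from collections import deque

from keras.layers import Dense, Input


class DQN_Agent(object):

    def __init__(self, s_dim, a_dim, epsilon_decay=0.99, epsilon_min=0.02,
                 gamma=0.95, replay_batchsize=256):
        self.s_dim = s_dim                 # 1280-d MobileNetV2 feature vector
        self.a_dim = a_dim                 # 10 discrete JPEG quality levels
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.gamma = gamma
        self.replay_batchsize = replay_batchsize
        self.curr_exploration_rate = 1.0
        self.memory = deque(maxlen=10000)  # (s, a, r, s') transitions
        inputs = Input(shape=(s_dim,))
        hidden = Dense(128, activation='relu')(inputs)  # assumed width
        outputs = Dense(a_dim, activation='linear')(hidden)
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer='adam', loss='mse')

    def choose_action(self, state):
        """Epsilon-greedy selection; returns (q_values, action_id)."""
        q_values = self.model.predict(state[np.newaxis, :], verbose=0)[0]
        if np.random.uniform() < self.curr_exploration_rate:
            return q_values, np.random.randint(self.a_dim)
        return q_values, int(np.argmax(q_values))

    def remember(self, state, action_id, reward, next_state):
        self.memory.append((state, action_id, reward, next_state))

    def learn(self):
        """One replay update over a sampled minibatch, then decay epsilon."""
        if len(self.memory) < self.replay_batchsize:
            return
        batch = random.sample(list(self.memory), self.replay_batchsize)
        states = np.array([t[0] for t in batch])
        next_states = np.array([t[3] for t in batch])
        targets = self.model.predict(states, verbose=0)
        next_q = self.model.predict(next_states, verbose=0)
        for i, (_, a, r, _) in enumerate(batch):
            targets[i][a] = r + self.gamma * np.max(next_q[i])
        self.model.fit(states, targets, epochs=1, verbose=0)
        self.curr_exploration_rate = max(
            self.epsilon_min, self.curr_exploration_rate * self.epsilon_decay)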
plot_y = []
plot_part = deque(maxlen=10)

for i_episode in range(1):
    print("\n\nepisode %s:" % i_episode)
    image = env.reset()
    image_data = preprocess_input(
        np.expand_dims(np.asarray(image.resize((224, 224)),
                                  dtype=np.float32),
                       axis=0))
    features = feature_extractor.predict(image_data)[0][0][0]
    while True:
        step_count += 1
        state_actions, action_id = agent.choose_action(features)
        action = np.arange(5, 105, 10)[action_id]
        error_code, new_image, reward, done_flag, info = env.step(action)
        if error_code > 0:
            # Skip failed backend calls without consuming a step.
            step_count -= 1
            print(error_code)
            continue
        # if len(env.image_paths) == env.curr_image_id:
        #     break
        train_log['image_path'].append(env.image_paths[env.curr_image_id])
        train_log['acc_r'].append(info['acc_r'])
        train_log['size_r'].append(info['size_r'])
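# A hedged usage sketch for RunningAgent above. Everything named here except
# the RunningAgent/agent_upload interface is hypothetical: the dummy backend,
# the checkpoint path, the benchmark quality of 75, and the image directory.
import glob
import io


class DummyBackend(object):
    """Hypothetical stand-in for the real cloud recognizer; the source only
    requires .recognize(image, quality) -> (error_code, results, size)."""

    def recognize(self, image, quality):
        buf = io.BytesIO()
        image.save(buf, format="JPEG", quality=int(quality))
        # Fake a successful recognition result at the re-encoded size.
        return 0, [{"keyword": "object", "score": 0.9}], buf.tell()


runner = RunningAgent(dqn_path="evaluation_results/agent_DQN.h5",  # hypothetical checkpoint
                      banchmark_q=75,                              # assumed benchmark quality
                      cloud_backend=DummyBackend())
for path in sorted(glob.glob("images/*.jpg")):                     # hypothetical image dir
    code, payload = runner.agent_upload(path)
    if code != 0:
        print("backend error %s on %s" % (code, path))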