# Inferred imports: DQN_Agent and the global ref_cache are project-local and
# assumed to be defined elsewhere in this repo; the Keras imports assume the
# standalone Keras API (use tensorflow.keras if that is what the repo targets).
from collections import defaultdict

import numpy as np
from PIL import Image
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from keras.layers import AveragePooling2D
from keras.models import Model, load_model


class RunningAgent(object):

    def __init__(self,
                 dqn_path,
                 banchmark_q,
                 cloud_backend,
                 recent_zone=10,
                 explor_rate=0.2,
                 acc_threshold=0.8,
                 reward_threshold=0.45):
        # MobileNetV2 backbone pooled down to a 1280-d feature vector.
        feature_extractor = MobileNetV2(include_top=False)
        x = feature_extractor.output
        x = AveragePooling2D(pool_size=(4, 4))(x)
        self.feature_extractor = Model(inputs=feature_extractor.input,
                                       outputs=x)

        self.agent = DQN_Agent(s_dim=1280,
                               a_dim=10,
                               epsilon_decay=0.99,
                               epsilon_min=0.02,
                               gamma=0.95,
                               replay_batchsize=256)
        self.agent.model = load_model(dqn_path)
        self.dqn_path = dqn_path
        self.agent.curr_exploration_rate = 0

        # Valid statuses: INITIAL_TRAIN, INFERENCE, ESTIMATE, RETRAIN.
        # This experimental variant starts directly in RETRAIN.
        self.STATUS = "RETRAIN"

        self.cloud_backend = cloud_backend
        self.banchmark_q = banchmark_q
        self.explor_rate = explor_rate
        self.recent_zone = recent_zone
        self.reward_threshold = reward_threshold
        self.acc_threshold = acc_threshold
        self.grad_scale_factor = 3
        self.agent_memory = defaultdict(list)
        self.running_log = defaultdict(list)
        self.last_env_step = None
        self.step_count = 0
        self.train_count = 0
        self.load_model = 1  # 1: swap in a pre-trained model instead of retraining
        self.flag = 'FLIR'   # current dataset phase: 'FLIR', 'DNIM', 'imagenet', ...

    def infer(self, image):
        """Extract features and map the DQN action to a JPEG quality in {5, 15, ..., 95}."""
        image_data = preprocess_input(
            np.expand_dims(np.asarray(image.resize((224, 224)),
                                      dtype=np.float32),
                           axis=0))
        features = self.feature_extractor.predict(image_data)[0][0][0]
        state_action, action_id = self.agent.choose_action(features)
        return state_action, features, action_id, int(
            np.arange(5, 105, 10)[action_id])

    def remember(self, status, action, accuracy, ref_size, comp_size,
                 upload_size, size_reward, reward, image_path, recent_reward,
                 recent_accuracy):
        self.running_log['status'].append(status)
        self.running_log['action'].append(action)
        self.running_log['accuracy'].append(
            1 if self.STATUS != "INFERENCE" else accuracy)
        self.running_log['agent_accuracy'].append(accuracy)
        self.running_log['ref_size'].append(ref_size)
        self.running_log['comp_size'].append(comp_size)
        self.running_log['upload_size'].append(upload_size)
        self.running_log['size_reward'].append(size_reward)
        self.running_log['reward'].append(reward)
        self.running_log['image_path'].append(image_path)
        self.running_log['step_count'].append(self.step_count)
        self.running_log['recent_reward'].append(recent_reward)
        self.running_log['recent_accuracy'].append(recent_accuracy)
        self.running_log['explor_rate'].append(self.explor_rate)
        self.running_log['agent_epsilon'].append(
            self.agent.curr_exploration_rate)
        if self.STATUS != "INFERENCE":
            # In ESTIMATE, RETRAIN and INITIAL_TRAIN the agent can log everything.
            self.agent_memory['image_path'].append(image_path)
            self.agent_memory['step_count'].append(self.step_count)
            self.agent_memory['accuracy'].append(accuracy)
            self.agent_memory['ref_size'].append(ref_size)
            self.agent_memory['comp_size'].append(comp_size)
            self.agent_memory['size_reward'].append(size_reward)
            self.agent_memory['reward'].append(reward)
            self.agent_memory['action'].append(action)
            self.agent_memory['recent_accuracy'].append(recent_accuracy)

    def agent_upload(self, image_path):
        image = Image.open(image_path).convert("RGB")
        self.step_count += 1
        state_action, features, action_id, action = self.infer(image)

        # Recognition at the chosen quality, served from ref_cache when possible.
        if ref_cache["%s##%s" % (image_path, action)] == {}:
            error_code, results, size = self.cloud_backend.recognize(
                image, action)
            ref_cache["%s##%s" % (image_path, action)] = {
                "error_code": error_code,
                "results": results,
                "size": size,
                "banchmark_q": action
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, action)]
            error_code = cache['error_code']
            results = cache['results']
            size = cache['size']
        if error_code > 0:
            return 1, results

        # Reference recognition at the benchmark quality.
        if ref_cache["%s##%s" % (image_path, self.banchmark_q)] == {}:
            error_code, ref_results, ref_size = self.cloud_backend.recognize(
                image, self.banchmark_q)
            ref_cache["%s##%s" % (image_path, self.banchmark_q)] = {
                "error_code": error_code,
                "results": ref_results,
                "size": ref_size,
                "banchmark_q": self.banchmark_q
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, self.banchmark_q)]
            error_code = cache['error_code']
            ref_results = cache['results']
            ref_size = cache['size']
        if error_code > 0:
            return 2, ref_results

        # Accuracy is 1 if the top-1 reference label appears in the compressed results.
        ref_labels = np.array([line['keyword'] for line in ref_results])[
            np.argsort([line['score'] for line in ref_results])[::-1]][:1]
        accuracy = 1 if len(
            set(ref_labels)
            & set(line['keyword'] for line in results)) >= 1 else 0
        size_reward = size / ref_size
        reward = accuracy - size_reward
        recent_acc, recent_reward = self.estimate()

        # Remember current behavior.
        log_dict = {
            "status":
            ["INITIAL_TRAIN", "INFERENCE", "ESTIMATE", "RETRAIN"].index(self.STATUS),
            "accuracy": accuracy,
            "ref_size": ref_size,
            "comp_size": size,
            "upload_size": ref_size + size if self.STATUS != "INFERENCE" else size,
            "size_reward": size_reward,
            "reward": reward,
            "image_path": image_path,
            "action": action,
            "recent_accuracy": recent_acc,
            "recent_reward": recent_reward
        }
        self.remember(**log_dict)

        # Status drift.
        if self.STATUS == "INFERENCE":
            self.explor_rate = np.clip(self.explor_rate, 0.2, 0.95)
            self.STATUS = "ESTIMATE" if np.random.uniform(
                low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "ESTIMATE":
            # Adapt the exploration rate against the slope of the recent-accuracy curve.
            if len(self.agent_memory['recent_accuracy']) > self.recent_zone:
                grad = np.gradient(self.agent_memory['recent_accuracy'])[-1]
                if not np.isnan(grad):
                    self.explor_rate -= self.grad_scale_factor * grad
                self.explor_rate = np.clip(self.explor_rate, 0.2, 0.95)
            if len(self.agent_memory['recent_accuracy']) > self.recent_zone \
                    and recent_acc < self.acc_threshold:
                print("try retrain", self.step_count)
                print("try retrain", self.flag)
                print("\n")
                if (self.flag == 'DNIM' and self.step_count > 720) or (
                        self.flag == 'imagenet' and self.step_count > 2720):
                    print("yes")
                    if self.load_model == 1 and self.step_count > 2720:
                        # Swap in the pre-trained DNIM model instead of retraining.
                        print("load model\n")
                        self.STATUS = "INFERENCE"
                        self.explor_rate = 0.2
                        self.agent_memory = defaultdict(list)
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_all_DNIM.h5")
                        self.agent.curr_exploration_rate = 0
                        self.flag = 'DNIM_load'
                    else:
                        print("inference to retrain\n")
                        self.agent.memory.clear()
                        self.agent.curr_exploration_rate = 1
                        self.STATUS = "RETRAIN"
                else:
                    self.STATUS = "ESTIMATE" if np.random.uniform(
                        low=0, high=1) < self.explor_rate else "INFERENCE"
            else:
                self.STATUS = "ESTIMATE" if np.random.uniform(
                    low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "RETRAIN":
            # Store the delayed transition (s, a, r, s').
            if self.last_env_step is not None:
                self.agent.remember(self.last_env_step['features'],
                                    self.last_env_step['action_id'],
                                    self.last_env_step['reward'], features)
            self.last_env_step = {
                "features": features,
                "action_id": action_id,
                "reward": reward
            }
            if self.step_count > 1998:
                # Snapshot the model after roughly 2,000 FLIR steps.
                self.agent.model.save("evaluation_results/baidu_2k_FLIR.h5")
                self.agent.model = load_model(
                    "evaluation_results/baidu_2k_FLIR.h5")
                return 0, log_dict
            if recent_reward > self.reward_threshold \
                    and recent_acc > self.acc_threshold \
                    and self.agent.curr_exploration_rate < 0.4 \
                    and self.step_count > 2720:
                print("train done", self.step_count)
                print("\n")
                if self.step_count > 720:
                    # Enter the ImageNet retrain phase (2,000 images).
                    print("retrain to inference\n")
                    self.STATUS = "INFERENCE"
                    self.explor_rate = 0.2
                    self.agent_memory = defaultdict(list)
                    if self.step_count < 2720:
                        # Initial training done: snapshot the FLIR model.
                        self.agent.model.save(
                            "evaluation_results/agent_DQN_baidu_FLIR.h5")
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_FLIR.h5")
                        self.flag = 'FLIR'
                    else:
                        print("when loaded, skip retrain\n")
                        self.agent.model.save(
                            "evaluation_results/agent_DQN_baidu_all_DNIM_retrain.h5")
                        self.agent.model = load_model(
                            "evaluation_results/agent_DQN_baidu_all_DNIM_retrain.h5")
                        self.flag = 'DNIM_retrain'
                    self.agent.curr_exploration_rate = 0
                else:
                    self.train_count += 1
                    if self.train_count > 128 and self.train_count % 5 == 0:
                        self.agent.learn()
                    if self.train_count <= 128:
                        # Pure exploration during the first steps.
                        self.agent.curr_exploration_rate = 1
            else:
                self.train_count += 1
                if self.train_count > 128 and self.train_count % 5 == 0:
                    self.agent.learn()
                if self.train_count <= 128:
                    # Pure exploration during the first steps.
                    self.agent.curr_exploration_rate = 1
        return 0, log_dict

    def estimate(self):
        """Mean accuracy and reward over the most recent `recent_zone` entries."""
        if len(self.agent_memory['reward']) < self.recent_zone:
            recent_reward = np.mean(self.agent_memory['reward'])
            recent_acc = np.mean(self.agent_memory['accuracy'])
        else:
            recent_reward = np.mean(
                self.agent_memory['reward'][-self.recent_zone:])
            recent_acc = np.mean(
                self.agent_memory['accuracy'][-self.recent_zone:])
        return recent_acc, recent_reward
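# agent_upload() above compares ref_cache["<image_path>##<quality>"] against {}
# before hitting the backend, which implies a module-level defaultdict(dict)
# keyed by image path and upload quality. A minimal sketch of that cache (any
# on-disk persistence is an assumption and is not shown in the source):
from collections import defaultdict

# "image_path##quality" -> {"error_code", "results", "size", "banchmark_q"}
ref_cache = defaultdict(dict)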
class RunningAgent(object):
    """Cleaner variant of the running agent, without the dataset-specific
    experiment branches above; adds a Kalman filter member."""

    def __init__(self,
                 dqn_path,
                 banchmark_q,
                 cloud_backend,
                 recent_zone=40,
                 explor_rate=0.5,
                 acc_threshold=0.85,
                 reward_threshold=0.45):
        feature_extractor = MobileNetV2(include_top=False)
        x = feature_extractor.output
        x = AveragePooling2D(pool_size=(4, 4))(x)
        self.feature_extractor = Model(inputs=feature_extractor.input,
                                       outputs=x)

        self.agent = DQN_Agent(s_dim=1280,
                               a_dim=10,
                               epsilon_decay=0.99,
                               epsilon_min=0.02,
                               gamma=0.95,
                               replay_batchsize=256)
        self.agent.model = load_model(dqn_path)
        self.dqn_path = dqn_path
        self.agent.curr_exploration_rate = 0
        self.STATUS = "INFERENCE"  # INITIAL_TRAIN, INFERENCE, ESTIMATE, RETRAIN
        self.cloud_backend = cloud_backend
        self.banchmark_q = banchmark_q
        self.explor_rate = explor_rate
        self.recent_zone = recent_zone
        self.reward_threshold = reward_threshold
        self.acc_threshold = acc_threshold
        self.agent_memory = defaultdict(list)
        self.running_log = defaultdict(list)
        self.Kfilter = KalmanFilter()
        self.last_env_step = None
        self.step_count = 0
        self.train_count = 0

    def infer(self, image):
        """Extract features and map the DQN action to a JPEG quality in {5, 15, ..., 95}."""
        image_data = preprocess_input(
            np.expand_dims(np.asarray(image.resize((224, 224)),
                                      dtype=np.float32),
                           axis=0))
        features = self.feature_extractor.predict(image_data)[0][0][0]
        state_action, action_id = self.agent.choose_action(features)
        return state_action, features, action_id, int(
            np.arange(5, 105, 10)[action_id])

    def remember(self, status, action, accuracy, ref_size, comp_size,
                 upload_size, size_reward, reward, image_path, recent_reward,
                 recent_accuracy):
        self.running_log['status'].append(status)
        self.running_log['action'].append(action)
        self.running_log['accuracy'].append(
            1 if self.STATUS != "INFERENCE" else accuracy)
        self.running_log['agent_accuracy'].append(accuracy)
        self.running_log['ref_size'].append(ref_size)
        self.running_log['comp_size'].append(comp_size)
        self.running_log['upload_size'].append(upload_size)
        self.running_log['size_reward'].append(size_reward)
        self.running_log['reward'].append(reward)
        self.running_log['image_path'].append(image_path)
        self.running_log['step_count'].append(self.step_count)
        self.running_log['recent_reward'].append(recent_reward)
        self.running_log['recent_accuracy'].append(recent_accuracy)
        self.running_log['agent_epsilon'].append(
            self.agent.curr_exploration_rate)
        if self.STATUS != "INFERENCE":
            # In ESTIMATE, RETRAIN and INITIAL_TRAIN the agent can log everything.
            self.agent_memory['image_path'].append(image_path)
            self.agent_memory['step_count'].append(self.step_count)
            self.agent_memory['accuracy'].append(accuracy)
            self.agent_memory['ref_size'].append(ref_size)
            self.agent_memory['comp_size'].append(comp_size)
            self.agent_memory['size_reward'].append(size_reward)
            self.agent_memory['reward'].append(reward)
            self.agent_memory['action'].append(action)
            self.agent_memory['recent_accuracy'].append(recent_accuracy)

    def agent_upload(self, image_path):
        image = Image.open(image_path).convert("RGB")
        self.step_count += 1
        state_action, features, action_id, action = self.infer(image)

        # Recognition at the chosen quality, served from ref_cache when possible.
        if ref_cache["%s##%s" % (image_path, action)] == {}:
            error_code, results, size = self.cloud_backend.recognize(
                image, action)
            ref_cache["%s##%s" % (image_path, action)] = {
                "error_code": error_code,
                "results": results,
                "size": size,
                "banchmark_q": action
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, action)]
            error_code = cache['error_code']
            results = cache['results']
            size = cache['size']
        if error_code > 0:
            return 1, results

        # Reference recognition at the benchmark quality.
        if ref_cache["%s##%s" % (image_path, self.banchmark_q)] == {}:
            error_code, ref_results, ref_size = self.cloud_backend.recognize(
                image, self.banchmark_q)
            ref_cache["%s##%s" % (image_path, self.banchmark_q)] = {
                "error_code": error_code,
                "results": ref_results,
                "size": ref_size,
                "banchmark_q": self.banchmark_q
            }
        else:
            cache = ref_cache["%s##%s" % (image_path, self.banchmark_q)]
            error_code = cache['error_code']
            ref_results = cache['results']
            ref_size = cache['size']
        if error_code > 0:
            return 2, ref_results

        # Accuracy is 1 if the top-1 reference label appears in the compressed results.
        ref_labels = np.array([line['keyword'] for line in ref_results])[
            np.argsort([line['score'] for line in ref_results])[::-1]][:1]
        accuracy = 1 if len(
            set(ref_labels)
            & set(line['keyword'] for line in results)) >= 1 else 0
        size_reward = size / ref_size
        reward = accuracy - size_reward
        recent_acc, recent_reward = self.estimate()

        # Status drift.
        if self.STATUS == "INFERENCE":
            self.STATUS = "ESTIMATE" if np.random.uniform(
                low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "ESTIMATE":
            if len(self.agent_memory['accuracy']) > self.recent_zone \
                    and recent_acc < self.acc_threshold:
                # Retrain trigger disabled in this variant; it would set
                # self.STATUS = "RETRAIN" here.
                pass
            else:
                self.STATUS = "ESTIMATE" if np.random.uniform(
                    low=0, high=1) < self.explor_rate else "INFERENCE"
        elif self.STATUS == "RETRAIN":
            if recent_reward > self.reward_threshold \
                    and recent_acc > self.acc_threshold \
                    and self.agent.curr_exploration_rate < 0.2:
                self.STATUS = "INFERENCE"
                self.agent.model.save(self.dqn_path + ".retrain")
                self.agent.model = load_model(self.dqn_path + ".retrain")
                self.agent.curr_exploration_rate = 0
            else:
                self.train_count += 1
                if self.train_count > 128 and self.train_count % 5 == 0:
                    self.agent.learn()
                if self.train_count <= 128:
                    # Pure exploration during the first steps.
                    self.agent.curr_exploration_rate = 1

        if self.STATUS != "INFERENCE":
            # Remember transitions (s, a, r, s').
            if self.last_env_step is not None:
                self.agent.remember(self.last_env_step['features'],
                                    self.last_env_step['action_id'],
                                    self.last_env_step['reward'], features)
            self.last_env_step = {
                "features": features,
                "action_id": action_id,
                "reward": reward
            }

        # Remember current behavior.
        log_dict = {
            "status":
            ["INITIAL_TRAIN", "INFERENCE", "ESTIMATE", "RETRAIN"].index(self.STATUS),
            "accuracy": accuracy,
            "ref_size": ref_size,
            "comp_size": size,
            "upload_size": ref_size + size if self.STATUS != "INFERENCE" else size,
            "size_reward": size_reward,
            "reward": reward,
            "image_path": image_path,
            "action": action,
            "recent_accuracy": recent_acc,
            "recent_reward": recent_reward
        }
        self.remember(**log_dict)
        return 0, log_dict

    def estimate(self):
        """Mean accuracy and reward over the most recent `recent_zone` entries."""
        if len(self.agent_memory['reward']) < self.recent_zone:
            recent_reward = np.mean(self.agent_memory['reward'])
            recent_acc = np.mean(self.agent_memory['accuracy'])
        else:
            recent_reward = np.mean(
                self.agent_memory['reward'][-self.recent_zone:])
            recent_acc = np.mean(
                self.agent_memory['accuracy'][-self.recent_zone:])
        return recent_acc, recent_reward
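# DQN_Agent is imported from elsewhere in this repo; the sketch below is only
# a minimal, assumed reconstruction of the interface RunningAgent relies on
# (model, memory, curr_exploration_rate, choose_action, remember, learn). The
# network width and replay details are guesses, not the repo's implementation.
import random
from collections import deque

from keras.layers import Dense, Input


class DQN_Agent(object):

    def __init__(self, s_dim, a_dim, epsilon_decay=0.99, epsilon_min=0.02,
                 gamma=0.95, replay_batchsize=256):
        self.s_dim = s_dim                 # 1280-d MobileNetV2 feature vector
        self.a_dim = a_dim                 # 10 discrete JPEG quality levels
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.gamma = gamma
        self.replay_batchsize = replay_batchsize
        self.curr_exploration_rate = 1.0
        self.memory = deque(maxlen=10000)  # (s, a, r, s') transitions
        inputs = Input(shape=(s_dim,))
        hidden = Dense(128, activation='relu')(inputs)  # assumed width
        outputs = Dense(a_dim, activation='linear')(hidden)
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer='adam', loss='mse')

    def choose_action(self, state):
        """Epsilon-greedy selection; returns (q_values, action_id)."""
        q_values = self.model.predict(state[np.newaxis, :], verbose=0)[0]
        if np.random.uniform() < self.curr_exploration_rate:
            return q_values, np.random.randint(self.a_dim)
        return q_values, int(np.argmax(q_values))

    def remember(self, state, action_id, reward, next_state):
        self.memory.append((state, action_id, reward, next_state))

    def learn(self):
        """One replay update over a sampled minibatch, then decay epsilon."""
        if len(self.memory) < self.replay_batchsize:
            return
        batch = random.sample(list(self.memory), self.replay_batchsize)
        states = np.array([t[0] for t in batch])
        next_states = np.array([t[3] for t in batch])
        targets = self.model.predict(states, verbose=0)
        next_q = self.model.predict(next_states, verbose=0)
        for i, (_, a, r, _) in enumerate(batch):
            targets[i][a] = r + self.gamma * np.max(next_q[i])
        self.model.fit(states, targets, epochs=1, verbose=0)
        self.curr_exploration_rate = max(
            self.epsilon_min, self.curr_exploration_rate * self.epsilon_decay)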
plot_y = []
plot_part = deque(maxlen=10)

for i_episode in range(1):
    print("\n\nepisode %s:" % i_episode)
    image = env.reset()
    image_data = preprocess_input(
        np.expand_dims(np.asarray(image.resize((224, 224)),
                                  dtype=np.float32),
                       axis=0))
    features = feature_extractor.predict(image_data)[0][0][0]
    while True:
        step_count += 1
        state_actions, action_id = agent.choose_action(features)
        action = np.arange(5, 105, 10)[action_id]
        error_code, new_image, reward, done_flag, info = env.step(action)
        if error_code > 0:
            # Skip failed backend calls without consuming a step.
            step_count -= 1
            print(error_code)
            continue
        # if len(env.image_paths) == env.curr_image_id:
        #     break
        train_log['image_path'].append(env.image_paths[env.curr_image_id])
        train_log['acc_r'].append(info['acc_r'])
        train_log['size_r'].append(info['size_r'])
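# A hedged usage sketch for RunningAgent above. Everything named here except
# the RunningAgent/agent_upload interface is hypothetical: the dummy backend,
# the checkpoint path, the benchmark quality of 75, and the image directory.
import glob
import io


class DummyBackend(object):
    """Hypothetical stand-in for the real cloud recognizer; the source only
    requires .recognize(image, quality) -> (error_code, results, size)."""

    def recognize(self, image, quality):
        buf = io.BytesIO()
        image.save(buf, format="JPEG", quality=int(quality))
        # Fake a successful recognition result at the re-encoded size.
        return 0, [{"keyword": "object", "score": 0.9}], buf.tell()


runner = RunningAgent(dqn_path="evaluation_results/agent_DQN.h5",  # hypothetical checkpoint
                      banchmark_q=75,                              # assumed benchmark quality
                      cloud_backend=DummyBackend())
for path in sorted(glob.glob("images/*.jpg")):                     # hypothetical image dir
    code, payload = runner.agent_upload(path)
    if code != 0:
        print("backend error %s on %s" % (code, path))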