def setup_summary():
    # per-episode statistics exposed to TensorBoard as scalar summaries
    episode_total_reward = tf.Variable(0.)
    tf.summary.scalar('Total Reward/Episode', episode_total_reward)
    episode_avg_max_q = tf.Variable(0.)
    tf.summary.scalar('Average Max Q/Episode', episode_avg_max_q)
    episode_duration = tf.Variable(0.)
    tf.summary.scalar('Duration/Episode', episode_duration)

    summary_vars = [episode_total_reward, episode_avg_max_q,
                    episode_duration]  # , episode_avg_loss]
    summary_placeholders = [tf.placeholder(tf.float32)
                            for _ in range(len(summary_vars))]
    update_ops = [summary_vars[i].assign(summary_placeholders[i])
                  for i in range(len(summary_vars))]
    summary_op = tf.summary.merge_all()
    return summary_placeholders, update_ops, summary_op


if __name__ == "__main__":
    # CartPole-v1 environment; the maximum number of timesteps is 500
    env = Env()
    state_size = len(env.get_state())
    action_size = env.action_size

    # create the DQN agent
    agent = DQNAgent(state_size, action_size)

    scores, episodes, global_step = [], [], 0

    for e in range(EPISODES):
        print("---------------------------------------------------")
        done = False
        score, step = 0, 0

        # initialize the environment
        state = env.reset()
        state = np.reshape(state, [1, state_size])
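        # --- hedged sketch: how the episode loop plausibly continues ---
        # The listing above stops after reshaping the initial state; nothing
        # below is from the original. It assumes the common DQN training-script
        # interface: agent.get_action / append_sample / train_model /
        # update_target_model, an agent.memory with a train_start threshold,
        # and env.step(action) returning (next_state, reward, done).
        while not done:
            global_step += 1
            step += 1

            # choose an epsilon-greedy action and advance the environment
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])

            # store the transition, then train on a random mini-batch once
            # enough samples have accumulated
            agent.append_sample(state, action, reward, next_state, done)
            if len(agent.memory) >= agent.train_start:
                agent.train_model()

            score += reward
            state = next_state

        # episode finished: sync the target network and log the score
        agent.update_target_model()
        scores.append(score)
        episodes.append(e)
        print("episode: {}  score: {}  steps: {}".format(e, score, step))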
import numpy as np
from scipy import ndimage, misc

import image_segment
from agent import R_Learner
from environment import Env

if __name__ == "__main__":
    # load the test image and downscale it to 1/16 of its original size
    # (ndimage.imread and misc.imresize require an older SciPy release;
    # both were removed from modern SciPy)
    img = ndimage.imread('butterfly.jpg')
    img = misc.imresize(img, size=0.0625)

    search_env = img  # image the agent searches over
    patch_src = img   # image the template patch is taken from

    # segment the image into K regions and pick one region as the patch
    K = 50
    labels = image_segment.segment(img, K)
    label = 15

    # pixel values of the chosen patch, plus its pixel coordinates shifted
    # so that the patch's bounding box starts at the origin
    patch_indices = np.where(labels == label)
    patch_pixels = img[patch_indices]
    indices_at_origin = (patch_indices[0] - np.min(patch_indices[0]),
                         patch_indices[1] - np.min(patch_indices[1]))

    env = Env(img)
    agent = R_Learner(patch_pixels, indices_at_origin, env)

    # run the act-observe-learn loop until interrupted
    while True:
        state = env.get_state()
        action = agent.get_action(state)
        reward, next_state = env.take_action(action)
        agent.learn(state, action, reward, next_state)
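# --- hedged sketch: the update rule an "R_Learner" typically implements ---
# The R_Learner above lives in the local `agent` module, whose source is not
# shown here. The sketch below is NOT that code; it is a minimal tabular
# version of classic R-learning (Schwartz, 1993), an average-reward method,
# written to mirror the get_action/learn interface used in the loop above.
# The class name, hyperparameters, and the hashable-state assumption are
# illustrative choices, not taken from the original project.
import random
from collections import defaultdict

import numpy as np


class MinimalRLearner:
    def __init__(self, n_actions, alpha=0.1, beta=0.01, epsilon=0.1):
        self.n_actions = n_actions
        self.alpha = alpha      # step size for the action values
        self.beta = beta        # step size for the average-reward estimate
        self.epsilon = epsilon  # exploration rate
        self.rho = 0.0          # running estimate of the average reward
        self.q = defaultdict(lambda: np.zeros(n_actions))

    def get_action(self, state):
        # epsilon-greedy over the tabular action values
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.q[state]))

    def learn(self, state, action, reward, next_state):
        # was the executed action greedy w.r.t. the current values?
        greedy = self.q[state][action] == np.max(self.q[state])

        # R-learning TD error: the average reward rho replaces discounting
        td_error = (reward - self.rho
                    + np.max(self.q[next_state]) - self.q[state][action])
        self.q[state][action] += self.alpha * td_error

        # the average-reward estimate is updated only after greedy actions
        if greedy:
            self.rho += self.beta * td_error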