# maximum learning episodes and number of steps taken in single episode MAX_EPISODES = 5000 MAX_STEPS = 1000 # buffer is the memory of previous states and actions MAX_BUFFER = 1000000 MAX_TOTAL_REWARD = 300 S_DIM = env.observation_space.shape[0] A_DIM = env.action_space.shape[0] A_MAX = env.action_space.high[0] print(' State Dimensions :- ', S_DIM) print(' Action Dimensions :- ', A_DIM) print(' Action Max :- ', A_MAX) ram = buffer.MemoryBuffer(MAX_BUFFER) trainer = train.Trainer(S_DIM, A_DIM, A_MAX, ram) for _ep in range(MAX_EPISODES): observation = env.reset() print('EPISODE :- ', _ep) for r in range(MAX_STEPS): env.render() state = np.float32(observation) action = trainer.get_exploration_action(state) # if _ep%5 == 0: # # validate every 5th episode # action = trainer.get_exploitation_action(state) # else: # # get action based on observation, use exploration policy here
# HER training entry script: prepares an output directory, builds the Gym
# environment and agent, configures logging, and hands control to `run(...)`.
# NOTE(review): `args`, `dt`, `os`, `gym`, `buffer`, `trainer`, `logging`, and
# `run` come from earlier in the file / imports not visible here.

# prefix the run directory with a timestamp so each run is unique
args.dir_name = dt.strftime('%Y%m%d%H%M') + '_' + args.dir_name
os.mkdir(args.dir_name)

# create environment
env = gym.make(args.env_name)
# cache the environment's dimensions for the agent and the runner
env_params = {
    'state_dim' : env.observation_space.shape[0],
    'action_dim' : env.action_space.shape[0],
    'action_max' : float(env.action_space.high[0])
}
print(' State Dimensions :- ', env_params['state_dim'])
print(' Action Dimensions :- ', env_params['action_dim'])
print(' Action Max :- ', env_params['action_max'])

# initialize memory buffer
ram = buffer.MemoryBuffer(args.buffer_size)

# initialize agent
agent = trainer.Trainer(args, env_params['state_dim'], env_params['action_dim'], env_params['action_max'], ram)

# config logging: everything at DEBUG and above goes into the run directory
logging.basicConfig(filename=args.dir_name + '/her.log',level=logging.DEBUG,filemode='w')

run(args, env, agent, ram, env_params)

# end logging
logging.shutdown()
def train(continue_epi=250000, policy_path="../Models/policy_template/50000_base_policy.pth"):
    """Train the actor-critic tracker on ILSVRC-VID with a template-selection policy.

    Args:
        continue_epi: checkpoint step to resume the actor-critic trainer from
            (0 disables resuming).
        policy_path: path to the pretrained template-selection policy weights.

    Side effects: reads video frames from disk, appends to
    ../logs/train_td_error.txt, plots to a Visdom server, and periodically
    saves model checkpoints via the trainer.
    """
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    siam = SiameseNet(BaselineEmbeddingNet())
    pi = T_Policy(T_N)
    pi.load_state_dict(torch.load(policy_path))
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
    trainer = Trainer(ram)
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5  # exploration/teacher-forcing probability, decayed every 10000 steps
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0  # reward accumulated over each 100-step reporting window
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path, ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # aspect ratio of the initial box
        pos = ground_th
        reward_all = 0
        templates = []
        # initialize the actor on the first frame and seed T_N identical templates
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')
            template = crop_image(np.array(img), ground_th)
            for i in range(T_N):
                templates.append(template)
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            # local/global crops around the current estimate (current state)
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            # full frame resized for the siamese response maps
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]), cv2.COLOR_BGR2RGB)
            np_img = np.array(cv2.resize(cv2_img, (255, 255), interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            # score every stored template against the frame, pick one via the policy
            responses = siam(torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                             torch.Tensor(np_imgs).float().cuda())
            action_tensor = pi(responses.permute(1, 0, 2, 3).cuda())
            del responses  # free GPU memory promptly
            action = action_tensor.cpu().detach().numpy()
            action_id = np.argmax(action)
            template = templates[action_id]
            # replace the global view with the selected template before running the actor
            imo_g = np2tensor(np.array(template).reshape(1, 107, 107, 3))
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            # teacher forcing: occasionally substitute the ground-truth motion
            if np.random.random(1) < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]), np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]
            # suppress large scale changes
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            # refresh the template pool on a fixed interval (INTERVRAL is a
            # project-level constant; name kept as-is [sic])
            if frame % INTERVRAL == 0:
                template = crop_image(np.array(img), pos_)
                templates.append(template)
                templates.pop(1)
            # crops around the *new* estimate (next state)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(np.array(img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # shaped reward from IoU against ground truth
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            elif 0.5 <= r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1
            # the selected template serves as the next-state global view
            imo_g_ = np.array(template).reshape(3, 107, 107)
            # BUG FIX: the last argument was npBN(imo_g_) (duplicated), leaving
            # imo_l_ unused; the sibling train() implementations in this file
            # store the next-state *local* crop here.
            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos,
                            reward, npBN(imo_crop_l), npBN(imo_l_))
            reward_all += reward
            pos = pos_
            # re-anchor on ground truth when the estimate left the frame or lost the target
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a", encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]), Y=np.array([y]), win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
import buffer # creating environment env = Reactor() MAX_EPISODES = 5000 # max amount of times the reactor is being looped over MAX_STEPS = 200 # max amount of timesteps (dt) in a single run of the reactor MAX_BUFFER = 1000000 # max amount of (state, action reward, new_state) in the buffer S_DIM = 3 # state space A_DIM = 1 # action space dt = 0.25 # timestep for reactor simulation PLOT_CLOSE = False # True if plot is open ram = buffer.MemoryBuffer(MAX_BUFFER) # initializing buffer trainer = train.Trainer(S_DIM, A_DIM, ram) # initializing neural nets # trainer.load_models(300) # used to load past model def mkdir(base, name): path = os.path.join(base, name) if not os.path.exists(path): os.makedirs(path) return path mkdir('.', 'Models') mkdir('.', 'plots') for _ep in range(MAX_EPISODES):
def train(continue_epi=0,
          policy_path="../models/template_policy/{}_template_policy.pth",
          siamfc_path="../models/siamfc_pretrained.pth",
          gpu_id=0):
    """Jointly train the actor-critic localizer and the template-selection
    policy, using a pretrained SiamFC tracker for proposals and rewards.

    Args:
        continue_epi: checkpoint step to resume the actor-critic from (0 = fresh).
        policy_path: format-string path to pretrained template-policy weights.
        siamfc_path: path to pretrained SiamFC weights.
        gpu_id: GPU used by the SiamFC tracker.
    """
    # replay-buffer storage for RL samples  (translated: 强化学习样本存储空间)
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    ac_trainer = Trainer(ram)
    # continue_epi = 0
    if continue_epi > 0:
        policy_path = policy_path.format(254400)
        ac_trainer.load_models(continue_epi)
    # SiamFC tracker  (translated: siamfc跟踪器)
    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    # template-selection network  (translated: 模板选择网络)
    pi = T_Policy(T_N)
    weights_init(pi)
    # NOTE(review): policy_path is formatted with 254400 again even when
    # continue_epi == 0 — confirm this hard-coded checkpoint is intended.
    policy_path = policy_path.format(254400)
    pretrained_pi_dict = torch.load(policy_path)
    pi_dict = pi.state_dict()
    # keep only weights whose names exist in the freshly-initialized policy
    pretrained_pi_dict = {
        k: v
        for k, v in pretrained_pi_dict.items() if k in pi_dict
    }  # and k.startswith("conv")}
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)
    siam = SiameseNet(BaselineEmbeddingNet())
    weights_init(siam)
    pretrained_siam = torch.load(siamfc_path)
    siam_dict = siam.state_dict()
    # same partial-load pattern for the siamese response network
    pretrained_siam = {
        k: v
        for k, v in pretrained_siam.items() if k in siam_dict
    }
    siam_dict.update(pretrained_siam)
    siam.load_state_dict(siam_dict)
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
    var = 0.5  # teacher-forcing probability, decayed every 10000 steps
    # vis = Visdom(env='td_error')
    # line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0  # reward accumulated per 100-step reporting window
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path, ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = cv2.cvtColor(cv2.imread(frame_name_list[0]), cv2.COLOR_BGR2RGB)
        img_size = (img.shape[1], img.shape[0])
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # aspect ratio of the initial box
        pos = ground_th
        reward_all = 0
        templates = []
        # initialize actor and seed T_N copies of the first-frame template
        for init_num in range(1):
            ac_trainer.init_actor(img, ground_th)
            template = siamfc.init(img, ground_th)
            for i in range(T_N):
                templates.append(template)
        for frame in range(1, length):
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]), cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255), interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            # score all templates against the frame without building a graph
            with torch.no_grad():
                responses = siam(
                    torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                    torch.Tensor(np_imgs).float().cuda())
            # NOTE(review): torch.tensor(responses) copies an existing tensor
            pi_input = torch.tensor(responses).permute(1, 0, 2, 3).cuda()
            del responses, np_imgs, np_img  # free GPU/host memory promptly
            action = pi(pi_input).cpu()
            pos_ = pos
            action_id = np.argmax(action.detach().numpy())
            template = templates[action_id]
            # track with both the original (frame-0) template and the selected one
            with torch.no_grad():
                siam_box_oral = siamfc.update(cv2_img, templates[0])
                siam_box = siamfc.update(cv2_img, template)
            # convert [x1, y1, x2, y2] -> [x, y, w, h]
            siam_box_oral = [
                siam_box_oral[0], siam_box_oral[1], siam_box_oral[2] - siam_box_oral[0],
                siam_box_oral[3] - siam_box_oral[1]
            ]
            siam_box = [
                siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
                siam_box[3] - siam_box[1]
            ]
            # local/global crops around the baseline SiamFC box (current state)
            img_crop_l, img_crop_g, _ = crop_image_actor_(
                np.array(cv2_img), siam_box_oral)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            del img_crop_l, img_crop_g
            expect = 0  # flags steps where the ground-truth motion was substituted
            deta_pos = ac_trainer.actor(
                imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            del imo_l, imo_g
            # teacher forcing: occasionally substitute the ground-truth motion
            if np.random.random(1) < var or frame <= 3 or frame % 20 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.05:
                    expect = 1
                    deta_pos = deta_pos_[0]
            # suppress large scale changes
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(np.array(siam_box_oral), deta_pos, img_size, rate)
            # crops around the refined estimate (next state)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(cv2_img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            gt_frame = gt[frame]  # NOTE(review): unused
            iou_siam_oral = _compute_iou(siam_box_oral, gt[frame])
            iou_siam = _compute_iou(siam_box, gt[frame])
            iou_ac = _compute_iou(pos_, gt[frame])
            # binary rewards: did the actor / selected template beat the baseline?
            if iou_ac > iou_siam_oral:
                reward_ac = 1
            else:
                reward_ac = -1
            if iou_siam > iou_siam_oral:
                reward_t = 1
            else:
                reward_t = -1
            # message retained for the (disabled) per-frame IoU log below
            message = "iou_siam_oral: %2f, iou_siam: %2f, iou_ac: %2f ,expecte :%d\n" % (
                iou_siam_oral, iou_siam, iou_ac, expect)
            # with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            #     f.write(message)
            # NOTE(review): `reward_ac or reward_t and iou_siam_oral > 0.6` parses as
            # `reward_ac or (reward_t and iou_siam_oral > 0.6)`, and reward_ac is
            # always ±1 (truthy), so this condition is always True — likely intended
            # `(reward_ac > 0 or reward_t > 0) and iou_siam_oral > 0.6`; confirm.
            if reward_ac or reward_t and iou_siam_oral > 0.6:
                template = siamfc.init(cv2_img, pos_)
                templates.append(template)
                templates.pop(1)
            # REINFORCE bookkeeping for the template policy
            log_pi = torch.log(action[0, action_id])
            pi.put_data((reward_t, log_pi))
            # store the transition (states are batch-normalized via npBN)
            ac_trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos,
                               reward_ac, npBN(imo_crop_l), npBN(imo_l_))
            reward_all += reward_ac
            pos = pos_
            # re-anchor on ground truth when the estimate drifts out or fails
            if out_flag or iou_ac <= 0.2:
                pos = gt[frame]
        # NOTE(review): indentation reconstructed — the separator write appears to
        # run once per sequence; confirm against the original layout.
        with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            f.write('\n\n')
        ac_trainer.optimize()
        pi.train_policy()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0 and train_step != 0:
            td_error = ac_trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a", encoding='utf-8') as f:
                f.write(message)
            # vis.line(X=np.array([train_step]), Y=np.array([y]),
            #          win=line_loss,
            #          update='append')
            reward_100 = 0
        if train_step % 400 == 0 and train_step != 0:
            ac_trainer.save_models(train_step)
            torch.save(
                pi.state_dict(), '../models/template_policy/' +
                str(train_step + continue_epi) + '_template_policy.pth')
            print("save model----{}".format(str(train_step + continue_epi)))
        if train_step % 10000 == 0:
            var = var * 0.95
def train():
    """Train the actor-critic localizer on ILSVRC-VID (simplest variant:
    no template pool, binary IoU reward, Visdom loss plotting)."""
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    continue_epi = 0  # set > 0 to resume from a saved checkpoint
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5  # teacher-forcing probability, decayed every 10000 steps
    start_time = time.time()  # NOTE(review): unused
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    # ilsvrc_home = '/media/ps/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0  # reward accumulated per 100-step reporting window
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path, ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # aspect ratio of the initial box
        pos = ground_th
        reward_all = 0
        # initialize the actor on the first frame
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            # local/global crops around the current estimate (current state)
            img_crop_l, img_crop_g = crop_image(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            # actor predicts a motion delta (dx, dy, dscale)
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            # teacher forcing: with probability `var`, substitute ground-truth motion
            if np.random.random(1) < var:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 1:
                    deta_pos = deta_pos_[0]
            # suppress large scale changes
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            # crops around the new estimate (next state)
            img_crop_l_, img_crop_g_ = crop_image(np.array(img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            # binary reward from IoU with ground truth
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            else:
                reward = -1
            # store transition (state, next state, action, reward)
            ram.add(imo_crop_g, imo_g_, deta_pos, reward, imo_crop_l, imo_l_)
            reward_all += reward
            pos = pos_
        trainer.optimize()
        reward_100 += reward_all
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            vis.line(X=np.array([train_step]), Y=np.array([y]), win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
def train():
    """Train the actor-critic localizer on ILSVRC-VID (variant with shaped
    IoU reward, npBN-normalized states, and ground-truth re-anchoring)."""
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    # NOTE(review): resume step is hard-coded; confirm this checkpoint exists
    continue_epi = 121000
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5  # teacher-forcing probability, decayed every 10000 steps
    start_time = time.time()  # NOTE(review): unused
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0  # reward accumulated per 100-step reporting window
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path, ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # aspect ratio of the initial box
        pos = ground_th
        reward_all = 0
        # initialize the actor on the first frame
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            # local/global crops around the current estimate (current state)
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            # actor predicts a motion delta (dx, dy, dscale)
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            # teacher forcing: occasionally substitute the ground-truth motion
            if np.random.random(1) < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]
            # suppress large scale changes
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            # crops around the new estimate (next state); out_flag marks
            # estimates that left the image
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            gt_frame = gt[frame]  # NOTE(review): unused
            # shaped reward from IoU with ground truth
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            elif r >= 0.5 and r <= 0.7:
                # mid-range overlap: reward only improvement over the previous frame
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1
            # store the transition (states batch-normalized via npBN)
            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos,
                            reward, npBN(imo_crop_l), npBN(imo_l_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            # re-anchor on ground truth when the estimate left the frame or lost the target
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a", encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]), Y=np.array([y]), win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95