Code Example #1
# maximum number of training episodes and of steps taken in a single episode
MAX_EPISODES = 5000
MAX_STEPS = 1000
# the replay buffer stores past (state, action, reward, next_state) transitions
MAX_BUFFER = 1000000
MAX_TOTAL_REWARD = 300
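# state and action dimensions and the action bound, read from the gym environment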
S_DIM = env.observation_space.shape[0]
A_DIM = env.action_space.shape[0]
A_MAX = env.action_space.high[0]

print(' State Dimensions :- ', S_DIM)
print(' Action Dimensions :- ', A_DIM)
print(' Action Max :- ', A_MAX)

ram = buffer.MemoryBuffer(MAX_BUFFER)
trainer = train.Trainer(S_DIM, A_DIM, A_MAX, ram)

for _ep in range(MAX_EPISODES):
    observation = env.reset()
    print('EPISODE :- ', _ep)
    for r in range(MAX_STEPS):
        env.render()
        state = np.float32(observation)

        action = trainer.get_exploration_action(state)
        # if _ep % 5 == 0:
        #     # validate every 5th episode with the deterministic policy
        #     action = trainer.get_exploitation_action(state)
        # else:
        #     # otherwise act on the observation with the exploration policy
        #     action = trainer.get_exploration_action(state)
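All seven examples construct a buffer.MemoryBuffer and hand it to a trainer, but the
module itself is not shown. The following is a minimal sketch of the interface the
snippets assume (a hypothetical reimplementation; the actual add() signatures differ
between the repositories, e.g. the tracking examples store six components per
transition):

import random
from collections import deque

import numpy as np


class MemoryBuffer:
    def __init__(self, size):
        # bounded FIFO store: the oldest transitions are evicted once `size` is reached
        self.buffer = deque(maxlen=size)

    def add(self, state, action, reward, next_state):
        # append a single (s, a, r, s') transition
        self.buffer.append((state, action, reward, next_state))

    def sample(self, count):
        # uniform random mini-batch, stacked per component for the trainer
        batch = random.sample(self.buffer, min(count, len(self.buffer)))
        states, actions, rewards, next_states = map(np.float32, zip(*batch))
        return states, actions, rewards, next_states

    def __len__(self):
        return len(self.buffer)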
Code Example #2
    args.dir_name = dt.strftime('%Y%m%d%H%M') + '_' + args.dir_name
    os.mkdir(args.dir_name)
    
    # create environment
    env = gym.make(args.env_name)
    env_params = {
        'state_dim' : env.observation_space.shape[0],
        'action_dim' : env.action_space.shape[0],
        'action_max' : float(env.action_space.high[0])
    }
    print(' State Dimensions :- ', env_params['state_dim'])
    print(' Action Dimensions :- ', env_params['action_dim'])
    print(' Action Max :- ', env_params['action_max'])
    
    # initialize memory buffer
    ram = buffer.MemoryBuffer(args.buffer_size)
    
    # initialize agent
    agent = trainer.Trainer(args,
                            env_params['state_dim'],
                            env_params['action_dim'],
                            env_params['action_max'], ram)
    
    # config logging
    logging.basicConfig(filename=args.dir_name + '/her.log',
                        level=logging.DEBUG, filemode='w')
    
    run(args, env, agent, ram, env_params)
    
    # end logging
    logging.shutdown()
Code Example #3
def train(continue_epi=250000,
          policy_path="../Models/policy_template/50000_base_policy.pth"):
    ram = buffer.MemoryBuffer(MAX_BUFFER)
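    # Siamese matching network and pretrained template-selection policy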
    siam = SiameseNet(BaselineEmbeddingNet())
    pi = T_Policy(T_N)
    pi.load_state_dict(torch.load(policy_path))
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
    trainer = Trainer(ram)
    # continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]  # aspect ratio (width / height) of the initial box

        pos = ground_th
        reward_all = 0
        templates = []
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')
            template = crop_image(np.array(img), ground_th)
            for i in range(T_N):
                templates.append(template)

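        # track through the remaining frames of the sequence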
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))

            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
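            # correlate every stored template with the current frame via the Siamese network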
            responses = siam(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())

            action_tensor = pi(responses.permute(1, 0, 2, 3).cuda())
            del responses
            action = action_tensor.cpu().detach().numpy()
            action_id = np.argmax(action)
            template = templates[action_id]
            imo_g = np2tensor(np.array(template).reshape(1, 107, 107, 3))

            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l,
                                     imo_g).squeeze(0).cpu().detach().numpy()

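            # guided exploration: with probability var (and on early or periodic frames),
            # fall back to the ground-truth offset when it is small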
            if np.random.random() < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            if frame % INTERVRAL == 0:
                template = crop_image(np.array(img), pos_)
                templates.append(template)
                templates.pop(1)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            r = _compute_iou(pos_, gt[frame])

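            # IoU-shaped reward: +1 above 0.7; in [0.5, 0.7] the (clipped) improvement
            # over the previous frame; otherwise -1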
            if r > 0.7:
                reward = 1
            elif 0.5 <= r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1
            imo_g_ = np.array(template).reshape(3, 107, 107)
            # store the transition (assumed fix: the original passed npBN(imo_g_) twice
            # and left imo_l_ unused; the sibling variants store imo_l_ here)
            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos, reward,
                            npBN(imo_crop_l), npBN(imo_l_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()

            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a",
                      encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]),
                     Y=np.array([y]),
                     win=line_loss,
                     update='append')
            reward_100 = 0

        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
Code Example #4
import buffer

# creating environment
env = Reactor()

MAX_EPISODES = 5000  # maximum number of training episodes (full runs of the reactor)
MAX_STEPS = 200  # maximum number of timesteps (dt) in a single run of the reactor
MAX_BUFFER = 1000000  # maximum number of (state, action, reward, new_state) tuples in the buffer

S_DIM = 3  # state space
A_DIM = 1  # action space

dt = 0.25  # timestep for reactor simulation
PLOT_CLOSE = False  # set to True while a plot window is open

ram = buffer.MemoryBuffer(MAX_BUFFER)  # initializing buffer
trainer = train.Trainer(S_DIM, A_DIM, ram)  # initializing neural nets
# trainer.load_models(300)							# used to load past model


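# helper: create a directory if it does not already exist and return its path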
def mkdir(base, name):
    path = os.path.join(base, name)
    if not os.path.exists(path):
        os.makedirs(path)
    return path


mkdir('.', 'Models')
mkdir('.', 'plots')

for _ep in range(MAX_EPISODES):
Code Example #5
def train(continue_epi=0,
          policy_path="../models/template_policy/{}_template_policy.pth",
          siamfc_path="../models/siamfc_pretrained.pth",
          gpu_id=0):
    # replay buffer for the reinforcement-learning samples
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    ac_trainer = Trainer(ram)
    # continue_epi = 0
    if continue_epi > 0:
        ac_trainer.load_models(continue_epi)
    # SiamFC tracker
    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    # template-selection network
    pi = T_Policy(T_N)
    weights_init(pi)
    policy_path = policy_path.format(254400)
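    # partial state-dict load: copy only the pretrained tensors whose names exist in the current model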
    pretrained_pi_dict = torch.load(policy_path)
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {
        k: v
        for k, v in pretrained_pi_dict.items() if k in pi_dict
    }  # optional stricter filter: and k.startswith("conv")
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    siam = SiameseNet(BaselineEmbeddingNet())
    weights_init(siam)
    pretrained_siam = torch.load(siamfc_path)
    siam_dict = siam.state_dict()
    pretrained_siam = {
        k: v
        for k, v in pretrained_siam.items() if k in siam_dict
    }
    siam_dict.update(pretrained_siam)
    siam.load_state_dict(siam_dict)

    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()

    var = 0.5
    # vis = Visdom(env='td_error')
    # line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = cv2.cvtColor(cv2.imread(frame_name_list[0]), cv2.COLOR_BGR2RGB)
        img_size = (img.shape[1], img.shape[0])

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        pos = ground_th
        reward_all = 0
        templates = []
        for init_num in range(1):
            ac_trainer.init_actor(img, ground_th)
            template = siamfc.init(img, ground_th)
            for i in range(T_N):
                templates.append(template)

        for frame in range(1, length):
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            with torch.no_grad():
                responses = siam(
                    torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                    torch.Tensor(np_imgs).float().cuda())
            pi_input = responses.permute(1, 0, 2, 3).cuda()
            del responses, np_imgs, np_img
            action = pi(pi_input).cpu()
            pos_ = pos

            action_id = np.argmax(action.detach().numpy())
            template = templates[action_id]
            with torch.no_grad():
                siam_box_oral = siamfc.update(cv2_img, templates[0])
                siam_box = siamfc.update(cv2_img, template)
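            # convert the (x1, y1, x2, y2) corner boxes returned by the tracker to (x, y, w, h)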
            siam_box_oral = [
                siam_box_oral[0], siam_box_oral[1],
                siam_box_oral[2] - siam_box_oral[0],
                siam_box_oral[3] - siam_box_oral[1]
            ]
            siam_box = [
                siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
                siam_box[3] - siam_box[1]
            ]

            img_crop_l, img_crop_g, _ = crop_image_actor_(
                np.array(cv2_img), siam_box_oral)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            del img_crop_l, img_crop_g
            expect = 0
            deta_pos = ac_trainer.actor(
                imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            del imo_l, imo_g
            if np.random.random() < var or frame <= 3 or frame % 20 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.05:
                    expect = 1
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(np.array(siam_box_oral), deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(cv2_img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            # gt_frame = gt[frame]
            iou_siam_oral = _compute_iou(siam_box_oral, gt[frame])
            iou_siam = _compute_iou(siam_box, gt[frame])
            iou_ac = _compute_iou(pos_, gt[frame])

            # reward_ac = iou_ac - iou_siam
            # reward_t = iou_siam - iou_siam_oral
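            # binary rewards: does the actor box / the selected template beat the baseline SiamFC box?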
            reward_ac = 1 if iou_ac > iou_siam_oral else -1
            reward_t = 1 if iou_siam > iou_siam_oral else -1
            # print("iou_siam_oral: %2f, iou_siam: %2f, iou_ac: %2f"%(iou_siam_oral, iou_siam, iou_ac))
            message = "iou_siam_oral: %2f, iou_siam: %2f, iou_ac: %2f ,expecte :%d\n" % (
                iou_siam_oral, iou_siam, iou_ac, expect)
            # with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            #     f.write(message)
            # refresh the template pool only on a positive reward and a reliable baseline box
            # (assumed intent; the original `reward_ac or reward_t and ...` was always truthy)
            if (reward_ac > 0 or reward_t > 0) and iou_siam_oral > 0.6:
                template = siamfc.init(cv2_img, pos_)
                templates.append(template)
                templates.pop(1)
            log_pi = torch.log(action[0, action_id])
            pi.put_data((reward_t, log_pi))
            ac_trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos,
                               reward_ac, npBN(imo_crop_l), npBN(imo_l_))
            # if r == 0:
            #     break
            reward_all += reward_ac
            pos = pos_
            if out_flag or iou_ac <= 0.2:
                pos = gt[frame]
        with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            f.write('\n\n')
        ac_trainer.optimize()
        pi.train_policy()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0 and train_step != 0:
            td_error = ac_trainer.show_critic_loss()

            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a",
                      encoding='utf-8') as f:
                f.write(message)
            # vis.line(X=np.array([train_step]), Y=np.array([y]),
            #          win=line_loss,
            #          update='append')
            reward_100 = 0

        if train_step % 400 == 0 and train_step != 0:
            ac_trainer.save_models(train_step)
            torch.save(
                pi.state_dict(), '../models/template_policy/' +
                str(train_step + continue_epi) + '_template_policy.pth')
            print("save model----{}".format(str(train_step + continue_epi)))
        if train_step % 10000 == 0:
            var = var * 0.95
Code Example #6
def train():
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    # ilsvrc_home = '/media/ps/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        pos = ground_th
        reward_all = 0

        for init_num in range(1):
            trainer.init_actor(img, ground_th)

        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g = crop_image(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))

            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l,
                                     imo_g).squeeze(0).cpu().detach().numpy()

            if np.random.random() < var:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 1:
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_ = crop_image(np.array(img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            r = _compute_iou(pos_, gt[frame])

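            # sparse IoU reward: +1 when overlap with the ground truth exceeds 0.7, otherwise -1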
            if r > 0.7:
                reward = 1
            else:
                reward = -1

            ram.add(imo_crop_g, imo_g_, deta_pos, reward, imo_crop_l, imo_l_)
            reward_all += reward
            pos = pos_
        trainer.optimize()
        reward_100 += reward_all

        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            vis.line(X=np.array([train_step]),
                     Y=np.array([y]),
                     win=line_loss,
                     update='append')
            reward_100 = 0

        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
Code Example #7
def train():
    ram = buffer.MemoryBuffer(MAX_BUFFER)

    trainer = Trainer(ram)
    continue_epi = 121000
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        pos = ground_th
        reward_all = 0
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')

        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = (np.array(img_crop_l).reshape(3, 107, 107))
            imo_crop_g = (np.array(img_crop_g).reshape(3, 107, 107))

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))

            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l,
                                     imo_g).squeeze(0).cpu().detach().numpy()

            if np.random.random() < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            r = _compute_iou(pos_, gt[frame])

            if r > 0.7:
                reward = 1
            elif 0.5 <= r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1

            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos, reward,
                            npBN(imo_crop_l), npBN(imo_l_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()

            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a",
                      encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]),
                     Y=np.array([y]),
                     win=line_loss,
                     update='append')
            reward_100 = 0

        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95