def get_mean_distance(indexSeq, numFrames=7500):
    """Write a per-frame fixation "scale" for one sequence to a text file.

    For every frame the fixation map is loaded, the pixels that hold the
    map's maximum value are located, and the absolute offset of each of
    those pixels from their mean position is computed.  The 80th-percentile
    offset along each of the two map axes is divided by the map size along
    that axis, and the larger of the two ratios is the frame's scale.

    One line ``"<seq> <frame> <scale>"`` per frame is written to
    ``scale/scale_<seq>.txt`` (the directory is created when missing).

    Args:
        indexSeq: Integer sequence number; used in the input path and the
            output file name.
        numFrames: Number of frames to process, starting at frame 1.
            Defaults to 7500, the value previously hard-coded here.
    """
    print('processing seq : {:02d}'.format(indexSeq))

    dirSave = 'scale'
    # exist_ok avoids the check-then-create race when several sequences are
    # processed concurrently.
    os.makedirs(dirSave, exist_ok=True)
    pathOut = osp.join(dirSave, 'scale_{:02d}.txt'.format(indexSeq))

    # The context manager closes the file even if a frame fails to load.
    with open(pathOut, 'w') as file:
        for indexFrame in range(1, numFrames + 1):
            if indexFrame % 1000 == 0:
                print('Seq {:02d} has processed {} images'.format(
                    indexSeq, indexFrame))

            pathFix = os.path.join(dreyeve_dir, '{:02d}'.format(indexSeq),
                                   'saliency_fix',
                                   '{:06d}.png'.format(indexFrame))
            fixationMap = read_image(pathFix, channels_first=True,
                                     color=False) / 255

            # Indices of every pixel that reaches the map maximum.
            locs = np.where(fixationMap == np.max(fixationMap))
            loc_center = np.mean(locs, axis=1)

            # One row per peak pixel: absolute offset from the peak centre.
            distance = np.abs(
                np.transpose(np.asarray(locs), (1, 0)) - loc_center)

            # Position of the 80th-percentile element in the sorted offsets.
            indexPercentile = int(np.floor(distance.shape[0] * 0.8))
            scale_h = (np.sort(distance[:, 0])[indexPercentile]
                       / fixationMap.shape[0])
            scale_w = (np.sort(distance[:, 1])[indexPercentile]
                       / fixationMap.shape[1])
            scale = np.max((scale_w, scale_h))

            line = '{:02d} {:04d} {:.3f}\n'.format(indexSeq, indexFrame,
                                                   scale)
            file.write(line)
def __init__(self, phase):
    """Configure the dataset for one phase and load its side tables.

    Args:
        phase: One of ``'train'``, ``'val'`` or ``'test'``.  Selects the
            sequence list, the allowed frame range and whether mirroring
            is allowed.

    Raises:
        ValueError: If ``phase`` is not one of the three supported values.
            (Previously this surfaced later as an ``AttributeError`` on
            ``self.sequences``.)
    """
    self.root = tmp_dir
    self.phase = phase
    self.debug = False

    if self.phase == 'train':
        self.sequences = dreyeve_train_seq
        self.allowed_frames = train_frame_range
        self.allow_mirror = True
    elif self.phase == 'val':
        # Validation reuses the training sequences with its own frame range.
        self.sequences = dreyeve_train_seq
        self.allowed_frames = val_frame_range
        self.allow_mirror = False
    elif self.phase == 'test':
        self.sequences = dreyeve_test_seq
        self.allowed_frames = test_frame_range
        self.allow_mirror = False
    else:
        raise ValueError(
            "phase must be 'train', 'val' or 'test', got {!r}".format(phase))

    # generate batch signatures
    self.data = self.make_dreye_dataset(self.sequences,
                                        self.allowed_frames)

    # NOTE: the attribute name is misspelled ("imgae"); it is kept as-is
    # because code outside this method may read it.
    self.mean_imgae_256 = read_image(
        os.path.join(dreyeve_dir, 'dreyeve_mean_frame.png'),
        channels_first=True, resize_dim=(w, h))
    self.load_mode = 1

    # Per-frame side tables prepared offline under data_prepare/.
    pathSpeedsCouse = osp.join('data_prepare', 'speed_and_course.txt')
    self.dfSpeedsCourses = self.read_speeds_and_courses(pathSpeedsCouse)

    pathLoc = osp.join('data_prepare', 'locations.txt')
    self.dfLocations = self.read_locations(pathLoc)

    # focus of view scale
    pathDistance = osp.join('data_prepare', 'scales.txt')
    self.dfScale = self.read_scale(pathDistance)
def get_mean_loc(indexSeq, indexFrame):
    """Return the normalised centre of the fixation peak for one frame.

    The fixation map of the frame is loaded and the indices of all pixels
    equal to the map maximum are averaged.  The mean index is divided by
    the first two entries of the map shape and mapped to the range -1..1.

    The result has shape ``(1, 2)`` because the divisor is a nested list.
    Entry ``[0, 0]`` is the coordinate along map axis 0 and ``[0, 1]`` the
    one along axis 1 (the original comment labels them "(x, y)" - TODO
    confirm which convention callers expect).
    """
    seqName = '{:02d}'.format(indexSeq)
    frameName = '{:06d}.png'.format(indexFrame)
    pathFix = os.path.join(dreyeve_dir, seqName, 'saliency_fix', frameName)

    fixationMap = read_image(pathFix, channels_first=True, color=False) / 255

    # Mean index of every pixel that reaches the maximum value.
    peakMask = fixationMap == np.max(fixationMap)
    peakCentre = np.mean(np.where(peakMask), axis=1)

    # Nested list on purpose: broadcasting yields the (1, 2) result shape.
    extent = [fixationMap.shape[0:2]]
    return peakCentre / extent * 2 - 1
def load_frames(self, indexSequence, frameBegin, frameEnd):
    """Load a clip of frames plus the fixation map of its last frame.

    Args:
        indexSequence: Sequence number, formatted as two digits in paths.
        frameBegin: First frame index of the clip (inclusive).
        frameEnd: Last frame index of the clip (inclusive); also the frame
            whose fixation map is returned.

    Returns:
        A tuple ``(imgs, fixationMap)``.  ``imgs`` is a float32 array of
        shape ``(3, frames_per_seq, h, w)`` with each loaded frame divided
        by 255; slots that are not written stay zero.  ``fixationMap`` is
        the ``read_image`` result for ``frameEnd`` divided by 255.
    """
    clip = np.zeros(shape=(3, frames_per_seq, h, w), dtype=np.float32)
    seqName = '{:02d}'.format(indexSequence)

    # Fixation map belonging to the final frame of the clip.
    fixName = '{:06d}.png'.format(frameEnd)
    pathFix = os.path.join(self.root, seqName, 'saliency_fix', fixName)
    fixationMap = read_image(pathFix, channels_first=True, color=False,
                             resize_dim=(h, w)) / 255

    # Fill the clip slot by slot, in temporal order.
    for slot, indexFrame in enumerate(range(frameBegin, frameEnd + 1)):
        frameName = '{:06d}.jpg'.format(indexFrame)
        pathImg = os.path.join(self.root, seqName, 'frames-256', frameName)
        frame = read_image(pathImg, channels_first=True, resize_dim=(h, w))
        clip[:, slot, :, :] = frame / 255

    return clip, fixationMap
def get_mean_loc(indexSeq, numFrames=7500):
    """Write the normalised fixation-peak location of every frame to a file.

    For each frame the fixation map is loaded, the indices of all pixels
    equal to the map maximum are averaged, and the mean index is divided by
    the map shape and mapped to the range -1..1.  One line
    ``"<seq> <frame> <v0> <v1>"`` per frame is written to
    ``locations/loc_<seq>.txt`` (the directory is created when missing).
    ``v0`` is the coordinate along map axis 0 and ``v1`` the one along
    axis 1 (the original comment labels them "(x, y)" - TODO confirm).

    Args:
        indexSeq: Integer sequence number; used in the input path and the
            output file name.
        numFrames: Number of frames to process, starting at frame 1.
            Defaults to 7500, the value previously hard-coded here.
    """
    print('processing seq : {:02d}'.format(indexSeq))

    dirSave = 'locations'
    # exist_ok avoids the check-then-create race when several sequences are
    # processed concurrently.
    os.makedirs(dirSave, exist_ok=True)
    pathOut = osp.join(dirSave, 'loc_{:02d}.txt'.format(indexSeq))

    # The context manager closes the file even if a frame fails to load.
    with open(pathOut, 'w') as file:
        for indexFrame in range(1, numFrames + 1):
            if indexFrame % 1000 == 0:
                print('Seq {:02d} has processed {} images'.format(
                    indexSeq, indexFrame))

            pathFix = os.path.join(dreyeve_dir, '{:02d}'.format(indexSeq),
                                   'saliency_fix',
                                   '{:06d}.png'.format(indexFrame))
            fixationMap = read_image(pathFix, channels_first=True,
                                     color=False) / 255

            # Mean index of every pixel that reaches the maximum value.
            loc = np.mean(np.where(fixationMap == np.max(fixationMap)),
                          axis=1)
            # Dividing by a nested list broadcasts to shape (1, 2).
            loc = loc / [fixationMap.shape[0:2]] * 2 - 1

            line = '{:02d} {:04d} {:.3f} {:.3f}\n'.format(
                indexSeq, indexFrame, loc[0, 0], loc[0, 1])
            file.write(line)
if best: filename = self.model_name + '_model_best.pth.tar' ckpt_path = os.path.join(self.ckpt_dir, filename) ckpt = torch.load(ckpt_path) # load variables from checkpoint self.start_epoch = ckpt['epoch'] self.best_valid_acc = ckpt['best_valid_acc'] self.model.load_state_dict(ckpt['model_state']) self.optimizer.load_state_dict(ckpt['optim_state']) if best: print("[*] Loaded {} checkpoint @ epoch {} " "with best valid acc of {:.3f}".format( filename, ckpt['epoch'], ckpt['best_valid_acc'])) else: print("[*] Loaded {} checkpoint @ epoch {}".format( filename, ckpt['epoch'])) if __name__ == '__main__': # test get_mean_loc print('test get_mean_loc') pathImg = '/home/lk/data/DREYEVE_DATA/01/frames/000001.jpg' pathFix = '/home/lk/data/DREYEVE_DATA/01/saliency_fix/000001.png' img = read_image(pathImg, channels_first=True, color=True) / 255 fix = read_image(pathFix, channels_first=True, color=False) / 255 # fix = torch.from_numpy(fix) np.mean(np.where(fix == np.max(fix)), axis=1)
def validate(self, epoch):
    """
    Evaluate the model on the validation set.

    Batches with index 0..countTotal of ``self.valid_loader`` are run
    through the glimpse model.  For each batch the hybrid loss (location
    MSE * 100 + baseline MSE + REINFORCE term) and the mean Euclidean
    distance between predicted and ground-truth location are accumulated.
    For every sample an image with the predicted (and, when not NaN, the
    ground-truth) focus rectangle is written to ``logs/<epoch>/``.

    Side effects: sets ``self.batch_size``, prints progress, writes images
    and, when ``self.use_tensorboard`` is set, logs running averages.

    Args:
        epoch: Epoch number; names the output directory and offsets the
            tensorboard iteration counter.

    Returns:
        Tuple ``(losses.avg, accs.avg)``.  Despite its name, the second
        value is the running mean distance, not a classification accuracy.
    """
    losses = AverageMeter()
    accs = AverageMeter()

    countTotal = 50
    is_blend = 1

    save_dir = os.path.join('logs', '{:02d}'.format(epoch))
    # makedirs also creates the parent 'logs' directory when it is missing.
    os.makedirs(save_dir, exist_ok=True)

    for i, (x, fixs, y, speeds, courses, scale_gt, indexSeq,
            frameEnd) in enumerate(self.valid_loader):
        # Stop once the batch budget is exhausted (indices 0..countTotal).
        if i > countTotal:
            return losses.avg, accs.avg

        # Keep the batch dimension explicitly: a bare squeeze() would also
        # drop it for a batch of one and break the y[index, k] indexing.
        y = y.float().reshape(y.shape[0], -1)

        if self.use_gpu:
            x, y, speeds, courses = (x.cuda(), y.cuda(), speeds.cuda(),
                                     courses.cuda())
        x, y, speeds, courses = (Variable(x), Variable(y),
                                 Variable(speeds), Variable(courses))

        # duplicate M times
        x = x.repeat(self.M, 1, 1, 1, 1)

        # initialize location vector and hidden state
        self.batch_size = x.shape[0]
        h_t, l_t = self.reset()

        # extract the glimpses
        log_pi = []
        baselines = []
        for t in range(self.num_glimpses - 1):
            # forward pass through model
            h_t, l_t, b_t, p = self.model(x, speeds, courses, l_t, h_t, t)
            baselines.append(b_t)
            log_pi.append(p)

        # last iteration
        h_t, l_t, b_t, l_t_final, p, scale = self.model(
            x, speeds, courses, l_t, h_t, self.num_glimpses - 1, last=True)
        log_pi.append(p)
        baselines.append(b_t)

        # convert list to tensors and reshape
        baselines = torch.stack(baselines).transpose(1, 0)
        log_pi = torch.stack(log_pi).transpose(1, 0)

        # average over the M duplicates
        l_t_final = l_t_final.view(self.M, -1, l_t_final.shape[-1])
        l_t_final = torch.mean(l_t_final, dim=0)

        if is_blend:
            # Iterate over the averaged predictions (one per sample), not
            # over the M-times duplicated input batch.
            for indexBlend in range(l_t_final.shape[0]):
                pathImg = os.path.join(
                    dreyeve_dir, '{:02d}'.format(indexSeq[indexBlend]),
                    'frames', '{:06d}.jpg'.format(frameEnd[indexBlend]))
                img = read_image(pathImg, channels_first=False, color=True)

                pathFix = os.path.join(
                    dreyeve_dir, '{:02d}'.format(indexSeq[indexBlend]),
                    'saliency_fix',
                    '{:06d}.png'.format(frameEnd[indexBlend]))
                fix_map = read_image(pathFix, channels_first=False,
                                     color=False)

                loc = l_t_final[indexBlend, :].cpu().detach().numpy()
                loc_gt = y[indexBlend].cpu().numpy()
                scale_blend = scale[indexBlend].cpu().detach().numpy()
                scale_gt_blend = scale_gt[indexBlend].cpu().detach().numpy()

                # draw target
                blend = blend_map_with_focus_rectangle(
                    img, fix_map, loc, scale=scale_blend, color=(0, 0, 255))

                # draw gt (skipped when the ground-truth location is NaN)
                if not (np.isnan(loc_gt[0]) or np.isnan(loc_gt[1])):
                    blend = blend_map_with_focus_rectangle(
                        blend, fix_map, loc_gt, scale=scale_gt_blend,
                        color=(0, 255, 0))

                print('scale is {:.3f} and scale_gt is {:.3f}'.format(
                    float(scale_blend), float(scale_gt_blend)))
                # NOTE(review): the file name uses only the frame index, so
                # samples from different sequences can overwrite each other.
                cv2.imwrite(
                    os.path.join(save_dir, '{:06d}.jpg'.format(
                        frameEnd[indexBlend])), blend)

        baselines = baselines.contiguous().view(self.M, -1,
                                                baselines.shape[-1])
        baselines = torch.mean(baselines, dim=0)

        log_pi = log_pi.contiguous().view(self.M, -1, log_pi.shape[-1])
        log_pi = torch.mean(log_pi, dim=0)

        # Reward: 1 where the predicted location lies within dis_R_thres of
        # the ground truth, else 0.  Distances are computed for the whole
        # batch at once.
        distance = torch.sqrt(
            torch.pow(l_t_final[:, 0] - y[:, 0], 2) +
            torch.pow(l_t_final[:, 1] - y[:, 1], 2))
        dis = distance.sum()
        R = (distance < self.dis_R_thres).float()
        R = R.unsqueeze(1).repeat(1, self.num_glimpses).to(self.device)

        # compute losses for differentiable modules
        loss_action = F.mse_loss(l_t_final, y)
        loss_baseline = F.mse_loss(baselines, R)

        # compute reinforce loss
        adjusted_reward = R - baselines.detach()
        loss_reinforce = torch.sum(-log_pi * adjusted_reward, dim=1)
        loss_reinforce = torch.mean(loss_reinforce, dim=0)

        # sum up into a hybrid loss
        loss = loss_action * 100 + loss_baseline + loss_reinforce

        # "accuracy" here is the mean distance over the batch
        acc = dis / len(y)
        print('avg dist is {}'.format(acc))

        # store
        losses.update(loss.data, x.size()[0])
        accs.update(acc.data, x.size()[0])

        # log to tensorboard
        if self.use_tensorboard:
            iteration = epoch * len(self.valid_loader) + i
            log_value('valid_loss', losses.avg, iteration)
            log_value('valid_acc', accs.avg, iteration)

    return losses.avg, accs.avg