Beispiel #1
0
def get_mean_distance(indexSeq):
    """Write one fixation "scale" value per frame of a sequence to disk.

    For every frame index in 1..7500 the fixation map
    ``<dreyeve_dir>/<seq>/saliency_fix/<frame>.png`` is read and divided by
    255. The pixels equal to the map's maximum are located and their absolute
    offsets from their mean position are computed per axis. For each of the
    first two axes, the offset found at the 80% position of the sorted offsets
    is divided by the matching entry of ``fixationMap.shape``; the larger of
    the two ratios is the frame's scale.

    Output goes to ``scale/scale_<seq>.txt`` (the directory is created when
    missing), one line per frame formatted as ``"<seq> <frame> <scale>"``.

    Args:
        indexSeq: integer sequence number, zero-padded to two digits in the
            paths and in the output.
    """
    print('processing seq : {:02d}'.format(indexSeq))
    dirSave = 'scale'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirSave, exist_ok=True)
    pathOut = osp.join(dirSave, 'scale_{:02d}.txt'.format(indexSeq))

    # The context manager closes the file even if reading a frame fails.
    with open(pathOut, 'w') as fileOut:
        for indexFrame in range(1, 7500 + 1):
            if indexFrame % 1000 == 0:
                print('Seq {:02d} has processed {} images'.format(
                    indexSeq, indexFrame))
            pathFix = os.path.join(dreyeve_dir, '{:02d}'.format(indexSeq),
                                   'saliency_fix',
                                   '{:06d}.png'.format(indexFrame))
            fixationMap = read_image(pathFix, channels_first=True,
                                     color=False) / 255

            # Index arrays (one per axis) of every pixel at the peak value.
            locs = np.where(fixationMap == np.max(fixationMap))
            loc_center = np.mean(locs, axis=1)
            # Rows are peak pixels, columns are axes.
            distance = abs(np.transpose(np.asarray(locs), (1, 0)) - loc_center)

            # Position of the 80th-percentile entry in the sorted offsets;
            # always a valid index because floor(0.8 * n) < n for n >= 1.
            indexPercentile = int(np.floor(distance.shape[0] * 0.8))
            # NOTE(review): assumes the first two axes of fixationMap are the
            # spatial ones - confirm what read_image returns for color=False.
            scale_h = (np.sort(distance[:, 0])[indexPercentile] /
                       fixationMap.shape[0])
            scale_w = (np.sort(distance[:, 1])[indexPercentile] /
                       fixationMap.shape[1])

            scale = np.max((scale_w, scale_h))

            fileOut.write('{:02d} {:04d} {:.3f}\n'.format(
                indexSeq, indexFrame, scale))
    def __init__(self, phase):
        """Configure the dataset for one phase and load its side tables.

        Depending on ``phase`` the sequence list, the allowed frame range and
        the mirroring flag are selected. Note that 'val' reuses the training
        sequences with a different frame range. Afterwards the sample list,
        the mean frame image and the speed/course, location and scale tables
        are loaded through the corresponding helper methods.

        Args:
            phase: one of ``'train'``, ``'val'`` or ``'test'``.

        Raises:
            ValueError: if ``phase`` is not one of the supported values.
        """
        self.root = tmp_dir
        self.phase = phase
        self.debug = False

        if self.phase == 'train':
            self.sequences = dreyeve_train_seq
            self.allowed_frames = train_frame_range
            self.allow_mirror = True
        elif self.phase == 'val':
            self.sequences = dreyeve_train_seq
            self.allowed_frames = val_frame_range
            self.allow_mirror = False
        elif self.phase == 'test':
            self.sequences = dreyeve_test_seq
            self.allowed_frames = test_frame_range
            self.allow_mirror = False
        else:
            # Fail here with a clear message instead of hitting a missing
            # attribute further down.
            raise ValueError(
                "phase must be 'train', 'val' or 'test', got {!r}".format(
                    phase))

        # generate batch signatures
        self.data = self.make_dreye_dataset(self.sequences,
                                            self.allowed_frames)
        # NOTE(review): resize_dim is (w, h) here while load_frames passes
        # (h, w) - confirm which order read_image expects.
        self.mean_imgae_256 = read_image(os.path.join(
            dreyeve_dir, 'dreyeve_mean_frame.png'),
                                         channels_first=True,
                                         resize_dim=(w, h))
        self.load_mode = 1
        pathSpeedsCouse = osp.join('data_prepare', 'speed_and_course.txt')
        self.dfSpeedsCourses = self.read_speeds_and_courses(pathSpeedsCouse)
        pathLoc = osp.join('data_prepare', 'locations.txt')
        self.dfLocations = self.read_locations(pathLoc)

        # focus of view scale
        pathDistance = osp.join('data_prepare', 'scales.txt')
        self.dfScale = self.read_scale(pathDistance)
Beispiel #3
0
def get_mean_loc(indexSeq, indexFrame):
    """Return the normalised mean position of the peak fixation pixels.

    Reads ``<dreyeve_dir>/<seq>/saliency_fix/<frame>.png``, divides it by 255,
    averages the indices of all pixels equal to the map's maximum, then
    divides by the first two entries of the map's shape, multiplies by 2 and
    subtracts 1.

    Args:
        indexSeq: integer sequence number (two-digit directory name).
        indexFrame: integer frame number (six-digit file name).

    Returns:
        The normalised location. Because the shape tuple is wrapped in a list
        before dividing, broadcasting adds a leading axis of length 1 to the
        result.
    """
    seq_dir = '{:02d}'.format(indexSeq)
    frame_name = '{:06d}.png'.format(indexFrame)
    fix_path = os.path.join(dreyeve_dir, seq_dir, 'saliency_fix', frame_name)
    fix_map = read_image(fix_path, channels_first=True, color=False) / 255

    # One index array per axis for every pixel at the peak value.
    peak_coords = np.where(fix_map == np.max(fix_map))
    center = np.mean(peak_coords, axis=1)

    # NOTE(review): the indices come out in array-axis order; whether that is
    # (x, y) or (y, x) depends on what read_image returns - confirm.
    extent = [fix_map.shape[0:2]]
    return center / extent * 2 - 1
    def load_frames(self, indexSequence, frameBegin, frameEnd):
        """Load a run of frames plus the fixation map of the last frame.

        Args:
            indexSequence: integer sequence number (two-digit directory name
                under ``self.root``).
            frameBegin: first frame number to load (inclusive).
            frameEnd: last frame number to load (inclusive); also selects the
                fixation map.

        Returns:
            A tuple ``(imgs_h_w, fixationMap)``. ``imgs_h_w`` is a float32
            array of shape ``(3, frames_per_seq, h, w)`` whose slot
            ``k`` holds frame ``frameBegin + k`` divided by 255; slots that
            are not filled stay zero. ``fixationMap`` is the ``saliency_fix``
            image of ``frameEnd`` divided by 255.
        """
        clip = np.zeros(shape=(3, frames_per_seq, h, w), dtype=np.float32)
        seq_dir = os.path.join(self.root, '{:02d}'.format(indexSequence))

        # Fixation map belonging to the final frame of the clip.
        fix_path = os.path.join(seq_dir, 'saliency_fix',
                                '{:06d}.png'.format(frameEnd))
        fix_map = read_image(fix_path,
                             channels_first=True,
                             color=False,
                             resize_dim=(h, w)) / 255

        # Fill the clip slot by slot, in frame order.
        for slot, frame_no in enumerate(range(frameBegin, frameEnd + 1)):
            frame_path = os.path.join(seq_dir, 'frames-256',
                                      '{:06d}.jpg'.format(frame_no))
            frame = read_image(frame_path,
                               channels_first=True,
                               resize_dim=(h, w))
            clip[:, slot, :, :] = frame / 255

        return clip, fix_map
Beispiel #5
0
def get_mean_loc(indexSeq):
    """Write the normalised peak-fixation location of every frame to disk.

    For every frame index in 1..7500 the fixation map
    ``<dreyeve_dir>/<seq>/saliency_fix/<frame>.png`` is read and divided by
    255. The indices of all pixels equal to the map's maximum are averaged,
    divided by the first two entries of the map's shape, multiplied by 2 and
    shifted by -1.

    Output goes to ``locations/loc_<seq>.txt`` (the directory is created when
    missing), one line per frame formatted as
    ``"<seq> <frame> <loc0> <loc1>"``.

    Args:
        indexSeq: integer sequence number, zero-padded to two digits in the
            paths and in the output.
    """
    print('processing seq : {:02d}'.format(indexSeq))
    dirSave = 'locations'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirSave, exist_ok=True)
    pathOut = osp.join(dirSave, 'loc_{:02d}.txt'.format(indexSeq))

    # The context manager closes the file even if reading a frame fails.
    with open(pathOut, 'w') as fileOut:
        for indexFrame in range(1, 7500 + 1):
            if indexFrame % 1000 == 0:
                print('Seq {:02d} has processed {} images'.format(
                    indexSeq, indexFrame))
            pathFix = os.path.join(dreyeve_dir, '{:02d}'.format(indexSeq),
                                   'saliency_fix',
                                   '{:06d}.png'.format(indexFrame))
            fixationMap = read_image(pathFix, channels_first=True,
                                     color=False) / 255
            loc = np.mean(np.where(fixationMap == np.max(fixationMap)),
                          axis=1)
            # Dividing by a list that wraps the shape tuple broadcasts to a
            # result with a leading axis of length 1, hence loc[0, k] below.
            loc = loc / [fixationMap.shape[0:2]] * 2 - 1

            fileOut.write('{:02d} {:04d} {:.3f} {:.3f}\n'.format(
                indexSeq, indexFrame, loc[0, 0], loc[0, 1]))
Beispiel #6
0
        if best:
            filename = self.model_name + '_model_best.pth.tar'
        ckpt_path = os.path.join(self.ckpt_dir, filename)
        ckpt = torch.load(ckpt_path)

        # load variables from checkpoint
        self.start_epoch = ckpt['epoch']
        self.best_valid_acc = ckpt['best_valid_acc']
        self.model.load_state_dict(ckpt['model_state'])
        self.optimizer.load_state_dict(ckpt['optim_state'])

        if best:
            print("[*] Loaded {} checkpoint @ epoch {} "
                  "with best valid acc of {:.3f}".format(
                      filename, ckpt['epoch'], ckpt['best_valid_acc']))
        else:
            print("[*] Loaded {} checkpoint @ epoch {}".format(
                filename, ckpt['epoch']))


if __name__ == '__main__':
    # Manual smoke run: load one frame and its fixation map, then compute the
    # mean index of the peak-valued fixation pixels. The result is discarded.
    print('test get_mean_loc')
    pathImg = '/home/lk/data/DREYEVE_DATA/01/frames/000001.jpg'
    pathFix = '/home/lk/data/DREYEVE_DATA/01/saliency_fix/000001.png'

    # Both images are divided by 255 after loading.
    img = read_image(pathImg, color=True, channels_first=True) / 255
    fix = read_image(pathFix, color=False, channels_first=True) / 255

    peak_mask = fix == np.max(fix)
    np.mean(np.where(peak_mask), axis=1)
Beispiel #7
0
    def validate(self, epoch):
        """
        Evaluate the model on the validation set.

        At most ``countTotal + 1`` (51) batches of ``self.valid_loader`` are
        processed. For each batch the input is repeated ``self.M`` times, the
        model is run for ``self.num_glimpses`` steps, and the final locations,
        baselines and log-probabilities are averaged over the ``self.M``
        copies. A hybrid loss (location MSE * 100 + baseline MSE + REINFORCE
        term) is accumulated together with the mean Euclidean distance between
        predicted and ground-truth locations.

        Side effects: for every sample a visualisation with the predicted
        (and, when not NaN, the ground-truth) focus rectangle is written to
        ``logs/<epoch>/<frameEnd>.jpg``; running averages are sent to
        tensorboard when ``self.use_tensorboard`` is set.

        Args:
            epoch: integer epoch number; names the output directory and
                offsets the tensorboard iteration counter.

        Returns:
            ``(losses.avg, accs.avg)``. The second value is the average
            distance between prediction and ground truth, not a
            classification accuracy.
        """
        losses = AverageMeter()
        accs = AverageMeter()
        countTotal = 50

        count = 0
        is_blend = 1
        save_dir = os.path.join('logs', '{:02d}'.format(epoch))
        # makedirs also creates the parent 'logs' directory when it is missing
        # and does not fail if the epoch directory already exists.
        os.makedirs(save_dir, exist_ok=True)

        for i, (x, fixs, y, speeds, courses, scale_gt, indexSeq,
                frameEnd) in enumerate(self.valid_loader):
            # NOTE(review): squeeze() without a dim also drops the batch axis
            # when the batch holds a single sample - confirm the loader never
            # yields a batch of size 1.
            y = y.squeeze().float()

            if count > countTotal:
                return losses.avg, accs.avg
            count = count + 1

            if self.use_gpu:
                x, y, speeds, courses = x.cuda(), y.cuda(), speeds.cuda(
                ), courses.cuda()
            x, y, speeds, courses = Variable(x), Variable(y), Variable(
                speeds), Variable(courses)

            # duplicate the input self.M times along the batch axis
            x = x.repeat(self.M, 1, 1, 1, 1)

            # initialize location vector and hidden state
            self.batch_size = x.shape[0]
            h_t, l_t = self.reset()

            # extract the glimpses
            log_pi = []
            baselines = []
            for t in range(self.num_glimpses - 1):
                # forward pass through model
                h_t, l_t, b_t, p = self.model(x, speeds, courses, l_t, h_t, t)

                # store
                baselines.append(b_t)
                log_pi.append(p)

            # last iteration
            h_t, l_t, b_t, l_t_final, p, scale = self.model(x,
                                                            speeds,
                                                            courses,
                                                            l_t,
                                                            h_t,
                                                            self.num_glimpses -
                                                            1,
                                                            last=True)
            log_pi.append(p)
            baselines.append(b_t)

            # convert list to tensors and reshape
            baselines = torch.stack(baselines).transpose(1, 0)
            log_pi = torch.stack(log_pi).transpose(1, 0)

            # average over the self.M duplicated copies
            l_t_final = l_t_final.view(self.M, -1, l_t_final.shape[-1])
            l_t_final = torch.mean(l_t_final, dim=0)

            if is_blend:
                # Iterate over the original samples only: x was repeated
                # self.M times, but l_t_final, y, scale_gt, indexSeq and
                # frameEnd hold one entry per un-duplicated sample, so
                # looping over x.shape[0] would run past their end whenever
                # self.M > 1.
                for indexBlend in range(l_t_final.shape[0]):
                    pathImg = os.path.join(
                        dreyeve_dir, '{:02d}'.format(indexSeq[indexBlend]),
                        'frames', '{:06d}.jpg'.format(frameEnd[indexBlend]))
                    img = read_image(pathImg, channels_first=False, color=True)
                    pathFix = os.path.join(
                        dreyeve_dir, '{:02d}'.format(indexSeq[indexBlend]),
                        'saliency_fix',
                        '{:06d}.png'.format(frameEnd[indexBlend]))
                    fix_map = read_image(pathFix,
                                         channels_first=False,
                                         color=False)
                    loc = l_t_final[indexBlend, :].cpu().detach().numpy()
                    loc_gt = y[indexBlend].cpu().numpy()
                    scale_blend = scale[indexBlend].cpu().detach().numpy()
                    scale_gt_blend = scale_gt[indexBlend].cpu().detach().numpy(
                    )

                    # draw the prediction
                    blend = blend_map_with_focus_rectangle(img,
                                                           fix_map,
                                                           loc,
                                                           scale=scale_blend,
                                                           color=(0, 0, 255))
                    # draw the ground truth when it is defined
                    if not (np.isnan(loc_gt[0]) or np.isnan(loc_gt[1])):
                        blend = blend_map_with_focus_rectangle(
                            blend,
                            fix_map,
                            loc_gt,
                            scale=scale_gt_blend,
                            color=(0, 255, 0))

                    print('scale is {:.3f} and scale_gt is {:.3f}'.format(
                        float(scale_blend), float(scale_gt_blend)))
                    cv2.imwrite(
                        os.path.join(save_dir, '{:06d}.jpg'.format(
                            frameEnd[indexBlend])), blend)

            baselines = baselines.contiguous().view(self.M, -1,
                                                    baselines.shape[-1])
            baselines = torch.mean(baselines, dim=0)

            log_pi = log_pi.contiguous().view(self.M, -1, log_pi.shape[-1])
            log_pi = torch.mean(log_pi, dim=0)

            # reward: 1 where the predicted location lies within
            # self.dis_R_thres of the ground truth, 0 otherwise
            dis = 0
            R = torch.zeros(y.shape[0])
            for index in range(y.shape[0]):
                # get the distance of two locations
                distance = torch.sqrt(
                    torch.pow(l_t_final[index, 0] - y[index, 0], 2) +
                    torch.pow(l_t_final[index, 1] - y[index, 1], 2))
                dis = dis + distance
                R[index] = distance < self.dis_R_thres
            R = R.unsqueeze(1).repeat(1, self.num_glimpses).to(self.device)

            # compute losses for differentiable modules
            loss_action = F.mse_loss(l_t_final, y)
            loss_baseline = F.mse_loss(baselines, R)

            # compute reinforce loss
            adjusted_reward = R - baselines.detach()
            loss_reinforce = torch.sum(-log_pi * adjusted_reward, dim=1)
            loss_reinforce = torch.mean(loss_reinforce, dim=0)

            # sum up into a hybrid loss
            loss = loss_action * 100 + loss_baseline + loss_reinforce

            # "accuracy" here is the mean prediction-to-target distance
            acc = dis / len(y)

            print('avg dist is {}'.format(acc))

            # store
            losses.update(loss.data, x.size()[0])
            accs.update(acc.data, x.size()[0])

            # log to tensorboard
            if self.use_tensorboard:
                iteration = epoch * len(self.valid_loader) + i
                log_value('valid_loss', losses.avg, iteration)
                log_value('valid_acc', accs.avg, iteration)

        return losses.avg, accs.avg