Example #1
def create_sampled_dataset(original_dataset, rate):
    dataset = {}
    for enum_user, user in enumerate(original_dataset.keys()):
        dataset[user] = {}
        for enum_video, video in enumerate(original_dataset[user].keys()):
            print('creating sampled dataset', 'video', enum_video, '/',
                  len(original_dataset[user].keys()), 'user', enum_user, '/',
                  len(original_dataset.keys()))
            sample_orig = np.array([1, 0, 0])
            data_per_video = []
            for sample in original_dataset[user][video]:
                sample_yaw, sample_pitch = transform_the_degrees_in_range(
                    sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                quat_rot = rotationBetweenVectors(sample_orig, sample_new)
                # append the quaternion to the list
                data_per_video.append([
                    sample['sec'], quat_rot[0], quat_rot[1], quat_rot[2],
                    quat_rot[3]
                ])
            # interpolate the quaternions to the requested sampling rate (e.g. 0.2 s)
            data_per_video = np.array(data_per_video)
            # In this case the time starts counting at random parts of the video
            dataset[user][video] = interpolate_quaternions(
                data_per_video[:, 0],
                data_per_video[:, 1:],
                rate=rate,
                time_orig_at_zero=False)
    return dataset
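The helper interpolate_quaternions is not shown on this page. A minimal sketch of what it might look like, built on SciPy's Slerp and assuming the traces store quaternions scalar-first (w, x, y, z) as rotationBetweenVectors suggests; both the signature and the quaternion order are assumptions, not the repository's confirmed code:

import numpy as np
from scipy.spatial.transform import Rotation, Slerp

def interpolate_quaternions(times, quaternions, rate, time_orig_at_zero=True):
    # hypothetical re-implementation: resample a quaternion trace at a fixed rate
    # (assumes times[0] == 0 whenever time_orig_at_zero is True)
    start = 0.0 if time_orig_at_zero else times[0]
    new_times = np.arange(start, times[-1], rate)
    # SciPy uses scalar-last (x, y, z, w); reorder from the scalar-first trace
    rotations = Rotation.from_quat(np.asarray(quaternions)[:, [1, 2, 3, 0]])
    interpolated = Slerp(times, rotations)(new_times).as_quat()[:, [3, 0, 1, 2]]
    # return rows of [time, w, x, y, z], matching data_per_video above
    return np.concatenate((new_times[:, None], interpolated), axis=1)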
Example #2
def transform_angles_for_model(trace):
    new_trace = []
    for sample in trace:
        sample_yaw, sample_pitch = transform_the_degrees_in_range(sample[0], sample[1])
        sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
        new_trace.append(sample_new)
    return np.array(new_trace)
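Nearly every example on this page funnels the raw traces through the same two helpers. A minimal sketch of one plausible implementation, assuming yaw maps to the azimuth theta in [0, 2*pi) and pitch to the polar angle phi in [0, pi]; the degree ranges of the raw data are an assumption:

import numpy as np

def transform_the_degrees_in_range(yaw_deg, pitch_deg):
    # assumed convention: yaw in [-180, 180] degrees -> [0, 2*pi) radians,
    # pitch in [-90, 90] degrees -> [0, pi] radians
    return np.deg2rad(yaw_deg) % (2.0 * np.pi), np.deg2rad(pitch_deg + 90.0)

def eulerian_to_cartesian(theta, phi):
    # standard spherical-to-cartesian mapping onto the unit sphere
    return np.array([np.cos(theta) * np.sin(phi),
                     np.sin(theta) * np.sin(phi),
                     np.cos(phi)])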
Example #3
def create_sampled_dataset(original_dataset, rate):
    dataset = {}
    for user in original_dataset.keys():
        dataset[user] = {}
        for video in original_dataset[user].keys():
            print('creating sampled dataset', user, video)
            sample_orig = np.array([1, 0, 0])
            data_per_video = []
            for sample in original_dataset[user][video]:
                sample_yaw, sample_pitch = transform_the_degrees_in_range(
                    sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                quat_rot = rotationBetweenVectors(sample_orig, sample_new)
                # append the quaternion to the list
                data_per_video.append([
                    sample['sec'], quat_rot[0], quat_rot[1], quat_rot[2],
                    quat_rot[3]
                ])
            # interpolate the quaternions to the requested sampling rate (e.g. 0.2 s)
            data_per_video = np.array(data_per_video)
            dataset[user][video] = interpolate_quaternions(
                data_per_video[:, 0], data_per_video[:, 1:], rate=rate)
    return dataset
Example #4
def from_position_to_tile_probability_cartesian(pos):
    yaw_grid, pitch_grid = np.meshgrid(np.linspace(0, 1, NUM_TILES_WIDTH, endpoint=False),
                                       np.linspace(0, 1, NUM_TILES_HEIGHT, endpoint=False))
    yaw_grid += 1.0 / (2.0 * NUM_TILES_WIDTH)
    pitch_grid += 1.0 / (2.0 * NUM_TILES_HEIGHT)
    yaw_grid = yaw_grid * 2*np.pi
    pitch_grid = pitch_grid * np.pi
    x_grid, y_grid, z_grid = eulerian_to_cartesian(theta=yaw_grid, phi=pitch_grid)
    great_circle_distance = np.arccos(np.maximum(np.minimum(x_grid * pos[0] + y_grid * pos[1] + z_grid * pos[2], 1.0), -1.0))
    binary_orth = np.where(great_circle_distance < (((FOV_SIZE/2.0)/180.0)*np.pi), 1, 0)
    return binary_orth
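A hypothetical call, assuming numpy as np and that NUM_TILES_WIDTH, NUM_TILES_HEIGHT and FOV_SIZE (the field of view in degrees) are module-level constants:

# illustrative only: a viewer looking at yaw = pi, pitch = pi / 2 (on the equator)
pos = eulerian_to_cartesian(theta=np.pi, phi=np.pi / 2.0)
tile_map = from_position_to_tile_probability_cartesian(pos)
# tile_map has shape (NUM_TILES_HEIGHT, NUM_TILES_WIDTH); a tile is 1 exactly when
# the great-circle distance from its center to pos is below half the field of view
print(int(tile_map.sum()), 'tiles fall inside the field of view')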
Example #5
def from_position_to_tile_probability_cartesian(pos):
    yaw_grid, pitch_grid = np.meshgrid(np.linspace(0, 1, NUM_TILES_WIDTH_TRUE_SAL, endpoint=False),
                                       np.linspace(0, 1, NUM_TILES_HEIGHT_TRUE_SAL, endpoint=False))
    yaw_grid += 1.0 / (2.0 * NUM_TILES_WIDTH_TRUE_SAL)
    pitch_grid += 1.0 / (2.0 * NUM_TILES_HEIGHT_TRUE_SAL)
    yaw_grid = yaw_grid * 2 * np.pi
    pitch_grid = pitch_grid * np.pi
    x_grid, y_grid, z_grid = eulerian_to_cartesian(theta=yaw_grid, phi=pitch_grid)
    great_circle_distance = np.arccos(np.maximum(np.minimum(x_grid * pos[0] + y_grid * pos[1] + z_grid * pos[2], 1.0), -1.0))
    gaussian_orth = np.exp((-1.0 / (2.0 * np.square(0.1))) * np.square(great_circle_distance))
    return gaussian_orth
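Example #5 builds the same grid as Example #4 but replaces the hard FOV threshold with a Gaussian falloff exp(-d^2 / (2 * 0.1^2)) of the great-circle distance d in radians, yielding a soft saliency-like map rather than a binary one. If a probability distribution over tiles is wanted, the map can be normalized afterwards; a hypothetical post-processing step:

soft_map = from_position_to_tile_probability_cartesian(pos)  # pos: unit vector, as in the note above
soft_map = soft_map / soft_map.sum()  # now sums to 1 over all tiles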
Example #6
def get_most_salient_content_based_points_per_video(videos, saliency_folder, k=1):
    most_salient_points_per_video = {}
    for video in videos:
        saliencies_for_video = load_saliency(saliency_folder, video, RUN_IN_SERVER=False)
        most_salient_points_in_video = []
        for sal in saliencies_for_video:
            coordinates = peak_local_max(sal, exclude_border=False, num_peaks=k)
            coordinates_normalized = coordinates / np.array([NUM_TILES_HEIGHT, NUM_TILES_WIDTH])
            coordinates_radians = coordinates_normalized * np.array([np.pi, 2.0*np.pi])
            cartesian_pts = np.array([eulerian_to_cartesian(sample[1], sample[0]) for sample in coordinates_radians])
            most_salient_points_in_video.append(cartesian_pts)
        most_salient_points_per_video[video] = np.array(most_salient_points_in_video)
    return most_salient_points_per_video
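The index juggling here follows skimage's convention: peak_local_max returns peaks as (row, col) pairs, i.e. (pitch index, yaw index), which is why the code divides by [NUM_TILES_HEIGHT, NUM_TILES_WIDTH], scales by [pi, 2*pi], and then swaps the arguments in eulerian_to_cartesian(sample[1], sample[0]). A toy check of that ordering:

import numpy as np
from skimage.feature import peak_local_max

toy_sal = np.zeros((4, 8))
toy_sal[1, 5] = 1.0  # single peak at row 1 (pitch axis), column 5 (yaw axis)
print(peak_local_max(toy_sal, exclude_border=False, num_peaks=1))  # -> [[1 5]]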
Example #7
def transform_dataset_in_cartesian(original_dataset):
    dataset = {}
    for enum_user, user in enumerate(original_dataset.keys()):
        dataset[user] = {}
        for enum_video, video in enumerate(original_dataset[user].keys()):
            print('creating cartesian dataset', 'user', enum_user, '/', len(original_dataset.keys()), 'video', enum_video, '/', len(original_dataset[user].keys()))
            data_per_video = []
            for sample in original_dataset[user][video]:
                sample_yaw, sample_pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                data_per_video.append(sample_new)
            dataset[user][video] = np.array(data_per_video)
    return dataset
Example #8
def create_and_store_tile_probability_replica(original_dataset):
    if not os.path.exists(OUTPUT_TILE_PROB_FOLDER):
        os.makedirs(OUTPUT_TILE_PROB_FOLDER)
    for enum_user, user in enumerate(original_dataset.keys()):
        for enum_video, video in enumerate(original_dataset[user].keys()):
            print('creating tiles for', 'user', enum_user, '/', len(original_dataset.keys()), 'video', enum_video, '/', len(original_dataset[user].keys()))
            tile_prob_for_trace = []
            for sample_id, sample in enumerate(original_dataset[user][video]):
                sample_yaw, sample_pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                gen_tile_prob_cartesian = from_position_to_tile_probability_cartesian(sample_new)
                tile_prob_for_trace.append(gen_tile_prob_cartesian)
            filename = '%s_%s_created_tile.npy' % (video, user)
            file_path = os.path.join(OUTPUT_TILE_PROB_FOLDER, filename)
            np.save(file_path, np.array(tile_prob_for_trace))
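The saved traces can be read back with np.load; a hypothetical reload for one (user, video) pair, assuming the same folder and naming scheme:

file_path = os.path.join(OUTPUT_TILE_PROB_FOLDER, '%s_%s_created_tile.npy' % (video, user))
tile_probs = np.load(file_path)  # shape: (num_samples, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)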
Example #9
def get_most_salient_points_per_video(videos, true_saliency_folder, k=1):
    most_salient_points_per_video = {}
    for video in videos:
        saliencies_for_video_file = os.path.join(true_saliency_folder,
                                                 video + '.npy')
        saliencies_for_video = np.load(saliencies_for_video_file)
        most_salient_points_in_video = []
        for sal in saliencies_for_video:
            coordinates = peak_local_max(sal,
                                         exclude_border=False,
                                         num_peaks=k)
            coordinates_normalized = coordinates / np.array(
                [NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL])
            coordinates_radians = coordinates_normalized * np.array(
                [np.pi, 2.0 * np.pi])
            cartesian_pts = np.array([
                eulerian_to_cartesian(sample[1], sample[0])
                for sample in coordinates_radians
            ])
            most_salient_points_in_video.append(cartesian_pts)
        most_salient_points_per_video[video] = np.array(
            most_salient_points_in_video)
    return most_salient_points_per_video
Example #10
def get_most_salient_points_per_video():
    from skimage.feature import peak_local_max
    most_salient_points_per_video = {}
    for video in VIDEOS:
        saliencies_for_video_file = os.path.join(OUTPUT_TRUE_SALIENCY_FOLDER,
                                                 video + '.npy')
        saliencies_for_video = np.load(saliencies_for_video_file)
        most_salient_points_in_video = []
        for sal in saliencies_for_video:
            coordinates = peak_local_max(sal,
                                         exclude_border=False,
                                         num_peaks=5)
            coordinates_normalized = coordinates / np.array(
                [NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL])
            coordinates_radians = coordinates_normalized * np.array(
                [np.pi, 2.0 * np.pi])
            cartesian_pts = np.array([
                eulerian_to_cartesian(sample[1], sample[0])
                for sample in coordinates_radians
            ])
            most_salient_points_in_video.append(cartesian_pts)
        most_salient_points_per_video[video] = np.array(
            most_salient_points_in_video)
    return most_salient_points_per_video
Example #11
def compute_error(model_name):
    if model_name == 'CVPR18':
        model = create_CVPR18_model(M_WINDOW, H_WINDOW, NUM_TILES_HEIGHT,
                                    NUM_TILES_WIDTH)
        model.load_weights(
            os.path.join(
                ROOT_FOLDER, 'CVPR18',
                'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_13_end_13',
                'weights.hdf5'))
    if model_name == 'TRACK':
        model = create_TRACK_model(M_WINDOW, H_WINDOW, NUM_TILES_HEIGHT,
                                   NUM_TILES_WIDTH)

        weights_file = os.path.join(
            ROOT_FOLDER, 'TRACK',
            'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_13_end_13',
            'weights.hdf5')
        if os.path.isfile(weights_file):
            model.load_weights(weights_file)
        else:
            raise Exception(
                'Sorry, the folder ./Nguyen_MM_18/TRACK/ doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python training_procedure.py -train -gpu_id 0 -dataset_name Nguyen_MM_18 -model_name TRACK -m_window 5 -h_window 13 -provided_videos\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:u:/g/personal/miguel_romero-rondon_unice_fr/EYNvRsxKh1FCiJrhudfBMUsBhp1oB5m3fxTYa8kkZHOcSA?e=eC2Plz'
            )
    if model_name == 'pos_only':
        model = create_pos_only_model(M_WINDOW, H_WINDOW)

        weights_file = os.path.join(
            ROOT_FOLDER, 'pos_only',
            'Models_EncDec_eulerian_init_5_in_5_out_13_end_13', 'weights.hdf5')
        if os.path.isfile(weights_file):
            model.load_weights(weights_file)
        else:
            raise Exception(
                'Sorry, the folder ./Nguyen_MM_18/pos_only/ doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python training_procedure.py -train -gpu_id 0 -dataset_name Nguyen_MM_18 -model_name pos_only -m_window 5 -h_window 13 -provided_videos\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:u:/g/personal/miguel_romero-rondon_unice_fr/EWO4VEQP2GtMp6NEZBMZA-QBpuXFo6WG2jQb-muvPc_ejw?e=iaPbYp'
            )

    # From the paper MM18:
    # We use the input feature from the past one second to predict the head orientation in the future.
    one_second_in_timesteps = int(np.ceil(1.0 / ORIGINAL_SAMPLING_RATE))
    one_timestep_models = MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE
    # From the paper MM18:
    # "The default prediction window k is set to be 0.5 seconds.
    # To explore the effect of prediction window k on the accuracy of the proposed model and other three benchmarks, we vary k from 0.5 seconds to 2.5 seconds."
    prediction_horizons = [0.5, 1, 1.5, 2, 2.5]
    results_for_pred_horizon = {}
    for pred_hor in prediction_horizons:
        results_for_pred_horizon[pred_hor] = []
        prediction_horizon_in_timesteps = int(
            np.ceil(pred_hor / ORIGINAL_SAMPLING_RATE))
        for enum_video, video in enumerate(VIDEOS_TEST):
            saliency = salient_ds_dict['360net'][video]['salient']
            # preprocess saliency
            saliency = np.array([
                cv2.resize(sal, (NUM_TILES_WIDTH, NUM_TILES_HEIGHT))
                for sal in saliency
            ])
            saliency = np.array([(sal * 1.0 - sal.min()) for sal in saliency])
            saliency = np.array([(sal / sal.max()) * 255 for sal in saliency])
            saliency = np.array([post_filter(sal) for sal in saliency])
            saliency = np.array([
                mmscaler.fit_transform(salmap.ravel().reshape(-1, 1)).reshape(
                    salmap.shape) for salmap in saliency
            ])
            saliency = np.expand_dims(saliency, -1)
            for user in USERS:
                print('computing', model_name, 'baseline error for video',
                      enum_video, '/', len(VIDEOS_TEST), 'user', user, '/',
                      len(USERS), 'prediction_horizon', pred_hor)
                trace = salient_ds_dict['360net'][video]['headpos'][user]

                trace_for_model = np.array(
                    [vector_to_ang(point) for point in trace])
                trace_for_model = np.array([
                    transform_the_degrees_in_range(sample[0], sample[1])
                    for sample in trace_for_model
                ])
                trace_for_model = np.array([
                    eulerian_to_cartesian(sample[0], sample[1])
                    for sample in trace_for_model
                ])
                for t in range(one_second_in_timesteps,
                               len(trace) - prediction_horizon_in_timesteps):
                    if model_name == 'no_motion':
                        model_pred = create_head_map(trace[t])
                    if model_name == 'TRACK':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        sal_start = t - int(np.ceil((M_WINDOW - 1) * one_timestep_models))
                        sal_end = t + int(np.ceil(one_timestep_models * (H_WINDOW + 1)))
                        saliency_input = saliency[sal_start:sal_end]
                        model_pred = get_TRACK_prediction(model, pos_input, saliency_input, pred_hor)
                    if model_name == 'CVPR18':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        sal_start = t - int(np.ceil((M_WINDOW - 1) * one_timestep_models))
                        sal_end = t + int(np.ceil(one_timestep_models * (H_WINDOW + 1)))
                        saliency_input = saliency[sal_start:sal_end]
                        model_pred = get_CVPR18_prediction(model, pos_input, saliency_input, pred_hor)
                    if model_name == 'pos_only':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        model_pred = get_pos_only_prediction(model, pos_input, pred_hor)
                    groundtruth = create_head_map(
                        trace[t + prediction_horizon_in_timesteps])
                    results_for_pred_horizon[pred_hor].append(
                        compute_accuracy_metric(model_pred, groundtruth))
        print(pred_hor, np.mean(results_for_pred_horizon[pred_hor]))
    return results_for_pred_horizon
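To make the window arithmetic concrete with illustrative numbers (the real sampling rates are defined elsewhere in the script): if ORIGINAL_SAMPLING_RATE were 0.2 s, then one second of history is ceil(1.0 / 0.2) = 5 timesteps and the shortest 0.5 s horizon is ceil(0.5 / 0.2) = 3 timesteps, so each t feeds the model the last 5 positions and scores the prediction 3 samples ahead:

# illustrative numbers only
print(int(np.ceil(1.0 / 0.2)))  # one_second_in_timesteps -> 5
print(int(np.ceil(0.5 / 0.2)))  # 0.5 s prediction horizon -> 3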
Example #12
def transform_normalized_eulerian_to_cartesian(position):
    position = position * np.array([2 * np.pi, np.pi])
    eulerian_samples = eulerian_to_cartesian(position[0], position[1])
    return np.array(eulerian_samples)
Example #13
def transform_normalized_eulerian_to_cartesian(positions):
    positions = positions * np.array([2*np.pi, np.pi])
    eulerian_samples = [eulerian_to_cartesian(pos[0], pos[1]) for pos in positions]
    return np.array(eulerian_samples)
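Examples #12 and #13 share a name but not a contract, presumably because they come from different scripts: the first expects a single normalized (yaw, pitch) pair, the second an (N, 2) batch. Illustrative shapes, assuming eulerian_to_cartesian as sketched earlier:

# Example #12: transform_normalized_eulerian_to_cartesian(np.array([0.5, 0.5]))   -> shape (3,)
# Example #13: transform_normalized_eulerian_to_cartesian(np.array([[0.5, 0.5]])) -> shape (1, 3)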
Example #14
            model_prediction = np.array([get_max_sal_pos(sal, dataset_name) for sal in decoder_sal_inputs_for_sample[0, :, :, 0]])
        elif model_name == 'true_saliency':
            model_prediction = predict_most_salient_point(decoder_true_sal_inputs_for_sample, decoder_pos_inputs_for_sample[0, 0])
        elif model_name == 'content_based_saliency':
            model_prediction = predict_most_salient_cb_point(decoder_true_sal_inputs_for_sample, decoder_pos_inputs_for_sample[0, 0])
        elif model_name == 'CVPR18_orig':
            initial_pos_inputs = transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample)
            model_pred = auto_regressive_prediction(model, initial_pos_inputs, decoder_sal_inputs_for_sample, M_WINDOW, H_WINDOW)
            model_prediction = transform_normalized_eulerian_to_cartesian(model_pred)
        elif model_name == 'MM18':
            model_prediction = []
            groundtruth = []
            for _h_window in range(H_WINDOW):
                model_pred = mm18_models[_h_window].predict(encoder_sal_inputs_for_sample)[0]
                model_pred_norm_eul = MM18_model.model_pred_in_normalized_eulerian(model_pred)
                model_prediction.append(eulerian_to_cartesian(model_pred_norm_eul[0], model_pred_norm_eul[1]))

                groundtruth_eulerian = MM18_model.model_pred_in_normalized_eulerian(all_headmaps[video][user][x_i+_h_window+1])
                groundtruth.append(eulerian_to_cartesian(groundtruth_eulerian[0], groundtruth_eulerian[1]))

        for t in range(len(groundtruth)):
            if t not in errors_per_video[video].keys():
                errors_per_video[video][t] = []
            errors_per_video[video][t].append(metric(groundtruth[t], model_prediction[t]))
            if t not in errors_per_timestep.keys():
                errors_per_timestep[t] = []
            errors_per_timestep[t].append(metric(groundtruth[t], model_prediction[t]))

    for video_name in videos_test:
        for t in range(H_WINDOW):
            print(video_name, t, np.mean(errors_per_video[video_name][t]), end=';')