# Module-level imports assumed by the functions in this listing; the remaining
# helpers (transform_the_degrees_in_range, eulerian_to_cartesian,
# rotationBetweenVectors, interpolate_quaternions, load_saliency, ...) come from
# the repository's utility modules.
import os

import numpy as np


def create_sampled_dataset(original_dataset, rate):
    dataset = {}
    for user in original_dataset.keys():
        dataset[user] = {}
        for video in original_dataset[user].keys():
            print('creating sampled dataset', user, video)
            sample_orig = np.array([1, 0, 0])
            data_per_video = []
            for sample in original_dataset[user][video]:
                sample_yaw, sample_pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                quat_rot = rotationBetweenVectors(sample_orig, sample_new)
                # append the quaternion to the list
                data_per_video.append([sample['sec'], quat_rot[0], quat_rot[1], quat_rot[2], quat_rot[3]])
                # update the values of time and sample
            # interpolate the quaternions to have a rate of 0.2 secs
            data_per_video = np.array(data_per_video)
            dataset[user][video] = interpolate_quaternions(data_per_video[:, 0], data_per_video[:, 1:], rate=rate)
    return dataset
def create_sampled_dataset(original_dataset, rate):
    dataset = {}
    for enum_user, user in enumerate(original_dataset.keys()):
        dataset[user] = {}
        for enum_video, video in enumerate(original_dataset[user].keys()):
            print('creating sampled dataset', 'video', enum_video, '/', len(original_dataset[user].keys()),
                  'user', enum_user, '/', len(original_dataset.keys()))
            sample_orig = np.array([1, 0, 0])
            data_per_video = []
            for sample in original_dataset[user][video]:
                sample_yaw, sample_pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
                sample_new = eulerian_to_cartesian(sample_yaw, sample_pitch)
                quat_rot = rotationBetweenVectors(sample_orig, sample_new)
                # append the quaternion to the list
                data_per_video.append([sample['sec'], quat_rot[0], quat_rot[1], quat_rot[2], quat_rot[3]])
                # update the values of time and sample
            # interpolate the quaternions to have a rate of 0.2 secs
            data_per_video = np.array(data_per_video)
            # In this case the time starts counting at random parts of the video
            dataset[user][video] = interpolate_quaternions(data_per_video[:, 0], data_per_video[:, 1:],
                                                           rate=rate, time_orig_at_zero=False)
    return dataset
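# --- Usage sketch (illustrative, not part of the original script) ---
# A minimal sketch of how the sampling step above might be driven. It assumes the
# raw traces were already parsed into a nested dict of the form
# {user: {video: [{'sec': t, 'yaw': yaw_deg, 'pitch': pitch_deg}, ...]}};
# `load_raw_traces` is a hypothetical placeholder for the repository's own reader.
SAMPLING_RATE = 0.2  # seconds, the sampling period used throughout this pipeline

original_dataset = load_raw_traces()  # hypothetical loader, replace with the repo's reader
sampled_dataset = create_sampled_dataset(original_dataset, rate=SAMPLING_RATE)

# Each resulting trace is an array of rows [t, q0, q1, q2, q3]: a time-stamp followed
# by the unit quaternion that rotates the reference vector (1, 0, 0) onto the gaze direction.
for user in sampled_dataset:
    for video in sampled_dataset[user]:
        print(user, video, sampled_dataset[user][video].shape)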
def compute_pretrained_model_error(dataset, videos_list, model_name, history_window, model_weights_path):
    if model_name == 'TRACK':
        model = create_TRACK_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'CVPR18':
        model = create_CVPR18_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'pos_only':
        model = create_pos_only_model(history_window, TRAINED_PREDICTION_HORIZON)

    if os.path.isfile(model_weights_path):
        model.load_weights(model_weights_path)
    else:
        command = 'python training_procedure.py -train -gpu_id 0 -dataset_name Xu_PAMI_18 -model_name %s -m_window 5 -h_window 5 -exp_folder sampled_by_frame_dataset -provided_videos' % model_name
        raise Exception('Sorry, the file ' + model_weights_path + ' doesn\'t exist.\nYou can:\n'
                        '* Create it using the command:\n\t\"' + command + '\" or \n'
                        '* Download the file from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/EvQmshggLahKnBjIehzAbY0Bd-JDlzzFPYw9_R8IrGjQPA?e=Yk7f3c')

    saliency_folder = os.path.join(ROOT_FOLDER, 'extract_saliency/saliency')
    if model_name not in ['pos_only']:
        all_saliencies = {}
        for video in videos_list:
            if os.path.isdir(saliency_folder):
                all_saliencies[video] = load_saliency(saliency_folder, video)
            else:
                raise Exception('Sorry, the folder ./Xu_PAMI_18/extract_saliency doesn\'t exist or is incomplete.\nYou can:\n'
                                '* Create it using the command:\n\t\"./Xu_PAMI_18/dataset/creation_of_scaled_images.sh\n\tpython ./Extract_Saliency/panosalnet.py -dataset_name PAMI_18\" or \n'
                                '* Download the folder from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/Eir98fXEHKRBq9j-bgKGNTYBNN-_FQkvisJ1j9kOeVrB-Q?e=50lCOb')

    mo_calculator = MeanOverlap(3840, 1920, 65.5 / 2, 3.0 / 4.0)
    error_per_video = {}
    for user in dataset.keys():
        for video in VIDEOS_TEST:
            time_stamps_in_saliency = np.arange(0.0, len(all_saliencies[video]) * 0.2, 0.2)
            print('computing error for user', user, 'and video', video)
            angles_per_video = recover_original_angles_from_quaternions_trace(dataset[user][video])
            if video not in error_per_video.keys():
                error_per_video[video] = []
            # 1. Find the first time-stamp greater than 1 second (so that the input of the trace is greater than 5 when sampled in 0.2)
            # 1.1 Get the video rate (This is also the index of the first time-stamp at 1 sec)
            video_rate = int(np.ceil(get_frame_rate(video, hardcoded=True)))
            for t in range(video_rate, len(angles_per_video) - 1):
                # Remember that python arrays do not include the last index when sliced,
                # e.g. [0, 1, 2, 3][:2] = [0, 1], in this case the input_data doesn't include the value at t+1
                input_data = dataset[user][video][t - video_rate:t + 1]
                sample_t_n = angles_per_video[t + 1]
                sampled_input_data = interpolate_quaternions(input_data[:, 0], input_data[:, 1:], rate=RATE, time_orig_at_zero=False)
                sampled_input_data_xyz = recover_xyz_from_quaternions_trace(sampled_input_data)
                # 2. For the saliency, get the time-stamps of the input trace and find the closest
                first_decoder_saliency_timestamp = input_data[-1, 0] + 0.2
                first_decoder_sal_id = np.argmin(np.power(time_stamps_in_saliency - first_decoder_saliency_timestamp, 2.0))
                if model_name not in ['pos_only', 'no_motion']:
                    encoder_sal_inputs_for_sample = np.array([np.expand_dims(all_saliencies[video][first_decoder_sal_id - history_window:first_decoder_sal_id], axis=-1)])
                    # ToDo: Be careful here, we are using TRAINED_PREDICTION_HORIZON to load future saliencies
                    decoder_sal_inputs_for_sample = np.zeros((1, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH, 1))
                    taken_saliencies = all_saliencies[video][first_decoder_sal_id:min(first_decoder_sal_id + TRAINED_PREDICTION_HORIZON, len(all_saliencies[video]))]
                    # decoder_sal_inputs_for_sample = np.array([np.expand_dims(taken_saliencies, axis=-1)])
                    decoder_sal_inputs_for_sample[0, :len(taken_saliencies), :, :, 0] = taken_saliencies
                encoder_pos_inputs_for_sample = [sampled_input_data_xyz[-history_window - 1:-1, 1:]]
                decoder_pos_inputs_for_sample = [sampled_input_data_xyz[-1:, 1:]]
                # 3. predict
                if model_name == 'TRACK':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample), np.array(encoder_sal_inputs_for_sample),
                         np.array(decoder_pos_inputs_for_sample), np.array(decoder_sal_inputs_for_sample)])[0]
                elif model_name == 'CVPR18':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample), np.array(decoder_pos_inputs_for_sample),
                         np.array(decoder_sal_inputs_for_sample)])[0]
                elif model_name == 'pos_only':
                    model_pred = model.predict(
                        [transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample),
                         transform_batches_cartesian_to_normalized_eulerian(decoder_pos_inputs_for_sample)])[0]
                    model_prediction = transform_normalized_eulerian_to_cartesian(model_pred)
                # 4. upsample the predicted trace from 0.2 sec to the video rate
                sample_orig = np.array([1, 0, 0])
                quat_rot_1 = rotationBetweenVectors(sample_orig, sampled_input_data_xyz[-1, 1:])
                quat_rot_1 = np.array([quat_rot_1[0], quat_rot_1[1], quat_rot_1[2], quat_rot_1[3]])
                quat_rot_2 = rotationBetweenVectors(sample_orig, model_prediction[0])
                quat_rot_2 = np.array([quat_rot_2[0], quat_rot_2[1], quat_rot_2[2], quat_rot_2[3]])
                interpolated = interpolate_quaternions([0.0, RATE], [quat_rot_1, quat_rot_2], rate=1.0 / video_rate)
                pred_samples = recover_original_angles_from_quaternions_trace(interpolated)
                pred_sample_t_n = pred_samples[1]
                mo_score = mo_calculator.calc_mo_deg([pred_sample_t_n[0], pred_sample_t_n[1]], [sample_t_n[0], sample_t_n[1]], is_centered=True)
                error_per_video[video].append(mo_score)
    avg_error_per_video = {}
    for video in VIDEOS_TEST:
        avg_error_per_video[video] = np.mean(error_per_video[video])
    return avg_error_per_video
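# --- Usage sketch (illustrative, not part of the original script) ---
# Assumes `sampled_dataset` was produced by create_sampled_dataset() above and that
# VIDEOS_TEST, RATE and the model constants are defined as in this module; the
# weights path below is only a placeholder, not the actual checkpoint location.
avg_error_per_video = compute_pretrained_model_error(
    dataset=sampled_dataset,
    videos_list=VIDEOS_TEST,
    model_name='TRACK',
    history_window=5,
    model_weights_path='path/to/TRACK/weights.hdf5')  # placeholder path
for video, mean_mo in avg_error_per_video.items():
    print(video, 'average MeanOverlap score:', mean_mo)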