import numpy as np
import tensorflow.compat.v1 as tf  # TF1 graph/session API is assumed throughout
import torch


def sentence_similarity(targets, predictions, sentence_similarity_model,
                        **unused_kwargs):
    """Average pairwise similarity between target and prediction embeddings."""
    with tf.Session() as session:
        session.run(
            [tf.global_variables_initializer(), tf.tables_initializer()])
        targets_embeddings = session.run(sentence_similarity_model(targets))
        predictions_embeddings = session.run(
            sentence_similarity_model(predictions))
    # Row-wise dot products: one score per (target, prediction) pair. This
    # equals cosine similarity when the embeddings are (near) unit-norm.
    sentence_similarity_all = np.einsum('ij,ij->i', targets_embeddings,
                                        predictions_embeddings)
    sentence_similarity_avg = sentence_similarity_all.mean()
    return {"sentence_similarity": sentence_similarity_avg}
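
# A minimal usage sketch, not from the source: `sentence_similarity_model` can
# be any callable mapping a batch of strings to a [batch, dim] embedding
# tensor. A TF1-style TF-Hub Universal Sentence Encoder module fits that
# shape; the module URL and example sentences below are illustrative only.
if __name__ == "__main__":
    import tensorflow_hub as hub  # assumed available alongside TF1

    use_model = hub.Module(
        "https://tfhub.dev/google/universal-sentence-encoder/2")
    metrics = sentence_similarity(
        targets=["a cat sits on the mat"],
        predictions=["a cat is sitting on the mat"],
        sentence_similarity_model=use_model)
    print(metrics["sentence_similarity"])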

def calculate_fvd_from_inference_result(gen_path,
                                        ref_path='./Evaluation/ref',
                                        num_of_video=16,
                                        video_length=10):
    """FVD between reference clips in `ref_path` and generated clips in `gen_path`."""
    print('FVD video length: {}'.format(video_length))
    # `VideoGenerateDataset` is a project-local dataset class assumed to be
    # defined elsewhere in this repo; it yields uint8 video tensors with at
    # least `min_len` frames.
    base_ref = VideoGenerateDataset(ref_path, min_len=video_length)
    base_tar = VideoGenerateDataset(gen_path, min_len=video_length)
    bs = num_of_video
    # The I3D embedding below is computed in sub-batches of 16 videos.
    assert bs % 16 == 0
    videoloader_ref = torch.utils.data.DataLoader(
        base_ref, batch_size=bs, drop_last=True, shuffle=False)
    videoloader_tar = torch.utils.data.DataLoader(
        base_tar, batch_size=bs, drop_last=True, shuffle=False)
    # Only the first batch of each loader feeds the FVD estimate.
    images_ref = next(iter(videoloader_ref)).numpy()
    images_tar = next(iter(videoloader_tar)).numpy()
    # `calculate_fvd`, `create_id3_embedding` and `preprocess` are assumed to
    # come from the reference FVD implementation
    # (google-research/frechet_video_distance), with `create_id3_embedding`
    # extended to accept a batch-size argument.
    with tf.Graph().as_default():
        ref_tf = tf.convert_to_tensor(images_ref, dtype=tf.uint8)
        tar_tf = tf.convert_to_tensor(images_tar, dtype=tf.uint8)
        # FVD over I3D embeddings of the clips, resized to 224x224.
        result = calculate_fvd(
            create_id3_embedding(preprocess(ref_tf, (224, 224)), bs),
            create_id3_embedding(preprocess(tar_tf, (224, 224)), bs))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            return sess.run(result)
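
# A minimal usage sketch, not from the source: the generated-clip path and the
# values below are illustrative only. `num_of_video` must stay a multiple of
# 16 to satisfy the batching assert, and every clip must contain at least
# `video_length` frames or the dataset will filter it out.
if __name__ == "__main__":
    fvd_score = calculate_fvd_from_inference_result(
        gen_path='./Evaluation/gen',  # hypothetical generated-clip directory
        ref_path='./Evaluation/ref',
        num_of_video=16,
        video_length=10)
    print('FVD: {}'.format(fvd_score))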