if __name__ == '__main__':
    # Command-line interface: where to find the pre-trained checkpoint, the
    # dataset folder, and where results should be written.
    parser = argparse.ArgumentParser(
        description='Generate captions for test samples')
    parser.add_argument(
        '-chckpt', '--checkpoint_path', type=str,
        default='pretrain/chckpt.pt',
        help='Set the path to pre-trained model '
             '(default is pretrain/chckpt.pt).')
    parser.add_argument(
        '-data', '--dataset_folder', type=str,
        default='data/MSVD',
        help='Set the path to dataset folder (default is data/MSVD).')
    parser.add_argument(
        '-out', '--output_folder', type=str,
        default='results/MSVD',
        help='Set the path to output folder (default is results/MSVD).')
    args = parser.parse_args()

    # Load the vocabulary from the pickled corpus stored in the dataset folder.
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only point --dataset_folder at corpus files you trust.
    with open(os.path.join(args.dataset_folder, 'corpus.pkl'), 'rb') as f:
        corpus = pickle.load(f)

    # The corpus is indexed positionally: entry 4 is used as the
    # index -> word mapping, entry 5 as the pre-trained embedding values.
    idx2word_dict = corpus[4]
    vocab = Vocabulary.from_idx2word_dict(idx2word_dict, False)
    print('Size of vocabulary: {}'.format(len(vocab)))

    # Pre-trained embedding, converted to a torch tensor.
    pretrained_embedding = torch.Tensor(corpus[5])

    # Feature dimensionalities for the different visual feature types.
    cnn_feature_size = 2048
    c3d_feature_size = 4096
    i3d_feature_size = 400
    eco_feature_size = 1536
    # 3584 == 2048 + 1536; presumably the CNN and ECO features concatenated
    # (TODO confirm against the feature-extraction code).
    res_eco_features_size = 3584
    cnn_global_size = 512
    projected_size = 512
    # Number of hidden units of the recurrent network.
    hidden_size = 1024
    # Dimension of the intermediate representation in the boundary detection
    # layer (NOTE(review): wording inferred from the original comment).
    mid_size = 128