# --- Meshed-Memory Transformer: COCO evaluation setup ---
# Parses the feature/annotation paths, builds the image + text pipelines,
# loads the test split and the cached vocabulary.
parser.add_argument("--features_path", type=str)
parser.add_argument("--annotation_folder", type=str)
args = parser.parse_args()

print("Meshed-Memory Transformer Evaluation")

# Pipeline for image regions (pre-extracted detection features)
image_field = ImageDetectionsField(detections_path=args.features_path, max_detections=50, load_in_tmp=False)

# Pipeline for text
text_field = TextField(
    init_token="<bos>",
    eos_token="<eos>",
    lower=True,
    tokenize="spacy",
    remove_punctuation=True,
    nopoints=False,
)

# Create the dataset; only the test split is used for evaluation.
dataset = COCO(
    image_field,
    text_field,
    "coco/images/",
    args.annotation_folder,
    args.annotation_folder,
)
_, _, test_dataset = dataset.splits

# FIX: the original `pickle.load(open(...))` never closed the file handle;
# a context manager guarantees it is released.
with open("vocab.pkl", "rb") as vocab_file:
    text_field.vocab = pickle.load(vocab_file)
# --- Meshed-Memory Transformer: COCO training setup ---
# Parses CLI options, builds the image/text pipelines and dataset splits,
# builds (or reloads) the vocabulary, and constructs the encoder.
parser.add_argument('--resume_best', action='store_true')
parser.add_argument('--features_path', type=str)
parser.add_argument('--annotation_folder', type=str)
parser.add_argument('--logs_folder', type=str, default='tensorboard_logs')
args = parser.parse_args()
print(args)

print('Meshed-Memory Transformer Training')

writer = SummaryWriter(log_dir=os.path.join(args.logs_folder, args.exp_name))

# Pipeline for image regions (pre-extracted detection features)
image_field = ImageDetectionsField(detections_path=args.features_path, max_detections=50, load_in_tmp=False)

# Pipeline for text
text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True,
                       tokenize='spacy', remove_punctuation=True, nopoints=False)

# Create the dataset
dataset = COCO(image_field, text_field, 'coco/images/', args.annotation_folder, args.annotation_folder)
train_dataset, val_dataset, test_dataset = dataset.splits

# Build the vocabulary once per experiment and cache it on disk.
# FIX: hoist the repeated 'vocab_%s.pkl' % args.exp_name expression and use
# context managers — the original pickle.dump/load left file handles open.
vocab_path = 'vocab_%s.pkl' % args.exp_name
if not os.path.isfile(vocab_path):
    print("Building vocabulary")
    text_field.build_vocab(train_dataset, val_dataset, min_freq=5)
    with open(vocab_path, 'wb') as vocab_file:
        pickle.dump(text_field.vocab, vocab_file)
else:
    with open(vocab_path, 'rb') as vocab_file:
        text_field.vocab = pickle.load(vocab_file)

# Model and dataloaders
encoder = MemoryAugmentedEncoder(3, 0, attention_module=ScaledDotProductAttentionMemory,
                                 attention_module_kwargs={'m': args.m})
type=str, default= "/cluster/sorona/dchen/faster_rcnn_R_101_DC5_3x_ScanNet_feats.hdf5") args = parser.parse_args() print('Meshed-Memory Transformer Evaluation') # Pipeline for image regions image_field = ScanNetDetectionsField(detections_path=args.features_path, max_detections=50, load_in_tmp=False) # Pipeline for text text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True, tokenize='spacy', remove_punctuation=True, nopoints=False) # Create the dataset dataset = ScanNet(image_field, text_field, "/cluster/sorona/dchen/ScanNet_frames/", get_image_ids(args.features_path)) _, _, test_dataset = dataset.splits text_field.vocab = pickle.load(open('vocab.pkl', 'rb')) # Model and dataloaders encoder = MemoryAugmentedEncoder( 3, 0, attention_module=ScaledDotProductAttentionMemory,