# Example #1 (header translated from Korean "예제 #1"; stray "0" marker removed)
    # CLI: path to the precomputed detection features (HDF5) and the COCO
    # annotation folder.
    parser.add_argument("--features_path", type=str)
    parser.add_argument("--annotation_folder", type=str)
    args = parser.parse_args()

    print("Meshed-Memory Transformer Evaluation")

    # Pipeline for image regions: loads precomputed detection features from
    # --features_path, capped at 50 regions per image, not staged to tmp.
    image_field = ImageDetectionsField(detections_path=args.features_path,
                                       max_detections=50,
                                       load_in_tmp=False)

    # Pipeline for text: spaCy tokenization, lowercased, punctuation stripped.
    text_field = TextField(
        init_token="<bos>",
        eos_token="<eos>",
        lower=True,
        tokenize="spacy",
        remove_punctuation=True,
        nopoints=False,
    )

    # Create the dataset. NOTE(review): the annotation folder is passed twice
    # (annotation root and id root) — presumably both live in the same
    # directory; confirm against the COCO dataset constructor.
    dataset = COCO(
        image_field,
        text_field,
        "coco/images/",
        args.annotation_folder,
        args.annotation_folder,
    )
    # Only the test split is needed for evaluation.
    _, _, test_dataset = dataset.splits

    # Load the prebuilt vocabulary. Use a context manager so the file handle
    # is closed deterministically (the original left it dangling).
    with open("vocab.pkl", "rb") as vocab_file:
        text_field.vocab = pickle.load(vocab_file)
    # CLI: resume flag, feature/annotation paths, and the TensorBoard log root.
    parser.add_argument('--resume_best', action='store_true')
    parser.add_argument('--features_path', type=str)
    parser.add_argument('--annotation_folder', type=str)
    parser.add_argument('--logs_folder', type=str, default='tensorboard_logs')
    args = parser.parse_args()
    print(args)

    print('Meshed-Memory Transformer Training')

    # One TensorBoard run directory per experiment name.
    writer = SummaryWriter(log_dir=os.path.join(args.logs_folder, args.exp_name))

    # Pipeline for image regions: precomputed detection features, max 50
    # regions per image.
    image_field = ImageDetectionsField(detections_path=args.features_path, max_detections=50, load_in_tmp=False)

    # Pipeline for text: spaCy tokenization, lowercased, punctuation stripped.
    text_field = TextField(init_token='<bos>', eos_token='<eos>', lower=True, tokenize='spacy',
                           remove_punctuation=True, nopoints=False)

    # Create the dataset (annotation folder passed as both annotation root
    # and id root) and take all three splits for training/validation/testing.
    dataset = COCO(image_field, text_field, 'coco/images/', args.annotation_folder, args.annotation_folder)
    train_dataset, val_dataset, test_dataset = dataset.splits

    # Build the vocabulary once per experiment and cache it on disk; reuse
    # the cached copy on later runs. Context managers close the pickle
    # files promptly (the original leaked both handles), and the path is
    # computed once instead of three times.
    vocab_path = 'vocab_%s.pkl' % args.exp_name
    if not os.path.isfile(vocab_path):
        print("Building vocabulary")
        text_field.build_vocab(train_dataset, val_dataset, min_freq=5)
        with open(vocab_path, 'wb') as vocab_file:
            pickle.dump(text_field.vocab, vocab_file)
    else:
        with open(vocab_path, 'rb') as vocab_file:
            text_field.vocab = pickle.load(vocab_file)

    # Model and dataloaders: 3-layer memory-augmented encoder with m memory
    # slots per attention module (padding index 0).
    encoder = MemoryAugmentedEncoder(3, 0, attention_module=ScaledDotProductAttentionMemory,
                                     attention_module_kwargs={'m': args.m})
        type=str,
        default=
        "/cluster/sorona/dchen/faster_rcnn_R_101_DC5_3x_ScanNet_feats.hdf5")
    args = parser.parse_args()

    print('Meshed-Memory Transformer Evaluation')

    # Pipeline for image regions: ScanNet detection features from
    # --features_path, capped at 50 regions per image.
    image_field = ScanNetDetectionsField(detections_path=args.features_path,
                                         max_detections=50,
                                         load_in_tmp=False)

    # Pipeline for text: spaCy tokenization, lowercased, punctuation stripped.
    text_field = TextField(init_token='<bos>',
                           eos_token='<eos>',
                           lower=True,
                           tokenize='spacy',
                           remove_punctuation=True,
                           nopoints=False)

    # Create the dataset over the ScanNet frames, restricted to the image
    # ids actually present in the feature file.
    dataset = ScanNet(image_field, text_field,
                      "/cluster/sorona/dchen/ScanNet_frames/",
                      get_image_ids(args.features_path))
    # Only the test split is needed for evaluation.
    _, _, test_dataset = dataset.splits

    # Load the prebuilt vocabulary. Use a context manager so the file handle
    # is closed deterministically (the original left it dangling).
    with open('vocab.pkl', 'rb') as vocab_file:
        text_field.vocab = pickle.load(vocab_file)

    # Model and dataloaders
    encoder = MemoryAugmentedEncoder(
        3,
        0,
        attention_module=ScaledDotProductAttentionMemory,