def __init__(self, pretrained_path):
    super().__init__()
    self.pretrained_path = pretrained_path
    self.output_dim = 800 * 800

    # ------------------
    # PRE-TRAINED MODEL
    # ------------------
    ae = AE.load_from_checkpoint(pretrained_path)
    ae.freeze()
    self.backbone = ae.encoder
    self.backbone.c3_only = True
    self.backbone.out_channels = 32

    # ------------------
    # FAST RCNN
    # ------------------
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    self.fast_rcnn = FasterRCNN(self.backbone,
                                num_classes=9,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

    # for unfreezing encoder later
    self.frozen = True
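# Hedged usage sketch (assumption, not taken from this file): torchvision's
# FasterRCNN returns a dict of losses when called with targets in training
# mode, and a list of per-image predictions ('boxes', 'labels', 'scores') in
# eval mode. A training step built around the __init__ above might therefore
# look roughly like this; the method name and batch layout are illustrative
# assumptions, not the repo's own code.
def training_step(self, batch, batch_idx):
    images, targets = batch                      # targets: list of dicts with 'boxes' and 'labels'
    loss_dict = self.fast_rcnn(images, targets)  # e.g. {'loss_classifier', 'loss_box_reg', ...}
    loss = sum(loss for loss in loss_dict.values())
    return loss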
                    required=True,
                    help='root directory that contains captions')
parser.add_argument('--fasttext_model', type=str, required=True,
                    help='pretrained fastText model (binary file)')
parser.add_argument('--max_nwords', type=int, default=50,
                    help='maximum number of words (default: 50)')
parser.add_argument('--img_model', type=str, required=True,
                    help='pretrained autoencoder model')
args = parser.parse_args()


if __name__ == '__main__':
    caption_root = args.caption_root.split('/')[-1]
    if (caption_root + '_vec') not in os.listdir(
            args.caption_root.replace(caption_root, '')):
        os.makedirs(args.caption_root + '_vec')

    print('Loading a pretrained image model...')
    img_model = AE.load_from_checkpoint(checkpoint_path=args.img_model)
    model = nn.Sequential(img_model.encoder, img_model.fc)
    model = model.eval()

    print('Loading a pretrained fastText model...')
    word_embedding = fastText.load_model(args.fasttext_model)

    print('Making vectorized caption data files...')
    ConvertCapVec().convert_and_save3(args.caption_root, word_embedding,
                                      args.max_nwords, model)
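# Hedged example invocation (script name and paths are placeholders, not from
# this repo). The script writes the vectorized caption files into a sibling
# '<caption_root>_vec' directory, as set up by the os.makedirs check above:
#
#   python vectorize_captions.py \
#       --caption_root data/captions \
#       --fasttext_model wiki.en.bin \
#       --max_nwords 50 \
#       --img_model checkpoints/ae.ckpt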