Example #1
0
    def __init__(self, pretrained_path):
        """Build a Faster R-CNN detector around a pretrained AE encoder.

        Args:
            pretrained_path: checkpoint location passed to
                ``AE.load_from_checkpoint``; also stored on the instance.
        """
        super().__init__()
        self.pretrained_path = pretrained_path
        self.output_dim = 800 * 800

        # ------------------
        # Backbone: encoder of the pretrained autoencoder
        # ------------------
        autoencoder = AE.load_from_checkpoint(pretrained_path)
        autoencoder.freeze()

        encoder = autoencoder.encoder
        encoder.c3_only = True
        # NOTE(review): presumably required because torchvision's FasterRCNN
        # reads ``out_channels`` from its backbone -- confirm against the docs.
        encoder.out_channels = 32
        self.backbone = encoder

        # ------------------
        # Detection head
        # ------------------
        # A single sizes/ratios tuple each, paired with the single
        # feature-map name '0' used by the RoI pooler below.
        rpn_anchors = AnchorGenerator(
            sizes=((32, 64, 128, 256, 512),),
            aspect_ratios=((0.5, 1.0, 2.0),),
        )
        box_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0'],
            output_size=7,
            sampling_ratio=2,
        )
        self.fast_rcnn = FasterRCNN(
            encoder,
            num_classes=9,
            rpn_anchor_generator=rpn_anchors,
            box_roi_pool=box_pooler,
        )

        # Recorded so the encoder can be unfrozen later.
        self.frozen = True
                    required=True,
                    help='root directory that contains captions')
# fastText binary; handed to ``fastText.load_model`` in the main section.
parser.add_argument(
    '--fasttext_model',
    help='pretrained fastText model (binary file)',
    required=True,
    type=str,
)

# Word limit forwarded to the caption converter.
parser.add_argument(
    '--max_nwords',
    help='maximum number of words (default: 50)',
    default=50,
    type=int,
)

# Autoencoder checkpoint; handed to ``AE.load_from_checkpoint``.
parser.add_argument(
    '--img_model',
    help='pretrained autoencoder model',
    required=True,
    type=str,
)

# Parsed at module level, so ``args`` is a module global.
args = parser.parse_args()

if __name__ == '__main__':
    # Vectorise the captions only once: if "<caption_root>_vec" already
    # exists, everything below is skipped.
    vec_dir = args.caption_root + '_vec'

    # Test the target path directly instead of rebuilding the parent
    # directory with str.replace(): that approach stripped *every*
    # occurrence of the basename (e.g. "data/data" -> "/") and produced ''
    # for a bare directory name, making os.listdir('') raise
    # FileNotFoundError. lexists() keeps the old "any entry with that name"
    # semantics, including dangling symlinks.
    if not os.path.lexists(vec_dir):
        os.makedirs(vec_dir)

        print('Loading a pretrained image model...')
        img_model = AE.load_from_checkpoint(checkpoint_path=args.img_model)
        # Chain the autoencoder's encoder and fc modules, then call eval().
        model = nn.Sequential(img_model.encoder, img_model.fc)
        model = model.eval()

        print('Loading a pretrained fastText model...')
        word_embedding = fastText.load_model(args.fasttext_model)

        print('Making vectorized caption data files...')
        ConvertCapVec().convert_and_save3(args.caption_root, word_embedding,
                                          args.max_nwords, model)