def __init__(self, config: Config, *args, **kwargs):
    """Build a torchvision ResNet backbone as an image feature extractor.

    ``config.name`` selects the torchvision model constructor (a ResNet
    variant — ``zero_init_residual`` is ResNet-specific); ``config.pretrained``
    and ``config.zero_init_residual`` are forwarded to it.

    The pooling and classification head are replaced with ``Identity`` so
    ``forward`` yields raw convolutional features; ``self.out_dim`` records
    their channel dimensionality.
    """
    super().__init__()
    self.config = config
    model = getattr(torchvision.models, config.name)(
        pretrained=config.pretrained,
        zero_init_residual=config.zero_init_residual,
    )
    # Read the classifier's input width *before* discarding it: this is the
    # backbone's feature dimension (2048 for resnet50/101/152, 512 for
    # resnet18/34). The previous hard-coded 2048 was wrong for the smaller
    # ResNet variants.
    self.out_dim = model.fc.in_features
    # Strip the head so the module emits features instead of logits.
    model.avgpool = Identity()
    model.fc = Identity()
    self.model = model
def __init__(self, emb_type, **kwargs):
    """Wrap one of several text-embedding modules selected by ``emb_type``.

    The chosen module is stored on ``self.module`` and its output width is
    mirrored on ``self.text_out_dim``. Raises ``NotImplementedError`` for an
    unrecognized ``emb_type``.
    """
    super().__init__()
    self.model_data_dir = kwargs.get("model_data_dir", None)
    self.embedding_dim = kwargs.get("embedding_dim", None)

    if emb_type == "identity":
        module = Identity()
        module.text_out_dim = self.embedding_dim
    elif emb_type == "vocab":
        module = VocabEmbedding(**kwargs)
        module.text_out_dim = self.embedding_dim
    elif emb_type == "projection":
        module = ProjectionEmbedding(**kwargs)
        # Projection decides its own output width.
        module.text_out_dim = module.out_dim
    elif emb_type == "preextracted":
        module = PreExtractedEmbedding(**kwargs)
    elif emb_type == "bilstm":
        module = BiLSTMTextEmbedding(**kwargs)
    elif emb_type == "attention":
        module = AttentionTextEmbedding(**kwargs)
    elif emb_type == "mcan":
        module = SAEmbedding(**kwargs)
    elif emb_type == "torch":
        # Plain torch embedding table; both sizes must be present in kwargs.
        module = nn.Embedding(kwargs["vocab_size"], kwargs["embedding_dim"])
        module.text_out_dim = self.embedding_dim
    else:
        raise NotImplementedError("Unknown question embedding '%s'" % emb_type)

    self.module = module
    # These module families set text_out_dim themselves when not set above.
    self.text_out_dim = self.module.text_out_dim
def __init__(self, config: Config, *args, **kwargs):
    """Instantiate an image-feature encoder described by ``config``.

    ``config.type`` (an ``ImageFeatureEncoderTypes`` enum or its raw string
    value) selects the concrete module; ``config.params`` must contain
    ``in_dim``. The selected module's output width is exposed as
    ``self.out_dim``.
    """
    super().__init__()
    encoder_type = config.type
    # Accept either the enum or its plain string value.
    if isinstance(encoder_type, ImageFeatureEncoderTypes):
        encoder_type = encoder_type.value

    assert (
        "in_dim" in config.params
    ), "ImageFeatureEncoder require 'in_dim' param in config"
    params = config.params

    if encoder_type in ("default", "identity"):
        # Pass-through encoder; annotate it with the dimensionality.
        module = Identity()
        module.in_dim = params.in_dim
        module.out_dim = params.in_dim
    elif encoder_type == "projection":
        if "module" not in params:
            # Copy before injecting the default so the caller's config
            # object is not mutated.
            params = deepcopy(params)
            params.module = "linear"
        module = ProjectionEmbedding(**params)
    elif encoder_type == "finetune_faster_rcnn_fpn_fc7":
        module = FinetuneFasterRcnnFpnFc7(params)
    elif encoder_type == "spatial":
        module = VisionSpatialEmbedding(**params)
    else:
        raise NotImplementedError("Unknown Image Encoder: %s" % encoder_type)

    self.module = module
    self.out_dim = self.module.out_dim
def __init__(self, encoder_type, in_dim, **kwargs):
    """Instantiate an image-feature encoder from explicit arguments.

    ``encoder_type`` (string) selects the concrete module, ``in_dim`` is the
    incoming feature width, and remaining kwargs are forwarded to the chosen
    module. The module's output width is exposed as ``self.out_dim``.
    """
    super().__init__()
    if encoder_type in ("default", "identity"):
        # Pass-through encoder; annotate it with the dimensionality.
        self.module = Identity()
        self.module.in_dim = in_dim
        self.module.out_dim = in_dim
    elif encoder_type == "projection":
        # The "module" kwarg picks the projection flavour; default to linear.
        projection_kind = kwargs.pop("module", "linear")
        self.module = ProjectionEmbedding(projection_kind, in_dim, **kwargs)
    elif encoder_type == "finetune_faster_rcnn_fpn_fc7":
        self.module = FinetuneFasterRcnnFpnFc7(in_dim, **kwargs)
    else:
        raise NotImplementedError("Unknown Image Encoder: %s" % encoder_type)
    self.out_dim = self.module.out_dim