Example 1
    def build(self):
        self._is_direct_features_input = self.config.direct_features_input
        # Encoders
        self.text_encoder = build_text_encoder(self.config.text_encoder)
        self.image_encoder = build_image_encoder(
            self.config.image_encoder, self._is_direct_features_input
        )

        # Projectors
        image_proj_config = deepcopy(self.config.image_projection)
        self.image_proj = build_classifier_layer(image_proj_config)

        text_proj_config = deepcopy(self.config.text_projection)
        self.text_proj = build_classifier_layer(text_proj_config)

        # Aggregators
        self.image_pool = AttnPool1d(self.config.final_hidden_size, 1)
        self.text_pool = AttnPool1d(self.config.final_hidden_size, 1)

        # Shared transformer
        transformer_layer = torch.nn.TransformerEncoderLayer(
            self.config.final_hidden_size, 4, 2048, dropout=0.1, activation="relu"
        )
        self.shared_transformer = torch.nn.TransformerEncoder(
            transformer_layer, num_layers=2
        )

        # Position embeddings - Image
        self.image_pos_emb = PositionEmbeddingSine(self.config.final_hidden_size // 2)
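
The shared transformer built here is standard torch.nn machinery. The short, self-contained sketch below is not part of the original model; final_hidden_size and the input shape are illustrative values chosen only to show how a TransformerEncoder configured this way consumes a sequence of hidden states.

    import torch

    final_hidden_size = 512  # illustrative; the real value comes from self.config

    layer = torch.nn.TransformerEncoderLayer(
        final_hidden_size, 4, 2048, dropout=0.1, activation="relu"
    )
    shared_transformer = torch.nn.TransformerEncoder(layer, num_layers=2)

    # By default, TransformerEncoder expects input of shape (seq_len, batch, hidden).
    tokens = torch.randn(20, 2, final_hidden_size)
    out = shared_transformer(tokens)  # same shape: (20, 2, final_hidden_size)

Sharing a single encoder between the two streams is consistent with the projectors above, which map both image and text features into the same final_hidden_size.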
Example 2
    def __init__(self, embedding_dim: int, **kwargs):
        super().__init__()
        hidden_dim = kwargs.get("hidden_dim", 512)
        # Two parallel text branches: SGA (followed by attention pooling) and CBN
        self.sga = SGAEmbedding(embedding_dim, **kwargs)
        self.sga_pool = AttnPool1d(hidden_dim, 1)
        self.cbn = CBNEmbedding(embedding_dim, **kwargs)
        self.out_dim = hidden_dim
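
SGAEmbedding, CBNEmbedding and AttnPool1d are defined elsewhere in the codebase, so the sketch below uses a shape-compatible stand-in (_Branch, with made-up sizes) purely to illustrate the two-branch layout implied by this constructor; it is not the real implementation.

    import torch
    from torch import nn

    class _Branch(nn.Module):
        """Hypothetical stand-in for SGAEmbedding / CBNEmbedding."""
        def __init__(self, in_dim, out_dim):
            super().__init__()
            self.proj = nn.Linear(in_dim, out_dim)

        def forward(self, x):
            return self.proj(x)

    embedding_dim, hidden_dim = 300, 512      # illustrative sizes
    sga = _Branch(embedding_dim, hidden_dim)  # SGA branch
    cbn = _Branch(embedding_dim, hidden_dim)  # CBN branch

    x = torch.randn(2, 20, embedding_dim)     # (batch, seq_len, embedding_dim)
    x_sga, x_cbn = sga(x), cbn(x)             # each branch -> (2, 20, hidden_dim)

Note that hidden_dim falls back to 512 via kwargs.get when no explicit value is passed, and out_dim reports that same width to downstream modules.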
Example 3
    def __init__(self, hidden_dim: int, embedding_dim: int, **kwargs):
        super().__init__()
        num_attn = kwargs["num_attn"]
        num_layers = kwargs["num_layers"]
        dropout = kwargs.get("dropout", 0.1)
        num_attn_pool = kwargs.get("num_attn_pool", 1)
        num_feat = kwargs.get("num_feat", -1)

        # Single-layer LSTM over the input word embeddings
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=1,
            batch_first=True,
        )
        # Stack of self-attention blocks applied to the LSTM outputs
        self.self_attns = nn.ModuleList(
            [SelfAttention(hidden_dim, num_attn, dropout) for _ in range(num_layers)]
        )
        # Optional attention pooling over the sequence dimension
        self.attn_pool = None
        self.num_feat = num_feat
        self.text_out_dim = hidden_dim
        if num_attn_pool > 0:
            self.attn_pool = AttnPool1d(hidden_dim, num_feat * num_attn_pool)
            self.text_out_dim = hidden_dim * num_attn_pool
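
To make the shape contract concrete, the standalone sketch below uses illustrative sizes and lets nn.MultiheadAttention stand in for the codebase's SelfAttention block; it only traces a batch through a batch_first LSTM followed by one self-attention layer.

    import torch
    from torch import nn

    embedding_dim, hidden_dim, num_attn = 300, 512, 8  # illustrative sizes

    lstm = nn.LSTM(
        input_size=embedding_dim,
        hidden_size=hidden_dim,
        num_layers=1,
        batch_first=True,
    )
    # Stand-in for SelfAttention, used only to show that the shape is preserved.
    self_attn = nn.MultiheadAttention(hidden_dim, num_attn, dropout=0.1, batch_first=True)

    x = torch.randn(2, 14, embedding_dim)  # (batch, seq_len, embedding_dim)
    h, _ = lstm(x)                         # -> (batch, seq_len, hidden_dim)
    h, _ = self_attn(h, h, h)              # shape preserved: (batch, seq_len, hidden_dim)

One detail worth noting in the original constructor: num_feat defaults to -1, which would give AttnPool1d a negative output count, so callers presumably pass num_feat explicitly whenever num_attn_pool > 0.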