Example No. 1
    def __init__(
        self,
        num_hidden_layers: int,
        layer: Optional[TransformerLayer] = None,
        hidden_size: Optional[int] = None,
        intermediate_size: Optional[int] = None,
        num_attention_heads: int = 8,
        attention_dropout: float = 0.1,
        hidden_dropout: float = 0.1,
        activation: Union[str, torch.nn.Module] = "relu",
        add_cross_attention: bool = False,
    ):
        super().__init__()

        if layer is not None:
            logger.warning(
                "The `layer` argument has been specified. Any other arguments will be ignored."
            )
        else:
            assert (hidden_size is not None) and (intermediate_size is not None), (
                "As the `layer` has not been provided, `hidden_size` and "
                "`intermediate_size` are required to create `TransformerLayer`s."
            )

        # Build a default TransformerLayer from the size arguments when no layer was supplied.
        layer = layer or TransformerLayer(
            hidden_size,  # type: ignore
            intermediate_size,  # type: ignore
            num_attention_heads,
            attention_dropout,
            hidden_dropout,
            activation,
            add_cross_attention,
        )
        # Stack `num_hidden_layers` copies of the (possibly user-supplied) layer.
        self.layers = replicate_layers(layer, num_hidden_layers)
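
A minimal usage sketch for the constructor above. It assumes this `__init__` belongs to a `torch.nn.Module` subclass, called `TransformerStack` below purely for illustration, and that `TransformerLayer`, `replicate_layers`, and `logger` come from the surrounding module; none of those names are confirmed by the snippet itself.

# Sketch only: `TransformerStack` is an assumed name for the class that owns
# the __init__ above; `TransformerLayer` is taken from the same (assumed) module.

# Option 1: let the stack build its own default layers from the size arguments.
stack = TransformerStack(
    num_hidden_layers=4,
    hidden_size=256,
    intermediate_size=1024,
    num_attention_heads=8,
)

# Option 2: pass a pre-configured layer; the other arguments are then ignored.
template = TransformerLayer(
    hidden_size=256,
    intermediate_size=1024,
    num_attention_heads=8,
    attention_dropout=0.1,
    hidden_dropout=0.1,
    activation="relu",
    add_cross_attention=False,
)
stack = TransformerStack(num_hidden_layers=4, layer=template)
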
Example No. 2
    def __init__(
        self,
        num_hidden_layers1: int = 12,
        num_hidden_layers2: int = 12,
        hidden_size1: int = 1024,
        hidden_size2: int = 1024,
        combined_hidden_size: int = 1024,
        intermediate_size1: int = 1024,
        intermediate_size2: int = 1024,
        num_attention_heads1: int = 8,
        num_attention_heads2: int = 8,
        combined_num_attention_heads: int = 8,
        attention_dropout1: float = 0.1,
        hidden_dropout1: float = 0.1,
        attention_dropout2: float = 0.1,
        hidden_dropout2: float = 0.1,
        activation: str = "relu",
        biattention_id1: Optional[List[int]] = None,
        biattention_id2: Optional[List[int]] = None,
        fixed_layer1: int = 0,
        fixed_layer2: int = 0,
        fast_mode: bool = False,
        with_coattention: bool = True,
        in_batch_pairs: bool = False,
    ):
        super().__init__()

        self.FAST_MODE = fast_mode
        self.with_coattention = with_coattention
        self.biattention_id1 = biattention_id1 or [1]
        self.biattention_id2 = biattention_id2 or [1]
        self.in_batch_pairs = in_batch_pairs
        self.fixed_layer1 = fixed_layer1
        self.fixed_layer2 = fixed_layer2
        self.combined_size = combined_hidden_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2

        # Per-modality transformer layer templates (stream 1 and stream 2).
        layer1 = TransformerLayer(
            hidden_size=hidden_size1,
            intermediate_size=intermediate_size1,
            num_attention_heads=num_attention_heads1,
            attention_dropout=attention_dropout1,
            hidden_dropout=hidden_dropout1,
            activation=activation,
        )
        layer2 = TransformerLayer(
            hidden_size=hidden_size2,
            intermediate_size=intermediate_size2,
            num_attention_heads=num_attention_heads2,
            attention_dropout=attention_dropout2,
            hidden_dropout=hidden_dropout2,
            activation=activation,
        )
        # Co-attention layer that connects the two modality streams.
        connect_layer = BiModalConnectionLayer(
            hidden_size1=hidden_size1,
            hidden_size2=hidden_size2,
            combined_hidden_size=combined_hidden_size,
            intermediate_size1=intermediate_size1,
            intermediate_size2=intermediate_size2,
            num_attention_heads=combined_num_attention_heads,
            dropout1=hidden_dropout1,
            dropout2=hidden_dropout2,
            activation=activation,
        )

        # Replicate each template layer; one connection layer per bi-attention position.
        self.layers1 = replicate_layers(layer1, num_hidden_layers1)
        self.layers2 = replicate_layers(layer2, num_hidden_layers2)
        self.c_layer = replicate_layers(connect_layer,
                                        len(self.biattention_id2))
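
A usage sketch under the same caveats: the constructor above is taken to belong to a two-stream `torch.nn.Module` subclass, named `BiModalEncoder` below only for illustration, with `TransformerLayer`, `BiModalConnectionLayer`, and `replicate_layers` assumed importable from the surrounding package.

# Sketch only: `BiModalEncoder` is an assumed name for the owning class.
encoder = BiModalEncoder(
    num_hidden_layers1=6,
    num_hidden_layers2=6,
    hidden_size1=768,
    hidden_size2=1024,
    combined_hidden_size=1024,
    intermediate_size1=3072,
    intermediate_size2=4096,
    num_attention_heads1=12,
    num_attention_heads2=8,
    combined_num_attention_heads=8,
    biattention_id1=[4, 5],
    biattention_id2=[4, 5],
)
# One connection (co-attention) layer is replicated per entry in biattention_id2,
# so encoder.c_layer holds two BiModalConnectionLayer copies in this configuration.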