Example 1
    def __init__(self, opt, encoder_name='encoder_cand'):
        super().__init__()
        n_positions = get_n_positions_from_options(opt)
        # Build the dictionary and the token-embedding table, then zero the padding row.
        d = DictionaryAgent(opt)
        e = torch.nn.Embedding(len(d), opt['embedding_size'], d[d.null_token])
        torch.nn.init.normal_(e.weight, mean=0, std=opt['embedding_size'] ** -0.5)
        torch.nn.init.constant_(e.weight[d[d.null_token]], 0)

        self.opt = opt
        self.vocab_size = len(d)
        encoder_cand = TransformerAREncoder(
            n_heads=opt['n_heads'],
            n_layers=opt['n_layers'],
            embedding_size=opt['embedding_size'],
            ffn_size=opt['ffn_size'],
            embedding=e,
            dropout=opt['dropout'],
            attention_dropout=opt['attention_dropout'],
            relu_dropout=opt['relu_dropout'],
            padding_idx=d[d.null_token],
            learn_positional_embeddings=opt['learn_positional_embeddings'],
            embeddings_scale=opt['embeddings_scale'],
            n_positions=n_positions,
            n_segments=opt.get('n_segments', 2),
            activation=opt['activation'],
            variant=opt['variant'],
            output_scaling=opt['output_scaling'],
        )
        self.encoder_name = encoder_name
        # Register the encoder under a configurable attribute name (default 'encoder_cand').
        setattr(self, encoder_name, encoder_cand)
        self.cls = LMPredictionHead(opt, len(d))
Example 2
    def __init__(self, opt, dictionary):
        super().__init__(opt, dictionary)
        n_positions = get_n_positions_from_options(opt)
        self.context_encoder = _build_encoder(
            opt,
            dictionary,
            self.embeddings,
            self.pad_idx,
            reduction_type=self.reduction_type,
            n_positions=n_positions,
            n_segments=self.n_segments,
        )

        if opt.get('share_encoders'):
            self.cand_encoder = TransformerResponseWrapper(
                self.context_encoder, self.context_encoder.out_dim
            )
        else:
            if not self.share_word_embedding:
                cand_embeddings = self.cand_embeddings
            else:
                cand_embeddings = self.embeddings
            self.cand_encoder = _build_encoder(
                opt,
                dictionary,
                cand_embeddings,
                self.pad_idx,
                n_positions=n_positions,
                reduction_type=self.reduction_type,
                n_segments=self.n_segments,
            )

        # build memory encoder
        if opt.get('wrap_memory_encoder', False):
            self.memory_transformer = TransformerResponseWrapper(
                self.context_encoder, self.context_encoder.out_dim
            )
        else:
            self.memory_transformer = self.context_encoder
Example 3
    def get_encoder(self, opt, dict_, null_idx, reduction_type,
                    for_context: bool):
        """
        Return encoder that allows for image features to be passed in, given options.

        :param opt:
            opt dict
        :param dict_:
            dictionary agent
        :param null_idx:
            null/pad index into dict
        :param reduction_type: only used for compatibility with the superclass method
        :param for_context:
            whether this is the context encoder (as opposed to the candidate encoder)
        :return:
            either a TransformerEncoder or a ContextWithImageEncoder, initialized
            correctly
        """
        if for_context:
            if reduction_type is not None:
                raise NotImplementedError(
                    'No encoder output reductions supported!')
            n_positions = get_n_positions_from_options(opt)
            embeddings = self._get_embeddings(
                dict_=dict_,
                null_idx=null_idx,
                embedding_size=opt['embedding_size'])
            return ContextWithImageEncoder(
                n_heads=opt['n_heads'],
                n_layers=opt['n_layers'],
                embedding_size=opt['embedding_size'],
                ffn_size=opt['ffn_size'],
                vocabulary_size=len(dict_),
                embedding=embeddings,
                dropout=opt['dropout'],
                attention_dropout=opt['attention_dropout'],
                relu_dropout=opt['relu_dropout'],
                padding_idx=null_idx,
                learn_positional_embeddings=opt['learn_positional_embeddings'],
                embeddings_scale=opt['embeddings_scale'],
                n_positions=n_positions,
                n_segments=opt['n_segments'],
                activation=opt['activation'],
                variant=opt['variant'],
                output_scaling=opt['output_scaling'],
                image_encoder_num_layers=opt['image_encoder_num_layers'],
                image_features_dim=opt['image_features_dim'],
                image_combination_mode=opt['image_combination_mode'],
                n_image_tokens=opt['n_image_tokens'],
            )
        else:
            # The candidate encoder is the same as for PolyEncoderModule
            return super().get_encoder(
                opt=opt,
                dict_=dict_,
                null_idx=null_idx,
                reduction_type=reduction_type,
                for_context=for_context,
            )
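
The docstring above describes a two-way dispatch: with for_context=True the module builds a ContextWithImageEncoder and refuses any output reduction, while for_context=False falls through to the parent PolyEncoderModule encoder. The following usage sketch assumes an already-constructed module instance plus a parsed options dict opt and a DictionaryAgent dict_; those surrounding names are illustrative assumptions, and only the get_encoder arguments come from the snippet itself.

    # Hypothetical usage sketch; `module` is assumed to be an instance of the
    # class whose get_encoder() is shown above.
    null_idx = dict_[dict_.null_token]

    # Context side: image features are supported, no output reduction allowed.
    context_encoder = module.get_encoder(
        opt=opt,
        dict_=dict_,
        null_idx=null_idx,
        reduction_type=None,
        for_context=True,
    )

    # Candidate side: delegates to the PolyEncoderModule implementation.
    cand_encoder = module.get_encoder(
        opt=opt,
        dict_=dict_,
        null_idx=null_idx,
        reduction_type='mean',  # illustrative; use the reduction the ranker is configured with
        for_context=False,
    )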
Example 4
    def __init__(self, opt, dictionary, retriever_shared=None):
        from parlai.agents.rag.rag import RAG_MODELS

        self.pad_idx = dictionary[dictionary.null_token]
        self.start_idx = dictionary[dictionary.start_token]
        self.end_idx = dictionary[dictionary.end_token]
        super().__init__(self.pad_idx, self.start_idx, self.end_idx)
        self.fp16 = (not opt['no_cuda'] and torch.cuda.is_available()
                     and opt.get('fp16', False))
        self.dict = dictionary
        self.embeddings = create_embeddings(dictionary, opt['embedding_size'],
                                            self.pad_idx)
        # attrs
        self.rag_model_type = opt['rag_model_type']
        self._rag_model_interface = RAG_MODELS[self.rag_model_type](
            opt, self.pad_idx)
        self.generation_model = opt['generation_model']
        self.n_extra_positions = opt['n_extra_positions']
        self.n_positions = get_n_positions_from_options(
            opt) + opt['n_extra_positions']
        assert opt['n_extra_positions'] >= 0
        self.expanded_input_truncate = min(
            opt['text_truncate'] or opt['truncate'],
            get_n_positions_from_options(opt))
        if self.n_extra_positions > 0:
            # This attribute is overloaded:
            # when n_extra_positions == 0, it is the truncation of the full expanded input;
            # when > 0, it is the maximum length of the knowledge tokens.
            self.expanded_input_truncate = self.n_extra_positions
        self.min_doc_token_length = opt['min_doc_token_length']

        # modules
        self.retriever = retriever_factory(opt,
                                           dictionary,
                                           shared=retriever_shared)
        self.seq2seq_encoder = self.build_encoder(
            opt,
            dictionary=dictionary,
            embedding=self.embeddings,
            padding_idx=self.pad_idx,
        )
        self.seq2seq_decoder = self.build_decoder(opt,
                                                  embedding=self.embeddings,
                                                  padding_idx=self.pad_idx)
Example 5
    def __init__(
        self,
        opt: Opt,
        dictionary: DictionaryAgent,
        embedding: Optional[torch.nn.Embedding] = None,
        padding_idx: int = 0,
    ):
        """
        RagEncoder initialization.

        The RAG seq2seq encoder is just a regular encoder; its positional table
        is simply enlarged by opt['n_extra_positions'] to leave room for appended
        knowledge tokens.
        """
        n_init_positions = get_n_positions_from_options(
            opt) + opt['n_extra_positions']
        super().__init__(
            opt=opt,
            vocabulary_size=len(dictionary),
            embedding=embedding,
            padding_idx=padding_idx,
            reduction_type='none',
            n_positions=n_init_positions,
        )
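
The only RAG-specific detail here is that enlarged positional table: the base position count from get_n_positions_from_options(opt) is extended by opt['n_extra_positions'] so that knowledge tokens appended to the input still receive positional embeddings. A small arithmetic sketch with illustrative numbers (not library defaults):

    # Illustrative values only; the real ones come from the parsed opt.
    base_positions = 1024     # what get_n_positions_from_options(opt) would return
    n_extra_positions = 128   # opt['n_extra_positions'], reserved for appended knowledge tokens

    # Size of the positional-embedding table passed to the parent encoder
    # (n_init_positions in RagEncoder.__init__ above).
    n_init_positions = base_positions + n_extra_positions
    assert n_init_positions == 1152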
Example 6
    def get_encoder(self, opt, dict_, null_idx, reduction_type,
                    for_context: bool):
        n_positions = get_n_positions_from_options(opt)
        embeddings = self._get_embeddings(
            dict_=dict_, null_idx=null_idx, embedding_size=opt['embedding_size']
        )
        return PolyEncoderTransformerEncoder(
            n_heads=opt['n_heads'],
            n_layers=opt['n_layers'],
            embedding_size=opt['embedding_size'],
            ffn_size=opt['ffn_size'],
            vocabulary_size=len(dict_),
            embedding=embeddings,
            dropout=opt['dropout'],
            attention_dropout=opt['attention_dropout'],
            relu_dropout=opt['relu_dropout'],
            padding_idx=null_idx,
            learn_positional_embeddings=opt['learn_positional_embeddings'],
            embeddings_scale=opt['embeddings_scale'],
            reduction_type=reduction_type,
            n_positions=n_positions,
            n_segments=opt.get('n_segments', 2),
            activation=opt['activation'],
            variant=opt['variant'],
            output_scaling=opt['output_scaling'],
        )
Example 7
    def __init__(
        self,
        opt: Opt,
        embedding: Optional[nn.Embedding] = None,
        n_positions: Optional[int] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.opt = opt

        def _default(val, default):
            return val if val is not None else default

        self.embedding_size = opt['embedding_size']
        self.ffn_size = opt['ffn_size']
        self.n_layers = (opt['n_decoder_layers']
                         if opt.get('n_decoder_layers', -1) > 0 else
                         opt['n_layers'])
        self.n_heads = opt['n_heads']
        self.dim = self.embedding_size
        self.activation = opt.get('activation', 'relu')
        self.variant = opt.get('variant', 'aiayn')

        self.embeddings_scale = opt.get('embeddings_scale', True)
        self.dropout = nn.Dropout(p=opt.get('dropout', 0.0))  # --dropout

        self.n_positions = _default(n_positions,
                                    get_n_positions_from_options(opt))
        self.out_dim = self.embedding_size
        assert (self.embedding_size % self.n_heads == 0
                ), 'Transformer embedding size must be a multiple of n_heads'

        self.embeddings = embedding

        if (self.variant == 'xlm' or self.variant == 'prelayernorm'
                or self.variant == 'bart'):
            self.norm_embeddings = torch.nn.LayerNorm(self.dim,
                                                      eps=LAYER_NORM_EPS)
            if self.variant == 'xlm':
                warn_once(
                    'DEPRECATED: XLM should only be used for backwards compatibility, '
                    'as it involves a less-stable layernorm operation.')
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(self.n_positions,
                                                self.embedding_size)
        if not opt.get('learn_positional_embeddings', False):
            create_position_codes(
                self.n_positions,
                self.embedding_size,
                out=self.position_embeddings.weight,
            )
        else:
            nn.init.normal_(self.position_embeddings.weight, 0,
                            self.embedding_size**-0.5)

        # build the model
        self.layers = self.build_layers()
Example 8
    def __init__(
        self,
        opt: Opt,
        vocabulary_size: int,
        embedding: Optional[nn.Embedding] = None,
        padding_idx: int = 0,
        reduction_type: str = 'mean',
        n_positions: Optional[int] = None,
        n_segments: Optional[int] = None,
        embeddings_scale: Optional[bool] = None,
        dropout: Optional[float] = None,
        activation: Optional[str] = None,
        variant: Optional[str] = None,
        output_scaling: Optional[float] = None,
    ):
        super(TransformerEncoder, self).__init__()

        def _default(val, default):
            return val if val is not None else default

        self.embedding_size = opt['embedding_size']
        self.ffn_size = opt['ffn_size']
        self.n_layers = (
            opt['n_encoder_layers']
            if opt.get('n_encoder_layers', -1) > 0
            else opt['n_layers']
        )
        self.n_heads = opt['n_heads']
        self.dim = self.embedding_size
        self.embeddings_scale = _default(
            embeddings_scale, opt.get('embeddings_scale', False)
        )
        self.reduction_type = reduction_type
        self.padding_idx = padding_idx
        # this is --dropout, not --relu-dropout or --attention-dropout
        self.dropout_frac = _default(dropout, opt.get('dropout', 0.0))
        self.dropout = nn.Dropout(p=self.dropout_frac)
        self.variant = _default(variant, opt.get('variant', 'aiayn'))
        self.n_segments = _default(n_segments, opt.get('n_segments', 0))

        self.n_positions = _default(n_positions, get_n_positions_from_options(opt))
        self.out_dim = self.embedding_size
        assert (
            self.embedding_size % self.n_heads == 0
        ), 'Transformer embedding size must be a multiple of n_heads'

        # check input formats:
        if embedding is not None:
            assert (
                self.embedding_size is None
                or self.embedding_size == embedding.weight.shape[1]
            ), "Embedding dim must match the embedding size."

        if embedding is not None:
            self.embeddings = embedding
        else:
            raise AssertionError(
                "This code should not execute. Left here in case we want to enable it."
            )
            assert self.padding_idx is not None
            self.embeddings = nn.Embedding(
                vocabulary_size, self.embedding_size, padding_idx=padding_idx
            )
            nn.init.normal_(self.embeddings.weight, 0, self.embedding_size ** -0.5)

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(self.n_positions, self.embedding_size)
        if not opt.get('learn_positional_embeddings', False):
            create_position_codes(
                self.n_positions,
                self.embedding_size,
                out=self.position_embeddings.weight,
            )
        else:
            nn.init.normal_(
                self.position_embeddings.weight, 0, self.embedding_size ** -0.5
            )

        # embedding normalization
        if (
            self.variant == 'xlm'
            or self.variant == 'prelayernorm'
            or self.variant == 'bart'
        ):
            self.norm_embeddings = torch.nn.LayerNorm(self.dim, eps=LAYER_NORM_EPS)
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        if self.n_segments >= 1:
            self.segment_embeddings = nn.Embedding(self.n_segments, self.dim)

        # build the model
        self.layers = nn.ModuleList()
        for _ in range(self.n_layers):
            self.layers.append(
                TransformerEncoderLayer(
                    self.n_heads,
                    self.embedding_size,
                    self.ffn_size,
                    attention_dropout=opt.get('attention_dropout', 0.0),
                    relu_dropout=opt.get('relu_dropout', 0.0),
                    dropout=self.dropout_frac,
                    variant=self.variant,
                    activation=_default(activation, opt.get('activation', 'relu')),
                )
            )
        self.output_scaling = _default(output_scaling, opt.get('output_scaling', 1.0))
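
This constructor also shows the pattern the other examples rely on: every hyperparameter is read from opt unless an explicit keyword overrides it through _default, and n_positions itself falls back to get_n_positions_from_options(opt). A minimal instantiation sketch follows; it assumes TransformerEncoder is importable from parlai.agents.transformer.modules, and the option values are illustrative, not ParlAI defaults.

    import torch.nn as nn
    from parlai.agents.transformer.modules import TransformerEncoder  # assumed import path

    # Illustrative hyperparameters only; in practice these come from the parsed opt.
    opt = {
        'embedding_size': 256,
        'ffn_size': 1024,
        'n_layers': 2,
        'n_heads': 4,
        'n_positions': 512,
        'dropout': 0.1,
        'attention_dropout': 0.0,
        'relu_dropout': 0.0,
        'learn_positional_embeddings': False,
        'embeddings_scale': True,
        'variant': 'aiayn',
        'activation': 'relu',
        'output_scaling': 1.0,
    }

    pad_idx = 0
    vocab_size = 1000
    embeddings = nn.Embedding(vocab_size, opt['embedding_size'], padding_idx=pad_idx)

    encoder = TransformerEncoder(
        opt=opt,
        vocabulary_size=vocab_size,
        embedding=embeddings,
        padding_idx=pad_idx,
        reduction_type='mean',
        n_segments=2,  # explicit kwarg wins over opt.get('n_segments', 0)
    )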