Example #1
    def __init__(self, args, dictionary, embed_tokens, left_pad=True):
        super().__init__(dictionary)
        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

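        # token embeddings are scaled by sqrt(embed_dim); positional embeddings
        # can be disabled via args.no_token_positional_embeddings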
        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions, embed_dim, self.padding_idx,
            left_pad=left_pad,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

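        # optional extra learned source embedding vector of size embed_dim,
        # only created when args.src_tgt_embed is set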
        self.src_embed = nn.Parameter(nn.init.normal_(
            torch.Tensor(embed_dim),
            mean=0, std=embed_dim ** -0.5
        )) if args.src_tgt_embed else None

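        # stack of args.encoder_layers identical Transformer encoder layers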
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args)
            for _ in range(args.encoder_layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))
        self.normalize = args.encoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
Example #2
    def __init__(self, args, dictionary, embed_tokens, left_pad=True, add_pos_emb=True):
        super().__init__(dictionary)

        self.dropout = args.dropout
        self.add_pos_emb = add_pos_emb

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.proto_k = args.proto_k
        self.proto_emb_no_grad = getattr(args, 'proto_emb_no_grad', False)

        print("| ---- [encoder] building prob encoder, layers = {}, add_pos_emb = {}, "
              "proto_k = {}, proto_emb_no_grad = {}".format(
            args.encoder_layers, add_pos_emb, self.proto_k, self.proto_emb_no_grad))

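        # scaled token embeddings; the positional embeddings are built with a fixed
        # maximum of 1024 positions (presumably gated by the add_pos_emb flag in forward)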
        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            1024, embed_dim, self.padding_idx,
            left_pad=left_pad,
            learned=args.encoder_learned_pos,
        )

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args)
            for i in range(args.encoder_layers)
        ])
Example #3
    def __init__(self, args, conv_layers_before=None, input_size=83):
        super(TransformerEncoder, self).__init__(None)  # no src dictionary
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

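        # optional convolutional front-end; fc0 projects the input features to
        # encoder_embed_dim when the input size (83 by default) differs from it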
        self.conv_layers_before = conv_layers_before
        self.fc0 = Linear(input_size, args.encoder_embed_dim) \
            if input_size != args.encoder_embed_dim else None
        self.max_source_positions = args.max_source_positions

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
Example #4
    def __init__(self, args, dictionary, embed_tokens, left_pad=True):
        super().__init__(dictionary)
        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            left_pad=left_pad,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))
        self.normalize = args.encoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)

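        # Conv1d with stride == kernel_size shortens the sequence by a factor of
        # kernel_size; the optional ConvTranspose1d below upsamples it back by the same factor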
        self.conv_layer = nn.Conv1d(embed_dim,
                                    embed_dim,
                                    args.kernel_size,
                                    stride=args.kernel_size)
        self.kernel_size = args.kernel_size
        self.deconv = args.deconv

        if self.deconv:
            self.deconv_layer = nn.ConvTranspose1d(embed_dim,
                                                   embed_dim,
                                                   args.kernel_size,
                                                   stride=args.kernel_size)
Example #5
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(args, dictionary, embed_tokens)
        if args.encoder_layers % 2:
            raise ValueError("number of layers shoud be divisible by 2")

        # Transformer Encoder boilerplate

        embed_dim = embed_tokens.embedding_dim  # same as args.encoder_embed_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        return_all_hiddens = getattr(args, "return_all_hiddens", False)
        if return_all_hiddens:
            raise ValueError(
                "UnetTransformer2Col does not support returning all hiddens")

        self.num_layers = args.encoder_layers
        self.dropout = args.dropout
        if getattr(args, "layer_wise_attention", False):
            raise ValueError(
                "UNetTransformer does not support layer-wise attention")

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        # U-Net Transformer Encoder
        model_dim, ffn_hidden, n_heads = (
            embed_dim,
            args.encoder_ffn_embed_dim,
            args.encoder_attention_heads,
        )

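        # U-Net style stack: a single input layer, a down-sampling path, an
        # up-sampling path and a single output layer (hence the check below that
        # encoder_layers == 2 + len(down_layers) + len(up_layers))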
        unet_dict = self.build_unet_stacks(args, model_dim, ffn_hidden,
                                           n_heads)

        self.input_layer = unet_dict["input_layer"]
        self.down_layers = nn.ModuleList(unet_dict["down_layers"])
        self.up_layers = nn.ModuleList(unet_dict["up_layers"])
        self.output_layer = unet_dict["output_layer"]

        assert self.num_layers == 2 + len(self.down_layers) + len(
            self.up_layers)

        # Vanilla transformer Encoder
        self.transformer_layers = nn.ModuleList([
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ])

        # More Transformer Encoder boilerplate
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
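
All five examples follow the same basic recipe: token embeddings scaled by sqrt(embed_dim), (optional) positional embeddings, an nn.ModuleList of Transformer encoder layers, and an optional final LayerNorm. Below is a minimal, self-contained sketch of that recipe in plain PyTorch, using torch.nn.TransformerEncoderLayer instead of fairseq's layer implementation; the class name MiniTransformerEncoder and all parameter names are illustrative assumptions, not code from the repositories above.

    # Minimal sketch of the shared pattern (plain PyTorch, not fairseq);
    # all names below are illustrative assumptions.
    import math
    import torch
    import torch.nn as nn


    class MiniTransformerEncoder(nn.Module):
        def __init__(self, vocab_size, embed_dim=512, num_layers=6, num_heads=8,
                     ffn_dim=2048, max_positions=1024, padding_idx=1,
                     normalize_before=False, dropout=0.1):
            super().__init__()
            self.padding_idx = padding_idx
            self.dropout = dropout

            # token embeddings, scaled by sqrt(embed_dim) as in the examples above
            self.embed_tokens = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
            self.embed_scale = math.sqrt(embed_dim)

            # learned positional embeddings (the examples above also support sinusoidal ones)
            self.embed_positions = nn.Embedding(max_positions, embed_dim)

            # stack of identical encoder layers
            self.layers = nn.ModuleList([
                nn.TransformerEncoderLayer(
                    d_model=embed_dim, nhead=num_heads, dim_feedforward=ffn_dim,
                    dropout=dropout, batch_first=True, norm_first=normalize_before)
                for _ in range(num_layers)
            ])

            # final LayerNorm only in the "normalize before" configuration
            self.layer_norm = nn.LayerNorm(embed_dim) if normalize_before else None

        def forward(self, src_tokens):
            # src_tokens: (batch, src_len) integer token ids
            positions = torch.arange(src_tokens.size(1), device=src_tokens.device)
            x = self.embed_scale * self.embed_tokens(src_tokens) + self.embed_positions(positions)
            x = nn.functional.dropout(x, p=self.dropout, training=self.training)

            padding_mask = src_tokens.eq(self.padding_idx)  # (batch, src_len), True at pads
            for layer in self.layers:
                x = layer(x, src_key_padding_mask=padding_mask)
            if self.layer_norm is not None:
                x = self.layer_norm(x)
            return x, padding_mask


    if __name__ == "__main__":
        # usage: encode a toy batch of token ids
        enc = MiniTransformerEncoder(vocab_size=100, embed_dim=64, num_layers=2,
                                     num_heads=4, ffn_dim=128)
        tokens = torch.randint(2, 100, (3, 7))   # toy batch without padding tokens
        out, mask = enc(tokens)
        print(out.shape)  # torch.Size([3, 7, 64])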