Code example #1
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        self.add_template = args.add_template

        if self.add_template:
            self.template_layers = nn.ModuleList([])
            self.template_layers.extend([
                TransformerEncoderLayer(args)
                for i in range(args.encoder_layers)
            ])

            if args.encoder_normalize_before:
                self.tp_layer_norm = LayerNorm(embed_dim)
            else:
                self.tp_layer_norm = None

            self.positionwise = PositionWise(embed_dim, embed_dim,
                                             self.dropout)

            self.two_encoder_mix = nn.Linear(2 * embed_dim, embed_dim)

            self.attention = MultiheadAttention(embed_dim,
                                                args.encoder_attention_heads,
                                                dropout=args.attention_dropout)
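
A note on the template branch above: two_encoder_mix projects the concatenation of the source and template encoder states (2 * embed_dim) back down to embed_dim. Below is a standalone shape check of that kind of fusion, for illustration only; the tensor names and sizes are placeholders, not taken from the project, and the real forward() is not shown in this snippet.

import torch
import torch.nn as nn

embed_dim, seq_len, batch = 512, 7, 2
two_encoder_mix = nn.Linear(2 * embed_dim, embed_dim)
src_states = torch.randn(seq_len, batch, embed_dim)       # source encoder output
template_states = torch.randn(seq_len, batch, embed_dim)  # template encoder output
# Concatenate along the feature dimension, then project back to embed_dim.
mixed = two_encoder_mix(torch.cat([src_states, template_states], dim=-1))
print(mixed.shape)  # torch.Size([7, 2, 512])
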
Code example #2
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layer_wise_attention = getattr(args, 'layer_wise_attention',
                                            False)

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Code example #3
    def __init__(self, args):
        super().__init__(None)

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__)
        self.embed_scale = math.sqrt(args.encoder_embed_dim)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        self.padding_idx = 1

        self.subsample = Conv1dSubsampler(
            args.input_feat_per_channel * args.input_channels,
            args.conv_channels,
            args.encoder_embed_dim,
            [int(k) for k in args.conv_kernel_sizes.split(",")],
        )

        self.embed_positions = PositionalEmbedding(args.max_source_positions,
                                                   args.encoder_embed_dim,
                                                   self.padding_idx)

        self.transformer_layers = nn.ModuleList([
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ])
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
Code example #4
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

#        pdb.set_trace()
#        self.dropout = [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
        self.dropout = [0, 0, 0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3]
        self.index = None
        self.encoder_layerdrop = args.encoder_layerdrop

        
        embed_dim = embed_tokens.embedding_dim
        self.embed_dim = embed_dim

#        self.embedding_hidden_mapping_in = SlimmableLinear([embed_dim, embed_dim, embed_dim, embed_dim],
#                                                           [int(embed_dim / 4), int(embed_dim * 2 / 4), int(embed_dim * 3 / 4),  embed_dim])

        self.embedding_hidden_mapping_in = SlimmableLinear(
            [embed_dim] * 13,
            [int(embed_dim * k / 16) for k in range(4, 17)],
        )


        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        self.layer_wise_attention = getattr(args, "layer_wise_attention", False)

        self.layers = nn.ModuleList([])
        self.layers.extend(
            [TransformerEncoderLayer(args) for i in range(args.encoder_layers)]
        )
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
Code example #5
File: transformer_modular.py Project: varisd/fairseq
    def build_encoder_layer(self, args, i=None):
        modular_layer_indices = eval(args.encoder_modular_layer_indices)
        if type(modular_layer_indices) is int:
            modular_layer_indices = [modular_layer_indices]

        if i in modular_layer_indices:
            return TransformerModularEncoderLayer(args)
        else:
            return TransformerEncoderLayer(args)
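
For reference, a small self-contained sketch of the index handling above, assuming args.encoder_modular_layer_indices holds a Python literal such as "3" or "(0, 5)"; ast.literal_eval is used here as a safer stand-in for the eval call, and the helper name pick_layer_types is made up for the illustration.

import ast

def pick_layer_types(encoder_modular_layer_indices, num_layers):
    # Parse the string literal, e.g. "3" -> 3 or "(0, 5)" -> (0, 5).
    indices = ast.literal_eval(encoder_modular_layer_indices)
    if isinstance(indices, int):
        indices = [indices]
    # Listed positions become modular layers, all others stay standard.
    return [
        "TransformerModularEncoderLayer" if i in indices else "TransformerEncoderLayer"
        for i in range(num_layers)
    ]

# With 6 encoder layers and indices "(0, 5)", layers 0 and 5 are modular.
print(pick_layer_types("(0, 5)", 6))
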
Code example #6
File: wavtransbart.py Project: Hertin/fairseq
    def build_encoder_layer(self, args):
        layer = TransformerEncoderLayer(args)

        # if we are checkpointing, enforce that FSDP always wraps the
        # checkpointed layer, regardless of layer size
        min_params_to_wrap = (getattr(args, "min_params_to_wrap",
                                      DEFAULT_MIN_PARAMS_TO_WRAP))
        layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap)
        return layer
Code example #7
File: transformer.py Project: yyht/T-Fixup
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        if args.Tfixup:
            temp_state_dict = embed_tokens.state_dict()
            temp_state_dict["weight"] = (9 * args.encoder_layers) ** (- 1 / 4) * temp_state_dict["weight"]
            embed_tokens.load_state_dict(temp_state_dict)

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        self.layer_wise_attention = getattr(args, "layer_wise_attention", False)

        self.layers = nn.ModuleList([])
        self.layers.extend(
            [TransformerEncoderLayer(args) for i in range(args.encoder_layers)]
        )

        # ======================== fixup calling initialization in layers ==================================
        if args.Tfixup:
            for encoder_layer in self.layers:
                encoder_layer.fixup_initialization(args)
        # =================================================================

        self.num_layers = len(self.layers)
        if args.encoder_normalize_before and not args.dont_use_layernorm:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, "layernorm_embedding", False) and not args.dont_use_layernorm:
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
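
A quick illustration of the embedding rescaling in the Tfixup branch above; the 6-layer encoder depth is an assumption made for the example, not taken from the snippet.

# T-Fixup scales the pretrained embedding weights by (9 * N) ** (-1/4),
# where N is the number of encoder layers.
encoder_layers = 6
tfixup_embed_scale = (9 * encoder_layers) ** (-1 / 4)
print(round(tfixup_embed_scale, 3))  # 0.369
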
Code example #8
 def __init__(self, args, task):
     super().__init__(None)
     assert args.pretrained_model is not None
     pretrained_models, _ = checkpoint_utils.load_model_ensemble(
         [args.pretrained_model], task=task)
     self.audio_encoder = pretrained_models[0].encoder
     if args.freeze_pretrained != "none":
         for p_name, p_val in self.audio_encoder.named_parameters():
             p_val.requires_grad = False
     self.n_layers = args.context_encoder_layers
     self.dropout = args.dropout
     self.layers = nn.ModuleList(
         [TransformerEncoderLayer(args) for _ in range(self.n_layers)])
Code example #9
 def build_encoder_layer(self, args):
     layer = TransformerEncoderLayer(args)
     checkpoint = getattr(args, "checkpoint_activations", False)
     if checkpoint:
         offload_to_cpu = getattr(args, "offload_activations", False)
         layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu)
     # if we are checkpointing, enforce that FSDP always wraps the
     # checkpointed layer, regardless of layer size
     min_params_to_wrap = (getattr(args, "min_params_to_wrap",
                                   DEFAULT_MIN_PARAMS_TO_WRAP)
                           if not checkpoint else 0)
     layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap)
     return layer
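
For context, a sketch of how a build_encoder_layer helper like this is typically consumed when the layer stack is created; it mirrors the self.layers.extend([...]) pattern seen in the constructor examples on this page and is not taken from this particular project.

        # Sketch only: populate the encoder stack through the builder so that every
        # layer gets the same activation-checkpointing / FSDP wrapping.
        self.layers = nn.ModuleList(
            [self.build_encoder_layer(args) for _ in range(args.encoder_layers)]
        )
        self.num_layers = len(self.layers)
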
Code example #10
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        self.layer_wise_attention = getattr(args, 'layer_wise_attention',
                                            False)

        self.layer_norm = LayerNorm(args.encoder_embed_dim)

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])
Code example #11
    def __init__(self, ninp, nhead, nhid, nout, nlayers, dropout=0.5):

        super(TransformerEnc, self).__init__()
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout, max_len=100)
        encoder_layers = TransformerEncoderLayer(nhid, nhead, nhid, dropout)

        #self.linear_pos_enc = LinearPositionalEmbedding(max_len=100)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        # self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp

        self.hidden2pose_projection = nn.Linear(nhid, nout)
        self.pose2hidden_projection = nn.Linear(ninp, nhid)
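
Since this example builds on torch.nn rather than fairseq, here is a standalone sketch of how the two built-in classes compose; the sizes are arbitrary placeholders, and the shapes follow the default batch_first=False convention of (seq_len, batch, d_model).

import torch
from torch.nn import TransformerEncoder, TransformerEncoderLayer

nhid, nhead, nlayers = 256, 4, 2
layer = TransformerEncoderLayer(d_model=nhid, nhead=nhead, dim_feedforward=nhid, dropout=0.5)
encoder = TransformerEncoder(layer, num_layers=nlayers)

src = torch.randn(100, 8, nhid)  # (seq_len=100, batch=8, d_model=nhid)
out = encoder(src)               # output keeps the input shape: (100, 8, nhid)
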
Code example #12
    def __init__(self, args, src_dict, embed_speaker):
        super().__init__(src_dict)
        self.padding_idx = src_dict.pad()
        self.embed_speaker = embed_speaker
        self.spk_emb_proj = None
        if embed_speaker is not None:
            self.spk_emb_proj = nn.Linear(
                args.encoder_embed_dim + args.speaker_embed_dim, args.encoder_embed_dim
            )

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__
        )
        self.embed_tokens = nn.Embedding(
            len(src_dict), args.encoder_embed_dim, padding_idx=self.padding_idx
        )
        assert args.encoder_conv_kernel_size % 2 == 1
        self.prenet = nn.ModuleList(
            nn.Sequential(
                nn.Conv1d(
                    args.encoder_embed_dim,
                    args.encoder_embed_dim,
                    kernel_size=args.encoder_conv_kernel_size,
                    padding=((args.encoder_conv_kernel_size - 1) // 2),
                ),
                nn.BatchNorm1d(args.encoder_embed_dim),
                nn.ReLU(),
                nn.Dropout(args.encoder_dropout),
            )
            for _ in range(args.encoder_conv_layers)
        )
        self.prenet_proj = nn.Linear(args.encoder_embed_dim, args.encoder_embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions, args.encoder_embed_dim, self.padding_idx
        )
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))

        self.transformer_layers = nn.ModuleList(
            TransformerEncoderLayer(args)
            for _ in range(args.encoder_transformer_layers)
        )
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None

        self.apply(encoder_init)
Code example #13
File: transformer.py Project: zhajiahe/Token_Drop
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        self.layer_wise_attention = getattr(args, "layer_wise_attention",
                                            False)

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
        self.src_drop = args.src_drop
        self.RTD = args.RTD
        self.drop_method = args.drop_method
        if self.drop_method == 'drop_tag':
            self.mask = dictionary.indices['<dropped>']
        elif self.drop_method == 'unk_tag':
            self.mask = dictionary.indices['<unk>']
Code example #14
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout
        # self.args = args

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        print(self.padding_idx)
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layer_wise_attention = getattr(args, 'layer_wise_attention',
                                            False)

        self.layers = nn.ModuleList([])
        # if 'adaptive' in args.init_type and not args.encoder_normalize_before:
        #     print('adaptive init')
        self.layers.extend([
            TransformerEncoderLayer(args, LayerNum=i)
            for i in range(args.encoder_layers)
        ])
        # else:
        #     self.layers.extend([
        #         TransformerEncoderLayer(args)
        #         for i in range(args.encoder_layers)
        #     ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        if args.fp16:
            self.out_type = torch.half
        else:
            self.out_type = torch.float
Code example #15
	def __init__(self, args, dictionary, word_encoder_embed_dim, encoder_embed_dim):
		super().__init__(dictionary)
		self.register_buffer("version", torch.Tensor([3]))

		self.dropout = args.dropout
		self.encoder_layerdrop = args.encoder_layerdrop

		if word_encoder_embed_dim != encoder_embed_dim:
			self.word_projection_layer = Linear(word_encoder_embed_dim, encoder_embed_dim)
		else:
			self.word_projection_layer = None
		embed_dim = encoder_embed_dim
		self.output_units = encoder_embed_dim
		self.padding_idx = dictionary.pad()
		self.max_source_positions = args.max_source_positions


		self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

		self.embed_positions = (
			PositionalEmbedding(
				args.max_source_positions,
				embed_dim,
				self.padding_idx,
				learned=args.encoder_learned_pos,
			)
			if not args.no_token_positional_embeddings
			else None
		)

		self.layer_wise_attention = getattr(args, "layer_wise_attention", False)

		self.layers = nn.ModuleList([])
		self.layers.extend(
			[TransformerEncoderLayer(args) for i in range(args.encoder_layers)]
		)
		self.num_layers = len(self.layers)

		if args.encoder_normalize_before:
			self.layer_norm = LayerNorm(embed_dim)
		else:
			self.layer_norm = None
		if getattr(args, "layernorm_embedding", False):
			self.layernorm_embedding = LayerNorm(embed_dim)
		else:
			self.layernorm_embedding = None
Code example #16
    def __init__(self, args):
        """Construct an Encoder object."""
        super().__init__(None)

        self.dropout = args.dropout
        self.embed_scale = (1.0 if args.no_scale_embedding else math.sqrt(
            args.encoder_embed_dim))
        self.padding_idx = 1
        self.in_channels = 1
        self.input_dim = args.input_feat_per_channel
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(1,
                            args.conv_out_channels,
                            3,
                            stride=2,
                            padding=3 // 2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(
                args.conv_out_channels,
                args.conv_out_channels,
                3,
                stride=2,
                padding=3 // 2,
            ),
            torch.nn.ReLU(),
        )
        transformer_input_dim = self.infer_conv_output_dim(
            self.in_channels, self.input_dim, args.conv_out_channels)
        self.out = torch.nn.Linear(transformer_input_dim,
                                   args.encoder_embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            args.encoder_embed_dim,
            self.padding_idx,
            learned=False,
        )

        self.transformer_layers = nn.ModuleList([])
        self.transformer_layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
Code example #17
    def __init__(self, args):
        # pass an empty dictionary
        super().__init__(dict())
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = args.encoder_embed_dim
        self.max_source_positions = args.max_source_positions

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)
        # self.embed_scale = 1.0

        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            padding_idx=0,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layer_wise_attention = getattr(args, 'layer_wise_attention',
                                            False)

        # downsize the input embedding
        self.early_proj = args.early_proj
        if self.early_proj:
            self.inp_fc = Linear(args.src_embed_dim, embed_dim)

        # add encoder layers
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, 'layernorm_embedding', False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
Code example #18
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions
        self.agg_method = args.agg_method
        self.agg_layers = args.agg_layers

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        self.attn = MultiheadAttention(embed_dim,
                                       args.encoder_attention_heads,
                                       dropout=args.attention_dropout,
                                       encoder_decoder_attention=True)
        self.fc = Linear(args.agg_layers * embed_dim, embed_dim)

        self.activation_fn = utils.get_activation_fn(
            activation=getattr(args, 'activation_fn', 'relu'))
        self.activation_dropout = getattr(args, 'activation_dropout', 0)
        if self.activation_dropout == 0:
            # for backwards compatibility with models that use args.relu_dropout
            self.activation_dropout = getattr(args, 'relu_dropout', 0)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Code example #19
    def __init__(self, args, embed_dim, dictionary=None):
        super(VideoTransformerEncoder, self).__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.args = args
        self.dropout = args.dropout

        resnet18 = models.resnet18(pretrained=False)
        self.spatio_enc = nn.Sequential(*list(resnet18.children())[:-1])
        if args.cnn_parameters_freeze:
            for p in self.spatio_enc.parameters():
                p.requires_grad = False

        if args.cnn_normalize_after:
            self.batchnorm = nn.BatchNorm1d(embed_dim)
            self.relu = nn.ReLU()

        self.padding_idx = 0  # Note: this differs from the padding_idx of tgt_dict.
        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        self.layer_wise_attention = getattr(args, "layer_wise_attention",
                                            False)
        self.encoder_layerdrop = args.encoder_layerdrop

        self.temporal_enc_layers = nn.ModuleList([])
        self.temporal_enc_layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])
        self.num_layer = len(self.temporal_enc_layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
Code example #20
 def __init__(self, args, tgt_dict, embed_tokens):
     super().__init__(tgt_dict)
     self.n_layers = args.context_encoder_layers
     self.embed_dim = args.decoder_embed_dim
     self.embed_scale = math.sqrt(
         self.embed_dim)  # todo: try with input_embed_dim
     self.embed_tokens = embed_tokens
     self.dropout = args.dropout
     self.embed_positions = PositionalEmbedding(
         args.max_target_positions,
         self.embed_dim,
         self.embed_tokens.padding_idx,
         learned=args.decoder_learned_pos,
     )
     self.layers = nn.ModuleList(
         [TransformerEncoderLayer(args) for _ in range(self.n_layers)])
     input_embed_dim = embed_tokens.embedding_dim
     self.project_in_dim = Linear(input_embed_dim, self.embed_dim, bias=False) \
         if self.embed_dim != input_embed_dim else None
Code example #21
File: bgt.py Project: jwcmu/bgt
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))
        self.args = args
        self.dropout = args.dropout
        self.bgt_setting = self.args.bgt_setting

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        self.hidden2mean = nn.Linear(embed_dim,
                                     self.args.latent_size,
                                     bias=False)

        if self.bgt_setting == "bgt":
            self.hidden2logv = nn.Linear(embed_dim,
                                         self.args.latent_size,
                                         bias=False)
            self.latent2hidden = nn.Linear(self.args.latent_size,
                                           embed_dim,
                                           bias=False)
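
The hidden2mean / hidden2logv / latent2hidden projections above are the usual VAE-style parameterization. As a hedged, standalone illustration of what those layer names typically imply (the snippet's forward pass is not shown, so treat this as an assumption rather than the project's actual code; sentence_repr is a placeholder name):

import torch
import torch.nn as nn

embed_dim, latent_size, batch = 512, 64, 4
hidden2mean = nn.Linear(embed_dim, latent_size, bias=False)
hidden2logv = nn.Linear(embed_dim, latent_size, bias=False)
latent2hidden = nn.Linear(latent_size, embed_dim, bias=False)

sentence_repr = torch.randn(batch, embed_dim)  # placeholder pooled encoder state
mean = hidden2mean(sentence_repr)
logv = hidden2logv(sentence_repr)
# Reparameterization trick: sample z from N(mean, exp(logv)).
z = mean + torch.exp(0.5 * logv) * torch.randn_like(mean)
decoded_input = latent2hidden(z)               # shape: (batch, embed_dim)
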
Code example #22
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.join_layer = args.join_layer
        if self.join_layer:
            self.num_branches = args.encoder_branches
            self.branch_dropout = args.branch_dropout
            self.layers_branches = nn.ModuleList([])
            for _ in range(args.encoder_layers):
                layer_i_branches = nn.ModuleList([])
                self.layers_branches.append(layer_i_branches)
                for _ in range(self.num_branches):
                    layer_i_branches.append(TransformerEncoderLayer(args))
        else:
            self.layers = nn.ModuleList([])
            self.layers.extend([
                TransformerMBEncoderLayer(args)
                for i in range(args.encoder_layers)
            ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Code example #23
File: transformer.py Project: zjpbinary/multiDDS
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.sde = args.sde
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args) for i in range(args.encoder_layers)
        ])

        if args.scale_norm:
            scale = embed_dim**0.5
        else:
            scale = None

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim, scale=scale)
        else:
            self.layer_norm = None
        self.tracker = VariableTracker()
        self.track_gradients = False
Code example #24
    def __init__(self, args, dictionary):
        super().__init__(dictionary)

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__)
        self.embed_scale = math.sqrt(args.encoder_embed_dim)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        self.padding_idx = 1

        self.subsample = Conv1dSubsampler(
            args.input_feat_per_channel * args.input_channels,
            args.conv_channels,
            args.encoder_embed_dim,
            [int(k) for k in args.conv_kernel_sizes.split(",")],
        )

        self.embed_positions = PositionalEmbedding(args.max_source_positions,
                                                   args.encoder_embed_dim,
                                                   self.padding_idx)

        self.transformer_layers = nn.ModuleList([
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ])
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
        # ctc
        self.ctc_compress_out = args.ctc_compress_out
        if self.ctc_compress_out:
            self.ctc_fc = nn.Linear(args.encoder_embed_dim, len(dictionary))
            assert args.criterion == "ctc_multi_loss"
            self.ctc_layer = args.ctc_encoder_layer
            self.ctc_compress_method = getattr(CTCCompressStrategy,
                                               args.ctc_compress_strategy)
Code example #25
 def build_encoder_layer(self, args):
     return TransformerEncoderLayer(args)
Code example #26
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        try:
            if args.PRUNE_BOOL:
                self.encoder_self_attn_path = args.ENCODER_SELF_ATTN_PATH
                self.encoder_self_attn_pattern = torch.from_numpy(np.load(self.encoder_self_attn_path)) #(no_layers, 1, no_head, 1024, 1024)
                if args.CUDA:
                    self.encoder_self_attn_pattern = self.encoder_self_attn_pattern.cuda()
        except:  # backward compatibility
            args.PRUNE_BOOL = False
            args.PRUNE_ENC_SELF_ATTN = False
            args.PRUNE_DEC_SELF_ATTN = False
            args.PRUNE_ENC_DEC_ATTN = False
            args.TAU = 0
            args.USE_ENTMAX = False
            args.ENCODER_SELF_ATTN_PATH = None
            args.DECODER_SELF_ATTN_PATH = None
            args.ENCODER_DECODER_ATTN_PATH = None
            args.CUDA = True
            args.RANDOM_PRUNE = False


        self.dropout = args.dropout
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        self.layer_wise_attention = getattr(args, "layer_wise_attention", False)

        self.layers = nn.ModuleList([])
        if args.PRUNE_BOOL:
            self.layers.extend(
                [TransformerEncoderLayer(args, self.encoder_self_attn_pattern[i]) for i in range(args.encoder_layers)]
            )
        else:
            self.layers.extend(
                [TransformerEncoderLayer(args, None) for i in range(args.encoder_layers)]
            )
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
Code example #27
File: transformer.py Project: zubairabid/UVR-NMT
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions, embed_dim, self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args)
            for i in range(args.encoder_layers)
        ])

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        #image section

        self.img_dim = 2048
        self.text_dim = embed_dim
        self.L2norm = args.L2norm
        self.total_num_img = args.total_num_img
        self.per_num_img = args.per_num_img

        # cap2image_file = args.cap2image_file
        # image_embedding_file = args.image_embedding_file
        
        cap2image_file = getattr(args, "cap2image_file", "data/cap2image.pickle")
        image_embedding_file = getattr(args, "image_embedding_file", "features_resnet50/train-resnet50-avgpool.npy")

        self.cap2image = pickle.load(open(cap2image_file, "rb"))  #cap_id to image_id

        #print("image embedding processing...")
        embeding_weights = np.load(image_embedding_file)
        img_vocab, img_dim = embeding_weights.shape
        embeddings_matrix = np.zeros((img_vocab + 1, img_dim))
        embeddings_matrix[1:] = embeding_weights
        self.img_embeddings = nn.Embedding.from_pretrained(torch.FloatTensor(embeddings_matrix),
                                                           freeze=args.image_emb_fix)  # update embedding

        # self.img_embeddings.load_state_dict({'weight': embeddings_matrix})
        # if args.image_emb_fix:
        #     self.img_embeddings.weight.requires_grad = False
        self.merge_option = args.merge_option
        self.dense = nn.Linear(self.img_dim, self.text_dim)

        self.mergeImage = nn.Linear(self.total_num_img, 1)
        if self.merge_option == "att-mul-concat":
            self.proj_attention = SCAttention(self.text_dim, 128)
            self.dense2 = nn.Linear(self.text_dim, 384)
        elif self.merge_option == "att-concat":
            self.dense2 = nn.Linear(2 * self.text_dim, self.text_dim)
        elif self.merge_option == "att-gate":
            self.gate_type = args.gate_type
            self.proj_attention = SCAttention(self.text_dim, self.text_dim)
            if self.gate_type == "neural-gate":
                self.sigmoid = nn.Sigmoid()
                self.gate_dense = nn.Linear(2*self.text_dim, self.text_dim)
            elif self.gate_type == "scalar-gate":
                self.sigmoid = nn.Sigmoid()
                self.gate_dense = nn.Linear(2*self.text_dim, 1)
            else:
                self.image_weight = args.image_weight

        else:
            self.proj_attention = SCAttention(self.text_dim, self.text_dim)
Code example #28
    def __init__(
        self,
        input_feat_per_channel,
        vggblock_config=DEFAULT_ENC_VGGBLOCK_CONFIG,
        transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG,
        encoder_output_dim=512,
        in_channels=1,
        transformer_context=None,
        transformer_sampling=None,
    ):
        """constructor for VGGTransformerEncoder

        Args:
            - input_feat_per_channel: feature dimension per channel (the base
              feature dimension, not counting stacking)
            - in_channels: number of input channels (e.g., if 8 feature vectors
              are stacked together, this is 8)
            - vggblock_config: configuration of vggblock, see comments on
                DEFAULT_ENC_VGGBLOCK_CONFIG
            - transformer_config: configuration of transformer layer, see comments
                on DEFAULT_ENC_TRANSFORMER_CONFIG
            - encoder_output_dim: final transformer output embedding dimension
            - transformer_context: (left, right); if set, self-attention at step t
              is restricted to the window (t - left, t + right)
            - transformer_sampling: an iterable of ints whose length must match
              len(transformer_config); transformer_sampling[i] is the sampling
              factor applied after the multihead attention and feedforward parts
              of the i-th transformer layer
        """
        super().__init__(None)

        self.num_vggblocks = 0
        if vggblock_config is not None:
            if not isinstance(vggblock_config, Iterable):
                raise ValueError("vggblock_config is not iterable")
            self.num_vggblocks = len(vggblock_config)

        self.conv_layers = nn.ModuleList()
        self.in_channels = in_channels
        self.input_dim = input_feat_per_channel
        self.pooling_kernel_sizes = []

        if vggblock_config is not None:
            for _, config in enumerate(vggblock_config):
                (
                    out_channels,
                    conv_kernel_size,
                    pooling_kernel_size,
                    num_conv_layers,
                    layer_norm,
                ) = config
                self.conv_layers.append(
                    VGGBlock(
                        in_channels,
                        out_channels,
                        conv_kernel_size,
                        pooling_kernel_size,
                        num_conv_layers,
                        input_dim=input_feat_per_channel,
                        layer_norm=layer_norm,
                    ))
                self.pooling_kernel_sizes.append(pooling_kernel_size)
                in_channels = out_channels
                input_feat_per_channel = self.conv_layers[-1].output_dim

        transformer_input_dim = self.infer_conv_output_dim(
            self.in_channels, self.input_dim)
        # transformer_input_dim is the output dimension of VGG part

        self.validate_transformer_config(transformer_config)
        self.transformer_context = self.parse_transformer_context(
            transformer_context)
        self.transformer_sampling = self.parse_transformer_sampling(
            transformer_sampling, len(transformer_config))

        self.transformer_layers = nn.ModuleList()

        if transformer_input_dim != transformer_config[0][0]:
            self.transformer_layers.append(
                Linear(transformer_input_dim, transformer_config[0][0]))
        self.transformer_layers.append(
            TransformerEncoderLayer(
                prepare_transformer_encoder_params(*transformer_config[0])))

        for i in range(1, len(transformer_config)):
            if transformer_config[i - 1][0] != transformer_config[i][0]:
                self.transformer_layers.append(
                    Linear(transformer_config[i - 1][0],
                           transformer_config[i][0]))
            self.transformer_layers.append(
                TransformerEncoderLayer(
                    prepare_transformer_encoder_params(
                        *transformer_config[i])))

        self.encoder_output_dim = encoder_output_dim
        self.transformer_layers.extend([
            Linear(transformer_config[-1][0], encoder_output_dim),
            LayerNorm(encoder_output_dim),
        ])
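
For readers unfamiliar with the config tuples unpacked above, an illustrative vggblock_config whose layout matches the destructuring in the loop; the numbers are placeholders rather than the library defaults.

# Each VGG block is described by
# (out_channels, conv_kernel_size, pooling_kernel_size, num_conv_layers, layer_norm).
example_vggblock_config = [(32, 3, 2, 2, False), (64, 3, 2, 2, False)]
# For transformer_config, the constructor above only relies on each entry starting
# with its embedding dimension (transformer_config[i][0]), which is compared across
# consecutive layers to decide whether a Linear bridge is inserted between them.
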
Code example #29
 def build_encoder_layer(self, args):
     layer = TransformerEncoderLayer(args)
     if getattr(args, "checkpoint_activations", False):
         layer = checkpoint_wrapper(layer)
     return layer
Code example #30
File: transformer.py Project: hfxunlp/fairseq-py
 def build_encoder_layer(self, args):
     layer = TransformerEncoderLayer(args)
     if getattr(args, "checkpoint_activations", False):
         offload_to_cpu = getattr(args, "offload_activations", False)
         layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu)
     return layer