def __init__(self, args, dictionary, embed_tokens, left_pad=True):
    super().__init__(dictionary)
    self.dropout = args.dropout

    embed_dim = embed_tokens.embedding_dim
    self.padding_idx = embed_tokens.padding_idx
    self.embed_tokens = embed_tokens
    # Scale token embeddings by sqrt(embed_dim), as in the original Transformer.
    self.embed_scale = math.sqrt(embed_dim)
    # Positional embeddings for sequences of up to 1024 tokens.
    self.embed_positions = fairseq_transformer.PositionalEmbedding(
        1024,
        embed_dim,
        self.padding_idx,
        left_pad=left_pad,
        learned=args.encoder_learned_pos,
    )

    self.layers = nn.ModuleList([])
    self.layers.extend([
        fairseq_transformer.TransformerEncoderLayer(args)
        for _ in range(args.encoder_layers)
    ])

    # Variable tracker for retrieving gradients w.r.t. intermediate tensors.
    self.tracker = VariableTracker()
    # Initialize adversarial mode (gradient tracking off by default).
    self.set_gradient_tracking_mode(False)
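# The constructor above ends by calling self.set_gradient_tracking_mode(False),
# whose body is not shown in this listing. A minimal sketch, assuming the method
# only resets the tracker and stores the flag (this body and the
# `track_gradients` attribute name are assumptions, not taken from the source):
def set_gradient_tracking_mode(self, mode=True):
    # Clear previously tracked variables, then record whether gradients of
    # tracked tensors (e.g., token embeddings) should be retained, which is
    # what adversarial input perturbation needs.
    self.tracker.reset()
    self.track_gradients = mode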
def __init__(self, args, proj_to_decoder):
    super().__init__()
    # Stack of args.encoder_layers Transformer encoder layers.
    self.layers = nn.ModuleList([])
    self.layers.extend([
        fairseq_transformer.TransformerEncoderLayer(args)
        for _ in range(args.encoder_layers)
    ])
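# nn.ModuleList (used above) registers each layer as a submodule, so its
# parameters appear in .parameters() and move with .to()/.cuda(). A small
# self-contained illustration of the same pattern (names are illustrative):
import torch.nn as nn

class LayerStack(nn.Module):
    def __init__(self, num_layers):
        super().__init__()
        # A plain Python list would hide these parameters from the optimizer;
        # ModuleList makes them visible to PyTorch's module machinery.
        self.layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(num_layers)])

print(len(list(LayerStack(3).parameters())))  # 6: one weight + one bias per layer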
def __init__(self, args, proj_to_decoder):
    super().__init__()
    self.layers = nn.ModuleList([])
    self.layers.extend([
        fairseq_transformer.TransformerEncoderLayer(args)
        for _ in range(args.encoder_layers)
    ])
    # Project encoder outputs to the decoder's embedding size, but only when
    # the two dimensions differ and projection is requested.
    self.output_fc = None
    if args.encoder_embed_dim != args.decoder_embed_dim and proj_to_decoder:
        self.output_fc = fairseq_transformer.Linear(
            args.encoder_embed_dim, args.decoder_embed_dim
        )
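# The optional output_fc above bridges mismatched widths: encoder states of
# size encoder_embed_dim are mapped to decoder_embed_dim before the decoder
# consumes them. A runnable sketch with illustrative sizes; plain nn.Linear
# stands in for fairseq_transformer.Linear, which (as I understand it) is a
# thin initialization wrapper around nn.Linear:
import torch
import torch.nn as nn

encoder_embed_dim, decoder_embed_dim = 512, 256  # illustrative values
output_fc = nn.Linear(encoder_embed_dim, decoder_embed_dim)

x = torch.randn(10, 4, encoder_embed_dim)  # (seq_len, batch, encoder width)
print(output_fc(x).shape)                  # torch.Size([10, 4, 256])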