def __init__(
        self,
        dictionary,
        node_dictionary,
        embed_dim=512,
        type_embed_dim=512,
        hidden_size=512,
        decoder_hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=True,
        left_pad=True,
        pretrained_embed=None,
        pretrained_terminals_embed=None,
        padding_idx=None,
        type_padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
):
    super().__init__(dictionary)
    self.node_dictionary = node_dictionary
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.max_source_positions = max_source_positions
    self.padding_idx = padding_idx if padding_idx is not None else self.dictionary.pad()
    self.type_padding_idx = type_padding_idx if type_padding_idx is not None else self.node_dictionary.pad()

    if pretrained_embed is None:
        self.subtoken_embed = Embedding(len(dictionary), embed_dim, self.padding_idx)
    else:
        self.subtoken_embed = pretrained_embed
    if pretrained_terminals_embed is None:
        self.node_embed = Embedding(len(self.node_dictionary), type_embed_dim, self.type_padding_idx)
    else:
        self.node_embed = pretrained_terminals_embed

    self.lstm = LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size // (1 + int(bidirectional)),
        num_layers=num_layers,
        dropout=self.dropout_out if num_layers > 1 else 0.,
        bidirectional=bidirectional,
        batch_first=True,
    )
    self.left_pad = left_pad

    self.transform = nn.Sequential(
        nn.Linear(2 * type_embed_dim + hidden_size, decoder_hidden_size, bias=False),
        nn.Tanh(),
    )
    self.output_units = decoder_hidden_size

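# The 2 * type_embed_dim + hidden_size input width of `transform` above suggests a
# code2seq-style fusion: the embeddings of a path's two terminal nodes are concatenated
# with the LSTM summary of the path before being projected to the decoder size. This is
# an assumption based only on the layer sizes; the sketch below is a minimal, hypothetical
# shape check with plain torch (names are illustrative, not from the repo).
def _demo_path_transform_shapes(bsz=4, type_embed_dim=512, hidden_size=512, decoder_hidden_size=512):
    import torch
    import torch.nn as torch_nn
    start_node = torch.randn(bsz, type_embed_dim)   # embedding of the path's first terminal
    path_state = torch.randn(bsz, hidden_size)      # (bi)LSTM summary of the path
    end_node = torch.randn(bsz, type_embed_dim)     # embedding of the path's last terminal
    fused = torch.cat([start_node, path_state, end_node], dim=-1)  # [bsz, 2 * type_embed_dim + hidden_size]
    transform = torch_nn.Sequential(
        torch_nn.Linear(2 * type_embed_dim + hidden_size, decoder_hidden_size, bias=False),
        torch_nn.Tanh(),
    )
    return transform(fused)  # [bsz, decoder_hidden_size]
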
def __init__(self, dictionary, embed_dim=400, pos_len=100, pos_dim=50, hidden_size=400, out_embed_dim=400,
             num_layers=1, dropout_in=0.5, dropout_out=0.5, encoder_output_units=400,
             pretrained_embed=None, share_input_output_embed=False,
             max_target_positions=DEFAULT_MAX_TARGET_POSITIONS):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.max_target_positions = max_target_positions

    num_embeddings = len(dictionary)
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx=dictionary.pad())
    else:
        self.embed_tokens = pretrained_embed

    self.pos_len = pos_len + 1
    self.pos_dim = pos_dim
    self.pos_embed = Embedding(self.pos_len, pos_dim)

    # disable input feeding if there is no encoder
    # input feeding is described in arxiv.org/abs/1508.04025
    # self.layers = nn.ModuleList([
    #     LSTMCell(
    #         # input_size=encoder_output_units + pos_dim if layer == 0 else hidden_size,
    #         input_size=encoder_output_units if layer == 0 else hidden_size,
    #         hidden_size=hidden_size,
    #     )
    #     for layer in range(num_layers)
    # ])
    self.layers = nn.ModuleList([
        LSTM(
            in_dim=encoder_output_units + pos_dim if layer == 0 else hidden_size,
            # in_dim=encoder_output_units if layer == 0 else hidden_size,
            out_dim=hidden_size,
        )
        for layer in range(num_layers)
    ])

    # W_H(h) + W_T(t) => fc_out
    self.W_H = nn.Linear(self.hidden_size, self.hidden_size)
    self.W_T = nn.Linear(self.hidden_size, self.hidden_size)

    if not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings)

def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        bidirectional=False,
        dropout=0.5,
        pretrained_embed=None,
        shared_embedding=False,
):
    super(LSTMDecoder, self).__init__(dictionary)
    if pretrained_embed is None:
        self.embed_tokens = Embedding(len(dictionary), embed_dim, padding_idx=dictionary.pad())
    else:
        self.embed_tokens = pretrained_embed
    self.rnn = LSTM(
        embed_dim,
        hidden_size,
        num_layers=num_layers,
        dropout=dropout,
        batch_first=True,
        bidirectional=False,  # in prediction task, cannot set bidirectional True
    )
    # self.dropout = dropout
    # self.bidirectional = bidirectional
    # if bidirectional:
    #     self.proj = Linear(hidden_size * 2, hidden_size)
    self.fc_out = Linear(hidden_size, len(dictionary))
    if shared_embedding:
        self.fc_out.weight = self.embed_tokens.weight

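# Note on the weight tying above: nn.Linear(hidden_size, len(dictionary)) stores its weight
# as [len(dictionary), hidden_size] while the embedding weight is [len(dictionary), embed_dim],
# so `shared_embedding=True` only works when hidden_size == embed_dim. A minimal, hypothetical
# check with plain torch (function name is illustrative, not part of the repo):
def _check_tied_embedding_shapes(vocab_size=1000, embed_dim=512, hidden_size=512):
    import torch.nn as torch_nn
    embed_tokens = torch_nn.Embedding(vocab_size, embed_dim)
    fc_out = torch_nn.Linear(hidden_size, vocab_size)
    assert fc_out.weight.shape == embed_tokens.weight.shape, \
        'shared_embedding requires hidden_size == embed_dim'
    fc_out.weight = embed_tokens.weight  # same tying as in the decoder above
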
def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad=True,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
):
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size
    self.max_source_positions = max_source_positions

    num_embeddings = len(dictionary)
    self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.lstm = ChildSumTreeLSTMCell(
        input_size=embed_dim,
        hidden_size=hidden_size,
    )
    self.left_pad = left_pad

    self.output_units = hidden_size
    if bidirectional:
        self.output_units *= 2

def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)

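# Usage note (an assumption based on the fairseq-style helpers used above): `embed_path` is
# expected to point at a plain-text embedding file in word2vec/GloVe style, one token per
# line followed by its vector components, e.g.
#
#     def 0.013 -0.241 0.118 ...
#     return -0.094 0.007 0.332 ...
#
# Tokens in the file that are missing from `dictionary` are ignored, dictionary entries not
# found in the file keep their randomly initialized rows, and `print_embed_overlap` reports
# how many dictionary tokens were matched.
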
def __init__(self, dictionary, embed_dim, token_types, max_positions,
             self_attn_layers, attention_heads, ffn_embed_dim, activation_fn, dropout,
             **kwargs,
             ):
    super(SelfAttnEncoder, self).__init__(dictionary)
    # word embedding
    self.embed = Embedding(
        len(dictionary), embed_dim, padding_idx=self.dictionary.pad(),
        initializer=trunc_normal(mean=.0, std=.02),
    )
    # type embedding
    if token_types is not None:
        self.type_embed = Embedding(
            token_types, embed_dim,
            initializer=trunc_normal(mean=.0, std=.02),
        )
    else:
        self.type_embed = None
    # positional embedding
    if max_positions is not None:
        self.positional_embed = Parameter(
            1, max_positions, embed_dim,
            initializer=trunc_normal(mean=.0, std=.02),
        )
    else:
        self.positional_embed = None
    # layer norm for embedding
    self.embed_layer_norm = LayerNorm(embed_dim)
    self.dropout = dropout
    # self attn
    self.num_layers = self_attn_layers
    self.layers = nn.ModuleList(
        [TransformerEncoderLayer(embed_dim, attention_heads, dropout, ffn_embed_dim, activation_fn)
         for _ in range(self_attn_layers)]
    )
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    # guard against pooling=None before the substring check
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
    else:
        self.weight_layer = None

def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb

def __init__(self,
             token_num: int, embed_size: int,
             rnn_type: str, hidden_size: int, layer_num: int, dropout: float, bidirectional: bool,
             attn_hops: int, attn_unit: int,
             ) -> None:
    super(SelfAttnEncoder, self).__init__()
    self.wemb = Embedding(token_num, embed_size)
    self.rnn = RNNEncoder(rnn_type, embed_size, hidden_size, layer_num, 0.0, bidirectional)
    self.self_attn = SelfAttention(hidden_size * (2 if bidirectional else 1),
                                   attn_hops=attn_hops,
                                   attn_unit=attn_unit * (2 if bidirectional else 1),
                                   dropout=dropout)
    self.dropout = dropout

def __init__(self, dictionary, embed_dim, out_channels, kernel_size, **kwargs):
    super().__init__(dictionary)
    # word embedding + positional embedding
    self.embed = Embedding(len(dictionary), embed_dim)  # , padding_idx=self.dictionary.pad())
    self.position_encoding = kwargs.get('position_encoding', None)
    if self.position_encoding == 'learned':
        self.position_embed = Parameter(1, kwargs['max_tokens'], embed_dim,
                                        initializer=trunc_normal(mean=0., std=0.02))
    else:
        self.position_embed = None
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    # guard against pooling=None before the substring check
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False)
    else:
        self.weight_layer = None
    # conv1d
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    # padding mode = ['valid'(default), 'same']
    self.padding = kwargs.get('padding', 'valid')
    if self.padding == 'same':
        self.padding_size = []
        for kernel_sz in self.kernel_size:
            padding_right = (kernel_sz - 1) // 2
            padding_left = kernel_sz - 1 - padding_right
            self.padding_size.append((0, 0, padding_left, padding_right,))
    self.conv_layers = nn.ModuleList([])
    # input: [bsz, 1, seq_len, embed_dim]
    # filters = 1 -> embed_dim
    # kernel_size = (kernel_width, embed_dim)
    # => output: [bsz, embed_dim, seq_len - kernel_width + 1]
    for idx, kernel_sz in enumerate(self.kernel_size):
        self.conv_layers.append(
            Conv2d(in_channels=1, out_channels=embed_dim, kernel_size=(kernel_sz, embed_dim)))
    self.residual = kwargs.get('residual', False)  # residual
    self.dropout = kwargs.get('dropout', None)
    activation_fn = kwargs.get('activation_fn', None)
    self.activation_fn = get_activation(activation_fn) if activation_fn else None

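# Illustrative sketch of the 'same' padding computed above (not part of the encoder): with the
# asymmetric (padding_left, padding_right) split, a (kernel_sz, embed_dim) Conv2d over the
# [bsz, 1, seq_len, embed_dim] input keeps the sequence length. Names below are hypothetical.
def _demo_same_padding(seq_len=7, embed_dim=16, kernel_sz=4):
    import torch
    import torch.nn.functional as F
    x = torch.randn(1, 1, seq_len, embed_dim)           # [bsz, 1, seq_len, embed_dim]
    padding_right = (kernel_sz - 1) // 2
    padding_left = kernel_sz - 1 - padding_right
    x = F.pad(x, (0, 0, padding_left, padding_right))   # pad only the seq_len dimension
    conv = torch.nn.Conv2d(in_channels=1, out_channels=embed_dim, kernel_size=(kernel_sz, embed_dim))
    out = conv(x)                                       # [bsz, embed_dim, seq_len, 1]
    assert out.size(2) == seq_len
    return out
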
def __init__(
        self, dictionary, src_modalities=['code'], embed_dim=512, hidden_size=512, out_embed_dim=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, attention=True, encoder_output_units=512,
        pretrained_embed=None, share_input_output_embed=False, adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS
):
    super().__init__(dictionary)
    self.src_modalities = src_modalities
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.max_target_positions = max_target_positions

    self.adaptive_softmax = None
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units
    if encoder_output_units != hidden_size and encoder_output_units != 0:
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)
    else:
        self.encoder_hidden_proj = self.encoder_cell_proj = None

    # disable input feeding if there is no encoder
    # input feeding is described in arxiv.org/abs/1508.04025
    input_feed_size = 0 if encoder_output_units == 0 else hidden_size
    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=input_feed_size + embed_dim if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    if attention:
        # TODO make bias configurable
        # self.attention = AttentionLayer(hidden_size, encoder_output_units, hidden_size, bias=False)
        self.attention = None
    else:
        self.attention = None
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    # if adaptive_softmax_cutoff is not None:
    #     # setting adaptive_softmax dropout to dropout_out for now but can be redefined
    #     self.adaptive_softmax = AdaptiveSoftmax(num_embeddings, hidden_size, adaptive_softmax_cutoff,
    #                                             dropout=dropout_out)
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

def __init__(self, args, dictionary):
    super(TransformerDecoder, self).__init__(dictionary)
    self.dropout = args['model']['dropout']
    embed_dim = args['model']['decoder_embed_dim']
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(len(dictionary), embed_dim, padding_idx=dictionary.pad())
    self.layers = nn.ModuleList([
        TransformerDecoderLayer(args)
        for _ in range(args['model']['decoder_layers'])
    ])
    self.num_layers = args['model']['decoder_layers']
    self.out_layer_norm = LayerNorm(embed_dim)

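# The decoder above pulls its hyper-parameters out of a nested args dict rather than an
# argparse namespace. A minimal, hypothetical configuration (keys taken from __init__ above,
# values purely illustrative; TransformerDecoderLayer(args) will read further keys of its own):
#
#     args = {
#         'model': {
#             'dropout': 0.1,
#             'decoder_embed_dim': 512,
#             'decoder_layers': 6,
#         },
#     }
#     decoder = TransformerDecoder(args, dictionary)
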
def __init__(
        self,
        dictionary,
        embed_dim,
        pooling='weighted_mean',
        dropout=0.1,
        **kwargs,
):
    super().__init__(dictionary)
    self.padding_idx = self.dictionary.pad()
    self.embed = Embedding(len(dictionary), embed_dim, padding_idx=self.padding_idx,
                           initializer=xavier_uniform())
    self.dropout = dropout
    self.pooling = pooling1d(pooling)
    if self.pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform()) \
            if 'weighted' in pooling else None
    else:
        self.weight_layer = None  # keep the attribute defined even without pooling

def __init__(
        self,
        dictionary,
        embed_dim,
        embed_out,
        dropout,
        edge_types,
        # scoring/transform MLPs
        out_dropout,
        dim_inner,
        dim_out,
):
    super(PoemEncoder, self).__init__(dictionary)
    # embedding block
    if dictionary is not None:
        self.embed = Embedding(len(dictionary), embed_dim)
    else:
        self.embed = None
    self.embed_modules = nn.Sequential(
        Linear(embed_dim, embed_out, bias=False),
        nn.ReLU(),
        nn.Dropout(dropout),
    )
    # MLP-GNN
    self.gnn_modules = GNNEncoder(edge_types, dim_in=embed_out, dim_inner=dim_out, dim_out=embed_out,
                                  dropout=dropout)

    # scoring MLP
    def get_mlp():
        return nn.Sequential(
            nn.Dropout(out_dropout),
            nn.Linear(embed_dim + embed_out, dim_inner, bias=False),
            nn.ReLU(),
            nn.Linear(dim_inner, dim_out, bias=False),
            nn.ReLU(),
        )

    self.score_mlp = get_mlp()
    self.transform_mlp = get_mlp()
    self.out_linear = nn.Sequential(
        nn.Linear(dim_out, 2),
        nn.Sigmoid(),
    )

def __init__(
        self,
        dictionary,
        embed_dim=400,
        dropout=0.5,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
):
    super().__init__(dictionary)
    self.dropout = dropout
    self.max_source_positions = max_source_positions
    num_embeddings = len(dictionary)
    self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    else:
        self.embed_tokens = pretrained_embed

def __init__(
        self,
        dictionary,
        embed_dim,
        dropout,
        # rnn config
        rnn_cell,
        rnn_hidden_dim,
        rnn_dropout,
        rnn_num_layers=1,
        rnn_bidirectional=False,
        **kwargs):
    super().__init__(dictionary)
    # word embedding + positional embedding
    self.embed = Embedding(len(dictionary), embed_dim, initializer=xavier_uniform())
    self.dropout = dropout
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    # guard against pooling=None before the substring check
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
    else:
        self.weight_layer = None
    # rnn
    self.rnn_dropout = rnn_dropout
    self.rnn_num_layers = rnn_num_layers
    self.rnn_bidirectional = rnn_bidirectional
    self.rnn = getattr(nn, str.upper(rnn_cell))(
        embed_dim,
        rnn_hidden_dim,
        num_layers=rnn_num_layers,
        dropout=self.rnn_dropout,  # rnn inner dropout between layers
        bidirectional=rnn_bidirectional,
        batch_first=True,
    )

def __init__(
        self,
        dictionary,
        embed_dim,
        # rnn config
        rnn_cell,
        rnn_hidden_dim,
        rnn_dropout=None,
        rnn_num_layers=2,
        rnn_bidirectional=False,
        # auxiliary input
        aux_dim=2,
        inner_dim=32,
        out_dim=2,
):
    super(DeepTuneEncoder, self).__init__(dictionary)
    self.embed = Embedding(len(dictionary), embed_dim)
    # LSTM
    self.rnn_dropout = rnn_dropout
    self.rnn = getattr(nn, str.upper(rnn_cell))(
        embed_dim,
        rnn_hidden_dim,
        num_layers=rnn_num_layers,
        # rnn inner dropout between layers; nn.LSTM requires a number, so fall back to 0.
        dropout=self.rnn_dropout if self.rnn_dropout is not None else 0.,
        bidirectional=rnn_bidirectional,
        batch_first=True,
    )
    self.src_out_proj = nn.Sequential(
        Linear(rnn_hidden_dim, out_dim),
        nn.Sigmoid(),
    )
    # Auxiliary inputs: wgsize and dsize
    self.bn = BatchNorm1d(rnn_hidden_dim + aux_dim)
    self.hybrid_out_proj = nn.Sequential(
        Linear(rnn_hidden_dim + aux_dim, inner_dim),
        nn.ReLU(),
        Linear(inner_dim, out_dim),
        nn.Sigmoid(),
    )

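# Usage note (inferred from the layer sizes above, not from the forward pass): the "hybrid"
# head presumably concatenates the final RNN state [bsz, rnn_hidden_dim] with the two
# auxiliary features (wgsize, dsize) into a [bsz, rnn_hidden_dim + aux_dim] tensor,
# batch-normalizes it with self.bn, and feeds it to self.hybrid_out_proj, while
# self.src_out_proj scores the source-only representation, in the spirit of DeepTune's
# heterogeneous device-mapping setup.
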
def __init__(self, dictionary, embed_dim=512, hidden_size=512, out_embed_dim=512,
             num_layers=1, dropout_in=0.1, dropout_out=0.1, attention=True,
             encoder_output_units=512, pretrained_embed=None,
             share_input_output_embed=False, adaptive_softmax_cutoff=None,
             max_target_positions=DEFAULT_MAX_TARGET_POSITIONS):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.max_target_positions = max_target_positions
    self.adaptive_softmax = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed
    self.encoder_output_units = encoder_output_units

    self.lstm = LSTM(hidden_size, hidden_size, dropout=dropout_in, batch_first=True)
    self.fc_out = Linear(out_embed_dim, num_embeddings, bias=False)

def build_model(cls, args, config, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    # base_architecture(args)
    if args['model']['encoder_layers'] != args['model']['decoder_layers']:
        raise ValueError('--encoder-layers must match --decoder-layers')

    max_source_positions = args['model']['max_source_positions'] if args['model']['max_source_positions'] \
        else DEFAULT_MAX_SOURCE_POSITIONS
    max_target_positions = args['model']['max_target_positions'] if args['model']['max_target_positions'] \
        else DEFAULT_MAX_TARGET_POSITIONS

    def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(embed_path)
        utils.print_embed_overlap(embed_dict, dictionary)
        return utils.load_embedding(embed_dict, dictionary, embed_tokens)

    if args['model']['encoder_embed']:
        pretrained_encoder_embed = load_pretrained_embedding_from_file(
            args['model']['encoder_embed_path'], task.source_dictionary,
            args['model']['encoder_embed_dim'])
    else:
        num_embeddings = len(task.source_dictionary)
        pretrained_encoder_embed = Embedding(
            num_embeddings, args['model']['encoder_embed_dim'],
            task.source_dictionary.pad())

    if args['model']['share_all_embeddings']:
        # double check all parameter combinations are valid
        if task.source_dictionary != task.target_dictionary:
            raise ValueError('--share-all-embeddings requires a joint dictionary')
        if args['model']['decoder_embed_path'] and (
                args['model']['decoder_embed_path'] != args['model']['encoder_embed_path']):
            raise ValueError('--share-all-embeddings is not compatible with --decoder-embed-path')
        if args['model']['encoder_embed_dim'] != args['model']['decoder_embed_dim']:
            raise ValueError('--share-all-embeddings requires --encoder-embed-dim to '
                             'match --decoder-embed-dim')
        pretrained_decoder_embed = pretrained_encoder_embed
        args['model']['share_decoder_input_output_embed'] = True
    else:
        # separate decoder input embeddings
        pretrained_decoder_embed = None
        if args['model']['decoder_embed']:
            pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args['model']['decoder_embed'], task.target_dictionary,
                args['model']['decoder_embed_dim'])

    # one last double check of parameter combinations
    if args['model']['share_decoder_input_output_embed'] and (
            args['model']['decoder_embed_dim'] != args['model']['decoder_out_embed_dim']):
        raise ValueError('--share-decoder-input-output-embeddings requires '
                         '--decoder-embed-dim to match --decoder-out-embed-dim')
    if args['model']['encoder_freeze_embed']:
        pretrained_encoder_embed.weight.requires_grad = False
    if args['model']['decoder_freeze_embed']:
        pretrained_decoder_embed.weight.requires_grad = False

    encoder = NaryTreeLSTMEncoder(
        dictionary=task.source_dictionary,
        embed_dim=args['model']['encoder_embed_dim'],
        hidden_size=args['model']['encoder_hidden_size'],
        num_layers=args['model']['encoder_layers'],
        dropout_in=args['model']['encoder_dropout_in'],
        dropout_out=args['model']['encoder_dropout_out'],
        bidirectional=bool(args['model']['encoder_bidirectional']),
        left_pad=args['task']['left_pad_source'],
        pretrained_embed=pretrained_encoder_embed,
        max_source_positions=max_source_positions)
    decoder = LSTMDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args['model']['decoder_embed_dim'],
        hidden_size=args['model']['decoder_hidden_size'],
        out_embed_dim=args['model']['decoder_out_embed_dim'],
        num_layers=args['model']['decoder_layers'],
        dropout_in=args['model']['decoder_dropout_in'],
        dropout_out=args['model']['decoder_dropout_out'],
        attention=args['model']['decoder_attention'],
        encoder_output_units=encoder.output_units,
        pretrained_embed=pretrained_decoder_embed,
        share_input_output_embed=args['model']['share_decoder_input_output_embed'],
        adaptive_softmax_cutoff=(args['model']['adaptive_softmax_cutoff']
                                 if args['criterion'] == 'adaptive_loss' else None),
        max_target_positions=max_target_positions)
    return cls(encoder, decoder)

def build_model(cls, args, config, task):
    if args['model']['encoder_layers'] != args['model']['decoder_layers']:
        raise ValueError('--encoder-layers must match --decoder-layers')

    max_source_positions = args['model']['max_source_positions'] if args['model']['max_source_positions'] \
        else DEFAULT_MAX_SOURCE_POSITIONS
    max_target_positions = args['model']['max_target_positions'] if args['model']['max_target_positions'] \
        else DEFAULT_MAX_TARGET_POSITIONS

    def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(embed_path)
        utils.print_embed_overlap(embed_dict, dictionary)
        return utils.load_embedding(embed_dict, dictionary, embed_tokens)

    # subtoken
    if args['model']['encoder_path_embed']:
        pretrained_encoder_path_embed = load_pretrained_embedding_from_file(
            args['model']['encoder_path_embed'], task.source_dictionary,
            args['model']['encoder_path_embed_dim'])
    else:
        num_embeddings = len(task.source_dictionary)
        pretrained_encoder_path_embed = Embedding(
            num_embeddings, args['model']['encoder_path_embed_dim'],
            padding_idx=task.source_dictionary.pad())
    # type
    if args['model']['encoder_terminals_embed']:
        pretrained_encoder_terminals_embed = load_pretrained_embedding_from_file(
            args['model']['encoder_terminals_embed'], task.type_dict,
            args['model']['encoder_terminals_embed_dim'])
    else:
        num_embeddings = len(task.type_dict)
        pretrained_encoder_terminals_embed = Embedding(
            num_embeddings, args['model']['encoder_terminals_embed_dim'],
            padding_idx=task.type_dict.pad())
    # decoder
    if args['model']['decoder_embed']:
        pretrained_decoder_embed = load_pretrained_embedding_from_file(
            args['model']['decoder_embed'], task.target_dictionary,
            args['model']['decoder_embed_dim'])
    else:
        num_embeddings = len(task.target_dictionary)
        pretrained_decoder_embed = Embedding(
            num_embeddings, args['model']['decoder_embed_dim'],
            padding_idx=task.target_dictionary.pad())

    if args['model']['encoder_path_freeze_embed']:
        pretrained_encoder_path_embed.weight.requires_grad = False
    if args['model']['encoder_terminals_freeze_embed']:
        pretrained_encoder_terminals_embed.weight.requires_grad = False
    if args['model']['decoder_freeze_embed']:
        pretrained_decoder_embed.weight.requires_grad = False

    encoder = PathEncoder(
        dictionary=task.source_dictionary,
        node_dictionary=task.type_dict,
        embed_dim=args['model']['encoder_path_embed_dim'],
        type_embed_dim=args['model']['encoder_terminals_embed_dim'],
        hidden_size=args['model']['encoder_hidden_size'],
        decoder_hidden_size=args['model']['decoder_hidden_size'],
        num_layers=args['model']['encoder_layers'],
        dropout_in=args['model']['encoder_dropout_in'],
        dropout_out=args['model']['encoder_dropout_out'],
        bidirectional=bool(args['model']['encoder_bidirectional']),
        pretrained_embed=pretrained_encoder_path_embed,
        pretrained_terminals_embed=pretrained_encoder_terminals_embed,
        max_source_positions=max_source_positions)
    decoder = PathDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args['model']['decoder_embed_dim'],
        hidden_size=args['model']['decoder_hidden_size'],
        out_embed_dim=args['model']['decoder_out_embed_dim'],
        num_layers=args['model']['decoder_layers'],
        dropout_in=args['model']['decoder_dropout_in'],
        dropout_out=args['model']['decoder_dropout_out'],
        attention=args['model']['decoder_attention'],
        encoder_output_units=encoder.output_units,
        pretrained_embed=pretrained_decoder_embed,
        share_input_output_embed=args['model']['share_decoder_input_output_embed'],
        adaptive_softmax_cutoff=(args['model']['adaptive_softmax_cutoff']
                                 if args['criterion'] == 'adaptive_loss' else None),
        max_target_positions=max_target_positions)
    return cls(encoder, decoder)