def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    """Create an embedding table for ``dictionary`` and fill it from a file.

    Args:
        embed_path: location of the pretrained embeddings, handed to
            ``utils.parse_embedding``.
        dictionary: vocabulary object; ``len(dictionary)`` gives the number
            of rows and ``dictionary.pad()`` the padding index.
        embed_dim: width of the embedding table that is created.

    Returns:
        The result of ``utils.load_embedding`` applied to the parsed
        vectors, the dictionary and the freshly created table.
    """
    # The table is built before the file is parsed, as in the original flow.
    table = Embedding(len(dictionary), embed_dim, dictionary.pad())
    pretrained_vectors = utils.parse_embedding(embed_path)
    # Report how much of the dictionary the pretrained file covers.
    utils.print_embed_overlap(pretrained_vectors, dictionary)
    return utils.load_embedding(pretrained_vectors, dictionary, table)
def build_embedding(dictionary, embed_dim, path=None):
    """Build an embedding table for ``dictionary``, optionally preloaded.

    Args:
        dictionary: vocabulary object; ``len(dictionary)`` gives the number
            of rows and ``dictionary.pad()`` the padding index.
        embed_dim: width of the embedding table.
        path: when truthy, pretrained vectors are parsed from this location
            and loaded into the table.

    Returns:
        The embedding table created here (not the return value of
        ``utils.load_embedding``).
    """
    table = Embedding(len(dictionary), embed_dim, dictionary.pad())
    if not path:
        return table
    # A path was given: load the pretrained vectors into the table.
    pretrained_vectors = utils.parse_embedding(path)
    utils.load_embedding(pretrained_vectors, dictionary, table)
    return table
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    encoder_output_units=0,
    attn_type=None,
    attn_dim=0,
    need_attn=False,
    residual=False,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
):
    """Set up an LSTM-cell decoder with an optional attention module.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out`` and passed to the output
            projection / adaptive softmax.
        encoder_output_units: added to the input size of every cell; forced
            to 0 when no attention is configured.
        attn_type: ``None``/``'none'``, ``'bahdanau'`` or ``'luong'``
            (compared case-insensitively).
        attn_dim: passed to ``BahdanauAttention`` only.
        need_attn: stored as ``need_attn``; forced to ``False`` when no
            attention is configured.
        residual: stored as ``residual``.
        pretrained_embed: used as ``embed_tokens`` instead of a new table
            when not ``None``.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: when not ``None``, an ``AdaptiveSoftmax`` is
            created instead of ``fc_out``.

    Raises:
        ValueError: if ``attn_type`` is not one of the recognised values.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed

    # Normalise the attention selector once; None is handled like 'none'.
    attn_kind = 'none' if attn_type is None else attn_type.lower()
    if attn_kind == 'none':
        # no attention, no encoder output needed (language model case)
        need_attn = False
        encoder_output_units = 0
    self.need_attn = need_attn
    self.residual = residual
    self.adaptive_softmax = None

    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units

    # Layer 0 consumes the token embedding, deeper layers the previous
    # hidden state; every layer additionally receives encoder_output_units.
    cells = []
    for layer in range(num_layers):
        recurrent_width = embed_dim if layer == 0 else hidden_size
        cells.append(
            LSTMCell(
                input_size=encoder_output_units + recurrent_width,
                hidden_size=hidden_size,
            )
        )
    self.layers = nn.ModuleList(cells)

    if attn_kind == 'none':
        self.attention = None
    elif attn_kind == 'bahdanau':
        self.attention = speech_attention.BahdanauAttention(
            hidden_size, encoder_output_units, attn_dim,
        )
    elif attn_kind == 'luong':
        self.attention = speech_attention.LuongAttention(
            hidden_size, encoder_output_units,
        )
    else:
        raise ValueError('unrecognized attention type.')

    # Bridge to out_embed_dim only when the widths differ.
    projection_in = hidden_size + encoder_output_units
    if projection_in != out_embed_dim:
        self.additional_fc = Linear(projection_in, out_embed_dim)

    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            vocab_size, hidden_size, adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)
def __init__(
    self,
    dictionary: Dictionary,
    embed_dim: int = 512,
    hidden_size: int = 512,
    out_embed_dim: int = 512,
    num_layers: int = 1,
    dropout_in: float = 0.1,
    dropout_out: float = 0.1,
    attention: bool = True,
    encoder_embed_dim: int = 512,
    encoder_output_units: int = 512,
    pretrained_embed: Optional[nn.Embedding] = None,
    share_input_output_embed: bool = False,
    adaptive_softmax_cutoff: Optional[int] = None,
):
    """Set up an LSTM-cell decoder with an optional ``AttentionLayer``.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out`` and passed to the output
            projection / adaptive softmax.
        attention: when true, an ``AttentionLayer`` is created; otherwise
            ``self.attention`` is ``None``.
        encoder_embed_dim: accepted but not read by this constructor.
        encoder_output_units: stored and passed to ``AttentionLayer``.
        pretrained_embed: used as ``embed_tokens`` instead of a new table
            when not ``None``.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: when not ``None``, an ``AdaptiveSoftmax`` is
            created instead of ``fc_out``.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    # need_attn is set unconditionally, independent of the `attention` flag.
    self.need_attn = True
    self.adaptive_softmax = None

    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units

    # The first cell takes hidden_size + embed_dim inputs, later cells
    # take hidden_size.
    cells = []
    for layer in range(num_layers):
        if layer == 0:
            cell_input = hidden_size + embed_dim
        else:
            cell_input = hidden_size
        cells.append(LSTMCell(input_size=cell_input, hidden_size=hidden_size))
    self.layers = nn.ModuleList(cells)

    if attention:
        self.attention = AttentionLayer(
            hidden_size, encoder_output_units, hidden_size
        )
    else:
        self.attention = None

    # Bridge to out_embed_dim only when the widths differ.
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)

    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            vocab_size, embed_dim, adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)
def __init__(
    self,
    dictionary,
    lang_dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention=True,
    encoder_output_units=512,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
    lang_embedding_size=32,
):
    """Set up an attention-free LSTM-cell decoder with a language embedding.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        lang_dictionary: stored on the instance; its length sizes the
            language embedding table.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out`` and passed to ``fc_out``.
        attention: accepted but not read; ``self.attention`` is always
            ``None`` here.
        encoder_output_units: stored; input width of the two encoder-state
            projections and part of the first cell's input size.
        pretrained_embed: accepted but not read by this constructor.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: accepted but not read; ``adaptive_softmax``
            stays ``None``.
        lang_embedding_size: width of the language embedding.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed

    self.lang_embedding_size = lang_embedding_size
    self.lang_dictionary = lang_dictionary
    self.embed_langs = nn.Embedding(len(lang_dictionary), lang_embedding_size)

    self.need_attn = False
    self.adaptive_softmax = None

    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units
    # Two separate projections from encoder width to decoder hidden size.
    self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
    self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

    # Only the first cell sees the concatenated wide input.
    first_layer_input = (
        hidden_size + embed_dim + lang_embedding_size + encoder_output_units
    )
    cells = []
    for layer in range(num_layers):
        cell_input = first_layer_input if layer == 0 else hidden_size
        cells.append(LSTMCell(input_size=cell_input, hidden_size=hidden_size))
    self.layers = nn.ModuleList(cells)

    self.attention = None

    # Bridge to out_embed_dim only when the widths differ.
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)
def __init__(
    self,
    dictionary,
    embed_dim=320,
    hidden_size=512,
    num_layers=1,
    bidirectional=False,
    left_pad=True,
    padding_value=0.,
    dropout_in=0.1,
    dropout_out=0.1,
):
    """Set up an ``nn.LSTM`` encoder over embedded tokens.

    Args:
        dictionary: source vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding and LSTM input size.
        hidden_size: LSTM hidden size.
        num_layers: number of LSTM layers.
        bidirectional: passed to ``nn.LSTM``; doubles ``output_units``.
        left_pad: stored on the instance as ``left_pad``.
        padding_value: stored on the instance as ``padding_value``.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out``; also the LSTM's dropout when
            there is more than one layer.
    """
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size

    vocab_size = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(vocab_size, embed_dim, self.padding_idx)

    # Dropout is passed to the LSTM only when there is more than one layer.
    if num_layers > 1:
        lstm_dropout = self.dropout_out
    else:
        lstm_dropout = 0.
    self.lstm = nn.LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=lstm_dropout,
        bidirectional=bidirectional,
    )

    self.left_pad = left_pad
    self.padding_value = padding_value

    # output_units is twice the hidden size for a bidirectional LSTM.
    self.output_units = hidden_size * 2 if bidirectional else hidden_size
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention=True,
    encoder_embed_dim=512,
    encoder_output_units=512,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
):
    """Set up an LSTM-cell decoder that carries two attention layers.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``; must equal
            ``encoder_output_units``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out`` and passed to the output
            projection / adaptive softmax.
        attention: when true, ``attention_1`` and ``attention_2`` are
            created; otherwise both are ``None``.
        encoder_embed_dim: accepted but not read by this constructor.
        encoder_output_units: stored; used for the attention layers and the
            first cell's input size.
        pretrained_embed: used as ``embed_tokens`` instead of a new table
            when not ``None``.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: when not ``None``, an ``AdaptiveSoftmax`` is
            created instead of ``fc_out``.

    Raises:
        AssertionError: if ``encoder_output_units != hidden_size``.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    # need_attn is set unconditionally, independent of the `attention` flag.
    self.need_attn = True
    self.adaptive_softmax = None

    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units
    assert encoder_output_units == hidden_size, \
        'encoder_output_units ({}) != hidden_size ({})'.format(
            encoder_output_units, hidden_size)
    # TODO another Linear layer if not equal

    # The first cell takes encoder_output_units + embed_dim inputs, later
    # cells take hidden_size.
    cells = []
    for layer in range(num_layers):
        if layer == 0:
            cell_input = encoder_output_units + embed_dim
        else:
            cell_input = hidden_size
        cells.append(LSTMCell(input_size=cell_input, hidden_size=hidden_size))
    self.layers = nn.ModuleList(cells)

    # Two attention modules, both present or both absent. No layer for
    # combining their outputs is created in this constructor.
    if attention:
        self.attention_1 = AttentionLayer(encoder_output_units, hidden_size)
    else:
        self.attention_1 = None
    if attention:
        self.attention_2 = AttentionLayer(encoder_output_units, hidden_size)
    else:
        self.attention_2 = None

    # Bridge to out_embed_dim only when the widths differ.
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)

    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            vocab_size, embed_dim, adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    bidirectional=False,
    left_pad=True,
    pretrained_embed=None,
    padding_value=0.,
):
    """Set up a two-source encoder: an LSTM plus a convolutional encoder.

    Args:
        dictionary: indexable holder of two vocabularies; ``dictionary[0]``
            and ``dictionary[1]`` are read here, and the whole object is
            forwarded to the base constructor.
        embed_dim: embedding width for the first source and LSTM input
            size; the second source uses twice this width when
            ``bidirectional`` is set.
        hidden_size: LSTM hidden size.
        num_layers: number of LSTM layers.
        dropout_in: stored as ``dropout_in`` and passed to ``FConvEncoder``.
        dropout_out: stored as ``dropout_out``; also the LSTM's dropout when
            there is more than one layer.
        bidirectional: passed to the LSTM; doubles ``output_units`` and the
            second source's embedding width.
        left_pad: stored as ``left_pad`` and passed to ``FConvEncoder``.
        pretrained_embed: when not ``None``, unpacked into exactly two
            embedding tables (first source, second source).
        padding_value: stored on the instance as ``padding_value``.
    """
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.bidirectional = bidirectional
    self.hidden_size = hidden_size

    first_vocab_size = len(dictionary[0])
    second_vocab_size = len(dictionary[1])
    self.padding_idx_1 = dictionary[0].pad()
    self.padding_idx_2 = dictionary[1].pad()

    # The second source uses a doubled width in the bidirectional case.
    second_embed_dim = 2 * embed_dim if bidirectional else embed_dim

    if pretrained_embed is not None:
        self.embed_tokens_1, self.embed_tokens_2 = pretrained_embed
    else:
        self.embed_tokens_1 = Embedding(
            first_vocab_size, embed_dim, self.padding_idx_1
        )
        self.embed_tokens_2 = Embedding(
            second_vocab_size, second_embed_dim, self.padding_idx_2
        )

    # Dropout is passed to the LSTM only when there is more than one layer.
    if num_layers > 1:
        lstm_dropout = self.dropout_out
    else:
        lstm_dropout = 0.
    self.lstm1 = LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=lstm_dropout,
        bidirectional=bidirectional,
    )

    # The second source is encoded by a convolutional encoder; no second
    # LSTM is instantiated.
    self.fconv2 = FConvEncoder(
        dictionary[1],
        second_embed_dim,
        convolutions=[(512, 3)] * 15,
        dropout=dropout_in,
        left_pad=left_pad,
    )
    self.fconv2.num_attention_layers = 1

    self.left_pad = left_pad
    self.padding_value = padding_value

    # output_units is twice the hidden size for a bidirectional LSTM.
    self.output_units = hidden_size * 2 if bidirectional else hidden_size
def build_model(cls, args, task):
    """Build a new model instance.

    Fills in architecture defaults on ``args``, validates option
    combinations, prepares the encoder/decoder embeddings and returns
    ``cls(encoder, decoder)``.

    Raises:
        ValueError: when encoder and decoder layer counts differ, or when
            the embedding-sharing options are inconsistent.
    """
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    if args.encoder_layers != args.decoder_layers:
        raise ValueError('--encoder-layers must match --decoder-layers')

    def _embedding_from_file(embed_path, dictionary, embed_dim):
        # Create a table for `dictionary` and fill it from `embed_path`.
        table = Embedding(len(dictionary), embed_dim, dictionary.pad())
        pretrained_vectors = utils.parse_embedding(embed_path)
        utils.print_embed_overlap(pretrained_vectors, dictionary)
        return utils.load_embedding(pretrained_vectors, dictionary, table)

    # NOTE(review): the default branch below indexes task.source_dictionary
    # as a pair, while the file-loading branch passes it whole and the
    # shared-embedding branch hands the resulting pair to the decoder --
    # confirm those two paths are actually supported for this model.
    if args.encoder_embed_path:
        pretrained_encoder_embed = _embedding_from_file(
            args.encoder_embed_path,
            task.source_dictionary,
            args.encoder_embed_dim,
        )
    else:
        first_vocab_size = len(task.source_dictionary[0])
        second_vocab_size = len(task.source_dictionary[1])
        # The second table is twice as wide for a bidirectional encoder.
        if args.encoder_bidirectional:
            second_embed_dim = 2 * args.encoder_embed_dim
        else:
            second_embed_dim = args.encoder_embed_dim
        pretrained_encoder_embed = [
            Embedding(
                first_vocab_size,
                args.encoder_embed_dim,
                task.source_dictionary[0].pad(),
            ),
            Embedding(
                second_vocab_size,
                second_embed_dim,
                task.source_dictionary[1].pad(),
            ),
        ]

    if not args.share_all_embeddings:
        # separate decoder input embeddings
        pretrained_decoder_embed = None
        if args.decoder_embed_path:
            pretrained_decoder_embed = _embedding_from_file(
                args.decoder_embed_path,
                task.target_dictionary,
                args.decoder_embed_dim,
            )
    else:
        # double check all parameters combinations are valid
        if task.source_dictionary != task.target_dictionary:
            raise ValueError(
                '--share-all-embeddings requires a joint dictionary')
        decoder_path = args.decoder_embed_path
        if decoder_path and decoder_path != args.encoder_embed_path:
            raise ValueError(
                '--share-all-embed not compatible with --decoder-embed-path')
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError(
                '--share-all-embeddings requires --encoder-embed-dim to '
                'match --decoder-embed-dim')
        pretrained_decoder_embed = pretrained_encoder_embed
        args.share_decoder_input_output_embed = True

    # one last double check of parameter combinations
    dims_differ = args.decoder_embed_dim != args.decoder_out_embed_dim
    if args.share_decoder_input_output_embed and dims_differ:
        raise ValueError(
            '--share-decoder-input-output-embeddings requires '
            '--decoder-embed-dim to match --decoder-out-embed-dim')

    encoder = MultiSourceLSTMEncoder(
        dictionary=task.source_dictionary,
        embed_dim=args.encoder_embed_dim,
        hidden_size=args.encoder_hidden_size,
        num_layers=args.encoder_layers,
        dropout_in=args.encoder_dropout_in,
        dropout_out=args.encoder_dropout_out,
        bidirectional=args.encoder_bidirectional,
        pretrained_embed=pretrained_encoder_embed,
    )

    use_attention = options.eval_bool(args.decoder_attention)
    # The cutoff list is parsed only for the adaptive-loss criterion.
    if args.criterion == 'adaptive_loss':
        softmax_cutoff = options.eval_str_list(
            args.adaptive_softmax_cutoff, type=int)
    else:
        softmax_cutoff = None

    decoder = MultiSourceLSTMDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        hidden_size=args.decoder_hidden_size,
        out_embed_dim=args.decoder_out_embed_dim,
        num_layers=args.decoder_layers,
        dropout_in=args.decoder_dropout_in,
        dropout_out=args.decoder_dropout_out,
        attention=use_attention,
        encoder_embed_dim=args.encoder_embed_dim,
        encoder_output_units=encoder.output_units,
        pretrained_embed=pretrained_decoder_embed,
        share_input_output_embed=args.share_decoder_input_output_embed,
        adaptive_softmax_cutoff=softmax_cutoff,
    )
    return cls(encoder, decoder)
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    encoder_output_units=0,
    attn_type=None,
    attn_dim=0,
    need_attn=False,
    residual=False,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
    max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
    scheduled_sampling_rate_scheduler=None,
):
    """Set up an LSTM-cell decoder with optional attention and dropout modules.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers (also stored).
        dropout_in: rate for ``dropout_in_module``.
        dropout_out: rate for ``dropout_out_module``; also passed to the
            output projection / adaptive softmax.
        encoder_output_units: added to the input size of every cell; forced
            to 0 when no attention is configured.
        attn_type: ``None``/``"none"``, ``"bahdanau"`` or ``"luong"``
            (stringified and compared case-insensitively).
        attn_dim: passed to ``BahdanauAttention`` only.
        need_attn: stored as ``need_attn``; forced to ``False`` when no
            attention is configured.
        residual: stored as ``residual``.
        pretrained_embed: used as ``embed_tokens`` instead of a new table
            when not ``None``.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: when not ``None``, an ``AdaptiveSoftmax`` is
            created instead of ``fc_out``.
        max_target_positions: stored as ``max_target_positions``.
        scheduled_sampling_rate_scheduler: stored on the instance unchanged.

    Raises:
        ValueError: if ``attn_type`` is not one of the recognised values.
    """
    super().__init__(dictionary)
    owner_name = self.__class__.__name__
    self.dropout_in_module = FairseqDropout(
        dropout_in * 1.0, module_name=owner_name
    )
    self.dropout_out_module = FairseqDropout(
        dropout_out * 1.0, module_name=owner_name
    )
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed

    # Normalise the attention selector once; None is handled like "none".
    attn_kind = "none" if attn_type is None else str(attn_type).lower()
    if attn_kind == "none":
        # no attention, no encoder output needed (language model case)
        need_attn = False
        encoder_output_units = 0
    self.need_attn = need_attn
    self.residual = residual
    self.max_target_positions = max_target_positions
    self.num_layers = num_layers

    self.adaptive_softmax = None
    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units

    # Layer 0 consumes the token embedding, deeper layers the previous
    # hidden state; every layer additionally receives encoder_output_units.
    cells = []
    for layer in range(num_layers):
        recurrent_width = embed_dim if layer == 0 else hidden_size
        cells.append(
            LSTMCell(
                input_size=encoder_output_units + recurrent_width,
                hidden_size=hidden_size,
            )
        )
    self.layers = nn.ModuleList(cells)

    if attn_kind == "none":
        self.attention = None
    elif attn_kind == "bahdanau":
        self.attention = speech_attention.BahdanauAttention(
            hidden_size, encoder_output_units, attn_dim,
        )
    elif attn_kind == "luong":
        self.attention = speech_attention.LuongAttention(
            hidden_size, encoder_output_units,
        )
    else:
        raise ValueError("unrecognized attention type.")

    # Bridge to out_embed_dim only when the widths differ.
    projection_in = hidden_size + encoder_output_units
    if projection_in != out_embed_dim:
        self.additional_fc = Linear(projection_in, out_embed_dim)

    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            vocab_size,
            hidden_size,
            adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)

    self.scheduled_sampling_rate_scheduler = scheduled_sampling_rate_scheduler
def __init__(
    self,
    dictionary,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention=True,
    encoder_output_units=512,
    pretrained_embed=None,
    share_input_output_embed=False,
    adaptive_softmax_cutoff=None,
    use_scratchpad=False,
    residual=False,
):
    """Set up an LSTM-cell decoder with optional attention and scratchpad writer.

    Args:
        dictionary: target vocabulary; forwarded to the base constructor and
            used for the vocabulary size and padding index.
        embed_dim: width of the token embedding.
        hidden_size: hidden size of every ``LSTMCell``.
        out_embed_dim: input width of the output projection.
        num_layers: number of stacked ``LSTMCell`` layers.
        dropout_in: stored on the instance as ``dropout_in``.
        dropout_out: stored as ``dropout_out`` and passed to the output
            projection / adaptive softmax.
        attention: when true, a bias-free ``AttentionLayer`` is created;
            otherwise ``self.attention`` is ``None``.
        encoder_output_units: stored; when it differs from ``hidden_size``
            two projection layers for encoder states are created.
        pretrained_embed: used as ``embed_tokens`` instead of a new table
            when not ``None``.
        share_input_output_embed: when true, no ``fc_out`` layer is created.
        adaptive_softmax_cutoff: when not ``None``, an ``AdaptiveSoftmax`` is
            created instead of ``fc_out``.
        use_scratchpad: stored; when true an ``AttentiveWriter`` is created.
        residual: stored as ``residual``.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    # need_attn is set unconditionally, independent of the `attention` flag.
    self.need_attn = True
    self.use_scratchpad = use_scratchpad
    self.residual = residual

    self.adaptive_softmax = None
    vocab_size = len(dictionary)
    pad_index = dictionary.pad()
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab_size, embed_dim, pad_index)

    self.encoder_output_units = encoder_output_units
    if encoder_output_units == hidden_size:
        # Widths already agree, so no projection layers are created.
        self.encoder_hidden_proj = None
        self.encoder_cell_proj = None
    else:
        self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
        self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)

    # The first cell takes hidden_size + embed_dim inputs, later cells
    # take hidden_size.
    cells = []
    for layer in range(num_layers):
        if layer == 0:
            cell_input = hidden_size + embed_dim
        else:
            cell_input = hidden_size
        cells.append(LSTMCell(input_size=cell_input, hidden_size=hidden_size))
    self.layers = nn.ModuleList(cells)

    # TODO make bias configurable
    self.attention = (
        AttentionLayer(hidden_size, encoder_output_units, hidden_size, bias=False)
        if attention
        else None
    )

    # Bridge to out_embed_dim only when the widths differ.
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)

    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined
        self.adaptive_softmax = AdaptiveSoftmax(
            vocab_size, embed_dim, adaptive_softmax_cutoff,
            dropout=dropout_out,
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, vocab_size, dropout=dropout_out)

    # Scratchpad extension: the writer is only built when requested.
    if self.use_scratchpad:
        self.attentive_writer = AttentiveWriter(
            hidden_size, encoder_output_units, encoder_output_units
        )