def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_bi_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not getattr(args, "max_target_positions", None):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
            max_char_len=args.max_char_len,
            char_inputs=args.char_inputs,
        )
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_embed_dim, task.dictionary.pad())

    logger.info(args)

    decoder = BiTransformerDecoder(args, task.output_dictionary, embed_tokens)
    return BiTransformerLanguageModel(decoder)

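# Hedged aside, not part of the builders above/below: `args.character_filters` is a
# string that eval() turns into a list of (kernel_width, num_filters) tuples for the
# character CNN inside CharacterTokenEmbedder. The spec below is illustrative, not a
# claim about any particular config's default; ast.literal_eval works as a safer
# drop-in only when the spec is a plain literal (no arithmetic such as `+` or `*`).
import ast

character_filters = '[(1, 64), (2, 128), (3, 192), (4, 256)]'
filters = ast.literal_eval(character_filters)
assert filters == [(1, 64), (2, 128), (3, 192), (4, 256)]
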
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_input_dim, task.dictionary.pad())

    decoder = TransformerDecoder(args, task.dictionary, embed_tokens, no_encoder_attn=True, final_norm=False)
    return TransformerLanguageModel(decoder)

def test_character_token_embedder(self):
    vocab = Dictionary()
    vocab.add_symbol('hello')
    vocab.add_symbol('there')

    embedder = CharacterTokenEmbedder(vocab, [(2, 16), (4, 32), (8, 64), (16, 2)], 64, 5, 2)

    test_sents = [['hello', 'unk', 'there'], ['there'], ['hello', 'there']]
    max_len = max(len(s) for s in test_sents)
    input = torch.LongTensor(len(test_sents), max_len + 2).fill_(vocab.pad())
    for i in range(len(test_sents)):
        input[i][0] = vocab.eos()
        for j in range(len(test_sents[i])):
            input[i][j + 1] = vocab.index(test_sents[i][j])
        input[i][j + 2] = vocab.eos()
    embs = embedder(input)

    assert embs.size() == (len(test_sents), max_len + 2, 5)
    self.assertAlmostEqual(embs[0][0], embs[1][0])
    self.assertAlmostEqual(embs[0][0], embs[0][-1])
    self.assertAlmostEqual(embs[0][1], embs[2][1])
    self.assertAlmostEqual(embs[0][3], embs[1][1])

    embs.sum().backward()
    assert embedder.char_embeddings.weight.grad is not None

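# Hedged sketch of a harness for the test above, assuming it is meant to live in a
# unittest.TestCase that also defines a tensor-aware assertAlmostEqual, and assuming
# the usual fairseq import locations for Dictionary and CharacterTokenEmbedder.
import unittest

from fairseq.data import Dictionary
from fairseq.modules import CharacterTokenEmbedder


class TestCharacterTokenEmbedder(unittest.TestCase):
    def assertAlmostEqual(self, t1, t2):
        # element-wise comparison: same shape, max abs difference below a tolerance
        self.assertEqual(t1.size(), t2.size(), 'size mismatch')
        self.assertLess((t1 - t2).abs().max(), 1e-6)

    # test_character_token_embedder (defined above) would be attached here as a method


if __name__ == '__main__':
    unittest.main()
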
def build_model_decoder(cls, args, dictionary, output_dictionary=None):
    if output_dictionary is None:
        output_dictionary = dictionary

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return BiTransformerDecoder(args, output_dictionary, embed_tokens, no_encoder_attn=True, final_norm=False)

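# Hedged aside: `options.eval_str_list` (from fairseq.options) converts a
# comma-separated command-line string such as --adaptive-input-cutoff into a typed
# list, which AdaptiveInput uses as vocabulary-frequency cutoffs for its embedding
# clusters. The cutoff values below are illustrative, not defaults of any model.
from fairseq import options

cutoff = options.eval_str_list('20000,60000', type=int)
assert cutoff == [20000, 60000]
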
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(len(task.source_dictionary), args.decoder_input_dim, task.source_dictionary.pad())

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)

def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        embed_tokens = Embedding(len(task.dictionary), args.decoder_input_dim, task.dictionary.pad())

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = LightConvDecoder(args, task.output_dictionary, embed_tokens, no_encoder_attn=True, final_norm=False)
    return LightConvLanguageModel(decoder)

def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, "decoder_layers_to_keep"):
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        print("Adaptive Input " + str(args.adaptive_input))
        print("Adaptive Cutoff: " + str(args.adaptive_input_cutoff))
        print("Vocab Size: " + str(len(task.source_dictionary.symbols)))
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(args, task.source_dictionary, args.decoder_input_dim)

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return cls(decoder)

def build_model_input(cls, args, dictionary):
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.context_embeddings_layers = getattr(args, 'context_embeddings_layers', [-1])
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        if args.context_embeddings_type == 'bert':
            embed_tokens = BERTEmbedder(
                args.context_embeddings_bert_model,
                layers=args.context_embeddings_layers)
        elif args.context_embeddings_type == 'transformers':
            embed_tokens = TransformerEmbedder(
                args.context_embeddings_bert_model,
                layers=args.context_embeddings_layers)
        else:
            raise NotImplementedError
    elif args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        args.decoder_embed_pretrained = getattr(args, 'decoder_embed_pretrained', '')
        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary, args.decoder_input_dim)
        else:
            embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens

def build_model(cls, args, task):
    """Build a new model instance."""
    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if safe_getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = safe_getattr(
            args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
        )

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(
            args, task.source_dictionary, args.decoder_input_dim
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert (
            args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
        ), "{} != {}".format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True
    )
    return cls(decoder)

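# Hedged aside: a minimal stand-in for the safe_getattr helper used above, assuming
# the intent is "getattr that treats an attribute explicitly set to None as missing".
# Upstream fairseq ships its own version (fairseq.utils.safe_getattr); this sketch is
# only so the snippet reads in isolation.
def safe_getattr(obj, k, default=None):
    """Return obj.k unless it is missing or None, in which case return default."""
    value = getattr(obj, k, None)
    return value if value is not None else default
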
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, 'no_tie_adaptive_proj') and args.no_tie_adaptive_proj == False:
        # backward compatibility
        args.tie_adaptive_proj = True

    if not hasattr(args, 'max_positions'):
        args.max_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.input_dim,
            args.adaptive_input_factor,
            args.embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        embed_tokens = nn.Embedding(len(task.dictionary), args.embed_dim, task.dictionary.pad())

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.input_dim == args.output_dim

    decoder = SpanTransformerDecoder(args, task.dictionary, embed_tokens, final_norm=False)
    return SpanTransformerAutoregressive(decoder)

def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    set_default_args(args)

    if args.character_embeddings:
        char_embed = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            160,
            args.char_embedder_highway_layers)
        args.char_embed = char_embed

    if args.encoder_layers != args.decoder_layers:
        raise ValueError("--encoder-layers must match --decoder-layers")

    def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(embed_path)
        utils.print_embed_overlap(embed_dict, dictionary)
        return utils.load_embedding(embed_dict, dictionary, embed_tokens)

    args.pretrained_word_embed = load_pretrained_embedding_from_file(
        args.word_embed_path, task.source_dictionary, args.encoder_embed_dim)
    args.pretrained_word_embed.requires_grad = False

    if args.encoder_embed_path:
        args.pretrained_encoder_embed = load_pretrained_embedding_from_file(
            args.encoder_embed_path, task.source_dictionary, args.encoder_embed_dim)
    else:
        num_embeddings = len(task.source_dictionary)
        args.pretrained_encoder_embed = Embedding(
            num_embeddings, args.encoder_embed_dim, task.source_dictionary.pad())

    if args.share_all_embeddings:
        # double check all parameter combinations are valid
        if task.source_dictionary != task.target_dictionary:
            raise ValueError("--share-all-embeddings requires a joint dictionary")
        if args.decoder_embed_path and (args.decoder_embed_path != args.encoder_embed_path):
            raise ValueError(
                "--share-all-embeddings not compatible with --decoder-embed-path")
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError(
                "--share-all-embeddings requires --encoder-embed-dim to "
                "match --decoder-embed-dim")
        args.pretrained_decoder_embed = args.pretrained_encoder_embed
        args.share_decoder_input_output_embed = True
    else:
        # separate decoder input embeddings
        args.pretrained_decoder_embed = None
        if args.decoder_embed_path:
            args.pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args.decoder_embed_path,
                task.target_dictionary,
                args.decoder_embed_dim,
            )

    # one last double check of parameter combinations
    if args.share_decoder_input_output_embed and (
            args.decoder_embed_dim != args.decoder_out_embed_dim):
        raise ValueError(
            "--share-decoder-input-output-embeddings requires "
            "--decoder-embed-dim to match --decoder-out-embed-dim")

    if args.encoder_freeze_embed:
        args.pretrained_encoder_embed.weight.requires_grad = False
    if args.decoder_freeze_embed:
        args.pretrained_decoder_embed.weight.requires_grad = False

    args.source_dictionary = task.source_dictionary
    args.target_dictionary = task.target_dictionary

    encoder = cls.build_encoder(task, args, task.source_dictionary)
    decoder = cls.build_decoder(
        task, args, task.target_dictionary, encoder,
    )
    return cls(args, encoder, decoder)

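# Hedged aside: `utils.parse_embedding` above reads a GloVe/word2vec-style text file;
# in upstream fairseq the first line is treated as a header and every following line
# is "<word> <v1> ... <vD>". The toy file below is purely illustrative.
import tempfile

from fairseq import utils

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write('2 4\n')                        # header line: vocab size, embedding dim
    f.write('hello 0.1 0.2 0.3 0.4\n')
    f.write('there 0.5 0.6 0.7 0.8\n')
    embed_path = f.name

embed_dict = utils.parse_embedding(embed_path)   # {word: 1-D FloatTensor}
assert embed_dict['hello'].numel() == 4
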
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = getattr(args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS)

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(args, task.source_dictionary, args.decoder_input_dim)

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert (
            args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
        ), "{} != {}".format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(args, task.target_dictionary, embed_tokens, no_encoder_attn=True)

    if getattr(args, "lm_path", None):
        print('load Transformer_LM from {}'.format(args.lm_path))
        state = checkpoint_utils.load_checkpoint_to_cpu(args.lm_path)
        lm_args = state["args"]
        lm_args.data = args.data
        assert getattr(lm_args, "lm_path", None) is None
        task = tasks.setup_task(lm_args)
        decoder = task.build_model(lm_args)
        print('restore Transformer_LM from {}'.format(args.lm_path))
        decoder.load_state_dict(state["model"], strict=True)
        decoder.dim_output = len(task.dictionary)

    return cls(decoder)

def __init__(
    self,
    dictionary,
    targets,
    character_embeddings,
    embed_dim=512,
    hidden_size=512,
    out_embed_dim=512,
    num_layers=2,
    dropout_in=0.1,
    dropout_out=0.1,
    encoder_output_units=512,
    adaptive_softmax_cutoff=None,
    bidirectional=True,
    memory_dim=4096,
    memory_clip_value=3.0,
    state_clip_value=3.0,
    residual=True,
    character_filters=None,
    character_embedding_dim=16,
    char_embedder_highway_layers=0,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.directions = 2 if bidirectional else 1
    self.memory_dim = memory_dim
    self.memory_clip_value = memory_clip_value
    self.state_clip_value = state_clip_value
    self.residual = residual
    self.targets = targets
    self.num_layers = num_layers

    if character_embeddings:
        self.embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(character_filters),
            character_embedding_dim,
            embed_dim,
            char_embedder_highway_layers)
    else:
        self.embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad())

    assert num_layers > 0
    for layer_index in range(self.num_layers):
        forward_layer = LstmCellWithProjection(
            input_size=embed_dim if layer_index == 0 else hidden_size,
            hidden_size=hidden_size,
            cell_size=memory_dim,
            go_forward=True,
            recurrent_dropout_probability=dropout_out,
            memory_cell_clip_value=memory_clip_value,
            state_projection_clip_value=state_clip_value,
            is_training=self.training)
        self.add_module('forward_layer_{}'.format(layer_index), forward_layer)

        if bidirectional:
            backward_layer = LstmCellWithProjection(
                input_size=embed_dim if layer_index == 0 else hidden_size,
                hidden_size=hidden_size,
                cell_size=memory_dim,
                go_forward=False,
                recurrent_dropout_probability=dropout_out,
                memory_cell_clip_value=memory_clip_value,
                state_projection_clip_value=state_clip_value,
                is_training=self.training)
            self.add_module('backward_layer_{}'.format(layer_index), backward_layer)

    self.adaptive_softmax = self.additional_fc = self.fc_out = None
    if adaptive_softmax_cutoff is not None:
        self.adaptive_softmax = AdaptiveSoftmax(
            len(dictionary), out_embed_dim, adaptive_softmax_cutoff, dropout=dropout_out)
    else:
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim, dropout=dropout_out)
        self.fc_out = Linear(out_embed_dim, len(dictionary), dropout=dropout_out)
        self.adaptive_softmax = None

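# Hedged aside: add_module (used above for the per-layer LSTM cells) registers a
# child under a dynamic name so it shows up in parameters()/state_dict() and can be
# fetched back with getattr in the forward pass. Illustrated here with plain
# nn.Linear layers rather than LstmCellWithProjection, purely as a stand-in sketch.
import torch.nn as nn


class Stack(nn.Module):
    def __init__(self, num_layers=2, dim=8):
        super().__init__()
        for layer_index in range(num_layers):
            self.add_module('forward_layer_{}'.format(layer_index), nn.Linear(dim, dim))

    def forward(self, x):
        # retrieve the dynamically registered children by the names used above
        for name, _ in self.named_children():
            x = getattr(self, name)(x)
        return x


print(sum(p.numel() for p in Stack().parameters()))  # registered layers contribute parameters
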
def build_model_input(cls, args, dictionary):
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        if args.context_embeddings_type == 'qbert':
            embed_tokens = QBERTEmbedder.from_args(args, {"dictionary": dictionary})
        elif args.context_embeddings_type == 'bert':
            assert not args.context_embeddings_use_embeddings
            embed_tokens = BERTEmbedder(args.context_embeddings_bert_model, False)
        elif args.context_embeddings_type == 'elmo':
            embed_tokens = ELMOEmbedder(
                args.context_embeddings_elmo_options,
                args.context_embeddings_elmo_weights,
                False)
        elif args.context_embeddings_type == 'flair':
            embed_tokens = FlairEmbedder(
                args.context_embeddings_flair_forward,
                args.context_embeddings_flair_backward,
                args.context_embeddings_flair_embeddings,
                False)
        else:
            raise NotImplementedError
    elif args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
            from fairseq import utils
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary, args.decoder_input_dim)
            if getattr(args, 'decoder_embed_pretrained', False):
                for par in embed_tokens.parameters():
                    par.requires_grad = False
        else:
            embed_tokens = Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens