def build_model_decoder(cls, args, dictionary, output_dictionary=None):
    """Construct a bidirectional transformer decoder for language modeling.

    The token embedder is picked from the configuration flags: character
    CNN embeddings, adaptive input embeddings, or a plain lookup table.
    When ``output_dictionary`` is omitted, the input ``dictionary`` is
    reused for the output vocabulary.
    """
    if output_dictionary is None:
        output_dictionary = dictionary

    # Branch order reflects the precedence of the configuration flags.
    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed to come from a
        # trusted command line, not untrusted input.
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        cutoffs = options.eval_str_list(args.adaptive_input_cutoff, type=int)
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            cutoffs,
        )
    else:
        embed_tokens = Embedding(
            len(dictionary), args.decoder_input_dim, dictionary.pad())

    return BiTransformerDecoder(
        args,
        output_dictionary,
        embed_tokens,
        no_encoder_attn=True,
        final_norm=False,
    )
def build_model(cls, args, task):
    """Build a new model instance."""
    # Fill in defaults so checkpoints from older models keep working.
    base_lm_architecture(args)

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    src_dict = task.source_dictionary
    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            src_dict,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(src_dict),
            src_dict.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(
            len(src_dict), args.decoder_input_dim, src_dict.pad())

    if args.tie_adaptive_weights:
        # Weight tying is only valid when the adaptive input and the
        # adaptive softmax share factor, cutoffs, and dimensions.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # Fill in arguments missing from checkpoints of older models.
    base_lm_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    vocab = task.dictionary
    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            vocab,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(vocab),
            vocab.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(
            len(vocab), args.decoder_input_dim, vocab.pad())

    if args.tie_adaptive_weights:
        # Tying input and softmax weights requires matching adaptive setups.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = LightConvDecoder(
        args, task.output_dictionary, embed_tokens,
        no_encoder_attn=True, final_norm=False,
    )
    return LightConvLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, "decoder_layers_to_keep"):
        # Pruned-layer training: the layer count follows the keep-list.
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    src_dict = task.source_dictionary
    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            src_dict,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        # Debug prints preserved exactly from the original implementation.
        print("Adaptive Input " + str(args.adaptive_input))
        print("Adaptive Cutoff: " + str(args.adaptive_input_cutoff))
        print("Vocab Size: " + str(len(src_dict.symbols)))
        embed_tokens = AdaptiveInput(
            len(src_dict),
            src_dict.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(
            args, src_dict, args.decoder_input_dim)

    if args.tie_adaptive_weights:
        # Adaptive input and adaptive softmax must agree before tying.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return cls(decoder)
def build_model_input(cls, args, dictionary):
    """Build the decoder-input token embedder selected by the config."""
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.context_embeddings_layers = getattr(
        args, 'context_embeddings_layers', [-1])
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        # Dispatch on the embedder type; both constructors take the same
        # pretrained-model identifier and layer selection.
        embedder_classes = {
            'bert': BERTEmbedder,
            'transformers': TransformerEmbedder,
        }
        embedder_cls = embedder_classes.get(args.context_embeddings_type)
        if embedder_cls is None:
            raise NotImplementedError
        embed_tokens = embedder_cls(
            args.context_embeddings_bert_model,
            layers=args.context_embeddings_layers)
    elif args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        args.decoder_embed_pretrained = getattr(
            args, 'decoder_embed_pretrained', '')
        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary,
                args.decoder_input_dim)
        else:
            embed_tokens = Embedding(
                len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens
def build_model(cls, args, task):
    """Build a new model instance."""
    if args.decoder_layers_to_keep:
        # Pruned-layer training: the layer count follows the keep-list.
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if safe_getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = safe_getattr(
            args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
        )

    src_dict = task.source_dictionary
    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            src_dict,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(src_dict),
            src_dict.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(
            args, src_dict, args.decoder_input_dim
        )

    if args.tie_adaptive_weights:
        # The adaptive input and adaptive softmax must agree before tying.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert (
            args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
        ), "{} != {}".format(
            args.adaptive_softmax_cutoff, args.adaptive_input_cutoff
        )
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True
    )
    return cls(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if args.decoder_layers_to_keep:
        # Pruned-layer training: the layer count follows the keep-list.
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

    src_dict = task.source_dictionary
    if args.adaptive_input:
        map_layer = AdaptiveInput(
            len(src_dict),
            src_dict.pad(),
            args.delight_emb_map_dim,
            args.adaptive_input_factor,
            args.delight_emb_map_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            no_scale_emb=args.no_scale_embedding)
    else:
        map_layer = get_embedding_layer(
            num_embeddings=len(src_dict),
            embedding_dim=args.delight_emb_map_dim,
            padding_idx=src_dict.pad())

    embed_tokens = DExTraEmb(args, map_layer=map_layer)

    if args.tie_adaptive_weights:
        # Tying requires the adaptive input and softmax to line up.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)

    decoder = DeLighTTransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )

    # print macs and params layer-wise
    if args.print_stats and is_master(args):
        cls.comptue_stats(args, decoder)  # [sic] name as defined on the class

    return DeLighTTransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance.

    Restores defaults for checkpoints from older models, builds the token
    embedder (character CNN, adaptive input, or plain lookup table),
    validates adaptive weight tying, and wraps the decoder.
    """
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    # Backward compatibility: old checkpoints stored the inverted flag
    # `no_tie_adaptive_proj`. A single getattr replaces the original
    # hasattr + attribute-read pair; the None default never compares
    # equal to False, so a missing attribute is still a no-op.
    if getattr(args, 'no_tie_adaptive_proj', None) == False:  # noqa: E712 -- matches False/0 exactly as before
        args.tie_adaptive_proj = True

    if not hasattr(args, 'max_positions'):
        args.max_positions = args.tokens_per_sample

    if args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.input_dim,
            args.adaptive_input_factor,
            args.embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        embed_tokens = nn.Embedding(
            len(task.dictionary), args.embed_dim, task.dictionary.pad())

    if args.tie_adaptive_weights:
        # Tying input/softmax weights requires matching adaptive setups.
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.input_dim == args.output_dim

    decoder = SpanTransformerDecoder(
        args, task.dictionary, embed_tokens, final_norm=False)
    return SpanTransformerAutoregressive(decoder)
def build_model(cls, args, task): """Build a new model instance.""" # make sure all arguments are present in older models base_lm_architecture(args) if args.decoder_layers_to_keep: args.decoder_layers = len(args.decoder_layers_to_keep.split(",")) if getattr(args, "max_target_positions", None) is None: args.max_target_positions = getattr(args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS) if args.character_embeddings: embed_tokens = CharacterTokenEmbedder( task.source_dictionary, eval(args.character_filters), args.character_embedding_dim, args.decoder_embed_dim, args.char_embedder_highway_layers, ) elif args.adaptive_input: embed_tokens = AdaptiveInput( len(task.source_dictionary), task.source_dictionary.pad(), args.decoder_input_dim, args.adaptive_input_factor, args.decoder_embed_dim, options.eval_str_list(args.adaptive_input_cutoff, type=int), args.quant_noise_pq, args.quant_noise_pq_block_size, ) else: embed_tokens = cls.build_embedding(args, task.source_dictionary, args.decoder_input_dim) if args.tie_adaptive_weights: assert args.adaptive_input assert args.adaptive_input_factor == args.adaptive_softmax_factor assert (args.adaptive_softmax_cutoff == args.adaptive_input_cutoff ), "{} != {}".format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff) assert args.decoder_input_dim == args.decoder_output_dim decoder = TransformerDecoder(args, task.target_dictionary, embed_tokens, no_encoder_attn=True) if getattr(args, "lm_path", None): print('load Transformer_LM from {}'.format(args.lm_path)) state = checkpoint_utils.load_checkpoint_to_cpu(args.lm_path) lm_args = state["args"] lm_args.data = args.data assert getattr(lm_args, "lm_path", None) is None task = tasks.setup_task(lm_args) decoder = task.build_model(lm_args) print('restore Transformer_LM from {}'.format(args.lm_path)) decoder.load_state_dict(state["model"], strict=True) decoder.dim_output = len(task.dictionary) return cls(decoder)
def build_model_input(cls, args, dictionary):
    """Build the decoder-input token embedder selected by the config.

    Chooses between contextual embedders (QBERT/BERT/ELMo/Flair),
    character CNN embeddings, adaptive input embeddings, pretrained
    embeddings loaded from a file, or a plain lookup table.
    """
    # make sure all arguments are present in older fairseq_ext
    args.context_embeddings = getattr(args, 'context_embeddings', False)
    args.max_source_positions = args.tokens_per_sample
    args.max_target_positions = args.tokens_per_sample

    if args.context_embeddings:
        if args.context_embeddings_type == 'qbert':
            embed_tokens = QBERTEmbedder.from_args(
                args, {"dictionary": dictionary})
        elif args.context_embeddings_type == 'bert':
            assert not args.context_embeddings_use_embeddings
            embed_tokens = BERTEmbedder(
                args.context_embeddings_bert_model, False)
        elif args.context_embeddings_type == 'elmo':
            embed_tokens = ELMOEmbedder(
                args.context_embeddings_elmo_options,
                args.context_embeddings_elmo_weights, False)
        elif args.context_embeddings_type == 'flair':
            embed_tokens = FlairEmbedder(
                args.context_embeddings_flair_forward,
                args.context_embeddings_flair_backward,
                args.context_embeddings_flair_embeddings, False)
        else:
            raise NotImplementedError
    elif args.character_embeddings:
        # NOTE(review): `eval` on a config string -- assumed trusted CLI input.
        embed_tokens = CharacterTokenEmbedder(
            dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(dictionary),
            dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int))
    else:
        def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                embed_dim):
            # Load word vectors from disk into a fresh Embedding table.
            from fairseq import utils
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        if args.decoder_embed_pretrained:
            embed_tokens = load_pretrained_embedding_from_file(
                args.decoder_embed_pretrained, dictionary,
                args.decoder_input_dim)
            # Freeze the pretrained embeddings. The original re-checked
            # `getattr(args, 'decoder_embed_pretrained', False)` here, but
            # inside this branch that attribute is known present and truthy,
            # so the redundant guard has been removed (same behavior).
            for par in embed_tokens.parameters():
                par.requires_grad = False
        else:
            embed_tokens = Embedding(
                len(dictionary), args.decoder_input_dim, dictionary.pad())

    return embed_tokens