def __init__(
    self,
    cfg: Wav2BartChrConfig,
    dictionary=None,
    embed_tokens=None,
    no_encoder_attn=False,
):
    super().__init__(dictionary)
    self.cfg = cfg
    # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
    from fairseq.models.bart import BARTModel

    if os.path.isfile(os.path.join(cfg.bart_path, 'model.pt')):
        print('loading bart from cfg path')
        bart = BARTModel.from_pretrained(cfg.bart_path, checkpoint_file='model.pt')
    else:
        print('loading bart from relative path')
        bart = BARTModel.from_pretrained('models/bart.base', checkpoint_file='model.pt')

    bart_decoder = bart.model.decoder
    bart_dictionary_size = len(bart_decoder.dictionary)
    self.decoder = TransformerDecoder(
        bart_decoder.args, bart_decoder.dictionary, bart_decoder.embed_tokens
    )
    self.decoder.load_state_dict(bart_decoder.state_dict())

    # self.output_embed_dim = cfg.decoder_embed_dim
    # Dirty hack to alter the output embedding layer of the decoder: untie the
    # input/output embeddings and re-project BART-vocabulary outputs onto the
    # (smaller) target dictionary.
    self.decoder.share_input_output_embed = False
    self.output_projection = nn.Linear(
        bart_dictionary_size, len(dictionary), bias=False
    )
    nn.init.normal_(
        self.output_projection.weight, mean=0, std=bart_dictionary_size ** -0.5
    )
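# Hedged shape sketch of the re-projection hack above: the extra linear layer
# maps BART-vocabulary-sized outputs onto the smaller target dictionary.
# Sizes are illustrative; the real ones come from the loaded checkpoint.
import torch
import torch.nn as nn

bart_vocab, char_vocab = 51201, 32  # illustrative, not the actual sizes
proj = nn.Linear(bart_vocab, char_vocab, bias=False)
nn.init.normal_(proj.weight, mean=0, std=bart_vocab ** -0.5)
logits = proj(torch.randn(2, 7, bart_vocab))  # [B, T, bart_vocab] -> [B, T, char_vocab]
assert logits.shape == (2, 7, char_vocab)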
def __init__(
    self,
    cfg: Wav2BartPoolConfig,
    dictionary=None,
    embed_tokens=None,
    no_encoder_attn=False,
):
    super().__init__(dictionary)
    self.cfg = cfg
    # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
    from fairseq.models.bart import BARTModel

    if os.path.isfile(os.path.join(cfg.bart_path, 'model.pt')):
        print('loading bart from cfg path')
        bart = BARTModel.from_pretrained(cfg.bart_path, checkpoint_file='model.pt')
    else:
        print('loading bart from relative path')
        bart = BARTModel.from_pretrained('models/bart.base', checkpoint_file='model.pt')

    bart_decoder = bart.model.decoder
    self.decoder = TransformerDecoder(
        bart_decoder.args, bart_decoder.dictionary, bart_decoder.embed_tokens
    )
    self.decoder.load_state_dict(bart_decoder.state_dict())
def from_roberta(roberta_enc: roberta.RobertaModel, args, dictionary):
    encoder = roberta_enc.encoder.sentence_encoder
    vocab_size, embed_dim = encoder.embed_tokens.weight.shape

    if args.share_all_embeddings:
        lm_head = roberta_enc.encoder.lm_head
        assert encoder.embed_tokens.weight is lm_head.weight, (
            "Can't use --share-all-embeddings with a model "
            "that was pretrained with --untie-weights-roberta_enc"
        )
    else:
        lm_head = roberta.RobertaLMHead(
            embed_dim, vocab_size, roberta_enc.args.activation_fn
        )

    dec_embs = nn.Embedding(vocab_size, embed_dim, dictionary.pad())
    if args.share_all_embeddings or args.share_decoder_input_output_embed:
        # Note: I wasn't able to use the Embedding _weight parameter to achieve this sharing.
        dec_embs.weight = lm_head.weight

    decoder = TransformerDecoder(
        RobertaEncDecModel.read_args_from_roberta(roberta_enc.args),
        dictionary,
        dec_embs,
        no_encoder_attn=False,
        output_projection=lm_head,
    )
    if getattr(args, "pretrained_decoder", False):
        decoder_dict = encoder.state_dict()

        # TODO: hide setting "encoder_attn" layers behind a flag.
        for k, w in list(decoder_dict.items()):
            if ".self_attn" in k:
                k_enc_attn = k.replace(".self_attn", ".encoder_attn")
                decoder_dict[k_enc_attn] = w.detach().clone()

        for k, w in lm_head.state_dict().items():
            decoder_dict["output_projection." + k] = w

        missing_keys, unexpected_keys = decoder.load_state_dict(
            decoder_dict, strict=False
        )
        # missing_keys = [m for m in missing_keys if ".encoder_attn" not in m]
        assert not missing_keys and not unexpected_keys, (
            "Failed to load state dict. "
            f"Missing keys: {missing_keys}. "
            f"Unexpected keys: {unexpected_keys}."
        )

    if args.share_all_embeddings:
        assert decoder.output_projection.weight is decoder.embed_tokens.weight
        assert encoder.embed_tokens.weight is decoder.embed_tokens.weight
    elif args.share_decoder_input_output_embed:
        assert decoder.output_projection.weight is decoder.embed_tokens.weight
        assert encoder.embed_tokens.weight is not decoder.embed_tokens.weight
    else:
        assert decoder.output_projection.weight is not decoder.embed_tokens.weight
        assert encoder.embed_tokens.weight is not decoder.embed_tokens.weight
    return RobertaEncDecModel(encoder, decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = 1024
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = 1024

    src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    if args.share_all_embeddings:
        if src_dict != tgt_dict:
            raise RuntimeError('--share-all-embeddings requires a joined dictionary')
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise RuntimeError(
                '--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim'
            )
        if args.decoder_embed_path and (args.decoder_embed_path != args.encoder_embed_path):
            raise RuntimeError(
                '--share-all-embeddings not compatible with --decoder-embed-path'
            )
        encoder_embed_tokens = build_embedding(
            src_dict, args.encoder_embed_dim, args.encoder_embed_path
        )
        decoder_embed_tokens = encoder_embed_tokens
        args.share_decoder_input_output_embed = True
    else:
        encoder_embed_tokens = build_embedding(
            src_dict, args.encoder_embed_dim, args.encoder_embed_path
        )
        decoder_embed_tokens = build_embedding(
            tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
        )

    # Forward direction (src -> tgt) and reverse direction (tgt -> src); each
    # direction reuses the embeddings of the matching dictionary.
    encoder = TransformerEncoder(args, src_dict, encoder_embed_tokens)
    decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
    encoder2 = TransformerEncoder(args, tgt_dict, decoder_embed_tokens)
    decoder2 = TransformerDecoder(args, src_dict, encoder_embed_tokens)
    return TransformerDualModel(encoder, decoder, encoder2, decoder2)
def __init__(self, cfg: WavBart2BartConfig, dictionary=None, embed_tokens=None,
             no_encoder_attn=False, bart=None):
    super().__init__(dictionary)
    self.cfg = cfg
    # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
    bart_decoder = bart.model.decoder
    self.decoder = TransformerDecoder(
        bart_decoder.args, bart_decoder.dictionary, bart_decoder.embed_tokens
    )
    self.decoder.load_state_dict(bart_decoder.state_dict())
def build_decoder(cls, args, tgt_dict, embed_tokens):
    if (
        safe_hasattr(args, "decoder_self_attn_head_select")
        and args.decoder_self_attn_head_select
    ) or (
        safe_hasattr(args, "dec_enc_attn_head_select")
        and args.dec_enc_attn_head_select
    ):
        return HeadSelectionTransformerDecoder(args, tgt_dict, embed_tokens)
    else:
        return TransformerDecoder(args, tgt_dict, embed_tokens)
def build_decoder(cls, args, tgt_dict, embed_tokens):
    return TransformerDecoder(
        args,
        tgt_dict,
        embed_tokens,
        no_encoder_attn=getattr(args, 'no_cross_attention', False),
    )
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS
        )

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(
            len(task.source_dictionary),
            args.decoder_input_dim,
            task.source_dictionary.pad(),
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = 100000
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = 100000

    src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    decoder_embed_tokens = build_embedding(
        tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
    )
    encoder = ConvolutionalTransformerEncoder(
        args,
        src_dict if src_dict is not None else tgt_dict,
        audio_features=args.input_feat_per_channel,
    )
    decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
    return ConvolutionalTransformerModel(encoder, decoder)
def build_decoder(cls, args, task, embed_tokens):
    _args = copy.deepcopy(args)
    if args.adaptor_proj or args.encoder_proj:  # not V0 arch
        _args.encoder_embed_dim = _args.decoder_embed_dim
    _args.dropout = args.decoder_dropout
    _args.attention_dropout = args.decoder_attention_dropout
    _args.activation_dropout = args.decoder_activation_dropout

    decoder = TransformerDecoder(_args, task.target_dictionary, embed_tokens)
    decoder = cls.maybe_load_pretrained(
        decoder, getattr(args, "load_pretrained_decoder_from", None)
    )

    for k, p in decoder.named_parameters():
        p.requires_grad = need_finetuning(args.finetune_decoder_params, k)
    return decoder
def build_multitask_decoder(cls, args, tgt_dict, in_dim):
    decoder_args = args.decoder_args
    decoder_args.encoder_embed_dim = in_dim
    if args.decoder_type == "transformer":
        base_multitask_text_transformer_decoder_arch(decoder_args)
        task_decoder = TransformerDecoder(
            decoder_args,
            tgt_dict,
            embed_tokens=TransformerModelBase.build_embedding(
                decoder_args,
                tgt_dict,
                decoder_args.decoder_embed_dim,
            ),
        )
    elif args.decoder_type == "ctc":
        task_decoder = CTCDecoder(
            dictionary=tgt_dict,
            in_dim=in_dim,
        )
    else:
        raise NotImplementedError(
            "currently only support multitask decoder_type 'transformer', 'ctc'"
        )
    return task_decoder
def __init__(self, args, dictionary):
    super().__init__()
    embedding_dim = 768
    self.padding_idx = 1
    self.dense = nn.Linear(embedding_dim, embedding_dim)
    self.layer_norm = LayerNorm(embedding_dim)
    init_bert_params(self.dense)
    # RoBERTa-base-sized sentence encoder (hard-coded hyperparameters).
    self.encoder = TransformerSentenceEncoder(
        padding_idx=1,
        vocab_size=50265,
        num_encoder_layers=12,
        embedding_dim=768,
        ffn_embedding_dim=3072,
        num_attention_heads=12,
        dropout=0.1,
        attention_dropout=0.1,
        activation_dropout=0.0,
        layerdrop=0.0,
        max_seq_len=512,
        num_segments=0,
        encoder_normalize_before=True,
        apply_bert_init=True,
        activation_fn="gelu",
        q_noise=0.0,
        qn_block_size=8,
    )
    embed_tokens = self.encoder.embed_tokens
    self.lm_head = RobertaLMHead(
        embed_dim=embedding_dim,
        output_dim=50265,
        activation_fn="gelu",
        weight=embed_tokens.weight,
    )
    # args = base_architecture(args)
    if args.encoder_layers_to_keep:
        args.encoder_layers = len(args.encoder_layers_to_keep.split(","))
    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))
    if getattr(args, "max_source_positions", None) is None:
        args.max_source_positions = 512
    if getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = 512
    self.decoder = TransformerDecoder(
        args,
        dictionary,
        embed_tokens,
        no_encoder_attn=getattr(args, "no_cross_attention", False),
    )
    self.class_num = args.num_classes
    self.classification_heads = RobertaClassificationHead(
        768, 768, self.class_num, 'tanh', 0.0, 0.0, 8,
    )
def build_decoder(cls, args, task):
    _args = copy.deepcopy(args)
    _args.dropout = args.mbart_dropout
    _args.attention_dropout = args.mbart_attention_dropout
    _args.activation_dropout = args.mbart_activation_dropout
    _args.max_target_positions = 1024
    dec_emb = nn.Embedding(
        len(task.tgt_dict), _args.encoder_embed_dim, task.tgt_dict.pad()
    )
    decoder = TransformerDecoder(_args, task.tgt_dict, dec_emb)
    if getattr(args, "load_pretrained_mbart_from", None):
        decoder = checkpoint_utils.load_pretrained_component_from_model(
            component=decoder, checkpoint=args.load_pretrained_mbart_from
        )
    if getattr(args, "no_final_norm_decoder", False):
        decoder.layer_norm = None
    for k, p in decoder.named_parameters():
        # Freeze pretrained models by default
        if safe_hasattr(args, "finetune_mbart_decoder_params") and need_finetuning(
            args.finetune_mbart_decoder_params, k
        ):
            p.requires_grad = True
        else:
            p.requires_grad = False

    compute_cross_attentive_loss = (
        getattr(args, "attentive_cost_regularization", 0.0) > 0.0
    )
    cross_attentive_loss_without_norm = getattr(
        args, "attentive_cost_without_normalize", False
    )
    cross_attentive_loss_reverse = False  # getattr(args, "attentive_cost_reverse", False)

    decoder = TransformerMultiInputDecoder(
        dictionary=task.target_dictionary,
        spch_decoder=decoder,
        text_decoder=decoder,
        compute_cross_attentive_loss=compute_cross_attentive_loss,
        cross_attentive_loss_with_norm=not cross_attentive_loss_without_norm,
        cross_attentive_loss_reverse=cross_attentive_loss_reverse,
    )
    return decoder
def build_model(cls, args, task):
    mode = {
        e.split('=')[0]: e.split('=')[1] if len(e.split('=')) > 1 else None
        for e in args.user_mode.split(',')
    }
    if 'gated' in mode:
        tmodel = GatedTransformerModel.build_model(args, task)
    elif any([m in mode for m in ['decomposable', 'sep_lm', 'sep_lm1']]):
        tmodel = DecomposableTransformerModel.build_model(args, task)
    elif any([m in mode for m in ['attn_endorse', 'dbg_log_endorsement']]):
        tmodel = SimpleTransformerModel.build_model(
            args, task, DecoderModelLayer=UserTransformerDecoderLayer
        )
    else:
        tmodel = SimpleTransformerModel.build_model(args, task)

    model = DistantTransformerModel(tmodel)
    model.args = args
    model.user_mode = mode
    model.sampler_grad = SequenceGeneratorGrad(
        model.model.decoder.dictionary, beam_size=1, max_len_b=60
    )
    model.sampler = SequenceGenerator(
        model.model.decoder.dictionary, beam_size=1, max_len_b=60
    )
    model.decoder = ProxyDecoder(
        tmodel, model.user_mode, args, task, model.sampler_grad, model.sampler
    )
    model.encoder = ProxyEncoder(
        tmodel, model.user_mode, args, task, model.sampler_grad, model.sampler
    )
    tmodel.encoder.user_mode = mode
    tmodel.decoder.user_mode = mode

    if any([
        m in mode for m in [
            'diff_lm', 'pretrain_lm', 'sep_lm', 'max_lm_margin', 'sep_lm2', 'sep_lm3'
        ]
    ]):
        model.lm = TransformerDecoder(
            args,
            tmodel.decoder.dictionary,
            tmodel.decoder.embed_tokens,
            no_encoder_attn=True,
        )
        model.decoder.lm = model.lm

    if 'sep_lm3' in mode:
        tmodel.decoder.gate_fc1 = Linear(
            len(tmodel.decoder.dictionary) * 2, len(tmodel.decoder.dictionary)
        )
        tmodel.decoder.gate_fc2 = Linear(len(tmodel.decoder.dictionary), 1)

    if any([m in mode for m in ['endorsement', 'rl_edm', 'beam_endorse']]):
        model.edm = EndorsementDetectorModel.build_model(args, task)
        model.decoder.edm = model.encoder.edm = model.edm
        model.encoder.edm.decoder.user_mode = model.encoder.edm.encoder.user_mode = mode

    if any([m in mode for m in ['self_align']]):
        model.self_edm = EndorsementDetectorModel.build_model(args, task)
        model.decoder.self_edm = model.encoder.self_edm = model.self_edm
        model.encoder.self_edm.decoder.user_mode = model.encoder.self_edm.encoder.user_mode = mode

    return model
def get_decoder(lang):
    if lang not in lang_decoders:
        if shared_decoder_embed_tokens is not None:
            decoder_embed_tokens = shared_decoder_embed_tokens
        else:
            decoder_embed_tokens = build_embedding(
                task.dicts[lang], args.decoder_embed_dim, args.decoder_embed_path
            )
        lang_decoders[lang] = TransformerDecoder(
            args, task.dicts[lang], decoder_embed_tokens
        )
    return lang_decoders[lang]
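# Self-contained sketch of the lazy per-language caching pattern used above:
# each language gets exactly one decoder instance, built on first request and
# reused afterwards. A dummy factory stands in for TransformerDecoder here.
lang_decoders = {}

def get_decoder_sketch(lang):
    if lang not in lang_decoders:
        lang_decoders[lang] = object()  # build the real decoder here
    return lang_decoders[lang]

assert get_decoder_sketch('de') is get_decoder_sketch('de')  # built once, then reused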
def build_model(cls, args, task):
    """Build a new model instance."""
    embed_tokens = cls.build_embedding(
        task.source_dictionary, args.decoder_input_dim
    )
    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True
    )
    return cls(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, 'no_tie_adaptive_proj') and args.no_tie_adaptive_proj is False:
        # backward compatibility
        args.tie_adaptive_proj = True

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.tokens_per_sample
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = args.tokens_per_sample

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.dictionary),
            task.dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
        )
    else:
        embed_tokens = Embedding(
            len(task.dictionary), args.decoder_input_dim, task.dictionary.pad()
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args,
        task.output_dictionary,
        embed_tokens,
        no_encoder_attn=True,
        final_norm=False,
    )
    return TransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if hasattr(args, "decoder_layers_to_keep"):
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS
        )

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        print("Adaptive Input " + str(args.adaptive_input))
        print("Adaptive Cutoff: " + str(args.adaptive_input_cutoff))
        print("Vocab Size: " + str(len(task.source_dictionary.symbols)))
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(
            args, task.source_dictionary, args.decoder_input_dim
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, \
            '{} != {}'.format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
    )
    return cls(decoder)
def get_decoder(lang, lang_pair=None):
    if lang not in lang_decoders:
        if shared_decoder_embed_tokens is not None:
            decoder_embed_tokens = shared_decoder_embed_tokens
        elif args.share_all_langpair_embeddings:
            decoder_embed_tokens = lang_pair_embed[lang_pair]
        else:
            decoder_embed_tokens = build_embedding(
                task.dicts[lang], args.decoder_embed_dim, args.decoder_embed_path
            )
        lang_decoders[lang] = TransformerDecoder(
            args, task.dicts[lang], decoder_embed_tokens
        )
    return lang_decoders[lang]
def build_decoder(cls, args, task, embed_tokens):
    _args = copy.deepcopy(args)
    _args.dropout = args.decoder_dropout
    _args.attention_dropout = args.decoder_attention_dropout
    _args.activation_dropout = args.decoder_activation_dropout
    _args.max_target_positions = 1024
    decoder = TransformerDecoder(_args, task.target_dictionary, embed_tokens)
    if getattr(args, "load_pretrained_decoder_from", None):
        decoder = checkpoint_utils.load_pretrained_component_from_model(
            component=decoder, checkpoint=args.load_pretrained_decoder_from
        )
    for k, p in decoder.named_parameters():
        # Freeze pretrained models by default
        if safe_hasattr(args, 'finetune_decoder_params') and \
                XMTransformerModel.finetune_params(args.finetune_decoder_params, k):
            p.requires_grad = True
        else:
            p.requires_grad = False
    return decoder
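# The loop above follows fairseq's freeze-by-default convention: every decoder
# parameter starts frozen, and only names matched by the finetune-params spec
# are trained. A minimal self-contained sketch of that matching, with a
# regex-based stand-in for the real helper (whose exact rules may differ):
import re

def finetune_params_sketch(spec: str, param_name: str) -> bool:
    # "all" unfreezes everything; otherwise treat the spec as a
    # comma-separated list of patterns to search for in parameter names.
    if spec == "all":
        return True
    return any(re.search(pattern, param_name) for pattern in spec.split(","))

assert finetune_params_sketch("all", "layers.0.self_attn.k_proj.weight")
assert finetune_params_sketch("layer_norm,embed", "layers.3.final_layer_norm.bias")
assert not finetune_params_sketch("layer_norm", "layers.3.self_attn.v_proj.weight")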
def build_model(cls, args, vocab_size):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_lm_architecture(args)

    embed_tokens = Embedding(vocab_size, args.decoder_input_dim)
    decoder = TransformerDecoder(
        args, None, embed_tokens, no_encoder_attn=True,
    )
    return TransformerLanguageModel(decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = 100000
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = 100000

    # This model requires a task that provides source dictionary and transcripts
    assert isinstance(task, SpeechTranslationCTCTask)
    src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    target_embed_tokens = build_embedding(
        tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
    )
    src_embed_tokens = build_embedding(
        src_dict, args.decoder_embed_dim, args.auxiliary_decoder_embed_path
    )
    encoder = ConvolutionalTransformerEncoder(
        args, tgt_dict, audio_features=args.input_feat_per_channel
    )
    decoder = TransformerDecoder(args, tgt_dict, target_embed_tokens)
    auxiliary_decoder = TransformerDecoder(args, src_dict, src_embed_tokens)
    return ConvolutionalTransformerDualDecoder(encoder, decoder, auxiliary_decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if safe_getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = safe_getattr(
            args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
        )

    if args.character_embeddings:
        embed_tokens = CharacterTokenEmbedder(
            task.source_dictionary,
            eval(args.character_filters),
            args.character_embedding_dim,
            args.decoder_embed_dim,
            args.char_embedder_highway_layers,
        )
    elif args.adaptive_input:
        embed_tokens = AdaptiveInput(
            len(task.source_dictionary),
            task.source_dictionary.pad(),
            args.decoder_input_dim,
            args.adaptive_input_factor,
            args.decoder_embed_dim,
            options.eval_str_list(args.adaptive_input_cutoff, type=int),
            args.quant_noise_pq,
            args.quant_noise_pq_block_size,
        )
    else:
        embed_tokens = cls.build_embedding(
            args, task.source_dictionary, args.decoder_input_dim
        )

    if args.tie_adaptive_weights:
        assert args.adaptive_input
        assert args.adaptive_input_factor == args.adaptive_softmax_factor
        assert (
            args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
        ), "{} != {}".format(args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
        assert args.decoder_input_dim == args.decoder_output_dim

    decoder = TransformerDecoder(
        args, task.target_dictionary, embed_tokens, no_encoder_attn=True
    )
    return cls(decoder)
def _get_module_class(cls, is_encoder, args, lang_dict, embed_tokens, langs):
    if is_encoder:
        if hasattr(args, "encoder_latent_layer") and args.encoder_latent_layer:
            return LatentTransformerEncoder(
                args, lang_dict, embed_tokens, num_logits=len(langs)
            )
        else:
            return TransformerEncoder(args, lang_dict, embed_tokens)
    else:
        if hasattr(args, "decoder_latent_layer") and args.decoder_latent_layer:
            return LatentTransformerDecoder(
                args, lang_dict, embed_tokens, num_logits=len(langs)
            )
        else:
            return TransformerDecoder(args, lang_dict, embed_tokens)
def build_decoder(cls, args, tgt_dict, embed_tokens):
    if args.add_topic_decoder:
        return TopicTransformerDecoder(
            args,
            tgt_dict,
            embed_tokens,
            no_encoder_attn=getattr(args, "no_cross_attention", False),
        )
    else:
        return TransformerDecoder(
            args,
            tgt_dict,
            embed_tokens,
            no_encoder_attn=getattr(args, "no_cross_attention", False),
        )
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = 4000
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = 1024

    tgt_dict = task.target_dictionary

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    if args.decoder_embed_path:
        raise NotImplementedError(
            "Pretrained embedding not available with Ber2Transf yet"
        )
    else:
        decoder_embed_tokens = build_embedding(
            tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
        )

    encoder = ProxyEncoder(
        linear_dim=args.encoder_embed_dim,
        convolutions=eval(args.encoder_convolutions),
        layers=args.encoder_layers,
        dropout=args.encoder_dropout,
        max_positions=args.max_source_positions,
        normalization_constant=args.normalization_constant,
        weight_norm=args.weight_norm,
        audio_features=task.audio_features,
    )
    args.dropout = args.decoder_dropout
    decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
    return Ber2Transf(encoder, decoder)
def __init__(
    self,
    args,
    dictionary,
    embed_tokens,
    no_encoder_attn_phase1=None,
    no_encoder_decoder_attn_phase2=None,
):
    super().__init__(dictionary)
    self.register_buffer("version", torch.Tensor([3]))
    self.decoder_phase1 = TransformerDecoder(
        args, dictionary, embed_tokens, no_encoder_attn_phase1
    )
    self.decoder_phase2 = TransformerDecoderPhase2(
        args, dictionary, embed_tokens, no_encoder_decoder_attn_phase2
    )
def build_model(cls, args, task: SemparseClassificationTask):
    # Fairseq initializes models by calling the ``build_model()`` function.
    # This provides more flexibility, since the returned model instance can
    # be of a different type than the one that was called. Here we return an
    # XlmrTransformerEncoderDecoder wrapping an XLM-R encoder and a
    # TransformerDecoder.

    # Initialize our encoder and decoder.
    xlmr = XlmrEncoder(task.xlmr, task.max_positions()[0])

    dictionary = task.output_vocab
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, args.decoder_embed_dim, padding_idx)
    decoder = TransformerDecoder(args, dictionary, embed_tokens)

    model = XlmrTransformerEncoderDecoder(xlmr, decoder)

    # Print the model architecture.
    print(model)
    return model
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present
    base_architecture(args)

    if not hasattr(args, 'max_positions'):
        args.max_positions = args.tokens_per_sample
    if getattr(args, "max_source_positions", None) is None:
        args.max_source_positions = 512
    if getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = 512

    encoder = RobertaEncoder(args, task.source_dictionary)
    decoder = TransformerDecoder(
        args,
        task.source_dictionary,
        encoder.sentence_encoder.embed_tokens,
        no_encoder_attn=getattr(args, "no_cross_attention", False),
    )
    return cls(args, encoder, decoder)
def build_decoder(vocab, tokens_embeddings, ffn_dim=32, layers=2, attention_heads=2):
    args = Namespace(**FAIRSEQ_DEFAULT_ARGS)
    d = tokens_embeddings.embedding_dim
    args.share_all_embeddings = True
    args.encoder_embed_dim = d
    args.decoder_embed_dim = d
    args.decoder_ffn_embed_dim = ffn_dim
    args.decoder_attention_heads = attention_heads
    args.decoder_layers = layers
    args.tie_adaptive_weights = False
    args.decoder_output_dim = d
    args.decoder_input_dim = d
    return TransformerDecoder(args, vocab, tokens_embeddings)
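# Usage sketch for the helper above, assuming FAIRSEQ_DEFAULT_ARGS supplies
# the remaining transformer hyperparameters that TransformerDecoder expects.
# The vocabulary and embedding sizes here are illustrative.
import torch.nn as nn
from fairseq.data import Dictionary

vocab = Dictionary()
for symbol in ["the", "cat", "sat"]:
    vocab.add_symbol(symbol)
embeddings = nn.Embedding(len(vocab), 16, padding_idx=vocab.pad())
decoder = build_decoder(vocab, embeddings, ffn_dim=32, layers=2, attention_heads=2)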