def __init__(self, args, shared_compressed_layer, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False):
    # Convert the legacy argparse Namespace into a PrimerConfig before
    # delegating to the dataclass-based parent layer.
    super().__init__(
        PrimerConfig.from_namespace(args),
        shared_compressed_layer,
        no_encoder_attn,
        add_bias_kv,
        add_zero_attn,
    )
def __init__(self, args, shared_conv_layer):
    # wrap in a list so it's not automatically registered by PyTorch
    self.shared_conv_layer = [shared_conv_layer]
    super().__init__(PrimerConfig.from_namespace(args))
    # self.fc1 = self.build_fc1(
    #     args.compressed_dim,
    #     args.encoder.ffn_embed_dim,
    #     self.quant_noise,
    #     self.quant_noise_block_size,
    # )
    # version buffer records the state-dict format for checkpoint upgrades
    self.register_buffer("version", torch.tensor(2))
@classmethod
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_architecture(args)

    if args.encoder_layers_to_keep:
        args.encoder_layers = len(args.encoder_layers_to_keep.split(","))
    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, "max_positions", None) is None:
        args.max_positions = DEFAULT_MAX_SOURCE_POSITIONS
    if getattr(args, "max_source_positions", None) is None:
        args.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS
    if getattr(args, "max_target_positions", None) is None:
        args.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS

    src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

    if args.share_all_embeddings:
        if src_dict != tgt_dict:
            raise ValueError(
                "--share-all-embeddings requires a joined dictionary"
            )
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError(
                "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim"
            )
        if args.decoder_embed_path and (
            args.decoder_embed_path != args.encoder_embed_path
        ):
            raise ValueError(
                "--share-all-embeddings not compatible with --decoder-embed-path"
            )
        args.share_decoder_input_output_embed = True

    if getattr(args, "offload_activations", False):
        args.checkpoint_activations = True  # offloading implies checkpointing

    if not args.share_all_embeddings:
        args.min_params_to_wrap = getattr(
            args, "min_params_to_wrap", DEFAULT_MIN_PARAMS_TO_WRAP
        )

    cfg = PrimerConfig.from_namespace(args)
    return super().build_model(cfg, task)
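# ---------------------------------------------------------------------------
# Illustrative sketch (assumption, not taken from this repository): every
# wrapper in this file bridges the legacy argparse ``Namespace`` to a
# dataclass config via ``PrimerConfig.from_namespace(args)``. The
# hypothetical ``_ExampleConfig`` below only shows the general shape of such
# a bridge: copy matching attributes from the namespace onto the dataclass
# and fall back to its defaults for everything else.
# ---------------------------------------------------------------------------
def _namespace_bridge_demo():
    import argparse
    import dataclasses

    @dataclasses.dataclass
    class _ExampleConfig:
        encoder_embed_dim: int = 512
        decoder_embed_dim: int = 512

        @classmethod
        def from_namespace(cls, args):
            # keep only the attributes the dataclass actually declares
            fields = {f.name for f in dataclasses.fields(cls)}
            kwargs = {k: v for k, v in vars(args).items() if k in fields}
            return cls(**kwargs)

    args = argparse.Namespace(encoder_embed_dim=1024, unused_flag=True)
    return _ExampleConfig.from_namespace(args)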
@classmethod
def build_decoder(cls, args, tgt_dict, embed_tokens):
    return super().build_decoder(
        PrimerConfig.from_namespace(args), tgt_dict, embed_tokens
    )
@classmethod
def build_embedding(cls, args, dictionary, embed_dim, path=None):
    return super().build_embedding(
        PrimerConfig.from_namespace(args), dictionary, embed_dim, path
    )
def __init__(self, args, encoder, decoder):
    cfg = PrimerConfig.from_namespace(args)
    super().__init__(cfg, encoder, decoder)
    # keep the raw Namespace around for code that still reads
    # hyperparameters from self.args
    self.args = args
def __init__(self, args, shared_compressed_layer):
    super().__init__(
        PrimerConfig.from_namespace(args), shared_compressed_layer
    )
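# ---------------------------------------------------------------------------
# Illustrative sketch (assumption, not part of the Primer model code): the
# wrappers above thread a single shared module (``shared_conv_layer`` /
# ``shared_compressed_layer``) through every layer and stash it in a plain
# Python list so PyTorch does not register it as a child of each layer.
# The helper below shows that cross-layer sharing pattern in isolation with
# hypothetical names (``_ToyLayer``); call it manually to try the pattern.
# ---------------------------------------------------------------------------
def _shared_layer_sharing_demo():
    import torch
    import torch.nn as nn

    class _ToyLayer(nn.Module):
        def __init__(self, dim, shared_projection):
            super().__init__()
            # wrap in a list so the shared module is not re-registered
            # (and therefore not duplicated in every layer's state dict)
            self.shared_projection = [shared_projection]
            self.fc = nn.Linear(dim, dim)

        def forward(self, x):
            return self.fc(self.shared_projection[0](x))

    shared = nn.Linear(16, 16)  # built once, shared by all layers
    layers = nn.ModuleList([_ToyLayer(16, shared) for _ in range(4)])
    # the shared module itself must be registered exactly once (e.g. on the
    # parent model) so its parameters are trained and checkpointed
    x = torch.randn(2, 16)
    for layer in layers:
        x = layer(x)
    return x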