def build_model(cls, args, task):
    """Build a new model instance."""
    if not has_megatron_submodule:
        raise ImportError(
            '\n\nPlease install the megatron submodule:'
            '\n\n git submodule update --init '
            'fairseq/model_parallel/megatron'
        )

    # make sure all arguments are present in older models
    base_lm_architecture(args)

    if args.decoder_layers_to_keep:
        args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

    if getattr(args, 'max_target_positions', None) is None:
        args.max_target_positions = getattr(
            args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS
        )

    if args.character_embeddings:
        raise NotImplementedError(
            "Character embeddings is not supported for model parallel"
        )
    elif args.adaptive_input:
        raise NotImplementedError(
            "Adaptive input is not supported for model parallel"
        )
    else:
        embed_tokens = cls.build_embedding(
            args, task.source_dictionary, args.decoder_input_dim
        )

    decoder = ModelParallelTransformerDecoder(
        args,
        task.target_dictionary,
        embed_tokens,
        no_encoder_attn=True,
    )
    return cls(decoder)
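
# A minimal, self-contained sketch of the decoder_layers_to_keep arithmetic used
# above, with a plain argparse.Namespace and hypothetical layer indices (no
# fairseq dependency); it only shows how the keep-list collapses the layer count.
from argparse import Namespace

example_args = Namespace(decoder_layers=72, decoder_layers_to_keep="0,36,71")

if example_args.decoder_layers_to_keep:
    example_args.decoder_layers = len(example_args.decoder_layers_to_keep.split(","))

print(example_args.decoder_layers)  # -> 3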
def transformer_lm_megatron_11b(args):
    args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 3072)
    args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 3072 * 6)
    args.decoder_layers = getattr(args, 'decoder_layers', 72)
    args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 32)
    args.dropout = getattr(args, 'dropout', 0.1)
    args.attention_dropout = getattr(args, 'attention_dropout', 0.1)
    args.activation_fn = getattr(args, 'activation_fn', 'gelu')
    base_lm_architecture(args)
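
# The architecture function above relies on the getattr(args, name, default)
# pattern: an attribute the caller already set wins, anything missing gets the
# Megatron-11B default. A minimal sketch of that behaviour with made-up values
# (no fairseq imports) follows; _fill_defaults is a hypothetical helper.
from argparse import Namespace

def _fill_defaults(args):
    args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 3072)
    args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 3072 * 6)

demo_args = Namespace(decoder_embed_dim=1024)  # caller override
_fill_defaults(demo_args)
print(demo_args.decoder_embed_dim)      # -> 1024 (override kept)
print(demo_args.decoder_ffn_embed_dim)  # -> 18432 (default filled in)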
def __init__(self, dictionary):
    # Start from an empty Namespace and let base_lm_architecture fill in the
    # default decoder hyperparameters; layerdrop is disabled explicitly.
    args = Namespace()
    base_lm_architecture(args)
    args.decoder_layerdrop = 0
    args.max_target_positions = getattr(
        args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS
    )

    # Size the embedding table from the dictionary and reserve the pad index.
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, args.decoder_embed_dim, padding_idx)

    super().__init__(args, dictionary, emb, False)
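
# The constructor above sizes the embedding table from the dictionary and passes
# the pad symbol's index along. Assuming the Embedding helper wraps
# torch.nn.Embedding (a sketch, not necessarily the exact helper), the padding
# row starts at zero and receives no gradient updates; sizes below are made up.
import torch
import torch.nn as nn

num_embeddings, embed_dim, padding_idx = 1000, 512, 1  # stand-ins for len(dictionary), decoder_embed_dim, dictionary.pad()
demo_emb = nn.Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)

tokens = torch.tensor([[5, 42, padding_idx, padding_idx]])  # a right-padded sequence
print(demo_emb(tokens).shape)                          # torch.Size([1, 4, 512])
print(demo_emb.weight[padding_idx].abs().sum().item())  # -> 0.0, pad row stays zero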
def transformer_numlm_base(args):
    args.add_number_token_attention_mask = getattr(
        args, "add_number_token_attention_mask", False
    )
    base_lm_architecture(args)