def base_architecture(args):
    """Fill in missing compression options on *args*, then apply the RoBERTa base defaults.

    Each option listed below is left untouched when *args* already carries
    it; otherwise it is set to the default shown.  Afterwards *args* is
    handed to ``roberta_base_architecture`` (defined elsewhere), which is
    presumably responsible for the remaining model hyper-parameters.

    Args:
        args: Mutable attribute container (e.g. an ``argparse.Namespace``);
            it is modified in place.
    """
    # (attribute name, fallback value) pairs, applied in this order.
    fallback_options = (
        ("compressed", 4),
        ("shared_kv_compressed", 0),
        ("shared_layer_kv_compressed", 0),
        ("freeze_compress", 0),
    )
    for option_name, fallback in fallback_options:
        # Re-assigning an existing value is a no-op; a missing one gets the fallback.
        setattr(args, option_name, getattr(args, option_name, fallback))

    roberta_base_architecture(args)
def xlm_align_base(args):
    """Apply the architecture defaults for ``xlm_align_base`` to *args*.

    This function sets nothing itself: it forwards *args* unchanged to
    ``roberta_base_architecture`` (defined elsewhere) and returns ``None``.

    Args:
        args: Attribute container passed through to the delegate.
    """
    roberta_base_architecture(args)
def reload_roberta_base(args):
    """Apply the architecture defaults for ``reload_roberta_base`` to *args*.

    This function sets nothing itself: it forwards *args* unchanged to
    ``roberta_base_architecture`` (defined elsewhere) and returns ``None``.

    Args:
        args: Attribute container passed through to the delegate.
    """
    roberta_base_architecture(args)
def mpnet_base_architecture(args):
    """Apply the architecture defaults for ``mpnet_base_architecture`` to *args*.

    This function sets nothing itself: it forwards *args* unchanged to
    ``roberta_base_architecture`` (defined elsewhere) and returns ``None``.

    Args:
        args: Attribute container passed through to the delegate.
    """
    roberta_base_architecture(args)
def infoxlm_base(args):
    """Apply the architecture defaults for ``infoxlm_base`` to *args*.

    This function sets nothing itself: it forwards *args* unchanged to
    ``roberta_base_architecture`` (defined elsewhere) and returns ``None``.

    Args:
        args: Attribute container passed through to the delegate.
    """
    roberta_base_architecture(args)
def model_parallel_roberta_postnorm_architecture(args):
    """Apply the architecture defaults for the post-norm model-parallel variant.

    This function sets nothing itself: it forwards *args* unchanged to
    ``roberta_base_architecture`` (defined elsewhere) and returns ``None``.
    NOTE(review): whatever makes this variant "post-norm" is not visible
    here; presumably the delegate's defaults already select it -- confirm.

    Args:
        args: Attribute container passed through to the delegate.
    """
    # The original BERT/RoBERTa uses the "Post-LN" formulation.
    roberta_base_architecture(args)