from model.gran_model import GRANModel
from optimization import optimization
from evaluation import generate_ground_truth, batch_evaluation, compute_metrics
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params, init_checkpoint

# basicConfig is called without a level, so INFO is enabled explicitly on this
# module's logger below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.info(logger.getEffectiveLevel())

# yapf: disable
parser = argparse.ArgumentParser()

# Model architecture options. Each add_arg call is (name, type, default, help).
model_g = ArgumentGroup(parser, "model", "model and checkpoint configuration.")
model_g.add_arg("num_hidden_layers", int, 12, "Number of hidden layers.")
model_g.add_arg("num_attention_heads", int, 4, "Number of attention heads.")
model_g.add_arg("hidden_size", int, 256, "Hidden size.")
model_g.add_arg("intermediate_size", int, 512, "Intermediate size.")
model_g.add_arg("hidden_act", str, "gelu", "Hidden act.")
model_g.add_arg("hidden_dropout_prob", float, 0.1, "Hidden dropout ratio.")
model_g.add_arg("attention_dropout_prob", float, 0.1, "Attention dropout ratio.")
model_g.add_arg("initializer_range", float, 0.02, "Initializer range.")
# These sizes default to None; presumably they are filled in from the dataset
# or the command line before the model is built -- TODO confirm.
model_g.add_arg("vocab_size", int, None, "Size of vocabulary.")
model_g.add_arg("num_relations", int, None, "Number of relations.")
# The two help fragments are joined by implicit concatenation; the trailing
# space keeps "(1)," and "relation-object" from running together.
model_g.add_arg("num_edges", int, 5,
                "Number of edge types, typically fixed to 5: no edge (0), relation-subject (1), "
                "relation-object (2), relation-attribute (3), attribute-value (4).")
model_g.add_arg("max_seq_len", int, None, "Max sequence length.")
model_g.add_arg("max_arity", int, None, "Max arity.")
# See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import time import argparse from utils.args import ArgumentGroup # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("ernie_config_path", str, None, "Path to the json file for ernie model config.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("init_pretraining_params", str, None, "Init pre-training params which preforms fine-tuning from. If the " "arg 'init_checkpoint' has been set, this argument wouldn't be valid.") model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.") train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay", "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay']) train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.") train_g.add_arg("warmup_proportion", float, 0.1, "Proportion of training steps to perform linear learning rate warmup for.")
import time
import argparse
import numpy as np
import multiprocessing

import paddle.fluid as fluid

import reader.cls as reader
from model.bert import BertConfig
from model.classifier import create_model
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params

# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Options for initialising, resuming and exporting the model.
model_g = ArgumentGroup(parser, "model", "options to init, resume and save model.")
model_g.add_arg("bert_config_path", str, None, "Path to the json file for bert model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("save_inference_model_path", str, None, "If set, save the inference model to this path.")
model_g.add_arg("use_fp16", bool, False, "Whether to resume parameters from fp16 checkpoint.")

# Input data locations and batching/tokenisation options.
data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
data_g.add_arg("data_dir", str, None, "Directory to test data.")
data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
data_g.add_arg("in_tokens", bool, False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
               "Otherwise, it will be the maximum number of examples in one batch.")
data_g.add_arg("do_lower_case", bool, True,
               "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
import paddle
import paddle.fluid as fluid

import reader.cls as reader
from model.xlnet import XLNetConfig
from model.classifier import create_model
from optimization import optimization
from utils.args import ArgumentGroup, print_arguments, check_cuda
from utils.init import init_pretraining_params, init_checkpoint
from utils.cards import get_cards

# Number of trainers as exported through the PADDLE_TRAINERS_NUM environment
# variable; falls back to 1 when the variable is unset.
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))

# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Model configuration and checkpoint paths.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("model_config_path", str, None, "Path to the json file for bert model config.")
model_g.add_arg("dropout", float, 0.1, "Dropout rate.")
model_g.add_arg("dropatt", float, 0.1, "Attention dropout rate.")
model_g.add_arg("clamp_len", int, -1, "Clamp length.")
# Only 'last' is accepted for summary_type (enforced through `choices`).
model_g.add_arg("summary_type", str, "last",
                "Method used to summarize a sequence into a vector.", choices=['last'])
model_g.add_arg("use_summ_proj", bool, True, "Whether to use projection for summarizing sequences.")
model_g.add_arg("spiece_model_file", str, None, "Sentence Piece model path.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("init_pretraining_params", str, None,
                "Init pre-training params which preforms fine-tuning from. If the "
                "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
# not take any effect.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'  # enable gc

import paddle.fluid as fluid

from reader.task_reader import ClassifyReader
from model.ernie import ErnieConfig
from finetune.classifier import create_model
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params
from finetune_args import parser

# yapf: disable
# NOTE: this rebinds `parser`, discarding the object imported from
# finetune_args just above; only the options declared below are available.
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Options for initialising, resuming and exporting the model.
model_g = ArgumentGroup(parser, "model", "options to init, resume and save model.")
model_g.add_arg("ernie_config_path", str, None, "Path to the json file for bert model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("save_inference_model_path", str, "inference_model",
                "If set, save the inference model to this path.")
model_g.add_arg("use_fp16", bool, False, "Whether to resume parameters from fp16 checkpoint.")
model_g.add_arg("num_labels", int, 2, "num labels for classify")

# Prediction input, vocabulary and batching options.
data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
data_g.add_arg("predict_set", str, None, "Predict set file")
data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
data_g.add_arg("label_map_config", str, None, "Label_map_config json file.")
data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
data_g.add_arg("do_lower_case", bool, True,
               "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
import multiprocessing
import os
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import pdb

from reader.squad import DataProcessor, write_predictions
from model.ernie import ErnieConfig, ErnieModel
from utils.args import ArgumentGroup, print_arguments, check_cuda
from optimization import optimization
from utils.init import init_pretraining_params, init_checkpoint

# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Model configuration and checkpoint paths.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("ernie_config", str, None, "Path to the json file for ernie model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("init_pretraining_params", str, None,
                "Init pre-training params which preforms fine-tuning from. If the "
                "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")

# Optimisation hyper-parameters.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
train_g.add_arg("warmup_proportion", float, 0.1,
                "Proportion of training steps to perform linear learning rate warmup for.")
def main():
    """Parse options, set up the runtime, and evaluate the model.

    Steps, in order:
      1. Declare and parse all command-line options.
      2. Validate option combinations (doc stride, output directory).
      3. Optionally attach a ptvsd debugger, choose the torch device and, when
         ``local_rank`` is set and CUDA is enabled, initialise the NCCL
         process group.
      4. Configure logging, call ``set_seed`` and, for fp16, register apex's
         half-precision ``einsum``.
      5. Create the cache directory. When ``full_table`` is true (its
         default) the program logs a warning and exits here.
      6. Build one KG retriever per entry of ``args.use_kgs`` and optionally
         run ``create_dataset``.
      7. Load tokenizer/model via ``configure_tokenizer_model`` and run
         ``evaluate``, logging the results on rank -1/0.

    Raises:
        ValueError: if the output directory exists, is non-empty, training is
            requested and ``overwrite_output_dir`` is false.
        ImportError: if fp16 is requested but apex cannot be imported.
    """
    import sys  # local import: only needed for the early-exit path below

    parser = argparse.ArgumentParser()

    # ------------------------------------------------------------------
    # Model options. Each add_arg call is (name, type, default, help).
    # ------------------------------------------------------------------
    model_g = ArgumentGroup(parser, "model", "model configuration and path.")

    model_g.add_arg("dataset", str, "record", "used dataset")
    model_g.add_arg("is_update_max_concept", bool, True, "weather update max concept for kg retriver")
    model_g.add_arg("full_table", bool, True, "full_table")
    model_g.add_arg("test", bool, False, "weather load superglue test set")
    model_g.add_arg("use_wn", bool, True, "wn")
    model_g.add_arg("use_nell", bool, True, "nell")

    model_g.add_arg("sentinel_trainable", bool, False, "sentinel_trainable")
    model_g.add_arg("memory_bank_update", bool, False, "memory_bank_update")
    model_g.add_arg("memory_bank_update_steps", int, 500, "memory_bank_update_steps")
    model_g.add_arg("memory_bank_keep_coef", float, 0.0, "what percent keep")
    model_g.add_arg("use_context_graph", bool, True, "use_context_graph")

    model_g.add_arg("schedule_strategy", str, "linear", "schedule_strategy")
    model_g.add_arg("tokenizer_path", str, "", "tokenizer_path")
    model_g.add_arg("save_model", bool, True, "whether save model")
    model_g.add_arg("data_preprocess", bool, False, "data process")
    model_g.add_arg("data_preprocess_evaluate", bool, False, "data_preprocess_evaluate")

    # multi-relational part
    model_g.add_arg("relation_agg", str, "sum", "the method to aggeregate multi-relational neoghbor")
    model_g.add_arg("is_lemma", bool, False, "whether trigger lemma")
    model_g.add_arg("is_filter", bool, True, "weather filter node not in wn18")
    model_g.add_arg("is_clean", bool, True, "weather filter node not in repeated_id")
    model_g.add_arg("is_morphy", bool, False, "weather morphy")
    model_g.add_arg("fewer_label", bool, False, "weather fewer_label")
    model_g.add_arg("label_rate", float, 0.1, "label rate")
    # NOTE(review): if ArgumentGroup forwards `type=list` to argparse, a value
    # given on the command line is split into single characters, so only the
    # default is practically usable. Use --is_all_relation/--selected_relation
    # to choose a subset instead.
    model_g.add_arg("relation_list", list, [
        "_hyponym", "_hypernym", "_derivationally_related_form",
        "_member_meronym", "_member_holonym", "_part_of", "_has_part",
        "_member_of_domain_topic", "_synset_domain_topic_of",
        "_instance_hyponym", "_instance_hypernym", "_also_see", "_verb_group",
        "_member_of_domain_region", "_synset_domain_region_of",
        "_member_of_domain_usage", "_synset_domain_usage_of", "_similar_to"
    ], "The used relation.")
    model_g.add_arg("is_all_relation", bool, True, "use all relations")
    model_g.add_arg("selected_relation", str, "_hyponym,_hypernym,_derivationally_related_form", "relations")
    model_g.add_arg("wn18_dir", str, "", "wn18 dir")

    # SSL part
    model_g.add_arg("use_consistent_loss_wn", bool, False,
                    "add consistent loss between entity embedding from WN.")
    model_g.add_arg("warm_up", int, 10000, "warm_up_iterations")
    model_g.add_arg("consistent_loss_wn_coeff", float, 2.0, "Weight decay if we apply some.")
    model_g.add_arg("consistent_loss_type", str, "kld", "consistent loss type")
    model_g.add_arg("mark", str, "test1", "mark")
    model_g.add_arg("tensorboard_dir", str, "./", "tensorboard_dir")
    model_g.add_arg("debug", bool, False, "debug")

    model_g.add_arg(
        "model_name_or_path", str, "",
        "Path to pretrained model or model identifier from huggingface.co/models")
    model_g.add_arg(
        "config_name", str, "",
        "Pretrained config name or path if not the same as model_name")
    model_g.add_arg("model_type", str, "kelm", "The classification model to be used.")
    model_g.add_arg("text_embed_model", str, "bert", "The model for embedding texts in KELM model.")
    model_g.add_arg("output_dir", str, "../outputs/test", "Path to save checkpoints.")
    model_g.add_arg("overwrite_output_dir", bool, True, "Overwrite the content of the output directory.")
    # Declared like every other option: bare name, positional type/default/help.
    # The previous call passed "--tokenizer_name" with keyword arguments, which
    # was inconsistent with the rest of this function.
    model_g.add_arg(
        "tokenizer_name", str, "",
        "Pretrained tokenizer name or path if not the same as model_name")
    model_g.add_arg("per_gpu_train_batch_size", int, 6, "Batch size per GPU/CPU for training.")
    model_g.add_arg("per_gpu_eval_batch_size", int, 4, "Batch size per GPU/CPU for evaluation.")
    model_g.add_arg(
        "max_steps", int, -1,
        "If > 0: set total number of training steps to perform. Override num_train_epochs.")
    model_g.add_arg(
        "gradient_accumulation_steps", int, 1,
        "Number of updates steps to accumulate before performing a backward/update pass.")
    model_g.add_arg("num_train_epochs", float, 10, "Total number of training epochs to perform.")
    model_g.add_arg("weight_decay", float, 0.01, "Weight decay if we apply some.")
    model_g.add_arg("learning_rate", float, 3e-4, "The initial learning rate for Adam.")
    model_g.add_arg("adam_epsilon", float, 1e-8, "Epsilon for Adam optimizer.")
    model_g.add_arg("warmup_steps", int, 10, "Linear warmup over warmup_steps.")
    model_g.add_arg("max_grad_norm", float, 1.0, "Max gradient norm.")
    model_g.add_arg("evaluate_steps", int, 2, "Evaluate every X updates steps.")
    model_g.add_arg("evaluate_epoch", float, 0.0, "evaluate every X update epoch")

    model_g.add_arg("save_steps", int, 1, "Save every X updates steps.")
    model_g.add_arg("evaluate_during_training", bool, True,
                    "Run evaluation during training at each logging step.")
    model_g.add_arg(
        "n_best_size", int, 20,
        "The total number of n-best predictions to generate in the nbest_predictions.json output file.")
    model_g.add_arg(
        "verbose_logging", bool, False,
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")
    model_g.add_arg("init_dir", str, "", "The path of loading pre-trained model.")
    model_g.add_arg("initializer_range", float, 0.02, "The initializer range for KELM")
    model_g.add_arg("cat_mul", bool, True, "The output part of vector in KELM")
    model_g.add_arg("cat_sub", bool, True, "The output part of vector in KELM")
    model_g.add_arg("cat_twotime", bool, True, "The output part of vector in KELM")
    model_g.add_arg("cat_twotime_mul", bool, True, "The output part of vector in KELM")
    model_g.add_arg("cat_twotime_sub", bool, False, "The output part of vector in KELM")

    # ------------------------------------------------------------------
    # Data options.
    # ------------------------------------------------------------------
    data_g = ArgumentGroup(
        parser, "data", "Data paths, vocab paths and data processing options")
    data_g.add_arg("train_file", str, "record/train_0831.json", "ReCoRD json for training. E.g., train.json.")
    data_g.add_arg("predict_file", str, "record/dev_0831.json", "ReCoRD json for predictions. E.g. dev.json.")
    data_g.add_arg("cache_file_suffix", str, "test", "The suffix of cached file.")
    data_g.add_arg("cache_dir", str, "", "The cached data path.")
    data_g.add_arg("cache_store_dir", str, "", "The cached data path.")
    data_g.add_arg(
        "data_dir", str, "",
        "The input data dir. Should contain the .json files for the task." +
        "If no data dir or train/predict files are specified, will run with tensorflow_datasets.")

    data_g.add_arg("vocab_path", str, "vocab.txt", "Vocabulary path.")
    data_g.add_arg(
        "do_lower_case", bool, False,
        "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
    data_g.add_arg("seed", int, 42, "Random seed.")
    # NOTE(review): `type=dict` / `type=list` cannot parse a command-line
    # string meaningfully (assuming the type is forwarded to argparse), so
    # kg_paths and use_kgs are effectively configured through their defaults.
    data_g.add_arg("kg_paths", dict, {
        "wordnet": "kgs/",
        "nell": "kgs/"
    }, "The paths of knowledge graph files.")
    data_g.add_arg("wn_concept_embedding_path", str, "embedded/wn_concept2vec.txt",
                   "The embeddings of concept in knowledge graph : Wordnet.")
    data_g.add_arg("nell_concept_embedding_path", str, "embedded/nell_concept2vec.txt",
                   "The embeddings of concept in knowledge graph : Nell.")
    data_g.add_arg("use_kgs", list, ['nell', 'wordnet'], "The used knowledge graphs.")
    data_g.add_arg(
        "doc_stride", int, 128,
        "When splitting up a long document into chunks, how much stride to take between chunks.")
    data_g.add_arg("max_seq_length", int, 384, "Number of words of the longest seqence.")
    data_g.add_arg("max_query_length", int, 64, "Max query length.")
    data_g.add_arg("max_answer_length", int, 30, "Max answer length.")
    data_g.add_arg("no_stopwords", bool, True, "Whether to include stopwords.")
    data_g.add_arg("ignore_length", int, 0, "The smallest size of token.")
    data_g.add_arg("print_loss_step", int, 100, "The steps to print loss.")

    # ------------------------------------------------------------------
    # Run-type options.
    # ------------------------------------------------------------------
    run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
    run_type_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.")
    run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
    run_type_g.add_arg("max_n_gpu", int, 100, "The maximum number of GPU to use.")
    run_type_g.add_arg("use_fast_executor", bool, False,
                       "If set, use fast parallel executor (in experiment).")
    run_type_g.add_arg(
        "num_iteration_per_drop_scope", int, 1,
        "Ihe iteration intervals to clean up temporary variables.")
    run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
    run_type_g.add_arg("do_eval", bool, False, "Whether to perform evaluation during training.")
    run_type_g.add_arg("do_predict", bool, False, "Whether to perform prediction.")
    run_type_g.add_arg("freeze", bool, True, "freeze bert parameters")
    run_type_g.add_arg("server_ip", str, "", "Can be used for distant debugging.")
    run_type_g.add_arg(
        "chunksize", int, 1024,
        "The chunksize for multiprocessing to convert examples to features.")
    run_type_g.add_arg("server_port", str, "", "Can be used for distant debugging.")
    run_type_g.add_arg("local_rank", int, -1, "Index for distributed training on gpus.")
    run_type_g.add_arg("threads", int, 50, "multiple threads for converting example to features")
    run_type_g.add_arg("overwrite_cache", bool, False,
                       "Overwrite the cached training and evaluation sets")
    run_type_g.add_arg(
        "eval_all_checkpoints", bool, False,
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
    run_type_g.add_arg(
        "min_diff_steps", int, 50,
        "The minimum saving steps before the last maximum steps.")

    args = parser.parse_args()

    logging.getLogger("transformers.modeling_utils").setLevel(
        logging.WARNING)  # Reduce model loading logs

    # When not using every relation, the comma-separated --selected_relation
    # string replaces the default relation list.
    if not args.is_all_relation:
        args.relation_list = args.selected_relation.split(",")
        logger.info("not use all relation, relation_list: {}".format(
            args.relation_list))

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    # Refuse to clobber a populated output directory unless explicitly allowed.
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        # server_port is declared as a string option; the socket address needs
        # an integer port.
        ptvsd.enable_attach(address=(args.server_ip, int(args.server_port)),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or not args.use_cuda:
        # Single-process mode: use CUDA when available and requested, else CPU.
        device = torch.device(
            "cuda" if torch.cuda.is_available() and args.use_cuda else "cpu")
        args.n_gpu = 0 if not args.use_cuda else min(args.max_n_gpu, torch.cuda.device_count())
    else:
        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Only rank -1/0 creates the output directory. makedirs also creates
    # missing parents (the default "../outputs/test" is nested) and tolerates
    # the directory already existing.
    if args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir, exist_ok=True)

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARNING,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.use_fp16,
    )

    # Set seed
    set_seed(args)

    logger.info("Parameters from arguments are:\n{}".format(args))

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.use_fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.use_fp16:
        try:
            import apex

            apex.amp.register_half_function(torch, "einsum")
        except ImportError as err:
            # Chain the original error so the underlying import failure
            # remains visible in the traceback.
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            ) from err

    processor = RecordProcessor(args)

    # Cache directory: <cache_store_dir>/cached_<model_type>_<suffix>
    input_dir = os.path.join(
        args.cache_store_dir, "cached_{}_{}".format(
            args.model_type,
            str(args.cache_file_suffix),
        ))
    os.makedirs(input_dir, exist_ok=True)

    if args.full_table:
        # This entry point only supports full_table=False.
        logger.warning("set full_table False and program exits")
        sys.exit()
    else:
        args.wn_def_embed_mat_dir = os.path.join(
            input_dir, args.cache_file_suffix) + "_" + "definition_embedding"

    # if not os.path.exists(args.wn_def_embed_mat_dir):
    #     data_path = os.path.join(args.data_dir, args.kg_paths["wordnet"])
    #     definition_embedding_mat = create_definition_table(args, data_path)
    #
    #     torch.save({"definition_embedding_mat": definition_embedding_mat}, args.wn_def_embed_mat_dir)
    #
    #     logger.info("definition embedding is done. program exits.")
    #     exit()

    ## create data
    retrievers = dict()
    for kg in args.use_kgs:
        logger.info("Initialize kg:{}".format(kg))
        kg_path = os.path.join(input_dir, args.kg_paths[kg])
        data_path = os.path.join(args.data_dir, args.kg_paths[kg])

        retrievers[kg] = initialize_kg_retriever(kg, kg_path, data_path,
                                                 args.cache_file_suffix)

    if args.data_preprocess:
        logger.info("begin preprocess")
        create_dataset(args,
                       processor,
                       retrievers,
                       relation_list=args.relation_list,
                       evaluate=args.data_preprocess_evaluate,
                       input_dir=input_dir)

        logger.info("data preprocess is done")

    # Load pretrained model and tokenizers
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    tokenizer, model = configure_tokenizer_model(args, logger, retrievers)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    results = evaluate(args,
                       model,
                       processor,
                       tokenizer,
                       100,
                       input_dir,
                       prefix=args.mark)

    if args.local_rank in [-1, 0]:
        logger.info("results: {}".format(results))

    logger.info("eval is done")
import os
import time
import argparse

from utils.args import ArgumentGroup


class CustomAction(argparse.Action):
    """Argparse action that stores the received values joined by spaces."""

    def __call__(self, parser, namespace, values, option_string=None):
        # `values` is expected to be an iterable of strings (e.g. from
        # nargs='+'); the destination receives one space-separated string.
        setattr(namespace, self.dest, " ".join(values))


# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Model configuration, vocabulary files and adversarial-training options.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("init_pretraining_params", str, None,
                "Init pre-training params which preforms fine-tuning from. If the "
                "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
model_g.add_arg("weight_sharing", bool, True, "If set, share weights between word embedding and masked lm.")
model_g.add_arg("unimo_vocab_file", str, './model_files/dict/unimo_en.vocab.txt', "unimo vocab")
model_g.add_arg("encoder_json_file", str, './model_files/dict/unimo_en.encoder.json', 'bpt map')
model_g.add_arg("vocab_bpe_file", str, './model_files/dict/unimo_en.vocab.bpe', "vocab bpe")
model_g.add_arg("unimo_config_path", str, "./model_files/config/unimo_base_en.json",
                "The file to save unimo configuration.")
model_g.add_arg("object_file", str, "./data/coco_object_0.35_tot.ids",
                "The object file for image bounding boxes.")
model_g.add_arg("adv_type", str, "villa",
                "The adversial learning type: freelb_image, freelb_text, villa")
model_g.add_arg("adv_step", int, 4, "adv_step")
model_g.add_arg("adv_lr", float, 0.05, "adv_lr")
from reader.squad import DataProcessor, write_predictions
from model.xlnet import XLNetConfig, XLNetModel
from utils.args import ArgumentGroup, print_arguments
from optimization import optimization
from utils.init import init_pretraining_params, init_checkpoint
from modeling import log_softmax

# Python 2 exposes the C-accelerated pickle as a separate module.
if six.PY2:
    import cPickle as pickle
else:
    import pickle

# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Model configuration and checkpoint paths.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("model_config_path", str, None, "Path to the json file for xlnet model config.")
model_g.add_arg("dropout", float, 0.1, "Dropout rate.")
model_g.add_arg("dropatt", float, 0.1, "Attention dropout rate.")
model_g.add_arg("clamp_len", int, -1, "Clamp length.")
# Only 'last' is accepted for summary_type (enforced through `choices`).
model_g.add_arg("summary_type", str, "last",
                "Method used to summarize a sequence into a vector.", choices=['last'])
model_g.add_arg("spiece_model_file", str, None, "Sentence Piece model path.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("init_pretraining_params", str, None,
                "Init pre-training params which preforms fine-tuning from. If the "
                "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")

# Parameter initialization
init_g = ArgumentGroup(parser, "init", "parameter initialization options.")
import os
import time
import argparse

from utils.args import ArgumentGroup


class CustomAction(argparse.Action):
    """Argparse action that stores the received values joined by spaces."""

    def __call__(self, parser, namespace, values, option_string=None):
        # `values` is expected to be an iterable of strings (e.g. from
        # nargs='+'); the destination receives one space-separated string.
        setattr(namespace, self.dest, " ".join(values))


# yapf: disable
# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)

# Model configuration and checkpoint paths.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("ernie_config_path", str, None, "Path to the json file for ernie model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("init_pretraining_params", str, None,
                "Init pre-training params which preforms fine-tuning from. If the "
                "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
model_g.add_arg("weight_sharing", bool, True, "If set, share weights between word embedding and masked lm.")
model_g.add_arg("role_type_size", int, 2, "role type size")
model_g.add_arg("turn_type_size", int, 16, "turn type size")

# Optimisation hyper-parameters.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
import argparse
import numpy as np
import tokenization

from utils.args import ArgumentGroup

# ArgumentParser's first positional parameter is `prog`, so the module
# docstring must be passed by keyword to be shown as the help description.
parser = argparse.ArgumentParser(description=__doc__)
g = ArgumentGroup(parser, "model", "model configuration and paths.")
g.add_arg("sample_strategy", str, 'max_sample', "Sample strategy.")
g.add_arg("weights_path", str, None, "Path to the weights file.")
g.add_arg("adv_text_path", str, None, "Path to save the generated adversarial sentences.")
g.add_arg("bert_vocab_file", str, None, "Path to the bert vocab file.")
args = parser.parse_args()

# Sentinel until the sequence length is read from the weights file header.
adv_seq_len = -1
do_lower_case = True
tokenizer = tokenization.FullTokenizer(vocab_file=args.bert_vocab_file, do_lower_case=do_lower_case)

if args.sample_strategy == 'max_sample':
    with open(args.weights_path, 'r') as fin, open(args.adv_text_path, 'w') as fout:
        # Records in the weights file are separated by blank lines.
        items = fin.read().split('\n\n')
        for idx, item in enumerate(items):
            if item == "":
                continue
            lines = item.split('\n')
            if idx == 0:
                # The first record carries the adversarial sequence length as
                # its first line; strip it so all records share one layout.
                adv_seq_len = int(lines[0])
                del lines[0]
            # Each record must hold adv_seq_len + 2 lines.
            assert len(lines) == adv_seq_len + 2
            qas_id = lines[0]
# limitations under the License. """ args defination and default value """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import time import argparse from utils.args import ArgumentGroup, print_arguments # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("ernie_config_path", str, "./config/ernie_config.json", "json file path for ernie model config.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.") model_g.add_arg("task_name", str, "vcr", "Task to finetune on ERNIE-ViL") model_g.add_arg("exp", str, "experiment", "Name of the experiment, Tagged onto csv") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 100, "Number of epoches for training.") train_g.add_arg("learning_rate", float, 0.0001, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay", "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay', 'manual_warmup_decay']) train_g.add_arg("decay_steps", str, "", "learning rate decay steps, list with ;") train_g.add_arg("lr_decay_ratio", float, 0.1, "learning rate decay ratio, used with manual_warmup_decay") train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.") train_g.add_arg("num_train_steps", int, 1000000, "Total steps to perform pretraining.")
from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import import os import time import argparse from utils.args import ArgumentGroup # yapf: disable parser = argparse.ArgumentParser(__doc__) parser.add_argument('--use_cuda', action='store_true') model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("init_pretraining_params", str, None, "Init pre-training params which preforms fine-tuning from. If the " "arg 'init_checkpoint' has been set, this argument wouldn't be valid.") model_g.add_arg("./save_dir", str, "./checkpoints", "Path to save checkpoints.") model_g.add_arg("hidden_size", int, 128, "hidden size.") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.") train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay", "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay']) train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.") train_g.add_arg("warmup_proportion", float, 0.1,
import paddle
import paddle.fluid as fluid

from classifier import create_model
import reader

# The BERT sources live in a sibling directory; it must be on sys.path
# before the `model`, `optimization` and `utils` imports below.
sys.path.append("./BERT")
from model.bert import BertConfig
from optimization import optimization
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params, init_checkpoint

import scipy
from sklearn.model_selection import KFold, StratifiedKFold

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "bert_config_path", str, None,
    "Path to the json file for bert model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")

# Optimisation schedule and regularisation options.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
train_g.add_arg(
    "learning_rate", float, 5e-5,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "warmup_proportion", float, 0.1,
    "Proportion of training steps to perform linear learning rate warmup for.")
"""finetune args"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import

import os
import time
import argparse

from utils.args import ArgumentGroup

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Where to initialise model weights from.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")

# Basic optimisation settings.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
train_g.add_arg(
    "learning_rate", float, 5e-5,
    "Learning rate used to train with warmup.")

# Runtime options: device, worker count, output location and model choice.
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg(
    "use_cuda", bool, True,
    "If set, use GPU for training.")
run_type_g.add_arg(
    "num_workers", int, 4,
    "use multiprocess to generate graph")
run_type_g.add_arg(
    "output_path", str, None,
    "path to save model")
run_type_g.add_arg(
    "model", str, None,
    "model to run")
run_type_g.add_arg(
    "hidden_size", int, 256,
    "model hidden-size")
def p_args(self):
    """Build the fine-tuning command-line parser and parse the arguments.

    Registers the "model", "training", "logging", "data" and "run_type"
    argument groups on a fresh parser, in that order, and then parses the
    process command line.

    Returns:
        The namespace returned by ``parse_args()``.
    """
    cli = argparse.ArgumentParser(__doc__)

    # Model configuration and checkpoint locations.
    model_opts = ArgumentGroup(cli, "model", "model configuration and paths.")
    model_opts.add_arg("bert_config_path", str, None,
                       "Path to the json file for bert model config.")
    model_opts.add_arg("init_checkpoint", str, None,
                       "Init checkpoint to resume training from.")
    model_opts.add_arg(
        "init_pretraining_params", str, "uncased_L-24_H-1024_A-16/params",
        "Init pre-training params which preforms fine-tuning from. If the "
        "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
    # NOTE(review): "$PWD" is a literal here; nothing in this function
    # expands environment variables -- confirm a consumer does.
    model_opts.add_arg("checkpoints", str, "$PWD/tmp",
                       "Path to save checkpoints.")

    # Optimisation schedule, regularisation and loss scaling.
    train_opts = ArgumentGroup(cli, "training", "training options.")
    train_opts.add_arg("epoch", int, 3,
                       "Number of epoches for fine-tuning.")
    train_opts.add_arg("learning_rate", float, 5e-5,
                       "Learning rate used to train with warmup.")
    train_opts.add_arg("lr_scheduler", str, "linear_warmup_decay",
                       "scheduler of learning rate.",
                       choices=['linear_warmup_decay', 'noam_decay'])
    train_opts.add_arg("weight_decay", float, 0.01,
                       "Weight decay rate for L2 regularizer.")
    train_opts.add_arg(
        "warmup_proportion", float, 0.1,
        "Proportion of training steps to perform linear learning rate warmup for.")
    train_opts.add_arg(
        "loss_scaling", float, 1.0,
        "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")

    # Logging cadence and verbosity.
    log_opts = ArgumentGroup(cli, "logging", "logging related.")
    log_opts.add_arg("skip_steps", int, 1,
                     "The steps interval to print loss.")
    log_opts.add_arg("verbose", bool, False,
                     "Whether to output verbose log.")

    # Dataset location, vocabulary and batching.
    data_opts = ArgumentGroup(
        cli, "data", "Data paths, vocab paths and data processing options")
    data_opts.add_arg("data_dir", str, "xnli",
                      "Path to training data.")
    data_opts.add_arg("vocab_path", str,
                      "uncased_L-24_H-1024_A-16/vocab.txt",
                      "Vocabulary path.")
    data_opts.add_arg("max_seq_len", int, 32,
                      "Number of words of the longest seqence.")
    data_opts.add_arg(
        "batch_size", int, 5,
        "Total examples' number in batch for training. see also --in_tokens.")
    data_opts.add_arg(
        "in_tokens", bool, False,
        "If set, the batch size will be the maximum number of tokens in one batch. "
        "Otherwise, it will be the maximum number of examples in one batch.")
    data_opts.add_arg(
        "do_lower_case", bool, True,
        "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
    data_opts.add_arg("random_seed", int, 0, "Random seed.")
    data_opts.add_arg("shuffle_seed", int, 2, "Shuffle seed.")

    # Executor behaviour and task selection.
    run_opts = ArgumentGroup(cli, "run_type", "running type options.")
    run_opts.add_arg(
        "use_fast_executor", bool, False,
        "If set, use fast parallel executor (in experiment).")
    run_opts.add_arg("shuffle", bool, False, "")
    run_opts.add_arg(
        "num_iteration_per_drop_scope", int, 1,
        "Ihe iteration intervals to clean up temporary variables.")
    run_opts.add_arg(
        "task_name", str, "XNLI",
        "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
    run_opts.add_arg("do_train", bool, True,
                     "Whether to perform training.")

    return cli.parse_args()
from paddle.fluid import profiler

import reader.cls as reader
from model.bert import BertConfig
from model.classifier import create_model
from optimization import optimization
from utils.args import ArgumentGroup, print_arguments, check_cuda, check_version
from utils.init import init_pretraining_params, init_checkpoint
from utils.cards import get_cards
import dist_utils

# Trainer count comes from the PADDLE_TRAINERS_NUM environment variable,
# falling back to 1 when it is not set.
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "bert_config_path", str, None,
    "Path to the json file for bert model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")

# Optimisation schedule and regularisation options.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
train_g.add_arg(
    "learning_rate", float, 5e-5,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "warmup_proportion", float, 0.1,
    "Proportion of training steps to perform linear learning rate warmup for.")
# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import import os import time import argparse from utils.args import ArgumentGroup # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("model_path", str, None, "Pretrained ERNIE path.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("save_checkpoints", str, "checkpoints", "Path to save checkpoints.") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.") train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay", "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay']) train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.") train_g.add_arg("warmup_proportion", float, 0.1, "Proportion of training steps to perform linear learning rate warmup for.") train_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.") train_g.add_arg("use_dynamic_loss_scaling", bool, True, "Whether to use dynamic loss scaling.") train_g.add_arg("init_loss_scaling", float, 102400,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""args for classification task"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

from utils.args import ArgumentGroup

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Checkpoint handling plus UNIMO vocabulary / configuration files.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")
model_g.add_arg(
    "save_checkpoints", bool, True,
    "Whether to save checkpoints")
model_g.add_arg(
    "weight_sharing", bool, True,
    "If set, share weights between word embedding and masked lm.")
model_g.add_arg(
    "unimo_vocab_file", str, './model_files/dict/unimo_en.vocab.txt',
    "unimo vocab")
model_g.add_arg(
    "encoder_json_file", str, './model_files/dict/unimo_en.encoder.json',
    'bpt map')
model_g.add_arg(
    "vocab_bpe_file", str, './model_files/dict/unimo_en.vocab.bpe',
    "vocab bpe")
model_g.add_arg(
    "unimo_config_path", str, "./model_files/config/unimo_base_en.json",
    "The file to save unimo configuration.")

# Training options.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
from model.bert import BertConfig, BertModel
from model.layers import MemoryLayer, TriLinearTwoTimeSelfAttentionLayer
from utils.args import ArgumentGroup, print_arguments
from optimization import optimization
from utils.init import init_pretraining_params, init_checkpoint

# Configure the root logger at INFO with a timestamped format, then create
# the module-level logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
logger = logging.getLogger(__name__)

# yapf: disable
parser = argparse.ArgumentParser()

# Model configuration and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "bert_config_path", str, None,
    "Path to the json file for bert model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")

# Optimisation schedule and regularisation options.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
train_g.add_arg(
    "learning_rate", float, 5e-5,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "warmup_proportion", float, 0.1,
    "Proportion of training steps to perform linear learning rate warmup for.")
import argparse
import numpy as np
import multiprocessing

import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

import reader.cls as reader
from model.bert import BertConfig
from model.cls import ClsModelLayer
from optimization import Optimizer
from utils.args import ArgumentGroup, print_arguments, check_cuda
from utils.init import init_from_static_model

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "bert_config_path", str, "./config/bert_config.json",
    "Path to the json file for bert model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")

# Optimisation schedule, regularisation and checkpoint cadence.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 100,
    "Number of epoches for training.")
train_g.add_arg(
    "learning_rate", float, 0.0001,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "warmup_proportion", float, 0.1,
    "Proportion of training steps to perform linear learning rate warmup for.")
train_g.add_arg(
    "save_steps", int, 10000,
    "The steps interval to save checkpoints.")
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All args for running all models"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

from utils.args import ArgumentGroup

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model selection, configuration and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "model_name", str, "seq2seq",
    "type of model to run",
    choices=["seq2seq", "graphsum", "roberta_graphsum", "ernie_graphsum", "ernie_seq2seq"])
model_g.add_arg(
    "config_path", str, None,
    "Path to the json file for transformer seq2seq model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")
model_g.add_arg(
    "weight_sharing", bool, True,
    "If set, share weights between word embedding and masked lm.")

# Device, distribution and executor options.
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg(
    "use_cuda", bool, True,
    "If set, use GPU for training.")
run_type_g.add_arg(
    "is_distributed", bool, False,
    "If set, then start distributed training.")
run_type_g.add_arg(
    "use_fast_executor", bool, False,
    "If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
    "num_iteration_per_drop_scope", int, 10,
    "Iteration intervals to drop scope.")
# See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import time import argparse from utils.args import ArgumentGroup # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("ernie_config_path", str, None, "Path to the json file for ernie model config.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("init_pretraining_params", str, None, "Init pre-training params which preforms fine-tuning from. If the " "arg 'init_checkpoint' has been set, this argument wouldn't be valid.") model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.") model_g.add_arg("is_classify", bool, True, "is_classify") model_g.add_arg("is_regression", bool, False, "is_regression") model_g.add_arg("task_id", int, 0, "task id") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.") train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
import sys import subprocess import os import six import copy import argparse import time import logging from utils.args import ArgumentGroup, print_arguments, prepare_logger from pretrain_args import parser as worker_parser # yapf: disable parser = argparse.ArgumentParser(__doc__) multip_g = ArgumentGroup(parser, "multiprocessing", "start paddle training using multi-processing mode.") multip_g.add_arg("node_ips", str, None, "paddle trainer ips") multip_g.add_arg("node_id", int, 0, "the trainer id of the node for multi-node distributed training.") multip_g.add_arg("print_config", bool, True, "print the config of multi-processing mode.") multip_g.add_arg("current_node_ip", str, None, "the ip of current node.") multip_g.add_arg("split_log_path", str, "./log", "log path for each trainer.") multip_g.add_arg("log_prefix", str, "", "the prefix name of job log.") multip_g.add_arg("nproc_per_node", int, 8, "the number of process to use on each node.") multip_g.add_arg("selected_gpus", str, "0,1,2,3,4,5,6,7",
import argparse
import numpy as np
import multiprocessing

import paddle
import paddle.fluid as fluid

from reader.pretraining import DataReader
from model.bert import BertModel, BertConfig
from optimization import optimization
from utils.args import ArgumentGroup, print_arguments, check_cuda
from utils.init import init_checkpoint, init_pretraining_params

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration, checkpoint locations and pretraining switches.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "bert_config_path", str, "./config/bert_config.json",
    "Path to the json file for bert model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")
model_g.add_arg(
    "weight_sharing", bool, True,
    "If set, share weights between word embedding and masked lm.")
model_g.add_arg(
    "generate_neg_sample", bool, True,
    "If set, randomly generate negtive samples by positive samples.")

# Optimisation schedule, step budget and checkpoint cadence.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 100,
    "Number of epoches for training.")
train_g.add_arg(
    "learning_rate", float, 0.0001,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "num_train_steps", int, 1000000,
    "Total steps to perform pretraining.")
train_g.add_arg(
    "warmup_steps", int, 4000,
    "Total steps to perform warmup when pretraining.")
train_g.add_arg(
    "save_steps", int, 10000,
    "The steps interval to save checkpoints.")
import os
import argparse
import numpy as np
import multiprocessing

import paddle.fluid as fluid

import reader.task_reader as task_reader
from model.ernie import ErnieConfig, ErnieModel
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration, pretrained parameters and embedding output location.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "ernie_config_path", str, None,
    "Path to the json file for ernie model config.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg(
    "output_dir", str, "embeddings",
    "path to save embeddings extracted by ernie_encoder.")

# Input data, vocabulary and batching.
data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options")
data_g.add_arg(
    "data_set", str, None,
    "Path to data for calculating ernie_embeddings.")
data_g.add_arg(
    "vocab_path", str, None,
    "Vocabulary path.")
data_g.add_arg(
    "max_seq_len", int, 512,
    "Number of words of the longest seqence.")
data_g.add_arg(
    "batch_size", int, 32,
    "Total examples' number in batch for training.")
data_g.add_arg(
    "do_lower_case", bool, True,
    "Whether to lower case the input text. Should be True for uncased models and False for cased models.")

# Runtime options group; its arguments are registered further down the file.
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
"""finetune args"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import

import os
import time
import argparse

from utils.args import ArgumentGroup

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Where to initialise model weights from.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")

# Basic optimisation settings.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 3,
    "Number of epoches for fine-tuning.")
train_g.add_arg(
    "learning_rate", float, 5e-5,
    "Learning rate used to train with warmup.")

# Runtime options: device, worker count, output location and sizes.
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg(
    "use_cuda", bool, True,
    "If set, use GPU for training.")
run_type_g.add_arg(
    "num_workers", int, 1,
    "use multiprocess to generate graph")
run_type_g.add_arg(
    "output_path", str, None,
    "path to save model")
run_type_g.add_arg(
    "hidden_size", int, 128,
    "model hidden-size")
run_type_g.add_arg(
    "batch_size", int, 128,
    "batch_size")
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import argparse

from utils.args import ArgumentGroup, print_arguments

# yapf: disable
# NOTE(review): __doc__ is passed positionally, which argparse binds to
# `prog` rather than `description` -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)

# Model configuration, checkpoint locations and pretraining switches.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "ernie_config_path", str, "./config/ernie_config.json",
    "Path to the json file for ernie model config.")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from.")
model_g.add_arg(
    "checkpoints", str, "checkpoints",
    "Path to save checkpoints.")
model_g.add_arg(
    "weight_sharing", bool, True,
    "If set, share weights between word embedding and masked lm.")
model_g.add_arg(
    "generate_neg_sample", bool, False,
    "If set, randomly generate negtive samples by positive samples.")

# Optimisation schedule, step budget and checkpoint cadence.
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg(
    "epoch", int, 100,
    "Number of epoches for training.")
train_g.add_arg(
    "learning_rate", float, 0.0001,
    "Learning rate used to train with warmup.")
train_g.add_arg(
    "lr_scheduler", str, "linear_warmup_decay",
    "scheduler of learning rate.",
    choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg(
    "weight_decay", float, 0.01,
    "Weight decay rate for L2 regularizer.")
train_g.add_arg(
    "num_train_steps", int, 1000000,
    "Total steps to perform pretraining.")
train_g.add_arg(
    "warmup_steps", int, 5000,
    "Total steps to perform warmup when pretraining.")
train_g.add_arg(
    "save_steps", int, 10000,
    "The steps interval to save checkpoints.")
"""finetune args""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import import os import time import argparse from utils.args import ArgumentGroup # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = ArgumentGroup(parser, "model", "model configuration and paths.") model_g.add_arg("ernie_config_path", str, None, "Path to the json file for ernie model config.") model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") model_g.add_arg("init_pretraining_params", str, None, "Init pre-training params which preforms fine-tuning from. If the " "arg 'init_checkpoint' has been set, this argument wouldn't be valid.") model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.") train_g = ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.") train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.") train_g.add_arg("lr_scheduler", str, "linear_warmup_decay", "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay']) train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.") train_g.add_arg("warmup_proportion", float, 0.1,
from evaluation import compute_kbc_metrics
from evaluation import pathquery_batch_evaluation
from evaluation import compute_pathquery_metrics
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params, init_checkpoint

# Configure the root logger at INFO with a timestamped format, then create
# the module-level logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
logger = logging.getLogger(__name__)

# yapf: disable
parser = argparse.ArgumentParser()

# CoKE model hyper-parameters and checkpoint locations.
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg(
    "hidden_size", int, 256,
    "CoKE model config: hidden size, default 256")
model_g.add_arg(
    "num_hidden_layers", int, 6,
    "CoKE model config: num_hidden_layers, default 6")
model_g.add_arg(
    "num_attention_heads", int, 4,
    "CoKE model config: num_attention_heads, default 4")
model_g.add_arg(
    "vocab_size", int, -1,
    "CoKE model config: vocab_size")
# Help text previously said "vocab_size" (copy-paste from the line above).
model_g.add_arg(
    "num_relations", int, None,
    "CoKE model config: num_relations")
model_g.add_arg(
    "max_position_embeddings", int, 10,
    "CoKE model config: max_position_embeddings")
model_g.add_arg(
    "hidden_act", str, "gelu",
    "CoKE model config: hidden_act, default gelu")
# Help text previously named attention_probs_dropout_prob for this option.
model_g.add_arg(
    "hidden_dropout_prob", float, 0.1,
    "CoKE model config: hidden_dropout_prob, default 0.1")
model_g.add_arg(
    "attention_probs_dropout_prob", float, 0.1,
    "CoKE model config: attention_probs_dropout_prob, default 0.1")
# Was declared as int although the default is 0.02: any fractional value
# given on the command line (e.g. "0.02") failed int() conversion. float
# still accepts integer literals, so existing invocations keep working.
model_g.add_arg(
    "initializer_range", float, 0.02,
    "CoKE model config: initializer_range")
model_g.add_arg(
    "intermediate_size", int, 512,
    "CoKE model config: intermediate_size, default 512")
model_g.add_arg(
    "init_checkpoint", str, None,
    "Init checkpoint to resume training from, or for prediction only")
model_g.add_arg(
    "init_pretraining_params", str, None,
    "Init pre-training params which preforms fine-tuning from. If the "
    "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")