def run_finetuning(config: configure_finetuning.FinetuningConfig):
  """Run finetuning (Horovod multi-worker variant).

  Trains and evaluates `config.num_trials` models with different random
  seeds, optionally writing test-set predictions for supported tasks.

  Args:
    config: finetuning configuration; `model_dir` and `train_batch_size`
      are adjusted in place for distributed execution.
  """
  hvd.init()
  # Rank 0 keeps the base model dir; other ranks write to a rank-named
  # subdirectory so checkpoints do not collide.
  config.model_dir = config.model_dir if hvd.rank() == 0 else \
      os.path.join(config.model_dir, str(hvd.rank()))
  # Split the global batch size evenly across workers.
  config.train_batch_size = config.train_batch_size // hvd.size()

  # Setup for training
  results = []
  trial = 1
  heading_info = "model={:}, trial {:}/{:}".format(
      config.model_name, trial, config.num_trials)
  heading = lambda msg: utils.heading(msg + ": " + heading_info)
  heading("Config")
  utils.log_config(config)
  generic_model_dir = config.model_dir
  tasks = task_builder.get_tasks(config)

  # Train and evaluate num_trials models with different random seeds
  while config.num_trials < 0 or trial <= config.num_trials:
    # BUG FIX: refresh heading_info each trial; previously every trial was
    # logged as "trial 1/N".
    heading_info = "model={:}, trial {:}/{:}".format(
        config.model_name, trial, config.num_trials)
    config.model_dir = generic_model_dir + "_" + str(trial)
    if config.do_train:
      utils.rmkdir(config.model_dir)

    model_runner = ModelRunner(config, tasks, hvd)
    if config.do_train:
      heading("Start training")
      model_runner.train()
      utils.log()

    if config.do_eval:
      heading("Run dev set evaluation")
      results.append(model_runner.evaluate())
      write_results(config, results)
      if config.write_test_outputs and trial <= config.n_writes_test:
        heading("Running on the test set and writing the predictions")
        for task in tasks:
          # Currently only writing preds for GLUE and SQuAD 2.0 is supported
          if task.name in ["cola", "mrpc", "mnli", "sst", "rte", "qnli",
                           "qqp", "sts"]:
            for split in task.get_test_splits():
              model_runner.write_classification_outputs([task], trial, split)
          elif task.name == "squad":
            scorer = model_runner.evaluate_task(task, "test", False)
            scorer.write_predictions()
            preds = utils.load_json(config.qa_preds_file("squad"))
            null_odds = utils.load_json(config.qa_na_file("squad"))
            # Blank out answers the model considers unanswerable.
            for q in preds:
              if null_odds[q] > config.qa_na_threshold:
                preds[q] = ""
            utils.write_json(preds, config.test_predictions(
                task.name, "test", trial))
          else:
            utils.log("Skipping task", task.name,
                      "- writing predictions is not supported for this task")

    if trial != config.num_trials and (not config.keep_all_models):
      utils.rmrf(config.model_dir)
    trial += 1
def run_finetuning(config: configure_finetuning.FinetuningConfig):
  """Run finetuning on a single GPU selected via `config.gpu`.

  Trains and evaluates `config.num_trials` models with different random
  seeds. This variant only logs dev-set evaluation; test-set prediction
  writing was intentionally disabled.
  """
  # Pin the process to one GPU; PCI_BUS_ID makes device numbering match
  # nvidia-smi so config.gpu selects the expected card.
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)

  # Setup for training
  trial = 1
  heading_info = "model={:}, trial {:}/{:}".format(config.model_name, trial,
                                                   config.num_trials)
  heading = lambda msg: utils.heading(msg + ": " + heading_info)
  heading("Config")
  utils.log_config(config)
  generic_model_dir = config.model_dir
  tasks = task_builder.get_tasks(config)

  # Train and evaluate num_trials models with different random seeds
  while config.num_trials < 0 or trial <= config.num_trials:
    config.model_dir = generic_model_dir + "_" + str(trial)
    if config.do_train:
      utils.rmkdir(config.model_dir)

    model_runner = ModelRunner(config, tasks)
    if config.do_train:
      heading("Start training")
      model_runner.train()
      utils.log()

    if config.do_eval:
      heading("Run dev set evaluation")
      # Results are logged by evaluate(); accumulation/writing of results
      # and test-set prediction writing were removed in this variant
      # (the dead commented-out code that documented this has been deleted).
      model_runner.evaluate()

    if trial != config.num_trials and (not config.keep_all_models):
      utils.rmrf(config.model_dir)
    trial += 1
def predict(config: configure_finetuning.FinetuningConfig):
  """Write test-set predictions for each supported task.

  Iterates over the trial-numbered model directories produced by training
  and, for every task whose prediction writing is supported, writes the
  classification outputs for each test split.
  """
  # At least one trial even when num_trials is unset/negative.
  total_trials = config.num_trials if config.num_trials > 0 else 1
  print(total_trials)
  base_model_dir = config.model_dir
  supported_tasks = [
      "cola", "mrpc", "mnli", "sst", "rte", "qnli", "qqp", "sts", "yesno",
      "reranker", "weighted-reranker", "gad", "chemprot"
  ]
  for trial in range(1, total_trials + 1):
    utils.log_config(config)
    tasks = task_builder.get_tasks(config)
    config.model_dir = base_model_dir + "_" + str(trial)
    print("config.model_dir:{}".format(config.model_dir))
    runner = ModelRunner(config, tasks)
    utils.heading("Running on the test set and writing the predictions")
    for task in tasks:
      if task.name not in supported_tasks:
        continue
      for split in task.get_test_splits():
        runner.write_classification_outputs([task], trial, split)
def create_obj_fun(config: configure_finetuning.FinetuningConfig):
  """Build a hyperopt objective closure over `config`.

  The returned callable mutates `config` with the sampled hyperparameters,
  trains a fresh model in a parameter-tagged directory, evaluates on the
  dev set, and returns the negated F1 of the first task as the loss.
  """
  base_model_dir = config.model_dir
  tasks = task_builder.get_tasks(config)

  def objective(params):
    epochs = params['num_epochs']
    learning_rate = params['lr']
    batch = params['batch_size']
    config.num_train_epochs = epochs
    config.learning_rate = learning_rate
    config.train_batch_size = batch
    # Tag the model dir with the hyperparameters so runs don't collide.
    tag = "{}_{:.6}_{}".format(epochs, learning_rate, batch)
    config.model_dir = base_model_dir + "_opt_" + tag
    utils.rmkdir(config.model_dir)
    runner = ModelRunner(config, tasks)
    utils.heading("Start training " + tag)
    runner.train()
    utils.log()
    utils.heading("Run dev set evaluation " + tag)
    # evaluate() maps task name -> metrics; take the (single) task's metrics.
    first_task_metrics = list(runner.evaluate().values())[0]
    # hyperopt minimizes, so negate F1.
    return {'loss': -first_task_metrics['f1'], 'status': STATUS_OK}

  return objective
def construct_model():
    """Build a ConvBERT cola finetuning graph, benchmark it, and freeze it.

    Side effects: writes "ConvBert.pb" (frozen graph) to the working
    directory and calls exit(0); the tf2onnx conversion code after the
    exit is currently dead/disabled scaffolding.

    NOTE(review): the original source had its formatting collapsed; the
    statement nesting below (in particular what sits inside the Session
    context) was reconstructed and should be confirmed against the
    original file.
    """
    tf.compat.v1.disable_eager_execution()
    batch_size = 1
    iterations = 10
    tf_datatype = tf.int32
    np_datatype = np.int32
    # Placeholders for the model inputs (fixed sequence length of 128).
    graph_features = {}
    graph_features["input_ids"] = tf.placeholder(
        dtype=tf_datatype, shape=[batch_size, 128], name="input_ids")
    graph_features["input_mask"] = tf.placeholder(
        dtype=tf_datatype, shape=[batch_size, 128], name="input_mask")
    graph_features["segment_ids"] = tf.placeholder(
        dtype=tf_datatype, shape=[batch_size, 128], name="token_type_ids")
    #graph_features["task_id"] = tf.placeholder(dtype=tf_datatype, shape=(batch_size,), name="task_id")
    #graph_features["cola_label_ids"] = tf.placeholder(dtype=tf_datatype, shape=(batch_size,), name="cola_label_ids")
    #graph_features["cola_eid"] = tf.placeholder(dtype=tf_datatype, shape=(batch_size,), name="cola_eid")
    # Dummy numpy inputs for benchmarking.
    # NOTE(review): rand() yields floats in [0, 1), so astype(int32)
    # produces all zeros — presumably acceptable for timing only.
    features = {}
    features["input_ids"] = np.random.rand(batch_size, 128).astype(np_datatype)
    features["input_mask"] = np.random.rand(batch_size, 128).astype(np_datatype)
    features["segment_ids"] = np.random.rand(batch_size, 128).astype(np_datatype)
    #features["task_id"] = np.random.rand(batch_size).astype(np_datatype)
    #features["cola_label_ids"] = np.random.rand(batch_size).astype(np_datatype)
    #features["cola_eid"] = np.random.rand(batch_size).astype(np_datatype)
    features_feed_dict = {
        graph_features[key]: features[key] for key in graph_features
    }
    print(features_feed_dict)
    # Feed constants (not placeholders) for the task-bookkeeping features so
    # they are baked into the graph rather than exported as inputs.
    features_use = graph_features
    features_use["task_id"] = tf.constant(0, dtype=tf_datatype,
                                          shape=(batch_size, ), name="task_id")
    features_use["cola_label_ids"] = tf.constant(
        0, dtype=tf_datatype, shape=(batch_size, ), name="cola_label_ids")
    features_use["cola_eid"] = tf.constant(
        0, dtype=tf_datatype, shape=(batch_size, ), name="cola_eid")
    param = {"model_size": "medium-small", "task_names": ["cola"]}
    config = configure_finetuning.FinetuningConfig(
        model_name="convbert_medium-small", data_dir="./", **param)
    task = task_builder.get_tasks(config)
    # NOTE(review): missing "{}" placeholder — this prints only
    # "Getting tasks:" and never shows the tasks.
    print("Getting tasks:".format(task))
    is_training = False
    nums_steps = 0
    model = FinetuningModel(config, task, is_training, features_use,
                            nums_steps)
    outputs = model.outputs
    # Collect the output tensor names for the (single) task.
    out_dict = {}
    for tks in task:
        out_dict = outputs[tks.name]
    output_names = []
    for key in out_dict:
        output_names.append(out_dict[key].name)
        print(out_dict[key].name)
    run_op_list = []
    outputs_names_with_port = output_names
    # "name:0" -> "name" (freezing wants port-less op names).
    outputs_names_without_port = [
        name.split(":")[0] for name in outputs_names_with_port
    ]
    for index in range(len(outputs_names_without_port)):
        run_op_list.append(outputs_names_without_port[index])
    print(run_op_list)
    inputs_names_with_port = [
        graph_features[key].name for key in graph_features
    ]
    cfg = tf.ConfigProto()
    cfg.gpu_options.allow_growth = True
    with tf.Session(config=cfg) as sess:
        sess.run(tf.global_variables_initializer())
        # Warm-up runs before timing.
        for i in range(iterations):
            sess.run(run_op_list, feed_dict=features_feed_dict)
        tf_time_sum = 0
        a = datetime.now()
        for i in range(iterations):
            tf_result = sess.run(run_op_list, feed_dict=features_feed_dict)
        b = datetime.now()
        tf_time_sum = (b - a).total_seconds()
        tf_time = "[INFO] TF execution time: " + str(
            tf_time_sum * 1000 / iterations) + " ms"
        print(tf_time)
        # Freeze variables into constants so the graph is self-contained.
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, outputs_names_without_port)
        # frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
        # save frozen model
        with open("ConvBert.pb", "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())
    # Everything below this exit is intentionally unreachable: the ONNX
    # conversion is disabled (see the trailing comments).
    exit(0)
    inputs_names_with_port.remove("task_id:0")
    onnx_model_file = "ConvBert.onnx"
    command = "python3 -m tf2onnx.convert --input ConvBert.pb --output %s --fold_const --opset 12 --verbose" % onnx_model_file
    command += " --inputs "
    for name in inputs_names_with_port:
        command += "%s," % name
    command = command[:-1] + " --outputs "
    for name in outputs_names_with_port:
        command += "%s," % name
    command = command[:-1]
    os.system(command)
    print(command)
    #exit(0)
    #do not convert now, it needs to modify onehot layer
    #command = "trtexec --onnx=ConvBert.onnx --verbose"
    #os.system(command)
    print(command)
def run_finetuning(config: configure_finetuning.FinetuningConfig):
  """Run finetuning.

  Trains/evaluates `config.num_trials` models with different random seeds,
  optionally scoring the test split and dumping raw predictions to pickle
  files in the working directory.
  """
  # Setup for training
  results = []
  trial = 1
  heading_info = "model={:}, trial {:}/{:}".format(config.model_name, trial,
                                                   config.num_trials)
  heading = lambda msg: utils.heading(msg + ": " + heading_info)
  heading("Config")
  utils.log_config(config)
  generic_model_dir = config.model_dir
  tasks = task_builder.get_tasks(config)

  # Train and evaluate num_trials models with different random seeds
  while config.num_trials < 0 or trial <= config.num_trials:
    # BUG FIX: refresh heading_info each trial; previously every trial was
    # logged as "trial 1/N".
    heading_info = "model={:}, trial {:}/{:}".format(
        config.model_name, trial, config.num_trials)
    config.model_dir = generic_model_dir + "_" + str(trial)
    if config.do_train:
      utils.rmkdir(config.model_dir)

    model_runner = ModelRunner(config, tasks)
    if config.do_train:
      heading("Start training")
      model_runner.train()
      utils.log()

    if config.do_eval:
      heading("Run dev set evaluation")
      results.append(model_runner.evaluate())
      if config.do_test:
        # Score the test split using the checkpoint selected on dev.
        for task in tasks:
          test_score = model_runner.evaluate_task_test(
              task, results[-1][task.name]['checkpoint_path'])
          results[-1][task.name]["test_results"] = test_score
      write_results(config, results)
      if config.write_test_outputs and trial <= config.n_writes_test:
        heading("Running on the test set and writing the predictions")
        for task in tasks:
          # Currently only writing preds for GLUE and SQuAD 2.0 is supported
          if task.name in [
              "cola", "mrpc", "mnli", "sst", "rte", "qnli", "qqp", "sts"
          ]:
            for split in task.get_test_splits():
              model_runner.write_classification_outputs([task], trial, split)
          elif task.name == "squad":
            scorer = model_runner.evaluate_task(task, "test", False)
            scorer.write_predictions()
            preds = utils.load_json(config.qa_preds_file("squad"))
            null_odds = utils.load_json(config.qa_na_file("squad"))
            # Blank out answers the model considers unanswerable.
            for q in preds:
              if null_odds[q] > config.qa_na_threshold:
                preds[q] = ""
            utils.write_json(
                preds, config.test_predictions(task.name, "test", trial))
          else:
            utils.log(
                "Skipping task", task.name,
                "- writing predictions is not supported for this task")

    if config.do_predict:
      import pickle  # local import: only needed when dumping predictions
      # BUG FIX: the original rebound `results` to the prediction output
      # here, clobbering the list of evaluation dicts and breaking
      # results.append / write_results on the next trial. Also collapses
      # three duplicated `import pickle` statements into one.
      for split in ("dev", "train", "test"):
        if split in config.predict_split:
          predictions = model_runner.predict(
              tasks[0], config.predict_checkpoint_path, split)
          with open("predict_%s.pickle" % split, "bw") as outfile:
            pickle.dump(predictions, outfile)

    if trial != config.num_trials and (not config.keep_all_models):
      utils.rmrf(config.model_dir)
    trial += 1
def run_finetuning(config: configure_finetuning.FinetuningConfig):
  """Run finetuning, then optionally export a SavedModel for serving.

  Trains/evaluates `config.num_trials` models with different random seeds;
  if `config.export_dir` is set, exports the estimator with a SQuAD
  serving input function afterwards.
  """
  # Allow variable reuse so repeated graph construction within this process
  # (training then export) does not raise on duplicate variables.
  tf.get_variable_scope().reuse_variables()

  # Setup for training
  results = []
  trial = 1
  heading_info = "model={:}, trial {:}/{:}".format(
      config.model_name, trial, config.num_trials)
  heading = lambda msg: utils.heading(msg + ": " + heading_info)
  heading("Config")
  utils.log_config(config)
  generic_model_dir = config.model_dir
  tasks = task_builder.get_tasks(config)

  # Train and evaluate num_trials models with different random seeds
  while config.num_trials < 0 or trial <= config.num_trials:
    config.model_dir = generic_model_dir + "_" + str(trial)
    if config.do_train:
      utils.rmkdir(config.model_dir)

    model_runner = ModelRunner(config, tasks)
    if config.do_train:
      heading("Start training")
      model_runner.train()
      utils.log()

    if config.do_eval:
      heading("Run dev set evaluation")
      results.append(model_runner.evaluate())
      write_results(config, results)
      if config.write_test_outputs and trial <= config.n_writes_test:
        heading("Running on the test set and writing the predictions")
        for task in tasks:
          # Currently only writing preds for GLUE and SQuAD 2.0 is supported
          if task.name in ["cola", "mrpc", "mnli", "sst", "rte", "qnli",
                           "qqp", "sts"]:
            for split in task.get_test_splits():
              model_runner.write_classification_outputs([task], trial, split)
          elif task.name == "squad":
            scorer = model_runner.evaluate_task(task, "test", False)
            scorer.write_predictions()
            preds = utils.load_json(config.qa_preds_file("squad"))
            null_odds = utils.load_json(config.qa_na_file("squad"))
            # Blank out answers the model considers unanswerable.
            for q in preds:
              if null_odds[q] > config.qa_na_threshold:
                preds[q] = ""
            utils.write_json(preds, config.test_predictions(
                task.name, "test", trial))
          else:
            utils.log("Skipping task", task.name,
                      "- writing predictions is not supported for this task")

    if trial != config.num_trials and (not config.keep_all_models):
      utils.rmrf(config.model_dir)
    trial += 1

  # exporting the model (dead commented-out duplicate of this logic removed)
  if config.export_dir:
    tf.get_variable_scope().reuse_variables()
    model_runner = ModelRunner(config, tasks)
    tf.gfile.MakeDirs(config.export_dir)
    squad_serving_input_fn = (
        build_squad_serving_input_fn(config.max_seq_length))
    utils.log("Starting to export model.")
    # NOTE(review): the original also computed an unused hard-coded
    # checkpoint path ("model.ckpt-6315"); export_saved_model does not take
    # it, so it was removed. Confirm the intended checkpoint is the latest
    # one in model_dir.
    model_runner._estimator.export_saved_model(
        export_dir_base=os.path.join(config.export_dir, "saved_model"),
        serving_input_receiver_fn=squad_serving_input_fn)