def simple():
    tf_logging.setLevel(logging.INFO)
    out_path = os.path.join(working_path, "dict_reader3")
    exist_or_mkdir(out_path)
    worker = DGenWorker(out_path)
    worker.gen.f_hide_word = False
    worker.work(1)
def main():
    tf_logging2 = logging.getLogger('tensorflow')
    tf_logging.setLevel(logging.INFO)
    ab_logging.info("This is ab logging")
    tf_logging.info("This is TF log")
    tf_logging2.info("TFLog 2")
def generate_test():
    tf_logging.setLevel(logging.INFO)

    out_path = os.path.join(working_path, "dict_reader2_test_dict")
    worker = DGenWorker(out_path)
    worker.gen.drop_none_dict = True
    worker.work(1)

    out_path = os.path.join(working_path, "dict_reader2_test_no_dict")
    worker = DGenWorker(out_path)
    worker.gen.no_dict_assist = True
    worker.work(1)
def dev_fn():
    tf.compat.v1.disable_eager_execution()
    if FLAGS.task_completion_mark:
        if os.path.exists(FLAGS.task_completion_mark):
            tf_logging.warn("Task already completed")

    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    seq_max = 200
    data_loader = nli.DataLoader(seq_max, "bert_voca.txt", True)
    is_training = FLAGS.do_train
    init_fn = get_checkpoint_init_fn()

    model_name = FLAGS.modeling
    if FLAGS.modeling == NAME_DUMMY_WSSDR:
        tf_logging.info("Using dummy WSSDR")
        model_name = "wssdr"
    model = get_model(seq_max, model_name, is_training)

    model_config = JsonConfig.from_json_file(FLAGS.model_config_file)
    # Assert that the attributes exist (and are zero or positive).
    assert model_config.lookup_threshold >= 0
    assert model_config.lookup_min_step >= 0
    assert model_config.lookup_train_frequency > 0

    augment_data_loader = DictAugmentedDataLoader(FLAGS.modeling, data_loader, FLAGS.use_cache)
    model_path, run_name = get_model_path(FLAGS.output_dir)

    if FLAGS.do_train:
        saved_model = train_nli_w_dict(run_name, model, model_path, model_config,
                                       augment_data_loader, init_fn)
        if FLAGS.task_completion_mark:
            with open(FLAGS.task_completion_mark, "w") as f:
                f.write("Done")
        tf.compat.v1.reset_default_graph()
        eval_nli_w_dict(run_name, model, saved_model, augment_data_loader)
    elif FLAGS.do_eval:
        eval_nli_w_dict_lookup(run_name, model, model_path, augment_data_loader)
    else:
        demo_nli_w_dict(run_name, model, model_path, augment_data_loader)
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    config = JsonConfig.from_json_file(FLAGS.model_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)
    is_training = FLAGS.do_train
    input_fn = input_fn_builder(get_input_files_from_flags(FLAGS), FLAGS, is_training)
    model_fn = mask_lm_as_seq2seq(config, train_config)
    run_estimator(model_fn, input_fn)
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    if FLAGS.dbert_config_file:
        FLAGS.model_config_file = FLAGS.dbert_config_file

    tf.io.gfile.makedirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.io.gfile.glob(input_pattern))

    lm_pretrain(input_files)
    tf_logging.info("Now terminating process")
def run_estimator_loop(model_fn, input_fn_list, output_name_list):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        session_config=config,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.eval_batch_size,
    )

    if FLAGS.do_predict:
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        for input_fn, output_name in zip(input_fn_list, output_name_list):
            tf_logging.info("Predicting for %s", output_name)
            result = estimator.predict(input_fn=input_fn, yield_single_examples=False)
            pickle.dump(list(result), open(output_name, "wb"))
    else:
        raise Exception("Only predict expected")
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    # No additional exclusion patterns are added to the logging filter here.
    tf_logging.filters[0].excludes.extend([])
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf_logging.info("Predict Runner")

    file_prefix = FLAGS.input_file
    out_file_format = FLAGS.out_file  # keep the unformatted template across iterations
    step_size = 100
    for st in range(FLAGS.predict_begin, FLAGS.predict_end, step_size):
        tf_logging.info("Starting {}".format(st))
        input_files = []
        ed = st + step_size
        FLAGS.out_file = out_file_format.format(st, ed)
        for i in range(st, ed):
            input_files.append(file_prefix + "{}".format(i))
        lm_pretrain(input_files)
def run_estimator(model_fn, input_fn, host_call=None):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    # FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    tpu_cluster_resolver = None
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
    )

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.Estimator(model_fn=model_fn,
                                                 config=run_config,
                                                 params={'batch_size': 16})

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)
        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)
def run_estimator(model_fn, input_fn, host_call=None):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    # FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    print("FLAGS.save_checkpoints_steps", FLAGS.save_checkpoints_steps)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.eval_batch_size,
    )

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)
        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf_logging.info("***** Running evaluation *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        if FLAGS.initialize_to_predict:
            checkpoint = FLAGS.init_checkpoint
        else:
            checkpoint = None
        result = estimator.evaluate(input_fn=input_fn,
                                    steps=FLAGS.max_eval_steps,
                                    checkpoint_path=checkpoint)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf_logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf_logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        return result

    if FLAGS.do_predict:
        tf_logging.info("***** Running prediction *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        if not FLAGS.initialize_to_predict:
            verify_checkpoint(estimator.model_dir)
            checkpoint = None
            time.sleep(1)
        else:
            checkpoint = FLAGS.init_checkpoint
        result = estimator.predict(input_fn=input_fn,
                                   checkpoint_path=checkpoint,
                                   yield_single_examples=False)
        pickle.dump(list(result), open(FLAGS.out_file, "wb"))
        tf_logging.info("Prediction saved at {}".format(FLAGS.out_file))
            loss1 = vectors["grouped_loss1"][i][t_i][p_i]
            loss2 = vectors["grouped_loss2"][i][t_i][p_i]
            loss1_arr[loc] = loss1
            loss2_arr[loc] = loss2
            assert mask_valid[loc] == 0
            mask_valid[loc] = 1

        features = collections.OrderedDict()
        for key in basic_keys:
            features[key] = create_int_feature(vectors[key][i])
        features["loss_valid"] = create_int_feature(mask_valid)
        features["loss1"] = create_float_feature(loss1_arr)
        features["loss2"] = create_float_feature(loss2_arr)
        features["next_sentence_labels"] = create_int_feature([0])
        writer.write_feature(features)
        # if i < 20:
        #     log_print_feature(features)
    writer.close()
    return "Done"


if __name__ == "__main__":
    tf_logging.setLevel(logging.INFO)
    st = int(sys.argv[1])
    ed = int(sys.argv[2])
    for i in range(st, ed):
        ret = work(i)
        print("Job {} {}".format(i, ret))
                output1.masked_lm_positions[i])
            features = get_segment_and_mask_inner(output1.input_ids[i], sep_indice)
        except:
            tokens = tokenzier.convert_ids_to_tokens(output1.input_ids[i])
            print(tokenization.pretty_tokens(tokens))
            print(output1.masked_lm_ids[i])
            print(output1.masked_lm_positions[i])
            raise

        features["next_sentence_labels"] = create_int_feature([0])
        features["masked_lm_positions"] = create_int_feature(output1.masked_lm_positions[i])
        features["masked_lm_ids"] = create_int_feature(output1.masked_lm_ids[i])
        features["masked_lm_weights"] = create_float_feature(output1.masked_lm_weights[i])
        features["loss_base"] = create_float_feature(output1.masked_lm_example_loss[i])
        features["loss_target"] = create_float_feature(output2.masked_lm_example_loss[i])
        record_writer.write_feature(features)
        ticker.tick()
    record_writer.close()


if __name__ == '__main__':
    tf_logging.setLevel(ab_logging.DEBUG)
    do(sys.argv[1])
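# A minimal sketch, assuming the create_int_feature / create_float_feature
# helpers used above follow the standard BERT-style pattern of wrapping a
# Python list in a tf.train.Feature proto for TFRecord serialization. Their
# actual definitions are not shown in this file.
def create_int_feature(values):
    # Integer values are stored in an Int64List.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))


def create_float_feature(values):
    # Float values are stored in a FloatList.
    return tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))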
def simple():
    tf_logging.setLevel(logging.INFO)
    worker = init_worker()
    worker.work(int(sys.argv[1]))