def Test():
  # Default TF1.x uses reference variables that are not supported by SavedModel
  # v1 Importer. To use SavedModel V1 Importer, resource variables should be
  # enabled.
  tf.enable_resource_variables()

  tf.compat.v1.disable_eager_execution()

  x = tf.constant([[1.0], [1.0], [1.0]])
  y = tf.get_variable(
      name='y',
      shape=(1, 3),
      initializer=tf.random_normal_initializer(),
      trainable=True)
  r = tf.matmul(x, y)

  tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
  tensor_info_r = tf.saved_model.utils.build_tensor_info(r)

  signature_def = tf.saved_model.signature_def_utils.build_signature_def(
      inputs={'x': tensor_info_x},
      outputs={'r': tensor_info_r},
      method_name=tf.saved_model.PREDICT_METHOD_NAME)

  # Create two signatures that share the same variable.
  return {'basic': signature_def, 'basic_2': signature_def}
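The signature map returned by Test() still has to be written to disk before a SavedModel V1 importer can consume it. A minimal sketch of that export step, assuming tensorflow.compat.v1 is imported as tf; the export directory and the builder call below are illustrative and not part of the original snippet:

signature_defs = Test()  # builds the graph; also disables eager execution

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())  # initialize the shared variable 'y'
  builder = tf.saved_model.builder.SavedModelBuilder('/tmp/saved_model_v1_example')  # hypothetical path
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.SERVING], signature_def_map=signature_defs)
  builder.save()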
def export_saved_model(self, **kwargs):
  tf.enable_resource_variables()
  driver = inference.ServingDriver(self.model_name, self.ckpt_path,
                                   self.image_size)
  driver.build(min_score_thresh=kwargs.get('min_score_thresh', 0.2),
               max_boxes_to_draw=kwargs.get('max_boxes_to_draw', 50))
  driver.export(self.saved_model_dir)
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  runner = abps_runners.EvalRunner(
      root_dir=FLAGS.root_dir,
      env_name=suite_atari.game(name=FLAGS.game_name),
      **get_run_args())
  runner.run()
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  if FLAGS.select_policy_way == 'independent':
    runner = baseline_runners.EvalRunner(
        root_dir=FLAGS.root_dir,
        env_name=suite_atari.game(name=FLAGS.game_name),
        **get_run_args())
  runner.run()
def main(_):
  tf.disable_eager_execution()
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  runner = abps_runners.TrainRunner(
      root_dir=FLAGS.root_dir,
      env_name=suite_atari.game(name=FLAGS.game_name),
      **get_run_args())
  runner.run()
def export_saved_model(self, **kwargs):
  """Export a saved model for inference."""
  tf.enable_resource_variables()
  driver = inference.ServingDriver(
      self.model_name, self.ckpt_path, enable_ema=self.enable_ema)
  driver.build(
      params_override=self.model_overrides,
      min_score_thresh=kwargs.get('min_score_thresh', 0.2),
      max_boxes_to_draw=kwargs.get('max_boxes_to_draw', 50))
  driver.export(self.saved_model_dir)
def export_saved_model(self, **kwargs):
  """Export a saved model for inference."""
  tf.enable_resource_variables()
  driver = inference.ServingDriver(
      self.model_name,
      self.ckpt_path,
      enable_ema=self.enable_ema,
      use_xla=self.use_xla,
      data_format=self.data_format,
      **kwargs)
  driver.build(params_override=self.model_overrides)
  driver.export(self.saved_model_dir)
def main(argv, tuner=None):
  """Main function."""
  assert argv is None or len(argv) == 1, (
      'This program expects no non-option arguments. Got {}.'.format(argv))

  tf.enable_resource_variables()
  lamb_flags.initialize()

  if FLAGS.use_old_linear_names:
    utils._BIAS_VARIABLE_NAME = 'biases'  # pylint: disable=protected-access
    utils._WEIGHTS_VARIABLE_NAME = 'weights'  # pylint: disable=protected-access

  # Set seeds. The tensorflow seed is set later.
  random.seed(FLAGS.seed)
  np.random.seed(FLAGS.seed)

  # Load the files.
  assert FLAGS.training_file, 'No training file specified.'
  training_file_data = read_corpus(FLAGS.training_file)
  if FLAGS.test_file and FLAGS.eval_on_test:
    test_file_data = read_corpus(FLAGS.test_file)
  else:
    test_file_data = corpus.Corpus(data=[])

  # Let's assemble the 'folds': training and eval set combinations,
  # plus the vocabulary.
  folds = []

  def add_fold(training_corpus, eval_corpus, test_corpus):
    fold = _make_fold(training_corpus, eval_corpus, test_corpus)
    logging.info('number of examples in fold %d', len(folds))
    logging.info(' training: %d', fold[0]['training'].size())
    logging.info(' valid: %d', fold[0]['valid'].size())
    logging.info(' test: %d', fold[0]['test'].size())
    folds.append(fold)

  if FLAGS.crossvalidate:
    logging.info('Doing cross-validation.')
    assert FLAGS.validation_file == ''  # pylint: disable=g-explicit-bool-comparison
    for _ in six.moves.range(FLAGS.crossvalidation_rounds):
      for training_set, validation_set in utils.cv_splits(
          training_file_data.data(), FLAGS.crossvalidation_folds):
        add_fold(corpus.Corpus(data=training_set),
                 corpus.Corpus(data=validation_set),
                 test_file_data)
  else:
    logging.info('Using dedicated eval data.')
    assert FLAGS.validation_file, 'No eval file specified.'
    validation_file_data = read_corpus(FLAGS.validation_file)
    add_fold(training_file_data, validation_file_data, test_file_data)

  experiment = Experiment(lamb_flags.get_config(), FLAGS.experiment_dir, tuner)
  experiment.run_training(folds=folds)
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_v2_behavior()
  tf.enable_resource_variables()
  tf.enable_control_flow_v2()
  logging.info('Executing eagerly: %s', tf.executing_eagerly())
  logging.info('parsing config files: %s', FLAGS.gin_file)
  gin.parse_config_files_and_bindings(
      FLAGS.gin_file, FLAGS.gin_bindings, skip_unknown=True)
  trainer.train(root_dir, eval_metrics_callback=metrics_callback)
def export_saved_model(self, **kwargs):
  """Export a saved model for inference."""
  tf.enable_resource_variables()
  driver = inference.ServingDriver(
      self.model_name,
      self.ckpt_path,
      batch_size=self.batch_size,
      use_xla=self.use_xla,
      model_params=self.model_config.as_dict(),
      **kwargs)
  driver.build()
  driver.export(self.saved_model_dir, self.tflite_path, self.tensorrt)
def main(_):
  tf.disable_v2_behavior()
  tf.enable_resource_variables()
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.use_hpu:
    if FLAGS.use_bf16:
      if not is_workaround_enabled('FORCE_FP32'):
        os.environ['TF_BF16_CONVERSION'] = FLAGS.bf16_config_path
      else:
        print("Warning! BF16 precision is not supported in inference mode. Switching back to fp32...")
    if is_workaround_enabled('DISABLE_DYNAMIC_SHAPES'):
      os.environ['TF_ENABLE_DYNAMIC_SHAPES'] = 'false'
    from habana_frameworks.tensorflow import load_habana_module
    load_habana_module()
    prepare_recipe_cache()

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError("To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  hp.add_hparam("use_hpu", FLAGS.use_hpu)
  decode_hp = create_decode_hparams()
  run_config = trainer.create_run_config(hp)
  if FLAGS.disable_grappler_optimizations:
    run_config.session_config.graph_options.rewrite_options.disable_meta_optimizer = True

  # summary-hook in tf.estimator.EstimatorSpec requires
  # hparams.model_dir to be set.
  hp.add_hparam("model_dir", run_config.model_dir)

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      run_config,
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
def main(argv):
  del argv
  tf.enable_resource_variables()
  tf.disable_eager_execution()
  params = PARAMETERS[FLAGS.model]

  learned_model = core_model.EncodeProcessDecode(
      output_size=params['size'],
      latent_size=128,
      num_layers=2,
      message_passing_steps=15)
  model = params['model'].Model(learned_model)

  if FLAGS.mode == 'train':
    learner(model, params)
  elif FLAGS.mode == 'eval':
    evaluator(model, params)
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  if FLAGS.select_policy_way == 'independent':
    # runner = abps_runners.TrainIndependRunner(
    #     root_dir=FLAGS.root_dir,
    #     env_name=suite_atari.game(name=FLAGS.game_name),
    #     **get_run_args())
    runner = baseline_runners.PBTRunner(
        root_dir=FLAGS.root_dir,
        env_name=suite_atari.game(name=FLAGS.game_name),
        **get_run_args())
  elif FLAGS.select_policy_way == 'controller':
    runner = baseline_runners.PBTController(
        root_dir=FLAGS.root_dir,
        env_name=suite_atari.game(name=FLAGS.game_name),
        **get_run_args())
  runner.run()
def main(argv):
  del argv  # Unused.

  # If using update_damping_immediately resource variables must be enabled.
  # (Although they probably will be by default on TPUs.)
  if FLAGS.update_damping_immediately:
    tf.enable_resource_variables()

  tf.set_random_seed(FLAGS.seed)
  # Invert using cholesky decomposition + triangular solve. This is the only
  # code path for matrix inversion supported on TPU right now.
  kfac.utils.set_global_constants(posdef_inv_method='cholesky')
  kfac.fisher_factors.set_global_constants(
      eigenvalue_decomposition_threshold=10000)

  if not FLAGS.use_sua_approx:
    if FLAGS.use_custom_patches_op:
      kfac.fisher_factors.set_global_constants(
          use_patches_second_moment_op=True)
    else:
      # Temporary measure to save memory with giant batches:
      kfac.fisher_factors.set_global_constants(
          sub_sample_inputs=True,
          inputs_to_extract_patches_factor=0.1)

  config = make_tpu_run_config(
      FLAGS.master, FLAGS.seed, FLAGS.model_dir, FLAGS.iterations_per_loop,
      FLAGS.save_checkpoints_steps)

  estimator = contrib_tpu.TPUEstimator(
      use_tpu=True,
      model_fn=_model_fn,
      config=config,
      train_batch_size=FLAGS.batch_size,
      eval_batch_size=1024)

  estimator.train(
      input_fn=mnist_input_fn,
      max_steps=FLAGS.train_steps,
      hooks=[])
from __future__ import division, print_function
import random
import scipy
import scipy.io
import numpy as np
import tensorflow.compat.v1 as tf
import Environment_marl_test
import os
from replay_memory import ReplayMemory
import sys

tf.enable_resource_variables()
tf.disable_eager_execution()
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

my_config = tf.ConfigProto()
my_config.gpu_options.allow_growth = True


class Agent(object):
    def __init__(self, memory_entry_size):
        self.discount = 1
        self.double_q = True
        self.memory_entry_size = memory_entry_size
        self.memory = ReplayMemory(self.memory_entry_size)


# ################## SETTINGS ######################
up_lanes = [i / 2.0 for i in [
def set_tf_options():
  # Default TF1.x uses reference variables that are not supported by SavedModel
  # v1 Importer. To use SavedModel V1 Importer, resource variables should be
  # enabled.
  tf.enable_resource_variables()

  tf.compat.v1.disable_eager_execution()
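The helper above has to run before any variables are created, otherwise tf.get_variable still hands back reference variables. A small sketch of the intended ordering, assuming tensorflow.compat.v1 is imported as tf; the variable 'v' is purely illustrative and not part of the original snippet:

set_tf_options()  # must be called before graph construction

with tf.Graph().as_default():
  v = tf.get_variable('v', shape=[2], initializer=tf.zeros_initializer())
  # With resource variables enabled, the variable is expected to be backed by a
  # VarHandleOp (a reference variable would show up as VariableV2 instead),
  # which is what the SavedModel V1 importer can handle.
  print(v.op.type)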
"Jonas Eschle <*****@*****.**>", "Albert Puig <*****@*****.**", "Rafael Silva Coutinho <*****@*****.**>", ] __all__ = [ "ztf", "z", "constraint", "pdf", "minimize", "loss", "core", "data", "func", "Parameter", "ComposedParameter", "ComplexParameter", "convert_to_parameter", "Space", "convert_to_space", "supports", "run", "settings" ] # Copyright (c) 2019 zfit import tensorflow.compat.v1 as tf tf.enable_resource_variables() # forward compat tf.enable_v2_tensorshape() # forward compat tf.disable_eager_execution() from . import ztf # legacy from . import ztf as z from .settings import ztypes # tf.get_variable_scope().set_use_resource(True) # tf.get_variable_scope().set_dtype(ztypes.float) from . import constraint, pdf, minimize, loss, core, data, func, param from .core.parameter import Parameter, ComposedParameter, ComplexParameter, convert_to_parameter from .core.limits import Space, convert_to_space, supports from .core.data import Data
def main(_):
  tf.disable_v2_behavior()
  tf.enable_resource_variables()

  if FLAGS.hparams is None:
    hparams = hparams_flags.hparams_from_flags()
  else:
    hparams = hparams_lib.HParams(FLAGS.hparams)

  cluster = None
  if FLAGS.use_tpu and FLAGS.master is None:
    if FLAGS.tpu_name:
      cluster = tf.distribute.cluster_resolver.TPUClusterResolver(
          FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
      cluster = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(cluster)
    tf.tpu.experimental.initialize_tpu_system(cluster)

  session_config = tf.ConfigProto()
  # Workaround for https://github.com/tensorflow/tensorflow/issues/26411 where
  # convolutions (used in blurring) get confused about data-format when used
  # inside a tf.data pipeline that is run on GPU.
  if (tf.test.is_built_with_cuda() and
      not hparams.input_data.preprocessing.defer_blurring):
    # RewriterConfig.OFF = 2
    session_config.graph_options.rewrite_options.layout_optimizer = 2

  run_config = tf.estimator.tpu.RunConfig(
      master=FLAGS.master,
      cluster=cluster,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=FLAGS.save_interval_steps,
      keep_checkpoint_max=FLAGS.max_checkpoints_to_keep,
      keep_checkpoint_every_n_hours=(FLAGS.keep_checkpoint_interval_secs /
                                     (60.0 * 60.0)),
      log_step_count_steps=100,
      session_config=session_config,
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.steps_per_loop,
          per_host_input_for_training=True,
          experimental_host_call_every_n_steps=FLAGS.summary_interval_steps,
          tpu_job_name='train_tpu_worker' if FLAGS.mode == 'train' else None,
          eval_training_input_configuration=(
              tf.estimator.tpu.InputPipelineConfig.SLICED if FLAGS.use_tpu
              else tf.estimator.tpu.InputPipelineConfig.PER_HOST_V1)))
  params = {
      'hparams': hparams,
      'use_tpu': FLAGS.use_tpu,
      'data_dir': FLAGS.data_dir,
  }
  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=model_fn,
      use_tpu=FLAGS.use_tpu,
      config=run_config,
      params=params,
      train_batch_size=hparams.bs,
      eval_batch_size=hparams.eval.batch_size)

  if hparams.input_data.input_fn not in dir(inputs):
    raise ValueError(f'Unknown input_fn: {hparams.input_data.input_fn}')
  input_fn = getattr(inputs, hparams.input_data.input_fn)

  training_set_size = inputs.get_num_train_images(hparams)
  steps_per_epoch = training_set_size / hparams.bs
  stage_1_epochs = hparams.stage_1.training.train_epochs
  stage_2_epochs = hparams.stage_2.training.train_epochs
  total_steps = int((stage_1_epochs + stage_2_epochs) * steps_per_epoch)

  num_eval_examples = inputs.get_num_eval_images(hparams)
  eval_steps = num_eval_examples // hparams.eval.batch_size

  if FLAGS.mode == 'eval':
    for ckpt_str in tf.train.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.eval_interval_secs,
        timeout=60 * 60):
      result = estimator.evaluate(
          input_fn=input_fn, checkpoint_path=ckpt_str, steps=eval_steps)
      estimator.export_saved_model(
          os.path.join(FLAGS.model_dir, 'exports'),
          lambda: input_fn(tf.estimator.ModeKeys.PREDICT, params),
          checkpoint_path=ckpt_str)
      if result['global_step'] >= total_steps:
        return
  else:  # 'train' or 'train_then_eval'.
    estimator.train(input_fn=input_fn, max_steps=total_steps)
    if FLAGS.mode == 'train_then_eval':
      result = estimator.evaluate(input_fn=input_fn, steps=eval_steps)
      estimator.export_saved_model(
          os.path.join(FLAGS.model_dir, 'exports'),
          lambda: input_fn(tf.estimator.ModeKeys.PREDICT, params))
def __init__(self,
             use_xla=False,
             optimizer=None,
             mixed_precision=False,
             single_device=False,
             optimizer_type='adamw',
             learning_rate=5e-5,
             num_train_epochs=1,
             train_steps=0,
             num_warmup_steps=0,
             warmup_proportion=0.,
             gradient_accumulation_steps=1,
             max_checkpoints=1,
             max_grad=1.0,
             decay_method='poly',
             logging=True):
    """Base trainer class.

    :param use_xla: whether to enable XLA optimization
    :param optimizer: custom optimizer; if not provided, the optimizer
        parameters below must be set
    :param optimizer_type: optimizer type; currently supports the optimizers
        available through tfbert.optimization.create_optimizer
    :param learning_rate: learning rate
    :param num_train_epochs: number of training epochs
    :param train_steps: number of training steps per epoch
    :param gradient_accumulation_steps: number of gradient accumulation steps
    :param max_checkpoints: maximum number of checkpoints to keep
    :param max_grad: maximum gradient norm; gradients above it are clipped
    :param warmup_proportion: warmup proportion
    :param num_warmup_steps: number of warmup steps; not needed if
        warmup_proportion is given
    :param decay_method: learning-rate decay method, see
        tfbert.optimization.create_optimizer
    :param mixed_precision: whether to use mixed precision
    :param single_device: whether to use a single device only; otherwise all
        devices are used
    :param logging: whether to show tf logging output
    """
    utils.setup_xla_flags()
    if logging:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Devices read from the environment variables.
    self.devices = utils.devices()
    if single_device:
        self.devices = [self.devices[0]]

    # Optimization ops.
    self.train_op = None
    self.grads_and_vars = None

    self.train_outputs = {}
    self.eval_outputs = {}
    self.test_outputs = {}

    sess_conf = tf.ConfigProto()
    if use_xla:
        sess_conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        if mixed_precision:
            tf.enable_resource_variables()
    sess_conf.gpu_options.allow_growth = True
    sess_conf.allow_soft_placement = True

    self.session = tf.Session(config=sess_conf)
    self.saver = None

    self.inited = False
    self.compiled = False
    self.finished_build = False

    self.num_train_epochs = num_train_epochs
    self.max_checkpoints = max_checkpoints
    self.max_grad = max_grad
    self.num_train_steps = (train_steps * num_train_epochs //
                            gradient_accumulation_steps)
    self.learning_rate = learning_rate
    self.num_warmup_steps = num_warmup_steps
    self.warmup_proportion = warmup_proportion
    if warmup_proportion > 0:
        self.num_warmup_steps = self.num_train_steps * warmup_proportion
    self.gradient_accumulation_steps = gradient_accumulation_steps
    self.decay_method = None if self.num_train_steps == 0 else decay_method
    self.optimizer_type = optimizer_type
    self.optimizer = optimizer
    self.mixed_precision = mixed_precision

    self.global_step = 0  # global optimization step
    self.forward_steps = 0  # number of forward passes
    # Whether the optimization step changed; avoids duplicate evaluation when
    # accumulating gradients.
    self.global_step_changed = False
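In the trainer above, mixed_precision only switches on resource variables when XLA is enabled; the loss-scaled optimizer itself is set up elsewhere in the class. As a rough, self-contained sketch of how a TF1-style optimizer can be wrapped for automatic mixed precision, assuming the experimental graph-rewrite API available under tf.compat.v1 in TF 1.15-era releases (this is an illustration, not code from the trainer):

import tensorflow.compat.v1 as tf

# Resource variables are enabled here as well, mirroring the trainer above.
tf.enable_resource_variables()
base_opt = tf.train.AdamOptimizer(learning_rate=5e-5)
# Wraps the optimizer with dynamic loss scaling and lets the graph rewrite
# insert float16 casts where it considers them numerically safe.
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(
    base_opt, loss_scale='dynamic')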
def main(argv):
  tf.disable_v2_behavior()
  tf.enable_resource_variables()

  if FLAGS.use_hpu and FLAGS.recipe_cache:
    prepare_recipe_cache()

  if FLAGS.use_horovod:
    if FLAGS.use_hpu:
      from TensorFlow.common.horovod_helpers import hvd_init, horovod_enabled, hvd
      hvd_init()
      assert horovod_enabled()
      if FLAGS.recipe_cache:
        # Other ranks should wait for recipe cache to be removed.
        # This operation can't be done before hvd_init.
        from mpi4py import MPI
        MPI.COMM_WORLD.Barrier()
    else:
      import horovod.tensorflow as hvd
      hvd.init()
      assert hvd.size() > 1
      os.environ['CUDA_VISIBLE_DEVICES'] = str(hvd.local_rank())

  if FLAGS.use_hpu:
    if FLAGS.use_bf16:
      os.environ['TF_BF16_CONVERSION'] = FLAGS.bf16_config_path
    dyn_shapes_flag = 'TF_ENABLE_DYNAMIC_SHAPES'
    if dyn_shapes_flag not in os.environ:
      os.environ[dyn_shapes_flag] = 'false'
    from habana_frameworks.tensorflow import load_habana_module  # noqa
    load_habana_module()

  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # If we just have to print the registry, do that and exit early.
  maybe_log_registry_and_exit()

  # Create HParams.
  if argv:
    set_hparams_from_args(argv[1:])
  if FLAGS.schedule != "run_std_server":
    hparams = create_hparams()
    if FLAGS.gpu_automatic_mixed_precision:
      setattr(hparams, "gpu_automatic_mixed_precision", True)
    if FLAGS.deterministic_dataset:
      hparams.add_hparam("deterministic_dataset", True)
    hparams.add_hparam("use_horovod", FLAGS.use_horovod)
    hparams.add_hparam("use_hpu", FLAGS.use_hpu)
    if FLAGS.use_horovod:
      hparams.add_hparam("hvd_worker_id", hvd.rank())
      hparams.add_hparam("hvd_size", hvd.size())

  if FLAGS.schedule == "run_std_server":
    run_std_server()
  trainer_lib.set_random_seed(FLAGS.random_seed)

  if FLAGS.generate_data:
    generate_data()

  exp_fn = create_experiment_fn()
  exp = exp_fn(create_run_config(hparams), hparams)
  if is_chief():
    save_metadata(hparams)

  with dump_callback():
    execute_schedule(exp)
def main(_):
  # If using update_damping_immediately resource variables must be enabled.
  if FLAGS.update_damping_immediately:
    tf.enable_resource_variables()

  if not FLAGS.use_sua_approx:
    if FLAGS.use_custom_patches_op:
      kfac.fisher_factors.set_global_constants(
          use_patches_second_moment_op=True)
    else:
      # Temporary measure to save memory with giant batches:
      kfac.fisher_factors.set_global_constants(
          sub_sample_inputs=True,
          inputs_to_extract_patches_factor=0.2)

  tf.set_random_seed(FLAGS.seed)
  (train_op, opt, batch_loss, batch_error, batch_size_schedule, batch_size,
   eval_loss, eval_error, eval_loss_avg, eval_error_avg) = construct_train_quants()

  global_step = tf.train.get_or_create_global_step()

  if FLAGS.optimizer == 'kfac':
    # We need to put the control dependency on train_op here so that we are
    # guaranteed to get the up-to-date values of these various quantities.
    # Otherwise there is a race condition and we might get the old values,
    # nondeterministically. Another solution would be to get these values in
    # a separate sess.run call, but this can sometimes cause problems with
    # training frameworks that use hooks (see the comments below).
    with tf.control_dependencies([train_op]):
      learning_rate = opt.learning_rate
      momentum = opt.momentum
      damping = opt.damping
      rho = opt.rho
      qmodel_change = opt.qmodel_change

  # Without setting allow_soft_placement=True there will be problems when
  # the optimizer tries to place certain ops like "mod" on the GPU (which isn't
  # supported).
  config = tf.ConfigProto(allow_soft_placement=True)

  # Train model.
  # It's good practice to put everything into a single sess.run call. The
  # reason is that certain "training frameworks" like to run hooks at each
  # sess.run call, and there is an implicit expectation there will only
  # be one sess.run call every "iteration" of the "optimizer". For example,
  # a framework might try to print the loss at each sess.run call, causing
  # the mini-batch to be advanced, thus completely breaking the "cached
  # batch" mechanism that the damping adaptation method may rely on. (Plus
  # there will also be the extra cost of having to reevaluate the loss
  # twice.) That being said we don't completely do that here because it's
  # inconvenient.
  with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                         config=config) as sess:
    for _ in range(FLAGS.train_steps):
      i = sess.run(global_step)

      if FLAGS.use_batch_size_schedule:
        batch_size_ = batch_size_schedule[min(i, len(batch_size_schedule) - 1)]
      else:
        batch_size_ = FLAGS.batch_size

      if FLAGS.optimizer == 'kfac':
        (_, batch_loss_, batch_error_, learning_rate_, momentum_, damping_,
         rho_, qmodel_change_) = sess.run(
             [train_op, batch_loss, batch_error, learning_rate, momentum,
              damping, rho, qmodel_change],
             feed_dict={batch_size: batch_size_})
      else:
        _, batch_loss_, batch_error_ = sess.run(
            [train_op, batch_loss, batch_error],
            feed_dict={batch_size: batch_size_})

      # Print training stats.
      tf.logging.info('iteration: %d', i)
      tf.logging.info(
          'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f ',
          batch_size_, batch_loss_, batch_error_)

      if FLAGS.optimizer == 'kfac':
        tf.logging.info('learning_rate = %f | momentum = %f',
                        learning_rate_, momentum_)
        tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                        damping_, rho_, qmodel_change_)

      # "Eval" here means just compute stuff on the full training set.
      if (i + 1) % FLAGS.eval_every == 0:
        eval_loss_, eval_error_, eval_loss_avg_, eval_error_avg_ = sess.run(
            [eval_loss, eval_error, eval_loss_avg, eval_error_avg])
        tf.logging.info('-----------------------------------------------------')
        tf.logging.info('eval_loss = %f | eval_error = %f',
                        eval_loss_, eval_error_)
        tf.logging.info('eval_loss_avg = %f | eval_error_avg = %f',
                        eval_loss_avg_, eval_error_avg_)
        tf.logging.info('-----------------------------------------------------')
      else:
        tf.logging.info('----')
def run_finetuning(train_tfrecord,
                   dev_tfrecord,
                   train_eval_fun=None,
                   use_tpu=False,
                   additional_train_params=None):
  """Main function to train and eval BLEURT."""
  logging.info("Initializing BLEURT training pipeline.")

  bleurt_params = checkpoint_lib.get_bleurt_params_from_flags_or_ckpt()
  max_seq_length = bleurt_params["max_seq_length"]
  bert_config_file = bleurt_params["bert_config_file"]
  init_checkpoint = bleurt_params["init_checkpoint"]

  logging.info("Creating input data pipeline.")
  logging.info("Train/Eval batch size: {}".format(str(FLAGS.batch_size)))

  train_input_fn = input_fn_builder(
      train_tfrecord,
      seq_length=max_seq_length,
      is_training=True,
      batch_size=FLAGS.batch_size,
      drop_remainder=use_tpu)
  dev_input_fn = input_fn_builder(
      dev_tfrecord,
      seq_length=max_seq_length,
      is_training=False,
      batch_size=FLAGS.batch_size,
      drop_remainder=use_tpu)

  logging.info("Creating model.")
  bert_config = modeling.BertConfig.from_json_file(bert_config_file)
  num_train_steps = FLAGS.num_train_steps
  num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=use_tpu,
      use_one_hot_embeddings=use_tpu,
      n_hidden_layers=FLAGS.n_hidden_layers,
      hidden_layers_width=FLAGS.hidden_layers_width,
      dropout_rate=FLAGS.dropout_rate)

  logging.info("Creating TF Estimator.")
  exporters = [
      tf.estimator.BestExporter(
          "bleurt_best",
          serving_input_receiver_fn=_serving_input_fn_builder(max_seq_length),
          event_file_pattern="eval_default/*.tfevents.*",
          compare_fn=_model_comparator,
          exports_to_keep=1)
  ]
  tf.enable_resource_variables()

  logging.info("*** Entering the Training / Eval phase ***")
  if not additional_train_params:
    additional_train_params = {}
  train_eval_fun(
      model_fn=model_fn,
      train_input_fn=train_input_fn,
      eval_input_fn=dev_input_fn,
      exporters=exporters,
      **additional_train_params)
def main(argv):
  del argv  # Unused.
  tf.enable_resource_variables()
  tf.set_random_seed(FLAGS.seed)
  set_lr_schedule()
  set_custom_sparsity_map()
  folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                             str(FLAGS.maskupdate_begin_step),
                             str(FLAGS.maskupdate_end_step),
                             str(FLAGS.maskupdate_frequency),
                             str(FLAGS.drop_fraction),
                             str(FLAGS.label_smoothing),
                             str(FLAGS.weight_decay))

  output_dir = FLAGS.output_dir
  if FLAGS.use_folder_stub:
    output_dir = os.path.join(output_dir, folder_stub)

  export_dir = os.path.join(output_dir, 'export_dir')

  # we pass the updated eval and train string to the params dictionary.
  params = {}
  params['output_dir'] = output_dir
  params['training_method'] = FLAGS.training_method
  params['use_tpu'] = FLAGS.use_tpu

  dataset_func = functools.partial(
      imagenet_input.ImageNetInput,
      data_dir=FLAGS.data_directory,
      transpose_input=False,
      num_parallel_calls=FLAGS.num_parallel_calls,
      use_bfloat16=False)
  imagenet_train, imagenet_eval = [
      dataset_func(is_training=is_training) for is_training in [True, False]
  ]

  run_config = tpu_config.RunConfig(
      master=FLAGS.master,
      model_dir=output_dir,
      save_checkpoints_steps=FLAGS.steps_per_checkpoint,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          tpu_job_name=FLAGS.tpu_job_name))

  classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  cpu_classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      export_to_tpu=False,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
    raise ValueError(
        'eval_batch_size (%d) must evenly divide num_eval_images (%d)!' %
        (FLAGS.eval_batch_size, FLAGS.num_eval_images))

  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  if FLAGS.mode == 'eval_once':
    ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
    dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
    classifier.evaluate(
        input_fn=dataset.input_fn,
        steps=eval_steps,
        checkpoint_path=ckpt_path,
        name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
  elif FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(output_dir):
      tf.logging.info('Starting to evaluate.')
      try:
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        classifier.evaluate(
            input_fn=dataset.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name='eval')
        # Terminate eval job when final checkpoint is reached
        global_step = int(os.path.basename(ckpt).split('-')[1])
        if global_step >= FLAGS.train_steps:
          tf.logging.info('Evaluation finished after training step %d' %
                          global_step)
          break
      except tf.errors.NotFoundError:
        tf.logging.info('Checkpoint no longer exists, skipping checkpoint.')
  else:
    global_step = estimator._load_global_step_from_checkpoint_dir(output_dir)
    # Session run hooks to export model for prediction
    export_hook = ExportModelHook(cpu_classifier, export_dir)
    hooks = [export_hook]

    if FLAGS.mode == 'train':
      tf.logging.info('start training...')
      classifier.train(
          input_fn=imagenet_train.input_fn,
          hooks=hooks,
          max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.mode == 'train_and_eval'
      tf.logging.info('start training and eval...')
      while global_step < FLAGS.train_steps:
        next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        global_step = next_checkpoint
        tf.logging.info('Completed training up to step: %d', global_step)
      classifier.evaluate(input_fn=imagenet_eval.input_fn, steps=eval_steps)
def a(demand_size):
    tf.enable_resource_variables()
    tf.disable_eager_execution()
    os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
    my_config = tf.ConfigProto()
    my_config.gpu_options.allow_growth = True

    class Agent(object):
        def __init__(self, memory_entry_size):
            self.discount = 1
            self.double_q = True
            self.memory_entry_size = memory_entry_size
            self.memory = ReplayMemory(self.memory_entry_size)

    # ################## SETTINGS ######################
    up_lanes = [i/2.0 for i in [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]]
    down_lanes = [i/2.0 for i in [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]]
    left_lanes = [i/2.0 for i in [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]]
    right_lanes = [i/2.0 for i in [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]]

    width = 750/2
    height = 1298/2

    # This main file is for testing only
    IS_TRAIN = 0  # hard-coded to 0
    IS_TEST = 1 - IS_TRAIN

    label = 'marl_model'
    label_sarl = 'sarl_model'

    n_veh = 4
    n_neighbor = 1
    n_RB = n_veh

    env = Environment_marl_test.Environ(down_lanes, up_lanes, left_lanes, right_lanes,
                                        width, height, n_veh, n_neighbor, demand_size)
    env.new_random_game()  # initialize parameters in env

    n_episode = 3000
    n_step_per_episode = int(env.time_slow/env.time_fast)
    epsi_final = 0.02
    epsi_anneal_length = int(0.8*n_episode)
    mini_batch_step = n_step_per_episode
    target_update_step = n_step_per_episode*4

    n_episode_test = 5  # test episodes

    ######################################################

    def get_state(env, idx=(0, 0), ind_episode=1., epsi=0.02):
        """ Get state from the environment """

        # V2I_channel = (env.V2I_channels_with_fastfading[idx[0], :] - 80) / 60
        V2I_fast = (env.V2I_channels_with_fastfading[idx[0], :] - env.V2I_channels_abs[idx[0]] + 10)/35

        # V2V_channel = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - 80) / 60
        V2V_fast = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] + 10)/35

        V2V_interference = (-env.V2V_Interference_all[idx[0], idx[1], :] - 60) / 60

        V2I_abs = (env.V2I_channels_abs[idx[0]] - 80) / 60.0
        V2V_abs = (env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] - 80)/60.0

        load_remaining = np.asarray([env.demand[idx[0], idx[1]] / env.demand_size])
        time_remaining = np.asarray([env.individual_time_limit[idx[0], idx[1]] / env.time_slow])

        # return np.concatenate((np.reshape(V2V_channel, -1), V2V_interference, V2I_abs, V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))
        return np.concatenate((V2I_fast, np.reshape(V2V_fast, -1), V2V_interference, np.asarray([V2I_abs]), V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))

    def get_state_sarl(env, idx=(0, 0), ind_episode=1., epsi=0.02):
        """ Get state from the environment """

        # V2I_channel = (env.V2I_channels_with_fastfading[idx[0], :] - 80) / 60
        V2I_fast = (env.V2I_channels_with_fastfading[idx[0], :] - env.V2I_channels_abs[idx[0]] + 10)/35

        # V2V_channel = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - 80) / 60
        V2V_fast = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] + 10)/35

        V2V_interference = (-env.V2V_Interference_all_sarl[idx[0], idx[1], :] - 60) / 60

        V2I_abs = (env.V2I_channels_abs[idx[0]] - 80) / 60.0
        V2V_abs = (env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] - 80)/60.0

        load_remaining = np.asarray([env.demand_sarl[idx[0], idx[1]] / env.demand_size])
        time_remaining = np.asarray([env.individual_time_limit_sarl[idx[0], idx[1]] / env.time_slow])

        # return np.concatenate((np.reshape(V2V_channel, -1), V2V_interference, V2I_abs, V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))
        return np.concatenate((V2I_fast, np.reshape(V2V_fast, -1), V2V_interference, np.asarray([V2I_abs]), V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))

    # -----------------------------------------------------------
    n_hidden_1 = 500
    n_hidden_2 = 250
    n_hidden_3 = 120
    n_input = len(get_state(env=env))
    n_output = n_RB * len(env.V2V_power_dB_List)

    g = tf.Graph()
    with g.as_default():
        # ============== Training network ========================
        x = tf.placeholder(tf.float32, [None, n_input])

        w_1 = tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1))
        w_2 = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1))
        w_3 = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1))
        w_4 = tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1))

        b_1 = tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1))
        b_2 = tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1))
        b_3 = tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1))
        b_4 = tf.Variable(tf.truncated_normal([n_output], stddev=0.1))

        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, w_1), b_1))
        layer_1_b = tf.layers.batch_normalization(layer_1)
        layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1_b, w_2), b_2))
        layer_2_b = tf.layers.batch_normalization(layer_2)
        layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2_b, w_3), b_3))
        layer_3_b = tf.layers.batch_normalization(layer_3)
        y = tf.nn.relu(tf.add(tf.matmul(layer_3, w_4), b_4))
        g_q_action = tf.argmax(y, axis=1)

        # compute loss
        g_target_q_t = tf.placeholder(tf.float32, None, name="target_value")
        g_action = tf.placeholder(tf.int32, None, name='g_action')
        action_one_hot = tf.one_hot(g_action, n_output, 1.0, 0.0, name='action_one_hot')
        q_acted = tf.reduce_sum(y * action_one_hot, reduction_indices=1, name='q_acted')

        g_loss = tf.reduce_mean(tf.square(g_target_q_t - q_acted), name='g_loss')
        optim = tf.train.RMSPropOptimizer(learning_rate=0.001, momentum=0.95, epsilon=0.01).minimize(g_loss)

        # ==================== Prediction network ========================
        x_p = tf.placeholder(tf.float32, [None, n_input])

        w_1_p = tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1))
        w_2_p = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1))
        w_3_p = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1))
        w_4_p = tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1))

        b_1_p = tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1))
        b_2_p = tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1))
        b_3_p = tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1))
        b_4_p = tf.Variable(tf.truncated_normal([n_output], stddev=0.1))

        layer_1_p = tf.nn.relu(tf.add(tf.matmul(x_p, w_1_p), b_1_p))
        layer_1_p_b = tf.layers.batch_normalization(layer_1_p)
        layer_2_p = tf.nn.relu(tf.add(tf.matmul(layer_1_p_b, w_2_p), b_2_p))
        layer_2_p_b = tf.layers.batch_normalization(layer_2_p)
        layer_3_p = tf.nn.relu(tf.add(tf.matmul(layer_2_p_b, w_3_p), b_3_p))
        layer_3_p_b = tf.layers.batch_normalization(layer_3_p)
        y_p = tf.nn.relu(tf.add(tf.matmul(layer_3_p_b, w_4_p), b_4_p))

        g_target_q_idx = tf.placeholder('int32', [None, None], 'output_idx')
        target_q_with_idx = tf.gather_nd(y_p, g_target_q_idx)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

    def predict(sess, s_t, ep, test_ep=False):
        n_power_levels = len(env.V2V_power_dB_List)
        if np.random.rand() < ep and not test_ep:
            pred_action = np.random.randint(n_RB*n_power_levels)
        else:
            pred_action = sess.run(g_q_action, feed_dict={x: [s_t]})[0]
        return pred_action

    def predict_sarl(sess, s_t):
        pred_action = sess.run(g_q_action, feed_dict={x: [s_t]})[0]
        return pred_action

    def q_learning_mini_batch(current_agent, current_sess):
        """ Training a sampled mini-batch """

        batch_s_t, batch_s_t_plus_1, batch_action, batch_reward = current_agent.memory.sample()

        if current_agent.double_q:  # double q-learning
            pred_action = current_sess.run(g_q_action, feed_dict={x: batch_s_t_plus_1})
            q_t_plus_1 = current_sess.run(target_q_with_idx, {x_p: batch_s_t_plus_1, g_target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]})
            batch_target_q_t = current_agent.discount * q_t_plus_1 + batch_reward
        else:
            q_t_plus_1 = current_sess.run(y_p, {x_p: batch_s_t_plus_1})
            max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
            batch_target_q_t = current_agent.discount * max_q_t_plus_1 + batch_reward

        _, loss_val = current_sess.run([optim, g_loss], {g_target_q_t: batch_target_q_t, g_action: batch_action, x: batch_s_t})
        return loss_val

    def update_target_q_network(sess):
        """ Update target q network once in a while """

        sess.run(w_1_p.assign(sess.run(w_1)))
        sess.run(w_2_p.assign(sess.run(w_2)))
        sess.run(w_3_p.assign(sess.run(w_3)))
        sess.run(w_4_p.assign(sess.run(w_4)))

        sess.run(b_1_p.assign(sess.run(b_1)))
        sess.run(b_2_p.assign(sess.run(b_2)))
        sess.run(b_3_p.assign(sess.run(b_3)))
        sess.run(b_4_p.assign(sess.run(b_4)))

    def save_models(sess, model_path):
        """ Save models to the current directory with the name filename """

        current_dir = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(current_dir, "model/" + model_path)
        if not os.path.exists(os.path.dirname(model_path)):
            os.makedirs(os.path.dirname(model_path))
        saver.save(sess, model_path, write_meta_graph=False)

    def load_models(sess, model_path):
        """ Restore models from the current directory with the name filename """

        dir_ = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_, "model/" + model_path)
        saver.restore(sess, model_path)

    def print_weight(sess, target=False):
        """ debug """

        if not target:
            print(sess.run(w_1[0, 0:4]))
        else:
            print(sess.run(w_1_p[0, 0:4]))

    # --------------------------------------------------------------
    agents = []
    sesses = []
    for ind_agent in range(n_veh * n_neighbor):  # initialize agents
        # print("Initializing agent", ind_agent)
        agent = Agent(memory_entry_size=len(get_state(env)))
        agents.append(agent)

        sess = tf.Session(graph=g, config=my_config)
        sess.run(init)
        sesses.append(sess)

    agent_sarl = Agent(memory_entry_size=len(get_state(env)))
    sess_sarl = tf.Session(graph=g, config=my_config)
    sess_sarl.run(init)

    # -------------- Testing --------------
    if IS_TEST:
        print("\nRestoring the model...")

        for i in range(n_veh):
            for j in range(n_neighbor):
                model_path = label + '/agent_' + str(i * n_neighbor + j)
                load_models(sesses[i * n_neighbor + j], model_path)

        # restore the single-agent model
        model_path_single = label_sarl + '/agent'
        load_models(sess_sarl, model_path_single)

        V2I_rate_list = []
        V2V_success_list = []
        V2I_rate_list_rand = []
        V2V_success_list_rand = []
        V2I_rate_list_sarl = []
        V2V_success_list_sarl = []
        V2I_rate_list_dpra = []
        V2V_success_list_dpra = []

        rate_marl = np.zeros([n_episode_test, n_step_per_episode, n_veh, n_neighbor])
        rate_rand = np.zeros([n_episode_test, n_step_per_episode, n_veh, n_neighbor])
        demand_marl = env.demand_size * np.ones([n_episode_test, n_step_per_episode+1, n_veh, n_neighbor])
        demand_rand = env.demand_size * np.ones([n_episode_test, n_step_per_episode+1, n_veh, n_neighbor])

        action_all_testing_sarl = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
        action_all_testing_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')

        for idx_episode in range(n_episode_test):
            if idx_episode % 100 == 0:
                print(demand_size, '----- Episode', idx_episode, '-----')

            env.renew_positions()
            env.renew_neighbor()
            env.renew_channel()
            env.renew_channels_fastfading()

            env.demand = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_rand = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_rand = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_rand = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_sarl = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_sarl = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_sarl = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_dpra = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_dpra = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_dpra = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            V2I_rate_per_episode = []
            V2I_rate_per_episode_rand = []
            V2I_rate_per_episode_sarl = []
            V2I_rate_per_episode_dpra = []

            for test_step in range(n_step_per_episode):
                # trained models
                action_all_testing = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                for i in range(n_veh):
                    for j in range(n_neighbor):
                        state_old = get_state(env, [i, j], 1, epsi_final)
                        action = predict(sesses[i*n_neighbor+j], state_old, epsi_final, True)
                        action_all_testing[i, j, 0] = action % n_RB  # chosen RB
                        action_all_testing[i, j, 1] = int(np.floor(action / n_RB))  # power level

                action_temp = action_all_testing.copy()
                V2I_rate, V2V_success, V2V_rate = env.act_for_testing(action_temp)
                V2I_rate_per_episode.append(np.sum(V2I_rate))  # sum V2I rate in bps

                rate_marl[idx_episode, test_step, :, :] = V2V_rate
                demand_marl[idx_episode, test_step+1, :, :] = env.demand

                # random baseline
                action_rand = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                action_rand[:, :, 0] = np.random.randint(0, n_RB, [n_veh, n_neighbor])  # band
                action_rand[:, :, 1] = np.random.randint(0, len(env.V2V_power_dB_List), [n_veh, n_neighbor])  # power

                V2I_rate_rand, V2V_success_rand, V2V_rate_rand = env.act_for_testing_rand(action_rand)
                V2I_rate_per_episode_rand.append(np.sum(V2I_rate_rand))  # sum V2I rate in bps

                rate_rand[idx_episode, test_step, :, :] = V2V_rate_rand
                demand_rand[idx_episode, test_step+1, :, :] = env.demand_rand

                # SARL
                remainder = test_step % (n_veh * n_neighbor)
                i = int(np.floor(remainder/n_neighbor))
                j = remainder % n_neighbor
                state_sarl = get_state_sarl(env, [i, j], 1, epsi_final)
                action = predict_sarl(sess_sarl, state_sarl)
                action_all_testing_sarl[i, j, 0] = action % n_RB  # chosen RB
                action_all_testing_sarl[i, j, 1] = int(np.floor(action / n_RB))  # power level

                action_temp_sarl = action_all_testing_sarl.copy()
                V2I_rate_sarl, V2V_success_sarl, V2V_rate_sarl = env.act_for_testing_sarl(action_temp_sarl)
                V2I_rate_per_episode_sarl.append(np.sum(V2I_rate_sarl))  # sum V2I rate in bps

                # # Used as V2I upper bound only, no V2V transmission
                # action_all_testing_dpra[i, j, 0] = 0  # chosen RB
                # action_all_testing_dpra[i, j, 1] = 3  # power level, fixed to -100 dBm, no V2V transmission
                #
                # action_temp_dpra = action_all_testing_dpra.copy()
                # V2I_rate_dpra, V2V_success_dpra, V2V_rate_dpra = env.act_for_testing_dpra(action_temp_dpra)
                # V2I_rate_per_episode_dpra.append(np.sum(V2I_rate_dpra))  # sum V2I rate in bps
                # # V2V Upper bound only, centralized maxV2V

                # The following applies to n_veh = 4 and n_neighbor = 1 only
                action_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                # n_power_level = len(env.V2V_power_dB_List)
                n_power_level = 1
                store_action = np.zeros([(n_RB*n_power_level)**4, 4])
                rate_all_dpra = []
                t = 0
                # for i in range(n_RB*len(env.V2V_power_dB_List)):
                for i in range(n_RB):
                    for j in range(n_RB):
                        for m in range(n_RB):
                            for n in range(n_RB):
                                action_dpra[0, 0, 0] = i % n_RB
                                action_dpra[0, 0, 1] = int(np.floor(i / n_RB))  # power level

                                action_dpra[1, 0, 0] = j % n_RB
                                action_dpra[1, 0, 1] = int(np.floor(j / n_RB))  # power level

                                action_dpra[2, 0, 0] = m % n_RB
                                action_dpra[2, 0, 1] = int(np.floor(m / n_RB))  # power level

                                action_dpra[3, 0, 0] = n % n_RB
                                action_dpra[3, 0, 1] = int(np.floor(n / n_RB))  # power level

                                action_temp_findMax = action_dpra.copy()
                                V2I_rate_findMax, V2V_rate_findMax = env.Compute_Rate(action_temp_findMax)
                                rate_all_dpra.append(np.sum(V2V_rate_findMax))

                                store_action[t, :] = [i, j, m, n]
                                t += 1

                i = store_action[np.argmax(rate_all_dpra), 0]
                j = store_action[np.argmax(rate_all_dpra), 1]
                m = store_action[np.argmax(rate_all_dpra), 2]
                n = store_action[np.argmax(rate_all_dpra), 3]

                action_testing_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')

                action_testing_dpra[0, 0, 0] = i % n_RB
                action_testing_dpra[0, 0, 1] = int(np.floor(i / n_RB))  # power level

                action_testing_dpra[1, 0, 0] = j % n_RB
                action_testing_dpra[1, 0, 1] = int(np.floor(j / n_RB))  # power level

                action_testing_dpra[2, 0, 0] = m % n_RB
                action_testing_dpra[2, 0, 1] = int(np.floor(m / n_RB))  # power level

                action_testing_dpra[3, 0, 0] = n % n_RB
                action_testing_dpra[3, 0, 1] = int(np.floor(n / n_RB))  # power level

                V2I_rate_findMax, V2V_rate_findMax = env.Compute_Rate(action_testing_dpra)
                check_sum = np.sum(V2V_rate_findMax)

                action_temp_dpra = action_testing_dpra.copy()
                V2I_rate_dpra, V2V_success_dpra, V2V_rate_dpra = env.act_for_testing_dpra(action_temp_dpra)
                V2I_rate_per_episode_dpra.append(np.sum(V2I_rate_dpra))  # sum V2I rate in bps

                # update the environment and compute interference
                env.renew_channels_fastfading()
                env.Compute_Interference(action_temp)
                env.Compute_Interference_sarl(action_temp_sarl)
                env.Compute_Interference_dpra(action_temp_dpra)

                if test_step == n_step_per_episode - 1:
                    V2V_success_list.append(V2V_success)
                    V2V_success_list_rand.append(V2V_success_rand)
                    V2V_success_list_sarl.append(V2V_success_sarl)
                    V2V_success_list_dpra.append(V2V_success_dpra)

            V2I_rate_list.append(np.mean(V2I_rate_per_episode))
            V2I_rate_list_rand.append(np.mean(V2I_rate_per_episode_rand))
            V2I_rate_list_sarl.append(np.mean(V2I_rate_per_episode_sarl))
            V2I_rate_list_dpra.append(np.mean(V2I_rate_per_episode_dpra))

            # print('marl', round(np.average(V2I_rate_per_episode), 2), 'sarl', round(np.average(V2I_rate_per_episode_sarl), 2), 'rand', round(np.average(V2I_rate_per_episode_rand), 2), 'dpra', round(np.average(V2I_rate_per_episode_dpra), 2))
            # print('marl', V2V_success_list[idx_episode], 'sarl', V2V_success_list_sarl[idx_episode], 'rand', V2V_success_list_rand[idx_episode], 'dpra', V2V_success_list_dpra[idx_episode])

        return [demand_size,
                round(np.average(V2I_rate_list), 2), round(np.average(V2V_success_list), 4),
                round(np.average(V2I_rate_list_sarl), 2), round(np.average(V2V_success_list_sarl), 4),
                round(np.average(V2I_rate_list_rand), 2), round(np.average(V2V_success_list_rand), 4),
                round(np.average(V2I_rate_list_dpra), 2), round(np.average(V2V_success_list_dpra), 4)]

        print('-------- marl -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list), 4))

        print('-------- sarl -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_sarl), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_sarl), 4))

        print('-------- random -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_rand), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_rand), 4))

        print('-------- DPRA -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_dpra), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_dpra), 4))
        # The name "DPRA" is used for historical reasons. Not really the case...

        with open("Data.txt", "a") as f:
            f.write('-------- marl, ' + label + '------\n')
            f.write('n_veh: ' + str(n_veh) + ', n_neighbor: ' + str(n_neighbor) + '\n')
            f.write('Sum V2I rate: ' + str(round(np.average(V2I_rate_list), 5)) + ' Mbps\n')
            f.write('Pr(V2V): ' + str(round(np.average(V2V_success_list), 5)) + '\n')

            f.write('-------- sarl, ' + label_sarl + '------\n')
            f.write('n_veh: ' + str(n_veh) + ', n_neighbor: ' + str(n_neighbor) + '\n')
            f.write('Sum V2I rate: ' + str(round(np.average(V2I_rate_list_sarl), 5)) + ' Mbps\n')
            f.write('Pr(V2V): ' + str(round(np.average(V2V_success_list_sarl), 5)) + '\n')

            f.write('--------random ------------\n')
            f.write('Rand Sum V2I rate: ' + str(round(np.average(V2I_rate_list_rand), 5)) + ' Mbps\n')
            f.write('Rand Pr(V2V): ' + str(round(np.average(V2V_success_list_rand), 5)) + '\n')

            f.write('--------DPRA ------------\n')
            f.write('Dpra Sum V2I rate: ' + str(round(np.average(V2I_rate_list_dpra), 5)) + ' Mbps\n')
            f.write('Dpra Pr(V2V): ' + str(round(np.average(V2V_success_list_dpra), 5)) + '\n')

            f.write('----Payload----\n')
            f.write(str(env.demand_size) + '\n')

        current_dir = os.path.dirname(os.path.realpath(__file__))
        marl_path = os.path.join(current_dir, "model/" + label + '/rate_marl.mat')
        scipy.io.savemat(marl_path, {'rate_marl': rate_marl})
        rand_path = os.path.join(current_dir, "model/" + label + '/rate_rand.mat')
        scipy.io.savemat(rand_path, {'rate_rand': rate_rand})

        demand_marl_path = os.path.join(current_dir, "model/" + label + '/demand_marl.mat')
        scipy.io.savemat(demand_marl_path, {'demand_marl': demand_marl})
        demand_rand_path = os.path.join(current_dir, "model/" + label + '/demand_rand.mat')
        scipy.io.savemat(demand_rand_path, {'demand_rand': demand_rand})

    # close sessions
    for sess in sesses:
        sess.close()
def run_finetuning(train_set,
                   dev_set,
                   scratch_dir,
                   train_tfrecord,
                   dev_tfrecord,
                   train_eval_fun=None,
                   use_tpu=False,
                   additional_train_params=None):
  """Main function to train and eval BLEURT."""
  logging.info("Initializing BLEURT training pipeline.")

  bleurt_params = checkpoint_lib.get_bleurt_params_from_flags_or_ckpt()
  max_seq_length = bleurt_params["max_seq_length"]
  bert_config_file = bleurt_params["bert_config_file"]
  init_checkpoint = bleurt_params["init_checkpoint"]

  logging.info("Creating input data pipeline.")
  logging.info("Train/Eval batch size: {}".format(str(FLAGS.batch_size)))

  # set up the training "reverse-dictionary" to capture year-lp
  logging.info("Starting to populate reverse group dictionary.")
  train_df = pd.read_json(train_set, lines=True)
  dev_df = pd.read_json(dev_set, lines=True)
  examples_df = pd.concat([train_df, dev_df])
  # group_hash_dict = {}
  for g in examples_df['group'].unique():
    h = hash_md5_16(g)
    year_lp = '|'.join(g.split('|')[1:])
    group_hash_dict[h] = year_lp
    # debugging
    logging.info(f"Example - {g}:{h}:{group_hash_dict[h]}\n")
  logging.info("Group hash dict populated!")

  # == also, save the dictionary to a file for debugging purposes
  # with open(os.path.join(scratch_dir, 'group_hash_dict'), 'w') as f:
  #   f.write(str(group_hash_dict)+'\n')

  train_input_fn = input_fn_builder(
      train_tfrecord,
      seq_length=max_seq_length,
      is_training=True,
      batch_size=FLAGS.batch_size,
      drop_remainder=use_tpu)
  dev_input_fn = input_fn_builder(
      dev_tfrecord,
      seq_length=max_seq_length,
      is_training=False,
      batch_size=FLAGS.batch_size,
      drop_remainder=use_tpu)

  logging.info("Creating model.")
  bert_config = modeling.BertConfig.from_json_file(bert_config_file)
  num_train_steps = FLAGS.num_train_steps
  num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=use_tpu,
      use_one_hot_embeddings=use_tpu,
      n_hidden_layers=FLAGS.n_hidden_layers,
      hidden_layers_width=FLAGS.hidden_layers_width,
      dropout_rate=FLAGS.dropout_rate)

  logging.info("Creating TF Estimator.")
  exporters = [
      tf.estimator.BestExporter(
          "bleurt_best",
          serving_input_receiver_fn=_serving_input_fn_builder(max_seq_length),
          event_file_pattern="eval_default/*.tfevents.*",
          compare_fn=_model_comparator,
          exports_to_keep=1)
  ]
  tf.enable_resource_variables()

  logging.info("*** Entering the Training / Eval phase ***")
  if not additional_train_params:
    additional_train_params = {}
  train_eval_fun(
      model_fn=model_fn,
      train_input_fn=train_input_fn,
      eval_input_fn=dev_input_fn,
      exporters=exporters,
      **additional_train_params)