def _set_keras_mixed_precision(cls, use_mixed_precision: bool) -> bool:
    """ Select the global Keras mixed precision policy.

    Nothing is changed when the backend reported by :func:`get_backend` is ``"amd"``. Otherwise
    the global policy is set to ``mixed_float16`` when mixed precision is requested and to
    ``float32`` when it is not.

    Parameters
    ----------
    use_mixed_precision: bool
        ``True`` to request the ``mixed_float16`` policy, ``False`` to request ``float32``.

    Returns
    -------
    bool
        ``True`` if the ``mixed_float16`` policy has been set otherwise ``False``
    """
    logger.debug("use_mixed_precision: %s", use_mixed_precision)

    # Guard: the AMD backend is left untouched whatever was requested.
    if get_backend() == "amd":
        logger.debug("No action to perform for 'mixed_precision' on backend '%s': "
                     "use_mixed_precision: %s)", get_backend(), use_mixed_precision)
        return False

    enabled = bool(use_mixed_precision)
    policy = mixedprecision.Policy('mixed_float16' if enabled else 'float32')
    mixedprecision.set_global_policy(policy)

    if enabled:
        message = "Enabled mixed precision. (Compute dtype: %s, variable_dtype: %s)"
    else:
        message = "Disabling mixed precision. (Compute dtype: %s, variable_dtype: %s)"
    logger.debug(message, policy.compute_dtype, policy.variable_dtype)
    return enabled
def main(use_mixed_precision=False,
         training_batch_size=TRAINING_BATCH_SIZE,
         generation_batch_size=generation_batch_size,
         generator_optimizer=generator_optimizer,
         discriminator_optimizer=discriminator_optimizer):
    """Configure precision, build the GAN, restore the latest checkpoint and train.

    Args:
        use_mixed_precision: when truthy the ``mixed_float16`` policy is installed,
            otherwise ``float32``.
        training_batch_size: only printed here.
        generation_batch_size: not used inside this function.
        generator_optimizer: stored in the checkpoint object.
        discriminator_optimizer: used to compile the discriminator and stored in
            the checkpoint object.
    """
    if use_mixed_precision:
        print('Using Mixed Precision')
        policy_name = 'mixed_float16'
    else:
        policy_name = 'float32'
    policy = mixed_precision.Policy(policy_name)
    mixed_precision.set_policy(policy)

    # Backend numeric settings.
    epsilon = 1e-7
    dtype = 'float32'
    K.set_epsilon(epsilon)
    K.set_floatx(dtype)

    print(K.floatx(), K.epsilon(), training_batch_size)
    print('Compute dtype: %s' % policy.compute_dtype)
    print('Variable dtype: %s' % policy.variable_dtype)
    tf.autograph.set_verbosity(0, False)

    # NOTE(review): this generator is created but never used below.
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # Models: generator, discriminator and the combined GAN. The discriminator
    # is compiled only after the GAN has been assembled.
    snek_generator = createSnekMaker()
    snek_discriminator = initializeSnakeIdentifier()
    gan = make_gan(snek_discriminator, snek_generator)
    snek_discriminator.compile(optimizer=discriminator_optimizer,
                               loss='binary_crossentropy')

    train_df = pd.read_csv('./classes_train.csv')

    # Checkpointing: restore from the most recent checkpoint in the directory.
    checkpoint_dir = './snek_checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    print(checkpoint_prefix)
    tracked = dict(
        generator_optimizer=generator_optimizer,
        discriminator_optimizer=discriminator_optimizer,
        snek_checker=snek_discriminator,
        snek_generator=snek_generator,
        gan=gan,
    )
    checkpoint = tf.train.Checkpoint(**tracked)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    train(train_df, EPOCHS, snek_generator, snek_discriminator, gan,
          checkpoint, checkpoint_prefix)
def __init__(self, flags_obj):
    """Store the parsed flags and derive the model parameters from them.

    Args:
      flags_obj: Object containing parsed flag values, i.e., FLAGS.
    """
    self.flags_obj = flags_obj
    self.predict_model = None

    num_gpus = flags_core.get_num_gpus(flags_obj)
    params = misc.get_model_params(flags_obj.param_set, num_gpus)
    self.params = params

    # Overlay the flag-defined values on top of the selected parameter set.
    params.update({
        "num_gpus": num_gpus,
        "use_ctl": flags_obj.use_ctl,
        "data_dir": flags_obj.data_dir,
        "model_dir": flags_obj.model_dir,
        "static_batch": flags_obj.static_batch,
        "max_length": flags_obj.max_length,
        "decode_batch_size": flags_obj.decode_batch_size,
        "decode_max_length": flags_obj.decode_max_length,
        "padded_decode": flags_obj.padded_decode,
        # A falsy flag value falls back to autotuning.
        "num_parallel_calls": (flags_obj.num_parallel_calls or
                               tf.data.experimental.AUTOTUNE),
        "use_synthetic_data": flags_obj.use_synthetic_data,
        # A falsy flag value falls back to the parameter set's default.
        "batch_size": flags_obj.batch_size or params["default_batch_size"],
        "repeat_dataset": None,
        "dtype": flags_core.get_tf_dtype(flags_obj),
        "enable_tensorboard": flags_obj.enable_tensorboard,
        "enable_metrics_in_training": flags_obj.enable_metrics_in_training,
        "steps_between_evals": flags_obj.steps_between_evals,
    })
    logging.info("Running transformer with num_gpus = %d", num_gpus)

    dtype = params["dtype"]
    if dtype == tf.float16:
        # TODO(reedwm): It's pretty ugly to set the global policy in a constructor
        # like this. What if multiple instances of TransformerTask are created?
        # We should have a better way in the tf.keras.mixed_precision API of doing
        # this.
        loss_scale = flags_core.get_loss_scale(flags_obj,
                                               default_for_fp16="dynamic")
        mixed_precision.set_policy(
            mixed_precision.Policy("mixed_float16", loss_scale=loss_scale))
    elif dtype == tf.bfloat16:
        mixed_precision.set_policy(mixed_precision.Policy("mixed_bfloat16"))
def setup(datasets, fp16=True, device='auto', cross_device_ops=None):
    """Deprecated: pick a device/strategy, set the precision policy, distribute datasets.

    Args:
        datasets: iterable of datasets.
        fp16: when truthy, ``mixed_bfloat16`` is set on TPU and ``mixed_float16``
            on GPU(s); nothing is set on other devices.
        device: ``'auto'`` (probe for a TPU first, then count GPUs), ``'TPU'``,
            or a list of device names for a mirrored strategy. Any other value
            gets no strategy.
        cross_device_ops: forwarded to ``tf.distribute.MirroredStrategy``.

    Returns:
        A list of distributed datasets for TPU / multi-GPU, otherwise
        ``datasets`` unchanged.
    """
    warnings.warn(
        "setup will be deprecated in hanser 1.0, "
        "use setup_runtime and distribute_datasets from hanser.distribute instead.",
        DeprecationWarning,
    )

    def _apply_policy(policy_name):
        # Only touch the global policy when reduced precision was requested.
        if fp16:
            mixed_precision.set_policy(mixed_precision.Policy(policy_name))

    def _distributed():
        # Datasets that are already distributed are passed through untouched.
        return [
            ds if isinstance(ds, tf.distribute.DistributedDataset)
            else strategy.experimental_distribute_dataset(ds)
            for ds in datasets
        ]

    # --- Stage 1: resolve the device name and the strategy -------------------
    strategy = None
    if device == 'auto':
        strategy = get_colab_tpu()
        if strategy:
            device = 'TPU'
        else:
            gpus = tf.config.list_physical_devices('GPU')
            gpu_count = len(gpus)
            if gpu_count == 0:
                device = 'CPU'
            elif gpu_count == 1:
                device = 'GPU'
            else:
                device = 'GPUs'
                strategy = tf.distribute.MirroredStrategy(
                    cross_device_ops=cross_device_ops)
                set_gpu_thread_mode_and_count(len(gpus))
    elif device == 'TPU':
        strategy = get_colab_tpu()
    elif isinstance(device, list):
        strategy = tf.distribute.MirroredStrategy(
            devices=device, cross_device_ops=cross_device_ops)
        set_gpu_thread_mode_and_count(len(device))

    # --- Stage 2: precision policy and dataset distribution ------------------
    if device == 'TPU':
        _apply_policy('mixed_bfloat16')
        tf.distribute.experimental_set_strategy(strategy)
        return _distributed()

    if device == 'GPU':
        _apply_policy('mixed_float16')
        return datasets

    if isinstance(device, list) or device == 'GPUs':
        tf.distribute.experimental_set_strategy(strategy)
        _apply_policy('mixed_float16')
        return _distributed()

    return datasets
def set_precision(precision):
    """Set the global Keras policy from a bit width.

    Args:
        precision: ``16`` (``mixed_float16``), ``32`` (``float32``) or
            ``64`` (``float64``).

    Raises:
        NameError: if ``precision`` is not one of the supported values.
    """
    import tensorflow.keras.mixed_precision.experimental as mixed_precision

    print(f"SETTING PRECISION TO {precision}")

    supported = ((16, "mixed_float16"), (32, 'float32'), (64, 'float64'))
    for bits, policy_name in supported:
        if precision == bits:
            break
    else:
        # No entry matched the requested precision.
        raise NameError(f"Available precision: 16, 32, 64. Not {precision}!")

    mixed_precision.set_policy(mixed_precision.Policy(policy_name))
def wrapped(*args, **kwargs):
    """Call ``func`` under the ``mixed_float16`` policy, then switch to ``float32``.

    The ``float32`` policy is installed afterwards whether ``func`` returns or
    raises; the policy that was active before the call is not remembered.
    """
    mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))
    try:
        return func(*args, **kwargs)
    finally:
        # Always leave mixed precision mode, even on an exception.
        mixed_precision.set_policy(mixed_precision.Policy('float32'))
def main(config):
    """Prefill the episode dataset, then alternate evaluation and collection.

    Args:
        config: run configuration; the attributes read here are ``gpu_growth``,
            ``precision``, ``steps``, ``logdir``, ``parallel``, ``envs``,
            ``prefill``, ``action_repeat`` and ``eval_every``.
    """
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices("GPU"):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy("mixed_float16"))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print("Logdir", config.logdir)

    # Create environments.
    datadir = config.logdir / "episodes"
    writer = tf.summary.create_file_writer(
        str(config.logdir), max_queue=1000, flush_millis=20000
    )
    writer.set_as_default()

    def spawn_envs(mode, store):
        # One asynchronous wrapper per configured environment.
        return [
            wrappers.Async(
                lambda: make_env(config, writer, mode, datadir, store=store),
                config.parallel,
            )
            for _ in range(config.envs)
        ]

    train_envs = spawn_envs("train", True)
    test_envs = spawn_envs("test", False)
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f"Prefill dataset with {prefill} steps.")

    def random_agent(obs, done, state):
        # One sampled action per entry of ``done``; no agent state.
        return [actspace.sample() for _ in done], None

    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f"Simulating agent for {config.steps-step} steps.")
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / "variables.pkl").exists():
        print("Load checkpoint.")
        agent.load(config.logdir / "variables.pkl")

    state = None
    while step < config.steps:
        print("Start evaluation.")
        evaluator = functools.partial(agent, training=False)
        tools.simulate(evaluator, test_envs, episodes=1)
        writer.flush()

        print("Start collection.")
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / "variables.pkl")

    for env in train_envs + test_envs:
        env.close()
def get_deepasrnetwork1(input_dim=None, output_dim=29, is_mixed_precision=True,
                        random_state=1) -> keras.Model:
    """Build the Conv1D + GRU acoustic model.

    input_dim: int and a multiple of 4
    output_dim: number of letters in the vocabulary
    is_mixed_precision: when truthy a global Keras policy is set (see note below)
    random_state: seed applied to both NumPy and TensorFlow
    """
    if is_mixed_precision:
        # NOTE(review): the policy selected here is 'float32', not
        # 'mixed_float16' - confirm whether that is intentional.
        mixed_precision.set_policy(mixed_precision.Policy('float32'))

    np.random.seed(random_state)
    tf.random.set_seed(random_state)

    # Network input.
    input_data = Input(name='the_input', shape=(None, input_dim), dtype='float32')

    # Normalise the input.
    features = BatchNormalization(axis=-1, name='BN_1')(input_data)

    # Two identical Conv1D blocks, each followed by batch normalisation.
    for conv_name, norm_name in (('Conv1D_1', "CNBN_1"), ('Conv1D_2', "CNBN_2")):
        features = Conv1D(filters=220,
                          kernel_size=5,
                          strides=1,
                          padding='valid',
                          activation='relu',
                          name=conv_name)(features)
        features = BatchNormalization(name=norm_name)(features)

    # Recurrent part: one forward GRU and one reading the sequence backwards.
    forward = GRU(512, return_sequences=True, name='gru_1')(features)
    backward = GRU(512, return_sequences=True, go_backwards=True,
                   name='gru_2')(features)

    # Merge the two GRU outputs and normalise again.
    merged = concatenate([forward, backward])
    normalised = BatchNormalization(axis=-1, name="BN_2")(merged)

    # Per-timestep classifier.
    hidden = TimeDistributed(Dense(30))(normalised)
    y_pred = TimeDistributed(
        Dense(output_dim, activation='softmax', name='y_pred'),
        name='the_output')(hidden)

    return Model(inputs=input_data, outputs=y_pred)
def _set_keras_mixed_precision(cls, use_mixed_precision: bool, exclude_gpus: bool) -> bool:
    """ Turn on the Keras ``mixed_float16`` global policy when requested.

    Parameters
    ----------
    use_mixed_precision: bool
        ``True`` if the ``mixed_float16`` policy should be set otherwise ``False``.
    exclude_gpus: bool
        ``True`` If connected GPUs are being excluded otherwise ``False``. Only logged here.

    Returns
    -------
    bool
        ``True`` if mixed precision has been enabled otherwise ``False``
    """
    logger.debug("use_mixed_precision: %s, exclude_gpus: %s",
                 use_mixed_precision, exclude_gpus)

    if use_mixed_precision:
        logger.info("Enabling Mixed Precision Training.")
        policy = mixedprecision.Policy('mixed_float16')
        mixedprecision.set_global_policy(policy)
        logger.debug(
            "Enabled mixed precision. (Compute dtype: %s, variable_dtype: %s)",
            policy.compute_dtype, policy.variable_dtype)
        return True

    # Not requested: the global policy is left as it is.
    logger.debug(
        "Not enabling 'mixed_precision' (backend: %s, use_mixed_precision: %s)",
        get_backend(), use_mixed_precision)
    return False
def startup_env(dtype="float16", enable_check_numerics=False, enable_xla=False):
    """ Startup program environments.

    Args:
        dtype: "float32" or "float16". "float16" installs the ``mixed_float16``
            policy and registers the computation dtype with ``compat``.
        enable_check_numerics: when truthy, enables TensorFlow numerics checking.
        enable_xla: when truthy, turns on JIT compilation and disables the
            ``pin_to_host_optimization`` grappler option.

    Raises:
        ValueError: if ``dtype`` is not one of the two accepted values.
    """
    accepted = ["float32", "float16"]
    if dtype not in accepted:
        raise ValueError(
            f"Not supported dtype={dtype} (now only accept float32 and float16)"
        )

    if dtype == "float16":
        logging.info("Using float16 as computation dtype.")
        if not compat.IS_PREV_TF_2_4_0:
            tf.keras.mixed_precision.set_global_policy("mixed_float16")
        else:
            # Older TensorFlow: the policy API lives in the experimental namespace.
            from tensorflow.keras.mixed_precision import experimental as mixed_precision
            mixed_precision.set_policy(mixed_precision.Policy("mixed_float16"))
        compat.register_computation_dtype("float16", -6.e4)

    if enable_check_numerics:
        logging.info("Enable checking numerics.")
        tf.debugging.enable_check_numerics()

    if enable_xla:
        tf.config.optimizer.set_jit(True)
        # it causes OOM and performance regression
        grappler_options = {"pin_to_host_optimization": False}
        tf.config.optimizer.set_experimental_options(grappler_options)
def test_build_and_train_model(image_size, mixed_precision):
    """Build a tiny BigGAN, train it for one epoch and check the written files."""
    if mixed_precision:
        mp.set_policy(mp.Policy("mixed_float16"))

    model = biggan.build_model(
        image_size=image_size,
        channels=4,
        num_classes=4,
        latent_dim=4,
    )
    assert model.G.built and model.D.built

    with tempfile.TemporaryDirectory() as model_path:
        # Two random (image, label) pairs served in batches of one.
        images = tf.random.normal((2, image_size, image_size, 3))
        labels = tf.random.uniform((2, 4))
        dataset = tf.data.Dataset.from_tensor_slices(
            (images, labels)).batch(1, drop_remainder=True)

        biggan.train_model(
            model=model,
            dataset=dataset,
            model_path=model_path,
            num_epochs=1,
            log_every=1,
        )

        # Training must have left a checkpoint and a summary file behind.
        checkpoints = glob.glob(os.path.join(model_path, "ckpt_*"))
        event_files = glob.glob(os.path.join(model_path, "events.out.tfevents.*"))
        assert len(checkpoints) > 0
        assert len(event_files) > 0
def make_fc_model(x_train, y_train, x_test, y_test):
    """Train and evaluate a fully connected classifier under ``mixed_float16``.

    Every sample is flattened to a vector. The sample count and the input width
    are taken from the arrays themselves instead of being hard-coded, so the
    function works for any dataset size (the original sizes behave as before).

    Args:
        x_train: training samples; the first axis indexes samples.
        y_train: training labels. NOTE: shifted by 769 **in place**, so the
            caller's array is modified.
        x_test: test samples; the first axis indexes samples.
        y_test: test labels. NOTE: shifted by 769 **in place** as well.

    Returns:
        The trained ``keras.Model``.
    """
    # Flatten each sample, keeping the number of samples the data actually has.
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)

    # Shift labels so they start at 0 (presumably raw labels are 769..772,
    # matching the 4-unit output layer - TODO confirm). Kept in place because
    # callers may rely on the shifted arrays afterwards.
    y_train -= 769
    y_test -= 769

    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_policy(policy)

    num_units = 4096
    inputs = keras.Input(shape=(x_train.shape[1], ), name='eeg_data')
    x = layers.Dense(num_units, activation='relu', name='dense_1')(inputs)
    x = layers.Dense(num_units, activation='relu', name='dense_2')(x)
    x = layers.Dense(num_units, activation='relu', name='dense_3')(x)
    x = layers.Dense(4, name='dense_logits')(x)
    # The softmax is computed in float32 regardless of the global policy.
    outputs = layers.Activation('softmax', dtype='float32',
                                name='predictions')(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train,
              batch_size=20,
              epochs=10,
              validation_split=0.1)

    test_scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', test_scores[0])
    print('Test accuracy:', test_scores[1])
    return model
def model_creator(config):
    """Create a ResNet50 with L2 kernel regularisation and tuned batch norm.

    Args:
        config: mapping with ``"wd"`` (L2 factor) and ``"bf16"`` (truthy to use
            the ``mixed_bfloat16`` policy).

    Returns:
        The rebuilt model; with bf16 it is wrapped so the output is cast to
        ``float32``.
    """
    wd = config["wd"]
    use_bf16 = config["bf16"]

    import tensorflow as tf
    import tensorflow.keras as keras

    if use_bf16:
        from tensorflow.keras.mixed_precision import experimental as mixed_precision
        mixed_precision.set_policy(mixed_precision.Policy('mixed_bfloat16'))

    base = tf.keras.applications.resnet50.ResNet50(weights=None, classes=1001)
    model_config = base.get_config()

    # Patch the serialised config layer by layer, then rebuild from it.
    for layer, layer_config in zip(base.layers, model_config['layers']):
        if hasattr(layer, 'kernel_regularizer'):
            l2 = keras.regularizers.l2(wd)
            layer_config['config']['kernel_regularizer'] = {
                'class_name': l2.__class__.__name__,
                'config': l2.get_config(),
            }
        # Exact type match only: subclasses are deliberately not matched.
        if type(layer) is keras.layers.BatchNormalization:
            layer_config['config']['momentum'] = 0.9
            layer_config['config']['epsilon'] = 1e-5

    model = tf.keras.models.Model.from_config(model_config)
    if not use_bf16:
        return model

    # Under bf16 the output is cast back to float32.
    to_float32 = tf.keras.layers.Lambda(lambda x: tf.cast(x, dtype=tf.float32))
    return tf.keras.models.Sequential([model, to_float32])
def main():
    """Load the config, restore the latest checkpoint and evaluate the model.

    Command-line arguments:
        --config-file: path of the config file merged into ``cfg``.
        --restart: parsed but not used in this function.

    Raises:
        FileNotFoundError: if ``cfg.OUTPUT_DIR`` contains no checkpoint.
    """
    parser = argparse.ArgumentParser(description="training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--restart", default=0, type=int)
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.freeze()
    print(cfg)

    # float16, mixed precision
    if cfg.MIXED_PRECISION:
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)

    latest = tf.train.latest_checkpoint(cfg.OUTPUT_DIR)
    if latest is None:
        # latest_checkpoint returns None when nothing was found; fail with a
        # clear message instead of an obscure error inside load_weights.
        raise FileNotFoundError(
            "No checkpoint found in {!r}".format(cfg.OUTPUT_DIR))

    model = build_compiled_model(cfg)
    # load_weights restores the weights into the model in place; its return
    # value is not the model, so it must not be assigned back to ``model``.
    model.load_weights(latest)

    # NOTE(review): ``data`` is built but not passed to evaluate() below -
    # confirm whether evaluate() was meant to receive it.
    data = build_data(cfg)
    model.evaluate(
        cv2_imread(),
        use_multiprocessing=False,
        callbacks=build_callbacks(cfg))
def initialize_agent(self):
    """Build the Dreamer agent from command-line config and load its weights.

    Every entry of ``dreamer.define_config()`` becomes a command-line option
    with that entry's value as default; unrecognised arguments are ignored.

    Raises:
        ValueError: if ``self.save_directory`` does not exist.
    """
    parser = argparse.ArgumentParser()
    for key, value in dreamer.define_config().items():
        parser.add_argument(f'--{key!s}', type=tools.args_type(value), default=value)
    config, unknown = parser.parse_known_args()

    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)

    # NOTE(review): datadir is computed but not used afterwards.
    datadir = self.save_directory + '/episodes'

    # Two-dimensional action space bounded to [-1, 1] on both axes.
    low, high = np.array([-1, -1]), np.array([1, 1])
    actspace = gym.spaces.Box(low, high)
    self.agent = dreamer.Dreamer(config, actspace)

    if not pathlib.Path(self.save_directory).exists():
        raise ValueError('Could not load weights')
    print('Load checkpoint.')
    self.agent.load(self.save_directory)

    self.state = None
    self.agent_not_initialized = False
def __init__(self, config):
    """Create the decoder layers from the nested ``config`` mapping.

    Reads ``config['dataloader']['eng_vocab']`` and, from
    ``config['rnn_attention']``, ``embed_size`` and ``gru_units``.
    """
    super(Decoder, self).__init__()

    # NOTE(review): the attribute is called ger_vocab but is filled from the
    # 'eng_vocab' entry - confirm which vocabulary is intended.
    self.ger_vocab = config['dataloader']['eng_vocab']
    attention_cfg = config['rnn_attention']
    self.embed_size = attention_cfg['embed_size']
    self.gru_units = attention_cfg['gru_units']

    self.embed = tf.keras.layers.Embedding(
        input_dim=self.ger_vocab,
        output_dim=self.embed_size,
    )

    # Both GRU layers share the same settings.
    gru_settings = dict(
        units=self.gru_units,
        kernel_initializer='glorot_normal',
        return_sequences=True,
        return_state=True,
    )
    self.gru1 = tf.keras.layers.GRU(**gru_settings)
    self.gru2 = tf.keras.layers.GRU(**gru_settings)

    self.attention = LuongAttention(config)

    # The projection onto the vocabulary is pinned to a float32 policy.
    projection = tf.keras.layers.Dense(
        self.ger_vocab, dtype=mixed_precision.Policy('float32'))
    self.fc = tf.keras.layers.TimeDistributed(projection)
def __init__(self, config):
    """
    Initializer.
    :param config: configuration object; the attributes read here are cv, num_filters_base,
                   activation, num_levels, model, image_folder, setup_folder, output_folder,
                   load_model_filenames and spacing.
    """
    super().__init__()

    # Mixed precision is switched on exactly when a GPU device name is reported.
    self.use_mixed_precision = tf.test.gpu_device_name() != ''
    if self.use_mixed_precision:
        mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))

    self.cv = config.cv
    self.config = config
    self.data_format = 'channels_last'
    self.network_parameters = OrderedDict(num_filters_base=config.num_filters_base,
                                          activation=config.activation,
                                          num_levels=config.num_levels,
                                          data_format=self.data_format)
    # Only 'unet' is mapped to a network class; other values leave self.network unset.
    if config.model == 'unet':
        self.network = Unet

    self.save_output_images = True
    self.save_debug_images = False
    self.image_folder = config.image_folder
    self.setup_folder = config.setup_folder
    self.output_folder = config.output_folder
    self.load_model_filenames = config.load_model_filenames
    self.image_size = [None, None, None]
    self.image_spacing = [config.spacing] * 3

    # Image ids are the sorted '*.nii.gz' file names with that suffix stripped.
    pattern = os.path.join(self.image_folder, '*.nii.gz')
    self.image_id_list = [os.path.basename(path)[:-len('.nii.gz')]
                          for path in sorted(glob(pattern))]
def __init__(self, args, data_loader, valid_loader=None):
    """Keep the run settings, optionally enable mixed precision, build the model.

    Args:
        args: run arguments; ``mode``, ``batch_size``, ``mixed_training``,
            ``n_epochs``, ``save_dir`` and ``lr`` are read here.
        data_loader: provides ``y_train`` and ``x_test``.
        valid_loader: optional validation loader, only stored.
    """
    self.data_loader = data_loader
    self.valid_loader = valid_loader

    self.mode = args.mode
    self.batch_size = args.batch_size
    self.mixed_training = args.mixed_training
    self.n_epochs = args.n_epochs
    self.save_dir = args.save_dir

    if args.mixed_training:
        # Compute in float16 to speed things up; this is configured through
        # the Keras mixed precision policy.
        # NOTE(review): the module is referenced as `mixed_precicion` -
        # presumably that matches the import alias; verify.
        mixed_precicion.set_policy(mixed_precicion.Policy('mixed_float16'))

    self.n_classes = len(np.unique(data_loader.y_train))

    # Because the dataset was switched from CIFAR-10 to MNIST, the way the
    # model is obtained was changed as follows (previously:
    # get_model((None, None, 3), self.n_classes)).
    test_shape = data_loader.x_test.shape
    w = test_shape[1]
    h = test_shape[2]
    self.model = get_model((w, h, 1), 10)
    print("model input : " + str(self.model.input))
    print("model output : " + str(self.model.output))

    # One sparse categorical cross-entropy loss per entry of the loss list.
    loss_list = [
        losses.SparseCategoricalCrossentropy(),
        losses.SparseCategoricalCrossentropy(),
    ]
    self.model.compile(loss=loss_list,
                       optimizer=optimizers.Adam(lr=args.lr),
                       metrics=['acc'])
def set_amp(enable):
    """Record the AMP flag and install the matching global Keras policy.

    Args:
        enable: stored in ``FLAGS.amp``; truthy selects ``mixed_float16``,
            falsy selects ``float32``.

    The resulting compute and variable dtypes are logged at warning level.
    """
    FLAGS.amp = enable
    if FLAGS.amp:
        policy = mixed_precision.Policy('mixed_float16')
        LOGGER.info(
            'Kindly Reminder: mixed_precision enables you train model with double batch size and learning rate!!!'
        )
        LOGGER.info(
            'General rules of thumb: Dimensions (batch, channels, image size, dense nodes) in multiples of 8 If not, Tensor Cores probably still work, but might involve padding (less efficient) Dimensions < 256, use power of 2 Batch size (depending on model) might be optimal, please ref: https://docs.nvidia.com/deeplearning/sdk/dl-performance-guide/index.html#perf-guidelines'
        )
    else:
        policy = mixed_precision.Policy('float32')
    mixed_precision.set_policy(policy)

    # Logger.warn is a deprecated alias; warning() is the supported spelling.
    LOGGER.warning('Compute dtype: {}'.format(policy.compute_dtype))
    LOGGER.warning('Variable dtype: {}'.format(policy.variable_dtype))
def build(self):
    """Seed TensorFlow, choose a distribution strategy and set the precision policy.

    Reads ``seed``, ``evalbatch``, ``batch``, ``tpuname``, ``tpuzone`` and
    ``amp`` from ``self.config`` and finishes by calling ``self.validate()``.
    """
    tf.random.set_seed(self.config["seed"])

    # A non-positive evalbatch means "use the training batch size".
    if self.config["evalbatch"] > 0:
        self.evalbatch = self.config["evalbatch"]
    else:
        self.evalbatch = self.config["batch"]

    # Use TPU if available, otherwise resort to GPU/CPU
    try:
        self.tpu = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=self.config["tpuname"], zone=self.config["tpuzone"])
    except ValueError:
        self.tpu = None
        logger.info("Could not find the tpu")

    if self.tpu:
        # TPUStrategy for distributed training
        logger.info("Utilizing TPUs")
        tf.config.experimental_connect_to_cluster(self.tpu)
        tf.tpu.experimental.initialize_tpu_system(self.tpu)
        self.strategy = tf.distribute.experimental.TPUStrategy(self.tpu)
    elif len(get_available_gpus()) > 1:
        self.strategy = tf.distribute.MirroredStrategy()
    else:
        # default strategy that works on CPU and single GPU
        self.strategy = tf.distribute.get_strategy()

    self.amp = self.config["amp"]
    if self.amp:
        # bfloat16 on TPU, float16 everywhere else.
        if self.tpu:
            policy_name = "mixed_bfloat16"
        else:
            policy_name = "mixed_float16"
        mixed_precision.set_policy(mixed_precision.Policy(policy_name))

    # Defining some props that we will later initialize
    self.validate()
def build_model_partial(name="regular",
                        classes=80,
                        boxes=9,
                        ltype="giou",
                        use_mixed=True,
                        w=None,
                        h=None,
                        dataset_name="coco",
                        split='validation',
                        batch_size=1,
                        load_head=True,
                        fixed_size=False):
    """Build a YOLOv3 model with darknet weights and its loss functions.

    Args:
        name: model variant; ``"tiny"`` selects the two-scale mask/anchor set,
            anything else the three-scale set.
        classes: forwarded to ``Yolov3``.
        boxes: forwarded to ``Yolov3``.
        ltype: loss type forwarded to ``load_loss``.
        use_mixed: when truthy the ``mixed_float16`` policy is set and its
            dtypes are printed.
        w, h: input width/height; ``w`` also scales the loss (416 if ``None``).
        dataset_name, split, fixed_size: accepted for interface compatibility;
            not used in this function.
        batch_size: first entry of the model's input shape.
        load_head: forwarded as ``dn2tf_head`` when loading darknet weights.

    Returns:
        ``(model, loss_fns, anchors, masks)``.
    """
    from yolo.modeling.yolo_v3 import Yolov3
    # NOTE(review): the next three imports are not referenced below; they are
    # kept in case importing them has side effects - confirm and remove.
    import yolo.modeling.building_blocks as nn_blocks
    from yolo.dataloaders.preprocessing_functions import preprocessing
    import tensorflow_datasets as tfds

    if use_mixed:
        from tensorflow.keras.mixed_precision import experimental as mixed_precision
        # using mixed type policy give better performance than strictly float32
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)
        print('Compute dtype: %s' % policy.compute_dtype)
        print('Variable dtype: %s' % policy.variable_dtype)

    if name != "tiny":
        masks = {"1024": [6, 7, 8], "512": [3, 4, 5], "256": [0, 1, 2]}
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                   (59, 119), (116, 90), (156, 198), (373, 326)]
    else:
        masks = {"1024": [3, 4, 5], "256": [0, 1, 2]}
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                   (344, 319)]

    model = Yolov3(classes=classes,
                   boxes=boxes,
                   type=name,
                   input_shape=(batch_size, w, h, 3))
    model.load_weights_from_dn(dn2tf_backbone=True, dn2tf_head=load_head)

    # Identity test for None (PEP 8); 416 is the fallback loss scale.
    w_scale = 416 if w is None else w
    loss_fns = load_loss(masks=masks,
                         anchors=anchors,
                         scale=w_scale,
                         ltype=ltype)
    return model, loss_fns, anchors, masks
def __init__(
        self,
        seq_len,
        vocab_size,
        embedding_dim=20,
        hidden_dim=256,
        n_hidden=2,
        dff=512,
        n_epochs=1,
        batch_size=1000,
        inference_batch_size=1500,
        cache_dir='.',
        model_name='bilstm',
        seed=None,
        verbose=False
):
    """Build the two-input LSTM model under the ``mixed_float16`` policy.

    Two inputs of length ``seq_len - 1`` share one embedding and one stack of
    LSTM layers; their final outputs are concatenated and projected onto
    ``vocab_size + 1`` classes with a float32 softmax. All constructor
    arguments are also stored on the instance with a trailing underscore.
    """
    super().__init__(seed=seed,)

    mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))

    context_len = seq_len - 1
    input_pre = Input(shape=(context_len,))
    input_post = Input(shape=(context_len,))

    # A single embedding layer serves both inputs.
    embed = Embedding(vocab_size + 1, embedding_dim, input_length=context_len)
    x_pre, x_post = embed(input_pre), embed(input_post)

    # Sequence-returning layers first, each applied to both branches ...
    for _ in range(n_hidden - 1):
        recurrent = LSTM(hidden_dim, return_sequences=True)
        x_pre = recurrent(x_pre)
        x_post = recurrent(x_post)
    # ... then one final layer that returns only its last output.
    final = LSTM(hidden_dim)
    x_pre = final(x_pre)
    x_post = final(x_post)

    merged = concatenate([x_pre, x_post], name='embed_layer')
    logits = Dense(vocab_size + 1)(merged)
    # The softmax is computed in float32 regardless of the global policy.
    output = Activation('softmax', dtype='float32')(logits)

    self.model_ = Model(inputs=[input_pre, input_post], outputs=output)

    self.seq_len_ = seq_len
    self.vocab_size_ = vocab_size
    self.embedding_dim_ = embedding_dim
    self.hidden_dim_ = hidden_dim
    self.n_hidden_ = n_hidden
    self.dff_ = dff
    self.n_epochs_ = n_epochs
    self.batch_size_ = batch_size
    self.inference_batch_size_ = inference_batch_size
    self.cache_dir_ = cache_dir
    self.model_name_ = model_name
    self.verbose_ = verbose
def setup_mp(config):
    """Install the ``mixed_float16`` policy when ``config['mixed_precision']`` is truthy.

    Prints the policy's compute dtype, variable dtype and loss scale. Does
    nothing when the option is falsy.
    """
    if not config['mixed_precision']:
        return

    print('Training with Mixed Precision')
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_policy(policy)
    print(f'Compute dtype: {policy.compute_dtype}')
    print(f'Variable dtype: {policy.variable_dtype}')
    print(f'Loss scale: {policy.loss_scale}')
def set_policy(policy_name):
    """Install the named Keras policy and report the compute dtype.

    Args:
        policy_name: policy name, or ``None`` to leave the global policy alone.

    Returns:
        ``tf.float32`` when ``policy_name`` is ``None``; otherwise the new
        policy's ``compute_dtype``. NOTE(review): the two branches return
        different kinds of value - confirm callers handle both.
    """
    if policy_name is not None:
        from tensorflow.keras.mixed_precision import experimental as mixed_precision
        policy = mixed_precision.Policy(policy_name)
        mixed_precision.set_policy(policy)
        return policy.compute_dtype
    return tf.float32
def set_mixed_precision(self):
    """
    Install the Keras ``mixed_float16`` policy as the global policy.
    """
    mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))
def set_mixed_precision_policy(policy='mixed_float16'):
    """
    Set the global Keras policy by name and print its dtypes.

    Document: https://www.tensorflow.org/guide/mixed_precision
    """
    # Separate name so the ``policy`` argument is not rebound.
    active_policy = mixed_precision.Policy(policy)
    mixed_precision.set_policy(active_policy)
    print('Compute dtype: %s' % active_policy.compute_dtype)
    print('Variable dtype: %s' % active_policy.variable_dtype)
def get_new_model():
    """Return a freshly compiled ``DNNModel`` built under ``mixed_float16``.

    The Keras session is cleared first. The model is compiled with SGD
    (learning rate 0.001) and a mean squared error loss.
    """
    tf.keras.backend.clear_session()
    mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))

    model = DNNModel(n_stages=4, filters=192, kernel_size=3)
    model.compile(optimizer=tf.optimizers.SGD(learning_rate=0.001),
                  loss='mean_squared_error')
    return model
def setup_gpu(fp16=True):
    """Clear the Keras session, enable memory growth on the first GPU, set the policy.

    Args:
        fp16: when truthy the ``mixed_float16`` policy is installed.
    """
    assert has_gpu()
    tf.keras.backend.clear_session()

    # Memory growth is enabled on the first listed GPU only.
    first_gpu = tf.config.list_physical_devices('GPU')[0]
    tf.config.experimental.set_memory_growth(first_gpu, True)

    if fp16:
        mixed_precision.set_policy(mixed_precision.Policy('mixed_float16'))
def set_precision(precision):
    """Configure numeric precision.

    Args:
        precision: ``'float16'`` sets the backend float type and epsilon
            directly; ``'mixed'`` installs the ``mixed_float16`` policy; any
            other value installs the ``float32`` policy.
    """
    if precision == 'float16':
        dtype = 'float16'
        K.set_floatx(dtype)
        # default is 1e-7 which is too small for float16. Without adjusting the
        # epsilon, we will get NaN predictions because of divide by zero problems
        K.set_epsilon(1e-4)
        print_debug('Compute dtype: %s' % 'float16')
        print_debug('Variable dtype: %s' % 'float16')
        return

    # Both remaining modes go through a Keras policy and differ only by name.
    if precision == 'mixed':
        policy_name = 'mixed_float16'
    else:
        policy_name = 'float32'
    policy = mixed_precision.Policy(policy_name)
    mixed_precision.set_policy(policy)
    print_debug('Compute dtype: %s' % policy.compute_dtype)
    print_debug('Variable dtype: %s' % policy.variable_dtype)
def set_precision_policy(hparams):
    """Install ``mixed_float16`` when ``hparams.mixed_precision`` is truthy.

    Args:
        hparams: object with ``mixed_precision`` and ``verbose`` attributes.

    Returns:
        The policy that was set, or ``None`` when mixed precision is off.
    """
    if not hparams.mixed_precision:
        return None

    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_policy(policy)
    if hparams.verbose:
        print('\nCompute dtype: {}\nVariable dtype: {}\n'.format(
            policy.compute_dtype, policy.variable_dtype))
    return policy