def to_bfloat16_unbiased(x, quantization_noise=None):
    """Convert a float32 Tensor to bfloat16 using randomized roundoff.

    Two bfloat16 candidates are built (the direct cast of |x| and its
    neighbour on the other side of |x|) and handed, together with the noise,
    to `randomized_roundoff_to_bfloat16`. The sign of x is re-applied to the
    result.

    The default noise comes from `quantization_noise_from_step_num()`, which
    requires global_step and is not deterministic. To use this function for
    inference it might be feasible to pass a constant instead, e.g. 0.5.

    Args:
      x: A float32 Tensor.
      quantization_noise: A float, specifying the quantization noise. When
        None, `quantization_noise_from_step_num()` is called to obtain it.

    Returns:
      A bfloat16 Tensor, with the same shape as x.
    """
    if quantization_noise is None:
        quantization_noise = quantization_noise_from_step_num()
    x_sign = tf.sign(x)
    # Make sure x is positive.  If it is zero, the two candidates are
    # identical.
    x = x * x_sign + 1e-30
    # tf.cast replaces the deprecated tf.to_bfloat16 / tf.to_float helpers.
    cand1 = tf.cast(x, tf.bfloat16)
    cand1_f = tf.cast(cand1, tf.float32)
    # This relies on the fact that for a positive bfloat16 b,
    # b * 1.005 gives you the next higher bfloat16 and b * 0.995 gives you the
    # next lower one. Both 1.005 and 0.995 are ballpark estimation.
    cand2 = tf.cast(
        tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995),
        tf.bfloat16)
    ret = randomized_roundoff_to_bfloat16(x, quantization_noise, cand1, cand2)
    return ret * tf.cast(x_sign, tf.bfloat16)
def alltoall(self, x, mesh_axis, split_axis, concat_axis):
    """Grouped alltoall (like MPI alltoall with splitting and concatenation).

    The exchange runs over the groups returned by
    `self._create_group_assignment([mesh_axis])`. float32 slices are sent as
    bfloat16 and cast back to their original dtype afterwards.

    Args:
      x: a LaidOutTensor
      mesh_axis: an integer, the mesh axis along which to group
      split_axis: an integer (the Tensor axis along which to split)
      concat_axis: an integer (the Tensor axis along which to concatenate)

    Returns:
      a LaidOutTensor
    """
    laid_out = x.to_laid_out_tensor()
    local_slice = laid_out.one_slice
    groups = self._create_group_assignment([mesh_axis])
    original_dtype = local_slice.dtype
    if original_dtype == tf.float32:
        # There seems to be a bug with float32 alltoall.
        # Do it in bfloat16 until the bug is fixed.
        # TODO(noam): file a bug
        local_slice = tf.to_bfloat16(local_slice)
    exchanged = tpu_ops.all_to_all(
        local_slice,
        concat_dimension=concat_axis,
        split_dimension=split_axis,
        split_count=len(groups[0]),
        group_assignment=groups)
    # Restore the caller's dtype (undoes the bfloat16 detour, if any).
    restored = tf.cast(exchanged, original_dtype)
    return self.LaidOutTensor([restored])
def _step(source_replica, target_replica, x_split, op="plus_eq"):
    """Helper function - one step of summing or copying.

    If op == "plus_eq", then adds source_replica into target_replica
    If op == "copy", then copies source_replica onto target_replica

    These operations happen for all shards.  The replica numbers are offset
    by the shard numbers to keep all physical links busy.

    When the enclosing scope's `use_bfloat16` is set, the source tensor is
    cast to bfloat16 on the source device; it is always cast to float on the
    target device before being combined.

    Args:
      source_replica: an integer
      target_replica: an integer
      x_split: a list of lists of tensors; updated in place
      op: a string, either "plus_eq" or "copy"
    """
    num_devices = parallelism.n
    devices = parallelism.devices
    for shard in range(num_devices):
        src = (shard + source_replica) % num_devices
        dst = (shard + target_replica) % num_devices
        payload = x_split[src][shard]
        if use_bfloat16:
            with tf.device(devices[src]):
                payload = tf.to_bfloat16(payload)
        with tf.device(devices[dst]):
            payload = tf.to_float(payload)
            if op != "plus_eq":
                assert op == "copy"
                x_split[dst][shard] = tf.identity(payload)
            else:
                x_split[dst][shard] += payload
def _to_bfloat16_unbiased(x, noise):
    """Convert a float32 to a bfloat16 using randomized roundoff.

    Two bfloat16 candidates are built from |x| (the direct cast and its
    neighbour on the other side of |x|) and passed with `noise` to
    `_randomized_roundoff_to_bfloat16`; the sign of x is re-applied to the
    result.

    Args:
      x: A float32 Tensor.
      noise: a float32 Tensor with values in [0, 1), broadcastable to
        tf.shape(x)

    Returns:
      A bfloat16 Tensor (the result is multiplied by a bfloat16 cast of the
      sign, so it cannot be float32).
    """
    x_sign = tf.sign(x)
    # Make sure x is positive.  If it is zero, the two candidates are
    # identical.
    x = x * x_sign + 1e-30
    # tf.cast replaces the deprecated tf.to_bfloat16 / tf.to_float helpers.
    cand1 = tf.cast(x, tf.bfloat16)
    cand1_f = tf.cast(cand1, tf.float32)
    # This relies on the fact that for a positive bfloat16 b,
    # b * 1.005 gives you the next higher bfloat16 and b * 0.995 gives you the
    # next lower one. Both 1.005 and 0.995 are ballpark estimation.
    cand2 = tf.cast(
        tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995),
        tf.bfloat16)
    ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
    return ret * tf.cast(x_sign, tf.bfloat16)
def _to_bfloat16_unbiased(x, noise):
    """Convert a float32 to a bfloat16 using randomized roundoff.

    The magnitude of x is cast to bfloat16 to get a first candidate; a second
    candidate is the adjacent bfloat16 on the other side of the true value.
    `_randomized_roundoff_to_bfloat16` receives both candidates together with
    `noise`, and the sign of x is multiplied back in at the end.

    Args:
      x: A float32 Tensor.
      noise: a float32 Tensor with values in [0, 1), broadcastable to
        tf.shape(x)

    Returns:
      A bfloat16 Tensor: the final product involves a bfloat16 cast of the
      sign, so the output dtype is bfloat16 rather than float32.
    """
    x_sign = tf.sign(x)
    # Make sure x is positive.  If it is zero, the two candidates are
    # identical.
    x = x * x_sign + 1e-30
    # Use tf.cast instead of the deprecated tf.to_bfloat16 / tf.to_float.
    cand1 = tf.cast(x, tf.bfloat16)
    cand1_f = tf.cast(cand1, tf.float32)
    # This relies on the fact that for a positive bfloat16 b,
    # b * 1.005 gives you the next higher bfloat16 and b * 0.995 gives you the
    # next lower one. Both 1.005 and 0.995 are ballpark estimation.
    neighbour_f = tf.where(
        tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995)
    cand2 = tf.cast(neighbour_f, tf.bfloat16)
    ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
    return ret * tf.cast(x_sign, tf.bfloat16)
def _to_bfloat16_unbiased(x):
    """Convert a float32 to a bfloat16 using randomized roundoff.

    Note: If this ever produces worse results than using float32 all the way
    through, we should try to diagnose and fix it.  There are several things
    to try:

    1. Encode parameter x for storage purposes as
       _to_bfloat16_unbiased(tf.pow(x, 5)) .  This gives 5x the
       resolution while incurring overflow and underflow at 10^9 and 10^-9
       instead of 10^37 and 10^-37.  Comes at a cost of extracting fifth
       roots to decode parameters.  Or use some other such scheme.

    2. In this function, use actual random numbers, different for each
       parameter as opposed to the same for every parameter in the graph.

    3. Look for bugs in this function.

    Args:
      x: A float32 Tensor.

    Returns:
      A bfloat16 Tensor (the result is multiplied by a bfloat16 cast of the
      sign, so it cannot be float32).
    """
    # Not using random_uniform here due to a problem on TPU in that random
    # seeds are not respected, which may cause the parameters on different
    # replicas to go out-of-sync.
    quantization_noise = _quantization_noise_from_step_num()
    x_sign = tf.sign(x)
    # Make sure x is positive.  If it is zero, the two candidates are
    # identical.
    x = x * x_sign + 1e-30
    # tf.cast replaces the deprecated tf.to_bfloat16 / tf.to_float helpers.
    cand1 = tf.cast(x, tf.bfloat16)
    cand1_f = tf.cast(cand1, tf.float32)
    # This relies on the fact that for a positive bfloat16 b,
    # b * 1.005 gives you the next higher bfloat16 and b * 0.995 gives you the
    # next lower one. Both 1.005 and 0.995 are ballpark estimation.
    cand2 = tf.cast(
        tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995),
        tf.bfloat16)
    ret = _randomized_roundoff_to_bfloat16(
        x, quantization_noise, cand1, cand2)
    return ret * tf.cast(x_sign, tf.bfloat16)
def _blocked_and_dtype_transformations(tensor):
    """Yields variants of a tensor, for standard blocking/dtype variants.

    For each blocking level (32, then 48) two pairs are produced, in this
    order: the padded blocked matrix, then the bfloat16 cast of its
    `bfloat16_permutation`.

    Args:
      tensor (tf.Tensor): Input tensor.

    Yields:
      (modified_tensor, suffix) pairs, where `modified_tensor` is a
      transformed version of the input, and `suffix` is a string like
      "/blocked32".
    """
    for block_size in (32, 48):
        # Both variants are built before the first of them is yielded.
        blocked_matrix = make_padded_blocked_matrix(tensor, block_size)
        permuted = bfloat16_permutation(blocked_matrix)
        blocked_bfloat16 = tf.to_bfloat16(permuted)

        yield blocked_matrix, '/blocked{}'.format(block_size)
        yield blocked_bfloat16, '/blocked{}/bfloat16'.format(block_size)
def train_model(train=None,
                test=None,
                row_id=None,
                gpu_device='/gpu:0',
                cpu_device='/cpu:0',
                z=18,
                version='3d',
                build_model=None,
                experiment_params=None,
                tf_records=False,
                checkpoint=None,
                weight_loss=True,
                overwrite_training_params=False,
                force_jk=False,
                use_bfloat16=False,
                wd=False,
                use_lms=False):
    """Run an experiment with hGRUs.

    Configures the experiment, prepares train/test tensors, builds the train
    and test models, derives losses and precision-recall metrics, creates the
    optimizer and finally hands everything to a training loop.

    Args:
        train: Forwarded to `configure_model` and stored in `config.ds_name`.
        test: Forwarded to `configure_model` and stored in `config.ds_name`.
        row_id: Forwarded to `configure_model` and to the training loop.
        gpu_device: Device string under which both models are built; also
            forwarded to `configure_model`.
        cpu_device: Device string passed to `prepare_data`.
        z: Forwarded to `configure_model`.
        version: Forwarded to `configure_model`.
        build_model: Callable invoked with `data_tensor`, `reuse`, `training`
            and `output_channels`; its result is used as logits. The default
            of None is not callable, so a value must be supplied.
        experiment_params: Forwarded to `configure_model`. Its `tf_dtype`
            attribute is read when `use_bfloat16` is set.
        tf_records: Forwarded to `prepare_data`. When truthy, queue runners
            are started and `training_tf.training_loop` is used; otherwise
            `training.training_loop` is used.
        checkpoint: Forwarded to the training loop.
        weight_loss: When truthy, the training loss is a weighted cross
            entropy using `train_data_meta['weights']` as `pos_weight`;
            otherwise a plain sigmoid cross entropy.
        overwrite_training_params: When truthy, passed to
            `tf_fun.update_config` to produce an updated config.
        force_jk: Forwarded to `prepare_data`.
        use_bfloat16: When truthy, images are cast to bfloat16 before the
            models are built and logits are cast to
            `experiment_params.tf_dtype` afterwards.
        wd: When truthy, an L2 penalty (coefficient 1e-4) over trainable
            variables whose name does not contain 'batch_normalization' is
            added to the training loss.
        use_lms: When truthy, `tensorflow.contrib.lms.LMS` is run on the
            default graph before training starts.

    Returns:
        None; the function contains no return statement.
    """
    # Set up tensors
    (config, exp_label, prediction_dir, checkpoint_dir, summary_dir,
     test_data_meta, test_dataset_module, train_dataset_module,
     train_data_meta) = configure_model(train=train,
                                        test=test,
                                        row_id=row_id,
                                        gpu_device=gpu_device,
                                        z=z,
                                        version=version,
                                        build_model=build_model,
                                        experiment_params=experiment_params,
                                        evaluate=False)
    if overwrite_training_params:
        config = tf_fun.update_config(overwrite_training_params, config)
    config.ds_name = {'train': train, 'test': test}
    (test_images, test_labels, train_images,
     train_labels) = prepare_data(config=config,
                                  tf_records=tf_records,
                                  device=cpu_device,
                                  test_dataset_module=test_dataset_module,
                                  train_dataset_module=train_dataset_module,
                                  force_jk=force_jk,
                                  evaluate=False)
    if use_bfloat16:
        train_images = tf.to_bfloat16(train_images)
        test_images = tf.to_bfloat16(test_images)

    # Build training and test models
    # NOTE(review): the extent of this device scope was reconstructed from
    # flattened source; confirm which statements belong inside it.
    with tf.device(gpu_device):
        train_logits = build_model(
            data_tensor=train_images,
            reuse=None,
            training=True,
            output_channels=config.train_label_shape[-1])
        # The test model is built with reuse=tf.AUTO_REUSE and
        # training=False.
        test_logits = build_model(data_tensor=test_images,
                                  reuse=tf.AUTO_REUSE,
                                  training=False,
                                  output_channels=config.test_label_shape[-1])
    if use_bfloat16:
        # NOTE(review): experiment_params defaults to None, which would fail
        # here; callers enabling use_bfloat16 must pass experiment_params.
        train_logits = tf.cast(train_logits, experiment_params.tf_dtype)
        test_logits = tf.cast(test_logits, experiment_params.tf_dtype)

    # Derive loss
    if weight_loss:
        assert train_data_meta is not None, 'Could not find a train_data_meta'
        pos_weight = train_data_meta['weights']
        train_loss = tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(targets=train_labels,
                                                     logits=train_logits,
                                                     pos_weight=pos_weight))
    else:
        train_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=train_labels,
                                                    logits=train_logits))
    # The test loss is never weighted.
    test_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=test_labels,
                                                logits=test_logits))
    if wd:
        WEIGHT_DECAY = 1e-4
        train_loss += (WEIGHT_DECAY * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'batch_normalization' not in v.name
        ]))

    # Derive metrics
    # Scores and ground truth are averaged over the first three entries of
    # the last axis; the trailing comments suggest this index was once taken
    # from config['gt_idx'] (presumably; verify against history).
    train_scores = tf.reduce_mean(tf.sigmoid(train_logits[:, :, :, :, :3]),
                                  axis=-1)  # config['gt_idx']])
    test_scores = tf.reduce_mean(tf.sigmoid(test_logits[:, :, :, :, :3]),
                                 axis=-1)  # config['gt_idx']])
    # Ground truth is binarised: 1 where the mean label exceeds 0.5.
    train_gt = tf.cast(
        tf.greater(tf.reduce_mean(train_labels[:, :, :, :, :3], axis=-1), 0.5),
        tf.int32)  # config['gt_idx']]
    test_gt = tf.cast(
        tf.greater(tf.reduce_mean(test_labels[:, :, :, :, :3], axis=-1), 0.5),
        tf.int32)  # config['gt_idx']]
    # Prefer 'careful_interpolation'; on any exception fall back to
    # 'trapezoidal' for both metrics.
    try:
        train_pr, train_pr_update, train_pr_init = calculate_pr(
            labels=train_gt,
            predictions=train_scores,
            summation_method='careful_interpolation',
            name='train_pr')
        test_pr, test_pr_update, test_pr_init = calculate_pr(
            labels=test_gt,
            predictions=test_scores,
            summation_method='careful_interpolation',
            name='test_pr')
    except Exception:
        print 'Failed to use careful_interpolation'
        train_pr, train_pr_update, train_pr_init = calculate_pr(
            labels=train_gt,
            predictions=train_scores,
            summation_method='trapezoidal',
            name='train_pr')
        test_pr, test_pr_update, test_pr_init = calculate_pr(
            labels=test_gt,
            predictions=test_scores,
            summation_method='trapezoidal',
            name='test_pr')
    # These two dicts only feed the scalar summaries below; both names are
    # rebound to smaller dicts further down.
    train_metrics = {'train_pr': train_pr, 'train_cce': train_loss}
    test_metrics = {'test_pr': test_pr, 'test_cce': test_loss}
    # None of the keys above contains 'update', so every entry is logged.
    for k, v in train_metrics.iteritems():
        if 'update' not in k:
            tf.summary.scalar(k, v)
    for k, v in test_metrics.iteritems():
        if 'update' not in k:
            tf.summary.scalar(k, v)

    # Build optimizer
    # The learning rate is a scalar placeholder handed to the training loop.
    lr = tf.placeholder(tf.float32, shape=[])
    train_op = optimizers.get_optimizer(loss=train_loss,
                                        lr=lr,
                                        optimizer=config['optimizer'])

    # Create dictionaries of important training and test information
    # Note: the '*_logits' entries hold the sigmoid scores, not raw logits.
    train_dict = {
        'train_loss': train_loss,
        'train_images': train_images,
        'train_labels': train_labels,
        'train_op': train_op,
        'train_pr_update': train_pr_update,
        'train_logits': train_scores
    }
    test_dict = {
        'test_loss': test_loss,
        'test_images': test_images,
        'test_labels': test_labels,
        'test_pr_update': test_pr_update,
        'test_logits': test_scores
    }
    # Metric dicts passed to the training loop (PR only, no loss).
    train_metrics = {
        'train_pr': train_pr,
    }
    test_metrics = {
        'test_pr': test_pr,
    }
    reset_metrics = {
        'train_pr_init': train_pr_init,
        'test_pr_init': test_pr_init,
    }

    # Count model parameters
    parameter_count = tf_fun.count_parameters(tf.trainable_variables())
    print 'Number of parameters in model: %s' % parameter_count

    # Create datastructure for saving data
    ds = data_structure.data(train_batch_size=config.train_batch_size,
                             test_batch_size=config.test_batch_size,
                             test_iters=config.test_iters,
                             shuffle_train=config.shuffle_train,
                             shuffle_test=config.shuffle_test,
                             lr=config.lr,
                             training_routine=config.training_routine,
                             loss_function=config.loss_function,
                             optimizer=config.optimizer,
                             model_name=config.exp_label,
                             train_dataset=config.train_dataset,
                             test_dataset=config.test_dataset,
                             output_directory=config.results,
                             prediction_directory=prediction_dir,
                             summary_dir=summary_dir,
                             checkpoint_dir=checkpoint_dir,
                             parameter_count=parameter_count,
                             exp_label=exp_label)
    # adabn_init is returned by initialize_tf but not used in this function.
    sess, summary_op, summary_writer, saver, adabn_init = initialize_tf(
        config=config, summary_dir=summary_dir)

    # Start training loop
    if use_lms:
        # Imported lazily so tensorflow.contrib.lms is only needed when
        # use_lms is requested.
        from tensorflow.contrib.lms import LMS
        lms_model = LMS({'cnn'}, lb=3)  # Hardcoded model scope for now...
        lms_model.run(tf.get_default_graph())
    if tf_records:
        # Coordinate for tfrecords
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        training_tf.training_loop(config=config,
                                  sess=sess,
                                  summary_op=summary_op,
                                  summary_writer=summary_writer,
                                  saver=saver,
                                  summary_dir=summary_dir,
                                  checkpoint_dir=checkpoint_dir,
                                  prediction_dir=prediction_dir,
                                  train_dict=train_dict,
                                  test_dict=test_dict,
                                  exp_label=config.exp_label,
                                  lr=lr,
                                  row_id=row_id,
                                  data_structure=ds,
                                  coord=coord,
                                  threads=threads,
                                  reset_metrics=reset_metrics,
                                  train_metrics=train_metrics,
                                  test_metrics=test_metrics,
                                  checkpoint=checkpoint,
                                  top_test=config['top_test'])
    else:
        # Without tf_records the dataset modules are passed to the loop
        # instead of a coordinator and queue-runner threads.
        training.training_loop(config=config,
                               sess=sess,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               saver=saver,
                               summary_dir=summary_dir,
                               checkpoint_dir=checkpoint_dir,
                               prediction_dir=prediction_dir,
                               train_dict=train_dict,
                               test_dict=test_dict,
                               train_dataset_module=train_dataset_module,
                               test_dataset_module=test_dataset_module,
                               exp_label=config.exp_label,
                               lr=lr,
                               row_id=row_id,
                               data_structure=ds,
                               train_metrics=train_metrics,
                               test_metrics=test_metrics,
                               reset_metrics=reset_metrics,
                               checkpoint=checkpoint,
                               top_test=config['top_test'])
"""tf.to_bfloat16(x, name = 'ToBFloat16')

Explanation: this function converts the data type of a Tensor to bfloat16.

Translator's note: I do not fully understand what this API is for, but in my
tests the input x had to be a floating-point type; no other type worked.

Usage example:
"""
import tensorflow as tf

sess = tf.Session()
# Twenty consecutive integers, 0 through 19, stored as float32.
data = tf.constant(list(range(20)), tf.float32)
print(sess.run(data))
# The same values after conversion to bfloat16.
d = tf.to_bfloat16(data)
print(sess.run(d))

# Input parameters:
#   * x: a Tensor or a SparseTensor.
#   * name: (optional) a name for this operation.
# Output:
#   * a Tensor or SparseTensor whose dtype is bfloat16 and whose shape is the
#     same as x.
# Notes:
#   * Error: if x cannot be converted to bfloat16, an error is raised.
# tf.string_to_number
# Fill a fixed-width (10-byte) string array with "string_0" ... "string_9".
x = np.empty(10, dtype="|S10")
for i, e in enumerate(x):
    x[i] = "string_{}".format(i)
# NOTE(review): values such as "string_0" are not numeric, so this op
# presumably fails when it is evaluated; confirm whether plain digit strings
# were intended.
z_string_to_number = tf.string_to_number(x, out_type=tf.int32)

# tf.to_double
x = np.random.rand(3, 5)
z_to_double = tf.to_double(x)

# tf.to_float
x = np.random.rand(3, 5)
z_to_float = tf.to_float(x)

# tf.to_bfloat16
# The input is explicitly converted to float32 first (unlike the other
# examples, which pass np.random.rand output directly).
x = np.random.rand(3, 5).astype(np.float32)
z_to_bfloat16 = tf.to_bfloat16(x)

# tf.to_int32
# Scaled by 10 so the values are not all below 1.
x = np.random.rand(3, 5) * 10
z_to_int32 = tf.to_int32(x)

# tf.to_int64
x = np.random.rand(3, 5) * 10
z_to_int64 = tf.to_int64(x)

# tf.cast
x = np.random.rand(3, 5) * 10
z_cast = tf.cast(x, dtype=tf.int16)

with tf.Session() as sess:
tf.scan() tf.scatter_add() tf.scatter_div() tf.scatter_mul() tf.scatter_nd() tf.scatter_nd_add() tf.scatter_nd_non_aliasing_add() tf.scatter_nd_sub() tf.scatter_nd_update() tf.scatter tf.tables_initializer() tf.tensordot() tf.tf_logging tf.tile() tf.to_bfloat16() tf.to_double() tf.to_float() tf.to_int32() tf.to_int64() tf.trace() tf.trainable_variables() tf.transpose() tf.truncated_normal() tf.truediv() tf.sparse_transpose() tf.sparse_tensor_dense_matmul() tf.sparse_accumulator_apply_gradient() tf.sparse_accumulator_take_gradient() tf.sparse_add()
import tensorflow as tf

# `sess` is not referenced again; presumably it is kept so that the .eval()
# calls below have a default session (confirm against TF docs).
sess = tf.InteractiveSession()
s = tf.constant(['123', '257'])
print(s.eval(), s.dtype)
# String -> number
num = tf.string_to_number(s)
print('数字 : ', num.eval(), num.dtype)
# Number -> double
d_num = tf.to_double(num)
print('double : ', d_num.eval(), d_num.dtype)
# Number -> float
f_num = tf.to_float(num)
print('float : ', f_num.eval(), f_num.dtype)
# Number -> bfloat16 (the original author notes they could not find any
# explanation of bfloat16 online)
f16_num = tf.to_bfloat16(num)
print('bfloat16 : ', f16_num.eval(), f16_num.dtype)
# Number -> int32
i32_num = tf.to_int32(num)
print('int32 : ', i32_num.eval(), i32_num.dtype)
# Number -> int64
i64_num = tf.to_int64(num)
print('int64 : ', i64_num.eval(), i64_num.dtype)
# Convert to an explicitly specified type
# NOTE(review): 257 does not fit in int8, so this result is not 257.
cast_num = tf.cast(i64_num, tf.int8)
print('cast : ', cast_num.eval(), cast_num.dtype)
# bitcast conversion
bit_num = tf.bitcast(i64_num, tf.int8)
print('bitcast : ', bit_num.eval(), bit_num.dtype)
# saturate_cast conversion (the result is not printed within this excerpt)
saturate_cast = tf.saturate_cast(i64_num, tf.int32)
# -*- coding: utf-8 -*- #!/usr/bin/python import tensorflow as tf import tfutil const1 = tf.constant(1, dtype=tf.float32) tfutil.print_constant(const1) print(const1) bfloat1 = tf.to_bfloat16(const1) tfutil.print_operation_value(bfloat1) print(bfloat1) const2 = tf.constant([2, 3], dtype=tf.float32) tfutil.print_constant(const2) print(const2) bfloat2 = tf.to_bfloat16(const2) tfutil.print_operation_value(bfloat2) print(bfloat2) var1 = tf.Variable(4, dtype=tf.float32) tfutil.print_variable(var1) print(var1) bfloat3 = tf.to_bfloat16(var1) tfutil.print_operation_value(bfloat3) print(bfloat3) var2 = tf.Variable([5, 6], dtype=tf.float32) tfutil.print_variable(var2) print(var2) bfloat4 = tf.to_bfloat16(var2)