def _quantize(x, params, randomize=True):
    """Encode `x` as int16 multiples of `params.quantization_scale`.

    Args:
        x: Tensor to quantize.
        params: Object exposing `quantize` (on/off flag) and
            `quantization_scale` (the step size).
        randomize: When true, add uniform noise before flooring so the
            rounding is stochastic; when false, cast the scaled value directly.

    Returns:
        `x` unchanged when `params.quantize` is falsy; otherwise a tensor
        whose int16 quantized values have been bitcast to float16.
    """
    if not params.quantize:
        return x

    if randomize:
        magnitude = tf.abs(x)
        sign = tf.sign(x)
        scaled = magnitude / params.quantization_scale
        noise = tf.random_uniform(common_layers.shape_list(x))
        rounded = tf.floor(scaled + noise)
        # Saturate at the largest int16 before putting the sign back.
        clipped = tf.minimum(rounded, tf.int16.max) * sign
        return tf.bitcast(tf.cast(clipped, tf.int16), tf.float16)

    steps = tf.cast(x / params.quantization_scale, tf.int16)
    return tf.bitcast(steps, tf.float16)
def _testBitcast(self, x, datatype, shape):
    """Check that bitcasting `x` keeps its raw bytes and reports `shape`.

    Args:
        x: Buffer-protocol input (e.g. a NumPy array) fed to `tf.bitcast`.
        datatype: Target dtype for the bitcast.
        shape: Static shape expected on the resulting tensor.
    """
    with self.test_session():
        casted = tf.bitcast(x, datatype)
        actual_bytes = memoryview(casted.eval()).tobytes()
        expected_bytes = memoryview(x).tobytes()
        # A bitcast reinterprets memory, so the byte streams must match.
        self.assertEqual(expected_bytes, actual_bytes)
        self.assertEqual(casted.get_shape(), shape)
def _testBitcast(self, x, datatype, shape):
    """Assert that `tf.bitcast` reinterprets `x` without altering its bytes.

    Args:
        x: Buffer-protocol input (e.g. a NumPy array) fed to `tf.bitcast`.
        datatype: Target dtype handed to `tf.bitcast`.
        shape: Static shape the bitcast result is expected to report.
    """
    with self.test_session():
        tf_ans = tf.bitcast(x, datatype)
        out = tf_ans.eval()
        # np.getbuffer only exists on Python 2 builds of NumPy; memoryview is
        # a builtin on both Python 2.7 and 3 and exposes the same raw bytes.
        buff_after = memoryview(out).tobytes()
        buff_before = memoryview(x).tobytes()
        self.assertEqual(buff_before, buff_after)
        self.assertEqual(tf_ans.get_shape(), shape)
def bottom(self, x):
    """Transform input from data space to model space.

    Args:
        x: A Tensor with shape [batch, ...]

    Returns:
        body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(self.name):
        # TODO(aidangomez): Will need to sort out a better audio pipeline

        def xnet_resblock(net, filters, res_relu, name):
            """Xception-like block: separable convs plus a strided shortcut."""
            with tf.variable_scope(name):
                # We only stride along the length dimension to preserve the
                # spectral bins (which are tiny in dimensionality relative to
                # length).
                sep_kernels = [((1, 1), (3, 3)), ((1, 1), (3, 3))]
                branch = common_layers.separable_conv_block(
                    net,
                    filters,
                    sep_kernels,
                    first_relu=True,
                    padding="SAME",
                    force2d=True,
                    name="sep_conv_block")
                branch = common_layers.pool(
                    branch, (3, 3), "MAX", "SAME", strides=(2, 1))
                shortcut = common_layers.conv_block(
                    net,
                    filters,
                    [((1, 1), (1, 1))],
                    padding="SAME",
                    strides=(2, 1),
                    first_relu=res_relu,
                    force2d=True,
                    name="res_conv0")
                return branch + shortcut

        # Bitcast back from int32
        signal = tf.bitcast(x, tf.float32)
        signal.set_shape([None, None, None, 1])

        # Filter count doubles with every compression block: 2, 4, 8, ...
        for level in range(self._model_hparams.audio_compression):
            signal = xnet_resblock(
                signal, 2**(level + 1), True, "compress_block_%d" % level)

        return xnet_resblock(
            signal, self._body_input_depth, False, "compress_block_final")
def _bitcast_convert_type(operand, new_dtype):
    """Reinterpret the bits of `operand` as `new_dtype` using `tf.bitcast`."""
    reinterpreted = tf.bitcast(operand, new_dtype)
    return reinterpreted
def testErrors(self):
    """Bitcasting a [1, 1] int8 array to int32 must raise a shape error."""
    x = np.zeros([1, 1], np.int8)
    datatype = tf.int32
    # assertRaisesRegexp is a deprecated alias that Python 3.12 removed;
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(ValueError, "Cannot bitcast due to shape"):
        tf.bitcast(x, datatype, None)
def decode_image(self, img: tf.Tensor) -> tf.Tensor:
    """Decode with the parent implementation, then view the result as float32.

    The parent's output is bitcast to float32 and a trailing axis of size one
    is appended to the result.
    """
    decoded = super().decode_image(img)
    # Bitcast 4 channels uint8 -> 1 channel float32
    as_float = tf.bitcast(decoded, tf.float32)
    # Append a trailing axis to the bitcast result.
    return as_float[..., None]
def testUnknown(self):
    """Build a bitcast on a placeholder whose shape is unspecified.

    The test passes as long as constructing the op does not raise.
    """
    unknown_shape_input = tf.placeholder(tf.float32)
    target_dtype = tf.int8
    tf.bitcast(unknown_shape_input, target_dtype, None)
def serialize_scene(scene: pyredner.Scene,
                    num_samples: Union[int, Tuple[int, int]],
                    max_bounces: int,
                    channels=[redner.channels.radiance],
                    sampler_type=redner.SamplerType.independent,
                    use_primary_edge_sampling=True,
                    use_secondary_edge_sampling=True,
                    sample_pixel_center: bool = False) -> List:
    """Flatten a pyredner scene and rendering options into a list of tensors.

    Given a pyredner scene and rendering options, convert them to a linear
    list of arguments so that they can be used in TensorFlow. The position of
    every entry in the returned list is significant: entries are appended in a
    fixed order (counts, camera, shapes, materials, area lights, environment
    map, rendering options), and a missing optional value is represented by
    the module-level ``__EMPTY_TENSOR`` placeholder so the layout stays
    predictable.

    Side effect: ``light_id`` is assigned on every shape that an area light
    refers to.

    Args:
        scene: The pyredner.Scene to serialize.
        num_samples: Number of samples per pixel for the forward and backward
            passes. Can be an integer or a tuple of 2 integers; if a single
            integer is provided, the same number of samples is used for both.
        max_bounces: Number of bounces for global illumination; 1 means
            direct lighting only. When 0, secondary edge sampling is
            switched off.
        channels: A list of channels that should be present in the output
            image. The following channels are supported:
            redner.channels.radiance, redner.channels.alpha,
            redner.channels.depth, redner.channels.position,
            redner.channels.geometry_normal, redner.channels.shading_normal,
            redner.channels.uv, redner.channels.diffuse_reflectance,
            redner.channels.specular_reflectance,
            redner.channels.vertex_color, redner.channels.roughness,
            redner.channels.generic_texture, redner.channels.shape_id,
            redner.channels.material_id.
            All channels, except for shape id and material id, are
            differentiable.
        sampler_type: Which sampling pattern to use. See Chapter 7 of the PBRT
            book (http://www.pbr-book.org/3ed-2018/Sampling_and_Reconstruction.html)
            for an explanation of the difference between samplers. Supported
            samplers: redner.SamplerType.independent, redner.SamplerType.sobol.
        use_primary_edge_sampling: Forwarded unchanged as a constant.
        use_secondary_edge_sampling: Forwarded as a constant; forced to False
            when ``max_bounces`` is 0.
        sample_pixel_center: Always sample at the pixel center when rendering.
            This trades noise with aliasing. If this option is activated, the
            rendering becomes non-differentiable (since there is no
            antialiasing integral), and redner's edge sampling becomes an
            approximation to the gradients of the aliased rendering.

    Returns:
        The list of serialized arguments.
    """
    # TODO: figure out a way to determine whether a TF tensor requires gradient or not
    cam = scene.camera
    num_shapes = len(scene.shapes)
    num_materials = len(scene.materials)
    num_lights = len(scene.area_lights)
    num_channels = len(channels)

    # Record on each emitting shape which area light it belongs to.
    for light_id, light in enumerate(scene.area_lights):
        scene.shapes[light.shape_id].light_id = light_id

    # Without any bounce there is nothing for secondary edge sampling to do.
    if max_bounces == 0:
        use_secondary_edge_sampling = False

    # Header: element counts.
    args = []
    args.append(tf.constant(num_shapes))
    args.append(tf.constant(num_materials))
    args.append(tf.constant(num_lights))

    # Camera block. The camera tensors are copied under the CPU device scope.
    with tf.device('/device:cpu:' + str(pyredner.get_cpu_device_id())):
        # Either position/look_at/up or three placeholders.
        if cam.position is None:
            args.append(__EMPTY_TENSOR)
            args.append(__EMPTY_TENSOR)
            args.append(__EMPTY_TENSOR)
        else:
            args.append(tf.identity(cam.position))
            args.append(tf.identity(cam.look_at))
            args.append(tf.identity(cam.up))
        # Either the explicit camera matrices or two placeholders.
        if cam.cam_to_world is None:
            args.append(__EMPTY_TENSOR)
            args.append(__EMPTY_TENSOR)
        else:
            args.append(tf.identity(cam.cam_to_world))
            args.append(tf.identity(cam.world_to_cam))
        args.append(tf.identity(cam.intrinsic_mat_inv))
        args.append(tf.identity(cam.intrinsic_mat))
    args.append(tf.constant(cam.clip_near))
    args.append(tf.constant(cam.resolution))
    args.append(RednerCameraType.asTensor(cam.camera_type))

    # Shape block: geometry is copied under the rendering device scope;
    # optional attributes fall back to the placeholder.
    for shape in scene.shapes:
        with tf.device(pyredner.get_device_name()):
            args.append(tf.identity(shape.vertices))
            # HACK: tf.bitcast forces tensorflow to copy int32 to GPU memory.
            # tf.identity stopped working since TF 2.1 (if you print the device
            # it will say it's on GPU, but the address returned by data_ptr is wrong).
            # Hopefully TF people will fix this in the future.
            args.append(tf.bitcast(shape.indices, type=tf.int32))
            if shape.uvs is None:
                args.append(__EMPTY_TENSOR)
            else:
                args.append(tf.identity(shape.uvs))
            if shape.normals is None:
                args.append(__EMPTY_TENSOR)
            else:
                args.append(tf.identity(shape.normals))
            if shape.uv_indices is None:
                args.append(__EMPTY_TENSOR)
            else:
                args.append(tf.bitcast(shape.uv_indices, type=tf.int32))
            if shape.normal_indices is None:
                args.append(__EMPTY_TENSOR)
            else:
                args.append(tf.bitcast(shape.normal_indices, type=tf.int32))
            if shape.colors is None:
                args.append(__EMPTY_TENSOR)
            else:
                args.append(tf.identity(shape.colors))
        args.append(tf.constant(shape.material_id))
        args.append(tf.constant(shape.light_id))

    # Material block: five textures followed by three flags per material.
    for material in scene.materials:
        serialize_texture(material.diffuse_reflectance, args)
        serialize_texture(material.specular_reflectance, args)
        serialize_texture(material.roughness, args)
        serialize_texture(material.generic_texture, args)
        serialize_texture(material.normal_map, args)
        args.append(tf.constant(material.compute_specular_lighting))
        args.append(tf.constant(material.two_sided))
        args.append(tf.constant(material.use_vertex_color))

    # Area-light block.
    with tf.device('/device:cpu:' + str(pyredner.get_cpu_device_id())):
        for light in scene.area_lights:
            args.append(tf.constant(light.shape_id))
            args.append(tf.identity(light.intensity))
            args.append(tf.constant(light.two_sided))

    # Environment map block, or a single placeholder when there is none.
    if scene.envmap is not None:
        serialize_texture(scene.envmap.values, args)
        with tf.device('/device:cpu:' + str(pyredner.get_cpu_device_id())):
            args.append(tf.identity(scene.envmap.env_to_world))
            args.append(tf.identity(scene.envmap.world_to_env))
        with tf.device(pyredner.get_device_name()):
            args.append(tf.identity(scene.envmap.sample_cdf_ys))
            args.append(tf.identity(scene.envmap.sample_cdf_xs))
        # NOTE(review): appended as-is, without tf.identity/tf.constant.
        args.append(scene.envmap.pdf_norm)
    else:
        args.append(__EMPTY_TENSOR)

    # Rendering options.
    args.append(tf.constant(num_samples))
    args.append(tf.constant(max_bounces))
    args.append(tf.constant(num_channels))
    for ch in channels:
        args.append(RednerChannels.asTensor(ch))

    args.append(RednerSamplerType.asTensor(sampler_type))
    args.append(tf.constant(use_primary_edge_sampling))
    args.append(tf.constant(use_secondary_edge_sampling))
    args.append(tf.constant(sample_pixel_center))
    return args
def compress(self, tensor):
    """Compress a float32 tensor into mixed 32/16/8-bit buckets.

    Elements are bucketed by their biased float32 exponent:
      * exponent >= 127: kept as-is (code 3);
      * mid <= exponent < 127: packed into uint16 (code 2);
      * error_bound <= exponent < mid: packed into uint8 (code 1);
      * anything smaller: not stored at all (code 0).
    The thresholds are derived from `self.error_bound`.

    Args:
        tensor: Tensor that is flattened and bitcast to uint32.

    Returns:
        A pair `(tensor_compressed, ctx)` where `tensor_compressed` is the
        tuple `(v_32bit, v_16bit, v_8bit, mask_encode)` and `ctx` is the
        dynamic shape of `tensor`.
    """
    original_shape = tf.shape(tensor)
    flat = tf.reshape(tensor, [-1])
    bits = tf.bitcast(flat, tf.uint32)

    # Split every element into its sign / exponent / mantissa fields.
    sign = tf.bitwise.bitwise_and(bits, 0b10000000000000000000000000000000)
    exp = tf.bitwise.bitwise_and(bits, 0b01111111100000000000000000000000)
    mantissa = tf.bitwise.bitwise_and(bits, 0b00000000011111111111111111111111)
    exp = tf.bitwise.right_shift(exp, 23)

    # error_bound exponent: 117 for 2e-10
    error_bound = 127 + int(math.log(self.error_bound / 2, 10))
    radius = math.ceil((127 - error_bound) / 2)
    mid = error_bound + radius

    mask_32bit = exp >= 127
    mask_16bit = (exp >= mid) & (exp < 127)
    mask_8bit = (exp >= error_bound) & (exp < mid)

    def flat_indices(mask):
        """Return the 1-D positions where `mask` is true."""
        return tf.reshape(tf.where(mask), [-1])

    indices_32bit = flat_indices(mask_32bit)
    indices_16bit = flat_indices(mask_16bit)
    indices_8bit = flat_indices(mask_8bit)

    # no compress
    v_32bit = tf.gather(flat, indices_32bit)

    marker = 0b00000000010000000000000000000000

    def pack(indices, drop_bits, out_dtype):
        """Pack the selected elements into a narrow fixed-point word.

        The mantissa is shifted right by one to make room for a marker bit,
        the result is shifted right by (127 - exponent), OR-ed with the sign
        (moved down by 8 bits), and finally the low `drop_bits` bits are
        discarded before casting to `out_dtype`.
        """
        sign_bits = tf.gather(sign, indices)
        exponents = tf.gather(exp, indices)
        fractions = tf.gather(mantissa, indices)
        shift = tf.cast(127 - tf.cast(exponents, dtype=tf.int32), tf.uint32)
        sign_part = tf.bitwise.right_shift(sign_bits, 8)
        with_marker = tf.bitwise.bitwise_or(
            tf.bitwise.right_shift(fractions, 1), marker)
        value_part = tf.bitwise.right_shift(with_marker, shift)
        word = tf.bitwise.bitwise_or(sign_part, value_part)
        return tf.cast(tf.bitwise.right_shift(word, drop_bits), dtype=out_dtype)

    # 16bit compress
    v_16bit = pack(indices_16bit, 8, tf.uint16)
    # 8bit compress
    v_8bit = pack(indices_8bit, 16, tf.uint8)

    def encode_byte(a):
        """Pack four 2-bit codes into each output byte.

        input: int32 type tensor with values in range 0,1,2,3
               (2'b00, 2'b01, 2'b10, 2'b11)
        output: encoded uint8 type tensor
        """
        codes = tf.reshape(a, [-1])
        # Pad (by 1 to 4 elements) so the length splits evenly into quarters.
        pad_size = 4 - tf.math.mod(tf.size(codes), 4)
        codes = tf.concat([codes, tf.range(0, pad_size)], 0)
        q0, q1, q2, q3 = tf.split(codes, 4)
        # encode 4 grads into 1 Byte
        low = tf.add(q0, q1 * 4)
        high = tf.add(q2 * 16, q3 * 64)
        return tf.cast(tf.add(low, high), tf.uint8)

    # encode indices: 1 -> 8-bit bucket, 2 -> 16-bit bucket, 3 -> 32-bit bucket
    bucket_masks = [mask_8bit, mask_16bit, mask_32bit]
    mask_encode = sum(
        tf.cast(mask, tf.int32) * code
        for code, mask in enumerate(bucket_masks, start=1))
    mask_encode = encode_byte(mask_encode)

    return (v_32bit, v_16bit, v_8bit, mask_encode), original_shape
def __init__(self, queue, partition_size, t_c, dim=2, name="IsingReader",
             transpose_chan=True):
    """Build the graph that reads one record and groups its states.

    The payload that follows the 16-byte header is reshaped, bitcast to
    float32, shuffled, truncated to a whole number of partitions and grouped
    into `partition_size`-sized batches.

    Args:
        queue: Input queue; stored on the instance for the record reader.
        partition_size: Number of states grouped into one example.
        t_c: Value stored as a float32 constant in `self._tc_tensor`.
        dim: Lattice dimensionality; 2 selects the 2-D layout, any other
            value selects the 3-D layout.
        name: Name forwarded to `self._initialize_record`.
        transpose_chan: When true, the partition axis is moved to the last
            position and the singleton channel axis is squeezed away instead.
    """
    self._part_sz = partition_size
    self._transposed = transpose_chan
    self._exm_shape = None

    # The base-class initializer is not invoked here; the attributes below
    # are assigned directly instead.
    # IsingFileRecordBase.__init__(self, queue, t_c, dim, name)
    self._dim = dim
    self._tc_tensor = tf.constant(t_c, dtype=tf.float32)
    self._q = queue

    # read header: l, k
    # NOTE(review): self.k, self.l and self._in_bytes are presumably set by
    # _initialize_record -- confirm.
    self._initialize_record(name)

    # Override the shape that the 1d input will be read as. The trailing 4 is
    # the number of bytes folded into each float32 by the bitcast below.
    if self._dim == 2:
        self._batch_shape = [self.k, self.l, self.l, 1, 4]
    else:
        self._batch_shape = [self.k, self.l, self.l, self.l, 1, 4]

    # Skip the 16-byte header, then reinterpret the payload as float32.
    payload = tf.slice(self._in_bytes, [16], [-1])
    self.states = tf.bitcast(
        tf.reshape(payload, self._batch_shape), tf.float32)

    # shuffle the states
    self.states = tf.random_shuffle(self.states)

    # determine how many can be grouped together
    self._batch_len = tf.floor_div(self.k, self._part_sz)
    # tf.mul was removed in TensorFlow 1.0; tf.multiply is its replacement.
    take_len = tf.multiply(self._batch_len, self._part_sz)

    # excise as many states as possible
    # then reshape and transpose if needed
    if self._dim == 2:
        self.states = tf.slice(
            self.states, [0, 0, 0, 0], [take_len, -1, -1, -1])
        self.states = tf.reshape(
            self.states,
            [self._batch_len, self._part_sz, self.l, self.l, 1])
        if self._transposed:
            # Swap the partition axis (1) with the channel axis (4).
            self.states = tf.transpose(self.states, [0, 4, 2, 3, 1])
    else:
        self.states = tf.slice(
            self.states, [0, 0, 0, 0, 0], [take_len, -1, -1, -1, -1])
        self.states = tf.reshape(
            self.states,
            [self._batch_len, self._part_sz, self.l, self.l, self.l, 1])
        if self._transposed:
            # Swap the partition axis (1) with the channel axis (5).
            self.states = tf.transpose(self.states, [0, 5, 2, 3, 4, 1])

    # NOTE(review): presumably populates self.label, self.indx and
    # self.temps, which are read below -- confirm.
    self._assign_labels()

    # finally squeeze into final dims: after the transpose the singleton
    # axis sits at position 1, otherwise it is still the last axis.
    if self._transposed:
        self.states = tf.squeeze(self.states, axis=[1])
    else:
        self.states = tf.squeeze(self.states, axis=[-1])

    self.file_batch = [
        self.states,
        self.label,
        self.indx,
        self.temps]
def export(checkpoint, img_shape):
    """Build a base64-in / base64-out inference graph and save it.

    The graph decodes a base64-encoded PNG, normalizes it to three float32
    channels, runs `transform.net`, and re-encodes the result as a
    base64-encoded PNG. Tensor names for the inputs and outputs are stored as
    JSON in the "inputs" and "outputs" graph collections. The meta graph and
    the variables are written under the directory named by the module-level
    `a.export`.

    Args:
        checkpoint: Either a checkpoint directory (the latest checkpoint in it
            is restored) or a path to a specific checkpoint.
        img_shape: Static shape assigned to the decoded image; defaults to
            [256, 256, 3] when None.

    Raises:
        Exception: If `checkpoint` is a directory without a usable checkpoint.
    """
    if img_shape is None:
        img_shape = [256, 256, 3]

    # placeholder for base64 string decoded to an png image
    input_b64 = tf.placeholder(tf.string, shape=[1])
    input_data = tf.decode_base64(input_b64[0])
    input_image = tf.image.decode_png(input_data)

    # remove alpha channel if present
    input_image = tf.cond(
        tf.equal(tf.shape(input_image)[2], 4),
        lambda: input_image[:, :, :3],
        lambda: input_image)
    # convert grayscale to RGB
    input_image = tf.cond(
        tf.equal(tf.shape(input_image)[2], 1),
        lambda: tf.image.grayscale_to_rgb(input_image),
        lambda: input_image)

    input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32)
    input_image.set_shape(img_shape)

    # expected shape is (1, img_shape) because of batches
    batch_input = tf.expand_dims(input_image, axis=0)

    # create network
    batch_output = transform.net(batch_input)

    # Clip RGB values to the allowed range and cast to uint8. The values lie
    # in [0, 255] after clipping, so cast straight to uint8: going through
    # int8 and bitcasting is out of range for anything above 127 and only
    # works where the conversion happens to wrap around.
    batch_output = tf.clip_by_value(batch_output, 0, 255)
    batch_output = tf.cast(batch_output, tf.uint8)

    output_data = tf.image.encode_png(batch_output[0])
    output = tf.convert_to_tensor([tf.encode_base64(output_data)])

    # save inputs and outputs to collection
    key = tf.placeholder(tf.string, shape=[1])
    inputs = {"key": key.name, "input": input_b64.name}
    tf.add_to_collection("inputs", json.dumps(inputs))
    outputs = {
        "key": tf.identity(key).name,
        "output": output.name,
    }
    tf.add_to_collection("outputs", json.dumps(outputs))

    init_op = tf.global_variables_initializer()
    restore_saver = tf.train.Saver()
    export_saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init_op)
        if os.path.isdir(checkpoint):
            ckpt = tf.train.get_checkpoint_state(checkpoint)
            if ckpt and ckpt.model_checkpoint_path:
                restore_saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            restore_saver.restore(sess, checkpoint)

        print("exporting model")
        # The meta graph is written once, separately from the variables.
        export_saver.export_meta_graph(
            filename=os.path.join(a.export, "export.meta"))
        export_saver.save(
            sess, os.path.join(a.export, "export"), write_meta_graph=False)
# Demo of TensorFlow's conversion ops: parse numeric strings, then print the
# value and dtype produced by each conversion.
s = tf.constant(['123', '257'])

# String -> number
num = tf.string_to_number(s)
# Number -> double
d_num = tf.to_double(num)
# Number -> float
f_num = tf.to_float(num)
# Number -> bfloat16 (the 16-bit "brain floating point" format)
f16_num = tf.to_bfloat16(num)
# Number -> int32
i32_num = tf.to_int32(num)
# Number -> int64
i64_num = tf.to_int64(num)
# Convert to an explicitly chosen type
cast_num = tf.cast(i64_num, tf.int8)
# bitcast conversion (reinterprets the bytes instead of converting the value)
bit_num = tf.bitcast(i64_num, tf.int8)
# saturate_cast conversion
saturate_cast = tf.saturate_cast(i64_num, tf.int32)

print(s.eval(), s.dtype)
for _label, _tensor in (
        ('数字 : ', num),
        ('double : ', d_num),
        ('float : ', f_num),
        ('bfloat16 : ', f16_num),
        ('int32 : ', i32_num),
        ('int64 : ', i64_num),
        ('cast : ', cast_num),
        ('bitcast : ', bit_num),
        ('saturate_cast : ', saturate_cast),
):
    print(_label, _tensor.eval(), _tensor.dtype)
min_out = 0 max_out = 0 [q_out, min_out, max_out] = gen_math_ops.quantized_mat_mul(q_a, q_b, a_min, a_max, b_min, b_max, Toutput=tf.qint32, name="qMatMul") print("------- quantized_mat_mul ------") print("min: ", min_out.eval(), " max: ", max_out.eval(), "mean: ", tf.reduce_mean(tf.bitcast(q_out, tf.int32)).eval()) [request_min_out, request_max_out] = gen_math_ops.requantization_range(q_out, min_out, max_out, name="rqRange") print("------- requantization_range ------") print("min: ", request_min_out.eval(), " max: ", request_max_out.eval()) [rq_out, rq_min_out, rq_max_out] = gen_math_ops.requantize(q_out, min_out, max_out, request_min_out, request_max_out,
def _create_make_unique(inputs):
    """Replaces the lower bits of each element with iota.

    The iota is used to derive the index, and also serves the purpose to
    make each element unique to break ties.

    Args:
        inputs: A tensor with rank of 2 and dtype of tf.float32.
            [batch_size, original_size].

    Returns:
        A tensor after element wise transformation, with dtype the same as
        inputs. [batch_size, original_size].

    Raises:
        ValueError: If the rank of the input tensor does not equal 2.
    """
    if inputs.shape.ndims != 2:
        raise ValueError("Input of top_k_with_unique must be rank-2 "
                         "but got: %s" % inputs.shape)

    height = inputs.shape[0]
    width = inputs.shape[1]
    zeros = tf.zeros([height, width], dtype=tf.int32)

    # Count_mask is used to mask away the low order bits to ensure that every
    # element is distinct.
    log2_ceiling = int(math.ceil(math.log(int(width), 2)))
    next_power_of_two = 1 << log2_ceiling
    count_mask = ~(next_power_of_two - 1)
    count_mask_r0 = tf.constant(count_mask, dtype=tf.int32)
    count_mask_r2 = tf.fill([height, width], count_mask_r0)

    # Smallest_normal is the bit representation of the smallest positive
    # normal floating point number. The sign is zero, exponent is one, and
    # the fraction is zero.
    smallest_normal = 1 << 23
    smallest_normal_r0 = tf.constant(smallest_normal, dtype=tf.int32)
    smallest_normal_r2 = tf.fill([height, width], smallest_normal_r0)

    # Low_bit_mask is used to mask away the sign bit when computing the
    # absolute value. It must be written as (1 << 31) - 1 (0x7FFFFFFF):
    # with Python's unbounded ints, ~(1 << 31) is -2**31 - 1, which does not
    # fit in an int32 and only produced the right bits via silent wrap-around.
    low_bit_mask = (1 << 31) - 1
    low_bit_mask_r0 = tf.constant(low_bit_mask, dtype=tf.int32)
    low_bit_mask_r2 = tf.fill([height, width], low_bit_mask_r0)

    # Per-row column index, used as the replacement for the low-order bits.
    iota = tf.tile(tf.expand_dims(tf.range(width, dtype=tf.int32), 0),
                   [height, 1])

    # Compare the absolute value with positive zero to handle negative zero.
    input_r2 = tf.bitcast(inputs, tf.int32)
    abs_r2 = tf.bitwise.bitwise_and(input_r2, low_bit_mask_r2)
    if_zero_r2 = tf.equal(abs_r2, zeros)
    smallest_normal_preserving_sign_r2 = tf.bitwise.bitwise_or(
        input_r2, smallest_normal_r2)
    input_no_zeros_r2 = tf.where(
        if_zero_r2, smallest_normal_preserving_sign_r2, input_r2)

    # Discard the low-order bits and replace with iota.
    and_r2 = tf.bitwise.bitwise_and(input_no_zeros_r2, count_mask_r2)
    or_r2 = tf.bitwise.bitwise_or(and_r2, iota)
    return tf.bitcast(or_r2, tf.float32)
def _dequantize(q, params):
    """Turn quantized values back into floats.

    Args:
        q: Tensor whose bits are reinterpreted as int16 step counts.
        params: Object exposing `quantize` (on/off flag) and
            `quantization_scale` (the step size).

    Returns:
        `q` unchanged when `params.quantize` is falsy; otherwise the int16
        view of `q` converted to float and multiplied by the scale.
    """
    if params.quantize:
        steps = tf.bitcast(q, tf.int16)
        return tf.to_float(steps) * params.quantization_scale
    return q
def decompress(self, tensor_compressed, ctx):
    """Rebuild a float32 tensor from its mixed 32/16/8-bit representation.

    Args:
        tensor_compressed: Tuple `(v_32bit, v_16bit, v_8bit, mask_encode)`;
            the three value tensors plus the byte-packed 2-bit codes telling
            which bucket every element belongs to.
        ctx: Shape of the tensor to reconstruct.

    Returns:
        A tensor of shape `ctx`. Positions whose code is 0 are left at the
        zero fill produced by `tf.scatter_nd`.
    """
    import tensorflow_probability as tfp

    def decode_byte(encoded, real_size):
        """Unpack four 2-bit codes from every byte.

        input: encoded uint8 type tensor
        output: int32 type tensor with values in range 0,1,2,3
                (2'b00, 2'b01, 2'b10, 2'b11)
        """
        packed = tf.cast(encoded, tf.int32)
        fields = [tf.math.mod(packed, 4)]
        for divisor in (4, 16, 64):
            fields.append(
                tf.cast(tf.math.mod(packed / divisor, 4), tf.int32))
        # Drop the padding that was appended before packing.
        return tf.concat(fields, 0)[:real_size]

    def flat_indices(mask):
        """Return the 1-D positions where `mask` is true."""
        return tf.reshape(tf.where(mask), [-1])

    def restore(word, sign_mask, sign_shift, width, edges, shift_dtype,
                mantissa_shift):
        """Expand a packed `width`-bit word back into float32."""
        # get the sign bit and remove MSB from the word
        sign = tf.bitwise.bitwise_and(word, sign_mask)
        sign = tf.bitwise.left_shift(
            tf.cast(sign, dtype=tf.int32), sign_shift)
        body = tf.bitwise.left_shift(word, 1)
        # find the marker bit in the word and get the exponent
        n_shift = width - tfp.stats.find_bins(
            tf.cast(body, dtype=tf.int32), edges)
        exponent = tf.bitwise.left_shift(127 - (n_shift - 1), 23)
        # restore the mantissa
        body = tf.bitwise.left_shift(
            body, tf.cast(n_shift, dtype=shift_dtype))
        mantissa = tf.bitwise.left_shift(
            tf.cast(body, dtype=tf.int32), mantissa_shift)
        # concat all
        bits = tf.bitwise.bitwise_or(
            tf.bitwise.bitwise_or(sign, exponent), mantissa)
        return tf.bitcast(bits, tf.float32)

    v_32bit, v_16bit, v_8bit, mask_encode = tensor_compressed
    tensor_shape = ctx
    tensor_size = tf.math.reduce_prod(tensor_shape)

    # decode mask and gather indices
    mask_decode = decode_byte(mask_encode, tensor_size)
    indices_32bit = flat_indices(tf.equal(mask_decode, 3))
    indices_16bit = flat_indices(tf.equal(mask_decode, 2))
    indices_8bit = flat_indices(tf.equal(mask_decode, 1))

    edges_16bit = tf.constant([
        0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
        32768, 65536
    ], dtype=tf.int32)
    edges_8bit = tf.constant(
        [0, 2, 4, 8, 16, 32, 64, 128, 256], dtype=tf.int32)

    # 16bit decompress
    v_16bit = restore(
        v_16bit,
        sign_mask=0b1000000000000000,
        sign_shift=16,
        width=16,
        edges=edges_16bit,
        shift_dtype=tf.uint16,
        mantissa_shift=7)

    # 8bit decompress
    v_8bit = restore(
        v_8bit,
        sign_mask=0b10000000,
        sign_shift=24,
        width=8,
        edges=edges_8bit,
        shift_dtype=tf.uint8,
        mantissa_shift=15)

    # Start from the uncompressed values, then overlay the other buckets.
    flat = tf.scatter_nd(
        tf.expand_dims(indices_32bit, 1), v_32bit, [tensor_size])
    flat = tf.tensor_scatter_nd_update(
        flat, tf.expand_dims(indices_16bit, 1), v_16bit)
    flat = tf.tensor_scatter_nd_update(
        flat, tf.expand_dims(indices_8bit, 1), v_8bit)
    return tf.reshape(flat, tensor_shape)
def parse_into_ann_input_inference(max_boards, convert_to_nhwc=False):
    """Build the graph that turns packed board data into dense planes.

    NOTES:
    1) If a constant/operation is typed in a confusing manner, it's so the
       entirety of this can be done on GPU.

    Args:
        max_boards: Largest number of boards a single run may contain; it
            sizes the precomputed lookup constants.
        convert_to_nhwc: When true, the result is transposed from
            [board, plane, 8, 8] to [board, 8, 8, plane].

    Returns:
        A pair `((piece_indicators, occupied_bbs), boards)`: the two input
        placeholders and the float32 tensor scattered from them.
    """
    # Every possible 16-bit chunk of an occupancy bitboard.
    possible_lookup_nums = np.arange(2 ** 16, dtype=np.uint16)

    num_bits = popcount(possible_lookup_nums.astype(np.uint64))

    # (rank, file) of each square, replicated for every board and regrouped
    # as 4 chunks of 16 squares.
    location_lookup_ary = np.array(
        [[[chess.square_rank(loc), chess.square_file(loc)]
          for loc in chess.SQUARES_180]], np.int32)
    location_lookup_ary = np.ones([max_boards, 1, 1], np.int32) * location_lookup_ary
    location_lookup_ary = location_lookup_ary.reshape([max_boards, 8, 8, 2])[:, ::-1]
    location_lookup_ary = location_lookup_ary.reshape([max_boards, 4, 16, 2])

    # Bit mask for each 16-bit value, built by reversing the bytes and then
    # the unpacked bits.
    mask_getter = lambda n: np.unpackbits(np.frombuffer(n, dtype=np.uint8)[::-1])[::-1]
    masks_to_gather_ary = np.array(
        list(map(mask_getter, possible_lookup_nums)), dtype=np.bool_)

    # Split a value into (bits above the low nibble, low nibble).
    pieces_from_nums = lambda n: [n >> 4, (n & np.uint8(0x0F))]
    piece_lookup_ary = np.array(
        list(map(pieces_from_nums, possible_lookup_nums)), dtype=np.int32)

    range_repeater = numpy_style_repeat_1d_creator(
        max_multiple=33, max_to_repeat=max_boards, out_type=tf.int64)

    popcount_lookup = tf.constant(num_bits, tf.int64)
    locations_for_masking = tf.constant(location_lookup_ary, tf.int64)
    occupancy_mask_table = tf.constant(masks_to_gather_ary, tf.half)
    piece_lookup_table = tf.constant(piece_lookup_ary, tf.int64)

    # This is used since there seems to be no simple/efficient way to
    # broadcast for scatter_nd
    ones_to_slice = tf.constant(np.ones(33 * max_boards), dtype=tf.float32)

    # Given as an array of uint8s
    piece_indicators = tf.placeholder(tf.int32, shape=[None], name="piece_filters")
    # Given as an array of uint64s
    occupied_bbs = tf.placeholder(tf.int64, shape=[None], name="occupied_bbs")

    # The code below this comment defines ops which are run during inference

    # Each 64-bit bitboard is viewed as four 16-bit lookup keys.
    occupied_bitcasted = tf.cast(tf.bitcast(occupied_bbs, tf.uint16), dtype=tf.int32)

    # The third positional parameter of tf.gather is `validate_indices`, not
    # `name`, so the op names have to be passed by keyword to take effect.
    partial_popcounts = tf.gather(
        popcount_lookup, occupied_bitcasted, name="byte_popcount_lookup")
    partial_popcounts = tf.cast(partial_popcounts, tf.int32)
    occupied_popcounts = tf.reduce_sum(
        partial_popcounts, axis=-1, name="popcount_lookup_sum")

    location_mask = tf.gather(
        occupancy_mask_table, occupied_bitcasted, name="gather_location_mask")
    location_mask = tf.cast(location_mask, tf.bool)
    piece_coords = tf.boolean_mask(
        locations_for_masking, location_mask, "mask_desired_locations")

    gathered_pieces = tf.gather(
        piece_lookup_table, piece_indicators, name="gather_pieces")
    piece_filter_indices = tf.reshape(gathered_pieces, [-1, 1])

    repeated_board_numbers = range_repeater(occupied_popcounts)
    board_numbers_for_concat = tf.expand_dims(repeated_board_numbers, -1)

    # Removes either the last piece filter, or no filters (based on if the
    # number of filters was odd and half of the final uint8 was padding)
    piece_filter_indices = piece_filter_indices[:tf.shape(board_numbers_for_concat)[0]]

    # Should figure out how this can be done with (or similarly to)
    # tf.parallel_stack
    one_indices = tf.concat(
        [board_numbers_for_concat, piece_filter_indices, piece_coords], axis=-1)

    boards = tf.scatter_nd(
        indices=one_indices,
        updates=ones_to_slice[:tf.shape(one_indices)[0]],
        shape=[tf.shape(occupied_bbs, out_type=tf.int64)[0], 15, 8, 8])

    if convert_to_nhwc:
        boards = tf.transpose(boards, [0, 2, 3, 1])

    return (piece_indicators, occupied_bbs), boards
def topk_mask_internal(score, k):
    """Efficient implementation of topk_mask for TPUs.

    Marks, along the last dimension, every element that is at least as large
    as the kth largest one. Only float32 is supported as element type. The
    mask may contain more than k elements if other elements have the same
    value as the kth largest.

    The implementation binary searches for the kth value along each row of
    the input, one bit of its float32 representation at a time, and once the
    kth value is found it creates the mask via a single select instruction.

    Args:
        score: 1-D or higher Tensor with last dimension at least k.
        k: Number of top elements to look for along the last dimension (along
            each row for matrices).

    Returns:
        A tensor shaped like `score` holding ones where the element is >= the
        kth value of its row and zeros elsewhere.
    """

    def larger_count(data, limit):
        """Number of elements larger than limit along the most minor dimension.

        Args:
            data: Rn tensor with the data to compare.
            limit: Rn tensor with last dimension being 1 and rest of the
                dimensions being same as for data.

        Returns:
            Rn tensor with same shape as limit and int32 as element type
            containing the number of elements larger than limit inside data.
        """
        exceeds = data > tf.broadcast_to(limit, data.shape)
        return tf.reduce_sum(
            tf.cast(exceeds, tf.int32), axis=-1, keepdims=True)

    # Predicate specifying if the kth value is negative or positive.
    kth_negative = (larger_count(score, 0.0) < k)

    # Value of the sign bit for each row.
    row_shape = kth_negative.shape
    sign_bit = tf.where(
        kth_negative,
        tf.broadcast_to(1, row_shape),
        tf.broadcast_to(0, row_shape))

    # Initial value for the binary search with the sign bit set.
    initial_bits = tf.bitwise.left_shift(sign_bit, 31)

    def keep_going(bit_index, _):
        return bit_index >= 0

    def refine(bit_index, bits):
        """One binary-search step: decide whether bit `bit_index` is set.

        Args:
            bit_index: Index of the bit to be updated next.
            bits: Current value of the binary search separator. Stored as an
                int32 but bitcasted to a float32 for comparison.

        Returns:
            The updated value of bit_index and bits.
        """
        # Candidate with the bit set: `candidate = bits | (1 << bit_index)`
        candidate = tf.bitwise.bitwise_or(
            bits, tf.bitwise.left_shift(1, bit_index))
        # Number of values larger than the candidate threshold.
        above = larger_count(score, tf.bitcast(candidate, tf.float32))
        # For positive numbers the candidate is larger than the current value
        # while for negative numbers it is the other way around.
        accept = tf.logical_xor(above >= k, kth_negative)
        return bit_index - 1, tf.where(accept, candidate, bits)

    # Executes a binary search for the value of the limits. We run the loop
    # 31 times to calculate the 31 bits of the float32 value (the sign is
    # calculated separately).
    _, limit_bits = tf.while_loop(keep_going, refine, (30, initial_bits))

    # Create a mask by comparing the individual values to the kth value and
    # then selecting zero or one accordingly.
    threshold = tf.broadcast_to(
        tf.bitcast(limit_bits, tf.float32), score.shape)
    return tf.where(
        score >= threshold, tf.ones(score.shape), tf.zeros(score.shape))
def input_pipeline(data_file_pattern, capacity, mode):
    """Input pipeline, returns a dictionary of tensors from queues.

    Args:
        data_file_pattern: File pattern of the data to read. Substrings of the
            pattern ("image", "audio", "mnist", "timit", "wsj", ...) select
            the parsing spec and preprocessing. When None, placeholders are
            created instead of reading from disk.
        capacity: Queue capacity forwarded to `examples_queue`.
        mode: A `tf.contrib.learn.ModeKeys` value; TRAIN enables training
            behavior in the queue and image augmentation.

    Returns:
        When `data_file_pattern` is None: a dict with one rank-4 placeholder
        per non-"targets" field. Otherwise: a dict of example tensors, all
        converted to int32.
    """
    # Read from image TFRecords if the file has "image" in its name.
    if data_file_pattern and "image" in data_file_pattern:
        data_fields = {
            "image/encoded": tf.FixedLenFeature((), tf.string),
            "image/format": tf.FixedLenFeature((), tf.string),
            "image/class/label": tf.VarLenFeature(tf.int64)
        }
        data_items_to_decoders = {
            "inputs":
                tf.contrib.slim.tfexample_decoder.Image(
                    image_key="image/encoded",
                    format_key="image/format",
                    channels=1 if "mnist" in data_file_pattern else 3),
            "targets":
                tf.contrib.slim.tfexample_decoder.Tensor("image/class/label"),
        }
    elif data_file_pattern and "audio" in data_file_pattern:
        data_type = tf.int64 if "timit" in data_file_pattern else tf.float32
        data_fields = {
            "inputs": tf.VarLenFeature(data_type),
            "audio/sample_count": tf.FixedLenFeature((), tf.int64),
            "audio/sample_width": tf.FixedLenFeature((), tf.int64),
            "targets": tf.VarLenFeature(tf.int64),
        }
        data_items_to_decoders = None
    else:
        data_fields = {
            "inputs": tf.VarLenFeature(tf.int64),
            "targets": tf.VarLenFeature(tf.int64)
        }
        data_items_to_decoders = None

    # Create placeholders for input, rather than reading data from disk.
    if data_file_pattern is None:
        feature_map = {}
        # Iterate over (name, spec) pairs: looping over the dict itself
        # yields only the string keys, which cannot be unpacked into two
        # names. The placeholder dtype comes from the feature spec.
        for field, feature in six.iteritems(data_fields):
            if field != "targets":
                feature_map[field] = tf.placeholder(
                    dtype=feature.dtype, shape=[None] * 4, name=field)
        return feature_map

    # Now the non-trivial case construction.
    examples = examples_queue(
        [data_file_pattern],
        data_fields,
        training=(mode == tf.contrib.learn.ModeKeys.TRAIN),
        capacity=capacity,
        data_items_to_decoders=data_items_to_decoders)

    if "image" in data_file_pattern:
        # Small single-example pre-processing for images.
        examples["inputs"] = tf.cast(examples["inputs"], tf.int64)
        if ("image_imagenet" in data_file_pattern or
                "image_mscoco" in data_file_pattern):
            # For imagenet/coco, resize images to 299x299 as is standard.
            def resize(img):
                return tf.to_int64(tf.image.resize_images(img, [299, 299]))

            def preprocess(img):
                img = tf.image.resize_images(img, [360, 360])
                img = common_layers.image_augmentation(tf.to_float(img) / 255.)
                return tf.to_int64(img * 255.)

            inputs = examples["inputs"]
            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                examples["inputs"] = tf.cond(  # Preprocess 80% of the time.
                    tf.less(tf.random_uniform([]), 0.8),
                    lambda img=inputs: preprocess(img),
                    lambda img=inputs: resize(img))
            else:
                examples["inputs"] = tf.to_int64(resize(inputs))
    elif "audio" in data_file_pattern:
        # Reshape audio to proper shape
        sample_count = tf.to_int32(examples.pop("audio/sample_count"))
        sample_width = tf.to_int32(examples.pop("audio/sample_width"))
        channel_count = 1
        examples["inputs"] = tf.reshape(
            examples["inputs"], [sample_count, sample_width, channel_count])
        if "wsj" in data_file_pattern:
            # Reinterpret the bits as int32 so the int32 conversion at the
            # end of this function leaves them untouched.
            examples["inputs"] = tf.bitcast(examples["inputs"], tf.int32)
    elif "a2q_20161229" in data_file_pattern:
        # we forgot the EOS when we preprocessed this data.
        examples["targets"] = tf.concat([examples["targets"], [1]], 0)

    # We do not want int64s as they are not supported on GPUs.
    return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
def get_board_data():
    """Build the input placeholders and the per-square feature tensor.

    Creates five placeholders and combines them into one float32 tensor by
    concatenating lookup-table results with bit planes unpacked from the
    int64 inputs. Following the shapes below, the result is
    [batch, 8, 8, 15]: 1 (ep) + 2 (kings) + 2 (castling) + 10 (the AND of
    each of the 2 `color_occupied_bbs` rows with each of the 5 `piece_bbs`
    columns).

    IMPORTANT NOTES:
        1) Verify that accepting the uint8's as int32 is the best way to do
           this, casting is relatively fast so doing that wouldn't be a huge
           deal.

    Returns:
        A pair `(placeholders, full_data)` where `placeholders` is the tuple
        `(piece_bbs, color_occupied_bbs, ep_squares,
        castling_lookup_indices, kings)` and `full_data` is the concatenated
        feature tensor.
    """
    # Inputs. The two int64 placeholders are shaped so that the bitwise AND
    # further down broadcasts them to [batch, 2, 5].
    piece_bbs = tf.placeholder(tf.int64, shape=[None, 1, 5], name="piece_bbs")
    color_occupied_bbs = tf.placeholder(tf.int64, shape=[None, 2, 1], name="color_occupied_bbs")
    ep_squares = tf.placeholder(tf.int32, shape=[None], name="ep_squares")
    castling_lookup_indices =tf.placeholder(tf.int32,shape=[None],name="castling_lookup_indices")
    kings = tf.placeholder(tf.int32, shape=[None, 2], name="kings") #[,,,[white_king_square,black_king_square],,,]

    # Table of shape [64, 8, 8, 1]: entry `sq` holds the 64 unpacked bits of
    # `1 << sq` as an 8x8 float plane. Entry 0 is all zeros, so index 0
    # yields an empty plane (presumably "no ep square" -- confirm with the
    # caller).
    ep_lookup_array = np.stack([np.unpackbits(
        np.array([1 << sq],dtype=np.uint64).view(np.uint8)).reshape(8,8,1).astype(np.float32)
        if sq != 0 else np.zeros((8,8,1), dtype=np.float32) for sq in range(64)])

    # Reverse the second-to-last axis (the columns of each 8x8 plane) before
    # freezing the table into the graph.
    ep_lookup_table = tf.constant(ep_lookup_array[...,::-1, :])

    # Table indexed by [first king square, second king square]. Channel 0
    # marks the flat square equal to the first index and channel 1 the flat
    # square equal to the second; the 64 flat squares are then reshaped to
    # 8x8.
    kings_table = np.zeros([64,64,64,2],dtype=np.float32)
    aranged = np.arange(64)
    kings_table[aranged,:,aranged,0] = 1
    kings_table[:,aranged,aranged,1] = 1
    kings_table = kings_table.reshape([64,64,8,8,2])

    kings_lookup_table = tf.constant(kings_table, dtype=tf.float32)
    # `kings` has shape [batch, 2], so gather_nd selects one [8, 8, 2] slice
    # per example.
    king_features = tf.gather_nd(kings_lookup_table, kings)

    # Table of shape [16, 8, 8, 2]. For each of the 2**4 entries, the four
    # corner cells (0,0), (0,7) in channel 0 and (7,0), (7,7) in channel 1
    # are set when the matching bf.POSSIBLE_CASTLING_RIGHTS entry has the
    # corresponding corner bit (A1, H1, A8, H8) set.
    castling_lookup_array = np.zeros((2**4, 8,8,2),dtype=np.bool_)
    possible_castling_square_array = np.array([bf.BB_A1, bf.BB_H1, bf.BB_A8, bf.BB_H8],dtype=np.uint64)

    # `&` binds tighter than `!=`, so this is `(rights & corners) != 0`,
    # broadcast to one boolean per (entry, corner) pair.
    castling_lookup_array[:, [0, 0, 7, 7], [0, 7, 0, 7], [0, 0, 1, 1]] = np.expand_dims(bf.POSSIBLE_CASTLING_RIGHTS.view(np.uint64), 1) & possible_castling_square_array != 0

    castling_lookup_table = tf.constant(castling_lookup_array,dtype=tf.float32)
    castling_features = tf.gather(castling_lookup_table, castling_lookup_indices)

    ep_bitboards = tf.gather(ep_lookup_table, ep_squares)

    # Broadcasted AND gives [batch, 2, 5]; flatten to [batch, 10].
    color_specific_piece_info = tf.bitwise.bitwise_and(color_occupied_bbs, piece_bbs)

    reshaped_color_specific_info = tf.reshape(color_specific_piece_info, [-1, 10])

    # Bitcasting int64 to uint8 appends an axis of 8 bytes per value; the
    # cast to int32 makes the bytes usable as gather indices.
    the_bytes = tf.cast(tf.bitcast(reshaped_color_specific_info, tf.uint8),
                        dtype=tf.int32)

    # Table of shape [256, 8]: row b holds the 8 bits of byte b as floats,
    # least-significant bit first (unpackbits is MSB-first, hence [::-1]).
    float_bool_masks = tf.constant(
        [np.unpackbits(num)[::-1].tolist() for num in np.arange(2 ** 8, dtype=np.uint8)],
        dtype=tf.float32)

    # [batch, 10, 8, 8]: each byte is replaced by its row of 8 bit values.
    non_lookup_data = tf.gather(float_bool_masks, the_bytes)

    # Move the 10 planes to the last axis: [batch, 8, 8, 10].
    properly_arranged_non_lookup_data = tf.transpose(non_lookup_data, perm=[0, 2, 3, 1])

    # Concatenate along the channel axis. The order interleaves the lookup
    # features with the two halves of the 10 bit planes: the first 5 planes
    # come from row 0 of color_occupied_bbs, the last 5 from row 1.
    full_data = tf.concat([
        ep_bitboards,
        king_features[...,0:1],
        properly_arranged_non_lookup_data[...,:5],
        castling_features[...,0:1],
        king_features[...,1:2],
        properly_arranged_non_lookup_data[...,5:],
        castling_features[...,1:2]], 3)

    # The below line of code will be used instead of the code above when inputs are eventually desired in that way
    # full_data = tf.concat([ep_bitboards, king_features, castling_features, properly_arranged_non_lookup_data], 3)

    return (piece_bbs, color_occupied_bbs, ep_squares, castling_lookup_indices, kings), full_data
#!/usr/bin/python
# -*- coding: utf-8 -*-
# The shebang must be the very first line to take effect when the script is
# executed directly; the coding declaration is still honoured on line 2.
#
# Demonstrates tf.bitcast: reinterpret a float32 constant as int32, invert
# its bits with integer arithmetic, then reinterpret the result as float32.
import tensorflow as tf
import numpy as np
import tfutil

# A Python float becomes a float32 constant by default.
x = 37.0
const1 = tf.constant(x)
print(const1)
print(tf.shape(const1))
tfutil.print_constant(const1)

# Same 32 bits, now typed as int32 (no numeric conversion takes place).
bc_const1 = tf.bitcast(const1, tf.int32)
print(bc_const1)
print(tf.shape(bc_const1))
tfutil.print_operation_value(bc_const1)

# In two's complement, -1 - v == ~v, so the subtraction flips every bit.
x = -1
invert_bits = tf.constant(x) - bc_const1
print(invert_bits)
print(tf.shape(invert_bits))
tfutil.print_operation_value(invert_bits)

# Reinterpret the inverted bit pattern as a float32 again.
bc_to_float = tf.bitcast(invert_bits, tf.float32)
print(bc_to_float)
print(tf.shape(bc_to_float))
tfutil.print_operation_value(bc_to_float)
tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0') tf_scores = tf_sess.graph.get_tensor_by_name('detection_scores:0') tf_boxes = tf_sess.graph.get_tensor_by_name('detection_boxes:0') tf_classes = tf_sess.graph.get_tensor_by_name('detection_classes:0') tf_num_detections = tf_sess.graph.get_tensor_by_name('num_detections:0') IMAGE_PATH = "./images/kaffee-croissant-small.jpg" filenames = [IMAGE_PATH] filename_queue = tf.train.string_input_producer(filenames) reader = tf.WholeFileReader() key, value = reader.read(filename_queue) image = tf.image.decode_jpeg(value, channels=3) image2 = tf.image.resize(image, (300, 300)) image_resized = tf.bitcast(tf.cast(image2, dtype=tf.int8), tf.uint8) scores, boxes, classes, num_detections = tf_sess.run( [tf_scores, tf_boxes, tf_classes, tf_num_detections], feed_dict={tf_input: image_resized[None, ...]}) boxes = boxes[0] # index by 0 to remove batch dimension scores = scores[0] classes = classes[0] num_detections = int(num_detections[0]) # Boxes unit in pixels (image coordinates). boxes_pixels = [] for i in range(num_detections): # scale box to image coordinates box = boxes[i] * np.array([ image_resized.shape[0], image_resized.shape[1], image_resized.shape[0],