def check_cam_coherence(path):
    """Check the coherence of a camera path.

    Parses ``cam0.render`` (eye / look-at / up triples) and ``cam0_gt.visim``
    (position / quaternion) and compares their timestamps, camera positions
    and rotations.

    Args:
        path: String prefix; the two file names are appended to it verbatim
            (no path separator is inserted).
    """
    gt_file = path + 'cam0_gt.visim'
    render_file = path + 'cam0.render'

    # --- render file: skip the first three lines, keep every other line ---
    render_lines = tf.string_split([tf.read_file(render_file)], '\n').values
    render_lines = render_lines[3:]
    # NOTE(review): `shape_as_list` is called on the tensor as in the
    # original code; confirm this attribute exists in the TF build in use.
    render_lines = tf.strided_slice(
        render_lines, [0], [render_lines.shape_as_list()[0]], [2])
    render_fields = tf.reshape(
        tf.string_split(render_lines, ' ').values, [-1, 10])
    timestamp_from_render, render_values = tf.split(render_fields, [1, 9], -1)
    render_values = tf.strings.to_number(render_values)
    eye, lookat, up = tf.split(render_values, [3, 3, 3], -1)
    # NOTE(review): l2_normalize is called without an axis, as in the
    # original; confirm that per-row normalization is not what is intended.
    up_vector = tf.nn.l2_normalize(up - eye)
    lookat_vector = tf.nn.l2_normalize(lookat - eye)
    rotation_from_lookat = lookat_matrix(up_vector, lookat_vector)

    # --- ground-truth file: skip the first line, comma-separated columns ---
    gt_lines = tf.string_split([tf.read_file(gt_file)], '\n').values
    gt_lines = gt_lines[1:]
    gt_fields = tf.reshape(tf.string_split(gt_lines, ',').values, [-1, 8])
    timestamp_from_gt, gt_values = tf.split(gt_fields, [1, 7], -1)
    gt_values = tf.strings.to_number(gt_values)
    position, quaternion = tf.split(gt_values, [3, 4], -1)
    rotation_from_quaternion = from_quaternion(quaternion)

    assert tf.reduce_all(tf.equal(timestamp_from_render, timestamp_from_gt))
    assert tf.reduce_all(tf.equal(eye, position))

    # (trace(R_a^T R_b) - 1) / 2 is compared against 1 for every frame.
    relative_rotation = tf.matmul(
        rotation_from_lookat, rotation_from_quaternion, transpose_a=True)
    so3_diff = (tf.trace(relative_rotation) - 1) / 2
    tf.assert_near(so3_diff, tf.ones_like(so3_diff))
def read_data(data_queue):
    """Read one training pair and scale both images to [-1, 1].

    Args:
        data_queue: Indexable pair; element 0 is the input image path and
            element 1 the ground-truth image path.

    Returns:
        Tuple ``(in_imgproc, gt_imgproc)`` of float32 image tensors. No
        cropping is applied.
    """
    def preprocessing(image):
        # Decoded pixels are in [0, 255]; map them to [-1, 1].
        proc = tf.cast(image, tf.float32)
        # NOTE(review): the static shape is declared in the order
        # (IMG_WIDTH, IMG_HEIGH, channels) — confirm against the data layout.
        proc.set_shape([IMG_WIDTH, IMG_HEIGH, channels])
        return proc / 127.5 - 1

    in_img = tf.image.decode_image(tf.read_file(data_queue[0]),
                                   channels=channels)
    gt_img = tf.image.decode_image(tf.read_file(data_queue[1]),
                                   channels=channels)

    in_imgproc = preprocessing(in_img)
    gt_imgproc = preprocessing(gt_img)
    return in_imgproc, gt_imgproc
def _parse_apply_preprocessing(self, input_queue):
    """Load and pre-process one (image, label) pair.

    The labels are themselves images (semantic-segmentation masks), so they
    go through the same pre-processing as the images, with one channel.
    When resizing is enabled the labels are afterwards reduced with a mean
    over axis 2 (dimension kept).

    Args:
        input_queue: Indexable pair; element 0 is the image path and
            element 1 the label path.

    Returns:
        Tuple ``(images, labels)``.
    """
    raw_image = tf.read_file(input_queue[0])
    images = self._parse_preprocess_images(raw_image,
                                           channels=self._image_depth)

    raw_label = tf.read_file(input_queue[1])
    labels = self._parse_preprocess_images(raw_label, channels=1)

    if not self._resize_images:
        return images, labels
    return images, tf.reduce_mean(labels, axis=2, keepdims=True)
def _parse(self, image_blur, image_sharp):
    """Decode a blurred/sharp PNG pair into float32 tensors.

    Args:
        image_blur: Path of the blurred PNG.
        image_sharp: Path of the sharp PNG.

    Returns:
        Tuple ``(blur, sharp)`` of float32 images with ``self.channel``
        channels; pixel values are cast, not rescaled.
    """
    blur_bytes = tf.read_file(image_blur)
    sharp_bytes = tf.read_file(image_sharp)

    blur_pixels = tf.image.decode_png(blur_bytes, channels=self.channel)
    sharp_pixels = tf.image.decode_png(sharp_bytes, channels=self.channel)

    return (tf.cast(blur_pixels, tf.float32),
            tf.cast(sharp_pixels, tf.float32))
def _parse_Blur_only(self, image_blur):
    """Decode a single blurred image file into a float32 tensor.

    Args:
        image_blur: Path of the image file.

    Returns:
        float32 image with ``self.channel`` channels (cast, not rescaled).
    """
    decoded = tf.image.decode_image(tf.read_file(image_blur),
                                    channels=self.channel)
    return tf.cast(decoded, tf.float32)
def read_image(filename):
    """Decode a JPEG file into a 3-channel float32 image in [0, 1]."""
    decoded = tf.image.decode_jpeg(tf.read_file(filename), channels=3)
    # convert_image_dtype rescales the integer pixels to floats in [0, 1];
    # the clip afterwards pins the range explicitly.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.clip_by_value(as_float, 0.0, 1.0)
def _parse_function(filename, label):
    """Read one file, run the configured preprocessing, return (image, label).

    Relies on ``self`` and ``is_training`` from the enclosing scope.
    """
    raw_bytes = tf.read_file(filename)
    preprocess = self.get_preprocess_fn()
    preprocessed = preprocess(
        raw_bytes, is_training, image_size=self.image_size)
    return tf.cast(preprocessed, tf.float32), label
def read_png(filename):
    """Loads a PNG image file as a 3-channel float32 tensor scaled by 1/255."""
    decoded = tf.image.decode_image(tf.read_file(filename), channels=3)
    pixels = tf.cast(decoded, tf.float32)
    return pixels / 255
def load_depth(filename, shape):
    """Load the 16-bit png depth map in millimeters given the filename.

    Args:
        filename: Path of the PNG file.
        shape: Static shape assigned to the returned tensor.

    Returns:
        float32 tensor holding the decoded values divided by 1000.
    """
    raw_depth = tf.image.decode_png(
        tf.read_file(filename), 3, tf.dtypes.uint16)
    scaled_depth = tf.cast(raw_depth, tf.float32) / 1000
    scaled_depth.set_shape(shape)
    return scaled_depth
def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads,
                          batch_size_placeholder):
    """Build the image loading / augmentation graph and batch its output.

    One dequeue-and-preprocess branch is created per preprocessing thread.
    Each augmentation step is switched on or off at run time through the
    flags read from ``control[0]``.

    Args:
        input_queue: Queue whose ``dequeue()`` yields
            ``(filenames, label, control)``.
        image_size: ``(height, width)`` tuple of the output images.
        nrof_preprocess_threads: Number of parallel branches to build.
        batch_size_placeholder: Batch size passed to ``tf.train.batch_join``.

    Returns:
        Tuple ``(image_batch, label_batch)``.
    """
    image_shape = image_size + (3,)
    per_thread_examples = []

    for _ in range(nrof_preprocess_threads):
        filenames, label, control = input_queue.dequeue()
        processed = []
        for filename in tf.unstack(filenames):
            raw_bytes = tf.read_file(filename)
            image = tf.image.decode_image(raw_bytes, 3)

            # Random rotation (python function) or pass-through.
            image = tf.cond(
                get_control_flag(control[0], RANDOM_ROTATE),
                lambda: tf.py_func(random_rotate_image, [image], tf.uint8),
                lambda: tf.identity(image))
            # Random crop, or deterministic crop/pad to the target size.
            image = tf.cond(
                get_control_flag(control[0], RANDOM_CROP),
                lambda: tf.random_crop(image, image_shape),
                lambda: tf.image.resize_image_with_crop_or_pad(
                    image, image_size[0], image_size[1]))
            # Random horizontal flip or pass-through.
            image = tf.cond(
                get_control_flag(control[0], RANDOM_FLIP),
                lambda: tf.image.random_flip_left_right(image),
                lambda: tf.identity(image))
            # Fixed (x - 127.5) / 128 scaling, or per-image standardization.
            image = tf.cond(
                get_control_flag(control[0], FIXED_STANDARDIZATION),
                lambda: (tf.cast(image, tf.float32) - 127.5) / 128.0,
                lambda: tf.image.per_image_standardization(image))
            # Unconditional horizontal flip or pass-through.
            image = tf.cond(
                get_control_flag(control[0], FLIP),
                lambda: tf.image.flip_left_right(image),
                lambda: tf.identity(image))

            # pylint: disable=no-member
            image.set_shape(image_shape)
            processed.append(image)
        per_thread_examples.append([processed, label])

    image_batch, label_batch = tf.train.batch_join(
        per_thread_examples,
        batch_size=batch_size_placeholder,
        shapes=[image_shape, ()],
        enqueue_many=True,
        capacity=4 * nrof_preprocess_threads * 100,
        allow_smaller_final_batch=True)
    return image_batch, label_batch
def load_surface_normal(filename, shape):
    """Load the surface normal given the filename.

    Args:
        filename: Path of the 16-bit PNG file.
        shape: Static shape assigned to the returned tensor.

    Returns:
        float32 tensor with the uint16 values mapped linearly to [-1, 1].
    """
    encoded = tf.image.decode_png(tf.read_file(filename), 3, tf.dtypes.uint16)
    # 0 -> -1 and 65535 -> +1.
    decoded_normal = 2 * tf.cast(encoded, tf.float32) / (2**16 - 1) - 1
    decoded_normal.set_shape(shape)
    return decoded_normal
def example(movieId):
    """Build the poster tensor and 20 trailer-frame tensors for one movie.

    Args:
        movieId: String tensor; used to form the poster ``.jpg`` path under
            ``poster_dir`` and the trailer ``.npy`` path under ``trailer_dir``.

    Returns:
        Tuple ``(trailer_frames, poster)``: a list of 20 tensors reshaped to
        ``[64, 64, 3]`` and one poster tensor reshaped to ``[64, 64, 3]``.
    """
    # Debug print of the id whenever the tensor is evaluated.
    movieId = tf_v1.Print(movieId, [movieId])

    # Poster: decode, cast to float32 (no rescaling), resize to 64 x 64.
    poster_bytes = tf_v1.read_file(poster_dir + os.sep + movieId + '.jpg')
    poster_pixels = tf_v1.image.decode_jpeg(poster_bytes, channels=3)
    poster_pixels = tf_v1.cast(poster_pixels, tf_v1.float32)
    poster_small = tf_v1.image.resize(
        poster_pixels, [64, 64], name="poster_resize")
    poster = tf_v1.reshape(poster_small, [64, 64, 3], name="poster_reshape")

    # Trailer: loaded through a python function, frames on the last axis.
    trailer_path = trailer_dir + os.sep + movieId + '.npy'
    trailer_mat = tf_v1.py_func(tf_np_load, [trailer_path], tf_v1.float32)
    trailer_mat = tf_v1.reshape(trailer_mat, (240, 240, 3, 20))

    def frame_at(index):
        frame = trailer_mat[:, :, :, index]
        frame = tf_v1.reshape(frame, (240, 240, 3))
        frame = tf_v1.cast(frame, tf_v1.float32)
        small = tf_v1.image.resize(frame, [64, 64])
        return tf_v1.reshape(small, [64, 64, 3])

    trailer_frames = [frame_at(i) for i in range(20)]
    return trailer_frames, poster
def decode_wav(wav):
    """Decode a WAV file into a mono waveform of shape ``(1, num_samples)``.

    ``num_samples`` comes from the enclosing scope.
    """
    samples, _ = tf.audio.decode_wav(
        tf.read_file(wav), desired_channels=1, desired_samples=num_samples)
    return tf.reshape(samples, (1, num_samples))
def read_tensor_from_image_file(self, file_name, input_height=299,
                                input_width=299, input_mean=0,
                                input_std=255):
    """Load an image file and return it as a normalized 4-D array.

    The decoder is chosen from the file extension (``.png``, ``.gif``,
    ``.bmp``; anything else is decoded as JPEG). The image is cast to
    float32, given a leading batch dimension, resized bilinearly and
    normalized as ``(x - input_mean) / input_std``.

    Args:
        file_name: Path of the image file (a Python string).
        input_height: Target height in pixels.
        input_width: Target width in pixels.
        input_mean: Value subtracted from every pixel.
        input_std: Value every pixel is divided by.

    Returns:
        The evaluated result of the normalization op.
    """
    # NOTE: every call adds these ops to the current default graph.
    file_reader = tf.read_file(file_name, "file_reader")
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(
            tf.image.decode_gif(file_reader, name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')

    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander,
                                       [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])

    # Close the session when done; the original left it open on every call.
    with tf.Session() as sess:
        return sess.run(normalized)
def make_summary_example(movieId, poster_dir, trailer_dir):
    """Build the poster tensor and 20 trailer-frame tensors for one movie.

    Args:
        movieId: Id used to form the ``.jpg`` and ``.npy`` file names.
        poster_dir: Directory containing ``<movieId>.jpg``.
        trailer_dir: Directory containing ``<movieId>.npy``.

    Returns:
        Tuple ``(trailer_frames, poster)``: a list of 20 tensors reshaped to
        ``[64, 64, 3]`` and the poster resized to 64 x 64.
    """
    # REMEMBER: convert_image_dtype rescales the decoded pixels to floats,
    # so the poster data is on the 0-1 scale, not 0-255.
    poster_bytes = tf_v1.read_file(poster_dir + os.sep + movieId + '.jpg')
    poster_pixels = tf_v1.image.decode_jpeg(poster_bytes, channels=3)
    poster_pixels = tf_v1.image.convert_image_dtype(
        poster_pixels, tf_v1.float32)
    # 64 x 64 image with 3 channels
    poster = tf_v1.image.resize(poster_pixels, [64, 64], name="poster_resize")

    # Trailer: loaded directly with tf_np_load, frames on the last axis.
    trailer_path = trailer_dir + os.sep + movieId + '.npy'
    trailer_mat = tf_np_load(trailer_path)
    trailer_mat = tf_v1.reshape(trailer_mat, (240, 240, 3, 20))

    def frame_at(index):
        frame = trailer_mat[:, :, :, index]
        frame = tf_v1.reshape(frame, (240, 240, 3))
        frame = tf_v1.cast(frame, tf_v1.float32)
        small = tf_v1.image.resize(frame, [64, 64])
        return tf_v1.reshape(small, [64, 64, 3])

    trailer_frames = [frame_at(i) for i in range(20)]
    return trailer_frames, poster
def read_png(filename):
    """Loads a image file as float32 HxWx3 array; tested to work on png and jpg images."""
    encoded = tf.read_file(filename)
    rgb = tf.cast(tf.image.decode_image(encoded, channels=3), tf.float32)
    # Scale the decoded pixel values by 1/255.
    return rgb / 255
def read_camera_parameters(path, n_timestamp, parallel_camera_process=10):
    """Read a camera's parameters.

    File layout per row after the 6-line header (15 space-separated values):
      <camera info: f, cx, cy, dist.coeff[0], dist.coeff[1], dist.coeff[2]>
      <orientation: w, x, y, z> <position: x, y, z>
      <image resolution: width, height>

    Args:
        path: Path of the camera parameter file.
        n_timestamp: Passed to ``dataset_to_tensors`` as ``capacity``.
        parallel_camera_process: Passed to ``dataset_to_tensors`` as
            ``parallelism``.

    Returns:
        Whatever ``dataset_to_tensors`` returns for the mapped
        ``(pose_matrix, intrinsic_matrix, resolution)`` elements.
    """
    # Split into lines and drop the header.
    rows = tf.string_split([tf.read_file(path)], '\n').values
    rows = rows[6:]
    # Split into columns and convert the strings to float32.
    columns = tf.reshape(tf.string_split(rows, ' ').values, [-1, 15])
    values = tf.strings.to_number(columns)

    camera_info, orientation, position, resolution = tf.split(
        values, [6, 4, 3, 2], -1)
    camera_ds = tf.data.Dataset.from_tensor_slices(
        (camera_info, orientation, position, resolution))

    def process_camera_parameters(camera_info, orientation, position,
                                  resolution):
        # Quaternion -> 3x3 rotation, then append the position as a fourth
        # column to obtain the 3x4 pose matrix [R_3x3 | t_3x1].
        rotation_matrix = from_quaternion(orientation)
        translation_column = tf.expand_dims(position, -1)
        pose_matrix = tf.concat([rotation_matrix, translation_column], -1)
        # Only f, cx, cy are used; the distortion coefficients are ignored.
        intrinsic_matrix = build_intrinsic_matrix(
            camera_info[0], camera_info[1], camera_info[2])
        return (pose_matrix, intrinsic_matrix, resolution)

    return dataset_to_tensors(
        camera_ds,
        capacity=n_timestamp,
        map_fn=process_camera_parameters,
        parallelism=parallel_camera_process)
def tf_ops(self, capacity=32):
    """Create the shuffled (image, label) producer ops.

    Args:
        capacity: Capacity passed to ``tf.train.slice_input_producer``.

    Returns:
        Tuple ``(im, label)``: the decoded 3-channel image and its label.
    """
    sources = [tf.constant(self.images), tf.constant(self.labels)]
    im_path, label = tf.train.slice_input_producer(
        sources, capacity=capacity, shuffle=True)
    decoded = tf.image.decode_image(tf.read_file(im_path), channels=3)
    return decoded, label
def read_and_decode(self, filename_queue):
    """Decode the first five PNG files named in ``filename_queue``.

    Each name is joined to ``self.img_dir`` with a '/' separator, decoded as
    a 3-channel PNG and cast to float32.

    Args:
        filename_queue: Indexable holding at least five file names.

    Returns:
        Tuple of five float32 image tensors, in input order.
    """
    # Build all five paths first, then decode them in the same order.
    full_names = [
        tf.string_join([self.img_dir, '/', filename_queue[index]])
        for index in range(5)
    ]
    frames = []
    for full_name in full_names:
        decoded = tf.image.decode_png(tf.read_file(full_name), channels=3)
        frames.append(tf.cast(decoded, tf.float32))
    return tuple(frames)
def decode_wav(wav):
    """Decode a WAV file into a mono waveform of shape ``(1, num_samples)``.

    ``num_samples`` comes from the enclosing scope.
    """
    # read_file plays the role of open(): it returns the raw file contents.
    raw_audio = tf.read_file(wav)
    # Decode a 16-bit PCM WAV file to a float tensor.
    samples, _ = tf.audio.decode_wav(
        raw_audio, desired_channels=1, desired_samples=num_samples)
    return tf.reshape(samples, (1, num_samples))
def read_png(filename):
    """Loads a PNG image file.

    NOTE(review): the decoded image is built but not returned; the function
    returns a ``(32, 32, 3)`` tensor of random normal noise instead. This
    looks like a debugging stub — confirm before relying on the output.
    """
    encoded = tf.read_file(filename)
    gray = tf.image.decode_image(encoded, channels=1)
    gray = tf.cast(gray, tf.float32)
    gray = tf.reshape(gray, (28, 28, 1))
    gray = gray / 255
    return tf.random.normal((32, 32, 3))
def _preprocess_inference(image_path, label, resize=(32, 32)):
    """Preprocess an individual image during inference.

    Args:
        image_path: Tensor holding the PNG path; squeezed before use.
        label: Passed through unchanged.
        resize: Target size given to ``tf.image.resize_images``.

    Returns:
        Tuple ``(image, label)``.
    """
    path = tf.squeeze(image_path)
    decoded = tf.image.decode_png(tf.read_file(path))
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    # Standardization is applied before the resize.
    standardized = tf.image.per_image_standardization(as_float)
    resized = tf.image.resize_images(standardized, size=resize)
    return resized, label
def file_to_tensor(file_path):
    """Load an image file, resize it to a square and divide by 255.

    ``image_size`` and ``num_channel`` come from the enclosing scope.
    """
    decoded = tf.image.decode_image(tf.read_file(file_path), channels=3)
    # Declare a rank-3 static shape before resizing.
    decoded.set_shape([None, None, None])
    resized = tf.image.resize_images(decoded, [image_size, image_size])
    scaled = tf.divide(tf.subtract(resized, [0]), [255])
    scaled.set_shape([image_size, image_size, num_channel])
    return scaled
def decode_wav(wav):
    """Decode a WAV file into a ``(num_mics, max_utt_length)`` waveform.

    Channels beyond ``num_mics`` are dropped; if the file has fewer,
    ``_pad_mics_tf`` supplies the missing ones. ``num_mics`` and
    ``max_utt_length`` come from the enclosing scope.
    """
    samples, _ = tf.audio.decode_wav(
        tf.read_file(wav), desired_samples=max_utt_length)
    # Decoded audio is (samples, channels); put the microphones first.
    mics_first = tf.transpose(samples)
    num_read_mics = tf.shape(mics_first)[0]

    def keep_first_mics():
        return mics_first[:num_mics, :]

    def pad_missing_mics():
        return _pad_mics_tf(mics_first, num_mics - num_read_mics)

    fitted = tf.cond(num_read_mics >= num_mics,
                     keep_first_mics, pad_missing_mics)
    return tf.reshape(fitted, (num_mics, max_utt_length))
def read_images_from_disk(input_queue):
    """Reads one PNG image from disk and passes its label through.

    Args:
      input_queue: Indexable pair; element 0 is the image file path and
        element 1 is the label.

    Returns:
      Two tensors: the decoded 3-channel image, and the label.
    """
    label = input_queue[1]
    png_bytes = tf.read_file(input_queue[0])
    return tf.image.decode_png(png_bytes, channels=3), label
def read_image_from_disk(filename_to_label_tuple):
    """
    Loads the JPEG image named by the input and passes the label through

    :param filename_to_label_tuple: pair of (image file path, label)
    :type filename_to_label_tuple: list
    :return: tuple of decoded 3-channel image and label
    """
    label = filename_to_label_tuple[1]
    jpeg_bytes = tf.read_file(filename_to_label_tuple[0])
    return tf.image.decode_jpeg(jpeg_bytes, channels=3), label
def main(m_path, out_dir, light=False, test_out=True):
    """Export the latest generator checkpoint as a frozen TF1 graph (.pb).

    The exported graph takes a JPEG file path (string placeholder) and
    outputs the generated image as uint8.

    Args:
        m_path: Directory passed to ``tf.train.latest_checkpoint``.
        out_dir: Output directory; passed to ``makedirs`` before writing.
        light: Forwarded to ``Generator`` and used to pick the file prefix.
        test_out: When true, enables debug logging and runs the exported
            graph once on ``input_images/temple.jpg``, showing the result.

    Raises:
        SystemExit: With status 1 if building the frozen graph raises
            ``ValueError`` (typically a ``--light`` mismatch).
    """
    logger = get_logger("tf1_export", debug=test_out)
    g = Generator(light=light)

    # Graph: path -> JPEG decode -> [-1, 1] -> generator -> uint8 image.
    t = tf.placeholder(tf.string, [])
    x = tf.expand_dims(tf.image.decode_jpeg(tf.read_file(t), channels=3), 0)
    x = (tf.cast(x, tf.float32) / 127.5) - 1
    x = g(x, training=False)
    out = tf.cast((tf.squeeze(x, 0) + 1) * 127.5, tf.uint8)
    in_name, out_name = t.op.name, out.op.name

    try:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            g.load_weights(tf.train.latest_checkpoint(m_path))
            in_graph_def = tf.get_default_graph().as_graph_def()
            out_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, in_graph_def, [out_name])
        tf.reset_default_graph()
        tf.import_graph_def(out_graph_def, name='')
    except ValueError:
        logger.error("Failed to load specified weight.")
        logger.error(
            "If you trained your model with --light, "
            "consider adding --light when executing this script; otherwise, "
            "do not add --light when executing this script.")
        # `exit()` is injected by the `site` module for interactive use and
        # is not guaranteed to exist; raise SystemExit directly instead.
        raise SystemExit(1)

    makedirs(out_dir)

    # Pick the first unused numbered file name so nothing is overwritten.
    m_cnt = 0
    bpath = 'optimized_graph_light' if light else 'optimized_graph'
    out_path = os.path.join(out_dir, f'{bpath}_{m_cnt:04d}.pb')
    while os.path.exists(out_path):
        m_cnt += 1
        out_path = os.path.join(out_dir, f'{bpath}_{m_cnt:04d}.pb')

    with tf.gfile.GFile(out_path, 'wb') as f:
        f.write(out_graph_def.SerializeToString())

    if test_out:
        # Reload the file just written into a fresh graph and run it once.
        with tf.Graph().as_default():
            gd = tf.GraphDef()
            with tf.gfile.GFile(out_path, 'rb') as f:
                gd.ParseFromString(f.read())
            tf.import_graph_def(gd, name='')
            tf.get_default_graph().finalize()
            t = tf.get_default_graph().get_tensor_by_name(f"{in_name}:0")
            out = tf.get_default_graph().get_tensor_by_name(f"{out_name}:0")
            from time import time
            start = time()
            with tf.Session() as sess:
                img = Image.fromarray(
                    sess.run(out, {t: "input_images/temple.jpg"}))
                img.show()
            elapsed = time() - start
            logger.debug(f"{elapsed} sec per img")

    logger.info(f"successfully exported ckpt to {out_path}")
    logger.info(f"input var name: {in_name}:0")
    logger.info(f"output var name: {out_name}:0")
def __readImages(self, filename):
    """Read one image file and split it into input and target images.

    The file is decoded, converted to float32 and split along axis 1 into
    ``self.nbTargetsToRead + self.inputNumbers`` equal parts. In "eval"
    mode black target images are appended first, so a file holding only
    the inputs can still be split the same way.

    Args:
        filename: Path of the image file.

    Returns:
        Tuple ``(filename, inputs, targets, gammadInputs)`` where
        ``inputs`` are raised to the power 2.2, optionally passed through
        ``helpers.logTensor`` and then ``helpers.preprocess``;
        ``targets`` are passed through ``helpers.preprocess``; and
        ``gammadInputs`` are the inputs as split, before any of that.

    Raises:
        ValueError: If ``self.which_direction`` is neither "AtoB" nor "BtoA".
    """
    image_string = tf.read_file(filename)  # string tensor with file bytes
    decodedInput = tf.image.decode_image(image_string)
    floatInput = tf.image.convert_image_dtype(decodedInput, dtype=tf.float32)

    assertion = tf.assert_equal(tf.shape(floatInput)[-1],
                                3,
                                message="image does not have 3 channels")
    # Control dependencies only attach to ops created inside the context.
    # set_shape creates no op, so the check would never run; pass the tensor
    # through an identity op so the assertion is actually enforced.
    with tf.control_dependencies([assertion]):
        floatInput = tf.identity(floatInput)
    floatInput.set_shape([None, None, 3])

    if self.mode == "eval":
        # Only the input pictures are present: pad with black targets.
        blackTargets = tf.zeros([
            self.inputImageSize,
            self.inputImageSize * self.nbTargetsToRead, 3
        ])
        floatInput = tf.concat([floatInput, blackTargets], axis=1)

    # List of nbTargetsToRead + inputNumbers images.
    floatInputSplit = tf.split(floatInput,
                               self.nbTargetsToRead + self.inputNumbers,
                               axis=1,
                               name="Split_input_data")

    # Which slice is input and which is target depends on the direction.
    if self.which_direction == "AtoB":
        inputs = floatInputSplit[:self.inputNumbers]
        targets = floatInputSplit[self.inputNumbers:]
    elif self.which_direction == "BtoA":
        inputs = floatInputSplit[self.inputNumbers:]
        targets = floatInputSplit[:self.inputNumbers]
    else:
        raise ValueError("Invalid direction")

    gammadInputs = inputs
    # Correct for the gamma.
    inputs = [tf.pow(image, 2.2) for image in inputs]

    # Optionally move the inputs to log space.
    if self.logInput:
        inputs = [helpers.logTensor(image) for image in inputs]

    # helpers.preprocess maps values from [0; 1] to [-1; 1].
    inputs = [helpers.preprocess(image) for image in inputs]
    targets = [helpers.preprocess(target) for target in targets]

    # Inputs and targets used to be resized here; there is no functional
    # need for it at the moment.
    return filename, inputs, targets, gammadInputs
def findImages(input_path, output_path):
    """Load a PNG, resize it for inference, and pass the output path through.

    Args:
        input_path: Path of the PNG to read.
        output_path: Returned unchanged alongside the image.

    Returns:
        Tuple ``(image, output_path)``; the image is float32 and resized
        bilinearly to the configured inference height and width.
    """
    flags = cfg.FLAGS
    decoded = tf.image.decode_png(tf.read_file(input_path),
                                  channels=flags.input_image_channels)
    as_float = tf.cast(decoded, dtype=tf.float32)
    target_size = [flags.inference_image_height, flags.inference_image_width]
    resized = tf.image.resize_images(
        as_float, target_size, method=tf.image.ResizeMethod.BILINEAR)
    return resized, output_path
def _parse_function_inference(self, filename, label):
    """Load one PNG for inference and one-hot encode its label.

    Args:
        filename: Path of the PNG file.
        label: Integer class index.

    Returns:
        Tuple ``(img_bgr, one_hot)``: the 227 x 227 mean-subtracted image
        with its channel axis reversed, and the one-hot label of depth
        ``self.num_classes``.
    """
    one_hot = tf.one_hot(label, self.num_classes)

    decoded = tf.image.decode_png(tf.read_file(filename), channels=3)
    resized = tf.image.resize_images(decoded, [227, 227])
    centered = tf.subtract(resized, IMAGENET_MEAN)
    # Reverse the last axis: RGB -> BGR.
    return centered[:, :, ::-1], one_hot