def irevnet300(x,  # pylint: disable=missing-docstring
               is_training,
               num_classes=1000,
               weight_decay=1e-4,
               activation_fn=tf.nn.relu,
               normalization_fn=batch_norm):
  unit = functools.partial(bottleneck_rev, simple=True)
  end_points = {}
  kernel_regularizer = l2_regularizer(scale=weight_decay)

  # Do inv. pooling first
  x = tf.space_to_depth(x, 2)

  layer_counts = [1, 6, 16, 72, 5]
  params = {
      'activation_fn': activation_fn,
      'normalization_fn': normalization_fn,
      'training': is_training,
      'kernel_regularizer': kernel_regularizer
  }
  for num_block, layer_count in enumerate(layer_counts):
    for _ in range(layer_count):
      x = unit(x, **params)
    end_points['block%i' % (num_block + 1)] = x
    if num_block < (len(layer_counts) - 1):
      x = tf.space_to_depth(x, 2)

  x = normalization_fn(x, training=is_training)
  end_points['last_invertible'] = x

  # Non-invertible part starts here
  x = activation_fn(x)
  x = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
  end_points['pre_logits'] = tf.squeeze(x, [1, 2])
  logits = tf.squeeze(
      tf.layers.conv2d(x,
                       filters=num_classes,
                       kernel_size=1,
                       kernel_regularizer=kernel_regularizer), [1, 2])
  end_points['logits'] = logits
  return logits, end_points
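# A minimal usage sketch of the i-RevNet builder above; a hypothetical
# illustration, assuming TF1 graph mode with the module's own `bottleneck_rev`,
# `batch_norm`, and `l2_regularizer` helpers in scope. Input height/width must
# be divisible by 2**5 = 32, since tf.space_to_depth(x, 2) is applied five
# times (once up front, once after each of the first four block groups).
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = irevnet300(images, is_training=True)  # 224 / 32 = 7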
def build_graph(parameters):
  input_tensor = tf.compat.v1.placeholder(
      dtype=parameters["dtype"],
      name="input",
      shape=parameters["input_shape"])
  out = tf.space_to_depth(input_tensor, block_size=parameters["block_size"])
  return [input_tensor], [out]
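# A minimal sketch of driving build_graph directly (assumption: a hand-built
# `parameters` dict in the style of the TFLite test-generation harness above).
parameters = {
    "dtype": tf.float32,
    "input_shape": [1, 4, 4, 3],  # NHWC; H and W must be divisible by block_size
    "block_size": 2,
}
inputs, outputs = build_graph(parameters)
# outputs[0] has shape [1, 2, 2, 12]: each 2x2 spatial patch is folded into
# the channel dimension (3 channels * 2 * 2 = 12).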
def demosaic(bayer_images):
  """Bilinearly demosaics a batch of RGGB Bayer images."""
  bayer_images.shape.assert_is_compatible_with((None, None, None, 4))

  # This implementation exploits how edges are aligned when upsampling with
  # tf.image.resize_bilinear().
  with tf.name_scope(None, 'demosaic'):
    shape = tf.shape(bayer_images)
    shape = [shape[1] * 2, shape[2] * 2]

    red = bayer_images[Ellipsis, 0:1]
    red = tf.image.resize_bilinear(red, shape)

    green_red = bayer_images[Ellipsis, 1:2]
    green_red = tf.image.flip_left_right(green_red)
    green_red = tf.image.resize_bilinear(green_red, shape)
    green_red = tf.image.flip_left_right(green_red)
    green_red = tf.space_to_depth(green_red, 2)

    green_blue = bayer_images[Ellipsis, 2:3]
    green_blue = tf.image.flip_up_down(green_blue)
    green_blue = tf.image.resize_bilinear(green_blue, shape)
    green_blue = tf.image.flip_up_down(green_blue)
    green_blue = tf.space_to_depth(green_blue, 2)

    green_at_red = (green_red[Ellipsis, 0] + green_blue[Ellipsis, 0]) / 2
    green_at_green_red = green_red[Ellipsis, 1]
    green_at_green_blue = green_blue[Ellipsis, 2]
    green_at_blue = (green_red[Ellipsis, 3] + green_blue[Ellipsis, 3]) / 2

    green_planes = [
        green_at_red, green_at_green_red, green_at_green_blue, green_at_blue
    ]
    green = tf.depth_to_space(tf.stack(green_planes, axis=-1), 2)

    blue = bayer_images[Ellipsis, 3:4]
    blue = tf.image.flip_up_down(tf.image.flip_left_right(blue))
    blue = tf.image.resize_bilinear(blue, shape)
    blue = tf.image.flip_up_down(tf.image.flip_left_right(blue))

    rgb_images = tf.concat([red, green, blue], axis=-1)
    return rgb_images
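# A minimal usage sketch (assumption: TF1 graph mode, which provides
# tf.image.resize_bilinear). Packed RGGB Bayer planes of shape [N, H, W, 4]
# demosaic to RGB images of shape [N, 2H, 2W, 3].
bayer = tf.placeholder(tf.float32, [None, None, None, 4])
rgb = demosaic(bayer)  # -> [N, 2H, 2W, 3]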
def encoder(self, x):
  if self.bf16:
    x = tf.cast(x, tf.bfloat16)

  if self.stack_factor > 1:
    x = tf.space_to_depth(x, self.stack_factor)

  with tf.variable_scope("encoder"):
    for block, (stack, channels) in enumerate(self.convblocks):
      with tf.variable_scope(f"block_{block}"):
        for i in range(stack):
          with tf.variable_scope(f"layer_{i}"):
            if i == 0:
              # downsample
              x = self.conv2d(x, channels, (4, 4), (2, 2),
                              padding="SAME", name="conv_downsample")
            else:
              # normal residual block
              def encoder_block(x, channels=channels):
                out = self.conv2d(x, channels, (3, 3), (1, 1),
                                  padding="SAME", name="conv_in")
                # out = self.norm(out, name="bn_in")
                out = self.activation(out, name="activ")
                out = self.conv2d(out, channels, (3, 3), (1, 1),
                                  padding="SAME", name="conv_out")
                # out = self.norm(out, name="bn_out")
                return out

              res_out = recompute_grad(
                  encoder_block,
                  self.bf16)(x) if self.recompute_grad else encoder_block(x)
              x = x + res_out

  with tf.variable_scope("codebook"):
    self.n_hid = x.shape[-1]
    embedding = tf.get_variable(
        "codebook",
        shape=[self.n_hid, self.num_tokens],
        dtype=tf.float32)
    if self.bf16:
      x = tf.cast(x, tf.float32)
    output = tf.matmul(x, embedding)
    return output
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width. The number of image channels
      should be divisible by (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
      the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
      over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
      running the function with global_pool=False and then performing global
      average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.

  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')

    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')

    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keepdims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        axis=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
def input_fn(params):
  """Input function which provides a single batch for train or eval."""
  batch_size = params['batch_size']
  index = params['dataset_index']
  num_hosts = params['dataset_num_shards']

  num_dataset_per_shard = max(
      1,
      int(
          math.ceil(FLAGS.num_eval_images / FLAGS.eval_batch_size) *
          FLAGS.eval_batch_size / num_hosts))
  padded_dataset = tf.data.Dataset.from_tensors(
      tf.constant(
          tf.train.Example(
              features=tf.train.Features(
                  feature={
                      'image/class/label':
                          tf.train.Feature(
                              int64_list=tf.train.Int64List(value=[-1])),
                      'image/encoded':
                          tf.train.Feature(
                              bytes_list=tf.train.BytesList(
                                  value=[str.encode('')]))
                  })).SerializeToString(),
          dtype=tf.string)).repeat(num_dataset_per_shard)

  if data_dir is None:
    dataset = padded_dataset.repeat(batch_size * 100)
  else:
    file_pattern = os.path.join(
        data_dir, 'train-*' if is_training else 'validation-*')
    dataset = tf.data.Dataset.list_files(file_pattern, shuffle=False)
    dataset = dataset.shard(num_hosts, index)
    dataset = dataset.interleave(tf.data.TFRecordDataset, 64, 1, 64)
    if is_training:
      dataset = dataset.cache().shuffle(shuffle_size).repeat(100)
    else:
      dataset = dataset.concatenate(padded_dataset).take(
          num_dataset_per_shard)

  if cache_decoded_image and is_training:
    dataset = dataset.map(cached_parser, 64).repeat().map(
        crop_image, 64).batch(batch_size, True)
  else:
    dataset = dataset.map(dataset_parser, 64).batch(batch_size, True)

  if FLAGS.use_space_to_depth:
    dataset = dataset.map(
        lambda images, labels: (tf.space_to_depth(images, 2), labels), 64)

  # Transpose for performance on TPU
  if FLAGS.train_batch_size // FLAGS.num_replicas > 8:
    transpose_array = [1, 2, 3, 0]
  else:
    transpose_array = [1, 2, 0, 3]
  dataset = dataset.map(
      lambda imgs, labels: (tf.transpose(imgs, transpose_array), labels), 64)
  dataset = dataset.map(functools.partial(set_shapes, batch_size), 64)

  dataset = dataset.prefetch(10)
  options = tf.data.Options()
  options.experimental_deterministic = False
  options.experimental_threading.max_intra_op_parallelism = 1
  options.experimental_threading.private_threadpool_size = 48
  dataset = dataset.with_options(options)
  return dataset
def reorg(x, stride):
  return tf.space_to_depth(x, block_size=stride)
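# A minimal shape sketch of the YOLOv2-style "reorg" layer above (assumption:
# TF1, where tf.space_to_depth is a top-level symbol). space_to_depth trades
# spatial resolution for channels losslessly: each stride x stride block of
# pixels becomes stride**2 extra channels.
x = tf.zeros([1, 26, 26, 64])
y = reorg(x, stride=2)  # -> shape [1, 13, 13, 64 * 4] = [1, 13, 13, 256]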
def inference(
    input_lr_dir,
    input_hr_dir,
    input_dir_len,
    num_resblock,
    vsr_scale,
    checkpoint_path,
    output_dir,
    output_pre,
    output_name,
    output_ext,
):
  """Main inference function."""
  if checkpoint_path is None:
    raise ValueError('The checkpoint file is needed to perform the test.')

  # Declare the test data reader
  inference_data = inference_data_loader(input_lr_dir, input_hr_dir,
                                         input_dir_len)
  input_shape = [
      1,
  ] + list(inference_data.inputs[0].shape)
  output_shape = [1, input_shape[1] * vsr_scale, input_shape[2] * vsr_scale, 3]
  oh = input_shape[1] - input_shape[1] // 8 * 8
  ow = input_shape[2] - input_shape[2] // 8 * 8
  paddings = tf.constant([[0, 0], [0, oh], [0, ow], [0, 0]])
  print('input shape:', input_shape)
  print('output shape:', output_shape)

  # build the graph
  inputs_raw = tf.placeholder(tf.float32, shape=input_shape, name='inputs_raw')

  pre_inputs = tf.Variable(
      tf.zeros(input_shape), trainable=False, name='pre_inputs')
  pre_gen = tf.Variable(tf.zeros(output_shape), trainable=False, name='pre_gen')
  pre_warp = tf.Variable(
      tf.zeros(output_shape), trainable=False, name='pre_warp')

  transpose_pre = tf.space_to_depth(pre_warp, vsr_scale)
  inputs_all = tf.concat((inputs_raw, transpose_pre), axis=-1)

  with tf.variable_scope('generator'):
    gen_output = generator_f(inputs_all, 3, num_resblock, vsr_scale,
                             reuse=False)
    # Deprocess the images output from the model, and assign things for the
    # next frame
    with tf.control_dependencies([tf.assign(pre_inputs, inputs_raw)]):
      outputs = tf.assign(pre_gen, ops.deprocess(gen_output))

  inputs_frames = tf.concat((pre_inputs, inputs_raw), axis=-1)
  with tf.variable_scope('fnet'):
    gen_flow_lr = fnet(inputs_frames, reuse=False)
    gen_flow_lr = tf.pad(gen_flow_lr, paddings, 'SYMMETRIC')

    deconv_flow = gen_flow_lr
    deconv_flow = ops.conv2_tran(
        deconv_flow, 3, 64, 2, scope='deconv_flow_tran1')
    deconv_flow = tf.nn.relu(deconv_flow)
    deconv_flow = ops.conv2_tran(
        deconv_flow, 3, 64, 2, scope='deconv_flow_tran2')
    deconv_flow = tf.nn.relu(deconv_flow)
    deconv_flow = ops.conv2(deconv_flow, 3, 2, 1, scope='deconv_flow_conv')
    gen_flow = ops.upscale_x(gen_flow_lr * 4.0, scale=vsr_scale)
    gen_flow = deconv_flow + gen_flow
    gen_flow.set_shape(output_shape[:-1] + [2])
  pre_warp_hi = tfa.image.dense_image_warp(pre_gen, gen_flow)
  pre_warp_hi = pre_warp_hi + extract_detail_ops(pre_warp_hi)
  before_ops = tf.assign(pre_warp, pre_warp_hi)

  print('Finish building the network')

  if FLAGS.use_ema:
    moving_average_decay = 0.99
    global_step = tf.train.get_or_create_global_step()
    ema = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    ema_vars = _get_ema_vars()

  # At inference time, we only need to restore the weights of the generator
  var_list = tf.trainable_variables()

  restore_vars_dict = {}
  if FLAGS.use_ema:
    for v in var_list:
      if re.match(v.name, '.*global_step.*'):
        restore_vars_dict[v.name[:-2]] = v
      else:
        restore_vars_dict[v.name[:-2] + '/ExponentialMovingAverage'] = v
  else:
    restore_vars_dict = var_list

  weight_initializer = tf.train.Saver(restore_vars_dict)

  # Define the initialization operation
  init_op = tf.global_variables_initializer()
  local_init_op = tf.local_variables_initializer()

  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  if not gfile.exists(output_dir):
    gfile.mkdir(output_dir)
  if not output_pre:
    image_dir = output_dir
  else:
    image_dir = os.path.join(output_dir, output_pre)
  if not gfile.exists(image_dir):
    gfile.mkdir(image_dir)

  with tf.Session(config=config) as sess:
    # Load the pretrained model
    sess.run(init_op)
    sess.run(local_init_op)
    print('Loading weights from ckpt model')
    weight_initializer.restore(sess, checkpoint_path)
    max_iter = len(inference_data.inputs)

    srtime = 0
    print('Frame evaluation starts!!')
    for i in range(max_iter):
      input_im = np.array([inference_data.inputs[i]]).astype(np.float32)
      feed_dict = {inputs_raw: input_im}
      t0 = time.time()
      if i != 0:
        sess.run(before_ops, feed_dict=feed_dict)
      output_frame = sess.run(outputs, feed_dict=feed_dict)
      srtime += time.time() - t0

      if i >= 5:
        name, _ = os.path.splitext(
            os.path.basename(str(inference_data.paths_LR[i])))
        filename = output_name + '_' + name
        out_path = os.path.join(image_dir, '%s.%s' % (filename, output_ext))
        print('saving image %s' % out_path)
        with tf.gfile.Open(out_path, 'wb') as image_file:
          img = np.clip(output_frame[0] * 255.0, 0, 255).astype(np.uint8)
          _, buff = cv2.imencode('.png', img[:, :, ::-1])
          image_file.write(buff.tostring())
      else:
        print('Warming up %d' % (5 - i))
  tf.reset_default_graph()
  print('total time ' + str(srtime) + ', frame number ' + str(max_iter))
def _PDS(self, X, r):
  # Periodic down-shuffling: fold each r x r spatial block into the channel
  # dimension (the inverse of sub-pixel / pixel-shuffle upsampling).
  X = tf.space_to_depth(X, r)
  return X
# build the graph
inputs_raw = tf.placeholder(tf.float32, shape=input_shape, name='inputs_raw')
pre_inputs = tf.Variable(tf.zeros(input_shape), trainable=False,
                         name='pre_inputs')
pre_gen = tf.Variable(tf.zeros(output_shape), trainable=False, name='pre_gen')
pre_warp = tf.Variable(tf.zeros(output_shape), trainable=False,
                       name='pre_warp')

transpose_pre = tf.space_to_depth(pre_warp, 4)
inputs_all = tf.concat((inputs_raw, transpose_pre), axis=-1)

with tf.variable_scope('generator'):
  gen_output = generator_F(inputs_all, 3, reuse=False, FLAGS=FLAGS)
  # Deprocess the images output from the model, and assign things for the
  # next frame
  with tf.control_dependencies([tf.assign(pre_inputs, inputs_raw)]):
    outputs = tf.assign(pre_gen, deprocess(gen_output))

inputs_frames = tf.concat((pre_inputs, inputs_raw), axis=-1)
with tf.variable_scope('fnet'):
  gen_flow_lr = fnet(inputs_frames, reuse=False)
  gen_flow_lr = tf.pad(gen_flow_lr, paddings, "SYMMETRIC")
  gen_flow = upscale_four(gen_flow_lr * 4.0)
  gen_flow.set_shape(output_shape[:-1] + [2])

pre_warp_hi = tfa.image.dense_image_warp(pre_gen, gen_flow)
before_ops = tf.assign(pre_warp, pre_warp_hi)