def inception_v3_base(inputs, final_endpoint='Mixed_7c', min_depth=16, depth_multiplier=1.0, scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  NOTE(review): several scope names below intentionally deviate from the
  obvious pattern (e.g. 'Conv_1_0c_5x5' in Mixed_5c, 'Conv2d_1a_1x1' for 3x3
  convs in Mixed_6a, duplicated 'Conv2d_0b_3x1' in Mixed_7b). These match the
  historical slim release and must not be "fixed" or pretrained checkpoints
  will fail to load.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters or
      computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
      losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or depth_multiplier <= 0
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  # Scale a nominal channel count by depth_multiplier, never dropping below
  # min_depth.
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs]):
    # Stem: plain convs/pools use VALID padding by default here.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = slim.conv2d(net, depth(64), [3, 3], padding='SAME',
                        scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 35 x 35 x 192.

    # Inception blocks
    # Inside the mixed blocks the default padding switches to SAME; the
    # grid-reduction blocks (Mixed_6a, Mixed_7a) override to VALID explicitly.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
                                 scope='Conv2d_0b_1x1')
        # Concatenate along the channel axis (NHWC layout).
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          # NOTE(review): irregular scope names kept for checkpoint compat.
          branch_1 = slim.conv2d(net, depth(48), [1, 1],
                                 scope='Conv2d_0b_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv_1_0c_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_3: 17 x 17 x 768.
      # Grid-size reduction: stride-2 VALID branches plus max-pool.
      end_point = 'Mixed_6a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed4: 17 x 17 x 768.
      # Factorized 7x7 convolutions (1x7 followed by 7x1 and vice versa).
      end_point = 'Mixed_6b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(128), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(128), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_5: 17 x 17 x 768.
      # Same topology as Mixed_6b with 160-channel intermediates.
      end_point = 'Mixed_6c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_8: 8 x 8 x 1280.
      # Second grid-size reduction.
      end_point = 'Mixed_7a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_9: 8 x 8 x 2048.
      # Expanded-filter-bank blocks: 1x3/3x1 pairs concatenated in parallel.
      end_point = 'Mixed_7b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1],
                                 scope='Conv2d_0a_1x1')
          # NOTE(review): both scopes below use '0b' — historical quirk,
          # kept for checkpoint compat.
          branch_1 = tf.concat(axis=3, values=[
              slim.conv2d(
                  branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')
          ])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(384), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(axis=3, values=[
              slim.conv2d(
                  branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
          ])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1],
                                 scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_1 = tf.concat(axis=3, values=[
              slim.conv2d(
                  branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')
          ])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(384), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(axis=3, values=[
              slim.conv2d(
                  branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
          ])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=3, values=[branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
def add_contrastive_loss(hidden, hidden_norm=True, temperature=1.0, tpu_context=None, weights=1.0):
  """Computes the contrastive loss between the two views stacked in `hidden`.

  Args:
    hidden: hidden vector (`Tensor`) of shape (2 * bsz, dim); the first half
      and second half are the two views of the same batch, in matching order.
    hidden_norm: whether or not to use normalization on the hidden vector.
    temperature: a `floating` number for temperature scaling.
    tpu_context: context information for tpu.
    weights: a weighting number or vector.

  Returns:
    A loss scalar.
    The logits for contrastive prediction task.
    The labels for contrastive prediction task.
  """
  if hidden_norm:
    hidden = tf.math.l2_normalize(hidden, -1)
  # The two augmented views are stacked along axis 0; split them apart.
  view_a, view_b = tf.split(hidden, 2, 0)
  local_bs = tf.shape(view_a)[0]

  if tpu_context is None:
    # Single-replica case: positives sit on the diagonal.
    all_a = view_a
    all_b = view_b
    labels = tf.one_hot(tf.range(local_bs), local_bs * 2)
    masks = tf.one_hot(tf.range(local_bs), local_bs)
  else:
    # Gather hidden representations across replicas; this replica's positives
    # live at an offset of replica_id * local_bs in the gathered tensor.
    all_a = tpu_cross_replica_concat(view_a, tpu_context)
    all_b = tpu_cross_replica_concat(view_b, tpu_context)
    global_bs = tf.shape(all_a)[0]
    # TODO(iamtingchen): more elegant way to convert u32 to s32 for replica_id.
    replica_id = tf.cast(tf.cast(xla.replica_id(), tf.uint32), tf.int32)
    positive_idx = tf.range(local_bs) + replica_id * local_bs
    labels = tf.one_hot(positive_idx, global_bs * 2)
    masks = tf.one_hot(positive_idx, global_bs)

  def _scaled_sim(lhs, rhs):
    # Temperature-scaled pairwise similarity logits.
    return tf.matmul(lhs, rhs, transpose_b=True) / temperature

  # Within-view self-similarities are masked out so an example can never be
  # its own positive.
  logits_aa = _scaled_sim(view_a, all_a) - masks * LARGE_NUM
  logits_bb = _scaled_sim(view_b, all_b) - masks * LARGE_NUM
  logits_ab = _scaled_sim(view_a, all_b)
  logits_ba = _scaled_sim(view_b, all_a)

  loss_a = tf.losses.softmax_cross_entropy(
      labels, tf.concat([logits_ab, logits_aa], 1), weights=weights)
  loss_b = tf.losses.softmax_cross_entropy(
      labels, tf.concat([logits_ba, logits_bb], 1), weights=weights)
  return loss_a + loss_b, logits_ab, labels
def test_decompress(args):
  """Decompresses an image and reports rate-distortion metrics.

  Reads a packed compressed representation from args.input_file, rebuilds the
  decoder graph, and for a decreasing number of active latent channels prints
  MSE, PSNR and MS-SSIM against the reference image "kodak/kodim01.png".

  Args:
    args: namespace with input_file, num_filters and checkpoint_dir attributes.
  """
  # Read the shape information and compressed string from the binary file.
  string = tf.placeholder(tf.string, [1])
  side_string = tf.placeholder(tf.string, [1])
  x_shape = tf.placeholder(tf.int32, [2])
  y_shape = tf.placeholder(tf.int32, [2])
  z_shape = tf.placeholder(tf.int32, [2])
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = [string, side_string, x_shape, y_shape, z_shape]
  arrays = packed.unpack(tensors)

  # Instantiate model.
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)

  # Decompress the hyperprior and the latents.
  z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
  z_hat = entropy_bottleneck.decompress(
      side_string, z_shape, channels=args.num_filters)
  sigma = hyper_synthesis_transform(z_hat)
  sigma = sigma[:, :y_shape[0], :y_shape[1], :]
  scale_table = np.exp(
      np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table,
                                                   dtype=tf.float32)
  y_hat_all = conditional_bottleneck.decompress(string)

  # Reference image, scaled to [0, 255] to match the rounded reconstruction.
  # NOTE(review): the reference is hard-coded to kodim01 rather than derived
  # from args — presumably intentional for this experiment; confirm.
  x = read_png("kodak/kodim01.png")
  x = tf.expand_dims(x, 0)
  x.set_shape([1, None, None, 3])
  x *= 255

  sess = tf.Session()
  latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
  tf.train.Saver().restore(sess, save_path=latest)

  # Evaluate reconstruction quality with progressively fewer latent channels.
  # (The duplicate metric-graph construction for active=192 that used to sit
  # before this loop was removed: the first iteration covers it.)
  for active in range(192, 0, -8):
    y_hat = y_hat_all[:, :, :, :active]
    x_hat = synthesis_transform(y_hat)
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))
    vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim],
                                    feed_dict=dict(zip(tensors, arrays)))
    print(active, vmse, vpsnr, vmsssim)
def _build_activation_vars(self, input_act_vars):
  """Joins the incoming activation tensors into one tensor along self.axis."""
  concat_axis = self.axis
  return tf.concat(values=input_act_vars, axis=concat_axis)
def detection_loss(cls_outputs, box_outputs, labels, params):
  """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.

  Args:
    cls_outputs: an OrderDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    labels: the dictionary that returned from dataloader that includes
      groundtruth targets.
    params: the dictionary including training parameters specified in
      default_haprams function in this file.

  Returns:
    total_loss: an integer tensor representing total loss reducing from
      class and box losses from all levels.
    cls_loss: an integer tensor representing total class loss.
    box_loss: an integer tensor representing total box regression loss.
    box_iou_loss: an integer tensor representing total box iou loss.
  """
  # Sum all positives in a batch for normalization and avoid zero
  # num_positives_sum, which would lead to inf loss during training
  num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
  positives_momentum = params.get('positives_momentum', None) or 0
  if positives_momentum > 0:
    # normalize the num_positive_examples for training stability.
    moving_normalizer_var = tf.Variable(
        0.0,
        name='moving_normalizer',
        dtype=tf.float32,
        synchronization=tf.VariableSynchronization.ON_READ,
        trainable=False,
        aggregation=tf.VariableAggregation.MEAN)
    num_positives_sum = tf.keras.backend.moving_average_update(
        moving_normalizer_var,
        num_positives_sum,
        momentum=params['positives_momentum'])
  elif positives_momentum < 0:
    # Negative momentum: average the normalizer across replicas instead.
    num_positives_sum = utils.cross_replica_mean(num_positives_sum)

  levels = cls_outputs.keys()
  cls_losses = []
  box_losses = []
  for level in levels:
    # Onehot encoding for classification labels.
    cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                      params['num_classes'])
    # Flatten the one-hot class dimension into the anchor dimension so the
    # targets match the layout of cls_outputs for this data_format.
    if params['data_format'] == 'channels_first':
      bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, -1, width, height])
    else:
      bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, width, height, -1])
    box_targets_at_level = labels['box_targets_%d' % level]
    cls_loss = focal_loss(
        cls_outputs[level],
        cls_targets_at_level,
        params['alpha'],
        params['gamma'],
        normalizer=num_positives_sum,
        label_smoothing=params['label_smoothing'])
    # Un-flatten back to [..., num_anchors, num_classes] so the per-anchor
    # ignore mask below can broadcast over classes.
    if params['data_format'] == 'channels_first':
      cls_loss = tf.reshape(cls_loss,
                            [bs, -1, width, height, params['num_classes']])
    else:
      cls_loss = tf.reshape(cls_loss,
                            [bs, width, height, -1, params['num_classes']])
    # Zero out the loss of anchors labelled -2 (ignore marker in the targets).
    cls_loss *= tf.cast(
        tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2), -1),
        tf.float32)
    # NOTE(review): clipping the per-level class loss into [0, 2] deviates
    # from the standard implementation (plain reduce_sum) — looks like a
    # deliberate fork-specific stabilization; confirm before changing.
    cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))
    if params['box_loss_weight']:
      box_losses.append(
          _box_loss(
              box_outputs[level],
              box_targets_at_level,
              num_positives_sum,
              delta=params['delta']))

  if params['iou_loss_type']:
    # Decode both predictions and targets into absolute boxes before
    # computing the IoU loss across all levels at once.
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'],
                                    params['image_size'])
    box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
    box_outputs = tf.concat(box_output_list, axis=0)
    box_target_list = [
        tf.reshape(labels['box_targets_%d' % level], [-1, 4])
        for level in levels
    ]
    box_targets = tf.concat(box_target_list, axis=0)
    anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
    box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
    box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
    box_iou_loss = _box_iou_loss(box_outputs, box_targets, num_positives_sum,
                                 params['iou_loss_type'])
  else:
    box_iou_loss = 0

  # Sum per level losses to total loss.
  cls_loss = tf.add_n(cls_losses)
  box_loss = tf.add_n(box_losses) if box_losses else 0
  total_loss = (
      cls_loss + params['box_loss_weight'] * box_loss +
      params['iou_loss_weight'] * box_iou_loss)
  return total_loss, cls_loss, box_loss, box_iou_loss
def quantizable_concat(inputs,
                       axis,
                       is_training,
                       is_quantized=True,
                       default_min=0,
                       default_max=6,
                       ema_decay=0.999,
                       scope='quantized_concat'):
  """Concat replacement with quantization option.

  Allows concat inputs to share the same min max ranges,
  from experimental/gazelle/synthetic/model/tpu/utils.py.

  Args:
    inputs: list of tensors to concatenate.
    axis: dimension along which to concatenate.
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from concatenation of input tensors
  """
  # Plain concat when quantization is disabled.
  if not is_quantized:
    return tf.concat(inputs, axis=axis)

  with tf.variable_scope(scope):
    tf.logging.info('inputs: {}'.format(inputs))
    for t in inputs:
      tf.logging.info(t)

    # Shared range variables so every input is quantized with the same scale.
    min_var = _quant_var('min', default_min)
    max_var = _quant_var('max', default_max)

    if not is_training:
      # If we are building an eval graph just use the values in the variables.
      quant_inputs = [
          tf.fake_quant_with_min_max_vars(t, min_var, max_var) for t in inputs
      ]
      tf.logging.info('min_val: {}'.format(min_var))
      tf.logging.info('max_val: {}'.format(max_var))
    else:
      concat_tensors = tf.concat(inputs, axis=axis)
      tf.logging.info('concat_tensors: {}'.format(concat_tensors))
      # TFLite requires that 0.0 is always in the [min; max] range.
      range_min = tf.minimum(
          tf.reduce_min(concat_tensors), 0.0, name='SafeQuantRangeMin')
      range_max = tf.maximum(
          tf.reduce_max(concat_tensors), 0.0, name='SafeQuantRangeMax')
      # During training, track moving averages of the observed min and max of
      # the elements of the input tensors.
      min_val = moving_averages.assign_moving_average(
          min_var, range_min, ema_decay, name='AssignMinEma')
      max_val = moving_averages.assign_moving_average(
          max_var, range_max, ema_decay, name='AssignMaxEma')
      tf.logging.info('min_val: {}'.format(min_val))
      tf.logging.info('max_val: {}'.format(max_val))
      quant_inputs = [
          tf.fake_quant_with_min_max_vars(t, min_val, max_val) for t in inputs
      ]

    tf.logging.info('quant_inputs: {}'.format(quant_inputs))
    outputs = tf.concat(quant_inputs, axis=axis)
    tf.logging.info('outputs: {}'.format(outputs))
  return outputs
def ssd_parse_example_proto(example_serialized):
  """Parses an Example proto containing a training example of an image.

  Each Example proto contains the following fields that we care about:
    image/encoded: <JPEG encoded string>
    image/source_id: tf.string
    image/height: tf.int64
    image/width: tf.int64
    image/object/bbox/xmin: tf.VarLenFeature(tf.float32)
    image/object/bbox/xmax: tf.VarLenFeature(tf.float32)
    image/object/bbox/ymin: tf.VarLenFeature(tf.float32)
    image/object/bbox/ymax: tf.VarLenFeature(tf.float32)
    image/object/class/label: tf.VarLenFeature(tf.int64)
    image/object/class/text: tf.VarLenFeature(tf.string)

  Complete decoder can be found in:
  https://github.com/tensorflow/models/blob/master/research/object_detection/data_decoders/tf_example_decoder.py

  Args:
    example_serialized: scalar Tensor tf.string containing a serialized
      Example protocol buffer.

  Returns:
    A dictionary with the following key-values:
      image_buffer: Tensor tf.string containing the contents of a JPEG file.
      groundtruth_boxes: Tensor tf.float32 of shape [num_boxes, 4], containing
        coordinates of object bounding boxes.
      groundtruth_classes: Tensor tf.int64 of shape [num_boxes, 1], containing
        class labels of objects.
      source_id: unique image identifier.
      raw_shape: [height, width, 3].
  """
  feature_map = {
      'image/encoded':
          tf.FixedLenFeature((), dtype=tf.string, default_value=''),
      'image/source_id':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/height':
          tf.FixedLenFeature((), tf.int64, default_value=1),
      'image/width':
          tf.FixedLenFeature((), tf.int64, default_value=1),
      'image/object/class/label':
          tf.VarLenFeature(dtype=tf.int64),
  }
  # Per-corner box coordinates share the same sparse float spec.
  for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
    feature_map['image/object/bbox/%s' % corner] = tf.VarLenFeature(
        dtype=tf.float32)

  parsed = tf.parse_single_example(example_serialized, feature_map)

  def _box_column(corner):
    # Densify one sparse per-box coordinate into shape [num_boxes, 1].
    return tf.expand_dims(parsed['image/object/bbox/%s' % corner].values, 1)

  # Bounding box coordinates should be in ltrb order
  boxes = tf.concat(
      [_box_column('ymin'),
       _box_column('xmin'),
       _box_column('ymax'),
       _box_column('xmax')], 1)
  classes = tf.expand_dims(parsed['image/object/class/label'].values, 1)
  raw_shape = tf.stack([parsed['image/height'], parsed['image/width'], 3])

  return {
      'image_buffer': parsed['image/encoded'],
      'groundtruth_boxes': boxes,
      'groundtruth_classes': classes,
      'source_id': parsed['image/source_id'],
      'raw_shape': raw_shape,
  }
def input_fn(data_files,
             batch_size,
             repeat=-1,
             data_source=DataSource.RICO_SCA,
             required_agreement=2,
             max_range=1000,
             max_dom_pos=2000,
             max_pixel_pos=100,
             load_dom_dist=False,
             load_extra=False,
             buffer_size=8 * 1024,
             shuffle_size=8 * 1024,
             required_rule_id_list=None,
             shuffle_repeat=True,
             mean_synthetic_length=1.0,
             stddev_synthetic_length=0.0,
             load_screen=True,
             shuffle_files=True):
  """Retrieves batches of data for training.

  Adds padding to ensure all dimension in one batch are always same.

  Args:
    data_files: A list of file names to initialize the TFRecordDataset
    batch_size: Number for the size of the batch.
    repeat: the number of times to repeat the input data.
    data_source: A DataSource instance.
    required_agreement: the minimum agreement required.
    max_range: the max range.
    max_dom_pos: the max dom pos.
    max_pixel_pos: the max screen pixels.
    load_dom_dist: whether to load the dom distance feature.
    load_extra: whether to load the raw text data.
    buffer_size: the buffer size for prefetching.
    shuffle_size: the shuffle size.
    required_rule_id_list: the list of required rule ids.
    shuffle_repeat: whether to shuffle and repeat.
    mean_synthetic_length: the mean length for synthetic sequence.
    stddev_synthetic_length: the stddev length for synthetic sequence.
    load_screen: whether to load screen features.
    shuffle_files: shuffling file names.

  Returns:
    a tf.dataset.Dateset object.

  Raises:
    ValueError: if data_source is not a DataSource instance, or the data
      format is neither 'recordio' nor 'tfrecord'.
  """
  # Fix: validate with real exceptions instead of `assert False` — asserts are
  # stripped under `python -O`, and the docstring promises ValueError.
  if not isinstance(data_source, DataSource):
    raise ValueError('data_source %s unsupported' % str(data_source))
  padded_shapes, padded_values = _construct_padding_info(
      data_source, load_dom_dist, load_extra)
  if not isinstance(data_files, list):
    data_files = [data_files]
  all_files = tf.concat(
      values=[tf.matching_files(f) for f in data_files], axis=0)
  # Only shuffle file order for the endlessly-repeating (training) case.
  if repeat == -1 and shuffle_files:
    all_files = tf.random.shuffle(all_files)
  if data_files[0].endswith('.recordio'):
    dataset = tf.data.RecordIODataset(all_files)
  elif data_files[0].endswith('.tfrecord'):
    dataset = tf.data.TFRecordDataset(
        all_files, num_parallel_reads=10 if repeat == -1 else None)
  else:
    raise ValueError('Data_format %s is not supported.' % data_files[0])

  def _map_fn(x):
    # EOS is only appended when no synthetic stitching will happen later.
    return parse_tf_example(
        x,
        data_source,
        max_range,
        max_dom_pos,
        max_pixel_pos,
        load_dom_dist=load_dom_dist,
        load_extra=load_extra,
        append_eos=(data_source != DataSource.RICO_SCA or
                    mean_synthetic_length == 1.0),
        load_screen=load_screen)

  dataset = dataset.map(_map_fn)

  def _is_enough_agreement(example):
    return tf.greater_equal(example['agreement_count'], required_agreement)

  dataset = dataset.filter(_is_enough_agreement)

  def _length_filter(example):
    # Drop overly long action sequences (>= 20 object references).
    return tf.less(tf.shape(example['obj_refs'])[0], 20)

  dataset = dataset.filter(_length_filter)

  def _filter_data_by_rule(example, rule_id_list):
    return tf.reduce_any(
        [tf.equal(example['rule'], rule_id) for rule_id in rule_id_list])

  if data_source == DataSource.RICO_SCA and required_rule_id_list is not None:
    dataset = dataset.filter(
        lambda x: _filter_data_by_rule(x, required_rule_id_list))
  # (TODO: liyang) tf.data.experimental.bucket_by_sequence_length
  if shuffle_repeat:
    dataset = dataset.apply(
        tf.data.experimental.shuffle_and_repeat(shuffle_size, count=repeat))
  dataset = dataset.padded_batch(
      batch_size, padded_shapes=padded_shapes, padding_values=padded_values)
  if data_source == DataSource.RICO_SCA and mean_synthetic_length > 1.0:

    def _stitch_fn(x):
      # Stitch short synthetic examples into longer multi-step sequences.
      return _batch_stitch(
          x,
          mean_length=mean_synthetic_length,
          stddev=stddev_synthetic_length)

    dataset = dataset.map(_stitch_fn)
  dataset = dataset.prefetch(buffer_size=buffer_size)
  return dataset
def parse_tf_example(example_proto,
                     data_source,
                     max_range=100,
                     max_dom_pos=2000,
                     max_pixel_pos=100,
                     load_dom_dist=False,
                     load_extra=False,
                     append_eos=True,
                     load_screen=True):
  """Parses an example TFRecord proto into a dictionary of tensors.

  Args:
    example_proto: TFRecord format proto that contains screen information.
    data_source: A DataSource instance selecting the dataset-specific
      post-processing (`_process_android_howto`, `_process_rico_sca` or
      `_process_pixel_help`).
    max_range: the max range.
    max_dom_pos: the maximum dom position.
    max_pixel_pos: the maximum pixel position; screen coordinates are
      discretized into [0, max_pixel_pos - 1].
    load_dom_dist: whether to load the dom-distance feature.
    load_extra: whether to load the extra (string/debugging) features.
    append_eos: whether to append an EOS step to the parsed features.
    load_screen: whether to load screen features.

  Returns:
    The parsed tensor dictionary with the input feature data.

  Raises:
    ValueError: if data_source is not one of the supported DataSource values.
  """
  # Sequence features common to all data sources. `allow_missing=True` lets
  # examples omit a feature and yields an empty tensor instead of failing.
  feature_spec = {
      'instruction_word_id_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'input_str_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'obj_desc_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'verb_str_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'agreement_count':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'instruction_rule_id':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
  }
  # Per-screen UI object features (ids, types, clickability, coordinates,
  # DOM location) are only parsed when the screen is needed.
  if load_screen:
    feature_spec['verb_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_target_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_word_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_type_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_clickable_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_cord_x_seq'] = tf.FixedLenSequenceFeature(
        [], tf.float32, allow_missing=True)
    feature_spec['ui_obj_cord_y_seq'] = tf.FixedLenSequenceFeature(
        [], tf.float32, allow_missing=True)
    feature_spec['ui_obj_dom_location_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)

  if load_dom_dist:
    feature_spec['ui_obj_dom_distance'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
  if load_extra:
    # Raw strings, kept only for debugging/inspection.
    feature_spec['instruction_str'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)
    feature_spec['task_id'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)
    feature_spec['ui_obj_str_seq'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)

  feature_dict = tf.parse_single_example(example_proto, feature_spec)

  # Downcast every integer feature: the rest of the pipeline works in int32.
  for key in feature_dict:
    if feature_dict[key].dtype == tf.int64:
      feature_dict[key] = tf.cast(feature_dict[key], tf.int32)
  # Dispatch to the dataset-specific feature post-processing.
  if data_source == DataSource.ANDROID_HOWTO:
    tf.logging.info('Parsing android_howto dataset')
    feature = _process_android_howto(feature_dict, max_range=max_range,
                                     load_dom_dist=load_dom_dist,
                                     load_extra=load_extra)
  elif data_source == DataSource.RICO_SCA:
    tf.logging.info('Parsing synthetic dataset')
    feature = _process_rico_sca(
        feature_dict, max_range=max_range, max_dom_pos=max_dom_pos,
        load_dom_dist=load_dom_dist,
        load_extra=load_extra,
        load_screen=load_screen)
  elif data_source == DataSource.PIXEL_HELP:
    tf.logging.info('Parsing test dataset')
    feature = _process_pixel_help(feature_dict, data_source,
                                  load_dom_dist=load_dom_dist,
                                  load_extra=load_extra)
  else:
    raise ValueError('Unsupported datasource %s' % str(data_source))
  # Remove padding from "task"
  feature['task'] = tf.boolean_mask(feature['task'],
                                    tf.not_equal(feature['task'], 0))
  # Discretize normalized screen positions into integer pixel buckets.
  # NOTE(review): assumes obj_screen_pos is normalized to [0, 1] at this
  # point — confirm against the _process_* helpers.
  feature['obj_screen_pos'] = tf.to_int32(
      feature['obj_screen_pos'] * (max_pixel_pos - 1))
  # Appending EOS and padding to match the appended length
  if append_eos:
    feature['input_refs'] = tf.pad(feature['input_refs'], [[0, 1], [0, 0]])
    feature['obj_refs'] = tf.pad(feature['obj_refs'], [[0, 1], [0, 0]])
    step_num = tf.size(feature['task'])
    # The EOS verb reference points just past the end of the task tokens.
    feature['verb_refs'] = tf.concat(
        [feature['verb_refs'], [[step_num, step_num + 1]]], axis=0)
    # Task is padded with token id 1 (EOS); obj_type with -1 (no object).
    feature['task'] = tf.pad(feature['task'], [[0, 1]], constant_values=1)
    feature['obj_text'] = tf.pad(feature['obj_text'],
                                 [[0, 1], [0, 0], [0, 0]])
    feature['obj_clickable'] = tf.pad(feature['obj_clickable'],
                                      [[0, 1], [0, 0]])
    feature['obj_type'] = tf.pad(
        feature['obj_type'], [[0, 1], [0, 0]], constant_values=-1)
    feature['obj_screen_pos'] = tf.pad(feature['obj_screen_pos'],
                                       [[0, 1], [0, 0], [0, 0]])
    feature['obj_dom_pos'] = tf.pad(feature['obj_dom_pos'],
                                    [[0, 1], [0, 0], [0, 0]])
    if load_dom_dist:
      feature['obj_dom_dist'] = tf.pad(feature['obj_dom_dist'],
                                       [[0, 1], [0, 0], [0, 0]])
    feature['objects'] = tf.pad(feature['objects'], [[0, 1]])
    feature['verbs'] = tf.pad(feature['verbs'], [[0, 1]])
  return feature
def _build_sampler(self):
  """Build the sampler ops and the log_prob ops.

  Builds a recurrent controller that, for each layer 1..num_layers, samples
  (a) a skip-connection index into the previously emitted hidden states and
  (b) an op/function id from the `w_emb` vocabulary. Populates:
    self.sample_arc: int32 tensor with the sampled architecture sequence
      (alternating skip_index, func per layer).
    self.sample_log_probs: per-decision log probabilities (as
      cross-entropy of each decision under its own logits).
    self.ppl: exp of the mean sample log prob.
    self.sample_entropy: sum of per-decision entropies (gradients stopped).
    self.all_h: list of the layer hidden states (plus the initial zeros).
  """
  hidden_size = self.params.controller_hidden_size
  num_layers = self.params.controller_num_layers

  arc_seq = []
  sample_log_probs = []
  sample_entropy = []
  # Seed the attention memory with a zero state so layer 1 has something
  # to attend over.
  all_h = [tf.zeros([1, hidden_size], dtype=tf.float32)]
  all_h_w = [tf.zeros([1, hidden_size], dtype=tf.float32)]

  # sampler ops
  inputs = self.g_emb
  prev_c = tf.zeros([1, hidden_size], dtype=tf.float32)
  prev_h = tf.zeros([1, hidden_size], dtype=tf.float32)

  # NOTE(review): this re-assignment of `inputs` is redundant with the one
  # above — kept as-is.
  inputs = self.g_emb
  for layer_id in range(1, num_layers + 1):
    # First LSTM step of this layer: decide the skip connection via
    # attention over all previous hidden states.
    next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h
    all_h.append(next_h)
    all_h_w.append(tf.matmul(next_h, self.attn_w_1))

    query = tf.matmul(next_h, self.attn_w_2)
    query = query + tf.concat(all_h_w[:-1], axis=0)
    query = tf.tanh(query)
    logits = tf.matmul(query, self.attn_v)
    logits = tf.reshape(logits, [1, layer_id])

    if self.params.controller_temperature:
      logits /= self.params.controller_temperature
    if self.params.controller_tanh_constant:
      logits = self.params.controller_tanh_constant * tf.tanh(logits)
    # Penalize attending far back: quadratic distance penalty on the logits.
    diff = tf.to_float(layer_id - tf.range(0, layer_id))**2
    logits -= tf.reshape(diff, [1, layer_id]) / 6.0

    skip_index = tf.multinomial(logits, 1)
    skip_index = tf.to_int32(skip_index)
    skip_index = tf.reshape(skip_index, [1])
    arc_seq.append(skip_index)

    # Cross-entropy of the sampled index == -log p(skip_index).
    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=skip_index)
    sample_log_probs.append(log_prob)

    entropy = log_prob * tf.exp(-log_prob)
    sample_entropy.append(tf.stop_gradient(entropy))

    # Feed the selected hidden state back in, scaled down by how far back
    # the skip connection reaches.
    inputs = tf.nn.embedding_lookup(
        tf.concat(all_h[:-1], axis=0), skip_index)
    inputs /= (0.1 + tf.to_float(layer_id - skip_index))

    # Second LSTM step of this layer: decide the op/function id.
    next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h
    logits = tf.matmul(next_h, self.w_emb, transpose_b=True)
    if self.params.controller_temperature:
      logits /= self.params.controller_temperature
    if self.params.controller_tanh_constant:
      logits = self.params.controller_tanh_constant * tf.tanh(logits)
    func = tf.multinomial(logits, 1)
    func = tf.to_int32(func)
    func = tf.reshape(func, [1])
    arc_seq.append(func)
    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=func)
    sample_log_probs.append(log_prob)
    entropy = log_prob * tf.exp(-log_prob)
    sample_entropy.append(tf.stop_gradient(entropy))
    # The chosen op's embedding is the input to the next layer's first step.
    inputs = tf.nn.embedding_lookup(self.w_emb, func)

  arc_seq = tf.concat(arc_seq, axis=0)
  self.sample_arc = arc_seq

  self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
  self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

  sample_entropy = tf.concat(sample_entropy, axis=0)
  self.sample_entropy = tf.reduce_sum(sample_entropy)

  self.all_h = all_h
def eager_eval_loop(detection_model,
                    configs,
                    eval_dataset,
                    use_tpu=False,
                    postprocess_on_cpu=False,
                    global_step=None):
  """Evaluate the model eagerly on the evaluation dataset.

  This method will compute the evaluation metrics specified in the configs on
  the entire evaluation dataset, then return the metrics. It will also log
  the metrics to TensorBoard.

  Args:
    detection_model: A DetectionModel (based on Keras) to evaluate.
    configs: Object detection configs that specify the evaluators that should
      be used, as well as whether regularization loss should be included and
      if bfloat16 should be used on TPUs.
    eval_dataset: Dataset containing evaluation data.
    use_tpu: Whether a TPU is being used to execute the model for evaluation.
    postprocess_on_cpu: Whether model postprocessing should happen on the CPU
      when using a TPU to execute the model.
    global_step: A variable containing the training step this model was
      trained to. Used for logging purposes.

  Returns:
    A dict of evaluation metrics representing the results of this evaluation.
  """
  train_config = configs['train_config']
  eval_input_config = configs['eval_input_config']
  eval_config = configs['eval_config']

  add_regularization_loss = train_config.add_regularization_loss

  is_training = False
  detection_model._is_training = is_training  # pylint: disable=protected-access
  tf.keras.backend.set_learning_phase(is_training)

  evaluator_options = eval_util.evaluator_options_from_eval_config(
      eval_config)

  # Class-agnostic evaluators are always built; class-aware ones only when a
  # label map is available. The actual set used is chosen lazily from the
  # first batch's `class_agnostic` flag.
  class_agnostic_category_index = (
      label_map_util.create_class_agnostic_category_index())
  class_agnostic_evaluators = eval_util.get_evaluators(
      eval_config,
      list(class_agnostic_category_index.values()),
      evaluator_options)

  class_aware_evaluators = None
  if eval_input_config.label_map_path:
    class_aware_category_index = (
        label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path))
    class_aware_evaluators = eval_util.get_evaluators(
        eval_config,
        list(class_aware_category_index.values()),
        evaluator_options)

  evaluators = None
  loss_metrics = {}

  @tf.function
  def compute_eval_dict(features, labels):
    """Compute the evaluation result on an image."""
    # For evaling on train data, it is necessary to check whether groundtruth
    # must be unpadded.
    boxes_shape = (labels[
        fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
        detection_model, features, labels, add_regularization_loss)

    def postprocess_wrapper(args):
      return detection_model.postprocess(args[0], args[1])

    # TODO(kaftan): Depending on how postprocessing will work for TPUS w/
    ## TPUStrategy, may be good to move wrapping to a utility method
    if use_tpu and postprocess_on_cpu:
      detections = contrib_tpu.outside_compilation(
          postprocess_wrapper,
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))
    else:
      detections = postprocess_wrapper(
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))

    # A model that emits no per-detection classes is treated as
    # class-agnostic downstream.
    class_agnostic = (fields.DetectionResultFields.detection_classes
                      not in detections)
    # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval to eval_util
    ## and call this from there.
    groundtruth = model_lib._prepare_groundtruth_for_eval(  # pylint: disable=protected-access
        detection_model, class_agnostic,
        eval_input_config.max_number_of_boxes)
    use_original_images = fields.InputDataFields.original_image in features
    if use_original_images:
      eval_images = features[fields.InputDataFields.original_image]
      true_image_shapes = tf.slice(
          features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
      original_image_spatial_shapes = features[
          fields.InputDataFields.original_image_spatial_shape]
    else:
      eval_images = features[fields.InputDataFields.image]
      true_image_shapes = None
      original_image_spatial_shapes = None

    eval_dict = eval_util.result_dict_for_batched_example(
        eval_images,
        features[inputs.HASH_KEY],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True,
        original_image_spatial_shapes=original_image_spatial_shapes,
        true_image_shapes=true_image_shapes)

    return eval_dict, losses_dict, class_agnostic

  agnostic_categories = label_map_util.create_class_agnostic_category_index()
  per_class_categories = label_map_util.create_category_index_from_labelmap(
      eval_input_config.label_map_path)
  keypoint_edges = [(kp.start, kp.end) for kp in eval_config.keypoint_edge]

  for i, (features, labels) in enumerate(eval_dataset):
    eval_dict, losses_dict, class_agnostic = compute_eval_dict(
        features, labels)

    if class_agnostic:
      category_index = agnostic_categories
    else:
      category_index = per_class_categories

    if i % 100 == 0:
      tf.logging.info('Finished eval step %d', i)

    # Write side-by-side (and densepose, when present) visualizations for
    # the first `num_visualizations` batches that carry original images.
    use_original_images = fields.InputDataFields.original_image in features
    if use_original_images and i < eval_config.num_visualizations:
      sbys_image_list = vutils.draw_side_by_side_evaluation_image(
          eval_dict,
          category_index=category_index,
          max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
          min_score_thresh=eval_config.min_score_threshold,
          use_normalized_coordinates=False,
          keypoint_edges=keypoint_edges or None)
      sbys_images = tf.concat(sbys_image_list, axis=0)
      tf.compat.v2.summary.image(
          name='eval_side_by_side_' + str(i),
          step=global_step,
          data=sbys_images,
          max_outputs=eval_config.num_visualizations)
      if eval_util.has_densepose(eval_dict):
        dp_image_list = vutils.draw_densepose_visualizations(eval_dict)
        dp_images = tf.concat(dp_image_list, axis=0)
        tf.compat.v2.summary.image(
            name='densepose_detections_' + str(i),
            step=global_step,
            data=dp_images,
            max_outputs=eval_config.num_visualizations)

    # Pick evaluator set once, based on the first batch.
    if evaluators is None:
      if class_agnostic:
        evaluators = class_agnostic_evaluators
      else:
        evaluators = class_aware_evaluators

    for evaluator in evaluators:
      evaluator.add_eval_dict(eval_dict)

    for loss_key, loss_tensor in iter(losses_dict.items()):
      if loss_key not in loss_metrics:
        loss_metrics[loss_key] = tf.keras.metrics.Mean()
      # Skip the loss with value equal or lower than 0.0 when calculating the
      # average loss since they don't usually reflect the normal loss values
      # causing spurious average loss value.
      if loss_tensor <= 0.0:
        continue
      loss_metrics[loss_key].update_state(loss_tensor)

  eval_metrics = {}

  for evaluator in evaluators:
    eval_metrics.update(evaluator.evaluate())
  for loss_key in loss_metrics:
    eval_metrics[loss_key] = loss_metrics[loss_key].result()

  eval_metrics = {str(k): v for k, v in eval_metrics.items()}
  tf.logging.info('Eval metrics at step %d', global_step)
  for k in eval_metrics:
    tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
    tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
  return eval_metrics
def _build(self, features, parent_transform=None, parent_presence=None):
  """Builds the module.

  Predicts per-capsule pose transforms (CCR), per-vote pose transforms
  (CPR = static + dynamic parts), presence logits and vote scales from the
  input encodings, and composes them into votes.

  Args:
    features: Tensor of encodings of shape [B, n_enc_dims]; when
      `self._n_caps_params is None` they must already have shape
      [B, n_caps, ...] and are used as the capsule params directly.
    parent_transform: Tuple of (matrix, vector); when given, it overrides
      the predicted capsule-coordinate transform (hierarchical use).
    parent_presence: Optional presence tensor from the parent; when given,
      it replaces the predicted per-capsule presence.

  Returns:
    AttrDict with votes, scales, presence probabilities/logits, the
    dynamic-CPR L2 penalty, and the raw capsule params/features.
  """
  batch_size = features.shape.as_list()[0]
  batch_shape = [batch_size, self._n_caps]

  # Predict capsule and additional params from the input encoding.
  # [B, n_caps, n_caps_dims]
  if self._n_caps_params is not None:

    # Use separate parameters to do predictions for different capsules.
    mlp = BatchMLP(self._n_hiddens + [self._n_caps_params])
    raw_caps_params = mlp(features)

    caps_params = tf.reshape(raw_caps_params,
                             batch_shape + [self._n_caps_params])

  else:
    assert features.shape[:2].as_list() == batch_shape
    # BUG FIX: `raw_caps_params` was previously left undefined on this
    # branch, so building the returned AttrDict raised a NameError whenever
    # n_caps_params was None. The inputs are the capsule params here.
    raw_caps_params = features
    caps_params = features

  if self._caps_dropout_rate == 0.0:
    caps_exist = tf.ones(batch_shape + [1], dtype=tf.float32)
  else:
    # Randomly drop capsules by sampling a per-capsule existence bit.
    pmf = tfd.Bernoulli(1. - self._caps_dropout_rate, dtype=tf.float32)
    caps_exist = pmf.sample(batch_shape + [1])

  caps_params = tf.concat([caps_params, caps_exist], -1)

  output_shapes = (
      [self._n_votes, self._n_transform_params],  # CPR_dynamic
      [1, self._n_transform_params],  # CCR
      [1],  # per-capsule presence
      [self._n_votes],  # per-vote-presence
      [self._n_votes],  # per-vote scale
  )

  splits = [np.prod(i).astype(np.int32) for i in output_shapes]
  n_outputs = sum(splits)

  # we don't use bias in the output layer in order to separate the static
  # and dynamic parts of the CPR
  caps_mlp = BatchMLP([self._n_hiddens, n_outputs], use_bias=False)
  all_params = caps_mlp(caps_params)
  all_params = tf.split(all_params, splits, -1)
  res = [
      tf.reshape(i, batch_shape + s)
      for (i, s) in zip(all_params, output_shapes)
  ]

  cpr_dynamic = res[0]

  # add bias to all remaining outputs
  res = [snt.AddBias()(i) for i in res[1:]]
  ccr, pres_logit_per_caps, pres_logit_per_vote, scale_per_vote = res

  if self._caps_dropout_rate != 0.0:
    # Force presence logits of dropped capsules to -inf (log 0).
    pres_logit_per_caps += math_ops.safe_log(caps_exist)

  cpr_static = tf.get_variable(
      'cpr_static',
      shape=[1, self._n_caps, self._n_votes, self._n_transform_params])

  def add_noise(tensor):
    """Adds noise to tensors."""
    if self._noise_type == 'uniform':
      noise = tf.random.uniform(tensor.shape, minval=-.5,
                                maxval=.5) * self._noise_scale

    elif self._noise_type == 'logistic':
      pdf = tfd.Logistic(0., self._noise_scale)
      noise = pdf.sample(tensor.shape)

    elif not self._noise_type:
      noise = 0.

    else:
      raise ValueError('Invalid noise type: "{}".'.format(self._noise_type))

    return tensor + noise

  pres_logit_per_caps = add_noise(pres_logit_per_caps)
  pres_logit_per_vote = add_noise(pres_logit_per_vote)

  # this is for hierarchical
  if parent_transform is None:
    ccr = self._make_transform(ccr)
  else:
    ccr = parent_transform

  if not self._deformations:
    cpr_dynamic = tf.zeros_like(cpr_dynamic)

  cpr = self._make_transform(cpr_dynamic + cpr_static)

  # Compose capsule-coordinate and per-vote transforms into votes.
  ccr_per_vote = snt.TileByDim([2], [self._n_votes])(ccr)
  votes = tf.matmul(ccr_per_vote, cpr)

  if parent_presence is not None:
    pres_per_caps = parent_presence
  else:
    pres_per_caps = tf.nn.sigmoid(pres_logit_per_caps)

  pres_per_vote = pres_per_caps * tf.nn.sigmoid(pres_logit_per_vote)

  if self._learn_vote_scale:
    # for numerical stability
    scale_per_vote = tf.nn.softplus(scale_per_vote + .5) + 1e-2
  else:
    scale_per_vote = tf.zeros_like(scale_per_vote) + 1.

  return AttrDict(
      vote=votes,
      scale=scale_per_vote,
      vote_presence=pres_per_vote,
      pres_logit_per_caps=pres_logit_per_caps,
      pres_logit_per_vote=pres_logit_per_vote,
      dynamic_weights_l2=tf.nn.l2_loss(cpr_dynamic) / batch_size,
      raw_caps_params=raw_caps_params,
      raw_caps_features=features,
  )
def _build(self, x, presence=None):
  """Computes the mixture log-likelihood of points under per-capsule votes.

  Each capsule contributes exactly one vote per input point; a constant
  low-probability "dummy" component is appended so unexplained points do not
  dominate the likelihood.

  Args:
    x: Tensor of input points, [B, n_input_points, n_input_dims].
    presence: Optional per-point mask; zeros out absent points'
      contributions to the log-likelihood.

  Returns:
    self.OutputTuple with the batch log prob, hard/soft winning votes and
    presences, mixing statistics, and which capsule each point came from.
  """
  # x is [B, n_input_points, n_input_dims]
  batch_size, n_input_points = x.shape[:2].as_list()

  # votes and scale have shape [B, n_caps, n_input_points, n_input_dims|1]
  # since scale is a per-caps scalar and we have one vote per capsule
  vote_component_pdf = self._get_pdf(self._votes,
                                     tf.expand_dims(self._scales, -1))

  # expand along caps dimensions -> [B, 1, n_input_points, n_input_dims]
  expanded_x = tf.expand_dims(x, 1)
  vote_log_prob_per_dim = vote_component_pdf.log_prob(expanded_x)
  # [B, n_caps, n_input_points]
  vote_log_prob = tf.reduce_sum(vote_log_prob_per_dim, -1)
  # Dummy component with constant density 10^-2 per point.
  dummy_vote_log_prob = tf.zeros([batch_size, 1, n_input_points])
  dummy_vote_log_prob -= 2. * tf.log(10.)

  # [B, n_caps + 1, n_input_points]
  vote_log_prob = tf.concat([vote_log_prob, dummy_vote_log_prob], 1)

  # [B, n_caps, n_input_points]
  mixing_logits = math_ops.safe_log(self._vote_presence_prob)

  dummy_logit = tf.zeros([batch_size, 1, 1]) - 2. * tf.log(10.)
  dummy_logit = snt.TileByDim([2], [n_input_points])(dummy_logit)

  # [B, n_caps + 1, n_input_points]
  mixing_logits = tf.concat([mixing_logits, dummy_logit], 1)
  mixing_log_prob = mixing_logits - tf.reduce_logsumexp(
      mixing_logits, 1, keepdims=True)

  # [B, n_input_points]
  # NOTE(review): this uses the unnormalized `mixing_logits` rather than
  # `mixing_log_prob`, so the result is an unnormalized mixture log prob —
  # confirm this is intentional.
  mixture_log_prob_per_point = tf.reduce_logsumexp(
      mixing_logits + vote_log_prob, 1)

  if presence is not None:
    presence = tf.to_float(presence)
    mixture_log_prob_per_point *= presence

  # [B,]
  mixture_log_prob_per_example\
      = tf.reduce_sum(mixture_log_prob_per_point, 1)

  # []
  mixture_log_prob_per_batch = tf.reduce_mean(
      mixture_log_prob_per_example)

  # [B, n_caps + 1, n_input_points]
  posterior_mixing_logits_per_point = mixing_logits + vote_log_prob

  # [B, n_input_points]
  # argmax over the capsule axis, excluding the dummy component.
  winning_vote_idx = tf.argmax(posterior_mixing_logits_per_point[:, :-1], 1)

  # Build [batch, winner, point] index triplets for gather_nd.
  batch_idx = tf.expand_dims(tf.range(batch_size, dtype=tf.int64), 1)
  batch_idx = snt.TileByDim([1], [n_input_points])(batch_idx)

  point_idx = tf.expand_dims(tf.range(n_input_points, dtype=tf.int64), 0)
  point_idx = snt.TileByDim([0], [batch_size])(point_idx)

  idx = tf.stack([batch_idx, winning_vote_idx, point_idx], -1)
  winning_vote = tf.gather_nd(self._votes, idx)
  winning_pres = tf.gather_nd(self._vote_presence_prob, idx)
  # A capsule's vote "exists" if it beats the dummy component's logit.
  vote_presence = tf.greater(mixing_logits[:, :-1],
                             mixing_logits[:, -1:])

  # the first four votes belong to the square
  is_from_capsule = winning_vote_idx // self._n_votes

  posterior_mixing_probs = tf.nn.softmax(
      posterior_mixing_logits_per_point, 1)

  # Learned dummy vote so the soft (expected) winner has a component for
  # points explained by the dummy.
  dummy_vote = tf.get_variable('dummy_vote',
                               shape=self._votes[:1, :1].shape)
  dummy_vote = snt.TileByDim([0], [batch_size])(dummy_vote)
  dummy_pres = tf.zeros([batch_size, 1, n_input_points])

  votes = tf.concat((self._votes, dummy_vote), 1)
  pres = tf.concat([self._vote_presence_prob, dummy_pres], 1)

  # Soft winner = posterior-weighted average of all votes.
  soft_winner = tf.reduce_sum(
      tf.expand_dims(posterior_mixing_probs, -1) * votes, 1)
  soft_winner_pres = tf.reduce_sum(posterior_mixing_probs * pres, 1)

  posterior_mixing_probs = tf.transpose(posterior_mixing_probs[:, :-1],
                                        (0, 2, 1))

  assert winning_vote.shape == x.shape

  return self.OutputTuple(
      log_prob=mixture_log_prob_per_batch,
      vote_presence=tf.to_float(vote_presence),
      winner=winning_vote,
      winner_pres=winning_pres,
      soft_winner=soft_winner,
      soft_winner_pres=soft_winner_pres,
      posterior_mixing_probs=posterior_mixing_probs,
      is_from_capsule=is_from_capsule,
      mixing_logits=mixing_logits,
      mixing_log_prob=mixing_log_prob,
  )
def _build(self, x, presence=None):
  """Computes the mixture log-likelihood over all votes for every point.

  Unlike the per-point variant, every input point is scored against every
  vote (order-agnostic), again with an appended low-probability dummy
  component for unexplained points.

  Args:
    x: Tensor of input points, [B, n_input_points, n_input_dims].
    presence: Optional per-point mask; zeros out absent points'
      contributions to the log-likelihood.

  Returns:
    self.OutputTuple; note soft_winner/soft_winner_pres are zero
    placeholders (marked broken below).
  """
  batch_size, n_input_points = x.shape[:2].as_list()

  # we don't know what order the initial points came in, so we need to create
  # a big mixture of all votes for every input point
  # [B, 1, n_votes, n_input_dims]
  expanded_votes = tf.expand_dims(self._votes, 1)
  expanded_scale = tf.expand_dims(tf.expand_dims(self._scales, 1), -1)
  vote_component_pdf = self._get_pdf(expanded_votes, expanded_scale)

  # [B, n_points, n_caps, n_votes, n_input_dims]
  expanded_x = tf.expand_dims(x, 2)
  vote_log_prob_per_dim = vote_component_pdf.log_prob(expanded_x)
  # [B, n_points, n_votes]
  vote_log_prob = tf.reduce_sum(vote_log_prob_per_dim, -1)
  # Dummy component with constant density 10^-2 per point.
  dummy_vote_log_prob = tf.zeros([batch_size, n_input_points, 1])
  dummy_vote_log_prob -= 2. * tf.log(10.)
  vote_log_prob = tf.concat([vote_log_prob, dummy_vote_log_prob], 2)

  # [B, n_points, n_votes]
  mixing_logits = math_ops.safe_log(self._vote_presence_prob)

  dummy_logit = tf.zeros([batch_size, 1]) - 2. * tf.log(10.)
  mixing_logits = tf.concat([mixing_logits, dummy_logit], 1)

  mixing_log_prob = mixing_logits - tf.reduce_logsumexp(
      mixing_logits, 1, keepdims=True)

  # Broadcast normalized mixing log-probs over the points axis.
  expanded_mixing_logits = tf.expand_dims(mixing_log_prob, 1)
  mixture_log_prob_per_component\
      = tf.reduce_logsumexp(expanded_mixing_logits + vote_log_prob, 2)

  if presence is not None:
    presence = tf.to_float(presence)
    mixture_log_prob_per_component *= presence

  mixture_log_prob_per_example\
      = tf.reduce_sum(mixture_log_prob_per_component, 1)

  mixture_log_prob_per_batch = tf.reduce_mean(
      mixture_log_prob_per_example)

  # [B, n_points, n_votes]
  posterior_mixing_logits_per_point = expanded_mixing_logits + vote_log_prob

  # [B, n_points]
  # argmax over the votes axis, excluding the dummy component.
  winning_vote_idx = tf.argmax(
      posterior_mixing_logits_per_point[:, :, :-1], 2)

  batch_idx = tf.expand_dims(tf.range(batch_size, dtype=tf.int64), -1)
  batch_idx = snt.TileByDim([1], [winning_vote_idx.shape[-1]])(batch_idx)

  idx = tf.stack([batch_idx, winning_vote_idx], -1)
  winning_vote = tf.gather_nd(self._votes, idx)
  winning_pres = tf.gather_nd(self._vote_presence_prob, idx)
  # A vote "exists" if it beats the dummy component's logit.
  vote_presence = tf.greater(mixing_logits[:, :-1],
                             mixing_logits[:, -1:])

  # the first four votes belong to the square
  is_from_capsule = winning_vote_idx // self._n_votes

  posterior_mixing_probs = tf.nn.softmax(
      posterior_mixing_logits_per_point, -1)[Ellipsis, :-1]

  assert winning_vote.shape == x.shape

  return self.OutputTuple(
      log_prob=mixture_log_prob_per_batch,
      vote_presence=tf.to_float(vote_presence),
      winner=winning_vote,
      winner_pres=winning_pres,
      is_from_capsule=is_from_capsule,
      mixing_logits=mixing_logits,
      mixing_log_prob=mixing_log_prob,
      # TODO(adamrk): this is broken
      soft_winner=tf.zeros_like(winning_vote),
      soft_winner_pres=tf.zeros_like(winning_pres),
      posterior_mixing_probs=posterior_mixing_probs,
  )
def run(
        self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs
) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
    """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

    Args:
        input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                            The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                            TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                            The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                            TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
        print_progress:     Print progress to the console? Useful for very large input arrays.
        minibatch_size:     Maximum minibatch size to use, None = disable batching.
        num_gpus:           Number of GPUs to use.
        assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
        dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.

    Returns:
        Output(s) of the network as NumPy array(s): a single array (or tuple
        of arrays, for multiple outputs) by default, or a list of arrays when
        return_as_list is True.
    """
    assert len(in_arrays) == self.num_inputs
    assert not all(arr is None for arr in in_arrays)
    assert input_transform is None or util.is_top_level_function(
        input_transform["func"])
    assert output_transform is None or util.is_top_level_function(
        output_transform["func"])
    output_transform, dynamic_kwargs = _handle_legacy_output_transforms(
        output_transform, dynamic_kwargs)
    num_items = in_arrays[0].shape[0]
    if minibatch_size is None:
        minibatch_size = num_items

    # Construct unique hash key from all arguments that affect the TensorFlow graph.
    key = dict(input_transform=input_transform,
               output_transform=output_transform,
               num_gpus=num_gpus,
               assume_frozen=assume_frozen,
               dynamic_kwargs=dynamic_kwargs)

    def unwind_key(obj):
        # Convert the key dict into a deterministic, repr-able structure:
        # nested dicts become sorted (key, value) lists and callables are
        # replaced by their top-level function names.
        if isinstance(obj, dict):
            return [(key, unwind_key(value))
                    for key, value in sorted(obj.items())]
        if callable(obj):
            return util.get_top_level_function_name(obj)
        return obj

    key = repr(unwind_key(key))

    # Build graph.
    if key not in self._run_cache:
        with tfutil.absolute_name_scope(
                self.scope + "/_Run"), tf.control_dependencies(None):
            # Placeholders live on CPU; the batch is split across GPUs.
            with tf.device("/cpu:0"):
                in_expr = [
                    tf.placeholder(tf.float32, name=name)
                    for name in self.input_names
                ]
                in_split = list(
                    zip(*[tf.split(x, num_gpus) for x in in_expr]))

            out_split = []
            for gpu in range(num_gpus):
                with tf.device("/gpu:%d" % gpu):
                    # Cloning avoids cross-GPU variable sharing overhead
                    # when parameters are assumed frozen.
                    net_gpu = self.clone() if assume_frozen else self
                    in_gpu = in_split[gpu]

                    if input_transform is not None:
                        in_kwargs = dict(input_transform)
                        in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
                        in_gpu = [in_gpu] if tfutil.is_tf_expression(
                            in_gpu) else list(in_gpu)

                    assert len(in_gpu) == self.num_inputs
                    out_gpu = net_gpu.get_output_for(*in_gpu,
                                                     return_as_list=True,
                                                     **dynamic_kwargs)

                    if output_transform is not None:
                        out_kwargs = dict(output_transform)
                        out_gpu = out_kwargs.pop("func")(*out_gpu,
                                                         **out_kwargs)
                        out_gpu = [out_gpu] if tfutil.is_tf_expression(
                            out_gpu) else list(out_gpu)

                    assert len(out_gpu) == self.num_outputs
                    out_split.append(out_gpu)

            with tf.device("/cpu:0"):
                # Re-assemble the per-GPU outputs and cache the expressions.
                out_expr = [
                    tf.concat(outputs, axis=0)
                    for outputs in zip(*out_split)
                ]
                self._run_cache[key] = in_expr, out_expr

    # Run minibatches.
    in_expr, out_expr = self._run_cache[key]
    out_arrays = [
        np.empty([num_items] + tfutil.shape_to_list(expr.shape)[1:],
                 expr.dtype.name) for expr in out_expr
    ]

    for mb_begin in range(0, num_items, minibatch_size):
        if print_progress:
            print("\r%d / %d" % (mb_begin, num_items), end="")

        mb_end = min(mb_begin + minibatch_size, num_items)
        mb_num = mb_end - mb_begin
        # None inputs are fed as zeros of the declared input shape.
        mb_in = [
            src[mb_begin:mb_end]
            if src is not None else np.zeros([mb_num] + shape[1:])
            for src, shape in zip(in_arrays, self.input_shapes)
        ]
        mb_out = tf.get_default_session().run(out_expr,
                                              dict(zip(in_expr, mb_in)))

        for dst, src in zip(out_arrays, mb_out):
            dst[mb_begin:mb_end] = src

    # Done.
    if print_progress:
        print("\r%d / %d" % (num_items, num_items))

    if not return_as_list:
        out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(
            out_arrays)
    return out_arrays
def _stitch(features):
  """Stitch features on the first dimension.

  Flattens the per-step sequences in `features` into one contiguous sequence
  per example: token ids in features['task'] are joined across steps with
  randomly selected connector tokens, padding is squeezed out via a sparse
  re-pack, and every '*_refs' feature is offset so it indexes into the
  stitched sequence. EOS handling is then applied to 'task', 'verbs' and
  'verb_refs'.

  Args:
    features: dict of feature tensors; mutated in place (refs get squeezed).
      features['task'] is [num_sequences, num_steps, tokens].
      NOTE(review): the code treats ids <= 1 as padding/EOS and id == 1 as
      EOS — confirm against the vocabulary definition.

  Returns:
    A new dict of stitched feature tensors.
  """
  # Mask of real (non-padding) tokens, and of steps that contain any token.
  full_mask = tf.greater(features['task'], 1)
  step_mask = tf.reduce_any(full_mask, axis=-1)
  # Shifted left by one: true for every non-empty step except the last one —
  # connectors are only inserted *between* steps.
  step_mask_exclude_last = tf.pad(step_mask,
                                  [[0, 0], [0, 1]],
                                  constant_values=False)[:, 1:]
  num_sequences = common_layers.shape_list(features['task'])[0]
  num_steps = common_layers.shape_list(features['task'])[1]
  connectors = tf.constant(PADDED_CONCATENATORS)
  # Select connectors
  connector_indices = tf.random.uniform(
      [num_sequences * num_steps], minval=0,
      maxval=len(PADDED_CONCATENATORS),
      dtype=tf.int32)
  selected_connectors = tf.reshape(
      tf.gather(connectors, connector_indices),
      [num_sequences, num_steps, len(PADDED_CONCATENATORS[0])])
  # Zero out connectors for steps that should not get one.
  selected_connectors = tf.multiply(
      selected_connectors,
      tf.expand_dims(tf.to_int32(step_mask_exclude_last), 2),
      name='connector_mask')
  features['task'] = tf.concat([features['task'], selected_connectors],
                               axis=-1)
  # Cumulative count of real tokens before each step: the amount every ref
  # index must be shifted by after stitching.
  ref_offsets = tf.expand_dims(
      tf.cumsum(tf.reduce_sum(tf.to_int32(tf.greater(features['task'], 1)),
                              -1),
                exclusive=True, axis=-1), 2)
  features['task'] = tf.reshape(features['task'], [num_sequences, -1])
  full_mask = tf.greater(features['task'], 1)
  full_mask_int = tf.to_int32(full_mask)
  # Re-pack real tokens densely to the left via a sparse tensor round-trip.
  indices = tf.where(
      tf.sequence_mask(lengths=tf.reduce_sum(full_mask_int, -1)))
  values = tf.boolean_mask(tf.reshape(features['task'], [-1]),
                           tf.reshape(full_mask, [-1]))
  sparse_task = tf.sparse.SparseTensor(
      indices=indices, values=values,
      dense_shape=tf.to_int64(tf.shape(features['task'])))
  # Stitch task and raw_task
  stitched_features = {}
  stitched_features['task'] = tf.sparse_tensor_to_dense(sparse_task)
  max_len = tf.reduce_max(
      tf.reduce_sum(tf.to_int32(tf.greater(stitched_features['task'], 1)),
                    -1))
  stitched_features['task'] = stitched_features['task'][:, :max_len]
  if 'raw_task' in features:
    # Mirror the id-level stitching on the raw strings, inserting the string
    # form of the very same randomly chosen connectors.
    connector_strs = tf.reshape(
        tf.gather(tf.constant(CONCATENATORS_STR), connector_indices),
        [num_sequences, num_steps])
    masked_connector_strs = tf.where(
        step_mask_exclude_last,
        connector_strs,
        tf.fill(tf.shape(connector_strs), ''))
    stitched_features['raw_task'] = tf.strings.reduce_join(
        tf.strings.reduce_join(
            tf.concat([
                tf.expand_dims(features['raw_task'], 2),
                tf.expand_dims(masked_connector_strs, 2)], axis=2),
            axis=-1), -1)
  # Stitch screen sequences
  # A step counts as an action iff its verb ref is non-empty (end > start).
  action_lengths = tf.reduce_sum(
      tf.to_int32(
          tf.greater(features['verb_refs'][:, :, 0, 1],
                     features['verb_refs'][:, :, 0, 0])), -1)
  max_action_length = tf.reduce_max(action_lengths)

  def _pad(tensor, padding_value=0):
    # Truncate to max_action_length and append one slot (for the EOS step).
    shape_list = common_layers.shape_list(tensor)
    assert len(shape_list) >= 2
    padding_list = [[0, 0], [0, 1]] + [[0, 0]] * (len(shape_list) - 2)
    return tf.pad(tensor[:, :max_action_length], padding_list,
                  constant_values=padding_value)

  for key in features.keys():
    if key.endswith('_refs'):
      features[key] = tf.squeeze(features[key], 2)
      # Only shift refs that are non-empty (start != end).
      ref_mask = tf.expand_dims(
          tf.to_int32(
              tf.not_equal(features[key][:, :, 0], features[key][:, :, 1])),
          2)
      stitched_features[key] = tf.multiply(
          (features[key] + ref_offsets), ref_mask, name='ref_mask')
      stitched_features[key] = _pad(stitched_features[key])
    elif key in ['verbs', 'objects', 'consumed', 'obj_dom_pos', 'obj_text',
                 'obj_type', 'obj_clickable', 'obj_screen_pos', 'verb_refs',
                 'obj_refs', 'input_refs', 'obj_dom_dist']:
      features[key] = tf.squeeze(features[key], 2)
      stitched_features[key] = features[key]
      # obj_type uses -1 as its padding value; everything else uses 0.
      stitched_features[key] = _pad(
          stitched_features[key],
          padding_value=-1 if key == 'obj_type' else 0)
    elif key not in ['task', 'raw_task']:
      # Scalar-per-episode features: keep the first step's value.
      stitched_features[key] = features[key][:, 0]
  # Append eos to 'task'
  stitched_features['task'] = tf.pad(stitched_features['task'],
                                     [[0, 0], [0, 1]])
  task_mask = tf.to_int32(tf.greater(stitched_features['task'], 1))
  task_eos_mask = tf.pad(task_mask, [[0, 0], [1, 0]],
                         constant_values=1)[:, :-1]
  # (eos_mask - mask) is 1 exactly at the first padding position.
  stitched_features['task'] = stitched_features['task'] + (
      task_eos_mask - task_mask)
  # Append eos
  verb_mask = tf.to_int32(tf.greater(stitched_features['verbs'], 1))
  verb_eos_mask = tf.pad(verb_mask, [[0, 0], [1, 0]],
                         constant_values=1)[:, :-1]
  verb_eos = verb_eos_mask - verb_mask
  stitched_features['verbs'] = stitched_features['verbs'] + verb_eos
  # Append last step refs to 'verb_refs'
  # Position of the EOS token (id == 1) in the stitched task.
  task_lengths = tf.where(tf.equal(stitched_features['task'], 1))[:, 1]
  eos_pos = tf.to_int32(tf.stack([task_lengths, task_lengths + 1], axis=1))
  action_mask = tf.to_int32(
      tf.sequence_mask(action_lengths, max_action_length + 1))
  action_and_eos_mask = tf.pad(action_mask, [[0, 0], [1, 0]],
                               constant_values=1)[:, :-1]
  verb_ref_eos = action_and_eos_mask - action_mask
  eos_refs = tf.multiply(
      tf.tile(tf.expand_dims(eos_pos, 1), [1, max_action_length + 1, 1]),
      tf.expand_dims(verb_ref_eos, 2),
      name='verb_ref_eos')
  # Add eos to the end of verb_refs
  stitched_features['verb_refs'] += eos_refs
  return stitched_features
# Nt = 10 t_np = np.linspace(0, 1, N) X, T = np.meshgrid(x_np, t_np) x = X.ravel() t = T.ravel() ## The construction phase zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1)) x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1)) t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1)) points = tf.concat([x, t], 1) num_iter = 10000 num_hidden_neurons = [20, 20, 20] X = tf.convert_to_tensor(X) T = tf.convert_to_tensor(T) with tf.variable_scope('dnn'): num_hidden_layers = np.size(num_hidden_neurons) previous_layer = points for l in range(num_hidden_layers): current_layer = tf.layers.dense(previous_layer, num_hidden_neurons[l],
def _make_obj_screen_pos():
  """Pair up the x- and y-coordinate spans of UI objects on the last axis."""
  x_span = tf.reshape(feature_dict['ui_obj_cord_x_seq'], [1, -1, 2])
  y_span = tf.reshape(feature_dict['ui_obj_cord_y_seq'], [1, -1, 2])
  return tf.concat([x_span, y_span], 2)
def inception_v3(inputs, dropout_keep_prob=0.8, num_classes=1000, is_training=True, restore_logits=True, scope=''):
  """Latest Inception from http://arxiv.org/abs/1512.00567.

    "Rethinking the Inception Architecture for Computer Vision"

    Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
    Zbigniew Wojna

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    dropout_keep_prob: dropout keep_prob.
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: Optional scope for name_scope.

  Returns:
    a list containing 'logits', 'aux_logits' Tensors.
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}
  with tf.name_scope(scope, 'inception_v3', [inputs]):
    # batch_norm / dropout pick up the training flag from this arg_scope.
    with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                          is_training=is_training):
      # The stem uses VALID padding (spatial size shrinks at each conv).
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='VALID'):
        # 299 x 299 x 3
        end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2,
                                         scope='conv0')
        # 149 x 149 x 32
        end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3],
                                         scope='conv1')
        # 147 x 147 x 32
        end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3],
                                         padding='SAME', scope='conv2')
        # 147 x 147 x 64
        end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
                                           stride=2, scope='pool1')
        # 73 x 73 x 64
        end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1],
                                         scope='conv3')
        # 73 x 73 x 80.
        end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3],
                                         scope='conv4')
        # 71 x 71 x 192.
        end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
                                           stride=2, scope='pool2')
        # 35 x 35 x 192.
        net = end_points['pool2']
      # Inception blocks: SAME padding so branches keep equal spatial size
      # and can be concatenated on the channel axis.
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='SAME'):
        # mixed: 35 x 35 x 256.
        with tf.variable_scope('mixed_35x35x256a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                          3)
          end_points['mixed_35x35x256a'] = net
        # mixed_1: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                          3)
          end_points['mixed_35x35x288a'] = net
        # mixed_2: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                          3)
          end_points['mixed_35x35x288b'] = net
        # mixed_3: 17 x 17 x 768.
        # Grid-reduction block: stride-2 branches, no 1x1 branch.
        with tf.variable_scope('mixed_17x17x768a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2,
                                   padding='VALID')
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3], stride=2,
                                      padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2,
                                       padding='VALID')
          net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
          end_points['mixed_17x17x768a'] = net
        # mixed4: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 128, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 128, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                          3)
          end_points['mixed_17x17x768b'] = net
        # mixed_5: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768c'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                          3)
          end_points['mixed_17x17x768c'] = net
        # mixed_6: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768d'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                          3)
          end_points['mixed_17x17x768d'] = net
        # mixed_7: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768e'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 192, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 192, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                          3)
          end_points['mixed_17x17x768e'] = net
        # Auxiliary Head logits, branched off the last 17x17 block.
        aux_logits = tf.identity(end_points['mixed_17x17x768e'])
        with tf.variable_scope('aux_logits'):
          aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
                                    padding='VALID')
          aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj')
          # Shape of feature map before the final layer.
          shape = aux_logits.get_shape()
          # Kernel covers the whole remaining spatial extent (shape[1:3]).
          aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01,
                                  padding='VALID')
          aux_logits = ops.flatten(aux_logits)
          aux_logits = ops.fc(aux_logits, num_classes, activation=None,
                              stddev=0.001, restore=restore_logits)
          end_points['aux_logits'] = aux_logits
        # mixed_8: 8 x 8 x 1280.
        # Note that the scope below is not changed to not void previous
        # checkpoints.
        # (TODO) Fix the scope when appropriate.
        with tf.variable_scope('mixed_17x17x1280a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 192, [1, 1])
            branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2,
                                   padding='VALID')
          with tf.variable_scope('branch7x7x3'):
            branch7x7x3 = ops.conv2d(net, 192, [1, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3], stride=2,
                                     padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2,
                                       padding='VALID')
          net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
          end_points['mixed_17x17x1280a'] = net
        # mixed_9: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat([
                ops.conv2d(branch3x3, 384, [1, 3]),
                ops.conv2d(branch3x3, 384, [3, 1])
            ], 3)
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat([
                ops.conv2d(branch3x3dbl, 384, [1, 3]),
                ops.conv2d(branch3x3dbl, 384, [3, 1])
            ], 3)
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool],
                          3)
          end_points['mixed_8x8x2048a'] = net
        # mixed_10: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat([
                ops.conv2d(branch3x3, 384, [1, 3]),
                ops.conv2d(branch3x3, 384, [3, 1])
            ], 3)
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat([
                ops.conv2d(branch3x3dbl, 384, [1, 3]),
                ops.conv2d(branch3x3dbl, 384, [3, 1])
            ], 3)
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool],
                          3)
          end_points['mixed_8x8x2048b'] = net
        # Final pooling and prediction
        with tf.variable_scope('logits'):
          shape = net.get_shape()
          # Global average pool over the remaining spatial extent.
          net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
          # 1 x 1 x 2048
          net = ops.dropout(net, dropout_keep_prob, scope='dropout')
          net = ops.flatten(net, scope='flatten')
          # 2048
          logits = ops.fc(net, num_classes, activation=None, scope='logits',
                          restore=restore_logits)
          # 1000
          end_points['logits'] = logits
          end_points['predictions'] = tf.nn.softmax(logits,
                                                    name='predictions')
  return logits, end_points
def _process_pixel_help(feature_dict, data_source, load_dom_dist=False, load_extra=False):
  """Processes testing data feature dictionary.

  Args:
    feature_dict: feature dictionary
    data_source: TEST_PIXEL_HELP
    load_dom_dist: whether to load the dom distance feature.
    load_extra: whether to load the extra data for debugging.

  Returns:
    A processed feature dictionary.
  """
  # One step per verb id in the episode.
  step_num = tf.size(feature_dict['verb_id_seq'])
  feature = {
      'task':
          tf.reshape(feature_dict['instruction_word_id_seq'], [-1]),
      'obj_text':
          tf.reshape(feature_dict['ui_obj_word_id_seq'], [
              step_num, MAX_UI_OBJECT_NUM[data_source],
              MAX_TOKEN_NUM[data_source]
          ]),
      'obj_type':
          tf.reshape(feature_dict['ui_obj_type_id_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source]]),
      'obj_clickable':
          tf.reshape(feature_dict['ui_obj_clickable_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source]]),
      # Interleave x spans and y spans into [x0, x1, y0, y1] per object.
      # pylint: disable=g-long-ternary
      'obj_screen_pos': (
          tf.reshape(
              tf.concat([
                  tf.reshape(feature_dict['ui_obj_cord_x_seq'],
                             [step_num, -1, 2]),
                  tf.reshape(feature_dict['ui_obj_cord_y_seq'],
                             [step_num, -1, 2])
              ], axis=2),
              [step_num, MAX_UI_OBJECT_NUM[data_source], 4])),
      'obj_dom_pos':
          tf.reshape(feature_dict['ui_obj_dom_location_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source], 3]),
      'verbs':
          tf.reshape(feature_dict['verb_id_seq'], [step_num]),
      'objects':
          tf.reshape(feature_dict['ui_target_id_seq'], [step_num]),
      'input_refs':
          tf.reshape(feature_dict['input_str_position_seq'], [step_num, 2]),
      'obj_refs':
          tf.reshape(feature_dict['obj_desc_position_seq'], [step_num, 2]),
      'verb_refs':
          # No data for Pixel on the field
          tf.zeros([step_num, 2], tf.int32),
      'agreement_count':
          tf.constant(100, dtype=tf.int32),
  }
  if load_dom_dist:
    feature['obj_dom_dist'] = tf.reshape(
        feature_dict['ui_obj_dom_distance'],
        [step_num, MAX_UI_OBJECT_NUM[data_source],
         MAX_UI_OBJECT_NUM[data_source]])
  # NOTE(review): 5 is presumably a rule-type enum value for this data
  # source — confirm against the rule constants used by the callers.
  feature['rule'] = tf.constant(5, dtype=tf.int32)
  if load_extra:
    feature['task_id'] = tf.reshape(feature_dict['task_id'], [])
    feature['raw_task'] = tf.reshape(feature_dict['instruction_str'], [])
    feature['obj_raw_text'] = tf.reshape(
        feature_dict['ui_obj_str_seq'],
        [step_num, MAX_UI_OBJECT_NUM[data_source]])
  # NOTE(review): 2 appears to be the enum value for the Pixel Help data
  # source — confirm against the data-source constants.
  feature['data_source'] = tf.constant(2, dtype=tf.int32)
  return feature
def crop_proposal():
  """Propose a random crop rectangle for data augmentation.

  Draws ssd_constants.NUM_CROP_PASSES candidate crops, validates each
  against aspect-ratio, box-IoU and box-center constraints, and one-hot
  selects the highest-indexed valid candidate (or none). Uses `boxes` and
  `num_boxes` from the enclosing scope.

  Returns:
    use_crop: scalar bool tensor, True iff any candidate crop is valid.
    output_ltrb: [4] float tensor, the selected crop as
      (left, top, right, bottom) in normalized coordinates; all zeros when
      no candidate is valid.
    output_masks: [num_boxes] bool tensor marking boxes whose center lies
      inside the selected crop.
  """
  # Helper producing a [NUM_CROP_PASSES, 1] uniform random column.
  rand_vec = lambda minval, maxval: tf.random_uniform(
      shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval,
      maxval=maxval, dtype=tf.float32)

  # Crop extents in [0.3, 1); offsets chosen so the crop stays in-bounds.
  width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
  left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

  right = left + width
  bottom = top + height

  ltrb = tf.concat([left, top, right, bottom], axis=1)
  # One randomly drawn IoU threshold shared by all passes.
  min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
  ious = calc_iou_tensor(ltrb, boxes)

  # discard any bboxes whose center not in the cropped image
  # Box centers: i=0 pairs columns (0, 2) -> x, i=1 pairs (1, 3) -> y.
  xc, yc = [
      tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
              (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
  ]

  masks = tf.reduce_all(tf.stack([
      tf.greater(xc, tf.tile(left, (1, num_boxes))),
      tf.less(xc, tf.tile(right, (1, num_boxes))),
      tf.greater(yc, tf.tile(top, (1, num_boxes))),
      tf.less(yc, tf.tile(bottom, (1, num_boxes))),
  ], axis=2), axis=2)

  # Checks of whether a crop is valid.
  valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                tf.less(width / height, 2))
  valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1,
                             keepdims=True)
  valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

  valid_all = tf.cast(
      tf.reduce_all(
          tf.concat([valid_aspect, valid_ious, valid_masks], axis=1),
          axis=1), tf.int32)

  # One indexed, as zero is needed for the case of no matches.
  index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

  # Either one-hot, or zeros if there is no valid crop.
  selection = tf.equal(tf.reduce_max(index * valid_all), index)

  use_crop = tf.reduce_any(selection)
  # Zero out every candidate except the selected one, then sum.
  output_ltrb = tf.reduce_sum(
      tf.multiply(
          ltrb,
          tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))),
      axis=0)
  output_masks = tf.reduce_any(
      tf.logical_and(masks, tf.tile(selection[:, tf.newaxis],
                                    (1, num_boxes))), axis=0)

  return use_crop, output_ltrb, output_masks
def multilevel_roi_align(features, boxes, box_levels, output_size, num_samples_per_cell_y=1, num_samples_per_cell_x=1, align_corners=False, extrapolation_value=0.0, scope=None):
  """Applies RoI Align op and returns feature for boxes.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select `num_points` points uniformly and compute feature values using
  bilinear interpolation. Finally, we average pool the interpolated values in
  each cell to obtain a [output_size[0], output_size[1], channels] feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map must
      have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 3D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: An list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in
      each cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in
      each cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
  with tf.name_scope(scope, 'MultiLevelRoIAlign'):
    features, true_feature_shapes = pad_to_max_size(features)
    batch_size = shape_utils.combined_static_and_dynamic_shape(features)[0]
    num_levels = features.get_shape().as_list()[1]
    max_feature_height = tf.shape(features)[2]
    max_feature_width = tf.shape(features)[3]
    num_filters = features.get_shape().as_list()[4]
    num_boxes = tf.shape(boxes)[1]

    # Convert boxes to absolute co-ordinates.
    true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
    true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
    boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

    size_y = output_size[0] * num_samples_per_cell_y
    size_x = output_size[1] * num_samples_per_cell_x
    box_grid_y, box_grid_x = box_grid_coordinate_vectors(
        boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
    (feature_grid_y0, feature_grid_x0, feature_grid_y1,
     feature_grid_x1) = feature_grid_coordinate_vectors(box_grid_y,
                                                        box_grid_x)
    # Interleave the floor/ceil neighbor coordinates so each sample point
    # contributes its two surrounding feature rows/columns.
    feature_grid_y = tf.reshape(
        tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
        [batch_size, num_boxes, -1])
    feature_grid_x = tf.reshape(
        tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
        [batch_size, num_boxes, -1])
    feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                        num_levels, max_feature_height,
                                        max_feature_width, box_levels)
    # Out-of-bounds samples get index -1 so the gather substitutes
    # extrapolation_value for them.
    valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                     true_feature_shapes)
    feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                   -1 * tf.ones_like(feature_coordinates))
    flattened_features = tf.reshape(features, [-1, num_filters])
    flattened_feature_values = _gather_valid_indices(flattened_features,
                                                     feature_coordinates,
                                                     extrapolation_value)
    features_per_box = tf.reshape(
        flattened_feature_values,
        [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

    # Cast tensors into dtype of features.
    box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
    box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
    feature_grid_y0 = tf.cast(feature_grid_y0,
                              dtype=features_per_box.dtype)
    feature_grid_x0 = tf.cast(feature_grid_x0,
                              dtype=features_per_box.dtype)

    # RoI Align operation is a bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    #
    # Unrolling the matrix multiplies gives us:
    # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
    # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
    #
    # This can be computed by applying pointwise multiplication and sum_pool
    # in a 2x2 window.
    ly = box_grid_y - feature_grid_y0
    lx = box_grid_x - feature_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx

    kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
                          [batch_size, num_boxes, size_y * 2, 1])
    kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
                          [batch_size, num_boxes, 1, size_x * 2])

    # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    # BUG FIX: the original statement ended with a stray trailing comma,
    # which turned the right-hand side into a 1-tuple; converting that tuple
    # to a tensor adds a leading dimension and breaks the reshape below.
    features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

    # This combines the two pooling operations - sum_pool to perform bilinear
    # interpolation and avg_pool to pool the values in each bin.
    features_per_box = tf.nn.avg_pool(
        features_per_box,
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        'VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size[0], output_size[1], num_filters])

    return features_per_box
def _get_action_logits(encoder_output, decoder_output, output_vocab_embeddings_table, output_vocab_size, model_config, input_copy_mask=None, use_gating_mechanism=True):
  """Generate output logits given decoder output.

  This effectively combines a Pointer Network (Vinyals et al., 2015) with a
  standard softmax output layer for selecting symbols from an output
  vocabulary, similar to:
  - Jia and Liang, 2016 (https://arxiv.org/abs/1606.03622)
  - Gulcehre et al., 2016 (https://arxiv.org/abs/1603.08148)
  - Gu et al., 2016 (https://arxiv.org/abs/1603.06393)
  - See et al. 2017 (https://arxiv.org/abs/1704.04368)

  Args:
    encoder_output: Tensor representing encoder output of shape (batch size,
      input length, encoder dims).
    decoder_output: Tensor representing decoder output of shape (batch size,
      # decoded steps, decoder dims).
    output_vocab_embeddings_table: Embeddings for output vocabulary of shape
      (output_vocab_size, target embedding dims).
    output_vocab_size: Integer size of output_vocab_embeddings_table outer
      dim.
    model_config: ModelConfig proto.
    input_copy_mask: Mask of the input sequence for copying.
    use_gating_mechanism: Whether to use gating mechanism.

  Returns:
    Tensor of shape (batch_size, output_vocab_size + input length)
    representing unnormalized logits for both copy and generate actions.
  """
  with tf.variable_scope("logits_transforms"):
    decoder_dims = decoder_output.get_shape()[-1]
    target_embedding_dims = model_config.model_parameters.target_embedding_dims

    # Dot product the decoder output with representations of each of the
    # output symbols to get a set of unnormalized logits for each output
    # vocab item. We need to tile the output vocab embeddings across the
    # batch.
    output_vocab_transform = tf.expand_dims(output_vocab_embeddings_table, 0)
    batch_size = tf.shape(decoder_output)[0]
    output_vocab_transform = tf.tile(output_vocab_transform,
                                     [batch_size, 1, 1])
    # Transform representations to the target_embedding_dims.
    if decoder_dims != target_embedding_dims:
      transformed_decoder_output = common_layers.linear_transform(
          decoder_output, target_embedding_dims, "decoder_transform")
    else:
      transformed_decoder_output = decoder_output
    generate_logits = tf.matmul(transformed_decoder_output,
                                output_vocab_transform,
                                transpose_b=True)
    # FIX: the original passed shape=(output_vocab_size) — a bare int, not a
    # tuple (missing comma). TF coerces it to the same 1-D shape, but the
    # explicit list is unambiguous and matches intent.
    generate_logits_bias = tf.get_variable("generate_logits_bias",
                                           shape=[output_vocab_size])
    generate_logits += generate_logits_bias

    # Dot product the decoder output with representations from the encoder
    # output.
    # This is necessary vs. re-using the encoder-decoder attention weights
    # because those use multihead attention.
    # First, need to transform representations to the decoder dimensions.
    transformed_encoder_output = common_layers.linear_transform(
        encoder_output, decoder_dims, "encoder_transform")
    copy_logits = tf.matmul(decoder_output,
                            transformed_encoder_output,
                            transpose_b=True)
    # This contains scores representing the probability of copying from
    # input (3rd dim) to output (2nd dim).

    # Optionally apply a soft gating mechanism to determine whether
    # to select from copy or generate logits.
    # TODO(petershaw): Evaluate and improve this gating mechanism.
    # The current implementation is most likely not optimal, since it
    # applies a scalar in the range [0,1] prior to softmax.
    if use_gating_mechanism:
      prob_gen_unnormalized = common_layers.linear_transform(
          decoder_output, 1, "prob_gen")
      # FIX: was shape=(1) — same missing-comma issue as above.
      prob_gen_bias = tf.get_variable("prob_gen_bias", shape=[1])
      prob_gen_unnormalized += prob_gen_bias
      prob_gen = tf.sigmoid(prob_gen_unnormalized)
      # Squeeze so that prob_gen has shape [batch_size, decode_length]
      prob_gen = tf.squeeze(prob_gen, axis=2)

      # These are the 'generate' logits so are scaled by P_gen.
      generate_logits *= tf.expand_dims(prob_gen, axis=-1)
      # These are the 'copy' logits so are scaled by 1 - P_gen.
      copy_logits *= tf.expand_dims(1 - prob_gen, axis=-1)

    if input_copy_mask is not None:
      # Push masked-out input positions to a large negative logit.
      copy_mask = (1 - tf.dtypes.cast(
          input_copy_mask, dtype=tf.dtypes.float32)) * LOGIT_MASK_VALUE
      copy_logits += tf.expand_dims(copy_mask, axis=1)

    # Concatenate logits into a single vector; first N (fixed) inputs are
    # the generation probabilities, and next are the copy probabilities for
    # each input (well, they aren't really probabilities, but scores.)
    extended_logits = tf.concat([generate_logits, copy_logits], axis=2)
    return extended_logits
def test(first, second, out):
  """Runs a trained model on two frames and writes the predicted middle frame.

  Args:
    first: path of the first (earlier) input frame image.
    second: path of the second (later) input frame image.
    out: output path for the predicted frame image.
  """
  # NOTE(review): the placeholder below has 2 channels total, so this assumes
  # each input frame is single-channel — confirm against imread's output.
  data_frame1 = np.expand_dims(imread(first), 0)
  data_frame3 = np.expand_dims(imread(second), 0)

  H = data_frame1.shape[1]
  W = data_frame1.shape[2]

  # The network requires spatial dims that are multiples of 32; compute a
  # symmetric padding up to the next multiple.
  adaptive_H = int(np.ceil(H / 32.0) * 32.0)
  adaptive_W = int(np.ceil(W / 32.0) * 32.0)
  pad_up = int(np.ceil((adaptive_H - H) / 2.0))
  pad_bot = int(np.floor((adaptive_H - H) / 2.0))
  pad_left = int(np.ceil((adaptive_W - W) / 2.0))
  pad_right = int(np.floor((adaptive_W - W) / 2.0))

  print(str(H) + ', ' + str(W))
  print(str(adaptive_H) + ', ' + str(adaptive_W))

  with tf.Graph().as_default():
    # Both frames are stacked along the channel dimension of one placeholder.
    input_placeholder = tf.placeholder(tf.float32, shape=(None, H, W, 2))
    input_pad = tf.pad(
        input_placeholder,
        [[0, 0], [pad_up, pad_bot], [pad_left, pad_right], [0, 0]],
        'SYMMETRIC')

    # Edge maps for each frame; the second VGG16 reuses the first's weights.
    edge_vgg_1 = Vgg16(input_pad[:, :, :, :1], reuse=None)
    edge_vgg_3 = Vgg16(input_pad[:, :, :, 1:2], reuse=True)
    edge_1 = tf.nn.sigmoid(edge_vgg_1.fuse)
    edge_3 = tf.nn.sigmoid(edge_vgg_3.fuse)
    padded_shape = input_pad.get_shape().as_list()
    edge_1 = tf.reshape(edge_1, [-1, padded_shape[1], padded_shape[2], 1])
    edge_3 = tf.reshape(edge_3, [-1, padded_shape[1], padded_shape[2], 1])

    with tf.variable_scope("Cycle_DVF"):
      # Prepare model: frames plus their edge maps are the model input.
      model = Voxel_flow_model(is_train=False)
      prediction = model.inference(
          tf.concat([input_pad, edge_1, edge_3], 3))[0]

    # FIX: the original created the session without ever closing it; using it
    # as a context manager guarantees the resources are released.
    with tf.Session() as sess:
      # Restore checkpoint from file.
      if FLAGS.pretrained_model_checkpoint_path:
        restorer = tf.train.Saver()
        restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
        print('%s: Pre-trained model restored from %s' %
              (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

      feed_dict = {
          input_placeholder: np.concatenate((data_frame1, data_frame3), 3)
      }
      # Run single step update.
      prediction_np = sess.run(prediction, feed_dict=feed_dict)

    # Crop the padding off and map the output from [-0.5, 0.5] to uint8.
    output = prediction_np[-1, pad_up:adaptive_H - pad_bot,
                           pad_left:adaptive_W - pad_right, :]
    output = np.round(((output + 0.5) * 255.0)).astype(np.uint8)
    cv2.imwrite(out, output)
def get_model(point_cloud, cls_label, is_training, bn_decay=None):
  """Builds the per-point prediction network.

  Splits the input cloud into xyz coordinates and extra point features,
  encodes them with multi-scale-grouping set-abstraction layers, decodes
  with feature-propagation layers (conditioned on a one-hot object class
  label broadcast to every point), and finishes with a small 1x1-conv head
  emitting 50 scores per point.

  Args:
    point_cloud: float tensor [batch, num_point, 6]; channels 0-2 are xyz,
      channels 3-5 are additional point features.
    cls_label: int tensor with one object-class id per batch element.
    is_training: bool tensor gating batch norm and dropout.
    bn_decay: optional batch-norm decay schedule.

  Returns:
    (net, end_points): per-point scores [batch, num_point, 50] and a dict
    containing the pre-dropout features under key 'feats'.
  """
  batch_size = point_cloud.get_shape()[0].value
  num_point = point_cloud.get_shape()[1].value
  end_points = {}

  xyz0 = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
  feats0 = tf.slice(point_cloud, [0, 0, 3], [-1, -1, 3])

  # Encoder: two multi-scale-grouping SA layers, then a global SA layer.
  xyz1, feats1 = pointnet_sa_module_msg(
      xyz0, feats0, 512, [0.1, 0.2, 0.4], [32, 64, 128],
      [[32, 32, 64], [64, 64, 128], [64, 96, 128]],
      is_training, bn_decay, scope='layer1')
  xyz2, feats2 = pointnet_sa_module_msg(
      xyz1, feats1, 128, [0.4, 0.8], [64, 128],
      [[128, 128, 256], [128, 196, 256]],
      is_training, bn_decay, scope='layer2')
  xyz3, feats3, _ = pointnet_sa_module(
      xyz2, feats2, npoint=None, radius=None, nsample=None,
      mlp=[256, 512, 1024], mlp2=None, group_all=True,
      is_training=is_training, bn_decay=bn_decay, scope='layer3')

  # Decoder: propagate features back up to the full-resolution point set.
  up2 = pointnet_fp_module(xyz2, xyz3, feats2, feats3, [256, 256],
                           is_training, bn_decay, scope='fa_layer1')
  up1 = pointnet_fp_module(xyz1, xyz2, feats1, up2, [256, 128],
                           is_training, bn_decay, scope='fa_layer2')

  # Broadcast the object class as a one-hot feature attached to every point.
  one_hot = tf.one_hot(cls_label, depth=NUM_CATEGORIES,
                       on_value=1.0, off_value=0.0)
  one_hot = tf.reshape(one_hot, [batch_size, 1, NUM_CATEGORIES])
  one_hot = tf.tile(one_hot, [1, num_point, 1])
  up0 = pointnet_fp_module(
      xyz0, xyz1,
      tf.concat([one_hot, xyz0, feats0], axis=-1),
      up1, [128, 128], is_training, bn_decay, scope='fp_layer3')

  # Per-point head: conv -> dropout -> conv (no activation on the scores).
  net = tf_util.conv1d(up0, 128, 1, padding='VALID', bn=True,
                       is_training=is_training, scope='fc1',
                       bn_decay=bn_decay)
  end_points['feats'] = net
  net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                        scope='dp1')
  net = tf_util.conv1d(net, 50, 1, padding='VALID', activation_fn=None,
                       scope='fc2')
  return net, end_points
def multihead_attention(queries,
                        keys,
                        times=None,
                        num_units=None,
                        num_heads=1,
                        dropout_rate=0,
                        is_training=True,
                        use_prior="none",
                        causality=True,
                        scope="multihead_attention",
                        residual=False,
                        time_exp_base=None,
                        overlapping_chunks=None,
                        reuse=None,
                        with_qk=False):
  """Applies multihead attention.

  Args:
    queries: A 3d tensor with shape of [N, T_q, C_q].
    keys: A 3d tensor with shape of [N, T_k, C_k].
    times: A 3d tensor with shape of [N, T_q, T_k]. Required only when
      use_prior='time'.
    num_units: A scalar. Attention size. Defaults to the last dimension
      of `queries`.
    num_heads: An int. Number of heads.
    dropout_rate: A floating point number.
    is_training: Boolean. Controller of mechanism for dropout.
    use_prior: String. Whether to use prior for attention heads. Supported
      values include: none, position, time.
    causality: Boolean. If true, units that reference the future are masked.
    scope: Optional scope for `variable_scope`.
    residual: Boolean. Whether to use residual connection.
    time_exp_base: A scalar. Base for exponential time intervals. Only used
      for the case where use_prior='time'.
    overlapping_chunks: Boolean. Whether to use (non)/overlapping chunks for
      the case where use_prior='time'.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.
    with_qk: If True, return the (Q, K) linear projections instead of the
      attention output.

  Returns:
    A 3d tensor with shape of (N, T_q, C), or the (Q, K) pair when
    `with_qk` is True.
  """
  tf.logging.info(
      "Computing attention with prior: {} and num of heads: {}".format(
          use_prior, num_heads))
  with tf.variable_scope(scope, reuse=reuse):
    # Set the fall back option for num_units.
    if num_units is None:
      # BUG FIX: `as_list` is a method; the original wrote `as_list[-1]`,
      # indexing the bound method object, which raises a TypeError whenever
      # num_units is left unset.
      num_units = queries.get_shape().as_list()[-1]

    # Linear projections.  pylint: disable=invalid-name
    Q = tf.layers.dense(queries, num_units, activation=None)  # (N, T_q, C)
    K = tf.layers.dense(keys, num_units, activation=None)  # (N, T_k, C)
    V = tf.layers.dense(keys, num_units, activation=None)  # (N, T_k, C)

    # Split into heads along the channel axis and stack along the batch axis.
    Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)  # (h*N, T_q, C/h)
    K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
    V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
    # pylint: enable=invalid-name

    # Multiplication
    outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

    # Scale by sqrt of the per-head dimension.
    outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)

    # Key Masking: all-zero key vectors are treated as padding and pushed to
    # a large negative logit so softmax ignores them.
    key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))  # (N, T_k)
    key_masks = tf.tile(key_masks, [num_heads, 1])  # (h*N, T_k)
    key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                        [1, tf.shape(queries)[1], 1])  # (h*N, T_q, T_k)

    paddings = tf.ones_like(outputs) * (-2**32 + 1)
    outputs = tf.where(tf.equal(key_masks, 0), paddings,
                       outputs)  # (h*N, T_q, T_k)

    # Causality = Future blinding via a lower-triangular mask.
    if causality:
      diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
      tril = tf.linalg.LinearOperatorLowerTriangular(
          diag_vals).to_dense()  # (T_q, T_k)
      masks = tf.tile(tf.expand_dims(tril, 0),
                      [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

      paddings = tf.ones_like(masks) * (-2**32 + 1)
      outputs = tf.where(tf.equal(masks, 0), paddings,
                         outputs)  # (h*N, T_q, T_k)

    # Position/Time prior is only used in multi-head case.
    if num_heads > 1:
      # Scaling head weights with position prior.
      if use_prior == "position":
        # Each head focuses on a window of items whose size is computed
        # below.
        # NOTE(review): `masks` and `paddings` are only bound above when
        # causality=True; with causality=False this branch would raise a
        # NameError — confirm position prior is always used causally.
        attn_size = int(outputs.get_shape().as_list()[-1] / num_heads)
        outputs = tf.concat(
            _compute_head_weights_with_position_prior(
                outputs, masks, paddings, num_heads, attn_size),
            axis=0)  # (H*N, T_q, T_k)
        tf.logging.info("After position-wise sliding window attention.")
        tf.logging.info(outputs.shape)
      # Scaling head weights with time prior.
      elif use_prior == "time":
        # Convert time deltas from seconds to days.
        if times is None:
          raise ValueError("Times tensor is needed.")
        time_deltas = _compute_time_deltas(times) / SECS_TO_DAYS
        outputs = tf.concat(
            _compute_head_weights_with_time_prior(
                outputs, paddings, time_deltas, num_heads, time_exp_base,
                overlapping_chunks),
            axis=0)  # (H*N, T_q, T_k)

    # Activation
    outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

    # Query Masking: zero out attention rows for all-zero (padding) queries.
    query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))  # (N, T_q)
    query_masks = tf.tile(query_masks, [num_heads, 1])  # (h*N, T_q)
    query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                          [1, 1, tf.shape(keys)[1]])  # (h*N, T_q, T_k)
    outputs *= query_masks  # broadcasting. (h*N, T_q, C)

    # Dropouts
    outputs = tf.layers.dropout(
        outputs, rate=dropout_rate,
        training=tf.convert_to_tensor(is_training))

    # Weighted sum
    outputs = tf.matmul(outputs, V_)  # (h*N, T_q, C/h)

    # Restore shape: gather the heads back into the channel axis.
    outputs = tf.concat(tf.split(outputs, num_heads, axis=0),
                        axis=2)  # (N, T_q, C)

    # Residual connection
    if residual:
      outputs += queries

  if with_qk:
    return Q, K
  else:
    return outputs
def concat(tensors, axis, *args, **kwargs):
  """Concatenates `tensors` along `axis`, delegating to `tf_v1.concat`."""
  joined = tf_v1.concat(tensors, axis, *args, **kwargs)
  return joined
def embedding(inputs,
              vocab_size,
              num_units,
              zero_pad=True,
              scale=True,
              l2_reg=0.0,
              scope="embedding",
              with_t=False,
              reuse=None):
  """Looks up embeddings for integer ids.

  Args:
    inputs: `Tensor` of type `int32` or `int64` holding the ids to look up.
    vocab_size: An int. Vocabulary size (number of table rows).
    num_units: An int. Embedding dimensionality.
    zero_pad: A boolean. If True, row 0 of the table (id 0) is replaced with
      constant zeros so the padding id embeds to the zero vector.
    scale: A boolean. If True, outputs are multiplied by sqrt(num_units).
    l2_reg: L2 regularization weight applied to the lookup table.
    scope: Optional scope for `variable_scope`.
    with_t: If True, also return the (possibly zero-padded) lookup table.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.

  Returns:
    A `Tensor` with one more rank than `inputs`, whose last dimension is
    `num_units`; when `with_t` is True, a (outputs, lookup_table) pair.
  """
  with tf.variable_scope(scope, reuse=reuse):
    table = tf.get_variable(
        "lookup_table",
        dtype=tf.float32,
        shape=[vocab_size, num_units],
        regularizer=tf.keras.regularizers.l2(l2_reg))
    if zero_pad:
      # Overwrite the first row with zeros; gradients still flow to the
      # remaining rows.
      table = tf.concat((tf.zeros(shape=[1, num_units]), table[1:, :]), 0)
    embedded = tf.nn.embedding_lookup(table, inputs)
    if scale:
      embedded = embedded * (num_units**0.5)
    return (embedded, table) if with_t else embedded
def preprocess_targets(targets, word2int, batch_size):
    """Prepends the <SOS> token id and drops the last token of every row.

    Args:
        targets: int tensor [batch_size, seq_len] of target token ids.
        word2int: dict mapping tokens to ids; must contain '<SOS>'.
        batch_size: number of rows in `targets`.

    Returns:
        An int tensor [batch_size, seq_len] whose row i is
        [<SOS>, targets[i, 0], ..., targets[i, seq_len - 2]].
    """
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
    # Keep every row but drop the final token (stride 1 in both dims).
    right_side = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
    # BUG FIX: tf.concat takes the tensors as one list argument; the original
    # call tf.concat(left_side, right_side, axis=1) passed `right_side` as the
    # positional `axis` and then repeated `axis` as a keyword -> TypeError.
    return tf.concat([left_side, right_side], axis=1)
def _construct_inner_networks(self):
  """Creates the Tensorflow subgraph for the inner optimization loop.

  For every task in the meta-batch this builds: (a) an inner "train" policy
  network parameterized by the shared meta-weights `self.weights`, (b) a
  policy-gradient loss and one explicit gradient step producing per-task
  adapted `test_weights`, and (c) an inner "test" policy network that uses
  those adapted weights. Placeholders, outputs, losses and adapted weights
  are accumulated into per-task lists on `self` for the outer loop to use.
  """
  # Per-task collections, filled once per task in the loop below.
  self.inner_train_inputs = []
  self.inner_train_outputs = []  # for debugging
  self.inner_train_next_inputs = []
  self.inner_train_actions = []
  self.inner_train_advantages = []
  self.inner_test_inputs = []
  self.inner_test_outputs = []
  self.inner_test_actions = []
  self.inner_test_advantages = []
  self.inner_train_losses = []
  self.inner_test_losses = []
  self.train_policies = []
  self.test_policies = []
  self.all_test_weights = []
  # inner "train" networks, 1 per task
  # technically, all these networks do the same,
  # just makes the code easier to maintain.
  for idx in range(self.tasks_batch_size):
    tf.logging.info('creating task train network: %d', idx)
    with tf.name_scope('task_%d' % idx):
      with tf.name_scope('train'):
        # Inner network: train
        network_input_train = tf.placeholder(
            tf.float32,
            shape=(None, self.input_dims),
            name='network_input_train_%d' % idx)
        network_output_inner_train = self.network_generator.construct_network(
            network_input_train,
            self.weights,
            scope='network_inner_train_%d' % idx)
        # Placeholder for the next observation; only consumed when the
        # advantage function is learned (see adv_input below).
        network_next_input_train = tf.placeholder(
            tf.float32,
            shape=(None, self.input_dims),
            name='network_next_input_train_%d' % idx)
        # Slap a policy on top of the network
        train_policy = self.policy(network_input_train,
                                   network_output_inner_train,
                                   self.output_dims,
                                   self.weights['policy_logstd'])
        self.train_policies.append(train_policy)
        self.inner_train_inputs.append(network_input_train)
        self.inner_train_outputs.append(network_output_inner_train)
        self.inner_train_next_inputs.append(network_next_input_train)
        # Compute policy gradient for this task
        # == gradient of expected reward wrt weights
        # We need a batch of rollouts for this.
        train_actions = tf.placeholder(
            tf.float32,
            shape=(None, self.output_dims),
            name='network_actions_train_%d' % idx)
        if not self.learn_advantage_function_inner:
          # Advantages are fed in from outside the graph.
          train_advantages = tf.placeholder(
              tf.float32,
              shape=(None, 1),
              name='network_advantages_train_%d' % idx)
        else:
          # Advantages are predicted by a learned network from
          # (next_state, state, action).
          adv_input = tf.concat(
              [network_next_input_train, network_input_train, train_actions],
              1)
          train_advantages = self.advantage_generator.construct_network(
              adv_input,
              self.adv_weights,
              scope='network_advantages_train_%d' % idx)
        train_policy_log_prob = train_policy.log_likelihood_op(train_actions)
        if self.ppo and (not self.learn_advantage_function_inner):
          # use PPO only if the advantage function is not learned
          # Clipped-surrogate objective; the old log-prob is the same graph
          # node with gradients stopped.
          old_train_policy_log_prob = tf.stop_gradient(train_policy_log_prob)
          ratio = tf.exp(train_policy_log_prob - old_train_policy_log_prob)
          clipped_ratio = tf.clip_by_value(ratio, 1 - self.ppo_clip_value,
                                           1 + self.ppo_clip_value)
          loss_inner_train = -tf.reduce_mean(
              tf.minimum(clipped_ratio * train_advantages,
                         ratio * train_advantages))
        else:
          # Plain policy-gradient surrogate loss.
          loss_inner_train = -tf.reduce_mean(
              train_policy_log_prob * train_advantages)
        self.inner_train_actions.append(train_actions)
        self.inner_train_advantages.append(train_advantages)
        self.inner_train_losses.append(loss_inner_train)
        # Gradient of the inner loss wrt each meta-weight tensor.
        grad_inner_train = {}
        for weight_key in self.weights:
          grad_inner_train[weight_key] = tf.gradients(
              loss_inner_train,
              self.weights[weight_key],
              name='%s_inner_%d' % (weight_key, idx))[0]
        # One explicit gradient step per weight: theta' = theta - a*grad (+e).
        test_weights = {}
        for weight_key in self.weights:
          theta = self.weights[weight_key]
          if self.first_order:
            # First-order approximation: do not backprop through the inner
            # gradient.
            grad = tf.stop_gradient(grad_inner_train[weight_key])
          else:
            grad = grad_inner_train[weight_key]
          if not self.learn_inner_lr_tensor:
            a = self.inner_lr
          else:
            # Per-weight learned inner learning rate.
            a = self.inner_lr[weight_key]
          if self.learn_offset:
            # Optional learned additive offset applied after the step.
            e = self.e_weights[weight_key]
            test_weights[weight_key] = theta - a * grad + e
          else:
            test_weights[weight_key] = theta - a * grad
      # inner "test" networks, 1 per task, weights = 1 gradient step of
      # corresponding "train" network
      with tf.name_scope('test'):
        # Inner network: test
        network_input_test = tf.placeholder(
            tf.float32,
            shape=(None, self.input_dims),
            name='network_input_test_%d' % idx)
        network_output_inner_test = self.network_generator.construct_network(
            network_input_test,
            test_weights,
            scope='network_inner_test_%d' % idx)
        # Slap a policy on top of the network
        test_policy = self.policy(network_input_test,
                                  network_output_inner_test,
                                  self.output_dims,
                                  test_weights['policy_logstd'])
        self.test_policies.append(test_policy)
        test_actions = tf.placeholder(
            tf.float32,
            shape=(None, self.output_dims),
            name='network_actions_test_%d' % idx)
        test_advantages = tf.placeholder(
            tf.float32,
            shape=(None, 1),
            name='network_advantages_test_%d' % idx)
        test_policy_log_prob = test_policy.log_likelihood_op(test_actions)
        if not self.ppo:
          loss_inner_test = -tf.reduce_mean(test_policy_log_prob *
                                            (test_advantages))
        else:
          old_test_policy_log_prob = tf.stop_gradient(test_policy_log_prob)
          ratio = tf.exp(test_policy_log_prob - old_test_policy_log_prob)
          clipped_ratio = tf.clip_by_value(ratio, 1 - self.ppo_clip_value,
                                           1 + self.ppo_clip_value)
          loss_inner_test = -tf.reduce_mean(
              tf.minimum(clipped_ratio * test_advantages,
                         ratio * test_advantages))
        # sum up all loss_inner_test variables to compute outer loss
        self.inner_test_losses.append(loss_inner_test)
        self.inner_test_inputs.append(network_input_test)
        self.inner_test_outputs.append(network_output_inner_test)
        self.inner_test_actions.append(test_actions)
        self.inner_test_advantages.append(test_advantages)
        self.all_test_weights.append(test_weights)