Example 1
def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
    """Inception model from http://arxiv.org/abs/1512.00567.
  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.
  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function, although they build the same
  network.
  Here is a mapping from the old names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c
  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.
  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a dictionary of activations for external use, for example
                summaries or losses.
  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
    # end_points will collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}

    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)

    with tf.variable_scope(scope, 'InceptionV3', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='VALID'):
            # 299 x 299 x 3
            end_point = 'Conv2d_1a_3x3'
            net = slim.conv2d(inputs,
                              depth(32), [3, 3],
                              stride=2,
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 149 x 149 x 32
            end_point = 'Conv2d_2a_3x3'
            net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 32
            end_point = 'Conv2d_2b_3x3'
            net = slim.conv2d(net,
                              depth(64), [3, 3],
                              padding='SAME',
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 64
            end_point = 'MaxPool_3a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 64
            end_point = 'Conv2d_3b_1x1'
            net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 80.
            end_point = 'Conv2d_4a_3x3'
            net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 71 x 71 x 192.
            end_point = 'MaxPool_5a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 35 x 35 x 192.

        # Inception blocks
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # mixed: 35 x 35 x 256.
            end_point = 'Mixed_5b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(32), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_1: 35 x 35 x 288.
            end_point = 'Mixed_5c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0b_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv_1_0c_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_2: 35 x 35 x 288.
            end_point = 'Mixed_5d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_3: 17 x 17 x 768.
            end_point = 'Mixed_6a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(384), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed4: 17 x 17 x 768.
            end_point = 'Mixed_6b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_5: 17 x 17 x 768.
            end_point = 'Mixed_6c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # mixed_6: 17 x 17 x 768.
            end_point = 'Mixed_6d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_7: 17 x 17 x 768.
            end_point = 'Mixed_6e'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_8: 8 x 8 x 1280.
            end_point = 'Mixed_7a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0,
                                           depth(320), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # mixed_9: 8 x 8 x 2048.
            end_point = 'Mixed_7b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0b_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_10: 8 x 8 x 2048.
            end_point = 'Mixed_7c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0c_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
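
As a usage sketch, the final_endpoint argument lets callers truncate the network and inspect intermediate activations. This assumes TF 1.x with tf.contrib.slim available and inception_v3_base in scope; it is an illustration, not part of the original listing.

import tensorflow as tf
slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
# Build the graph only up to Mixed_6e; end_points maps endpoint names to
# their activation tensors.
net, end_points = inception_v3_base(images, final_endpoint='Mixed_6e')
for name, activation in end_points.items():
    print(name, activation.get_shape().as_list())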
Example 2
def add_contrastive_loss(hidden,
                         hidden_norm=True,
                         temperature=1.0,
                         tpu_context=None,
                         weights=1.0):
    """Compute loss for model.

  Args:
    hidden: hidden vector (`Tensor`) of shape (2 * bsz, dim).
    hidden_norm: whether or not to use normalization on the hidden vector.
    temperature: a floating-point number for temperature scaling.
    tpu_context: context information for tpu.
    weights: a weighting number or vector.

  Returns:
    A loss scalar.
    The logits for contrastive prediction task.
    The labels for contrastive prediction task.
  """
    # Get (normalized) hidden1 and hidden2.
    if hidden_norm:
        hidden = tf.math.l2_normalize(hidden, -1)
    # `hidden` stacks the embeddings of two augmented views of the same batch
    # along axis 0, so splitting it in half recovers the per-view embeddings,
    # each of shape (bsz, dim).
    hidden1, hidden2 = tf.split(hidden, 2, 0)
    batch_size = tf.shape(hidden1)[0]
    # NOTE: hidden1 and hidden2 could instead come from two different base
    # models; only the construction of `hidden` upstream would change.

    # Gather hidden1/hidden2 across replicas and create local labels.
    if tpu_context is not None:
        hidden1_large = tpu_cross_replica_concat(hidden1, tpu_context)
        hidden2_large = tpu_cross_replica_concat(hidden2, tpu_context)
        enlarged_batch_size = tf.shape(hidden1_large)[0]
        # TODO(iamtingchen): more elegant way to convert u32 to s32 for replica_id.
        replica_id = tf.cast(tf.cast(xla.replica_id(), tf.uint32), tf.int32)
        labels_idx = tf.range(batch_size) + replica_id * batch_size
        labels = tf.one_hot(labels_idx, enlarged_batch_size * 2)
        masks = tf.one_hot(labels_idx, enlarged_batch_size)
    else:
        hidden1_large = hidden1
        hidden2_large = hidden2
        labels = tf.one_hot(tf.range(batch_size), batch_size * 2)
        masks = tf.one_hot(tf.range(batch_size), batch_size)

    logits_aa = tf.matmul(hidden1, hidden1_large,
                          transpose_b=True) / temperature
    logits_aa = logits_aa - masks * LARGE_NUM
    logits_bb = tf.matmul(hidden2, hidden2_large,
                          transpose_b=True) / temperature
    logits_bb = logits_bb - masks * LARGE_NUM
    logits_ab = tf.matmul(hidden1, hidden2_large,
                          transpose_b=True) / temperature
    logits_ba = tf.matmul(hidden2, hidden1_large,
                          transpose_b=True) / temperature

    loss_a = tf.losses.softmax_cross_entropy(labels,
                                             tf.concat([logits_ab, logits_aa],
                                                       1),
                                             weights=weights)
    loss_b = tf.losses.softmax_cross_entropy(labels,
                                             tf.concat([logits_ba, logits_bb],
                                                       1),
                                             weights=weights)
    loss = loss_a + loss_b

    return loss, logits_ab, labels
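
A minimal usage sketch, assuming TF 1.x and a module-level LARGE_NUM constant (1e9, as in the SimCLR code this loss follows); the batch size and dimension are hypothetical.

import tensorflow as tf

LARGE_NUM = 1e9  # assumed module constant used to mask self-similarity logits

bsz, dim = 128, 64
# Embeddings of two augmented views of the same batch, stacked along axis 0.
hidden = tf.random.normal([2 * bsz, dim])
loss, logits_ab, labels = add_contrastive_loss(hidden, temperature=0.5)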
Example 3
def test_decompress(args):
    """Decompresses an image."""

    # Read the shape information and compressed string from the binary file.
    string = tf.placeholder(tf.string, [1])
    side_string = tf.placeholder(tf.string, [1])
    x_shape = tf.placeholder(tf.int32, [2])
    y_shape = tf.placeholder(tf.int32, [2])
    z_shape = tf.placeholder(tf.int32, [2])
    with open(args.input_file, "rb") as f:
        packed = tfc.PackedTensors(f.read())
    tensors = [string, side_string, x_shape, y_shape, z_shape]
    arrays = packed.unpack(tensors)

    # Instantiate model.
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)

    # Decompress and transform the image back.
    z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
    z_hat = entropy_bottleneck.decompress(side_string,
                                          z_shape,
                                          channels=args.num_filters)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[0], :y_shape[1], :]
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma,
                                                     scale_table,
                                                     dtype=tf.float32)
    y_hat_all = conditional_bottleneck.decompress(string)

    x = read_png("kodak/kodim01.png")
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)
    x *= 255

    # Full-rate reconstruction using all 192 latent channels.
    active = 192
    y_hat = y_hat_all[:, :, :, :active]
    x_hat = synthesis_transform(y_hat)
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    #x_hat = x_hat[0, :x_shape[0], :x_shape[1], :]
    #op = write_png(args.output_file, x_hat)

    sess = tf.Session()
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    #sess.run(op, feed_dict=dict(zip(tensors, arrays)))

    #vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim], feed_dict=dict(zip(tensors, arrays)))
    #print(vmse, vpsnr, vmsssim)

    # Sweep the number of active latent channels from 192 down to 8 in steps
    # of 8, measuring reconstruction quality at each setting.
    for active in range(192, 0, -8):
        y_hat = y_hat_all[:, :, :, :active]
        x_hat = synthesis_transform(y_hat)
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)
        mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
        psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
        msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))
        vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim],
                                        feed_dict=dict(zip(tensors, arrays)))
        print(active, vmse, vpsnr, vmsssim)
Example 4
    def _build_activation_vars(self, input_act_vars):
        return tf.concat(axis=self.axis, values=input_act_vars)
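
For context, this thin wrapper simply forwards to tf.concat along the layer's configured axis. A standalone sketch of the underlying op (the axis value here is hypothetical):

import tensorflow as tf

a = tf.ones([2, 3])
b = tf.zeros([2, 5])
# All dimensions except the concatenation axis must match; result is [2, 8].
c = tf.concat(axis=1, values=[a, b])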
Example 5
def detection_loss(cls_outputs, box_outputs, labels, params):
  """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.
  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in [batch_size, height, width,
      num_anchors * 4].
    labels: the dictionary returned from the dataloader that includes
      groundtruth targets.
    params: the dictionary including training parameters specified in the
      default_hparams function in this file.

  Returns:
    total_loss: a float tensor representing the total loss reduced from
      class and box losses from all levels.
    cls_loss: a float tensor representing the total class loss.
    box_loss: a float tensor representing the total box regression loss.
    box_iou_loss: a float tensor representing the total box iou loss.
  """
  # Sum all positives in a batch for normalization and avoid zero
  # num_positives_sum, which would lead to inf loss during training
  num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
  positives_momentum = params.get('positives_momentum', None) or 0
  if positives_momentum > 0:
    # normalize the num_positive_examples for training stability.
    moving_normalizer_var = tf.Variable(
        0.0,
        name='moving_normalizer',
        dtype=tf.float32,
        synchronization=tf.VariableSynchronization.ON_READ,
        trainable=False,
        aggregation=tf.VariableAggregation.MEAN)
    num_positives_sum = tf.keras.backend.moving_average_update(
        moving_normalizer_var,
        num_positives_sum,
        momentum=params['positives_momentum'])
  elif positives_momentum < 0:
    num_positives_sum = utils.cross_replica_mean(num_positives_sum)

  levels = cls_outputs.keys()
  cls_losses = []
  box_losses = []
  for level in levels:
    # Onehot encoding for classification labels.
    cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                      params['num_classes'])

    if params['data_format'] == 'channels_first':
      bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, -1, width, height])
    else:
      bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, width, height, -1])
    box_targets_at_level = labels['box_targets_%d' % level]

    cls_loss = focal_loss(
        cls_outputs[level],
        cls_targets_at_level,
        params['alpha'],
        params['gamma'],
        normalizer=num_positives_sum,
        label_smoothing=params['label_smoothing'])

    if params['data_format'] == 'channels_first':
      cls_loss = tf.reshape(cls_loss,
                            [bs, -1, width, height, params['num_classes']])
    else:
      cls_loss = tf.reshape(cls_loss,
                            [bs, width, height, -1, params['num_classes']])
    cls_loss *= tf.cast(
        tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2), -1),
        tf.float32)
    cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

    if params['box_loss_weight']:
      box_losses.append(
          _box_loss(
              box_outputs[level],
              box_targets_at_level,
              num_positives_sum,
              delta=params['delta']))

  if params['iou_loss_type']:
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'],
                                    params['image_size'])
    box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
    box_outputs = tf.concat(box_output_list, axis=0)
    box_target_list = [
        tf.reshape(labels['box_targets_%d' % level], [-1, 4])
        for level in levels
    ]
    box_targets = tf.concat(box_target_list, axis=0)
    anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
    box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
    box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
    box_iou_loss = _box_iou_loss(box_outputs, box_targets, num_positives_sum,
                                 params['iou_loss_type'])

  else:
    box_iou_loss = 0

  # Sum per level losses to total loss.
  cls_loss = tf.add_n(cls_losses)
  box_loss = tf.add_n(box_losses) if box_losses else 0

  total_loss = (
      cls_loss +
      params['box_loss_weight'] * box_loss +
      params['iou_loss_weight'] * box_iou_loss)

  return total_loss, cls_loss, box_loss, box_iou_loss
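
For orientation, a standalone sketch of the per-level class-target reshape used above for channels-last data; all shapes here are hypothetical.

import tensorflow as tf

cls_targets = tf.zeros([8, 64, 64, 9], tf.int32)  # [bs, height, width, anchors]
one_hot = tf.one_hot(cls_targets, 90)             # appends a class dimension
flat = tf.reshape(one_hot, [8, 64, 64, -1])       # folds anchors and classes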
Example 6
def quantizable_concat(inputs,
                       axis,
                       is_training,
                       is_quantized=True,
                       default_min=0,
                       default_max=6,
                       ema_decay=0.999,
                       scope='quantized_concat'):
    """Concat replacement with quantization option.

  Allows concat inputs to share the same min max ranges,
  from experimental/gazelle/synthetic/model/tpu/utils.py.

  Args:
    inputs: list of tensors to concatenate.
    axis: dimension along which to concatenate.
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from concatenation of input tensors
  """
    if is_quantized:
        with tf.variable_scope(scope):
            tf.logging.info('inputs: {}'.format(inputs))
            for t in inputs:
                tf.logging.info(t)

            min_var = _quant_var('min', default_min)
            max_var = _quant_var('max', default_max)
            if not is_training:
                # If we are building an eval graph just use the values in the variables.
                quant_inputs = [
                    tf.fake_quant_with_min_max_vars(t, min_var, max_var)
                    for t in inputs
                ]
                tf.logging.info('min_val: {}'.format(min_var))
                tf.logging.info('max_val: {}'.format(max_var))
            else:
                concat_tensors = tf.concat(inputs, axis=axis)
                tf.logging.info('concat_tensors: {}'.format(concat_tensors))
                # TFLite requires that 0.0 is always in the [min; max] range.
                range_min = tf.minimum(tf.reduce_min(concat_tensors),
                                       0.0,
                                       name='SafeQuantRangeMin')
                range_max = tf.maximum(tf.reduce_max(concat_tensors),
                                       0.0,
                                       name='SafeQuantRangeMax')
                # Otherwise we need to keep track of the moving averages of the
                # min and max of the elements of the input tensor.
                min_val = moving_averages.assign_moving_average(
                    min_var, range_min, ema_decay, name='AssignMinEma')
                max_val = moving_averages.assign_moving_average(
                    max_var, range_max, ema_decay, name='AssignMaxEma')
                tf.logging.info('min_val: {}'.format(min_val))
                tf.logging.info('max_val: {}'.format(max_val))
                quant_inputs = [
                    tf.fake_quant_with_min_max_vars(t, min_val, max_val)
                    for t in inputs
                ]
            tf.logging.info('quant_inputs: {}'.format(quant_inputs))
            outputs = tf.concat(quant_inputs, axis=axis)
            tf.logging.info('outputs: {}'.format(outputs))
    else:
        outputs = tf.concat(inputs, axis=axis)
    return outputs
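
A minimal usage sketch, assuming TF 1.x and that _quant_var (defined elsewhere in the original module) creates the min/max variables; the tensor shapes are hypothetical.

import tensorflow as tf

x = tf.random.uniform([1, 8, 8, 16])
y = tf.random.uniform([1, 8, 8, 32])
# In training mode the shared min/max EMAs are updated from the concatenated
# values before every input is fake-quantized with the same range.
out = quantizable_concat([x, y], axis=3, is_training=True)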
Example 7
def ssd_parse_example_proto(example_serialized):
    """Parses an Example proto containing a training example of an image.

  Each Example proto contains the following fields that we care about:

    image/encoded: <JPEG encoded string>
    image/source_id: tf.string
    image/height: tf.int64
    image/width: tf.int64
    image/object/bbox/xmin: tf.VarLenFeature(tf.float32)
    image/object/bbox/xmax: tf.VarLenFeature(tf.float32)
    image/object/bbox/ymin: tf.VarLenFeature(tf.float32)
    image/object/bbox/ymax: tf.VarLenFeature(tf.float32)
    image/object/class/label: tf.VarLenFeature(tf.int64)
    image/object/class/text: tf.VarLenFeature(tf.string)

  Complete decoder can be found in:
  https://github.com/tensorflow/models/blob/master/research/object_detection/data_decoders/tf_example_decoder.py

  Args:
    example_serialized: scalar Tensor tf.string containing a serialized
      Example protocol buffer.

  Returns:
    A dictionary with the following key-values:
    image_buffer: Tensor tf.string containing the contents of a JPEG file.
    groundtruth_boxes: Tensor tf.float32 of shape [num_boxes, 4], containing
      coordinates of object bounding boxes.
    groundtruth_classes: Tensor tf.int64 of shape [num_boxes, 1], containing
      class labels of objects.
    source_id: unique image identifier.
    raw_shape: [height, width, 3].
  """
    feature_map = {
        'image/encoded': tf.FixedLenFeature((),
                                            dtype=tf.string,
                                            default_value=''),
        'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/height': tf.FixedLenFeature((), tf.int64, default_value=1),
        'image/width': tf.FixedLenFeature((), tf.int64, default_value=1),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    features = tf.parse_single_example(example_serialized, feature_map)

    xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 1)
    ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 1)
    xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 1)
    ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 1)

    image_buffer = features['image/encoded']
    # Bounding box coordinates are stacked in [ymin, xmin, ymax, xmax] order.
    boxes = tf.concat([ymin, xmin, ymax, xmax], 1)
    classes = tf.expand_dims(features['image/object/class/label'].values, 1)
    source_id = features['image/source_id']
    raw_shape = tf.stack(
        [features['image/height'], features['image/width'], 3])

    return {
        'image_buffer': image_buffer,
        'groundtruth_boxes': boxes,
        'groundtruth_classes': classes,
        'source_id': source_id,
        'raw_shape': raw_shape
    }
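
A sketch of wiring this parser into an input pipeline; the file name is hypothetical.

import tensorflow as tf

dataset = tf.data.TFRecordDataset(['ssd_train-00000-of-00001.tfrecord'])
# Each serialized Example is parsed into the dictionary described above.
dataset = dataset.map(ssd_parse_example_proto)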
Example 8
def input_fn(data_files,
             batch_size,
             repeat=-1,
             data_source=DataSource.RICO_SCA,
             required_agreement=2,
             max_range=1000,
             max_dom_pos=2000,
             max_pixel_pos=100,
             load_dom_dist=False,
             load_extra=False,
             buffer_size=8 * 1024,
             shuffle_size=8 * 1024,
             required_rule_id_list=None,
             shuffle_repeat=True,
             mean_synthetic_length=1.0,
             stddev_synthetic_length=0.0,
             load_screen=True,
             shuffle_files=True):
  """Retrieves batches of data for training.

  Adds padding to ensure all dimension in one batch are always same.

  Args:
    data_files: A list of file names to initialize the TFRecordDataset
    batch_size: the number of examples in each batch.
    repeat: the number of times to repeat the input data.
    data_source: A DataSource instance.
    required_agreement: the minimum agreement required.
    max_range: the max range.
    max_dom_pos: the max dom pos.
    max_pixel_pos: the max screen pixels.
    load_dom_dist: whether to load the dom distance feature.
    load_extra: whether to load the raw text data.
    buffer_size: the buffer size for prefetching.
    shuffle_size: the shuffle size.
    required_rule_id_list: the list of required rule ids.
    shuffle_repeat: whether to shuffle and repeat.
    mean_synthetic_length: the mean length for synthetic sequence.
    stddev_synthetic_length: the stddev length for synthetic sequence.
    load_screen: whether to load screen features.
    shuffle_files: shuffling file names.
  Returns:
    a tf.data.Dataset object.
  Raises:
    ValueError: The data_format is neither 'recordio' nor 'tfrecord'.
  """
  if not isinstance(data_source, DataSource):
    raise ValueError('data_source %s unsupported' % str(data_source))
  padded_shapes, padded_values = _construct_padding_info(
      data_source, load_dom_dist, load_extra)
  if not isinstance(data_files, (list,)):
    data_files = [data_files]
  all_files = tf.concat(
      values=[tf.matching_files(f) for f in data_files], axis=0)
  if repeat == -1 and shuffle_files:
    all_files = tf.random.shuffle(all_files)
  if data_files[0].endswith('.recordio'):
    dataset = tf.data.RecordIODataset(all_files)
  elif data_files[0].endswith('.tfrecord'):
    dataset = tf.data.TFRecordDataset(
        all_files, num_parallel_reads=10 if repeat == -1 else None)
  else:
    raise ValueError('Data format %s is not supported.' % data_files[0])

  def _map_fn(x):
    return parse_tf_example(x, data_source, max_range, max_dom_pos,
                            max_pixel_pos, load_dom_dist=load_dom_dist,
                            load_extra=load_extra,
                            append_eos=(data_source != DataSource.RICO_SCA or
                                        mean_synthetic_length == 1.0),
                            load_screen=load_screen)
  dataset = dataset.map(_map_fn)
  def _is_enough_agreement(example):
    return tf.greater_equal(example['agreement_count'], required_agreement)
  dataset = dataset.filter(_is_enough_agreement)

  def _length_filter(example):
    return tf.less(tf.shape(example['obj_refs'])[0], 20)
  dataset = dataset.filter(_length_filter)

  def _filter_data_by_rule(example, rule_id_list):
    return tf.reduce_any(
        [tf.equal(example['rule'], rule_id) for rule_id in rule_id_list])
  if data_source == DataSource.RICO_SCA and required_rule_id_list is not None:
    dataset = dataset.filter(
        lambda x: _filter_data_by_rule(x, required_rule_id_list))

  # (TODO: liyang) tf.data.experimental.bucket_by_sequence_length
  if shuffle_repeat:
    dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(
        shuffle_size, count=repeat))
  dataset = dataset.padded_batch(
      batch_size, padded_shapes=padded_shapes, padding_values=padded_values)
  if data_source == DataSource.RICO_SCA and mean_synthetic_length > 1.0:
    def _stitch_fn(x):
      return _batch_stitch(x, mean_length=mean_synthetic_length,
                           stddev=stddev_synthetic_length)
    dataset = dataset.map(_stitch_fn)
  dataset = dataset.prefetch(buffer_size=buffer_size)
  return dataset
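
A hedged usage sketch; the file name is hypothetical and the surrounding module's DataSource enum is assumed.

import tensorflow as tf

dataset = input_fn(['rico_sca.tfrecord'], batch_size=32,
                   data_source=DataSource.RICO_SCA)
features = tf.data.make_one_shot_iterator(dataset).get_next()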
Example 9
def parse_tf_example(example_proto,
                     data_source,
                     max_range=100,
                     max_dom_pos=2000,
                     max_pixel_pos=100,
                     load_dom_dist=False,
                     load_extra=False,
                     append_eos=True,
                     load_screen=True):
  """Parses an example TFRecord proto into dictionary of tensors.

  Args:
    example_proto: TFRecord format proto that contains screen information.
    data_source: A DataSource instance.
    max_range: the max range.
    max_dom_pos: the maximum dom position.
    max_pixel_pos: the maximum screen pixel position.
    load_dom_dist: whether to load the feature.
    load_extra: whether to load the extra data for debugging.
    append_eos: whether to append eos.
    load_screen: whether to load screen features.
  Returns:
    feature: the parsed tensor dictionary with the input feature data.
  """
  feature_spec = {
      'instruction_word_id_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'input_str_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'obj_desc_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'verb_str_position_seq':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'agreement_count':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
      'instruction_rule_id':
          tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
  }
  if load_screen:
    feature_spec['verb_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_target_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_word_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_type_id_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_clickable_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
    feature_spec['ui_obj_cord_x_seq'] = tf.FixedLenSequenceFeature(
        [], tf.float32, allow_missing=True)
    feature_spec['ui_obj_cord_y_seq'] = tf.FixedLenSequenceFeature(
        [], tf.float32, allow_missing=True)
    feature_spec['ui_obj_dom_location_seq'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)

  if load_dom_dist:
    feature_spec['ui_obj_dom_distance'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=True)
  if load_extra:
    feature_spec['instruction_str'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)
    feature_spec['task_id'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)
    feature_spec['ui_obj_str_seq'] = tf.FixedLenSequenceFeature(
        [], tf.string, allow_missing=True)

  feature_dict = tf.parse_single_example(example_proto, feature_spec)

  for key in feature_dict:
    if feature_dict[key].dtype == tf.int64:
      feature_dict[key] = tf.cast(feature_dict[key], tf.int32)
  if data_source == DataSource.ANDROID_HOWTO:
    tf.logging.info('Parsing android_howto dataset')
    feature = _process_android_howto(feature_dict, max_range=max_range,
                                     load_dom_dist=load_dom_dist,
                                     load_extra=load_extra)
  elif data_source == DataSource.RICO_SCA:
    tf.logging.info('Parsing synthetic dataset')
    feature = _process_rico_sca(
        feature_dict, max_range=max_range, max_dom_pos=max_dom_pos,
        load_dom_dist=load_dom_dist,
        load_extra=load_extra,
        load_screen=load_screen)
  elif data_source == DataSource.PIXEL_HELP:
    tf.logging.info('Parsing test dataset')
    feature = _process_pixel_help(feature_dict, data_source,
                                  load_dom_dist=load_dom_dist,
                                  load_extra=load_extra)
  else:
    raise ValueError('Unsupported datasource %s' % str(data_source))
  # Remove padding from "task"
  feature['task'] = tf.boolean_mask(feature['task'],
                                    tf.not_equal(feature['task'], 0))
  feature['obj_screen_pos'] = tf.to_int32(
      feature['obj_screen_pos'] * (max_pixel_pos - 1))
  # Appending EOS and padding to match the appended length
  if append_eos:
    feature['input_refs'] = tf.pad(feature['input_refs'], [[0, 1], [0, 0]])
    feature['obj_refs'] = tf.pad(feature['obj_refs'], [[0, 1], [0, 0]])
    step_num = tf.size(feature['task'])
    feature['verb_refs'] = tf.concat(
        [feature['verb_refs'], [[step_num, step_num + 1]]], axis=0)
    feature['task'] = tf.pad(feature['task'], [[0, 1]], constant_values=1)
    feature['obj_text'] = tf.pad(feature['obj_text'], [[0, 1], [0, 0], [0, 0]])
    feature['obj_clickable'] = tf.pad(feature['obj_clickable'],
                                      [[0, 1], [0, 0]])
    feature['obj_type'] = tf.pad(
        feature['obj_type'], [[0, 1], [0, 0]], constant_values=-1)
    feature['obj_screen_pos'] = tf.pad(feature['obj_screen_pos'],
                                       [[0, 1], [0, 0], [0, 0]])
    feature['obj_dom_pos'] = tf.pad(feature['obj_dom_pos'],
                                    [[0, 1], [0, 0], [0, 0]])
    if load_dom_dist:
      feature['obj_dom_dist'] = tf.pad(feature['obj_dom_dist'],
                                       [[0, 1], [0, 0], [0, 0]])
    feature['objects'] = tf.pad(feature['objects'], [[0, 1]])
    feature['verbs'] = tf.pad(feature['verbs'], [[0, 1]])
  return feature
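
The EOS handling above is built entirely on tf.pad with constant_values. A minimal sketch of the same pattern on a toy 1-D task tensor (TF 1.x graph mode, matching the surrounding examples; the token ids are hypothetical):

import tensorflow as tf

# Toy "task" sequence; 0 is padding and 1 is reserved for EOS (assumed ids).
task = tf.constant([5, 9, 7], dtype=tf.int32)

# Append one EOS step, mirroring
# tf.pad(feature['task'], [[0, 1]], constant_values=1) above.
task_with_eos = tf.pad(task, [[0, 1]], constant_values=1)

with tf.Session() as sess:
    print(sess.run(task_with_eos))  # [5 9 7 1]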
Example no. 10
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""
        hidden_size = self.params.controller_hidden_size
        num_layers = self.params.controller_num_layers

        arc_seq = []
        sample_log_probs = []
        sample_entropy = []
        all_h = [tf.zeros([1, hidden_size], dtype=tf.float32)]
        all_h_w = [tf.zeros([1, hidden_size], dtype=tf.float32)]

        # sampler ops
        inputs = self.g_emb
        prev_c = tf.zeros([1, hidden_size], dtype=tf.float32)
        prev_h = tf.zeros([1, hidden_size], dtype=tf.float32)

        for layer_id in range(1, num_layers + 1):
            next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            all_h.append(next_h)
            all_h_w.append(tf.matmul(next_h, self.attn_w_1))

            query = tf.matmul(next_h, self.attn_w_2)
            query = query + tf.concat(all_h_w[:-1], axis=0)
            query = tf.tanh(query)
            logits = tf.matmul(query, self.attn_v)
            logits = tf.reshape(logits, [1, layer_id])

            if self.params.controller_temperature:
                logits /= self.params.controller_temperature
            if self.params.controller_tanh_constant:
                logits = self.params.controller_tanh_constant * tf.tanh(logits)
            diff = tf.to_float(layer_id - tf.range(0, layer_id))**2
            logits -= tf.reshape(diff, [1, layer_id]) / 6.0

            skip_index = tf.multinomial(logits, 1)
            skip_index = tf.to_int32(skip_index)
            skip_index = tf.reshape(skip_index, [1])
            arc_seq.append(skip_index)

            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=skip_index)
            sample_log_probs.append(log_prob)

            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))

            inputs = tf.nn.embedding_lookup(tf.concat(all_h[:-1], axis=0),
                                            skip_index)
            inputs /= (0.1 + tf.to_float(layer_id - skip_index))

            next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h, self.w_emb, transpose_b=True)
            if self.params.controller_temperature:
                logits /= self.params.controller_temperature
            if self.params.controller_tanh_constant:
                logits = self.params.controller_tanh_constant * tf.tanh(logits)
            func = tf.multinomial(logits, 1)
            func = tf.to_int32(func)
            func = tf.reshape(func, [1])
            arc_seq.append(func)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=func)
            sample_log_probs.append(log_prob)
            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))
            inputs = tf.nn.embedding_lookup(self.w_emb, func)

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
        self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

        sample_entropy = tf.concat(sample_entropy, axis=0)
        self.sample_entropy = tf.reduce_sum(sample_entropy)

        self.all_h = all_h
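
The sampler repeatedly pairs tf.multinomial with sparse_softmax_cross_entropy_with_logits: the cross-entropy at the sampled index equals -log p(sample), so it serves as the sample's negative log-probability, and log_prob * exp(-log_prob) is the per-sample entropy term -p log p. A minimal sketch of that trick in isolation (TF 1.x; the logits are hypothetical):

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])          # [1, n_choices]
sample = tf.reshape(tf.to_int32(tf.multinomial(logits, 1)), [1])

# Cross-entropy of the sampled index == -log p(sample).
neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=sample)

# Per-sample entropy term: (-log p) * p, as in the sampler above.
entropy_term = neg_log_prob * tf.exp(-neg_log_prob)

with tf.Session() as sess:
    print(sess.run([sample, neg_log_prob, entropy_term]))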
Example no. 11
def eager_eval_loop(detection_model,
                    configs,
                    eval_dataset,
                    use_tpu=False,
                    postprocess_on_cpu=False,
                    global_step=None):
    """Evaluate the model eagerly on the evaluation dataset.

  This method will compute the evaluation metrics specified in the configs on
  the entire evaluation dataset, then return the metrics. It will also log
  the metrics to TensorBoard.

  Args:
    detection_model: A DetectionModel (based on Keras) to evaluate.
    configs: Object detection configs that specify the evaluators that should
      be used, as well as whether regularization loss should be included and
      if bfloat16 should be used on TPUs.
    eval_dataset: Dataset containing evaluation data.
    use_tpu: Whether a TPU is being used to execute the model for evaluation.
    postprocess_on_cpu: Whether model postprocessing should happen on
      the CPU when using a TPU to execute the model.
    global_step: A variable containing the training step this model was trained
      to. Used for logging purposes.

  Returns:
    A dict of evaluation metrics representing the results of this evaluation.
  """
    train_config = configs['train_config']
    eval_input_config = configs['eval_input_config']
    eval_config = configs['eval_config']
    add_regularization_loss = train_config.add_regularization_loss

    is_training = False
    detection_model._is_training = is_training  # pylint: disable=protected-access
    tf.keras.backend.set_learning_phase(is_training)

    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)

    class_agnostic_category_index = (
        label_map_util.create_class_agnostic_category_index())
    class_agnostic_evaluators = eval_util.get_evaluators(
        eval_config, list(class_agnostic_category_index.values()),
        evaluator_options)

    class_aware_evaluators = None
    if eval_input_config.label_map_path:
        class_aware_category_index = (
            label_map_util.create_category_index_from_labelmap(
                eval_input_config.label_map_path))
        class_aware_evaluators = eval_util.get_evaluators(
            eval_config, list(class_aware_category_index.values()),
            evaluator_options)

    evaluators = None
    loss_metrics = {}

    @tf.function
    def compute_eval_dict(features, labels):
        """Compute the evaluation result on an image."""
        # When evaluating on training data, it is necessary to check whether
        # the groundtruth must be unpadded.
        boxes_shape = (labels[
            fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
        unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
        labels = model_lib.unstack_batch(
            labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

        losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
            detection_model, features, labels, add_regularization_loss)

        def postprocess_wrapper(args):
            return detection_model.postprocess(args[0], args[1])

        # TODO(kaftan): Depending on how postprocessing will work for TPUs w/
        ## TPUStrategy, it may be good to move wrapping to a utility method.
        if use_tpu and postprocess_on_cpu:
            detections = contrib_tpu.outside_compilation(
                postprocess_wrapper,
                (prediction_dict,
                 features[fields.InputDataFields.true_image_shape]))
        else:
            detections = postprocess_wrapper(
                (prediction_dict,
                 features[fields.InputDataFields.true_image_shape]))

        class_agnostic = (fields.DetectionResultFields.detection_classes
                          not in detections)
        # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval` to
        ## eval_util and call this from there.
        groundtruth = model_lib._prepare_groundtruth_for_eval(  # pylint: disable=protected-access
            detection_model, class_agnostic,
            eval_input_config.max_number_of_boxes)
        use_original_images = fields.InputDataFields.original_image in features
        if use_original_images:
            eval_images = features[fields.InputDataFields.original_image]
            true_image_shapes = tf.slice(
                features[fields.InputDataFields.true_image_shape], [0, 0],
                [-1, 3])
            original_image_spatial_shapes = features[
                fields.InputDataFields.original_image_spatial_shape]
        else:
            eval_images = features[fields.InputDataFields.image]
            true_image_shapes = None
            original_image_spatial_shapes = None

        eval_dict = eval_util.result_dict_for_batched_example(
            eval_images,
            features[inputs.HASH_KEY],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=True,
            original_image_spatial_shapes=original_image_spatial_shapes,
            true_image_shapes=true_image_shapes)

        return eval_dict, losses_dict, class_agnostic

    agnostic_categories = label_map_util.create_class_agnostic_category_index()
    per_class_categories = label_map_util.create_category_index_from_labelmap(
        eval_input_config.label_map_path)
    keypoint_edges = [(kp.start, kp.end) for kp in eval_config.keypoint_edge]

    for i, (features, labels) in enumerate(eval_dataset):
        eval_dict, losses_dict, class_agnostic = compute_eval_dict(
            features, labels)

        if class_agnostic:
            category_index = agnostic_categories
        else:
            category_index = per_class_categories

        if i % 100 == 0:
            tf.logging.info('Finished eval step %d', i)

        use_original_images = fields.InputDataFields.original_image in features
        if use_original_images and i < eval_config.num_visualizations:
            sbys_image_list = vutils.draw_side_by_side_evaluation_image(
                eval_dict,
                category_index=category_index,
                max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                min_score_thresh=eval_config.min_score_threshold,
                use_normalized_coordinates=False,
                keypoint_edges=keypoint_edges or None)
            sbys_images = tf.concat(sbys_image_list, axis=0)
            tf.compat.v2.summary.image(
                name='eval_side_by_side_' + str(i),
                step=global_step,
                data=sbys_images,
                max_outputs=eval_config.num_visualizations)
            if eval_util.has_densepose(eval_dict):
                dp_image_list = vutils.draw_densepose_visualizations(eval_dict)
                dp_images = tf.concat(dp_image_list, axis=0)
                tf.compat.v2.summary.image(
                    name='densepose_detections_' + str(i),
                    step=global_step,
                    data=dp_images,
                    max_outputs=eval_config.num_visualizations)

        if evaluators is None:
            if class_agnostic:
                evaluators = class_agnostic_evaluators
            else:
                evaluators = class_aware_evaluators

        for evaluator in evaluators:
            evaluator.add_eval_dict(eval_dict)

        for loss_key, loss_tensor in iter(losses_dict.items()):
            if loss_key not in loss_metrics:
                loss_metrics[loss_key] = tf.keras.metrics.Mean()
            # Skip losses with values equal to or lower than 0.0 when computing
            # the average loss, since they usually do not reflect normal loss
            # values and would skew the average.
            if loss_tensor <= 0.0:
                continue
            loss_metrics[loss_key].update_state(loss_tensor)

    eval_metrics = {}

    for evaluator in evaluators:
        eval_metrics.update(evaluator.evaluate())
    for loss_key in loss_metrics:
        eval_metrics[loss_key] = loss_metrics[loss_key].result()

    eval_metrics = {str(k): v for k, v in eval_metrics.items()}
    tf.logging.info('Eval metrics at step %d', global_step)
    for k in eval_metrics:
        tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
        tf.logging.info('\t+ %s: %f', k, eval_metrics[k])

    return eval_metrics
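
The loss aggregation above uses tf.keras.metrics.Mean as a streaming average, skipping non-positive values. A minimal sketch of that pattern (assumes eager execution, as in the loop above; the loss values are made up):

import tensorflow as tf

loss_metric = tf.keras.metrics.Mean()
for loss_value in [0.8, 1.2, -1.0, 0.4]:
    # Mirror the guard above: ignore losses <= 0.0.
    if loss_value <= 0.0:
        continue
    loss_metric.update_state(loss_value)

print(float(loss_metric.result()))  # (0.8 + 1.2 + 0.4) / 3 = 0.8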
Example no. 12
    def _build(self, features, parent_transform=None, parent_presence=None):
        """Builds the module.

    Args:
      features: Tensor of encodings of shape [B, n_enc_dims].
      parent_transform: Tuple of (matrix, vector).
      parent_presence: Tensor of presence probabilities for the parent
        capsules, or None.

    Returns:
      An AttrDict with votes, per-vote scales, presence logits, and related
      tensors.
    """
        batch_size = features.shape.as_list()[0]
        batch_shape = [batch_size, self._n_caps]

        # Predict capsule and additional params from the input encoding.
        # [B, n_caps, n_caps_dims]
        if self._n_caps_params is not None:

            # Use separate parameters to do predictions for different capsules.
            mlp = BatchMLP(self._n_hiddens + [self._n_caps_params])
            raw_caps_params = mlp(features)

            caps_params = tf.reshape(raw_caps_params,
                                     batch_shape + [self._n_caps_params])

        else:
            assert features.shape[:2].as_list() == batch_shape
            caps_params = features
            # Assumed fix: keep `raw_caps_params` defined on this branch too,
            # since it is referenced in the return value below.
            raw_caps_params = features

        if self._caps_dropout_rate == 0.0:
            caps_exist = tf.ones(batch_shape + [1], dtype=tf.float32)
        else:
            pmf = tfd.Bernoulli(1. - self._caps_dropout_rate, dtype=tf.float32)
            caps_exist = pmf.sample(batch_shape + [1])

        caps_params = tf.concat([caps_params, caps_exist], -1)

        output_shapes = (
            [self._n_votes, self._n_transform_params],  # CPR_dynamic
            [1, self._n_transform_params],  # CCR
            [1],  # per-capsule presence
            [self._n_votes],  # per-vote-presence
            [self._n_votes],  # per-vote scale
        )

        splits = [np.prod(i).astype(np.int32) for i in output_shapes]
        n_outputs = sum(splits)

        # we don't use bias in the output layer in order to separate the static
        # and dynamic parts of the CPR
        caps_mlp = BatchMLP([self._n_hiddens, n_outputs], use_bias=False)
        all_params = caps_mlp(caps_params)
        all_params = tf.split(all_params, splits, -1)
        res = [
            tf.reshape(i, batch_shape + s)
            for (i, s) in zip(all_params, output_shapes)
        ]

        cpr_dynamic = res[0]

        # add bias to all remaining outputs
        res = [snt.AddBias()(i) for i in res[1:]]
        ccr, pres_logit_per_caps, pres_logit_per_vote, scale_per_vote = res

        if self._caps_dropout_rate != 0.0:
            pres_logit_per_caps += math_ops.safe_log(caps_exist)

        cpr_static = tf.get_variable(
            'cpr_static',
            shape=[1, self._n_caps, self._n_votes, self._n_transform_params])

        def add_noise(tensor):
            """Adds noise to tensors."""
            if self._noise_type == 'uniform':
                noise = tf.random.uniform(tensor.shape, minval=-.5,
                                          maxval=.5) * self._noise_scale

            elif self._noise_type == 'logistic':
                pdf = tfd.Logistic(0., self._noise_scale)
                noise = pdf.sample(tensor.shape)

            elif not self._noise_type:
                noise = 0.

            else:
                raise ValueError('Invalid noise type: "{}".'.format(
                    self._noise_type))

            return tensor + noise

        pres_logit_per_caps = add_noise(pres_logit_per_caps)
        pres_logit_per_vote = add_noise(pres_logit_per_vote)

        # this is for hierarchical
        if parent_transform is None:
            ccr = self._make_transform(ccr)
        else:
            ccr = parent_transform

        if not self._deformations:
            cpr_dynamic = tf.zeros_like(cpr_dynamic)

        cpr = self._make_transform(cpr_dynamic + cpr_static)

        ccr_per_vote = snt.TileByDim([2], [self._n_votes])(ccr)
        votes = tf.matmul(ccr_per_vote, cpr)

        if parent_presence is not None:
            pres_per_caps = parent_presence
        else:
            pres_per_caps = tf.nn.sigmoid(pres_logit_per_caps)

        pres_per_vote = pres_per_caps * tf.nn.sigmoid(pres_logit_per_vote)

        if self._learn_vote_scale:
            # for numerical stability
            scale_per_vote = tf.nn.softplus(scale_per_vote + .5) + 1e-2
        else:
            scale_per_vote = tf.zeros_like(scale_per_vote) + 1.

        return AttrDict(
            vote=votes,
            scale=scale_per_vote,
            vote_presence=pres_per_vote,
            pres_logit_per_caps=pres_logit_per_caps,
            pres_logit_per_vote=pres_logit_per_vote,
            dynamic_weights_l2=tf.nn.l2_loss(cpr_dynamic) / batch_size,
            raw_caps_params=raw_caps_params,
            raw_caps_features=features,
        )
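
All per-capsule outputs above come from a single linear layer whose result is sliced with tf.split, with np.prod over each target shape giving the slice sizes. A minimal sketch of that split-and-reshape pattern (the shapes below are hypothetical stand-ins):

import numpy as np
import tensorflow as tf

batch_shape = [2, 3]                      # e.g. [B, n_caps]
output_shapes = ([4, 6], [1, 6], [1])     # hypothetical per-capsule heads

splits = [np.prod(s).astype(np.int32) for s in output_shapes]
n_outputs = sum(splits)                   # 24 + 6 + 1 = 31

all_params = tf.zeros(batch_shape + [n_outputs])  # stand-in for an MLP output
heads = tf.split(all_params, splits, -1)
heads = [tf.reshape(h, batch_shape + list(s))
         for h, s in zip(heads, output_shapes)]
for h in heads:
    print(h.shape)  # (2, 3, 4, 6), (2, 3, 1, 6), (2, 3, 1)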
Example no. 13
    def _build(self, x, presence=None):

        # x is [B, n_input_points, n_input_dims]
        batch_size, n_input_points = x.shape[:2].as_list()

        # votes and scale have shape [B, n_caps, n_input_points, n_input_dims|1]
        # since scale is a per-caps scalar and we have one vote per capsule
        vote_component_pdf = self._get_pdf(self._votes,
                                           tf.expand_dims(self._scales, -1))

        # expand along caps dimensions -> [B, 1, n_input_points, n_input_dims]
        expanded_x = tf.expand_dims(x, 1)
        vote_log_prob_per_dim = vote_component_pdf.log_prob(expanded_x)
        # [B, n_caps, n_input_points]
        vote_log_prob = tf.reduce_sum(vote_log_prob_per_dim, -1)
        dummy_vote_log_prob = tf.zeros([batch_size, 1, n_input_points])
        dummy_vote_log_prob -= 2. * tf.log(10.)

        # [B, n_caps + 1, n_input_points]
        vote_log_prob = tf.concat([vote_log_prob, dummy_vote_log_prob], 1)

        # [B, n_caps, n_input_points]
        mixing_logits = math_ops.safe_log(self._vote_presence_prob)

        dummy_logit = tf.zeros([batch_size, 1, 1]) - 2. * tf.log(10.)
        dummy_logit = snt.TileByDim([2], [n_input_points])(dummy_logit)

        # [B, n_caps + 1, n_input_points]
        mixing_logits = tf.concat([mixing_logits, dummy_logit], 1)
        mixing_log_prob = mixing_logits - tf.reduce_logsumexp(
            mixing_logits, 1, keepdims=True)
        # [B, n_input_points]
        mixture_log_prob_per_point = tf.reduce_logsumexp(
            mixing_logits + vote_log_prob, 1)

        if presence is not None:
            presence = tf.to_float(presence)
            mixture_log_prob_per_point *= presence

        # [B,]
        mixture_log_prob_per_example = tf.reduce_sum(
            mixture_log_prob_per_point, 1)

        # []
        mixture_log_prob_per_batch = tf.reduce_mean(
            mixture_log_prob_per_example)

        # [B, n_caps + 1, n_input_points]
        posterior_mixing_logits_per_point = mixing_logits + vote_log_prob

        # [B, n_input_points]
        winning_vote_idx = tf.argmax(posterior_mixing_logits_per_point[:, :-1],
                                     1)

        batch_idx = tf.expand_dims(tf.range(batch_size, dtype=tf.int64), 1)
        batch_idx = snt.TileByDim([1], [n_input_points])(batch_idx)

        point_idx = tf.expand_dims(tf.range(n_input_points, dtype=tf.int64), 0)
        point_idx = snt.TileByDim([0], [batch_size])(point_idx)

        idx = tf.stack([batch_idx, winning_vote_idx, point_idx], -1)
        winning_vote = tf.gather_nd(self._votes, idx)
        winning_pres = tf.gather_nd(self._vote_presence_prob, idx)
        vote_presence = tf.greater(mixing_logits[:, :-1],
                                   mixing_logits[:, -1:])

        # the first four votes belong to the square
        is_from_capsule = winning_vote_idx // self._n_votes

        posterior_mixing_probs = tf.nn.softmax(
            posterior_mixing_logits_per_point, 1)

        dummy_vote = tf.get_variable('dummy_vote',
                                     shape=self._votes[:1, :1].shape)
        dummy_vote = snt.TileByDim([0], [batch_size])(dummy_vote)
        dummy_pres = tf.zeros([batch_size, 1, n_input_points])

        votes = tf.concat((self._votes, dummy_vote), 1)
        pres = tf.concat([self._vote_presence_prob, dummy_pres], 1)

        soft_winner = tf.reduce_sum(
            tf.expand_dims(posterior_mixing_probs, -1) * votes, 1)
        soft_winner_pres = tf.reduce_sum(posterior_mixing_probs * pres, 1)

        posterior_mixing_probs = tf.transpose(posterior_mixing_probs[:, :-1],
                                              (0, 2, 1))

        assert winning_vote.shape == x.shape

        return self.OutputTuple(
            log_prob=mixture_log_prob_per_batch,
            vote_presence=tf.to_float(vote_presence),
            winner=winning_vote,
            winner_pres=winning_pres,
            soft_winner=soft_winner,
            soft_winner_pres=soft_winner_pres,
            posterior_mixing_probs=posterior_mixing_probs,
            is_from_capsule=is_from_capsule,
            mixing_logits=mixing_logits,
            mixing_log_prob=mixing_log_prob,
        )
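
Underneath the dummy-component bookkeeping, the likelihood above is the standard mixture identity log p(x) = logsumexp_k(log pi_k + log p_k(x)). A small numpy sketch with made-up numbers (note the snippet above applies the per-point logsumexp to the unnormalised mixing_logits; the normalised form is shown here):

import numpy as np
from scipy.special import logsumexp

mixing_logits = np.array([1.0, 0.2, -0.5])    # unnormalised log pi_k
vote_log_prob = np.array([-1.3, -0.7, -2.1])  # log p_k(x) per component

# Normalise in log space (mixing_logits - reduce_logsumexp(...) above).
mixing_log_prob = mixing_logits - logsumexp(mixing_logits)

# Marginal log-likelihood of the point under the mixture.
print(logsumexp(mixing_log_prob + vote_log_prob))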
Example no. 14
    def _build(self, x, presence=None):

        batch_size, n_input_points = x.shape[:2].as_list()

        # we don't know what order the initial points came in, so we need to create
        # a big mixture of all votes for every input point
        # [B, 1, n_votes, n_input_dims]
        expanded_votes = tf.expand_dims(self._votes, 1)
        expanded_scale = tf.expand_dims(tf.expand_dims(self._scales, 1), -1)
        vote_component_pdf = self._get_pdf(expanded_votes, expanded_scale)

        # [B, n_points, n_caps, n_votes, n_input_dims]
        expanded_x = tf.expand_dims(x, 2)
        vote_log_prob_per_dim = vote_component_pdf.log_prob(expanded_x)
        # [B, n_points, n_votes]
        vote_log_prob = tf.reduce_sum(vote_log_prob_per_dim, -1)
        dummy_vote_log_prob = tf.zeros([batch_size, n_input_points, 1])
        dummy_vote_log_prob -= 2. * tf.log(10.)
        vote_log_prob = tf.concat([vote_log_prob, dummy_vote_log_prob], 2)

        # [B, n_points, n_votes]
        mixing_logits = math_ops.safe_log(self._vote_presence_prob)

        dummy_logit = tf.zeros([batch_size, 1]) - 2. * tf.log(10.)
        mixing_logits = tf.concat([mixing_logits, dummy_logit], 1)

        mixing_log_prob = mixing_logits - tf.reduce_logsumexp(
            mixing_logits, 1, keepdims=True)

        expanded_mixing_logits = tf.expand_dims(mixing_log_prob, 1)
        mixture_log_prob_per_component = tf.reduce_logsumexp(
            expanded_mixing_logits + vote_log_prob, 2)

        if presence is not None:
            presence = tf.to_float(presence)
            mixture_log_prob_per_component *= presence

        mixture_log_prob_per_example = tf.reduce_sum(
            mixture_log_prob_per_component, 1)

        mixture_log_prob_per_batch = tf.reduce_mean(
            mixture_log_prob_per_example)

        # [B, n_points, n_votes]
        posterior_mixing_logits_per_point = expanded_mixing_logits + vote_log_prob
        # [B, n_points]
        winning_vote_idx = tf.argmax(
            posterior_mixing_logits_per_point[:, :, :-1], 2)

        batch_idx = tf.expand_dims(tf.range(batch_size, dtype=tf.int64), -1)
        batch_idx = snt.TileByDim([1], [winning_vote_idx.shape[-1]])(batch_idx)

        idx = tf.stack([batch_idx, winning_vote_idx], -1)
        winning_vote = tf.gather_nd(self._votes, idx)
        winning_pres = tf.gather_nd(self._vote_presence_prob, idx)
        vote_presence = tf.greater(mixing_logits[:, :-1],
                                   mixing_logits[:, -1:])

        # the first four votes belong to the square
        is_from_capsule = winning_vote_idx // self._n_votes

        posterior_mixing_probs = tf.nn.softmax(
            posterior_mixing_logits_per_point, -1)[Ellipsis, :-1]

        assert winning_vote.shape == x.shape

        return self.OutputTuple(
            log_prob=mixture_log_prob_per_batch,
            vote_presence=tf.to_float(vote_presence),
            winner=winning_vote,
            winner_pres=winning_pres,
            is_from_capsule=is_from_capsule,
            mixing_logits=mixing_logits,
            mixing_log_prob=mixing_log_prob,
            # TODO(adamrk): this is broken
            soft_winner=tf.zeros_like(winning_vote),
            soft_winner_pres=tf.zeros_like(winning_pres),
            posterior_mixing_probs=posterior_mixing_probs,
        )
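
Selecting the winning vote above is a batched argmax followed by tf.gather_nd over explicitly stacked indices. A minimal sketch of that indexing pattern, using plain tf.tile in place of Sonnet's snt.TileByDim (TF 1.x; the numbers are hypothetical):

import tensorflow as tf

votes = tf.constant([[[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]])  # [B=1, n_votes=3, d=2]
scores = tf.constant([[[0.1, 2.0, 0.3],
                       [1.5, 0.2, 0.4]]])                    # [B=1, n_points=2, n_votes=3]

winning_idx = tf.argmax(scores, 2)                           # [B, n_points]
batch_idx = tf.expand_dims(tf.range(1, dtype=tf.int64), -1)  # [B, 1]
batch_idx = tf.tile(batch_idx, [1, 2])                       # [B, n_points]

idx = tf.stack([batch_idx, winning_idx], -1)                 # [B, n_points, 2]
winning_vote = tf.gather_nd(votes, idx)                      # [B, n_points, d]

with tf.Session() as sess:
    print(sess.run(winning_vote))  # [[[2. 3.] [0. 1.]]]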
Example no. 15
    def run(
        self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs
    ) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
        """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

        Args:
            input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
            print_progress:     Print progress to the console? Useful for very large input arrays.
            minibatch_size:     Maximum minibatch size to use, None = disable batching.
            num_gpus:           Number of GPUs to use.
            assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will not change between calls.
            dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
        """
        assert len(in_arrays) == self.num_inputs
        assert not all(arr is None for arr in in_arrays)
        assert input_transform is None or util.is_top_level_function(
            input_transform["func"])
        assert output_transform is None or util.is_top_level_function(
            output_transform["func"])
        output_transform, dynamic_kwargs = _handle_legacy_output_transforms(
            output_transform, dynamic_kwargs)
        num_items = in_arrays[0].shape[0]
        if minibatch_size is None:
            minibatch_size = num_items

        # Construct unique hash key from all arguments that affect the TensorFlow graph.
        key = dict(input_transform=input_transform,
                   output_transform=output_transform,
                   num_gpus=num_gpus,
                   assume_frozen=assume_frozen,
                   dynamic_kwargs=dynamic_kwargs)

        def unwind_key(obj):
            if isinstance(obj, dict):
                return [(key, unwind_key(value))
                        for key, value in sorted(obj.items())]
            if callable(obj):
                return util.get_top_level_function_name(obj)
            return obj

        key = repr(unwind_key(key))

        # Build graph.
        if key not in self._run_cache:
            with tfutil.absolute_name_scope(
                    self.scope + "/_Run"), tf.control_dependencies(None):
                with tf.device("/cpu:0"):
                    in_expr = [
                        tf.placeholder(tf.float32, name=name)
                        for name in self.input_names
                    ]
                    in_split = list(
                        zip(*[tf.split(x, num_gpus) for x in in_expr]))

                out_split = []
                for gpu in range(num_gpus):
                    with tf.device("/gpu:%d" % gpu):
                        net_gpu = self.clone() if assume_frozen else self
                        in_gpu = in_split[gpu]

                        if input_transform is not None:
                            in_kwargs = dict(input_transform)
                            in_gpu = in_kwargs.pop("func")(*in_gpu,
                                                           **in_kwargs)
                            in_gpu = [in_gpu] if tfutil.is_tf_expression(
                                in_gpu) else list(in_gpu)

                        assert len(in_gpu) == self.num_inputs
                        out_gpu = net_gpu.get_output_for(*in_gpu,
                                                         return_as_list=True,
                                                         **dynamic_kwargs)

                        if output_transform is not None:
                            out_kwargs = dict(output_transform)
                            out_gpu = out_kwargs.pop("func")(*out_gpu,
                                                             **out_kwargs)
                            out_gpu = [out_gpu] if tfutil.is_tf_expression(
                                out_gpu) else list(out_gpu)

                        assert len(out_gpu) == self.num_outputs
                        out_split.append(out_gpu)

                with tf.device("/cpu:0"):
                    out_expr = [
                        tf.concat(outputs, axis=0)
                        for outputs in zip(*out_split)
                    ]
                    self._run_cache[key] = in_expr, out_expr

        # Run minibatches.
        in_expr, out_expr = self._run_cache[key]
        out_arrays = [
            np.empty([num_items] + tfutil.shape_to_list(expr.shape)[1:],
                     expr.dtype.name) for expr in out_expr
        ]

        for mb_begin in range(0, num_items, minibatch_size):
            if print_progress:
                print("\r%d / %d" % (mb_begin, num_items), end="")

            mb_end = min(mb_begin + minibatch_size, num_items)
            mb_num = mb_end - mb_begin
            mb_in = [
                src[mb_begin:mb_end]
                if src is not None else np.zeros([mb_num] + shape[1:])
                for src, shape in zip(in_arrays, self.input_shapes)
            ]
            mb_out = tf.get_default_session().run(out_expr,
                                                  dict(zip(in_expr, mb_in)))

            for dst, src in zip(out_arrays, mb_out):
                dst[mb_begin:mb_end] = src

        # Done.
        if print_progress:
            print("\r%d / %d" % (num_items, num_items))

        if not return_as_list:
            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(
                out_arrays)
        return out_arrays
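
The minibatching at the end is a plain slice-and-write loop over the leading dimension, preallocating the output arrays. A minimal numpy sketch with an identity "network" standing in for session.run:

import numpy as np

num_items, minibatch_size = 10, 4
src = np.arange(num_items, dtype=np.float32)
dst = np.empty_like(src)

for mb_begin in range(0, num_items, minibatch_size):
    mb_end = min(mb_begin + minibatch_size, num_items)
    # Stand-in for running the network on the minibatch slice.
    dst[mb_begin:mb_end] = src[mb_begin:mb_end]

assert np.array_equal(dst, src)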
Example no. 16
def _stitch(features):
  """Stitch features on the first dimension."""
  full_mask = tf.greater(features['task'], 1)
  step_mask = tf.reduce_any(full_mask, axis=-1)
  step_mask_exclude_last = tf.pad(step_mask,
                                  [[0, 0], [0, 1]],
                                  constant_values=False)[:, 1:]
  num_sequences = common_layers.shape_list(features['task'])[0]
  num_steps = common_layers.shape_list(features['task'])[1]
  connectors = tf.constant(PADDED_CONCATENATORS)
  # Select connectors
  connector_indices = tf.random.uniform(
      [num_sequences * num_steps], minval=0,
      maxval=len(PADDED_CONCATENATORS), dtype=tf.int32)
  selected_connectors = tf.reshape(
      tf.gather(connectors, connector_indices),
      [num_sequences, num_steps, len(PADDED_CONCATENATORS[0])])
  selected_connectors = tf.multiply(
      selected_connectors,
      tf.expand_dims(tf.to_int32(step_mask_exclude_last), 2),
      name='connector_mask')
  features['task'] = tf.concat([features['task'], selected_connectors], axis=-1)
  ref_offsets = tf.expand_dims(
      tf.cumsum(tf.reduce_sum(tf.to_int32(tf.greater(features['task'], 1)), -1),
                exclusive=True, axis=-1), 2)
  features['task'] = tf.reshape(features['task'], [num_sequences, -1])
  full_mask = tf.greater(features['task'], 1)
  full_mask_int = tf.to_int32(full_mask)
  indices = tf.where(tf.sequence_mask(lengths=tf.reduce_sum(full_mask_int, -1)))
  values = tf.boolean_mask(tf.reshape(features['task'], [-1]),
                           tf.reshape(full_mask, [-1]))
  sparse_task = tf.sparse.SparseTensor(
      indices=indices, values=values,
      dense_shape=tf.to_int64(tf.shape(features['task'])))
  # Stitch task and raw_task
  stitched_features = {}
  stitched_features['task'] = tf.sparse_tensor_to_dense(sparse_task)
  max_len = tf.reduce_max(
      tf.reduce_sum(tf.to_int32(tf.greater(stitched_features['task'], 1)), -1))
  stitched_features['task'] = stitched_features['task'][:, :max_len]
  if 'raw_task' in features:
    connector_strs = tf.reshape(
        tf.gather(tf.constant(CONCATENATORS_STR), connector_indices),
        [num_sequences, num_steps])
    masked_connector_strs = tf.where(
        step_mask_exclude_last,
        connector_strs, tf.fill(tf.shape(connector_strs), ''))
    stitched_features['raw_task'] = tf.strings.reduce_join(
        tf.strings.reduce_join(tf.concat([
            tf.expand_dims(features['raw_task'], 2),
            tf.expand_dims(masked_connector_strs, 2)], axis=2), axis=-1), -1)
  # Stitch screen sequences
  action_lengths = tf.reduce_sum(tf.to_int32(
      tf.greater(features['verb_refs'][:, :, 0, 1],
                 features['verb_refs'][:, :, 0, 0])), -1)
  max_action_length = tf.reduce_max(action_lengths)
  def _pad(tensor, padding_value=0):
    shape_list = common_layers.shape_list(tensor)
    assert len(shape_list) >= 2
    padding_list = [[0, 0], [0, 1]] + [[0, 0]] * (len(shape_list) - 2)
    return tf.pad(tensor[:, :max_action_length],
                  padding_list, constant_values=padding_value)
  for key in features.keys():
    if key.endswith('_refs'):
      features[key] = tf.squeeze(features[key], 2)
      ref_mask = tf.expand_dims(tf.to_int32(
          tf.not_equal(features[key][:, :, 0],
                       features[key][:, :, 1])), 2)
      stitched_features[key] = tf.multiply(
          (features[key] + ref_offsets), ref_mask, name='ref_mask')
      stitched_features[key] = _pad(stitched_features[key])
    elif key in ['verbs', 'objects', 'consumed', 'obj_dom_pos',
                 'obj_text', 'obj_type', 'obj_clickable', 'obj_screen_pos',
                 'verb_refs', 'obj_refs', 'input_refs', 'obj_dom_dist']:
      features[key] = tf.squeeze(features[key], 2)
      stitched_features[key] = features[key]
      stitched_features[key] = _pad(
          stitched_features[key],
          padding_value=-1 if key == 'obj_type' else 0)
    elif key not in ['task', 'raw_task']:
      stitched_features[key] = features[key][:, 0]
  # Append eos to 'task'
  stitched_features['task'] = tf.pad(stitched_features['task'],
                                     [[0, 0], [0, 1]])
  task_mask = tf.to_int32(tf.greater(stitched_features['task'], 1))
  task_eos_mask = tf.pad(task_mask, [[0, 0], [1, 0]], constant_values=1)[:, :-1]
  stitched_features['task'] = stitched_features['task'] + (
      task_eos_mask - task_mask)
  # Append eos
  verb_mask = tf.to_int32(tf.greater(stitched_features['verbs'], 1))
  verb_eos_mask = tf.pad(verb_mask, [[0, 0], [1, 0]], constant_values=1)[:, :-1]
  verb_eos = verb_eos_mask - verb_mask
  stitched_features['verbs'] = stitched_features['verbs'] + verb_eos
  # Append last step refs to 'verb_refs'
  task_lengths = tf.where(tf.equal(stitched_features['task'], 1))[:, 1]
  eos_pos = tf.to_int32(tf.stack([task_lengths, task_lengths + 1], axis=1))
  action_mask = tf.to_int32(
      tf.sequence_mask(action_lengths, max_action_length + 1))
  action_and_eos_mask = tf.pad(action_mask, [[0, 0], [1, 0]],
                               constant_values=1)[:, :-1]
  verb_ref_eos = action_and_eos_mask - action_mask
  eos_refs = tf.multiply(
      tf.tile(tf.expand_dims(eos_pos, 1), [1, max_action_length + 1, 1]),
      tf.expand_dims(verb_ref_eos, 2), name='verb_ref_eos')
  stitched_features['verb_refs'] += eos_refs
  return stitched_features
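
The core stitching trick above left-aligns the surviving tokens of each row by building a SparseTensor whose indices come from tf.sequence_mask over the per-row counts, then densifying. A minimal sketch on a toy batch (TF 1.x; ids <= 1 are treated as padding/EOS, as above):

import tensorflow as tf

task = tf.constant([[5, 0, 7, 0],
                    [0, 9, 0, 0]])

keep = tf.greater(task, 1)                      # drop padding/EOS ids
lengths = tf.reduce_sum(tf.to_int32(keep), -1)  # tokens kept per row
indices = tf.where(tf.sequence_mask(lengths))   # left-aligned positions
values = tf.boolean_mask(task, keep)

packed = tf.sparse_tensor_to_dense(tf.sparse.SparseTensor(
    indices=indices, values=values,
    dense_shape=tf.to_int64(tf.shape(task))))

with tf.Session() as sess:
    print(sess.run(packed))
    # [[5 7 0 0]
    #  [9 0 0 0]]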
Example no. 17
    # Nt = 10
    t_np = np.linspace(0, 1, N)

    X, T = np.meshgrid(x_np, t_np)

    x = X.ravel()
    t = T.ravel()

    ## The construction phase

    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))

    points = tf.concat([x, t], 1)

    num_iter = 10000
    num_hidden_neurons = [20, 20, 20]

    X = tf.convert_to_tensor(X)
    T = tf.convert_to_tensor(T)

    with tf.variable_scope('dnn'):
        num_hidden_layers = np.size(num_hidden_neurons)

        previous_layer = points

        for l in range(num_hidden_layers):
            current_layer = tf.layers.dense(previous_layer,
                                            num_hidden_neurons[l],
                                            activation=tf.nn.sigmoid)
            # Assumed continuation: the source snippet is truncated here, so
            # the activation choice and loop update are filled in.
            previous_layer = current_layer
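
The fragment builds the network input by flattening the meshgrid and stacking the two coordinates as columns, one (x, t) pair per row. A minimal numpy sketch of that layout with small hypothetical sizes:

import numpy as np

Nx, Nt = 3, 2
x_np = np.linspace(0, 1, Nx)
t_np = np.linspace(0, 1, Nt)

X, T = np.meshgrid(x_np, t_np)      # each [Nt, Nx]
x = X.ravel().reshape(-1, 1)        # [Nt * Nx, 1]
t = T.ravel().reshape(-1, 1)

points = np.concatenate([x, t], 1)  # [Nt * Nx, 2]
print(points)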
Example no. 18
 def _make_obj_screen_pos():
   return tf.concat([
       tf.reshape(feature_dict['ui_obj_cord_x_seq'], [1, -1, 2]),
       tf.reshape(feature_dict['ui_obj_cord_y_seq'], [1, -1, 2])
   ], 2)
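
This helper packs the flat x- and y-coordinate sequences into one 4-vector per object: two x values followed by two y values. A numpy sketch of the same reshape-and-concat with hypothetical coordinates:

import numpy as np

# Two objects; each contributes (start, end) coords in the flat sequences.
cord_x = np.array([0.1, 0.3, 0.5, 0.9])
cord_y = np.array([0.2, 0.4, 0.6, 0.8])

pos = np.concatenate([cord_x.reshape(1, -1, 2),
                      cord_y.reshape(1, -1, 2)], 2)
print(pos)  # shape [1, 2, 4]: [x0, x1, y0, y1] per object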
Example no. 19
def inception_v3(inputs,
                 dropout_keep_prob=0.8,
                 num_classes=1000,
                 is_training=True,
                 restore_logits=True,
                 scope=''):
    """Latest Inception from http://arxiv.org/abs/1512.00567.

    "Rethinking the Inception Architecture for Computer Vision"

    Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
    Zbigniew Wojna

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    dropout_keep_prob: dropout keep_prob.
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: Optional scope for name_scope.

  Returns:
    a list containing 'logits', 'aux_logits' Tensors.
  """
    # end_points will collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}
    with tf.name_scope(scope, 'inception_v3', [inputs]):
        with scopes.arg_scope(
            [ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                is_training=is_training):
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1,
                                  padding='VALID'):
                # 299 x 299 x 3
                end_points['conv0'] = ops.conv2d(inputs,
                                                 32, [3, 3],
                                                 stride=2,
                                                 scope='conv0')
                # 149 x 149 x 32
                end_points['conv1'] = ops.conv2d(end_points['conv0'],
                                                 32, [3, 3],
                                                 scope='conv1')
                # 147 x 147 x 32
                end_points['conv2'] = ops.conv2d(end_points['conv1'],
                                                 64, [3, 3],
                                                 padding='SAME',
                                                 scope='conv2')
                # 147 x 147 x 64
                end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
                                                   stride=2,
                                                   scope='pool1')
                # 73 x 73 x 64
                end_points['conv3'] = ops.conv2d(end_points['pool1'],
                                                 80, [1, 1],
                                                 scope='conv3')
                # 73 x 73 x 80.
                end_points['conv4'] = ops.conv2d(end_points['conv3'],
                                                 192, [3, 3],
                                                 scope='conv4')
                # 71 x 71 x 192.
                end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
                                                   stride=2,
                                                   scope='pool2')
                # 35 x 35 x 192.
                net = end_points['pool2']
            # Inception blocks
            with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                                  stride=1,
                                  padding='SAME'):
                # mixed: 35 x 35 x 256.
                with tf.variable_scope('mixed_35x35x256a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x256a'] = net
                # mixed_1: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288a'] = net
                # mixed_2: 35 x 35 x 288.
                with tf.variable_scope('mixed_35x35x288b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 64, [1, 1])
                    with tf.variable_scope('branch5x5'):
                        branch5x5 = ops.conv2d(net, 48, [1, 1])
                        branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_35x35x288b'] = net
                # mixed_3: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net,
                                               384, [3, 3],
                                               stride=2,
                                               padding='VALID')
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 64, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
                        branch3x3dbl = ops.conv2d(branch3x3dbl,
                                                  96, [3, 3],
                                                  stride=2,
                                                  padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3],
                                                   stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_17x17x768a'] = net
                # mixed4: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 128, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 128, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768b'] = net
                # mixed_5: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768c'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768c'] = net
                # mixed_6: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768d'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 160, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 160, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768d'] = net
                # mixed_7: 17 x 17 x 768.
                with tf.variable_scope('mixed_17x17x768e'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 192, [1, 1])
                    with tf.variable_scope('branch7x7'):
                        branch7x7 = ops.conv2d(net, 192, [1, 1])
                        branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
                        branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
                    with tf.variable_scope('branch7x7dbl'):
                        branch7x7dbl = ops.conv2d(net, 192, [1, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
                        branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
                    end_points['mixed_17x17x768e'] = net
                # Auxiliary Head logits
                aux_logits = tf.identity(end_points['mixed_17x17x768e'])
                with tf.variable_scope('aux_logits'):
                    aux_logits = ops.avg_pool(aux_logits, [5, 5],
                                              stride=3,
                                              padding='VALID')
                    aux_logits = ops.conv2d(aux_logits,
                                            128, [1, 1],
                                            scope='proj')
                    # Shape of feature map before the final layer.
                    shape = aux_logits.get_shape()
                    aux_logits = ops.conv2d(aux_logits,
                                            768,
                                            shape[1:3],
                                            stddev=0.01,
                                            padding='VALID')
                    aux_logits = ops.flatten(aux_logits)
                    aux_logits = ops.fc(aux_logits,
                                        num_classes,
                                        activation=None,
                                        stddev=0.001,
                                        restore=restore_logits)
                    end_points['aux_logits'] = aux_logits
                # mixed_8: 8 x 8 x 1280.
                # Note that the scope below is kept unchanged so as not to
                # invalidate previous checkpoints.
                # (TODO) Fix the scope when appropriate.
                with tf.variable_scope('mixed_17x17x1280a'):
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 192, [1, 1])
                        branch3x3 = ops.conv2d(branch3x3,
                                               320, [3, 3],
                                               stride=2,
                                               padding='VALID')
                    with tf.variable_scope('branch7x7x3'):
                        branch7x7x3 = ops.conv2d(net, 192, [1, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
                        branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
                        branch7x7x3 = ops.conv2d(branch7x7x3,
                                                 192, [3, 3],
                                                 stride=2,
                                                 padding='VALID')
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.max_pool(net, [3, 3],
                                                   stride=2,
                                                   padding='VALID')
                    net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
                    end_points['mixed_17x17x1280a'] = net
                # mixed_9: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048a'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048a'] = net
                # mixed_10: 8 x 8 x 2048.
                with tf.variable_scope('mixed_8x8x2048b'):
                    with tf.variable_scope('branch1x1'):
                        branch1x1 = ops.conv2d(net, 320, [1, 1])
                    with tf.variable_scope('branch3x3'):
                        branch3x3 = ops.conv2d(net, 384, [1, 1])
                        branch3x3 = tf.concat([
                            ops.conv2d(branch3x3, 384, [1, 3]),
                            ops.conv2d(branch3x3, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch3x3dbl'):
                        branch3x3dbl = ops.conv2d(net, 448, [1, 1])
                        branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
                        branch3x3dbl = tf.concat([
                            ops.conv2d(branch3x3dbl, 384, [1, 3]),
                            ops.conv2d(branch3x3dbl, 384, [3, 1])
                        ], 3)
                    with tf.variable_scope('branch_pool'):
                        branch_pool = ops.avg_pool(net, [3, 3])
                        branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
                    net = tf.concat(
                        [branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
                    end_points['mixed_8x8x2048b'] = net
                # Final pooling and prediction
                with tf.variable_scope('logits'):
                    shape = net.get_shape()
                    net = ops.avg_pool(net,
                                       shape[1:3],
                                       padding='VALID',
                                       scope='pool')
                    # 1 x 1 x 2048
                    net = ops.dropout(net, dropout_keep_prob, scope='dropout')
                    net = ops.flatten(net, scope='flatten')
                    # 2048
                    logits = ops.fc(net,
                                    num_classes,
                                    activation=None,
                                    scope='logits',
                                    restore=restore_logits)
                    # 1000
                    end_points['logits'] = logits
                    end_points['predictions'] = tf.nn.softmax(
                        logits, name='predictions')
            return logits, end_points
Example n. 20
def _process_pixel_help(feature_dict, data_source, load_dom_dist=False,
                        load_extra=False):
  """Processes testing data feature dictionary.

  Args:
    feature_dict: feature dictionary
    data_source: TEST_PIXEL_HELP
    load_dom_dist: whether to load the dom distance feature.
    load_extra: whether to load the extra data for debugging.
  Returns:
    A processed feature dictionary.
  """
  step_num = tf.size(feature_dict['verb_id_seq'])
  feature = {
      'task':
          tf.reshape(feature_dict['instruction_word_id_seq'], [-1]),
      'obj_text':
          tf.reshape(feature_dict['ui_obj_word_id_seq'], [
              step_num, MAX_UI_OBJECT_NUM[data_source],
              MAX_TOKEN_NUM[data_source]
          ]),
      'obj_type':
          tf.reshape(feature_dict['ui_obj_type_id_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source]]),
      'obj_clickable':
          tf.reshape(feature_dict['ui_obj_clickable_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source]]),
      # pylint: disable=g-long-ternary
      'obj_screen_pos': (
          tf.reshape(tf.concat([
              tf.reshape(feature_dict['ui_obj_cord_x_seq'], [step_num, -1, 2]),
              tf.reshape(feature_dict['ui_obj_cord_y_seq'], [step_num, -1, 2])
          ], axis=2), [step_num, MAX_UI_OBJECT_NUM[data_source], 4])),
      'obj_dom_pos':
          tf.reshape(feature_dict['ui_obj_dom_location_seq'],
                     [step_num, MAX_UI_OBJECT_NUM[data_source], 3]),
      'verbs':
          tf.reshape(feature_dict['verb_id_seq'], [step_num]),
      'objects':
          tf.reshape(feature_dict['ui_target_id_seq'], [step_num]),
      'input_refs':
          tf.reshape(feature_dict['input_str_position_seq'], [step_num, 2]),
      'obj_refs':
          tf.reshape(feature_dict['obj_desc_position_seq'], [step_num, 2]),
      'verb_refs':  # No verb reference data for PixelHelp in the field.
          tf.zeros([step_num, 2], tf.int32),
      'agreement_count':
          tf.constant(100, dtype=tf.int32),
  }
  if load_dom_dist:
    feature['obj_dom_dist'] = tf.reshape(
        feature_dict['ui_obj_dom_distance'],
        [step_num, MAX_UI_OBJECT_NUM[data_source],
         MAX_UI_OBJECT_NUM[data_source]])
  feature['rule'] = tf.constant(5, dtype=tf.int32)
  if load_extra:
    feature['task_id'] = tf.reshape(feature_dict['task_id'], [])
    feature['raw_task'] = tf.reshape(feature_dict['instruction_str'], [])
    feature['obj_raw_text'] = tf.reshape(
        feature_dict['ui_obj_str_seq'],
        [step_num, MAX_UI_OBJECT_NUM[data_source]])
  feature['data_source'] = tf.constant(2, dtype=tf.int32)
  return feature
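
A minimal sketch of how a helper like this is typically wired into an input
pipeline. The TFRecord path, `parse_fn`, and the `TEST_PIXEL_HELP` constant
below are assumptions standing in for the module's actual input code:

import tensorflow as tf  # TF1-style API, as in the snippet above

# Hypothetical pipeline: parse serialized examples into feature dictionaries,
# then reshape them with _process_pixel_help for the model.
dataset = tf.data.TFRecordDataset('pixel_help_test.tfrecord')  # assumed path
dataset = dataset.map(parse_fn)  # assumed parser producing feature_dict
dataset = dataset.map(
    lambda d: _process_pixel_help(
        d, data_source=TEST_PIXEL_HELP, load_dom_dist=True, load_extra=False))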
    def crop_proposal():
        rand_vec = lambda minval, maxval: tf.random_uniform(shape=(
            ssd_constants.NUM_CROP_PASSES, 1),
                                                            minval=minval,
                                                            maxval=maxval,
                                                            dtype=tf.float32)

        width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
        left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

        right = left + width
        bottom = top + height

        ltrb = tf.concat([left, top, right, bottom], axis=1)

        min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
        ious = calc_iou_tensor(ltrb, boxes)

        # Discard any bboxes whose center is not in the cropped image.
        xc, yc = [
            tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                    (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
        ]

        masks = tf.reduce_all(tf.stack([
            tf.greater(xc, tf.tile(left, (1, num_boxes))),
            tf.less(xc, tf.tile(right, (1, num_boxes))),
            tf.greater(yc, tf.tile(top, (1, num_boxes))),
            tf.less(yc, tf.tile(bottom, (1, num_boxes))),
        ],
                                       axis=2),
                              axis=2)

        # Check whether each crop is valid.
        valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                      tf.less(width / height, 2))
        valid_ious = tf.reduce_all(tf.greater(ious, min_iou),
                                   axis=1,
                                   keepdims=True)
        valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

        valid_all = tf.cast(
            tf.reduce_all(tf.concat([valid_aspect, valid_ious, valid_masks],
                                    axis=1),
                          axis=1), tf.int32)

        # One indexed, as zero is needed for the case of no matches.
        index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

        # Either one-hot, or zeros if there is no valid crop.
        selection = tf.equal(tf.reduce_max(index * valid_all), index)

        use_crop = tf.reduce_any(selection)
        output_ltrb = tf.reduce_sum(tf.multiply(
            ltrb,
            tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))),
                                    axis=0)
        output_masks = tf.reduce_any(tf.logical_and(
            masks, tf.tile(selection[:, tf.newaxis], (1, num_boxes))),
                                     axis=0)

        return use_crop, output_ltrb, output_masks
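
The one-hot selection above is a neat trick: multiplying a one-indexed range
by the validity mask and taking the max picks the last valid candidate, and
yields all zeros when no candidate is valid. A small NumPy sketch with toy
values (not part of the original code):

import numpy as np

valid_all = np.array([0, 1, 0, 1, 0], dtype=np.int32)  # which passes are valid
index = np.arange(1, 6, dtype=np.int32)                 # one-indexed on purpose
selection = (index * valid_all).max() == index          # [F, F, F, T, F]
use_crop = selection.any()                              # True
print(selection, use_crop)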
def multilevel_roi_align(features,
                         boxes,
                         box_levels,
                         output_size,
                         num_samples_per_cell_y=1,
                         num_samples_per_cell_x=1,
                         align_corners=False,
                         extrapolation_value=0.0,
                         scope=None):
    """Applies RoI Align op and returns feature for boxes.

  Given multiple feature maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select num_samples_per_cell_y * num_samples_per_cell_x points
  uniformly and compute feature values using bilinear interpolation. Finally,
  we average pool the interpolated values in each cell to obtain a
  [output_size[0], output_size[1], channels] feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map must
      have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 2D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: A list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in each
      cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in each
      cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
    with tf.name_scope(scope, 'MultiLevelRoIAlign'):
        features, true_feature_shapes = pad_to_max_size(features)
        batch_size = shape_utils.combined_static_and_dynamic_shape(features)[0]
        num_levels = features.get_shape().as_list()[1]
        max_feature_height = tf.shape(features)[2]
        max_feature_width = tf.shape(features)[3]
        num_filters = features.get_shape().as_list()[4]
        num_boxes = tf.shape(boxes)[1]

        # Convert boxes to absolute co-ordinates.
        true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
        true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
        boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

        size_y = output_size[0] * num_samples_per_cell_y
        size_x = output_size[1] * num_samples_per_cell_x
        box_grid_y, box_grid_x = box_grid_coordinate_vectors(
            boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
        (feature_grid_y0, feature_grid_x0, feature_grid_y1,
         feature_grid_x1) = feature_grid_coordinate_vectors(
             box_grid_y, box_grid_x)
        feature_grid_y = tf.reshape(
            tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
            [batch_size, num_boxes, -1])
        feature_grid_x = tf.reshape(
            tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
            [batch_size, num_boxes, -1])
        feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                            num_levels, max_feature_height,
                                            max_feature_width, box_levels)
        valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                         true_feature_shapes)
        feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                       -1 * tf.ones_like(feature_coordinates))
        flattened_features = tf.reshape(features, [-1, num_filters])
        flattened_feature_values = _gather_valid_indices(
            flattened_features, feature_coordinates, extrapolation_value)
        features_per_box = tf.reshape(
            flattened_feature_values,
            [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

        # Cast tensors into dtype of features.
        box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
        box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
        feature_grid_y0 = tf.cast(feature_grid_y0,
                                  dtype=features_per_box.dtype)
        feature_grid_x0 = tf.cast(feature_grid_x0,
                                  dtype=features_per_box.dtype)

        # RoI Align operation is a bilinear interpolation of four
        # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
        # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
        #                       [f10, f11]]
        #
        # Unrolling the matrix multiplies gives us:
        # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
        # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
        #
        # This can be computed by applying pointwise multiplication and sum_pool in
        # a 2x2 window.
        ly = box_grid_y - feature_grid_y0
        lx = box_grid_x - feature_grid_x0
        hy = 1.0 - ly
        hx = 1.0 - lx

        kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
                              [batch_size, num_boxes, size_y * 2, 1])

        kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
                              [batch_size, num_boxes, 1, size_x * 2])

        # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
        interpolation_kernel = kernel_y * kernel_x * 4

        # Interpolate the gathered features with computed interpolation kernels.
        features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
        features_per_box = tf.reshape(
            features_per_box,
            [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

        # This combines the two pooling operations - sum_pool to perform bilinear
        # interpolation and avg_pool to pool the values in each bin.
        features_per_box = tf.nn.avg_pool(
            features_per_box,
            [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
            [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
            'VALID')
        features_per_box = tf.reshape(features_per_box, [
            batch_size, num_boxes, output_size[0], output_size[1], num_filters
        ])

        return features_per_box
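
For intuition, the kernel construction above reduces to classic bilinear
interpolation. A self-contained NumPy sketch with toy values:

import numpy as np

# Four neighbouring feature points f00, f01, f10, f11 and a sample point at
# fractional offset (ly, lx) from the top-left neighbour.
f = np.array([[1.0, 3.0],
              [5.0, 7.0]])                 # [[f00, f01], [f10, f11]]
ly, lx = 0.25, 0.75
hy, hx = 1.0 - ly, 1.0 - lx
kernel = np.outer([hy, ly], [hx, lx])      # [[hy*hx, hy*lx], [ly*hx, ly*lx]]
value = (kernel * f).sum()                 # pointwise multiply + "sum pool"
print(value)                               # 3.5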
Example n. 23
def _get_action_logits(encoder_output,
                       decoder_output,
                       output_vocab_embeddings_table,
                       output_vocab_size,
                       model_config,
                       input_copy_mask=None,
                       use_gating_mechanism=True):
    """Generate output logits given decoder output.

  This effectively combines a Pointer Network (Vinyals et al., 2015) with a
  standard softmax output layer for selecting symbols from an output vocabulary,
  similar to:
      - Jia and Liang, 2016 (https://arxiv.org/abs/1606.03622)
      - Gulcehre et al., 2016 (https://arxiv.org/abs/1603.08148)
      - Gu et al., 2016 (https://arxiv.org/abs/1603.06393)
      - See et al. 2017 (https://arxiv.org/abs/1704.04368)

  Args:
    encoder_output: Tensor representing encoder output of shape (batch size,
      input length, encoder dims).
    decoder_output: Tensor representing decoder output of shape (batch size, #
      decoded steps, decoder dims).
    output_vocab_embeddings_table: Embeddings for output vocabulary of shape
      (output_vocab_size, target embedding dims).
    output_vocab_size: Integer size of output_vocab_embeddings_table outer dim.
    model_config: ModelConfig proto.
    input_copy_mask: Mask of the input sequence for copying.
    use_gating_mechanism: Whether to use gating mechanism.

  Returns:
    Tensor of shape (batch_size, output_vocab_size + input length) representing
    unnormalized logits for both copy and generate actions.
  """
    with tf.variable_scope("logits_transforms"):
        decoder_dims = decoder_output.get_shape()[-1]
        target_embedding_dims = model_config.model_parameters.target_embedding_dims

        # Dot product the decoder output with representations of each of the output
        # symbols to get a set of unnormalized logits for each output vocab item.
        # We need to tile the output vocab embeddings across the batch.
        output_vocab_transform = tf.expand_dims(output_vocab_embeddings_table,
                                                0)
        batch_size = tf.shape(decoder_output)[0]
        output_vocab_transform = tf.tile(output_vocab_transform,
                                         [batch_size, 1, 1])
        # Transform representations to the target_embedding_dims.
        if decoder_dims != target_embedding_dims:
            transformed_decoder_output = common_layers.linear_transform(
                decoder_output, target_embedding_dims, "decoder_transform")
        else:
            transformed_decoder_output = decoder_output
        generate_logits = tf.matmul(transformed_decoder_output,
                                    output_vocab_transform,
                                    transpose_b=True)
        generate_logits_bias = tf.get_variable("generate_logits_bias",
                                               shape=(output_vocab_size,))
        generate_logits += generate_logits_bias

        # Dot product the decoder output with representations from the encoder
        # output.
        # This is necessary vs. re-using the encoder-decoder attention weights
        # because those use multihead attention.
        # First, need to transform representations to the decoder dimensions.
        transformed_encoder_output = common_layers.linear_transform(
            encoder_output, decoder_dims, "encoder_transform")

        copy_logits = tf.matmul(decoder_output,
                                transformed_encoder_output,
                                transpose_b=True)
        # This contains scores representing the probability of copying from input
        # (3rd dim) to output (2nd dim).

        # Optionally apply a soft gating mechanism to determine whether
        # to select from copy or generate logits.
        # TODO(petershaw): Evaluate and improve this gating mechanism.
        # The current implementation is most likely not optimal, since it applies
        # a scalar in the range [0,1] prior to softmax.
        if use_gating_mechanism:
            prob_gen_unnormalized = common_layers.linear_transform(
                decoder_output, 1, "prob_gen")
            prob_gen_bias = tf.get_variable("prob_gen_bias", shape=(1,))
            prob_gen_unnormalized += prob_gen_bias
            prob_gen = tf.sigmoid(prob_gen_unnormalized)
            # Squeeze so that prob_gen has shape [batch_size, decode_length]
            prob_gen = tf.squeeze(prob_gen, axis=2)

            # These are the 'generate' logits so are scaled by P_gen.
            generate_logits *= tf.expand_dims(prob_gen, axis=-1)
            # These are the 'copy' logits so are scaled by 1 - P_gen.
            copy_logits *= tf.expand_dims(1 - prob_gen, axis=-1)

        if input_copy_mask is not None:
            copy_mask = (1 - tf.dtypes.cast(
                input_copy_mask, dtype=tf.dtypes.float32)) * LOGIT_MASK_VALUE
            copy_logits += tf.expand_dims(copy_mask, axis=1)

        # Concatenate logits into a single vector; first N (fixed) inputs are the
        # generation probabilities, and next are the copy probabilities for each
        # input (well, they aren't really probabilities, but scores.)
        extended_logits = tf.concat([generate_logits, copy_logits], axis=2)
        return extended_logits
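
A toy NumPy illustration (all values made up) of the gating above: the
generate scores are scaled by P_gen and the copy scores by 1 - P_gen before
the shared softmax over the extended vocabulary:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

generate_logits = np.array([2.0, 0.5, -1.0, 0.1])   # 4-word output vocab
copy_logits = np.array([1.5, -0.3, 0.7])            # 3 input positions
p_gen = sigmoid(0.4)                                # scalar gate in (0, 1)

extended = np.concatenate([p_gen * generate_logits,
                           (1.0 - p_gen) * copy_logits])
probs = np.exp(extended) / np.exp(extended).sum()   # softmax over vocab + input
print(probs.round(3))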
Example n. 24
def test(first, second, out):

    data_frame1 = np.expand_dims(imread(first), 0)
    data_frame3 = np.expand_dims(imread(second), 0)

    H = data_frame1.shape[1]
    W = data_frame1.shape[2]

    adaptive_H = int(np.ceil(H / 32.0) * 32.0)
    adaptive_W = int(np.ceil(W / 32.0) * 32.0)

    pad_up = int(np.ceil((adaptive_H - H) / 2.0))
    pad_bot = int(np.floor((adaptive_H - H) / 2.0))
    pad_left = int(np.ceil((adaptive_W - W) / 2.0))
    pad_right = int(np.floor((adaptive_W - W) / 2.0))

    print(str(H) + ', ' + str(W))
    print(str(adaptive_H) + ', ' + str(adaptive_W))
    """Perform test on a trained model."""
    with tf.Graph().as_default():
        # Create input and target placeholder.
        input_placeholder = tf.placeholder(tf.float32, shape=(None, H, W, 2))

        input_pad = tf.pad(
            input_placeholder,
            [[0, 0], [pad_up, pad_bot], [pad_left, pad_right], [0, 0]],
            'SYMMETRIC')

        edge_vgg_1 = Vgg16(input_pad[:, :, :, :1], reuse=None)
        edge_vgg_3 = Vgg16(input_pad[:, :, :, 1:2], reuse=True)

        edge_1 = tf.nn.sigmoid(edge_vgg_1.fuse)
        edge_3 = tf.nn.sigmoid(edge_vgg_3.fuse)

        edge_1 = tf.reshape(edge_1, [
            -1,
            input_pad.get_shape().as_list()[1],
            input_pad.get_shape().as_list()[2], 1
        ])
        edge_3 = tf.reshape(edge_3, [
            -1,
            input_pad.get_shape().as_list()[1],
            input_pad.get_shape().as_list()[2], 1
        ])

        with tf.variable_scope("Cycle_DVF"):
            # Prepare model.
            model = Voxel_flow_model(is_train=False)
            prediction = model.inference(
                tf.concat([input_pad, edge_1, edge_3], 3))[0]

        # Create a saver and load.
        sess = tf.Session()

        # Restore checkpoint from file.
        if FLAGS.pretrained_model_checkpoint_path:
            restorer = tf.train.Saver()
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        feed_dict = {
            input_placeholder: np.concatenate((data_frame1, data_frame3), 3)
        }
        # Run single step update.
        prediction_np = sess.run(prediction, feed_dict=feed_dict)

        output = prediction_np[-1, pad_up:adaptive_H - pad_bot,
                               pad_left:adaptive_W - pad_right, :]
        output = np.round(((output + 0.5) * 255.0)).astype(np.uint8)
        #output = np.dstack((output[:, :, 2], output[:, :, 1], output[:, :, 0]))
        cv2.imwrite(out, output)
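
The padding arithmetic in test() can be sanity-checked in isolation; a short
worked sketch with assumed image sizes:

import numpy as np

H, W = 100, 130
adaptive_H = int(np.ceil(H / 32.0) * 32.0)          # 128
adaptive_W = int(np.ceil(W / 32.0) * 32.0)          # 160
pad_up = int(np.ceil((adaptive_H - H) / 2.0))       # 14
pad_bot = int(np.floor((adaptive_H - H) / 2.0))     # 14
pad_left = int(np.ceil((adaptive_W - W) / 2.0))     # 15
pad_right = int(np.floor((adaptive_W - W) / 2.0))   # 15
assert H + pad_up + pad_bot == adaptive_H
assert W + pad_left + pad_right == adaptive_W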
def get_model(point_cloud, cls_label, is_training, bn_decay=None):
    """ Classification PointNet, input is BxNx3, output Bx40 """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
    l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, 3])

    # Set abstraction layers
    l1_xyz, l1_points = pointnet_sa_module_msg(
        l0_xyz,
        l0_points,
        512, [0.1, 0.2, 0.4], [32, 64, 128],
        [[32, 32, 64], [64, 64, 128], [64, 96, 128]],
        is_training,
        bn_decay,
        scope='layer1')
    l2_xyz, l2_points = pointnet_sa_module_msg(
        l1_xyz,
        l1_points,
        128, [0.4, 0.8], [64, 128], [[128, 128, 256], [128, 196, 256]],
        is_training,
        bn_decay,
        scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz,
                                                       l2_points,
                                                       npoint=None,
                                                       radius=None,
                                                       nsample=None,
                                                       mlp=[256, 512, 1024],
                                                       mlp2=None,
                                                       group_all=True,
                                                       is_training=is_training,
                                                       bn_decay=bn_decay,
                                                       scope='layer3')

    # Feature propagation layers
    l2_points = pointnet_fp_module(l2_xyz,
                                   l3_xyz,
                                   l2_points,
                                   l3_points, [256, 256],
                                   is_training,
                                   bn_decay,
                                   scope='fa_layer1')
    l1_points = pointnet_fp_module(l1_xyz,
                                   l2_xyz,
                                   l1_points,
                                   l2_points, [256, 128],
                                   is_training,
                                   bn_decay,
                                   scope='fa_layer2')

    cls_label_one_hot = tf.one_hot(cls_label,
                                   depth=NUM_CATEGORIES,
                                   on_value=1.0,
                                   off_value=0.0)
    cls_label_one_hot = tf.reshape(cls_label_one_hot,
                                   [batch_size, 1, NUM_CATEGORIES])
    cls_label_one_hot = tf.tile(cls_label_one_hot, [1, num_point, 1])
    l0_points = pointnet_fp_module(l0_xyz,
                                   l1_xyz,
                                   tf.concat(
                                       [cls_label_one_hot, l0_xyz, l0_points],
                                       axis=-1),
                                   l1_points, [128, 128],
                                   is_training,
                                   bn_decay,
                                   scope='fp_layer3')

    # FC layers
    net = tf_util.conv1d(l0_points,
                         128,
                         1,
                         padding='VALID',
                         bn=True,
                         is_training=is_training,
                         scope='fc1',
                         bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net,
                          keep_prob=0.5,
                          is_training=is_training,
                          scope='dp1')
    net = tf_util.conv1d(net,
                         50,
                         1,
                         padding='VALID',
                         activation_fn=None,
                         scope='fc2')

    return net, end_points
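
The per-point class conditioning in get_model() (one_hot + reshape + tile) can
be seen on a toy example; a minimal TF1-style sketch (sizes are made up):

import tensorflow as tf  # TF1-style API, as in the snippet above

cls_label = tf.constant([1, 3])                     # batch of 2 shape labels
one_hot = tf.one_hot(cls_label, depth=4)            # (2, 4)
one_hot = tf.reshape(one_hot, [2, 1, 4])
per_point = tf.tile(one_hot, [1, 5, 1])             # (2, 5, 4): one copy per point
with tf.Session() as sess:
    print(sess.run(per_point).shape)                # (2, 5, 4)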
Example n. 26
def multihead_attention(queries,
                        keys,
                        times=None,
                        num_units=None,
                        num_heads=1,
                        dropout_rate=0,
                        is_training=True,
                        use_prior="none",
                        causality=True,
                        scope="multihead_attention",
                        residual=False,
                        time_exp_base=None,
                        overlapping_chunks=None,
                        reuse=None,
                        with_qk=False):
    """Applies multihead attention.

  Args:
    queries: A 3d tensor with shape of [N, T_q, C_q].
    keys: A 3d tensor with shape of [N, T_k, C_k].
    times: A 3d tensor with shape of [N, T_q, T_k].
    num_units: A scalar. Attention size.
    num_heads: An int. Number of heads.
    dropout_rate: A floating point number.
    is_training: Boolean. Controller of mechanism for dropout.
    use_prior: String. Whether to use prior for attention heads. Supported
      values include: none, position, time.
    causality: Boolean. If true, units that reference the future are masked.
    scope: Optional scope for `variable_scope`.
    residual: Boolean. Whether to use residual connection.
    time_exp_base: A scalar. Base for exponential time intervals. Only used for
      the case where use_prior='time'.
    overlapping_chunks: Boolean. Whether to use (non)/overlapping chunks for the
      case where use_prior='time'.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.
    with_qk: Whether to return the query and key projections (Q, K) instead of
      the attention output.
  Returns:
    A 3d tensor with shape of (N, T_q, C) containing the output of multihead
    attention, or the tuple (Q, K) if with_qk is True.
  """
    tf.logging.info(
        "Computing attention with prior: {} and num of heads: {}".format(
            use_prior, num_heads))
    with tf.variable_scope(scope, reuse=reuse):
        # Set the fall back option for num_units
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # pylint: disable=invalid-name
        # Linear projections
        # Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        # K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        # V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        Q = tf.layers.dense(queries, num_units, activation=None)  # (N, T_q, C)
        K = tf.layers.dense(keys, num_units, activation=None)  # (N, T_k, C)
        V = tf.layers.dense(keys, num_units, activation=None)  # (N, T_k, C)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2),
                       axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(tf.split(K, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(tf.split(V, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)
        # pylint: enable=invalid-name

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)

        # Key Masking
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))  # (N, T_k)
        key_masks = tf.tile(key_masks, [num_heads, 1])  # (h*N, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])  # (h*N, T_q, T_k)

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings,
                           outputs)  # (h*N, T_q, T_k)

        # Causality = Future blinding
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
            tril = tf.linalg.LinearOperatorLowerTriangular(
                diag_vals).to_dense()  # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings,
                               outputs)  # (h*N, T_q, T_k)

        # Position/Time prior is only used in multi-head case.
        if num_heads > 1:
            # Scaling head weights with position prior.
            if use_prior == "position":
                # Each head focuses on a window of items whose size is computed below.
                attn_size = int(outputs.get_shape().as_list()[-1] / num_heads)
                outputs = tf.concat(_compute_head_weights_with_position_prior(
                    outputs, masks, paddings, num_heads, attn_size),
                                    axis=0)  # (H*N, T_q, T_k)
                tf.logging.info(
                    "After position-wise sliding window attention.")
                tf.logging.info(outputs.shape)

            # Scaling head weights with time prior.
            elif use_prior == "time":
                # Convert time deltas from seconds to days.
                if times is None:
                    raise ValueError("Times tensor is needed.")
                time_deltas = _compute_time_deltas(times) / SECS_TO_DAYS
                outputs = tf.concat(_compute_head_weights_with_time_prior(
                    outputs, paddings, time_deltas, num_heads, time_exp_base,
                    overlapping_chunks),
                                    axis=0)  # (H*N, T_q, T_k)

        # Activation
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Query Masking
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries,
                                                   axis=-1)))  # (N, T_q)
        query_masks = tf.tile(query_masks, [num_heads, 1])  # (h*N, T_q)
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])  # (h*N, T_q, T_k)
        outputs *= query_masks  # broadcasting. (h*N, T_q, T_k)

        # Dropouts
        outputs = tf.layers.dropout(outputs,
                                    rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # (h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0),
                            axis=2)  # (N, T_q, C)

        # Residual connection
        if residual:
            outputs += queries

    if with_qk:
        return Q, K
    else:
        return outputs
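
The future-blinding step above builds a lower-triangular mask and pushes the
masked positions to a large negative value before the softmax. A minimal
TF1-style sketch of just that step (toy scores):

import tensorflow as tf  # TF1-style API, as in the snippet above

scores = tf.zeros([1, 4, 4])  # (batch, T_q, T_k) attention scores
tril = tf.linalg.LinearOperatorLowerTriangular(
    tf.ones_like(scores[0])).to_dense()             # (T_q, T_k)
masks = tf.tile(tf.expand_dims(tril, 0),
                [tf.shape(scores)[0], 1, 1])        # (batch, T_q, T_k)
paddings = tf.ones_like(masks) * (-2**32 + 1)
masked = tf.where(tf.equal(masks, 0), paddings, scores)
with tf.Session() as sess:
    print(sess.run(tf.nn.softmax(masked))[0])       # row i attends to j <= i only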
Example n. 27
def concat(tensors, axis, *args, **kwargs):
    return tf_v1.concat(tensors, axis, *args, **kwargs)
Example n. 28
def embedding(inputs,
              vocab_size,
              num_units,
              zero_pad=True,
              scale=True,
              l2_reg=0.0,
              scope="embedding",
              with_t=False,
              reuse=None):
    """Embeds a given tensor.

  Args:
    inputs: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `lookup table`.
    vocab_size: An int. Vocabulary size.
    num_units: An int. Number of embedding hidden units.
    zero_pad: A boolean. If True, all the values of the first row (id 0) are
      constant zeros.
    scale: A boolean. If True, the outputs are multiplied by sqrt(num_units).
    l2_reg: L2 regularization weight.
    scope: Optional scope for `variable_scope`.
    with_t: If True, return the embedding table.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.

  Returns:
    A `Tensor` with one more rank than the input's. The last dimension is
      `num_units`.

  For example,

  ```
  import tensorflow as tf

  inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
  outputs = embedding(inputs, 6, 2, zero_pad=True)
  with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      print(sess.run(outputs))
  >>
  [[[ 0.          0.        ]
    [ 0.09754146  0.67385566]
    [ 0.37864095 -0.35689294]]

   [[-1.01329422 -1.09939694]
    [ 0.7521342   0.38203377]
    [-0.04973143 -0.06210355]]]
  ```

  ```
  import tensorflow as tf

  inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
  outputs = embedding(inputs, 6, 2, zero_pad=False)
  with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      print(sess.run(outputs))
  >>
  [[[-0.19172323 -0.39159766]
    [-0.43212751 -0.66207761]
    [ 1.03452027 -0.26704335]]

   [[-0.11634696 -0.35983452]
    [ 0.50208133  0.53509563]
    [ 1.22204471 -0.96587461]]]
  ```
  """
    with tf.variable_scope(scope, reuse=reuse):
        lookup_table = tf.get_variable(
            "lookup_table",
            dtype=tf.float32,
            shape=[vocab_size, num_units],
            # initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=tf.keras.regularizers.l2(l2_reg))
        if zero_pad:
            lookup_table = tf.concat(
                (tf.zeros(shape=[1, num_units]), lookup_table[1:, :]), 0)
        outputs = tf.nn.embedding_lookup(lookup_table, inputs)

        if scale:
            outputs = outputs * (num_units**0.5)
    if with_t:
        return outputs, lookup_table
    else:
        return outputs
Example n. 29
def preprocess_targets(targets, word2int, batch_size):
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
    right_side = tf.strided_slice(targets, [0, 0], [batch_size, -1],
                                  [1, 1])  # stride of 1 in each dimension
    preprocess_targets = tf.concat([left_side, right_side], axis=1)
    return preprocess_targets
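
With the tf.concat call fixed to take a list, the shift can be verified on a
toy batch (values assumed; the <SOS> id is 9 here):

import tensorflow as tf  # TF1-style API, as above

targets = tf.constant([[4, 5, 6],
                       [7, 8, 2]])
left_side = tf.fill([2, 1], 9)                      # prepend <SOS>
right_side = tf.strided_slice(targets, [0, 0], [2, -1], [1, 1])  # drop last token
shifted = tf.concat([left_side, right_side], axis=1)
with tf.Session() as sess:
    print(sess.run(shifted))                        # [[9 4 5] [9 7 8]]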
Example n. 30
  def _construct_inner_networks(self):
    """Creates the Tensorflow subgraph for the inner optimization loop."""
    self.inner_train_inputs = []
    self.inner_train_outputs = []  # for debugging
    self.inner_train_next_inputs = []
    self.inner_train_actions = []
    self.inner_train_advantages = []
    self.inner_test_inputs = []
    self.inner_test_outputs = []
    self.inner_test_actions = []
    self.inner_test_advantages = []
    self.inner_train_losses = []
    self.inner_test_losses = []
    self.train_policies = []
    self.test_policies = []

    self.all_test_weights = []

    # inner "train" networks, 1 per task
    # technically, all these networks do the same,
    # just makes the code easier to maintain.
    for idx in range(self.tasks_batch_size):
      tf.logging.info('creating task train network: %d', idx)
      with tf.name_scope('task_%d' % idx):
        with tf.name_scope('train'):
          # Inner network: train
          network_input_train = tf.placeholder(
              tf.float32,
              shape=(None, self.input_dims),
              name='network_input_train_%d' % idx)
          network_output_inner_train = self.network_generator.construct_network(
              network_input_train,
              self.weights,
              scope='network_inner_train_%d' % idx)
          network_next_input_train = tf.placeholder(
              tf.float32,
              shape=(None, self.input_dims),
              name='network_next_input_train_%d' % idx)

          # Slap a policy on top of the network
          train_policy = self.policy(network_input_train,
                                     network_output_inner_train,
                                     self.output_dims,
                                     self.weights['policy_logstd'])

          self.train_policies.append(train_policy)
          self.inner_train_inputs.append(network_input_train)
          self.inner_train_outputs.append(network_output_inner_train)
          self.inner_train_next_inputs.append(network_next_input_train)

          # Compute policy gradient for this task
          # == gradient of expected reward wrt weights
          # We need a batch of rollouts for this.
          train_actions = tf.placeholder(
              tf.float32,
              shape=(None, self.output_dims),
              name='network_actions_train_%d' % idx)
          if not self.learn_advantage_function_inner:
            train_advantages = tf.placeholder(
                tf.float32,
                shape=(None, 1),
                name='network_advantages_train_%d' % idx)
          else:
            adv_input = tf.concat(
                [network_next_input_train, network_input_train, train_actions],
                1)
            train_advantages = self.advantage_generator.construct_network(
                adv_input,
                self.adv_weights,
                scope='network_advantages_train_%d' % idx)

          train_policy_log_prob = train_policy.log_likelihood_op(train_actions)

          if self.ppo and (not self.learn_advantage_function_inner):
            # use PPO only if the advantage function is not learned
            old_train_policy_log_prob = tf.stop_gradient(train_policy_log_prob)
            ratio = tf.exp(train_policy_log_prob - old_train_policy_log_prob)
            clipped_ratio = tf.clip_by_value(ratio, 1 - self.ppo_clip_value,
                                             1 + self.ppo_clip_value)
            loss_inner_train = -tf.reduce_mean(
                tf.minimum(clipped_ratio * train_advantages,
                           ratio * train_advantages))
          else:
            loss_inner_train = -tf.reduce_mean(
                train_policy_log_prob * train_advantages)

          self.inner_train_actions.append(train_actions)
          self.inner_train_advantages.append(train_advantages)
          self.inner_train_losses.append(loss_inner_train)

          grad_inner_train = {}
          for weight_key in self.weights:
            grad_inner_train[weight_key] = tf.gradients(
                loss_inner_train,
                self.weights[weight_key],
                name='%s_inner_%d' % (weight_key, idx))[0]

          test_weights = {}
          for weight_key in self.weights:
            theta = self.weights[weight_key]
            if self.first_order:
              grad = tf.stop_gradient(grad_inner_train[weight_key])
            else:
              grad = grad_inner_train[weight_key]

            if not self.learn_inner_lr_tensor:
              a = self.inner_lr
            else:
              a = self.inner_lr[weight_key]

            if self.learn_offset:
              e = self.e_weights[weight_key]
              test_weights[weight_key] = theta - a * grad + e
            else:
              test_weights[weight_key] = theta - a * grad

    # inner "test" networks, 1 per task, weights = 1 gradient step of
    # corresponding "train" network
      with tf.name_scope('test'):
        # Inner network: test
        network_input_test = tf.placeholder(
            tf.float32,
            shape=(None, self.input_dims),
            name='network_input_test_%d' % idx)
        network_output_inner_test = self.network_generator.construct_network(
            network_input_test,
            test_weights,
            scope='network_inner_test_%d' % idx)

        # Slap a policy on top of the network
        test_policy = self.policy(network_input_test, network_output_inner_test,
                                  self.output_dims,
                                  test_weights['policy_logstd'])
        self.test_policies.append(test_policy)

        test_actions = tf.placeholder(
            tf.float32,
            shape=(None, self.output_dims),
            name='network_actions_test_%d' % idx)
        test_advantages = tf.placeholder(
            tf.float32,
            shape=(None, 1),
            name='network_advantages_test_%d' % idx)
        test_policy_log_prob = test_policy.log_likelihood_op(test_actions)

        if not self.ppo:
          loss_inner_test = -tf.reduce_mean(test_policy_log_prob *
                                            (test_advantages))
        else:
          old_test_policy_log_prob = tf.stop_gradient(test_policy_log_prob)
          ratio = tf.exp(test_policy_log_prob - old_test_policy_log_prob)
          clipped_ratio = tf.clip_by_value(ratio, 1 - self.ppo_clip_value,
                                           1 + self.ppo_clip_value)
          loss_inner_test = -tf.reduce_mean(
              tf.minimum(clipped_ratio * test_advantages,
                         ratio * test_advantages))
        # sum up all loss_inner_test variables to compute outer loss
        self.inner_test_losses.append(loss_inner_test)
        self.inner_test_inputs.append(network_input_test)
        self.inner_test_outputs.append(network_output_inner_test)
        self.inner_test_actions.append(test_actions)
        self.inner_test_advantages.append(test_advantages)
        self.all_test_weights.append(test_weights)
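
The inner adaptation computed above is just one SGD step per weight tensor,
test_weights = theta - inner_lr * grad (plus an optional learned offset e). A
short NumPy sketch with made-up numbers:

import numpy as np

theta = np.array([0.5, -1.0])      # current weights
grad = np.array([0.2, 0.4])        # inner-loop policy gradient
inner_lr = 0.1
test_weights = theta - inner_lr * grad
print(test_weights)                # [ 0.48 -1.04]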