Ejemplo n.º 1
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
  """Builds the graph ops that randomly distort a JPEG training image.

  Running training images through simple distortions (crops, scales, flips,
  brightness changes) mimics variations seen in real data. The pipeline built
  here is:

    1. Decode the JPEG and convert it to float32.
    2. Resize it to `scale * (MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH)`, where
       `scale = (1 + random_crop / 100) * u` and `u` is drawn uniformly from
       `[1, 1 + random_scale / 100)`.
    3. Take a randomly positioned crop of exactly the model input size. With
       both percentages at zero the resized image already has that size, so
       no cropping or scaling takes place.
    4. Optionally mirror the crop horizontally at random.
    5. Multiply all pixel values by a factor drawn uniformly from
       `[1 - random_brightness / 100, 1 + random_brightness / 100)`.

  Args:
    flip_left_right: Boolean, whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
      crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer percentage range by which the pixel values are
      randomly multiplied.

  Returns:
    The jpeg input placeholder and the distorted result tensor (with a leading
    batch dimension of 1).
  """
  jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
  # Decode, convert to float32 and add a leading batch dimension so the image
  # can be passed to resize_bilinear.
  image_batch = tf.expand_dims(
      tf.cast(tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH),
              dtype=tf.float32),
      0)

  # Fixed enlargement that leaves a margin for the random crop, combined with
  # a random extra enlargement that implements the random scaling.
  crop_margin_factor = 1.0 + (random_crop / 100.0)
  max_resize_factor = 1.0 + (random_scale / 100.0)
  total_scale = tf.multiply(
      tf.constant(crop_margin_factor),
      tf.random_uniform(tensor_shape.scalar(),
                        minval=1.0,
                        maxval=max_resize_factor))

  enlarged_width = tf.multiply(total_scale, MODEL_INPUT_WIDTH)
  enlarged_height = tf.multiply(total_scale, MODEL_INPUT_HEIGHT)
  enlarged_size = tf.cast(tf.stack([enlarged_height, enlarged_width]),
                          dtype=tf.int32)
  enlarged_image = tf.squeeze(
      tf.image.resize_bilinear(image_batch, enlarged_size),
      squeeze_dims=[0])

  # Cut a model-sized window out of the enlarged image at a random position.
  crop = tf.random_crop(
      enlarged_image,
      [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, MODEL_INPUT_DEPTH])
  maybe_flipped = (tf.image.random_flip_left_right(crop)
                   if flip_left_right else crop)

  # Random multiplicative brightness change, symmetric around 1.0.
  brightness_low = 1.0 - (random_brightness / 100.0)
  brightness_high = 1.0 + (random_brightness / 100.0)
  brightness_factor = tf.random_uniform(tensor_shape.scalar(),
                                        minval=brightness_low,
                                        maxval=brightness_high)
  distort_result = tf.expand_dims(
      tf.multiply(maybe_flipped, brightness_factor), 0, name='DistortResult')
  return jpeg_data, distort_result
Ejemplo n.º 2
0
    def train_wordvec(self, vocabulary_size, batch_size, embedding_size,
                      window_size, num_sampled, num_steps, data):
        """Train CBOW word2vec embeddings with a sampled-softmax loss.

        Args:
            vocabulary_size: Number of rows of the embedding and softmax
                weight matrices.
            batch_size: Number of (context, target) examples per step.
            embedding_size: Width of each embedding vector.
            window_size: Number of context words on each side of the target;
                every example has ``2 * window_size`` context ids.
            num_sampled: Number of negative classes sampled per step by the
                sampled-softmax loss.
            num_steps: Number of training steps; must be positive.
            data: Training corpus, passed through to ``self.generate_batch``.

        Returns:
            A tuple ``(final_embeddings, avg_embed, softmax_weights,
            softmax_biases)`` holding the values fetched in the last training
            step: the row-wise L2-normalised embedding matrix, the averaged
            context embeddings of the last batch, and the softmax weights and
            biases.

        Raises:
            ValueError: If ``num_steps`` is not positive (there would be no
                training step to take the returned values from).
        """
        if num_steps <= 0:
            raise ValueError(
                'num_steps must be positive, got %r' % (num_steps,))

        # Define the network structure of the CBOW word2vec model.
        graph = tf.Graph()
        with graph.as_default(), tf.device('/cpu:0'):
            train_dataset = tf.placeholder(tf.int32,
                                           shape=[batch_size, 2 * window_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0,
                                  1.0))
            softmax_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            softmax_biases = tf.Variable(tf.zeros([vocabulary_size]))
            # Unlike skip-gram, the CBOW input is the mean of the context
            # vectors: look up every context position, stack the lookups
            # along a new leading axis and average over that axis.
            context_embeddings = [
                tf.nn.embedding_lookup(embeddings, train_dataset[:, i])
                for i in range(2 * window_size)
            ]
            avg_embed = tf.reduce_mean(tf.stack(axis=0,
                                                values=context_embeddings),
                                       0,
                                       keep_dims=False)
            loss = tf.reduce_mean(
                tf.nn.sampled_softmax_loss(weights=softmax_weights,
                                           biases=softmax_biases,
                                           inputs=avg_embed,
                                           labels=train_labels,
                                           num_sampled=num_sampled,
                                           num_classes=vocabulary_size))
            optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
            norm = tf.sqrt(
                tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
            normalized_embeddings = embeddings / norm

        with tf.Session(graph=graph) as session:
            tf.global_variables_initializer().run()
            print('Initialized')
            average_loss = 0
            last_step = num_steps - 1
            for step in range(num_steps):
                batch_data, batch_labels = self.generate_batch(
                    batch_size, window_size, data)
                feed_dict = {
                    train_dataset: batch_data,
                    train_labels: batch_labels
                }
                if step == last_step:
                    # Only the last step's values are returned, so the large
                    # matrices are fetched once instead of on every step.
                    (_, step_loss, final_embeddings, avg_embed_value,
                     softmax_weights_value,
                     softmax_biases_value) = session.run(
                         [
                             optimizer, loss, normalized_embeddings,
                             avg_embed, softmax_weights, softmax_biases
                         ],
                         feed_dict=feed_dict)
                else:
                    _, step_loss = session.run([optimizer, loss],
                                               feed_dict=feed_dict)
                average_loss += step_loss
                if step % 2000 == 0:
                    if step > 0:
                        average_loss = average_loss / 2000
                    print('Average loss at step %d: %f' % (step, average_loss))
                    average_loss = 0
        return (final_embeddings, avg_embed_value, softmax_weights_value,
                softmax_biases_value)
Ejemplo n.º 3
0
  def __call__(self,
               mixed_features2d,
               cell_state,
               logits2d,
               is_training=False,
               policy="learned"):
    """Builds Saccader cell.

    Args:
      mixed_features2d: 4-D Tensor of shape [batch, height, width, channels].
      cell_state: 4-D Tensor of shape [batch, height, width, 1] with cell state.
      logits2d: 4-D Tensor of shape [batch, height, width, channels].
      is_training: (Boolean) To indicate training or inference modes.
      policy: (String) 'learned': uses learned policy, 'random': uses random
        policy, or 'center': uses center look policy.
    Returns:
      logits: Model logits.
      cell_state: New cell state.
      endpoints: Dictionary with cell parameters. Its "locations_labels" entry
        is None for the 'random' and 'center' policies.
    Raises:
      ValueError: If `policy` is not one of 'learned', 'random' or 'center'.
    """
    # Fail early with a clear message; otherwise an unknown policy would leave
    # `locations` unassigned and crash further down with an unrelated error.
    if policy not in ("learned", "random", "center"):
      raise ValueError(
          "Unknown policy %r; expected 'learned', 'random' or 'center'." %
          (policy,))

    batch_size = tf.shape(mixed_features2d)[0]
    _, height, width, channels = mixed_features2d.shape.as_list()
    # Variables are created on the first call only and reused afterwards.
    reuse = bool(self.var_list)
    position_channels = utils.position_channels(mixed_features2d)

    # Snapshot the global variables so those created inside the scope below
    # can be identified by set difference.
    variables_before = set(tf.global_variables())
    with tf.variable_scope("saccader_cell", reuse=reuse):
      # Compute 2D weights of features across space.
      features_space_logits = tf.layers.dense(
          mixed_features2d, units=1,
          use_bias=False, name="attention_weights") / tf.math.sqrt(
              float(channels))

      features_space_logits += (cell_state * -1.e5)  # Mask used locations.
      features_space_weights = utils.softmax2d(features_space_logits)

      # Compute 1D weights of features across channels.
      features_channels_logits = tf.reduce_sum(
          mixed_features2d * features_space_weights, axis=[1, 2])
      features_channels_weights = tf.nn.softmax(
          features_channels_logits, axis=1)

      # Compute location probability.
      locations_logits2d = tf.reduce_sum(
          (mixed_features2d *
           features_channels_weights[:, tf.newaxis, tf.newaxis, :]),
          axis=-1, keepdims=True)

      locations_logits2d += (cell_state * -1e5)  # Mask used locations.
      locations_prob2d = utils.softmax2d(locations_logits2d)

    variables_after = set(tf.global_variables())
    # Compute best locations.
    locations_logits = tf.reshape(
        locations_logits2d, (batch_size, -1))
    all_positions = tf.reshape(
        position_channels, [batch_size, height*width, 2])

    best_locations_labels = tf.argmax(locations_logits, axis=-1)
    best_locations = utils.batch_gather_nd(
        all_positions, best_locations_labels, axis=1)

    # Sample locations.
    if policy == "learned":
      if is_training:
        dist = tfp.distributions.Categorical(logits=locations_logits)
        locations_labels = dist.sample()
        # At training samples location from the learned distribution.
        locations = utils.batch_gather_nd(
            all_positions, locations_labels, axis=1)
        # Ensures range [-1., 1.]
        locations = tf.clip_by_value(locations, -1., 1)
        tf.logging.info("Sampling locations.")
        tf.logging.info("==================================================")
      else:
        # At inference uses the highest-scoring (argmax) location.
        locations = best_locations
        locations_labels = best_locations_labels
    elif policy == "random":
      # Use random policy for location.
      locations = tf.random_uniform(
          shape=(batch_size, 2),
          minval=-1.,
          maxval=1.)
      locations_labels = None
    else:  # policy == "center" (validated above).
      # Use center look policy.
      locations = tf.zeros(
          shape=(batch_size, 2))
      locations_labels = None

    # Update cell_state: mark the newly visited location and keep the state
    # within [0, 1] even if a location is visited more than once.
    cell_state += utils.onehot2d(cell_state, locations)
    cell_state = tf.clip_by_value(cell_state, 0, 1)
    #########################################################################
    # Extract logits from the 2D logits.
    if self.soft_attention:
      logits = tf.reduce_sum(logits2d * locations_prob2d, axis=[1, 2])
    else:
      logits = gather_2d(logits2d, locations)
    ############################################################
    endpoints = {}
    endpoints["cell_outputs"] = {
        "locations": locations,
        "locations_labels": locations_labels,
        "best_locations": best_locations,
        "best_locations_labels": best_locations_labels,
        "locations_logits2d": locations_logits2d,
        "locations_prob2d": locations_prob2d,
        "cell_state": cell_state,
        "features_space_logits": features_space_logits,
        "features_space_weights": features_space_weights,
        "features_channels_logits": features_channels_logits,
        "features_channels_weights": features_channels_weights,
        "locations_logits": locations_logits,
        "all_positions": all_positions,
    }
    if not reuse:
      # First call: register the variables created inside the scope.
      self.collect_variables(list(variables_after - variables_before))

    return logits, cell_state, endpoints
def weight_init(shape, name):
    """Create a variable initialised uniformly in [-limit, limit).

    `limit` is `sqrt(6 / (shape[0] + shape[-1]))`, which is the Glorot/Xavier
    uniform bound when `shape[0]` and `shape[-1]` are the fan-in and fan-out.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the created variable.

    Returns:
        The newly created `tf.Variable`.
    """
    fan_sum = shape[0] + shape[-1]
    lower = -tf.sqrt(6. / fan_sum)
    upper = tf.sqrt(6. / fan_sum)
    initial_value = tf.random_uniform(shape, lower, upper)
    return tf.Variable(initial_value, name=name)
Ejemplo n.º 5
0
def random_apply(func, p, x):
    """Return `func(x)` with probability `p`, otherwise `x` unchanged.

    The decision is made inside the graph with `tf.cond`, based on a single
    uniform draw from [0, 1).
    """
    draw = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
    should_apply = tf.less(draw, tf.cast(p, tf.float32))

    def _apply():
        return func(x)

    def _identity():
        return x

    return tf.cond(should_apply, _apply, _identity)
Ejemplo n.º 6
0
def get_batchpairs_coco(unused_source,
                        max_shift,
                        batch_size=2,
                        queue_size=60,
                        num_threads=3,
                        train_height=128,
                        train_width=128,
                        pixel_noise=0.0,
                        mix=True,
                        screen=False,
                        to_gray=True,
                        mode='train'):
    """Prepares training image batches from MS COCO dataset.

    Note currently this function only generates dummy data.

    Each example is a pair of crops taken from the same frame: a crop whose
    four corners are randomly perturbed and a plain crop at the same base
    position. The label is the vector of the 8 corner offsets.

    Args:
        unused_source: pattern of input data containing source images from MS
            COCO dataset. Not used now.
        max_shift: the range each image corner point can move
        batch_size: the size of training or testing batches
        queue_size: the queue size of the shuffle buffer
        num_threads: the number of threads of the shuffle buffer
        train_height: the height of the training/testing images
        train_width: the width of the training/testing images
        pixel_noise: the magnitude of additive noises
        mix: whether mix the magnitude of corner point shifts
        screen: whether remove highly distorted homographies
        to_gray: whether prepare color or gray scale training images
        mode: 'train' or 'eval', specifying whether preparing images for
            training or testing

    Returns:
        a batch of training images and the corresponding ground-truth
        homographies (both cast to float32)
    """
    # NOTE(review): presumably returns a batch of RGB frames shaped
    # [num_frames, height, width, 3] -- confirm against get_two_dummy_frames.
    frames = get_two_dummy_frames()
    if to_gray:
        output_frames = tf.image.rgb_to_grayscale(frames)
        num_channel = 1
    else:
        output_frames = frames
        num_channel = 3

    frame_height = tf.shape(output_frames)[1]
    frame_width = tf.shape(output_frames)[2]

    # Largest top-left offset at which a train-sized crop still fits.
    max_crop_shift_x = tf.cast(frame_width - train_width, tf.float32)
    max_crop_shift_y = tf.cast(frame_height - train_height, tf.float32)

    # Draw the base crop position at least max_shift + 1 pixels away from the
    # extreme offsets, leaving room for the corner perturbations added below.
    crop_shift_x = tf.random_uniform([],
                                     minval=max_shift + 1,
                                     maxval=max_crop_shift_x - max_shift - 1,
                                     dtype=tf.float32)
    crop_shift_y = tf.random_uniform([],
                                     minval=max_shift + 1,
                                     maxval=max_crop_shift_y - max_shift - 1,
                                     dtype=tf.float32)

    # Eight offsets: an (x, y) pair for each of the four crop corners.
    rand_shift_base = tf.random_uniform([8],
                                        minval=-max_shift,
                                        maxval=max_shift,
                                        dtype=tf.float32)
    if mix:
        # Scale is 0, 1/3, 2/3 or 1 depending on how many of the thresholds
        # 0.1, 0.2 and 0.3 the uniform draw p exceeds.
        p = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
        scale = (tf.to_float(tf.greater(p, 0.1)) + tf.to_float(
            tf.greater(p, 0.2)) + tf.to_float(tf.greater(p, 0.3))) / 3
    else:
        scale = 1.0

    if screen:
        angles = calc_homography_distortion(train_width, train_height,
                                            scale * rand_shift_base)
        # Despite its name this holds the MINIMUM of `angles`. When it falls
        # below -0.707 the whole shift vector is multiplied by zero, i.e. the
        # example degenerates to an unperturbed crop.
        max_angle = tf.reduce_min(angles)
        rand_shift = tf.to_float(max_angle >= -0.707) * scale * rand_shift_base
    else:
        rand_shift = scale * rand_shift_base

    # Perturbed corner coordinates: corner 1 is the top-left, corner 2 adds
    # the crop height, corner 3 adds the crop width, corner 4 adds both.
    dy1 = crop_shift_y + rand_shift[1]
    dx1 = crop_shift_x + rand_shift[0]
    dy2 = crop_shift_y + train_height - 1 + rand_shift[3]
    dx2 = crop_shift_x + rand_shift[2]
    dy3 = crop_shift_y + rand_shift[5]
    dx3 = crop_shift_x + train_width - 1 + rand_shift[4]
    dy4 = crop_shift_y + train_height - 1 + rand_shift[7]
    dx4 = crop_shift_x + train_width - 1 + rand_shift[6]
    # Both crops come from frame 0: the first uses the perturbed corners, the
    # second is the plain crop at the base position.
    cropped_frame1 = subpixel_homography(output_frames[0], train_height,
                                         train_width, dy1, dx1, dy2, dx2, dy3,
                                         dx3, dy4, dx4)
    cropped_frame2 = subpixel_crop(output_frames[0], crop_shift_y,
                                   crop_shift_x, train_height, train_width)
    # Independent additive noise for each crop, applied before normalization.
    noise_im1 = tf.truncated_normal(shape=tf.shape(cropped_frame1),
                                    mean=0.0,
                                    stddev=pixel_noise,
                                    dtype=tf.float32)
    noise_im2 = tf.truncated_normal(shape=tf.shape(cropped_frame2),
                                    mean=0.0,
                                    stddev=pixel_noise,
                                    dtype=tf.float32)
    normalized_im1 = normalize_image(
        tf.cast(cropped_frame1, tf.float32) + noise_im1)
    normalized_im2 = normalize_image(
        tf.cast(cropped_frame2, tf.float32) + noise_im2)
    # Combine the two crops into one tensor with 2 * num_channel channels.
    cropped_pair = tf.reshape(tf.stack((normalized_im1, normalized_im2), 2),
                              (train_height, train_width, 2 * num_channel))
    # The label is the (possibly scaled or zeroed) vector of corner offsets.
    label = rand_shift

    if mode == 'train':
        min_after_dequeue = int(queue_size / 3)
    else:
        min_after_dequeue = batch_size * 3
    batch_frames, batch_labels = tf.train.shuffle_batch(
        [cropped_pair, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=queue_size,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=False)

    return tf.cast(batch_frames, tf.float32), tf.cast(batch_labels, tf.float32)
    def __init__(self,
                 review_num_u,
                 review_num_i,
                 user_num,
                 item_num,
                 num_classes,
                 n_latent,
                 embedding_id,
                 attention_size,
                 embedding_size,
                 l2_reg_lambda=0.0):
        """Build the TF1 graph of a review-attention rating-prediction model.

        Creates the input placeholders, an attention block over user and item
        reviews, a prediction head with per-user/per-item biases, and the
        loss and evaluation tensors, all stored as attributes on `self`.

        Args:
            review_num_u: Number of reviews per user (second dimension of
                `input_u` and `input_reuid`).
            review_num_i: Number of reviews per item (second dimension of
                `input_i` and `input_reiid`).
            user_num: Number of users; id tables get `user_num + 2` rows.
            item_num: Number of items; id tables get `item_num + 2` rows.
            num_classes: Not used in this method.
            n_latent: Output width of the `Wu` / `Wi` projections.
            embedding_id: Width of the user/item id embeddings.
            attention_size: Width of the attention projections.
            embedding_size: Width of each pre-computed review embedding.
            l2_reg_lambda: Weight of the L2 penalty on the attention
                projection matrices (`Wau`, `Wru`, `Wri`, `Wai`).
        """
        # input_u / input_i now take pre-computed review embeddings directly,
        # so no embedding lookup is needed for them.
        self.input_u = tf.placeholder(tf.float32,
                                      [None, review_num_u, embedding_size],
                                      name="input_u")
        self.input_i = tf.placeholder(tf.float32,
                                      [None, review_num_i, embedding_size],
                                      name="input_i")

        # Ids looked up in the item table (input_reuid) and the user table
        # (input_reiid), one per review slot.
        self.input_reuid = tf.placeholder(tf.int32, [None, review_num_u],
                                          name='input_reuid')
        self.input_reiid = tf.placeholder(tf.int32, [None, review_num_i],
                                          name='input_reiid')
        self.input_y = tf.placeholder(tf.float32, [None, 1], name="input_y")
        self.input_uid = tf.placeholder(tf.int32, [None, 1], name="input_uid")
        self.input_iid = tf.placeholder(tf.int32, [None, 1], name="input_iid")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        # NOTE(review): drop0 is not referenced anywhere else in this method.
        self.drop0 = tf.placeholder(tf.float32, name="dropout0")
        # Id embedding tables, shared by the attention block and by the
        # "get_fea" block below.
        iidW = tf.Variable(tf.random_uniform([item_num + 2, embedding_id],
                                             -0.1, 0.1),
                           name="iidW")
        uidW = tf.Variable(tf.random_uniform([user_num + 2, embedding_id],
                                             -0.1, 0.1),
                           name="uidW")

        # Accumulator for the L2 penalty on the attention projections.
        l2_loss_x = tf.constant(0.0)

        with tf.name_scope("dropout"):
            # The second argument is the constant 1.0 -- presumably a
            # keep-probability of 1, i.e. dropout is disabled on the inputs.
            self.h_drop_u = tf.nn.dropout(self.input_u, 1.0)
            self.h_drop_i = tf.nn.dropout(self.input_i, 1.0)
            # self.h_drop_u = tf.Print(self.h_drop_u, ["h_drop_u: ", self.h_drop_u])
            # self.h_drop_i = tf.Print(self.h_drop_i, ["h_drop_i: ", self.h_drop_i])
        with tf.name_scope("attention"):
            Wau = tf.Variable(tf.random_uniform(
                [embedding_size, attention_size], -0.1, 0.1),
                              name='Wau')
            Wru = tf.Variable(tf.random_uniform([embedding_id, attention_size],
                                                -0.1, 0.1),
                              name='Wru')
            # NOTE(review): Wpu, bau and bbu are only referenced by the
            # commented-out additive-attention formula below; they are still
            # created as graph variables.
            Wpu = tf.Variable(tf.random_uniform([attention_size, 1], -0.1,
                                                0.1),
                              name='Wpu')
            bau = tf.Variable(tf.constant(0.1, shape=[attention_size]),
                              name="bau")
            bbu = tf.Variable(tf.constant(0.1, shape=[1]), name="bbu")
            # self.iid_a = tf.nn.relu(tf.nn.embedding_lookup(iidW, self.input_reuid))
            self.iid_a = tf.nn.embedding_lookup(iidW, self.input_reuid)
            # Previous additive-attention formulation, kept for reference:
            # self.u_j = tf.einsum('ajk,kl->ajl', tf.nn.relu(
            #     tf.einsum('ajk,kl->ajl', self.h_drop_u, Wau) + tf.einsum('ajk,kl->ajl', self.iid_a, Wru) + bau),
            #                      Wpu) + bbu  # None*u_len*1
            # Scaled dot-product scores between the projected id embeddings
            # and the projected review embeddings.
            self.u_j = tf.matmul(
                tf.einsum('ajk,kl->ajl', self.iid_a, Wru),
                tf.einsum('ajk,kl->ajl', self.h_drop_u, Wau),
                transpose_b=True) / tf.sqrt(
                    tf.constant(attention_size,
                                dtype=tf.float32))  # NOTE(review): this matmul yields [None, u_len, u_len], not the [None, u_len, 1] of the old formula -- confirm
            # NOTE(review): debugging print left enabled; it wraps u_j in a
            # tf.Print op.
            self.u_j = tf.Print(
                self.u_j,
                ["u_j:", self.u_j, tf.shape(self.u_j)], summarize=50)

            # Softmax over axis 1.
            self.u_a = tf.nn.softmax(self.u_j, 1)
            # self.u_a = tf.Print(self.u_a, ["u_a:", self.u_a, tf.shape(self.u_a)], summarize=50)

            Wai = tf.Variable(tf.random_uniform(
                [embedding_size, attention_size], -0.1, 0.1),
                              name='Wai')
            Wri = tf.Variable(tf.random_uniform([embedding_id, attention_size],
                                                -0.1, 0.1),
                              name='Wri')
            # NOTE(review): Wpi, bai and bbi are likewise only referenced by
            # the commented-out formula below.
            Wpi = tf.Variable(tf.random_uniform([attention_size, 1], -0.1,
                                                0.1),
                              name='Wpi')
            bai = tf.Variable(tf.constant(0.1, shape=[attention_size]),
                              name="bai")
            bbi = tf.Variable(tf.constant(0.1, shape=[1]), name="bbi")
            # self.uid_a = tf.nn.relu(tf.nn.embedding_lookup(uidW, self.input_reiid))
            self.uid_a = tf.nn.embedding_lookup(uidW, self.input_reiid)
            # Previous additive-attention formulation, kept for reference:
            # self.i_j = tf.einsum('ajk,kl->ajl', tf.nn.relu(
            #     tf.einsum('ajk,kl->ajl', self.h_drop_i, Wai) + tf.einsum('ajk,kl->ajl', self.uid_a, Wri) + bai),
            #                      Wpi) + bbi
            # Item-side counterpart of the user-side scores above.
            self.i_j = tf.matmul(tf.einsum('ajk,kl->ajl', self.uid_a, Wri),
                                 tf.einsum('ajk,kl->ajl', self.h_drop_i, Wai),
                                 transpose_b=True) / tf.sqrt(
                                     tf.constant(attention_size,
                                                 dtype=tf.float32))

            # Softmax over axis 1.
            self.i_a = tf.nn.softmax(self.i_j, 1)
            # self.i_a = tf.Print(self.i_a, ["i_a:", self.i_a, tf.shape(self.i_a)], summarize=50)

            # Only the four attention projection matrices are regularised.
            l2_loss_x += tf.nn.l2_loss(Wau)
            l2_loss_x += tf.nn.l2_loss(Wru)
            l2_loss_x += tf.nn.l2_loss(Wri)
            l2_loss_x += tf.nn.l2_loss(Wai)

        with tf.name_scope("add_reviews"):
            # Weight the review embeddings by the attention weights and sum
            # over axis 1.
            # NOTE(review): u_a / i_a come from a [len, len] score matrix, so
            # this element-wise product relies on broadcasting against the
            # [len, embedding_size] reviews -- confirm the shapes line up.
            self.u_feas = tf.reduce_sum(tf.multiply(self.u_a, self.h_drop_u),
                                        1)
            self.u_feas = tf.nn.dropout(self.u_feas, self.dropout_keep_prob)
            self.i_feas = tf.reduce_sum(tf.multiply(self.i_a, self.h_drop_i),
                                        1)
            self.i_feas = tf.nn.dropout(self.i_feas, self.dropout_keep_prob)
            # self.u_feas = tf.Print(self.u_feas, ["u_feas: ", self.u_feas, tf.shape(self.u_feas)], summarize=50)
            # self.i_feas = tf.Print(self.i_feas, ["i_feas: ", self.i_feas, tf.shape(self.u_feas)], summarize=50)
        with tf.name_scope("get_fea"):

            # uidmf = tf.Variable(tf.random_uniform([user_num + 2, embedding_id], -0.1, 0.1), name="uidmf")
            # iidmf = tf.Variable(tf.random_uniform([item_num + 2, embedding_id], -0.1, 0.1), name="iidmf")
            # uidmf = tf.Print(uidmf, ["uidmf: ", uidmf, tf.shape(uidmf)], summarize=50)
            # iidmf = tf.Print(iidmf, ["iidmf: ", iidmf, tf.shape(iidmf)], summarize=50)

            # Id embeddings of the user and the item being scored, taken from
            # the same tables used by the attention block.
            self.uid = tf.nn.embedding_lookup(uidW, self.input_uid)
            self.iid = tf.nn.embedding_lookup(iidW, self.input_iid)
            # self.uid = tf.Print(self.uid, ["uid: ", self.uid, tf.shape(self.uid)], summarize=50)
            # self.iid = tf.Print(self.iid, ["iid: ", self.iid, tf.shape(self.iid)], summarize=50)
            # Flatten the lookups to [-1, embedding_id].
            self.uid = tf.reshape(self.uid, [-1, embedding_id])
            self.iid = tf.reshape(self.iid, [-1, embedding_id])
            # self.uid = tf.Print(self.uid, ["uid: ", self.uid, tf.shape(self.uid)], summarize=50)
            # self.iid = tf.Print(self.iid, ["iid: ", self.iid, tf.shape(self.iid)], summarize=50)
            Wu = tf.Variable(tf.random_uniform([embedding_size, n_latent],
                                               -0.1, 0.1),
                             name='Wu')
            bu = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bu")
            # q_u (i.e. the uid embedding) + X_u.
            # NOTE(review): adding self.uid (width embedding_id) to an
            # n_latent-wide projection assumes embedding_id == n_latent --
            # confirm.
            self.u_feas = tf.matmul(self.u_feas, Wu) + self.uid + bu

            Wi = tf.Variable(tf.random_uniform([embedding_size, n_latent],
                                               -0.1, 0.1),
                             name='Wi')
            bi = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bi")
            # p_i + Y_i (W0 * O_i + b0)
            self.i_feas = tf.matmul(self.i_feas, Wi) + self.iid + bi

            # self.u_feas = tf.Print(self.u_feas, ["u_feas: ", self.u_feas, tf.shape(self.u_feas)], summarize=50)
            # self.i_feas = tf.Print(self.i_feas, ["i_feas: ", self.i_feas, tf.shape(self.u_feas)], summarize=50)

        with tf.name_scope('prediction'):
            # h0: element-wise interaction of user and item features.
            self.FM = tf.multiply(self.u_feas, self.i_feas, name="h0")
            self.FM = tf.nn.relu(self.FM)
            self.FM = tf.nn.dropout(self.FM, self.dropout_keep_prob)
            # self.FM = tf.Print(self.FM, ["FM: ", self.FM, tf.shape(self.FM)], summarize=50)

            # The learned output weights were replaced by a constant vector of
            # ones, so the matmul below simply sums h0 over its latent axis.
            # Wmul = tf.Variable(
            #     tf.random_uniform([n_latent, 1], -0.1, 0.1), name='wmul')
            Wmul = tf.constant(1,
                               shape=[n_latent, 1],
                               name='wmul',
                               dtype=tf.float32)

            # W1^T * h0
            self.mul = tf.matmul(self.FM, Wmul)
            self.score = tf.reduce_sum(self.mul, 1, keep_dims=True)
            # self.score = tf.Print(self.score, ["score: ", self.score, tf.shape(self.score)], summarize=50)

            # Per-user and per-item scalar biases, gathered by id.
            self.uidW2 = tf.Variable(tf.constant(0.1, shape=[user_num + 2]),
                                     name="uidW2")
            self.iidW2 = tf.Variable(tf.constant(0.1, shape=[item_num + 2]),
                                     name="iidW2")
            self.u_bias = tf.gather(self.uidW2, self.input_uid)
            self.i_bias = tf.gather(self.iidW2, self.input_iid)
            self.Feature_bias = self.u_bias + self.i_bias

            # Global scalar bias.
            self.bised = tf.Variable(tf.constant(0.1), name='bias')

            self.predictions = self.score + self.Feature_bias + self.bised

        with tf.name_scope("loss"):
            # Squared-error loss via tf.nn.l2_loss, plus the weighted L2
            # penalty accumulated in the attention block.
            losses = tf.nn.l2_loss(tf.subtract(self.predictions, self.input_y))

            self.loss = losses + l2_reg_lambda * l2_loss_x

        with tf.name_scope("accuracy"):
            # Mean absolute error.
            self.mae = tf.reduce_mean(
                tf.abs(tf.subtract(self.predictions, self.input_y)))
            # Despite the attribute name, this is the root-mean-square error.
            self.accuracy = tf.sqrt(
                tf.reduce_mean(
                    tf.square(tf.subtract(self.predictions, self.input_y))))
Ejemplo n.º 8
0
'''
SFFF (S = start point, safe)
FHFH (F = Frozen surface, safe)
FFFH (H = hole)
HFFG (G = goal, target)
'''

# The graph below uses TF1-style placeholders and optimizers.
tf.disable_v2_behavior()

env = gym.make('FrozenLake-v0')

tf.reset_default_graph()

# These lines establish the feed-forward part of the network used to choose
# actions: a [1, 16] input row is mapped by a single weight matrix to 4
# Q-values, and the action with the largest Q-value is predicted.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)

# Below we obtain the loss by taking the sum of squares difference between the
# target and prediction Q values.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)

# tf.initialize_all_variables() is deprecated; global_variables_initializer()
# is its direct replacement.
init = tf.global_variables_initializer()

# Set learning parameters
y = .99  # presumably the discount factor -- its use is outside this excerpt
e = 0.1  # presumably the exploration rate -- its use is outside this excerpt
num_episodes = 2000
Ejemplo n.º 9
0
def sample(logits):
    """Draw one categorical sample per row of `logits` (Gumbel-max trick).

    Noise of the form -log(-log(u)), with u uniform on [0, 1), is added to the
    logits, and the index of the largest perturbed logit along axis 1 is
    returned.
    """
    uniform = tf.random_uniform(tf.shape(logits))
    # Negated noise term: subtracting it below adds -log(-log(u)).
    negated_noise = tf.log(-tf.log(uniform))
    return tf.argmax(logits - negated_noise, 1)
Ejemplo n.º 10
0
def _gen_mask(shape, drop_prob):
    """Generate a dropout mask of the given shape.

    Each entry is `1 / keep_prob` with probability `keep_prob` and 0
    otherwise, where `keep_prob = 1 - drop_prob`.
    """
    keep_prob = 1. - drop_prob
    uniform = tf.random_uniform(shape, minval=0., maxval=1., dtype=tf.float32)
    # floor(u + keep_prob) is 1 exactly when u >= 1 - keep_prob, else 0.
    keep_indicator = tf.floor(uniform + keep_prob)
    return keep_indicator / keep_prob
Ejemplo n.º 11
0
def location_guide(image,
                   image_size=32,
                   open_fraction=0.2,
                   uniform_noise=False,
                   block_probability=0.5):
    """Randomly blocks an image except for a square window and reports it.

    With probability `block_probability` everything outside a randomly placed
    square window is replaced (by zeros, or by uniform noise when
    `uniform_noise` is set). The window center is returned as a supervised
    guide for a glimpse controller.

    Args:
      image: Tensor of shape [height, width, channels]. The mask built here
        has 3 channels.
      image_size: (Integer) image size.
      open_fraction: (Float) fraction of image_size that stays intact; the
        rest of the image is blocked.
      uniform_noise: (Boolean) whether to block with uniform noise instead of
        zeros.
      block_probability: [0 - 1] probability of blocking the image.

    Returns:
      image: Tensor with the same shape as the input image.
      location: Output of `index_to_normalized_location` for the center of
        the intact window, or a float32 [0, 0] when the image was not blocked.
      blocked_indicator: 1. if the image was blocked, 0. if the output image
        is the input image.
    """
    def _block_outside_window(x):
        """Blocks `x` outside a random window; returns (x, window location)."""
        window_size = int(open_fraction * image_size)
        padded_size = 2 * image_size - window_size

        # A window of ones, zero-padded so that it sits in the middle of a
        # [padded_size, padded_size] canvas.
        window = tf.ones([window_size, window_size, 3])
        canvas = tf.image.resize_image_with_crop_or_pad(
            window, padded_size, padded_size)

        # Offsets are drawn from [0, image_size - window_size).
        # NOTE(review): for integer dtypes `maxval` is exclusive, so the
        # largest valid offset (image_size - window_size) is never drawn.
        max_offset = image_size - window_size
        offset_height = tf.random_uniform(
            shape=(), minval=0, maxval=max_offset, dtype=tf.int32)
        offset_width = tf.random_uniform(
            shape=(), minval=0, maxval=max_offset, dtype=tf.int32)

        mask = tf.image.crop_to_bounding_box(
            canvas, offset_height, offset_width, image_size, image_size)

        x = x * mask
        if uniform_noise:
            noise = tf.random_uniform((image_size, image_size, 3), 0, 1.0)
            x = x + noise * (1. - mask)

        # In the padded canvas the window starts at image_size - window_size;
        # cropping at `offset` shifts it by that amount.
        half_window = window_size // 2
        center_ix = tf.convert_to_tensor(
            [
                image_size - window_size - offset_height + half_window,
                image_size - window_size - offset_width + half_window,
            ],
            dtype=tf.int32)

        return x, index_to_normalized_location(center_ix, image_size)

    should_block = tf.math.less(
        tf.random_uniform([], 0, 1.0), block_probability)

    def _blocked_branch():
        return _block_outside_window(image) + (1., )

    def _identity_branch():
        return image, tf.zeros(shape=(2, ), dtype=tf.float32), 0.

    result_image, location, blocked_indicator = tf.cond(
        should_block, _blocked_branch, _identity_branch)
    return result_image, location, blocked_indicator
Ejemplo n.º 12
0
    def preprocess(self, dataset, mode, hparams, interleave=True):
        """Turn a dataset of single frames into a dataset of video features.

        Each example is first passed through `self.preprocess_example`. Frames
        are then grouped into sequences of
        `hparams.video_num_input_frames + hparams.video_num_target_frames`
        frames (capped by `self.max_frames_per_video(hparams)` in PREDICT
        mode), and each sequence is split into input and target features.

        Args:
          dataset: dataset of per-frame examples.
          mode: estimator mode key; TRAIN enables random skipping and
            shuffling, PREDICT caps the number of frames.
          hparams: hyper-parameters; `video_num_input_frames` and
            `video_num_target_frames` are read here.
          interleave: random skipping and shuffling are only applied when
            this is true.

        Returns:
          A dataset of feature dictionaries as built by `features_from_batch`.
        """
        def split_on_batch(x):
            """Split x on batch dimension into x[:size, ...] and x[size:, ...].

            `size` is `hparams.video_num_input_frames`. Only tensors of rank
            1 to 4 are supported; other ranks raise ValueError.
            """
            length = len(x.get_shape())
            size = hparams.video_num_input_frames
            if length < 1:
                raise ValueError("Batched tensor of length < 1.")
            if length == 1:
                return x[:size], x[size:]
            if length == 2:
                return x[:size, :], x[size:, :]
            if length == 3:
                return x[:size, :, :], x[size:, :, :]
            if length == 4:
                return x[:size, :, :, :], x[size:, :, :, :]
            # TODO(lukaszkaiser): use tf.split for the general case.
            raise ValueError(
                "Batch splitting on general dimensions not done yet.")

        def features_from_batch(batched_prefeatures):
            """Construct final features from the batched inputs.

            Every entry is split with `split_on_batch`. The "frame" entry
            becomes "inputs" and "targets"; any other key `k` becomes
            "input_k" and "target_k".

            Args:
              batched_prefeatures: single-frame features (from disk) as batch
                tensors.

            Returns:
              Features dictionary with joint features per-frame.
            """
            features = {}
            for k, v in six.iteritems(batched_prefeatures):
                if k == "frame":  # We rename past frames to inputs and targets.
                    s1, s2 = split_on_batch(v)
                    features["inputs"] = s1
                    features["targets"] = s2
                else:
                    s1, s2 = split_on_batch(v)
                    features["input_%s" % k] = s1
                    features["target_%s" % k] = s2
            return features

        # Batch and construct features.
        def _preprocess(example):
            """Apply the per-example preprocessing for this mode."""
            return self.preprocess_example(example, mode, hparams)

        def avoid_break_batching(dataset):
            """Smart preprocessing to avoid break between videos!

            Simple batching of images into videos may result into broken
            videos with two parts from two different videos. This
            preprocessing avoids this using the frame number.

            `num_frames` is read from the enclosing scope; it is assigned
            further down, before this function is called.

            Args:
              dataset: raw not-batched dataset.

            Returns:
              batched not-broken videos.
            """
            def check_integrity_and_batch(*datasets):
                """Checks whether a sequence of frames are from the same video.

                Args:
                  *datasets: datasets each skipping 1 frame from the previous
                    one.

                Returns:
                  batched data and the integrity flag.
                """
                not_broken = tf.constant(True)
                if "frame_number" in datasets[0]:
                    frame_numbers = [
                        dataset["frame_number"][0] for dataset in datasets
                    ]

                    # The window is intact when the last frame number is
                    # exactly num_frames - 1 past the first one.
                    not_broken = tf.equal(frame_numbers[-1] - frame_numbers[0],
                                          num_frames - 1)
                    if self.only_keep_videos_from_0th_frame:
                        not_broken = tf.logical_and(
                            not_broken, tf.equal(frame_numbers[0], 0))
                    if self.avoid_overlapping_frames:
                        # Keep only windows starting at a multiple of
                        # num_frames.
                        non_overlap = tf.equal(
                            tf.mod(frame_numbers[0], num_frames), 0)
                        not_broken = tf.logical_and(not_broken, non_overlap)
                else:
                    tf.logging.warning("use_not_breaking_batching is True but "
                                       "no frame_number is in the dataset.")

                # Stack the per-frame values of every key into one tensor.
                features = {}
                for key in datasets[0].keys():
                    values = [dataset[key] for dataset in datasets]
                    batch = tf.stack(values)
                    features[key] = batch
                return features, not_broken

            # Zip num_frames copies of the dataset, the i-th one shifted by i
            # frames, so every zipped element is a window of consecutive
            # frames; then drop the windows flagged as broken.
            ds = [dataset.skip(i) for i in range(num_frames)]
            dataset = tf.data.Dataset.zip(tuple(ds))
            dataset = dataset.map(check_integrity_and_batch)
            dataset = dataset.filter(lambda _, not_broken: not_broken)
            dataset = dataset.map(lambda features, _: features)

            return dataset

        preprocessed_dataset = dataset.map(_preprocess)

        num_frames = (hparams.video_num_input_frames +
                      hparams.video_num_target_frames)
        if mode == tf_estimator.ModeKeys.PREDICT:
            num_frames = min(self.max_frames_per_video(hparams), num_frames)

        # We jump by a random position at the beginning to add variety.
        if (self.random_skip and self.settable_random_skip and interleave
                and mode == tf_estimator.ModeKeys.TRAIN):
            random_skip = tf.random_uniform([],
                                            maxval=num_frames,
                                            dtype=tf.int64)
            preprocessed_dataset = preprocessed_dataset.skip(random_skip)
        if (self.use_not_breaking_batching
                and self.settable_use_not_breaking_batching):
            batch_dataset = avoid_break_batching(preprocessed_dataset)
        else:
            # Plain batching: consecutive groups of num_frames frames, with
            # any incomplete final group dropped.
            batch_dataset = preprocessed_dataset.batch(num_frames,
                                                       drop_remainder=True)
        dataset = batch_dataset.map(features_from_batch)
        if self.shuffle and interleave and mode == tf_estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(hparams.get("shuffle_buffer_size", 128))
        return dataset
Ejemplo n.º 13
0
import os
import tensorflow.compat.v1 as tf

# Set TensorFlow's C++ log-level environment variable to '2'.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#tf.disable_v2_behavior()
tf.compat.v1.disable_eager_execution()

# x: [1, 2, 3]
# y: [1, 2, 3]
# A linear regression model is used to recover the slope (1.0) and the
# y-intercept (0.0) of the proportional relationship y = 1.0 * x + 0.0.

x_data = [1, 2, 3]
y_data = [1, 2, 3]

# Slope and intercept, each initialized uniformly in [-1.0, 1.0).
W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.random_uniform([1], -1.0, 1.0))

X = tf.placeholder(tf.float32, name="X")
Y = tf.placeholder(tf.float32, name="Y")

# Build the one-dimensional regression model from tensors.
hypothesis = W * X + b

# The cost is the mean of the squared difference between the hypothesis and
# the given y values.
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Gradient descent with a learning rate of 0.1 searches for the Variable
# values that minimize the cost.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(cost)
with tf.Session() as sess:
    def __init__(self, args):
        """Build the TF1 graph: GRU encoder/decoder with a clustered latent.

        The graph embeds integer input sequences, encodes them with a GRU,
        samples a latent vector `z` from the encoder's final state, and
        decodes with a second GRU. `args.mem_num` cluster components
        (`mu_c`, `log_sigma_sq_c`, `log_pi_prior`) live in the "clusters"
        scope and can be overwritten through the `init_*_op` assign ops.

        Attributes read from `args`: batch_size, map_size, x_latent_size,
        rnn_size, mem_num, neg_size, eval, learning_rate.

        Attributes set on `self`: args, input_form, cluster_init,
        init_mu_c_op, init_sigma_c_op, init_pi_op, mu_c, sigma_c, pi,
        batch_post_embedded, batch_att, batch_likelihood, save, restore; and,
        only when `args.eval` is false, rec_loss, latent_loss, cate_loss,
        loss, pretrain_loss, pretrain_op, train_op.

        The numeric shapes in the inline comments below (128, 5, 256, ...)
        are the original author's examples for one configuration.
        """
        self.args = args

        # Placeholders fed at run time, in the order stored in input_form.
        inputs = tf.placeholder(shape=(args.batch_size, None),
                                dtype=tf.int32,
                                name='inputs')
        mask = tf.placeholder(shape=(args.batch_size, None),
                              dtype=tf.float32,
                              name='inputs_mask')
        seq_length = tf.placeholder(shape=args.batch_size,
                                    dtype=tf.float32,
                                    name='seq_length')

        self.input_form = [inputs, mask, seq_length]

        # The decoder input is the sequence with a zero column prepended; the
        # decoder target and its mask are the sequence/mask with a zero
        # column appended.
        encoder_inputs = inputs
        decoder_inputs = tf.concat(
            [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs],
            axis=1)
        decoder_targets = tf.concat(
            [inputs,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)],
            axis=1)
        decoder_mask = tf.concat(
            [mask,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)],
            axis=1)

        # Vocabulary size (also the output layer size) is the product of the
        # two map dimensions. Encoder and decoder share one embedding table.
        x_size = out_size = args.map_size[0] * args.map_size[1]
        embeddings = tf.Variable(tf.random_uniform(
            [x_size, args.x_latent_size], -1.0, 1.0),
                                 dtype=tf.float32)
        encoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, encoder_inputs)
        decoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, decoder_inputs)

        with tf.variable_scope("encoder"):
            encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
            # tf.nn.dynamic_rnn returns (outputs, state)
            # 'outputs' is a tensor of shape [batch_size, max_time, cell_output_size]
            # 'state' is a tensor of shape [batch_size, cell_state_size] = (128, 256)
            _, encoder_final_state = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_inputs_embedded,
                sequence_length=seq_length,
                dtype=tf.float32,
            )

        with tf.variable_scope("clusters"):
            # mem_num = size of sd memory = 5, rnn_size = 256
            # Only mu_c is trainable; the variance and prior variables are
            # created with trainable=False and change only via the assign ops
            # below.
            mu_c = tf.get_variable("mu_c", [args.mem_num, args.rnn_size],
                                   initializer=tf.random_uniform_initializer(
                                       0.0, 1.0))
            log_sigma_sq_c = tf.get_variable(
                "sigma_sq_c", [args.mem_num, args.rnn_size],
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            log_pi_prior = tf.get_variable(
                "log_pi_prior",
                args.mem_num,
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            pi_prior = tf.nn.softmax(log_pi_prior)

            # Placeholders and assign ops used to load externally computed
            # cluster parameters into the variables above.
            init_mu_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                       dtype=tf.float32,
                                       name='init_mu_c')
            init_sigma_c = tf.placeholder(shape=(args.mem_num, args.rnn_size),
                                          dtype=tf.float32,
                                          name='init_sigma_c')
            init_pi = tf.placeholder(shape=args.mem_num,
                                     dtype=tf.float32,
                                     name='init_pi')
            self.cluster_init = [init_mu_c, init_sigma_c, init_pi]

            # tf.compat.v1.assign(ref, value, ...)
            self.init_mu_c_op = tf.assign(mu_c, init_mu_c)
            self.init_sigma_c_op = tf.assign(log_sigma_sq_c, init_sigma_c)
            self.init_pi_op = tf.assign(log_pi_prior, init_pi)

            self.mu_c = mu_c
            self.sigma_c = log_sigma_sq_c
            self.pi = pi_prior

            # Replicate the cluster parameters once per batch element.
            # shape=(128, 5, 256)
            stack_mu_c = tf.stack([mu_c] * args.batch_size, axis=0)
            stack_log_sigma_sq_c = tf.stack([log_sigma_sq_c] * args.batch_size,
                                            axis=0)

        with tf.variable_scope("latent"):
            # Two linear layers on the encoder's final state give the mean
            # and the log-variance of z.
            with tf.variable_scope("mu_z"):
                mu_z_w = tf.get_variable(
                    "mu_z_w", [args.rnn_size, args.rnn_size],
                    tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.02))
                mu_z_b = tf.get_variable(
                    "mu_z_b", [args.rnn_size],
                    tf.float32,
                    initializer=tf.constant_initializer(0.0))
                mu_z = tf.matmul(encoder_final_state, mu_z_w) + mu_z_b
            with tf.variable_scope("sigma_z"):
                sigma_z_w = tf.get_variable(
                    "sigma_z_w", [args.rnn_size, args.rnn_size],
                    tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.02))
                sigma_z_b = tf.get_variable(
                    "sigma_z_b", [args.rnn_size],
                    tf.float32,
                    initializer=tf.constant_initializer(0.0))
                log_sigma_sq_z = tf.matmul(encoder_final_state,
                                           sigma_z_w) + sigma_z_b

            # shape = (128, 256)
            eps_z = tf.random_normal(shape=tf.shape(log_sigma_sq_z),
                                     mean=0,
                                     stddev=1,
                                     dtype=tf.float32)
            # z = mu_z + sigma_z * eps_z
            z = mu_z + tf.sqrt(tf.exp(log_sigma_sq_z)) * eps_z

            # Replicate the latent statistics once per cluster.
            # shape = (128, 5, 256)  5 clusters
            stack_mu_z = tf.stack([mu_z] * args.mem_num, axis=1)
            stack_log_sigma_sq_z = tf.stack([log_sigma_sq_z] * args.mem_num,
                                            axis=1)
            stack_z = tf.stack([z] * args.mem_num, axis=1)

            self.batch_post_embedded = z

        # for batch_latent_loss
        with tf.variable_scope("attention"):
            # Logits are the negative variance-scaled squared distance from z
            # to each cluster mean; 1e-10 is added to the softmax output.
            # att_logits/att.shape = (128, 5)
            att_logits = -tf.reduce_sum(
                tf.square(stack_z - stack_mu_c) / tf.exp(stack_log_sigma_sq_c),
                axis=-1)
            att = tf.nn.softmax(att_logits) + 1e-10
            self.batch_att = att

        def generation(h):
            """Decode from initial state `h` and build the loss tensors.

            Variables are created under "generation" with AUTO_REUSE, so
            repeated calls share the decoder and output weights.

            Returns:
              (batch_rec_loss, batch_latent_loss, batch_cate_loss,
              batch_likelihood).
            """
            with tf.variable_scope("generation", reuse=tf.AUTO_REUSE):
                with tf.variable_scope("decoder"):
                    decoder_init_state = h
                    decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
                    # tf.nn.dynamic_rnn returns (outputs, state)
                    # 'outputs' is a tensor of shape [batch_size, max_time, cell_output_size] = (128, None, 256)
                    # 'state' is a tensor of shape [batch_size, cell_state_size]
                    decoder_outputs, _ = tf.nn.dynamic_rnn(
                        decoder_cell,
                        decoder_inputs_embedded,
                        initial_state=decoder_init_state,
                        sequence_length=seq_length,
                        dtype=tf.float32,
                    )
                with tf.variable_scope("outputs"):
                    # out_w.shape = (16900, 256)
                    out_w = tf.get_variable(
                        "out_w", [out_size, args.rnn_size], tf.float32,
                        tf.random_normal_initializer(stddev=0.02))
                    out_b = tf.get_variable(
                        "out_b", [out_size],
                        tf.float32,
                        initializer=tf.constant_initializer(0.0))

                    # Masked sampled-softmax loss, averaged over time steps.
                    # batch_rec_loss.shape=(128,)  decoder_mask.shape=(128, None)
                    # decoder_targets.shape=(128, None)  decoder_outputs.shape=(128, None, 256)
                    batch_rec_loss = tf.reduce_mean(decoder_mask * tf.reshape(
                        tf.nn.sampled_softmax_loss(
                            weights=out_w,
                            biases=out_b,
                            labels=tf.reshape(decoder_targets, [-1, 1]),
                            inputs=tf.reshape(decoder_outputs,
                                              [-1, args.rnn_size]),
                            num_sampled=args.neg_size,
                            num_classes=out_size), [args.batch_size, -1]),
                                                    axis=-1)
                    target_out_w = tf.nn.embedding_lookup(
                        out_w, decoder_targets)  # (128, None, 256)
                    target_out_b = tf.nn.embedding_lookup(
                        out_b, decoder_targets)  # (128, None)

                    # Masked mean of the log-sigmoid of each target's score.
                    # both shape=(128,)
                    batch_likelihood = tf.reduce_mean(
                        decoder_mask * tf.log_sigmoid(
                            tf.reduce_sum(decoder_outputs * target_out_w, -1) +
                            target_out_b),
                        axis=-1,
                        name="batch_likelihood")

                    # KL divergence between c and z distribution
                    batch_latent_loss = 0.5 * tf.reduce_sum(
                        att * tf.reduce_mean(
                            stack_log_sigma_sq_c + tf.exp(stack_log_sigma_sq_z)
                            / tf.exp(stack_log_sigma_sq_c) +
                            tf.square(stack_mu_z - stack_mu_c) /
                            tf.exp(stack_log_sigma_sq_c),
                            axis=-1),
                        axis=-1) - 0.5 * tf.reduce_mean(1 + log_sigma_sq_z,
                                                        axis=-1)
                    # batch_cate_loss = tf.reduce_sum(att * (tf.log(att)), axis=-1)
                    # batch_cate_loss.shape=()
                    batch_cate_loss = tf.reduce_mean(
                        tf.reduce_mean(att, axis=0) *
                        tf.log(tf.reduce_mean(att, axis=0)))
                return batch_rec_loss, batch_latent_loss, batch_cate_loss, batch_likelihood

        if args.eval:
            # Evaluation: decode once per cluster, using that cluster's mean
            # (repeated for every batch element) as the decoder's initial
            # state, and keep the best likelihood across clusters.
            results = tf.map_fn(
                fn=generation,
                elems=tf.stack([mu_c] * args.batch_size, axis=1),
                dtype=(tf.float32, tf.float32, tf.float32, tf.float32),
                parallel_iterations=args.mem_num)  # results.shape=(10, 128)
            # results = my_map(fn=generation,
            #                  elems=tf.stack([mu_c] * args.batch_size, axis=1),
            #                  dtype=(tf.float32, tf.float32, tf.float32, tf.float32),
            #                  parallel_iterations=args.mem_num)

            self.batch_likelihood = tf.reduce_max(
                results[3], axis=0)  # batch_likelihood.shape=(128,)
        else:
            # Training: decode from the sampled z and build the losses and
            # the two Adam update ops (pretraining uses only rec_loss).
            results = generation(z)
            self.batch_likelihood = results[-1]
            # all loss shape=()
            self.rec_loss = rec_loss = tf.reduce_mean(results[0])
            self.latent_loss = latent_loss = tf.reduce_mean(results[1])
            # self.cate_loss = cate_loss = tf.reduce_mean(results[2])
            self.cate_loss = cate_loss = results[2]
            self.loss = loss = rec_loss + latent_loss + 0.1 * cate_loss
            self.pretrain_loss = pretrain_loss = rec_loss
            self.pretrain_op = tf.train.AdamOptimizer(
                args.learning_rate).minimize(pretrain_loss)
            self.train_op = tf.train.AdamOptimizer(
                args.learning_rate).minimize(loss)

        # NOTE(review): the saver covers only the TRAINABLE_VARIABLES
        # collection, so the variables created with trainable=False above
        # (sigma_sq_c, log_pi_prior) are presumably not saved or restored --
        # confirm this is intended.
        saver = tf.train.Saver(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        self.save, self.restore = saver.save, saver.restore
Ejemplo n.º 15
0
def random_patch(image, bboxes=None, min_height=600, min_width=600,
                 seed=None):
    """Crops a randomly positioned, randomly sized patch out of an image.

    `min_height` and `min_width` are clamped to one less than the
    corresponding image dimension, so the request is always satisfiable.
    The offsets and the patch size are then drawn uniformly and handed to
    `patch_image`, which performs the actual crop.

    Args:
        image: Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        min_height: Minimum height of the patch.
        min_width: Minimum width of the patch.
        seed: Seed passed to every random draw.

    Returns:
        Whatever `patch_image` returns for the sampled window. Per the
        original documentation this is the patch of the image (shape
        (H', W', 3) with H' <= H and W' <= W) plus the boxes whose center
        lies inside the patch, cropped to its boundaries; the 'bboxes' entry
        is absent when no boxes remain.
    """
    im_shape = tf.shape(image)
    # The patch cannot be larger than the image itself.
    min_height = tf.minimum(min_height, im_shape[0] - 1)
    min_width = tf.minimum(min_width, im_shape[1] - 1)

    def _uniform_int(low, high):
        """Scalar int32 drawn uniformly from [low, high)."""
        return tf.random_uniform(
            shape=[], minval=low, maxval=high, dtype=tf.int32, seed=seed)

    # Top-left corner: leave at least the minimum patch size to its
    # right and below.
    offset_width = _uniform_int(0, tf.subtract(im_shape[1], min_width))
    offset_height = _uniform_int(0, tf.subtract(im_shape[0], min_height))

    # Patch size: between the minimum and what is left after the offset.
    target_width = _uniform_int(
        min_width, tf.subtract(im_shape[1], offset_width))
    target_height = _uniform_int(
        min_height, tf.subtract(im_shape[0], offset_height))

    return patch_image(
        image,
        bboxes=bboxes,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width,
    )
Ejemplo n.º 16
0
def random_crop(image_list, crop_height, crop_width):
    """Crops the given list of images.

    The function applies the same crop to each image in the list. This can be
    effectively applied when there are multiple image inputs of the same
    dimension such as:
      image, depths, normals = random_crop([image, depths, normals], 120, 150)

    Args:
      image_list: a list of image tensors of the same dimension but possibly
        varying channel.
      crop_height: the new height.
      crop_width: the new width.

    Returns:
      the image_list with cropped images.

    Raises:
      ValueError: if `image_list` is empty. Rank, size-mismatch and
        crop-too-large conditions are checked with `tf.Assert` ops, so they
        surface when the graph is evaluated, not as a ValueError here.
    """
    if not image_list:
        raise ValueError('Empty image_list.')

    # One rank-3 assertion per image, created up front so that each can gate
    # the shape lookup of its own image below.
    rank_assertions = []
    for image in image_list:
        image_rank = tf.rank(image)
        rank_assertions.append(
            tf.Assert(tf.equal(image_rank, 3), [
                'Wrong rank for tensor  %s [expected] [actual]', image.name,
                3, image_rank
            ]))

    # The first image defines the reference height and width.
    with tf.control_dependencies([rank_assertions[0]]):
        image_shape = tf.shape(image_list[0])
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])

    asserts = [rank_assertions[0], crop_size_assert]

    # Every other image must match the reference height and width.
    for image, rank_assertion in zip(image_list[1:], rank_assertions[1:]):
        asserts.append(rank_assertion)
        with tf.control_dependencies([rank_assertion]):
            shape = tf.shape(image)
        height = shape[0]
        width = shape[1]

        # Data order follows the "[expected][actual]" label in the message:
        # the reference dimension first, this image's dimension second.
        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]', image.name,
            image_height, height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]', image.name,
            image_width, width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former would
    # generate random numbers at graph eval time, unlike the latter which
    # generates random numbers at graph definition time.
    with tf.control_dependencies(asserts):
        # +1 because the integer `maxval` is exclusive: an offset of
        # image_size - crop_size is still a valid crop position.
        max_offset_height = tf.reshape(image_height - crop_height + 1, [])
        max_offset_width = tf.reshape(image_width - crop_width + 1, [])
    offset_height = tf.random_uniform([],
                                      maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([],
                                     maxval=max_offset_width,
                                     dtype=tf.int32)

    return [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
Ejemplo n.º 17
0
def augment_seqs_ava(raw_frames,
                     num_frame,
                     max_shift,
                     batch_size=2,
                     queue_size=60,
                     num_threads=3,
                     train_height=128,
                     train_width=128,
                     pixel_noise=0.0,
                     mix=True,
                     screen=False,
                     mode='train',
                     to_gray=True):
    """Prepares training sequence batches from AVA dataset.

    Each frame is warped by a random homography obtained by shifting the four
    corners of a reference crop window, cropped and resized to the training
    size, perturbed with truncated-normal noise and normalized. The label is
    the corner-shift form of the relative homography between consecutive
    frames, expressed at the training resolution.

    Args:
      raw_frames: input video frames from AVA dataset. Dimension 1 is read as
        the frame height and dimension 2 as the frame width; frames are
        indexed along dimension 0.
      num_frame: the number of frames in a sequence
      max_shift: the range each image corner point can move
      batch_size: the size of training or testing batches
      queue_size: the queue size of the shuffle buffer
      num_threads: the number of threads of the shuffle buffer
      train_height: the height of the training/testing images
      train_width: the width of the training/testing images
      pixel_noise: the magnitude of additive noises
      mix: whether mix the magnitude of corner point shifts
      screen: whether remove highly distorted homographies
      mode: 'train' or 'eval', specifying whether preparing images for
        training or testing
      to_gray: whether prepare color or gray scale training images

    Returns:
      a batch of training images and the corresponding ground-truth
      homographies, both cast to float32
    """
    if to_gray:
        output_frames = tf.image.rgb_to_grayscale(raw_frames)
        num_channel = 1
    else:
        output_frames = raw_frames
        num_channel = 3

    frame_height = tf.to_float(tf.shape(output_frames)[1])
    frame_width = tf.to_float(tf.shape(output_frames)[2])

    if mix:
        # scale is 0, 1/3, 2/3 or 1 depending on which of the thresholds
        # 0.1 / 0.2 / 0.3 the uniform draw p exceeds (so it is 1 whenever
        # p > 0.3).
        p = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
        scale = (tf.to_float(tf.greater(p, 0.1)) + tf.to_float(
            tf.greater(p, 0.2)) + tf.to_float(tf.greater(p, 0.3))) / 3
    else:
        scale = 1.0
    new_max_shift = max_shift * scale
    # Eight random offsets per frame: an (x, y) shift for each of the four
    # corners of the reference window.
    rand_shift_base = tf.random_uniform([num_frame, 8],
                                        minval=-new_max_shift,
                                        maxval=new_max_shift,
                                        dtype=tf.float32)
    crop_width = frame_width - 2 * new_max_shift - 1
    crop_height = frame_height - 2 * new_max_shift - 1
    # Corner coordinates of the crop window, flattened; reshaped to [4, 2]
    # wherever point pairs are needed.
    ref_window = tf.to_float(
        tf.stack([
            0, 0, 0, crop_height - 1, crop_width - 1, 0, crop_width - 1,
            crop_height - 1
        ]))
    if screen:
        # Zero out the shifts of frames whose relative homography to the next
        # frame is flagged as too distorted.
        # NOTE(review): with num_frame == 1 the second loop does not run and
        # flag_list stays empty, so flag_list[num_frame - 2] below would raise
        # IndexError -- presumably callers always pass num_frame >= 2.
        new_shift_list = []
        flag_list = []
        hmg_list = []
        src_points = tf.reshape(ref_window, [4, 2])
        for i in range(num_frame):
            dst_points = tf.reshape(
                rand_shift_base[i] + ref_window + new_max_shift, [4, 2])
            hmg = calc_homography_from_points(src_points, dst_points)
            hmg_list.append(hmg)
        for i in range(num_frame - 1):
            hmg = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
            shift = homography_to_shifts(hmg, crop_width, crop_height)
            angles = calc_homography_distortion(crop_width, crop_height, shift)
            # Despite its name, this is the minimum over `angles`.
            max_angle = tf.reduce_min(angles)
            flag = tf.to_float(max_angle >= -0.707)
            flag_list.append(flag)
            if i > 0:
                new_shift = rand_shift_base[i] * flag * flag_list[i - 1]
            else:
                new_shift = rand_shift_base[i] * flag
            new_shift_list.append(new_shift)
        new_shift_list.append(rand_shift_base[num_frame - 1] *
                              flag_list[num_frame - 2])
        rand_shift = tf.stack(new_shift_list)
    else:
        rand_shift = rand_shift_base

    # Diagonal scaling between crop-window size and training size, used
    # below to conjugate the relative homographies.
    mat_scale = tf.diag(
        tf.stack([crop_width / train_width, crop_height / train_height, 1.0]))
    inv_mat_scale = tf.matrix_inverse(mat_scale)
    hmg_list = []
    frame_list = []
    for i in range(num_frame):
        src_points = tf.reshape(ref_window, [4, 2])
        dst_points = tf.reshape(rand_shift[i] + ref_window + new_max_shift,
                                [4, 2])
        hmg = calc_homography_from_points(src_points, dst_points)
        hmg_list.append(hmg)
        # Normalize so the last entry is 1 and pass the first eight
        # coefficients as the projective transform.
        transform = tf.reshape(hmg, [9]) / hmg[2, 2]
        warped = contrib_image.transform(output_frames[i], transform[:8],
                                         'bilinear')
        # Normalized box from the top-left corner covering the crop window.
        crop_window = tf.expand_dims(
            tf.stack([
                0, 0, (crop_height - 1) / (frame_height - 1),
                (crop_width - 1) / (frame_width - 1)
            ]), 0)
        resized_base = tf.image.crop_and_resize(tf.expand_dims(warped, 0),
                                                crop_window, [0],
                                                [train_height, train_width])
        resized = tf.squeeze(resized_base, [0])

        noise_im = tf.truncated_normal(shape=tf.shape(resized),
                                       mean=0.0,
                                       stddev=pixel_noise,
                                       dtype=tf.float32)
        noise_frame = normalize_image(tf.to_float(resized) + noise_im)
        frame_list.append(noise_frame)
    # Stack the frames along a new axis 2 and fold it into the channel axis.
    noise_frames = tf.reshape(tf.stack(
        frame_list, 2), (train_height, train_width, num_frame * num_channel))

    # Labels: for each consecutive frame pair, the relative homography
    # conjugated by the scale matrices, converted to 8 corner shifts.
    label_list = []
    for i in range(num_frame - 1):
        hmg_combine = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]),
                                hmg_list[i])
        hmg_final = tf.matmul(inv_mat_scale, tf.matmul(hmg_combine, mat_scale))
        label = homography_to_shifts(hmg_final, train_width, train_height)
        label_list.append(label)
    labels = tf.reshape(tf.stack(label_list, 0), [(num_frame - 1) * 8])

    if mode == 'train':
        min_after_dequeue = int(queue_size / 3)
    else:
        min_after_dequeue = batch_size * 3
    batch_frames, batch_labels = tf.train.shuffle_batch(
        [noise_frames, labels],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=queue_size,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=False)

    return tf.cast(batch_frames, tf.float32), tf.cast(batch_labels, tf.float32)
Ejemplo n.º 18
0
 def _sample_n(n):
     """Draw `n` Bernoulli samples for every entry of the batch.

     A uniform draw of shape `[n] + batch_shape_tensor` is compared against
     `probs` (both taken from the enclosing scope); entries below `probs`
     become 1, the others 0, cast to the enclosing `dtype`.
     """
     sample_shape = tf.concat([[n], batch_shape_tensor], 0)
     draws = tf.random_uniform(sample_shape, seed=seed, dtype=probs.dtype)
     is_success = tf.less(draws, probs)
     return tf.cast(is_success, dtype)
Ejemplo n.º 19
0
 def test_discriminator_run(self):
     """Smoke test: the discriminator builds and evaluates on random input."""
     random_images = tf.random_uniform([5, 32, 32, 3], -1, 1)
     disc_output, _ = dcgan.discriminator(random_images)
     with self.test_session() as sess:
         init_op = tf.global_variables_initializer()
         sess.run(init_op)
         # Evaluate inside the session opened above.
         sess.run(disc_output)
Ejemplo n.º 20
0
def _randomly_negate_tensor(tensor):
    """Return either `tensor` or `-tensor`, each with probability 0.5."""
    # floor(U[0, 1) + 0.5) is 0 or 1 with equal probability.
    coin = tf.floor(tf.random_uniform([]) + 0.5)
    keep_sign = tf.cast(coin, tf.bool)
    return tf.cond(keep_sign, lambda: tensor, lambda: -tensor)
Ejemplo n.º 21
0
def main():
  """Builds a LearnedSimulator on random inputs and runs it once in a session.

  Constructs the model, samples a random batch of position sequences,
  particle types and global context, wires up the one-step prediction and the
  normalized-acceleration tensors, prints them, and evaluates all three.
  """

  # Model configuration, gathered up front.
  network_kwargs = dict(
      latent_size=128,
      mlp_hidden_size=128,
      mlp_num_hidden_layers=2,
      num_message_passing_steps=10,
  )
  stats = {
      "acceleration": DUMMY_STATS,
      "velocity": DUMMY_STATS,
      "context": DUMMY_CONTEXT_STATS,
  }
  simulator = learned_simulator.LearnedSimulator(
      num_dimensions=NUM_DIMENSIONS,
      connectivity_radius=0.05,
      graph_network_kwargs=network_kwargs,
      boundaries=DUMMY_BOUNDARIES,
      normalization_stats=stats,
      num_particle_types=NUM_PARTICLE_TYPES,
      particle_type_embedding_size=16,
  )

  # One random sequence per batch element, concatenated along the particle
  # axis: [TOTAL_NUM_PARTICLES, SEQUENCE_LENGTH, NUM_DIMENSIONS].
  sequences = []
  for _ in range(BATCH_SIZE):
    sequences.append(sample_random_position_sequence())
  positions = tf.concat(sequences, axis=0)

  # Number of particles contributed by each batch element: [BATCH_SIZE].
  particle_counts = [tf.shape(seq)[0] for seq in sequences]
  n_particles_per_example = tf.stack(particle_counts, axis=0)

  # Random integer particle type per particle: [TOTAL_NUM_PARTICLES].
  total_particles = tf.shape(positions)[0]
  particle_types = tf.random_uniform(
      [total_particles], 0, NUM_PARTICLE_TYPES, dtype=tf.int32)

  # Random global context per batch element.
  global_context = tf.random_uniform(
      [BATCH_SIZE, GLOBAL_CONTEXT_SIZE], -1., 1., dtype=tf.float32)

  # All steps but the last are the model input
  # [TOTAL_NUM_PARTICLES, INPUT_SEQUENCE_LENGTH, NUM_DIMENSIONS]; the last
  # step is the target [TOTAL_NUM_PARTICLES, NUM_DIMENSIONS].
  model_input = positions[:, :-1]
  target = positions[:, -1]

  # One inference step: predicted next position for every particle,
  # [TOTAL_NUM_PARTICLES, NUM_DIMENSIONS].
  predicted_next_position = simulator(
      model_input, n_particles_per_example, global_context, particle_types)
  print(f"Per-particle output tensor: {predicted_next_position}")

  # Training-style tensors: noise the inputs, then ask the model for
  # predicted and target normalized accelerations (both
  # [TOTAL_NUM_PARTICLES, NUM_DIMENSIONS]).
  noise = noise_utils.get_random_walk_noise_for_position_sequence(
      model_input, noise_std_last_step=6.7e-4)
  accelerations = simulator.get_predicted_and_target_normalized_accelerations(
      target, noise, model_input, n_particles_per_example, global_context,
      particle_types)
  predicted_normalized_acceleration, target_normalized_acceleration = (
      accelerations)
  print(f"Predicted norm. acceleration: {predicted_normalized_acceleration}")
  print(f"Target norm. acceleration: {target_normalized_acceleration}")

  fetches = [predicted_next_position,
             predicted_normalized_acceleration,
             target_normalized_acceleration]
  with tf.train.SingularMonitoredSession() as sess:
    sess.run(fetches)
def construct_input(sequence_feature_map, categorical_values,
                    categorical_seq_feature, feature_value, mode, normalize,
                    momentum, min_value, max_value, input_keep_prob):
  """Builds the dense input tensors from the raw sequence features.

  Args:
    sequence_feature_map: A dictionary of (Sparse)Tensors of dense shape
      [batch_size, max_sequence_length, None] keyed by the feature name. Must
      also contain the key 'deltaTime'. The dictionary itself is not modified.
    categorical_values: Potential values of the categorical_seq_feature.
    categorical_seq_feature: Name of feature of observation code.
    feature_value: Name of feature of observation value.
    mode: The execution mode, as defined in tf.estimator.ModeKeys.
    normalize: Whether to normalize each lab test.
    momentum: For the batch normalization mean and variance will be updated as
      momentum*old_value + (1-momentum) * new_value.
    min_value: Observation values smaller than this will be capped to min_value.
    max_value: Observation values larger than this will be capped to max_value.
    input_keep_prob: Keep probability for input observation values. Dropout is
      only applied when this is below 1.0 and mode is TRAIN.

  Returns:
    A tuple (diff_delta_time, obs_values, indicator):
    - diff_delta_time: For every timestep, the previous step's 'deltaTime'
      minus the current one (the first step is paired with itself), cast to
      float and divided by 3600 (presumably seconds to hours - TODO confirm).
    - obs_values: A dense representation of the observation_values with
      obs_values[b, t, :] has at most one non-zero value at the position of
      the corresponding lab test from obs_code_ids with the value of the lab
      result. A padded Tensor of shape
      [batch_size, max_sequence_length, vocab_size] of type float32 of
      possibly normalized observation values.
    - indicator: A one-hot encoding of whether a value in obs_values comes
      from observation_values or is just filled in to be 0. A Tensor of shape
      [batch_size, max_sequence_length, vocab_size] and type float32.
  """
  with tf.variable_scope('input'):
    # Build a reordered copy of the feature map; dense tensors pass through.
    features = {}
    for name, tensor in sequence_feature_map.items():
      if isinstance(tensor, tf.SparseTensor):
        tensor = tf.sparse_reorder(tensor)
      features[name] = tensor

    # Drop invalid observations via sparse_retain so that they never reach
    # the normalization statistics.
    raw_values = features[feature_value]
    raw_codes = features[categorical_seq_feature]
    # Future work: Create a flag for the missing value indicator.
    keep = tf.abs(raw_values.values - 9999999.0) > TOLERANCE
    # Input dropout on top of the validity filter.
    if input_keep_prob < 1.0:
      noise = tf.random_uniform(tf.shape(raw_values.values))
      # floor(keep_prob + U[0, 1)) is 1. with probability keep_prob, else 0.
      dropout_mask = tf.floor(input_keep_prob + noise)
      if mode == tf_estimator.ModeKeys.TRAIN:
        keep = (tf.to_float(keep) * dropout_mask) > 0.5
    features[feature_value] = tf.sparse_retain(raw_values, keep)
    features[categorical_seq_feature] = tf.sparse_retain(raw_codes, keep)

    # 1. Observation values and their indicator.
    # Every observation code gets an id in [0, vocab_size); at each timestep
    # the value of the lab test is placed at the id-th position of an
    # otherwise all-zero vector.
    filtered_codes = features[categorical_seq_feature]
    vocab_table = contrib_lookup.index_table_from_tensor(
        tuple(categorical_values), num_oov_buckets=0, name='vocab_lookup')
    code_ids = vocab_table.lookup(filtered_codes.values)
    code_ids_sparse = tf.sparse_reorder(
        tf.SparseTensor(
            values=code_ids,
            indices=filtered_codes.indices,
            dense_shape=filtered_codes.dense_shape))
    filtered_values = tf.sparse_reorder(features[feature_value])
    vocab_size = len(categorical_values)
    obs_values, indicator = combine_observation_code_and_values(
        code_ids_sparse, filtered_values, vocab_size, mode, normalize,
        momentum, min_value, max_value)

    # 2. Time differences between consecutive steps as an extra feature.
    # Note, the LSTM is very sensitive to how you encode time.
    delta_time = features['deltaTime']
    # Shift right by one step, repeating the first step at the front.
    shifted = tf.concat(
        [delta_time[:, :1, :], delta_time[:, :-1, :]], axis=1)
    diff_delta_time = tf.to_float(shifted - delta_time) / (60.0 * 60.0)

  return (diff_delta_time, obs_values, indicator)
Ejemplo n.º 23
0
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
    """Builds the Implicit Quantile ConvNet.

    The state goes through three conv layers and is flattened; uniformly
    sampled quantiles are embedded with cosines and a fully connected layer;
    the two are multiplied element-wise and fed to two more fully connected
    layers that output one value per action and sampled quantile.

    Args:
      num_actions: int, number of actions.
      quantile_embedding_dim: int, embedding dimension for the quantile input.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.
      num_quantiles: int, number of quantile inputs.

    Returns:
      A `network_type` instance built with `quantile_values` (the final layer
      output) and `quantiles` (the sampled quantile inputs).
    """
    initializer = contrib_slim.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    # Convolutional torso on the state scaled by 1/255:
    # (num_outputs, kernel_size, stride) for each layer, applied in order.
    conv_specs = ((32, [8, 8], 4), (64, [4, 4], 2), (64, [3, 3], 1))
    features = tf.div(tf.cast(state, tf.float32), 255.)
    for depth, kernel, stride in conv_specs:
        features = contrib_slim.conv2d(features,
                                       depth,
                                       kernel,
                                       stride=stride,
                                       weights_initializer=initializer)
    features = contrib_slim.flatten(features)
    flat_dims = features.get_shape().as_list()
    feature_size = flat_dims[-1]
    # Repeat the state features once per sampled quantile.
    features_tiled = tf.tile(features, [num_quantiles, 1])

    # One uniform quantile in [0, 1) per (quantile sample, batch element).
    batch_size = flat_dims[0]
    quantiles = tf.random_uniform([num_quantiles * batch_size, 1],
                                  minval=0,
                                  maxval=1,
                                  dtype=tf.float32)

    # Cosine embedding: cos(i * pi * tau) for i = 1..quantile_embedding_dim.
    embedding = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    multipliers = tf.cast(tf.range(1, quantile_embedding_dim + 1, 1),
                          tf.float32)
    embedding = tf.cos(multipliers * pi * embedding)
    embedding = contrib_slim.fully_connected(
        embedding, feature_size, weights_initializer=initializer)

    # Hadamard product of state features and quantile embedding.
    hidden = tf.multiply(features_tiled, embedding)
    hidden = contrib_slim.fully_connected(hidden,
                                          512,
                                          weights_initializer=initializer)
    quantile_values = contrib_slim.fully_connected(
        hidden,
        num_actions,
        activation_fn=None,
        weights_initializer=initializer)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
Ejemplo n.º 24
0
 def symbols_to_logits(_):
     """Ignore the input and return random logits for every beam entry."""
     logits_shape = (batch_size * beam_size, vocab_size)
     return tf.random_uniform(logits_shape)
Ejemplo n.º 25
0
 def generate_selector(p, bsz):
     """Build a [bsz, 1, 1, 1] float32 mask from uniform draws.

     An entry is 1.0 where its uniform draw in [0, 1) is below `p`, else 0.0.
     """
     draws = tf.random_uniform([bsz, 1, 1, 1], 0, 1, dtype=tf.float32)
     is_selected = tf.less(draws, p)
     return tf.cast(is_selected, tf.float32)
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()


def one_hot_encoder(state, num_states=16):
    """Return a one-hot row vector for `state`.

    Args:
      state: Integer index of the active state.
      num_states: Total number of states, i.e. the length of the encoding.
        Defaults to 16, the value that used to be hard-coded here.

    Returns:
      A float array of shape (1, num_states) with a 1.0 at column `state`.
      Because the row is taken with a slice, a `state` outside
      [0, num_states) yields an empty (0, num_states) array, not an error.
    """
    # Slicing (not indexing) keeps the leading axis, giving shape (1, n).
    return np.identity(num_states)[state:state + 1]


# Create the FrozenLake environment.
env = gym.make("FrozenLake-v0")

# Network dimensions come from the environment's observation and action
# space sizes.
input_size = env.observation_space.n
output_size = env.action_space.n
learning_rate = 0.1

# Single-row input placeholder and a weight matrix initialized uniformly in
# [0, 0.01).
X = tf.placeholder(shape=[1, input_size], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([input_size, output_size], 0, 0.01))

# Linear prediction X @ W, trained against target Y with a sum-of-squares
# loss using plain gradient descent.
q_predict = tf.matmul(X, W)
Y = tf.placeholder(shape=[1, output_size], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(Y - q_predict))
train = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(loss)

num_episodes = 2000
# NOTE(review): presumably the reward discount factor - confirm against the
# training loop.
r = .99
# NOTE(review): presumably collects per-episode rewards - confirm against the
# training loop.
rList = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(num_episodes):
Ejemplo n.º 27
0
def maybe_gen_fake_data_based_on_real_data(image, label, reso,
                                           min_fake_lesion_ratio,
                                           gen_fake_probability):
    """Randomly replace the real lesion with a synthesized one.

    Label values below 0.5 are treated as background, above 1.5 as lesion and
    everything in between as liver. When the random draw exceeds
    `1 - gen_fake_probability` (and the liver/lesion intensity gap is large
    enough), the real lesion is brightened away, a random fake lesion is
    darkened in, and the label is rebuilt as 0 / 1 / 2 for background / liver
    / fake lesion. Otherwise `image` and `label` are returned unchanged.

    Args:
      image: Image tensor; masked with masks derived from `label`.
      label: Label tensor compared against 0.5 and 1.5 as described above.
      reso: Resolution; `reso // 32` is used as mask scale, smoothness and
        blur kernel size.
      min_fake_lesion_ratio: Lower clip bound for the fake lesion ratio; the
        upper bound is this value plus 0.20.
      gen_fake_probability: Probability threshold for generating fake data.

    Returns:
      The (image, label) pair, possibly with a synthesized lesion.
    """
    # TODO(lehou): Replace magic numbers with flag variables.
    blur_size = reso // 32

    def _box_blur(mask):
        """Average a boolean mask over a cubic blur_size neighborhood."""
        volume = tf.expand_dims(
            tf.expand_dims(tf.cast(mask, tf.float32), -1), 0)
        kernel = tf.ones([blur_size] * 3 + [1, 1], tf.float32) / blur_size**3
        blurred = tf.nn.conv3d(volume,
                               filter=kernel,
                               strides=[1, 1, 1, 1, 1],
                               padding='SAME')
        return tf.squeeze(blurred)

    uniform_draw = tf.random_uniform(shape=[],
                                     minval=0.0,
                                     maxval=1.0,
                                     dtype=tf.float32)

    # Partition the volume by label value.
    is_background = tf.less(label, 0.5)
    is_lesion = tf.greater(label, 1.5)
    is_liver = tf.logical_not(tf.logical_or(is_background, is_lesion))

    liver_voxels = tf.boolean_mask(image, is_liver)
    lesion_voxels = tf.boolean_mask(image, is_lesion)

    # Mean intensity gap between liver and lesion, scaled by 1.15; an empty
    # mask gives NaN, which is replaced by 0.
    raw_diff = tf.reduce_mean(liver_voxels) - tf.reduce_mean(lesion_voxels)
    raw_diff = raw_diff * 1.15
    intensity_diff = tf.cond(tf.is_nan(raw_diff), lambda: 0.0,
                             lambda: raw_diff)

    # Random lesion/liver ratio: sum of two normal draws, clipped.
    ratio = 0.0 + tf.random.normal(shape=[], mean=0.01, stddev=0.01)
    ratio = ratio + tf.random.normal(shape=[], mean=0.0, stddev=0.05)
    ratio = tf.clip_by_value(ratio, min_fake_lesion_ratio,
                             min_fake_lesion_ratio + 0.20)

    # Random fake lesion, restricted to non-background voxels.
    random_mask = _gen_rand_mask(ratio_mean=ratio,
                                 ratio_stddev=0.0,
                                 scale=blur_size,
                                 shape=label.shape,
                                 smoothness=blur_size)
    is_fake_lesion = tf.logical_and(random_mask,
                                    tf.logical_not(is_background))
    is_fake_liver = tf.logical_not(
        tf.logical_or(is_background, is_fake_lesion))

    # Blur the masks.
    lesion_blur = _box_blur(is_lesion)
    fake_lesion_blur = _box_blur(is_fake_lesion)

    # Remove real lesion and add fake lesion.
    # If the intensity difference is too small (maybe no liver or lesion
    # region labeled), do not generate fake data.
    gate = tf.cond(tf.greater(intensity_diff, 0.0001),
                   lambda: uniform_draw, lambda: 0.0)
    threshold = 1 - gen_fake_probability

    def _synthesized_image():
        return (image + intensity_diff * lesion_blur -
                intensity_diff * fake_lesion_blur)

    def _synthesized_label():
        return (tf.cast(is_background, tf.float32) * 0 +
                tf.cast(is_fake_liver, tf.float32) * 1 +
                tf.cast(is_fake_lesion, tf.float32) * 2)

    new_image = tf.cond(tf.greater(gate, threshold), _synthesized_image,
                        lambda: image)
    new_label = tf.cond(tf.greater(gate, threshold), _synthesized_label,
                        lambda: label)

    return new_image, new_label
Ejemplo n.º 28
0
 def no_crop_check():
   """True when a uniform draw in [0, 1) is below P_NO_CROP_PER_PASS."""
   draw = tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
   return draw < constants.P_NO_CROP_PER_PASS
Ejemplo n.º 29
0
    def __call__(self, net, is_training=False):
        """Builds Dropblock layer.

        Args:
          net: `Tensor` input tensor. Its static shape must be 4-D, laid out
            according to `self._data_format` ('channels_last' or otherwise
            channels-first).
          is_training: `bool` if True, the model is in training mode.

        Returns:
          A version of input tensor with DropBlock applied. The input is
          returned unchanged when not training or when no keep probability
          is configured.
        """
        if not is_training or self._dropblock_keep_prob is None:
            return net

        logging.info('Applying DropBlock: dropblock_size %d,'
                     'net.shape %s', self._dropblock_size, net.shape)

        channels_last = self._data_format == 'channels_last'
        if channels_last:
            _, height, width, _ = net.get_shape().as_list()
        else:
            _, _, height, width = net.get_shape().as_list()

        total_size = width * height
        # Clamp the block so it never exceeds the feature map. The clamped
        # size is used everywhere below; using the raw configured size could
        # make the denominator zero or negative on small feature maps.
        dropblock_size = min(self._dropblock_size, min(width, height))
        # Seed_drop_rate is the gamma parameter of DropBlock.
        seed_drop_rate = ((1.0 - self._dropblock_keep_prob) * total_size /
                          dropblock_size**2 /
                          ((width - dropblock_size + 1) *
                           (height - dropblock_size + 1)))

        # Forces the block to be inside the feature map. The row index h_i is
        # bounded by height (not width) so non-square maps are handled.
        w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
        valid_block = tf.logical_and(
            tf.logical_and(w_i >= int(dropblock_size // 2),
                           w_i < width - (dropblock_size - 1) // 2),
            tf.logical_and(h_i >= int(dropblock_size // 2),
                           h_i < height - (dropblock_size - 1) // 2))

        if channels_last:
            valid_block = tf.reshape(valid_block, [1, height, width, 1])
        else:
            valid_block = tf.reshape(valid_block, [1, 1, height, width])

        randnoise = tf.random_uniform(net.shape, dtype=tf.float32)
        valid_block = tf.cast(valid_block, dtype=tf.float32)
        seed_keep_rate = tf.cast(1 - seed_drop_rate, dtype=tf.float32)
        # 0 marks a block seed (only possible at valid positions), 1 means
        # keep.
        block_pattern = (1 - valid_block + seed_keep_rate + randnoise) >= 1
        block_pattern = tf.cast(block_pattern, dtype=tf.float32)

        if channels_last:
            ksize = [1, dropblock_size, dropblock_size, 1]
        else:
            ksize = [1, 1, dropblock_size, dropblock_size]
        # Min-pooling (negated max-pool) grows each seed into a full block.
        block_pattern = -tf.nn.max_pool(
            -block_pattern,
            ksize=ksize,
            strides=[1, 1, 1, 1],
            padding='SAME',
            data_format='NHWC' if channels_last else 'NCHW')

        # Rescale by the kept fraction so the expected activation magnitude
        # is preserved.
        percent_ones = tf.cast(
            tf.reduce_sum(block_pattern), tf.float32) / tf.cast(
                tf.size(block_pattern), tf.float32)

        net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
            block_pattern, net.dtype)
        return net
Ejemplo n.º 30
0
def runNetwork(train, valid, test, neurons, learningRate, threshold,
               numAttributes, possibleLabels, outputFile):
    """Train a one-hidden-layer network and report confusion matrices.

    Builds a sigmoid hidden layer followed by a sigmoid (binary) or softmax
    (multi-class) output layer, trains it with Adam on the full training set
    once per epoch, and prints training/validation confusion matrices during
    training and the test confusion matrix at the end.

    Args:
      train: Indexable pair; `train[0]` is fed as inputs, `train[1]` as labels.
      valid: Same layout as `train`, used for validation reporting.
      test: Same layout as `train`, used for the final evaluation.
      neurons: Number of units in the hidden layer.
      learningRate: Learning rate passed to the Adam optimizer.
      threshold: Forwarded to `confusion_matrix.buildConfusionMatrix`.
      numAttributes: Number of input features per example.
      possibleLabels: Collection of labels; its length sets the output size
        (two labels collapse to a single output unit).
      outputFile: Forwarded to the `confusion_matrix` print helpers.
    """
    numLabels = len(possibleLabels)
    if numLabels == 2:
        numLabels = 1  # binary classification uses a single variable that is 0 or 1

    # first, construct the neural network

    # build the input layer
    x = tf.placeholder(tf.float32, shape=[None, numAttributes])

    # build the placeholder for the true labels
    y = tf.placeholder(tf.float32, shape=[None, numLabels])

    # create the hidden layer
    W_hidden = tf.Variable(
        tf.random_uniform([numAttributes, neurons], minval=-0.1, maxval=0.1))
    b_hidden = tf.Variable(
        tf.random_uniform([neurons], minval=-0.1, maxval=0.1))

    # create the calculations in the hidden layer
    hidden_net = tf.matmul(x, W_hidden) + b_hidden
    hidden_out = tf.sigmoid(hidden_net)

    # create the output layer
    W_outlayer = tf.Variable(
        tf.random_uniform([neurons, numLabels], minval=-0.1, maxval=0.1))
    b_outlayer = tf.Variable(
        tf.random_uniform([numLabels], minval=-0.1, maxval=0.1))

    # create the calculations in the output layer
    output_net = tf.matmul(hidden_out, W_outlayer) + b_outlayer

    if numLabels == 1:
        yhat = tf.sigmoid(output_net)
    else:
        yhat = tf.nn.softmax(output_net)

    # setup training: squared error for binary, cross entropy otherwise
    if numLabels == 1:
        cost = tf.reduce_sum(0.5 * (y - yhat) * (y - yhat))
    else:
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                    logits=output_net))
    trainStep = tf.train.AdamOptimizer(learningRate).minimize(cost)

    # setup the params
    epoch = 0
    printEvery = 1
    maxEpochs = 500
    totalTime = 0
    # NOTE(review): validAcc is never updated below, so the early-stop
    # condition never triggers and training always runs maxEpochs epochs.
    validAcc = 0.0

    # Run everything inside a context manager so the session's resources are
    # released when the function returns or raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        while epoch < maxEpochs and validAcc < 0.99:
            epoch += 1

            # train the network, timing only the training step
            startTime = time.process_time()
            sess.run(trainStep, feed_dict={x: train[0], y: train[1]})
            totalTime += time.process_time() - startTime

            if epoch % printEvery == 0:
                p = sess.run(yhat, feed_dict={x: train[0]})

                print("\n###################################################")
                print("\nEpoch:", epoch, "\tTime:", totalTime / epoch)

                print("Training:")
                cm = confusion_matrix.buildConfusionMatrix(
                    p, train[1], numLabels, threshold)
                confusion_matrix.printConfusionMatrix(cm, possibleLabels, None)
                confusion_matrix.printAccuracy(cm, None)

                print("\nValidation:")
                p = sess.run(yhat, feed_dict={x: valid[0]})
                cm = confusion_matrix.buildConfusionMatrix(
                    p, valid[1], numLabels, threshold)
                confusion_matrix.printConfusionMatrix(cm, possibleLabels, None)
                confusion_matrix.printAccuracy(cm, outputFile)

        # evaluate the test accuracy
        p = sess.run(yhat, feed_dict={x: test[0]})

    print("\n***************************************************")
    print("\nConfusion Matrix on Test Set:")
    cm = confusion_matrix.buildConfusionMatrix(p, test[1], numLabels,
                                               threshold)
    confusion_matrix.printConfusionMatrix(cm, possibleLabels, outputFile)
    confusion_matrix.printAccuracy(cm, outputFile)
    print("Average time:", totalTime / epoch)