Example #1
  def test_backward_grads_with_nativepy(self):
    if not tf.test.is_gpu_available():
      self.skipTest("GPU not available")

    input_shape = (128, 8, 8)
    data_shape = (16,) + input_shape
    x = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=1)
    block = blocks.RevBlock(
        n_res=3,
        filters=128,
        strides=(1, 1),
        input_shape=input_shape,
        fused=False,
        dtype=tf.float64)
    with tf.GradientTape() as tape:
      tape.watch(x)
      x1, x2 = tf.split(x, num_or_size_splits=2, axis=1)
      y1, y2 = block((x1, x2), training=True)
      y = tf.concat((y1, y2), axis=1)

    # Compute true grads
    dx_true = tape.gradient(y, x, output_gradients=dy)

    # Compute grads from reconstruction
    (dx1, dx2), _ = block.backward_grads(
        x=(x1, x2), y=(y1, y2), dy=(dy1, dy2), training=True)
    dx = tf.concat((dx1, dx2), axis=1)

    thres = 1e-5
    diff_abs = tf.reshape(abs(dx - dx_true), [-1])
    assert all(diff_abs < thres)
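
A minimal sketch of the same check on a toy function (TF 2.x, hypothetical; not part of the example above): tape.gradient with output_gradients is compared against a hand-derived vector-Jacobian product.

import tensorflow as tf

x = tf.random.normal([16, 8], dtype=tf.float64)
dy = tf.random.normal([16, 8], dtype=tf.float64)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.tanh(x)
dx_true = tape.gradient(y, x, output_gradients=dy)
dx_manual = dy * (1.0 - tf.tanh(x) ** 2)  # d tanh(x)/dx = 1 - tanh(x)^2
assert float(tf.reduce_max(tf.abs(dx_manual - dx_true))) < 1e-5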
Example #2
 def make_net(self, input_images, input_measurements, input_actions, input_objectives, reuse=False):
     if reuse:
         tf.get_variable_scope().reuse_variables()
     
     self.fc_val_params = np.copy(self.fc_joint_params)
     self.fc_val_params['out_dims'][-1] = self.target_dim
     self.fc_adv_params = np.copy(self.fc_joint_params)
     self.fc_adv_params['out_dims'][-1] = len(self.net_discrete_actions) * self.target_dim
     p_img_conv = my_ops.conv_encoder(input_images, self.conv_params, 'p_img_conv', msra_coeff=0.9)
     p_img_fc = my_ops.fc_net(my_ops.flatten(p_img_conv), self.fc_img_params, 'p_img_fc', msra_coeff=0.9)
     p_meas_fc = my_ops.fc_net(input_measurements, self.fc_meas_params, 'p_meas_fc', msra_coeff=0.9)
     if isinstance(self.fc_obj_params, np.ndarray):
         p_obj_fc = my_ops.fc_net(input_objectives, self.fc_obj_params, 'p_obj_fc', msra_coeff=0.9)
         p_concat_fc = tf.concat([p_img_fc,p_meas_fc,p_obj_fc], 1)
     else:
         p_concat_fc = tf.concat([p_img_fc,p_meas_fc], 1)
         if self.random_objective_coeffs:
             raise Exception('Need fc_obj_params with randomized objectives')
         
     p_val_fc = my_ops.fc_net(p_concat_fc, self.fc_val_params, 'p_val_fc', last_linear=True, msra_coeff=0.9)
     p_adv_fc = my_ops.fc_net(p_concat_fc, self.fc_adv_params, 'p_adv_fc', last_linear=True, msra_coeff=0.9)
     
     adv_reshape = tf.reshape(p_adv_fc, [-1, len(self.net_discrete_actions), self.target_dim])
     
     pred_all_nomean = adv_reshape - tf.reduce_mean(adv_reshape, reduction_indices=1, keep_dims=True)
     pred_all = pred_all_nomean + tf.reshape(p_val_fc, [-1, 1, self.target_dim])
     pred_relevant = tf.boolean_mask(pred_all, tf.cast(input_actions, tf.bool))
     
     return pred_all, pred_relevant
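
A small sketch (TF 2.x API, toy numbers) of the dueling combination above: advantages are centered per action before the value stream is added back, so the mean over actions equals the value stream.

import tensorflow as tf

adv = tf.constant([[[1.0], [3.0]]])  # [batch=1, actions=2, target_dim=1]
val = tf.constant([[10.0]])          # [batch=1, target_dim=1]
adv_centered = adv - tf.reduce_mean(adv, axis=1, keepdims=True)
pred = adv_centered + tf.reshape(val, [-1, 1, 1])
print(pred)  # [[[9.], [11.]]] -- per-action mean equals the value stream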
Example #3
def wide_model(numeric_input, category_input, vocabs):
    transpose_category_input = tf.transpose(category_input)
    category_sum = None
    # Append each embedded category column to category_sum
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable("wideem" + str(i), [vocabs[i], 8],
                                    initializer=tf.contrib.layers.xavier_initializer()
                                    #partitioner=tf.fixed_size_partitioner(n_pss))
                                    #partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10)
                                    )
        # Pick one column from category input
        col = tf.gather(transpose_category_input, [i])[0]
        #col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]

        # Equivalent to a one-hot product: [0, 0, 0, 1] * [w1, w2, w3, w4] picks out w4
        #embedded_col = embedding_lookup(tf.identity(embedding), col)  # number * embedding output number
        embedded_col = embedding_ops.embedding_lookup_unique(embedding, col)

        if category_sum is None:
            category_sum = embedded_col
        else:
            category_sum = tf.concat([category_sum, embedded_col], 1)

    tf.set_random_seed(1)
    w = tf.get_variable("W", [numeric_input.shape[1] + category_sum.shape[1], 1], initializer=tf.contrib.layers.xavier_initializer())
    wmodel_logits_sum = tf.matmul(tf.concat([numeric_input, category_sum], 1), w)

    return wmodel_logits_sum
Example #4
    def _construct(self):
        """
        Construct the model; main part of it goes here
        """
        # our query = m_u + e_i
        query = (self._cur_user, self._cur_item)
        neg_query = (self._cur_user, self._cur_item_negative)

        # Positive
        neighbor = self._mem_layer(query,
                                   self.user_memory(self.input_neighborhoods),
                                   self.user_output(self.input_neighborhoods),
                                   self.input_neighborhood_lengths,
                                   self.config.max_neighbors)[-1].output
        self.score = self._output_module(tf.concat([self._cur_user * self._cur_item,
                                                    neighbor], axis=1))

        # Negative
        neighbor_negative = self._mem_layer(neg_query,
                                            self.user_memory(self.input_neighborhoods_negative),
                                            self.user_output(self.input_neighborhoods_negative),
                                            self.input_neighborhood_lengths_negative,
                                            self.config.max_neighbors)[-1].output
        negative_output = self._output_module(tf.concat(
            [self._cur_user * self._cur_item_negative, neighbor_negative], axis=1))

        # Loss and Optimizer
        self.loss = LossLayer()(self.score, negative_output)
        self._optimizer = OptimizerLayer(self.config.optimizer, clip=self.config.grad_clip,
                                         params=self.config.optimizer_params)
        self.train = self._optimizer(self.loss)

        tf.add_to_collection(GraphKeys.PREDICTION, self.score)
Example #5
  def encode_coordinates_alt(self, net):
    """An alternative implemenation for the encoding coordinates.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]

    Returns:
      a tensor with the encoded image coordinates concatenated to `net`.
    """
    batch_size, h, w, _ = net.shape.as_list()
    h_loc = [
      tf.tile(
          tf.reshape(
              tf.contrib.layers.one_hot_encoding(
                  tf.constant([i]), num_classes=h), [h, 1]), [1, w])
      for i in xrange(h)
    ]
    h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
    w_loc = [
      tf.tile(
          tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
          [h, 1]) for i in xrange(w)
    ]
    w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
    loc = tf.concat([h_loc, w_loc], 2)
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
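
A compact TF 2.x sketch (assumed equivalent, static shapes) of the same coordinate encoding: one-hot row and column indices appended as extra channels.

import tensorflow as tf

net = tf.zeros([2, 3, 4, 5])  # [B, H, W, C]
b, h, w, _ = net.shape.as_list()
h_loc = tf.tile(tf.one_hot(tf.range(h), h)[:, tf.newaxis, :], [1, w, 1])  # [H, W, H]
w_loc = tf.tile(tf.one_hot(tf.range(w), w)[tf.newaxis, :, :], [h, 1, 1])  # [H, W, W]
loc = tf.tile(tf.concat([h_loc, w_loc], axis=2)[tf.newaxis], [b, 1, 1, 1])
print(tf.concat([net, loc], axis=3).shape)  # (2, 3, 4, 12)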
Example #6
def prepare_image_question_encoder(image_feat, question, hparams):
  """Prepare encoder.

  Args:
    image_feat: a Tensor.
    question: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a bias tensor for use in encoder self-attention
    encoder_decoder_attention_bias: a bias tensor for use in encoder-decoder
      attention
  """

  encoder_input = tf.concat([image_feat, question], axis=1)
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  ignore_padding = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  encoder_self_attention_bias = ignore_padding
  encoder_decoder_attention_bias = ignore_padding
  # Usual case - not a packed dataset.
  if hparams.pos == "timing":
    question = common_attention.add_timing_signal_1d(question)
  elif hparams.pos == "emb":
    question = common_attention.add_positional_embedding(
        question, hparams.max_length, "inputs_positional_embedding",
        None)
  encoder_input = tf.concat([image_feat, question], axis=1)

  return (encoder_input, encoder_self_attention_bias,
          encoder_decoder_attention_bias)
Example #7
def mmd_objective(z, s, sdim):
    """
    Compute the MMD from latent space and nuisance_id

    Notes:
    Reimplementation in tensorflow of the Variational Fair Autoencoder
    https://arxiv.org/abs/1511.00830
    """
    
    #mmd_method = mmd_rbf
    mmd_method = mmd_fourier
    
    z_dim = z.get_shape().as_list()[1]

    # STEP 1: construct lists of samples in their proper batches
    z_part = tf.dynamic_partition(z, s, sdim)

    # STEP 2: add noise to all of them and get the mmd
    mmd = 0
    for j, z_j in enumerate(z_part):
        z0_ = z_j
        aux_z0 = tf.random_normal([1, z_dim])  # guard in case an S category has no samples
        z0 = tf.concat([z0_, aux_z0], 0)
        if len(z_part) == 2:
            z1_ = z_part[j + 1]
            aux_z1 = tf.random_normal((1, z_dim))
            z1 = tf.concat([z1_, aux_z1], axis=0)
            return mmd_method(z0, z1)
        z1 = z
        mmd += mmd_method(z0, z1)
    return mmd
Example #8
  def testSampleFromDiscretizedMixLogistic(self):
    batch = 2
    height = 4
    width = 4
    num_mixtures = 5
    seed = 42
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.ones([batch, height, width, num_mixtures * 3]) * -1e8
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    locs_0 = locs[..., :3]
    expected_sample = tf.clip_by_value(locs_0, -1., 1.)

    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)
    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
Example #9
def get_idx_map(shape):
    """Get index map for a image.
    Args:
        shape: [B, T, H, W] or [B, H, W]
    Returns:
        idx: [B, T, H, W, 2], or [B, H, W, 2]
    """
    s = shape
    ndims = tf.shape(s)
    wdim = ndims - 1
    hdim = ndims - 2
    idx_shape = tf.concat(0, [s, tf.constant([1])])
    ones_h = tf.ones(hdim - 1, dtype='int32')
    ones_w = tf.ones(wdim - 1, dtype='int32')
    h_shape = tf.concat(0, [ones_h, tf.constant([-1]), tf.constant([1, 1])])
    w_shape = tf.concat(0, [ones_w, tf.constant([-1]), tf.constant([1])])

    idx_y = tf.zeros(idx_shape, dtype='float')
    idx_x = tf.zeros(idx_shape, dtype='float')

    h = tf.slice(s, ndims - 2, [1])
    w = tf.slice(s, ndims - 1, [1])
    idx_y += tf.reshape(tf.to_float(tf.range(h[0])), h_shape)
    idx_x += tf.reshape(tf.to_float(tf.range(w[0])), w_shape)
    idx = tf.concat(ndims[0], [idx_y, idx_x])

    return idx
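
A modern (TF 2.x) sketch of the same idea for the [B, H, W] case, using tf.meshgrid with static shapes; this is an assumed simplification, not the original helper.

import tensorflow as tf

def get_idx_map_static(b, h, w):
  ys, xs = tf.meshgrid(tf.range(h), tf.range(w), indexing='ij')  # [H, W] each
  idx = tf.cast(tf.stack([ys, xs], axis=-1), tf.float32)         # [H, W, 2]
  return tf.tile(idx[tf.newaxis], [b, 1, 1, 1])                  # [B, H, W, 2]

print(get_idx_map_static(2, 3, 4).shape)  # (2, 3, 4, 2)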
Example #10
def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN
        # outputs.
        facts = tf.concat(facts, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.transpose(facts, [1, 0, 2])
    # Boolean mask marking the valid positions
    mask = tf.equal(mask, tf.ones_like(mask))
    # D value - hidden size of the RNN layer
    facts_size = facts.get_shape().as_list()[-1]
    query_size = query.get_shape().as_list()[-1]
    query = tf.layers.dense(
        query, facts_size, activation=None, name='f1_trans_shine' + stag)
    query = prelu(query)
    queries = tf.tile(query, [1, tf.shape(facts)[1]])
    queries = tf.reshape(queries, tf.shape(facts))
    din_all = tf.concat(
        [queries, facts, queries - facts, queries * facts], axis=-1)
    d_layer_1_all = tf.layers.dense(
        din_all, facts_size, activation=tf.nn.sigmoid, name='f1_shine_att' + stag)
    d_layer_2_all = tf.layers.dense(
        d_layer_1_all, facts_size, activation=tf.nn.sigmoid, name='f2_shine_att' + stag)
    d_layer_2_all = tf.reshape(d_layer_2_all, tf.shape(facts))
    output = d_layer_2_all
    return output
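
A toy-shape sketch (TF 2.x) of the query broadcasting above: a [B, D] query is tiled and reshaped to match the [B, T, D] facts before the four-way concat.

import tensorflow as tf

query = tf.ones([2, 3])      # [B, D]
facts = tf.zeros([2, 4, 3])  # [B, T, D]
queries = tf.reshape(tf.tile(query, [1, tf.shape(facts)[1]]), tf.shape(facts))
din_all = tf.concat([queries, facts, queries - facts, queries * facts], axis=-1)
print(din_all.shape)  # (2, 4, 12)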
Example #11
  def testDiscretizedMixLogisticLoss(self):
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.random_uniform([batch, height, width, num_mixtures * 3],
                                   minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=-.9, maxval=.9)
    locs_0 = locs[..., :3]
    log_scales_0 = log_scales[..., :3]
    centered_labels = labels - locs_0
    inv_stdv = tf.exp(-log_scales_0)
    plus_in = inv_stdv * (centered_labels + 1. / 255.)
    min_in = inv_stdv * (centered_labels - 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    cdf_min = tf.nn.sigmoid(min_in)
    expected_loss = -tf.reduce_sum(tf.log(cdf_plus - cdf_min), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
Example #12
 def embed_sequences(self, embed_sequence_batch):
     """Return sentence embeddings as a tensor with with shape
     [batch_size, hidden_size * 2]
     """
     forward_values = embed_sequence_batch.values
     forward_mask = embed_sequence_batch.mask
     backward_values = tf.reverse(forward_values, [False, True, False])
     backward_mask = tf.reverse(forward_mask, [False, True])
     # Initialize LSTMs
     self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)
     self._backward_lstm = LSTM(self.hidden_size, return_sequences=True)
     # Pass input through the LSTMs
     # Shape: (batch_size, seq_length, hidden_size)
     forward_seq = self._forward_lstm(forward_values, forward_mask)
     forward_seq.set_shape((None, self.seq_length, self.hidden_size))
     backward_seq = self._backward_lstm(backward_values, backward_mask)
     backward_seq.set_shape((None, self.seq_length, self.hidden_size))
     # Stitch the outputs together --> hidden states (for computing attention)
     # Final dimension: (batch_size, seq_length, hidden_size * 2)
     lstm_states = tf.concat(2, [forward_seq, tf.reverse(backward_seq, [False, True, False])])
     self._hidden_states = SequenceBatch(lstm_states, forward_mask)
     # Stitch the final outputs together --> sequence embedding
     # Final dimension: (batch_size, hidden_size * 2)
     seq_length = tf.shape(forward_values)[1]
     forward_final = tf.slice(forward_seq, [0, seq_length - 1, 0], [-1, 1, self.hidden_size])
     backward_final = tf.slice(backward_seq, [0, seq_length - 1, 0], [-1, 1, self.hidden_size])
     return tf.squeeze(tf.concat(2, [forward_final, backward_final]), [1])
Example #13
  def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
      all_scores = []
      shard = tf.expand_dims(shard, 0)
      for c in xrange(self._num_classes):
        if self._covariance_type == FULL_COVARIANCE:
          cov = self._covs[c, :, :]
        elif self._covariance_type == DIAG_COVARIANCE:
          cov = tf.diag(self._covs[c, :])
        inverse = tf.matrix_inverse(cov + self._min_var)
        inv_cov = tf.tile(
            tf.expand_dims(inverse, 0),
            tf.pack([self._num_examples, 1, 1]))
        diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
        m_left = tf.batch_matmul(diff, inv_cov)
        all_scores.append(tf.sqrt(tf.batch_matmul(
            m_left, tf.transpose(diff, perm=[0, 2, 1])
        )))
      self._all_scores.append(tf.reshape(
          tf.concat(1, all_scores),
          tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
Example #14
    def __init__(self, input_files, num_epochs, batch_size):
        filename_queue = tf.train.string_input_producer(input_files, num_epochs=num_epochs)
        reader = tf.TFRecordReader()
        _, records = reader.read(filename_queue)
        decoded = tf.parse_single_example(records,
            dense_keys=['image', 'text', 'result', 'len'],
            dense_types=['float', 'int64', 'int64', 'int64'],
            dense_shapes=[(1, config.image_features_count),
                          (config.sents_per_sample, config.max_len),
                          (config.sents_per_sample, config.max_len),
                          (config.sents_per_sample, 1)])

        self.image, self.text, self.result, self.lens = \
            decoded['image'], decoded['text'], decoded['result'], decoded['len']
        self.image = tf.concat(0, [self.image] * config.sents_per_sample)

        # result requires one-hot encoding
        clamped_result = tf.minimum(self.result, config.output_words_count)
        sliced_result = [tf.squeeze(tensor, [0]) for tensor in tf.split(0, config.sents_per_sample, clamped_result)]
        sliced_categorical_result = [self.to_categorical(tensor) for tensor in sliced_result]
        self.categorical_result = tf.concat(0, [tf.expand_dims(tensor, 0) for tensor in sliced_categorical_result])

        self.image_input, self.text_input, self.result_input, self.lens_input = tf.train.shuffle_batch(
            [self.image, self.text, self.categorical_result, self.lens],
            batch_size=batch_size,
            capacity=256+config.batch_size,
            min_after_dequeue=128,
            enqueue_many=True)
Example #15
    def __init__(self, session, input_pipeline):
        self.session = session
        self.input_pipeline = input_pipeline

        text_embeddings = weight_init(config.words_count + 2, config.hidden_count)

        embedded = tf.split(1, config.max_len, tf.nn.embedding_lookup(text_embeddings, input_pipeline.text_input))
        inputs = [tf.squeeze(input_, [1]) for input_ in embedded]

        w_image = weight_init(config.image_features_count, config.hidden_count)
        b_image = bias_init([config.hidden_count])

        image_transform = tf.matmul(input_pipeline.image_input, w_image) + b_image
        hidden_start = tf.concat(1, [tf.zeros_like(image_transform), image_transform])

        cell = WordCell(config.hidden_count, config.output_words_count + 1)
        probs_list, self.hidden = rnn.rnn(
            cell=cell,
            inputs=inputs,
            initial_state=hidden_start,
            sequence_length=input_pipeline.lens_input)
        self.probs = tf.concat(1, [tf.expand_dims(prob, 1) for prob in probs_list])

        float_lens = tf.cast(input_pipeline.lens_input, 'float')
        sample_losses = tf.reduce_sum(self.probs * input_pipeline.result_input, [1, 2]) / float_lens
        self.loss = -tf.reduce_mean(sample_losses)
        self.train_task = tf.train.AdamOptimizer(1e-4).minimize(self.loss)
        self.loss_summary = tf.scalar_summary('loss', self.loss)

        self.saver = tf.train.Saver()
Example #16
  def _marginal_hidden_probs(self):
    """Compute marginal pdf for each individual observable."""

    initial_log_probs = tf.broadcast_to(self._log_init,
                                        tf.concat([self.batch_shape_tensor(),
                                                   [self._num_states]],
                                                  axis=0))
    # initial_log_probs :: batch_shape num_states

    if self._num_steps > 1:
      transition_log_probs = self._log_trans

      def forward_step(log_probs, _):
        return _log_vector_matrix(log_probs, transition_log_probs)

      dummy_index = tf.zeros(self._num_steps - 1, dtype=tf.float32)

      forward_log_probs = tf.scan(forward_step, dummy_index,
                                  initializer=initial_log_probs,
                                  name="forward_log_probs")

      forward_log_probs = tf.concat([[initial_log_probs], forward_log_probs],
                                    axis=0)
    else:
      forward_log_probs = initial_log_probs[tf.newaxis, ...]

    # returns :: num_steps batch_shape num_states

    return tf.exp(forward_log_probs)
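
The helper _log_vector_matrix is not shown above; a common definition (assumed here) is a vector-matrix product carried out in log space via logsumexp, which performs one Markov-chain step on log probabilities.

import tensorflow as tf

def log_vector_matrix(vs, ms):
  # vs: [..., a], ms: [..., a, b] -> [..., b]
  return tf.reduce_logsumexp(vs[..., tf.newaxis] + ms, axis=-2)

v = tf.math.log(tf.constant([0.3, 0.7]))
m = tf.math.log(tf.constant([[0.9, 0.1], [0.2, 0.8]]))
print(tf.exp(log_vector_matrix(v, m)))  # ~[0.41, 0.59]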
Example #17
  def _RunAndVerifyGradientsRandom(self, use_gpu):
    # Random dims of rank 5
    input_shape = np.random.randint(1, 5, size=5)
    # Random number of tensors
    num_tensors = np.random.randint(1, 10)
    # Random dim to concat on
    concat_dim = np.random.randint(5)
    concat_dim_sizes = np.random.randint(1, 5, size=num_tensors)
    with self.test_session(use_gpu=use_gpu):
      inp = []
      inp_tensors = []
      for x in concat_dim_sizes:
        shape = input_shape
        shape[concat_dim] = x
        t = np.random.rand(*shape).astype("f")
        inp.append(t)
        inp_tensors.append(
            tf.constant([float(y) for y in t.flatten()],
                        shape=shape, dtype=tf.float32))
      c = tf.concat(concat_dim, inp_tensors)
      output_shape = input_shape
      output_shape[concat_dim] = concat_dim_sizes.sum()
      grad_inp = np.random.rand(*output_shape).astype("f")
      grad_tensor = tf.constant([float(x) for x in grad_inp.flatten()],
                                shape=output_shape)
      grad = tf.gradients([c], inp_tensors, [grad_tensor])
      concated_grad = tf.concat(concat_dim, grad)
      result = concated_grad.eval()

    self.assertAllEqual(result, grad_inp)
Example #18
    def _add_gtboxes_as_first_stage_proposals(self, first_stage_proposals, first_stage_scores, gtboxes):

        # 1. jitter gtboxes
        ws = gtboxes[:, 2]
        hs = gtboxes[:, 3]
        thetas = gtboxes[:, 4]

        hs_offset = (tf.random_normal(shape=tf.shape(hs)) - 0.5)*0.1*hs
        ws_offset = (tf.random_normal(shape=tf.shape(ws)) - 0.5)*0.1*ws
        thetas_offset = (tf.random_normal(shape=tf.shape(thetas)) - 0.5)*0.1*thetas
        hs = hs + hs_offset
        ws = ws + ws_offset
        thetas = thetas + thetas_offset

        new_boxes = tf.transpose(tf.stack([gtboxes[:, 0], gtboxes[:, 1], ws, hs, thetas], axis=0))

        # 2. decide how many jittered gtboxes to add
        num_needed_add = tf.minimum(tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE*cfgs.FAST_RCNN_POSITIVE_RATE*0.5, tf.int32),
                                    tf.shape(gtboxes)[0])
        added_boxes_indices = tf.random_shuffle(tf.range(start=0, limit=tf.shape(new_boxes)[0]))
        added_boxes_indices = tf.slice(added_boxes_indices, begin=[0], size=[num_needed_add])
        added_boxes = tf.gather(new_boxes, added_boxes_indices)

        # 3. add them
        all_boxes = tf.concat([first_stage_proposals, added_boxes], axis=0)
        all_scores = tf.concat([first_stage_scores,  tf.ones(shape=[tf.shape(added_boxes)[0]])*0.95], axis=0)
        return all_boxes, all_scores
Example #19
 def rotate(first, second, offset=None):
     rotations = [tf.concat(first[:offset], axis=3)]
     elem = first
     for e in second:
         elem = elem[1:]+[e]
         rotations.append(tf.concat(elem[:offset], axis=3))
     return rotations
Example #20
def multilevel_roi_align(features, rcnn_boxes, resolution):
    """
    Args:
        features ([tf.Tensor]): 4 FPN feature maps, levels 2-5
        rcnn_boxes (tf.Tensor): nx4 boxes
        resolution (int): output spatial resolution
    Returns:
        NxC x res x res
    """
    assert len(features) == 4, features
    # Reassign rcnn_boxes to levels
    level_ids, level_boxes = fpn_map_rois_to_levels(rcnn_boxes)
    all_rois = []

    # Crop patches from corresponding levels
    for i, boxes, featuremap in zip(itertools.count(), level_boxes, features):
        with tf.name_scope('roi_level{}'.format(i + 2)):
            boxes_on_featuremap = boxes * (1.0 / cfg.FPN.ANCHOR_STRIDES[i])
            all_rois.append(roi_align(featuremap, boxes_on_featuremap, resolution))

    all_rois = tf.concat(all_rois, axis=0)  # NCHW
    # Unshuffle to the original order, to match the original samples
    level_id_perm = tf.concat(level_ids, axis=0)  # A permutation of 1~N
    level_id_invert_perm = tf.invert_permutation(level_id_perm)
    all_rois = tf.gather(all_rois, level_id_invert_perm)
    return all_rois
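
A toy sketch of the unshuffle step above: tf.invert_permutation recovers the original row order after rows were grouped by FPN level.

import tensorflow as tf

perm = tf.constant([2, 0, 3, 1])             # rows emitted in grouped order
rows = tf.constant([20.0, 0.0, 30.0, 10.0])  # row i originally held 10*i
print(tf.gather(rows, tf.invert_permutation(perm)))  # [0. 10. 20. 30.]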
Example #21
def make_multivariate_mixture(batch_shape, num_components, event_shape,
                              use_static_graph, batch_shape_tensor=None):
  if batch_shape_tensor is None:
    batch_shape_tensor = batch_shape
  batch_shape_tensor = tf.convert_to_tensor(batch_shape_tensor, tf.int32)
  logits = tf.random_uniform(
      tf.concat((batch_shape_tensor, [num_components]), 0),
      -1,
      1,
      dtype=tf.float32) - 50.
  logits.set_shape(tf.TensorShape(batch_shape).concatenate(num_components))
  static_batch_and_event_shape = (
      tf.TensorShape(batch_shape).concatenate(event_shape))
  event_shape = tf.convert_to_tensor(event_shape, tf.int32)
  batch_and_event_shape = tf.concat((batch_shape_tensor, event_shape), 0)

  def create_component():
    loc = tf.random_normal(batch_and_event_shape)
    scale_diag = 10 * tf.random_uniform(batch_and_event_shape)
    loc.set_shape(static_batch_and_event_shape)
    scale_diag.set_shape(static_batch_and_event_shape)
    return tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
  components = [create_component() for _ in range(num_components)]
  cat = tfd.Categorical(logits, dtype=tf.int32)
  return tfd.Mixture(cat, components, use_static_graph=use_static_graph)
Example #22
def SequenceToImageAndDiff(images):
  """Convert image sequence batch into image and diff batch.

    Each image pair is converted to the first image and their diff.
    Batch size will increase if sequence length is larger than 2.

  Args:
    images: Image sequence with shape
        [batch_size, seq_len, image_size, image_size, channel]

  Returns:
    a list of (image, diff) tuples, each with shape
        [batch_size * (seq_len - 1), image_size, image_size, channel];
        image_size takes the values [32, 64, 128, 256].
  """
  image_diff_list = []
  image_seq = tf.unstack(images, axis=1)
  for size in [32, 64, 128, 256]:
    resized_images = [
        tf.image.resize_images(i, [size, size]) for i in image_seq]
    diffs = []
    for i in xrange(0, len(resized_images)-1):
      diffs.append(resized_images[i+1] - resized_images[i])
    image_diff_list.append(
        (tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs)))
  return image_diff_list
Example #23
  def test_get_predictions_with_feature_maps_of_dynamic_shape(self):
    image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
    conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        depth=32,
        num_layers_before_predictor=1,
        box_code_size=4)
    box_predictions = conv_box_predictor.predict(
        [image_features], num_predictions_per_location=[5],
        scope='BoxPredictor')
    box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
                              axis=1)
    objectness_predictions = tf.concat(box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
    init_op = tf.global_variables_initializer()

    resolution = 32
    expected_num_anchors = resolution*resolution*5
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       objectness_predictions_shape) = sess.run(
           [tf.shape(box_encodings), tf.shape(objectness_predictions)],
           feed_dict={image_features:
                      np.random.rand(4, resolution, resolution, 64)})
      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
      self.assertAllEqual(objectness_predictions_shape,
                          [4, expected_num_anchors, 1])
Example #24
def build_lstm_forward(H, x, googlenet, phase, reuse):
    grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
    outer_size = grid_size * H['arch']['batch_size']
    input_mean = 117.
    x -= input_mean
    Z = googlenet_load.model(x, googlenet, H)
    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        if H['arch']['early_dropout'] and phase == 'train':
            Z = tf.nn.dropout(Z, 0.5)
        lstm_input = tf.reshape(Z * scale_down, (H['arch']['batch_size'] * grid_size, 1024))
        lstm_outputs = build_lstm_inner(lstm_input, H)

        pred_boxes = []
        pred_logits = []
        for i in range(H['arch']['rnn_len']):
            output = lstm_outputs[i]
            if H['arch']['late_dropout'] and phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            box_weights = tf.get_variable('box_ip%d' % i, shape=(H['arch']['lstm_size'], 4),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            conf_weights = tf.get_variable('conf_ip%d' % i, shape=(H['arch']['lstm_size'], 2),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            pred_boxes.append(tf.reshape(tf.matmul(output, box_weights) * 50,
                                         [outer_size, 1, 4]))
            pred_logits.append(tf.reshape(tf.matmul(output, conf_weights),
                                         [outer_size, 1, 2]))
        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        pred_logits_squash = tf.reshape(pred_logits,
                                        [outer_size * H['arch']['rnn_len'], 2])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(pred_confidences_squash,
                                      [outer_size, H['arch']['rnn_len'], 2])
    return pred_boxes, pred_logits, pred_confidences
Example #25
  def test_get_correct_box_encoding_and_class_prediction_shapes(self):
    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
    proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
    rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
        is_training=False,
        num_classes=2,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        num_spatial_bins=[3, 3],
        depth=4,
        crop_size=[12, 12],
        box_code_size=4
    )
    box_predictions = rfcn_box_predictor.predict(
        [image_features], num_predictions_per_location=[1],
        scope='BoxPredictor',
        proposal_boxes=proposal_boxes)
    box_encodings = tf.concat(
        box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
    class_predictions_with_background = tf.concat(
        box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
        axis=1)

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       class_predictions_shape) = sess.run(
           [tf.shape(box_encodings),
            tf.shape(class_predictions_with_background)])
      self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
      self.assertAllEqual(class_predictions_shape, [8, 1, 3])
Example #26
    def loss_layer(self, project_logits, lengths, name=None):

        with tf.variable_scope("crf_loss" if not name else name):
            small = -1000.0
            start_logits = tf.concat(
                [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]), tf.zeros(shape=[self.batch_size, 1, 1])],
                axis=-1)

            pad_logits = tf.cast(small * tf.ones([self.batch_size, self.num_steps, 1]), tf.float32)
            logits = tf.concat([project_logits, pad_logits], axis=-1)
            logits = tf.concat([start_logits, logits], axis=1)
            targets = tf.concat(
                [tf.cast(self.num_tags * tf.ones([self.batch_size, 1]), tf.int32), self.targets], axis=-1)

            self.trans = tf.get_variable(
                "transitions",
                shape=[self.num_tags + 1, self.num_tags + 1],
                initializer=self.initializer)

            log_likelihood, self.trans = crf_log_likelihood(
                inputs=logits,
                tag_indices=targets,
                transition_params=self.trans,
                sequence_lengths=lengths + 1)

            return tf.reduce_mean(-log_likelihood)
Example #27
def one_hot_matrix(tensor_in, num_classes, on_value=1.0, off_value=0.0):
    """Encodes indices from given tensor as one-hot tensor.

    TODO(ilblackdragon): Ideally the implementation should be
    part of TensorFlow as an Eigen-native operation.

    Args:
        tensor_in: Input tensor of shape [N1, N2].
        num_classes: Number of classes to expand index into.
        on_value: Tensor or float, value to fill-in given index.
        off_value: Tensor or float, value to fill-in everything else.
    Returns:
        Tensor of shape [N1, N2, num_classes] with `on_value` at each id from
        the original tensor and `off_value` elsewhere.
    """
    tensor_in = tf.convert_to_tensor(tensor_in)
    sparse_values = tf.to_int64(tf.reshape(tensor_in, [-1, 1]))
    size = tf.shape(sparse_values)[0]
    dims = tf.shape(tensor_in)
    indices = tf.to_int64(tf.reshape(tf.range(0, size), [-1, 1]))
    indices_values = tf.concat(1, [indices, sparse_values])
    outshape = tf.to_int64(expand_concat(0, [size, num_classes]))
    one_hot_vector = tf.sparse_to_dense(indices_values, outshape, on_value, off_value)
    ret = tf.reshape(one_hot_vector, tf.concat(0, [dims, [num_classes]]))
    ret.set_shape(tensor_in.get_shape().concatenate(num_classes))
    return ret
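
In current TensorFlow the same encoding is available directly from tf.one_hot; a minimal sketch of the equivalent call:

import tensorflow as tf

tensor_in = tf.constant([[0, 2], [1, 0]])
print(tf.one_hot(tensor_in, depth=3, on_value=1.0, off_value=0.0).shape)  # (2, 2, 3)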
Example #28
 def random_shift(v):
     if random_shift_y:
         v = tf.concat([v[-random_shift_y:], v, v[:random_shift_y]], 0)
     if random_shift_x:
         v = tf.concat([v[:, -random_shift_x:], v, v[:, :random_shift_x]],
                       1)
     return tf.random_crop(v, [resize[0], resize[1], size[2]])
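
A 1-D sketch of the wrap-around padding used above: concatenating the tail and head of an axis around the tensor makes a subsequent random crop act like a circular shift.

import tensorflow as tf

v = tf.constant([0, 1, 2, 3, 4])
shift = 2
print(tf.concat([v[-shift:], v, v[:shift]], axis=0))  # [3 4 0 1 2 3 4 0 1]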
Example #29
    def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
        outputs = [tf.transpose(inputs, [1, 0, 2])]
        for layer in range(self.num_layers):
            gru_fw, gru_bw = self.grus[layer]
            init_fw, init_bw = self.inits[layer]
            mask_fw, mask_bw = self.dropout_mask[layer]
            with tf.variable_scope('fw_{}'.format(layer), reuse=tf.AUTO_REUSE):
                with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                    out_fw, _ = tf.nn.dynamic_rnn(cell=gru_fw, inputs=outputs[-1] * mask_fw, time_major=True,
                                                  initial_state=tuple(tf.unstack(init_fw, axis=0)))

            with tf.variable_scope('bw_{}'.format(layer), reuse=tf.AUTO_REUSE):
                with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                    inputs_bw = tf.reverse_sequence(
                        outputs[-1] * mask_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)
                    out_bw, _ = tf.nn.dynamic_rnn(cell=gru_bw, inputs=inputs_bw, time_major=True,
                                                  initial_state=tuple(tf.unstack(init_bw, axis=0)))
                    out_bw = tf.reverse_sequence(
                        out_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)

            outputs.append(tf.concat([out_fw, out_bw], axis=2))
        if concat_layers:
            res = tf.concat(outputs[1:], axis=2)
        else:
            res = outputs[-1]
        res = tf.transpose(res, [1, 0, 2])
        return res
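
A small sketch of the tf.reverse_sequence trick above: only the valid prefix of each sequence is reversed, so padding stays in place (batch-major toy example; seq_axis/batch_axis are the current names for seq_dim/batch_dim).

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 0, 0]])
lens = tf.constant([3, 2])
print(tf.reverse_sequence(x, seq_lengths=lens, seq_axis=1, batch_axis=0))
# [[3 2 1 0]
#  [5 4 0 0]]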
Example #30
def get_model(name):
    name = functools.partial('{}-{}'.format, name)

    self_pos = tf.placeholder(Config.dtype, Config.data_shape, name='self_pos')
    self_ability = tf.placeholder(Config.dtype, Config.data_shape, name='self_ability')
    enemy_pos = tf.placeholder(Config.dtype, Config.data_shape, name='enemy_pos')
    input_label = tf.placeholder(Config.dtype, Config.label_shape, name='input_label')

    x = tf.concat(3, [self_pos, self_ability, enemy_pos], name=name('input_concat'))
    y = input_label

    nl = tf.nn.tanh

    def conv_pip(name, x):
        name = functools.partial('{}_{}'.format, name)

        x = conv2d(name('0'), x, Config.data_shape[3]*2, kernel=3, stride=1, nl=nl)
        x = conv2d(name('1'), x, Config.data_shape[3], kernel=3, stride=1, nl=nl)
        return x

    pred = conv_pip(name('conv0'), x)
    for layer in range(5):
        pred_branch = tf.concat(3, [pred,x], name=name('concate%d'%layer))
        pred += conv_pip(name('conv%d'%(layer+1)), pred_branch)

    x = tf.tanh(pred, name=name('control_tanh'))

    z = tf.mul(tf.exp(x), self_ability)
    z_sum = tf.reduce_sum(z, reduction_indices=[1,2,3], name=name('partition_function')) # partition function

    # equivalent cross-entropy formulation: -sum(x*y) plus the log partition function
    loss = -tf.reduce_sum(tf.mul(x, y), reduction_indices=[1,2,3]) + tf.log(z_sum)
    z_sum = tf.reshape(z_sum, [-1, 1, 1, 1])
    pred = tf.div(z, z_sum, name=name('predict'))
    return Model([self_pos, self_ability, enemy_pos], input_label, loss, pred, debug=z)
Example #31
  def next_frame(self, frames, actions, rewards, target_frame,
                 internal_states, video_extra):
    del rewards, video_extra

    hparams = self.hparams
    filters = hparams.hidden_size
    kernel2 = (4, 4)
    action = actions[-1]

    # Stack the inputs.
    if internal_states is not None and hparams.concat_internal_states:
      # Use the first part of the first internal state if asked to concatenate.
      batch_size = common_layers.shape_list(frames[0])[0]
      internal_state = internal_states[0][0][:batch_size, :, :, :]
      stacked_frames = tf.concat(frames + [internal_state], axis=-1)
    else:
      stacked_frames = tf.concat(frames, axis=-1)
    inputs_shape = common_layers.shape_list(stacked_frames)

    # Update internal states early if requested.
    if hparams.concat_internal_states:
      internal_states = self.update_internal_states_early(
          internal_states, frames)

    # Using non-zero bias initializer below for edge cases of uniform inputs.
    x = tf.layers.dense(
        stacked_frames, filters, name="inputs_embed",
        bias_initializer=tf.random_normal_initializer(stddev=0.01))
    x = common_attention.add_timing_signal_nd(x)

    # Down-stride.
    layer_inputs = [x]
    for i in range(hparams.num_compress_steps):
      with tf.variable_scope("downstride%d" % i):
        layer_inputs.append(x)
        x = tf.nn.dropout(x, 1.0 - self.hparams.dropout)
        x = common_layers.make_even_size(x)
        if i < hparams.filter_double_steps:
          filters *= 2
        x = common_attention.add_timing_signal_nd(x)
        x = tf.layers.conv2d(x, filters, kernel2, activation=common_layers.belu,
                             strides=(2, 2), padding="SAME")
        x = common_layers.layer_norm(x)

    if self.has_actions:
      with tf.variable_scope("policy"):
        x_flat = tf.layers.flatten(x)
        policy_pred = tf.layers.dense(x_flat, self.hparams.problem.num_actions)
        value_pred = tf.layers.dense(x_flat, 1)
        value_pred = tf.squeeze(value_pred, axis=-1)
    else:
      policy_pred, value_pred = None, None

    # Add embedded action if present.
    if self.has_actions:
      x = common_video.inject_additional_input(
          x, action, "action_enc", hparams.action_injection)

    # Inject latent if present. Only for stochastic models.
    x, extra_loss = self.inject_latent(x, frames, target_frame, action)

    x_mid = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    x, internal_states = self.middle_network(x, internal_states)

    # Up-convolve.
    layer_inputs = list(reversed(layer_inputs))
    for i in range(hparams.num_compress_steps):
      with tf.variable_scope("upstride%d" % i):
        x = tf.nn.dropout(x, 1.0 - self.hparams.dropout)
        if self.has_actions:
          x = common_video.inject_additional_input(
              x, action, "action_enc", hparams.action_injection)
        if i >= hparams.num_compress_steps - hparams.filter_double_steps:
          filters //= 2
        x = tf.layers.conv2d_transpose(
            x, filters, kernel2, activation=common_layers.belu,
            strides=(2, 2), padding="SAME")
        y = layer_inputs[i]
        shape = common_layers.shape_list(y)
        x = x[:, :shape[1], :shape[2], :]
        x = common_layers.layer_norm(x + y)
        x = common_attention.add_timing_signal_nd(x)

    # Cut down to original size.
    x = x[:, :inputs_shape[1], :inputs_shape[2], :]
    x_fin = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    if self.is_per_pixel_softmax:
      x = tf.layers.dense(x, hparams.problem.num_channels * 256, name="logits")
    else:
      x = tf.layers.dense(x, hparams.problem.num_channels, name="logits")

    reward_pred = None
    if self.has_rewards:
      # Reward prediction based on middle and final logits.
      reward_pred = tf.concat([x_mid, x_fin], axis=-1)
      reward_pred = tf.nn.relu(tf.layers.dense(
          reward_pred, 128, name="reward_pred"))
      reward_pred = tf.squeeze(reward_pred, axis=1)  # Remove extra dims
      reward_pred = tf.squeeze(reward_pred, axis=1)  # Remove extra dims

    return x, reward_pred, policy_pred, value_pred, extra_loss, internal_states
Example #32
def concatenate(arrs, axis=0):
    return tf.concat(axis=axis, values=arrs)
Example #33
def conv1d_layer_sentence_representation(sent_wordembeddings):
    """Apply mulitple conv1d filters to extract sentence respresentations
  Args: 
  sent_wordembeddings: [None, max_sent_length, wordembed_size]
  Returns:
  sent_representations: [None, sentembed_size]
  """

    representation_from_filters = []

    output_channel = 0
    if FLAGS.handle_filter_output == "sum":
        output_channel = FLAGS.sentembed_size
    else:  # concat
        # Integer division keeps output_channel an int under Python 3.
        output_channel = FLAGS.sentembed_size // FLAGS.max_filter_length
        if (output_channel * FLAGS.max_filter_length != FLAGS.sentembed_size):
            print(
                "Error: Make sure (output_channel * FLAGS.max_filter_length) is equal to FLAGS.sentembed_size."
            )
            exit(0)

    for filterwidth in xrange(1, FLAGS.max_filter_length + 1):
        # print(filterwidth)

        with tf.variable_scope("Conv1D_%d" % filterwidth) as scope:

            # Convolution
            conv_filter = variable_on_cpu(
                "conv_filter_%d" % filterwidth,
                [filterwidth, FLAGS.wordembed_size, output_channel],
                tf.truncated_normal_initializer())
            # print(conv_filter.name, conv_filter.get_shape())
            conv = tf.nn.conv1d(
                sent_wordembeddings, conv_filter, 1, padding='VALID'
            )  # [None, out_width=(max_sent_length-(filterwidth-1)), output_channel]
            conv_biases = variable_on_cpu("conv_biases_%d" % filterwidth,
                                          [output_channel],
                                          tf.constant_initializer(0.0))
            pre_activation = tf.nn.bias_add(conv, conv_biases)
            conv = tf.nn.relu(
                pre_activation)  #  [None, out_width, output_channel]
            # print(conv.name, conv.get_shape())

            # Max pool: Reshape conv to use max_pool
            conv_reshaped = tf.expand_dims(
                conv, 1)  # [None, out_height:1, out_width, output_channel]
            # print(conv_reshaped.name, conv_reshaped.get_shape())
            out_height = conv_reshaped.get_shape()[1].value
            out_width = conv_reshaped.get_shape()[2].value
            # print(out_height,out_width)
            maxpool = tf.nn.max_pool(
                conv_reshaped, [1, out_height, out_width, 1], [1, 1, 1, 1],
                padding='VALID')  # [None, 1, 1, output_channel]
            # print(maxpool.name, maxpool.get_shape())

            # Local Response Normalization
            maxpool_norm = tf.nn.lrn(maxpool,
                                     4,
                                     bias=1.0,
                                     alpha=0.001 / 9.0,
                                     beta=0.75)  # Settings from cifar10
            # print(maxpool_norm.name, maxpool_norm.get_shape())

            # Get back to original dimension
            maxpool_sqz = tf.squeeze(maxpool_norm,
                                     [1, 2])  # [None, output_channel]
            # print(maxpool_sqz.name, maxpool_sqz.get_shape())

        representation_from_filters.append(maxpool_sqz)
        # print(representation_from_filters)

    final_representation = []
    with tf.variable_scope("FinalOut") as scope:
        if FLAGS.handle_filter_output == "sum":
            final_representation = tf.add_n(representation_from_filters)
        else:
            final_representation = tf.concat(1, representation_from_filters)

    return final_representation
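
A quick shape check (TF 2.x, toy shapes) of the VALID conv1d arithmetic used above: out_width = max_sent_length - (filterwidth - 1).

import tensorflow as tf

x = tf.random.normal([1, 10, 8])  # [batch, max_sent_length, wordembed_size]
f = tf.random.normal([3, 8, 4])   # [filterwidth, wordembed_size, output_channel]
print(tf.nn.conv1d(x, f, stride=1, padding='VALID').shape)  # (1, 8, 4): 10 - (3 - 1) = 8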
Example #34
def main():
    """Create the model and start the training.
  """
    # Read CL arguments and snapshot the arguments into text file.
    args = get_arguments()
    utils.general.snapshot_arg(args)

    # The segmentation network is stride 8 by default.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    innet_size = (int(math.ceil(h / 8)), int(math.ceil(w / 8)))

    # Initialize the random seed.
    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # current step
    step_ph = tf.placeholder(dtype=tf.float32, shape=())

    # Load the reader.
    with tf.device('/cpu:0'):
        with tf.name_scope('create_inputs'):
            reader = ImageReader(args.data_dir, args.data_list, input_size,
                                 args.random_scale, args.random_mirror,
                                 args.random_crop, args.ignore_label, IMG_MEAN)

            image_batch, label_batch = reader.dequeue(args.batch_size)

    # Allocate data evenly to each gpu.
    images_mgpu = nn_mgpu.split(image_batch, args.num_gpu)
    labels_mgpu = nn_mgpu.split(label_batch, args.num_gpu)

    # Create network and output predictions.
    outputs_mgpu = model(images_mgpu, args.num_classes, args.is_training,
                         args.use_global_status)

    # Grab variable names which should be restored from checkpoints.
    restore_var = [
        v for v in tf.global_variables()
        if 'block5' not in v.name or not args.not_restore_classifier
    ]

    # Collect losses from each gpu.
    mean_losses = []
    mean_l2_losses = []
    for outputs, lab in zip(outputs_mgpu, labels_mgpu):
        with tf.device(lab.device):
            # Shrink labels to the size of the network output.
            lab = tf.cast(lab, dtype=tf.float32)
            lab = tf.image.resize_nearest_neighbor(lab,
                                                   innet_size,
                                                   name='label_shrink')
            lab = tf.reshape(lab, [-1])

            # Ignore the location where the label value is larger than args.num_classes.
            not_ignore_pixel = tf.less_equal(lab, args.num_classes - 1)

            # Extract the indices of pixels where the gradients are propagated.
            pixel_inds = tf.squeeze(tf.where(not_ignore_pixel), 1)
            lab_gather = tf.to_int32(tf.gather(lab, pixel_inds))

            # Define softmax loss.
            for i, out in enumerate(outputs):
                # Get mini-batch size on each GPU device.
                n = out.get_shape().as_list()[0]

                # Flatten predictions.
                out = tf.reshape(out, [-1, args.num_classes])
                out_gather = tf.gather(out, pixel_inds)
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=out_gather, labels=lab_gather)
                loss = tf.reduce_mean(loss)
                loss *= float(n) / float(args.batch_size)
                mean_losses.append(loss)

            # Define weight regularization loss.
            w = args.weight_decay
            l2_losses = [
                w * tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if 'weights' in v.name
            ]
            l2_loss = tf.add_n(l2_losses) / float(args.num_gpu)
            mean_l2_losses.append(l2_loss)

    # Sum all loss terms.
    mean_seg_loss = tf.add_n(mean_losses)
    mean_l2_loss = tf.add_n(mean_l2_losses)
    reduced_loss = mean_seg_loss + mean_l2_loss

    # Grab variable names which are used for training.
    all_trainable = tf.trainable_variables()
    fc_trainable = [v for v in all_trainable if 'block5' in v.name]  # lr*10
    base_trainable = [v for v in all_trainable
                      if 'block5' not in v.name]  # lr*1

    # Computes gradients per iteration.
    grads = tf.gradients(reduced_loss,
                         base_trainable + fc_trainable,
                         colocate_gradients_with_ops=True)
    grads_base = grads[:len(base_trainable)]
    grads_fc = grads[len(base_trainable):]

    # Define optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_base = tf.train.MomentumOptimizer(learning_rate * 1.0, args.momentum)
    opt_fc = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)

    # Define tensorflow operations which apply gradients to update variables.
    train_op_base = opt_base.apply_gradients(zip(grads_base, base_trainable))
    train_op_fc = opt_fc.apply_gradients(zip(grads_fc, fc_trainable))
    train_op = tf.group(train_op_base, train_op_fc)

    # Process for visualisation.
    with tf.device('/cpu:0'):
        # Image summary for input image, ground-truth label and prediction.
        cat_output = tf.concat([o[-1] for o in outputs_mgpu], axis=0)
        output_vis = tf.image.resize_nearest_neighbor(
            cat_output, tf.shape(image_batch)[1:3])
        output_vis = tf.argmax(output_vis, axis=3)
        output_vis = tf.expand_dims(output_vis, dim=3)
        output_vis = tf.cast(output_vis, dtype=tf.uint8)

        labels_vis = tf.cast(label_batch, dtype=tf.uint8)

        in_summary = tf.py_func(utils.general.inv_preprocess,
                                [image_batch, IMG_MEAN], tf.uint8)
        gt_summary = tf.py_func(utils.general.decode_labels,
                                [labels_vis, args.num_classes], tf.uint8)
        out_summary = tf.py_func(utils.general.decode_labels,
                                 [output_vis, args.num_classes], tf.uint8)
        # Concatenate image summaries in a row.
        total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2, values=[in_summary, gt_summary, out_summary]),
            max_outputs=args.batch_size)

        # Scalar summary for different loss terms.
        seg_loss_summary = tf.summary.scalar('seg_loss', mean_seg_loss)
        total_summary = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                               graph=tf.get_default_graph())

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    pbar = tqdm(range(args.num_steps))
    for step in pbar:
        start_time = time.time()
        feed_dict = {step_ph: step}

        step_loss = 0
        for it in range(args.iter_size):
            # Update summary periodically.
            if it == args.iter_size - 1 and step % args.update_tb_every == 0:
                sess_outs = [reduced_loss, total_summary, train_op]
                loss_value, summary, _ = sess.run(sess_outs,
                                                  feed_dict=feed_dict)
                summary_writer.add_summary(summary, step)
            else:
                sess_outs = [reduced_loss, train_op]
                loss_value, _ = sess.run(sess_outs, feed_dict=feed_dict)

            step_loss += loss_value

        step_loss /= args.iter_size

        lr = sess.run(learning_rate, feed_dict=feed_dict)

        # Save trained model periodically.
        if step % args.save_pred_every == 0 and step > 0:
            save(saver, sess, args.snapshot_dir, step)

        duration = time.time() - start_time
        desc = 'loss = {:.3f}, lr = {:.6f}'.format(step_loss, lr)
        pbar.set_description(desc)

    coord.request_stop()
    coord.join(threads)
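
A pure-Python sketch of the polynomial learning-rate decay computed above (learning_rate = base_lr * (1 - step / num_steps) ** power):

base_lr, power, num_steps = 1e-3, 0.9, 100
for step in (0, 50, 100):
    print(base_lr * (1 - step / num_steps) ** power)  # decays from 1e-3 to 0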
Example #35
 def concat(self, inputs, axis, name):
     return tf.concat(axis=axis, values=inputs, name=name)
Example #36
    def inference(self):
        with tf.variable_scope('first_order_part'):
            first_ord_w = tf.get_variable(name='first_ord_w',
                                          shape=[self.feat_num, 1],
                                          dtype=tf.float32)
            first_order = tf.nn.embedding_lookup(first_ord_w,
                                                 self.index)  # (batch, m, 1)
            first_order = tf.reduce_sum(tf.multiply(
                first_order, tf.expand_dims(self.x, axis=2)),
                                        axis=2)  # (batch, m)

        with tf.variable_scope('emb_part'):
            embed_matrix = tf.get_variable(name='second_ord_v',
                                           shape=[self.feat_num, self.vec_dim],
                                           dtype=tf.float32)
            embed_v = tf.nn.embedding_lookup(embed_matrix,
                                             self.index)  # (batch, m, D)

            embed_x = tf.multiply(tf.expand_dims(self.x, axis=2),
                                  embed_v)  # (batch, m, D)
            embed_x = tf.layers.dropout(
                embed_x, rate=self.dropout_rate,
                training=self.is_train)  # (batch, m, D)
            node_num = self.field_num * self.vec_dim
            embed_x = tf.reshape(embed_x,
                                 shape=[-1, node_num])  # (batch, node_num)

        with tf.variable_scope('cin_part'):
            cross_tensors = []
            x0_tensor = tf.reshape(embed_x,
                                   shape=[-1, self.field_num,
                                          self.vec_dim])  # (batch, m, D)
            cross_tensors.append(x0_tensor)
            field_nums = []
            field_nums.append(int(self.field_num))
            for i, layer_num in enumerate(self.cin_layer_num):
                xk_tensor = self.cin_layer(x0_tensor, cross_tensors[-1],
                                           field_nums[-1], layer_num,
                                           'cin_layer_%d' % i)
                cross_tensors.append(xk_tensor)
                field_nums.append(layer_num)
            p_vec = [tf.reduce_sum(x, axis=2) for x in cross_tensors]
            cin = tf.concat(p_vec, axis=1)
            cin_lens = np.sum(field_nums)

        with tf.variable_scope('dnn_part'):
            dnn = embed_x
            in_num = node_num
            for i in range(len(self.dnn_layers)):
                out_num = self.dnn_layers[i]
                w = tf.get_variable(name='w_%d' % i,
                                    shape=[in_num, out_num],
                                    dtype=tf.float32)
                b = tf.get_variable(name='b_%d' % i,
                                    shape=[out_num],
                                    dtype=tf.float32)
                dnn = tf.matmul(dnn, w) + b
                dnn = tf.layers.dropout(tf.nn.relu(dnn),
                                        rate=self.dropout_rate,
                                        training=self.is_train)
                in_num = out_num

        with tf.variable_scope('output_part'):
            output = tf.concat([first_order, cin, dnn], axis=1)
            global_w = tf.get_variable(
                name='global_w',
                shape=[self.field_num + cin_lens + in_num, 1],
                dtype=tf.float32)
            global_b = tf.get_variable(name='global_b',
                                       shape=[1],
                                       dtype=tf.float32)
            self.y_logits = tf.matmul(output, global_w) + global_b

        self.y_hat = tf.nn.sigmoid(self.y_logits)
        self.pred_label = tf.cast(self.y_hat > 0.5, tf.int32)
        self.loss = -tf.reduce_mean(self.y * tf.log(self.y_hat + 1e-8) +
                                    (1 - self.y) *
                                    tf.log(1 - self.y_hat + 1e-8))
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
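The width of the final projection in the snippet above (field_num + cin_lens + in_num) follows directly from the three concatenated parts. A small arithmetic sketch with hypothetical sizes:

# Hypothetical sizes, chosen only to trace the width of
# tf.concat([first_order, cin, dnn], axis=1) in the snippet above.
field_num = 10          # m: number of fields -> width of first_order
cin_layer_num = [8, 4]  # feature maps per CIN layer
dnn_layers = [64, 32]   # hidden sizes of the DNN tower

field_nums = [field_num] + cin_layer_num
cin_lens = sum(field_nums)   # 10 + 8 + 4 = 22: one pooled value per map
in_num = dnn_layers[-1]      # 32: width of the last DNN layer

concat_width = field_num + cin_lens + in_num   # 10 + 22 + 32 = 64
print(concat_width)                            # matches global_w's row count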
def calc_gradients(
        test_file,
        model_name,
        output_file_dir,
        max_iter,
        learning_rate=0.0001,
        targets=None,
        weight_loss2=1,
        data_spec=None,
        batch_size=1,
        seq_len=40):

    """Compute the gradients for the given network and images."""    
    spec = data_spec

    modifier = tf.Variable(0.01*np.ones((1, seq_len, spec.crop_size,spec.crop_size,spec.channels),dtype=np.float32))
    input_image = tf.placeholder(tf.float32, (batch_size, seq_len, spec.crop_size, spec.crop_size, spec.channels))
    input_label = tf.placeholder(tf.int32, (batch_size))

    # temporal mask, 1 indicates the selected frame
    indicator = [0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0]   

    true_image = tf.minimum(tf.maximum(modifier[0,0,:,:,:]+input_image[0,0,:,:,:]*255.0, -spec.mean+spec.rescale[0]), -spec.mean+spec.rescale[1])/255.0
    true_image = tf.expand_dims(true_image, 0)
    for ll in range(seq_len-1):
        if indicator[ll+1] == 1:
           mask_temp = tf.minimum(tf.maximum(modifier[0,ll+1,:,:,:]+input_image[0,ll+1,:,:,:]*255.0, -spec.mean+spec.rescale[0]), -spec.mean+spec.rescale[1])/255.0
        else:
           mask_temp = input_image[0,ll+1,:,:,:]
        mask_temp = tf.expand_dims(mask_temp,0)
        true_image = tf.concat([true_image, mask_temp],0)
    true_image = tf.expand_dims(true_image, 0)

    for kk in range(batch_size-1):
        true_image_temp = tf.minimum(tf.maximum(modifier[0,0,:,:,:]+input_image[kk+1,0,:,:,:]*255.0, -spec.mean+spec.rescale[0]), -spec.mean+spec.rescale[1])/255.0
        true_image_temp = tf.expand_dims(true_image_temp, 0)
        for ll in range(seq_len-1):
            if indicator[ll+1] == 1:
               mask_temp = tf.minimum(tf.maximum(modifier[0,ll+1,:,:,:]+input_image[kk+1,ll+1,:,:,:]*255.0, -spec.mean+spec.rescale[0]), -spec.mean+spec.rescale[1])/255.0
            else:
               mask_temp = input_image[kk+1,ll+1,:,:,:]
            mask_temp = tf.expand_dims(mask_temp,0)
            true_image_temp = tf.concat([true_image_temp, mask_temp],0)
        true_image_temp = tf.expand_dims(true_image_temp, 0)

        true_image = tf.concat([true_image, true_image_temp],0)

    loss2 = tf.reduce_sum(tf.sqrt(tf.reduce_mean(tf.square(true_image-input_image), axis=[0, 2, 3, 4])))
    norm_frame = tf.reduce_mean(tf.abs(modifier), axis=[2,3,4])

    sess = tf.Session()
    probs, variable_set, pre_label, ince_output, pre_node = models.get_model(sess, true_image, model_name, False)
    true_label_prob = tf.reduce_sum(probs*tf.one_hot(input_label,101),[1])
    if targets is None:
        loss1 = -tf.log(1 - true_label_prob + 1e-6)
    else:
        loss1 = -tf.log(true_label_prob + 1e-6)
    loss1 = tf.reduce_mean(loss1)
    loss = loss1 + weight_loss2 * loss2

    optimizer = tf.train.AdamOptimizer(learning_rate)
    print('optimizer.minimize....')
    train = optimizer.minimize(loss, var_list=[modifier])
    # initialize all uninitialized variables
    init_variable_list = set(tf.all_variables()) - variable_set
    sess.run(tf.initialize_variables(init_variable_list))

    data = DataSet(test_list=test_file, seq_length=seq_len, image_shape=(spec.crop_size, spec.crop_size, spec.channels))
    all_names = []
    all_images = []
    all_labels = []
    
    def_len = 40
    for video in data.test_data:
        frames = data.get_frames_for_sample(video)
        if len(frames) < def_len:
           continue
        frames = data.rescale_list(frames, def_len)
        frames_data = data.build_image_sequence(frames)
        all_images.append(frames_data)
        label, hot_labels = data.get_class_one_hot(video[1])
        all_labels.append(label)
        all_names.append(frames)
    total = len(all_names)
    all_indices = range(total)
    num_batch = total // batch_size
    print('number of batches:', num_batch)

    correct_ori = 0
    correct_noi = 0
    tot_image = 0
    
    for ii in range(num_batch):        
        images = all_images[ii*batch_size : (ii+1)*batch_size]
        names = all_names[ii*batch_size : (ii+1)*batch_size]
        labels = all_labels[ii*batch_size : (ii+1)*batch_size]
        indices = all_indices[ii*batch_size : (ii+1)*batch_size]
        print('------------------prediction for clean video-------------------')
        print('---video-level prediction---')
        for xx in range(len(indices)):
            print(names[xx][0], 'label:', labels[xx], 'index:', indices[xx], 'size:', len(images[xx]), len(images[xx][0]), len(images[xx][0][0]), len(images[xx][0][0][0]))
        sess.run(tf.initialize_variables(init_variable_list))
        if targets is not None:
            labels = [targets[e] for e in names]
        
        feed_dict = {input_image: [images[0][0:seq_len]], input_label: labels}
        var_loss, true_prob, var_loss1, var_loss2, var_pre, var_node = sess.run((loss, true_label_prob, loss1, loss2, pre_label, pre_node), feed_dict=feed_dict)
        
        correct_pre = correct_ori
        for xx in range(len(indices)):
           if labels[xx] == var_pre[xx]:
              correct_ori += 1

        tot_image += 1
        print('Start!')
        min_loss = var_loss
        last_min = -1
        print('---frame-wise prediction---')
        print('node_label:', var_node, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'prob:', true_prob)
        # record the number of iterations
        tot_iter = 0

        # skip videos the model already misclassifies on clean frames
        if correct_pre == correct_ori:
           continue
       
        print('------------------prediction for adversarial video-------------------')

        for cur_iter in range(max_iter):
            tot_iter += 1
            sess.run(train, feed_dict=feed_dict)
            var_loss, true_prob, var_loss1, var_loss2, var_pre, var_node = sess.run((loss, true_label_prob, loss1, loss2, pre_label, pre_node), feed_dict=feed_dict)
            print('iter:', cur_iter, 'total loss:', var_loss, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'prob:', true_prob)
            break_condition = False
            if var_loss < min_loss:
                if np.absolute(var_loss-min_loss) < 0.00001:
                   break_condition = True
                   print(last_min)
                min_loss = var_loss
                last_min = cur_iter

            if cur_iter + 1 == max_iter or break_condition:
                print('iter:', cur_iter, 'node_label:', var_node, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'prob:', true_prob)
                var_diff, var_probs, noise_norm = sess.run((modifier, probs, norm_frame), feed_dict=feed_dict)
                for pp in range(seq_len):
                    # print the map value for each frame
                    print(noise_norm[0][pp])
                for i in range(len(indices)):
                    top1 = var_probs[i].argmax()
                    if labels[i] == top1:
                        correct_noi += 1
                break
        print('saved modifier parameters.', ii)
        
        for ll in range(len(indices)):
            for kk in range(def_len):
                if kk < seq_len:
                   attack_img = np.clip(images[ll][kk]*255.0+var_diff[0][kk]+data_spec.mean,data_spec.rescale[0],data_spec.rescale[1])
                   diff = np.clip(np.absolute(var_diff[0][kk])*255.0, data_spec.rescale[0],data_spec.rescale[1])
                else:
                   attack_img = np.clip(images[ll][kk]*255.0+data_spec.mean,data_spec.rescale[0],data_spec.rescale[1])
                   diff = np.zeros((spec.crop_size,spec.crop_size,spec.channels))
                im_diff = scipy.misc.toimage(arr=diff, cmin=data_spec.rescale[0], cmax=data_spec.rescale[1])
                im = scipy.misc.toimage(arr=attack_img, cmin=data_spec.rescale[0], cmax=data_spec.rescale[1])
                new_name = names[ll][kk].split('/')
                 
                adv_dir = output_file_dir+'/adversarial/'
                dif_dir = output_file_dir+'/noise/'
                if not os.path.exists(adv_dir):
                   os.mkdir(adv_dir)
                   os.mkdir(dif_dir)

                tmp_dir = adv_dir+new_name[-2]
                tmp1_dir = dif_dir+new_name[-2]
                if not os.path.exists(tmp_dir):
                   os.mkdir(tmp_dir)
                   os.mkdir(tmp1_dir)
               
                new_name = new_name[-1] + '.png'
                im.save(tmp_dir + '/' +new_name)
                im_diff.save(tmp1_dir + '/' +new_name)
        print('saved adversarial frames.', ii)
        print('correct_ori:', correct_ori, 'correct_noi:', correct_noi)
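The frame loops above grow the graph with one tf.concat per frame. The same masked perturbation can be expressed with a single broadcast multiply; a sketch under toy shapes (the clip/rescale step of the original is omitted here):

import numpy as np
import tensorflow as tf

seq_len, h, w, c = 40, 8, 8, 3                 # toy sizes for the sketch
indicator = np.zeros(seq_len, dtype=np.float32)
indicator[[11, 12, 13, 19, 20, 21, 22, 32, 33]] = 1.0  # selected frames

frames = tf.zeros([2, seq_len, h, w, c])             # stand-in input batch
modifier = tf.ones([1, seq_len, h, w, c]) * 0.01     # shared perturbation
mask = tf.reshape(indicator, [1, seq_len, 1, 1, 1])  # broadcasts over B,H,W,C

# Frames with indicator 1 receive the modifier; the rest pass through.
perturbed = frames + mask * modifier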
    def add_logits_op_conv(self):
        """Defines self.logits

        Each word in each sentence of the batch is mapped to a vector of
        scores, with dimension equal to the number of tags.
        """

        with tf.name_scope("conv-maxpool"):

            # ( (BATCH_SIZE*WORDS), WINDOW_LEN, DIM, 1 )
            pooled_out = []
            for i, filter_size in enumerate(self.config.FILTER_SIZE):
                filter_shape = [
                    filter_size, self.config.DIM, 1,
                    self.config.NUMBER_OF_FEATURE_MAPS[i]
                ]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                #W = tf.get_variable(shape = filter_shape, initializer = tf.truncated_normal_initializer(stddev=0.001), name="W"+str(i))
                b = tf.Variable(tf.constant(
                    0.1, shape=[self.config.NUMBER_OF_FEATURE_MAPS[i]]),
                                name="b")

                conv = tf.nn.conv2d(self.image_patches_reshaped,
                                    filter=W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                #print(tf.Print(conv,[conv]))
                # conv = tf.squeeze(conv) # ( (BATCH_SIZE*WORDS), WINDOW_LEN-FILTER_SIZE + 1, NUMBER_OF_FEATURE_MAPS)
                #conv = tf.nn.bias_add(conv,b)
                #conv = tf.nn.relu(conv)
                #conv_non_linear = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") # ( (BATCH_SIZE*WORDS), WINDOW_LEN-FILTER_SIZE + 1, 1, NUMBER_OF_FEATURE_MAPS)

                pooled = tf.nn.max_pool(conv,
                                        ksize=[
                                            1,
                                            (self.config.WINDOW_LEN -
                                             filter_size + 1), 1, 1
                                        ],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        data_format='NHWC',
                                        name="pool")
                pooled = tf.squeeze(
                    pooled)  # ( (BATCH_SIZE*WORDS), NUMBER_OF_FEATURE_MAPS)
                self.output = tf.reshape(pooled, (-1, tf.shape(
                    self.word_ids)[1], self.config.NUMBER_OF_FEATURE_MAPS[i]))
                pooled_out.append(self.output)

        self.h_pool = tf.concat(pooled_out, 2)

        with tf.name_scope("size_calc"):
            size = 0
            for i in range(len(self.config.FILTER_SIZE)):
                size += self.config.NUMBER_OF_FEATURE_MAPS[i]

        with tf.name_scope("conv2-maxpool"):
            filter_shape = [
                self.config.conv2_filter_size, size, self.config.conv2_dim
            ]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                            name="W_")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.conv2_dim]),
                            name="b_")
            conv_ = tf.nn.conv1d(self.h_pool,
                                 filters=W,
                                 stride=1,
                                 padding="SAME",
                                 name="conv_")
            #conv_ = tf.squeeze(conv_)

        with tf.variable_scope("proj"):

            dense_input = tf.reshape(self.h_pool, (-1, size))
            #dense_input = tf.nn.dropout(dense_input, self.dropout_conv)

            output = tf.contrib.layers.fully_connected(
                dense_input,
                self.config.mlp_size,
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(
                    uniform=True, seed=1227),
                weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
                biases_initializer=tf.zeros_initializer(),
                trainable=True,
                scope="input1")

            # dense_input = tf.reshape(self.image_patches_reshaped, (-1, self.config.WINDOW_LEN * self.config.dim_word))
            # #dense_input = tf.nn.dropout(dense_input, self.dropout_conv)
            # output2 = tf.contrib.layers.fully_connected(
            # dense_input,
            # self.config.mlp_size,
            # activation_fn=tf.nn.relu,
            # normalizer_fn=None,
            # normalizer_params=None,
            # weights_initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=1227),
            # #weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
            # biases_initializer=tf.zeros_initializer(),
            # trainable=True,
            # scope="input3"
            # )

            # #output = tf.concat([tf.reshape(output,(-1, tf.shape(self.word_ids)[1], self.config.mlp_size)),tf.reshape(output2,(-1, tf.shape(self.word_ids)[1], self.config.mlp_size))],axis=2)

            # #output = tf.reshape(output, (-1, tf.shape(self.word_ids)[1]*2*self.config.mlp_size))

            # output = tf.concat([output, output2],axis = 1)

            output = tf.nn.dropout(output, self.dropout_conv)

            output = tf.contrib.layers.fully_connected(
                output,
                self.config.ntags,
                activation_fn=None,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(
                    uniform=True, seed=1227),
                weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
                biases_initializer=tf.zeros_initializer(),
                trainable=True,
                scope="input2")

        self.logits = tf.reshape(
            output, (-1, tf.shape(self.word_ids)[1], self.config.ntags))
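The per-filter-size pooled maps above are joined feature-wise on axis 2. A shape-only sketch with hypothetical map counts:

import tensorflow as tf

B, T = 4, 12                        # hypothetical batch and sentence length
maps = (32, 48, 64)                 # stand-in NUMBER_OF_FEATURE_MAPS
pooled_out = [tf.zeros([B, T, m]) for m in maps]

h_pool = tf.concat(pooled_out, 2)   # [B, T, 144]: feature maps side by side
size = sum(maps)                    # what the "size_calc" scope computes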
Example #39
0
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""

        print "-" * 80
        print "Build controller sampler"
        anchors = []
        anchors_w_1 = []

        arc_seq = []
        entropys = []
        log_probs = []
        skip_count = []
        skip_penaltys = []

        prev_c = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in xrange(self.lstm_num_layers)
        ]
        prev_h = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in xrange(self.lstm_num_layers)
        ]
        inputs = self.g_emb
        skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                                   dtype=tf.float32)
        for layer_id in xrange(self.num_layers):
            if self.search_whole_channels:
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logit = tf.matmul(next_h[-1], self.w_soft)
                if self.temperature is not None:
                    logit /= self.temperature
                if self.tanh_constant is not None:
                    logit = self.tanh_constant * tf.tanh(logit)
                if self.search_for == "macro" or self.search_for == "branch":
                    branch_id = tf.multinomial(logit, 1)
                    branch_id = tf.to_int32(branch_id)
                    branch_id = tf.reshape(branch_id, [1])
                elif self.search_for == "connection":
                    branch_id = tf.constant([0], dtype=tf.int32)
                else:
                    raise ValueError("Unknown search_for {}".format(
                        self.search_for))
                arc_seq.append(branch_id)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=branch_id)
                log_probs.append(log_prob)
                entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                entropys.append(entropy)
                inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
            else:
                for branch_id in xrange(self.num_branches):
                    next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                                self.w_lstm)
                    prev_c, prev_h = next_c, next_h
                    logit = tf.matmul(next_h[-1],
                                      self.w_soft["start"][branch_id])
                    if self.temperature is not None:
                        logit /= self.temperature
                    if self.tanh_constant is not None:
                        logit = self.tanh_constant * tf.tanh(logit)
                    start = tf.multinomial(logit, 1)
                    start = tf.to_int32(start)
                    start = tf.reshape(start, [1])
                    arc_seq.append(start)
                    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logit, labels=start)
                    log_probs.append(log_prob)
                    entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                    entropys.append(entropy)
                    inputs = tf.nn.embedding_lookup(
                        self.w_emb["start"][branch_id], start)

                    next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                                self.w_lstm)
                    prev_c, prev_h = next_c, next_h
                    logit = tf.matmul(next_h[-1],
                                      self.w_soft["count"][branch_id])
                    if self.temperature is not None:
                        logit /= self.temperature
                    if self.tanh_constant is not None:
                        logit = self.tanh_constant * tf.tanh(logit)
                    mask = tf.range(0,
                                    limit=self.out_filters - 1,
                                    delta=1,
                                    dtype=tf.int32)
                    mask = tf.reshape(mask, [1, self.out_filters - 1])
                    mask = tf.less_equal(mask, self.out_filters - 1 - start)
                    logit = tf.where(mask,
                                     x=logit,
                                     y=tf.fill(tf.shape(logit), -np.inf))
                    count = tf.multinomial(logit, 1)
                    count = tf.to_int32(count)
                    count = tf.reshape(count, [1])
                    arc_seq.append(count + 1)
                    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logit, labels=count)
                    log_probs.append(log_prob)
                    entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                    entropys.append(entropy)
                    inputs = tf.nn.embedding_lookup(
                        self.w_emb["count"][branch_id], count)

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h

            if layer_id > 0:
                query = tf.concat(anchors_w_1, axis=0)
                query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logit = tf.concat([-query, query], axis=1)
                if self.temperature is not None:
                    logit /= self.temperature
                if self.tanh_constant is not None:
                    logit = self.tanh_constant * tf.tanh(logit)

                skip = tf.multinomial(logit, 1)
                skip = tf.to_int32(skip)
                skip = tf.reshape(skip, [layer_id])
                arc_seq.append(skip)

                skip_prob = tf.sigmoid(logit)
                kl = skip_prob * tf.log(skip_prob / skip_targets)
                kl = tf.reduce_sum(kl)
                skip_penaltys.append(kl)

                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=skip)
                log_probs.append(tf.reduce_sum(log_prob, keep_dims=True))

                entropy = tf.stop_gradient(
                    tf.reduce_sum(log_prob * tf.exp(-log_prob),
                                  keep_dims=True))
                entropys.append(entropy)

                skip = tf.to_float(skip)
                skip = tf.reshape(skip, [1, layer_id])
                skip_count.append(tf.reduce_sum(skip))
                inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
                inputs /= (1.0 + tf.reduce_sum(skip))
            else:
                inputs = self.g_emb

            anchors.append(next_h[-1])
            anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = tf.reshape(arc_seq, [-1])

        entropys = tf.stack(entropys)
        self.sample_entropy = tf.reduce_sum(entropys)

        log_probs = tf.stack(log_probs)
        self.sample_log_prob = tf.reduce_sum(log_probs)

        skip_count = tf.stack(skip_count)
        self.skip_count = tf.reduce_sum(skip_count)

        skip_penaltys = tf.stack(skip_penaltys)
        self.skip_penaltys = tf.reduce_mean(skip_penaltys)
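The entropy bookkeeping above leans on an identity: sparse_softmax_cross_entropy_with_logits returns -log p for the sampled id, so exp(-log_prob) recovers p and the product is the sampled term of the entropy sum. A small NumPy check:

import numpy as np

p = 0.3                                  # probability of the sampled id
log_prob = -np.log(p)                    # what the cross-entropy op returns
entropy_term = log_prob * np.exp(-log_prob)
assert np.isclose(entropy_term, -p * np.log(p))   # p * (-log p)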
Example #40
0
    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(tf.matmul(tf.concat([left_tensor, right_tensor], axis=1), W1) + b1)
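tf.concat's argument order changed in TF 1.0: older code wrote tf.concat(concat_dim, values), while TF >= 1.0 expects the values first. A minimal sketch of the current form:

import tensorflow as tf

x = tf.zeros([2, 3])
y = tf.ones([2, 3])

# pre-1.0 code wrote tf.concat(1, [x, y]); TF >= 1.0 expects values first
z = tf.concat([x, y], axis=1)   # shape [2, 6]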
Example #41
0
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example of a network in tf that has both state and image inputs.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A TfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]], (pool_size, pool_size)), # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]], (pool_size, pool_size)), # 5x5 conv, 32 inputs, 64 outputs
    }

    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])

    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)

    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])

    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])

    fc_input = tf.concat(axis=1, values=[conv_out_flat, state_input])

    fc_output, _, _ = get_mlp_layers(fc_input, n_layers, dim_hidden)

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
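conv_out_size above comes from pooling twice, each time halving the spatial dimensions, so the flattened conv output has (W/4) * (H/4) * num_filters[1] features per example. Tracing it with hypothetical dimensions:

# Hypothetical image and filter sizes, only to trace conv_out_size above.
im_width, im_height = 80, 64
pool_size = 2
num_filters = [16, 32]

conv_out_size = int(im_width / (2.0 * pool_size)
                    * im_height / (2.0 * pool_size)
                    * num_filters[1])    # 20 * 16 * 32 = 10240
print(conv_out_size)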
    def add_word_embeddings_op(self):
        """Defines self.word_embeddings

        If self.config.embeddings is not None and is a np array initialized
        with pre-trained word vectors, the word embeddings are just a look-up
        and we don't train the vectors. Otherwise, a random matrix with
        the correct shape is initialized.
        """
        with tf.variable_scope("words"):
            if self.config.embeddings is None:
                self.logger.info("WARNING: randomly initializing word vectors")
                _word_embeddings = tf.get_variable(
                    name="_word_embeddings",
                    dtype=tf.float32,
                    shape=[self.config.nwords, self.config.dim_word])
            else:
                _word_embeddings = tf.Variable(
                    self.config.embeddings,
                    name="_word_embeddings",
                    dtype=tf.float32,
                    trainable=self.config.train_embeddings)

            word_embeddings = tf.nn.embedding_lookup(_word_embeddings,
                                                     self.word_ids,
                                                     name="word_embeddings")

        with tf.variable_scope("chars"):
            if self.config.use_chars:
                # get char embeddings matrix
                _char_embeddings = tf.get_variable(
                    name="_char_embeddings",
                    dtype=tf.float32,
                    shape=[self.config.nchars, self.config.dim_char])
                char_embeddings = tf.nn.embedding_lookup(
                    _char_embeddings, self.char_ids, name="char_embeddings")

                # put the time dimension on axis=1
                s = tf.shape(char_embeddings)
                char_embeddings = tf.reshape(
                    char_embeddings,
                    shape=[s[0] * s[1], s[-2], self.config.dim_char])
                word_lengths = tf.reshape(self.word_lengths,
                                          shape=[s[0] * s[1]])

                # bi lstm on chars
                cell_fw = tf.contrib.rnn.LSTMCell(self.config.hidden_size_char,
                                                  state_is_tuple=True)
                cell_bw = tf.contrib.rnn.LSTMCell(self.config.hidden_size_char,
                                                  state_is_tuple=True)
                _output = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    char_embeddings,
                    sequence_length=word_lengths,
                    dtype=tf.float32)

                # read and concat output
                _, ((_, output_fw), (_, output_bw)) = _output
                output = tf.concat([output_fw, output_bw], axis=-1)

                # shape = (batch size, max sentence length, char hidden size)
                output = tf.reshape(
                    output,
                    shape=[s[0], s[1], 2 * self.config.hidden_size_char])
                word_embeddings = tf.concat([word_embeddings, output], axis=-1)

        self.word_embeddings = tf.nn.dropout(word_embeddings, self.dropout)

        #print(tf.Print(self.word_embeddings,[self.word_embeddings]))

        if self.config.conv:
            self.temp = tf.squeeze(
                tf.extract_image_patches(
                    self.word_embeddings[:, :, :, tf.newaxis],
                    ksizes=[1, self.config.WINDOW_LEN, self.config.DIM, 1],
                    strides=[1, self.config.stride, self.config.DIM, 1],
                    rates=[1, 1, 1, 1],
                    padding='SAME'))
            self.image_patches = tf.reshape(
                self.temp, (-1, tf.shape(self.word_ids)[1],
                            self.config.WINDOW_LEN, self.config.DIM))
            self.image_patches_reshaped = tf.reshape(
                self.image_patches,
                (-1, self.config.WINDOW_LEN, self.config.DIM))[:, :, :,
                                                               tf.newaxis]
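The word-plus-char concat above appends the BiLSTM character summary to each word vector along the last axis. A shape-only sketch with hypothetical sizes:

import tensorflow as tf

B, T = 4, 15                          # hypothetical batch and sentence length
dim_word, hidden_size_char = 300, 50  # stand-in config values

word_emb = tf.zeros([B, T, dim_word])
char_out = tf.zeros([B, T, 2 * hidden_size_char])  # fw + bw final states

word_embeddings = tf.concat([word_emb, char_out], axis=-1)  # [B, T, 400]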
Example #43
0
    def build_model(self, dataset):
        tf.set_random_seed(self.seed)
        self.dataset = dataset
        self.field_size = dataset.train_feat_indices.shape[1]
        self.feature_size = dataset.feature_size
        self.n_users = dataset.n_users
        self.n_items = dataset.n_items
        self.global_mean = dataset.global_mean
        self.total_items_unique = self.item_info
        if dataset.lower_upper_bound is not None:
            self.lower_bound = dataset.lower_upper_bound[0]
            self.upper_bound = dataset.lower_upper_bound[1]
        else:
            self.lower_bound = None
            self.upper_bound = None

        self.feature_indices = tf.placeholder(tf.int32, shape=[None, self.field_size], name="indices")
        self.feature_values = tf.placeholder(tf.float32, shape=[None, self.field_size], name="values")
        self.labels = tf.placeholder(tf.float32, shape=[None])

        self.w = tf.Variable(tf.truncated_normal([self.feature_size + 1, 1], 0.0, 0.01))  # feature_size + 1
        self.v = tf.Variable(tf.truncated_normal([self.feature_size + 1, self.n_factors], 0.0, 0.01))
        self.feature_values_reshape = tf.reshape(self.feature_values, shape=[-1, self.field_size, 1])

        self.linear_embedding = tf.nn.embedding_lookup(self.w, self.feature_indices)   # N * F * 1
        self.linear_term = tf.reduce_sum(tf.multiply(self.linear_embedding, self.feature_values_reshape), 2)

        self.feature_embedding = tf.nn.embedding_lookup(self.v, self.feature_indices)  # N * F * K
        self.feature_embedding = tf.multiply(self.feature_embedding, self.feature_values_reshape)

        self.pairwise_term = 0.5 * tf.subtract(
            tf.square(tf.reduce_sum(self.feature_embedding, axis=2)),  # note: the standard FM pairwise term reduces over the field axis (axis=1)
            tf.reduce_sum(tf.square(self.feature_embedding), axis=2))

        self.concat = tf.concat([self.linear_term, self.pairwise_term], axis=1)

        if self.task == "rating":
            self.pred = tf.layers.dense(inputs=self.concat, units=1, name="pred")
            self.loss = tf.losses.mean_squared_error(labels=tf.reshape(self.labels, [-1, 1]),
                                                     predictions=self.pred)

            if self.lower_bound is not None and self.upper_bound is not None:
                self.rmse = tf.sqrt(tf.losses.mean_squared_error(labels=tf.reshape(self.labels, [-1, 1]),
                                predictions=tf.clip_by_value(self.pred, self.lower_bound, self.upper_bound)))
            else:
                self.rmse = self.loss

        #    reg_w = self.reg * tf.nn.l2_loss(self.w)
            reg_v = self.reg * tf.nn.l2_loss(self.v)
            self.total_loss = tf.add_n([self.loss, reg_v])

        elif self.task == "ranking":
            self.logits = tf.layers.dense(inputs=self.concat, units=1, name="logits")
            self.logits = tf.reshape(self.logits, [-1])
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.logits))

            self.y_prob = tf.sigmoid(self.logits, name="prob")
            self.pred = tf.where(self.y_prob >= 0.5,
                                 tf.fill(tf.shape(self.logits), 1.0),
                                 tf.fill(tf.shape(self.logits), 0.0), name="pred")
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.labels), tf.float32))
            self.precision = precision_tf(self.pred, self.labels)

        #    reg_w = self.reg * tf.nn.l2_loss(self.w)
            reg_v = self.reg * tf.nn.l2_loss(self.v)
            self.total_loss = tf.add_n([self.loss, reg_v])
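The pairwise term above uses the factorization-machine identity 0.5 * ((sum_i v_i)^2 - sum_i v_i^2) = sum_{i<j} v_i * v_j. A NumPy check of that identity, reducing over the field axis as in the standard formulation:

import numpy as np

rng = np.random.default_rng(0)
emb = rng.normal(size=(2, 5, 3))          # [batch, fields, factors]

pairwise = 0.5 * (np.square(emb.sum(axis=1)) - np.square(emb).sum(axis=1))
# Equals the sum of v_i * v_j over all field pairs i < j, per factor:
brute = sum(emb[:, i] * emb[:, j]
            for i in range(5) for j in range(i + 1, 5))
assert np.allclose(pairwise, brute)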
def main():
  # Get hyperparameters
  if FLAGS.enable_colored_log:
    import coloredlogs
    coloredlogs.install()
  logging.basicConfig(level=logging.INFO)
  INPUT_FILE_FORMAT = FLAGS.input_file_format
  if INPUT_FILE_FORMAT not in ["tfrecord", "csv"]:
    logging.error("Unknow input file format: {}".format(INPUT_FILE_FORMAT))
    exit(1)
  FEATURE_SIZE = FLAGS.feature_size
  LABEL_SIZE = FLAGS.label_size
  EPOCH_NUMBER = FLAGS.epoch_number
  if EPOCH_NUMBER <= 0:
    EPOCH_NUMBER = None
  BATCH_THREAD_NUMBER = FLAGS.batch_thread_number
  MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue
  BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE
  MODE = FLAGS.mode
  MODEL = FLAGS.model
  CHECKPOINT_PATH = FLAGS.checkpoint_path
  if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists(
      CHECKPOINT_PATH):
    os.makedirs(CHECKPOINT_PATH)
  CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt"
  LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH)
  OUTPUT_PATH = FLAGS.output_path
  if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)
  pprint.PrettyPrinter().pprint(FLAGS.__flags)

  # Process TFRecords files
  def read_and_decode_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            "label": tf.FixedLenFeature([], tf.float32),
            "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32),
        })
    label = features["label"]
    features = features["features"]
    return label, features

  def read_and_decode_csv(filename_queue):
    # TODO: Not generic for all datasets
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    #record_defaults = [[1], [1], [1], [1], [1]]
    record_defaults = [[1], [1.0], [1.0], [1.0], [1.0]]
    col1, col2, col3, col4, col5 = tf.decode_csv(
        value, record_defaults=record_defaults)
    label = col1
    features = tf.stack([col2, col3, col4, col5])
    return label, features

  # Read TFRecords files for training
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.train_file), num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    label, features = read_and_decode_tfrecord(filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    label, features = read_and_decode_csv(filename_queue)
  batch_labels, batch_features = tf.train.shuffle_batch(
      [label, features],
      batch_size=FLAGS.batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Read TFRecords files for validation
  validate_filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.validate_file),
      num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    validate_label, validate_features = read_and_decode_tfrecord(
        validate_filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    validate_label, validate_features = read_and_decode_csv(
        validate_filename_queue)
  validate_batch_labels, validate_batch_features = tf.train.shuffle_batch(
      [validate_label, validate_features],
      batch_size=FLAGS.validate_batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Define the model
  input_units = FEATURE_SIZE
  output_units = LABEL_SIZE
  model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()]

  def full_connect(inputs, weights_shape, biases_shape, is_train=True):
    weights = tf.get_variable(
        "weights", weights_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable(
        "biases", biases_shape, initializer=tf.random_normal_initializer())
    layer = tf.matmul(inputs, weights) + biases

    if FLAGS.enable_bn and is_train:
      mean, var = tf.nn.moments(layer, axes=[0])
      scale = tf.get_variable(
          "scale", biases_shape, initializer=tf.random_normal_initializer())
      shift = tf.get_variable(
          "shift", biases_shape, initializer=tf.random_normal_initializer())
      layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                        FLAGS.bn_epsilon)
    return layer

  def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
    layer = full_connect(inputs, weights_shape, biases_shape, is_train)
    layer = tf.nn.relu(layer)
    return layer

  def customized_inference(inputs, is_train=True):
    hidden1_units = 128
    hidden2_units = 32
    hidden3_units = 8

    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs, [input_units, hidden1_units],
                                [hidden1_units], is_train)
    with tf.variable_scope("layer0"):
      layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                [hidden2_units], is_train)
    with tf.variable_scope("layer1"):
      layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                [hidden3_units], is_train)
    if FLAGS.enable_dropout and is_train:
      layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
    with tf.variable_scope("output"):
      layer = full_connect(layer, [hidden3_units, output_units],
                           [output_units], is_train)
    return layer

  def dnn_inference(inputs, is_train=True):
    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs,
                                [input_units, model_network_hidden_units[0]],
                                [model_network_hidden_units[0]], is_train)

    for i in range(len(model_network_hidden_units) - 1):
      with tf.variable_scope("layer{}".format(i)):
        layer = full_connect_relu(layer, [
            model_network_hidden_units[i], model_network_hidden_units[i + 1]
        ], [model_network_hidden_units[i + 1]], is_train)

    with tf.variable_scope("output"):
      layer = full_connect(layer,
                           [model_network_hidden_units[-1],
                            output_units], [output_units], is_train)
    return layer

  def lr_inference(inputs, is_train=True):
    with tf.variable_scope("lr"):
      layer = full_connect(inputs, [input_units, output_units], [output_units])
    return layer

  def wide_and_deep_inference(inputs, is_train=True):
    return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train)

  def cnn_inference(inputs, is_train=True):
    # TODO: Change if validate_batch_size is different
    # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1]
    inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1])

    # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8]
    with tf.variable_scope("conv0"):
      weights = tf.get_variable(
          "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer())
      bias = tf.get_variable(
          "bias", [8], initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(
          inputs, weights, strides=[1, 1, 1, 1], padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(
          layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

    # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8]
    with tf.variable_scope("conv1"):
      weights = tf.get_variable(
          "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer())
      bias = tf.get_variable(
          "bias", [8], initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(
          layer, weights, strides=[1, 1, 1, 1], padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(
          layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

    # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8]
    with tf.variable_scope("conv2"):
      weights = tf.get_variable(
          "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer())
      bias = tf.get_variable(
          "bias", [8], initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(
          layer, weights, strides=[1, 1, 1, 1], padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(
          layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

    # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8]
    layer = tf.reshape(layer, [-1, 8 * 8 * 8])

    # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE]
    with tf.variable_scope("output"):
      weights = tf.get_variable(
          "weights", [8 * 8 * 8, LABEL_SIZE],
          initializer=tf.random_normal_initializer())
      bias = tf.get_variable(
          "bias", [LABEL_SIZE], initializer=tf.random_normal_initializer())
      layer = tf.add(tf.matmul(layer, weights), bias)

    return layer

  def inference(inputs, is_train=True):
    if MODEL == "dnn":
      return dnn_inference(inputs, is_train)
    elif MODEL == "lr":
      return lr_inference(inputs, is_train)
    elif MODEL == "wide_and_deep":
      return wide_and_deep_inference(inputs, is_train)
    elif MODEL == "customized":
      return customized_inference(inputs, is_train)
    elif MODEL == "cnn":
      return cnn_inference(inputs, is_train)
    else:
      logging.error("Unknown model, exit now")
      exit(1)

  logging.info("Use the model: {}, model network: {}".format(
      MODEL, FLAGS.model_network))
  logits = inference(batch_features, True)
  batch_labels = tf.to_int64(batch_labels)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=batch_labels)
  loss = tf.reduce_mean(cross_entropy, name="loss")
  global_step = tf.Variable(0, name="global_step", trainable=False)
  if FLAGS.enable_lr_decay:
    logging.info(
        "Enable learning rate decay rate: {}".format(FLAGS.lr_decay_rate))
    starter_learning_rate = FLAGS.learning_rate
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate,
        global_step,
        100000,
        FLAGS.lr_decay_rate,
        staircase=True)
  else:
    learning_rate = FLAGS.learning_rate
  optimizer = get_optimizer(FLAGS.optimizer, learning_rate)
  train_op = optimizer.minimize(loss, global_step=global_step)
  tf.get_variable_scope().reuse_variables()

  # Define accuracy op for train data
  train_accuracy_logits = inference(batch_features, False)
  train_softmax = tf.nn.softmax(train_accuracy_logits)
  train_correct_prediction = tf.equal(
      tf.argmax(train_softmax, 1), batch_labels)
  train_accuracy = tf.reduce_mean(
      tf.cast(train_correct_prediction, tf.float32))

  # Define auc op for train data
  batch_labels = tf.cast(batch_labels, tf.int32)
  sparse_labels = tf.reshape(batch_labels, [-1, 1])
  derived_size = tf.shape(batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                  new_batch_labels)

  # Define accuracy op for validate data
  validate_accuracy_logits = inference(validate_batch_features, False)
  validate_softmax = tf.nn.softmax(validate_accuracy_logits)
  validate_batch_labels = tf.to_int64(validate_batch_labels)
  validate_correct_prediction = tf.equal(
      tf.argmax(validate_softmax, 1), validate_batch_labels)
  validate_accuracy = tf.reduce_mean(
      tf.cast(validate_correct_prediction, tf.float32))

  # Define auc op for validate data
  validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
  sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
  derived_size = tf.shape(validate_batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax,
                                                     new_validate_batch_labels)

  # Define inference op
  inference_features = tf.placeholder("float", [None, FEATURE_SIZE])
  inference_logits = inference(inference_features, False)
  inference_softmax = tf.nn.softmax(inference_logits)
  inference_op = tf.argmax(inference_softmax, 1)
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)
  model_signature = {
      "inputs":
      exporter.generic_signature({
          "keys": keys_placeholder,
          "features": inference_features
      }),
      "outputs":
      exporter.generic_signature({
          "keys": keys,
          "softmax": inference_softmax,
          "prediction": inference_op
      })
  }

  # Initialize saver and summary
  saver = tf.train.Saver()
  tf.summary.scalar("loss", loss)
  tf.summary.scalar("train_accuracy", train_accuracy)
  tf.summary.scalar("train_auc", train_auc)
  tf.summary.scalar("validate_accuracy", validate_accuracy)
  tf.summary.scalar("validate_auc", validate_auc)
  summary_op = tf.summary.merge_all()
  init_op = [
      tf.global_variables_initializer(),
      tf.local_variables_initializer()
  ]

  # Create session to run
  with tf.Session() as sess:
    logging.info("Start to run with mode: {}".format(MODE))
    writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph)
    sess.run(init_op)

    if MODE == "train":
      # Restore session and start queue runner
      restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT)
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord, sess=sess)
      start_time = datetime.datetime.now()

      try:
        while not coord.should_stop():
          if FLAGS.benchmark_mode:
            sess.run(train_op)
          else:
            _, step = sess.run([train_op, global_step])

            # Print state while training
            if step % FLAGS.steps_to_validate == 0:
              loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
                  [
                      loss, train_accuracy, train_auc, validate_accuracy,
                      validate_auc, summary_op
                  ])
              end_time = datetime.datetime.now()
              logging.info(
                  "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".
                  format(end_time - start_time, step, loss_value,
                         train_accuracy_value, train_auc_value,
                         validate_accuracy_value, validate_auc_value))
              writer.add_summary(summary_value, step)
              saver.save(sess, CHECKPOINT_FILE, global_step=step)
              start_time = end_time
      except tf.errors.OutOfRangeError:
        if FLAGS.benchmark_mode:
          print("Finish training for benchmark")
          exit(0)
        else:
          # Export the model after training
          export_model(sess, saver, model_signature, FLAGS.model_path,
                       FLAGS.model_version)
      finally:
        coord.request_stop()
      coord.join(threads)

    elif MODE == "export":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Export the model
      export_model(sess, saver, model_signature, FLAGS.model_path,
                   FLAGS.model_version)

    elif MODE == "savedmodel":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      logging.info(
          "Export the saved model to {}".format(FLAGS.saved_model_path))
      export_path_base = FLAGS.saved_model_path
      export_path = os.path.join(
          compat.as_bytes(export_path_base),
          compat.as_bytes(str(FLAGS.model_version)))

      model_signature = signature_def_utils.build_signature_def(
          inputs={
              "keys": utils.build_tensor_info(keys_placeholder),
              "features": utils.build_tensor_info(inference_features)
          },
          outputs={
              "keys": utils.build_tensor_info(keys),
              "softmax": utils.build_tensor_info(inference_softmax),
              "prediction": utils.build_tensor_info(inference_op)
          },
          method_name=signature_constants.PREDICT_METHOD_NAME)

      try:
        builder = saved_model_builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess,
            [tag_constants.SERVING],
            clear_devices=True,
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                model_signature,
            },
            #legacy_init_op=legacy_init_op)
            legacy_init_op=tf.group(
                tf.initialize_all_tables(), name="legacy_init_op"))

        builder.save()
      except Exception as e:
        logging.error("Fail to export saved model, exception: {}".format(e))

    elif MODE == "inference":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Load inference test data
      inference_result_file_name = FLAGS.inference_result_file
      inference_test_file_name = FLAGS.inference_test_file
      inference_data = np.genfromtxt(inference_test_file_name, delimiter=",")
      inference_data_features = inference_data[:, 0:9]
      inference_data_labels = inference_data[:, 9]

      # Run inference
      start_time = datetime.datetime.now()
      prediction, prediction_softmax = sess.run(
          [inference_op, inference_softmax],
          feed_dict={inference_features: inference_data_features})
      end_time = datetime.datetime.now()

      # Compute accuracy
      label_number = len(inference_data_labels)
      correct_label_number = 0
      for i in range(label_number):
        if inference_data_labels[i] == prediction[i]:
          correct_label_number += 1
      accuracy = float(correct_label_number) / label_number

      # Compute auc
      y_true = np.array(inference_data_labels)
      y_score = prediction_softmax[:, 1]
      fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
      auc = metrics.auc(fpr, tpr)
      logging.info("[{}] Inference accuracy: {}, auc: {}".format(
          end_time - start_time, accuracy, auc))

      # Save result into the file
      np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",")
      logging.info(
          "Save result to file: {}".format(inference_result_file_name))
def detect_video(Yolo,
                 video_path,
                 output_path,
                 input_size=416,
                 show=False,
                 CLASSES=YOLO_COCO_CLASSES,
                 score_threshold=0.3,
                 iou_threshold=0.45,
                 rectangle_colors=''):
    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    while True:
        _, img = vid.read()

        try:
            # a second BGR2RGB swap would undo the first, so convert once
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        except cv2.error:
            break

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        image = draw_bbox(original_image,
                          bboxes,
                          CLASSES=CLASSES,
                          rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES))

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
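The reshape-and-concat above flattens each detection scale to rows of length 5 + num_classes before joining them into one box list. A shape-only sketch with toy scales:

import tensorflow as tf

C = 80                                   # hypothetical class count
scales = [tf.zeros([1, s, s, 3, 5 + C])  # 3 anchors, (x,y,w,h,obj)+classes
          for s in (52, 26, 13)]

flat = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in scales]
pred_bbox = tf.concat(flat, axis=0)      # [3*(52*52 + 26*26 + 13*13), 85]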
Example #46
0
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example of a network in tf that has both state and image inputs, with the feature
    point architecture (spatial softmax + expectation).
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    image_input = tf.transpose(image_input, perm=[0,3,2,1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            'wc1': init_weights([filter_size, filter_size, num_channels, num_filters[0]], name='wc1'), # 5x5 conv, num_channels inputs, num_filters[0] outputs
            'wc2': init_weights([filter_size, filter_size, num_filters[0], num_filters[1]], name='wc2'), # 5x5 conv, num_filters[0] inputs, num_filters[1] outputs
            'wc3': init_weights([filter_size, filter_size, num_filters[1], num_filters[2]], name='wc3'), # 5x5 conv, num_filters[1] inputs, num_filters[2] outputs
        }

        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'], strides=[1,2,2,1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]
    x_map = np.empty([num_rows, num_cols], np.float32)
    y_map = np.empty([num_rows, num_cols], np.float32)

    for i in range(num_rows):
        for j in range(num_cols):
            x_map[i, j] = (i - num_rows / 2.0) / num_rows
            y_map[i, j] = (j - num_cols / 2.0) / num_cols

    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)

    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0,3,1,2]),
                          [-1, num_rows*num_cols])
    softmax = tf.nn.softmax(features)

    fp_x = tf.reduce_sum(tf.multiply(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.multiply(y_map, softmax), [1], keep_dims=True)

    fp = tf.reshape(tf.concat(axis=1, values=[fp_x, fp_y]), [-1, num_fp*2])

    fc_input = tf.concat(axis=1, values=[fp, state_input])

    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss], fp=fp)
    last_conv_vars = fc_input

    return nnet, fc_vars, last_conv_vars
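The feature-point block above is a spatial soft-argmax: a softmax over each feature map followed by an expectation against fixed coordinate grids. A self-contained sketch of the same computation (the function name is ours; static spatial dimensions are assumed):

import numpy as np
import tensorflow as tf

def spatial_soft_argmax(conv_out):
    # conv_out: [batch, rows, cols, num_fp]; returns [batch, num_fp * 2]
    # expected (x, y) coordinates, one pair per feature map.
    rows, cols, num_fp = [int(d) for d in conv_out.get_shape()[1:]]
    # same normalized grid convention as x_map / y_map above
    x_map = np.tile(((np.arange(rows) - rows / 2.0) / rows)[:, None], (1, cols))
    y_map = np.tile(((np.arange(cols) - cols / 2.0) / cols)[None, :], (rows, 1))
    x_map = tf.constant(x_map.reshape(-1), dtype=tf.float32)
    y_map = tf.constant(y_map.reshape(-1), dtype=tf.float32)
    # one softmax distribution per (batch sample, feature map)
    feats = tf.reshape(tf.transpose(conv_out, [0, 3, 1, 2]), [-1, rows * cols])
    smax = tf.nn.softmax(feats)
    fp_x = tf.reduce_sum(x_map * smax, [1], keep_dims=True)
    fp_y = tf.reduce_sum(y_map * smax, [1], keep_dims=True)
    return tf.reshape(tf.concat(axis=1, values=[fp_x, fp_y]), [-1, num_fp * 2])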
Example #47
0
b_conv2 = bias_variable([12])
h_conv2 = tf.nn.relu(conv2d(h_pool1_drop, W_conv2) + b_conv2) 
h_pool2 = max_pool_4x4(h_conv2)
#h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob)


W_fc1 = weight_variable([3*3*12 + 49, 30])
b_fc1 = bias_variable([30])


h_flat = tf.reshape(h_pool2, [-1,3*3*12])
#h_flat_drop = tf.nn.dropout(h_flat, keep_prob)
#h_flat_sigmoid = tf.nn.sigmoid(h_flat)

features = tf.placeholder(tf.float32,  [None,49])
h_flat_features = tf.concat([h_flat,features],1)
h_flat_features_drop = tf.nn.dropout(h_flat_features, 0.8)  # note: unused below; h_fc1 is fed the un-dropped features


h_fc1 = tf.nn.relu(tf.matmul(h_flat_features, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

#W_fc2 = weight_variable([30, 15])
#b_fc2 = bias_variable([15])
#h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
#h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)


W_fc3 = weight_variable([30, 1]) 
b_fc3 = bias_variable_out([1])
def detect_video_bgs(Yolo,
                     video_path,
                     output_path,
                     log_path,
                     input_size=416,
                     show=False,
                     CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors='',
                     draw_roi=False,
                     zoom=0,
                     show_diver=True):

    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    LOW = np.array([80, 0, 200])
    HIGH = np.array([255, 110, 255])

    log = pd.DataFrame(columns=[
        "vis_px", "vis_px_pc", "total_px", "total_px_pc", "diff", "diff_pc"
    ])
    while True:
        ret, img = vid.read()
        if not ret:  # end of the video stream
            break
        # the two BGR<->RGB conversions cancel out; the frame stays in BGR order
        original_image = img.copy()

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        pred_bbox = Yolo.predict(image_data)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        #Countour BGS:
        hsv = cv2.cvtColor(original_image, cv2.COLOR_BGR2HSV)
        # mask image
        fgMask = cv2.inRange(hsv, LOW, HIGH)

        #(x1, y1), (x2, y2) = (bboxes[0], bboxes[1]), (bboxes[2], bboxes[3])
        splash_boxes = [
            i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
        ]

        if splash_boxes:
            splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                splash_boxes=splash_boxes, zoom=zoom)

            #normal_image:
            number_of_white_pix = np.sum(fgMask == 255)
            number_total_pix = fgMask.shape[0] * fgMask.shape[1]
            print("Normal_image: Number of white pixels: {} ({}%)".format(
                number_of_white_pix,
                round((number_of_white_pix / number_total_pix) * 100), 2))

            #splash_roi:
            splash_roi = fgMask[splash_y_min:splash_y_max,
                                splash_x_min:splash_x_max]
            roi_number_of_white_pix = np.sum(splash_roi == 255)
            # roi_number_total_pix = splash_roi.shape[0]*splash_roi.shape[1]
            print("Roi: Number of white pixels: {} ({}%)".format(
                roi_number_of_white_pix,
                round((roi_number_of_white_pix / number_total_pix) * 100), 2))

            pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

            image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)

            if draw_roi:
                # image = draw_bbox(image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
                #splash_x_min,splash_y_min,splash_x_max,splash_y_max
                image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                      (splash_x_max, splash_y_max),
                                      (255, 0, 0), 2)

            else:
                # create mask and apply
                mask = np.zeros(image.shape[:2], dtype="uint8")
                cv2.rectangle(mask, (splash_x_min, splash_y_min),
                              (splash_x_max, splash_y_max), 255, -1)
                masked = cv2.bitwise_and(image, image, mask=mask)

                image = masked

            #Recolor
            image = recolor_bw(image, splash_red=True)

            #Calcs
            vis_px_pc = round(
                (roi_number_of_white_pix / number_total_pix) * 100, 2)
            total_px_pc = round((number_of_white_pix / number_total_pix) * 100,
                                2)
            diff_pc = round(
                (roi_number_of_white_pix / number_of_white_pix) * 100, 2)

            image = cv2.putText(
                image,
                "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                .format(roi_number_of_white_pix, vis_px_pc,
                        number_of_white_pix, total_px_pc, pixel_diff, diff_pc),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, (0, 0, 255), 1)
            # Create logs:
            log = log.append(
                {
                    "vis_px": roi_number_of_white_pix,
                    "vis_px_pc": vis_px_pc,
                    "total_px": number_of_white_pix,
                    "total_px_pc": total_px_pc,
                    "diff": pixel_diff,
                    "diff_pc": diff_pc
                },
                ignore_index=True)

        else:
            if not show_diver:
                #No splash and no diver should be shown.
                image = np.zeros(original_image.shape[:2], dtype="uint8")
                image = recolor_bw(image, splash_red=False)

            else:
                image = draw_bbox(original_image,
                                  bboxes,
                                  CLASSES=CLASSES,
                                  rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        # image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
        #                     (0, 0, 255), 2)

        # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES))

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    log.to_csv(log_path)
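The splash-visibility metric above boils down to ratios of white (foreground) pixels inside the ROI versus the whole mask. A tiny NumPy sketch of the bookkeeping, on a made-up mask:

import numpy as np

mask = np.zeros((8, 8), dtype=np.uint8)
mask[2:6, 2:6] = 255                    # pretend foreground blob (16 white px)
roi = mask[3:5, 3:5]                    # pretend splash bounding box (4 white px)

total_white = int(np.sum(mask == 255))  # 16
roi_white = int(np.sum(roi == 255))     # 4
total_px = mask.size                    # 64
vis_px_pc = round(roi_white / total_px * 100, 2)   # ROI white vs. all pixels
diff_pc = round(roi_white / total_white * 100, 2)  # ROI white vs. all white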
    def _build(self,
               inputs,
               order='btu',
               medium=None,
               sequence_length_major=None,
               sequence_length_minor=None,
               **kwargs):
        """Encodes the inputs.

        Args:
            inputs: A 4-D tensor of shape `[B, T, U, dim]`, where

                - B: batch_size
                - T: the max length of high-level sequences. E.g., the max \
                number of utterances in dialog history.
                - U: the max length of low-level sequences. E.g., the max \
                length of each utterance in dialog history.
                - dim: embedding dimension

                The order of first three dimensions can be changed
                according to :attr:`order`.

            order: A 3-char string containing 'b', 't', and 'u',
                specifying the order of the input dimensions above.
                The following four values are accepted:

                    - **'btu'**: None of the encoders are time-major.
                    - **'utb'**: Both encoders are time-major.
                    - **'tbu'**: The major encoder is time-major.
                    - **'ubt'**: The minor encoder is time-major.

            medium (optional): A list of callables that subsequently process the
                final states of minor encoder and obtain the inputs
                for the major encoder.
                If not specified, :meth:`flatten` is used for processing
                the minor's final states.
            sequence_length_major (optional): The `sequence_length` argument
                sent to major encoder. This is a 1-D Tensor of shape
                `[B]`.
            sequence_length_minor (optional): The `sequence_length` argument
                sent to minor encoder. It can be either a 1-D Tensor of shape
                `[B*T]`, or a 2-D Tensor of shape `[B, T]` or `[T, B]`
                according to :attr:`order`.
            **kwargs: Other keyword arguments for the major and minor encoders,
                such as `initial_state`, etc.
                Note that `sequence_length`, and `time_major`
                must not be included here.
                `time_major` is derived from :attr:`order` automatically.
                By default, arguments will be sent to both major and minor
                encoders. To specify which encoder an argument should be sent
                to, add '_minor'/'_major' as its suffix.

                Note that `initial_state_minor` must have a batch dimension
                of size `B*T`. If you have an initial state of batch dimension
                = `T`, use :meth:`tile_initial_state_minor` to tile it
                according to `order`.

        Returns:
            A tuple `(outputs, final_state)` by the major encoder.

            See
            the return values of `_build()` method of respective encoder class
            for details.
        """

        def _kwargs_split(kwargs):
            kwargs_minor, kwargs_major = {}, {}
            for k, v in kwargs.items():
                if k.endswith('_minor'):
                    kwargs_minor[k[:-6]] = v
                elif k.endswith('_major'):
                    kwargs_major[k[:-6]] = v
                else:
                    # per the docstring, unsuffixed arguments go to both
                    kwargs_minor[k] = v
                    kwargs_major[k] = v
            return kwargs_minor, kwargs_major

        kwargs_minor, kwargs_major = _kwargs_split(kwargs)
        if sequence_length_minor is not None:
            sequence_length_minor = tf.reshape(sequence_length_minor, [-1])
        kwargs_minor['sequence_length'] = sequence_length_minor
        kwargs_major['sequence_length'] = sequence_length_major

        expand, shape = self._get_flatten_order(
            order, kwargs_minor, kwargs_major, tf.shape(inputs))

        inputs = tf.reshape(inputs, shape + [inputs.shape[3]])

        _, states_minor = self._encoder_minor(inputs, **kwargs_minor)

        self.states_minor_before_medium = states_minor

        if medium is None:
            states_minor = self.flatten(states_minor)
        else:
            if not isinstance(medium, collections.abc.Sequence):
                medium = [medium]
            for fn in medium:
                if isinstance(fn, str) and fn == 'flatten':
                    states_minor = self.flatten(states_minor)
                else:
                    states_minor = fn(states_minor)

        self.states_minor_after_medium = states_minor

        states_minor = tf.reshape(
            states_minor, tf.concat([expand, tf.shape(states_minor)[1:]], 0))

        outputs_major, states_major = self._encoder_major(states_minor,
                                                          **kwargs_major)

        # Add trainable variables of `self._cell` which may be constructed
        # externally
        if not self._built:
            self._add_trainable_variable(
                self._encoder_minor.trainable_variables)
            self._add_trainable_variable(
                self._encoder_major.trainable_variables)
            self._built = True

        return outputs_major, states_major
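To make the suffix routing concrete, here is the same helper lifted out of _build with a quick self-check (key names are illustrative):

def kwargs_split(kwargs):
    kwargs_minor, kwargs_major = {}, {}
    for k, v in kwargs.items():
        if k.endswith('_minor'):
            kwargs_minor[k[:-6]] = v
        elif k.endswith('_major'):
            kwargs_major[k[:-6]] = v
        else:
            kwargs_minor[k] = v
            kwargs_major[k] = v
    return kwargs_minor, kwargs_major

minor, major = kwargs_split({'initial_state_minor': 's0',
                             'initial_state_major': 's1',
                             'dtype': 'float32'})
assert minor == {'initial_state': 's0', 'dtype': 'float32'}
assert major == {'initial_state': 's1', 'dtype': 'float32'}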
def detect_video_knn(Yolo,
                     video_path,
                     output_path,
                     input_size=416,
                     show=False,
                     CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors='',
                     draw_roi=False,
                     zoom=0):

    #different background subtraction methods

    # backSub = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=40, detectShadows=False)
    backSub = cv2.createBackgroundSubtractorKNN()

    #KNN
    backSub.setDetectShadows(False)
    backSub.setDist2Threshold(13000)
    backSub.setkNNSamples(6)
    backSub.setNSamples(30)

    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4
    while True:
        ret, img = vid.read()
        if not ret:  # end of the video stream
            break
        # the two BGR<->RGB conversions cancel out; the frame stays in BGR order
        original_image = img.copy()

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        pred_bbox = Yolo.predict(image_data)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        fgMask = backSub.apply(original_image, learningRate=0.9)

        #(x1, y1), (x2, y2) = (bboxes[0], bboxes[1]), (bboxes[2], bboxes[3])
        splash_boxes = [
            i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
        ]

        if splash_boxes:
            splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                splash_boxes=splash_boxes, zoom=zoom)

            #normal_image:
            number_of_white_pix = np.sum(fgMask == 255)
            number_total_pix = fgMask.shape[0] * fgMask.shape[1]
            print("Normal_image: Number of white pixels: {} ({}%)".format(
                number_of_white_pix,
                round((number_of_white_pix / number_total_pix) * 100), 2))

            #splash_roi:
            splash_roi = fgMask[splash_y_min:splash_y_max,
                                splash_x_min:splash_x_max]
            roi_number_of_white_pix = np.sum(splash_roi == 255)
            # roi_number_total_pix = splash_roi.shape[0]*splash_roi.shape[1]
            print("Roi: Number of white pixels: {} ({}%)".format(
                roi_number_of_white_pix,
                round((roi_number_of_white_pix / number_total_pix) * 100), 2))

            pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

            image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)

            if draw_roi:
                # image = draw_bbox(image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
                #splash_x_min,splash_y_min,splash_x_max,splash_y_max
                image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                      (splash_x_max, splash_y_max),
                                      (255, 0, 0), 2)

            else:
                # create mask and apply
                mask = np.zeros(image.shape[:2], dtype="uint8")
                cv2.rectangle(mask, (splash_x_min, splash_y_min),
                              (splash_x_max, splash_y_max), 255, -1)
                masked = cv2.bitwise_and(image, image, mask=mask)

                image = masked

            image = cv2.putText(
                image,
                "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                .format(
                    roi_number_of_white_pix,
                    round((roi_number_of_white_pix / number_total_pix) * 100,
                          2), number_of_white_pix,
                    round((number_of_white_pix / number_total_pix) * 100, 2),
                    pixel_diff,
                    round(
                        (roi_number_of_white_pix / number_of_white_pix) * 100,
                        2)), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7,
                (0, 0, 255), 1)

        else:
            # TODO: decide how to handle frames with no detected splash

            image = draw_bbox(original_image,
                              bboxes,
                              CLASSES=CLASSES,
                              rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        # image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
        #                     (0, 0, 255), 2)

        # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES))

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
Example #51
0
    def call(self,
             w,
             r,
             attn_mask,
             mems,
             head_mask,
             output_attentions,
             training=False):
        qlen, rlen, bsz = shape_list(w)[0], shape_list(r)[0], shape_list(w)[1]

        if mems is not None:
            cat = tf.concat([mems, w], 0)
            if self.pre_lnorm:
                w_heads = self.qkv_net(self.layer_norm(cat))
            else:
                w_heads = self.qkv_net(cat)
            r_head_k = self.r_net(r)

            w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, axis=-1)
            w_head_q = w_head_q[-qlen:]
        else:
            if self.pre_lnorm:
                w_heads = self.qkv_net(self.layer_norm(w))
            else:
                w_heads = self.qkv_net(w)
            r_head_k = self.r_net(r)

            w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, axis=-1)

        klen = shape_list(w_head_k)[0]

        w_head_q = tf.reshape(w_head_q,
                              (qlen, bsz, self.n_head,
                               self.d_head))  # qlen x bsz x n_head x d_head
        w_head_k = tf.reshape(w_head_k,
                              (klen, bsz, self.n_head,
                               self.d_head))  # klen x bsz x n_head x d_head
        w_head_v = tf.reshape(w_head_v,
                              (klen, bsz, self.n_head,
                               self.d_head))  # klen x bsz x n_head x d_head

        r_head_k = tf.reshape(
            r_head_k,
            (rlen, self.n_head, self.d_head))  # rlen x n_head x d_head

        # compute attention score
        rw_head_q = w_head_q + self.r_w_bias  # qlen x bsz x n_head x d_head
        AC = tf.einsum("ibnd,jbnd->ijbn", rw_head_q,
                       w_head_k)  # qlen x klen x bsz x n_head

        rr_head_q = w_head_q + self.r_r_bias
        BD = tf.einsum("ibnd,jnd->ijbn", rr_head_q,
                       r_head_k)  # qlen x klen x bsz x n_head
        BD = self._rel_shift(BD)

        # [qlen x klen x bsz x n_head]
        attn_score = AC + BD
        attn_score = attn_score * self.scale

        # compute attention probability
        if attn_mask is not None:
            attn_mask_t = attn_mask[:, :, None, None]
            attn_score = attn_score * (1 - attn_mask_t) - 1e30 * attn_mask_t

        # [qlen x klen x bsz x n_head]
        attn_prob = tf.nn.softmax(attn_score, axis=1)
        attn_prob = self.dropatt(attn_prob, training=training)

        # Mask heads if we want to
        if head_mask is not None:
            attn_prob = attn_prob * head_mask

        # compute attention vector
        attn_vec = tf.einsum("ijbn,jbnd->ibnd", attn_prob, w_head_v)

        # [qlen x bsz x n_head x d_head]
        attn_vec_sizes = shape_list(attn_vec)
        attn_vec = tf.reshape(
            attn_vec,
            (attn_vec_sizes[0], attn_vec_sizes[1], self.n_head * self.d_head))

        # linear projection
        attn_out = self.o_net(attn_vec)
        attn_out = self.drop(attn_out, training=training)

        if self.pre_lnorm:
            # residual connection
            outputs = [w + attn_out]
        else:
            # residual connection + layer normalization
            outputs = [self.layer_norm(w + attn_out)]

        if output_attentions:
            outputs.append(attn_prob)

        return outputs
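The two einsum contractions above produce [qlen, klen, bsz, n_head] score tensors (content-based AC and position-based BD). A toy shape check with arbitrary small dimensions:

import tensorflow as tf

qlen, klen, bsz, n_head, d_head = 3, 5, 2, 4, 8
q = tf.random.normal((qlen, bsz, n_head, d_head))
k = tf.random.normal((klen, bsz, n_head, d_head))
r = tf.random.normal((klen, n_head, d_head))
AC = tf.einsum("ibnd,jbnd->ijbn", q, k)  # content-based scores
BD = tf.einsum("ibnd,jnd->ijbn", q, r)   # position-based scores
assert AC.shape == (qlen, klen, bsz, n_head)
assert BD.shape == (qlen, klen, bsz, n_head)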
Example #52
0
def TFGAN(inputs,targets):

    traindir = os.path.join(logdir, 'GG12\\PIX2PIX_MINMAX_1024')
    if tf.gfile.Exists(traindir):
      tf.gfile.DeleteRecursively(traindir)
    tf.gfile.MakeDirs(traindir)

    # Create a GANModel tuple.
    fiber_output, fiber_input = inputs
    encoder, label = targets
    real_data = tf.concat((label,fiber_input),-1)
    #######################################################################
    ##########################  GAN MODEL #################################
    #######################################################################
    gan_model = tfgan.gan_model(
        generator_fn=generator_fn,
        discriminator_fn=pix2pix_D,
        real_data=real_data,
        generator_inputs=fiber_output,
        generator_scope='Generator',
        discriminator_scope='Discriminator')

    #######################################################################
    ##########################  GAN SUMMARY ###############################
    #######################################################################
    with tf.name_scope('Train_summary'):
      generated_data, generated_input = tf.split(gan_model.generated_data,2,-1)
      reshaped_fiber_input = get_summary_image(fiber_input, FLAGS.grid_size)
      reshaped_label = get_summary_image(label, FLAGS.grid_size)
      reshaped_generated_input = get_summary_image(generated_input, FLAGS.grid_size)
      reshaped_generated_data = get_summary_image(generated_data, FLAGS.grid_size)
      tf.summary.image('Input_Fiber', reshaped_fiber_input)
      tf.summary.image('Input_Generator', reshaped_generated_input)
      tf.summary.image('Data_Real', reshaped_label)
      tf.summary.image('Data_Generator', reshaped_generated_data)

    #######################################################################
    ##########################  GAN LOSS  #################################
    #######################################################################
    with tf.name_scope('pixel_loss'):
      pixel_loss = combine_loss(gan_model.generated_data,
                                gan_model.real_data,
                                add_summary=True)
    with tf.name_scope('gan_loss'):
      gan_loss = tfgan.gan_loss(
        gan_model,
        generator_loss_fn=tfgan.losses.modified_generator_loss,
        discriminator_loss_fn=tfgan.losses.modified_discriminator_loss,
        gradient_penalty_weight=1.0, # only applies to the Wasserstein loss
      )
      tfgan.eval.add_regularization_loss_summaries(gan_model)
    with tf.name_scope('Train_Loss'):
      gan_loss = tfgan.losses.combine_adversarial_loss(
          gan_loss, gan_model, pixel_loss,
          weight_factor=FLAGS.adversarial_loss_weight)

    #######################################################################
    ##########################   GAN OPS   ################################
    #######################################################################
    with tf.name_scope('Train_ops'):
      gen_lr = get_lr(1e-5,decay_steps=5000)
      dis_lr = get_lr(5e-5,decay_steps=5000)
      train_ops = tfgan.gan_train_ops(
          gan_model,  gan_loss,
          generator_optimizer=get_optimizer(gen_lr),
          discriminator_optimizer=get_optimizer(dis_lr),
          # summarize_gradients=False,
          # colocate_gradients_with_ops=True,
          # transform_grads_fn=tf.contrib.training.clip_gradient_norms_fn(1e3),
          # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
          )
      psnr = tf.reduce_mean(tf.image.psnr(generated_data, label, max_val = 1.0))
      ssim = tf.reduce_mean(tf.image.ssim(generated_data, label, max_val = 1.0))
      corr = correlation(generated_data, label)
      tf.summary.scalar('PSNR', psnr)
      tf.summary.scalar('SSIM', ssim)
      tf.summary.scalar('Relation', corr)
      tf.summary.scalar('generator_lr', gen_lr)
      # tf.summary.scalar('discriminator_lr', dis_lr)

    #######################################################################
    ##########################   GAN TRAIN   ##############################
    #######################################################################
    train_steps = tfgan.GANTrainSteps(generator_train_steps=1, discriminator_train_steps=1)
    message = tf.string_join([' Train step: ', tf.as_string(tf.train.get_or_create_global_step()),
                              '   PSNR:', tf.as_string(psnr), '   SSIM:', tf.as_string(ssim),
                              '   Correlation:', tf.as_string(corr)
                              ], name='status_message')

    tfgan.gan_train(train_ops, logdir = traindir,  get_hooks_fn=tfgan.get_joint_train_hooks(train_steps),
                    hooks=[tf.train.StopAtStepHook(num_steps=FLAGS.max_iter),
                           tf.train.LoggingTensorHook([message], every_n_iter=FLAGS.log_n_steps),
                           get_tfgan_init_fn(r'E:\GitHub\MMFI\log\GG12\CNN', 'Generator'),
                           # get_tfgan_init_fn(r'E:\GitHub\MMFI\log\G2\pix2pix_D', 'Discriminator'),
                           ],
                    save_summaries_steps = FLAGS.save_summaries_steps*2,
                    save_checkpoint_secs = FLAGS.save_interval_secs)
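The scalar summaries above track PSNR/SSIM between the generated and label images, which assumes both are scaled to [0, 1]. A standalone sketch of the metric calls on dummy data:

import tensorflow as tf

a = tf.random.uniform((1, 64, 64, 1))               # pretend label image
noise = tf.random.normal((1, 64, 64, 1), stddev=0.05)
b = tf.clip_by_value(a + noise, 0.0, 1.0)           # pretend generator output
psnr = tf.reduce_mean(tf.image.psnr(a, b, max_val=1.0))
ssim = tf.reduce_mean(tf.image.ssim(a, b, max_val=1.0))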
Example #53
0
def encoder(source, params):
    mask = tf.to_float(tf.cast(source, tf.bool))
    hidden_size = params.hidden_size

    source, mask = util.remove_invalid_seq(source, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "src_embedding"
    src_emb = tf.get_variable(embed_name,
                              [params.src_vocab.size(), params.embed_size])
    src_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(src_emb, source)
    inputs = tf.nn.bias_add(inputs, src_bias)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("encoder"):
        x = inputs

        for layer in range(params.num_encoder_layer):
            with tf.variable_scope("layer_{}".format(layer)):
                # forward rnn
                with tf.variable_scope('forward'):
                    outputs = rnn.rnn(params.cell,
                                      x,
                                      hidden_size,
                                      mask=mask,
                                      ln=params.layer_norm,
                                      sm=params.swap_memory,
                                      dp=params.dropout)
                    output_fw, state_fw = outputs[1]
                if layer == 0:
                    # backward rnn
                    with tf.variable_scope('backward'):
                        if not params.caencoder:
                            outputs = rnn.rnn(params.cell,
                                              tf.reverse(x, [1]),
                                              hidden_size,
                                              mask=tf.reverse(mask, [1]),
                                              ln=params.layer_norm,
                                              sm=params.swap_memory,
                                              dp=params.dropout)
                            output_bw, state_bw = outputs[1]
                        else:
                            outputs = rnn.cond_rnn(params.cell,
                                                   tf.reverse(x, [1]),
                                                   tf.reverse(output_fw, [1]),
                                                   hidden_size,
                                                   mask=tf.reverse(mask, [1]),
                                                   ln=params.layer_norm,
                                                   sm=params.swap_memory,
                                                   num_heads=params.num_heads,
                                                   one2one=True)
                            output_bw, state_bw = outputs[1]

                        output_bw = tf.reverse(output_bw, [1])

                    if not params.caencoder:
                        y = tf.concat([output_fw, output_bw], -1)
                        z = tf.concat([state_fw, state_bw], -1)
                    else:
                        y = output_bw
                        z = state_bw
                else:
                    y = output_fw
                    z = state_fw

                y = func.linear(y, hidden_size, ln=False, scope="ff")

                # short cut via residual connection
                if x.get_shape()[-1].value == y.get_shape()[-1].value:
                    x = func.residual_fn(x, y, dropout=params.dropout)
                else:
                    x = y
                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    with tf.variable_scope("decoder_initializer"):
        decoder_cell = rnn.get_cell(params.cell,
                                    hidden_size,
                                    ln=params.layer_norm)

    return {
        "encodes": x,
        "decoder_initializer": {
            "layer_{}".format(l):
            decoder_cell.get_init_state(x=z, scope="layer_{}".format(l))
            for l in range(params.num_decoder_layer)
        },
        "mask": mask
    }
Example #54
0
    def call(
        self,
        inputs,
        mems=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        output_hidden_states=None,
        training=False,
    ):
        if isinstance(inputs, (tuple, list)):
            input_ids = inputs[0]
            mems = inputs[1] if len(inputs) > 1 else mems
            head_mask = inputs[2] if len(inputs) > 2 else head_mask
            inputs_embeds = inputs[3] if len(inputs) > 3 else inputs_embeds
            output_attentions = inputs[4] if len(
                inputs) > 4 else output_attentions
            output_hidden_states = inputs[5] if len(
                inputs) > 5 else output_hidden_states
            assert len(inputs) <= 6, "Too many inputs."
        elif isinstance(inputs, (dict, BatchEncoding)):
            input_ids = inputs.get("input_ids")
            mems = inputs.get("mems", mems)
            head_mask = inputs.get("head_mask", head_mask)
            inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
            output_attentions = inputs.get("output_attentions",
                                           output_attentions)
            output_hidden_states = inputs.get("output_hidden_states",
                                              output_hidden_states)
            assert len(inputs) <= 6, "Too many inputs."
        else:
            input_ids = inputs

        output_attentions = output_attentions if output_attentions is not None else self.output_attentions
        output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states

        # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library
        # so we transpose here from shape [bsz, len] to shape [len, bsz]
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_ids = tf.transpose(input_ids, perm=(1, 0))
            qlen, bsz = shape_list(input_ids)
        elif inputs_embeds is not None:
            inputs_embeds = tf.transpose(inputs_embeds, perm=(1, 0, 2))
            qlen, bsz = shape_list(inputs_embeds)[:2]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        if mems is None:
            mems = self.init_mems(bsz)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
        # and head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
        if head_mask is not None:
            raise NotImplementedError
        else:
            head_mask = [None] * self.n_layer

        if inputs_embeds is not None:
            word_emb = inputs_embeds
        else:
            word_emb = self.word_emb(input_ids)

        mlen = shape_list(mems[0])[0] if mems is not None else 0
        klen = mlen + qlen

        attn_mask = tf.ones([qlen, qlen])
        mask_u = tf.linalg.band_part(attn_mask, 0, -1)
        mask_dia = tf.linalg.band_part(attn_mask, 0, 0)
        attn_mask_pad = tf.zeros([qlen, mlen])
        dec_attn_mask = tf.concat([attn_mask_pad, mask_u - mask_dia], 1)
        if self.same_length:
            mask_l = tf.linalg.band_part(attn_mask, -1, 0)
            dec_attn_mask = tf.concat([
                dec_attn_mask[:, :qlen] + mask_l - mask_dia,
                dec_attn_mask[:, qlen:]
            ], 1)
        # ::: PyTorch masking code for reference :::
        # if self.same_length:
        #     all_ones = word_emb.new_ones((qlen, klen), dtype=torch.uint8)
        #     mask_len = klen - self.mem_len
        #     if mask_len > 0:
        #         mask_shift_len = qlen - mask_len
        #     else:
        #         mask_shift_len = qlen
        #     dec_attn_mask = (torch.triu(all_ones, 1+mlen)
        #             + torch.tril(all_ones, -mask_shift_len))[:, :, None] # -1
        # else:
        #     dec_attn_mask = torch.triu(
        #         word_emb.new_ones((qlen, klen), dtype=torch.uint8), diagonal=1+mlen)[:,:,None]

        hids = []
        attentions = []
        if self.attn_type == 0:  # default
            pos_seq = tf.range(klen - 1, -1, -1.0)
            if self.clamp_len > 0:
                pos_seq = tf.minimum(pos_seq, self.clamp_len)
            pos_emb = self.pos_emb(pos_seq)

            core_out = self.drop(word_emb, training=training)
            pos_emb = self.drop(pos_emb, training=training)

            for i, layer in enumerate(self.layers):
                hids.append(core_out)
                mems_i = None if mems is None else mems[i]
                layer_outputs = layer(
                    core_out,
                    pos_emb,
                    dec_attn_mask,
                    mems_i,
                    head_mask[i],
                    output_attentions,
                    training=training,
                )
                core_out = layer_outputs[0]
                if output_attentions:
                    attentions.append(layer_outputs[1])
        else:  # learnable embeddings and absolute embeddings
            raise NotImplementedError  # Removed these to avoid maintaining dead code - They are not used in our pretrained checkpoint

        core_out = self.drop(core_out, training=training)

        new_mems = self._update_mems(hids, mems, mlen, qlen)

        # We transpose back here to shape [bsz, len, hidden_dim]
        outputs = [tf.transpose(core_out, perm=(1, 0, 2)), new_mems]
        if output_hidden_states:
            # Add last layer and transpose to library standard shape [bsz, len, hidden_dim]
            hids.append(core_out)
            hids = list(tf.transpose(t, perm=(1, 0, 2)) for t in hids)
            outputs.append(hids)
        if output_attentions:
            # Transpose to library standard shape [bsz, n_heads, query_seq_len, key_seq_len]
            attentions = list(
                tf.transpose(t, perm=(2, 3, 0, 1)) for t in attentions)
            outputs.append(attentions)
        return outputs  # last hidden state, new_mems, (all hidden states), (all attentions)
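The mask construction above uses tf.linalg.band_part to build a strictly upper-triangular "future token" mask, padded with zeros over the memory. A toy check with qlen=3, mlen=2 (1 marks a masked position):

import tensorflow as tf

qlen, mlen = 3, 2
ones = tf.ones([qlen, qlen])
mask_u = tf.linalg.band_part(ones, 0, -1)   # upper triangle incl. diagonal
mask_dia = tf.linalg.band_part(ones, 0, 0)  # diagonal only
dec_attn_mask = tf.concat([tf.zeros([qlen, mlen]), mask_u - mask_dia], 1)
# [[0. 0. 0. 1. 1.]
#  [0. 0. 0. 0. 1.]
#  [0. 0. 0. 0. 0.]]  -> row i attends to all memory and positions <= i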
Example #55
0
    def __init__(self, constants_dictionary, data_dictionary):

        self.encoder_train_inp = data_dictionary['encoder_train_inp']
        self.source_embedding = data_dictionary['encoder_embedding']
        self.encoder_sequence_length = data_dictionary['encoder_sequence_length']
        encoder_emb_inp = tf.nn.embedding_lookup(self.source_embedding, self.encoder_train_inp)
        encoder_emb_inp_time_major = tf.transpose(encoder_emb_inp, perm=[1, 0, 2])
        # encoder_sequence_length = tf.placeholder(tf.int32, shape=[None, ])

        self.decoder_train_inp = data_dictionary['decoder_train_inp']
        self.target_embedding = data_dictionary['decoder_embedding']
        self.decoder_sequence_length = data_dictionary['decoder_sequence_length']
        decoder_emb_inp = tf.nn.embedding_lookup(self.target_embedding, self.decoder_train_inp)
        decoder_emb_inp_time_major = tf.transpose(decoder_emb_inp, perm=[1, 0, 2])
        # decoder_sequence_length = tf.placeholder(tf.int32, shape=[None, ])

        self.dec_train_labels = data_dictionary['dec_train_labels']
        target_train_one_hot = tf.one_hot(self.dec_train_labels, constants_dictionary['TARGET_VOCAB_SIZE'], on_value=1.0, off_value=0.0)

        # processed_input_encoder = tf.transpose(enc_emb, perm=[1, 0, 2])
        initial_hidden_encoder = tf.zeros([constants_dictionary['BATCH_SIZE'], constants_dictionary['HIDDEN_LAYER_SIZE_ENCODER']])
        projection_layer = tf.layers.Dense(constants_dictionary['TARGET_VOCAB_SIZE'], use_bias=False)

        with tf.variable_scope('encoder'):
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(constants_dictionary['NUM_UNITS'])
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_emb_inp, sequence_length=self.encoder_sequence_length, dtype=tf.float32)

        with tf.variable_scope('decoder'):
            decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(constants_dictionary['NUM_UNITS'])
            helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp, self.decoder_sequence_length)
            decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state,  output_layer=projection_layer)
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)
            logits = outputs.rnn_output

        self.prediction_output = tf.argmax(tf.nn.softmax(logits), axis=2)  # softmax preserves ordering, so this equals argmax over logits
        self.loss_batch = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=target_train_one_hot)
        self.loss = tf.reduce_mean(self.loss_batch)
Example #56
0
def get_slice(data, idx, parts):
    # return the idx-th of `parts` equal slices along the batch dimension
    shape = tf.shape(data)
    size = tf.concat([shape[:1] // parts, shape[1:]], 0)
    stride = tf.concat([shape[:1] // parts, shape[1:] * 0], 0)
    start = stride * idx
    return tf.slice(data, start, size)
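A quick check of get_slice (assuming the helper is available at module level), carving a 6-row batch into two halves:

import tensorflow as tf

data = tf.reshape(tf.range(12), (6, 2))
first = get_slice(data, 0, 2)   # rows 0..2
second = get_slice(data, 1, 2)  # rows 3..5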
def create_generator(generator_inputs, generator_outputs_channels):
    layers = []

    print('encoder:')
    print(generator_inputs.shape)
    # encoder_1: [batch, 256, 256, in_channels] => [batch, 128, 128, ngf]
    with tf.variable_scope("encoder_1"):
        output = gen_conv(generator_inputs, a.ngf)
        layers.append(output)
        print(output.shape)

    layer_specs = [
        a.ngf * 2, # encoder_2: [batch, 128, 128, ngf] => [batch, 64, 64, ngf * 2]
        a.ngf * 4, # encoder_3: [batch, 64, 64, ngf * 2] => [batch, 32, 32, ngf * 4]
        a.ngf * 8, # encoder_4: [batch, 32, 32, ngf * 4] => [batch, 16, 16, ngf * 8]
        a.ngf * 8, # encoder_5: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8]
        a.ngf * 8, # encoder_6: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8]
        a.ngf * 8, # encoder_7: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8]
        a.ngf * 8, # encoder_8: [batch, 2, 2, ngf * 8] => [batch, 1, 1, ngf * 8]
    ]

    for out_channels in layer_specs[:6]:
        with tf.variable_scope("encoder_%d" % (len(layers) + 1)):
            rectified = lrelu(layers[-1], 0.2)
            # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels]
            
            # orig: ---------------------------
            # convolved = gen_conv(rectified, out_channels)
            # Moha: ---------------------------
            convolved = gen_conv_dilate(rectified, out_channels)
            #convolved = gen_conv(rectified, out_channels)
            print(convolved.shape)
            # end Moha -----------------------------
            output = batchnorm(convolved)
            layers.append(output)
            
    with tf.variable_scope("encoder_%d" % (len(layers) + 1)):
            rectified = lrelu(layers[-1], 0.2)
            convolved = gen_conv(rectified, layer_specs[6])
            print(convolved.shape)
            output = batchnorm(convolved)
            layers.append(output)  

    print('decoder:')
    layer_specs = [
        (a.ngf * 8, 0.5),   # decoder_8: [batch, 1, 1, ngf * 8] => [batch, 2, 2, ngf * 8 * 2]
        (a.ngf * 8, 0.5),   # decoder_7: [batch, 2, 2, ngf * 8 * 2] => [batch, 4, 4, ngf * 8 * 2]
        (a.ngf * 8, 0.5),   # decoder_6: [batch, 4, 4, ngf * 8 * 2] => [batch, 8, 8, ngf * 8 * 2]
        (a.ngf * 8, 0.0),   # decoder_5: [batch, 8, 8, ngf * 8 * 2] => [batch, 16, 16, ngf * 8 * 2]
        (a.ngf * 4, 0.0),   # decoder_4: [batch, 16, 16, ngf * 8 * 2] => [batch, 32, 32, ngf * 4 * 2]
        (a.ngf * 2, 0.0),   # decoder_3: [batch, 32, 32, ngf * 4 * 2] => [batch, 64, 64, ngf * 2 * 2]
        (a.ngf, 0.0),       # decoder_2: [batch, 64, 64, ngf * 2 * 2] => [batch, 128, 128, ngf * 2]
    ]

    num_encoder_layers = len(layers)
    for decoder_layer, (out_channels, dropout) in enumerate(layer_specs):
        skip_layer = num_encoder_layers - decoder_layer - 1
        with tf.variable_scope("decoder_%d" % (skip_layer + 1)):
            if decoder_layer == 0:
                # first decoder layer doesn't have skip connections
                # since it is directly connected to the skip_layer
                input = layers[-1]
            else:
                input = tf.concat([layers[-1], layers[skip_layer]], axis=3)

            rectified = tf.nn.relu(input)
            # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels]
            output = gen_deconv(rectified, out_channels)
            output = batchnorm(output)

            if dropout > 0.0:
                output = tf.nn.dropout(output, keep_prob=1 - dropout)

            layers.append(output)
            print(output.shape)

    # decoder_1: [batch, 128, 128, ngf * 2] => [batch, 256, 256, generator_outputs_channels]
    with tf.variable_scope("decoder_1"):
        input = tf.concat([layers[-1], layers[0]], axis=3)
        rectified = tf.nn.relu(input)
        output = gen_deconv(rectified, generator_outputs_channels)
        output = tf.tanh(output)
        layers.append(output)
        print(output.shape)

    return layers[-1]
Example #58
0
def decoder(target, state, params):
    mask = tf.to_float(tf.cast(target, tf.bool))
    hidden_size = params.hidden_size

    if 'decoder' not in state:
        target, mask = util.remove_invalid_seq(target, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "tgt_embedding"
    tgt_emb = tf.get_variable(embed_name,
                              [params.tgt_vocab.size(), params.embed_size])
    tgt_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(tgt_emb, target)
    inputs = tf.nn.bias_add(inputs, tgt_bias)

    # shift
    if 'decoder' not in state:
        inputs = tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])
        inputs = inputs[:, :-1, :]
    else:
        inputs = tf.cond(
            tf.reduce_all(tf.equal(target, params.tgt_vocab.pad())),
            lambda: tf.zeros_like(inputs), lambda: inputs)
        mask = tf.ones_like(mask)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("decoder"):
        x = inputs
        for layer in range(params.num_decoder_layer):
            with tf.variable_scope("layer_{}".format(layer)):
                init_state = state["decoder_initializer"]["layer_{}".format(
                    layer)]
                if 'decoder' in state:
                    init_state = state["decoder"]["state"]["layer_{}".format(
                        layer)]
                if layer == 0 or params.use_deep_att:
                    returns = rnn.cond_rnn(params.cell,
                                           x,
                                           state["encodes"],
                                           hidden_size,
                                           init_state=init_state,
                                           mask=mask,
                                           num_heads=params.num_heads,
                                           mem_mask=state["mask"],
                                           ln=params.layer_norm,
                                           sm=params.swap_memory,
                                           one2one=False,
                                           dp=params.dropout)
                    (_, hidden_state), (outputs,
                                        _), contexts, attentions = returns
                    c = contexts
                else:
                    if params.caencoder:
                        returns = rnn.cond_rnn(params.cell,
                                               x,
                                               c,
                                               hidden_size,
                                               init_state=init_state,
                                               mask=mask,
                                               mem_mask=mask,
                                               ln=params.layer_norm,
                                               sm=params.swap_memory,
                                               num_heads=params.num_heads,
                                               one2one=True,
                                               dp=params.dropout)
                        (_, hidden_state), (outputs,
                                            _), contexts, attentions = returns
                    else:
                        outputs = rnn.rnn(params.cell,
                                          tf.concat([x, c], -1),
                                          hidden_size,
                                          mask=mask,
                                          init_state=init_state,
                                          ln=params.layer_norm,
                                          sm=params.swap_memory,
                                          dp=params.dropout)
                        outputs, hidden_state = outputs[1]
                if 'decoder' in state:
                    state['decoder']['state']['layer_{}'.format(
                        layer)] = hidden_state

                y = func.linear(outputs, hidden_size, ln=False, scope="ff")

                # short cut via residual connection
                if x.get_shape()[-1].value == y.get_shape()[-1].value:
                    x = func.residual_fn(x, y, dropout=params.dropout)
                else:
                    x = y
                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    feature = func.linear(tf.concat([x, c], -1),
                          params.embed_size,
                          ln=params.layer_norm,
                          scope="ff")
    feature = tf.nn.tanh(feature)

    if util.valid_dropout(params.dropout):
        feature = tf.nn.dropout(feature, 1. - params.dropout)

    if 'dev_decode' in state:
        feature = x[:, -1, :]

    embed_name = "tgt_embedding" if params.shared_target_softmax_embedding \
        else "softmax_embedding"
    embed_name = "embedding" if params.shared_source_target_embedding \
        else embed_name
    softmax_emb = tf.get_variable(embed_name,
                                  [params.tgt_vocab.size(), params.embed_size])
    feature = tf.reshape(feature, [-1, params.embed_size])
    logits = tf.matmul(feature, softmax_emb, False, True)

    soft_label, normalizer = util.label_smooth(target,
                                               util.shape_list(logits)[-1],
                                               factor=params.label_smooth)
    centropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                          labels=soft_label)
    centropy -= normalizer
    centropy = tf.reshape(centropy, tf.shape(target))

    loss = tf.reduce_sum(centropy * mask, -1) / tf.reduce_sum(mask, -1)
    loss = tf.reduce_mean(loss)

    # these mask tricks mainly used to deal with zero shapes, such as [0, 1]
    loss = tf.cond(tf.equal(tf.shape(target)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32), lambda: loss)

    return loss, logits, state
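The loss above subtracts a normalizer from a label-smoothed cross entropy. A minimal sketch of the standard smoothing formulation (util.label_smooth's exact behavior may differ):

import tensorflow as tf

def label_smooth(labels, vocab_size, factor=0.1):
    # one-hot targets softened: the true class keeps 1 - factor, and the
    # remaining probability mass is spread evenly over the other classes
    on = 1.0 - factor
    off = factor / (vocab_size - 1)
    return tf.one_hot(labels, vocab_size, on_value=on, off_value=off)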
Example #59
0
def comp2re(self, x):
    return tf.concat((x[:, 0], x[:, 1]), axis=0)
def create_model_MT(inputs, targets_1, targets_2):

    def create_discriminator(discrim_inputs, discrim_targets):
        n_layers = 3
        layers = []
        print('discriminator:')

        # 2x [batch, height, width, in_channels] => [batch, height, width, in_channels * 2]
        input = tf.concat([discrim_inputs, discrim_targets], axis=3)
        print(input.shape)
        # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf]
        with tf.variable_scope("layer_1"):
            convolved = discrim_conv(input, a.ndf, stride=2)
            rectified = lrelu(convolved, 0.2)
            layers.append(rectified)
            print(convolved.shape)

        # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2]
        # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4]
        # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8]
        for i in range(n_layers):
            with tf.variable_scope("layer_%d" % (len(layers) + 1)):
                out_channels = a.ndf * min(2**(i+1), 8)
                stride = 1 if i == n_layers - 1 else 2  # last layer here has stride 1
                convolved = discrim_conv(layers[-1], out_channels, stride=stride)
                normalized = batchnorm(convolved)
                rectified = lrelu(normalized, 0.2)
                layers.append(rectified)
                print(convolved.shape)

        # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1]
        with tf.variable_scope("layer_%d" % (len(layers) + 1)):
            convolved = discrim_conv(rectified, out_channels=1, stride=1)
            output = tf.sigmoid(convolved)
            layers.append(output)
            print(output.shape)

        return layers[-1]


    targets=tf.concat([targets_1, targets_2],axis=3)

    with tf.variable_scope("generator"):
        out_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs, out_channels)

    # create two copies of discriminator, one for real pairs and one for fake pairs
    # they share the same underlying variables
    with tf.name_scope("real_discriminator"):
        with tf.variable_scope("discriminator"):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_real = create_discriminator(inputs, targets)

    with tf.name_scope("fake_discriminator"):
        with tf.variable_scope("discriminator", reuse=True):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope("discriminator_loss"):
        # minimizing -tf.log will try to get inputs to 1
        # predict_real => 1 
        # predict_fake => 0
        discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) + tf.log(1 - predict_fake + EPS)))
        discrim_loss_real = tf.reduce_mean(-tf.log(predict_real + EPS))
        discrim_loss_fake = tf.reduce_mean(-tf.log(1 - predict_fake + EPS))
    
    with tf.name_scope("generator_loss"):
        # predict_fake => 1
        # abs(targets - outputs) => 0
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))        
        gen_loss_dice = 1 - dice_coe(outputs, targets, loss_type='sorensen')
        gen_loss_jaccard = 1 - dice_coe(outputs, targets, loss_type='jaccard')
        gen_loss_Tversky = tf.abs(1 - tversky_loss(targets, outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight
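        # the dice / jaccard / Tversky terms are tracked for monitoring only;
        # gen_loss itself combines just the GAN and L1 terms (a dice_coe
        # sketch follows the model constructor below)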
    

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("discriminator")]
        discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        discrim_grads_and_vars = discrim_optim.compute_gradients(discrim_loss, var_list=discrim_tvars)
        discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        with tf.control_dependencies([discrim_train]):
            gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("generator")]
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars)
            gen_train = gen_optim.apply_gradients(gen_grads_and_vars)
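            # control_dependencies ensures each gen_train step first runs one
            # discriminator update (discrim_train), interleaving the two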

    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    
    # orig: ----------------------
    #update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1])
    # end orig
    # Moha: ----------------------
    update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1,
                               gen_loss, discrim_loss_real, discrim_loss_fake,
                               gen_loss_jaccard, gen_loss_dice, gen_loss_Tversky])
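    # ema.apply creates a shadow variable and an update op for every tracked
    # tensor; ema.average(x) below reads the shadow value, so the Model fields
    # report exponentially smoothed losses (decay=0.99) rather than raw ones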

    outputs_1 = outputs[:, :, :, :3]
    outputs_2 = outputs[:, :, :, 3:]
    # End Moha
    
    global_step = tf.train.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    
    return Model(
        predict_real=predict_real,
        predict_fake=predict_fake,

        discrim_loss=ema.average(discrim_loss),
        discrim_grads_and_vars=discrim_grads_and_vars,

        gen_loss_GAN=ema.average(gen_loss_GAN),
        gen_loss_L1=ema.average(gen_loss_L1),
        gen_grads_and_vars=gen_grads_and_vars,

        train=tf.group(update_losses, incr_global_step, gen_train),
        # Moha: -----------------
        outputs_1=outputs_1,
        outputs_2=outputs_2,
        gen_loss=ema.average(gen_loss),
        discrim_loss_fake=ema.average(discrim_loss_fake),
        discrim_loss_real=ema.average(discrim_loss_real),
        gen_loss_jaccard=ema.average(gen_loss_jaccard),
        gen_loss_dice=ema.average(gen_loss_dice),
        gen_loss_Tversky=ema.average(gen_loss_Tversky)
        # End Moha
    )
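
The helper functions discrim_conv, lrelu, and dice_coe are assumed from
elsewhere in this example's repository; below are minimal sketches consistent
with the shape annotations and loss usage above (illustrative stand-ins, not
the exact helpers).

import tensorflow as tf

def discrim_conv(batch_input, out_channels, stride):
    # pad 1 pixel per side, then a 4x4 "valid" convolution:
    # out = (in + 2 - 4) / stride + 1, giving 256 -> 128 at stride 2 and
    # 32 -> 31 -> 30 at stride 1, matching the shapes printed above
    padded = tf.pad(batch_input, [[0, 0], [1, 1], [1, 1], [0, 0]],
                    mode="CONSTANT")
    return tf.layers.conv2d(padded, out_channels, kernel_size=4,
                            strides=(stride, stride), padding="valid",
                            kernel_initializer=tf.random_normal_initializer(0, 0.02))

def lrelu(x, a):
    # leaky ReLU with slope a on the negative side
    return tf.maximum(a * x, x)

def dice_coe(output, target, loss_type='sorensen', smooth=1e-5):
    # soft Dice (sorensen) or Jaccard-style overlap between two maps in [0, 1]
    inse = tf.reduce_sum(output * target)
    if loss_type == 'sorensen':
        l, r = tf.reduce_sum(output), tf.reduce_sum(target)
    else:  # 'jaccard'
        l, r = tf.reduce_sum(output * output), tf.reduce_sum(target * target)
    return (2. * inse + smooth) / (l + r + smooth)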