Example #1
0
  def __init__(self, sess, t_test, t_learn_start, model_dir, variables, max_to_keep=20):
    """Set up the step counter, checkpoint saver and summary plumbing.

    Args:
      sess: session kept on the instance; its graph is handed to the
          summary writer.
      t_test: stored on the instance as `t_test`.
      t_learn_start: stored on the instance as `t_learn_start`.
      model_dir: stored as `model_dir`; also names the directory under
          `./logs/` that the summary writer uses.
      variables: list of variables to checkpoint; the step counter is
          appended to it for the saver.
      max_to_keep: forwarded to `tf.train.Saver`.
    """
    self.sess = sess
    self.t_test = t_test
    self.t_learn_start = t_learn_start

    self.reset()
    self.max_avg_ep_reward = 0

    # Non-trainable step counter, plus the op that increments it by one.
    with tf.variable_scope('t'):
      self.t_op = tf.Variable(0, trainable=False, name='t')
      self.t_add_op = self.t_op.assign_add(1)

    self.model_dir = model_dir
    self.saver = tf.train.Saver(variables + [self.t_op], max_to_keep=max_to_keep)
    self.writer = tf.train.SummaryWriter('./logs/%s' % self.model_dir, self.sess.graph)

    with tf.variable_scope('summary'):
      self.summary_placeholders = {}
      self.summary_ops = {}

      def register(tag, make_summary):
        # One float32 placeholder per tag feeds one summary op; spaces are
        # replaced in the placeholder name only, the tag itself is kept.
        feed = tf.placeholder('float32', None, name=tag.replace(' ', '_'))
        self.summary_placeholders[tag] = feed
        self.summary_ops[tag] = make_summary(tag, feed)

      for tag in ('average/reward', 'average/loss', 'average/q',
                  'episode/max reward', 'episode/min reward',
                  'episode/avg reward'):
        register(tag, tf.scalar_summary)

      for tag in ('episode/rewards',):
        register(tag, tf.histogram_summary)
    def _build_net(self):
        """Build the evaluation network, its loss/train op, and the target network.

        Both networks share the same two-layer architecture (a ReLU hidden
        layer followed by a linear output with `self.n_actions` units) but
        register their variables in different collections
        ('eval_net_params' vs. 'target_net_params').

        Sets `self.s`, `self.q_target`, `self.q_eval`, `self.loss`,
        `self._train_op`, `self.s_` and `self.q_next`.
        """
        def dense_stack(inputs, collections, hidden_units, w_init, b_init):
            # Hidden layer with ReLU activation.
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, hidden_units],
                                     initializer=w_init, collections=collections)
                b1 = tf.get_variable('b1', [1, hidden_units],
                                     initializer=b_init, collections=collections)
                hidden = tf.nn.relu(tf.matmul(inputs, w1) + b1)

            # Linear output layer, one value per action.
            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [hidden_units, self.n_actions],
                                     initializer=w_init, collections=collections)
                b2 = tf.get_variable('b2', [1, self.n_actions],
                                     initializer=b_init, collections=collections)
                return tf.matmul(hidden, w2) + b2

        # ------------------ evaluation network ------------------
        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
        # Target values fed in when computing the loss.
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')

        with tf.variable_scope('eval_net'):
            # Layer configuration, reused below for the target network.
            eval_collections = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            hidden_units = 20
            w_init = tf.random_normal_initializer(0., 0.3)
            b_init = tf.constant_initializer(0.1)

            self.q_eval = dense_stack(self.s, eval_collections, hidden_units, w_init, b_init)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(squared_error)
        with tf.variable_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(self.lr)
            self._train_op = optimizer.minimize(self.loss)

        # ------------------ target network ------------------
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')
        with tf.variable_scope('target_net'):
            target_collections = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

            self.q_next = dense_stack(self.s_, target_collections, hidden_units, w_init, b_init)
Example #3
0
  def build_greedy_training(self, state, network_states):
    """Builds the training graph for this component without a gold path.

    Fixed embeddings are fetched (updating `state.handle`), linked
    embeddings are fetched per linked feature spec, the network is created
    from both, and the component's network states are updated.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
          underlying master to this component.
      network_states: dictionary of component NetworkState objects

    Returns:
      state handle: final state after advancing
      cost: regularization cost, possibly associated with embedding matrices
      correct: constant 0, since no gold path is available.
      total: constant 0, since no gold path is available.
    """
    logging.info('Building component: %s', self.spec.name)
    stride = state.current_batch_size * self.training_beam_size

    with tf.variable_scope(self.name, reuse=True):
      state.handle, fixed = fetch_differentiable_fixed_embeddings(
          self, state, stride)

    # Linked embeddings are fetched outside the component's variable scope.
    linked = []
    for linked_spec in self.spec.linked_feature:
      linked.append(fetch_linked_embedding(self, network_states, linked_spec))

    with tf.variable_scope(self.name, reuse=True):
      layer_outputs = self.network.create(
          fixed, linked, None, None, True, stride=stride)
    update_network_states(self, layer_outputs, network_states, stride)

    # Only regularization contributes to the cost here.
    reg_cost = self.add_regularizer(tf.constant(0.))

    correct = tf.constant(0)
    total = tf.constant(0)
    return state.handle, reg_cost, correct, total
Example #4
0
def inference_small_config(x, c):
    """Build the small, non-bottleneck three-scale network on top of `x`.

    The config dict `c` is mutated in place: this function sets the keys
    "bottleneck", "ksize", "stride", "conv_filters_out",
    "block_filters_internal" and "stack_stride" before each helper call,
    so the caller's dict carries the last-written values afterwards.

    Args:
        x: input tensor passed through `conv`, `bn`, `activation` and
            `stack`.
        c: config dict; must contain "num_classes" (plus whatever the
            helper functions read).

    Returns:
        The spatially averaged features, or the output of `fc` when
        `c["num_classes"]` is not None.

    Raises:
        KeyError: if `c` has no "num_classes" entry.
    """
    c["bottleneck"] = False
    c["ksize"] = 3
    c["stride"] = 1

    with tf.variable_scope("scale1"):
        c["conv_filters_out"] = 16
        c["block_filters_internal"] = 16
        c["stack_stride"] = 1
        x = conv(x, c)
        x = bn(x, c)
        x = activation(x)
        x = stack(x, c)

    with tf.variable_scope("scale2"):
        c["block_filters_internal"] = 32
        c["stack_stride"] = 2
        x = stack(x, c)

    with tf.variable_scope("scale3"):
        c["block_filters_internal"] = 64
        c["stack_stride"] = 2
        x = stack(x, c)

    # post-net: average over axes 1 and 2
    x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool")

    # Identity comparison is the correct test for None; `!= None` can be
    # hijacked by objects that overload comparison operators.
    if c["num_classes"] is not None:
        with tf.variable_scope("fc"):
            x = fc(x, c)

    return x
Example #5
0
  def _extract_feature_ids(self, state, network_states, during_training):
    """Extracts fixed feature IDs, builds the network and updates states.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
          underlying master to this component.
      network_states: Dictionary of component NetworkState objects.
      during_training: Whether the graph is being constructed during training;
          selects the training or inference beam size for the stride.

    Returns:
      state handle: Final state after advancing.
    """
    logging.info('Building component: %s', self.spec.name)

    beam_size = (self.training_beam_size if during_training
                 else self.inference_beam_size)
    stride = state.current_batch_size * beam_size

    # The scope is entered twice on purpose: once for feature extraction and
    # once for network creation, exactly as separate scope entries.
    with tf.variable_scope(self.name, reuse=True):
      state.handle, feature_ids = extract_fixed_feature_ids(self, state, stride)

    with tf.variable_scope(self.name, reuse=True):
      layer_outputs = self.network.create(
          feature_ids, [], None, None, during_training, stride=stride)

    update_network_states(self, layer_outputs, network_states, stride)
    return state.handle
Example #6
0
    def __call__(self, inputs, state, scope=None):
        """Run one GRU step.

        Computes reset and update gates from `[inputs, state]`, a candidate
        from `[inputs, reset * state]`, and blends the previous state with
        the candidate using the update gate. Afterwards the 'Matrix' and
        'Bias' variables of both linear layers are collected into `self.W`
        and `self.b` (gates first, candidate second).

        Returns:
            The new hidden state, returned twice as a 2-tuple.
        """
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):
                # Bias starts at 1.0 so the cell initially neither resets
                # nor updates.
                gate_logits = _linear(
                    [inputs, state], 2 * self._num_units, True, 1.0,
                    self.weights_init, self.trainable, self.restore,
                    self.reuse)
                reset_pre, update_pre = array_ops.split(1, 2, gate_logits)
                reset_gate = self._inner_activation(reset_pre)
                update_gate = self._inner_activation(update_pre)

            with tf.variable_scope("Candidate"):
                candidate_pre = _linear(
                    [inputs, reset_gate * state], self._num_units, True, 0.,
                    self.weights_init, self.trainable, self.restore,
                    self.reuse)
                candidate = self._activation(candidate_pre)

            new_h = update_gate * state + (1 - update_gate) * candidate

            # Retrieve RNN variables created by the two linear layers.
            self.W, self.b = [], []
            for linear_scope in ('Gates/Linear', 'Candidate/Linear'):
                with tf.variable_scope(linear_scope, reuse=True):
                    self.W.append(tf.get_variable('Matrix'))
                    self.b.append(tf.get_variable('Bias'))

        return new_h, new_h
Example #7
0
    def build_graph(self, input, output):
        """Build generator, discriminator, losses and image summaries.

        Both tensors are first rescaled with `x / 128.0 - 1`. The generator
        runs under scope 'gen'; the discriminator runs twice under scope
        'discrim' (real pair, then generated pair). An L1 term weighted by
        `LAMBDA` is added to `self.g_loss`.
        """
        input = input / 128.0 - 1
        output = output / 128.0 - 1

        kernel_init = tf.truncated_normal_initializer(stddev=0.02)
        with argscope([Conv2D, Conv2DTranspose], kernel_initializer=kernel_init):
            with tf.variable_scope('gen'):
                fake_output = self.generator(input)
            with tf.variable_scope('discrim'):
                real_pred = self.discriminator(input, output)
                fake_pred = self.discriminator(input, fake_output)

        self.build_losses(real_pred, fake_pred)

        # Add the L1 reconstruction term to the generator loss.
        l1_err = tf.reduce_mean(tf.abs(fake_output - output), name='L1_loss')
        self.g_loss = tf.add(self.g_loss, LAMBDA * l1_err, name='total_g_loss')
        add_moving_summary(l1_err, self.g_loss)

        # TensorBoard visualization: single-channel images are expanded to RGB.
        if IN_CH == 1:
            input = tf.image.grayscale_to_rgb(input)
        if OUT_CH == 1:
            output = tf.image.grayscale_to_rgb(output)
            fake_output = tf.image.grayscale_to_rgb(fake_output)

        panels = [input, output, fake_output]
        visualize_tensors('input,output,fake', panels, max_outputs=max(30, BATCH))

        self.collect_variables()
def inference(inputs, name):
    '''
    Define the architecture and build the graph.

    Two fully connected layers: a 9 -> 12 ReLU layer under scope
    'fc1_<name>' and a linear 12 -> NUM_CLASSES layer under scope
    'fc2_<name>'. No softmax is applied here; the linear output of the
    second layer is returned.
    '''
    # Hidden layer (weights carry weight decay 0.004).
    with tf.variable_scope('fc1_' + name):
        hidden_w = _variable_with_weight_decay(
            'weights', shape=[9, 12], stddev=0.04, wd=0.004)
        hidden_b = _variable_on_cpu('biases', [12], tf.constant_initializer(0.1))
        pre_activation = tf.add(tf.matmul(inputs, hidden_w), hidden_b)
        hidden = tf.nn.relu(pre_activation)

    # Output layer (no weight decay); the op is named after the scope.
    with tf.variable_scope('fc2_' + name) as scope:
        out_w = _variable_with_weight_decay(
            'weights', [12, NUM_CLASSES], stddev=0.04, wd=0.0)
        out_b = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        return tf.nn.xw_plus_b(hidden, out_w, out_b, name=scope.name)
Example #9
0
    def __call__(self, inputs, state, scope=None):
        """Run one LSTM step.

        `state` is either a `(c, h)` pair (when `self._state_is_tuple`) or a
        single tensor that is split in two along axis 1. The 'Matrix' and
        'Bias' variables of the linear layer are stored on `self.W` and
        `self.b`.

        Returns:
            `(new_h, new_state)`, where `new_state` mirrors the format of
            the incoming state (LSTMStateTuple or concatenated tensor).
        """
        with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            if self._state_is_tuple:
                prev_c, prev_h = state
            else:
                prev_c, prev_h = array_ops.split(1, 2, state)

            # All four gate pre-activations come from a single linear map.
            gates = _linear([inputs, prev_h], 4 * self._num_units, True, 0.,
                            self.weights_init, self.trainable, self.restore,
                            self.reuse)

            # Order: input gate, new input, forget gate, output gate.
            in_pre, cand_pre, forget_pre, out_pre = array_ops.split(1, 4, gates)

            forget_gate = self._inner_activation(forget_pre + self._forget_bias)
            kept = prev_c * forget_gate
            input_gate = self._inner_activation(in_pre)
            candidate = self._activation(cand_pre)
            new_c = kept + input_gate * candidate

            new_h = self._activation(new_c) * self._inner_activation(out_pre)

            if self._state_is_tuple:
                new_state = _rnn_cell.LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(1, [new_c, new_h])

            # Retrieve RNN variables created by the linear layer.
            with tf.variable_scope('Linear', reuse=True):
                self.W = tf.get_variable('Matrix')
                self.b = tf.get_variable('Bias')

            return new_h, new_state
Example #10
0
    def loss(self, logits, labels):
        """Adds loss ops to the computational graph.

        The loss is the summed sparse softmax cross-entropy plus an L2
        penalty on the existing variables "Composition/W1" and
        "Projection/U", scaled by `self.config.l2`.

        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        label_ids = tf.convert_to_tensor(labels, dtype=tf.int64)
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label_ids)

        # Both weight matrices must already exist; they are looked up by name.
        with tf.variable_scope('Composition', reuse=True):
            composition_w = tf.get_variable("W1")
        with tf.variable_scope('Projection', reuse=True):
            projection_w = tf.get_variable("U")

        weight_penalty = tf.nn.l2_loss(composition_w) + tf.nn.l2_loss(projection_w)
        weight_penalty = weight_penalty * self.config.l2

        return tf.reduce_sum(xent) + weight_penalty
Example #11
0
    def add_model_vars(self):
        '''
        Create the model parameters and the optimizer.

        Variables created (so later graph-building code can reuse them):
            Composition/embedding:  tensor(vocab_size, embed_size)
            Composition/W1:         tensor(2 * embed_size, embed_size)
            Composition/b1:         tensor(1, embed_size)
            Projection/U:           tensor(embed_size, output_size)
            Projection/bs:          tensor(1, output_size)

        Also sets `self.optimizer` (Adam with `self.config.lr`) and
        `self.dummy_minimizer`.
        '''
        embed_size = self.config.embed_size
        vocab_size = len(self.vocab)
        output_size = self.config.label_size

        composition_vars = (
            ("embedding", (vocab_size, embed_size)),
            ("W1", (2 * embed_size, embed_size)),
            ("b1", (1, embed_size)),
        )
        projection_vars = (
            ("U", (embed_size, output_size)),
            ("bs", (1, output_size)),
        )

        with tf.variable_scope('Composition'):
            for var_name, var_shape in composition_vars:
                tf.get_variable(var_name, shape=var_shape)
        with tf.variable_scope('Projection'):
            for var_name, var_shape in projection_vars:
                tf.get_variable(var_name, shape=var_shape)

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr)

        # Minimizing a throwaway sum over all trainable variables makes the
        # optimizer create its own variables now, so they exist for
        # initialization and before variables are restored later. The
        # resulting op should never actually be executed.
        dummy_total = tf.constant(0.0)
        for var in tf.trainable_variables():
            dummy_total += tf.reduce_sum(var)
        self.dummy_minimizer = self.optimizer.minimize(dummy_total)
Example #12
0
    def _conv_layers(self,x):
        """Stack the convolutional trunk on `x` and return the final output.

        Layout: a 7x7 stride-2 convolution with max-pooling ('scale0'),
        four residual stages ('scale1'..'scale4'), global average pooling
        and a 10-node fully connected layer.
        """
        conv_layers = Layers(x)

        # Per-stage block counts and channel widths; index 0 of
        # `res_blocks` is not read below.
        res_blocks = [1, 3, 4, 23, 3]
        output_channels = [64, 256, 512, 1024, 2048]

        with tf.variable_scope('scale0'):
            conv_layers.conv2d(filter_size=7, output_channels=output_channels[0],
                               stride=2, padding='SAME', b_value=None)
            conv_layers.maxpool(k=3)

        stage_scopes = ('scale1', 'scale2', 'scale3', 'scale4')
        for stage, scope_name in enumerate(stage_scopes, start=1):
            channels = output_channels[stage]
            with tf.variable_scope(scope_name):
                # First block of each stage uses stride 2.
                conv_layers.res_layer(filter_size=3, output_channels=channels, stride=2)
                for _ in range(res_blocks[stage] - 1):
                    # NOTE(review): this goes through a `conv_layers`
                    # attribute of the Layers object, unlike the stride-2
                    # call above -- confirm that attribute exists and that
                    # this is not meant to be `conv_layers.res_layer`.
                    conv_layers.conv_layers.res_layer(
                        filter_size=3, output_channels=channels, stride=1)

        conv_layers.avgpool(globe=True)

        # Fully connected layer
        conv_layers.fc(output_nodes=10)

        return conv_layers.get_output()
Example #13
0
    def testBlockGRUToGRUCellSingleStep(self):
        """GRUBlockCell and GRUCell must agree on one step with equal seeds."""
        with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
            batch_size, cell_size, input_size = 4, 5, 6
            initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=1994)

            # Graph inputs; their values are overridden through the feed dict.
            x = tf.zeros([batch_size, input_size])
            h = tf.zeros([batch_size, cell_size])

            x_value = np.random.rand(batch_size, input_size)
            h_value = np.random.rand(batch_size, cell_size)
            feed = {x: x_value, h: h_value}

            def run_single_step(scope_name, make_cell):
                # Build the cell under its own scope, initialize, evaluate.
                with tf.variable_scope(scope_name, initializer=initializer):
                    output = make_cell(cell_size)(x, h)
                    sess.run([tf.initialize_all_variables()])
                    return sess.run([output], feed)

            basic_res = run_single_step("basic", tf.nn.rnn_cell.GRUCell)
            block_res = run_single_step("block", gru_ops.GRUBlockCell)

            self.assertEqual(len(block_res), len(basic_res))
            for block, basic in zip(block_res, basic_res):
                self.assertAllClose(block, basic)
def forward_propagation(images):
    """Build the forward pass and return the softmax output.

    `images` is reshaped to [-1, 1750, 1750, 3], passed through three
    sigmoid convolution + pooling stages (32, 64, 128 filters), flattened
    to 14 * 14 * 128 features, fed through a 256-unit sigmoid layer and
    finally a 4-way softmax layer.
    """
    with tf.variable_scope('conv1'):
        kernel1 = weight_variable([5, 5, 3, 32])
        bias1 = bias_variable([32])
        batch = tf.reshape(images, [-1, 1750, 1750, 3])
        act1 = tf.nn.sigmoid(conv2d(batch, kernel1) + bias1)
        _activation_summary(act1)
        pooled1 = max_pool_5x5(act1)

    with tf.variable_scope('conv2'):
        kernel2 = weight_variable([5, 5, 32, 64])
        bias2 = bias_variable([64])
        act2 = tf.nn.sigmoid(conv2d(pooled1, kernel2) + bias2)
        _activation_summary(act2)
        pooled2 = max_pool_5x5(act2)

    with tf.variable_scope('conv3'):
        kernel3 = weight_variable([5, 5, 64, 128])
        bias3 = bias_variable([128])
        act3 = tf.nn.sigmoid(conv2d(pooled2, kernel3) + bias3)
        _activation_summary(act3)
        pooled3 = max_pool_5x5(act3)

    with tf.variable_scope('local3'):
        flat_size = 14 * 14 * 128
        dense_w = weight_variable([flat_size, 256])
        dense_b = bias_variable([256])
        flat = tf.reshape(pooled3, [-1, flat_size])
        dense_out = tf.nn.sigmoid(tf.matmul(flat, dense_w) + dense_b)
        _activation_summary(dense_out)
        # This variable is created but not used anywhere in this function.
        keep_prob = tf.Variable(1.0)
        out_w = weight_variable([256, 4])
        out_b = bias_variable([4])
        probabilities = tf.nn.softmax(tf.matmul(dense_out, out_w) + out_b)
        _activation_summary(probabilities)
        return probabilities
def transformer_layers_sharded(dp,
                               ps_devices,
                               inputs,
                               num_layers,
                               hparams,
                               self_attention_bias=None,
                               enc_output=None,
                               attention_type=AttentionType.GLOBAL,
                               name="transformer"):
  """Multi layer transformer, sharded by the data parallelism dp.

  Each layer applies a self-attention sub-layer chosen by `attention_type`,
  an optional encoder-decoder attention sub-layer, and a feed-forward
  sub-layer that is either a mixture-of-experts or a plain `ffn_layer`.

  Args:
    dp: data-parallelism callable; mostly invoked as `dp(fn, *args)`.
    ps_devices: forwarded to `expert_utils.distributed_moe`.
    inputs: starting value of the layer stack.
    num_layers: number of layers to build.
    hparams: hyperparameters; this function reads `moe_hidden_sizes`,
      `hidden_size`, `layer_prepostprocess_dropout`, `moe_layers_decoder`,
      `mode`, `moe_num_experts`, `moe_k` and `moe_loss_coef`.
    self_attention_bias: bias passed to the GLOCAL attention branch; in the
      GLOBAL branch it is overwritten with a bias computed from `x`.
    enc_output: when not None, an encoder-decoder attention sub-layer is
      added to every layer.
    attention_type: AttentionType member selecting the self-attention branch.
    name: prefix of each layer's variable scope ("<name>_layer_<index>").

  Returns:
    A pair `(output, extra_loss)`: the layer-preprocessed final `x`, and the
    sum of the losses returned by the mixture-of-experts layers (a constant
    0.0 if no layer is a mixture-of-experts layer).
  """
  x = inputs
  extra_loss = tf.constant(0.0)
  # Comma-separated hidden sizes, e.g. "1024,512" -> [1024, 512].
  moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")]
  expert_fn = expert_utils.ffn_expert_fn(
      hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size)
  # The second argument is 1 - dropout rate.
  x = dp(tf.nn.dropout, x, 1.0 - hparams.layer_prepostprocess_dropout)
  for layer in range(num_layers):
    with tf.variable_scope("%s_layer_%d" % (name, layer)):
      # self-attention
      # NOTE(review): in every branch below `dp` receives the *result* of the
      # attention call rather than `dp(fn, *args)` as used elsewhere in this
      # function -- confirm this is the intended calling convention.
      # NOTE(review): if `attention_type` matches none of the branches, `y`
      # is unbound when first used after this chain -- confirm callers only
      # pass the four handled values.
      if attention_type == AttentionType.LOCAL_2D:
        y = dp(local_attention_2d(common_layers.layer_preprocess(x, hparams),
                                  hparams,
                                  attention_type="masked_local_attention_2d"))
      elif attention_type == AttentionType.LOCAL_1D:
        y = dp(local_attention_1d(common_layers.layer_preprocess(x, hparams),
                                  hparams,
                                  attention_type="local_mask_right",
                                  q_padding="LEFT", kv_padding="LEFT"))
      elif attention_type == AttentionType.GLOCAL:
        y = dp(local_global_attention(
            common_layers.layer_preprocess(x, hparams), self_attention_bias,
            hparams, q_padding="LEFT", kv_padding="LEFT"))
      elif attention_type == AttentionType.GLOBAL:
        # The caller-supplied bias is replaced by one derived from `x`.
        self_attention_bias = dp(get_self_attention_bias(x))
        y = dp(full_self_attention(common_layers.layer_preprocess(x, hparams),
                                   self_attention_bias, hparams,
                                   q_padding="LEFT", kv_padding="LEFT"))
      # NOTE(review): this postprocess is called directly, while the ones
      # below go through `dp` -- confirm the difference is intentional.
      x = common_layers.layer_postprocess(x, y, hparams)
      if enc_output is not None:
        y = dp(encdec_attention_1d(common_layers.layer_preprocess(x, hparams),
                                   enc_output, None, hparams))
        x = dp(common_layers.layer_postprocess, x, y, hparams)
      with tf.variable_scope("ffn"):
        # Layers whose index appears in the comma-separated
        # `moe_layers_decoder` string use a mixture-of-experts feed-forward.
        if str(layer) in hparams.moe_layers_decoder.split(","):
          y, loss = expert_utils.distributed_moe(
              dp,
              ps_devices,
              common_layers.layer_preprocess(x, hparams),
              hparams.mode == tf.estimator.ModeKeys.TRAIN,
              input_size=hparams.hidden_size,
              expert_fn=expert_fn,
              num_experts=hparams.moe_num_experts,
              k=hparams.moe_k,
              loss_coef=hparams.moe_loss_coef)
          extra_loss += loss
          x = dp(common_layers.layer_postprocess, x, y, hparams)
        else:
          y = dp(ffn_layer, common_layers.layer_preprocess(x, hparams), hparams)
          x = dp(common_layers.layer_postprocess, x, y, hparams)
  return dp(common_layers.layer_preprocess, x, hparams), extra_loss
Example #16
0
def ce(model, config, scope, connect, threshold = 1e-5):
	"""Add a clipped-softmax cross-entropy stage called `scope` to `model`.

	The stage reads the length entries and outputs that the `connect` stage
	stored in `model`, applies a softmax clipped to
	[threshold, 1 - threshold], creates placeholders for sparse labels, and
	stores the summed cross-entropy under '<scope>_loss'. `config` is not
	read by this function.

	Returns:
		The same `model` dict, with the '<scope>_*' entries added.
	"""
	with tf.variable_scope(scope), tf.name_scope(scope):
		with tf.variable_scope('inputs'), tf.name_scope('inputs'):
			# Lengths are inherited unchanged from the connected stage.
			len0 = model['%s_out0length' %connect]
			len1 = model['%s_out1length' %connect]
			model['%s_in0length' %scope] = len0
			model['%s_in1length' %scope] = len1
			model['%s_in2length' %scope] = model['%s_out2length' %connect]
			max_len2 = model['%s_maxout2length' %connect]
			model['%s_maxin2length' %scope] = max_len2

			# Clipping keeps the later log() away from 0.
			probs = tf.clip_by_value(
				tf.nn.softmax(model['%s_outputs' %connect]),
				threshold, 1. - threshold,
				name = '%s_inputs' %scope)
			model['%s_inputs' %scope] = probs

			model['%s_out0length' %scope] = len0
			model['%s_out1length' %scope] = len1
			model['%s_out2length' %scope] = tf.placeholder(tf.int32, [len0], '%s_out2length' %scope)
			model['%s_maxout2length' %scope] = max_len2

		with tf.variable_scope('labels'), tf.name_scope('labels'):
			# Labels arrive in sparse form (indices + values) and are
			# densified with -1 as the fill value before one-hot encoding.
			model['%s_labels_len' %scope] = tf.placeholder(tf.int32, [len0], '%s_labels_len' %scope)
			label_idx = tf.placeholder(tf.int64, [None, 2], '%s_labels_ind' %scope)
			model['%s_labels_ind' %scope] = label_idx
			label_val = tf.placeholder(tf.int32, [None], '%s_labels_val' %scope)
			model['%s_labels_val' %scope] = label_val
			collapsed = tf.sparse_to_dense(
				label_idx, [max_len2, len0], label_val, -1,
				name = '%s_labels_collapsed' %scope)
			model['%s_labels_collapsed' %scope] = collapsed
			one_hot = tf.one_hot(collapsed, len1, name = '%s_labels' %scope)
			model['%s_labels' %scope] = one_hot

		with tf.variable_scope('loss'), tf.name_scope('loss'):
			log_probs = tf.log(probs)
			weighted = tf.multiply(one_hot, log_probs)
			model['%s_loss' %scope] = tf.reduce_sum(-weighted, name = '%s_loss' %scope)

		with tf.variable_scope('outputs'), tf.name_scope('outputs'):
			model['%s_output' %scope] = probs

	return model
Example #17
0
 def __call__(self, features, labels, params):
   """Build the model graph under the "model/<self.name>" variable scopes.

   Delegates to `self._build` and returns its result. See the model_fn
   documentation of the tf.contrib.learn.Estimator class for the meaning
   of the arguments.
   """
   with tf.variable_scope("model"), tf.variable_scope(self.name):
     built = self._build(features, labels, params)
   return built
    def project_bilstm_layer(self, lstm_outputs, name=None):
        """
        Tanh hidden layer followed by a linear projection onto the labels.

        The outputs are flattened to rows of width `2 * self.hidden_unit`
        before the hidden layer, and the scores are reshaped back to
        `[-1, self.seq_length, self.num_labels]`.

        :param lstm_outputs: [batch_size, num_steps, emb_size]
        :param name: variable scope name; "project" is used when it is falsy
        :return: [batch_size, num_steps, num_tags]
        """
        with tf.variable_scope(name or "project"):
            with tf.variable_scope("hidden"):
                hidden_w = tf.get_variable(
                    "W", shape=[self.hidden_unit * 2, self.hidden_unit],
                    dtype=tf.float32,
                    initializer=self.initializers.xavier_initializer())
                hidden_b = tf.get_variable(
                    "b", shape=[self.hidden_unit], dtype=tf.float32,
                    initializer=tf.zeros_initializer())
                flat = tf.reshape(lstm_outputs, shape=[-1, self.hidden_unit * 2])
                hidden = tf.tanh(tf.nn.xw_plus_b(flat, hidden_w, hidden_b))

            # Project the hidden representation onto per-label scores.
            with tf.variable_scope("logits"):
                logit_w = tf.get_variable(
                    "W", shape=[self.hidden_unit, self.num_labels],
                    dtype=tf.float32,
                    initializer=self.initializers.xavier_initializer())
                logit_b = tf.get_variable(
                    "b", shape=[self.num_labels], dtype=tf.float32,
                    initializer=tf.zeros_initializer())
                scores = tf.nn.xw_plus_b(hidden, logit_w, logit_b)

            return tf.reshape(scores, [-1, self.seq_length, self.num_labels])
    def add_logits_op(self):
        """
        Builds a bidirectional LSTM over `self.word_embeddings`, applies
        dropout, projects onto `self.ntags` scores and stores the result in
        `self.logits`.
        """
        with tf.variable_scope("bi-lstm"):
            forward_cell = tf.contrib.rnn.LSTMCell(self.hidden_size)
            backward_cell = tf.contrib.rnn.LSTMCell(self.hidden_size)
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                forward_cell,
                backward_cell,
                self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            forward_out, backward_out = rnn_outputs
            # Concatenate both directions along the last axis.
            features = tf.concat([forward_out, backward_out], axis=-1)
            features = tf.nn.dropout(features, self.dropout)

        with tf.variable_scope("proj"):
            proj_w = tf.get_variable("W", shape=[2*self.hidden_size, self.ntags],
                                     dtype=tf.float32)
            proj_b = tf.get_variable("b", shape=[self.ntags], dtype=tf.float32,
                                     initializer=tf.zeros_initializer())

            # Remember the dynamic size of axis 1 before flattening so the
            # scores can be reshaped back afterwards.
            ntime_steps = tf.shape(features)[1]
            flat = tf.reshape(features, [-1, 2*self.hidden_size])
            scores = tf.matmul(flat, proj_w) + proj_b
            self.logits = tf.reshape(scores, [-1, ntime_steps, self.ntags])
Example #20
0
  def testInitFromCheckpointWithScopes(self):
    """Checkpoint values restore into variables living under other scopes."""
    expected0 = np.asarray([1.0, 3.0, 9.0], dtype=np.float32).reshape((1, 3, 1))
    expected1 = np.asarray([2.0, 4.0, 6.0, 8.0],
                           dtype=np.float32).reshape((2, 1, 2))

    checkpoint_values = {'layer0/v0': expected0, 'layer1/v1': expected1}
    model_dir = os.path.join(self.get_temp_dir(), 'model')

    with self.test_session() as sess:
      model_path = self.create_checkpoint_from_values(checkpoint_values,
                                                      model_dir)

      # The graph variables use different scopes and names than the
      # checkpoint entries they are restored from.
      make_variable = tf.contrib.framework.variables.variable
      with tf.variable_scope('my_model/my_layer0'):
        var0 = make_variable('my_var0', shape=expected0.shape)
      with tf.variable_scope('my_model/my_layer1'):
        var1 = make_variable('my_var1', shape=expected1.shape)

      # Map checkpoint names onto the graph variables.
      restore_map = {'layer0/v0': var0, 'layer1/v1': var1}
      assign_op, feed = tf.contrib.framework.variables.assign_from_checkpoint(
          model_path, restore_map)

      # Initialize first, then overwrite with the checkpoint values.
      sess.run(tf.global_variables_initializer())
      sess.run(assign_op, feed)

      # Both variables must now hold the checkpoint values.
      self.assertAllEqual(expected0, var0.eval())
      self.assertAllEqual(expected1, var1.eval())
Example #21
0
    def __init__(self, model_path = "models", threshold = [0.6, 0.7, 0.7], factor = 0.709, scale_factor = 1):
        '''
        Load the three detection networks (PNet, RNet, ONet) into a private
        graph pinned to the CPU, with its own session.

        :param model_path: directory containing det1.npy, det2.npy and
            det3.npy; when falsy, the directory of this source file is used
        :param threshold: detection threshold (three values by default)
        :param factor: default 0.709 image pyramid -- magic number
        :param scale_factor: stored on the instance as ``scale_factor``
        '''
        # Copy the thresholds: the default is a single list object shared by
        # every call, so storing it directly would let one instance's
        # modifications leak into all others (and into future defaults).
        self.threshold = list(threshold)
        self.factor = factor
        self.scale_factor = scale_factor
        with tf.Graph().as_default(), tf.device('/cpu:0'):
            print("Loading Face detection model")
            self.sess = tf.Session()
            if not model_path:
                model_path, _ = os.path.split(os.path.realpath(__file__))

            # Each network gets its own scope; the tensor names used in the
            # lambdas below depend on these scope names.
            with tf.variable_scope('pnet'):
                data = tf.placeholder(tf.float32, (None, None, None, 3), 'input')
                pnet = PNet({'data': data})
                pnet.load(os.path.join(model_path, 'det1.npy'), self.sess)
            with tf.variable_scope('rnet'):
                data = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
                rnet = RNet({'data': data})
                rnet.load(os.path.join(model_path, 'det2.npy'), self.sess)
            with tf.variable_scope('onet'):
                data = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
                onet = ONet({'data': data})
                onet.load(os.path.join(model_path, 'det3.npy'), self.sess)

            # Callables that run each network on an image batch, addressing
            # input and output tensors by name.
            self.pnet = lambda img: self.sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0': img})
            self.rnet = lambda img: self.sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0': img})
            self.onet = lambda img: self.sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
                                            feed_dict={'onet/input:0': img})
            print("Face detection model loaded")
Example #22
0
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a convolutional trunk followed by fully connected Q-value head(s).

    Args:
        convs: iterable of (num_outputs, kernel_size, stride) triples, one per
            convolution layer.
        hiddens: iterable of hidden layer widths used by each head.
        dueling: when truthy, add a separate state-value head and combine it
            with the mean-centred action scores.
        inpt: input tensor fed to the first convolution.
        num_actions: number of outputs of the action head.
        scope: variable scope wrapping the whole network.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: when truthy, apply layer normalisation before each hidden
            ReLU.

    Returns:
        The Q-value tensor: the raw action scores, or for the dueling variant
        the state score plus the centred action scores.
    """
    def _hidden_stack(features):
        # Linear projection, optional layer norm, then ReLU for each width.
        for width in hiddens:
            features = layers.fully_connected(features, num_outputs=width, activation_fn=None)
            if layer_norm:
                features = layers.layer_norm(features, center=True, scale=True)
            features = tf.nn.relu(features)
        return features

    with tf.variable_scope(scope, reuse=reuse):
        net = inpt
        with tf.variable_scope("convnet"):
            for n_filters, ksize, step in convs:
                net = layers.convolution2d(
                    net,
                    num_outputs=n_filters,
                    kernel_size=ksize,
                    stride=step,
                    activation_fn=tf.nn.relu)
        flat = layers.flatten(net)

        with tf.variable_scope("action_value"):
            action_scores = layers.fully_connected(
                _hidden_stack(flat), num_outputs=num_actions, activation_fn=None)

        if not dueling:
            return action_scores

        with tf.variable_scope("state_value"):
            state_score = layers.fully_connected(
                _hidden_stack(flat), num_outputs=1, activation_fn=None)

        # Subtract the per-row mean so the action head only encodes advantages.
        mean_score = tf.reduce_mean(action_scores, 1)
        centered = action_scores - tf.expand_dims(mean_score, 1)
        return state_score + centered
Example #23
0
  def start_session(self):
    """
    Builds the trainable and target networks and creates the session.

    Records the names of all weight matrices on the instance
    (``input_layer_mats``, ``hidden_layer_mats``, ``output_layer_mats`` and
    their concatenation ``weight_mats``), builds one model under the
    "network" variable scope and one under "target", then runs the variable
    initializer in a new session.

    Returns:
      The initialized ``tf.Session``.
    """
    self.input_layer_mats = ["W_input", "b_input"]
    self.hidden_layer_mats = []
    # `range` rather than `xrange`: identical result here, and it also exists
    # on Python 3.
    for i in range(self.num_hidden):
      self.hidden_layer_mats.append("W" + str(i))
      self.hidden_layer_mats.append("b" + str(i))
    self.output_layer_mats = ["W_output", "b_output"]

    self.weight_mats = self.input_layer_mats + self.hidden_layer_mats + self.output_layer_mats

    with tf.variable_scope("network"):
      self.create_model_trainable()

    with tf.variable_scope("target"):
      self.create_model_target()

    # Must be created after both models so it covers all of their variables.
    init = tf.initialize_all_variables()

    session = tf.Session()
    session.run(init)

    return session
def inference(input_tensor,train,regularizer):
    """Build the forward pass: conv -> pool -> conv -> pool -> FC -> FC.

    Args:
        input_tensor: input batch fed to the first convolution. The static
            height, width and channel dimensions after pooling must be known
            because the flattened size is computed from them; the batch
            dimension may be unknown.
        train: when truthy, dropout with keep probability 0.5 is applied to
            the output of the first fully connected layer.
        regularizer: optional callable applied to each fully connected weight
            matrix; its result is added to the 'losses' collection. Pass None
            to disable.

    Returns:
        The output tensor of the last fully connected layer (no activation).
    """
    # Layer 1: convolution
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable("weight",
                [CONV1_SIZE,CONV1_SIZE,NUM_CHANNELS,CONV1_DEEP],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("biases",[CONV1_DEEP],
                 initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor,conv1_weights,
                             strides=[1,1,1,1],padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1,conv1_biases))
    # Layer 2: pooling
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],
                               strides=[1,2,2,1],padding='SAME')
    # Layer 3: convolution
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable("weight",
                [CONV2_SIZE,CONV2_SIZE,CONV1_DEEP,CONV2_DEEP],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("biases",[CONV2_DEEP],
                 initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1,conv2_weights,
                             strides=[1,1,1,1],padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2,conv2_biases))

    # Layer 4: pooling
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],
                               strides=[1,2,2,1],padding='SAME')

    # Flatten the feature maps into one vector per example.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]

    # An unknown batch size shows up as None in the static shape, which
    # tf.reshape rejects; -1 lets it infer that dimension instead.
    batch_dim = -1 if pool_shape[0] is None else pool_shape[0]
    reshaped = tf.reshape(pool2,[batch_dim,nodes])

    # Layer 5: fully connected
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable("weight",[nodes,FC_SIZE],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias",[FC_SIZE],
                initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped,fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1,0.5)

    # Layer 6: fully connected
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable("weight",[FC_SIZE,NUM_LABELS],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias",[NUM_LABELS],
                initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1,fc2_weights) + fc2_biases

    return logit
Example #25
0
  def testLSTMBasicToBlockPeeping(self):
    """Compares a peephole LSTMCell run through tf.nn.rnn with fused_lstm.

    Both implementations are fed the same random inputs and built with the
    same seeded initializer; outputs, input gradients and weight gradients
    are then checked for closeness.
    """
    with self.test_session(use_gpu=self._use_gpu) as sess:
      batch_size = 2
      input_size = 3
      cell_size = 4
      sequence_length = 5

      # One random [batch_size, input_size] tensor per time step.
      inputs = []
      for _ in range(sequence_length):
        inp = tf.convert_to_tensor(
            np.random.randn(batch_size, input_size),
            dtype=tf.float32)
        inputs.append(inp)

      # Same seeded initializer for both scopes.
      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
      with tf.variable_scope("basic", initializer=initializer):
        cell = tf.nn.rnn_cell.LSTMCell(cell_size,
                                       use_peepholes=True,
                                       state_is_tuple=True)
        outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)

        sess.run([tf.initialize_all_variables()])
        basic_outputs = sess.run(outputs)
        basic_grads = sess.run(tf.gradients(outputs, inputs))
        # Evaluated before the "block" variables below are created, so only
        # the trainable variables that exist at this point are included.
        basic_wgrads = sess.run(tf.gradients(outputs, tf.trainable_variables()))

      with tf.variable_scope("block", initializer=initializer):
        w = tf.get_variable("w",
                            shape=[input_size + cell_size, cell_size * 4],
                            dtype=tf.float32)
        b = tf.get_variable("b",
                            shape=[cell_size * 4],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer)

        # Peephole weights, each of shape [cell_size].
        wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
        wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
        wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)

        # Only the last of the seven returned values is used here.
        _, _, _, _, _, _, outputs = fused_lstm(
            tf.convert_to_tensor(sequence_length,
                                 dtype=tf.int64),
            inputs,
            w,
            b,
            wci=wci,
            wcf=wcf,
            wco=wco,
            cell_clip=0,
            use_peephole=True)

        sess.run([tf.initialize_all_variables()])
        block_outputs = sess.run(outputs)
        block_grads = sess.run(tf.gradients(outputs, inputs))
        block_wgrads = sess.run(tf.gradients(outputs, [w, b, wci, wcf, wco]))

      self.assertAllClose(basic_outputs, block_outputs)
      self.assertAllClose(basic_grads, block_grads)
      # Weight gradients are compared with a looser tolerance.
      for basic, block in zip(basic_wgrads, block_wgrads):
        self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
Example #26
0
 def testBasicLSTMCell(self):
   """Smoke-tests BasicLSTMCell, stacked in a MultiRNNCell and on its own.

   All variables are initialized to the constant 0.5. The first part checks
   the output and new state of a two-layer stack against recorded values;
   the second part only checks that a cell built with input_size=3 runs and
   returns two results.
   """
   with self.test_session() as sess:
     with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
       x = tf.zeros([1, 2])
       m = tf.zeros([1, 8])
       g, out_m = tf.nn.rnn_cell.MultiRNNCell(
           [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)(x, m)
       sess.run([tf.initialize_all_variables()])
       # The zero tensors are overridden by name through the feed dict.
       res = sess.run([g, out_m], {x.name: np.array([[1., 1.]]),
                                   m.name: 0.1 * np.ones([1, 8])})
       self.assertEqual(len(res), 2)
       # The numbers in results were not calculated, this is just a smoke test.
       self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
       expected_mem = np.array([[0.68967271, 0.68967271,
                                 0.44848421, 0.44848421,
                                 0.39897051, 0.39897051,
                                 0.24024698, 0.24024698]])
       self.assertAllClose(res[1], expected_mem)
     with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)):
       x = tf.zeros([1, 3])  # Test BasicLSTMCell with input_size != num_units.
       m = tf.zeros([1, 4])
       g, out_m = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)(x, m)
       sess.run([tf.initialize_all_variables()])
       res = sess.run([g, out_m], {x.name: np.array([[1., 1., 1.]]),
                                   m.name: 0.1 * np.ones([1, 4])})
       # Only the number of returned values is checked for this cell.
       self.assertEqual(len(res), 2)
Example #27
0
    def __init__(self,sess,n_features,n_actions,lr=0.001):
        """Build the actor network and its training op.

        Args:
            sess: session stored on the instance for later use.
            n_features: width of the single-row state placeholder.
            n_actions: number of units of the softmax output layer.
            lr: learning rate passed to the Adam optimizer.
        """
        self.sess = sess

        # Graph inputs: one state row, the chosen action and the TD error.
        self.s = tf.placeholder(tf.float32, [1, n_features], name='state')
        self.a = tf.placeholder(tf.int32, None, name='act')
        self.td_error = tf.placeholder(tf.float32, None, "td_error")

        def _dense(inputs, units, activation, name):
            # Both layers share the same initializer settings.
            return tf.layers.dense(
                inputs=inputs,
                units=units,
                activation=activation,
                kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
                bias_initializer=tf.constant_initializer(0.1),
                name=name,
            )

        with tf.variable_scope('Actor'):
            hidden = _dense(self.s, 20, tf.nn.relu, 'l1')
            self.acts_prob = _dense(hidden, n_actions, tf.nn.softmax, 'acts_prob')

            with tf.variable_scope('exp_v'):
                # Log-probability of the fed action in the single state row,
                # weighted by the TD error.
                chosen_log_prob = tf.log(self.acts_prob[0, self.a])
                self.exp_v = tf.reduce_mean(chosen_log_prob * self.td_error)

            with tf.variable_scope('train'):
                # Minimizing the negative maximizes exp_v.
                optimizer = tf.train.AdamOptimizer(lr)
                self.train_op = optimizer.minimize(-self.exp_v)
    def __load_model(self):
        """Build the training and test seq2seq graphs over shared variables.

        Sets ``prev_mem``, ``cell``, ``dec_outputs``, ``dec_memory`` and
        ``dec_outputs_tst`` on the instance. The attention variant of the
        seq2seq builder is used when ``self.attention`` is truthy, the plain
        embedding variant otherwise.
        """
        # Initial memory value for recurrence.
        self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

        # choose RNN/GRU/LSTM cell
        with tf.variable_scope("train_test", reuse=True):
            self.cell = rnn_cell.LSTMCell(self.memory_dim)

        # Pick the builder once; both graphs below use the same one.
        if self.attention:
            build_seq2seq = seq2seq.embedding_attention_seq2seq
        else:
            build_seq2seq = seq2seq.embedding_rnn_seq2seq

        # NOTE(review): seq_length is passed in the sixth positional slot --
        # confirm this is the argument the builder expects there.
        shared_args = (self.enc_inp, self.dec_inp, self.cell,
                       self.vocab_size, self.vocab_size, self.seq_length)

        # Training graph: creates the variables.
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = build_seq2seq(*shared_args)

        # Test graph: reuses those variables, with feed_previous=True.
        with tf.variable_scope("train_test", reuse = True):
            self.dec_outputs_tst, _ = build_seq2seq(*shared_args, feed_previous=True)
def cnn_model(X, y):
    """Two-layer convolutional model mapping a word-index sequence to a class.

    Args:
        X: word indexes, embedded through ``skflow.ops.categorical_variable``.
        y: targets passed to ``skflow.models.logistic_regression``.

    Returns:
        Whatever ``skflow.models.logistic_regression`` returns for the pooled
        features and ``y``.
    """
    # Embed the word indexes, then append a trailing axis of size 1 so the
    # result can be fed to the 2-D convolution.
    embedded = skflow.ops.categorical_variable(
        X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
    embedded = tf.expand_dims(embedded, 3)

    with tf.variable_scope('CNN_Layer1'):
        # Convolution + ReLU over the embedded sequence.
        features = tf.nn.relu(
            skflow.ops.conv2d(embedded, N_FILTERS, FILTER_SHAPE1, padding='VALID'))
        # Max pooling along axis 1.
        features = tf.nn.max_pool(
            features,
            ksize=[1, POOLING_WINDOW, 1, 1],
            strides=[1, POOLING_STRIDE, 1, 1],
            padding='SAME')
        # Swap the last two axes so the filter axis becomes the width.
        features = tf.transpose(features, [0, 1, 3, 2])

    with tf.variable_scope('CNN_Layer2'):
        # Second convolution, then the maximum over axis 1 for each filter.
        conv2 = skflow.ops.conv2d(features, N_FILTERS, FILTER_SHAPE2, padding='VALID')
        pooled = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])

    # Final linear layer and classification.
    return skflow.models.logistic_regression(pooled, y)
Example #30
0
  def testWithScopes(self):
    """Checks assign_from_values_fn with variables created in nested scopes.

    Two variables are created under 'my_model/my_layer0' and
    'my_model/my_layer1', initialized randomly, overwritten through the
    assignment function, and then read back and compared with the values
    that were assigned.
    """
    expected0 = np.asarray([1.0, 3.0, 9.0]).reshape((1, 3, 1))
    expected1 = np.asarray([2.0, 4.0, 6.0, 8.0]).reshape((2, 1, 2))

    with self.test_session() as sess:
      normal_init = tf.truncated_normal_initializer(stddev=.1)

      with tf.variable_scope('my_model/my_layer0'):
        layer0_var = tf.contrib.framework.variable(
            'my_var0', shape=[1, 3, 1], initializer=normal_init)
      with tf.variable_scope('my_model/my_layer1'):
        layer1_var = tf.contrib.framework.variable(
            'my_var1', shape=[2, 1, 2], initializer=normal_init)

      # Keys are the fully scoped variable names.
      assign_fn = tf.contrib.framework.assign_from_values_fn({
          'my_model/my_layer0/my_var0': expected0,
          'my_model/my_layer1/my_var1': expected1,
      })

      # Random initialization first, then the explicit assignment on top.
      sess.run(tf.global_variables_initializer())
      assign_fn(sess)

      # Read both variables back and compare with what was assigned.
      actual0, actual1 = sess.run([layer0_var, layer1_var])
      self.assertAllEqual(expected0, actual0)
      self.assertAllEqual(expected1, actual1)
Example #31
0
def additive_attention(queries,
                       keys,
                       values,
                       bias,
                       hidden_size,
                       concat=False,
                       keep_prob=None,
                       dtype=None,
                       scope=None):
    """ Additive attention mechanism. This layer is implemented using a
        one layer feed forward neural network

    :param queries: A tensor with shape [batch, heads, length_q, depth_k]
    :param keys: A tensor with shape [batch, heads, length_kv, depth_k]
    :param values: A tensor with shape [batch, heads, length_kv, depth_v]
    :param bias: A tensor added to the attention logits, or None
    :param hidden_size: An integer
    :param concat: A boolean value. If ``concat'' is set to True, then
        the computation of attention mechanism is following $tanh(W[q, k])$.
        When ``concat'' is set to False, the computation is following
        $tanh(Wq + Vk)$
    :param keep_prob: a scalar in [0, 1], or None. Dropout is applied to the
        attention weights only when a value below 1.0 is given.
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string, the scope of this layer

    :returns: A dict with the following keys:
        weights: the (possibly dropped-out) softmax of the attention logits
        outputs: ``tf.matmul(weights, values)``
    """

    with tf.variable_scope(scope,
                           default_name="additive_attention",
                           values=[queries, keys, values, bias],
                           dtype=dtype):
        length_q = tf.shape(queries)[2]
        length_kv = tf.shape(keys)[2]
        # Tile queries along a new key axis and keys along a new query axis so
        # that every (query, key) pair is lined up.
        q = tf.tile(tf.expand_dims(queries, 3), [1, 1, 1, length_kv, 1])
        k = tf.tile(tf.expand_dims(keys, 2), [1, 1, length_q, 1, 1])

        if concat:
            combined = tf.tanh(
                linear(tf.concat([q, k], axis=-1),
                       hidden_size,
                       True,
                       True,
                       name="qk_transform"))
        else:
            # NOTE(review): this branch transforms the untiled `queries` and
            # `keys`, not the tiled tensors built above -- confirm that the
            # sum below yields the intended pairwise shape.
            q = linear(queries, hidden_size, True, True, name="q_transform")
            k = linear(keys, hidden_size, True, True, name="key_transform")
            combined = tf.tanh(q + k)

        # shape: [batch, heads, length_q, length_kv]
        logits = tf.squeeze(linear(combined, 1, True, True, name="logits"),
                            axis=-1)

        if bias is not None:
            logits += bias

        weights = tf.nn.softmax(logits, name="attention_weights")

        # The previous test `keep_prob or keep_prob < 1.0` evaluated
        # `None < 1.0` for the default argument (a TypeError on Python 3) and
        # was true for keep_prob == 1.0. Apply dropout only for an explicit
        # keep probability below 1.
        if keep_prob is not None and keep_prob < 1.0:
            weights = tf.nn.dropout(weights, keep_prob)

        outputs = tf.matmul(weights, values)

        return {"weights": weights, "outputs": outputs}
Example #32
0
    def ready(self):
        """Build the forward graph, the four loss terms and the prediction ops.

        Reads the input tensors ``self.sent``, ``self.mid``, ``self.pats``,
        ``self.rel``, ``self.rels``, ``self.weight`` and ``self.word_mat`` and
        sets ``self.pat_a``, ``self.logit``, ``self.pat_loss``,
        ``self.max_val``, ``self.pred``, ``self.loss``, ``self.sim_pred``,
        ``self.sim_max_val``, ``self.gold`` and ``self.max_logit``.
        """
        config = self.config
        d = config.hidden

        # Non-zero ids count as real tokens (presumably 0 is the padding id --
        # TODO confirm). Trim the sentence columns to the longest sentence in
        # the batch.
        batch_size = tf.shape(self.sent)[0]
        sent_mask = tf.cast(self.sent, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = tf.reduce_max(sent_len)
        sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
        sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

        # Same trimming for the `mid` input.
        mid_mask = tf.cast(self.mid, tf.bool)
        mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
        mid_maxlen = tf.reduce_max(mid_len)
        mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
        mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

        # Patterns are masked and measured but not trimmed.
        pat_mask = tf.cast(self.pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope("embedding"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
            # Only the sentence embeddings get dropout here.
            sent_emb = dropout(sent_emb,
                               keep_prob=config.keep_prob,
                               is_train=self.is_train,
                               mode="embedding")
            pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

        with tf.variable_scope("encoder"):
            # The same encoder object is applied to sentences and patterns.
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)

            cont_d = dropout(cont,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            pat_d = dropout(pat,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)

        with tf.variable_scope("attention"):
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            pat_a = self.pat_a = attention(pat_d,
                                           config.att_hidden,
                                           mask=pat_mask)

        with tf.variable_scope("sim"):
            sim, pat_sim = att_match(mid_emb,
                                     pat_emb,
                                     mid_mask,
                                     pat_mask,
                                     d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)

            # Product of self.rels with its own transpose. NOTE(review): the
            # name suggests "negative indexes" but the masking below keeps
            # entries where this is 1 for pat_pos and where it is 0 for
            # pat_neg -- confirm the intended meaning.
            neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
            # Squared hinge on (tau - similarity); the -1e30 offset removes
            # entries whose neg_idxs value is 0 from the row-wise max.
            pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
            pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
            # Squared positive similarity; the -1e30 offset removes entries
            # whose neg_idxs value is non-zero from the row-wise max.
            pat_neg = tf.square(tf.maximum(pat_sim, 0.))
            pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
            l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

            # Note: this scope is nested inside "sim".
            with tf.variable_scope("pred"):
                # Attention-weighted sums of the encoder outputs.
                att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d,
                                       axis=1)
                pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d,
                                       axis=1)

                logit = self.logit = dense(att2_d,
                                           config.num_class,
                                           use_bias=False)
                pred = tf.nn.softmax(logit)
                # Cross entropy against self.rel on the first
                # config.batch_size rows.
                l_a = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size],
                        labels=self.rel[:config.batch_size]),
                    axis=0)

                # For the remaining rows, labels are taken from the row of
                # self.rels with the highest similarity; no gradient flows
                # through the similarity itself.
                xsim = tf.stop_gradient(sim[config.batch_size:])
                pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                bound = tf.reduce_max(xsim, axis=1)
                # Per-row weight: softmax over the scaled best similarities.
                weight = tf.nn.softmax(10 * bound)
                l_u = tf.reduce_sum(
                    weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel),
                    axis=0)

                # `logit` is rebound to the pattern logits from here on;
                # self.logit still refers to the sentence logits.
                logit = dense(pat2_d, config.num_class, use_bias=False)
                l_pat = self.pat_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit, labels=self.rels),
                    axis=0)

        # NOTE(review): `log` is a bare name, not `tf.log` -- confirm what is
        # imported under that name in this module.
        self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
        self.pred = tf.argmax(pred, axis=1)

        # Total loss: l_a plus the three weighted auxiliary terms.
        self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
        self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(sim, axis=1)),
                                  axis=1)
        self.sim_max_val = tf.reduce_max(sim, axis=1)
        self.gold = tf.argmax(self.rel, axis=1)
        self.max_logit = tf.reduce_max(self.logit, axis=1)
def densenet_views(inputs,
				   num_classes=1000,
				   reduction=None,
				   growth_rate=None,
				   num_filters=None,
				   num_layers=None,
				   dropout_rate=None,
				   is_training=True,
				   reuse=None,
				   scope=None):
	"""Build a DenseNet whose last dense block is replicated once per view.

	After the first dense/transition block a side branch
	(``add_views_branch``) produces one weight per view. The last dense block
	is built NUM_VIEWS times; each copy is scaled by its view weight and the
	copies are summed before the classification head.

	Args:
		inputs: input tensor fed to the first convolution.
		num_classes: number of outputs of the 'logits' layer.
		reduction: required; compression is ``1.0 - reduction``.
		growth_rate: required; forwarded to every dense block.
		num_filters: required; filter count of the first convolution.
		num_layers: required; sequence with one layer count per dense block.
		dropout_rate: forwarded to ``_conv`` and ``add_views_branch``.
		is_training: forwarded to ``slim.batch_norm`` and ``slim.dropout``.
		reuse: forwarded to ``tf.variable_scope``.
		scope: optional variable scope name (default 'densenetxxx').

	Returns:
		A ``(net, end_points)`` tuple: the flattened logits and a dict of
		collected intermediate tensors.
	"""
	assert reduction is not None
	assert growth_rate is not None
	assert num_filters is not None
	assert num_layers is not None

	end_points = {}
	compression = 1.0 - reduction
	num_dense_blocks = len(num_layers)

	with tf.variable_scope(scope, 'densenetxxx', [inputs, num_classes], reuse=reuse) as sc:
		end_points_collection = sc.name + '_end_points'
		with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training), \
			 slim.arg_scope([slim.conv2d, _conv, _conv_block, _dense_block, _transition_block], outputs_collections=end_points_collection), \
			 slim.arg_scope([_conv], dropout_rate=dropout_rate):
			net = inputs

			# initial convolution
			net = slim.conv2d(net, num_filters, 7, stride=2, scope='conv1')
			net = slim.batch_norm(net)
			net = tf.nn.relu(net)
			net = slim.max_pool2d(net, 3, stride=2, padding='SAME')

			# FIRST BLOCK ==============
			# dense blocks
			net, num_filters = _dense_block(net, num_layers[0], num_filters, growth_rate, scope='dense_block' + str(1))
			# Add transition_block
			net, num_filters = _transition_block(net, num_filters, compression=compression, scope='transition_block' + str(1))

			# Side branch taken right after the first block.
			views_softmax = add_views_branch(net, dropout_rate, end_points)

			# MIDDLE BLOCKS
			for i in range(1, num_dense_blocks - 1):
				# dense blocks
				net, num_filters = _dense_block(net, num_layers[i], num_filters, growth_rate, scope='dense_block' + str(i + 1))
				# Add transition_block
				net, num_filters = _transition_block(net, num_filters, compression=compression, scope='transition_block' + str(i + 1))

			with tf.variable_scope('3ViewBranches'):
				views_softmax = tf.reshape(views_softmax, [-1, 1, 1, NUM_VIEWS])
				# One size-1 slice per view. Derived from NUM_VIEWS so the
				# split always matches the reshape above and the loop below
				# (the sizes used to be hard-coded as [1, 1, 1]).
				views_softmax_split = tf.split(views_softmax, [1] * NUM_VIEWS, axis=3)

				blocks = []
				for view in range(NUM_VIEWS):
					with tf.variable_scope('View_%d_Branch' % view):
						# The returned filter count is ignored, so every view
						# starts from the same num_filters.
						block_view, _ = _dense_block(net, num_layers[-1], num_filters, growth_rate, scope='dense_block' + str(num_dense_blocks))
						scaled_block_view = tf.multiply(block_view, views_softmax_split[view], name='scale_view_%d' % view)
						blocks.append(scaled_block_view)

				net = tf.add_n(blocks, 'combine_views')

			# final blocks
			with tf.variable_scope('final_block', [inputs]):
				net = slim.batch_norm(net)
				net = tf.nn.relu(net)
				net = tf.reduce_mean(net, [1, 2], name='global_avg_pool', keep_dims=True)

			net = slim.conv2d(net, 1536, 1, activation_fn=tf.nn.relu, biases_initializer=tf.zeros_initializer(), scope='pre_logits')
			end_points['PreLogits'] = slim.flatten(net, 'pre_logits')

			# NOTE(review): tf.nn.dropout ignores `is_training`, so this
			# dropout is also active at inference time -- confirm intended.
			net = tf.nn.dropout(net, keep_prob=0.8)

			net = slim.conv2d(net, num_classes, 1, biases_initializer=tf.zeros_initializer(), scope='logits')
			net = slim.flatten(net)

			end_points.update(slim.utils.convert_collection_to_dict(end_points_collection))

			# NOTE(review): num_classes has already been used to build the
			# 'logits' layer above, so this guard comes late.
			if num_classes is not None:
				end_points['predictions'] = slim.softmax(net, scope='predictions')

			return net, end_points
Example #34
0
    def __init__(self,
                 encoders: List[Stateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 name: str,
                 max_output_len: int,
                 dropout_keep_prob: float = 1.0,
                 rnn_size: int = None,
                 embedding_size: int = None,
                 output_projection: OutputProjectionSpec = None,
                 encoder_projection: EncoderProjection = None,
                 attentions: List[BaseAttention] = None,
                 embeddings_source: EmbeddedSequence = None,
                 attention_on_input: bool = True,
                 rnn_cell: str = "GRU",
                 conditional_gru: bool = False,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Create a refactored version of monster decoder.

        Arguments:
            encoders: Input encoders of the decoder
            vocabulary: Target vocabulary
            data_id: Target data series
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects
            max_output_len: Maximum length of an output sequence
            dropout_keep_prob: Probability of keeping a value during dropout

        Keyword arguments:
            rnn_size: Size of the decoder hidden state, if None set
                according to encoders.
            embedding_size: Size of embedding vectors for target words
            output_projection: How to generate distribution over vocabulary
                from decoder rnn_outputs
            encoder_projection: How to construct initial state from encoders
            attentions: The attention objects to use. Optional.
            embeddings_source: Embedded sequence to take embeddings from
            rnn_cell: RNN Cell used by the decoder; must be one of
                RNN_CELL_TYPES
            conditional_gru: Flag whether to use the Conditional GRU
                architecture
            attention_on_input: Flag whether attention from previous decoding
                step should be combined with the input in the next step.
            save_checkpoint: Passed through to ModelPart
            load_checkpoint: Passed through to ModelPart

        Raises:
            ValueError: If neither ``embedding_size`` nor
                ``embeddings_source`` is given, or if ``rnn_cell`` is not in
                RNN_CELL_TYPES.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        check_argument_types()

        log("Initializing decoder, name: '{}'".format(name))

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_output_len = max_output_len
        self.dropout_keep_prob = dropout_keep_prob
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size
        self.output_projection_spec = output_projection
        self.encoder_projection = encoder_projection
        self.attentions = attentions
        self.embeddings_source = embeddings_source
        self._conditional_gru = conditional_gru
        self._attention_on_input = attention_on_input
        self._rnn_cell_str = rnn_cell

        # Normalise the optional attention list to an empty list.
        if self.attentions is None:
            self.attentions = []

        if self.embedding_size is None and self.embeddings_source is None:
            raise ValueError("You must specify either embedding size or the "
                             "embedded sequence from which to reuse the "
                             "embeddings (e.g. set either 'embedding_size' or "
                             " 'embeddings_source' parameter)")

        # Reused embeddings take precedence: their width replaces any
        # explicitly given embedding_size.
        if self.embeddings_source is not None:
            if self.embedding_size is not None:
                warn("Overriding the embedding_size parameter with the"
                     " size of the reused embeddings from the encoder.")

            self.embedding_size = (
                self.embeddings_source.embedding_matrix.get_shape()[1].value)

        # Choose a default way to build the initial state when none is given.
        if self.encoder_projection is None:
            if not self.encoders:
                log("No encoder - language model only.")
                self.encoder_projection = empty_initial_state
            elif rnn_size is None:
                log("No rnn_size or encoder_projection: Using concatenation of"
                    " encoded states")
                self.encoder_projection = concat_encoder_projection
                # The state size is the sum of the encoders' output widths.
                self.rnn_size = sum(e.output.get_shape()[1].value
                                    for e in encoders)
            else:
                log("Using linear projection of encoders as the initial state")
                self.encoder_projection = linear_encoder_projection(
                    self.dropout_keep_prob)

        # Every path above that leaves rnn_size as None is a configuration
        # error (e.g. no encoders and no explicit rnn_size).
        assert self.rnn_size is not None

        if self._rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                             "'NematusGRU'. Not {}".format(self._rnn_cell_str))

        # The output projection spec is either missing (tanh default), a
        # (function, size) tuple, or a bare function whose output size is
        # taken to be rnn_size.
        if self.output_projection_spec is None:
            log("No output projection specified - using tanh projection")
            self.output_projection = nonlinear_output(
                self.rnn_size, tf.tanh)[0]
            self.output_projection_size = self.rnn_size
        elif isinstance(self.output_projection_spec, tuple):
            (self.output_projection,
             self.output_projection_size) = tuple(self.output_projection_spec)
        else:
            self.output_projection = self.output_projection_spec
            self.output_projection_size = self.rnn_size

        if self._attention_on_input:
            self.input_projection = self.input_plus_attention
        else:
            self.input_projection = self.embed_input_symbol

        # Open the step scope once, doing nothing inside, only to capture the
        # scope object in self.step_scope for later use.
        with self.use_scope():
            with tf.variable_scope("attention_decoder") as self.step_scope:
                pass

        # Reading self.cost and self.runtime_logits here presumably triggers
        # their construction -- verify against their definitions.
        # TODO when it is possible, remove the printing of the cost var
        log("Decoder initalized. Cost var: {}".format(str(self.cost)))
        log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
Example #35
0
        def body(*args) -> LoopState:
            """Run one decoding step and return the updated loop state.

            The positional arguments are the fields of a ``LoopState``. The
            step computes the RNN input, runs the cell and the attentions,
            projects the cell output to logits, picks the next input symbols
            and writes the step's results into the state's tensor arrays.

            ``sample`` and ``train_mode`` are free variables taken from the
            enclosing scope, which is not visible from this block.
            """
            loop_state = LoopState(*args)
            step = loop_state.step

            with tf.variable_scope(self.step_scope):
                # Compute the input to the RNN
                rnn_input = self.input_projection(*loop_state)

                # Run the RNN.
                cell = self._get_rnn_cell()
                if self._rnn_cell_str in ["GRU", "NematusGRU"]:
                    cell_output, next_state = cell(
                        rnn_input, loop_state.prev_rnn_output)

                    # Each attention returns a (context, new loop state) pair.
                    attns = [
                        a.attention(cell_output, loop_state.prev_rnn_output,
                                    rnn_input, att_loop_state, loop_state.step)
                        for a, att_loop_state in zip(
                            self.attentions,
                            loop_state.attention_loop_states)]

                    # zip(*[]) cannot be unpacked into two names, hence the
                    # explicit empty case.
                    if self.attentions:
                        contexts, att_loop_states = zip(*attns)
                    else:
                        contexts, att_loop_states = [], []

                    # Optional second cell fed with the concatenated contexts
                    # and the state of the first cell.
                    if self._conditional_gru:
                        cell_cond = self._get_conditional_gru_cell()
                        cond_input = tf.concat(contexts, -1)
                        cell_output, next_state = cell_cond(
                            cond_input, next_state, scope="cond_gru_2_cell")

                elif self._rnn_cell_str == "LSTM":
                    # The tuple is built as (prev_rnn_state, prev_rnn_output)
                    # and only the `c` part of the new state is carried on as
                    # next_state; the output is carried via cell_output.
                    prev_state = tf.contrib.rnn.LSTMStateTuple(
                        loop_state.prev_rnn_state, loop_state.prev_rnn_output)
                    cell_output, state = cell(rnn_input, prev_state)
                    next_state = state.c
                    attns = [
                        a.attention(cell_output, loop_state.prev_rnn_output,
                                    rnn_input, att_loop_state, loop_state.step)
                        for a, att_loop_state in zip(
                            self.attentions,
                            loop_state.attention_loop_states)]
                    if self.attentions:
                        contexts, att_loop_states = zip(*attns)
                    else:
                        contexts, att_loop_states = [], []
                else:
                    raise ValueError("Unknown RNN cell.")

                with tf.name_scope("rnn_output_projection"):
                    embedded_input = tf.nn.embedding_lookup(
                        self.embedding_matrix, loop_state.input_symbol)

                    # Note: this uses self.train_mode, while the branch below
                    # uses the enclosing scope's `train_mode`.
                    output = self.output_projection(
                        cell_output, embedded_input, list(contexts),
                        self.train_mode)

                logits = self._logit_function(output)

            # After the first step, later steps reuse the variables.
            self.step_scope.reuse_variables()

            # Next input symbol: sampled, taken from the training inputs, or
            # the arg-max of the logits.
            if sample:
                next_symbols = tf.multinomial(logits, num_samples=1)
            elif train_mode:
                next_symbols = loop_state.train_inputs[step]
            else:
                next_symbols = tf.to_int32(tf.argmax(logits, axis=1))
                int_unfinished_mask = tf.to_int32(
                    tf.logical_not(loop_state.finished))

                # Note this works only when PAD_TOKEN_INDEX is 0. Otherwise
                # this have to be rewritten
                assert PAD_TOKEN_INDEX == 0
                # Multiplying by the 0/1 mask turns symbols of already
                # finished sequences into index 0.
                next_symbols = next_symbols * int_unfinished_mask

            has_just_finished = tf.equal(next_symbols, END_TOKEN_INDEX)
            has_finished = tf.logical_or(loop_state.finished,
                                         has_just_finished)

            # rnn_outputs is written at step + 1 while logits and mask are
            # written at step.
            new_loop_state = LoopState(
                step=step + 1,
                input_symbol=next_symbols,
                train_inputs=loop_state.train_inputs,
                prev_rnn_state=next_state,
                prev_rnn_output=cell_output,
                rnn_outputs=loop_state.rnn_outputs.write(
                    step + 1, cell_output),
                prev_contexts=list(contexts),
                prev_logits=logits,
                logits=loop_state.logits.write(step, logits),
                finished=has_finished,
                mask=loop_state.mask.write(step,
                                           tf.logical_not(has_finished)),
                attention_loop_states=list(att_loop_states))
            return new_loop_state
def train(train_model=True):
    """
    Trains the agent with hyperparameters and other info loaded from mission_control_<game>.py file

    Two networks are built from ``get_agent(X_input)``: one under the
    "Action_agent" variable scope, which is optimised with RMSProp against an
    MSE loss on ``Y_target``, and one under "Target_agent", which supplies the
    bootstrap targets and is refreshed through ``copy_parameters`` every
    ``mc.target_network_update`` steps.

    In training mode a checkpoint is written every 10000 steps and once more
    when training ends, so a short run still leaves a restorable model.

    :param train_model: bool, True  -> Trains the agent
                              False -> Loads the LATEST trained agent and plays
    :return: absolutely nothing
    """
    with tf.variable_scope("Action_agent"):
        agent = get_agent(X_input)

    with tf.variable_scope("Target_agent"):
        target_agent = get_agent(X_input)

    loss = tf.losses.mean_squared_error(labels=Y_target, predictions=agent)

    # Only the variables of the action network are handed to the optimizer.
    agent_vars = [
        v for v in tf.trainable_variables()
        if v.name.startswith("Action_agent")
    ]

    optimizer = tf.train.RMSPropOptimizer(learning_rate=mc.learning_rate,
                                          momentum=mc.momentum,
                                          epsilon=mc.epsilon).minimize(
                                              loss, var_list=agent_vars)

    # Create the summary for tensorboard
    # TODO: Plot the rewards per episode
    tf.summary.scalar(name='loss', tensor=loss)
    tf.summary.scalar(name='max_q_value', tensor=tf.reduce_max(
        agent))  # TODO: Replace this to the op in the paper
    tf.summary.histogram(name='q_values_hist', values=agent)

    # TODO: Plot the length of each episode
    # TODO: Plot the argmax of the action taken for each play

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        if train_model:
            print("Training agent!")
            print("Preparing required directories")

            # Initialize global variables
            sess.run(init)

            # Used to measure time taken
            t1 = time.time()

            # Kinda like the global step, but is not a "Tensor"
            step = 0

            # Get the initial epsilon
            prob_rand = mc.prob_random

            # Epsilon is fed through a placeholder so that Tensorboard shows
            # the annealed value; handing the Python float to the summary
            # would freeze the initial value into the graph as a constant.
            epsilon_ph = tf.placeholder(tf.float32,
                                        shape=[],
                                        name='epsilon_value')
            tf.summary.scalar('epsilon', tensor=epsilon_ph)
            summary_op = tf.summary.merge_all()

            replay_memory = deque()

            if mc.load_trained_model:
                saved_models = os.listdir(mc.logdir)
                latest_saved_model = sorted(saved_models)[-1]
                run_dir = mc.logdir + latest_saved_model
                tensorboard_dir = run_dir + "/Tensorboard/"
                saved_model_dir = run_dir + "/saved_models/"
                log_dir = run_dir + "/logs/"

                checkpoint_path = tf.train.latest_checkpoint(saved_model_dir)
                saver.restore(sess, checkpoint_path)
                # Checkpoints are saved as "<prefix>-<step>", so the resume
                # step is the numeric suffix of the restored path.
                step = int(checkpoint_path.rsplit("-", 1)[-1])

                replay_memory = collect_rand_observations(
                    replay_memory, sess, agent)
            else:
                replay_memory = collect_rand_observations(
                    replay_memory)  # Get the initial 50k random observations
                tensorboard_dir, saved_model_dir, log_dir = make_directories(
                    mc.logdir)

            print("Tensorboard files stores in: {}".format(tensorboard_dir))
            print("Saved models stored in: {}".format(saved_model_dir))
            print("Log files stores in: {}".format(log_dir))

            # File writer for tensorboard
            writer = tf.summary.FileWriter(logdir=tensorboard_dir,
                                           graph=sess.graph)

            game_rewards = []

            # Save current mission control file
            with open("mission_control_breakout.py", "r") as mc_file:
                mission_control_file = mc_file.read()
                with open(log_dir + "/mission_control.txt", "w") as mc_writer:
                    mc_writer.write(mission_control_file)

            for e in range(mc.n_episodes):
                episode_header = (
                    "--------------------------Episode: {}/{}------------------------------\n"
                    .format(e + 1, mc.n_episodes))
                with open(log_dir + "/log.txt", "a") as log_file:
                    log_file.write(episode_header)
                print(episode_header)

                # Prepare first observation: the first frame is repeated to
                # fill all 4 channels of the initial state.
                observation = env.reset()
                observation = ops.convert_to_gray_n_resize(observation)
                observation = np.expand_dims(observation, axis=2)
                state = np.repeat(observation, 4, axis=2)
                state = np.expand_dims(state, axis=0)

                # TODO: Only for breakout
                lives_left = 5
                log_q_values = []
                episode_rewards = []
                for t in itertools.count():
                    mini_batch = random.sample(replay_memory, mc.batch_size)

                    agent_input = []
                    agent_target = []
                    for sample in mini_batch:
                        (state_, action_, reward_, next_state_, done_,
                         life_lost_) = sample[:6]

                        agent_input.append(state_[0])
                        target = sess.run(target_agent,
                                          feed_dict={X_input: state_})
                        if done_ or life_lost_ == 1:
                            # Terminal transition: no bootstrapped future value
                            target[0, action_] = reward_
                        else:
                            agent_output = sess.run(
                                target_agent, feed_dict={X_input: next_state_})
                            target[0, action_] = reward_ + mc.gamma * (
                                np.amax(agent_output))
                        agent_target.append(target[0])

                    batch_feed = {X_input: agent_input, Y_target: agent_target}

                    # Training the agent for 1 iterations. Finally!!
                    for _ in range(mc.fit_epochs):
                        sess.run(optimizer, feed_dict=batch_feed)

                    # Copy trained parameters from the agent to the target network
                    if (step + 1) % mc.target_network_update == 0:
                        copy_parameters(sess)

                    summary_feed = dict(batch_feed)
                    summary_feed[epsilon_ph] = prob_rand
                    batch_loss, summary = sess.run([loss, summary_op],
                                                   feed_dict=summary_feed)
                    writer.add_summary(summary, global_step=step)

                    print("\rStep: {} ({}), Episode: {}/{}, Loss: {}".format(
                        t, step, e + 1, mc.n_episodes, batch_loss),
                          end="")
                    sys.stdout.flush()

                    # Collect the next observation
                    if np.random.rand() < prob_rand:
                        action = env.action_space.sample()
                    else:
                        q_prediction = sess.run(agent,
                                                feed_dict={X_input: state})
                        action = np.argmax(q_prediction)
                        log_q_values.extend(q_prediction)
                    next_state, reward, done, info = env.step(action)
                    next_state = ops.convert_to_gray_n_resize(next_state)
                    next_state = np.expand_dims(next_state, axis=2)
                    next_state = np.expand_dims(next_state, axis=0)
                    # Newest frame goes first, followed by the first three
                    # channels of the previous state.
                    next_states = np.append(next_state,
                                            state[:, :, :, :3],
                                            axis=3)

                    life_lost = 0
                    if lives_left - info['ale.lives'] > 0:
                        life_lost = 1
                        lives_left -= 1

                    # Remove old samples from replay memory if it's full
                    if len(replay_memory) > mc.observation_time:
                        replay_memory.popleft()

                    replay_memory.append(
                        (state, action, reward, next_states, done, life_lost))
                    state = next_states
                    episode_rewards.append(reward)
                    step += 1

                    if (step + 1) % 10000 == 0:
                        # Save the agent
                        saver.save(sess,
                                   saved_model_dir + '/model',
                                   global_step=step)

                    prob_rand = anneal_epsilon(step)

                    if mc.show_ui:
                        env.render()

                    if done:
                        break

                episode_reward = np.sum(episode_rewards)
                with open(log_dir + "/log.txt", "a") as log_file:
                    log_file.write(
                        "Step: {} ({}), Play: {}/{}, Loss: {}\n".format(
                            t, step, e + 1, mc.n_episodes, batch_loss))
                    log_file.write(
                        "Reward Obtained: {}\n".format(episode_reward))
                    game_rewards.append(episode_reward)
                    x_val = np.arange(e + 1)
                    plt.plot(x_val, game_rewards)
                    plt.xlabel("Episode")
                    plt.ylabel("Reward Obtained")
                    plt.savefig("{}/Rewards.png".format(log_dir))
                    plt.close()

                    if log_q_values:
                        log_file.write("Average Q Value: {}\n".format(
                            np.mean(log_q_values)))
                    else:
                        log_file.write("All of the actions were random\n")

                print("\nReward Obtained: {}".format(episode_reward))

                if log_q_values:
                    print("Average Q Value: {}".format(np.mean(log_q_values)))
                else:
                    print("All of the actions were random")

            # Always write a final checkpoint: the periodic save above only
            # fires every 10000 steps, so a shorter run would otherwise end
            # with no model on disk and no path to report.
            saved_path = saver.save(sess,
                                    saved_model_dir + '/model',
                                    global_step=step)
            # Flush pending summaries and release the event file.
            writer.close()

            elapsed = time.time() - t1
            print("Time taken of {} Plays on your potato: {:.4f}s".format(
                mc.n_episodes, elapsed))
            print("Average time for each Play: {:.4f}s".format(
                elapsed / mc.n_episodes))
            print("Tensorboard files saved in: {}".format(tensorboard_dir))
            print("Model saved in: {}".format(saved_path))
            print("Model parameters stored in: {}".format(
                log_dir + "/mission_control.txt"))
            print("Agent get to roll!")
            with open(log_dir + "/log.txt", "a") as log_file:
                log_file.write(
                    "Time taken of {} episodes on your potato: {:.4f}s\n".
                    format(mc.n_episodes, elapsed))
                log_file.write(
                    "Average time for each episode: {:.4f}s\n".format(
                        elapsed / mc.n_episodes))
        else:
            # Get the latest trained model
            saved_models = os.listdir(mc.logdir)
            latest_saved_model = sorted(saved_models)[-1]
            saver.restore(
                sess,
                tf.train.latest_checkpoint(mc.logdir + latest_saved_model +
                                           "/saved_models/"))
            print("Getting model from: {}".format(mc.logdir +
                                                  latest_saved_model +
                                                  "/saved_models/"))
            print(
                "------------------------Playing----------------------------")
            play(sess=sess,
                 agent=agent,
                 no_plays=mc.n_episodes,
                 log_dir=None,
                 show_ui=mc.show_ui,
                 show_action=mc.show_action)
Example #37
0
def multihead_attention(queries,
                        memories,
                        bias,
                        num_heads,
                        key_size,
                        value_size,
                        output_size,
                        keep_prob=None,
                        output=True,
                        dtype=None,
                        scope=None):
    """ Multi-head scaled-dot-product attention with input/output
        transformations.

    When ``memories`` is None the queries attend to themselves and a single
    fused projection produces queries, keys and values; otherwise queries are
    projected from ``queries`` and keys/values from ``memories``.

    :param queries: A tensor with shape [batch, length_q, depth_q]
    :param memories: A tensor with shape [batch, length_m, depth_m], or
        None for self attention
    :param bias: A tensor (see attention_bias)
    :param num_heads: An integer dividing key_size and value_size
    :param key_size: An integer
    :param value_size: An integer
    :param output_size: An integer
    :param keep_prob: A floating point number in (0, 1]
    :param output: Whether to use output transformation
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string

    :returns: A dict with the following keys:
        weights: the "weights" entry produced by multiplicative_attention
        outputs: the combined heads, passed through the output
            transformation when ``output`` is true

    :raises ValueError: if key_size or value_size is not divisible by
        num_heads
    """

    # Every head must receive an equal slice of the key and value depth.
    if key_size % num_heads != 0:
        raise ValueError("Key size (%d) must be divisible by the number of "
                         "attention heads (%d)." % (key_size, num_heads))

    if value_size % num_heads != 0:
        raise ValueError("Value size (%d) must be divisible by the number of "
                         "attention heads (%d)." % (value_size, num_heads))

    with tf.variable_scope(scope,
                           default_name="multihead_attention",
                           values=[queries, memories],
                           dtype=dtype):
        if memories is not None:
            # encoder-decoder style attention: separate q and fused kv
            query_proj = linear(queries,
                                key_size,
                                True,
                                True,
                                scope="q_transform")
            fused_kv = linear(memories,
                              key_size + value_size,
                              True,
                              scope="kv_transform")
            key_proj, value_proj = tf.split(fused_kv,
                                            [key_size, value_size],
                                            axis=-1)
        else:
            # self attention: one fused projection for q, k and v
            fused_qkv = linear(queries,
                               key_size * 2 + value_size,
                               True,
                               True,
                               scope="qkv_transform")
            query_proj, key_proj, value_proj = tf.split(
                fused_qkv, [key_size, key_size, value_size], axis=-1)

        # split heads
        query_heads = split_heads(query_proj, num_heads)
        key_heads = split_heads(key_proj, num_heads)
        value_heads = split_heads(value_proj, num_heads)

        # scale query by 1 / sqrt(per-head key depth)
        per_head_depth = key_size // num_heads
        query_heads = query_heads * per_head_depth**-0.5

        # attention
        attended = multiplicative_attention(query_heads, key_heads,
                                            value_heads, bias, keep_prob)

        # combine heads
        merged = combine_heads(attended["outputs"])

        if not output:
            return {"weights": attended["weights"], "outputs": merged}

        projected = linear(merged,
                           output_size,
                           True,
                           True,
                           scope="output_transform")
        return {"weights": attended["weights"], "outputs": projected}
Example #38
0
    def _dual_pointer_decoder(self, decoder_input, decoder_init_state,
                              decoder_hidden, pointing_memory):
        '''
        Dual pointer-network decoder and training-op layer.

        Runs two layer-normalised LSTM cells (object and subject) over the
        decoder input for ``self.max_entities`` steps, points into
        ``pointing_memory`` with multi-head attention at every step, and
        derives relation logits from the attention outputs. Predictions,
        losses, the train op and the saver are stored on ``self``.

        :param decoder_input: decoder input (unstacked along axis 1 into steps)
        :param decoder_init_state: initial decoder state; the encoder's final state is used
        :param decoder_hidden: decoder hidden-layer size
        :param pointing_memory: target that the decoder points at
        :return: None
        '''
        with tf.variable_scope("decoder_v3"):
            with tf.variable_scope("object_cell_define"):
                obj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    decoder_hidden, dropout_keep_prob=self.keep_pob)
                obj_state = decoder_init_state

            with tf.variable_scope("subject_cell_define"):
                subj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    decoder_hidden, dropout_keep_prob=self.keep_pob)
                subj_state = decoder_init_state

            with tf.variable_scope("decoder_input_layer"):
                step_inputs = tf.unstack(decoder_input, axis=1)

            with tf.variable_scope("decoding_triple", reuse=tf.AUTO_REUSE):
                # Dual pointing part: per-step logits are collected here
                # and stacked along axis 1 after the loop.
                obj_steps = []
                rel_steps = []
                subj_steps = []
                rev_rel_steps = []

                for step_idx in range(self.max_entities):
                    step_input = step_inputs[step_idx]

                    # Both cells read the same step input; each carries
                    # its own recurrent state forward.
                    obj_out, obj_state = obj_cell(step_input, obj_state)
                    subj_out, subj_state = subj_cell(step_input, subj_state)

                    obj_query = tf.expand_dims(obj_out, axis=1)
                    subj_query = tf.expand_dims(subj_out, axis=1)

                    # Pointing is performed with multi-head attention
                    rel_context, obj_pointer = self._multi_head_attention(
                        key=pointing_memory,
                        query=obj_query,
                        value=pointing_memory,
                        attention_name="object_pointing")
                    rev_context, subj_pointer = self._multi_head_attention(
                        key=pointing_memory,
                        query=subj_query,
                        value=pointing_memory,
                        attention_name="subject_pointing")

                    # Drop the axis that was added for the query above
                    obj_pointer = tf.squeeze(obj_pointer, axis=1)
                    subj_pointer = tf.squeeze(subj_pointer, axis=1)
                    rel_context = tf.squeeze(rel_context, axis=1)
                    rev_context = tf.squeeze(rev_context, axis=1)

                    rel_logit = tf.layers.dense(
                        rel_context,
                        units=self.relation_vocab_size,
                        activation=tf.nn.leaky_relu,
                        name="relation_label")
                    rev_rel_logit = tf.layers.dense(
                        rev_context,
                        units=self.relation_vocab_size,
                        activation=tf.nn.leaky_relu,
                        name="rev_relation_label")

                    obj_steps.append(obj_pointer)
                    rel_steps.append(rel_logit)
                    subj_steps.append(subj_pointer)
                    rev_rel_steps.append(rev_rel_logit)

                object_logits = tf.stack(obj_steps, axis=1)
                relation_logits = tf.stack(rel_steps, axis=1)
                subject_logits = tf.stack(subj_steps, axis=1)
                rev_relation_logits = tf.stack(rev_rel_steps, axis=1)

                self.object_predicts = tf.argmax(object_logits, axis=-1)
                self.relation_predicts = tf.argmax(relation_logits, axis=-1)
                self.subject_predicts = tf.argmax(subject_logits, axis=-1)
                self.rev_relation_predicts = tf.argmax(rev_relation_logits,
                                                       axis=-1)

            with tf.variable_scope("training_layer"):
                # Training-op part
                raw_object_loss = tf.losses.sparse_softmax_cross_entropy(
                    logits=object_logits,
                    labels=self.object_target,
                    weights=self.relation_weight)
                raw_relation_loss = tf.losses.sparse_softmax_cross_entropy(
                    logits=relation_logits,
                    labels=self.relation_target,
                    weights=self.relation_weight)
                raw_subject_loss = tf.losses.sparse_softmax_cross_entropy(
                    logits=subject_logits,
                    labels=self.subject_target,
                    weights=self.rev_relation_weight)
                raw_rev_relation_loss = tf.losses.sparse_softmax_cross_entropy(
                    logits=rev_relation_logits,
                    labels=self.rev_relation_target,
                    weights=self.rev_relation_weight)

                self.object_loss = tf.reduce_mean(raw_object_loss)
                self.re_loss = tf.reduce_mean(raw_relation_loss)
                self.subject_loss = tf.reduce_mean(raw_subject_loss)
                self.rev_re_loss = tf.reduce_mean(raw_rev_relation_loss)

                # Pointer losses carry weight 0.4 each, relation losses 0.1
                self.loss = (0.4 * self.object_loss) + (
                    0.4 * self.subject_loss) + (0.1 * self.re_loss) + (
                        0.1 * self.rev_re_loss)

                # Adam optimizer with EMA over the trainable parameters
                adam = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate)
                self._gradients = adam.compute_gradients(self.loss)
                apply_step = adam.apply_gradients(
                    self._gradients, global_step=self.global_step)
                moving_average = tf.train.ExponentialMovingAverage(
                    decay=0.9999)

                # The averages are updated only after the gradient step ran
                with tf.control_dependencies([apply_step]):
                    average_step = moving_average.apply(
                        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
                    self.train_op = tf.group(average_step)

                self.saver = tf.train.Saver(tf.global_variables(),
                                            max_to_keep=10)
    def _transformer_layer(self,
                           inputs,
                           decoder_inputs,
                           drop_rate,
                           is_training,
                           scope='Transformer_body',
                           reuse=tf.AUTO_REUSE):
        """Build the encoder-decoder Transformer body and return its logits.

        Both sides embed their ids with positional encoding, apply dropout,
        and pass through ``self._num_blocks`` sublayers; every decoder block
        also receives the final encoder output. A dense layer then projects
        the decoder output onto the target vocabulary.

        :param inputs: encoder input ids
        :param decoder_inputs: decoder input ids
        :param drop_rate: dropout rate used for the embeddings and sublayers
        :param is_training: whether dropout is active
        :param scope: variable scope wrapping the whole body
        :param reuse: reuse setting for that variable scope
        :return: logits with ``len(self.target_int2vocab)`` units
        """
        source_vocab_size = len(self.input_int2vocab)
        target_vocab_size = len(self.target_int2vocab)

        with tf.variable_scope(name_or_scope=scope, reuse=reuse):
            with tf.name_scope('ENCODER'):
                # Input Embedding + Positional Encoding
                encoder_out = embedding(ids=inputs,
                                        vocab_size=source_vocab_size,
                                        embed_dim=self._num_units,
                                        zeropad=True,
                                        pos=True,
                                        scope='enc_embedding',
                                        reuse=False)
                encoder_out = tf.layers.dropout(inputs=encoder_out,
                                                rate=drop_rate,
                                                training=is_training)

                # Encoder Blocks (scopes are numbered from 1)
                for block_idx in range(self._num_blocks):
                    encoder_out = encoding_sublayer(
                        input_embedding=encoder_out,
                        num_units=self._num_units,
                        num_heads=self._num_heads,
                        drop_rate=drop_rate,
                        is_training=is_training,
                        scope='enc_block_{}'.format(block_idx + 1),
                        reuse=False)

            with tf.name_scope('DECODER'):
                decoder_out = embedding(ids=decoder_inputs,
                                        vocab_size=target_vocab_size,
                                        embed_dim=self._num_units,
                                        zeropad=True,
                                        pos=True,
                                        scope='dec_embedding',
                                        reuse=False)
                decoder_out = tf.layers.dropout(inputs=decoder_out,
                                                rate=drop_rate,
                                                training=is_training)

                # Decoding Blocks, each fed the final encoder output
                for block_idx in range(self._num_blocks):
                    decoder_out = decoding_sublayer(
                        output_embedding=decoder_out,
                        input_embedding=encoder_out,
                        num_units=self._num_units,
                        num_heads=self._num_heads,
                        drop_rate=drop_rate,
                        is_training=is_training,
                        scope='dec_block_{}'.format(block_idx + 1),
                        reuse=False)

            # Final linear projection
            with tf.name_scope('FINAL_DENSE'):
                vocab_logits = tf.layers.dense(inputs=decoder_out,
                                               units=target_vocab_size)

        return vocab_logits
# Declare static shapes: `image` is rank 4 with 3 in the last dimension
# (presumably NHWC RGB -- TODO confirm); every entry of `y_true` is rank 5
# with all dimensions left unknown.
image.set_shape([None, None, None, 3])
for y in y_true:
    y.set_shape([None, None, None, None, None])

##################
# Model definition
##################
# Unpruned baseline, kept for reference (disabled):
# yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay, args.weight_decay)
# with tf.variable_scope('yolov3'):
#     pred_feature_maps = yolo_model.forward(image, is_training=is_training)
# loss = yolo_model.compute_loss(pred_feature_maps, y_true)
# y_pred = yolo_model.predict(pred_feature_maps)

# Active configuration: the slimming variant of the model, built under the
# 'yolov3' variable scope with a prune factor of 0.8 (first pruning pass).
yolo_model = sliming_yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay, args.weight_decay)
############################## first prune #################################################################################
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training)
############################################################################################################################

# The later pruning passes below are disabled; they differ from the first
# pass only in the `prune_cnt` argument.
# ############################## second prune #################################################################################
# with tf.variable_scope('yolov3'):
#     pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training, prune_cnt=2)
# ############################################################################################################################

# ############################## third prune #################################################################################
# with tf.variable_scope('yolov3'):
#     pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training, prune_cnt=3)
# ############################################################################################################################

############################## fourth prune #################################################################################
# with tf.variable_scope('yolov3'):
Example #41
0
def osvos(inputs, scope='osvos'):
	"""Build the OSVOS network graph.

	A five-stage stack of 3x3 convolutions (with 2x2 max-pooling between
	stages) feeds four side branches taken from stages 2 to 5. Every branch
	is upsampled by a non-trainable transposed convolution and then passed
	through ``crop_features`` together with the dynamic shape of ``inputs``.
	The four 16-channel branches are concatenated along axis 3 and fused by
	a 1x1 convolution into the returned single-channel tensor.

	Args:
	inputs: Tensorflow placeholder that contains the input image
	scope: Scope name for the network
	Returns:
	net: Output Tensor of the network
	end_points: Dictionary with all Tensors of the network
	"""
	im_size = tf.shape(inputs)

	# (stage index, number of stacked 3x3 convolutions, output depth)
	backbone_spec = (
		(1, 2, 64),
		(2, 2, 128),
		(3, 3, 256),
		(4, 3, 512),
		(5, 3, 512),
	)
	# (stage index, scope of the 16-channel side convolution, upsampling stride)
	side_spec = (
		(2, 'conv2_2_16', 2),
		(3, 'conv3_3_16', 4),
		(4, 'conv4_3_16', 8),
		(5, 'conv5_3_16', 16),
	)

	with tf.variable_scope(scope, 'osvos', [inputs]) as sc:
		end_points_collection = sc.name + '_end_points'
		# Every conv / pool output is recorded in the end-points collection.
		with slim.arg_scope([slim.conv2d, slim.max_pool2d],
				padding='SAME',
				outputs_collections=end_points_collection):
			stage_out = {}
			net = inputs
			for stage, n_convs, depth in backbone_spec:
				if stage > 1:
					# Pool the previous stage before starting the next one.
					net = slim.max_pool2d(net, [2, 2], scope='pool%d' % (stage - 1))
				net = slim.repeat(net, n_convs, slim.conv2d, depth, [3, 3],
						scope='conv%d' % stage)
				stage_out[stage] = net

			# Side branches are linear (no activation function).
			with slim.arg_scope([slim.conv2d], activation_fn=None):
				side_feats = [
					slim.conv2d(stage_out[stage], 16, [3, 3], scope=conv_scope)
					for stage, conv_scope, _ in side_spec
				]

				# Single-channel score maps used to supervise each side branch.
				side_scores = [
					slim.conv2d(feat, 1, [1, 1], scope='score-dsn_%d' % stage)
					for feat, (stage, _, _) in zip(side_feats, side_spec)
				]

				with slim.arg_scope([slim.convolution2d_transpose],
						activation_fn=None, biases_initializer=None, padding='VALID',
						outputs_collections=end_points_collection, trainable=False):

					def upsample_and_crop(tensor, channels, stride, up_scope, cropped_name):
						# Transposed conv with kernel size 2 * stride, then crop and
						# register the cropped tensor under an explicit end-point name.
						up = slim.convolution2d_transpose(tensor, channels, 2 * stride, stride,
								scope=up_scope)
						up = crop_features(up, im_size)
						utils.collect_named_outputs(end_points_collection, cropped_name, up)
						return up

					# Side outputs
					for score, (stage, _, stride) in zip(side_scores, side_spec):
						upsample_and_crop(score, 1, stride,
								'score-dsn_%d-up' % stage,
								'osvos/score-dsn_%d-cr' % stage)

					# Main output
					fuse_inputs = [
						upsample_and_crop(feat, 16, stride,
								'score-multi%d-up' % stage,
								'osvos/side-multi%d-cr' % stage)
						for feat, (stage, _, stride) in zip(side_feats, side_spec)
					]
				concat_side = tf.concat(fuse_inputs, axis=3)

				net = slim.conv2d(concat_side, 1, [1, 1], scope='upscore-fuse')

		end_points = slim.utils.convert_collection_to_dict(end_points_collection)
		return net, end_points
Example #42
0
    def _multi_head_attention(self,
                              key,
                              query,
                              value,
                              attention_name,
                              num_heads=8,
                              head_size=32,
                              intermediate_size=512,
                              return_type="concat"):
        '''
        Multi-head attention.

        Each of query/key/value is projected to ``num_heads * head_size``
        units, split into ``num_heads`` slices on the last axis, and every
        slice goes through its own dense layer. The heads are stacked on
        axis 0, attended with scaled dot-product attention, and finally
        re-joined on axis 2 (so the inputs are presumably rank-3 tensors --
        TODO confirm against callers).

        :param key: key
        :param query: query
        :param value: value; for self attention key, query and value are all
            the same tensor
        :param attention_name: variable scope name
        :param num_heads: number of heads
        :param head_size: head size, i.e. the dimensionality after splitting
        :param intermediate_size: units of the final dense layer (only used
            when ``return_type == "dense"``)
        :param return_type: selects how the result is produced:
            "concat"   -> heads concatenated with ``query`` on the last axis;
            "dense"    -> same concatenation followed by a dense layer;
            "residual" -> heads added element-wise to ``query``;
            anything else -> the heads alone.
        :return: tuple ``(output, multi_head_align)`` where
            ``multi_head_align`` is the sum over heads of the scaled
            (pre-softmax) query/key scores.
        '''
        projected_units = num_heads * head_size

        def _dense_per_head(heads):
            # One independent (auto-named) dense layer per head slice.
            return [
                tf.layers.dense(head, head_size, activation=tf.nn.leaky_relu)
                for head in heads
            ]

        with tf.variable_scope(name_or_scope=attention_name):
            # Full-width projections; created in query, key, value order.
            q_proj = tf.layers.dense(query,
                                     units=projected_units,
                                     activation=tf.nn.leaky_relu,
                                     name="query")
            k_proj = tf.layers.dense(key,
                                     units=projected_units,
                                     activation=tf.nn.leaky_relu,
                                     name="key")
            v_proj = tf.layers.dense(value,
                                     units=projected_units,
                                     activation=tf.nn.leaky_relu,
                                     name="value")

            q_heads = tf.split(q_proj, num_heads, axis=-1)
            k_heads = tf.split(k_proj, num_heads, axis=-1)
            v_heads = tf.split(v_proj, num_heads, axis=-1)

            q_heads = _dense_per_head(q_heads)
            k_heads = _dense_per_head(k_heads)
            v_heads = _dense_per_head(v_heads)

            # Stack all heads along axis 0 so one matmul serves every head.
            q_stacked = tf.concat(q_heads, axis=0)
            k_stacked = tf.concat(k_heads, axis=0)
            v_stacked = tf.concat(v_heads, axis=0)

            scores = tf.matmul(q_stacked, k_stacked, transpose_b=True)
            scaled_scores = scores / (head_size**0.5)
            attention_weights = tf.nn.softmax(scaled_scores, -1)

            attended = tf.matmul(attention_weights, v_stacked)

            # query_step * key_step
            multi_head_align = tf.add_n(
                tf.split(scaled_scores, num_heads, axis=0))
            # Undo the axis-0 stacking and place the heads side by side.
            multi_head_output = tf.concat(
                tf.split(attended, num_heads, axis=0), axis=2)

            if return_type in ("concat", "dense"):
                residual_output = tf.concat([multi_head_output, query],
                                            axis=-1)
                if return_type == "dense":
                    residual_output = tf.layers.dense(
                        residual_output,
                        intermediate_size,
                        activation=tf.nn.leaky_relu,
                        name="mh_output")
            elif return_type == "residual":
                residual_output = multi_head_output + query
            else:
                residual_output = multi_head_output

        return residual_output, multi_head_align
Example #43
0
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None):
  """Generator for Xception models.

  This function generates a family of Xception models. See the xception_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce Xception of various depths.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
      floating point. If a pretrained checkpoint is used, pixel values should be
      the same as during training (see go/slim-classification-models for
      specifics).
    blocks: A list of length equal to the number of Xception blocks. Each
      element is an Xception Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks.
      If 0 or None, we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    keep_prob: Keep probability used in the pre-logits dropout layer.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution. Must be a multiple of 2.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is 0 or None,
      then net is the output of the last Xception block, potentially after
      global average pooling. If num_classes is a non-zero integer, net contains
      the pre-softmax activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If output_stride is not a multiple of 2, or if the target
      output_stride cannot be reached by the given blocks.
  """
  with tf.variable_scope(
      scope, 'xception', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + 'end_points'
    with slim.arg_scope([slim.conv2d,
                         slim.separable_conv2d,
                         xception_module,
                         stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([slim.batch_norm], is_training=is_training):
        net = inputs
        if output_stride is not None:
          if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
          # The stride-2 root convolution below already accounts for a factor
          # of 2, so the blocks only have to reach the remaining stride.
          # Floor division is exact here (the value is even) and, unlike `/`,
          # keeps an integer stride an integer under Python 3.
          output_stride //= 2
        # Root block function operated on inputs.
        net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                       scope='entry_flow/conv1_1')
        net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                       scope='entry_flow/conv1_2')

        # Extract features for entry_flow, middle_flow, and exit_flow.
        net = stack_blocks_dense(net, blocks, output_stride)

        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection, clear_collection=True)

        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)
          end_points['global_pool'] = net
        if num_classes:
          net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training,
                             scope='prelogits_dropout')
          # 1x1 convolution acting as the classification layer (no activation,
          # no normalization).
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
          end_points[sc.name + '/logits'] = net
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
def bottleneck_unit(x, out_chan1, out_chan2, down_stride=False, up_stride=False, name=None):
    """Build a three-convolution bottleneck residual unit.

    Modified implementation from github ry?!

    The main path applies 1x1 -> 3x3 -> 1x1 convolutions producing
    ``out_chan1``, ``out_chan1`` and ``out_chan2`` channels. The shortcut is
    the identity when the input already has ``out_chan2`` channels and a 1x1
    (transposed) convolution otherwise. Both paths are summed and passed
    through a ReLU.

    Args:
        x: 4-D input tensor; its channel count is read from axis 3.
        out_chan1: Output channels of the first two convolutions of the main
            path.
        out_chan2: Output channels of the unit.
        down_stride: If true, the first convolution of each path uses stride 2.
        up_stride: If true, the first convolution of each path is a stride-2
            transposed convolution.
        name: Suffix used to build the variable-scope and variable names.

    Returns:
        The ReLU of the sum of the shortcut and the main path.
    """

    def conv_transpose(tensor, out_chans, shape, strides, name=None):
        """Transposed convolution that enlarges height/width by `strides`."""
        in_shape = tensor.get_shape().as_list()
        in_channel = in_shape[-1]
        # conv2d_transpose filters are laid out [h, w, out_channels, in_channels].
        kernel = weight_variable([shape, shape, out_chans, in_channel], name=name)
        dynamic_shape = tf.shape(tensor)

        def dim(axis, factor):
            # Prefer the static size; fall back to the runtime size when it is
            # unknown at graph-construction time.
            size = in_shape[axis] if in_shape[axis] is not None else dynamic_shape[axis]
            return size * factor

        # With SAME padding the output spatial size is input size * stride.
        out_shape = [dim(0, 1), dim(1, strides), dim(2, strides), out_chans]
        return tf.nn.conv2d_transpose(tensor, kernel, output_shape=out_shape,
                                      strides=[1, strides, strides, 1],
                                      padding='SAME', name='conv_transpose')

    def conv(tensor, out_chans, shape, strides, name=None):
        """Square-kernel convolution of `tensor` with SAME padding."""
        in_channel = tensor.get_shape().as_list()[-1]
        kernel = weight_variable([shape, shape, in_channel, out_chans], name=name)
        return tf.nn.conv2d(tensor, kernel, strides=[1, strides, strides, 1],
                            padding='SAME', name='conv')

    def bn(tensor, name=None):
        """
        :param tensor: 4D tensor input
        :param name: name of the operation
        :return: local response normalized tensor - not using batch normalization :(
        """
        return tf.nn.lrn(tensor, depth_radius=5, bias=2, alpha=1e-4, beta=0.75, name=name)

    in_chans = x.get_shape().as_list()[3]

    first_stride = 2 if (down_stride or up_stride) else 1
    # The first layer of each path is the only one that changes resolution.
    first_layer = conv_transpose if up_stride else conv

    with tf.variable_scope('res%s' % name):
        if in_chans == out_chan2:
            # NOTE(review): the identity shortcut keeps the input resolution, so
            # combining it with a strided main path looks shape-incompatible --
            # confirm callers never request a stride when channels already match.
            b1 = x
        else:
            with tf.variable_scope('branch1'):
                b1 = first_layer(x, out_chans=out_chan2, shape=1, strides=first_stride,
                                 name='res%s_branch1' % name)
                b1 = bn(b1, 'bn%s_branch1' % name)

        with tf.variable_scope('branch2a'):
            b2 = first_layer(x, out_chans=out_chan1, shape=1, strides=first_stride,
                             name='res%s_branch2a' % name)
            b2 = bn(b2, 'bn%s_branch2a' % name)
            b2 = tf.nn.relu(b2, name='relu')

        with tf.variable_scope('branch2b'):
            b2 = conv(b2, out_chans=out_chan1, shape=3, strides=1, name='res%s_branch2b' % name)
            b2 = bn(b2, 'bn%s_branch2b' % name)
            b2 = tf.nn.relu(b2, name='relu')

        with tf.variable_scope('branch2c'):
            b2 = conv(b2, out_chans=out_chan2, shape=1, strides=1, name='res%s_branch2c' % name)
            b2 = bn(b2, 'bn%s_branch2c' % name)

        x = b1 + b2
        return tf.nn.relu(x, name='relu')
Example #45
0
def xception_module(inputs,
                    depth_list,
                    skip_connection_type,
                    stride,
                    unit_rate_list=None,
                    rate=1,
                    activation_fn_in_separable_conv=False,
                    regularize_depthwise=False,
                    outputs_collections=None,
                    scope=None):
  """An Xception module.

  The output of one Xception module is equal to the sum of `residual` and
  `shortcut`, where `residual` is the feature computed by three separable
  convolution. The `shortcut` is the feature computed by 1x1 convolution with
  or without striding. In some cases, the `shortcut` path could be a simple
  identity function or none (i.e, no shortcut).

  Note that we replace the max pooling operations in the Xception module with
  another separable convolution with striding, since atrous rate is not properly
  supported in current TensorFlow max pooling implementation.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth_list: A list of three integers specifying the depth values of one
      Xception module.
    skip_connection_type: Skip connection type for the residual path. Only
      supports 'conv', 'sum', or 'none'.
    stride: The block unit's stride. Determines the amount of downsampling of
      the units output compared to its input. It is applied by the third
      separable convolution (and by the 'conv' shortcut); the first two
      separable convolutions always use stride 1.
    unit_rate_list: A list of three integers, determining the unit rate for
      each separable convolution in the xception module. If None or empty,
      a unit rate of 1 is used for all three convolutions.
    rate: An integer, rate for atrous convolution.
    activation_fn_in_separable_conv: Includes activation function in the
      separable convolution or not.
    regularize_depthwise: Whether or not apply L2-norm regularization on the
      depthwise convolution weights.
    outputs_collections: Collection to add the Xception unit output.
    scope: Optional variable_scope.

  Returns:
    The Xception module's output.

  Raises:
    ValueError: If depth_list or a non-empty unit_rate_list does not contain
      three elements, or if the skip connection type is unsupported.
  """
  if len(depth_list) != 3:
    raise ValueError('Expect three elements in depth_list.')
  if not unit_rate_list:
    # The parameter defaults to None; fall back to a neutral multiplier so
    # the per-convolution rate below is simply `rate`.
    unit_rate_list = [1, 1, 1]
  elif len(unit_rate_list) != 3:
    raise ValueError('Expect three elements in unit_rate_list.')

  with tf.variable_scope(scope, 'xception_module', [inputs]) as sc:
    residual = inputs

    def _separable_conv(features, depth, kernel_size, depth_multiplier,
                        regularize_depthwise, rate, stride, scope):
      # Either the separable convolution applies ReLU itself, or ReLU is
      # applied to its input beforehand (pre-activation).
      if activation_fn_in_separable_conv:
        activation_fn = tf.nn.relu
      else:
        activation_fn = None
        features = tf.nn.relu(features)
      return separable_conv2d_same(features,
                                   depth,
                                   kernel_size,
                                   depth_multiplier=depth_multiplier,
                                   stride=stride,
                                   rate=rate,
                                   activation_fn=activation_fn,
                                   regularize_depthwise=regularize_depthwise,
                                   scope=scope)
    for i in range(3):
      residual = _separable_conv(residual,
                                 depth_list[i],
                                 kernel_size=3,
                                 depth_multiplier=1,
                                 regularize_depthwise=regularize_depthwise,
                                 rate=rate*unit_rate_list[i],
                                 # Only the last convolution downsamples.
                                 stride=stride if i == 2 else 1,
                                 scope='separable_conv' + str(i+1))
    if skip_connection_type == 'conv':
      # 1x1 projection so the shortcut matches the residual's depth and stride.
      shortcut = slim.conv2d(inputs,
                             depth_list[-1],
                             [1, 1],
                             stride=stride,
                             activation_fn=None,
                             scope='shortcut')
      outputs = residual + shortcut
    elif skip_connection_type == 'sum':
      outputs = residual + inputs
    elif skip_connection_type == 'none':
      outputs = residual
    else:
      raise ValueError('Unsupported skip connection type.')

    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.name,
                                            outputs)
def create_model(
    bert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    labels,
    num_labels,
    use_one_hot_embeddings,
):
    """Creates a multi-label classification model on top of BERT.

    The pooled output of a ``modeling.BertModel`` is fed through a single
    linear layer (``output_weights`` / ``output_bias``). Each label gets an
    independent sigmoid probability and the loss is the mean of the
    element-wise sigmoid cross-entropy.

    Args:
        bert_config: Configuration passed to ``modeling.BertModel``.
        is_training: Forwarded to the BERT model; when truthy, dropout with
            keep probability 0.9 is applied to the pooled output.
        input_ids: Forwarded to the BERT model as ``input_ids``.
        input_mask: Forwarded to the BERT model as ``input_mask``.
        segment_ids: Forwarded to the BERT model as ``token_type_ids``.
        labels: Target labels; cast to float32 before the loss is computed.
        num_labels: Number of output units of the classification layer.
        use_one_hot_embeddings: Forwarded to the BERT model.

    Returns:
        Tuple ``(loss, per_example_loss, logits, probabilities)``.
    """
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )

    # Classification uses the whole-segment (pooled) representation; the
    # token-level alternative would be get_sequence_output().
    pooled = bert.get_pooled_output()
    hidden_size = pooled.shape[-1].value

    weight_init = tf.truncated_normal_initializer(stddev=0.02)
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size], initializer=weight_init
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )

    with tf.variable_scope("loss"):
        # I.e., 0.1 dropout, training only.
        features = tf.nn.dropout(pooled, keep_prob=0.9) if is_training else pooled

        # Weights are stored as [num_labels, hidden_size], hence transpose_b.
        logits = tf.nn.bias_add(
            tf.matmul(features, output_weights, transpose_b=True), output_bias
        )

        # Multi-label case: independent sigmoid per label (not a softmax).
        probabilities = tf.nn.sigmoid(logits)

        labels = tf.cast(labels, tf.float32)
        tf.logging.info(
            "num_labels:{};logits:{};labels:{}".format(num_labels, logits, labels)
        )
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits
        )
        loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
Example #47
0
    def build_decoder(self):
        """Build the attention decoder for both training and inference.

        Creates the decoder cell, the output projection layer and a Luong
        attention wrapper inside the "decode" variable scope, then decodes
        twice with the same wrapped cell: once with teacher forcing
        (``TrainingHelper``) and once greedily (``GreedyEmbeddingHelper``).

        Sets ``self.output_layer``, ``self.init_state``,
        ``self.training_logits``, ``self.inference_logits``,
        ``self.c_kl_batch_train`` and ``self.c_kl_batch_inf``.
        """
        cell_units = 2 * self.lstm_hidden_units

        with tf.variable_scope("decode"):
            # NOTE(review): every iteration rebinds `dec_cell`, so only the cell
            # created in the last iteration is handed to the attention wrapper
            # below -- confirm whether stacking the layers was intended.
            for layer_no in range(1, self.num_layers + 1):
                with tf.variable_scope('decoder_{}'.format(layer_no)):
                    dec_cell = tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.LayerNormBasicLSTMCell(cell_units),
                        input_keep_prob=self.keep_prob)

            self.output_layer = Dense(self.decoder_vocab_size)

            attn_mech = attention_wrapper.LuongAttention(
                cell_units,
                self.enc_outputs,
                memory_sequence_length=self.source_sentence_length)

            attn_cell = attention_wrapper.AttentionWrapper(
                dec_cell, attn_mech, self.attention_temperature,
                self.use_hmean, self.lstm_hidden_units)

            self.init_state = attn_cell.zero_state(self.batch_size, tf.float32)

            def _run_decoder(helper):
                # Training and inference share the cell, initial state, latent
                # vector and output layer; only the helper differs.
                seq_decoder = basic_decoder.BasicDecoder(
                    attn_cell,
                    helper,
                    initial_state=self.init_state,
                    latent_vector=self.z_vector,
                    output_layer=self.output_layer)
                return decoder.dynamic_decode(
                    seq_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.decoder_num_tokens)

            with tf.name_scope("training_decoder"):
                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=self.dec_embed_input,
                    sequence_length=self.target_sentence_length,
                    time_major=False)

                train_outputs, _state, _len, self.c_kl_batch_train = _run_decoder(
                    training_helper)

                self.training_logits = tf.identity(
                    train_outputs.rnn_output, 'logits')

            with tf.name_scope("inference_decoder"):
                go_id = self.decoder_word_index['GO']
                eos_id = self.decoder_word_index['EOS']

                # One start token per batch element.
                start_tokens = tf.tile(
                    tf.constant([go_id], dtype=tf.int32),
                    [self.batch_size],
                    name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    self.decoder_embeddings, start_tokens, eos_id)

                infer_outputs, _state, _len, self.c_kl_batch_inf = _run_decoder(
                    inference_helper)

                self.inference_logits = tf.identity(
                    infer_outputs.sample_id, name='predictions')

                # Divide by respective target seq lengths
                target_lengths = tf.cast(self.target_sentence_length,
                                         dtype=tf.float32)
                self.c_kl_batch_train = tf.div(self.c_kl_batch_train,
                                               target_lengths)
Example #48
0
def stack_blocks_dense(net,
                       blocks,
                       output_stride=None,
                       outputs_collections=None):
  """Stacks Xception blocks and controls output feature density.

  First, this function creates scopes for the Xception in the form of
  'block_name/unit_1', 'block_name/unit_2', etc.

  Second, this function allows the user to explicitly control the output
  stride, which is the ratio of the input to output spatial resolution. This
  is useful for dense prediction tasks such as semantic segmentation or
  object detection.

  Control of the output feature density is implemented by atrous convolution.

  Args:
    net: A tensor of size [batch, height, width, channels].
    blocks: A list of length equal to the number of Xception blocks. Each
      element is an Xception Block object describing the units in the block.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution, which needs to be equal to
      the product of unit strides from the start up to some level of Xception.
      For example, if the Xception employs units with strides 1, 2, 1, 3, 4, 1,
      then valid values for the output_stride are 1, 2, 6, 24 or None (which
      is equivalent to output_stride=24).
    outputs_collections: Collection to add the Xception block outputs.

  Returns:
    net: Output tensor with stride equal to the specified output_stride.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  # The current_stride variable keeps track of the effective stride of the
  # activations. This allows us to invoke atrous convolution whenever applying
  # the next residual unit would result in the activations having stride larger
  # than the target output_stride.
  current_stride = 1

  # The atrous convolution rate parameter.
  rate = 1
  for block in blocks:
    with tf.variable_scope(block.scope, 'block', [net]) as sc:
      for i, unit in enumerate(block.args):
        if output_stride is not None and current_stride > output_stride:
          raise ValueError('The target output_stride cannot be reached.')
        with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
          # If we have reached the target output_stride, then we need to employ
          # atrous convolution with stride=1 and multiply the atrous rate by the
          # current unit's stride for use in subsequent layers.
          if output_stride is not None and current_stride == output_stride:
            # dict(unit, stride=1) copies the unit's arguments with the stride
            # overridden, leaving the block definition itself untouched.
            net = block.unit_fn(net, rate=rate, **dict(unit, stride=1))
            rate *= unit.get('stride', 1)
          else:
            net = block.unit_fn(net, rate=1, **unit)
            current_stride *= unit.get('stride', 1)

      # Collect activations at the block's end before performing subsampling.
      net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)

  if output_stride is not None and current_stride != output_stride:
    raise ValueError('The target output_stride cannot be reached.')

  return net
def distribute(images,
               labels,
               num_classes,
               total_num_examples,
               devices,
               is_train=True):
    """Build a data-parallel AlexNet training graph.

    The last entry of ``devices`` is handed to ``ModelBuilder`` (the
    parameter-server node); every other entry is a worker. The input batch is
    split evenly across the workers, each worker builds a replica via
    ``alexnet_inference`` and computes gradients, and the averaged gradients
    are applied on the builder's variable device.

    Args:
        images: Input image batch; split into one shard per worker.
        labels: Labels matching ``images``; split the same way.
        num_classes: Forwarded to ``alexnet_inference``.
        total_num_examples: Passed to the optimizer configuration as the
            total number of steps for the learning-rate decay schedules.
        devices: Sequence of device specs, workers first and the
            parameter-server device last. ``None`` is replaced by ``[None]``.
            NOTE(review): that fallback leaves no worker devices, so no
            replica would be built -- confirm the intended single-node
            handling.
        is_train: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(net, logits, total_loss, train_op, global_step)`` where
        ``net``, ``logits`` and ``total_loss`` come from the last worker
        replica that was built.
    """
    if devices is None:
        devices = [None]

    def configure_optimizer(global_step, total_num_steps):
        """Return a configured optimizer"""
        def exp_decay(start, tgtFactor, num_stairs):
            decay_step = total_num_steps / (num_stairs - 1)
            # Float literals keep this a true division on Python 2 as well;
            # integer division would collapse the rate to 0 ** 0 == 1.
            decay_rate = (1.0 / tgtFactor)**(1.0 / (num_stairs - 1))
            return tf.train.exponential_decay(start,
                                              global_step,
                                              decay_step,
                                              decay_rate,
                                              staircase=True)

        def lparam(learning_rate, momentum):
            return {'learning_rate': learning_rate, 'momentum': momentum}

        return HybridMomentumOptimizer({
            'weights':
            lparam(exp_decay(0.001, 250, 4), 0.9),
            'biases':
            lparam(exp_decay(0.002, 10, 2), 0.9),
        })

    # 1. Create the global step through the builder bound to the
    #    parameter-server device.
    builder = ModelBuilder(devices[-1])
    global_step = builder.ensure_global_step()

    # 2. Configure the optimizer shared by all replicas.
    opt = configure_optimizer(global_step, total_num_examples)

    # 3. Split the inputs into one shard per worker.
    worker_devices = devices[:-1]
    num_workers = len(worker_devices)
    image_shards = tf.split(images, num_workers)
    label_shards = tf.split(labels, num_workers)
    worker_gradients = []

    with tf.variable_scope('AlexNet') as var_scope:
        # 4. Build one replica per worker and compute its gradients.
        with tf.name_scope(''):
            for i, device in enumerate(worker_devices):
                with tf.device(device):
                    with tf.name_scope("Workers_{}".format(i)):
                        net, logits, total_loss = alexnet_inference(
                            builder, image_shards[i], label_shards[i],
                            num_classes)
                        gradient = opt.compute_gradients(total_loss)

                worker_gradients.append(gradient)
                # Later replicas must share the variables created by the
                # first one instead of creating their own.
                var_scope.reuse_variables()

    # 5. On the parameter server node, average and apply the gradients.
    with tf.device(builder.variable_device()):
        averaged_grads = builder.average_gradients(worker_gradients)
        apply_op = opt.apply_gradients(averaged_grads, global_step=global_step)
        all_train_op = tf.group(apply_op, name='AllTrainOps')

    # 6. Return required values.
    return net, logits, total_loss, all_train_op, global_step
Example #50
0
import os
import collections

import numpy as np

import tensorflow as tf

from tensorflow.contrib.rnn.python.ops.rnn_cell import Conv2DLSTMCell

# Shape settings for one timestep; the batch and time axes are left unspecified.
batch_size = None
max_time = None
input_shape = [128, 128, 25]

with tf.variable_scope('rnn', reuse=tf.AUTO_REUSE) as vs:
    # Convolutional LSTM cell operating on frames of `input_shape`.
    # TODO: add support for padding='valid'
    cell = Conv2DLSTMCell(
        input_shape=input_shape,
        output_channels=2,
        kernel_shape=[7, 7],
        use_bias=True,
        name='conv_2d_lstm_cell_1',
    )
    # Input placeholder: [batch_size, max_time] followed by the per-frame shape.
    x_input = tf.placeholder(
        dtype=tf.float32,
        shape=[batch_size, max_time] + input_shape,
        name='rnn_input',
    )
    # Define rnn layer
Example #51
0
    def __init__(self, pretrained_embeddings, flags):
        """
        Assemble the model graph: placeholders, the "qa" sub-graph, the train op and a saver.

        :param pretrained_embeddings: kept on the instance as ``self.pretrained_embeddings``
        :param flags: configuration object; this constructor reads ``state_size``,
            ``output_size``, ``question_size``, ``embedding_size``, ``dropout``,
            ``learning_rate``, ``grad_clip`` and, when clipping is enabled,
            ``max_gradient_norm`` from it
        """
        self.pretrained_embeddings = pretrained_embeddings
        self.flags = flags

        # Mirror the size/dropout settings from the flags onto the instance.
        self.h_size = self.flags.state_size
        self.p_size = self.flags.output_size
        self.q_size = self.flags.question_size
        self.embed_size = self.flags.embedding_size
        self.dropout = self.flags.dropout

        # Both sub-modules receive (1 - dropout) as their `dropout` argument.
        self.encoder = Encoder(
            hidden_size=self.h_size,
            dropout=(1.0 - self.flags.dropout),
        )
        self.decoder = Decoder(
            hidden_size=self.h_size,
            output_size=self.p_size,
            dropout=(1.0 - self.flags.dropout),
        )

        # ---- placeholders (leading dimension left unspecified) ----
        self.context_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.p_size), name='context_placeholder')
        self.question_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.q_size), name='question_placeholder')
        self.answer_span_placeholder = tf.placeholder(
            tf.int32, shape=(None, 2), name='answer_span_placeholder')
        self.mask_q_placeholder = tf.placeholder(
            tf.int32, shape=(None,), name='mask_q_placeholder')
        self.mask_ctx_placeholder = tf.placeholder(
            tf.int32, shape=(None,), name='mask_ctx_placeholder')
        self.dropout_placeholder = tf.placeholder(
            tf.float32, shape=(), name='dropout_placeholder')

        # ---- model pieces, all created under the "qa" variable scope ----
        with tf.variable_scope("qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()

        # ---- training / update procedure ----
        self.global_step = tf.Variable(0, trainable=False)
        self.starter_learning_rate = self.flags.learning_rate

        # A constant learning rate is used; the decay schedule below is disabled.
        # self.learning_rate = tf.train.exponential_decay(self.starter_learning_rate, self.global_step,
        #                                    1000, 0.96, staircase=True)
        self.learning_rate = self.starter_learning_rate

        # `get_optimizer` yields a callable that takes the learning rate.
        self.optimizer = get_optimizer("adam")

        if not self.flags.grad_clip:
            # No clipping: `self.optimizer` stays the un-instantiated callable.
            self.train_op = self.optimizer(self.learning_rate).minimize(
                self.loss, global_step=self.global_step)
        else:
            # Clipping: instantiate the optimizer, clip every non-None gradient
            # by norm, then apply the resulting (gradient, variable) pairs.
            self.optimizer = self.optimizer(self.learning_rate)
            clipped_pairs = [
                (grad, var) if grad is None
                else (tf.clip_by_norm(grad, self.flags.max_gradient_norm), var)
                for grad, var in self.optimizer.compute_gradients(self.loss)
            ]
            self.train_op = self.optimizer.apply_gradients(
                clipped_pairs, global_step=self.global_step)

        self.saver = tf.train.Saver()
    def build_autoencoder(self, input_tensor, name, reuse=False):
        """
        Autoencoder part of the generator, responsible for capturing image
        context information.

        The layers are created in order inside ``tf.variable_scope(name)``:
        six plain convolutions, four dilated convolutions (rates 2, 4, 8, 16),
        two more convolutions, then two deconvolution stages that are each
        summed with an earlier activation before a further convolution.
        Three 3-channel outputs are produced from different depths.

        :param input_tensor: tensor fed to the first convolution
        :param name: variable scope name under which all layers are created
        :param reuse: forwarded to ``tf.variable_scope`` as its ``reuse`` flag
        :return: dict with keys 'skip_1', 'skip_2' and 'skip_3'; only
            'skip_3' has tanh applied
        """
        with tf.variable_scope(name, reuse=reuse):
            # --- plain convolutions; conv_2 and conv_4 use stride 2 ---
            conv_1 = self.conv2d(inputdata=input_tensor, out_channel=64, kernel_size=5,
                                 padding='SAME',
                                 stride=1, use_bias=False, name='conv_1')
            relu_1 = self.lrelu(inputdata=conv_1, name='relu_1')

            conv_2 = self.conv2d(inputdata=relu_1, out_channel=128, kernel_size=3,
                                 padding='SAME',
                                 stride=2, use_bias=False, name='conv_2')
            relu_2 = self.lrelu(inputdata=conv_2, name='relu_2')

            conv_3 = self.conv2d(inputdata=relu_2, out_channel=128, kernel_size=3,
                                 padding='SAME',
                                 stride=1, use_bias=False, name='conv_3')
            relu_3 = self.lrelu(inputdata=conv_3, name='relu_3')

            conv_4 = self.conv2d(inputdata=relu_3, out_channel=128, kernel_size=3,
                                 padding='SAME',
                                 stride=2, use_bias=False, name='conv_4')
            relu_4 = self.lrelu(inputdata=conv_4, name='relu_4')

            conv_5 = self.conv2d(inputdata=relu_4, out_channel=256, kernel_size=3,
                                 padding='SAME',
                                 stride=1, use_bias=False, name='conv_5')
            relu_5 = self.lrelu(inputdata=conv_5, name='relu_5')

            conv_6 = self.conv2d(inputdata=relu_5, out_channel=256, kernel_size=3,
                                 padding='SAME',
                                 stride=1, use_bias=False, name='conv_6')
            relu_6 = self.lrelu(inputdata=conv_6, name='relu_6')

            # --- dilated convolutions with doubling rates 2, 4, 8, 16 ---
            dia_conv1 = self.dilation_conv(input_tensor=relu_6, k_size=3, out_dims=256, rate=2,
                                           padding='SAME', use_bias=False, name='dia_conv_1')
            relu_7 = self.lrelu(dia_conv1, name='relu_7')

            dia_conv2 = self.dilation_conv(input_tensor=relu_7, k_size=3, out_dims=256, rate=4,
                                           padding='SAME', use_bias=False, name='dia_conv_2')
            relu_8 = self.lrelu(dia_conv2, name='relu_8')

            dia_conv3 = self.dilation_conv(input_tensor=relu_8, k_size=3, out_dims=256, rate=8,
                                           padding='SAME', use_bias=False, name='dia_conv_3')
            relu_9 = self.lrelu(dia_conv3, name='relu_9')

            dia_conv4 = self.dilation_conv(input_tensor=relu_9, k_size=3, out_dims=256, rate=16,
                                           padding='SAME', use_bias=False, name='dia_conv_4')
            relu_10 = self.lrelu(dia_conv4, name='relu_10')

            # --- two more stride-1 convolutions before decoding ---
            conv_7 = self.conv2d(inputdata=relu_10, out_channel=256, kernel_size=3,
                                 padding='SAME', stride=1, use_bias=False,
                                 name='conv_7')
            relu_11 = self.lrelu(inputdata=conv_7, name='relu_11')

            conv_8 = self.conv2d(inputdata=relu_11, out_channel=256, kernel_size=3,
                                 padding='SAME', stride=1, use_bias=False,
                                 name='conv_8')
            relu_12 = self.lrelu(inputdata=conv_8, name='relu_12')

            # --- first decoding stage: stride-2 deconv, then stride-1 average pool ---
            deconv_1 = self.deconv2d(inputdata=relu_12, out_channel=128, kernel_size=4,
                                     stride=2, padding='SAME', use_bias=False, name='deconv_1')
            avg_pool_1 = self.avgpooling(inputdata=deconv_1, kernel_size=2, stride=1, padding='SAME',
                                         name='avg_pool_1')
            relu_13 = self.lrelu(inputdata=avg_pool_1, name='relu_13')

            # Skip connection: element-wise sum with relu_3 (presumably the same
            # shape after one stride-2 deconv — TODO confirm).
            conv_9 = self.conv2d(inputdata=tf.add(relu_13, relu_3), out_channel=128, kernel_size=3,
                                 padding='SAME', stride=1, use_bias=False,
                                 name='conv_9')
            relu_14 = self.lrelu(inputdata=conv_9, name='relu_14')

            # --- second decoding stage, same pattern as the first ---
            deconv_2 = self.deconv2d(inputdata=relu_14, out_channel=64, kernel_size=4,
                                     stride=2, padding='SAME', use_bias=False, name='deconv_2')
            avg_pool_2 = self.avgpooling(inputdata=deconv_2, kernel_size=2, stride=1, padding='SAME',
                                         name='avg_pool_2')
            relu_15 = self.lrelu(inputdata=avg_pool_2, name='relu_15')

            # Skip connection: element-wise sum with relu_1.
            conv_10 = self.conv2d(inputdata=tf.add(relu_15, relu_1), out_channel=32, kernel_size=3,
                                  padding='SAME', stride=1, use_bias=False,
                                  name='conv_10')
            relu_16 = self.lrelu(inputdata=conv_10, name='relu_16')

            # --- 3-channel outputs taken from three depths of the decoder ---
            # NOTE(review): the scope name 'skip_ouput_1' is misspelled; renaming
            # it would change the variable names — confirm before fixing.
            skip_output_1 = self.conv2d(inputdata=relu_12, out_channel=3, kernel_size=3,
                                        padding='SAME', stride=1, use_bias=False,
                                        name='skip_ouput_1')

            skip_output_2 = self.conv2d(inputdata=relu_14, out_channel=3, kernel_size=3,
                                        padding='SAME', stride=1, use_bias=False,
                                        name='skip_output_2')

            skip_output_3 = self.conv2d(inputdata=relu_16, out_channel=3, kernel_size=3,
                                        padding='SAME', stride=1, use_bias=False,
                                        name='skip_output_3')
            # Conventional GAN output layers are all activated with tanh.
            skip_output_3 = tf.nn.tanh(skip_output_3, name='skip_output_3_tanh')

            ret = {
                'skip_1': skip_output_1,
                'skip_2': skip_output_2,
                'skip_3': skip_output_3
            }

        return ret
Example #53
0
def _build_pnasnet_base(images,
                        normal_cell,
                        num_classes,
                        hparams,
                        is_training,
                        final_endpoint=None):
    """Constructs a PNASNet image model.

    Builds the stem, then `hparams.num_cells` cells (optionally attaching an
    auxiliary head), and finally the pooled / dropout / fully-connected /
    softmax head. Every intermediate tensor is recorded in `end_points`, and
    construction stops early once `final_endpoint` has been recorded.

    Args:
      images: input tensor handed to the stem builder.
      normal_cell: callable used for the stem and for every cell.
      num_classes: number of output classes; when falsy the function returns
        right after global pooling and no auxiliary head is built.
      hparams: hyper-parameter object (num_cells, num_reduction_layers,
        filter_scaling_rate, skip_reduction_layer_input,
        use_bounded_activation, use_aux_head, dense_dropout_keep_prob).
      is_training: the auxiliary head is only built when this is truthy.
      final_endpoint: optional endpoint name at which to stop building.

    Returns:
      A `(tensor, end_points)` pair. On a normal run the tensor is the logits.
      NOTE(review): when stopping at 'Logits' or 'Predictions' the returned
      tensor is the pre-logits `net`, not the logits/predictions — confirm
      this is intended.
    """
    end_points = {}

    def _record(endpoint_name, tensor):
        # Store the tensor and report (truthy) whether building should stop here.
        end_points[endpoint_name] = tensor
        return final_endpoint and (endpoint_name == final_endpoint)

    # Indices of the cells that act as reduction (stride-2) cells.
    reduction_indices = nasnet_utils.calc_reduction_layers(
        hparams.num_cells, hparams.num_reduction_layers)

    # pylint: disable=protected-access
    net, cell_outputs = nasnet._imagenet_stem(images, hparams, normal_cell)
    # pylint: enable=protected-access
    if _record('Stem', net):
        return net, end_points

    # The auxiliary head hangs off the cell just before the second reduction.
    if len(reduction_indices) >= 2:
        aux_head_cell_idxes = [reduction_indices[1] - 1]
    else:
        aux_head_cell_idxes = []

    if hparams.use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    filter_scaling = 1.0
    # Cell numbering starts at 2 to account for the stem cells.
    true_cell_num = 2
    for cell_num in range(hparams.num_cells):
        is_reduction = cell_num in reduction_indices
        if is_reduction:
            stride = 2
            filter_scaling *= hparams.filter_scaling_rate
        else:
            stride = 1
        # NOTE(review): `prev_layer` keeps its previous value for reduction
        # cells when skip_reduction_layer_input is false, and would be unbound
        # if that happened on the very first cell.
        if hparams.skip_reduction_layer_input or not is_reduction:
            prev_layer = cell_outputs[-2]
        net = normal_cell(
            net,
            scope='cell_{}'.format(cell_num),
            filter_scaling=filter_scaling,
            stride=stride,
            prev_layer=prev_layer,
            cell_num=true_cell_num)
        if _record('Cell_{}'.format(cell_num), net):
            return net, end_points
        true_cell_num += 1
        cell_outputs.append(net)

        wants_aux_head = (hparams.use_aux_head
                          and cell_num in aux_head_cell_idxes
                          and num_classes
                          and is_training)
        if wants_aux_head:
            aux_net = activation_fn(net)
            # pylint: disable=protected-access
            nasnet._build_aux_head(
                aux_net,
                end_points,
                num_classes,
                hparams,
                scope='aux_{}'.format(cell_num))
            # pylint: enable=protected-access

    # Final softmax layer
    with tf.variable_scope('final_layer'):
        net = nasnet_utils.global_avg_pool(activation_fn(net))
        if _record('global_pool', net) or not num_classes:
            return net, end_points

        net = slim.dropout(
            net, hparams.dense_dropout_keep_prob, scope='dropout')
        logits = slim.fully_connected(net, num_classes)
        if _record('Logits', logits):
            return net, end_points

        predictions = tf.nn.softmax(logits, name='predictions')
        if _record('Predictions', predictions):
            return net, end_points
    return logits, end_points
Example #54
0
    def test(self):
        """Exercise the ops returned by `create_train_ops` end to end.

        Two identical scopes ('update_scope' and 'apply_scope') are built. The
        update ops are run on two batches, the apply ops once, and the zero
        ops once; after each stage the variables and the 'grad_buf' variables
        are compared against hand-computed values.
        """
        global grad_bufs
        global sess

        initial_values = {
            'w1': np.array([10.0, 20.0]).astype(np.float32),
            'w2': np.array([5.0, 10.0]).astype(np.float32),
        }
        scope_names = ['update_scope', 'apply_scope']

        tf.reset_default_graph()
        sess = tf.Session()

        batch_ph = tf.placeholder(tf.float32, [None, 2])

        scope_vars = {}
        scope_losses = {}
        for scope_name in scope_names:
            with tf.variable_scope(scope_name):
                w1 = tf.Variable(initial_values['w1'], name='w1')
                w2 = tf.Variable(initial_values['w2'], name='w2')
                # NB reduce_sum is necessary to ensure that the gradients
                # accumulated for multiple examples in a batch are the same as
                # if the examples were presented in individual batches
                scope_losses[scope_name] = tf.reduce_sum(
                    w1 + batch_ph * w2, axis=-1)
                scope_vars[scope_name] = {'w1': w1, 'w2': w2}

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1)

        # Check that no extra trainable variables have been introduced:
        # two variables, two scopes, for a total of 4 trainable variables.
        assert len(tf.trainable_variables()) == 4

        update_ops, apply_ops, zero_ops = create_train_ops(
            scope_losses['update_scope'], optimizer,
            'update_scope', 'apply_scope')

        assert len(tf.trainable_variables()) == 4

        sess.run(tf.global_variables_initializer())

        grad_bufs = {
            variable.name: variable
            for variable in tf.global_variables()
            if 'grad_buf' in variable.name
        }

        # Check that the gradient buffers start out zero.
        assert_grad_bufs_zero()

        # so the first loss term looks like w1 + 1 * w2
        # and the second term looks like w1 + 2 * w2
        sess.run(update_ops, feed_dict={batch_ph: [[1, 1], [2, 2]]})

        # Confirm that no changes have taken place to the trainable
        # variables yet in either scope.
        for scope_name in scope_names:
            for var_name, variable in scope_vars[scope_name].items():
                np.testing.assert_equal(
                    sess.run(variable), initial_values[var_name])

        # Confirm that the gradient buffers look reasonable.
        for buf_name, buf in grad_bufs.items():
            actual = sess.run(buf)
            # first loss term was w1 + 1 * w2
            # second was w1 + 2 * w2
            # first loss term contribution:
            # derivative wrt to each element of both vectors should be 1
            # second loss term contribution:
            # derivative wrt w1 should be 1; derivative wrt w2 should be 2
            if 'w1' in buf_name:
                expected = np.array([1., 1.]) + np.array([1., 1.])
            elif 'w2' in buf_name:
                expected = np.array([1., 1.]) + np.array([2., 2.])
            np.testing.assert_equal(actual, expected)

        # loss will be e.g. w1 + [3, 4] * w2
        sess.run(update_ops, feed_dict={batch_ph: [[3, 4], [5, 6]]})

        # Confirm that the gradient buffers still look reasonable.
        for buf_name, buf in grad_bufs.items():
            actual = sess.run(buf)
            if 'w1' in buf_name:
                expected = (np.array([1., 1.]) + np.array([1., 1.]) +
                            np.array([1., 1.]) + np.array([1., 1.]))
            elif 'w2' in buf_name:
                expected = (np.array([1., 1.]) + np.array([2., 2.]) +
                            np.array([3., 4.]) + np.array([5., 6.]))
            np.testing.assert_equal(actual, expected)

        sess.run(apply_ops)

        # Confirm that no changes have been made to the variables in
        # update_scope.
        for var_name, variable in scope_vars['update_scope'].items():
            actual = sess.run(variable)
            if 'w1' in var_name:
                expected = initial_values['w1']
            elif 'w2' in var_name:
                expected = initial_values['w2']
            np.testing.assert_equal(actual, expected)

        # Confirm that changes _have_ been made to the variables in
        # apply_scope.
        for var_name, variable in scope_vars['apply_scope'].items():
            actual = sess.run(variable)
            if 'w1' in var_name:
                # w1 started off as [10, 20];
                # gradient wrt w1 was 1 on each step,
                # and we went for 4 steps with step size of 1
                expected = [10 - 1. - 1. - 1. - 1., 20 - 1. - 1. - 1. - 1.]
            elif 'w2' in var_name:
                # w2 started off as [5, 10]
                # gradients were [1, 1], [2, 2], [3, 4], and [5, 6]
                expected = [5. - 1. - 2. - 3. - 5., 10. - 1. - 2. - 4. - 6.]
            np.testing.assert_equal(actual, expected)

        sess.run(zero_ops)

        # Check that gradient buffers have been zeroed.
        assert_grad_bufs_zero()
def features_matching(inputs_p1, inputs_p2):
    """Extract features from both inputs and feed them to `matching`.

    Everything is built under the "matching" variable scope. Each input goes
    through `features` with its own scope name ('feature_layers_1' /
    'feature_layers_2'); the two dense outputs are then passed to `matching`.

    Returns:
        A pair `(output, [layers_1, layers_2, denses])` where `output` and
        `denses` come from `matching` and `layers_*` from `features`.
    """
    with tf.variable_scope("matching"):
        # Original author's note on both feature outputs: 4*4*256.
        first_layers, first_dense = features(inputs_p1, 'feature_layers_1')
        second_layers, second_dense = features(inputs_p2, 'feature_layers_2')
        match_output, match_denses = matching(first_dense, second_dense)
        intermediates = [first_layers, second_layers, match_denses]
    return match_output, intermediates
Example #56
0
 def conv_layer(self, bottom, name):
     """Apply a stride-1 'SAME' convolution, bias add and ReLU to `bottom`.

     The filter and bias are read from `self.data_dict[name]` (index 0 and 1)
     rather than created as trainable variables.
     """
     with tf.variable_scope(name):
         kernel = self.data_dict[name][0]
         convolved = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding='SAME')
         bias = self.data_dict[name][1]
         biased = tf.nn.bias_add(convolved, bias)
         return tf.nn.relu(biased)
Example #57
0
 def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"):
   """Store the batch-norm settings on the instance.

   The attributes are assigned while the `name` variable scope is open; no
   variables are created here.
   """
   with tf.variable_scope(name):
     self.epsilon, self.momentum = epsilon, momentum
     self.name = name
Example #58
0
    def build_model(self,
                    video,
                    video_mask,
                    caption,
                    caption_mask,
                    train_flag,
                    reuse_variable=False):
        """Build the video/caption matching graph and its margin loss.

        Stores the inputs on the instance, derives three progressively
        shorter mask tensors for each modality, encodes the video and the
        caption, scores every (video, caption) pair through a fusion module
        followed by four fully connected layers, and computes a hinge-style
        margin loss over the score matrix.

        Args:
            video: video features; squeezed and reshaped below to
                [batch_size, video_steps, channel_size].
            video_mask: per-step mask for the video; also multiplied into the
                reshaped features.
            caption: caption tensor, stored as `self.caption`.
            caption_mask: per-step mask for the caption.
            train_flag: used as the batch-norm `is_training` value.
            reuse_variable: forwarded to `build_video_embedding` and
                `build_caption_encoder`.

        Sets `self.logit`, `self.scores`, `self.mean_loss` and
        `self.concept_loss`; nothing is returned.
        """

        self.video = video  # [batch_size, length, kernel, kernel, channel]
        self.video_mask = video_mask  # [batch_size, length]
        # Number of set mask entries per example, as an int32 length.
        video_mask_leng = tf.cast(tf.reduce_sum(self.video_mask, 1), tf.int32)
        self.caption = caption  # [batch_size, length]
        self.caption_mask = caption_mask  # [batch_size, length]
        caption_mask_leng = tf.cast(tf.reduce_sum(self.caption_mask, 1),
                                    tf.int32)

        #Make Mask list
        self.video_mask_list = []
        self.caption_mask_list = []
        max_len = self.config.caption_length
        # Two rounds that each shorten the lengths (floored at 1) and the
        # maximum length by 2 — presumably mirroring two width-3 'valid'
        # convolutions in the encoders; TODO confirm.
        for mi in range(2):
            video_mask_leng = tf.maximum(1, video_mask_leng - 2)
            caption_mask_leng = tf.maximum(1, caption_mask_leng - 2)
            max_len -= 2
            # The video mask is reversed along its last axis; the caption
            # mask is not.
            self.video_mask_list.append(
                tf.reverse(
                    tf.sequence_mask(video_mask_leng, max_len, tf.float32),
                    [-1]))
            self.caption_mask_list.append(
                tf.sequence_mask(caption_mask_leng, max_len, tf.float32))
        # Third mask level: lengths become (len - 1) / 2, again floored at 1.
        max_len = int((max_len - 1) / 2)
        video_mask_leng = tf.cast((video_mask_leng - 1) / 2, tf.int32)
        video_mask_leng = tf.maximum(1, video_mask_leng)
        caption_mask_leng = tf.cast((caption_mask_leng - 1) / 2, tf.int32)
        caption_mask_leng = tf.maximum(1, caption_mask_leng)
        self.video_mask_list.append(
            tf.reverse(tf.sequence_mask(video_mask_leng, max_len, tf.float32),
                       [-1]))
        self.caption_mask_list.append(
            tf.sequence_mask(caption_mask_leng, max_len, tf.float32))

        self.train_flag = train_flag

        #Batch normalization
        self.bn_fn = slim.batch_norm
        self.bn_params = {'is_training': self.train_flag}

        # Trainable word-embedding variable initialised from self.word_embed.
        self.word_embed_t = tf.Variable(self.word_embed,
                                        dtype=tf.float32,
                                        name="word_embed",
                                        trainable=True)
        #video drop
        self.squeezed_feat = tf.squeeze(self.video)
        self.embedded_feat = tf.reshape(
            self.squeezed_feat,
            [self.batch_size, self.video_steps, self.channel_size])

        #  [batch_size, length, channel_size]
        # Zero out the features at masked video steps.
        self.embedded_feat = self.embedded_feat * tf.expand_dims(video_mask, 2)

        # Factories that wrap a fresh base cell with input/output dropout.
        self.video_cell_d = lambda: rnn_cell.DropoutWrapper(
            self.video_cell(),
            input_keep_prob=self.dropout_keep_prob,
            output_keep_prob=self.dropout_keep_prob)
        self.caption_cell_d = lambda: rnn_cell.DropoutWrapper(
            self.caption_cell(),
            input_keep_prob=self.dropout_keep_prob,
            output_keep_prob=self.dropout_keep_prob)

        # Two independent multi-layer cells per modality, passed on as a pair.
        video_cell1 = rnn_cell.MultiRNNCell(
            [self.video_cell_d() for _ in range(self.config.num_layers)],
            state_is_tuple=True)
        video_cell2 = rnn_cell.MultiRNNCell(
            [self.video_cell_d() for _ in range(self.config.num_layers)],
            state_is_tuple=True)
        video_cell = [video_cell1, video_cell2]

        caption_cell1 = rnn_cell.MultiRNNCell(
            [self.caption_cell_d() for _ in range(self.config.num_layers)],
            state_is_tuple=True)
        caption_cell2 = rnn_cell.MultiRNNCell(
            [self.caption_cell_d() for _ in range(self.config.num_layers)],
            state_is_tuple=True)
        caption_cell = [caption_cell1, caption_cell2]

        video_emb_state = self.build_video_embedding(video_cell,
                                                     self.embedded_feat,
                                                     self.video_mask,
                                                     reuse_variable)
        rnn_emb_state = self.build_caption_encoder(caption_cell,
                                                   reuse_variable)

        with tf.variable_scope("multimodal",
                               initializer=self.initializer) as scope:
            margin_list = []
            logit_list = []
            # One pass per video: video i is tiled across the batch and scored
            # against every caption. Variables are created on the first pass
            # and reused afterwards.
            for i in range(self.batch_size):
                if i > 0:
                    scope.reuse_variables()
                fuse = self.fusion(tf.tile(
                    tf.expand_dims(video_emb_state[i, :, :], 0),
                    [self.batch_size, 1, 1]),
                                   rnn_emb_state,
                                   i,
                                   reuse=(i > 0))
                # Scoring MLP: 256 -> 256 -> 128 -> 1, with L2 regularisation
                # and batch norm applied through the arg scope.
                with slim.arg_scope(
                    [slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=self.bn_fn,
                        normalizer_params=self.bn_params):
                    logit = slim.fully_connected(
                        fuse,
                        256,
                        activation_fn=tf.nn.leaky_relu,
                        scope='fc1',
                        reuse=(i > 0))
                    logit = slim.fully_connected(
                        logit,
                        256,
                        activation_fn=tf.nn.leaky_relu,
                        scope='fc2',
                        reuse=(i > 0))
                    logit = slim.fully_connected(
                        logit,
                        128,
                        activation_fn=tf.nn.leaky_relu,
                        scope='fc3',
                        reuse=(i > 0))
                    logit = slim.fully_connected(logit,
                                                 1,
                                                 activation_fn=None,
                                                 scope='scorefn',
                                                 reuse=(i > 0))
                score = logit

                # Both lists receive the same tensor.
                logit_list.append(score)
                margin_list.append(score)

        # Stack the per-video scores; presumably [batch_size, batch_size]
        # after the squeeze — TODO confirm.
        margin_mat = tf.squeeze(tf.stack(margin_list))
        logit_mat = tf.squeeze(tf.stack(logit_list))
        self.logit = logit_mat
        # Hinge loss with margin 10: every entry of a row is compared with
        # that row's diagonal entry.
        # NOTE(review): the diagonal itself is not excluded, so each diagonal
        # term contributes a constant 10 to the sum — confirm this is intended.
        diag_elem = tf.diag_part(margin_mat)
        loss_mat = tf.maximum(
            0.0, 10. + margin_mat - tf.reshape(diag_elem, [-1, 1]))
        margin_loss = tf.reduce_sum(loss_mat) / (self.batch_size *
                                                 self.batch_size)
        self.scores = margin_mat
        self.mean_loss = margin_loss
        # Constant zero; no concept loss is computed in this method.
        self.concept_loss = tf.constant(0)
    def decode_infer(self, inputs, state):
        """Build the single-step decoding graph used at inference time.

        Embeds the target prefix with the (reused) 'bert' embedding variables,
        runs only the last position through `transformer_decoder_three`, and
        combines the vocabulary distribution with the attention weights via
        `calculate_final_logits`.

        Args:
            inputs: dict read for the keys 'target' and 'target_length'.
            state: dict read for the keys 'encoder' and 'decoder'.

        Returns:
            A pair `(log_prob, new_state)` where `new_state` carries the
            unchanged `state['encoder']` and the updated decoder state.
        """
        # Original author's shape notes (keys in code are 'encoder'/'decoder'):
        # state['encoder']: [b * beam, l_s, e]  ,   state['decoder']: [b * beam, q', e]
        # q' = previous decode output length
        # during infer, following graph are constructed using beam search
        with self.graph.as_default():
            config = self.bert_config

            target_sequence = inputs['target']  # [b * beam, q']
            vocab_size = len(self.hps.vocab_out)
            # trunct word idx, change those greater than vocab_size to unkId
            # The static shape is saved and restored because tf_trunct may
            # not preserve it — presumably; TODO confirm.
            shape = target_sequence.shape
            unkid = self.hps.vocab_out[self.hps.unk]
            # target_sequence = tf_trunct(target_sequence, vocab_size, self.hps.unkId)
            target_sequence = tf_trunct(target_sequence, vocab_size, unkid)
            target_sequence.set_shape(shape)

            target_length = inputs['target_length']
            # All target tokens get segment id 0.
            target_seg_ids = tf.zeros_like(target_sequence, dtype=tf.int32, name='target_seg_ids_infer')
            tgt_mask = tf.sequence_mask(target_length,
                                        maxlen=tf.shape(target_sequence)[1],
                                        dtype=tf.float32)  # [b, q']

            # with tf.variable_scope('bert', reuse=True):
            out_dict_size = len(self.hps.vocab_out)
            # reuse=True: the embedding variables must already exist in the graph.
            with tf.variable_scope('bert', reuse=True):
                with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
                    # Perform embedding lookup on the target word ids.
                    (tgt_embed, _) = embedding_lookup(
                        input_ids=target_sequence,
                        vocab_size=out_dict_size,  # out vocab size
                        embedding_size=config.hidden_size,
                        initializer_range=config.initializer_range,
                        word_embedding_name='word_embeddings',
                        use_one_hot_embeddings=False)

                    # Add positional embeddings and token type embeddings, then layer
                    # normalize and perform dropout.
                    tgt_embed = embedding_postprocessor(
                        input_tensor=tgt_embed,
                        use_token_type=True,
                        token_type_ids=target_seg_ids,
                        token_type_vocab_size=config.type_vocab_size,
                        token_type_embedding_name='token_type_embeddings',
                        use_position_embeddings=True,
                        position_embedding_name='position_embeddings',
                        initializer_range=config.initializer_range,
                        max_position_embeddings=config.max_position_embeddings,
                        dropout_prob=config.hidden_dropout_prob)


            with tf.variable_scope('decode', reuse=True):
                # [b, q', e]
                masked_tgt_embed = tgt_embed * tf.expand_dims(tgt_mask, -1)
                dec_attn_bias = attention_bias(tf.shape(masked_tgt_embed)[1], "causal")
                # Prepend one zero step on axis 1 and drop the last step, i.e.
                # shift the sequence right by one (the original note said "left").
                decoder_input = tf.pad(masked_tgt_embed, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

                # Only the most recent position is decoded at this step.
                infer_decoder_input = decoder_input[:, -1:, :]
                infer_dec_attn_bias = dec_attn_bias[:, :, -1:, :]

                ret = transformer_decoder_three(infer_decoder_input,
                                          self.enc_output,
                                          self.topic_memory,
                                          infer_dec_attn_bias,
                                          self.enc_attn_bias,
                                          self.hps,
                                          state=state['decoder'])

                all_att_weights, decoder_output, decoder_state = ret
                decoder_output = decoder_output[:, -1, :]  # [b * beam, e]
                # Project onto the vocabulary with the transposed decoder weights.
                vocab_logits = tf.matmul(decoder_output, self.decoder_weights, False, True)  # [b * beam, v]
                vocab_probs = tf.nn.softmax(vocab_logits)
                # Same value as computed at the top of the method.
                vocab_size = out_dict_size  # out vocabsize
                # we have tiled source_id_oo before feed, so last argument is set to 1
                with tf.variable_scope('copy'):
                    logits = calculate_final_logits(decoder_output, all_att_weights,
                                                    vocab_probs,
                                                    self.input_ids_oo, self.max_out_oovs, self.input_mask, vocab_size,
                                                    tgt_seq_len=1)
                # NOTE(review): tf.log is applied directly, so `logits` is
                # presumably a probability distribution, not raw logits — confirm.
                log_prob = tf.log(logits)  # [b * beam, v + v']
        return log_prob, {'encoder': state['encoder'], 'decoder': decoder_state}
Example #60
0
 def image_to_embedding(self, images, is_training=True):
   """Run `self.model_func` on `images` inside the 'net' variable scope.

   `is_training` is forwarded to the model function and is also used as the
   scope's `reuse` flag.

   NOTE(review): with `reuse=is_training`, the training call expects the
   'net' variables to exist already while the non-training call creates
   them — confirm this ordering is intended.
   """
   with tf.variable_scope('net', reuse=is_training):
     embedding = self.model_func(images, is_training=is_training)
   return embedding