Example #1
    def _build_q_net(self, state, action, variable_scope, reuse=False):
        with tf.variable_scope(variable_scope, reuse=reuse):
            user_id_embedding_table = tf.get_variable(
                name="user_id",
                shape=[self.user_num, 20],
                initializer=initializers.xavier_initializer(),
                trainable=True,
                dtype=tf.float32)
            user_id = tf.cast(state[:, 0], dtype=tf.int32)
            user_id_embeddings = tf.nn.embedding_lookup(
                user_id_embedding_table, ids=user_id, name="user_id_embedding")
            state = tf.concat([user_id_embeddings, state[:, 1:]], axis=1)

            n_features = state.get_shape()[1]

            state = tf.concat(
                [state,
                 tf.expand_dims(action, axis=1, name="2d-action")],
                axis=1)
            fc1 = tf.layers.dense(state,
                                  units=n_features,
                                  activation=tf.nn.relu,
                                  name='fc1')
            fc2 = tf.layers.dense(fc1,
                                  units=n_features // 2,
                                  activation=tf.nn.relu,
                                  name='fc2')

            q = tf.layers.dense(fc2, units=self.action_dim, name='q')

            return q[:, 0]
    def loss_layer(logits, labels, num_labels, lengths, input_mask):
        FLAGS = tf.flags.FLAGS

        trans = tf.get_variable("transitions",
                                shape=[num_labels, num_labels],
                                initializer=initializers.xavier_initializer())
        if FLAGS.use_crf:
            with tf.variable_scope("crf-loss"):
                log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                    inputs=logits,
                    tag_indices=labels,
                    transition_params=trans,
                    sequence_lengths=lengths)
                per_example_loss = -log_likelihood
                loss = tf.reduce_mean(per_example_loss)
                return loss, per_example_loss, trans
        else:
            labels_one_hot = tf.one_hot(labels, num_labels)
            cross_entropy = labels_one_hot * tf.log(tf.nn.softmax(logits))
            cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
            cross_entropy *= tf.to_float(input_mask)
            cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
            cross_entropy /= tf.cast(lengths, tf.float32)
            per_example_loss = cross_entropy
            loss = tf.reduce_mean(per_example_loss)
            return loss, per_example_loss, trans
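A hedged usage sketch for the `loss_layer` function above, treating it as a standalone function. It assumes a `use_crf` boolean flag has already been defined on `tf.flags.FLAGS` elsewhere in the program; the tensor shapes are illustrative only.

import tensorflow as tf

# Hypothetical wiring (TF 1.x). loss_layer expects:
#   logits:     (batch, seq_len, num_labels) per-tag scores
#   labels:     (batch, seq_len) gold tag ids
#   lengths:    (batch,) true sequence lengths
#   input_mask: (batch, seq_len) 1 for real tokens, 0 for padding
num_labels = 7
logits = tf.placeholder(tf.float32, [None, 128, num_labels])
labels = tf.placeholder(tf.int32, [None, 128])
lengths = tf.placeholder(tf.int32, [None])
input_mask = tf.placeholder(tf.int32, [None, 128])

loss, per_example_loss, trans = loss_layer(logits, labels, num_labels,
                                           lengths, input_mask)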
 def __init__(self, config):
     self.config = config
     self.task_name = config["task_name"]
     self.lstm_dim = config["lstm_dim"]
     self.embedding_size = config["embedding_size"]
     self.max_epoch = config["max_epoch"]  # originally 10 epochs
     self.learning_rate = config["learning_rate"]
     self.checkpoint_dir = config["checkpoint_dir"]
     self.checkpoint_path = config["checkpoint_path"]
     self.initializer = initializers.xavier_initializer()
     self.is_training = True if ARGS.entry=="train" else False
     self.bert_config = config["bert_config"]
     self.init_checkpoint = config["init_checkpoint"]
     self.vocab_dir = config["vocab_dir"]
     self.tf_serving_save_dir = config["tf_serving_save_dir"]
     self.predict_file = config["predict_file"]
     self.predict_result = config["predict_result"]
     self.require_improvement = config["require_improvement"]
     self.global_steps = tf.Variable(0, trainable=False)
     self.best_dev_f1 = tf.Variable(0.0, trainable=False)
     self.best_f1 = 0.0
     self.best_match_num = 0
     self.steps = 0  # iteration count
     self.last_improved = 0  # step of the last improvement
     self.tokenizer = tokenization.FullTokenizer(
         vocab_file=self.vocab_dir,
     )
Example #4
def dense(cls,
          input_layer,
          shape,
          dtype=tf.float32,
          activation=tf.nn.relu,
          name="dense",
          detailed_summary=False):
    with tf.variable_scope(name):
        w = tf.get_variable("w",
                            shape=shape,
                            dtype=dtype,
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("b",
                            shape=shape[1],
                            dtype=dtype,
                            initializer=tf.zeros_initializer())
        out = tf.nn.bias_add(tf.matmul(input_layer, w), b)

        if detailed_summary:
            with tf.name_scope('w'):
                cls.variable_summaries(w)

            with tf.name_scope('b'):
                cls.variable_summaries(b)

            with tf.name_scope('output'):
                cls.variable_summaries(out)

        if activation is not None:
            return activation(out)
        else:
            return out
Example #5
    def _build_action_net(self, state, variable_scope):
        with tf.variable_scope(variable_scope):
            user_id_embedding_table = tf.get_variable(
                name="user_id",
                shape=[self.user_num, 20],
                initializer=initializers.xavier_initializer(),
                trainable=True,
                dtype=tf.float32)
            user_id = tf.cast(state[:, 0], dtype=tf.int32)
            user_id_embeddings = tf.nn.embedding_lookup(
                user_id_embedding_table, ids=user_id, name="user_id_embedding")
            state = tf.concat([user_id_embeddings, state[:, 1:]], axis=1)

            n_features = state.get_shape()[1]
            fc1 = tf.layers.dense(state,
                                  units=n_features // 2,
                                  activation=tf.nn.relu,
                                  name='fc1')
            actions = tf.layers.dense(fc1,
                                      self.action_dim,
                                      activation=tf.nn.sigmoid,
                                      name='a')
            scaled_a = tf.multiply(actions, 1, name='scaled_a')

            return scaled_a[:, 0]
Example #6
 def __init__(self, embeddings, lstm_dim_=100, num_tags_=4, lr_=0.001):
     self.lstm_dim = lstm_dim_
     self.num_tags = num_tags_
     self.lr = lr_
     self.initializer = initializers.xavier_initializer()
     self.dropout = tf.placeholder(dtype=tf.float32, name='dropout')
     self.max_steps = tf.placeholder(dtype=tf.int32,
                                     shape=[
                                         None,
                                     ],
                                     name='seq_length')
     self.x_input = tf.placeholder(dtype=tf.int32,
                                   shape=[None, None],
                                   name='x_input')
     self.y_target = tf.placeholder(dtype=tf.int32,
                                    shape=[None, None],
                                    name='y_target')
     self.num_steps = tf.shape(self.x_input)[-1]
     with tf.variable_scope("char_embedding"):
         self.embeddings = tf.get_variable(name='embeddings',
                                           initializer=embeddings)
     self.logits = self.project_layer_single(self.bigru_layer())
     with tf.variable_scope("crf_loss"):
         self.trans = tf.get_variable("transitions",
                                      shape=[self.num_tags, self.num_tags],
                                      initializer=self.initializer)
         self.loss = self.loss_layer(self.logits)
         self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
             self.loss)
Example #7
def linear(input_,
           output_size,
           weights_initializer=initializers.xavier_initializer(),
           biases_initializer=tf.zeros_initializer,
           activation_fn=None,
           trainable=True,
           name='linear'):
    """
    Constructs a fully connected layer.
    """
    # Get shape of input.
    shape = input_.get_shape().as_list()

    if len(shape) > 2:
        # Flatten.
        input_ = tf.reshape(input_,
                            [-1, reduce(lambda x, y: x * y, shape[1:])])
        shape = input_.get_shape().as_list()

    with tf.variable_scope(name):
        # Weights, bias, output.
        w = tf.get_variable('w', [shape[1], output_size],
                            tf.float32,
                            initializer=weights_initializer,
                            trainable=trainable)
        b = tf.get_variable('b', [output_size],
                            initializer=biases_initializer,
                            trainable=trainable)
        out = tf.nn.bias_add(tf.matmul(input_, w), b)

        if activation_fn is not None:
            # Apply activation function.
            out = activation_fn(out)

        return out, w, b
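A minimal usage sketch for the `linear` helper defined above, assuming TensorFlow 1.x graph mode with the module's contrib imports available; the placeholder shape and layer sizes are illustrative.

import tensorflow as tf

# Hypothetical two-layer head built from the linear() helper above.
x = tf.placeholder(tf.float32, [None, 128], name='x')
h, w1, b1 = linear(x, 64, activation_fn=tf.nn.relu, name='hidden')
logits, w2, b2 = linear(h, 10, name='logits')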
Example #8
def fractal_conv2d(inputs,
                   num_columns,
                   num_outputs,
                   kernel_size,
                   joined=True,
                   stride=1,
                   padding='SAME',
                   # rate=1,
                   activation_fn=nn.relu,
                   normalizer_fn=slim.batch_norm,
                   normalizer_params=None,
                   weights_initializer=initializers.xavier_initializer(),
                   weights_regularizer=None,
                   biases_initializer=None,
                   biases_regularizer=None,
                   reuse=None,
                   variables_collections=None,
                   outputs_collections=None,
                   is_training=True,
                   trainable=True,
                   scope=None):
  """Builds a fractal block with slim.conv2d.
  The fractal will have `num_columns` columns.
  Args:
    inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
    num_columns: integer, the columns in the fractal.
  """
  locs = locals()
  fractal_args = ['inputs','num_columns','joined','is_training']
  asc_fn = lambda : slim.arg_scope([slim.conv2d],
                                   **{arg:val for (arg,val) in locs.items()
                                      if arg not in fractal_args})
  return fractal_template(inputs, num_columns, slim.conv2d, asc_fn,
                          joined, is_training, reuse, scope)
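A hedged usage sketch for `fractal_conv2d`, assuming the module's `fractal_template` helper and slim imports exist as referenced above; the input shape and hyperparameters are invented for illustration.

import tensorflow as tf

# Hypothetical fractal block over image features (TF 1.x + slim assumed).
images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
block = fractal_conv2d(images,
                       num_columns=3,   # three fractal columns
                       num_outputs=64,  # output channels
                       kernel_size=3,
                       is_training=True,
                       scope='fractal1')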
def arcface_softmax(feature,
                    targets,
                    num_outputs,
                    s=32,
                    m=0.5,
                    name="arcface_softmax"):

    feature_shape = feature.get_shape()
    kernel = tf.get_variable("arcface_softmax/W",
                             [feature_shape[1], num_outputs],
                             dtype=tf.float32,
                             initializer=initializers.xavier_initializer())
    feature_norm = tf.nn.l2_normalize(feature, dim=1)
    kernel_norm = tf.nn.l2_normalize(kernel, dim=0)
    cos_theta = tf.matmul(feature_norm, kernel_norm)

    theta = tf.acos(cos_theta)
    phi_theta = tf.cos(theta + m)

    # cos_m = math.cos(m)
    # sin_m = math.sin(m)
    # cos_theta2 = tf.square(cos_theta)
    # sin_theta2 = tf.subtract(1.0, cos_theta2)
    # sin_theta = tf.sqrt(sin_theta2)
    # phi_theta = tf.subtract(tf.multiply(cos_theta, cos_m), tf.multiply(sin_theta, sin_m))

    logits = s * cos_theta
    logits_ = s * phi_theta

    adjust_logits = tf.where(tf.equal(targets, 1.0), logits_, logits)

    return adjust_logits
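A small usage sketch for `arcface_softmax` above. It assumes `targets` is a one-hot float tensor (so `tf.equal(targets, 1.0)` selects the ground-truth class) and pairs the adjusted logits with ordinary softmax cross-entropy; shapes and class count are illustrative.

import tensorflow as tf

# Hypothetical training wiring (TF 1.x).
features = tf.placeholder(tf.float32, [None, 512], name='features')
one_hot_labels = tf.placeholder(tf.float32, [None, 1000], name='one_hot_labels')

logits = arcface_softmax(features, one_hot_labels, num_outputs=1000, s=32, m=0.5)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels,
                                               logits=logits))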
Example #10
def xavier(uniform=True, seed=None, dtype=tf.float32):
    """ Xavier.

    Returns an initializer performing "Xavier" initialization for weights.

    This initializer is designed to keep the scale of the gradients roughly the
    same in all layers. In uniform distribution this ends up being the range:
    `x = sqrt(6. / (in + out)); [-x, x]` and for normal distribution a standard
    deviation of `sqrt(3. / (in + out))` is used.

    Arguments:
        uniform: Whether to use uniform or normal distributed random
            initialization.
        seed: A Python integer. Used to create random seeds. See
            `set_random_seed` for behavior.
        dtype: The data type. Only floating point types are supported.

    Returns:
        An initializer for a weight matrix.

    References:
        Understanding the difficulty of training deep feedforward neural
        networks. International conference on artificial intelligence and
        statistics. Xavier Glorot and Yoshua Bengio (2010).

    Links:
        [http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf]
        (http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)
    """
    return xavier_initializer(uniform=uniform, seed=seed, dtype=dtype)
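A short sketch of how this `xavier` wrapper might be used to initialize weights, assuming TensorFlow 1.x and that `xavier_initializer` is imported in the module as above; the variable names and shapes are illustrative.

import tensorflow as tf

# Hypothetical weight/bias creation with the xavier() wrapper defined above.
W = tf.get_variable('W', shape=[784, 256], initializer=xavier(uniform=True, seed=42))
b = tf.get_variable('b', shape=[256], initializer=tf.zeros_initializer())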
Example #11
def linear(inputs, output_size,
			weights_initializer=initializers.xavier_initializer(),
			biases_initializer=tf.zeros_initializer, synthetic=False,
			activation_fn=None, batch_norm=True, name='linear'):
	
	var = {}
	shape = inputs.get_shape().as_list()
	with tf.variable_scope(name):
		var['w'] = tf.get_variable('w', [shape[1], output_size], tf.float32,
						initializer=weights_initializer)
		var['b'] = tf.get_variable('b', [output_size],
						initializer=biases_initializer)
		out = tf.nn.bias_add(tf.matmul(inputs, var['w']), var['b'])

		if batch_norm:
			out = tf.contrib.layers.batch_norm(out)
		if activation_fn is not None:
			out = activation_fn(out)
		if synthetic:
			with tf.variable_scope('synthetic_grad'):
				out_shape = out.get_shape()
				h1, var['l1_w'], var['l1_b'] = linear(out, 4000, weights_initializer=tf.zeros_initializer,
									biases_initializer=tf.zeros_initializer, activation_fn=tf.nn.relu, batch_norm=True, name='l1')
				synthetic_grad, var['l2_w'], var['l2_b'] = linear(h1, out_shape[1], weights_initializer=tf.zeros_initializer,
									biases_initializer=tf.zeros_initializer, activation_fn=tf.nn.relu, batch_norm=True, name='l2')
			return out, var['w'], var['b'], synthetic_grad
		else:
			return out, var['w'], var['b']
Example #12
    def res_block(self, input, name):
        with tf.variable_scope(name):
            with slim.arg_scope(
                [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    # normalizer_fn=self.normalizer,
                    # normalizer_params=self.norm_params1,
                    weights_initializer=initializers.xavier_initializer(
                        uniform=True),
                    weights_regularizer=slim.l1_regularizer(1e-4)):
                # print('......................................')
                split1 = input
                split1_1 = input
                conv3_1 = slim.conv2d(split1, 48, [3, 3], 1, scope='conv_3_1')
                conv3_2 = slim.conv2d(conv3_1, 48, [3, 3], 1, scope='conv3_2')
                slice1_1, slice1_2 = tf.split(conv3_2, [16, 32], axis=3)
                conv3_3 = slim.conv2d(slice1_2, 48, [3, 3], scope='conv3_3')
                conv3_4 = slim.conv2d(conv3_3, 64, [3, 3], scope='conv3_4')
                slice2_1, slice2_2 = tf.split(conv3_4, [16, 48], axis=3)
                conv3_5 = slim.conv2d(slice2_2, 48, [3, 3], 1, scope='conv3_5')
                conv3_6 = slim.conv2d(conv3_5, 96, [3, 3], 1, scope='conv3_6')

                concat1 = tf.concat([split1_1, slice1_1, slice2_1], axis=3)

                sum1 = concat1 + conv3_6
                down1 = slim.conv2d(sum1, 64, [1, 1], 1, scope='down1')
                return down1
Example #13
    def pre_convolution(self, image1, image2, image3, name):
        with tf.variable_scope(name):
            with slim.arg_scope(
                [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    # normalizer_fn=self.normalizer,
                    # normalizer_params=self.norm_params1,
                    weights_initializer=initializers.xavier_initializer(
                        uniform=True),
                    weights_regularizer=slim.l1_regularizer(1e-4)):
                image1 = slim.conv2d(image1, 64, [3, 3], 1, scope='conv1_1')
                image1 = slim.conv2d(image1, 64, [3, 3], 1, scope='conv1_2')
                image2 = slim.conv2d(image2, 64, [3, 3], 1, scope='conv2_1')
                image2 = slim.conv2d(image2, 64, [3, 3], 1, scope='conv2_2')
                image3 = slim.conv2d(image3, 64, [3, 3], 1, scope='conv3_1')
                image3 = slim.conv2d(image3, 64, [3, 3], 1, scope='conv3_2')

                image_1_2 = tf.concat([image1, image2], axis=3)
                image_1_2 = slim.conv2d(image_1_2,
                                        64, [3, 3],
                                        1,
                                        scope='conv_1_2')

                image_2_3 = tf.concat([image2, image3], axis=3)
                image_2_3 = slim.conv2d(image_2_3,
                                        64, [3, 3],
                                        1,
                                        scope='conv_2_3')

                image_1_2_3 = tf.concat([image_1_2, image_2_3], axis=3)
                image_1_2_3 = slim.conv2d(image_1_2_3,
                                          64, [3, 3],
                                          1,
                                          scope='conv_1_2_3')
                return image_1_2_3
    def __init__(self, bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, max_seq_length,
                 use_one_hot_embeddings):
        # load bert
        bert = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)
        # get the BERT output
        output_layer = bert.get_sequence_output()
        # self.all_encoder_layers = bert.get_all_encoder_layers()
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        hidden_size = output_layer.shape[-1].value
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        tf.logging.info(" The dimension of bert output:%s" %
                        output_layer.shape)

        # fully connected layer
        output_weight = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        self.logits = tf.reshape(logits, [-1, max_seq_length, num_labels])

        # compute the MNLP score from the dense-layer output
        self.probs = tf.nn.softmax(self.logits, axis=-1)
        self.best_probs = tf.reduce_max(self.probs, axis=-1)
        self.mnlp_score = tf.reduce_mean(tf.log(self.best_probs), axis=-1)

        # compute the true length of each input sample
        used = tf.sign(tf.abs(input_ids))
        lengths = tf.reduce_sum(used, reduction_indices=1)

        # CRF layer
        with tf.variable_scope("crf"):
            trans = tf.get_variable(
                "transitions",
                shape=[num_labels, num_labels],
                initializer=initializers.xavier_initializer())
            if labels is None:
                self.loss = None
            else:
                log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                    inputs=self.logits,
                    tag_indices=labels,
                    transition_params=trans,
                    sequence_lengths=lengths)
                self.loss = tf.reduce_mean(-log_likelihood)

            self.predicts, self.score = crf.crf_decode(potentials=self.logits,
                                                       transition_params=trans,
                                                       sequence_length=lengths)
Example #15
    def _create_lstm_policy(self, args):
        # Create LSTM portion of network
        lstms_pol = [
            rnn.LSTMCell(args.policy_size,
                         state_is_tuple=True,
                         initializer=initializers.xavier_initializer())
            for _ in range(args.num_policy_layers)
        ]
        self.policy_lstm = rnn.MultiRNNCell(lstms_pol, state_is_tuple=True)
        self.policy_state = self.policy_lstm.zero_state(
            args.batch_size * args.sample_size, tf.float32)

        # Get samples from standard normal distribution, transform to match z-distribution
        samples = tf.random_normal(
            [args.sample_size, args.batch_size, args.z_dim], name="z_samples")
        z_samples = samples * tf.exp(self.z_logstd) + self.z_mean
        self.z_samples = tf.transpose(z_samples, [1, 0, 2])

        # Construct policy input
        policy_input = tf.reshape(tf.concat([self.states, self.z_samples], 2),
                                  [-1, 1, args.z_dim + args.state_dim])
        # Forward pass
        helper = seq2seq.TrainingHelper(policy_input,
                                        sequence_length=[1] * args.batch_size *
                                        args.sample_size)
        decoder = seq2seq.BasicDecoder(cell=self.policy_lstm,
                                       helper=helper,
                                       initial_state=self.policy_state)
        output, self.final_policy_state, _ = seq2seq.dynamic_decode(
            decoder, scope='policy_cell')
        #output = tf.squeeze(tf.gather(output[0], output[1]))
        output = output[0][:, -1, :]

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("lstm_w", [args.policy_size, args.action_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("lstm_b", [args.action_dim])
        a_mean = tf.nn.xw_plus_b(output, W, b)
        self.a_mean = tf.reshape(
            a_mean, [args.batch_size, args.sample_size, args.action_dim],
            name="a_mean")

        # Initialize logstd
        self.a_logstd = tf.Variable(np.zeros(args.action_dim),
                                    name="a_logstd",
                                    dtype=tf.float32)
def fully_connected(in_c, out_c, name):
    with tf.variable_scope(name):
        w = tf.get_variable(name='w',
                            shape=[in_c, out_c],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable(name='b',
                            shape=[out_c],
                            initializer=initializers.xavier_initializer())

    def wx_b(x, activation=True):
        x = tf.matmul(x, w) + b
        if activation:
            return tf.nn.relu(x)
        else:
            return x

    return wx_b
def forward(inputs,
            num_outputs,
            input_dim=None,
            hiddens=[200],
            activation_fn=tf.nn.relu,
            weights_initializer=initializers.xavier_initializer(),
            weights_regularizer=None,
            biases_initializer=init_ops.zeros_initializer(),
            biases_regularizer=None,
            reuse=None,
            scope=None):
    """
  Similar to melt.slim.layers.mlp, but the first step (from input to the first hidden layer) is adjusted so that the input can be sparse.
  """

    assert len(hiddens) >= 1, "must at least contain one hidden layer"

    scope = 'mlp' if scope is None else scope
    with tf.variable_scope(scope):
        outputs = melt.layers.fully_connected(
            inputs,
            num_outputs,
            input_dim=input_dim,
            activation_fn=activation_fn,
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=biases_initializer,
            biases_regularizer=biases_regularizer,
            reuse=reuse,
            scope='fc_0')

        #--------other hidden layers
        # for i in xrange(len(hiddens) -1):
        #   outputs = slim.fully_connected(outputs, hiddens[i + 1],
        #                          activation_fn=activation_fn,
        #                          weights_initializer=weights_initializer,
        #                          weights_regularizer=weights_regularizer,
        #                          biases_initializer=biases_initializer,
        #                          biases_regularizer=biases_regularizer,
        #                          scope='fc_%d'%i+1)

        outputs = slim.stack(outputs,
                             slim.fully_connected,
                             hiddens[1:],
                             activation_fn=activation_fn,
                             weights_initializer=weights_initializer,
                             weights_regularizer=weights_regularizer,
                             biases_initializer=biases_initializer,
                             biases_regularizer=biases_regularizer,
                             scope='fc')

        return slim.linear(outputs,
                           num_outputs,
                           weights_initializer=weights_initializer,
                           weights_regularizer=weights_regularizer,
                           biases_initializer=biases_initializer,
                           biases_regularizer=biases_regularizer,
                           scope='linear')
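A hedged usage sketch for `forward`, assuming the surrounding module's `melt` and `slim` imports are available; the input width, hidden sizes, and scope name are invented for illustration.

import tensorflow as tf

# Hypothetical MLP scorer built with forward() above (TF 1.x).
inputs = tf.placeholder(tf.float32, [None, 300], name='inputs')
scores = forward(inputs,
                 num_outputs=1,       # one score per example
                 input_dim=300,
                 hiddens=[200, 100],  # two hidden layers
                 scope='ranker')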
Example #18
def version_1(inputs, is_training):
    with tf.name_scope('version_1'):
        n_filter = 20
        n_hidden = 100
        n_out = 10

        # Reshaping for convolutional operation
        x = tf.reshape(inputs, [-1, 28, 28, 1])

        # Convolutional layer
        net = slim.conv2d(x,
                          n_filter, [5, 5],
                          padding='VALID',
                          activation_fn=tf.nn.sigmoid,
                          scope='conv1')

        # Pooling layer
        net = slim.max_pool2d(net, [2, 2],
                              stride=2,
                              padding='VALID',
                              scope='pool1')

        # Flatten for fully-connected layer
        net = slim.flatten(net, scope='flatten3')

        # 100 sigmoid neurons
        net = slim.fully_connected(
            net,
            n_hidden,
            scope='fc1',
            activation_fn=tf.nn.sigmoid,
            weights_initializer=initializers.xavier_initializer(),
            biases_initializer=init_ops.zeros_initializer())

        # 10 neurons (softmax)
        logits = slim.fully_connected(
            net,
            n_out,
            activation_fn=None,
            scope='fco',
            weights_initializer=initializers.xavier_initializer(),
            biases_initializer=init_ops.zeros_initializer())
        out_layer = tf.nn.softmax(logits)

    return out_layer, logits
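A brief usage sketch for `version_1` above, assuming MNIST-style flat 784-dimensional inputs; the loss and optimizer wiring is illustrative only.

import tensorflow as tf

# Hypothetical training graph around version_1() (TF 1.x).
x = tf.placeholder(tf.float32, [None, 784], name='x')
y = tf.placeholder(tf.int64, [None], name='y')

probs, logits = version_1(x, is_training=True)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)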
Example #19
def multi_input_dense_layer(
        inputs,
        units,
        activation=corrnet.activations.leaky_relu,
        use_bias=True,
        kernel_initializer=initializers.xavier_initializer(),
        bias_initializer=init_ops.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        trainable=True,
        name=None,
        reuse=None):
    """Functional interface for the multi input densely-connected layer.
    This layer implements the operation:
    `outputs = activation(inputs1.kernel1 + inputs2.kernel2 + bias)`
    Where `activation` is the activation function passed as the `activation`
    argument (if not `None`), `kernel` is a weights matrix created by the
    layer, and `bias` is a bias vector created by the layer
    (only if `use_bias` is `True`).
    Note: if the `inputs` tensor has a rank greater than 2, then it is
    flattened prior to the initial matrix multiply by `kernel`.
    Arguments:
      inputs: a list of Tensor inputs.
      units: Integer or Long, dimensionality of the output space.
      activation: Activation function (callable). Set it to None to maintain a
        linear activation.
      use_bias: Boolean, whether the layer uses a bias.
      kernel_initializer: Initializer function for the weight matrix.
        If `None` (default), weights are initialized using the default
        initializer used by `tf.get_variable`.
      bias_initializer: Initializer function for the bias.
      kernel_regularizer: Regularizer function for the weight matrix.
      bias_regularizer: Regularizer function for the bias.
      activity_regularizer: Regularizer function for the output.
      trainable: Boolean, if `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      name: String, the name of the layer.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
    Returns:
      The created layer. Use .apply method to pass inputs
    """
    layer = MultiInputDense(units,
                            activation=activation,
                            use_bias=use_bias,
                            kernel_initializer=kernel_initializer,
                            bias_initializer=bias_initializer,
                            kernel_regularizer=kernel_regularizer,
                            bias_regularizer=bias_regularizer,
                            activity_regularizer=activity_regularizer,
                            trainable=trainable,
                            name=name,
                            dtype=inputs[0].dtype.base_dtype,
                            _scope=name,
                            _reuse=reuse)
    return layer  # .apply(inputs)
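A minimal sketch of applying the returned layer via `.apply`, as the docstring suggests; it assumes the module's `MultiInputDense` class and `corrnet` activation import exist, and the two input tensors are invented for illustration.

import tensorflow as tf

# Hypothetical two-view fusion (TF 1.x).
view_a = tf.placeholder(tf.float32, [None, 128], name='view_a')
view_b = tf.placeholder(tf.float32, [None, 64], name='view_b')

shared = multi_input_dense_layer([view_a, view_b], units=256, name='shared')
hidden = shared.apply([view_a, view_b])  # activation(view_a.kernel1 + view_b.kernel2 + bias)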
Example #20
def trans_conv2d(inputs,
                 num_outputs,
                 kernel_size,
                 output_shape,
                 stride=1,
                 padding='SAME',
                 activation_fn=nn_ops.relu,
                 normalizer_fn=None,
                 normalizer_params=None,
                 weights_initializer=initializers.xavier_initializer(),
                 weights_regularizer=None,
                 biases_initializer=init_ops.zeros_initializer(),
                 biases_regularizer=None,
                 reuse=None,
                 variables_collections=None,
                 outputs_collections=None,
                 scope=None):
    """
    trans_convolution with specified output_shape
    """
    if type(stride) in (int, float):
        stride = (stride, stride)
    if type(kernel_size) is int:
        kernel_size = (kernel_size, kernel_size)

    with tf.variable_scope(scope, 'trans_conv2d', reuse=reuse) as sc:
        indim = tensor_shape(inputs)[-1]
        filters = get_variable(name='weights',
                               shape=kernel_size + (num_outputs, indim),
                               init=weights_initializer,
                               reg=weights_regularizer,
                               collections=variables_collections)
        if biases_initializer is not None:
            biases = get_variable(name='biases',
                                  shape=(num_outputs,),
                                  init=biases_initializer,
                                  reg=biases_regularizer,
                                  collections=variables_collections)

    outputs = tf.nn.conv2d_transpose(inputs,
                                     filters,
                                     output_shape,
                                     strides=(1,) + stride + (1,),
                                     padding=padding,
                                     name=scope)

    if biases_initializer is not None:
        outputs = outputs + biases

    if normalizer_fn is not None:
        normalizer_params = normalizer_params or {}
        outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
        outputs = activation_fn(outputs)

    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
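A hedged usage sketch for `trans_conv2d`, assuming the module's `tensor_shape` and `get_variable` helpers exist as referenced above; the fixed batch size and shapes are illustrative.

import tensorflow as tf

# Hypothetical 2x upsampling (TF 1.x); batch size fixed at 8 so output_shape
# can be given as a static list.
feat = tf.placeholder(tf.float32, [8, 16, 16, 64], name='feat')
upsampled = trans_conv2d(feat,
                         num_outputs=32,
                         kernel_size=4,
                         output_shape=[8, 32, 32, 32],
                         stride=2,
                         scope='deconv1')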
Example #21
    def _create_lstm_policy(self, args):
        raise NotImplementedError
        # Create LSTM portion of network
        lstm = rnn_cell.LSTMCell(args.policy_size,
                                 state_is_tuple=True,
                                 initializer=initializers.xavier_initializer())
        self.policy_lstm = rnn_cell.MultiRNNCell([lstm] *
                                                 args.num_policy_layers,
                                                 state_is_tuple=True)
        self.policy_state = self.policy_lstm.zero_state(
            args.batch_size * args.sample_size, tf.float32)

        # Get samples from standard normal distribution, transform to match z-distribution
        samples = tf.random_normal(
            [args.sample_size, args.batch_size, args.z_dim], name="z_samples")
        self.z_samples = samples * tf.exp(self.z_logstd) + self.z_mean
        self.z_samples = tf.transpose(self.z_samples, perm=[1, 0, 2])

        # Construct policy input
        policy_input = tf.concat(2, [self.states, self.z_samples])
        policy_input = tf.reshape(
            policy_input,
            [args.batch_size * args.sample_size, args.state_dim + args.z_dim],
            name="policy_input")

        # Forward pass
        with tf.variable_scope("policy"):
            output, self.final_policy_state = seq2seq.rnn_decoder(
                [policy_input], self.policy_state, self.policy_lstm)
        output = tf.reshape(tf.concat(1, output), [-1, args.policy_size])

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("lstm_w", [args.policy_size, args.action_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("lstm_b", [args.action_dim])
        a_mean = tf.nn.xw_plus_b(output, W, b)
        self.a_mean = tf.reshape(
            a_mean, [args.batch_size, args.sample_size, args.action_dim],
            name="a_mean")

        # Initialize logstd
        self.a_logstd = tf.Variable(np.zeros(args.action_dim),
                                    name="a_logstd",
                                    dtype=tf.float32)
Example #22
    def __init__(self,
                 words_id,
                 segs_id,
                 labels,
                 lengths,
                 config,
                 is_train=True):
        self.config = config
        self.is_train = is_train

        self.lr = config.lr
        self.char_dim = config.char_dim
        self.lstm_dim = config.lstm_dim
        self.seg_dim = config.seg_dim

        self.num_tags = config.num_tags
        self.num_chars = config.num_chars
        self.num_segs = 4

        self.initializer = initializers.xavier_initializer()
        self.global_step = tf.Variable(0, trainable=False)
        self.char_inputs = words_id
        self.seg_inputs = segs_id
        self.targets = labels

        self.dropout = config.dropout

        self.lengths = tf.cast(lengths, tf.int32)
        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = config.max_seq_length

        self.model_type = config.model_type
        self.layers = [{'dilation': 1}, {'dilation': 1}, {'dilation': 2}]
        self.filter_width = 3
        self.num_filter = self.lstm_dim
        self.embedding_dim = self.char_dim + self.seg_dim
        self.repeat_times = 4
        self.cnn_output_width = 0

        embedding = self.embedding_layer(self.char_inputs, self.seg_inputs,
                                         config)

        if self.model_type == "bilstm":
            model_inputs = embedding
            if self.is_train:
                model_inputs = tf.nn.dropout(embedding, self.dropout)
            model_outputs = self.biLSTM_layer(model_inputs, self.lstm_dim,
                                              self.lengths)
            self.logits = self.project_layer_bilstm(model_outputs)
        elif self.model_type == "idcnn":
            model_inputs = embedding
            if self.is_train:
                model_inputs = tf.nn.dropout(embedding, self.dropout)
            model_outputs = self.IDCNN_layer(model_inputs)
            self.logits = self.project_layer_idcnn(model_outputs)
        else:
            raise KeyError
Example #23
    def _encode(self):
        """
        Employs two Bi-LSTMs to encode passage and question separately
        """
        init = None
        if self.para_init:
            init_w = tf.constant_initializer(self.init1)
            init_b = tf.constant_initializer(self.init1)
        else:
            init_w = initializers.xavier_initializer()
            init_b = tf.zeros_initializer()

        if self.simple_net in [0, 1, 4]:
            with tf.variable_scope('passage_encoding'):
                self.sep_p_encodes = tc.layers.fully_connected(
                    self.p_emb,
                    num_outputs=2 * self.hidden_size,
                    activation_fn=tf.nn.tanh,
                    weights_initializer=init_w,
                    biases_initializer=init_b)
            with tf.variable_scope('question_encoding'):
                self.sep_q_encodes = tc.layers.fully_connected(
                    self.q_emb,
                    num_outputs=2 * self.hidden_size,
                    activation_fn=tf.nn.tanh,
                    weights_initializer=init_w,
                    biases_initializer=init_b)
        if self.simple_net in [2, 3, 5, 7, 8]:
            with tf.variable_scope('passage_encoding'):
                self.sep_p_encodes, self.seq_p_states, self.p_r = rnn(
                    'bi-lstm',
                    self.p_emb,
                    self.p_length,
                    self.hidden_size,
                    self.init1,
                    batch_size=self.batch_size,
                    debug=self.para_init)
            with tf.variable_scope('question_encoding'):
                self.sep_q_encodes, self.seq_q_states, _ = rnn(
                    'bi-lstm',
                    self.q_emb,
                    self.q_length,
                    self.hidden_size,
                    self.init1,
                    batch_size=self.batch_size,
                    debug=self.para_init)
            if self.use_dropout:
                self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes,
                                                   self.dropout_keep_prob)
                self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes,
                                                   self.dropout_keep_prob)

        #self.sep_p_encodes *= tf.expand_dims(self.passage_mask, -1)
        #self.sep_q_encodes *= tf.expand_dims(self.question_mask, -1)
        variable_summaries(self.sep_p_encodes)
        variable_summaries(self.sep_q_encodes)
    def __init__(self, config):
        # super parameters
        self.config = config
        self.lr = config['lr']
        self.char_dim = config['char_dim']
        self.seg_dim = config['seg_dim']
        self.lstm_dim = config['lstm_dim']

        self.num_tags = config['num_tags']
        self.num_chars = config['num_chars']
        self.num_segs = 4

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        # placeholder
        self.char_input = tf.placeholder(dtype=tf.int32, shape=[None, None], name='CharInputs') # batch_size * LenSentence
        self.seg_input = tf.placeholder(dtype=tf.int32, shape=[None, None], name='SegInputs')
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name='Targets')
        self.dropout = tf.placeholder(dtype=tf.float32, name='Dropout')

        # lengths, batch_size, num_steps
        used = tf.sign(tf.abs(self.char_input))
        length = tf.reduce_sum(used, reduction_indices=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.char_input)[0]
        self.num_steps = tf.shape(self.char_input)[1]

        # Net Structure
        embedding_output = self.embedding_layer(char_inputs=self.char_input, seg_inputs=self.seg_input, config=config)
        embedding_dropout = tf.nn.dropout(embedding_output, self.dropout)
        lstm_output = self.biLSTM_layer(lstm_inputs=embedding_dropout, lstm_dim=self.lstm_dim, lengths=self.lengths)
        self.logits = self.project_layer(lstm_outputs=lstm_output)
        self.loss = self.loss_layer(project_logits=self.logits, lengths=self.lengths)

        # optimizer
        opt_kind = self.config['optimizer']
        if opt_kind == 'sgd':
            self.opt = tf.train.GradientDescentOptimizer(self.lr)
        elif opt_kind == 'adam':
            self.opt = tf.train.AdamOptimizer(self.lr)
        elif opt_kind == 'adgrad':
            self.opt = tf.train.AdagradOptimizer(self.lr)
        else:
            raise KeyError

        # apply grad clip to avoid gradient explosion
        grads_vars = self.opt.compute_gradients(self.loss)
        capped_grads_vars = [[tf.clip_by_value(g, -self.config['clip'], self.config['clip']), v]
                            for g, v in grads_vars]
        self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)

        # model saver
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
Example #25
    def __init__(self, config, embeddings):

        self.config = config

        self.lstm_dim = config["lstm_dim"]
        self.num_chars = config["num_chars"]
        self.num_tags = config["num_tags"]
        self.char_dim = config["char_dim"]
        self.lr = config["lr"]


        zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, config["char_dim"]])
        self.char_embeding = tf.concat(axis=0, values=[zero_pad, tf.get_variable(name="char_embeding", initializer=embeddings)])

        self.global_step = tf.Variable(0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        self.char_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="char_inputs")
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name="targets")
        self.dropout = tf.placeholder(dtype=tf.float32, name="dropout")
        self.lengths = tf.placeholder(dtype=tf.int32, shape=[None, ], name="lengths")


        # self.middle_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="middle_dropout_keep_prob")
        # self.hidden_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="hidden_dropout_keep_prob")

        self.input_dropout_keep_prob = tf.placeholder_with_default(config["input_dropout_keep"], [], name="input_dropout_keep_prob")

        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = tf.shape(self.char_inputs)[-1]

        # forward
        embedding = self.embedding_layer(self.char_inputs)
        lstm_inputs = tf.nn.dropout(embedding, self.input_dropout_keep_prob)

        ## bi-directional lstm layer
        lstm_outputs = self.bilstm_layer(lstm_inputs)
        ## logits for tags
        self.project_layer(lstm_outputs)
        ## loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)


        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(self.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(self.lr)
            else:
                raise KeyError
            grads_vars = self.opt.compute_gradients(self.loss)
            capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v] for g, v in grads_vars]
            self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)
Example #26
 def embedding_to_logit(self, embedding, is_training=True, seed=0):
     """Create a graph, transforming embedding vectors to logit classs scores."""
     from tensorflow.contrib.layers.python.layers import initializers
     with tf.variable_scope('net', reuse=is_training):
         return slim.fully_connected(
             embedding,
             self.num_labels,
             activation_fn=None,
             weights_regularizer=slim.l2_regularizer(1e-4),
             weights_initializer=initializers.xavier_initializer(seed=seed))
    def lstm_model(self):
        with tf.variable_scope(name_or_scope='lstm_model', reuse=tf.AUTO_REUSE):
            # inputs
            # 25 features; batch_size and seq_length are not fixed
            x = tf.placeholder(shape=(None, None, 25), name='input', dtype=tf.float64)
            print(x.name)
            # placeholder indicating whether we are training (affects the batch_normalization layer)
            is_training = tf.placeholder(name='is_training', dtype=tf.bool)

            # build the LSTM cell
            lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_units, initializer=tf.orthogonal_initializer(),
                                                    num_proj=48, reuse=tf.AUTO_REUSE,
                                                name='lstm_cell', activation='sigmoid')
            #lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_units, initializer=tf.random_normal_initializer(),
            #num_proj = 48, reuse = tf.AUTO_REUSE,
            #name = 'lstm_cell', activation = 'sigmoid')
            c = (tf.nn.dynamic_rnn(cell=lstm_cell, inputs=x, dtype=tf.float64))[0][:, -1, :]

        with tf.variable_scope(name_or_scope='fully_connection', reuse=tf.AUTO_REUSE):
            # fully connected layers
            b_1 = tf.get_variable(name='bias_1', dtype=tf.float64, initializer=tf.constant(value=np.zeros((16,)), dtype=tf.float64))
            w_1 = tf.get_variable(name='weight_1', shape=(48, 16), dtype=tf.float64,
                                initializer=initializers.xavier_initializer())

            b_2 = tf.get_variable(name='bias_2', dtype=tf.float64, initializer=tf.constant(value=np.zeros((2,)), dtype=tf.float64))
            w_2 = tf.get_variable(name='weight_2', shape=(16, 2), dtype=tf.float64,
                                  initializer=initializers.xavier_initializer())
            #w = tf.get_variable(name='weight', shape=(32, 2), dtype=tf.float64,
                                 #initializer=tf.random_normal_initializer())

            c2 = tf.matmul(c, w_1) + b_1
            # batch normalization layer
            batch_norm = tf.layers.batch_normalization(inputs=tf.matmul(c2, w_2) + b_2, training=is_training, name='batch_norm',
                                                       epsilon=0)
            # activation followed by softmax to output probabilities [P_up, P_down]
            # tanh_output = tf.tanh(batch_norm, 'tanh')
            # output = tf.nn.softmax(tanh_output)
            leaky_relu_output = tf.nn.leaky_relu(features=batch_norm, alpha=0.2, name='relu')
            output = tf.nn.softmax(logits=leaky_relu_output, name='output')


            # return the model output and the input placeholders
            return output, x, is_training
    def __init__(self, config):

        # read model hyperparameters from the config
        self.config = config
        self.lr = config["lr"]
        self.char_dim = config["char_dim"]  # character embedding dimension
        self.lstm_dim = config["lstm_dim"]  # number of LSTM hidden units
        self.seg_dim = config["seg_dim"]    # dimension of the segmentation feature embedding
        self.num_tags = config["num_tags"]  # number of tags
        self.num_chars = config["num_chars"]    # number of characters in the vocabulary
        self.num_segs = 4   # number of segmentation feature values
        # global variables
        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()
        # input placeholders
        self.char_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="ChatInputs")    # character features: character index ids
        self.seg_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="SegInputs")  # segmentation features: segmentation index of each character
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Targets")   # gold labels
        self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")
        # variables
        self.char_lookup = None    # embedding matrix, initialized from pretrained embeddings when the model is built
        self.seg_lookup = None  # segmentation feature embedding matrix
        self.trans = None  # transition matrix, computed in the loss layer

        used = tf.sign(tf.abs(self.char_inputs))    # count characters whose index is non-zero (non-padding)
        length = tf.reduce_sum(used, reduction_indices=1)
        self.lengths = tf.cast(length, tf.int32)    # true sequence length excluding padding (index 0)
        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = tf.shape(self.char_inputs)[-1]  # total sequence length

        # build the forward pass
        embedding = self.embedding_layer()  # concatenated character/word embedding features from embedding_layer
        lstm_inputs = tf.nn.dropout(embedding, self.dropout)    # dropout layer
        lstm_outputs = self.bilstm_layer(lstm_inputs)  # bidirectional LSTM layer
        self.logits = self.project_layer(lstm_outputs)  # per-character scores for every tag
        self.loss = self.loss_layer(self.logits)  # compute the loss
        # optimizer for the training phase
        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(self.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(self.lr)
            else:
                raise KeyError
            # apply gradient clipping to avoid gradient explosion
            grads_vars = self.opt.compute_gradients(self.loss)
            capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v]
                                 for g, v in grads_vars]
            self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)   # model saver
Example #29
    def __init__(self, config, embeddings):

        self.config = config

        self.lstm_dim = config["lstm_dim"]
        self.num_chars = config["num_chars"]
        self.num_tags = config["num_tags"]
        self.char_dim = config["char_dim"]
        self.lr = config["lr"]


        self.char_embeding = tf.get_variable(name="char_embeding", initializer=embeddings)

        self.global_step = tf.Variable(0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        self.char_inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name="char_inputs")
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name="targets")
        self.dropout = tf.placeholder(dtype=tf.float32, name="dropout")
        self.lengths = tf.placeholder(dtype=tf.int32, shape=[None, ], name="lengths")


        # self.middle_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="middle_dropout_keep_prob")
        # self.hidden_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="hidden_dropout_keep_prob")

        self.input_dropout_keep_prob = tf.placeholder_with_default(config["input_dropout_keep"], [], name="input_dropout_keep_prob")

        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = tf.shape(self.char_inputs)[-1]

        # forward
        embedding = self.embedding_layer(self.char_inputs)
        lstm_inputs = tf.nn.dropout(embedding, self.input_dropout_keep_prob)

        ## bi-directional lstm layer
        lstm_outputs = self.bilstm_layer(lstm_inputs)
        ## logits for tags
        self.project_layer(lstm_outputs)
        ## loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)


        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(self.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(self.lr)
            else:
                raise KeyError
            grads_vars = self.opt.compute_gradients(self.loss)
            capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v] for g, v in grads_vars]
            self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)
Example #30
    def __init__(self):
        self.learning_rate = ner_tv.initial_learning_rate
        self.num_hidden = ner_tv.hidden_neural_size  # number of LSTM hidden units
        self.embedding_size = ner_tv.embedding_dim
        self.num_tags = ner_tv.tags_num
        self.max_grad_norm = ner_tv.max_grad_norm
        self.max_sentence_len = ner_tv.sentence_length
        self.w2v_model_path = ner_tv.word2vec_path
        self.model_save_path = ner_tv.train_model_bi_lstm
        self.train_epoch = ner_tv.num_epochs
        self.dropout_train = ner_tv.dropout
        self.decay_step = ner_tv.decay_step
        self.decay_rate = ner_tv.decay_rate
        self.min_learning_rate = ner_tv.min_learning_rate
        self.initializer = initializers.xavier_initializer()

        self.inputs = tf.placeholder(dtype=tf.int32,
                                     shape=[None, self.max_sentence_len],
                                     name="inputs")
        self.labels = tf.placeholder(dtype=tf.int32,
                                     shape=[None, self.max_sentence_len],
                                     name='labels')
        self.dropout = tf.placeholder(dtype=tf.float32, name='dropout')

        with tf.variable_scope("word2vec_embedding"):
            self.embedding_vec = tf.Variable(change_gensim_mode2array(),
                                             name='word2vec',
                                             dtype=tf.float32)
            inputs_embedding = tf.nn.embedding_lookup(self.embedding_vec,
                                                      self.inputs)
            lengths = self.get_length(self.inputs)
            self.lengths = tf.cast(lengths, tf.int32)
        lstm_outputs = self.biLSTM_layer(inputs_embedding, self.lengths)

        self.logits = self.project_layer(lstm_outputs)

        self.loss = self.loss_layer(self.logits, self.lengths)

        self.global_step = tf.Variable(0, trainable=False, name="global_step")

        self.train_learning_rate = tf.maximum(
            tf.train.exponential_decay(self.learning_rate,
                                       self.global_step,
                                       self.decay_step,
                                       self.decay_rate,
                                       staircase=True), self.min_learning_rate)

        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.train_learning_rate)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          self.max_grad_norm)
        self.train_op = self.optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)
        self.saver = tf.train.Saver(max_to_keep=3)
Example #31
    def _build_q_net(self, state, n_actions, variable_scope, reuse=False):
        with tf.variable_scope(variable_scope, reuse=reuse):
            user_id_embedding_table = tf.get_variable(
                name="user_id",
                shape=[self.user_num, 10],
                initializer=initializers.xavier_initializer(),
                trainable=True,
                dtype=tf.float32)
            user_id = tf.cast(state[:, 0], dtype=tf.int32)
            user_id_embeddings = tf.nn.embedding_lookup(
                user_id_embedding_table, ids=user_id, name="user_id_embedding")
            state = tf.concat([user_id_embeddings, state[:, 1:]], axis=1)

            n_features = state.get_shape()[1]

            fc1 = tf.layers.dense(
                state,
                units=n_features,
                activation=tf.nn.relu,
                name='fc1',
                kernel_initializer=initializers.xavier_initializer())

            fc2 = tf.layers.dense(
                fc1,
                units=n_features // 2,
                activation=tf.nn.relu,
                name='fc2',
                kernel_initializer=initializers.xavier_initializer())

            fc3 = tf.layers.dense(
                fc2,
                units=n_features // 2,
                activation=tf.nn.relu,
                name='fc3',
                kernel_initializer=initializers.xavier_initializer())
            q_out = tf.maximum(
                tf.layers.dense(
                    fc3,
                    units=n_actions,
                    name='q',
                    kernel_initializer=initializers.xavier_initializer()), 0)
            return q_out
Example #32
    def _create_mlp_policy(self, args):
        # Get samples from standard normal distribution, transform to match z-distribution
        samples = tf.random_normal(
            [args.sample_size, args.batch_size, args.z_dim], name="z_samples")
        self.z_samples = samples * tf.exp(self.z_logstd) + self.z_mean
        self.z_samples = tf.transpose(self.z_samples, perm=[1, 0, 2])

        # Construct encoder input
        enc_in = tf.concat([self.states, self.z_samples], axis=2)
        enc_in = tf.reshape(
            enc_in,
            [args.batch_size * args.sample_size, args.state_dim + args.z_dim],
            name="enc_in")

        # Create fully connected network of desired size
        W = tf.get_variable("w_0",
                            [args.state_dim + args.z_dim, args.policy_size],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("b_0", [args.policy_size])
        output = tf.nn.dropout(tf.nn.relu(tf.nn.xw_plus_b(enc_in, W, b)),
                               args.dropout_level)

        for i in range(1, args.num_policy_layers):
            W = tf.get_variable("w_" + str(i),
                                [args.policy_size, args.policy_size],
                                initializer=initializers.xavier_initializer())
            b = tf.get_variable("b_" + str(i), [args.policy_size])
            output = tf.nn.dropout(tf.nn.relu(tf.nn.xw_plus_b(output, W, b)),
                                   args.dropout_level)

        W = tf.get_variable("w_end", [args.policy_size, args.action_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("b_end", [args.action_dim])
        a_mean = tf.nn.xw_plus_b(output, W, b)
        self.a_mean = tf.reshape(
            a_mean, [args.batch_size, args.sample_size, args.action_dim],
            name="a_mean")

        # Initialize logstd
        self.a_logstd = tf.Variable(np.zeros(args.action_dim),
                                    name="a_logstd",
                                    dtype=tf.float32)
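
A hedged follow-up sketch (it would live in the same class): sampling actions from the diagonal Gaussian parameterized by `a_mean` and `a_logstd` above.

        # Illustrative only: draw one action per (batch, sample) pair from
        # N(a_mean, exp(a_logstd)^2).
        noise = tf.random_normal(tf.shape(self.a_mean))
        self.a_sample = self.a_mean + tf.exp(self.a_logstd) * noise
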
Example #33
0
def project_layer(inputs, out_dim, seq_length, scope='project'):
    with tf.variable_scope(scope):
        in_dim = inputs.get_shape().as_list()[-1]
        weight = tf.get_variable('W', shape=[in_dim, out_dim], dtype=tf.float32,
                                 initializer=initializers.xavier_initializer())
        bias = tf.get_variable('b', shape=[out_dim], dtype=tf.float32,
                               initializer=tf.zeros_initializer())
        t_output = tf.reshape(inputs, [-1, in_dim])             # (batch_size*seq_length, in_dim)
        output = tf.matmul(t_output, weight) + bias             # (batch_size*seq_length, out_dim)
        output = tf.reshape(output, [-1, seq_length, out_dim])  # (batch_size, seq_length, out_dim)
        return output
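
A minimal call sketch, assuming hypothetical names `lstm_outputs` (shape [batch_size, max_len, 2*lstm_dim]), `num_tags`, and `max_len`:

# Project per-token BiLSTM features down to tag logits.
logits = project_layer(lstm_outputs, out_dim=num_tags, seq_length=max_len)
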
Example #34
0
    def _create_tf_embed_nn(self, x_in, is_training, layer_sizes, name):
        # type: (tf.Tensor, tf.Tensor, List[int], Text) -> tf.Tensor
        """Create nn with hidden layers and name"""
        reg = tf.contrib.layers.l2_regularizer(self.C2)
        x = x_in
        for i, layer_size in enumerate(layer_sizes):
            x = tf.layers.dense(inputs=x,
                                kernel_initializer=initializers.xavier_initializer(),
                                units=layer_size,
                                activation=tf.nn.relu,
                                kernel_regularizer=reg,
                                name='hidden_layer_{}_{}'.format(name, i))
            x = tf.layers.dropout(x, rate=self.droprate, training=is_training)

        x = tf.layers.dense(inputs=x,
                            kernel_initializer=initializers.xavier_initializer(),
                            units=self.embed_dim,
                            kernel_regularizer=reg,
                            name='embed_layer_{}'.format(name))
        return x
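
A sketch of how the helper above might be invoked from the same class, assuming hypothetical attributes `self.a_in` (a batch of feature vectors) and `self.is_training`:

        # Embed the inputs through two hidden layers into the shared embedding space.
        a_embed = self._create_tf_embed_nn(self.a_in, self.is_training,
                                           layer_sizes=[256, 128], name='a')
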
  def test_conv_layer(self):
    g = ops.Graph()
    with g.as_default():
      inputs = array_ops.placeholder(dtypes.float32, shape=[8, 5, 5, 3])

      # Build the conv -> fused batch-norm -> relu stack inside the test graph
      # so the matcher below has something to find.
      with contrib_ops.arg_scope(
          [layers.batch_norm], fused=True, is_training=True, trainable=True):
        layers.convolution(
            inputs,
            num_outputs=16,
            kernel_size=3,
            stride=1,
            padding='VALID',
            activation_fn=nn_ops.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params={},
            weights_initializer=initializers.xavier_initializer(),
            weights_regularizer=None,
            biases_initializer=init_ops.zeros_initializer(),
            biases_regularizer=None,
            reuse=None,
            trainable=True,
            scope=None)

    inputs_pattern = graph_matcher.OpTypePattern('*', name='inputs')
    relu_pattern = graph_matcher.OpTypePattern(
        'Relu',
        name='relu',
        inputs=[
            graph_matcher.OpTypePattern(
                'FusedBatchNorm',
                inputs=[
                    graph_matcher.OpTypePattern(
                        'Conv2D', inputs=[inputs_pattern, '*']), '*', '*', '*',
                    '*'
                ])
        ])
    matcher = graph_matcher.GraphMatcher(relu_pattern)
    match_results = list(matcher.match_graph(g))
    self.assertEqual(1, len(match_results))
    match_result = match_results[0]
    self.assertEqual(match_result.get_tensor(inputs_pattern), inputs)
    self.assertEqual(match_result.get_tensor('inputs'), inputs)
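
A minimal sketch of the same matching API on a simpler pattern (assuming the same `graph_matcher` module and a graph `g` that contains a BiasAdd feeding a Relu):

    # Match any op feeding a BiasAdd whose output goes through a Relu.
    bias_add_pattern = graph_matcher.OpTypePattern('BiasAdd', inputs=['*', '*'])
    relu_bias_pattern = graph_matcher.OpTypePattern('Relu', inputs=[bias_add_pattern])
    for match in graph_matcher.GraphMatcher(relu_bias_pattern).match_graph(g):
      print(match.get_op(relu_bias_pattern).name)
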
Example #36
0
def _get_weights_wrapper(
  name, shape, dtype=tf.float32, initializer=initializers.xavier_initializer(),
  weights_decay_factor=None
):
  """Wrapper over _get_variable_wrapper() to get weights, with weights decay factor in loss.
  """

  weights = _get_variable_wrapper(
    name=name, shape=shape, dtype=dtype, initializer=initializer
  )

  if weights_decay_factor is not None and weights_decay_factor > 0.0:

    weights_wd = tf.multiply(
      tf.nn.l2_loss(weights), weights_decay_factor, name=name + '/l2loss'
    )

    tf.add_to_collection('losses', weights_wd)

  return weights
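
A hedged usage sketch, assuming a hypothetical `cross_entropy_loss` tensor already exists and that `_get_variable_wrapper` behaves like `tf.get_variable`:

# Create a conv kernel with L2 weight decay; the decay term lands in the
# 'losses' collection and is summed into the total objective.
conv1_w = _get_weights_wrapper(
  name='conv1_weights', shape=[3, 3, 64, 128], weights_decay_factor=1e-4)
total_loss = cross_entropy_loss + tf.add_n(tf.get_collection('losses'))
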
Example #37
0
	def __init__(self, sess, dataset, conf, num_train=50000, input_size=3072, test_filename='/data2/andrewliao11/cifar-10-batches-py/test_batch'):

		self.sess = sess
		self.test_filename = test_filename
		self.w = 32
		self.h = 32
		self.channel = 3
		self.synthetic = conf.synthetic
		self.optim_type = conf.optim_type
		self.test_per_iter = conf.test_per_iter
		self.max_step = conf.max_step
		self.ckpt_dir = conf.checkpoint_dir
		self.batch_size = conf.batch_size
		self.num_train = num_train
		self.max_epoch = math.floor(conf.max_step/math.floor(self.num_train/self.batch_size))
		self.input_dims = input_size
		self.hidden_size = conf.hidden_size
		self.weight_initializer = initializers.xavier_initializer()
		self.bias_initializer = tf.constant_initializer(0.1)
		self.output_size = conf.output_size
		self.max_to_keep = conf.max_to_keep
		self.dataset = dataset
		self.var = {}
		self.grad_output = {}
		self.synthetic_grad = {}
		self.layer_out = {}
		self.grad_loss = []

		self.global_step = tf.get_variable('global_step', [],initializer=tf.constant_initializer(0), trainable=False)
		if self.optim_type == 'exp_decay':
			decay_steps = int(math.floor(self.num_train/self.batch_size)* conf.num_epoch_per_decay)
			self.lr = tf.train.exponential_decay(conf.init_lr,
							self.global_step, decay_steps,
							conf.decay_factor,
							staircase=True)
			self.optim = tf.train.GradientDescentOptimizer(self.lr)
		elif self.optim_type == 'adam':
			self.optim = tf.train.AdamOptimizer(conf.init_lr)	
Example #38
0
def linear(input_,
           output_size,
           weights_initializer=initializers.xavier_initializer(),
           biases_initializer=tf.zeros_initializer,
           activation_fn=None,
           trainable=True,
           name='linear'):
  shape = input_.get_shape().as_list()

  if len(shape) > 2:
    input_ = tf.reshape(input_, [-1, reduce(lambda x, y: x * y, shape[1:])])
    shape = input_.get_shape().as_list()

  with tf.variable_scope(name):
    w = tf.get_variable('w', [shape[1], output_size], tf.float32,
        initializer=weights_initializer, trainable=trainable)
    b = tf.get_variable('b', [output_size],
        initializer=biases_initializer, trainable=trainable)
    out = tf.nn.bias_add(tf.matmul(input_, w), b)

    if activation_fn is not None:
      return activation_fn(out), w, b
    else:
      return out, w, b
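
A minimal usage sketch of `linear` above (TF 1.x; the >2-D branch flattens spatial dimensions before the matmul, and `reduce` comes from `functools` on Python 3):

# Flatten a 7x7x64 feature map and apply a ReLU fully connected layer.
features = tf.placeholder(tf.float32, [None, 7, 7, 64], name='features')
fc1, fc1_w, fc1_b = linear(features, 256, activation_fn=tf.nn.relu, name='fc1')
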
Example #39
0
def class_capsules(inputs, num_classes, iterations, batch_size, name):
    """
    :param inputs: ((24, 4, 4, 32, 4, 4), (24, 4, 4, 32))
    :param num_classes: 10
    :param iterations: 3
    :param batch_size: 24
    :param name:
    :return poses, activations: poses (24, 10, 4, 4), activation (24, 10).
    """

    inputs_poses, inputs_activations = inputs # (24, 4, 4, 32, 4, 4), (24, 4, 4, 32)

    inputs_shape = inputs_poses.get_shape()
    spatial_size = int(inputs_shape[1])  # 4
    pose_size = int(inputs_shape[-1])    # 4
    i_size = int(inputs_shape[3])        # 32

    # inputs_poses (24*4*4=384, 32, 16)
    inputs_poses = tf.reshape(inputs_poses, shape=[batch_size*spatial_size*spatial_size, inputs_shape[-3], inputs_shape[-2]*inputs_shape[-2] ])

    with tf.variable_scope(name) as scope:
        with tf.variable_scope('votes') as scope:
            # inputs_poses (384, 32, 16)
            # votes: (384, 32, 10, 16)
            votes = mat_transform(inputs_poses, num_classes, size=batch_size*spatial_size*spatial_size)
            tf.logging.info(f"{name} votes shape: {votes.get_shape()}")

            # votes (24, 4, 4, 32, 10, 16)
            votes = tf.reshape(votes, shape=[batch_size, spatial_size, spatial_size, i_size, num_classes, pose_size*pose_size])

            # (24, 4, 4, 32, 10, 16)
            votes = coord_addition(votes, spatial_size, spatial_size)

            tf.logging.info(f"{name} votes shape with coord addition: {votes.get_shape()}")

        with tf.variable_scope('routing') as scope:
            # beta_v and beta_a one for each output capsule: (1, 10)
            beta_v = tf.get_variable(
                name='beta_v', shape=[1, num_classes], dtype=tf.float32,
                initializer=initializers.xavier_initializer()
            )
            beta_a = tf.get_variable(
                name='beta_a', shape=[1, num_classes], dtype=tf.float32,
                initializer=initializers.xavier_initializer()
            )

            # votes (24, 4, 4, 32, 10, 16) -> (24, 512, 10, 16)
            votes_shape = votes.get_shape()
            votes = tf.reshape(votes, shape=[batch_size, votes_shape[1] * votes_shape[2] * votes_shape[3], votes_shape[4], votes_shape[5]] )

            # inputs_activations (24, 4, 4, 32) -> (24, 512)
            inputs_activations = tf.reshape(inputs_activations, shape=[batch_size,
                                                                       votes_shape[1] * votes_shape[2] * votes_shape[3]])

            # votes (24, 512, 10, 16), inputs_activations (24, 512)
            # poses (24, 10, 16), activation (24, 10)
            poses, activations = matrix_capsules_em_routing(
                votes, inputs_activations, beta_v, beta_a, iterations, name='em_routing'
            )

        # poses (24, 10, 16) -> (24, 10, 4, 4)
        poses = tf.reshape(poses, shape=[batch_size, num_classes, pose_size, pose_size] )

        # poses (24, 10, 4, 4), activation (24, 10)
        return poses, activations
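
A call sketch matching the shapes documented above (batch 24, 4x4 grid, 32 input capsule types, 4x4 poses, 10 classes); `mat_transform` and `matrix_capsules_em_routing` are assumed to be defined alongside this function:

poses_in = tf.placeholder(tf.float32, [24, 4, 4, 32, 4, 4])
activations_in = tf.placeholder(tf.float32, [24, 4, 4, 32])
# poses: (24, 10, 4, 4), activations: (24, 10)
poses, activations = class_capsules((poses_in, activations_in), num_classes=10,
                                    iterations=3, batch_size=24, name='class_caps')
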
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Adds the parameters for a fully connected layer and returns the output.
  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.
  If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
  with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
  multiply along the first dimensions. The result r is a tensor of shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
  where \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  This is accomplished by reshaping `x` to 2-D
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  before the matrix multiply and afterwards reshaping it to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].
  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.
  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.
  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to place
  the created variables (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.
  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.
  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.
  Returns:
    The output of the fully connected layer.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')
    dtype = x.dtype.base_dtype

    weight_collections = set(list(weight_collections or []) +
                             [ops.GraphKeys.VARIABLES])
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=weight_collections,
                                    regularizer=weight_regularizer,
                                    trainable=trainable)
    x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
                                                         [-1, num_input_units])
    y = standard_ops.matmul(x_2_dim, w)

    if bias_init is not None:
      bias_collections = set(list(bias_collections or []) +
                             [ops.GraphKeys.VARIABLES])
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=bias_collections,
                                      regularizer=bias_regularizer,
                                      trainable=trainable)

      y = nn.bias_add(y, b)

    if len(dims) > 2:
      out_shape = array_ops.unpack(array_ops.shape(x))
      out_shape[-1] = num_output_units

      y = array_ops.reshape(y, array_ops.pack(out_shape))

      static_shape = x.get_shape().as_list()
      static_shape[-1] = num_output_units
      y.set_shape(static_shape)

    return _apply_activation(y, activation_fn, output_collections)
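
A hedged usage sketch of the legacy API above (TF 1.x era; with a 3-D input the matmul is repeated over the leading dimensions, as the docstring describes):

# [batch, time, 300] tokens -> [batch, time, 128] with a ReLU nonlinearity.
tokens = tf.placeholder(tf.float32, [None, 20, 300], name='tokens')
hidden = legacy_fully_connected(tokens, 128, activation_fn=tf.nn.relu,
                                name='token_fc')
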
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  """Adds a fully connected layer.
  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.
  Note that if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.
  Args:
    inputs: A tensor with at least rank 2 and a static value for the last dimension,
      i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.
  Returns:
     the tensor variable representing the result of the series of operations.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, int):
    raise ValueError('num_outputs should be integer, got %s.', num_outputs)
  with variable_scope.variable_op_scope([inputs],
                                        scope,
                                        'fully_connected',
                                        reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

    static_shape = inputs.get_shape().as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
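
A usage sketch for the layer above; `batch_norm` is assumed to be the companion layer from the same contrib module, and `inputs` is a hypothetical placeholder:

inputs = tf.placeholder(tf.float32, [None, 1024], name='inputs')
net = fully_connected(inputs, 512, scope='fc1')
# The second layer swaps the bias for batch normalization via normalizer_fn.
net = fully_connected(net, 256, normalizer_fn=batch_norm,
                      normalizer_params={'is_training': True}, scope='fc2')
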
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer,
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.
  `convolution2d` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved with the `inputs` to produce a
  `Tensor` of activations. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the activations. Finally, if `activation_fn` is not `None`,
  it is applied to the activations as well.
  Args:
    inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 `[kernel_height, kernel_width]`
      of the filters. Can be an int if both values are the same.
    stride: a list of length 2 `[stride_height, stride_width]`.
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.
  Returns:
    a tensor representing the output of the operation.
  """
  with variable_scope.variable_op_scope([inputs],
                                        scope, 'Conv', reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    weights_shape = [kernel_h, kernel_w,
                     num_filters_in, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
                        padding=padding)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
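
A brief usage sketch with a hypothetical NHWC `images` placeholder; weights default to `xavier_initializer()` as defined above:

images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
net = convolution2d(images, 64, kernel_size=3, padding='VALID', scope='conv1')
net = convolution2d(net, 128, kernel_size=3, stride=2, scope='conv2')
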
Example #43
0
def separable_convolution2d(
        inputs,
        num_outputs,
        kernel_size,
        depth_multiplier,
        stride=1,
        padding='SAME',
        rate=1,
        activation_fn=tf.nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=initializers.xavier_initializer(),
        weights_regularizer=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        data_format='NHWC',
        scope=None):
    """Adds a depth-separable 2D convolution with optional batch_norm layer.
    This op first performs a depthwise convolution that acts separately on
    channels, creating a variable called `depthwise_weights`. If `num_outputs`
    is not None, it adds a pointwise convolution that mixes channels, creating a
    variable called `pointwise_weights`. Then, if `batch_norm_params` is None,
    it adds bias to the result, creating a variable called 'biases', otherwise
    it adds a batch normalization layer. It finally applies an activation function
    to produce the end result.
    Args:
        inputs: A tensor of size [batch_size, height, width, channels].
        num_outputs: The number of pointwise convolution output filters. If is
            None, then we skip the pointwise convolution stage.
        kernel_size: A list of length 2: [kernel_height, kernel_width]
            of the filters. Can be an int if both values are the same.
        depth_multiplier: The number of depthwise convolution output channels for
            each input channel. The total number of depthwise convolution output
            channels will be equal to `num_filters_in * depth_multiplier`.
        stride: A list of length 2: [stride_height, stride_width], specifying the
            depthwise convolution stride. Can be an int if both strides are the same.
        padding: One of 'VALID' or 'SAME'.
        rate: A list of length 2: [rate_height, rate_width], specifying the dilation
            rates for a'trous convolution. Can be an int if both rates are the same.
            If any value is larger than one, then both stride values need to be one.
        activation_fn: Activation function. The default value is a ReLU function.
            Explicitly set it to None to skip it and maintain a linear activation.
        normalizer_fn: Normalization function to use instead of `biases`. If
            `normalizer_fn` is provided then `biases_initializer` and
            `biases_regularizer` are ignored and `biases` are not created nor added.
            default set to None for no normalizer function
        normalizer_params: Normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: Whether or not the layer and its variables should be reused. To be
            able to reuse the layer scope must be given.
        variables_collections: Optional list of collections for all the variables or
            a dictionary containing a different list of collection per variable.
        outputs_collections: Collection to add the outputs.
        trainable: Whether or not the variables should be trainable or not.
        scope: Optional scope for variable_scope.
    Returns:
        A `Tensor` representing the output of the operation.
    """
    layer_variable_getter = _build_variable_getter(
        {'bias': 'biases',
         'depthwise_kernel': 'depthwise_weights',
         'pointwise_kernel': 'pointwise_weights'})

    with variable_scope.variable_scope(
            scope, 'SeparableConv2d', [inputs], reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        if num_outputs is not None:
            channel_format = 'channels_last' if data_format == 'NHWC' else 'channels_first'
            # Apply separable conv using the SeparableConvolution2D layer.
            layer = convolutional_layers.SeparableConvolution2D(
                filters=num_outputs,
                kernel_size=kernel_size,
                strides=stride,
                padding=padding,
                data_format=channel_format,
                dilation_rate=utils.two_element_tuple(rate),
                activation=None,
                depth_multiplier=depth_multiplier,
                use_bias=not normalizer_fn and biases_initializer,
                depthwise_initializer=weights_initializer,
                pointwise_initializer=weights_initializer,
                bias_initializer=biases_initializer,
                depthwise_regularizer=weights_regularizer,
                pointwise_regularizer=weights_regularizer,
                bias_regularizer=biases_regularizer,
                activity_regularizer=None,
                trainable=trainable,
                name=sc.name,
                dtype=inputs.dtype.base_dtype,
                _scope=sc,
                _reuse=reuse)
            outputs = layer.apply(inputs)

            # Add variables to collections.
            _add_variable_to_collections(layer.depthwise_kernel,
                                         variables_collections, 'weights')
            _add_variable_to_collections(layer.pointwise_kernel,
                                         variables_collections, 'weights')
            if layer.bias:
                _add_variable_to_collections(layer.bias,
                                             variables_collections, 'biases')

            if normalizer_fn is not None:
                normalizer_params = normalizer_params or {}
                outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            outputs = depthwise_convolution2d(
                inputs,
                kernel_size,
                depth_multiplier,
                stride,
                padding,
                rate,
                activation_fn,
                normalizer_fn,
                normalizer_params,
                weights_initializer,
                weights_regularizer,
                biases_initializer,
                biases_regularizer,
                reuse,
                variables_collections,
                outputs_collections,
                trainable,
                data_format,
                scope=None)
            return outputs

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
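
A usage sketch of both branches described above, with a hypothetical NHWC `images` tensor: `num_outputs=None` skips the pointwise stage, while an integer adds it.

# Depthwise-only block (falls through to depthwise_convolution2d).
net = separable_convolution2d(images, None, kernel_size=3,
                              depth_multiplier=1, scope='dw1')
# Full depthwise-separable conv with 256 pointwise output filters.
net = separable_convolution2d(net, 256, kernel_size=3,
                              depth_multiplier=1, scope='sep1')
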
Example #44
0
    def __init__(self, config):

        self.config = config
        self.lr = config["lr"]
        self.char_dim = config["char_dim"]
        self.lstm_dim = config["lstm_dim"]
        self.seg_dim = config["seg_dim"]

        self.num_tags = config["num_tags"]
        self.num_chars = config["num_chars"]
        self.num_segs = 4

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        # add placeholders for the model

        self.char_inputs = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name="ChatInputs")
        self.seg_inputs = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name="SegInputs")

        self.targets = tf.placeholder(dtype=tf.int32,
                                      shape=[None, None],
                                      name="Targets")
        # dropout keep prob
        self.dropout = tf.placeholder(dtype=tf.float32,
                                      name="Dropout")

        used = tf.sign(tf.abs(self.char_inputs))
        length = tf.reduce_sum(used, reduction_indices=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.char_inputs)[0]
        self.num_steps = tf.shape(self.char_inputs)[-1]

        # embeddings for chinese character and segmentation representation
        embedding = self.embedding_layer(self.char_inputs, self.seg_inputs, config)

        # apply dropout before feed to lstm layer
        lstm_inputs = tf.nn.dropout(embedding, self.dropout)

        # bi-directional lstm layer
        lstm_outputs = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)

        # logits for tags
        self.logits = self.project_layer(lstm_outputs)

        # loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)

        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(self.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(self.lr)
            else:
                raise KeyError

            # apply grad clip to avoid gradient explosion
            grads_vars = self.opt.compute_gradients(self.loss)
            capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v]
                                 for g, v in grads_vars]
            self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)

        # saver of the model
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
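
A hedged training-step sketch for the model above, assuming a `tf.Session` named `sess` and hypothetical padded batches `char_batch`, `seg_batch`, `tag_batch`:

feed = {model.char_inputs: char_batch,
        model.seg_inputs: seg_batch,
        model.targets: tag_batch,
        model.dropout: 0.5}
step, loss, _ = sess.run([model.global_step, model.loss, model.train_op], feed)
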
Example #45
0
def depthwise_convolution2d(
        inputs,
        kernel_size,
        depth_multiplier=1,
        stride=1,
        padding='SAME',
        rate=1,
        activation_fn=nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=initializers.xavier_initializer(),
        weights_regularizer=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        data_format='NHWC',
        scope=None):
    """Adds a depthwise 2D convolution with optional batch_norm layer.
    This op performs a depthwise convolution that acts separately on
    channels, creating a variable called `depthwise_weights`. Then,
    if `normalizer_fn` is None,
    it adds bias to the result, creating a variable called 'biases', otherwise,
    the `normalizer_fn` is applied. It finally applies an activation function
    to produce the end result.
    Args:
        inputs: A tensor of size [batch_size, height, width, channels].
        num_outputs: The number of pointwise convolution output filters. If is
          None, then we skip the pointwise convolution stage.
        kernel_size: A list of length 2: [kernel_height, kernel_width]
          of the filters. Can be an int if both values are the same.
        depth_multiplier: The number of depthwise convolution output channels for
          each input channel. The total number of depthwise convolution output
          channels will be equal to `num_filters_in * depth_multiplier`.
        stride: A list of length 2: [stride_height, stride_width], specifying the
          depthwise convolution stride. Can be an int if both strides are the same.
        padding: One of 'VALID' or 'SAME'.
        rate: A list of length 2: [rate_height, rate_width], specifying the dilation
          rates for atrous convolution. Can be an int if both rates are the same.
          If any value is larger than one, then both stride values need to be one.
        activation_fn: Activation function. The default value is a ReLU function.
          Explicitly set it to None to skip it and maintain a linear activation.
        normalizer_fn: Normalization function to use instead of `biases`. If
          `normalizer_fn` is provided then `biases_initializer` and
          `biases_regularizer` are ignored and `biases` are not created nor added.
          default set to None for no normalizer function
        normalizer_params: Normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: Whether or not the layer and its variables should be reused. To be
          able to reuse the layer scope must be given.
        variables_collections: Optional list of collections for all the variables or
          a dictionary containing a different list of collection per variable.
        outputs_collections: Collection to add the outputs.
        trainable: Whether or not the variables should be trainable or not.
        scope: Optional scope for variable_scope.
    Returns:
        A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(scope, 'DepthwiseConv2d', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        # Actually apply depthwise conv instead of separable conv.
        dtype = inputs.dtype.base_dtype
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        if data_format == 'NHWC':
            num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
            strides = [1, stride_h, stride_w, 1]
        else:
            num_filters_in = inputs.get_shape().as_list()[1]
            strides = [1, 1, stride_h, stride_w]

        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')

        # Depthwise weights variable.
        depthwise_shape = [kernel_h, kernel_w,
                           num_filters_in, depth_multiplier]
        depthwise_weights = variables.model_variable(
            'depthwise_weights',
            shape=depthwise_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            trainable=trainable,
            collections=weights_collections)

        outputs = nn.depthwise_conv2d(inputs, depthwise_weights,
                                      strides, padding,
                                      rate=utils.two_element_tuple(rate),
                                      data_format=data_format)
        num_outputs = depth_multiplier * num_filters_in

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable('biases',
                                                  shape=[num_outputs,],
                                                  dtype=dtype,
                                                  initializer=biases_initializer,
                                                  regularizer=biases_regularizer,
                                                  trainable=trainable,
                                                  collections=biases_collections)
                outputs = nn.bias_add(outputs, biases, data_format=data_format)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
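
A short usage sketch with a hypothetical NHWC `images` tensor; `depth_multiplier=2` doubles the channel count without mixing channels:

net = depthwise_convolution2d(images, kernel_size=3, depth_multiplier=2,
                              scope='dwconv1')
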
Example #46
0
def masked_fully_connected(
    inputs,
    num_outputs,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None):
  """Adds a sparse fully connected layer. The weight matrix is masked.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.

  Note that if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor of at least rank 2 and static value for the last dimension;
      i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer or long, the number of output units in the layer.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
     The tensor variable representing the result of the series of operations.

  Raises:
    ValueError: If x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, six.integer_types):
    raise ValueError('num_outputs should be int or long, got %s.' %
                     (num_outputs,))

  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope,
      'fully_connected', [inputs],
      reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    layer = core.MaskedFullyConnected(
        units=num_outputs,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
    if layer.bias is not None:
      _add_variable_to_collections(layer.bias, variables_collections, 'biases')

    # Apply normalizer function / layer.
    if normalizer_fn is not None:
      if not normalizer_params:
        normalizer_params = {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)

    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
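
A usage sketch, assuming a hypothetical `hidden` activation tensor; the call signature mirrors `fully_connected`, but the kernel gets a companion mask variable so it can be pruned later:

logits = masked_fully_connected(hidden, 10, activation_fn=None,
                                scope='masked_logits')
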
Example #47
0
def masked_convolution(inputs,
                       num_outputs,
                       kernel_size,
                       stride=1,
                       padding='SAME',
                       data_format=None,
                       rate=1,
                       activation_fn=nn.relu,
                       normalizer_fn=None,
                       normalizer_params=None,
                       weights_initializer=initializers.xavier_initializer(),
                       weights_regularizer=None,
                       biases_initializer=init_ops.zeros_initializer(),
                       biases_regularizer=None,
                       reuse=None,
                       variables_collections=None,
                       outputs_collections=None,
                       trainable=True,
                       scope=None):
  """Adds an 2D convolution followed by an optional batch_norm layer.
  The layer creates a mask variable on top of the weight variable. The input to
  the convolution operation is the elementwise multiplication of the mask
  variable and the weigh

  It is required that 1 <= N <= 3.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable would be created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified.  In this case
  `stride` values != 1 are not supported.

  Args:
    inputs: A Tensor of rank N+2 of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters.  Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output.  Can be a single integer to specify the same value for all
      spatial dimensions.  Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None.  Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if `data_format`
      does not start with "NC"), or the second dimension (if `data_format`
      starts with "NC").  For N=1, the valid values are "NWC" (default) and
      "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".
      For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution.  Can be a single integer to specify the same
      value for all spatial dimensions.  Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: Both 'rate' and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if input_rank == 3:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)
    elif input_rank == 4:
      layer_class = core.MaskedConv2D
    elif input_rank == 5:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)
    else:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)

    if data_format is None or data_format == 'NHWC':
      df = 'channels_last'
    elif data_format == 'NCHW':
      df = 'channels_first'
    else:
      raise ValueError('Unsupported data format', data_format)

    layer = layer_class(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        dilation_rate=rate,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
    if layer.use_bias:
      _add_variable_to_collections(layer.bias, variables_collections, 'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
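
A usage sketch with a hypothetical NHWC `images` tensor; per the rank check above, only 4-D inputs are accepted:

net = masked_convolution(images, 32, kernel_size=3, scope='masked_conv1')
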
Example #48
0
def conv2d_leaders(inputs,
                   num_outputs,
                   kernel_size,
                   rates=[1],
                   stride=1,
                   padding='SAME',
                   activation_fn=nn.relu,
                   normalizer_fn=None,
                   normalizer_params=None,
                   weights_initializer=initializers.xavier_initializer(),
                   weights_regularizer=None,
                   biases_initializer=init_ops.zeros_initializer,
                   biases_regularizer=None,
                   reuse=None,
                   variables_collections=None,
                   outputs_collections=None,
                   trainable=True,
                   scope=None,):
    """Adds a 2D convolution followed by an optional batch_norm layer.
    `convolution2d` creates a variable called `weights`, representing the
    convolutional kernel, that is convolved with the `inputs` to produce a
    `Tensor` of activations. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable would be
    created and added to the activations. Finally, if `activation_fn` is not `None`,
    it is applied to the activations as well.
    Performs a'trous convolution with input stride equal to rate if rate is
    greater than one.
    Args:
        inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
        num_outputs: integer, the number of output filters.
        kernel_size: a list of length 2 `[kernel_height, kernel_width]` of
          the filters. Can be an int if both values are the same.
        stride: a list of length 2 `[stride_height, stride_width]`.
          Can be an int if both strides are the same. Note that presently
          both strides must have the same value.
        padding: one of `VALID` or `SAME`.
        rates: a list of integers. For each rate less than or equal to 1 a
          standard convolution is used; for each rate greater than 1 an atrous
          convolution is applied. The outputs at the different rates are merged
          with an element-wise maximum.
        activation_fn: activation function.
        normalizer_fn: normalization function to use instead of `biases`. If
          `normalizer_fn` is provided then `biases_initializer` and
          `biases_regularizer` are ignored and `biases` are not created nor added.
        normalizer_params: normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: whether or not the layer and its variables should be reused. To be
          able to reuse the layer scope must be given.
        variables_collections: optional list of collections for all the variables or
          a dictionary containing a different list of collections per variable.
        outputs_collections: collection to add the outputs.
        trainable: If `True` also add variables to the graph collection
          `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        scope: Optional scope for `variable_op_scope`.
    Returns:
        a tensor representing the output of the operation.
    Raises:
        ValueError: if both `rate` and `stride` are larger than one.
    """
    with variable_scope.variable_scope(scope, 'Conv', [inputs],
                                       reuse=reuse) as sc:

        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        # inshape = tf.shape(inputs)

        # Leading kernel size.
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Weights variable.
        weights_shape = [kernel_h, kernel_w,
                         num_filters_in, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        # # Bias variable.
        # biases = None
        # if biases_initializer is not None:
        #     biases_collections = utils.get_variable_collections(
        #         variables_collections, 'biases')
        #     biases = variables.model_variable('biases',
        #                                       shape=[num_outputs, ],
        #                                       dtype=dtype,
        #                                       initializer=biases_initializer,
        #                                       regularizer=biases_regularizer,
        #                                       collections=biases_collections,
        #                                       trainable=trainable)

        # Convolution at different scales.
        outputs_pool = []
        for rate in rates:
            if rate > 1:
                conv = nn.atrous_conv2d(inputs, weights, rate, padding='SAME')
            else:
                conv = nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
            outputs_pool.append(conv)
        # 'Pooling' at different scales. A bit hacky. Use of concat + max_pool?
        outputs = None
        outputs_pool.reverse()
        for node in outputs_pool:
            if outputs is None:
                outputs = node
            else:
                outputs = tf.maximum(outputs, node)
        # # Add bias?
        # if biases is not None:
        #     outputs = tf.nn.bias_add(outputs, biases)

        # Fix padding and stride. A bit hacky too and not so efficient!
        if padding == 'VALID' or stride > 1:
            padfilter = np.zeros(shape=(kernel_h, kernel_w, num_filters_in, 1),
                                 dtype=dtype.as_numpy_dtype)
            x = (kernel_h - 1) // 2
            y = (kernel_w - 1) // 2
            padfilter[x, y, :, 0] = 1.
            outputs = tf.nn.depthwise_conv2d(outputs, padfilter,
                                             [1, stride_h, stride_w, 1],
                                             padding=padding)

        # Batch norm / bias and activation...
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable('biases',
                                                  shape=[num_outputs, ],
                                                  dtype=dtype,
                                                  initializer=biases_initializer,
                                                  regularizer=biases_regularizer,
                                                  collections=biases_collections,
                                                  trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.name, outputs)
Example #49
0
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS,),
                    bias_collections=(ops.GraphKeys.BIASES,),
                    output_collections=(ops.GraphKeys.ACTIVATIONS,),
                    weight_regularizer=None,
                    bias_regularizer=None):
  """Adds the parameters for a fully connected layer and returns the output.

  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.

  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.

  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.

  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and which collections to place
  the created variables in (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.

  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.

  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function taking a single Tensor, applied to the output as a
      non-linearity. If None, no activation is applied.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.

  Returns:
    The output of the fully connected layer.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    num_input_units = x.get_shape().dims[1].value
    dtype = x.dtype.base_dtype

    w = _weight_variable(shape=[num_input_units, num_output_units],
                         dtype=dtype,
                         initializer=weight_init,
                         collections=weight_collections,
                         regularizer=weight_regularizer)

    y = standard_ops.matmul(x, w)

    if bias_init is not None:
      b = _bias_variable(shape=[num_output_units],
                         dtype=dtype,
                         initializer=bias_init,
                         collections=bias_collections,
                         regularizer=bias_regularizer)

      y = nn.bias_add(y, b)

    return _apply_activation(y, activation_fn, output_collections)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents if this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sampled target classes. Needs to be tuned - a good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
Example #51
0
def conv_capsule(inputs, shape, strides, iterations, batch_size, name):
  """This constructs a convolution capsule layer from a primary or convolution capsule layer.
      i: input capsules (32)
      o: output capsules (32)
      batch size: 24
      spatial dimension: 14x14
      kernel: 3x3
  :param inputs: a primary or convolution capsule layer with poses and activations
         pose: (24, 14, 14, 32, 4, 4)
         activation: (24, 14, 14, 32)
  :param shape: the shape of convolution operation kernel, [kh, kw, i, o] = (3, 3, 32, 32)
  :param strides: often [1, 2, 2, 1] (stride 2), or [1, 1, 1, 1] (stride 1).
  :param iterations: number of iterations in EM routing, e.g. 3.
  :param batch_size: batch size, e.g. 24 as in the shape comments above.
  :param name: name.

  :return: (poses, activations).

  """
  inputs_poses, inputs_activations = inputs

  with tf.variable_scope(name) as scope:

      stride = strides[1] # 2
      i_size = shape[-2] # 32
      o_size = shape[-1] # 32
      pose_size = inputs_poses.get_shape()[-1]  # 4

      # Tile the input capsules' pose matrices to the spatial dimension of the output capsules
      # so that we can later multiply with the transformation matrices to generate the votes.
      inputs_poses = kernel_tile(inputs_poses, 3, stride)  # (?, 14, 14, 32, 4, 4) -> (?, 6, 6, 3x3=9, 32x16=512)

      # Tile the activations needed for the EM routing
      inputs_activations = kernel_tile(inputs_activations, 3, stride)  # (?, 14, 14, 32) -> (?, 6, 6, 9, 32)
      spatial_size = int(inputs_activations.get_shape()[1]) # 6

      # Reshape it for later operations
      inputs_poses = tf.reshape(inputs_poses, shape=[-1, 3 * 3 * i_size, 16])  # (?, 9x32=288, 16)
      inputs_activations = tf.reshape(inputs_activations, shape=[-1, spatial_size, spatial_size, 3 * 3 * i_size]) # (?, 6, 6, 9x32=288)

      with tf.variable_scope('votes') as scope:

          # Generate the votes by multiplying the poses with the transformation matrices
          votes = mat_transform(inputs_poses, o_size, size=batch_size*spatial_size*spatial_size)  # (864, 288, 32, 16)

          # Reshape the vote for EM routing
          votes_shape = votes.get_shape()
          votes = tf.reshape(votes, shape=[batch_size, spatial_size, spatial_size, votes_shape[-3], votes_shape[-2], votes_shape[-1]]) # (24, 6, 6, 288, 32, 16)
          tf.logging.info(f"{name} votes shape: {votes.get_shape()}")

      with tf.variable_scope('routing') as scope:

          # beta_v and beta_a one for each output capsule: (1, 1, 1, 32)
          beta_v = tf.get_variable(
              name='beta_v', shape=[1, 1, 1, o_size], dtype=tf.float32,
              initializer=initializers.xavier_initializer()
          )
          beta_a = tf.get_variable(
              name='beta_a', shape=[1, 1, 1, o_size], dtype=tf.float32,
              initializer=initializers.xavier_initializer()
          )

          # Use EM routing to compute the pose and activation
          # votes (24, 6, 6, 3x3x32=288, 32, 16), inputs_activations (?, 6, 6, 288)
          # poses (24, 6, 6, 32, 16), activation (24, 6, 6, 32)
          poses, activations = matrix_capsules_em_routing(
              votes, inputs_activations, beta_v, beta_a, iterations, name='em_routing'
          )

          # Reshape it back to 4x4 pose matrix
          poses_shape = poses.get_shape()
          # (24, 6, 6, 32, 4, 4)
          poses = tf.reshape(
              poses, [
                  poses_shape[0], poses_shape[1], poses_shape[2], poses_shape[3], pose_size, pose_size
              ]
          )

      tf.logging.info(f"{name} pose shape: {poses.get_shape()}")
      tf.logging.info(f"{name} activations shape: {activations.get_shape()}")

      return poses, activations
  def test_xavier_wrong_dtype(self):
    with self.assertRaisesRegexp(
        TypeError, 'Cannot create initializer for non-floating point type.'):
      initializers.xavier_initializer(dtype=dtypes.int32)

    self.assertIsNone(regularizers.l1_regularizer(0.)(None))
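A short hedged sketch of what xavier_initializer produces, tying the test above to the initializer's behavior: for the default uniform variant the samples fall in [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out)), and non-floating dtypes are rejected at construction time. The session usage assumes a TF 1.x graph environment.
# Glorot/Xavier uniform limit and the dtype check, assuming TF 1.x.
import math
import tensorflow as tf

init = tf.contrib.layers.xavier_initializer(uniform=True)
w = tf.get_variable('w', shape=[300, 100], initializer=init)
limit = math.sqrt(6.0 / (300 + 100))  # about 0.1225 for this shape

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = sess.run(w)
    assert abs(values).max() <= limit  # all samples lie within the limit

try:
    tf.contrib.layers.xavier_initializer(dtype=tf.int32)
except TypeError as e:
    print(e)  # 'Cannot create initializer for non-floating point type.'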