Example #1
def binarize(x):
    # Reassign the gradient of Sign to the identity (straight-through
    # estimator); otherwise the gradient is zero almost everywhere.
    with tf.get_default_graph().gradient_override_map({'Sign': 'Identity'}):
        # return tf.sign(x)  # <-- wrong: tf.sign doesn't return +1 for zero
        return tf.sign(tf.sign(x) +
                       1e-8)  # <-- ugly but okay: shifts zero to +1
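
A minimal sanity check of the straight-through estimator above, assuming TensorFlow 1.x graph mode: the forward pass is binary, while the gradient flows through as if the op were the identity.

import tensorflow as tf

x = tf.constant([-0.7, 0.0, 0.3])
y = binarize(x)            # forward pass: [-1., 1., 1.]
g = tf.gradients(y, x)[0]  # backward pass: all ones, thanks to the override

with tf.Session() as sess:
    print(sess.run([y, g]))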
Example #2
def mask(inputs, queries=None, keys=None, type=None):
    """Masks padding positions on keys or queries of inputs.
    inputs: 3d tensor. (N, T_q, T_k)
    queries: 3d tensor. (N, T_q, d)
    keys: 3d tensor. (N, T_k, d)

    e.g.,
    >> queries = tf.constant([[[1.],
                               [2.],
                               [0.]]], tf.float32)  # (1, 3, 1)
    >> keys = tf.constant([[[4.],
                            [0.]]], tf.float32)  # (1, 2, 1)
    >> inputs = tf.constant([[[4., 0.],
                              [8., 0.],
                              [0., 0.]]], tf.float32)
    >> mask(inputs, queries, keys, "key")
    array([[[ 4.0000000e+00, -4.2949673e+09],
            [ 8.0000000e+00, -4.2949673e+09],
            [ 0.0000000e+00, -4.2949673e+09]]], dtype=float32)
    >> inputs = tf.constant([[[1., 0.],
                              [1., 0.],
                              [1., 0.]]], tf.float32)
    >> mask(inputs, queries, keys, "query")
    array([[[1., 0.],
            [1., 0.],
            [0., 0.]]], dtype=float32)
    """
    padding_num = -2**32 + 1
    #### Set the attention weights of the padded key positions to a very large negative number (equivalent to having no attention at those positions)
    if type in ("k", "key", "keys"):
        # Generate masks
        masks = tf.sign(tf.reduce_sum(tf.abs(keys), axis=-1))  # (N, T_k)
        masks = tf.expand_dims(masks, 1)  # (N, 1, T_k)
        masks = tf.tile(masks, [1, tf.shape(queries)[1], 1])  # (N, T_q, T_k)

        # Apply masks to inputs
        paddings = tf.ones_like(inputs) * padding_num
        outputs = tf.where(tf.equal(masks, 0), paddings,
                           inputs)  # (N, T_q, T_k)
    elif type in ("q", "query", "queries"):
        # Generate masks
        masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1))  # (N, T_q)
        masks = tf.expand_dims(masks, -1)  # (N, T_q, 1)
        masks = tf.tile(masks, [1, 1, tf.shape(keys)[1]])  # (N, T_q, T_k)

        # Apply masks to inputs
        outputs = inputs * masks
    elif type in ("f", "future", "right"):
        diag_vals = tf.ones_like(inputs[0, :, :])  # (T_q, T_k)
        # tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()  # (T_q, T_k)
        tril = tf.linalg.band_part(diag_vals, -1, 0)  # (T_q, T_k)
        masks = tf.tile(tf.expand_dims(tril, 0),
                        [tf.shape(inputs)[0], 1, 1])  # (N, T_q, T_k)

        paddings = tf.ones_like(masks) * padding_num
        outputs = tf.where(tf.equal(masks, 0), paddings, inputs)
    else:
        raise ValueError("Check if you entered type correctly!")

    return outputs
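
The docstring covers the "key" and "query" cases; a minimal sketch of the "future" (causal) case, assuming TF 1.x:

import tensorflow as tf

inputs = tf.constant([[[1., 2., 3.],
                       [4., 5., 6.],
                       [7., 8., 9.]]], tf.float32)  # (1, 3, 3)

with tf.Session() as sess:
    print(sess.run(mask(inputs, type="future")))
    # Positions above the diagonal become a large negative number, so a
    # later softmax gives them (near-)zero attention weight:
    # [[[ 1.0e+00 -4.3e+09 -4.3e+09]
    #   [ 4.0e+00  5.0e+00 -4.3e+09]
    #   [ 7.0e+00  8.0e+00  9.0e+00]]]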
Example #3
    def MakeGraph(self):

        tf.random.set_random_seed(0)

        self.X = tf.placeholder(tf.float32, [None, self.visibleDimensions],
                                name="X")

        maxWeight = 4.0 * np.sqrt(
            6.0 / (self.hiddenDimensions + self.visibleDimensions))
        self.weights = tf.Variable(tf.random_uniform(
            [self.visibleDimensions, self.hiddenDimensions],
            minval=-maxWeight,
            maxval=maxWeight),
                                   dtype=tf.float32,
                                   name="weights")

        self.hiddenBias = tf.Variable(
            tf.zeros([self.hiddenDimensions], tf.float32), name="hiddenBias")
        self.visibleBias = tf.Variable(
            tf.zeros([self.visibleDimensions], tf.float32), name="visibleBias")

        hProb0 = tf.nn.sigmoid(
            tf.matmul(self.X, self.weights) + self.hiddenBias)

        hSample = tf.nn.relu(
            tf.sign(hProb0 - tf.random_uniform(tf.shape(hProb0))))

        forward = tf.matmul(tf.transpose(self.X), hSample)

        v = tf.matmul(hSample, tf.transpose(self.weights)) + self.visibleBias

        vMask = tf.sign(self.X)  # Make sure everything is 0 or 1
        vMask3D = tf.reshape(vMask, [tf.shape(v)[0], -1, self.ratingValues])
        vMask3D = tf.reduce_max(vMask3D, axis=[2], keepdims=True)

        v = tf.reshape(v, [tf.shape(v)[0], -1, self.ratingValues])
        vProb = tf.nn.softmax(v * vMask3D)
        vProb = tf.reshape(vProb, [tf.shape(v)[0], -1])

        hProb1 = tf.nn.sigmoid(
            tf.matmul(vProb, self.weights) + self.hiddenBias)
        backward = tf.matmul(tf.transpose(vProb), hProb1)

        weightUpdate = self.weights.assign_add(self.learningRate *
                                               (forward - backward))

        hiddenBiasUpdate = self.hiddenBias.assign_add(
            self.learningRate * tf.reduce_mean(hProb0 - hProb1, 0))

        visibleBiasUpdate = self.visibleBias.assign_add(
            self.learningRate * tf.reduce_mean(self.X - vProb, 0))

        self.update = [weightUpdate, hiddenBiasUpdate, visibleBiasUpdate]
Example #4
def rbm(movies_df, config):
    """
    Implement RBM architecture in TensorFlow
    :param movies_df: data frame that stores movies information
    :param config: variable to store hyper-parameters
    :return: variables to be used during TensorFlow training
    """
    config.n_hid = 100  # Number of hidden units
    config.n_vis = len(movies_df)  # Number of visible units

    # Create respective placeholder variables for storing visible and hidden layer biases and weights
    vb = tf.placeholder("float", [config.n_vis])  # Number of unique movies
    hb = tf.placeholder("float", [config.n_hid])  # Number of features
    W = tf.placeholder("float", [config.n_vis, config.n_hid])  # Weights that connect the hidden and visible layers

    # Pre-process the input data
    v0 = tf.placeholder("float", [None, config.n_vis])
    _h0 = tf.nn.sigmoid(tf.matmul(v0, W) + hb)
    h0 = tf.nn.relu(tf.sign(_h0 - tf.random_uniform(tf.shape(_h0))))

    # Reconstruct the pre-processed input data (Sigmoid and ReLU activation functions are used)
    _v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(W)) + vb)
    v1 = tf.nn.relu(tf.sign(_v1 - tf.random_uniform(tf.shape(_v1))))
    h1 = tf.nn.sigmoid(tf.matmul(v1, W) + hb)

    # Set RBM training parameters
    alpha = 0.1  # Set learning rate
    w_pos_grad = tf.matmul(tf.transpose(v0), h0)  # Set positive gradients
    w_neg_grad = tf.matmul(tf.transpose(v1), h1)  # Set negative gradients

    # Calculate the contrastive divergence (CD) update
    CD = (w_pos_grad - w_neg_grad) / tf.to_float(tf.shape(v0)[0])

    # Create methods to update the weights and biases
    update_w = W + alpha * CD
    update_vb = vb + alpha * tf.reduce_mean(v0 - v1, 0)
    update_hb = hb + alpha * tf.reduce_mean(h0 - h1, 0)

    # Set error function (RMSE)
    err = v0 - v1
    err_sum = tf.sqrt(tf.reduce_mean(err**2))

    # Initialize variables
    cur_w = np.zeros([config.n_vis, config.n_hid], np.float32)  # Current weight
    cur_vb = np.zeros([config.n_vis], np.float32)  # Current visible unit biases
    cur_hb = np.zeros([config.n_hid], np.float32)  # Current hidden unit biases
    prv_w = np.zeros([config.n_vis, config.n_hid], np.float32)  # Previous weight
    prv_vb = np.zeros([config.n_vis], np.float32)  # Previous visible unit biases
    prv_hb = np.zeros([config.n_hid], np.float32)  # Previous hidden unit biases

    return v0, W, vb, hb, update_w, prv_w, prv_vb, prv_hb, update_vb, update_hb, cur_w, cur_vb, cur_hb, err_sum
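
A sketch of how these returned handles could drive contrastive-divergence training from a TF 1.x session; `movies_df`, `config`, and the binary `train_data` array are illustrative stand-ins:

import numpy as np
import tensorflow as tf

(v0, W, vb, hb, update_w, prv_w, prv_vb, prv_hb, update_vb, update_hb,
 cur_w, cur_vb, cur_hb, err_sum) = rbm(movies_df, config)

# Illustrative binary training data of shape (num_examples, n_vis)
train_data = np.random.binomial(1, 0.1,
                                size=(1000, config.n_vis)).astype(np.float32)

with tf.Session() as sess:
    for epoch in range(15):
        for start in range(0, len(train_data), 128):
            batch = train_data[start:start + 128]
            feed = {v0: batch, W: prv_w, vb: prv_vb, hb: prv_hb}
            cur_w, cur_vb, cur_hb = sess.run([update_w, update_vb, update_hb],
                                             feed_dict=feed)
            # Carry the updated parameters into the next step
            prv_w, prv_vb, prv_hb = cur_w, cur_vb, cur_hb
        print("epoch", epoch, "RMSE",
              sess.run(err_sum, feed_dict={v0: train_data, W: prv_w,
                                           vb: prv_vb, hb: prv_hb}))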
Example #5
def optimize_linear(grad, eps, ord=np.inf):
    """
  code is from: https://github.com/tensorflow/cleverhans
  Solves for the optimal input to a linear function under a norm constraint.

  Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

  :param grad: tf tensor containing a batch of gradients
  :param eps: float scalar specifying size of constraint region
  :param ord: int specifying order of norm
  :returns:
    tf tensor containing optimal perturbation
  """

    # In Python 2, the `list` call in the following line is redundant / harmless.
    # In Python 3, the `list` call is needed to convert the iterator returned by `range` into a list.
    red_ind = list(range(1, len(grad.get_shape())))
    avoid_zero_div = 1e-12
    if ord == np.inf:
        # Take sign of gradient
        optimal_perturbation = tf.sign(grad)
        # The following line should not change the numerical results.
        # It applies only because `optimal_perturbation` is the output of
        # a `sign` op, which has zero derivative anyway.
        # It should not be applied for the other norms, where the
        # perturbation has a non-zero derivative.
        optimal_perturbation = tf.stop_gradient(optimal_perturbation)
    elif ord == 1:
        abs_grad = tf.abs(grad)
        sign = tf.sign(grad)
        max_abs_grad = tf.reduce_max(abs_grad, red_ind, keepdims=True)
        tied_for_max = tf.to_float(tf.equal(abs_grad, max_abs_grad))
        num_ties = tf.reduce_sum(tied_for_max, red_ind, keepdims=True)
        optimal_perturbation = sign * tied_for_max / num_ties
    elif ord == 2:
        square = tf.maximum(
            avoid_zero_div,
            tf.reduce_sum(tf.square(grad),
                          reduction_indices=red_ind,
                          keepdims=True))
        optimal_perturbation = grad / tf.sqrt(square)
    else:
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Scale perturbation to be the solution for the norm=eps rather than
    # norm=1 problem
    scaled_perturbation = tf.multiply(eps, optimal_perturbation)
    return scaled_perturbation
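
As a hedged usage sketch, this routine is the core of the fast gradient method: differentiate the loss with respect to the input and take one optimal step inside the norm ball. The placeholder model below is purely illustrative:

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])         # illustrative input
y_true = tf.placeholder(tf.int64, [None])
model_logits = tf.layers.dense(tf.layers.flatten(x), 10)  # stand-in model

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                      logits=model_logits)
grad, = tf.gradients(loss, x)
adv_x = x + optimize_linear(grad, eps=0.3, ord=np.inf)    # FGSM when ord=np.inf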
Example #6
    def sample(self, seed=None):
        """Sample a matrix with the given spectrum.

    Args:
      seed: if seed is set, use a constant random number generator to produce a
        sample, otherwise use built in tensorflow random numbers.

    Returns:
      The sampled matrix.
    """

        dims = self._spectrum.shape[0]
        if seed is not None:
            rand = contrib_stateless.stateless_random_uniform(
                shape=[dims, dims],
                dtype=tf.float32,
                # Arbitrary offset on seed to prevent overlap of random state.
                seed=[seed + 1233, seed + 341]) * 2 - 1
        else:
            rand = tf.random_uniform([dims, dims], -1., 1., dtype=tf.float32)
        q, r = tf.qr(rand, full_matrices=True)

        # Multiply by the sign of the diagonal to ensure a uniform distribution.
        q *= tf.sign(tf.matrix_diag_part(r))

        # qDq^T where D is a diagonal matrix containing the spectrum
        return tf.matmul(tf.matmul(q, tf.diag(self._spectrum)),
                         q,
                         transpose_b=True)
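
The multiplication by the sign of r's diagonal is what makes the QR-based sample uniformly distributed over orthogonal matrices; a standalone sketch of just that trick, assuming TF 1.x:

import tensorflow as tf

rand = tf.random_uniform([4, 4], -1., 1., dtype=tf.float32)
q, r = tf.qr(rand, full_matrices=True)
q *= tf.sign(tf.matrix_diag_part(r))  # Haar-uniform orthogonal matrix

with tf.Session() as sess:
    q_val = sess.run(q)
    print(q_val.dot(q_val.T))  # approximately the identity: q is orthogonal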
Example #7
    def attack_single_step(self, x, eta, y):
        """
        Given the original image and the perturbation computed so far, computes
        a new perturbation.
        :param x: A tensor with the original input.
        :param eta: A tensor the same shape as x that holds the perturbation.
        :param y: A tensor with the target labels or ground-truth labels.
        """
        from cleverhans.utils_tf import model_loss, clip_eta

        adv_x = x + eta
        preds = self.model.get_probs(adv_x)
        loss = model_loss(y, preds)
        if self.targeted:
            loss = -loss
        grad, = tf.gradients(loss, adv_x)
        if self.pgd_update == 'sign':
            adv_x = adv_x + self.eps_iter * tf.sign(grad)
        elif self.pgd_update == 'plain':
            adv_x = adv_x + self.eps_iter * grad / tf.reduce_sum(
                grad**2, axis=[1, 2, 3], keep_dims=True)**0.5
        else:
            raise Exception('Wrong pgd_update.')
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
        eta = adv_x - x
        eta = clip_eta(eta, self.ord, self.eps)
        return eta
Example #8
    def constraints(self):
        """
        The constraints to impose.
        """
        # Turn raw predictions into binary {0, 1} categories.
        predictions = (1 + tf.sign(self.predictions)) / 2

        # Set the constraint to zero.
        self.constraint = 0
        ct = list()

        # Compute DIDI constraint.
        for I in self.I_train:
            N = tf.reduce_sum(tf.cast(I >= 0, dtype=tf.float32))
            Np = tf.reduce_sum(I)
            a = (tf.reduce_sum(predictions) / N)
            b = (tf.reduce_sum(I * predictions) / Np)

            tmp = tf.cond(Np > 0, lambda: 2 * (a - b), lambda: 0.0)
            ct.append(tf.abs(tmp))

        # ConstrainedMinimizationProblems must always provide their constraints in
        # the form (tensor <= 0).
        # return self.constraint - self.constraint_value
        return sum(ct) - self.constraint_value
Example #9
 def encode(self, x, noise):
     x = tf.to_float(x)
     # we can't use tf.pow(..., 8.0) because of a high-error approximation
     # on TPU.  Instead we square three times.
     x = tf.sign(x) * tf.square(tf.square(tf.square(tf.abs(x) * 128.0)))
     x = _to_bfloat16_unbiased(x, noise)
     return x
Example #10
 def _get_discriminator_output(self, inputs, discriminator, labels):
     """Discriminator binary classifier."""
     with tf.variable_scope("discriminator_predictions"):
         hidden = tf.layers.dense(
             discriminator.get_sequence_output(),
             units=self._bert_config.hidden_size,
             activation=modeling.get_activation(
                 self._bert_config.hidden_act),
             kernel_initializer=modeling.create_initializer(
                 self._bert_config.initializer_range))
         logits = tf.squeeze(tf.layers.dense(hidden, units=1), -1)
         weights = tf.cast(inputs.input_mask, tf.float32)
         labelsf = tf.cast(labels, tf.float32)
         losses = tf.nn.sigmoid_cross_entropy_with_logits(
             logits=logits, labels=labelsf) * weights
         per_example_loss = (tf.reduce_sum(losses, axis=-1) /
                             (1e-6 + tf.reduce_sum(weights, axis=-1)))
         loss = tf.reduce_sum(losses) / (1e-6 + tf.reduce_sum(weights))
         probs = tf.nn.sigmoid(logits)
         preds = tf.cast(tf.round((tf.sign(logits) + 1) / 2), tf.int32)
         DiscOutput = collections.namedtuple(
             "DiscOutput",
             ["loss", "per_example_loss", "probs", "preds", "labels"])
         return DiscOutput(
             loss=loss,
             per_example_loss=per_example_loss,
             probs=probs,
             preds=preds,
             labels=labels,
         )
Example #11
def retrieve_seq_length_op(data):
    """ An op to compute the length of a sequence. 0 are masked. """
    with tf.name_scope('GetLength'):
        used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
    return length
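
A quick numeric check, assuming TF 1.x: timesteps that are entirely zero are treated as padding and do not count toward the length.

import tensorflow as tf

data = tf.constant([[[1., 1.], [2., 2.], [3., 3.]],
                    [[1., 1.], [0., 0.], [0., 0.]]])  # (2, 3, 2)

with tf.Session() as sess:
    print(sess.run(retrieve_seq_length_op(data)))  # [3 1]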
Example #12
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1,
                 crf_only=False,
                 lstm_only=False):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # 使用数据加载BertModel,获取对应的字embedding
    import tensorflow as tf
    from bert_base.bert import modeling
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Get the corresponding embedding of the input data [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the sequence lengths in the current batch
    # Add the CRF output layer
    with tf.variable_scope('finetune'):
        blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                              input_mask=input_mask,
                              hidden_unit=lstm_size,
                              cell_type=cell,
                              num_layers=num_layers,
                              dropout_rate=dropout_rate,
                              initializers=initializers,
                              num_labels=num_labels,
                              seq_length=max_seq_length,
                              labels=labels,
                              lengths=lengths,
                              is_training=is_training)
        rst = blstm_crf.add_blstm_crf_layer(crf_only=crf_only,
                                            lstm_only=lstm_only)
    return rst
Example #13
def clip_eta(eta, ord, eps):
    """
  Helper function to clip the perturbation to epsilon norm ball.
  :param eta: A tensor with the current perturbation.
  :param ord: Order of the norm (mimics Numpy).
              Possible values: np.inf, 1 or 2.
  :param eps: Epsilon, bound of the perturbation.
  """

    # Clip the perturbation eta to the ord-norm ball
    if ord not in [np.inf, 1, 2]:
        raise ValueError('ord must be np.inf, 1, or 2.')
    reduc_ind = list(range(1, len(eta.get_shape())))
    avoid_zero_div = 1e-12
    if ord == np.inf:
        eta = tf.clip_by_value(eta, -eps, eps)
    elif ord == 1:
        # Implements a projection algorithm onto the l1-ball from
        # (Duchi et al. 2008) that runs in time O(d*log(d)) where d is the
        # input dimension.
        # Paper link (Duchi et al. 2008): https://dl.acm.org/citation.cfm?id=1390191

        eps = tf.cast(eps, eta.dtype)

        dim = tf.reduce_prod(tf.shape(eta)[1:])
        eta_flat = tf.reshape(eta, (-1, dim))
        abs_eta = tf.abs(eta_flat)

        if 'sort' in dir(tf):
            mu = -tf.sort(-abs_eta, axis=-1)
        else:
            # `tf.sort` is only available in TF 1.13 onwards
            mu = tf.nn.top_k(abs_eta, k=dim, sorted=True)[0]
        cumsums = tf.cumsum(mu, axis=-1)
        js = tf.cast(tf.divide(1, tf.range(1, dim + 1)), eta.dtype)
        t = tf.cast(tf.greater(mu - js * (cumsums - eps), 0), eta.dtype)

        rho = tf.argmax(t * cumsums, axis=-1)
        rho_val = tf.reduce_max(t * cumsums, axis=-1)
        theta = tf.divide(rho_val - eps, tf.cast(1 + rho, eta.dtype))

        eta_sgn = tf.sign(eta_flat)
        eta_proj = eta_sgn * tf.maximum(abs_eta - theta[:, tf.newaxis], 0)
        eta_proj = tf.reshape(eta_proj, tf.shape(eta))

        norm = tf.reduce_sum(tf.abs(eta), reduc_ind)
        eta = tf.where(tf.greater(norm, eps), eta_proj, eta)

    elif ord == 2:
        # avoid_zero_div must go inside sqrt to avoid a divide by zero
        # in the gradient through this operation
        norm = tf.sqrt(
            tf.maximum(avoid_zero_div,
                       tf.reduce_sum(tf.square(eta), reduc_ind,
                                     keepdims=True)))
        # We must *clip* to within the norm ball, not *normalize* onto the
        # surface of the ball
        factor = tf.minimum(1., tf.div(eps, norm))
        eta = eta * factor
    return eta
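
A minimal check of the L2 branch, assuming TF 1.x: a perturbation of norm 5 is projected onto the radius-1 ball, while one already inside the ball is left untouched.

import tensorflow as tf

eta = tf.constant([[3., 4.],     # L2 norm 5.0 -> clipped to norm 1
                   [0.3, 0.4]])  # L2 norm 0.5 -> unchanged
clipped = clip_eta(eta, ord=2, eps=1.0)

with tf.Session() as sess:
    print(sess.run(tf.sqrt(tf.reduce_sum(tf.square(clipped), axis=1))))
    # [1.  0.5]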
Example #14
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        seq_len,
        forget_bias=0.1,
    ):
        def lstm_cell(size_layer):
            return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

        def global_pooling(x, func):
            batch_size = tf.shape(self.X)[0]
            num_units = x.get_shape().as_list()[-1]
            x = func(x, x.get_shape().as_list()[1], 1)
            x = tf.reshape(x, [batch_size, num_units])
            return x

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop = tf.nn.rnn_cell.DropoutWrapper(rnn_cells,
                                             output_keep_prob=forget_bias)
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, num_layers * 2 * size_layer))
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop,
            self.X,
            initial_state=self.hidden_layer,
            dtype=tf.float32,
            time_major=True,
        )
        self.outputs = self.outputs[:, :, 0]
        x = self.X
        masks = tf.sign(self.outputs)
        batch_size = tf.shape(self.X)[0]
        align = tf.matmul(self.X, tf.transpose(self.X, [0, 2, 1]))
        paddings = tf.fill(tf.shape(align), float('-inf'))
        k_masks = tf.tile(tf.expand_dims(masks, 1), [1, seq_len, 1])
        align = tf.where(tf.equal(k_masks, 0), paddings, align)
        align = tf.nn.tanh(align)
        q_masks = tf.to_float(masks)
        q_masks = tf.tile(tf.expand_dims(q_masks, -1), [1, 1, seq_len])
        align *= q_masks

        x = tf.matmul(align, x)
        g_max = global_pooling(x, tf.layers.max_pooling1d)
        g_avg = global_pooling(x, tf.layers.average_pooling1d)
        self.outputs = tf.concat([g_max, g_avg], 1)
        self.logits = tf.layers.dense(self.outputs, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
Example #15
def jacobian_augmentation(sess,
                          x,
                          X_sub_prev,
                          Y_sub,
                          grads,
                          lmbda,
                          aug_batch_size=512,
                          feed=None):
    """
  Augment an adversary's substitute training set using the Jacobian
  of a substitute model to generate new synthetic inputs.
  See https://arxiv.org/abs/1602.02697 for more details.
  See cleverhans_tutorials/mnist_blackbox.py for example use case
  :param sess: TF session in which the substitute model is defined
  :param x: input TF placeholder for the substitute model
  :param X_sub_prev: substitute training data available to the adversary
                     at the previous iteration
  :param Y_sub: substitute training labels available to the adversary
                at the previous iteration
  :param grads: Jacobian symbolic graph for the substitute
                (should be generated using utils_tf.jacobian_graph)
  :return: augmented substitute data (will need to be labeled by oracle)
  """
    assert len(x.get_shape()) == len(np.shape(X_sub_prev))
    assert len(grads) >= np.max(Y_sub) + 1
    assert len(X_sub_prev) == len(Y_sub)

    aug_batch_size = min(aug_batch_size, X_sub_prev.shape[0])

    # Prepare input_shape (outside loop) for feeding dictionary below
    input_shape = list(x.get_shape())
    input_shape[0] = 1

    # Create new numpy array for adversary training data
    # with twice as many components on the first dimension.
    X_sub = np.vstack([X_sub_prev, X_sub_prev])
    num_samples = X_sub_prev.shape[0]

    # Creating and processing as batch
    for p_idxs in range(0, num_samples, aug_batch_size):
        X_batch = X_sub_prev[p_idxs:p_idxs + aug_batch_size, ...]
        feed_dict = {x: X_batch}
        if feed is not None:
            feed_dict.update(feed)

        # Compute sign matrix
        grad_val = sess.run([tf.sign(grads)], feed_dict=feed_dict)[0]

        # Create new synthetic point in adversary substitute training set
        for (indx, ind) in zip(range(p_idxs, p_idxs + X_batch.shape[0]),
                               range(X_batch.shape[0])):
            X_sub[num_samples +
                  indx] = (X_batch[ind] +
                           lmbda * grad_val[Y_sub[indx], ind, ...])

    # Return augmented training data (needs to be labeled afterwards)
    return X_sub
Example #16
def binarize(x):
    """
    Clip and binarize tensor using the straight through estimator (STE) for the gradient.
    """
    from tensorflow.python.framework import ops  # needed for ops.name_scope

    g = tf.get_default_graph()

    with ops.name_scope("Binarized") as name:
        with g.gradient_override_map({"Sign": "Identity"}):
            return tf.sign(x)
Example #17
def mu_law_decode(output, quantization_channels):
    '''Recovers waveform from quantized values.'''
    with tf.name_scope('decode'):
        mu = quantization_channels - 1
        # Map values back to [-1, 1].
        signal = 2 * (tf.to_float(output) / mu) - 1
        # Perform inverse of mu-law transformation.
        magnitude = (1 / mu) * ((1 + mu)**tf.abs(signal) - 1)
        return tf.sign(signal) * magnitude
Example #18
 def minimize(self, loss, x, optim_state):
     """Refer to parent class documentation."""
     lr = self._lr_fn(optim_state.iteration)
     grads = self.gradients(loss, x)
     if self._fgsm:
         grads = [tf.sign(g) for g in grads]
     new_x = [None] * len(x)
     for i in range(len(x)):
         new_x[i] = x[i] - lr * grads[i]
     new_optim_state = self._State(optim_state.iteration + 1)
     return new_x, new_optim_state
Example #19
def prox_l1(w, lamb):
    '''
    Soft-thresholding: the proximal operator of the L1 norm.

    Parameters
    ----------
    @param w : input vector
    @param lamb : penalty parameter

    Returns
    -------
    @retval : the soft-thresholded input vector
    '''
    return tf.multiply(tf.sign(w), tf.maximum(tf.abs(w) - lamb, 0))
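
For instance, with lamb = 0.5 every entry is shrunk toward zero by 0.5, and entries whose magnitude is below the threshold are zeroed out (a TF 1.x session sketch):

import tensorflow as tf

w = tf.constant([1.5, 0.2, -0.7])

with tf.Session() as sess:
    print(sess.run(prox_l1(w, 0.5)))  # [ 1.   0.  -0.2]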
Example #20
    def _permute_z_for_y_tilde(self):
        """Permute z using ODIN: z_tilde = z - e * tf.sign(-1 * grad(predy/z)).

    This is a method modified based on Liang, Shiyu, Yixuan Li, & R. Srikant.
    Enhancing the reliability of out-of-distribution image detection
    in neural networks (2017). See Eq. (2) in the paper.
    The original method was proposed for adding a perturbation to the input x,
    where x is a continuous variable. Since our input x is a discrete variable,
    we add perturbations to the input of the last layer of the neural networks.
    The function works for any intermediate variable, but we choose the input
    to the last layer, self.out.

    Thus, we take the gradient from max(self.logits) to self.out, and then
    self.out_tilde = self.out - self._params.epsilon * tf.sign(-1 * self.grads)
    self.out is of the size [batch_size, hidden_dense_size]
    self.logits is of the size [batch_size, n_class].
    We first create ids_set for pairs (m, id), where m=1,2,...,batch_size, and
    id = argmax(self.logits[m]).
    Then we take the gradient from self.logits[m] wrt self.out[m].
    The resulting grads is of the size [hidden_dense_size].
    """
        def create_ids(y):
            """Create triplets (id_ymax, m, b), m in num_samps, b in batch_size."""
            ids_max = np.argmax(y, axis=1)
            # ids_set: m in batch_size, ids_max[m]
            ids_set = np.array([(m, ids_max[m])
                                for m in range(self._params.batch_size)])
            return ids_set

        def grads_from_y_to_z(x):
            """Take gradient from each yp[id_ymax, m, b] to z[m, b]."""
            # self.probs [batch_size, n_class]
            # self.out [batch_size, hidden_dense_size]
            grad = tf.gradients(self.probs[x[0], x[1]], self.out)[0][x[0], :]
            return grad

        ids_set = tf.py_func(create_ids, [self.logits], tf.int64)
        grads_flat = tf.map_fn(grads_from_y_to_z, ids_set, dtype=tf.float32)
        self.grads = tf.reshape(
            grads_flat,
            [self._params.batch_size, self._params.hidden_dense_size])
        tf.logging.info('grads_flat.shape=%s', grads_flat.shape)
        tf.logging.info('grads.shape=%s', self.grads.shape)
        # odin permutation
        self.out_tilde = self.out - self._params.epsilon * tf.sign(
            -1 * self.grads)

        self.logits_tilde = self.logits_dense_fn(self.out_tilde)
        tf.logging.info('logits_tilde.shape=%s', self.logits_tilde.shape)
        tf.logging.info('logits.shape=%s', self.logits.shape)
        self.probs_tilde = tf.nn.softmax(self.logits_tilde /
                                         tf.to_float(self._params.temperature))
        tf.logging.info('probs_tilde.shape=%s', self.probs_tilde.shape)
Example #21
def mu_law_encode(audio, quantization_channels):
    '''Quantizes waveform amplitudes.'''
    with tf.name_scope('encode'):
        mu = tf.to_float(quantization_channels - 1)
        # Perform mu-law companding transformation (ITU-T, 1988).
        # Minimum operation is here to deal with rare large amplitudes caused
        # by resampling.
        safe_audio_abs = tf.minimum(tf.abs(audio), 1.0)
        magnitude = tf.log1p(mu * safe_audio_abs) / tf.log1p(mu)
        signal = tf.sign(audio) * magnitude
        # Quantize signal to the specified number of levels.
        return tf.cast((signal + 1) / 2 * mu + 0.5, dtype=tf.int32)
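
Paired with mu_law_decode from Example #17, the transform round-trips up to quantization error (a TF 1.x sketch):

import tensorflow as tf

audio = tf.constant([-0.5, -0.01, 0.0, 0.01, 0.5])
encoded = mu_law_encode(audio, quantization_channels=256)
decoded = mu_law_decode(encoded, quantization_channels=256)

with tf.Session() as sess:
    print(sess.run(encoded))  # int32 levels in [0, 255]
    print(sess.run(decoded))  # close to the original amplitudes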
Example #22
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """See base class."""
        assignments = []
        for (grad, param) in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = self._get_variable_name(param.name)

            if self._do_use_l2_reg(param_name):
                grad += self.l2_param * param

            if self._do_use_l1_reg(param_name):
                grad += self.l1_param * tf.sign(param)

            m = tf.get_variable(name=param_name + "/adam_m",
                                shape=param.shape.as_list(),
                                dtype=tf.float32,
                                trainable=False,
                                initializer=tf.zeros_initializer())
            v = tf.get_variable(name=param_name + "/adam_v",
                                shape=param.shape.as_list(),
                                dtype=tf.float32,
                                trainable=False,
                                initializer=tf.zeros_initializer())

            # Standard Adam update.
            next_m = (tf.multiply(self.beta_1, m) +
                      tf.multiply(1.0 - self.beta_1, grad))
            next_v = (tf.multiply(self.beta_2, v) +
                      tf.multiply(1.0 - self.beta_2, tf.square(grad)))

            update = next_m / (tf.sqrt(next_v) + self.epsilon)

            # Just adding the square of the weights to the loss function is *not*
            # the correct way of using L2 regularization/weight decay with Adam,
            # since that will interact with the m and v parameters in strange ways.
            #
            # Instead we want to decay the weights in a manner that doesn't interact
            # with the m/v parameters. This is equivalent to adding the square
            # of the weights to the loss with plain (non-momentum) SGD.
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * param

            update_with_lr = self.learning_rate * update

            next_param = param - update_with_lr

            assignments.extend(
                [param.assign(next_param),
                 m.assign(next_m),
                 v.assign(next_v)])
        return tf.group(*assignments, name=name)
Example #23
    def create_model(self, optimizer):
        """Model function for Logistic Regression."""
        features = tf.placeholder(tf.float32,
                                  shape=[None, 100],
                                  name='features')
        labels = tf.placeholder(tf.float32, shape=[None, 1], name='labels')

        W = tf.Variable(tf.zeros([100, 1]))
        b = tf.Variable(tf.zeros([1]))
        y_pred = tf.matmul(features, W) + b

        loss = 0.01 * tf.reduce_sum(tf.square(W)) + tf.reduce_mean(
            tf.maximum(tf.zeros_like(labels), 1 - labels * y_pred))

        grads_and_vars = optimizer.compute_gradients(loss)
        grads, _ = zip(*grads_and_vars)
        train_op = optimizer.apply_gradients(
            grads_and_vars, global_step=tf.train.get_global_step())
        eval_metric_ops = tf.count_nonzero(tf.equal(labels, tf.sign(y_pred)))
        return features, labels, train_op, grads, eval_metric_ops, loss, tf.sign(
            y_pred)
Example #24
def compute_cv(vc, iteration):
    cv_list = []
    prod_list = []
    min_list = []

    if SUM_PRODUCT:
        vc = tf.clip_by_value(vc, -10, 10)
        tanh_vc = tf.tanh(vc / 2.0)
    edge_order = []
    for i in range(0, m):  # for each check node c
        for j in range(0, chk_degrees[i]):
            # edge = u[i][j]
            edge_order.append(u[i][j])
            extrinsic_edges = []
            for jj in range(0, chk_degrees[i]):
                if jj != j:
                    extrinsic_edges.append(u[i][jj])
            if SUM_PRODUCT:
                temp = tf.gather(tanh_vc, extrinsic_edges)
                temp = tf.reduce_prod(temp, 0)
                temp = tf.log((1 + temp) / (1 - temp))
                cv_list.append(temp)
            if MIN_SUM:
                temp = tf.gather(vc, extrinsic_edges)
                temp1 = tf.reduce_prod(tf.sign(temp), 0)
                temp2 = tf.reduce_min(tf.abs(temp), 0)
                prod_list.append(temp1)
                min_list.append(temp2)

    if SUM_PRODUCT:
        cv = tf.stack(cv_list)
    if MIN_SUM:
        prods = tf.stack(prod_list)
        mins = tf.stack(min_list)
        if decoder.decoder_type == "RNOMS":
            # offsets = tf.nn.softplus(decoder.B_cv)
            # mins = tf.nn.relu(mins - tf.tile(tf.reshape(offsets,[-1,1]),[1,batch_size]))
            mins = tf.nn.relu(mins - decoder.B_cv)
        elif decoder.decoder_type == "FNOMS":
            offsets = tf.nn.softplus(decoder.B_cv[iteration])
            mins = tf.nn.relu(mins - tf.tile(tf.reshape(offsets, [-1, 1]), [1, batch_size]))
        cv = prods * mins

    new_order = np.zeros(num_edges).astype(int)
    new_order[edge_order] = np.arange(0, num_edges).astype(int)
    cv = tf.gather(cv, new_order)

    if decoder.decoder_type == "RNSPA" or decoder.decoder_type == "RNNMS":
        cv = cv * tf.tile(tf.reshape(decoder.W_cv, [-1, 1]), [1, batch_size])
    elif decoder.decoder_type == "FNSPA" or decoder.decoder_type == "FNNMS":
        cv = cv * tf.tile(tf.reshape(decoder.W_cv[iteration], [-1, 1]), [1, batch_size])
    return cv
Example #25
    def __init__(self,
                 model_type,
                 config,
                 num_classes,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None,
                 label_ids=None,
                 dropout_prob=0.1,
                 add_crf=False):
        model_type = model_type.lower()
        if model_type not in MODELS:
            raise ValueError("Unsupported model option: {}, "
                             "you can choose one of {}".format(
                                 model_type, ", ".join(MODELS.keys())))

        model = MODELS[model_type](config,
                                   input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=token_type_ids,
                                   is_training=is_training)
        sequence_output = model.get_sequence_output()
        with tf.variable_scope("classification"):
            # Choose the dropout probability according to is_training
            dropout_prob = get_dropout_prob(is_training, dropout_prob)

            sequence_output = tf.nn.dropout(sequence_output, rate=dropout_prob)

            _, seq_len, hidden_size = get_shape_list(sequence_output,
                                                     expected_rank=3)
            sequence_output = tf.reshape(sequence_output, [-1, hidden_size])
            logits = tf.layers.dense(sequence_output,
                                     num_classes,
                                     kernel_initializer=create_initializer(
                                         config.initializer_range))
            self.logits = tf.reshape(logits, [-1, seq_len, num_classes])
            if add_crf:
                used = tf.sign(tf.abs(input_ids))
                lengths = tf.reduce_sum(used, reduction_indices=1)
                crf = CRF(self.logits,
                          num_labels=num_classes,
                          labels=label_ids,
                          lengths=lengths)
                self.loss = crf.loss
                self.predictions = crf.pred_ids
            else:
                probabilities = tf.nn.softmax(self.logits, axis=-1)
                self.predictions = tf.argmax(probabilities, axis=-1)
                if label_ids is not None:
                    self.loss = cross_entropy_loss(self.logits, label_ids,
                                                   num_classes)
Example #26
def length(sequence):
    """
    Get true length of sequences (without padding), and mask for true-length in max-length.

    Input of shape: (batch_size, max_seq_length), zero-padded
    Output shapes:
    length: (batch_size)
    mask: (batch_size, max_seq_length, 1)
    """
    populated = tf.sign(tf.abs(sequence))
    length = tf.cast(tf.reduce_sum(populated, axis=1), tf.int32)
    mask = tf.cast(tf.expand_dims(populated, -1), tf.float32)
    return length, mask
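
A small usage sketch, assuming TF 1.x and zero-padded integer token ids:

import tensorflow as tf

ids = tf.constant([[7, 4, 2, 0, 0],
                   [3, 0, 0, 0, 0]])  # 0 marks padding

seq_len, seq_mask = length(ids)
with tf.Session() as sess:
    print(sess.run(seq_len))  # [3 1]
    # seq_mask has shape (2, 5, 1), with ones at the real tokens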
Example #27
    def _call(self, inputs):
        self_vecs, neigh_vecs = inputs

        dims = tf.shape(neigh_vecs)
        batch_size = dims[0]
        initial_state = self.cell.zero_state(batch_size, tf.float32)
        used = tf.sign(tf.reduce_max(tf.abs(neigh_vecs), axis=2))
        length = tf.reduce_sum(used, axis=1)
        length = tf.maximum(length, tf.constant(1.))
        length = tf.cast(length, tf.int32)

        with tf.variable_scope(self.name) as scope:
            try:
                rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                    self.cell,
                    neigh_vecs,
                    initial_state=initial_state,
                    dtype=tf.float32,
                    time_major=False,
                    sequence_length=length)
            except ValueError:
                scope.reuse_variables()
                rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                    self.cell,
                    neigh_vecs,
                    initial_state=initial_state,
                    dtype=tf.float32,
                    time_major=False,
                    sequence_length=length)
        batch_size = tf.shape(rnn_outputs)[0]
        max_len = tf.shape(rnn_outputs)[1]
        out_size = int(rnn_outputs.get_shape()[2])
        index = tf.range(0, batch_size) * max_len + (length - 1)
        flat = tf.reshape(rnn_outputs, [-1, out_size])
        neigh_h = tf.gather(flat, index)

        from_neighs = tf.matmul(neigh_h, self.vars['neigh_weights'])
        from_self = tf.matmul(self_vecs, self.vars["self_weights"])

        if not self.concat:
            output = tf.add_n([from_self, from_neighs])
        else:
            output = tf.concat([from_self, from_neighs], axis=1)

        # bias
        if self.bias:
            output += self.vars['bias']

        return self.act(output)
Example #28
def inv_mu_law(x, mu=255):
    """A TF implementation of inverse Mu-Law.

  Args:
    x: The Mu-Law samples to decode.
    mu: The Mu we used to encode these samples.

  Returns:
    out: The decoded data.
  """
    x = tf.cast(x, tf.float32)
    out = (x + 0.5) * 2.0 / (mu + 1)
    out = tf.sign(out) / mu * ((1 + mu)**tf.abs(out) - 1)
    out = tf.where(tf.equal(x, 0), x, out)
    return out
Example #29
def mu_law(x, mu=255, int8=False):
  """A TF implementation of Mu-Law encoding.

  Args:
    x: The audio samples to encode.
    mu: The Mu to use in our Mu-Law.
    int8: Use int8 encoding.

  Returns:
    out: The Mu-Law encoded int8 data.
  """
  out = tf.sign(x) * tf.log(1 + mu * tf.abs(x)) / np.log(1 + mu)
  out = tf.floor(out * 128)
  if int8:
    out = tf.cast(out, tf.int8)
  return out
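
Together with inv_mu_law from Example #28, the encoding also round-trips approximately (a TF 1.x sketch):

import tensorflow as tf

x = tf.constant([-0.5, 0.0, 0.25])
x_rec = inv_mu_law(mu_law(x))

with tf.Session() as sess:
    print(sess.run(x_rec))  # close to [-0.5, 0.0, 0.25]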
Example #30
def _quantize(x, params, randomize=True):
    """Quantize x according to params, optionally randomizing the rounding."""
    if not params.quantize:
        return x

    if not randomize:
        return tf.bitcast(tf.cast(x / params.quantization_scale, tf.int16),
                          tf.float16)

    abs_x = tf.abs(x)
    sign_x = tf.sign(x)
    y = abs_x / params.quantization_scale
    y = tf.floor(y + tf.random_uniform(common_layers.shape_list(x)))
    y = tf.minimum(y, tf.int16.max) * sign_x
    q = tf.bitcast(tf.cast(y, tf.int16), tf.float16)
    return q
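
A sketch of the matching inverse, assuming the same params object as above: bitcast back to int16 and undo the scaling.

def _dequantize(q, params):
    """Inverse of _quantize (a sketch): recover the float values."""
    if not params.quantize:
        return q
    return tf.cast(tf.bitcast(q, tf.int16), tf.float32) * params.quantization_scale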