Beispiel #1
0
def gelu(input_tensor):
    """Apply the Gaussian Error Linear Unit (GELU) activation.

    GELU is a smooth alternative to ReLU: each element is multiplied by
    the standard normal CDF evaluated at that element.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        input_tensor: float Tensor to apply the activation to.

    Returns:
        `input_tensor` with the GELU activation applied.
    """
    # Standard normal CDF expressed through the error function.
    standardized = input_tensor / tf.sqrt(2.0)
    normal_cdf = 0.5 * (1.0 + tf.erf(standardized))
    return input_tensor * normal_cdf
def KL(p, q, hypers=None, global_step=1.0E99):
    """Return the summed KL divergence KL(p || q) for supported pairs.

    Only a `DiagonalGaussianVar` `p` is handled, against a `q` that is a
    `DiagonalGaussianVar`, a `DiagonalLaplaceVar` or an `InverseGammaVar`.

    Args:
        p: approximating distribution; its `mean` and `var` are read.
        q: target distribution; `mean`/`var` are read for the Gaussian
            case and `b` for the Laplace case.
        hypers: forwarded unchanged to `EBKL` (inverse-gamma case only).
        global_step: forwarded unchanged to `EBKL` (inverse-gamma case
            only).

    Returns:
        A scalar tensor (sum over all elements) for the Gaussian and
        Laplace cases, whatever `EBKL` returns for the inverse-gamma case,
        or None after printing 'unsupported KL' for any other pair.
    """
    p_is_gaussian = isinstance(p, DiagonalGaussianVar)

    if p_is_gaussian and isinstance(q, DiagonalGaussianVar):
        # EPSILON keeps the log and the division finite when q.var is 0.
        q_var = q.var + bu.EPSILON
        p_entropy = 0.5 * (1 + bu.log2pi + tf.log(p.var))
        log_part = bu.log2pi + tf.log(q_var)
        quad_part = (p.var + (p.mean - q.mean)**2) / q_var
        cross_entropy = 0.5 * (log_part + quad_part)
        return tf.reduce_sum(cross_entropy - p_entropy)

    if p_is_gaussian and isinstance(q, DiagonalLaplaceVar):
        std = tf.sqrt(p.var)
        ratio = p.mean / std
        # NOTE(review): presumably bu.standard_gaussian is the standard
        # normal pdf, making this E_p|x| / b -- confirm against `bu`.
        pdf_part = 2 * bu.standard_gaussian(ratio)
        erf_part = ratio * tf.erf(ratio * bu.one_ovr_sqrt2)
        divergence = pdf_part + erf_part
        divergence *= std / q.b
        # `pi` is a bare name here; it must be in scope at module level.
        divergence += 0.5 * tf.log(2 * q.b * q.b / (pi * p.var)) - 0.5
        return tf.reduce_sum(divergence)

    if p_is_gaussian and isinstance(q, InverseGammaVar):
        return EBKL(p, q, hypers, global_step)

    # No supported (p, q) combination matched: report it and return None.
    print('unsupported KL')
    return None
 def gaussian_cdf(x, radius):
     """Return the CDF at `x` of a zero-mean Gaussian with std `radius`."""
     scale = math.sqrt(2.) * radius
     return 0.5 * (1 + tf.erf(x / scale))
Beispiel #4
0
    def __init__(
        self,
        input_node,
        hidden_layers_node,
        output_node,
        learning_rate,
        batch_size,
        display_step,
        activation,
        seed=1,
        feature_selection=False,
        a=1,
        sigma=0.1,
        lam=0.5,
        param_search=False
    ):  # Note: a, sigma, lam should be set by params dict that will be passed to this class.
        """Build the model graph, its session and the training ops.

        A private `tf.Graph` is created holding: the input placeholders, a
        per-feature gate parameter `alpha` (applied through
        `self.feature_selector`), a stack of fully connected layers, the
        loss, a gradient-descent training step and an accuracy op.

        Args:
            input_node: number of input features.
            hidden_layers_node: sequence with the width of every fully
                connected layer; must not be empty. When `output_node` is
                not 1 the last layer's output is used as the logits.
            output_node: width of the label placeholder. The value 1
                selects a linear regression head with squared-error loss;
                any other value selects softmax cross entropy.
            learning_rate: step size given to the gradient-descent
                optimizer.
            batch_size: stored on the instance; not used here.
            display_step: stored on the instance; not used here.
            activation: one of 'relu', 'sigmoid', 'tanh' or 'none'.
            seed: graph-level random seed for the private graph.
            feature_selection: when true, the gate regularizer is added to
                the loss. The gates themselves are applied to the input
                regardless of this flag.
            a: gate regularizer parameter (enters as `-1 / (2 * a)`).
            sigma: gate regularizer parameter (scale inside the erf).
            lam: weight of the gate regularizer in the loss.
            param_search: stored on the instance; not used here.

        Raises:
            ValueError: if `hidden_layers_node` is empty.
            NotImplementedError: if `activation` is not recognized.
        """
        # Without at least one layer there is no `layer_out` to build the
        # output on; fail with a clear message instead of a NameError.
        if len(hidden_layers_node) == 0:
            raise ValueError(
                'hidden_layers_node must contain at least one layer')

        self.param_search = param_search
        # Register hyperparameters for feature selection
        self.a = a
        self.sigma = sigma
        self.lam = lam
        # Register regular hyperparameters
        self.lr = learning_rate
        self.batch_size = batch_size
        self.display_step = display_step  # to print loss/acc information during training

        G = tf.Graph()
        with G.as_default():
            # The graph-level seed only affects ops created after it is set
            # on the graph that is current, so it has to be set here, before
            # any variable or random op is added to G.
            tf.set_random_seed(seed)
            self.sess = tf.Session(graph=G)
            # tf Graph Input
            X = tf.placeholder(
                tf.float32,
                [None, input_node])  # X.shape == [batch_size, feature_size]
            y = tf.placeholder(tf.float32, [None, output_node])
            train_gates = tf.placeholder(tf.float32, [1], name='train_gates')
            self.nnweights = []
            prev_node = input_node
            prev_x = X
            with tf.variable_scope('gates', reuse=tf.AUTO_REUSE):
                self.alpha = tf.get_variable(
                    'alpha', [
                        prev_node,
                    ],
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.01))
                prev_x = self.feature_selector(prev_x, train_gates)

            for i, layer_width in enumerate(hidden_layers_node):
                layer_name = 'layer' + str(i + 1)
                with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
                    weights = tf.get_variable(
                        'weights', [prev_node, layer_width],
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.1))
                    self.nnweights.append(weights)
                    biases = tf.get_variable(
                        'biases', [layer_width],
                        initializer=tf.constant_initializer(0.0))
                    # Affine transform; the non-linearity is applied below.
                    layer_out = tf.matmul(prev_x, weights) + biases

                    if activation == 'relu':
                        layer_out = tf.nn.relu(layer_out)
                    elif activation == 'sigmoid':
                        layer_out = tf.nn.sigmoid(layer_out)
                    elif activation == 'tanh':
                        layer_out = tf.nn.tanh(layer_out)
                    elif activation == 'none':
                        pass
                    else:
                        raise NotImplementedError('activation not recognized')

                    prev_node = layer_width
                    prev_x = layer_out

            # Output of model
            if output_node == 1:
                # Regression head: a linear map from the last hidden layer
                # to one output, trained with mean squared error. The weight
                # shape follows the last hidden width so the matmul is valid
                # for any width, not only 1.
                weights = tf.get_variable(
                    'weights', [prev_node, 1],
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                self.nnweights.append(weights)
                biases = tf.get_variable(
                    'biases', [1], initializer=tf.constant_initializer(0.0))
                pred = tf.matmul(layer_out, weights) + biases
                # There is no softmax here, so the raw output doubles as the
                # "logits"; this keeps `self.pred_log` defined on this path.
                pred_log = pred
                loss_fun = tf.reduce_mean(tf.squared_difference(pred, y))
            else:
                # Classification: the last layer's output is the logits.
                # Minimize error using cross entropy.
                pred = tf.nn.softmax(layer_out)
                pred_log = layer_out
                loss_fun = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                            logits=layer_out))
            if feature_selection:
                # Gates regularization: mean over features of the Gaussian
                # CDF of (alpha + 1 / (2 * a)) / sigma, weighted by lam.
                input2cdf = self.alpha
                reg = 0.5 - 0.5 * tf.erf((-1 / (2 * self.a) - input2cdf) /
                                         (self.sigma * np.sqrt(2)))
                reg_gates = self.lam * tf.reduce_mean(reg)
                loss = loss_fun + reg_gates
                self.reg_gates = reg_gates  # for debugging
            else:
                loss = loss_fun
                self.reg_gates = 0
            # Get optimizer
            train_step = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(loss)
            # For evaluation
            correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            # Initialize the variables (i.e. assign their default value)
            init_op = tf.global_variables_initializer()
            self.saver = tf.train.Saver()

        # Save into class members
        self.X = X
        self.y = y
        self.pred = pred
        self.train_gates = train_gates
        self.loss = loss
        self.pred_log = pred_log
        self.train_step = train_step
        self.correct_prediction = correct_prediction
        self.accuracy = accuracy
        self.output_node = output_node
        # Variables of the last layer that was built.
        self.weights = weights
        self.biases = biases
        self.sess.run(init_op)
Beispiel #5
0
 def gelu(input_tensor):
     """Return `input_tensor` scaled element-wise by the standard normal CDF."""
     return input_tensor * (
         0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0))))
Beispiel #6
0
def gelu(x):
    """Apply the GELU activation: `x` times the standard normal CDF at `x`."""
    half_x = x * 0.5
    erf_term = tf.erf(x / math.sqrt(2.0))
    return half_x * (1.0 + erf_term)
def gaussian_cdf(x):
    """Return the standard normal CDF evaluated element-wise at `x`."""
    erf_value = tf.erf(x * one_ovr_sqrt2)
    return 0.5 * (1.0 + erf_value)