def _conv_layer(self, out_p, w_dims, n_layer):
    """ Adds a convolutional layer (conv -> ReLU -> max pool) to the graph.

    Args:
        out_p: A float tensor containing the output from the previous layer
        w_dims: a vector of ints containing the filter dims
        n_layer: an int containing the number of the layer
    """
    with tf.name_scope('conv%i' % n_layer) as scope:
        # Create weights
        weights = tf.get_variable(
            name="conv%i/weights" % n_layer,
            shape=w_dims,
            initializer=self.conv_initialiser,
            regularizer=regularizers.l2_regularizer(self.weight_reg_strength))
        # Create bias
        bias = tf.get_variable(
            name='conv%i/bias' % n_layer,
            shape=w_dims[-1],
            initializer=tf.constant_initializer(0.0))
        # Apply the convolution with the weights to the input
        conv_in = tf.nn.conv2d(out_p, weights, [1, 1, 1, 1], padding='SAME')
        # Add bias and calculate the activation
        relu = tf.nn.relu(tf.nn.bias_add(conv_in, bias))
        # Apply max pooling
        out = tf.nn.max_pool(
            relu,
            ksize=[1, 3, 3, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool%i' % n_layer)
        # Add summaries
        tf.histogram_summary("conv%i/out" % n_layer, out)
        tf.histogram_summary("conv%i/relu" % n_layer, relu)
        tf.histogram_summary("conv%i/in" % n_layer, conv_in)
        tf.histogram_summary("conv%i/weights" % n_layer, weights)
        tf.histogram_summary("conv%i/bias" % n_layer, bias)
        return out
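# A minimal usage sketch for _conv_layer (an assumption, not part of the source):
# it presumes a model instance exposing `conv_initialiser` and
# `weight_reg_strength`, and a batch of 32x32 RGB images. Filter sizes, layer
# count, and the method name below are illustrative only.
def _inference_sketch(self, x):
    # x: float tensor of shape [batch, 32, 32, 3]
    conv1 = self._conv_layer(x, w_dims=[5, 5, 3, 64], n_layer=1)       # -> [batch, 16, 16, 64]
    conv2 = self._conv_layer(conv1, w_dims=[5, 5, 64, 64], n_layer=2)  # -> [batch, 8, 8, 64]
    return conv2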
def __init__(self, vocab_size, q_dim, s_dim, o_dim, num_layers,
             num_samples=512, is_training=True):
    """ Constructor for an HRED object.

    Args:
        vocab_size: The size of the vocabulary
        q_dim: The size of the query embeddings
        s_dim: The size of the session embeddings
        o_dim: The size of the output
    """
    self.vocab_size = vocab_size
    self.q_dim = q_dim
    self.s_dim = s_dim
    self.o_dim = o_dim
    self.num_layers = num_layers
    self.num_samples = num_samples
    self.is_training = is_training
    self.init = tf.random_normal_initializer(stddev=1e-2)
    self.reg = regularizers.l2_regularizer(1e-2)
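# A minimal instantiation sketch (an assumption, not part of the source): the
# class name HRED is inferred from the docstring, and all dimensions below are
# illustrative only.
hred = HRED(vocab_size=50000, q_dim=1000, s_dim=1500, o_dim=300,
            num_layers=1, num_samples=512, is_training=True)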
def _forward_fc_layer(name, w_shape, b_shape, x_inp, regularizer_strength, act_func):
    with tf.variable_scope(name):
        W = tf.get_variable(
            'W', w_shape,
            initializer=tf.random_normal_initializer(
                mean=0.0, stddev=1e-3, dtype=tf.float32),
            regularizer=regularizers.l2_regularizer(regularizer_strength))
        b = tf.get_variable('b', b_shape,
                            initializer=tf.constant_initializer(0))
        out = act_func(tf.matmul(x_inp, W) + b)
        tf.histogram_summary(name + '_weights', W)
        tf.histogram_summary(name + '_b', b)
        tf.histogram_summary(name + '_out', out)
        return out
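# A minimal usage sketch (an assumption, not part of the source): a two-layer
# feed-forward pass on a 784-dimensional input; sizes, names, and the use of
# tf.identity for a linear output layer are illustrative only.
x = tf.placeholder(tf.float32, [None, 784])
h = _forward_fc_layer('fc1', [784, 256], [256], x, 1e-3, tf.nn.relu)
logits = _forward_fc_layer('fc2', [256, 10], [10], h, 1e-3, tf.identity)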
def __init__(self, n_hidden=[100], n_classes=10, is_training=True,
             activation_fn=tf.nn.relu, dropout_rate=0.,
             weight_initializer=initializers.xavier_initializer(),
             weight_regularizer=regularizers.l2_regularizer(0.001)):
    """ Constructor for an MLP object. Default values should be used as hints for
    the usage of each parameter.

    Args:
        n_hidden: list of ints, specifies the number of units in each hidden layer.
                  If the list is empty, the MLP will not have any hidden units, and
                  the model will simply perform a multinomial logistic regression.
        n_classes: int, number of classes of the classification problem. This number
                   is required in order to specify the output dimensions of the MLP.
        is_training: Bool Tensor, indicates whether the model is in training mode
                     or not. This is relevant for methods that behave differently
                     during training and testing (such as dropout). Have a look at
                     how to use conditionals in TensorFlow with tf.cond.
        activation_fn: callable, takes a Tensor and returns a transformed tensor.
                       The activation function specifies which type of non-linearity
                       to use in every hidden layer.
        dropout_rate: float in range [0,1], the fraction of hidden units that are
                      randomly dropped for regularization.
        weight_initializer: callable, a weight initializer that generates tensors
                            of a chosen distribution.
        weight_regularizer: callable, returns a scalar regularization loss given a
                            weight variable. The returned loss will be added to the
                            total loss for training purposes.
    """
    self.n_hidden = n_hidden
    self.n_classes = n_classes
    self.is_training = is_training
    self.activation_fn = activation_fn
    self.dropout_rate = dropout_rate
    self.weight_initializer = weight_initializer
    self.weight_regularizer = weight_regularizer
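# A minimal instantiation sketch (an assumption, not part of the source): the
# class name MLP is inferred from the docstring, and the hyper-parameters below
# are illustrative only.
mlp = MLP(n_hidden=[256, 128], n_classes=10,
          is_training=tf.placeholder(tf.bool),
          activation_fn=tf.nn.relu, dropout_rate=0.2,
          weight_initializer=initializers.xavier_initializer(),
          weight_regularizer=regularizers.l2_regularizer(0.001))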
def _fcl_layer(self, out_p, w_dims, n_layer, last_layer=False):
    """ Adds a fully connected layer to the graph.

    Args:
        out_p: A float tensor containing the output from the previous layer
        w_dims: a vector of ints containing the weight dims
        n_layer: an int containing the number of the layer
        last_layer: if True, no ReLU is applied to the output
    """
    with tf.name_scope('fcl%i' % n_layer):
        # Create weights
        weights = tf.get_variable(
            shape=w_dims,
            initializer=self.fcl_initialiser,
            regularizer=regularizers.l2_regularizer(self.weight_reg_strength),
            name="fcl%i/weights" % n_layer)
        # Create bias
        bias = tf.get_variable(
            shape=w_dims[-1],
            initializer=tf.constant_initializer(0.0),
            name="fcl%i/bias" % n_layer)
        # Calculate the pre-activation
        fcl_out = tf.nn.bias_add(tf.matmul(out_p, weights), bias)
        # Calculate the activation (skipped for the last layer)
        if not last_layer:
            fcl_out = tf.nn.relu(fcl_out, name="fcl%i" % n_layer)
        # Summaries
        if self.summary:
            pass
            # tf.histogram_summary("fcl%i/out" % n_layer, fcl_out)
            # tf.histogram_summary("fcl%i/weights" % n_layer, weights)
            # tf.histogram_summary("fcl%i/bias" % n_layer, bias)
        return fcl_out
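# A minimal usage sketch (an assumption, not part of the source): flattens a
# pooled feature map and applies two fully connected layers, the last one
# without a ReLU. All sizes and the method name below are illustrative only.
def _fcl_sketch(self, pool_out):
    # pool_out: float tensor of shape [batch, 8, 8, 64]
    flat = tf.reshape(pool_out, [-1, 8 * 8 * 64])
    fcl1 = self._fcl_layer(flat, [8 * 8 * 64, 384], n_layer=1)
    logits = self._fcl_layer(fcl1, [384, 10], n_layer=2, last_layer=True)
    return logits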
def _fcl_layer(self, out_p, w_dims, n_layer):
    """ Adds a fully connected layer to the graph.

    Args:
        out_p: A float tensor containing the output from the previous layer
        w_dims: a vector of ints containing the weight dims
        n_layer: an int containing the number of the layer
    """
    with tf.name_scope('fcl%i' % n_layer) as scope:
        # TODO REMOVE HARDCODED NUMBER
        # Create weights
        weights = tf.get_variable(
            # TODO MIGHT BE THAT THE OUTPUT SIZE COMES FIRST
            shape=w_dims,
            initializer=self.fcl_initialiser,
            regularizer=regularizers.l2_regularizer(self.weight_reg_strength),
            name="fcl%i/weights" % n_layer)
        # Create bias
        bias = tf.get_variable(
            shape=w_dims[-1],
            initializer=tf.constant_initializer(0.0),
            name="fcl%i/bias" % n_layer)
        # Calculate the pre-activation
        fcl_in = tf.nn.bias_add(tf.matmul(out_p, weights), bias)
        # Calculate the activation
        fcl_out = tf.nn.relu(fcl_in)
        # Summaries
        tf.histogram_summary("fcl%i/out" % n_layer, fcl_out)
        tf.histogram_summary("fcl%i/in" % n_layer, fcl_in)
        tf.histogram_summary("fcl%i/weights" % n_layer, weights)
        tf.histogram_summary("fcl%i/bias" % n_layer, bias)
        return fcl_out
def _fc_layer(self, x, n, output_shape=None, act_fn=None, reg_strength=0.,
              input_shape=None, init='sqrt'):
    '''
    x            : tensor
    n            : name; string
    input_shape  : int
    output_shape : int
    act_fn       : e.g. tf.nn.relu
    reg_strength : float - regularisation strength
    '''
    # Naming scope
    with tf.variable_scope(n):
        # Infer input shape from x
        if input_shape is not None:
            if x.get_shape()[1] != input_shape:
                raise Warning('x shape in hidden layer != given shape. '
                              'This may cause problems...')
        else:
            input_shape = x.get_shape()[1]

        # Weights
        W_shape = [input_shape, output_shape]
        sd = 1 / tf.sqrt(
            tf.reduce_prod(tf.cast(x.get_shape()[1:], tf.float32)))
        if init == 'sqrt':
            W_init = tf.truncated_normal_initializer(stddev=sd, dtype=tf.float32)
        else:
            init = float(init)
            W_init = tf.random_uniform_initializer(minval=-init, maxval=init,
                                                   dtype=tf.float32)
        W_reg = regularizers.l2_regularizer(reg_strength)
        W = tf.get_variable('W', W_shape, initializer=W_init, regularizer=W_reg)

        # Biases
        b_init = tf.constant_initializer(0)
        b = tf.get_variable('b', output_shape, initializer=b_init)

        # Linear transform
        pre_act = tf.matmul(x, W) + b
        self._histsum('pre_act', pre_act)

        # Activate
        if act_fn is not None:
            pre_drop = act_fn(pre_act)
        else:
            pre_drop = pre_act
        self._histsum('pre_drop', pre_drop)

        # Dropout (only applied while training)
        output = tf.cond(
            self.is_training,
            lambda: tf.nn.dropout(pre_drop, 1.0 - self.dropout_rate),
            lambda: pre_drop)
        self._histsum('post_drop', output)

        self._get_weights_and_bias_summaries(n)

        return output
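# A minimal usage sketch (an assumption, not part of the source): stacks one
# hidden layer and a linear output layer with _fc_layer. Shapes, names, and the
# regularisation strength below are illustrative only.
def _fc_stack_sketch(self, x):
    # x: float tensor of shape [batch, 784]
    h = self._fc_layer(x, 'fc1', output_shape=256, act_fn=tf.nn.relu,
                       reg_strength=1e-3)
    logits = self._fc_layer(h, 'out', output_shape=10, act_fn=None,
                            reg_strength=1e-3)
    return logits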
    'uniform': lambda scale: tf.random_uniform_initializer(minval=-scale,
                                                           maxval=scale)  # Initialization from a uniform distribution
}

# You can check the TensorFlow API at
# https://www.tensorflow.org/versions/r0.11/api_docs/python/contrib.layers.html#regularizers
# https://www.tensorflow.org/versions/r0.11/api_docs/python/state_ops.html#sharing-variables
WEIGHT_REGULARIZER_DICT = {
    'none': lambda weight: regularizers.l1_regularizer(0.),   # No regularization
    'l1': lambda weight: regularizers.l1_regularizer(weight),  # L1 regularization
    'l2': lambda weight: regularizers.l2_regularizer(weight)   # L2 regularization
}

# You can check the TensorFlow API at
# https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#activation-functions
ACTIVATION_DICT = {
    'relu': tf.nn.relu,       # ReLU
    'elu': tf.nn.elu,         # ELU
    'tanh': tf.tanh,          # Tanh
    'sigmoid': tf.sigmoid     # Sigmoid
}

# You can check the TensorFlow API at
# https://www.tensorflow.org/versions/r0.11/api_docs/python/train.html#optimizers
OPTIMIZER_DICT = {
    'sgd': tf.train.GradientDescentOptimizer,  # Gradient Descent