def _build_body(self):
    """Build the input projection, one LSTM step and the output projection.

    Reads ``self._features``, ``self._state_c``, ``self._state_h`` and
    ``self._action_mask``. Stores the state returned by the LSTM cell in
    ``self._next_state`` and the masked softmax output in ``self._probs``.

    Returns:
        The logits tensor produced by the output projection.
    """
    # Linear input projection (add relu/tanh here if necessary).
    w_in = tf.get_variable('Wi', [self.obs_size, self.n_hidden],
                           initializer=xavier_initializer())
    b_in = tf.get_variable('bi', [self.n_hidden],
                           initializer=tf.constant_initializer(0.))
    projected = tf.matmul(self._features, w_in) + b_in

    # One LSTM step fed with the externally supplied (c, h) state.
    cell = tf.contrib.rnn.LSTMCell(self.n_hidden, state_is_tuple=True)
    _, self._next_state = cell(inputs=projected,
                               state=(self._state_c, self._state_h))

    # Concatenate c and h along axis 1 so that both feed the output layer.
    flat_state = tf.concat(
        axis=1, values=(self._next_state.c, self._next_state.h))

    # Output projection to one logit per action.
    w_out = tf.get_variable('Wo', [self.n_hidden * 2, self.n_actions],
                            initializer=xavier_initializer())
    b_out = tf.get_variable('bo', [self.n_actions],
                            initializer=tf.constant_initializer(0.))
    logits = tf.matmul(flat_state, w_out) + b_out

    # Element-wise multiply the softmax output with the action mask
    # (the product is not renormalised here).
    self._probs = tf.multiply(tf.squeeze(tf.nn.softmax(logits)),
                              self._action_mask, name='probs')
    return logits
def multi_layer(weight_initialize, sigmoid, dropout):
    """Build a three-layer perceptron on ``MNIST.X``.

    Args:
        weight_initialize: If truthy, the weight matrices are created with
            ``tf.get_variable`` and a Xavier initializer; otherwise they are
            ``tf.Variable`` objects drawn from ``tf.random_normal``.
        sigmoid: If truthy the hidden activation is ``tf.nn.sigmoid``,
            otherwise ``tf.nn.relu``.
        dropout: If truthy, ``tf.nn.dropout`` is applied after each hidden
            layer with ``MNIST.DROPOUT_RATE`` as its second argument.

    Returns:
        A tuple ``(hypothesis, [W1, W2, W3, B1, B2, B3])``.
    """
    layer_dims = [
        (MNIST.IMAGE_PIXELS, 256),
        (256, 256),
        (256, MNIST.NUM_CLASSES),
    ]

    def make_weight(position, shape):
        var_name = 'W%d' % position
        if weight_initialize:
            return tf.get_variable(var_name, shape=shape,
                                   initializer=xavier_initializer())
        return tf.Variable(tf.random_normal(shape), name=var_name)

    # Weights are created first, then biases, as in the original ordering.
    weights = [make_weight(pos, [n_in, n_out])
               for pos, (n_in, n_out) in enumerate(layer_dims, start=1)]
    biases = [tf.Variable(tf.random_normal([n_out]), name='B%d' % pos)
              for pos, (_, n_out) in enumerate(layer_dims, start=1)]

    activation_function = tf.nn.sigmoid if sigmoid else tf.nn.relu

    hidden = MNIST.X
    for pos in (1, 2):
        pre_activation = tf.add(
            tf.matmul(hidden, weights[pos - 1]), biases[pos - 1])
        hidden = activation_function(pre_activation,
                                     name='Hidden_layer%d' % pos)
        if dropout:
            hidden = tf.nn.dropout(hidden, MNIST.DROPOUT_RATE,
                                   name='Hidden_dropout_layer%d' % pos)

    hypothesis = tf.add(tf.matmul(hidden, weights[2]), biases[2])
    return hypothesis, weights + biases
def highway_convolutional_network(input_units, n_filters, filter_width=3,
                                  use_batch_norm=False, use_dilation=False,
                                  training_ph=None):
    """Stack 1-D convolutions combined with their input through a sigmoid gate.

    Each layer computes ``gate * input + (1 - gate) * conv(input)`` followed
    by a ReLU, where ``gate`` is a single-unit sigmoid dense layer.

    Args:
        input_units: Input tensor; its last axis holds the features.
        n_filters: Iterable with the number of filters of every layer, a
            single int (one layer), or None to use one layer whose filter
            count equals the number of input features.
        filter_width: Width of the convolution kernel.
        use_batch_norm: Whether to batch-normalise the convolution output.
        use_dilation: If true, layer ``i`` uses dilation rate ``2 ** i``.
        training_ph: Passed as ``training`` to the batch normalisation.

    Returns:
        The output tensor of the last layer.
    """
    if n_filters is None:
        # If number of filters is not given the number of filters
        # will be equal to the number of input features.
        n_filters = [input_units.get_shape().as_list()[-1]]
    elif isinstance(n_filters, int):
        # A bare int cannot be enumerated; treat it as a single layer.
        n_filters = [n_filters]

    for n_layer, n_filt in enumerate(n_filters):
        dilation_rate = 2 ** n_layer if use_dilation else 1
        units = tf.layers.conv1d(input_units,
                                 n_filt,
                                 filter_width,
                                 padding='same',
                                 dilation_rate=dilation_rate,
                                 kernel_initializer=xavier_initializer())
        if use_batch_norm:
            units = tf.layers.batch_normalization(units, training=training_ph)
        sigmoid_gate = tf.layers.dense(input_units, 1, activation=tf.sigmoid,
                                       kernel_initializer=xavier_initializer())
        # NOTE(review): the sum below requires the conv output and the layer
        # input to have broadcast-compatible feature sizes -- confirm callers
        # pass filter counts equal to the input feature size.
        input_units = sigmoid_gate * input_units + (1 - sigmoid_gate) * units
        input_units = tf.nn.relu(input_units)
    return input_units
def deconv(x, output_shape, kwidth=5, dilation=2, init=None, uniform=False,
           bias_init=None, name='deconv1d'):
    """1-D transposed convolution implemented with the 2-D transposed op.

    Args:
        x: Input tensor of rank >= 3; the last axis holds the channels.
        output_shape: Shape of the result as a list (it is sliced and
            concatenated with other lists).
        kwidth: Kernel width.
        dilation: Stride applied along axis 1 of the expanded input.
        init: Kernel initializer; a Xavier initializer is used when None.
        uniform: Forwarded to the default Xavier initializer.
        bias_init: When not None a bias variable ``b`` is added.
            NOTE(review): the value itself is not used, the bias is always
            zero-initialised -- confirm this is intended.
        name: Variable scope name.

    Returns:
        The result reshaped to ``output_shape``.
    """
    in_shape = x.get_shape()
    n_in = in_shape[-1]
    n_out = output_shape[-1]
    assert len(in_shape) >= 3

    # Insert a unit axis so the 2-D operator can be applied.
    x_4d = tf.expand_dims(x, 2)
    target_4d = output_shape[:2] + [1] + [output_shape[-1]]
    kernel_init = xavier_initializer(uniform=uniform) if init is None else init
    strides = [1, dilation, 1, 1]

    with tf.variable_scope(name):
        # filter shape: [kwidth, 1, output_channels, in_channels]
        kernel = tf.get_variable('W', [kwidth, 1, n_out, n_in],
                                 initializer=kernel_init)
        try:
            out = tf.nn.conv2d_transpose(x_4d, kernel,
                                         output_shape=target_4d,
                                         strides=strides)
        except AttributeError:
            # support for versions of TF before 0.7.0
            # based on https://github.com/carpedm20/DCGAN-tensorflow
            out = tf.nn.deconv2d(x_4d, kernel, output_shape=target_4d,
                                 strides=strides)

        if bias_init is None:
            out = tf.reshape(out, out.get_shape())
        else:
            bias = tf.get_variable('b', [n_out],
                                   initializer=tf.constant_initializer(0.))
            out = tf.reshape(tf.nn.bias_add(out, bias), out.get_shape())

        # Drop the unit axis again.
        out = tf.reshape(out, output_shape)
        return out
def task_specific_attention(self, inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """Reduce the ``units`` axis of ``inputs`` with learned attention weights.

    A learned context vector scores a projection of every unit; the softmax
    of those scores weights the *unprojected* ``inputs`` before summing.

    Args:
        inputs: Tensor of shape [batch_size, units, input_size];
            ``input_size`` must be static (known).
        output_size: Size of the projection and of the context vector.
        initializer: Initializer of the context vector.
        activation_fn: Activation of the projection layer.
        scope: Optional variable scope; defaults to ``'attention'``.

    Returns:
        Tensor of shape [batch_size, input_size]: the attention-weighted
        sum of ``inputs`` over the ``units`` axis.
    """
    input_shape = inputs.get_shape()
    assert len(input_shape) == 3 and input_shape[-1].value is not None

    with tf.variable_scope(scope or 'attention') as attn_scope:
        # u_w: the attention context vector.
        context_vector = tf.get_variable(name='attention_context_vector',
                                         shape=[output_size],
                                         initializer=initializer,
                                         dtype=tf.float32)

        # Fully connected layer turning h_i into u_i:
        # [batch_size, units, input_size] -> [batch_size, units, output_size]
        projected = layers.fully_connected(inputs, output_size,
                                           activation_fn=activation_fn,
                                           scope=attn_scope)

        # Scores of shape [batch_size, units, 1] (keep_dims=True).
        scores = tf.reduce_sum(tf.multiply(projected, context_vector),
                               axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(scores, dim=1)
        tf.summary.histogram('attention_weigths', attention_weights)

        # The weights are applied to the raw inputs, so the result keeps
        # the input feature size.
        weighted_inputs = tf.multiply(inputs, attention_weights)
        return tf.reduce_sum(weighted_inputs, axis=1)
def dense_convolutional_network(input_units, n_filters=None, n_layers=1,
                                filter_width=3, use_dilation=False,
                                use_batch_norm=False, training_ph=None):
    """Densely connected stack of 1-D convolutions.

    Every layer receives the concatenation (along the last axis) of the
    network input and the outputs of all preceding layers.

    Args:
        input_units: Input tensor; its last axis holds the features.
        n_filters: Number of filters per layer; defaults to the number of
            input features when None.
        n_layers: Number of convolutional layers.
        filter_width: Width of the convolution kernel.
        use_dilation: If true, layer ``i`` uses dilation rate ``2 ** i``.
        use_batch_norm: Whether to batch-normalise each convolution output.
        training_ph: Passed as ``training`` to the batch normalisation.

    Returns:
        The output of the last layer (the input itself if ``n_layers`` is 0).
    """
    if n_filters is None:
        # Default: as many filters as there are input features.
        n_filters = input_units.get_shape().as_list()[-1]

    feature_maps = [input_units]
    latest = input_units
    for layer_idx in range(n_layers):
        stacked = tf.concat(feature_maps, axis=-1)
        rate = 2 ** layer_idx if use_dilation else 1
        latest = tf.layers.conv1d(stacked,
                                  n_filters,
                                  filter_width,
                                  dilation_rate=rate,
                                  padding='same',
                                  kernel_initializer=xavier_initializer())
        if use_batch_norm:
            latest = tf.layers.batch_normalization(latest,
                                                   training=training_ph)
        latest = tf.nn.relu(latest)
        feature_maps.append(latest)
    return latest
def __init__(self, num_class=101, keep_prob=0.6, batch_size=3, epoch=40,
             lr=1e-4):
    """Store hyper-parameters and create the graph inputs and learning rate.

    Args:
        num_class: Number of target classes.
        keep_prob: Stored as ``self.keep_prob``.
        batch_size: Batch size; also fixes the length of the label placeholder.
        epoch: Number of training epochs.
        lr: Initial learning rate for the exponential decay schedule.
    """
    # Hyper-parameters.
    self.num_class = num_class
    self.keep_prob = keep_prob
    self.batch_size = batch_size
    self.epoch = epoch

    # Frame geometry and clip length.
    self.IMG_WIDTH = 171
    self.IMG_HEIGHT = 128
    self.CROP_WIDTH = 112
    self.CROP_HEIGHT = 112
    self.CLIP_LENGTH = 16

    self.graph = tf.Graph()

    # Number of epochs between learning-rate changes.
    # NOTE(review): the original comment said "every 5 epochs" while the
    # value is 10 -- confirm which one is intended.
    decay_epoch = 10
    # train clip: 9537*5 CLIP=5
    # test clip: 3783*5 CLIP=5
    # train clip: 9537*3 CLIP=3
    # test clip: 3783*3 CLIP=3
    self.n_step_epoch = int(9537 / batch_size)
    decay_steps = int(decay_epoch * self.n_step_epoch)

    with self.graph.as_default():
        clip_shape = [None, self.CLIP_LENGTH, self.CROP_HEIGHT,
                      self.CROP_WIDTH, 3]
        self.inputs = tf.placeholder(tf.float32, clip_shape)
        self.labels = tf.placeholder(tf.int64, [batch_size, ])
        self.initializer = layers.xavier_initializer()
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        # Staircase decay: multiply the rate by 0.1 every `decay_steps` steps.
        self.lr = tf.train.exponential_decay(lr, self.global_step,
                                             decay_steps, 1e-1, True)
        tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, self.global_step)
def xavier(uniform=True, seed=None, dtype=tf.float32, name='Xavier'):
    """Xavier.

    Returns an initializer performing "Xavier" initialization for weights.

    This initializer is designed to keep the scale of the gradients roughly
    the same in all layers. In uniform distribution this ends up being the
    range: `x = sqrt(6. / (in + out)); [-x, x]` and for normal distribution
    a standard deviation of `sqrt(2. / (in + out))` is used.

    Arguments:
        uniform: Whether to use uniform or normal distributed random
            initialization.
        seed: A Python integer. Used to create random seeds. See
            `set_random_seed` for behavior.
        dtype: The data type. Only floating point types are supported.
        name: Name of the scope the initializer is created in.

    Returns:
        An initializer for a weight matrix.

    References:
        Understanding the difficulty of training deep feedforward neural
        networks. International conference on artificial intelligence and
        statistics. Xavier Glorot and Yoshua Bengio (2010).

    Links:
        [http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf]
        (http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)
    """
    with get_name_scope(name):
        initializer = tflayers.xavier_initializer(uniform=uniform, seed=seed,
                                                  dtype=dtype)
        return initializer
def stacked_convolutions(input_units, n_filters, filter_width=3,
                         use_batch_norm=False, use_dilation=False,
                         training_ph=None):
    """Stack of 1-D convolutions, each followed by optional BN and a ReLU.

    Args:
        input_units: Input tensor; its last axis holds the features.
        n_filters: Iterable with the number of filters of every layer, a
            single int (one layer), or None to use one layer whose filter
            count equals the number of input features.
        filter_width: Width of the convolution kernel.
        use_batch_norm: Whether to batch-normalise each convolution output;
            requires ``training_ph``.
        use_dilation: If true, layer ``i`` uses dilation rate ``2 ** i``.
        training_ph: Passed as ``training`` to the batch normalisation.

    Returns:
        The output tensor of the last layer.
    """
    if n_filters is None:
        # If number of filters is not given the number of filters
        # will be equal to the number of input features.
        n_filters = input_units.get_shape().as_list()[-1]
    if isinstance(n_filters, int):
        # A bare int has no len(); treat it as a single-layer specification.
        n_filters = [n_filters]

    units = input_units
    for n_layer, current_n_filters in enumerate(n_filters):
        dilation_rate = 2 ** n_layer if use_dilation else 1
        units = tf.layers.conv1d(units,
                                 current_n_filters,
                                 filter_width,
                                 padding='same',
                                 dilation_rate=dilation_rate,
                                 kernel_initializer=xavier_initializer())
        if use_batch_norm:
            assert training_ph is not None
            units = tf.layers.batch_normalization(units, training=training_ph)
        units = tf.nn.relu(units)
    return units
def create(cls, embeddings, labels, **kwargs):
    """Build a tagger model: embed, encode, then project to label scores.

    Args:
        embeddings: Stored on the model as ``model.embeddings``.
        labels: Label collection; its length is the number of output classes.
        **kwargs: Options read here: ``lengths_key``, ``dropout``, ``dropin``,
            ``sess``, ``lengths``, ``y``, ``crf``, ``crf_mask``, ``span_type``,
            ``proj``, ``feed_input``, ``activation`` and ``constraint``. The
            full dict is also forwarded to ``_record_state``, ``embed`` and
            ``encode``.

    Returns:
        The constructed model; ``model.probs`` holds the per-token label
        scores reshaped to [B, T, number of labels].
    """
    model = cls()
    model.embeddings = embeddings
    model._record_state(**kwargs)
    model.lengths_key = kwargs.get('lengths_key')
    model.labels = labels
    nc = len(labels)

    # This only exists to make exporting easier
    model.pdrop_value = kwargs.get('dropout', 0.5)
    model.dropin_value = kwargs.get('dropin', {})

    # Build the defaults lazily: `kwargs.get(key, tf.Session())` would open
    # a session (and create placeholders) even when the caller supplies one.
    if 'sess' in kwargs:
        model.sess = kwargs['sess']
    else:
        model.sess = tf.Session()
    if 'lengths' in kwargs:
        model.lengths = kwargs['lengths']
    else:
        model.lengths = tf.placeholder(tf.int32, [None], name="lengths")
    if 'y' in kwargs:
        model.y = kwargs['y']
    else:
        model.y = tf.placeholder(tf.int32, [None, None], name="y")

    model.pdrop_in = kwargs.get('dropin', 0.0)
    model.crf = bool(kwargs.get('crf', False))
    model.crf_mask = bool(kwargs.get('crf_mask', False))
    model.span_type = kwargs.get('span_type')
    model.proj = bool(kwargs.get('proj', False))
    model.feed_input = bool(kwargs.get('feed_input', False))
    model.activation_type = kwargs.get('activation', 'tanh')
    model.constraint = kwargs.get('constraint')

    # Wrap the constraint in a non-trainable variable so that it is saved
    # into the checkpoint. This means we won't need to recreate the actual
    # values of it when we reload the model
    if model.constraint is not None:
        model.constraint = [
            tf.get_variable("constraint_{}".format(i), initializer=c,
                            trainable=False)
            for i, c in enumerate(model.constraint)
        ]

    embedseq = model.embed(**kwargs)
    seed = np.random.randint(10e8)
    enc_out = model.encode(embedseq, **kwargs)

    with tf.variable_scope("output") as model.out_scope:
        if model.feed_input is True:
            enc_out = tf.concat(axis=2, values=[enc_out, embedseq])

        # Converts seq to tensor, back to (B,T,W)
        T = tf.shape(enc_out)[1]
        H = enc_out.get_shape()[2]
        # Flatten from [B x T x H] - > [BT x H]
        enc_out_bt_x_h = tf.reshape(enc_out, [-1, H])
        init = xavier_initializer(True, seed)

        with tf.contrib.slim.arg_scope([fully_connected],
                                       weights_initializer=init):
            if model.proj is True:
                hidden = tf.layers.dropout(
                    fully_connected(
                        enc_out_bt_x_h, H,
                        activation_fn=tf_activation(model.activation_type)),
                    model.pdrop_value, training=TRAIN_FLAG())
                preds = fully_connected(hidden, nc, activation_fn=None,
                                        weights_initializer=init)
            else:
                preds = fully_connected(enc_out_bt_x_h, nc,
                                        activation_fn=None,
                                        weights_initializer=init)
        model.probs = tf.reshape(preds, [-1, T, nc], name="probs")
    return model
def _init_embedding(self, scope):
    """Create the embedding matrix and look up ``self.inputs`` in it.

    The matrix of shape [vocab_size, embedding_size] is created under
    ``<scope>/embedding`` and stored in ``self.embedding_matrix``; the
    lookup result is stored in ``self.inputs_embedded``.

    Args:
        scope: Outer variable scope.
    """
    with tf.variable_scope(scope), tf.variable_scope("embedding"):
        matrix_shape = [self.vocab_size, self.embedding_size]
        self.embedding_matrix = tf.get_variable(
            name="embedding_matrix",
            shape=matrix_shape,
            initializer=layers.xavier_initializer(),
            dtype=tf.float32)
        self.inputs_embedded = tf.nn.embedding_lookup(
            self.embedding_matrix, self.inputs)
def _convolution(self, value, filter_width, stride, input_channels,
                 out_channels, apply_non_linearity=True):
    """Apply a 1-D convolutional layer and record summaries for it.

    Each call consumes one layer id from ``self.convolution_count`` and uses
    it to name the layer's variable scope.

    Args:
        value: the input tensor to apply the convolution on
        filter_width: the width of the filter (kernel)
        stride: the striding of the filter (kernel)
        input_channels: the number of input channels
        out_channels: the number of output channels
        apply_non_linearity: whether to apply a ReLU to the result

    Returns:
        A tuple ``(output, out_channels)`` where ``output`` is the
        convolution with bias added and, if requested, the ReLU applied.
    """
    layer_id = self.convolution_count
    self.convolution_count += 1

    with tf.variable_scope('convolution_layer_{}'.format(layer_id)) as layer:
        # Trainable parameters of the layer.
        kernel = tf.get_variable(
            'filters',
            shape=[filter_width, input_channels, out_channels],
            dtype=tf.float32,
            initializer=xavier_initializer())
        bias = tf.Variable(tf.constant(0.0, shape=[out_channels]),
                           name='bias')

        conv = tf.nn.conv1d(value, kernel, stride, 'SAME',
                            use_cudnn_on_gpu=True, name='convolution')

        with tf.name_scope('summaries'):
            # Add a depth axis of 1 (grayscale):
            # [filter_width, input_channels, 1, out_channels]
            kernel_4d = tf.expand_dims(kernel, 2)
            # Image-summary layout: [out_channels, filter_width,
            # input_channels, 1]
            kernel_images = tf.transpose(kernel_4d, [3, 0, 1, 2])
            # At most 3 of the filters are written to the image summary.
            tf.summary.image(layer.name + 'filters', kernel_images,
                             max_outputs=3)
            tf.summary.histogram(layer.name + 'filters', kernel)
            tf.summary.image(layer.name + 'bias',
                             tf.reshape(bias, [1, 1, out_channels, 1]))
            tf.summary.histogram(layer.name + 'bias', bias)

        conv = tf.nn.bias_add(conv, bias)

        if not apply_non_linearity:
            return conv, out_channels

        activations = tf.nn.relu(conv, name='activation')
        tf.summary.histogram(layer.name + 'activation', activations)
        return activations, out_channels
def noisy_layer(self, prefix, action_in, out_size, sigma0, non_linear=True):
    r"""Dense layer whose weights and biases are perturbed by learned noise.

    a common dense layer: y = w^{T}x + b
    a noisy layer: y = (w + \epsilon_w*\sigma_w)^{T}x +
        (b+\epsilon_b*\sigma_b)
    where \epsilon are random variables sampled from factorized normal
    distributions and \sigma are trainable variables which are expected to
    vanish along the training procedure

    Args:
        prefix: Prefix of the names of the four variables created here.
        action_in: Input tensor; ``shape[1]`` is used as the input size.
        out_size: Number of output units.
        sigma0: Scale of the constant initial value of ``sigma_b``
            (``sigma0 / sqrt(in_size)``).
        non_linear: If true a ReLU is applied to the result.

    Returns:
        The layer output, with ReLU applied unless ``non_linear`` is false.
    """
    in_size = int(action_in.shape[1])

    # Factorised noise: one vector per side, transformed by f_epsilon and
    # combined into the weight noise through an outer product.
    epsilon_in = self.f_epsilon(tf.random_normal(shape=[in_size]))
    epsilon_out = self.f_epsilon(tf.random_normal(shape=[out_size]))
    epsilon_w = tf.matmul(
        a=tf.expand_dims(epsilon_in, -1), b=tf.expand_dims(epsilon_out, 0))
    epsilon_b = epsilon_out

    sigma_bound = 1.0 / np.sqrt(float(in_size))
    sigma_w = tf.get_variable(
        name=prefix + "_sigma_w",
        shape=[in_size, out_size],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(
            minval=-sigma_bound, maxval=sigma_bound))
    # TF noise generation can be unreliable on GPU
    # If generating the noise on the CPU,
    # lowering sigma0 to 0.1 may be helpful
    sigma_b = tf.get_variable(
        name=prefix + "_sigma_b",
        shape=[out_size],
        dtype=tf.float32,  # 0.5~GPU, 0.1~CPU
        initializer=tf.constant_initializer(
            sigma0 / np.sqrt(float(in_size))))

    w = tf.get_variable(
        name=prefix + "_fc_w",
        shape=[in_size, out_size],
        dtype=tf.float32,
        initializer=layers.xavier_initializer())
    b = tf.get_variable(
        name=prefix + "_fc_b",
        shape=[out_size],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())

    action_activation = tf.nn.xw_plus_b(action_in,
                                        w + sigma_w * epsilon_w,
                                        b + sigma_b * epsilon_b)

    if not non_linear:
        return action_activation
    return tf.nn.relu(action_activation)
def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    """Add one dense layer with histogram summaries and return its output.

    Args:
        inputs: Input tensor multiplied with the [in_size, out_size] weights.
        in_size: Number of input features.
        out_size: Number of output features.
        n_layer: Identifier appended to ``'layer'`` to form the scope name.
        activation_function: Optional callable applied to ``Wx + b``.

    Returns:
        The (optionally activated) layer output.
    """
    layer_name = 'layer%s' % n_layer
    with tf.variable_scope(layer_name):
        with tf.variable_scope('weights'):
            weights = tf.get_variable(shape=[in_size, out_size], name='W',
                                      initializer=xavier_initializer())
            tf.histogram_summary(layer_name + '/weights', weights)

        with tf.variable_scope('biases'):
            bias = tf.get_variable(shape=[1, out_size], name='b',
                                   initializer=xavier_initializer())
            tf.histogram_summary(layer_name + '/biases', bias)

        with tf.variable_scope('Wx_plus_b'):
            linear = tf.add(tf.matmul(inputs, weights), bias)

        outputs = (linear if activation_function is None
                   else activation_function(linear, ))
        tf.histogram_summary(layer_name + '/outputs', outputs)
        return outputs
def conv1d(x, kwidth=5, num_kernels=1, init=None, uniform=False,
           bias_init=None, name='conv1d', padding='SAME'):
    """Stride-1 1-D convolution with an optional constant-initialised bias.

    Args:
        x: Input tensor of rank >= 3; the last axis holds the channels.
        kwidth: Kernel width.
        num_kernels: Number of output channels.
        init: Kernel initializer; a Xavier initializer is used when None.
        uniform: Forwarded to the default Xavier initializer.
        bias_init: Constant initial value of the bias; no bias when None.
        name: Variable scope name.
        padding: Padding mode passed to ``tf.nn.conv1d``.

    Returns:
        The convolution output, plus the bias when one is requested.
    """
    x_shape = x.get_shape()
    n_in = x_shape[-1]
    assert len(x_shape) >= 3

    kernel_init = xavier_initializer(uniform=uniform) if init is None else init

    with tf.variable_scope(name):
        # filter shape: [kwidth, in_channels, num_kernels]
        kernel = tf.get_variable('W', [kwidth, n_in, num_kernels],
                                 initializer=kernel_init)
        out = tf.nn.conv1d(x, kernel, stride=1, padding=padding)
        if bias_init is None:
            return out
        bias = tf.get_variable(
            'b', [num_kernels],
            initializer=tf.constant_initializer(bias_init))
        return out + bias
def downconv(x, output_dim, kwidth=5, pool=2, init=None, uniform=False,
             bias_init=None, name='downconv'):
    """Strided (downsampling) 1-D convolution built on the 2-D conv op.

    Args:
        x: Input tensor; the last axis holds the channels.
        output_dim: Number of output channels.
        kwidth: Kernel width.
        pool: Stride applied along axis 1 of the expanded input.
        init: Kernel initializer; a Xavier initializer is used when None.
        uniform: Forwarded to the default Xavier initializer.
        bias_init: Initializer of the bias variable; no bias when None.
        name: Variable scope name.

    Returns:
        The convolution output with the inserted unit axis removed.
    """
    # Insert a unit axis so the 2-D operator can be applied.
    x_4d = tf.expand_dims(x, 2)
    kernel_init = xavier_initializer(uniform=uniform) if init is None else init

    with tf.variable_scope(name):
        kernel = tf.get_variable('W',
                                 [kwidth, 1, x.get_shape()[-1], output_dim],
                                 initializer=kernel_init)
        out = tf.nn.conv2d(x_4d, kernel, strides=[1, pool, 1, 1],
                           padding='SAME')
        if bias_init is None:
            out = tf.reshape(out, out.get_shape())
        else:
            bias = tf.get_variable('b', [output_dim], initializer=bias_init)
            out = tf.reshape(tf.nn.bias_add(out, bias), out.get_shape())

        # Reshape back to 1-D: keep the first two axes and the channel axis.
        flat_shape = (out.get_shape().as_list()[:2] +
                      [out.get_shape().as_list()[-1]])
        out = tf.reshape(out, flat_shape)
        return out
def fully_connected(input_tensor, name, n_out, activation_fn=tf.nn.relu):
    """Dense layer whose parameters come from ``data_dict`` when available.

    If ``name`` is a key of ``data_dict``, the weights and bias are
    initialised from ``data_dict[name][0]`` and ``data_dict[name][1]``;
    otherwise Xavier weights and a zero bias are created.

    Args:
        input_tensor: Input tensor; its last dimension must be static.
        name: Variable scope name and lookup key into ``data_dict``.
        n_out: Number of output units (used only for fresh variables).
        activation_fn: Optional callable applied to the logits.

    Returns:
        A tuple ``(output, weights, biases)``.
    """
    n_in = input_tensor.get_shape()[-1].value
    with tf.variable_scope(name):
        if name not in data_dict:
            weights = tf.get_variable('weights', [n_in, n_out], tf.float32,
                                      xavier_initializer())
            biases = tf.get_variable("bias", [n_out], tf.float32,
                                     tf.constant_initializer(0.0))
        else:
            # The shape is taken from the constant initial value.
            stored = data_dict[name]
            weights = tf.get_variable('weights', shape=None,
                                      dtype=tf.float32,
                                      initializer=tf.constant(stored[0]))
            biases = tf.get_variable('bias', shape=None, dtype=tf.float32,
                                     initializer=tf.constant(stored[1]))

        logits = tf.nn.bias_add(tf.matmul(input_tensor, weights), biases)
        output = activation_fn(logits) if activation_fn else logits
        return output, weights, biases
def deconv64(_in: Tensor):
    """Decode a vector into a single-channel map via four transposed convs.

    A ReLU dense layer produces a 2x2x256 tensor that is upsampled by four
    stride-2 transposed convolutions, each with a ReLU activation.

    Args:
        _in: Input tensor fed to the dense layer.

    Returns:
        The output of the last transposed convolution (one channel).
    """
    net = tf.layers.dense(
        _in, 2 * 2 * 256,
        activation=tf.nn.relu,
        kernel_initializer=cl.xavier_initializer(uniform=False))
    net = tf.reshape(net, [-1, 2, 2, 256])

    # (scope name, filters, kernel size); all layers use stride 2.
    layer_specs = (
        ('conv1', 128, 5),  # -> 7x7
        ('conv2', 64, 4),   # -> 16x16
        ('conv3', 32, 2),   # -> 32x32
        ('conv4', 1, 2),    # -> 64x64
    )
    for layer_name, n_filters, kernel_size in layer_specs:
        net = tf.layers.conv2d_transpose(net, n_filters, kernel_size,
                                         strides=2, activation=tf.nn.relu,
                                         name=layer_name)
    return net
def deconv28(in_: Tensor, is_train: bool = False) -> Tensor:
    """Decode a vector into single-channel logits via transposed convs.

    A ReLU dense layer produces a 4x4x32 tensor; three transposed
    convolutions follow, each with batch normalisation and ReLU, and a final
    transposed convolution without activation yields the logits.

    Args:
        in_: Input tensor fed to the dense layer.
        is_train: Passed as ``training`` to the batch normalisation layers.

    Returns:
        The un-activated output of the last transposed convolution.
    """
    net = tf.layers.dense(
        in_, 32 * 4 * 4,
        activation=tf.nn.relu,
        kernel_initializer=cl.xavier_initializer(uniform=False))
    net = tf.reshape(net, [-1, 4, 4, 32])

    # (scope name, stride) of the batch-normalised hidden layers.
    hidden_specs = (('deconv1', 2), ('deconv2', 2), ('deconv3', 1))
    for layer_name, stride in hidden_specs:
        net = tf.layers.conv2d_transpose(net, 32, 4, strides=stride,
                                         name=layer_name)
        net = tf.layers.batch_normalization(net, training=is_train)
        net = tf.nn.relu(net)

    # Output layer: raw logits, no activation.
    return tf.layers.conv2d_transpose(net, 1, 4, strides=1, activation=None,
                                      name='deconv4')
def le_conv_tune(x__: tf.Tensor, n_out: int, activation_fn=tf.nn.relu,
                 drop_out: Optional[float] = None, batch_norm=False,
                 is_train=True):
    """Two conv/pool stages followed by a linear output layer.

    Args:
        x__: Input tensor for the first convolution.
        n_out: Number of units of the output dense layer.
        activation_fn: Activation of both convolutions.
        drop_out: Dropout rate applied to the output of the dense layer;
            no dropout when None.
        batch_norm: If true, batch normalisation is inserted after the first
            pooling and after flattening.
        is_train: Passed as ``training`` to batch normalisation and dropout.

    Returns:
        The output of the dense layer (after dropout when enabled).
    """
    features = tf.layers.conv2d(x__, 20, 5, activation=activation_fn,
                                name='conv1')
    # Pool size 2, stride 1.
    features = tf.layers.max_pooling2d(features, 2, 1, name='pool1')
    if batch_norm:
        features = tf.layers.batch_normalization(features, training=is_train)

    features = tf.layers.conv2d(features, 50, 5, activation=activation_fn,
                                name='conv2')
    features = tf.layers.max_pooling2d(features, 2, 1, name='pool2')

    flat = cl.flatten(features)
    if batch_norm:
        flat = tf.layers.batch_normalization(flat, training=is_train)

    logits = tf.layers.dense(
        flat, n_out,
        activation=None,
        kernel_initializer=cl.xavier_initializer(uniform=False))
    if drop_out is None:
        return logits
    return tf.layers.dropout(logits, rate=drop_out, training=is_train)
def deconv_btn(inputs, kernel_size, num_filters_in, num_outputs, name,
               is_training=True, stride_size=(1, 1), padding='SAME',
               activation_fn=tf.nn.relu):
    """
    Convolution Transpose followed by batch normalization then activation fn:
    ----------
    Args:
        inputs: Tensor, [batch_size, height, width, channels]
        kernel_size: List, filter size [height, width]
        num_filters_in: Integer, number of channels in input tensor
        num_outputs: Integer, number of convolution filters
        name: String, scope name
        is_training: Boolean, in training mode or not
        stride_size: Sequence, convolution stride [height, width]
        padding: String, input padding ('SAME' or 'VALID')
        activation_fn: Tensor fn, activation function on output (can be None)

    Returns:
        outputs: Tensor, [batch_size, out_height, out_width, num_outputs].
            With 'SAME' padding each spatial size is the input size times
            the stride; otherwise it is
            ``input * stride + max(kernel - stride, 0)``.
    """
    with tf.variable_scope(name):
        kernel_shape = [kernel_size[0], kernel_size[1],
                        num_outputs, num_filters_in]
        stride_shape = [1, stride_size[0], stride_size[1], 1]

        # The requested output size must be consistent with stride and
        # padding; reusing the input size is only valid for stride 1 with
        # 'SAME' padding.
        input_shape = tf.shape(inputs)
        if padding.upper() == 'SAME':
            out_height = input_shape[1] * stride_size[0]
            out_width = input_shape[2] * stride_size[1]
        else:
            out_height = (input_shape[1] * stride_size[0] +
                          max(kernel_size[0] - stride_size[0], 0))
            out_width = (input_shape[2] * stride_size[1] +
                         max(kernel_size[1] - stride_size[1], 0))
        output_shape = tf.stack(
            [input_shape[0], out_height, out_width, num_outputs])

        weights = tf.get_variable('weights', kernel_shape, tf.float32,
                                  xavier_initializer())
        bias = tf.get_variable('bias', [num_outputs], tf.float32,
                               tf.constant_initializer(0.0))
        conv_trans = tf.nn.conv2d_transpose(inputs, weights, output_shape,
                                            stride_shape, padding=padding)
        outputs = tf.nn.bias_add(conv_trans, bias)
        outputs = tf.contrib.layers.batch_norm(outputs, center=True,
                                               scale=True,
                                               is_training=is_training)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def __init__(self, vggname, neck, keep_prob, wd, feature_dim, num_classes=10):
    """Creates a model for classifying an image using VGG networks.

    Args:
      vggname: A string representing the vgg type, such as 'VGG11'.
      neck: A bool value that decides using the MLP neck or not.
      keep_prob: The rate of keeping one neuron in Dropout; stored as the
        drop rate ``1 - keep_prob``.
      wd: The co-efficient of weight decay (scale of the L2 regularizer).
      feature_dim: the dimension of the representation space.
      num_classes: The number of classes for classification.
    """
    super(VggNet, self).__init__()

    # Architecture description.
    self.vggname = vggname
    self.neck = neck
    self.feature_dim = feature_dim
    self.num_classes = num_classes
    self.drop_rate = 1 - keep_prob

    # Counters, both starting at zero.
    self.pool_num = 0
    self.conv_num = 0

    # Regularizer and initializers.
    self.regularizer = contrib_layers.l2_regularizer(scale=wd)
    self.initializer = contrib_layers.xavier_initializer()
    self.variance_initializer = contrib_layers.variance_scaling_initializer(
        factor=0.1,
        mode='FAN_IN',
        uniform=False,
        seed=None,
        dtype=tf.dtypes.float32)
def conv1d(x, kwidth=5, num_kernels=1, init=None, uniform=False,
           bias_init=None, name='conv1d', padding='SAME'):
    """Stride-1 1-D convolution with an optional constant-initialised bias.

    Args:
        x: Input tensor of rank >= 3; the last axis holds the channels.
        kwidth: Kernel width.
        num_kernels: Number of output channels.
        init: Kernel initializer; a Xavier initializer is used when None.
        uniform: Forwarded to the default Xavier initializer.
        bias_init: Constant initial value of the bias; no bias when None.
        name: Variable scope name.
        padding: Padding mode passed to ``tf.nn.conv1d``.

    Returns:
        The convolution output, plus the bias when one is requested.
    """
    x_shape = x.get_shape()
    n_in = x_shape[-1]
    assert len(x_shape) >= 3

    kernel_init = xavier_initializer(uniform=uniform) if init is None else init

    with tf.variable_scope(name):
        # filter shape: [kwidth, in_channels, num_kernels]
        kernel = tf.get_variable('W', [kwidth, n_in, num_kernels],
                                 initializer=kernel_init)
        out = tf.nn.conv1d(x, kernel, stride=1, padding=padding)
        if bias_init is None:
            return out
        bias = tf.get_variable(
            'b', [num_kernels],
            initializer=tf.constant_initializer(bias_init))
        return out + bias
def conv2d_transpose(
        inputs,
        out_shape,
        kernel_size=(5, 5),
        stride=(1, 1),
        activation_fn=tf.nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=tflayers.xavier_initializer(),
        scope=None,
        reuse=None):
    """Transposed 2-D convolution with optional normaliser and activation.

    Args:
        inputs: Input tensor; the last dimension must be static.
        out_shape: Indexable ``(height, width, channels)`` of the output,
            without the batch dimension.
        kernel_size: Kernel ``(height, width)``.
        stride: Stride ``(height, width)``.
        activation_fn: Optional callable applied last.
        normalizer_fn: Optional callable applied to the convolution output;
            when it is falsy a zero-initialised bias is added instead.
        normalizer_params: Keyword arguments for ``normalizer_fn``.
        weights_initializer: Initializer of the kernel.
        scope: Variable scope; ``'Conv2d_transpose'`` is the default name.
        reuse: Passed to the variable scope.

    Returns:
        The output tensor.
    """
    n_batch = tf.shape(inputs)[0]
    n_in = int(inputs.get_shape()[-1])
    n_out = out_shape[2]
    full_out_shape = tf.stack([n_batch, out_shape[0], out_shape[1], n_out])
    kernel_shape = [kernel_size[0], kernel_size[1], n_out, n_in]
    strides = [1, stride[0], stride[1], 1]

    with tf.variable_scope(scope, 'Conv2d_transpose', [inputs], reuse=reuse):
        kernel = tf.get_variable('weights', kernel_shape,
                                 initializer=weights_initializer)
        outputs = tf.nn.conv2d_transpose(inputs, kernel,
                                         output_shape=full_out_shape,
                                         strides=strides)

        if not normalizer_fn:
            biases = tf.get_variable(
                'biases', [n_out],
                initializer=tf.constant_initializer(0.0))
            outputs = tf.nn.bias_add(outputs, biases)

        if normalizer_fn is not None:
            outputs = normalizer_fn(outputs, **(normalizer_params or {}))

        if activation_fn is not None:
            outputs = activation_fn(outputs)
    return outputs
def dilated2d(inputs, output_dim, kernel_size, activation_fn, rate,
              batch_norm=False, is_training=True, trainable=True,
              name='dilated2d'):
    """Dilated (atrous) 2-D convolution with batch norm or bias, then activation.

    Args:
        inputs: Input tensor; its last dimension gives the input channels.
        output_dim: Number of output channels.
        kernel_size: Side length of the square kernel.
        activation_fn: Name of the activation (case-insensitive): 'relu',
            'leakyrelu', 'sigmoid', 'tanh' or 'none'.
        rate: Dilation rate passed to ``tf.nn.atrous_conv2d``.
        batch_norm: If True batch normalisation replaces the bias.
        is_training: Passed to the batch normalisation.
        trainable: Whether the variables created here are trainable.
        name: Variable scope name.

    Returns:
        The activated output, or None for an unrecognised activation name
        (matching the sibling ``fully_connected`` helper).
    """
    with tf.variable_scope(name):
        # `trainable` is forwarded to the kernel as well, as the bias and
        # the batch normalisation already do.
        weights = tf.get_variable(
            name='w',
            shape=[kernel_size, kernel_size, inputs.get_shape()[-1],
                   output_dim],
            dtype=tf.float32,
            trainable=trainable,
            initializer=layers.xavier_initializer())
        dilated = tf.nn.atrous_conv2d(inputs, weights, rate, padding='SAME')

        # batch normalization
        if batch_norm is True:
            dilated = layers.batch_norm(inputs=dilated,
                                        updates_collections=None,
                                        trainable=trainable,
                                        is_training=is_training)
        else:
            biases = tf.get_variable(name='b',
                                     shape=[output_dim],
                                     dtype=tf.float32,
                                     trainable=trainable,
                                     initializer=tf.zeros_initializer())
            dilated = dilated + biases

        activation_name = activation_fn.lower()
        if activation_name == 'relu':
            out = tf.nn.relu(dilated)
        elif activation_name == 'leakyrelu':
            out = tf.nn.leaky_relu(dilated, alpha=0.2)
        elif activation_name == 'sigmoid':
            out = tf.nn.sigmoid(dilated)
        elif activation_name == 'tanh':
            out = tf.nn.tanh(dilated)
        elif activation_name == 'none':
            out = dilated
        else:
            out = None

        # Return the activated tensor; previously the pre-activation tensor
        # was returned and the activation was silently dropped.
        return out
def fully_connected(inputs, output_dim, activation_fn, batch_norm=False,
                    is_training=True, trainable=True, name=None):
    """Dense layer with batch norm or bias and an activation chosen by name.

    Args:
        inputs: Input tensor; its last dimension must be static.
        output_dim: Number of output units.
        activation_fn: Name of the activation (case-insensitive): 'relu',
            'leakyrelu', 'sigmoid', 'tanh' or 'none'.
        batch_norm: If True batch normalisation replaces the bias.
        is_training: Passed to the batch normalisation.
        trainable: Whether the variables created here are trainable.
        name: Variable scope name.

    Returns:
        The activated output, or None for an unrecognised activation name.
    """
    # Lambdas defer the attribute lookups until an activation is selected.
    activations = {
        'relu': lambda t: tf.nn.relu(t),
        'leakyrelu': lambda t: tf.nn.leaky_relu(t, alpha=0.2),
        'sigmoid': lambda t: tf.nn.sigmoid(t),
        'tanh': lambda t: tf.nn.tanh(t),
        'none': lambda t: t,
    }

    with tf.variable_scope(name):
        n_in = inputs.get_shape()[-1].value
        weights = tf.get_variable(name='w',
                                  shape=[n_in, output_dim],
                                  dtype=tf.float32,
                                  trainable=trainable,
                                  initializer=layers.xavier_initializer())
        fc = tf.matmul(inputs, weights)

        if batch_norm is True:
            fc = layers.batch_norm(inputs=fc,
                                   updates_collections=None,
                                   trainable=trainable,
                                   is_training=is_training)
        else:
            biases = tf.get_variable(name='b',
                                     shape=[output_dim],
                                     dtype=tf.float32,
                                     trainable=trainable,
                                     initializer=tf.zeros_initializer())
            fc = fc + biases

        activate = activations.get(activation_fn.lower())
        if activate is None:
            return None
        return activate(fc)
def get_q_values_op(self, state, scope, reuse=False):
    """Build the CNN -> LSTM -> linear head that outputs the Q-values.

    ``state`` is split along axis 3 into ``self.FLAGS.state_hist`` frames;
    each frame goes through ``self.cnn_network`` (variables reused after the
    first frame), the results are stacked on axis 1 and fed to an LSTM whose
    final hidden state is projected to ``self.num_actions`` outputs.

    Args:
        state: Input tensor holding the frame history on axis 3.
        scope: Variable scope name.
        reuse: Not used by this method.

    Returns:
        Q-value tensor with static shape [None, self.num_actions].
    """
    with tf.variable_scope(scope):
        frames = tf.split(state, self.FLAGS.state_hist, axis=3)

        # Only the first frame creates the CNN variables.
        cnn_frames = []
        for frame_idx, frame in enumerate(frames):
            cnn_frames.append(
                self.cnn_network(frame, scope, reuse=(frame_idx != 0)))
        cnn_tensor_input = tf.stack(cnn_frames, axis=1)

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(512)
        _, final_state = tf.nn.dynamic_rnn(lstm_cell, cnn_tensor_input,
                                           dtype=tf.float32, scope=scope)
        # The state is a (c, h) pair; only h feeds the output layer.
        _, lstm_out = final_state

        q_vals = layers.fully_connected(
            inputs=lstm_out,
            num_outputs=self.num_actions,
            activation_fn=None,
            weights_initializer=layers.xavier_initializer(),
            biases_initializer=tf.constant_initializer(0),
            scope=scope + "fc")
        assert (q_vals.get_shape().as_list() == [None, self.num_actions])
        return q_vals
def build_mlp(mlp_input, output_size, scope, n_layers, size,
              output_activation=None):
    """Build a feed-forward network.

    Args:
        mlp_input: Input tensor.
        output_size: Number of units of the final layer.
        scope: Variable scope name.
        n_layers: Total number of layers; ``n_layers - 1`` ReLU hidden layers
            are followed by one output layer.
        size: Number of units of each hidden layer.
        output_activation: Optional activation of the output layer.

    Returns:
        The output tensor of the final layer.
    """
    hidden = mlp_input
    with tf.variable_scope(scope):
        # Hidden stack.
        for _ in range(n_layers - 1):
            hidden = layers.fully_connected(
                hidden,
                num_outputs=size,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=tf.nn.relu)

        # Output layer.
        return layers.fully_connected(inputs=hidden,
                                      num_outputs=output_size,
                                      activation_fn=output_activation)
def auxiliary_loss(self, attention_score, document_vector, question_vector):
    """Compute the attention loss and the answerability loss.

    Also sets ``self.unans_prob`` (column 0 of the binary logits),
    ``self.preds`` (argmax of the binary logits) and ``self.acc`` (mean
    agreement of ``self.preds`` with ``self.answerable``).

    Args:
        attention_score: Attention scores with a trailing unit axis.
        document_vector: Concatenated with ``question_vector`` on the last
            axis to form the classifier input.
        question_vector: See ``document_vector``.

    Returns:
        A tuple ``(attention_loss, logistic_loss)`` of mean cross-entropies
        against ``self.sentence_idx`` and ``self.answerable``.
    """
    # Drop the trailing unit axis of the scores.
    attention_logits = tf.squeeze(attention_score, axis=-1)
    attention_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=attention_logits, labels=self.sentence_idx))

    # Two-way linear classifier on [document; question].
    classifier_input = tf.concat([document_vector, question_vector], axis=-1)
    binary_logits = tf.layers.dense(
        classifier_input, 2,
        kernel_initializer=layers.xavier_initializer(),
        kernel_regularizer=self.regularizer,
        use_bias=True,
        activation=None)

    self.unans_prob = binary_logits[:, 0]
    self.preds = tf.argmax(binary_logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(self.preds, self.answerable)
    self.acc = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

    logistic_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=binary_logits, labels=self.answerable))
    return attention_loss, logistic_loss
def task_specific_attention(self, inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """Weight every unit of ``inputs`` by a learned attention distribution.

    A learned context vector scores a projection of each unit; the softmax
    of the scores over axis 1 multiplies the unprojected ``inputs``. The
    weighted units are returned without being summed.

    Args:
        inputs: Rank-3 tensor whose last dimension is static.
        output_size: Size of the projection and of the context vector.
        initializer: Initializer of the context vector.
        activation_fn: Activation of the projection layer.
        scope: Optional variable scope; defaults to ``'attention'``.

    Returns:
        ``inputs`` multiplied element-wise by the attention weights.
    """
    input_shape = inputs.get_shape()
    assert len(input_shape) == 3 and input_shape[-1].value is not None

    with tf.variable_scope(scope or 'attention') as attn_scope:
        context_vector = tf.get_variable(name='attention_context_vector',
                                         shape=[output_size],
                                         initializer=initializer,
                                         dtype=tf.float32)
        projected = layers.fully_connected(inputs, output_size,
                                           activation_fn=activation_fn,
                                           scope=attn_scope)

        # One score per unit; keep_dims keeps a trailing unit axis so the
        # weights broadcast over the feature axis of `inputs`.
        scores = tf.reduce_sum(tf.multiply(projected, context_vector),
                               axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(scores, dim=1)
        tf.summary.histogram('attention_weigths', attention_weights)

        return tf.multiply(inputs, attention_weights)
def conv_op(self, input_op, name, ksize, n_out, stride, p):
    """3-D convolution with bias and ReLU; appends its parameters to ``p``.

    Args:
        input_op: Input tensor; its last dimension (channels) must be static.
        name: Name scope of the layer.
        ksize: Kernel size, indexable with three entries.
        n_out: Number of output channels.
        stride: Strides, indexable with three entries.
        p: List that receives ``[kernel, biases]`` (mutated in place).

    Returns:
        The ReLU-activated output tensor.
    """
    # The channel count is the last Dimension of the static shape;
    # TensorShape itself has no `.value` attribute.
    n_in = input_op.get_shape()[-1].value

    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(
            name=scope + 'w',
            shape=[ksize[0], ksize[1], ksize[2], n_in, n_out],
            dtype=tf.float32,
            initializer=xavier_initializer())
        conv = tf.nn.conv3d(
            input=input_op,
            filter=kernel,
            strides=[1, stride[0], stride[1], stride[2], 1],
            padding='SAME')
        bias_init_val = tf.constant(value=0.0, shape=[n_out],
                                    dtype=tf.float32)
        biases = tf.Variable(initial_value=bias_init_val, trainable=True,
                             name='b')
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation
def build_residual_block(self, input_, channel, strides, transpose=False):
    """Pre-activation residual block, in a plain or a transposed variant.

    Main path: BN -> activation -> 3x3 (transposed) conv with ``strides`` ->
    BN -> activation -> 3x3 conv. Shortcut: 1x1 conv with ``strides`` on the
    block input. The plain variant uses ``self.lrelu`` and ``conv2d``; the
    transposed variant uses ``tf.nn.relu`` and ``conv2d_transpose`` for the
    first convolution.

    Args:
        input_: Input tensor.
        channel: Number of filters of every convolution.
        strides: Strides of the first main-path convolution and the shortcut.
        transpose: Selects the transposed variant.

    Returns:
        The sum of the shortcut and the main path.
    """
    if transpose:
        activation = tf.nn.relu
        first_conv = tf.layers.conv2d_transpose
    else:
        activation = self.lrelu
        first_conv = tf.layers.conv2d

    # Main path.
    pre_activated = activation(tf.layers.batch_normalization(input_))
    branch = first_conv(
        pre_activated, channel, (3, 3),
        padding='same',
        strides=strides,
        kernel_initializer=tcl.xavier_initializer())
    branch = activation(tf.layers.batch_normalization(branch))
    branch = tf.layers.conv2d(
        branch, channel, (3, 3),
        padding='same',
        kernel_initializer=tcl.xavier_initializer())

    # Shortcut path.
    # NOTE(review): in the transposed variant the shortcut is still a
    # strided (downsampling) conv2d while the main path upsamples -- confirm
    # the shapes agree for strides other than 1.
    shortcut = tf.layers.conv2d(
        input_, channel, (1, 1),
        strides=strides,
        kernel_initializer=tcl.xavier_initializer())

    return tf.add(shortcut, branch)
def __call__(self, x, state, scope=None):
    """Run one step of a GRU-style recurrent cell.

    Args:
        x: input for this step; multiplied by ``[input_size, num_neurons]``
            weights.
        state: previous hidden state; multiplied by
            ``[num_neurons, num_neurons]`` weights.
        scope: optional variable scope; defaults to the class name.

    Returns:
        ``(new_h, new_h)`` — the new hidden state, returned as both the output
        and the next state.
    """
    def _gate_weights(prefix):
        # Input and recurrent weight matrices for one gate, created in the
        # order <prefix>_Wx then <prefix>_Uh.
        w_x = tf.get_variable(prefix + "_Wx",
                              [self._input_size, self._num_neurons],
                              initializer=xavier_initializer(seed=42))
        u_h = tf.get_variable(prefix + "_Uh",
                              [self._num_neurons, self._num_neurons],
                              initializer=xavier_initializer(seed=42))
        return w_x, u_h

    with tf.variable_scope(scope or type(self).__name__):
        h_prev = state

        ug_wx, ug_uh = _gate_weights("UG")
        rg_wx, rg_uh = _gate_weights("RG")
        cand_wx, cand_uh = _gate_weights("tanh")
        cand_bias = tf.get_variable("tanh_b", [1, self._num_neurons],
                                    initializer=tf.ones_initializer())

        # Update gate.
        ug_in = tf.matmul(x, ug_wx)
        ug_rec = tf.matmul(h_prev, ug_uh)
        update_gate = tf.sigmoid(tf.add(ug_in, ug_rec))

        # Reset gate.
        rg_in = tf.matmul(x, rg_wx)
        rg_rec = tf.matmul(h_prev, rg_uh)
        reset_gate = tf.sigmoid(tf.add(rg_in, rg_rec))

        # Candidate state: the reset gate scales the previous state before
        # the recurrent projection.
        cand_in = tf.matmul(x, cand_wx)
        gated_h = tf.multiply(h_prev, reset_gate)
        cand_rec = tf.matmul(gated_h, cand_uh)
        candidate = tf.tanh(tf.add(tf.add(cand_in, cand_rec), cand_bias))

        # Blend the old state and the candidate with the update gate.
        kept = tf.multiply((1.0 - update_gate), h_prev)
        written = tf.multiply(update_gate, candidate)
        new_h = tf.add(kept, written)
        return new_h, new_h
def compute(self, x, state, parameters, scope=None):
    """Evaluate the LSTM step with freshly created or stored weights.

    Args:
        x: input for this step.
        state: tensor split in two along axis 1 into ``(prev_C, prev_h)``.
        parameters: when None, the weights are created with
            ``tf.get_variable``; otherwise stored weights are used.
        scope: optional variable scope for the variable-creating path;
            defaults to ``"LSTM" + str(n_neurons)``.

    Returns:
        Whatever ``self.evaluate`` returns for the given input, state halves
        and the twelve gate weights.
    """
    gate_prefixes = ("IG", "FG", "tanh", "OG")

    if parameters is not None:
        prev_C, prev_h = tf.split(state, 2, 1)
        # NOTE(review): the weights are read from self._parameters, not from
        # the `parameters` argument — confirm that callers pass the same dict.
        stored = self._parameters
        weights = [stored[prefix + "_" + kind]
                   for prefix in gate_prefixes
                   for kind in ("Wx", "Uh", "b")]
        return self.evaluate(x, prev_C, prev_h, *weights)

    with tf.variable_scope(scope or ("LSTM" + str(self._n_neurons))):
        prev_C, prev_h = tf.split(state, 2, 1)
        weights = []
        # Per gate, create input weights, recurrent weights and bias, in the
        # order input gate, forget gate, candidate (tanh), output gate.
        for prefix in gate_prefixes:
            weights.append(tf.get_variable(
                prefix + "_Wx", [self._n_inputs, self._n_neurons],
                initializer=xavier_initializer(seed=42)))
            weights.append(tf.get_variable(
                prefix + "_Uh", [self._n_neurons, self._n_neurons],
                initializer=xavier_initializer(seed=42)))
            weights.append(tf.get_variable(
                prefix + "_b", [1, self._n_neurons],
                initializer=tf.ones_initializer()))
        return self.evaluate(x, prev_C, prev_h, *weights)
def gloret(name, shape):
    """Get (or create) a variable of the given shape with Xavier initialisation."""
    init = xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=init)
def infer(self, reuse):
    """Build the convolutional generator graph and return its linear output.

    The input (``self.inputs``) is reshaped to
    ``[-1, splice_dim, input_dim, 1]``, passed through ten convolutions with
    two max-pool / transposed-convolution stages and two cropped skip
    connections, flattened, concatenated with the flattened input and
    projected to 257 outputs by a linear fully connected layer.

    Args:
        reuse: when truthy, variables of the ``'g_model'`` scope are reused
            and the summary prints are suppressed.

    Returns:
        The output tensor of the final fully connected layer.

    Raises:
        ValueError: if ``self.inputs`` is neither 2-D nor 3-D.
    """
    rced = self.rced
    activation_fn = tf.nn.relu
    is_training = True
    input_dim = rced.input_dim
    left_context = rced.left_context
    right_context = rced.right_context
    splice_dim = left_context + 1 + right_context

    in_dims = self.inputs.get_shape().as_list()
    if len(in_dims) == 2:
        # shape format [batch, width]
        assert in_dims[0] == rced.batch_size
        inputs = tf.reshape(self.inputs, [in_dims[0], splice_dim, input_dim])
        inputs = tf.expand_dims(inputs, -1)
    elif len(in_dims) == 3:
        # shape format [batch, length, width]
        assert in_dims[0] == 1
        # The reshape reads self.inputs directly; the squeeze that used to
        # precede it was overwritten immediately and has been dropped.
        inputs = tf.reshape(self.inputs, [-1, splice_dim, input_dim])
        inputs = tf.expand_dims(inputs, -1)
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            'inputs must be 2-D or 3-D, got rank {}'.format(len(in_dims)))

    # If test of cv, BN should use global mean / stddev
    if rced.cross_validation:
        is_training = False

    with tf.variable_scope('g_model') as scope:
        if reuse:
            scope.reuse_variables()

        if rced.batch_norm:
            normalizer_fn = batch_norm
            normalizer_params = {
                "is_training": is_training,
                "scale": True,
                "renorm": True
            }
        else:
            normalizer_fn = None
            normalizer_params = None

        if rced.l2_scale > 0.0 and is_training:
            weights_regularizer = l2_regularizer(rced.l2_scale)
        else:
            weights_regularizer = None

        if not reuse:
            print("*** Generator summary ***")
            print("G inputs shape: {}".format(inputs.get_shape()))

        # inputs format [batch, in_height, in_width, in_channels]
        # filters format [filter_height, filter_width, in_channels, out_channels]
        filters_num = [12, 12, 24, 24, 32, 32, 24, 24, 12, 12]
        filters_width = [13, 11, 9, 7, 7, 7, 7, 9, 11, 13]
        # The original compared filters_num with itself, which can never fail.
        assert len(filters_num) == len(filters_width)

        def _conv(net, idx):
            # Convolution number `idx`; all ten layers share every setting
            # except the filter count and the filter width.
            return tf.contrib.layers.conv2d(
                net, filters_num[idx], [splice_dim, filters_width[idx]],
                activation_fn=activation_fn,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_initializer=xavier_initializer(),
                weights_regularizer=weights_regularizer,
                biases_initializer=tf.zeros_initializer())

        def _crop_like(skip, net):
            # Slice `skip` from the origin to the static height/width of
            # `net`, keeping all batch entries and channels.
            net_shape = net.get_shape()
            print(net_shape[1])
            size = [-1, net_shape[1].value, net_shape[2].value, -1]
            return tf.slice(skip, [0, 0, 0, 0], size)

        # Flattened copy of the input, concatenated to the features at the end.
        inputs_O = tf.reshape(inputs, [-1, splice_dim * input_dim])

        inputs_0 = _conv(inputs, 0)
        inputs_1 = _conv(inputs_0, 1)
        inputs_1 = tf.layers.max_pooling2d(inputs=inputs_1, pool_size=[2, 2],
                                           strides=2, padding='valid')
        inputs_2 = _conv(inputs_1, 2)
        inputs_3 = _conv(inputs_2, 3)
        inputs_3 = tf.layers.max_pooling2d(inputs=inputs_3, pool_size=[2, 2],
                                           strides=2, padding='valid')
        inputs_4 = _conv(inputs_3, 4)
        inputs_5 = _conv(inputs_4, 5)
        inputs_5 = tf.layers.conv2d_transpose(
            inputs_5, filters=filters_num[6], kernel_size=(2, 2),
            strides=(2, 2), padding='valid', activation=tf.nn.relu)
        inputs_6 = _conv(inputs_5, 6)

        # NOTE(review): inputs_3 is taken after its max-pool while inputs_6
        # follows an upsampling step, and the sum below comes after a channel
        # concat — confirm these shapes really line up for the intended input.
        skip_connect_crop = _crop_like(inputs_3, inputs_6)
        inputs_6 = tf.concat([skip_connect_crop, inputs_6], axis=3)
        inputs_6 = inputs_6 + inputs_3

        inputs_7 = _conv(inputs_6, 7)
        inputs_7 = inputs_7 + inputs_6
        inputs_7 = tf.layers.conv2d_transpose(
            inputs_7, filters=filters_num[6], kernel_size=(2, 2),
            strides=(2, 2), padding='valid', activation=tf.nn.relu)
        inputs_8 = _conv(inputs_7, 8)

        skip_connect_crop2 = _crop_like(inputs_1, inputs_8)
        inputs_8 = tf.concat([skip_connect_crop2, inputs_8], axis=3)

        inputs_9 = _conv(inputs_8, 9)
        inputs_9 = tf.layers.max_pooling2d(inputs=inputs_9, pool_size=[2, 2],
                                           strides=2)
        print("***********shaper---------------------")
        print(np.shape(inputs_9))

        # Linear output
        # NOTE(review): the 4 * 128 factor hard-codes the spatial size that
        # inputs_9 is expected to have — confirm against the configured
        # splice_dim / input_dim.
        inputs_D = tf.reshape(inputs_9, [-1, 4 * 128 * filters_num[-1]])
        print("***********reshaper------------after---------")
        print(np.shape(inputs_D))
        inputs_D = tf.concat([inputs_D, inputs_O], 1)
        y = fully_connected(inputs_D, 257,
                            activation_fn=None,
                            weights_initializer=xavier_initializer(),
                            weights_regularizer=weights_regularizer,
                            biases_initializer=tf.zeros_initializer())
        if not reuse:
            print("G output shape: {}".format(y.get_shape()))
            sys.stdout.flush()
    return y
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import os
from config import Config as conf
from preprocess import preprocessor
from tensorflow.contrib.layers import xavier_initializer
from tensorflow.contrib.rnn import LSTMCell

# Input placeholders: a sequence of word ids and the id of the word that
# follows each position, both conf.seq_length - 1 steps long.
data = tf.placeholder(
    dtype=tf.int32,
    shape=[None, conf.seq_length - 1, 1],
    name="sentences")
next_word = tf.placeholder(
    dtype=tf.int32,
    shape=[None, conf.seq_length - 1, 1],
    name="next_word")

# LSTM matrices (all Xavier-initialised, including the output bias).
embedding_matrix = tf.get_variable(
    name="embed",
    shape=[conf.vocab_size, conf.embed_size],
    dtype=tf.float32,
    initializer=xavier_initializer())
output_matrix = tf.get_variable(
    name="output",
    shape=[conf.proj_hidden_state, conf.vocab_size],
    dtype=tf.float32,
    initializer=xavier_initializer())
output_bias = tf.get_variable(
    name="bias",
    shape=[conf.vocab_size],
    dtype=tf.float32,
    initializer=xavier_initializer())
projection_matrix = tf.get_variable(
    name="projection",
    shape=[conf.num_hidden_state, conf.proj_hidden_state],
    dtype=tf.float32,
    initializer=xavier_initializer())

# Embedding lookup; the trailing singleton axis of `data` is carried through
# the lookup (e.g. shape (64, 29, 1, 100)) and removed by the reshape
# (e.g. shape (64, 29, 100)).
word_embeddings = tf.nn.embedding_lookup(embedding_matrix, data)
embedded_shape = [conf.batch_size, conf.seq_length - 1, conf.embed_size]
word_embeddings = tf.reshape(word_embeddings, embedded_shape)
assert word_embeddings.shape == (conf.batch_size, conf.seq_length - 1,
                                 conf.embed_size)

# RNN unrolling
print("creating RNN")
lstm_outputs = []
def xavier_initializer():
    """Return a new Xavier initializer from ``contrib_layers`` with default settings."""
    initializer = contrib_layers.xavier_initializer()
    return initializer
def __call__(self, noisy_w, is_ref, units, spk=None, z_on=True,
             do_prelu=False):
    # TODO: remove c_vec
    """Build the graph propagating (noisy_w) --> x.

    On first pass will make variables.

    Args:
        noisy_w: 2-D or 3-D input tensor; a 2-D input gets a trailing
            singleton axis appended.
        is_ref: whether this is the reference build; enables the progress
            prints, makes ``z`` a non-trainable variable and makes ``prelu``
            return its alpha alongside the activation.
        units: output width of the final dense layer.
        spk: not used by this method.
        z_on: whether a random code ``z`` is concatenated to the encoder
            output.
        do_prelu: use PReLU activations instead of leaky ReLU.

    Returns:
        A list starting with the output tensor, followed by ``z`` when
        ``z_on`` is set and by the PReLU alphas when ``is_ref and do_prelu``.

    Raises:
        ValueError: if the input rank is not 2 or 3, or the configured
            deconvolution type is unknown.
    """
    segan = self.segan

    def make_z(shape, mean=0., std=1., name='z'):
        # Reference build: z is a stored, non-trainable variable; otherwise
        # it is sampled by a random_normal op.
        if is_ref:
            with tf.variable_scope(name) as scope:
                z_init = tf.random_normal_initializer(mean=mean, stddev=std)
                z = tf.get_variable("z", shape,
                                    initializer=z_init,
                                    trainable=False)
                if z.device != "/device:GPU:0":
                    # this has to be created into gpu0
                    print('z.device is {}'.format(z.device))
                    assert False
        else:
            z = tf.random_normal(shape, mean=mean, stddev=std,
                                 name=name, dtype=tf.float32)
        return z

    if hasattr(segan, 'generator_built'):
        tf.get_variable_scope().reuse_variables()
    if is_ref:
        print('*** Building Generator ***')

    in_dims = noisy_w.get_shape().as_list()
    h_i = noisy_w
    if len(in_dims) == 2:
        h_i = tf.expand_dims(noisy_w, -1)
        # Keep in_dims in step with the expanded tensor: the last decoder
        # layer reads in_dims[2], which raised IndexError on the 2-D shape.
        in_dims = h_i.get_shape().as_list()
    elif len(in_dims) < 2 or len(in_dims) > 3:
        raise ValueError('Generator input must be 2-D or 3-D')

    kwidth = 20
    # kwidth = 31
    skips = []
    if is_ref and do_prelu:
        # keep track of prelu activations
        alphas = []

    with tf.variable_scope('g_ae'):
        # AE to be built is shaped:
        # enc ~ [16384x1, 8192x16, 4096x32, 2048x32, 1024x64, 512x64, 256x128, 128x128, 64x256, 32x256, 16x512, 8x1024]
        # dec ~ [8x2048, 16x1024, 32x512, 64x512, 8x256, 256x256, 512x128, 1024x128, 2048x64, 4096x64, 8192x32, 16384x1]
        # FIRST ENCODER
        for layer_idx, layer_depth in enumerate(segan.g_enc_depths):
            bias_init = None
            if segan.bias_downconv:
                if is_ref:
                    print('Biasing downconv in G')
                bias_init = tf.constant_initializer(0.)
            h_i_dwn = downconv(h_i, layer_depth, kwidth=kwidth,
                               init=tf.truncated_normal_initializer(stddev=0.02),
                               bias_init=bias_init,
                               name='enc_{}'.format(layer_idx))
            if is_ref:
                print('Downconv {} -> {}'.format(h_i.get_shape(),
                                                 h_i_dwn.get_shape()))
            h_i = h_i_dwn
            if layer_idx < len(segan.g_enc_depths) - 1:
                if is_ref:
                    print('Adding skip connection downconv '
                          '{}'.format(layer_idx))
                # store skip connection
                # last one is not stored cause it's the code
                skips.append(h_i)
            if do_prelu:
                if is_ref:
                    print('-- Enc: prelu activation --')
                h_i = prelu(h_i, ref=is_ref,
                            name='enc_prelu_{}'.format(layer_idx))
                if is_ref:
                    # split h_i into its components
                    alpha_i = h_i[1]
                    h_i = h_i[0]
                    alphas.append(alpha_i)
            else:
                if is_ref:
                    print('-- Enc: leakyrelu activation --')
                h_i = leakyrelu(h_i)

        if z_on:
            # random code is fused with intermediate representation
            z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                        segan.g_enc_depths[-1]])
            h_i = tf.concat([z, h_i], 2)

        # SECOND DECODER (reverse order)
        g_dec_depths = segan.g_enc_depths[:-1][::-1] + [1]
        if is_ref:
            print('g_dec_depths: ', g_dec_depths)
        for layer_idx, layer_depth in enumerate(g_dec_depths):
            # The target shape of each decoder layer is the shape of the
            # matching encoder skip; the last layer targets the input shape.
            if layer_idx < len(g_dec_depths) - 1:
                h_i_dim = skips[-(layer_idx + 1)].get_shape().as_list()
            else:
                h_i_dim = in_dims
            assert layer_depth == h_i_dim[2]
            assert segan.batch_size == h_i_dim[0]
            out_shape = [h_i_dim[0], h_i_dim[1], layer_depth]
            bias_init = None
            # deconv
            if segan.deconv_type == 'deconv':
                if is_ref:
                    print('-- Transposed deconvolution type --')
                    if segan.bias_deconv:
                        print('Biasing deconv in G')
                if segan.bias_deconv:
                    bias_init = tf.constant_initializer(0.)
                h_i_dcv = deconv(h_i, out_shape, kwidth=kwidth, dilation=2,
                                 init=tf.truncated_normal_initializer(stddev=0.02),
                                 bias_init=bias_init,
                                 name='dec_{}'.format(layer_idx))
            elif segan.deconv_type == 'nn_deconv':
                if is_ref:
                    print('-- NN interpolated deconvolution type --')
                    if segan.bias_deconv:
                        print('Biasing deconv in G')
                if segan.bias_deconv:
                    # nn_deconv is given a plain float here, not an
                    # initializer object as in the 'deconv' branch.
                    bias_init = 0.
                h_i_dcv = nn_deconv(h_i, kwidth=kwidth, dilation=2,
                                    init=tf.truncated_normal_initializer(stddev=0.02),
                                    bias_init=bias_init,
                                    name='dec_{}'.format(layer_idx))
            else:
                raise ValueError('Unknown deconv type {}'.format(segan.deconv_type))
            if is_ref:
                print('Deconv {} -> {}'.format(h_i.get_shape(),
                                               h_i_dcv.get_shape()))
            h_i = h_i_dcv
            if layer_idx < len(g_dec_depths) - 1:
                if do_prelu:
                    if is_ref:
                        print('-- Dec: prelu activation --')
                    h_i = prelu(h_i, ref=is_ref,
                                name='dec_prelu_{}'.format(layer_idx))
                    if is_ref:
                        # split h_i into its components
                        alpha_i = h_i[1]
                        h_i = h_i[0]
                        alphas.append(alpha_i)
                else:
                    if is_ref:
                        print('-- Dec: leakyrelu activation --')
                    h_i = leakyrelu(h_i)
                # fuse skip connection
                skip_ = skips[-(layer_idx + 1)]
                if is_ref:
                    print('Fusing skip connection of '
                          'shape {}'.format(skip_.get_shape()))
                h_i = tf.concat([h_i, skip_], 2)
            else:
                if is_ref:
                    # print('-- Dec: tanh activation --')
                    print('-- Dec: linear layer --')
                # Final layer: drop the channel axis, project to `units`
                # with a dense layer, then restore the channel axis.
                h_i = tf.squeeze(h_i, -1)
                h_i = tf.layers.dense(h_i, units,
                                      kernel_initializer=xavier_initializer())
                h_i = tf.expand_dims(h_i, -1)

        wave = h_i
        if is_ref and do_prelu:
            print('Amount of alpha vectors: ', len(alphas))
        # segan.gen_wave_summ = histogram_summary('gen_wave', wave)
        if is_ref:
            print('Amount of skip connections: ', len(skips))
            print('Last wave shape: ', wave.get_shape())
            print('*************************')
        segan.generator_built = True
        # ret feats contains the features refs to be returned
        ret_feats = [wave]
        if z_on:
            ret_feats.append(z)
        if is_ref and do_prelu:
            ret_feats += alphas
        return ret_feats
def build(self):
    """Build the depthwise-separable convolutional classifier.

    Reads ``self.inputs['images']``, records every intermediate activation in
    ``self.endpoints`` and stores the final 1000-way output in
    ``self.outputs['logits']``.

    Returns:
        The output tensor of the ``'fc1'`` layer.
    """
    # (block index, depthwise stride, pointwise output channels)
    # Blocks 7-11 are the "repeat 5 times" group of 512-channel layers.
    # NOTE(review): block 10 uses stride 2 inside that group — confirm this
    # is intentional.
    # Block 13: the stride of this layer is in doubt; the original paper
    # uses 2.
    block_specs = (
        (1, 1, 64),
        (2, 2, 128),
        (3, 1, 128),
        (4, 2, 256),
        (5, 1, 256),
        (6, 2, 512),
        (7, 1, 512),
        (8, 1, 512),
        (9, 1, 512),
        (10, 2, 512),
        (11, 1, 512),
        (12, 2, 1024),
        (13, 1, 1024),
    )

    endpoints = self.endpoints
    net = self.inputs['images']

    with arg_scope([layers.conv2d, layers.separable_conv2d],
                   padding='SAME',
                   activation_fn=tf.nn.relu6,
                   weights_initializer=layers.xavier_initializer(),
                   weights_regularizer=layers.l2_regularizer(self.weight_decay),
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'is_training': self.is_training}):
        net = layers.conv2d(net, 32, (3, 3), 2, scope='Conv2d_0')
        endpoints['Conv2d_0'] = net

        for index, depthwise_stride, channels in block_specs:
            pointwise_name = 'Pointwise_Conv2d_{}'.format(index)
            # set num_outputs to None to skip the point-wise convolution
            net = layers.separable_conv2d(
                net, None, (3, 3), 1,
                stride=depthwise_stride,
                scope='Depthwise_Conv2d_{}'.format(index))
            net = layers.conv2d(net, channels, (1, 1), scope=pointwise_name)
            endpoints[pointwise_name] = net

        # Global average pooling over the two spatial axes.
        net = tf.reduce_mean(net, keepdims=True, axis=[1, 2])
        endpoints['global_pooling'] = net

        net = layers.flatten(net)
        # NOTE(review): fully_connected is not covered by the arg_scope above,
        # so it runs with its own default activation — confirm that is what
        # is wanted for a tensor stored as 'logits'.
        net = layers.fully_connected(net, 1000, scope='fc1')
        endpoints['fc1'] = net

    self.outputs['logits'] = net
    return net
def fast_text_model_fn(self, features, labels, mode, params):
    """Estimator ``model_fn`` for a fastText-style text classifier.

    Text is split into words, mapped to ids through the vocabulary file,
    padded/truncated to ``self.MAX_LEN``, embedded, summed over the sequence
    axis and classified by two dense layers with dropout.

    Args:
        features: dict holding the raw text under ``self.FEATURE_COL``.
        labels: class labels (unused in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.
        params: not used by this function.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL mode
        (None for any other mode).
    """
    vocab_table = lookup.index_table_from_file(
        vocabulary_file=self.VOCAB_FILE, num_oov_buckets=1, default_value=-1)
    text = features[self.FEATURE_COL]
    words = tf.string_split(text)
    dense_words = tf.sparse_tensor_to_dense(words,
                                            default_value=self.PAD_WORD)
    word_ids = vocab_table.lookup(dense_words)

    # Pad all the word_ids entries to the maximum document length, then cut
    # every row back to exactly MAX_LEN ids.
    padding = tf.constant([[0, 0], [0, self.MAX_LEN]])
    word_ids_padded = tf.pad(word_ids, padding)
    word_id_vector = tf.slice(word_ids_padded, [0, 0], [-1, self.MAX_LEN])

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.keras.backend.set_learning_phase(True)
    else:
        tf.keras.backend.set_learning_phase(False)

    with tf.name_scope('embedding'):
        embedding_vectors = layers.embed_sequence(
            word_id_vector,
            vocab_size=self.VOCAB_LEN,
            embed_dim=self.EMBED_DIM,
            initializer=layers.xavier_initializer(seed=42))
        tf.logging.info('Word Vectors = {}'.format(embedding_vectors))

    with tf.name_scope('fast_text'):
        # Despite the name, this is a sum over the sequence axis.
        average_vectors = tf.reduce_sum(embedding_vectors, axis=1)
        tf.logging.info('Average Word Vectors = {}'.format(average_vectors))

    with tf.name_scope('hidden_layer'):
        fc1 = tf.keras.layers.Dense(1024, activation='relu')(average_vectors)
        d1 = tf.keras.layers.Dropout(0.5)(fc1)
        # Integer division: a layer width must be an int, and `/` yields a
        # float on Python 3.
        fc2 = tf.keras.layers.Dense(self.EMBED_DIM // 2,
                                    activation='relu')(d1)
        d2 = tf.keras.layers.Dropout(0.5)(fc2)
        tf.logging.info('Hidden Layer = {}'.format(d2))

    with tf.name_scope('output'):
        logits = tf.keras.layers.Dense(self.TARGET_SIZE, activation=None)(d2)
        tf.logging.info('Logits Layer = {}'.format(logits))

    probabilities = tf.nn.softmax(logits)
    predicted_indices = tf.argmax(probabilities, axis=1)
    tf.summary.histogram('fasttext', average_vectors)
    tf.summary.histogram('softmax', probabilities)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class': predicted_indices,
            'probabilities': probabilities
        }
        exported_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=exported_outputs)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                  logits=logits)
    tf.summary.scalar('loss', loss)
    acc = tf.equal(predicted_indices, labels)
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))
    tf.summary.scalar('acc', acc)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics_ops = {
            'accuracy': tf.metrics.accuracy(labels=labels,
                                            predictions=predicted_indices),
            'precision': tf.metrics.precision(labels=labels,
                                              predictions=predicted_indices),
            'recall': tf.metrics.recall(labels=labels,
                                        predictions=predicted_indices),
            'f1_score': self.streaming_f1(labels=labels,
                                          predictions=predicted_indices,
                                          n_classes=self.TARGET_SIZE)
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metrics_ops)
def single_layer(weight_initialize):
    """Build a single linear layer over ``MNIST.X``.

    Args:
        weight_initialize: when truthy, the weights use Xavier initialisation
            and the biases start at zero; otherwise both are drawn from
            ``tf.random_normal``.

    Returns:
        ``(hypothesis, [W, b])`` — the layer output and its two variables.
    """
    weight_shape = [MNIST.IMAGE_PIXELS, MNIST.NUM_CLASSES]
    bias_shape = [MNIST.NUM_CLASSES]

    if not weight_initialize:
        W = tf.Variable(tf.random_normal(weight_shape), name='weights')
        b = tf.Variable(tf.random_normal(bias_shape), name='biases')
    else:
        W = tf.get_variable('weights', shape=weight_shape,
                            initializer=xavier_initializer())
        b = tf.Variable(tf.zeros(bias_shape), name='biases')

    projected = tf.matmul(MNIST.X, W)
    hypothesis = tf.add(projected, b)
    return hypothesis, [W, b]
def __init__(self,
             vocabs,
             save_path,
             n_filters=(128, 256),
             filter_width=3,
             token_embeddings_dim=128,
             char_embeddings_dim=50,
             use_char_embeddins=True,
             embeddings_dropout=False,
             dense_dropout=False,
             use_batch_norm=False,
             logging=False,
             entity_of_interest=None,
             use_crf=False,
             net_type='cnn',
             char_filter_width=5,
             verbouse=False,
             embeddings_onethego=False,
             train_now=False,
             load_path=None,
             **kwargs):
    """Build the tagging network graph, create a session and initialise it.

    Args:
        vocabs: dict with ``'tag_vocab'``, ``'token_vocab'`` and
            ``'char_vocab'`` entries.
        save_path, load_path, train_now: forwarded to the parent constructor.
        n_filters: filter counts passed to the chosen network builder.
        filter_width: convolution width for the CNN variants.
        token_embeddings_dim: token embedding size (also the last dimension
            of ``x_word`` when ``embeddings_onethego`` is set).
        char_embeddings_dim, char_filter_width: character network settings.
        use_char_embeddins: concatenate character features to token
            embeddings.
        embeddings_dropout: apply dropout to the embeddings.
        dense_dropout: only used to set the ``_use_dropout`` flag.
        use_batch_norm: forwarded to the CNN builders.
        logging: create a summary ``FileWriter``.
        entity_of_interest: stored on the instance.
        use_crf: use a CRF log-likelihood loss instead of masked softmax
            cross-entropy.
        net_type: substring-matched, case-insensitively, against
            ``'cnn_highway'``, ``'cnn'`` and ``'rnn'``, in that order.
        verbouse: print the number of parameters.
        embeddings_onethego: feed ready-made embeddings through ``x_word``.
        **kwargs: must contain ``'mode'``.

    Raises:
        KeyError: if ``net_type`` matches no known network (or ``'mode'`` is
            missing from ``kwargs``).
    """
    super().__init__(save_path=save_path,
                     load_path=load_path,
                     train_now=train_now,
                     mode=kwargs['mode'])

    n_tags = len(vocabs['tag_vocab'])
    n_tokens = len(vocabs['token_vocab'])
    n_chars = len(vocabs['char_vocab'])

    # Create placeholders
    if embeddings_onethego:
        x_word = tf.placeholder(dtype=tf.float32,
                                shape=[None, None, token_embeddings_dim],
                                name='x_word')
    else:
        x_word = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                name='x_word')
    x_char = tf.placeholder(dtype=tf.int32, shape=[None, None, None],
                            name='x_char')
    y_true = tf.placeholder(dtype=tf.int32, shape=[None, None], name='y_tag')

    # Auxiliary placeholders
    learning_rate_ph = tf.placeholder(dtype=tf.float32, shape=[],
                                      name='learning_rate')
    dropout_ph = tf.placeholder_with_default(1.0, shape=[])
    training_ph = tf.placeholder_with_default(False, shape=[])
    mask_ph = tf.placeholder(dtype=tf.float32, shape=[None, None])

    # Embeddings
    if not embeddings_onethego:
        with tf.variable_scope('Embeddings'):
            w_emb = embedding_layer(x_word,
                                    n_tokens=n_tokens,
                                    token_embedding_dim=token_embeddings_dim)
            if use_char_embeddins:
                c_emb = character_embedding_network(
                    x_char,
                    n_characters=n_chars,
                    char_embedding_dim=char_embeddings_dim,
                    filter_width=char_filter_width)
                emb = tf.concat([w_emb, c_emb], axis=-1)
            else:
                emb = w_emb
    else:
        emb = x_word

    # Dropout for embeddings
    if embeddings_dropout:
        emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)

    # 'cnn_highway' contains 'cnn', so it has to be tested first; in the
    # previous order the highway branch could never be reached.
    net_type_lower = net_type.lower()
    if 'cnn_highway' in net_type_lower:
        units = highway_convolutional_network(emb,
                                              n_filters=n_filters,
                                              filter_width=filter_width,
                                              use_batch_norm=use_batch_norm,
                                              training_ph=training_ph)
    elif 'cnn' in net_type_lower:
        # Convolutional network
        with tf.variable_scope('ConvNet'):
            units = stacked_convolutions(emb,
                                         n_filters=n_filters,
                                         filter_width=filter_width,
                                         use_batch_norm=use_batch_norm,
                                         training_ph=training_ph)
    elif 'rnn' in net_type_lower:
        units = stacked_rnn(emb, n_filters, cell_type='lstm')
    else:
        raise KeyError('There is no such type of network: {}'.format(net_type))

    # Classifier
    with tf.variable_scope('Classifier'):
        logits = tf.layers.dense(units, n_tags,
                                 kernel_initializer=xavier_initializer())

    # Loss with masking
    if use_crf:
        sequence_lengths = tf.reduce_sum(mask_ph, axis=1)
        log_likelihood, trainsition_params = tf.contrib.crf.crf_log_likelihood(
            logits, y_true, sequence_lengths)
        loss_tensor = -log_likelihood
        predictions = None
    else:
        ground_truth_labels = tf.one_hot(y_true, n_tags)
        loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_labels, logits=logits)
        loss_tensor = loss_tensor * mask_ph
        predictions = tf.argmax(logits, axis=-1)
    loss = tf.reduce_mean(loss_tensor)

    # Initialize session
    sess = tf.Session()
    if verbouse:
        self.print_number_of_parameters()
    if logging:
        self.train_writer = tf.summary.FileWriter('summary', sess.graph)

    self.token_vocab = vocabs['token_vocab']
    self.tag_vocab = vocabs['tag_vocab']
    self.char_vocab = vocabs['char_vocab']
    self._use_crf = use_crf
    self.summary = tf.summary.merge_all()
    self._x_w = x_word
    self._x_c = x_char
    self._y_true = y_true
    self._y_pred = predictions
    if use_crf:
        # Only the CRF path stores logits, transitions and lengths.
        self._logits = logits
        self._trainsition_params = trainsition_params
        self._sequence_lengths = sequence_lengths
    self._loss = loss
    self._sess = sess
    self._learning_rate_ph = learning_rate_ph
    self._dropout = dropout_ph
    self._loss_tensor = loss_tensor
    self._use_dropout = True if embeddings_dropout or dense_dropout else None
    self._training_ph = training_ph
    self._logging = logging
    self._train_op = self.get_train_op(loss, learning_rate_ph)
    self._embeddings_onethego = embeddings_onethego
    self._entity_of_interest = entity_of_interest
    self.verbouse = verbouse
    self._mask = mask_ph
    sess.run(tf.global_variables_initializer())
def _run_backend_specific_init(self):
    """Create the Xavier initializer from ``self.args`` and store it.

    Reads the ``'uniform'`` and ``'seed'`` entries of ``self.args`` and the
    dtype reported by ``self._get_dtype()``; the result is kept in
    ``self._initializer``.
    """
    settings = self.args
    self._initializer = tf_layers.xavier_initializer(
        uniform=settings['uniform'],
        seed=settings['seed'],
        dtype=self._get_dtype(),
    )
def add_model(self, inputs):
    """Build the two-stage convolutional model followed by three dense layers.

    Args:
        inputs: tensor reshaped to
            ``[-1, sequence_length, sequence_width, 1]``.

    Returns:
        The output of the last (``label_size``-wide) linear layer.

    Raises:
        ValueError: if the configuration defines more than one filter type.
    """
    cfg = self.config

    def _bias(name, width):
        # Bias variable initialised to the constant 0.1.
        return tf.get_variable(
            name, initializer=tf.constant(0.1, shape=[width]))

    branch_outputs = []
    inputs = tf.reshape(
        inputs, [-1, cfg.sequence_length, cfg.sequence_width, 1])

    for i, filter_size in enumerate(cfg.conv1_filter_sizes):
        with tf.variable_scope('filter{}'.format(filter_size)):
            # W and b of the first-layer filter
            conv1_W = tf.get_variable(
                'conv1_W',
                shape=[filter_size, 1, 1, cfg.conv1_filter_num],
                initializer=xavier_initializer())
            conv1_b = _bias('conv1_b', cfg.conv1_filter_num)
            # convolution
            conv1_raw = tf.nn.conv2d(inputs, conv1_W, [1, 1, 1, 1],
                                     padding=cfg.conv1_padding)
            conv1_out = tf.nn.relu(conv1_raw + conv1_b)
            # pooling
            pool1_b = _bias('pool1_b', cfg.conv1_filter_num)
            pool1_window = [1, cfg.conv1_pool_sizes[i], 1, 1]
            pool1_out = tf.nn.max_pool(
                conv1_out,
                pool1_window,
                [1, cfg.conv1_pool_sizes[i], 1, 1],
                padding=cfg.conv1_padding)
            pool1_out = tf.nn.tanh(pool1_out + pool1_b)
            dropped1 = tf.nn.dropout(pool1_out, self.keep_prob)

            # W and b of the second-layer filter
            conv2_W = tf.get_variable(
                'conv2_W',
                shape=[
                    cfg.conv2_filter_sizes[i],
                    cfg.sequence_width,
                    conv1_out.get_shape()[3],
                    cfg.conv2_filter_num
                ],
                initializer=xavier_initializer())
            conv2_b = _bias('conv2_b', cfg.conv2_filter_num)
            # convolution
            conv2_raw = tf.nn.conv2d(dropped1, conv2_W, [1, 1, 1, 1],
                                     padding=cfg.conv2_padding)
            conv2_out = tf.nn.relu(conv2_raw + conv2_b)
            # pooling
            # NOTE(review): this pool uses conv1_padding, not conv2_padding —
            # confirm that is intended.
            pool2_b = _bias('pool2_b', cfg.conv2_filter_num)
            pool2_window = [1, cfg.conv2_pool_sizes[i], 1, 1]
            pool2_out = tf.nn.max_pool(
                conv2_out,
                pool2_window,
                [1, cfg.conv2_pool_sizes[i], 1, 1],
                padding=cfg.conv1_padding)
            pool2_out = tf.nn.tanh(pool2_out + pool2_b)
            dropped2 = tf.nn.dropout(pool2_out, self.keep_prob)
            branch_outputs.append(dropped2)
            # add regularisation terms (disabled for the conv kernels)
            # tf.add_to_collection('total_loss', 0.5 * self.config.l2_reg_lambda * tf.nn.l2_loss(conv1_W))
            # tf.add_to_collection('total_loss', 0.5 * self.config.l2_reg_lambda * tf.nn.l2_loss(conv2_W))

    total_channels = len(cfg.conv2_filter_sizes) * cfg.conv2_filter_num
    if len(branch_outputs) != 1:
        raise ValueError(
            'This version can only support one type of filter, '
            'rather than {}'.format(len(self.config.conv2_filter_sizes)))
    only_branch = branch_outputs[0]
    flat = tf.reshape(
        only_branch,
        [-1, total_channels * int(only_branch.get_shape()[1])])

    # first fully connected layer (its weights are L2-regularised)
    FC1_W = tf.get_variable(
        'FC_W',
        shape=[flat.get_shape()[1], cfg.hidden_size],
        initializer=xavier_initializer())
    FC1_b = tf.Variable(initial_value=tf.zeros([cfg.hidden_size]),
                        name='FC_b')
    hidden1 = tf.matmul(flat, FC1_W) + FC1_b
    tf.add_to_collection(
        'total_loss', 0.5 * cfg.l2_reg_lambda * tf.nn.l2_loss(FC1_W))

    # second fully connected layer
    FC2_W = tf.get_variable(
        'FC2_W',
        shape=[cfg.hidden_size, cfg.hidden2_size],
        initializer=xavier_initializer())
    FC2_b = tf.Variable(initial_value=tf.zeros([cfg.hidden2_size]),
                        name='FC2_b')
    hidden2 = tf.matmul(hidden1, FC2_W) + FC2_b

    # output layer feeding the softmax
    sm_W = tf.get_variable(
        'sm_W',
        shape=[cfg.hidden2_size, cfg.label_size],
        initializer=xavier_initializer())
    sm_b = tf.Variable(initial_value=tf.zeros([cfg.label_size]),
                       name='sm_b')
    return tf.matmul(hidden2, sm_W) + sm_b
def _build_layers(self, inputs, num_outputs, options):
    """Build the combined vision + metrics network.

    `inputs` is a flat tensor whose second dimension must equal
    prod(image_shape) + 5 + 2. The first prod(image_shape) columns are
    reshaped back into an image and passed through a conv stack; the
    remaining columns go through one fully connected "metrics" layer. Both
    branches are concatenated and passed through the `fcnet_hiddens` layers
    and a final linear layer.

    Args:
        inputs: 2-D tensor [batch, prod(image_shape) + 7].
        num_outputs: width of the final linear layer.
        options: model config dict. Reads
            options["custom_options"]["image_shape"] (required),
            "conv_filters" (list of [out_size, kernel, stride]),
            "fcnet_hiddens" and "fcnet_activation" ("tanh" or "relu").

    Returns:
        Tuple (output, last_layer): the final linear layer and the last
        hidden layer that feeds it.

    Raises:
        KeyError: if the image shape is missing from `options`.
        ValueError: if "fcnet_activation" is neither "tanh" nor "relu".
        AssertionError: if the width of `inputs` is not the expected size.
    """
    # Parse options. list() lets a tuple image_shape be concatenated with
    # the dynamic batch dimension further down.
    image_shape = list(options["custom_options"]["image_shape"])
    convs = options.get("conv_filters", [
        [16, [8, 8], 4],
        [32, [5, 5], 3],
        [32, [5, 5], 2],
        [512, [10, 10], 1],
    ])
    hiddens = options.get("fcnet_hiddens", [64])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Previously `activation` was left unbound here and the failure
        # surfaced much later as an unrelated error.
        raise ValueError(
            "Unsupported fcnet_activation {!r}; expected 'tanh' or "
            "'relu'".format(fcnet_activation))

    # Sanity checks (np.prod: the np.product alias was removed in NumPy 2.0)
    image_size = np.prod(image_shape)
    expected_shape = [image_size + 5 + 2]
    assert inputs.shape.as_list()[1:] == expected_shape, \
        (inputs.shape.as_list()[1:], expected_shape)

    # Reshape the input vector back into its components
    vision_in = tf.reshape(inputs[:, :image_size],
                           [tf.shape(inputs)[0]] + image_shape)
    metrics_in = inputs[:, image_size:]
    print("Vision in shape", vision_in)
    print("Metrics in shape", metrics_in)

    # Setup vision layers
    with tf.name_scope("carla_vision"):
        for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
            vision_in = slim.conv2d(
                vision_in, out_size, kernel, stride,
                scope="conv{}".format(i))
        out_size, kernel, stride = convs[-1]
        # The last conv uses VALID padding; the squeeze below requires it to
        # collapse both spatial axes to size 1.
        vision_in = slim.conv2d(
            vision_in, out_size, kernel, stride,
            padding="VALID", scope="conv_out")
        vision_in = tf.squeeze(vision_in, [1, 2])

    # Setup metrics layer
    with tf.name_scope("carla_metrics"):
        metrics_in = slim.fully_connected(
            metrics_in, 64,
            weights_initializer=xavier_initializer(),
            activation_fn=activation,
            scope="metrics_out")

    print("Shape of vision out is", vision_in.shape)
    print("Shape of metric out is", metrics_in.shape)

    # Combine the metrics and vision inputs
    with tf.name_scope("carla_out"):
        last_layer = tf.concat([vision_in, metrics_in], axis=1)
        print("Shape of concatenated out is", last_layer.shape)
        for i, size in enumerate(hiddens, 1):
            last_layer = slim.fully_connected(
                last_layer, size,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="fc{}".format(i))
        output = slim.fully_connected(
            last_layer, num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

    return output, last_layer
def attention_decoder(decoder_inputs: list, initial_state: tf.Tensor,
                      encoder_states, encoder_padding_mask, cell,
                      init_state_attention=False):
    """Run an RNN decoder with additive (Bahdanau-style) attention.

    At each step the decoder input is concatenated with the previous context
    vector, projected to `state_size`, fed to `cell`, and a new context
    vector is computed from the resulting state using
    e_i = sum(v * tanh(W_h h_i + W_s s_t + b)).

    Args:
        decoder_inputs: list of per-step input tensors, each
            [batch, embedding].
        initial_state: initial decoder state passed to `cell`.
        encoder_states: encoder hidden states, unpacked here as
            [batch, steps, state_size].
        encoder_padding_mask: mask multiplied into the softmaxed scores, so
            positions where it is 0 receive zero attention.
            NOTE(review): the original comment said padding is marked 1,
            which contradicts that multiplication -- confirm the convention
            with the caller.
        cell: RNN cell called as `cell(inputs, state)`.
        init_state_attention: when True, the first context vector is
            computed from `initial_state` instead of being all zeros.

    Returns:
        Tuple (outputs, state, attention_dist): the list of per-step
        outputs, the final decoder state, and the attention distribution of
        the LAST step only (reshaped to [batch, steps, 1, 1]); it is None
        when `decoder_inputs` is empty.
    """
    with tf.variable_scope("attention_decoder"):
        # NOTE(review): these are dynamic (tensor) dimensions and are used
        # below as variable shapes -- confirm this works with the TF version
        # and execution mode in use.
        batch_size, _, state_size = tf.unstack(tf.shape(encoder_states))
        # (batch_size, time_step, 1, state_size)
        encoder_states = tf.expand_dims(encoder_states, axis=2)
        # Bahdanau attention: v^T * tanh(w_h h_i + w_s s_t + b)
        attention_size = state_size
        w_h = tf.get_variable(shape=[1, 1, state_size, attention_size],
                              name="w_h",
                              initializer=layers.xavier_initializer())
        # A 1x1 convolution applies w_h to every encoder position at once.
        encoder_features = tf.nn.conv2d(encoder_states, w_h,
                                        strides=[1, 1, 1, 1],
                                        padding="SAME")
        v = tf.get_variable(shape=[attention_size],
                            initializer=layers.xavier_initializer(),
                            name="v")

        def attention(decoder_state):
            """Return (context_vector, attention_dist) for one decoder state."""
            with tf.variable_scope("attention"):
                # W_s * s_t + b, shape [batch_size, attention_size]
                decoder_features = linear_layer(decoder_state,
                                                attention_size,
                                                "decoder_mat")
                # shape [batch_size, 1, 1, attention_size]
                decoder_features = tf.expand_dims(
                    tf.expand_dims(decoder_features, 1), 1)

                def masked_attention(e):
                    """Softmax `e`, apply the mask, then renormalise."""
                    attention_dist = tf.nn.softmax(e)
                    attention_dist *= encoder_padding_mask
                    masked_sums = tf.reduce_sum(attention_dist, axis=1,
                                                keep_dims=True)
                    return attention_dist / masked_sums

                # encoder features: [batch, max_steps, 1, attention_size]
                # decoder features: [batch, 1, 1, attention_size]
                # the decoder features broadcast over the step axis
                e = tf.reduce_sum(
                    v * tf.tanh(encoder_features + decoder_features),
                    axis=[2, 3])
                # attention_dist: [batch, max_steps]
                attention_dist = masked_attention(e)
                attention_dist = tf.reshape(attention_dist,
                                            [batch_size, -1, 1, 1])
                context_vector = tf.reduce_sum(
                    attention_dist * encoder_states, axis=[1, 2])
                context_vector = tf.reshape(context_vector,
                                            [-1, state_size])
            return context_vector, attention_dist

        outputs = []
        # Pre-initialised so that an empty `decoder_inputs` returns None
        # instead of raising UnboundLocalError.
        attention_dist = None
        state = initial_state
        context_vector = tf.zeros([batch_size, attention_size])
        if init_state_attention:
            # attention() returns a pair; the previous code bound the whole
            # tuple to context_vector and broke the concat below.
            context_vector, _ = attention(state)

        # iterate over every decoder time step
        for i, decoder_input in enumerate(decoder_inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            # concat decoder input and context vector
            decoder_new_input = linear_layer(
                tf.concat([decoder_input, context_vector], axis=1),
                state_size, scope="projected_context")
            # run the decoder rnn cell (instance of tf.nn.rnn_cell)
            cell_output, state = cell(decoder_new_input, state)
            if i == 0 and init_state_attention:
                # The attention variables already exist from the call above.
                # Reuse is scoped to this one call: the previous unscoped
                # reuse_variables() also turned on reuse for the
                # "attention_projection" variables that are first created
                # just below.
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    context_vector, attention_dist = attention(state)
            else:
                context_vector, attention_dist = attention(state)
            # concat context vector and decoder hidden state, then project
            with tf.variable_scope("attention_projection") as scope:
                output = linear_layer(
                    tf.concat([cell_output, context_vector], axis=1),
                    state_size, scope)
            outputs.append(output)
    return outputs, state, attention_dist
def weight_variable(name, shape, regularization=None):
    """Get or create a Xavier-initialised variable called `name`.

    Any non-None `regularization` attaches an L2 regulariser with a fixed
    scale of 1e-5; the value itself is not inspected.
    """
    l2_penalty = l2_regularizer(1e-5) if regularization is not None else None
    return tf.get_variable(
        name,
        shape=shape,
        initializer=xavier_initializer(),
        regularizer=l2_penalty,
    )
convResult = 512 fc1Units = 256 fc2Units = 128 keep_prob = 0.7 margin = 0.5 learning_rate = 0.01 isTrain = True graph = tf.Graph() with graph.as_default(): WC1 = tf.get_variable('WC1', [filterSize, filterSize, channels, filter1], tf.float32, lays.xavier_initializer()) bC1 = tf.get_variable('bC1', [filter1], tf.float32, tf.zeros_initializer()) WC2 = tf.get_variable('WC2', [filterSize, filterSize, filter1, filter2], tf.float32, lays.xavier_initializer()) bC2 = tf.get_variable('bC2', [filter2], tf.float32, tf.zeros_initializer()) WC3 = tf.get_variable('WC3', [filterSize, filterSize, filter2, filter3], tf.float32, lays.xavier_initializer()) bC3 = tf.get_variable('bC3', [filter3], tf.float32, tf.zeros_initializer()) WC4 = tf.get_variable('WC4', [filterSize, filterSize, filter3, filter4], tf.float32, lays.xavier_initializer()) bC4 = tf.get_variable('bC4', [filter4], tf.float32, tf.zeros_initializer()) WC5 = tf.get_variable('WC5', [filterSize, filterSize, filter4, filter5],
def _build_model(self):
    """Assemble the forecasting graph, its loss and its evaluation metrics.

    Two branches are built and summed:

    * "inputs": input_x flattened to [-1, nsteps * nfeatures], passed
      through a 32-unit linear layer and a bias-free 1-unit linear layer
      (the auto-regressive term).
    * "short_term": conv1d -> gru -> temporal_attention on input_x; the
      attention context and the last GRU output are concatenated and fed to
      a 1-unit tanh layer.

    Sets self.input, self.predictions, self.loss (MSE) and the metrics
    self.rmse, self.rse, self.mae, self.mape and self.smape, then calls
    self.add_train_op() and self.initialize_session().
    """
    gru_outputs = []
    self.add_placeholder()
    # get auto-regression
    with tf.variable_scope("inputs"):
        self.input = tf.reshape(
            self.input_x, [-1, self.config.nsteps * self.config.nfeatures])
        #pred = self.mlp_net(self.input, hidden_layers, 1, name="mlp_net")
        self.input = tf.layers.dense(self.input, 32, None, use_bias=True)
        #self.input = tf.layers.dense(self.input, 6, None, use_bias = True)
        # tf.squeeze drops every size-1 axis of the [batch, 1] dense output.
        result = tf.squeeze(
            tf.layers.dense(self.input, 1, None, use_bias=False))
    with tf.variable_scope("short_term"):
        conv = self.conv1d(self.input_x,
                           self.config.kernel_sizes,
                           self.config.num_filters,
                           scope="short_term")
        gru_outputs = self.gru(conv, scope="short_gru")  # [b, t, d]
        print(gru_outputs)
        context = self.temporal_attention(gru_outputs)  # [b, d]
        last_hidden_states = gru_outputs[:, -1, :]  # [b, d]
        linear_inputs = tf.concat([context, last_hidden_states], axis=1)
    # prediction and loss
    result_ = tf.layers.dense(
        linear_inputs,
        1,
        activation=tf.nn.tanh,
        use_bias=True,
        kernel_regularizer=self.regularizer,
        kernel_initializer=layers.xavier_initializer())
    # NOTE(review): `result` was squeezed while `result_` keeps its trailing
    # unit axis; if they are [b] and [b, 1] this sum broadcasts to [b, b] --
    # confirm the intended shapes against self.targets.
    self.predictions = result + result_
    #self.predictions = tf.squeeze(tf.layers.dense(ar, 1))
    self.loss = tf.losses.mean_squared_error(labels=self.targets,
                                             predictions=self.predictions)
    # Root of the summed squared error, and the same for the targets'
    # deviation from their mean; their ratio is stored as self.rse below.
    error = tf.reduce_sum((self.targets - self.predictions)**2)**0.5
    denom = tf.reduce_sum(
        (self.targets - tf.reduce_mean(self.targets))**2)**0.5
    self.rmse = tf.sqrt(
        tf.reduce_mean(
            tf.square(tf.subtract(self.targets, self.predictions))))
    self.rse = error / denom
    self.mae = tf.reduce_mean(tf.abs(self.targets - self.predictions))
    # Divides by the targets, so a zero target makes the value non-finite.
    self.mape = tf.reduce_mean(
        tf.abs((self.targets - self.predictions) / self.targets))
    self.smape = tf.reduce_mean(
        2 * tf.abs(self.targets - self.predictions) /
        (tf.abs(self.targets) + tf.abs(self.predictions)))
    # The string literal below is disabled code kept as a no-op expression.
    ''' if self.config.l2_lambda > 0: reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) reg_term = layers.apply_regularization(self.regularizer, reg_vars) self.loss += reg_term '''
    #self.loss += ar_loss
    #self.loss = ar_loss
    self.add_train_op()
    self.initialize_session()
def create_weights(self, name, shape):
    """Create a Xavier-initialised variable and record it in self.weights.

    Returns the new variable.
    """
    weight = tf.get_variable(
        name,
        shape=shape,
        initializer=xavier_initializer(),
    )
    # Keep a handle so callers can enumerate the weights created here.
    self.weights.append(weight)
    return weight
def _init(self, inputs, num_outputs, options):
    """Build the combined vision + metrics network.

    `inputs` is a flat tensor whose second dimension must equal
    prod(image_shape) + 5 + 2. The first prod(image_shape) columns are
    reshaped back into an image and passed through a conv stack; the
    remaining columns go through one fully connected "metrics" layer. Both
    branches are concatenated and passed through the `fcnet_hiddens` layers
    and a final linear layer.

    Args:
        inputs: 2-D tensor [batch, prod(image_shape) + 7].
        num_outputs: width of the final linear layer.
        options: model config dict. Reads
            options["custom_options"]["image_shape"] (required),
            "conv_filters" (list of [out_size, kernel, stride]),
            "fcnet_hiddens" and "fcnet_activation" ("tanh" or "relu").

    Returns:
        Tuple (output, last_layer): the final linear layer and the last
        hidden layer that feeds it.

    Raises:
        KeyError: if the image shape is missing from `options`.
        ValueError: if "fcnet_activation" is neither "tanh" nor "relu".
        AssertionError: if the width of `inputs` is not the expected size.
    """
    # Parse options. list() lets a tuple image_shape be concatenated with
    # the dynamic batch dimension further down.
    image_shape = list(options["custom_options"]["image_shape"])
    convs = options.get("conv_filters", [
        [16, [8, 8], 4],
        [32, [5, 5], 3],
        [32, [5, 5], 2],
        [512, [10, 10], 1],
    ])
    hiddens = options.get("fcnet_hiddens", [64])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Previously `activation` was left unbound here and the failure
        # surfaced much later as an unrelated error.
        raise ValueError(
            "Unsupported fcnet_activation {!r}; expected 'tanh' or "
            "'relu'".format(fcnet_activation))

    # Sanity checks (np.prod: the np.product alias was removed in NumPy 2.0)
    image_size = np.prod(image_shape)
    expected_shape = [image_size + 5 + 2]
    assert inputs.shape.as_list()[1:] == expected_shape, \
        (inputs.shape.as_list()[1:], expected_shape)

    # Reshape the input vector back into its components
    vision_in = tf.reshape(inputs[:, :image_size],
                           [tf.shape(inputs)[0]] + image_shape)
    metrics_in = inputs[:, image_size:]
    print("Vision in shape", vision_in)
    print("Metrics in shape", metrics_in)

    # Setup vision layers
    with tf.name_scope("carla_vision"):
        for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
            vision_in = slim.conv2d(vision_in,
                                    out_size,
                                    kernel,
                                    stride,
                                    scope="conv{}".format(i))
        out_size, kernel, stride = convs[-1]
        # The last conv uses VALID padding; the squeeze below requires it to
        # collapse both spatial axes to size 1.
        vision_in = slim.conv2d(vision_in,
                                out_size,
                                kernel,
                                stride,
                                padding="VALID",
                                scope="conv_out")
        vision_in = tf.squeeze(vision_in, [1, 2])

    # Setup metrics layer
    with tf.name_scope("carla_metrics"):
        metrics_in = slim.fully_connected(
            metrics_in, 64,
            weights_initializer=xavier_initializer(),
            activation_fn=activation,
            scope="metrics_out")

    print("Shape of vision out is", vision_in.shape)
    print("Shape of metric out is", metrics_in.shape)

    # Combine the metrics and vision inputs
    with tf.name_scope("carla_out"):
        last_layer = tf.concat([vision_in, metrics_in], axis=1)
        print("Shape of concatenated out is", last_layer.shape)
        for i, size in enumerate(hiddens, 1):
            last_layer = slim.fully_connected(
                last_layer, size,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="fc{}".format(i))
        output = slim.fully_connected(
            last_layer, num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

    return output, last_layer
def __call__(self, noisy_w, units, is_ref, spk=None):
    """Build the generator graph mapping `noisy_w` to an output wave.

    On the first call the variables are created; later calls (detected via
    the `generator_built` attribute set on self.segan) switch the current
    variable scope to reuse.

    Args:
        noisy_w: 2-D or 3-D input tensor; a 2-D input gets a trailing axis.
        units: output width of the final dense layer.
        is_ref: when True the latent z is a non-trainable variable, otherwise
            it is a fresh tf.random_normal sample.
        spk: not used by this method.

    Returns:
        Tuple (wave, z): the dense-projected output and the latent tensor.

    Raises:
        ValueError: if `noisy_w` is not 2-D or 3-D.
        AssertionError: if the reference z variable is not placed on
            "/device:GPU:0".
    """
    segan = self.segan

    def make_z(shape, mean=0., std=1., name='z'):
        # Reference pass: z is stored as a non-trainable variable.
        # Otherwise z is sampled anew from a normal distribution.
        if is_ref:
            with tf.variable_scope(name) as scope:
                z_init = tf.random_normal_initializer(mean=mean, stddev=std)
                z = tf.get_variable("z", shape,
                                    initializer=z_init,
                                    trainable=False
                                    )
                if z.device != "/device:GPU:0":
                    # this has to be created into gpu0
                    print('z.device is {}'.format(z.device))
                    assert False
        else:
            z = tf.random_normal(shape, mean=mean, stddev=std,
                                 name=name, dtype=tf.float32)
        return z

    if hasattr(segan, 'generator_built'):
        tf.get_variable_scope().reuse_variables()
        make_vars = False
    else:
        make_vars = True
    # make_vars is assigned above but not read again in this method.
    print('*** Building Generator ***')
    in_dims = noisy_w.get_shape().as_list()
    h_i = noisy_w
    if len(in_dims) == 2:
        h_i = tf.expand_dims(noisy_w, -1)
    elif len(in_dims) < 2 or len(in_dims) > 3:
        raise ValueError('Generator input must be 2-D or 3-D')
    kwidth = 3
    # z matches the input on axes 0 and 1 and is concatenated on axis 2.
    z = make_z([segan.batch_size, h_i.get_shape().as_list()[1],
                segan.g_enc_depths[-1]])
    h_i = tf.concat([h_i, z], 2)
    skip_out = True
    skips = []
    for block_idx, dilation in enumerate(segan.g_dilated_blocks):
        name = 'g_residual_block_{}'.format(block_idx)
        # The last block produces no separate skip output.
        if block_idx >= len(segan.g_dilated_blocks) - 1:
            skip_out = False
        if skip_out:
            res_i, skip_i = residual_block(h_i,
                                           dilation, kwidth, num_kernels=32,
                                           bias_init=None, stddev=0.02,
                                           do_skip = True,
                                           name=name)
        else:
            res_i = residual_block(h_i,
                                   dilation, kwidth, num_kernels=32,
                                   bias_init=None, stddev=0.02,
                                   do_skip = False,
                                   name=name)
        # feed the residual output to the next block
        h_i = res_i
        if segan.keep_prob < 1:
            print('Adding dropout w/ keep prob {} '
                  'to G'.format(segan.keep_prob))
            h_i = tf.nn.dropout(h_i, segan.keep_prob_var)
        if skip_out:
            # accumulate the skip connections
            skips.append(skip_i)
        else:
            # for last block, the residual output is appended
            skips.append(res_i)
    print('Amount of skip connections: ', len(skips))
    # TODO: last pooling for actual wave
    with tf.variable_scope('g_wave_pooling'):
        # Sum all skip outputs, apply leaky ReLU, then project with a
        # width-1 convolution and a dense layer.
        skip_T = tf.stack(skips, axis=0)
        skips_sum = tf.reduce_sum(skip_T, axis=0)
        skips_sum = leakyrelu(skips_sum)
        wave_a = conv1d(skips_sum, kwidth=1, num_kernels=1,
                        init=tf.truncated_normal_initializer(stddev=0.02))
        wave = tf.layers.dense(wave_a, units, kernel_initializer=xavier_initializer())
        # wave = tf.tanh(wave_a)
        # segan.gen_wave_summ = histogram_summary('gen_wave', wave)
        print('Last residual wave shape: ', res_i.get_shape())
        print('*************************')
    # Marks the generator as built so the next call reuses the variables.
    segan.generator_built = True
    return wave, z
def __init__(self,
             corpus,
             n_filters=(128, 256),
             filter_width=3,
             token_embeddings_dim=128,
             char_embeddings_dim=50,
             use_char_embeddins=True,
             pretrained_model_filepath=None,
             embeddings_dropout=False,
             dense_dropout=False,
             use_batch_norm=False,
             logging=False,
             use_crf=False,
             net_type='cnn',
             char_filter_width=5,
             verbouse=True,
             use_capitalization=False,
             concat_embeddings=False,
             cell_type=None):
    """Build the tagging network, its loss and its session.

    The default graph is reset, placeholders are created, token embeddings
    (optionally with character embeddings, external embeddings and a
    capitalization feature) are fed to the network chosen by `net_type`,
    and a dense classifier produces per-token logits.

    Args:
        corpus: object exposing tag_dict, token_dict, char_dict and
            embeddings.
        n_filters: layer sizes passed to the chosen network.
        filter_width: convolution width for the CNN variants.
        token_embeddings_dim: size of the trainable token embeddings.
        char_embeddings_dim: size of the character embeddings.
        use_char_embeddins: concatenate character-level embeddings.
        pretrained_model_filepath: if given, passed to self.load() at the
            end of construction.
        embeddings_dropout: apply tf.layers.dropout to the embeddings.
        dense_dropout: only affects the stored _use_dropout flag here.
        use_batch_norm: forwarded to the CNN variants.
        logging: create a tf.summary.FileWriter in 'summary'.
        use_crf: use a CRF log-likelihood loss instead of masked softmax
            cross-entropy; predictions are then None.
        net_type: a string containing 'cnn_highway', 'cnn' or 'rnn'.
        char_filter_width: filter width of the character network.
        verbouse: print the number of parameters.
        use_capitalization: append the x_capi feature to the embeddings.
        concat_embeddings: concatenate externally fed embeddings (x_emb).
        cell_type: 'lstm' or 'gru'; required for the RNN network.

    Raises:
        RuntimeError: if an RNN is requested without a valid `cell_type`.
        KeyError: if `net_type` matches no known network.
    """
    tf.reset_default_graph()

    n_tags = len(corpus.tag_dict)
    n_tokens = len(corpus.token_dict)
    n_chars = len(corpus.char_dict)
    # True when pre-computed embedding vectors are fed directly as x_word.
    embeddings_onethego = not concat_embeddings and \
        corpus.embeddings is not None and \
        not isinstance(corpus.embeddings, dict)

    # Create placeholders
    if embeddings_onethego:
        x_word = tf.placeholder(dtype=tf.float32,
                                shape=[None, None, corpus.embeddings.vector_size],
                                name='x_word')
    else:
        x_word = tf.placeholder(dtype=tf.int32, shape=[None, None], name='x_word')
    if concat_embeddings:
        # NOTE(review): this placeholder shares the name 'x_word' with the
        # one above; left unchanged in case it is looked up by name.
        x_emb = tf.placeholder(dtype=tf.float32,
                               shape=[None, None, corpus.embeddings.vector_size],
                               name='x_word')
    x_char = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name='x_char')
    y_true = tf.placeholder(dtype=tf.int32, shape=[None, None], name='y_tag')
    mask = tf.placeholder(dtype=tf.float32, shape=[None, None], name='mask')
    x_capi = tf.placeholder(dtype=tf.float32, shape=[None, None], name='x_capi')

    # Auxiliary placeholders
    learning_rate_ph = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')
    dropout_ph = tf.placeholder_with_default(1.0, shape=[])
    training_ph = tf.placeholder_with_default(False, shape=[])
    learning_rate_decay_ph = tf.placeholder(dtype=tf.float32, shape=[],
                                            name='learning_rate_decay')

    # Embeddings
    if not embeddings_onethego:
        with tf.variable_scope('Embeddings'):
            w_emb = embedding_layer(x_word, n_tokens=n_tokens,
                                    token_embedding_dim=token_embeddings_dim)
            if use_char_embeddins:
                c_emb = character_embedding_network(x_char,
                                                    n_characters=n_chars,
                                                    char_embedding_dim=char_embeddings_dim,
                                                    filter_width=char_filter_width)
                emb = tf.concat([w_emb, c_emb], axis=-1)
            else:
                emb = w_emb
    else:
        emb = x_word

    if concat_embeddings:
        emb = tf.concat([emb, x_emb], axis=2)

    if use_capitalization:
        cap = tf.expand_dims(x_capi, 2)
        emb = tf.concat([emb, cap], axis=2)

    # Dropout for embeddings
    # NOTE(review): dropout_ph is passed as the second argument of
    # tf.layers.dropout and defaults to 1.0 -- confirm that callers feed the
    # value that argument expects.
    if embeddings_dropout:
        emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)

    # 'cnn_highway' must be tested before 'cnn': the plain 'cnn' substring
    # check also matches it, which made the highway branch unreachable.
    net_kind = net_type.lower()
    if 'cnn_highway' in net_kind:
        units = highway_convolutional_network(emb,
                                              n_filters=n_filters,
                                              filter_width=filter_width,
                                              use_batch_norm=use_batch_norm,
                                              training_ph=training_ph)
    elif 'cnn' in net_kind:
        # Convolutional network
        with tf.variable_scope('ConvNet'):
            units = stacked_convolutions(emb,
                                         n_filters=n_filters,
                                         filter_width=filter_width,
                                         use_batch_norm=use_batch_norm,
                                         training_ph=training_ph)
    elif 'rnn' in net_kind:
        if cell_type not in {'lstm', 'gru'}:
            raise RuntimeError('You must specify the type of the cell! It could be either "lstm" or "gru"')
        units = stacked_rnn(emb, n_filters, cell_type=cell_type)
    else:
        raise KeyError('There is no such type of network: {}'.format(net_type))

    # Classifier
    with tf.variable_scope('Classifier'):
        logits = tf.layers.dense(units, n_tags, kernel_initializer=xavier_initializer())

    if use_crf:
        sequence_lengths = tf.reduce_sum(mask, axis=1)
        log_likelihood, trainsition_params = tf.contrib.crf.crf_log_likelihood(
            logits, y_true, sequence_lengths)
        loss_tensor = -log_likelihood
        predictions = None
    else:
        ground_truth_labels = tf.one_hot(y_true, n_tags)
        loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_labels, logits=logits)
        # Zero the loss on masked-out positions.
        loss_tensor = loss_tensor * mask
        predictions = tf.argmax(logits, axis=-1)

    loss = tf.reduce_mean(loss_tensor)

    # Initialize session
    sess = tf.Session()
    if verbouse:
        self.print_number_of_parameters()
    if logging:
        self.train_writer = tf.summary.FileWriter('summary', sess.graph)

    self._use_crf = use_crf
    self.summary = tf.summary.merge_all()

    self._learning_rate_decay_ph = learning_rate_decay_ph
    self._x_w = x_word
    self._x_c = x_char
    self._y_true = y_true
    self._y_pred = predictions
    if concat_embeddings:
        self._x_emb = x_emb
    if use_crf:
        self._logits = logits
        self._trainsition_params = trainsition_params
        self._sequence_lengths = sequence_lengths
    self._learning_rate_ph = learning_rate_ph
    self._dropout = dropout_ph

    self._loss = loss
    self._sess = sess
    self.corpus = corpus

    self._loss_tensor = loss_tensor
    self._use_dropout = True if embeddings_dropout or dense_dropout else None

    self._training_ph = training_ph
    self._logging = logging

    # Get training op
    self._train_op = self.get_train_op(loss, learning_rate_ph,
                                       lr_decay_rate=learning_rate_decay_ph)
    self._embeddings_onethego = embeddings_onethego
    self.verbouse = verbouse
    # Variables are initialised only after the train op exists, so the
    # initializer also covers whatever get_train_op created.
    sess.run(tf.global_variables_initializer())
    self._mask = mask
    if use_capitalization:
        self._x_capi = x_capi
    self._use_capitalization = use_capitalization
    self._concat_embeddings = concat_embeddings
    if pretrained_model_filepath is not None:
        self.load(pretrained_model_filepath)
def build_AFM_model_only_word(self , rnn_units = 150 , training = True):
    """Build the word-level matching graph for a (q, r) sentence pair.

    Pipeline: embedding lookup -> shared bidirectional GRU encoder ->
    cross-attention fusion -> self-attention fusion -> mean/max pooling ->
    two dense layers -> softmax over 2 classes.

    Sets the placeholders (input_q, input_r, q_sequence_len,
    r_sequence_len, input_y, embedding_ph, keep_prob) and the tensors
    embedding_init, final_matching_vector, y_pred, y_score,
    class_label_pred, total_loss and train_op on self.

    Args:
        rnn_units: units per GRU direction; bidirectional outputs are
            2 * rnn_units wide.
        training: when True, a new directory './charSlice<i>' is created
            using the first unused index i.

    Returns:
        The path of the created directory when `training` is True,
        otherwise None.
    """
    with tf.variable_scope('placeholder'):
        self.input_q = tf.placeholder(tf.int32, [None, None], name='input_q')  # the placeholder only holds one batch of data
        self.input_r = tf.placeholder(tf.int32, [None, None], name='input_r')  # the placeholder only holds one batch of data
        self.q_sequence_len = tf.placeholder(tf.int32, [None], name='q_sequence_len')
        self.r_sequence_len = tf.placeholder(tf.int32, [None], name='r_sequence_len')
        self.input_y = tf.placeholder(tf.float32, [None], name='input_y')
        self.embedding_ph = tf.placeholder(tf.float32, shape=(self.total_words, self.word_embedding_size))
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    with tf.variable_scope('word_embedding'):
        word_embeddings = tf.get_variable('word_embeddings_v', shape=(self.total_words, self.
                                          word_embedding_size), dtype=tf.float32, trainable=True)  # author's note: making the embedding trainable matters a lot for this task
        # Op that copies the fed embedding matrix into the variable.
        self.embedding_init = word_embeddings.assign(self.embedding_ph)
        q_embedding = tf.nn.embedding_lookup(word_embeddings, self.input_q)
        r_embedding = tf.nn.embedding_lookup(word_embeddings, self.input_r)

    with tf.variable_scope('first_encodeing'):
        GRU_fw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='forwardCell')
        GRU_fw = tf.nn.rnn_cell.DropoutWrapper(GRU_fw, output_keep_prob=self.keep_prob)
        GRU_bw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='backwordCell')
        GRU_bw = tf.nn.rnn_cell.DropoutWrapper(GRU_bw, output_keep_prob=self.keep_prob)
        # The same cell objects encode both q and r.
        q_gru, q_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, q_embedding, sequence_length=self.q_sequence_len, dtype=tf.float32)
        r_gru, r_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, r_embedding, sequence_length=self.r_sequence_len, dtype=tf.float32)
        # Concatenate forward and backward outputs on the feature axis.
        q_gru = tf.concat(q_gru, 2)
        r_gru = tf.concat(r_gru, 2)

    # Start building blocks; the original paper stacks several such blocks.
    with tf.variable_scope("cross_attention_fusion"):
        with tf.variable_scope("word_level"):
            # cross attention
            # Att[i,j] = qi * W * rj + uq * qi + ur * rj
            attention_weight = tf.get_variable(name="attention_weight",shape=(rnn_units * 2 , rnn_units * 2),dtype=tf.float32, initializer=xavier_initializer())
            attention_vector_q = tf.get_variable(name='attention_vector_q',shape=(rnn_units * 2 , 1))
            attention_vector_r = tf.get_variable(name="attention_vector_r",shape=(rnn_units * 2 , 1))
            A = tf.matmul( tf.tensordot(q_gru, attention_weight, axes=(2, 0)) , tf.transpose(r_gru, perm=(0, 2, 1))) \
                + tf.tensordot(q_gru , attention_vector_q , axes=(2,0)) \
                + tf.transpose(tf.tensordot(r_gru, attention_vector_r, axes=(2 , 0)), perm = (0,2,1))
            # A has shape (batch, q, r)
            atted_q = tf.matmul( tf.nn.softmax(A) , r_gru)
            atted_r = tf.matmul( tf.nn.softmax( tf.transpose(A , perm=(0,2,1))) , q_gru)

            # fusion for cross attention
            fused_q = tf.concat([q_gru,atted_q,q_gru - atted_q, q_gru * atted_q ] , axis = 2)
            fused_r = tf.concat([r_gru,atted_r,r_gru - atted_r, r_gru * atted_r ] , axis = 2)
            fused_q = fully_connected(fused_q , rnn_units * 2 , activation_fn=tf.nn.relu)
            fused_r = fully_connected(fused_r, rnn_units * 2, activation_fn=tf.nn.relu)

            GRU_fw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='forwardCell')
            GRU_fw = tf.nn.rnn_cell.DropoutWrapper(GRU_fw, output_keep_prob=self.keep_prob)
            GRU_bw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='backwordCell')
            GRU_bw = tf.nn.rnn_cell.DropoutWrapper(GRU_bw, output_keep_prob=self.keep_prob)
            fused_q, q_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, fused_q, sequence_length=self.q_sequence_len, dtype=tf.float32)
            fused_r, r_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, fused_r, sequence_length=self.r_sequence_len, dtype=tf.float32)
            fused_q = tf.concat(fused_q, 2)
            fused_r = tf.concat(fused_r, 2)

    with tf.variable_scope("self_attention_fusion"):
        with tf.variable_scope("word_level"):
            # self attention
            Sq = tf.matmul(fused_q, fused_q, transpose_b=True)  # batch , q, q
            Sr = tf.matmul(fused_r, fused_r, transpose_b=True)  # batch , r ,r
            Sq = tf.nn.softmax(Sq)
            Sr = tf.nn.softmax(Sr)
            Hq = tf.matmul(Sq, fused_q)
            Hr = tf.matmul(Sr, fused_r)

            # fusion for self attention
            fusedS_q = tf.concat([fused_q, Hq, fused_q - Hq, fused_q * Hq], axis=- 1)
            fusedS_r = tf.concat([fused_r, Hr, fused_r - Hr, fused_r * Hr], axis=- 1)
            fusedS_q = fully_connected(fusedS_q, rnn_units * 2, activation_fn=tf.nn.relu)
            fusedS_r = fully_connected(fusedS_r, rnn_units * 2, activation_fn=tf.nn.relu)

            GRU_fw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='forwardCell')
            GRU_fw = tf.nn.rnn_cell.DropoutWrapper(GRU_fw, output_keep_prob=self.keep_prob)
            GRU_bw = rnn.GRUCell(rnn_units, kernel_initializer=tf.orthogonal_initializer(), name='backwordCell')
            GRU_bw = tf.nn.rnn_cell.DropoutWrapper(GRU_bw, output_keep_prob=self.keep_prob)
            fusedS_q, q_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, fusedS_q, sequence_length=self.q_sequence_len, dtype=tf.float32)
            fusedS_r, r_last_state = tf.nn.bidirectional_dynamic_rnn(GRU_fw, GRU_bw, fusedS_r, sequence_length=self.r_sequence_len, dtype=tf.float32)
            fusedS_q = tf.concat(fusedS_q, 2)
            fusedS_r = tf.concat(fusedS_r, 2)

    with tf.variable_scope("output"):
        # Pool each sequence over axis 1 with both mean and max.
        Vqmean = tf.reduce_mean(fusedS_q, axis = 1)
        Vqmax = tf.reduce_max(fusedS_q, axis = 1)
        Vrmean = tf.reduce_mean(fusedS_r, axis = 1)
        Vrmax = tf.reduce_max(fusedS_r, axis = 1)
        self.final_matching_vector = tf.concat([Vqmean, Vqmax, Vrmean, Vrmax], axis=-1)
        temp = tf.layers.dense(self.final_matching_vector,rnn_units,activation=tf.nn.tanh,
                               kernel_initializer=tf.contrib.layers.xavier_initializer(),
                               kernel_regularizer=tf.contrib.layers.l2_regularizer(self.l2),
                               bias_regularizer=tf.contrib.layers.l2_regularizer(self.l2),
                               )
        logits = tf.layers.dense(temp, 2,
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(self.l2),
                                 bias_regularizer=tf.contrib.layers.l2_regularizer(self.l2),
                                 name='output')
        self.y_pred = tf.nn.softmax(logits)  # [batch_size , 2]
        self.y_score = self.y_pred[:,1]
        self.class_label_pred = tf.argmax(self.y_pred, 1)  # predicted class

    with tf.variable_scope('optimze'):
        # self.total_loss = tf.reduce_mean(
        #     tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y, logits=logits))
        # log(1 + (pred_y - real_y)^2)
        # NOTE(review): no reduction is applied, so total_loss keeps one
        # value per example while it is passed to tf.summary.scalar --
        # confirm whether a reduce_mean was intended.
        self.total_loss = tf.log(1.0 + tf.square(self.y_score - self.input_y))
        tf.summary.scalar('loss', self.total_loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = optimizer.minimize(self.total_loss)

    if training:
        # Find the first './charSlice<i>' that does not exist and create it.
        i = 0
        while os.path.exists('./charSlice' + str(i)):
            i += 1
        os.makedirs('./charSlice' + str(i))
        return './charSlice' + str(i)
def __linear(self, input, kernel_shape, name):
    """Multiply `input` by a Xavier-initialised kernel named "W" + name.

    No bias term is added.
    """
    kernel = tf.get_variable(
        "W" + name,
        kernel_shape,
        initializer=xavier_initializer(),
    )
    projected = tf.matmul(input, kernel)
    return projected