import pickle

import tensorflow as tf


def deconv(x, filter_size, stride_width, stride_height, feature_out,
           scope="deconv2d"):
    """Transposed convolution (deconvolution) layer."""
    with tf.variable_scope(scope):
        # Filter layout for conv2d_transpose is
        # [height, width, out_channels, in_channels].
        w = tf.get_variable(
            "w",
            [filter_size, filter_size, feature_out[-1], x.get_shape()[-1]],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", [feature_out[-1]],
                            initializer=tf.constant_initializer(0.0))
        deconv = tf.nn.conv2d_transpose(
            x, w,
            output_shape=feature_out,
            strides=[1, stride_width, stride_height, 1]) + b
        return deconv
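# Usage sketch for deconv (not from the source; the shapes here are
# hypothetical): upsample a 16x16 feature map to 32x32 with stride 2.
# feature_out must be a fully static [batch, height, width, channels] list
# for this call form of conv2d_transpose.
x_demo = tf.placeholder(tf.float32, [8, 16, 16, 64])
y_demo = deconv(x_demo, filter_size=5, stride_width=2, stride_height=2,
                feature_out=[8, 32, 32, 32], scope="deconv_demo")
# y_demo has shape [8, 32, 32, 32].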
def train():
    """Train CIFAR-10 with synchronous gradient averaging across GPU towers."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        # Decay the learning rate on an epoch schedule.
        num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                                 batch_size)
        decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)
        lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        cifar10.LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        opt = tf.train.GradientDescentOptimizer(lr)
        # Build one loss tower per GPU; all towers share variables.
        tower_grads = []
        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
                    loss = tower_loss(scope)
                    tf.get_variable_scope().reuse_variables()
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads)
        # Update the model parameters with the averaged gradients.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        return apply_gradient_op
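# Sketch of the average_gradients helper referenced above but not shown in the
# source; this follows the TensorFlow CIFAR-10 multi-GPU tutorial: average
# each variable's gradient across towers, keeping the (shared) variable from
# the first tower.
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0, var), (grad1, var), ...) for one variable.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads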
def highwaynet(inputs):
    """Highway layer: y = g * H(x) + (1 - g) * x (Srivastava et al., 2015)."""
    # Transform gate; the -1.0 bias initializer biases the network toward
    # carrying the input through at the start of training.
    g = tf.layers.dense(inputs=inputs, units=128, activation=tf.nn.sigmoid,
                        bias_initializer=tf.constant_initializer(-1.0))
    r = tf.layers.dense(inputs=inputs, units=128, activation=tf.nn.relu)
    return g * r + inputs * (1.0 - g)
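# Usage sketch (hypothetical input, not from the source). The last dimension
# must already be 128 so the carry term inputs * (1 - g) lines up with the
# gated transform elementwise.
h_demo = tf.placeholder(tf.float32, [None, 10, 128])
h_out = highwaynet(h_demo)  # same shape as h_demo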
def capsnet(inputs):
    """Construct a 3-layer capsule net with 28x28 inputs."""
    ## Layer 1 is a regular convolution. We blow 1 channel up into 256 channels.
    with tf.variable_scope('conv1') as scope:
        kernel = _get_kernel('weights', [9, 9, 1, 256], stddev=5e-2, reg=0.0)
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable('biases', [256],
                                 initializer=tf.constant_initializer(0.0))
        pre_act = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_act, name=scope.name)

    ## Layer 2 is the first capsule layer. It amounts to 32 parallel
    ## convolutions from 256 channels down to 8 channels. Each of these 32 conv
    ## layers contains (width) * (height) capsules of length 8. The output of
    ## the layer is a [width * height * 32] x 8 matrix; each row is a capsule.
    capsules1 = tf.zeros((0, 8))
    for i in range(0, 32):
        with tf.variable_scope('primary_caps' + str(i)) as scope:
            kernel = _get_kernel('weights' + str(i), [9, 9, 256, 8],
                                 stddev=5e-2, reg=0.0)
            conv = tf.nn.conv2d(conv1, kernel, [1, 2, 2, 1], padding='VALID')
            biases = tf.get_variable('biases' + str(i), [8],
                                     initializer=tf.constant_initializer(0.0))
            pre_act = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(pre_act, name=scope.name)
            # 6x6 spatial positions per channel block; assumes batch size 1.
            shaped = tf.reshape(conv2, [36, 8])
            capsules1 = tf.concat([capsules1, shaped], 0)

    num_primary = int(capsules1.shape[0])
    with tf.variable_scope('coupling') as scope:
        priors = tf.get_variable('priors', shape=[num_primary, 10],
                                 initializer=tf.constant_initializer(0.0))
        coupling_coeffs = tf.nn.softmax(priors)

    with tf.variable_scope('secondary_caps'):
        secondary_caps = []
        for j in range(0, NUM_CLASSES):
            routes_into_j = []
            for i in range(0, num_primary):
                W_ij = _get_tn_var('weights_' + str(i) + str(j),
                                   shape=[16, 8], stddev=0.04, reg=0.004)
                b_ij = tf.get_variable(
                    'biases_' + str(i) + str(j), [16],
                    initializer=tf.constant_initializer(0.0))
                # Prediction vector u_hat = W_ij u_i + b_ij; capsules1[i] is a
                # length-8 vector, so lift it to a column for the matmul.
                uhat = tf.squeeze(
                    tf.matmul(W_ij, tf.expand_dims(capsules1[i], -1)),
                    -1) + b_ij
                # Weight the route by its coupling coefficient c_ij.
                routes_into_j.append(
                    tf.scalar_mul(coupling_coeffs[i, j], uhat))
            # s_j = sum_i c_ij * u_hat_ij (elementwise sum over routes).
            s_j = tf.add_n(routes_into_j)
            secondary_caps.append(s_j)
    return tf.stack(secondary_caps)
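# Sketch of the squash nonlinearity from the CapsNet paper (Sabour et al.,
# 2017), which the snippet above stops short of applying to s_j; not part of
# the source code: v_j = (||s||^2 / (1 + ||s||^2)) * (s / ||s||).
def _squash(s, eps=1e-9):
    squared_norm = tf.reduce_sum(tf.square(s), axis=-1, keepdims=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / tf.sqrt(squared_norm + eps)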
def text_cnn(inputs, filter_sizes, num_filters, embedding_size,
             sequence_length, dropout_keep_prob=1.0):
    """TextCNN feature extractor (Kim, 2014): parallel convolutions of
    different widths, each max-pooled over time, then concatenated."""
    # Create a convolution + maxpool layer for each filter size.
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer.
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            # Create variable named "weights".
            weights = tf.get_variable(
                "weights", filter_shape,
                initializer=tf.random_normal_initializer())
            # Create variable named "biases".
            biases = tf.get_variable(
                "biases", [num_filters],
                initializer=tf.constant_initializer(0.0))
            conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Apply nonlinearity.
            h = tf.nn.relu(tf.nn.bias_add(conv, biases), name="relu")
            # Max-pooling over the outputs.
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features.
    num_filters_total = num_filters * len(filter_sizes)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Add dropout.
    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)

    return h_drop
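# Usage sketch (hypothetical dimensions, not from the source). The input is an
# embedded, channel-expanded batch: [batch, sequence_length, embedding_size, 1].
x_text = tf.placeholder(tf.float32, [None, 56, 128, 1])
features = text_cnn(x_text, filter_sizes=[3, 4, 5], num_filters=100,
                    embedding_size=128, sequence_length=56,
                    dropout_keep_prob=0.5)
# features has shape [batch, 300] (= 100 filters * 3 filter sizes).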
def _embed(self):
    """The embedding layer; the question and passage share embeddings."""
    with tf.device('/cpu:0'), tf.variable_scope('word_embedding'):
        self.word_embeddings = tf.get_variable(
            'word_embeddings',
            shape=(self.vocab.size(), self.vocab.embed_dim),
            initializer=tf.constant_initializer(self.vocab.embeddings),
            trainable=True)
        self.p_emb = tf.nn.embedding_lookup(self.word_embeddings, self.p)
        self.q_emb = tf.nn.embedding_lookup(self.word_embeddings, self.q)
        #self.p_emb = tf.concat([self.p_emb, self.p_allennlp], axis=-1)
        #self.q_emb = tf.concat([self.q_emb, self.q_allennlp], axis=-1)
        self.char_embeddings = tf.get_variable(
            'char_embeddings',
            shape=(self.vocab.char_size(), self.vocab.char_embed_dim),
            initializer=tf.constant_initializer(self.vocab.char_embeddings))
        self.p_char_emb = tf.nn.embedding_lookup(self.char_embeddings,
                                                 self.p_char)
        self.q_char_emb = tf.nn.embedding_lookup(self.char_embeddings,
                                                 self.q_char)
def _load_embeddings(embedding_fname):
    """Load a pickled pretrained embedding matrix; return its shape and a
    constant initializer that reproduces it."""
    with open(embedding_fname, 'rb') as pe_f:
        pretrained_embed = pickle.load(pe_f)
    return pretrained_embed.shape, tf.constant_initializer(pretrained_embed)
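# Usage sketch (hypothetical path and variable name, not from the source):
# wire the returned shape and initializer into a frozen embedding variable.
shape, init = _load_embeddings('embeddings.pkl')
pretrained = tf.get_variable('pretrained_embeddings', shape=shape,
                             initializer=init, trainable=False)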