def mlp_param_init(dim, scheme = 'zero'):
    """
    @note: Creates the weight and bias variables of a multi-layer perceptron
           with tf.get_variable. For every layer i (1-based) the shapes are:
                W<i>: [dim[i], dim[i-1]]
                B<i>: [dim[i], 1]
    @param dim: the number of units in each level
                -- dim = [n_x, n1, n2, ..., n(l-1), n_y]
    @param scheme: weight initialization scheme. 'xavier' selects
                   tf.contrib.layers.xavier_initializer; any other value
                   (including the default 'zero') zero-initializes the weights.
                   Biases are always zero-initialized.
    @return: parameters -- a dictionary mapping 'W1', 'B1', 'W2', 'B2', ...
             to the created variables
    """
    parameters = {}
    # Walk consecutive (previous layer, current layer) unit counts;
    # layer numbering starts at 1 for the first hidden layer.
    for layer, (n_prev, n_curr) in enumerate(zip(dim, dim[1:]), start=1):
        weight_name = 'W' + str(layer)
        bias_name = 'B' + str(layer)
        if scheme == 'xavier':
            weight_init = tf.contrib.layers.xavier_initializer()
        else:
            weight_init = tf.zeros_initializer()
        parameters[weight_name] = tf.get_variable(
            weight_name, [n_curr, n_prev], initializer=weight_init)
        parameters[bias_name] = tf.get_variable(
            bias_name, [n_curr, 1], initializer=tf.zeros_initializer())
    return parameters
def conv2d_zeros(x, width, filter_size=[3, 3], stride=[1, 1], pad="SAME", logscale_factor=3, skip=1, edge_bias=True, name=None):
    """2-D convolution whose output is exactly zero at initialization.

    The kernel "W", the bias "b" and the log-scale "logs" are all
    zero-initialized, so the layer initially computes
    (conv(x, 0) + 0) * exp(0) == 0.

    Args:
        x: input tensor; its channel count is read from axis 3 (NHWC).
        width: number of output channels.
        filter_size: [height, width] of the kernel (list or tuple).
        stride: [vertical, horizontal] stride (list or tuple).
        pad: padding mode passed to the convolution op.
        logscale_factor: multiplier applied to "logs" before exponentiation.
        skip: dilation rate; 1 selects a regular convolution, any other
            value selects tf.nn.atrous_conv2d (which requires stride 1).
        edge_bias: when true and pad == "SAME", the input is first passed
            through add_edge_padding and the convolution runs in 'VALID' mode.
        name: optional variable scope name (default scope name "conv2d").

    Returns:
        The convolved, biased and rescaled tensor.
    """
    # Work on private list copies: tuple arguments become acceptable and the
    # shared default lists can never be aliased by downstream code.
    filter_size = list(filter_size)
    stride = list(stride)

    with tf.variable_scope(name, "conv2d"):
        if edge_bias and pad == "SAME":
            # NOTE(review): add_edge_padding is defined elsewhere; it is
            # assumed to pad spatially (and may add channels), which is why
            # n_in is read only after this call.
            x = add_edge_padding(x, filter_size)
            pad = 'VALID'

        n_in = int(x.get_shape()[3])
        stride_shape = [1] + stride + [1]
        filter_shape = filter_size + [n_in, width]
        w = tf.get_variable("W", filter_shape, tf.float32,
                            initializer=tf.zeros_initializer())
        if skip == 1:
            x = tf.nn.conv2d(x, w, stride_shape, pad, data_format='NHWC')
        else:
            assert stride[0] == 1 and stride[1] == 1
            x = tf.nn.atrous_conv2d(x, w, skip, pad)

        # The bias must start at zero as well; a ones-initialized bias would
        # make the layer output 1 everywhere at initialization, defeating the
        # zero-init purpose (and disagreeing with linear_zeros).
        x += tf.get_variable("b", [1, 1, 1, width],
                             initializer=tf.zeros_initializer())
        x *= tf.exp(tf.get_variable("logs", [1, width],
                                    initializer=tf.zeros_initializer()) * logscale_factor)
    return x
def Discriminator_with_Vanilla(input_Pattern, hidden_Unit_Size = 128, label_Unit_Size = 10, is_Training = True, reuse = False):
    """Build a one-hidden-layer discriminator with a real/fake head and a label head.

    All layers live in the 'discriminator' variable scope and use a
    truncated-normal (stddev 0.1) kernel initializer with zero biases.

    Args:
        input_Pattern: input tensor fed to the hidden layer.
        hidden_Unit_Size: number of ReLU hidden units.
        label_Unit_Size: number of outputs of the label head.
        is_Training: accepted for interface compatibility; not used here.
        reuse: forwarded to tf.variable_scope.

    Returns:
        (discrimination logits, label logits,
         sigmoid of the discrimination logits, softmax of the label logits)
    """
    def dense(inputs, units, name, activation=None):
        # Every layer shares the same initialization recipe.
        return tf.layers.dense(
            inputs=inputs,
            units=units,
            activation=activation,
            use_bias=True,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
            bias_initializer=tf.zeros_initializer(),
            name=name,
        )

    with tf.variable_scope('discriminator', reuse=reuse):
        hidden = dense(input_Pattern, hidden_Unit_Size, "hidden", tf.nn.relu)

        real_logits = dense(hidden, 1, "discrimination")
        real_probability = tf.nn.sigmoid(real_logits)

        class_logits = dense(hidden, label_Unit_Size, "label")
        class_probability = tf.nn.softmax(class_logits)

        return real_logits, class_logits, real_probability, class_probability
def initialize_parameters():
    """
    Create the variables of a three-layer network with tensorflow.

    Layer sizes come from the module-level names n_input, n_hidden_1,
    n_hidden_2 and n_classes. The shapes are:
        W1 : [n_hidden_1, n_input]      b1 : [n_hidden_1, 1]
        W2 : [n_hidden_2, n_hidden_1]   b2 : [n_hidden_2, 1]
        W3 : [n_classes, n_hidden_2]    b3 : [n_classes, 1]

    Weights use the Xavier initializer (seed 42); biases start at zero.

    Returns:
        dict mapping "W1", "b1", "W2", "b2", "W3", "b3" to the variables.
    """
    tf.set_random_seed(42)

    # (layer index, output units, input units): hidden 1, hidden 2, output.
    layer_sizes = (
        (1, n_hidden_1, n_input),
        (2, n_hidden_2, n_hidden_1),
        (3, n_classes, n_hidden_2),
    )

    parameters = {}
    for index, n_out, n_in in layer_sizes:
        weight_key = "W%d" % index
        bias_key = "b%d" % index
        parameters[weight_key] = tf.get_variable(
            weight_key, [n_out, n_in],
            initializer=tf.contrib.layers.xavier_initializer(seed=42))
        parameters[bias_key] = tf.get_variable(
            bias_key, [n_out, 1], initializer=tf.zeros_initializer())
    return parameters
def query_encoder(self, v_q, is_training=True, scope="query_encoder"):
    """Encode a query image feature with a 256-128-256-img_dim tanh autoencoder.

    Args:
        v_q: query image feature (batch_size, img_dim)
        is_training: accepted for interface compatibility; not used here
        scope: variable scope that holds the four layers

    Returns:
        phi_q: query vector (output of the 128-unit bottleneck layer)
        v_qr: reconstructed v_q (output of the final img_dim-unit layer)
    """
    def tanh_layer(inputs, num_outputs):
        # All four layers share activation, L2 weight decay and zero biases.
        return tf.contrib.layers.fully_connected(
            inputs=inputs,
            num_outputs=num_outputs,
            activation_fn=tf.nn.tanh,
            weights_regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay),
            biases_initializer=tf.zeros_initializer())

    with tf.variable_scope(scope):
        encoder_hidden = tanh_layer(v_q, 256)
        phi_q = tanh_layer(encoder_hidden, 128)
        decoder_hidden = tanh_layer(phi_q, 256)
        v_qr = tanh_layer(decoder_hidden, self.img_dim)
        return phi_q, v_qr
def auxnet(embedding, size, dropout_rate=.5, std=.2, is_training=True, scope='auxnet'):
    """
    Defines the fully connected layers for the auxnet:
        linear ('hidden') -> batch norm -> relu -> dropout -> tanh layer ('hidden2')

    Args:
        embedding: the histogram embedding matrix
        size: int size of each hidden layer
        dropout_rate: value forwarded as the second positional argument of
            slim.dropout. NOTE(review): in TF-Slim that parameter is
            `keep_prob`, so this is the probability of *keeping* a unit;
            the default .5 is unaffected, confirm intent for other values.
        std: standard deviation targeted by the uniform weight initializer
        is_training: bool, forwarded to batch norm and dropout
        scope: name the op/tensor

    Returns:
        fc: output tensor of the second fully connected layer
    """
    # A uniform distribution on [-sqrt(3)*std, sqrt(3)*std] has standard
    # deviation std.
    half_width = np.sqrt(3) * std
    lower, upper = 0 - half_width, half_width

    layer_defaults = dict(
        weights_initializer=tf.random_uniform_initializer(minval=lower, maxval=upper),
        weights_regularizer=slim.l2_regularizer(.005),
        activation_fn=tf.nn.relu,
    )

    with tf.variable_scope(scope, 'Aux'):
        with slim.arg_scope([slim.fully_connected], **layer_defaults):
            # Linear projection; the non-linearity is applied after batch norm.
            fc = slim.fully_connected(
                embedding, size,
                biases_initializer=tf.zeros_initializer(),
                activation_fn=None,
                scope='hidden')
            fc = slim.batch_norm(
                fc,
                center=True,
                scale=True,
                zero_debias_moving_mean=True,
                is_training=is_training,
                scope='bn')
            fc = tf.nn.relu(fc, 'relu')
            fc = slim.dropout(fc, dropout_rate, is_training=is_training, scope='dropout')
            # Second layer overrides the arg_scope activation with tanh.
            fc = slim.fully_connected(
                fc, size,
                biases_initializer=tf.zeros_initializer(),
                activation_fn=tf.nn.tanh,
                scope="hidden2")
    return fc
def create_slots(self, var):
    """Create the factorized Adam accumulators for diet variables.

    Accumulators are non-trainable, zero-initialized variables stored in
    ``self.params.slots[var.op.name]`` (a defaultdict of dicts that is
    created on first use):
      * "adam_vr" / "adam_vc": row and column second-moment accumulators,
        used when ``params.factored_second_moment_accumulator`` is set and
        the variable is rank 2;
      * "adam_v": full-shape second-moment accumulator otherwise;
      * "adam_m": first-moment accumulator, only when ``params.beta1 != 0``.

    Args:
      var: the variable to create accumulators for.
    """
    params = self.params
    shape = var.get_shape().as_list()
    if not hasattr(params, "slots"):
        params.slots = defaultdict(dict)

    name = var.op.name
    slots = params.slots[name]

    def add_accumulator(key, accumulator_shape):
        # The variable name is "<var name>_<slot key>".
        slots[key] = tf.get_variable(
            name + "_" + key,
            accumulator_shape,
            trainable=False,
            initializer=tf.zeros_initializer())

    factored = params.factored_second_moment_accumulator and len(shape) == 2
    if factored:
        add_accumulator("adam_vr", [shape[0], 1])
        add_accumulator("adam_vc", [1, shape[1]])
    else:
        add_accumulator("adam_v", shape)

    if params.beta1 != 0.0:
        add_accumulator("adam_m", shape)
def basic_fc_discriminator(x):
    """Compute discriminator score for a batch of input images.

    The network is 784 -> 256 -> 256 -> 1 with leaky_relu after the first
    two affine layers; variables live in the "bfcdiscriminator" scope.

    Inputs:
    - x: TensorFlow Tensor of flattened input images, shape [batch_size, 784]

    Returns:
    TensorFlow Tensor with shape [batch_size, 1], containing the score
    for an image being real for each input image.
    """
    layer_dims = ((784, 256), (256, 256), (256, 1))

    with tf.variable_scope("bfcdiscriminator"):
        # Create every variable first (W1, b1, W2, b2, W3, b3), then wire
        # the forward pass.
        layers = []
        for index, (n_in, n_out) in enumerate(layer_dims, start=1):
            weight = tf.get_variable("W%d" % index, (n_in, n_out))
            bias = tf.get_variable("b%d" % index, (n_out, ),
                                   initializer=tf.zeros_initializer())
            layers.append((weight, bias))

        activation = x
        for weight, bias in layers[:-1]:
            activation = leaky_relu(tf.matmul(activation, weight) + bias)

        out_weight, out_bias = layers[-1]
        logits = tf.matmul(activation, out_weight) + out_bias
        return logits
def initialize_parameters():
    '''
    Initialize the parameters of the neural network. The dimensions are:
        W1:[25,12288]
        b1:[25,1]
        W2:[12,25]
        b2:[12,1]
        W3:[6,12]
        b3:[6,1]
    :return: parameters - dictionary containing the W and b variables
    '''
    tf.set_random_seed(1)  # fix the graph-level random seed

    def xavier_weight(name, shape):
        return tf.get_variable(
            name, shape, initializer=tf.contrib.layers.xavier_initializer(seed=1))

    def zero_bias(name, shape):
        return tf.get_variable(name, shape, initializer=tf.zeros_initializer())

    W1 = xavier_weight('W1', [25, 12288])
    b1 = zero_bias('b1', [25, 1])
    W2 = xavier_weight('W2', [12, 25])
    b2 = zero_bias('b2', [12, 1])
    W3 = xavier_weight('W3', [6, 12])
    b3 = zero_bias('b3', [6, 1])

    return {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3': W3, 'b3': b3}
def project_layer(self, lstm_outputs, name=None):
    """
    Project the LSTM outputs to per-step tag scores and store them in
    self.logits.

    A tanh hidden layer (2 * lstm_dim -> lstm_dim) with dropout is followed
    by a linear output layer (lstm_dim -> num_tags). Both weight matrices
    carry an L2 regularizer (0.001).

    :param lstm_outputs: tensor reshaped here to [-1, 2 * lstm_dim]
    :param name: optional variable scope name (defaults to "project")
    :return: None; the result, reshaped to [-1, num_steps, num_tags],
             is assigned to self.logits
    """
    scope_name = name if name else "project"
    in_dim = self.lstm_dim * 2

    with tf.variable_scope(scope_name):
        with tf.variable_scope("hidden"):
            w_tanh = tf.get_variable(
                "w_tanh",
                shape=[in_dim, self.lstm_dim],
                dtype=tf.float32,
                initializer=self.initializer,
                regularizer=tf.contrib.layers.l2_regularizer(0.001))
            b_tanh = tf.get_variable(
                "b_tanh",
                shape=[self.lstm_dim],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            flat_outputs = tf.reshape(lstm_outputs, shape=[-1, in_dim])
            hidden = tf.tanh(tf.nn.xw_plus_b(flat_outputs, w_tanh, b_tanh))
            # self.dropout is passed as tf.nn.dropout's second positional
            # argument.
            drop_hidden = tf.nn.dropout(hidden, self.dropout)

        # project to score of tags
        with tf.variable_scope("output"):
            w_out = tf.get_variable(
                "w_out",
                shape=[self.lstm_dim, self.num_tags],
                dtype=tf.float32,
                initializer=self.initializer,
                regularizer=tf.contrib.layers.l2_regularizer(0.001))
            b_out = tf.get_variable(
                "b_out",
                shape=[self.num_tags],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            pred = tf.nn.xw_plus_b(drop_hidden, w_out, b_out, name="pred")

        logits_shape = [-1, self.num_steps, self.num_tags]
        self.logits = tf.reshape(pred, logits_shape, name="logits")
def project_bilstm_layer(self, lstm_outputs, name=None):
    """
    hidden layer between lstm layer and logits

    A tanh layer (2 * hidden_unit -> hidden_unit) followed by a linear
    layer (hidden_unit -> num_labels), both Xavier-initialized with zero
    biases.

    :param lstm_outputs: [batch_size, num_steps, emb_size]
    :param name: optional variable scope name (defaults to "project")
    :return: [batch_size, num_steps, num_tags]
    """
    def affine_variables(n_in, n_out):
        # Variables are named "W" and "b" inside the enclosing scope.
        weight = tf.get_variable(
            "W", shape=[n_in, n_out], dtype=tf.float32,
            initializer=self.initializers.xavier_initializer())
        bias = tf.get_variable(
            "b", shape=[n_out], dtype=tf.float32,
            initializer=tf.zeros_initializer())
        return weight, bias

    scope_name = name if name else "project"
    with tf.variable_scope(scope_name):
        with tf.variable_scope("hidden"):
            hidden_w, hidden_b = affine_variables(self.hidden_unit * 2, self.hidden_unit)
            flat_outputs = tf.reshape(lstm_outputs, shape=[-1, self.hidden_unit * 2])
            hidden = tf.tanh(tf.nn.xw_plus_b(flat_outputs, hidden_w, hidden_b))

        # project to score of tags
        with tf.variable_scope("logits"):
            logits_w, logits_b = affine_variables(self.hidden_unit, self.num_labels)
            pred = tf.nn.xw_plus_b(hidden, logits_w, logits_b)

        return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
def bacthnorm(inputs, scope, epsilon=1e-05, momentum=0.99, is_training=True):
    """Batch-normalize `inputs` over every axis except the last.

    Creates beta/gamma plus non-trainable moving_mean/moving_variance in
    `scope`. In training mode the batch statistics are used and the
    moving-average update ops are added to UPDATE_OPS_COLLECTION; otherwise
    the stored moving statistics are used.

    Args:
        inputs: tensor with a statically known shape.
        scope: variable scope for the four variables.
        epsilon: variance epsilon passed to tf.nn.batch_normalization.
        momentum: decay of the moving averages.
        is_training: Python bool selecting batch vs. moving statistics.

    Returns:
        The normalized tensor.
    """
    static_shape = inputs.get_shape().as_list()       # static input shape
    channel_shape = static_shape[-1:]                  # size of the last axis
    reduce_axes = list(range(len(static_shape) - 1))   # all axes but the last

    with tf.variable_scope(scope):
        beta = create_variable("beta", channel_shape,
                               initializer=tf.zeros_initializer())
        gamma = create_variable("gamma", channel_shape,
                                initializer=tf.ones_initializer())
        # Moving mean: not trained, used for inference.
        moving_mean = create_variable("moving_mean", channel_shape,
                                      initializer=tf.zeros_initializer(),
                                      trainable=False)
        # Moving variance: not trained.
        moving_variance = create_variable("moving_variance", channel_shape,
                                          initializer=tf.ones_initializer(),
                                          trainable=False)

    if not is_training:
        return tf.nn.batch_normalization(
            inputs, moving_mean, moving_variance, beta, gamma, epsilon)

    # Batch mean and variance.
    mean, variance = tf.nn.moments(inputs, axes=reduce_axes)
    # Moving averages blend the previous value with the current batch:
    # x_t = a * x_{t-1} + (1 - a) * x_now
    for moving_stat, batch_stat in ((moving_mean, mean), (moving_variance, variance)):
        update_op = moving_averages.assign_moving_average(
            moving_stat, batch_stat, decay=momentum)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_op)
    return tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon)
def batch_norm(x, decay=0.999, epsilon=1e-03, is_training=True, scope="scope"):
    """Batch-normalize `x` over every axis except the last.

    Creates beta/gamma plus non-trainable moving_mean/moving_variance in
    `scope`. In training mode the batch statistics are used and the
    moving-average update ops are added to tf.GraphKeys.UPDATE_OPS;
    otherwise the stored moving statistics are used.

    Args:
        x: tensor to normalize (its rank must be statically known).
        decay: decay of the moving averages.
        epsilon: variance epsilon passed to tf.nn.batch_normalization.
        is_training: Python bool selecting batch vs. moving statistics.
        scope: variable scope for the four variables.

    Returns:
        The normalized tensor.
    """
    static_shape = x.get_shape()
    num_inputs = static_shape[-1]
    reduce_dims = list(range(len(static_shape) - 1))

    with tf.variable_scope(scope):
        beta = create_var("beta", [num_inputs,],
                          initializer=tf.zeros_initializer())
        gamma = create_var("gamma", [num_inputs,],
                           initializer=tf.ones_initializer())
        # for inference
        moving_mean = create_var("moving_mean", [num_inputs,],
                                 initializer=tf.zeros_initializer(),
                                 trainable=False)
        moving_variance = create_var("moving_variance", [num_inputs],
                                     initializer=tf.ones_initializer(),
                                     trainable=False)

    if is_training:
        mean, variance = tf.nn.moments(x, axes=reduce_dims)
        mean_update = moving_averages.assign_moving_average(
            moving_mean, mean, decay=decay)
        variance_update = moving_averages.assign_moving_average(
            moving_variance, variance, decay=decay)
        for update_op in (mean_update, variance_update):
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_op)
        statistics = (mean, variance)
    else:
        statistics = (moving_mean, moving_variance)

    used_mean, used_variance = statistics
    return tf.nn.batch_normalization(x, used_mean, used_variance, beta, gamma, epsilon)
def initializeParameters(self, m, n):
    """
    Create the float64 variables of a biased matrix-factorization model.

    Arguments:
    m -- number of users
    n -- number of items

    Returns:
    parameters -- dict with
        parameters['b'], global bias, scalar (zeros)
        parameters['u'], users bias, shape (m, 1) (zeros)
        parameters['d'], item bias, shape (1, n) (zeros)
        parameters['P'], users feature matrix, shape (m, K) (random normal)
        parameters['Q'], items feature matrix, shape (n, K) (random normal)
    """
    k = self.K

    # (variable name, shape, initializer factory), in creation order.
    specs = (
        ('b', [], tf.zeros_initializer),
        ('u', [m, 1], tf.zeros_initializer),
        ('d', [1, n], tf.zeros_initializer),
        ('P', [m, k], tf.random_normal_initializer),
        ('Q', [n, k], tf.random_normal_initializer),
    )

    parameters = {}
    for var_name, shape, make_initializer in specs:
        parameters[var_name] = tf.get_variable(
            name=var_name,
            dtype=tf.float64,
            shape=shape,
            initializer=make_initializer())
    return parameters
def initialize_parameters():
    """
    Initializes parameters to build a neural network with tensorflow.

    The shapes are:
        W1 : [25, 12288]    b1 : [25, 1]
        W2 : [12, 25]       b2 : [12, 1]
        W3 : [6, 12]        b3 : [6, 1]

    Weights use the Xavier initializer with seed 1; biases start at zero.

    Returns:
    parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """
    tf.set_random_seed(1)  # keep the "random" numbers reproducible

    # Unit counts from the input (12288) through to the output layer (6).
    layer_units = [12288, 25, 12, 6]

    parameters = {}
    for index in range(1, len(layer_units)):
        n_out, n_in = layer_units[index], layer_units[index - 1]
        w_key = "W" + str(index)
        b_key = "b" + str(index)
        parameters[w_key] = tf.get_variable(
            w_key, [n_out, n_in],
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[b_key] = tf.get_variable(
            b_key, [n_out, 1], initializer=tf.zeros_initializer())
    return parameters
def main(_):
    """Fit a Gaussian mixture model to toy data with Metropolis-Hastings.

    Sizes are read from FLAGS: N data points, K clusters, D dimensions and
    T stored samples per Empirical approximation. During inference the
    posterior means of the mixing proportions and cluster means are printed
    at t == 1 and every `inference.n_print` iterations.
    """
    ed.set_seed(42)

    # DATA
    x_data = build_toy_dataset(FLAGS.N)

    # MODEL
    pi = Dirichlet(concentration=tf.ones(FLAGS.K))
    mu = Normal(0.0, 1.0, sample_shape=[FLAGS.K, FLAGS.D])
    sigma = InverseGamma(concentration=1.0, rate=1.0,
                         sample_shape=[FLAGS.K, FLAGS.D])
    assignment_logits = tf.log(pi) - tf.log(1.0 - pi)
    c = Categorical(logits=assignment_logits, sample_shape=FLAGS.N)
    x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

    # INFERENCE
    def empirical(var_name, shape, initializer, **variable_kwargs):
        # Empirical approximation backed by a variable holding the samples.
        samples = tf.get_variable(var_name, shape, initializer=initializer,
                                  **variable_kwargs)
        return Empirical(params=samples)

    qpi = empirical("qpi/params", [FLAGS.T, FLAGS.K],
                    tf.constant_initializer(1.0 / FLAGS.K))
    qmu = empirical("qmu/params", [FLAGS.T, FLAGS.K, FLAGS.D],
                    tf.zeros_initializer())
    qsigma = empirical("qsigma/params", [FLAGS.T, FLAGS.K, FLAGS.D],
                       tf.ones_initializer())
    qc = empirical("qc/params", [FLAGS.T, FLAGS.N],
                   tf.zeros_initializer(), dtype=tf.int32)

    # Proposal distributions (constants are written for K == 2, D == 2).
    gpi = Dirichlet(concentration=tf.constant([1.4, 1.6]))
    gmu = Normal(loc=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
                 scale=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
    gsigma = InverseGamma(concentration=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
                          rate=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
    gc = Categorical(logits=tf.zeros([FLAGS.N, FLAGS.K]))

    latent_vars = {pi: qpi, mu: qmu, sigma: qsigma, c: qc}
    proposal_vars = {pi: gpi, mu: gmu, sigma: gsigma, c: gc}
    inference = ed.MetropolisHastings(
        latent_vars=latent_vars,
        proposal_vars=proposal_vars,
        data={x: x_data})

    inference.initialize()
    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

        t = info_dict['t']
        if t != 1 and t % inference.n_print != 0:
            continue

        qpi_mean, qmu_mean = sess.run([qpi.mean(), qmu.mean()])
        print("")
        print("Inferred membership probabilities:")
        print(qpi_mean)
        print("Inferred cluster means:")
        print(qmu_mean)
def fc(inputs, w_shape, b_shape):
    """Apply a fully connected layer: ``inputs @ weights + bias``.

    Both variables ("weights" and "bias") are created with tf.get_variable
    in the current variable scope and are zero-initialized as float32.

    Args:
        inputs: left operand of the matrix multiplication.
        w_shape: shape of the "weights" variable.
        b_shape: shape of the "bias" variable.

    Returns:
        The tensor ``tf.matmul(inputs, weights) + bias``.
    """
    def zeros_variable(var_name, shape):
        return tf.get_variable(var_name, shape,
                               initializer=tf.zeros_initializer(tf.float32))

    weight = zeros_variable("weights", w_shape)
    bias = zeros_variable("bias", b_shape)
    projected = tf.matmul(inputs, weight)
    return projected + bias
def evaluate_precision_recall(
    input_, labels, threshold=0.5, per_example_weights=None, name=PROVIDED, phase=Phase.train
):
    """Computes the precision and recall of the prediction vs the labels.

    Outside the training phase the selected/retrieved/relevant sums are
    accumulated into non-trainable scalar variables (collected under
    bookkeeper.GraphKeys.TEST_VARIABLES), so the returned values are running
    totals across all evaluated examples.

    Args:
      input_: A rank 2 Tensor or a Pretty Tensor holding the result of the model.
      labels: The target labels to learn as a float tensor.
      threshold: The threshold to use to decide if the prediction is true.
      per_example_weights: A Tensor with a weight per example.
      name: An optional name.
      phase: The phase of this model; non training phases compute a total
        across all examples.

    Returns:
      Precision and Recall. Each is 0 where its denominator is 0.
    """
    _ = name  # Eliminate warning, name used for namescoping by PT.

    selected, sum_retrieved, sum_relevant = _compute_precision_recall(
        input_, labels, threshold, per_example_weights)

    if phase != Phase.train:
        def make_counter(counter_name):
            return tf.get_variable(
                counter_name,
                [],
                tf.float32,
                tf.zeros_initializer(),
                collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
                trainable=False,
            )

        def accumulate(counter, batch_value):
            # Run the update on the device that hosts the counter.
            with input_.g.device(counter.device):
                return tf.assign_add(counter, batch_value)

        # Create the variables in all cases so that the load logic is easier.
        relevant_count = make_counter("relevant_count")
        retrieved_count = make_counter("retrieved_count")
        selected_count = make_counter("selected_count")

        selected = accumulate(selected_count, selected)
        sum_retrieved = accumulate(retrieved_count, sum_retrieved)
        sum_relevant = accumulate(relevant_count, sum_relevant)

    precision = tf.where(
        tf.equal(sum_retrieved, 0), tf.zeros_like(selected), selected / sum_retrieved)
    recall = tf.where(
        tf.equal(sum_relevant, 0), tf.zeros_like(selected), selected / sum_relevant)
    return (precision, recall)
def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers.

    Args:
      input_layer: tensor to normalize; the channel axis is 3 for 'NHWC'
        data and 1 otherwise (per self.data_format).
      decay: decay of the moving mean/variance.
      use_scale: when true a trainable 'gamma' is created, otherwise a
        constant 1.0 scale is used.
      epsilon: epsilon passed to tf.nn.fused_batch_norm.

    Returns:
      The normalized tensor.
    """
    channel_axis = 3 if self.data_format == 'NHWC' else 1
    num_channels = input_layer.shape[channel_axis]
    channel_shape = [num_channels]

    beta = self.get_variable('beta', channel_shape, tf.float32, tf.float32,
                             initializer=tf.zeros_initializer())
    if use_scale:
        gamma = self.get_variable('gamma', channel_shape, tf.float32, tf.float32,
                                  initializer=tf.ones_initializer())
    else:
        gamma = tf.constant(1.0, tf.float32, channel_shape)

    moving_mean = tf.get_variable(
        'moving_mean', channel_shape, tf.float32,
        initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = tf.get_variable(
        'moving_variance', channel_shape, tf.float32,
        initializer=tf.ones_initializer(), trainable=False)

    if not self.phase_train:
        # Inference: normalize with the stored moving statistics.
        bn, _, _ = tf.nn.fused_batch_norm(
            input_layer, gamma, beta,
            mean=moving_mean, variance=moving_variance,
            epsilon=epsilon, data_format=self.data_format, is_training=False)
        return bn

    # Training: normalize with batch statistics and register the moving
    # average updates under UPDATE_OPS.
    bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
        input_layer, gamma, beta,
        epsilon=epsilon, data_format=self.data_format, is_training=True)
    tracked = ((moving_mean, batch_mean), (moving_variance, batch_variance))
    for moving_stat, batch_stat in tracked:
        update_op = moving_averages.assign_moving_average(
            moving_stat, batch_stat, decay=decay, zero_debias=False)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_op)
    return bn
def mnist_model(train_data_flat, train_labels, x0):
    """Creates a simple linear model that evaluates cross-entropy loss and
    gradient on MNIST dataset. Mirrors 'linear' model from train-on-mnist.lua

    Only a `batchSize`-row, `x_size`-column window of the data (starting at
    feature column `x_offset`) is loaded into the graph.

    Relies on the module-level names `dtype`, `sess` and `env`.

    Args:
        train_data_flat: 2-D array of flattened images, indexed as
            [example, feature].
        train_labels: 1-based class labels (1 is subtracted before one-hot
            encoding).
        x0: ITensor parameter vector; its `tf_handle` and `dtype` are used to
            create the parameter input of the graph.

    Returns:
        A Python callable that accepts an ITensor parameter vector and
        returns [ITensor loss, ITensor gradient].
    """
    batchSize = 1
    x_size = 10        # number of input features used per example
    x_offset = 512     # first feature column of the window
    n_classes = 10

    # reshape flat parameter vector into W and b parameter matrices
    x_placeholder, param = tf.get_session_tensor(x0.tf_handle, x0.dtype)
    W_flat = tf.slice(param, [0], [x_size * n_classes])
    W = tf.reshape(W_flat, [x_size, n_classes])
    b_flat = tf.slice(param, [x_size * n_classes], [n_classes])
    b = tf.reshape(b_flat, [1, n_classes])

    # create model
    # tf.zeros gives the initial value directly; the old
    # tf.zeros_initializer(shape, dtype) call form fails on TF >= 1.0.
    data = tf.Variable(tf.zeros((batchSize, x_size), dtype=dtype))
    # Targets are one-hot labels, so they are sized by the class count
    # (previously x_size, which only worked because both equal 10).
    targets = tf.Variable(tf.zeros((batchSize, n_classes), dtype=dtype))
    logits = tf.matmul(data, W) + b
    # Keyword arguments are required from TF 1.0 on and also accepted before.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)

    # create loss and gradient ops
    cross_entropy_loss = tf.reduce_mean(cross_entropy)
    Wnorm = tf.reduce_sum(tf.square(W))
    bnorm = tf.reduce_sum(tf.square(b))
    loss = cross_entropy_loss + (bnorm + Wnorm)/2
    [grad] = tf.gradients(loss, [param])

    # get handle ops that will be used to initialize ITensors
    loss_handle_tensor = tf.get_session_handle(loss)
    grad_handle_tensor = tf.get_session_handle(grad)

    # initialize data and targets
    data_placeholder = tf.placeholder(dtype=dtype)
    data_init = data.assign(data_placeholder)
    labels_placeholder = tf.placeholder(shape=(batchSize,), dtype=tf.int32)
    labels_onehot = tf.one_hot(labels_placeholder - 1, n_classes, dtype=dtype)
    targets_init = targets.assign(labels_onehot)
    data_window = train_data_flat[:batchSize, x_offset:x_offset + x_size]
    sess.run(data_init, feed_dict={data_placeholder: data_window})
    sess.run(targets_init,
             feed_dict={labels_placeholder: train_labels[:batchSize]})

    # Create our callable that works on persistent Tensors
    def eval_model(x):
        loss_handle, grad_handle = sess.run(
            [loss_handle_tensor, grad_handle_tensor],
            feed_dict={x_placeholder: x.tf_handle})
        return [env.handle_to_itensor(loss_handle),
                env.handle_to_itensor(grad_handle)]

    return eval_model
def module_with_variables():
    """Create two zero-initialized variables in the current graph.

    "weights" has shape [3]; "partition" has shape [4] and is created with
    tf.fixed_size_partitioner(3). Nothing is returned.
    """
    plain_spec = dict(
        name="weights",
        shape=[3],
        initializer=tf.zeros_initializer(),
    )
    tf.get_variable(**plain_spec)

    partitioned_spec = dict(
        name="partition",
        shape=[4],
        initializer=tf.zeros_initializer(),
        partitioner=tf.fixed_size_partitioner(3),
    )
    tf.get_variable(**partitioned_spec)
def mnistCost(train_data_flat, train_labels, x0, env):
    """Creates a simple linear model that evaluates cross-entropy loss and
    gradient on MNIST dataset. Mirrors 'linear' model from train-on-mnist.lua

    Result is a Python callable that accepts ITensor parameter vector and
    returns ITensor loss and gradient. It works as a plug-in replacement of
    "opfunc" in train-on-mnist, i.e. you can do:

        x0 = ti.ones(...)
        opfunc = mnistCost(train_data_flat, train_labels, x0, env)
        loss, grad = opfunc(x0)
        x1 = lbfgs(opfunc, ...)

    Args:
        train_data_flat: flattened images; the first `batchSize` rows are
            loaded into the graph (1024 features per row are expected by the
            model).
        train_labels: 1-based class labels (1 is subtracted before one-hot
            encoding).
        x0: ITensor parameter vector used to create the graph input.
        env: environment providing `make_input`, `make_function` and `sess`.
    """
    batchSize = 100
    n_features = 1024
    n_classes = 10
    n_weights = n_features * n_classes

    # create our input end-point, this is where ITensor->Tensor conversion
    # happens
    param = env.make_input(x0)

    # reshape flat parameter vector into W and b parameter matrices
    W_flat = tf.slice(param, [0], [n_weights])
    W = tf.reshape(W_flat, [n_features, n_classes])
    b_flat = tf.slice(param, [n_weights], [n_classes])
    b = tf.reshape(b_flat, [1, n_classes])

    # create model
    # tf.zeros gives the initial value directly; the old
    # tf.zeros_initializer(shape) call form fails on TF >= 1.0.
    data = tf.Variable(tf.zeros((batchSize, n_features)))
    targets = tf.Variable(tf.zeros((batchSize, n_classes)))
    logits = tf.matmul(data, W) + b
    # Keyword arguments are required from TF 1.0 on and also accepted before.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)

    # create loss and gradient ops
    cross_entropy_loss = tf.reduce_mean(cross_entropy)
    Wnorm = tf.reduce_sum(tf.square(W))
    bnorm = tf.reduce_sum(tf.square(b))
    loss = cross_entropy_loss + (bnorm + Wnorm)/2
    [grad] = tf.gradients(loss, [param])

    # initialize data and targets: load the first batchSize examples into
    # the tf Variables
    data_placeholder = tf.placeholder(dtype=tf.float32)
    data_init = data.assign(data_placeholder)
    labels_placeholder = tf.placeholder(shape=(batchSize,), dtype=tf.int32)
    labels_onehot = tf.one_hot(labels_placeholder - 1, n_classes)
    targets_init = targets.assign(labels_onehot)
    env.sess.run(data_init,
                 feed_dict={data_placeholder: train_data_flat[:batchSize]})
    env.sess.run(targets_init,
                 feed_dict={labels_placeholder: train_labels[:batchSize]})

    # create imperative wrapper of tensorflow graph we just constructed
    # ITensor input is automatically converted and fed into param
    # and outputs are converted to ITensor objects and returned
    return env.make_function(inputs=[param], outputs=[loss, grad])
def linear_zeros(name, x, width, logscale_factor=3):
    """Linear layer whose output is exactly zero at initialization.

    Computes ``(x @ W + b) * exp(logs * logscale_factor)`` where "W", "b"
    and "logs" are all zero-initialized variables in scope `name`.

    Args:
        name: variable scope name.
        x: input tensor; its input width is read from axis 1.
        width: number of output units.
        logscale_factor: multiplier applied to "logs" before exponentiation.

    Returns:
        The projected, biased and rescaled tensor.
    """
    with tf.variable_scope(name):
        in_width = int(x.get_shape()[1])

        kernel = tf.get_variable("W", [in_width, width], tf.float32,
                                 initializer=tf.zeros_initializer())
        projected = tf.matmul(x, kernel)

        bias = tf.get_variable("b", [1, width],
                               initializer=tf.zeros_initializer())
        shifted = projected + bias

        log_scale = tf.get_variable("logs", [1, width],
                                    initializer=tf.zeros_initializer())
        scale = tf.exp(log_scale * logscale_factor)
        return shifted * scale
def module_with_variables():
    """Define a module with two zero-initialized variables and a constant output.

    "weights" has shape [3]; "partition" has shape [4] and is created with
    tf.fixed_size_partitioner(3). The module signature exposes the constant
    1.0 as its output.
    """
    # (variable name, shape, extra tf.get_variable keyword arguments)
    variable_specs = (
        ("weights", [3], {}),
        ("partition", [4], {"partitioner": tf.fixed_size_partitioner(3)}),
    )
    for var_name, shape, extra in variable_specs:
        tf.get_variable(
            name=var_name,
            shape=shape,
            initializer=tf.zeros_initializer(),
            **extra)

    module_output = tf.constant(1.0)
    hub.add_signature(outputs=module_output)
def Weight_Generate(self, initializer_Std = 0.1):
    """Create the weight and bias variables of the generator and discriminator.

    Weights go into self.weightMatrix_Dict and biases into
    self.biasMatrix_Dict, both keyed by (network, layer) tuples. Weights are
    float32 truncated-normal (stddev `initializer_Std`); biases are float32
    zeros. Images are 28*28 values wide.

    Args:
        initializer_Std: standard deviation of the weight initializer.
    """
    self.weightMatrix_Dict = {}
    self.biasMatrix_Dict = {}

    image_Size = 28*28

    # (network, layer key, shape); the variable is named
    # "Weight_<network>_<layer key>".
    weight_Specs = (
        ("Generator", "IH", (self.noise_Size, self.hidden_Size)),
        ("Generator", "HO", (self.hidden_Size, image_Size)),
        ("Discriminator", "IH", (image_Size, self.hidden_Size)),
        ("Discriminator", "HO", (self.hidden_Size, 1)),
    )
    for network, layer, shape in weight_Specs:
        self.weightMatrix_Dict[network, layer] = tf.get_variable(
            name = "Weight_%s_%s" % (network, layer),
            shape = shape,
            dtype = tf.float32,
            initializer = tf.truncated_normal_initializer(stddev=initializer_Std)
            )

    # (network, layer key, variable-name suffix, shape); bias variables are
    # named after the weight layer they follow ("IH" / "HO").
    bias_Specs = (
        ("Generator", "H", "IH", (1, self.hidden_Size)),
        ("Generator", "O", "HO", (1, image_Size)),
        ("Discriminator", "H", "IH", (1, self.hidden_Size)),
        ("Discriminator", "O", "HO", (1, 1)),
    )
    for network, layer, suffix, shape in bias_Specs:
        self.biasMatrix_Dict[network, layer] = tf.get_variable(
            name = "Bias_%s_%s" % (network, suffix),
            shape = shape,
            dtype = tf.float32,
            initializer = tf.zeros_initializer()
            )
def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
    """A normalizer that ensures that observations are approximately distributed according to
    a standard Normal distribution (i.e. have mean zero and variance one).

    Builds the local numpy accumulators, the TensorFlow variables holding
    the running sum / sum of squares / count and the derived mean / std,
    plus the ops that update them.

    Args:
        size (int): the size of the observation to be normalized
        eps (float): a small constant that avoids underflows; the stored
            std never drops below it
        default_clip_range (float): normalized observations are clipped to be in
            [-default_clip_range, default_clip_range]
        sess (object): the TensorFlow session to be used; defaults to
            tf.get_default_session()
    """
    self.size = size
    self.eps = eps
    self.default_clip_range = default_clip_range
    self.sess = tf.get_default_session() if sess is None else sess

    # Local (numpy) accumulators.
    self.local_sum = np.zeros(self.size, np.float32)
    self.local_sumsq = np.zeros(self.size, np.float32)
    self.local_count = np.zeros(1, np.float32)

    def statistic(var_name, shape, initializer):
        # Non-trainable float32 variable holding one running statistic.
        return tf.get_variable(
            initializer=initializer, shape=shape, name=var_name,
            trainable=False, dtype=tf.float32)

    self.sum_tf = statistic('sum', self.local_sum.shape, tf.zeros_initializer())
    self.sumsq_tf = statistic('sumsq', self.local_sumsq.shape, tf.zeros_initializer())
    # The count starts at one, so the divisions below are defined from the start.
    self.count_tf = statistic('count', self.local_count.shape, tf.ones_initializer())
    self.mean = statistic('mean', (self.size,), tf.zeros_initializer())
    self.std = statistic('std', (self.size,), tf.ones_initializer())

    # Placeholders carrying the increments to add to the running totals.
    self.count_pl = tf.placeholder(name='count_pl', shape=(1,), dtype=tf.float32)
    self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size,), dtype=tf.float32)
    self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size,), dtype=tf.float32)

    self.update_op = tf.group(
        self.count_tf.assign_add(self.count_pl),
        self.sum_tf.assign_add(self.sum_pl),
        self.sumsq_tf.assign_add(self.sumsq_pl)
    )

    # mean = sum / count; std = sqrt(max(eps^2, E[x^2] - E[x]^2))
    assign_mean = tf.assign(self.mean, self.sum_tf / self.count_tf)
    variance_floor = tf.square(self.eps)
    variance = self.sumsq_tf / self.count_tf - tf.square(self.sum_tf / self.count_tf)
    assign_std = tf.assign(self.std, tf.sqrt(tf.maximum(variance_floor, variance)))
    self.recompute_op = tf.group(assign_mean, assign_std)

    self.lock = threading.Lock()
def state(self, state_name):
    """Returns, creating if necessary, a state variable with the given name.

    On first request two variables are created and cached in
    self.requested_tensors[state_name] as a (count, value) pair:
    a non-trainable int32 scalar 'count_<state_name>' and a float32
    [batch_size, node_depth] variable named `state_name`, both zeros.
    Only the value variable is returned.
    """
    cache = self.requested_tensors
    if state_name in cache:
        return cache[state_name][1]

    count = tf.get_variable('count_%s' % state_name, [], tf.int32,
                            tf.zeros_initializer(), trainable=False)
    value = tf.get_variable(state_name,
                            [self.batch_size, self.node_depth],
                            tf.float32, tf.zeros_initializer())
    cache[state_name] = (count, value)
    return value
def _init_parameters(self):
    """Create every parameter variable that has not been supplied yet.

    Attributes that are still None are filled in:
      * Bmean, Bvar: [num_units] variables, zero-initialized;
      * Wmean, Wvar, Wmean_r, Wvar_r: [num_units, num_units] variables using
        tf.uniform_unit_scaling_initializer with the module-level
        `weight_init_factor`.
    Each variable is named after its attribute.
    """
    def make_bias(var_name):
        return vs.get_variable(
            var_name, [self._num_units],
            initializer = tf.zeros_initializer())

    def make_weight(var_name):
        return vs.get_variable(
            var_name, [self._num_units, self._num_units],
            initializer = tf.uniform_unit_scaling_initializer(factor=weight_init_factor))

    # (attribute / variable name, factory), in creation order.
    parameter_factories = (
        ("Bmean", make_bias),
        ("Bvar", make_bias),
        ("Wmean", make_weight),
        ("Wvar", make_weight),
        ("Wmean_r", make_weight),
        ("Wvar_r", make_weight),
    )
    for attr_name, factory in parameter_factories:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, factory(attr_name))
def __init__(self,dim,name=None):
    """Create the variables of a cross-compress unit.

    Four (dim, 1) float32 weights (weight_vv, weight_ev, weight_ve,
    weight_ee) and two zero-initialized biases of shape `dim` (bias_v,
    bias_e) are created in the variable scope `self.name`. `self.vars`
    lists the four weights only.

    Args:
        dim: size of the vectors the unit operates on.
        name: optional name forwarded to the base class constructor.
    """
    super(CrossCompressUnit,self).__init__(name)
    self.dim = dim

    def weight(var_name):
        return tf.get_variable(name=var_name, shape=(dim, 1), dtype=tf.float32)

    def bias(var_name):
        return tf.get_variable(name=var_name, shape=dim,
                               initializer=tf.zeros_initializer())

    with tf.variable_scope(self.name):
        self.weight_vv = weight('weight_vv')
        self.weight_ev = weight('weight_ev')
        self.weight_ve = weight('weight_ve')
        self.weight_ee = weight('weight_ee')
        self.bias_v = bias('bias_v')
        self.bias_e = bias('bias_e')

    # Only the weights are tracked here; the biases are left out.
    self.vars = [
        self.weight_vv,
        self.weight_ev,
        self.weight_ve,
        self.weight_ee,
    ]
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """See base class.

    For each ``(grad, param)`` pair in which neither element is ``None``,
    this creates (or fetches) the non-trainable ``<param>/adam_m`` and
    ``<param>/adam_v`` slots, computes the Adam update, optionally adds
    decoupled weight decay, and queues assignments for the parameter and
    both slots.

    ``global_step`` is accepted but is neither read nor incremented in this
    method.

    Returns:
      A single op grouping all queued assignments.
    """
    update_ops = []
    for grad, param in grads_and_vars:
        if grad is None or param is None:
            continue

        param_name = self._get_variable_name(param.name)

        # First- and second-moment slots, created in that order.
        m, v = [
            tf.get_variable(
                name=param_name + suffix,
                shape=param.shape.as_list(),
                dtype=tf.float32,
                trainable=False,
                initializer=tf.zeros_initializer())
            for suffix in ("/adam_m", "/adam_v")
        ]

        # Standard Adam update.
        m_decayed = tf.multiply(self.beta_1, m)
        m_fresh = tf.multiply(1.0 - self.beta_1, grad)
        next_m = m_decayed + m_fresh

        v_decayed = tf.multiply(self.beta_2, v)
        v_fresh = tf.multiply(1.0 - self.beta_2, tf.square(grad))
        next_v = v_decayed + v_fresh

        update = next_m / (tf.sqrt(next_v) + self.epsilon)

        # Just adding the square of the weights to the loss function is *not*
        # the correct way of using L2 regularization/weight decay with Adam,
        # since that will interact with the m and v parameters in strange
        # ways.
        #
        # Instead we want to decay the weights in a manner that doesn't
        # interact with the m/v parameters. This is equivalent to adding the
        # square of the weights to the loss with plain (non-momentum) SGD.
        if self._do_use_weight_decay(param_name):
            update += self.weight_decay_rate * param

        next_param = param - self.learning_rate * update

        update_ops.append(param.assign(next_param))
        update_ops.append(m.assign(next_m))
        update_ops.append(v.assign(next_v))

    return tf.group(*update_ops, name=name)
def _create_cqa_modules(is_training, predictions, update_num):
    """Build the CQA model and a 2-way softmax classifier on its output.

    Sentence representations are selected from
    ``predictions["output_layer"]`` in one of three ways:

    * ``input_extract`` and ``input3_extract`` both ``None``: gather two
      sentences with ``input1_extract`` / ``input2_extract``.
    * only ``input3_extract`` ``None``: mask the encoder output where
      ``input_extract`` equals 1 (sentence 1) or 2 (sentence 2).
    * otherwise: gather three sentences with ``input{1,2,3}_extract``.

    Args:
      is_training: forwarded to ``CQAMODEL``; also enables dropout
        (keep_prob 0.9) before the classifier.
      predictions: dict providing the keys read below.
      update_num: forwarded to ``CQAMODEL``.

    Returns:
      ``(total_loss, logits, prob)``: mean cross-entropy over the batch, the
      classifier logits, and their softmax.
    """
    num_labels = 2

    input_extract = predictions["input_extract"]
    input1_extract = predictions["input1_extract"]
    input2_extract = predictions["input2_extract"]
    input3_extract = predictions["input3_extract"]
    embedding = predictions["embedding"]
    input_mask = predictions["input_mask"]
    q_type = predictions["q_type"]  # read (key must exist) but unused below
    labels = predictions["label_ids"]
    first_token_source = predictions["last_layer"]
    encoder_output = predictions["output_layer"]

    def _nonzero_mask(positions):
        # 1.0 where the position index is non-zero, else 0.0.
        return tf.cast(tf.not_equal(positions, 0), tf.float32)

    def _tag_mask(tag):
        # 1.0 where input_extract carries the given tag, else 0.0.
        return tf.cast(tf.equal(input_extract, tag), tf.float32)

    sent3 = None
    sent3_mask = None
    has_third = input3_extract is not None

    if not has_third and input_extract is not None:
        # Tag-based selection: keep the full sequence, zero other tokens.
        sent1_mask = _tag_mask(1)
        sent2_mask = _tag_mask(2)
        sent1 = encoder_output * tf.expand_dims(sent1_mask, axis=-1)
        sent2 = encoder_output * tf.expand_dims(sent2_mask, axis=-1)
    else:
        # Index-based selection: all masks are built before any gather.
        sent1_mask = _nonzero_mask(input1_extract)
        sent2_mask = _nonzero_mask(input2_extract)
        if has_third:
            sent3_mask = _nonzero_mask(input3_extract)
        sent1 = tf.batch_gather(encoder_output, input1_extract)
        sent2 = tf.batch_gather(encoder_output, input2_extract)
        if has_third:
            sent3 = tf.batch_gather(encoder_output, input3_extract)

    # Vector at sequence position 0 of the "last_layer" tensor.
    mark0 = tf.squeeze(first_token_source[:, 0:1, :], axis=1)

    model = CQAMODEL(
        is_training=is_training,
        all_sent=encoder_output,
        input_mask=input_mask,
        sent1=sent1,
        sent2=sent2,
        sent3=sent3,
        sent1_mask=sent1_mask,
        sent2_mask=sent2_mask,
        sent3_mask=sent3_mask,
        mark0=mark0,
        mark1=None,
        mark2=None,
        mark3=None,
        embedding=embedding,
        update_num=update_num)

    result = model.get_output()  # (B, dim)

    hidden_size = result.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights_v2", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias_v2", [num_labels],
        initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            result = tf.nn.dropout(result, keep_prob=0.9)

        logits = tf.nn.bias_add(
            tf.matmul(result, output_weights, transpose_b=True),
            output_bias)
        prob = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(
            labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(
            one_hot_labels * log_probs, axis=-1)
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, logits, prob
def mobilenet(inputs,
              num_classes=1001,
              prediction_fn=slim.softmax,
              reuse=None,
              scope='Mobilenet',
              base_only=False,
              **mobilenet_args):
    """Mobilenet model for classification, supports both V1 and V2.

    Note: default mode is inference, use mobilenet.training_scope to create
    training network.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is omitted and the input features to the logits layer (before dropout)
        are returned instead.
      prediction_fn: a function to get predictions out of logits
        (default softmax).
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      base_only: if True will only create the base of the network (no pooling
        and no logits).
      **mobilenet_args: (definition of the convolution layers) passed to
        mobilenet_base verbatim.
        - conv_defs: list of conv defs
        - multiplier: Float multiplier for the depth (number of channels)
          for all convolution ops. The value must be greater than zero. Typical
          usage will be to set this value in (0, 1) to reduce the number of
          parameters or computation cost of the model.
        - output_stride: will ensure that the last layer has at most total
          stride. If the architecture calls for more stride than that provided
          (e.g. output_stride=16, but the architecture has 5 stride=2
          operators), it will replace output_stride with fractional
          convolutions using Atrous Convolutions.

    Returns:
      One of three shapes, depending on the arguments:
      - base_only is true: ``(net, end_points, netFirst)``, exactly as
        returned by ``mobilenet_base``.
      - num_classes is 0 or None: ``(net, end_points)`` where ``net`` is the
        globally pooled feature tensor.
      - otherwise: ``(logits, end_points)`` where ``logits`` are the
        pre-softmax activations, a tensor of size [batch_size, num_classes],
        and ``end_points`` maps network components to activation tensors.

    Raises:
      ValueError: Input rank is invalid.
    """
    # Dropout follows the 'is_training' entry of mobilenet_args (default off).
    is_training = mobilenet_args.get('is_training', False)
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) != 4:
        raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))

    with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
        inputs = tf.identity(inputs, 'input')
        # netFirst is the third value produced by this file's mobilenet_base;
        # it is only handed back to the caller on the base_only path.
        net, end_points, netFirst = mobilenet_base(inputs, scope=scope,
                                                   **mobilenet_args)
        if base_only:
            return net, end_points, netFirst

        net = tf.identity(net, name='embedding')

        with tf.variable_scope('Logits'):
            net = global_pool(net)
            end_points['global_pool'] = net
            if not num_classes:
                return net, end_points
            net = slim.dropout(net, scope='Dropout', is_training=is_training)
            # 1 x 1 x num_classes
            # Note: legacy scope name.
            logits = slim.conv2d(
                net,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                biases_initializer=tf.zeros_initializer(),
                scope='Conv2d_1c_1x1')

            # Drop the two singleton spatial axes left by global pooling.
            logits = tf.squeeze(logits, [1, 2])

            logits = tf.identity(logits, name='output')
        end_points['Logits'] = logits
        if prediction_fn:
            end_points['Predictions'] = prediction_fn(logits, 'Predictions')
    return logits, end_points
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.2,
               spatial_squeeze=True,
               scope='alexnet_v2',
               global_pool=False):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d
          layers. To use in classification mode, resize input to 224x224 or
          set global_pool=True. To use in fully convolutional mode, set
          spatial_squeeze to false.
          The LRN layers have been removed and change the initializers from
          random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: the number of predicted classes. If 0 or None, the logits
        layer is omitted and the input features to the logits layer are
        returned instead.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training. The same value is used for
        'dropout6', 'dropout7' and 'dropout8'.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the logits. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.
      global_pool: Optional boolean flag. If True, the input to the
        classification layer is avgpooled to size 1x1, for any input size.
        (This is not part of the original AlexNet.)

    Returns:
      net: the output of the logits layer (if num_classes is a non-zero
        integer), or the non-dropped-out input to the logits layer (if
        num_classes is 0 or None).
      end_points: a dict of tensors with intermediate activations.
    """
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                              scope='conv1')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = slim.conv2d(net, 192, [5, 5], scope='conv2')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = slim.conv2d(net, 384, [3, 3], scope='conv3')
            net = slim.conv2d(net, 384, [3, 3], scope='conv4')
            net = slim.conv2d(net, 256, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=tf.constant_initializer(0.1)):
                net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                                  scope='fc6')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout6')
                net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                # Convert end_points_collection into a end_point dict.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if global_pool:
                    # NOTE(review): `keep_dims` is the older spelling of
                    # `keepdims`; confirm the TF version in use accepts it.
                    net = tf.reduce_mean(net, [1, 2], keep_dims=True,
                                         name='global_pool')
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout7')
                    net = slim.conv2d(
                        net,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        biases_initializer=tf.zeros_initializer(),
                        scope='fc8')
                    # NOTE(review): this dropout is applied to the fc8 output
                    # itself (the logits), so the returned `net` is dropped
                    # out while is_training is true -- confirm this is
                    # intended.
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout8')
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                    end_points[sc.name + '/fc8'] = net
            return net, end_points
def _init(self, ob_space, ac_space, hid_size, num_hid_layers,
          gaussian_fixed_var=True):
    """Build the value ('vf') and policy ('pol') MLPs and the act function.

    Observations are normalised with a ``RunningMeanStd`` filter and clipped
    to [-5, 5]. Both networks are stacks of ``num_hid_layers`` tanh dense
    layers of width ``hid_size``. The policy parameters are clipped to
    [-5, 5] before being turned into a distribution.

    Sets ``num_hid_layers``, ``pdtype``, ``ob_rms``, ``vpred``, ``pd``,
    ``state_in``, ``state_out`` and ``_act`` on ``self``.

    Args:
      ob_space: observation space; must be a ``gym.spaces.Box``.
      ac_space: action space, used to pick the distribution type.
      hid_size: units per hidden layer.
      num_hid_layers: number of hidden layers in each network.
      gaussian_fixed_var: if true and ``ac_space`` is a Box, use a
        state-independent ``logstd`` variable instead of predicting it.
    """
    assert isinstance(ob_space, gym.spaces.Box)

    # Add the variable to track layers
    self.num_hid_layers = num_hid_layers

    pdtype = make_pdtype(ac_space)
    self.pdtype = pdtype
    sequence_length = None

    ob = U.get_placeholder(
        name="ob",
        dtype=tf.float32,
        shape=[sequence_length] + list(ob_space.shape))

    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)

    def _hidden_stack(features):
        # tanh MLP trunk; layer names fc1..fcN resolve in the active scope.
        for idx in range(num_hid_layers):
            features = tf.nn.tanh(
                tf.layers.dense(
                    features,
                    hid_size,
                    name="fc%i" % (idx + 1),
                    kernel_initializer=U.normc_initializer(1.0)))
        return features

    with tf.variable_scope('vf'):
        obz = tf.clip_by_value(
            (ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
        value_features = _hidden_stack(obz)
        self.vpred = tf.layers.dense(
            value_features,
            1,
            name='final',
            kernel_initializer=U.normc_initializer(0.1))[:, 0]

    with tf.variable_scope('pol'):
        policy_features = _hidden_stack(obz)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            mean = tf.layers.dense(
                policy_features,
                pdtype.param_shape()[0] // 2,
                name='final',
                kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd",
                shape=[1, pdtype.param_shape()[0] // 2],
                initializer=tf.zeros_initializer())
            # `mean * 0.0` broadcasts logstd to the batch shape of mean.
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(
                policy_features,
                pdtype.param_shape()[0],
                name='final',
                kernel_initializer=U.normc_initializer(0.01))

        # NOTE(review): the original nesting of this clip could not be
        # recovered; it is applied to both branches here -- confirm.
        pdparam = tf.clip_by_value(pdparam, -5.0, 5.0)

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    # Outputs: action, value prediction, and exp(log-probability of action).
    self._act = U.function(
        [stochastic, ob],
        [ac, self.vpred, tf.exp(self.pd.logp(ac))])
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    """Build the P-Net graph and return either its losses or predictions.

    Three 3x3 convolutions (10, 16, 32 outputs; a 2x2 max-pool follows the
    first) feed three 1x1 heads: a 2-channel softmax classifier, a 4-channel
    box regressor and a 10-channel landmark regressor. The intermediate
    tensor shapes are printed while the graph is built.

    PReLU is used as the activation. Unlike a leaky ReLU, whose negative
    slope is fixed, PReLU learns the slope with the other parameters; see
    https://datascience.stackexchange.com/questions/18583/what-is-the-difference-between-leakyrelu-and-prelu

    Args:
      inputs: image batch fed to the first convolution.
      label: class labels, used only when ``training`` is true.
      bbox_target: box regression targets, used only when training.
      landmark_target: landmark regression targets, used only when training.
      training: selects the return value (see below).

    Returns:
      If ``training``: ``(cls_loss, bbox_loss, landmark_loss, L2_loss,
      accuracy)``. Otherwise ``(cls_prob, bbox_pred, landmark_pred)`` with
      axis 0 squeezed away.
    """
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            # slim does not have a zeros initializer, so use the tf one.
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print("PNet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=10, kernel_size=[3, 3],
                          stride=1, scope='conv1')
        print("PNet conv1 shape: ", net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2,
                              padding='SAME', scope='pool1')
        print("PNet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3],
                          stride=1, scope='conv2')
        print("PNet conv2 shape: ", net.get_shape())
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3],
                          stride=1, scope='conv3')
        print("PNet conv3 shape: ", net.get_shape())

        # Final three 1x1 convs: H*W*2 classifier, H*W*4 bbox,
        # H*W*10 landmark prediction.
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1],
                              stride=1, scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        # Fix: report the classifier head's shape. The original printed
        # net.get_shape() (the conv3 output) under this label.
        print('P_Net conv4_1 shape ', conv4_1.get_shape())

        # Important: the scope name should not be the same as the Python
        # variable name.
        bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1],
                                stride=1, scope='conv4_2',
                                activation_fn=None)
        print('P_Net bbox_pred conv layer shape ', bbox_pred.get_shape())

        landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1],
                                    stride=1, scope='conv4_3',
                                    activation_fn=None)
        print('P_Net ladmark conv layer shape', landmark_pred.get_shape())

        if training:
            # Squeeze axes 1 and 2 so each head is batch x channels
            # (batch x 2 for the face / non-face classifier). tf.squeeze
            # raises unless those axes have size 1.
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)

            # Bounding-box loss.
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)

            # Landmark loss.
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name='landmark_pred')
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)

            accuracy = cal_accuracy(cls_prob, label)
            # tf.add_n sums all regularization losses element-wise.
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            # Test path: drops axis 0, which must therefore have size 1
            # (batch_size=1).
            cls_prob_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_prob_test, bbox_pred_test, landmark_pred_test
test_lbls = np.array([int(lbl) for (lbl, img) in data], np.int32) ########################################################## #Train model ########################################################## graph = tf.Graph() with graph.as_default(): images = tf.placeholder(tf.float32, [None, 28 * 28], 'images') labels = tf.placeholder(tf.int32, [None], 'labels') #Define the encoder model with tf.variable_scope('encoder'): W = tf.get_variable('W', [28 * 28, thought_vector_size], tf.float32, tf.random_normal_initializer(stddev=0.1)) b = tf.get_variable('b', [thought_vector_size], tf.float32, tf.zeros_initializer()) thoughts = tf.tanh(tf.matmul(images, W) + b) # The thought vector #Define the decoder model with tf.variable_scope('decoder'): # The transpose of the weight matrix of the encoder layer is used as weight for this layer # W = tf.get_variable('W', [thought_vector_size, 28*28], tf.float32, tf.random_normal_initializer(stddev=0.1)) b = tf.get_variable('b', [28 * 28], tf.float32, tf.zeros_initializer()) dec_logits = tf.matmul(thoughts, tf.transpose(W)) + b dec_outs = tf.sigmoid(dec_logits) # The output image #Define the classifier model with tf.variable_scope('classifier'): W = tf.get_variable('W', [thought_vector_size, 10], tf.float32, tf.random_normal_initializer(stddev=0.1)) b = tf.get_variable('b', [10], tf.float32, tf.zeros_initializer())
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=tf.zeros_initializer(),
                    biases_regularizer=None,
                    do_spec_norm=False,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Fully connected layer with optional spectral normalization.

    Adds support for spectral normalization following
    https://arxiv.org/abs/1802.05957. For the non-spectral-normed layer, see
    tensorflow.contrib.layers.python.layers.fully_connected for doc.

    The only addition is ``do_spec_norm``: when true the layer is built with
    ``SpectralNormedDense`` instead of the stock ``Dense`` class.

    Raises:
      ValueError: if ``num_outputs`` is not an integer.
    """
    if not isinstance(num_outputs, layers.six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    # ***Added section***
    if do_spec_norm:
        dense_cls = SpectralNormedDense
    else:
        dense_cls = layers.core_layers.Dense
    # ***Added section ends***

    # Map Keras-style variable names onto the contrib-style names.
    renaming_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'fully_connected', [inputs],
                           reuse=reuse,
                           custom_getter=renaming_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)

        # A bias is used only without a normalizer and with an initializer.
        wants_bias = not normalizer_fn and biases_initializer

        layer = dense_cls(units=num_outputs,
                          activation=None,
                          use_bias=wants_bias,
                          kernel_initializer=weights_initializer,
                          bias_initializer=biases_initializer,
                          kernel_regularizer=weights_regularizer,
                          bias_regularizer=biases_regularizer,
                          activity_regularizer=None,
                          trainable=trainable,
                          name=sc.name,
                          dtype=inputs.dtype.base_dtype,
                          _scope=sc,
                          _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(
            layer.kernel, variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(
                layer.bias, variables_collections, 'biases')

        # Apply normalizer function / layer, then the activation.
        if normalizer_fn is not None:
            outputs = normalizer_fn(outputs, **(normalizer_params or {}))
        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return layer_utils.collect_named_outputs(
            outputs_collections, sc.name, outputs)
def deform_conv_2d(inputs,
                   num_outputs,
                   kernel_size=3,
                   stride=1,
                   dilate_rate=1,
                   deformable_group=1,
                   data_format='channels_first',
                   no_bias=True,
                   name=None):
    """2-D deformable convolution with offsets predicted from the input.

    A regular convolution first predicts
    ``2 * deformable_group * kernel_size**2`` offset channels. These are
    passed to ``deform_conv_op`` together with a Glorot-initialised kernel of
    shape ``(num_outputs, in_channels, kernel_size, kernel_size)``.

    All computation is done in channels-first layout. A ``'channels_last'``
    input is transposed on the way in and the result is transposed back on
    the way out.

    Args:
      inputs: 4-D input tensor laid out according to ``data_format``.
      num_outputs: number of output channels.
      kernel_size: spatial size of the (square) kernel.
      stride: spatial stride, applied to both axes.
      dilate_rate: dilation rate, applied to both axes.
      deformable_group: number of deformable groups for the offsets.
      data_format: ``'channels_first'`` or ``'channels_last'``.
      no_bias: if false, a zero-initialised per-channel bias is added.
      name: optional variable-scope name (default ``'deform_conv'``).

    Returns:
      The convolved tensor in the same layout as ``inputs``.
    """
    channels_last = 'channels_last' == data_format

    with tf.variable_scope(name, 'deform_conv'):
        if channels_last:
            inputs = tf.transpose(inputs, [0, 3, 1, 2], name='trans')

        offset = tf.layers.conv2d(inputs,
                                  2 * deformable_group * kernel_size**2,
                                  kernel_size,
                                  padding='SAME',
                                  dilation_rate=(dilate_rate, dilate_rate),
                                  strides=(stride, stride),
                                  data_format='channels_first')

        kernel = tf.get_variable(
            name='kernel',
            shape=(num_outputs, inputs.get_shape().as_list()[1],
                   kernel_size, kernel_size),
            initializer=tf.glorot_uniform_initializer())

        if not no_bias:
            bias_var = tf.get_variable(name='bias',
                                       shape=(1, num_outputs, 1, 1),
                                       initializer=tf.zeros_initializer())

        res = deform_conv_op(inputs,
                             filter=kernel,
                             offset=offset,
                             rates=[1, 1, dilate_rate, dilate_rate],
                             padding='SAME',
                             strides=[1, 1, stride, stride],
                             num_groups=1,
                             deformable_group=deformable_group)

        # Fix: the bias is shaped (1, C, 1, 1), so it must be added while the
        # result is still channels-first. It used to be added after the
        # transpose back to NHWC, where it broadcast over the wrong axes.
        if not no_bias:
            res = res + bias_var

        if channels_last:
            res = tf.transpose(res, [0, 2, 3, 1], name='trans_inv')

        return res
def _one_step(self, x):
    """Run one time step of the network.

    The step proceeds as follows:
      1. from top to bottom, the 'R's are updated
      2. the next frame is predicted, and the next input is fed
      3. from bottom to top, the 'E's are updated

    On the first call (``self.stack_E is None``) the 'R' and 'E' stacks are
    created as zero tensors and every R-block's state is reset.

    Args:
      x : 4-dim (batch_size, height, width, num_channels) tensor
          'None' if no input frame

    Returns:
      ``(loss, pred)``: the mean of the bottom-layer 'E' tensor, and the
      second output of the bottom E-block.
    """
    if x is not None:
        assert len(x.shape) == 4, "the dimension of the input tensor must be {}, but {}.".format(4, len(x.shape))

    top = self.num_layers - 1

    # initialize 'R's and 'E's as 0 when started
    if self.stack_E is None:
        self.stack_E = []
        self.stack_R = []

        # Throw-away zero-weight convolutions are used only to obtain the
        # shape of each layer's tensors (messy, as with convLSTM).
        probe = tf.zeros_like(x, tf.float32)
        for l in range(self.num_layers):
            # 1x1 conv that doubles the channel count: E.shape == 2 * A.shape
            widen = tf.layers.Conv2D(
                filters=2*self.stack_channels[l],
                kernel_size=1,
                trainable=False,
                kernel_initializer=tf.zeros_initializer())
            self.stack_E.append(tf.zeros_like(widen(probe)))

            # R.shape == A.shape
            self.stack_R.append(tf.zeros_like(probe))
            getattr(self, "R_block"+str(l)).reset_state()

            if l != top:
                # Stride-2 conv yielding the shape of the layer above: the
                # 2D shape is halved and the channel count changes.
                shrink = tf.layers.Conv2D(
                    filters=self.stack_channels[l+1],
                    kernel_size=2,
                    strides=2,
                    trainable=False,
                    kernel_initializer=tf.zeros_initializer())
                # A[l].shape -> A[l+1].shape
                probe = shrink(probe)

    # update R-block from top to bottom
    for l in reversed(range(self.num_layers)):
        r_block = getattr(self, "R_block"+str(l))
        r_args = [self.stack_R[l], self.stack_E[l]]
        if l != top:
            # Every layer but the top also receives the R of the layer above.
            r_args.append(self.stack_R[l+1])
        self.stack_R[l] = r_block(*r_args)

    # update E-block from bottom to top
    for l in range(self.num_layers):
        e_block = getattr(self, "E_block"+str(l))
        if l == 0:
            # The bottom layer compares against the input frame and also
            # yields the prediction.
            new_E, pred = e_block(self.stack_R[l], x)
        else:
            new_E, _ = e_block(self.stack_R[l], self.stack_E[l-1])
        self.stack_E[l] = new_E

    # loss for this time step
    tmp_loss = tf.reduce_mean(self.stack_E[0])
    return tmp_loss, pred
slopes_t = tf.reduce_max(slopes) if cfg.bMaxGrad else slopes if cfg.oReg == 'cp': dis_lip_loss = cfg.fWeightLip * tf.reduce_mean(tf.square(slopes_t)) elif cfg.oReg == 'gp': dis_lip_loss = cfg.fWeightLip * tf.reduce_mean( tf.square(slopes_t - cfg.fLipTarget)) elif cfg.oReg == 'lp': dis_lip_loss = cfg.fWeightLip * tf.reduce_mean( tf.square(tf.maximum(0.0, slopes_t - cfg.fLipTarget))) elif cfg.oReg == 'al': if not cfg.bMaxGrad: slopes_t = tf.reduce_mean(slopes) al_lambda = tf.get_variable('lambda', [], initializer=tf.zeros_initializer(), trainable=False) constraint = slopes_t - cfg.fLipTarget dis_lip_loss = cfg.fWeightLip * tf.square( constraint) + al_lambda * constraint if cfg.fLrAL != 0: al_lambda_update_op = tf.assign(al_lambda, al_lambda + cfg.fLrAL * constraint) else: al_lambda_update_op = tf.assign( al_lambda, al_lambda + 2 * cfg.fWeightLip * constraint) elif cfg.oReg == 'ali': if not cfg.bMaxGrad: slopes_t = tf.reduce_mean(slopes) al_lambda = tf.get_variable('lambda', [], initializer=tf.zeros_initializer(),
def __init__(self, modelpp):
    """Build a BERT sentence classifier graph with loss and train op.

    Fetches the pretrained multilingual BERT archive through ``modelpp``,
    builds a ``BertModel`` initialised from its checkpoint, and adds a linear
    softmax classifier over the pooled output. The number of classes comes
    from the module-level ``num_labels``.

    Sets on ``self``: the placeholders ``input_ids``, ``input_mask``,
    ``segment_ids``, ``labels``, ``keep_prob`` and ``learning_rate``, plus
    ``pred``, ``accuracy``, ``loss`` and ``train_op``.

    Args:
      modelpp: object providing ``get_remote_date(url)``, which returns the
        local path of the downloaded archive.
    """
    path = modelpp.get_remote_date(
        "https://www.flyai.com/m/multi_cased_L-12_H-768_A-12.zip")
    # The model files live in a directory named after the archive stem.
    data_root = os.path.splitext(path)[0]

    bert_config_file = os.path.join(data_root, 'bert_config.json')
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    init_checkpoint = os.path.join(data_root, 'bert_model.ckpt')
    bert_vocab_file = os.path.join(data_root, 'vocab.txt')  # unused here

    def _int_matrix(name):
        # int32 placeholder with both dimensions left unspecified.
        return tf.placeholder(tf.int32, shape=[None, None], name=name)

    self.input_ids = _int_matrix('input_ids')
    self.input_mask = _int_matrix('input_masks')
    self.segment_ids = _int_matrix('segment_ids')
    self.labels = tf.placeholder(tf.int32, shape=[None, ], name='labels')
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')

    # Create the BERT model.
    with tf.name_scope('Bert'):
        # NOTE(review): is_training is hard-coded to True, so the same graph
        # is used for prediction -- confirm this is intended.
        model = modeling.BertModel(
            config=bert_config,
            is_training=True,
            input_ids=self.input_ids,
            input_mask=self.input_mask,
            token_type_ids=self.segment_ids,
            # Set to True on TPU for speed; False is faster on CPU or GPU.
            use_one_hot_embeddings=False)

        # model.get_sequence_output() would give the per-token output
        # [batch_size, seq_length, embedding_size]; use it for seq2seq or
        # NER instead of the pooled output below.
        tvars = tf.trainable_variables()

        # Load the pretrained BERT weights.
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Sentence-level output and its dimensionality.
        output_layer = model.get_pooled_output()
        hidden_size = output_layer.shape[-1].value

    # Build W and b of the classifier.
    output_weights = tf.get_variable(
        "output_weights", [hidden_size, num_labels],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    with tf.variable_scope("predict"):
        output_layer = tf.nn.dropout(output_layer, keep_prob=self.keep_prob)
        projected = tf.matmul(output_layer, output_weights)
        logits = tf.nn.bias_add(projected, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        self.pred = tf.argmax(log_probs, 1, name='pred')

    with tf.name_scope("accuracy"):
        # Fraction of examples whose predicted class equals the label.
        correct_pred = tf.equal(self.labels, tf.cast(self.pred, tf.int32))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_pred, tf.float32), name='acc')

    with tf.name_scope("optimize"):
        # One-hot encode the labels.
        one_hot_labels = tf.one_hot(
            self.labels, depth=num_labels, dtype=tf.float32)
        # Cross-entropy loss averaged over the batch.
        per_example_loss = -tf.reduce_sum(
            one_hot_labels * log_probs, axis=-1)
        self.loss = tf.reduce_mean(per_example_loss)
        # Optimizer.
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
def build_net(self, ob, ac_shape, hid_size, num_hid_layers):
    """Build the value network, its train op, and the Gaussian policy.

    Observations are normalised with a ``RunningMeanStd`` filter and clipped
    to [-5, 5]. Two separate tanh MLPs are built on top: one predicting the
    state value (layers ``vffc*`` / ``vffinal``), and one predicting the
    action mean (layers ``polc*``). The log standard deviation is a single
    learned variable that does not depend on the observation.

    Sets on ``self``: ``ob``, ``ob_shape``, ``ob_rms``, ``vpred`` (shape
    ``[batch, 1]``), ``vreal``, ``vadam``, ``pd``, ``stochastic`` and
    ``action``.

    Args:
      ob: observation tensor with the batch on axis 0.
      ac_shape: action shape; ``ac_shape[0]`` is the action dimensionality.
      hid_size: units per hidden layer.
      num_hid_layers: number of hidden layers in each network.
    """
    self.ob = ob
    self.ob_shape = ob.shape.as_list()[1:]

    with tf.variable_scope("ob_filter"):
        self.ob_rms = RunningMeanStd(ob.shape.as_list()[1:])

    # normalized observation
    obz = tf.clip_by_value(
        (ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)

    # net to fit value function
    net = obz
    for i in range(num_hid_layers):
        net = tf.layers.dense(
            inputs=net,
            units=hid_size,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=1),
            name="vffc%i" % (i + 1))
    self.vpred = tf.layers.dense(
        inputs=net,
        units=1,
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=1),
        name="vffinal")

    # train value function
    self.vreal = tf.placeholder(
        dtype=tf.float32, shape=(None, ), name="vreal")
    # Fix: vreal is (N,) while vpred is (N, 1). Subtracting them directly
    # broadcasts to (N, N) and averages over every target/prediction pair.
    # Drop the trailing axis of vpred so the error is element-wise.
    # self.vpred itself keeps its (N, 1) shape for callers.
    vloss = tf.reduce_mean(tf.square(self.vreal - self.vpred[:, 0]))
    value_prefix = "%s/vff" % self.scope
    value_vars = [
        v for v in self.get_trainable_variables()
        if v.name.startswith(value_prefix)
    ]
    self.vadam = tf.train.AdamOptimizer().minimize(
        vloss, var_list=value_vars)

    # net to predict mean and standard deviation of action
    net = obz
    for i in range(num_hid_layers):
        net = tf.layers.dense(
            inputs=net,
            units=hid_size,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=1),
            name="polc%i" % (i + 1))
    mean = tf.layers.dense(
        inputs=net,
        units=ac_shape[0],
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01))
    # std not related to observation; `mean * 0.0` only broadcasts the
    # variable to the batch shape of `mean`.
    logstd = mean * 0.0 + tf.get_variable(
        name="logstd",
        shape=[1, ac_shape[0]],
        initializer=tf.zeros_initializer(),
        dtype=tf.float32)

    # action is normally distributed
    self.pd = DiagGaussianPd(mean, logstd)
    self.stochastic = tf.placeholder(
        dtype=tf.bool, shape=(), name="stochastic")
    # Sample when stochastic, otherwise take the distribution's mode.
    self.action = tf.cond(self.stochastic,
                          lambda: self.pd.sample(),
                          lambda: self.pd.mode())
def XinNingNetwork2(input,
                    heatmap,
                    is_training,
                    weight_decay,
                    batch_norm_params,
                    num_labels,
                    depth_multi,
                    min_depth=8):
    """Build the stage-2 landmark regressor over an image and its heatmap.

    ``input`` and ``heatmap`` are concatenated on axis 3 and passed through a
    small conv/pool trunk. Three branches taken at different depths are each
    average-pooled, concatenated on axis 3, flattened, and fed to one fully
    connected layer with ``num_labels * 2`` outputs. Tensor names and shapes
    are printed while the graph is built.

    The inline shape comments use the original author's notation
    ``<input shape> / <op> / c:<channels>,n:<repeats>,s:<stride>`` and have
    not been verified against the actual graph.

    Args:
      input: image tensor (concatenated with ``heatmap`` on axis 3).
      heatmap: heatmap tensor with matching leading dimensions.
      is_training: not used by this function; it appears only in a
        commented-out ``trainable=`` argument.
      weight_decay: L2 regularisation weight for the conv layers.
      batch_norm_params: keyword arguments for ``slim.batch_norm``.
      num_labels: number of landmarks; the output has ``num_labels * 2``
        units.
      depth_multi: multiplier applied by the ``depth`` callback.
      min_depth: lower bound applied by the ``depth`` callback.

    Returns:
      The output tensor of the final fully connected layer.
    """
    # Fix: the blocking time.sleep(3) debug delay and the unused `features`
    # dict were removed; neither affected the graph that is built.
    print("labels; ", num_labels)

    def depth(d):
        return max(int(d * depth_multi), min_depth)

    def _report(tensor):
        # Print one tensor's name and static shape.
        print(tensor.name, tensor.get_shape())

    with tf.variable_scope('pfld_inference2'):
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu6,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                biases_initializer=tf.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                padding='SAME',
                # trainable=is_training
        ):
            print('PFLD input shape({}): {}'.format(input.name,
                                                    input.get_shape()))
            print("=== start stage 2 ===")
            _report(heatmap)

            # 112*112*1*2 / concat / 112*112*2
            concatted_2 = tf.concat([input, heatmap], 3)
            _report(concatted_2)

            # 112*112*2 / conv3*3 / c:8,n:1,s:2
            conv2_1 = conv2d(concatted_2, stride=2, channel=8, kernel=3,
                             depth=depth, scope='conv2_1')
            _report(conv2_1)

            # 56*56*8 / pool3*3 / c:28,n:1,s:2
            pool2_1 = slim.max_pool2d(conv2_1, kernel_size=[3, 3], stride=2,
                                      scope='pool2_1', padding='SAME')
            _report(pool2_1)

            # 28*28*8 / conv3*3 / c:16,n:1,s:1
            conv2_2 = conv2d(pool2_1, stride=1, channel=16, kernel=3,
                             depth=depth, scope='conv2_2')
            _report(conv2_2)

            # 28*28*16 / pool3*3 / c:16,n:1,s:2
            pool2_2 = slim.max_pool2d(conv2_2, kernel_size=[3, 3], stride=2,
                                      scope='pool2_2', padding='SAME')
            _report(pool2_2)

            # Branch 1 (from pool2_2).
            # 14*14*16 / conv3*3 / c:64,n:1,s:1
            conv2_2_1 = conv2d(pool2_2, stride=1, channel=64, kernel=3,
                               depth=depth, scope='conv2_2.1')
            _report(conv2_2_1)

            # 14*14*64 / global_pool / c:64,n:1
            pool2_2_1 = slim.avg_pool2d(conv2_2_1, [14, 14],
                                        stride=[14, 14], scope='pool2_2.1',
                                        padding='SAME')
            _report(pool2_2_1)

            # 14*14*16 / conv3*3 / c:64,n:1,s:2
            conv2_3 = conv2d(pool2_2, stride=2, channel=64, kernel=3,
                             depth=depth, scope='conv2_3')
            _report(conv2_3)

            # 7*7*64 / pool3*3 / c:64,n:1,s:2
            pool2_3 = slim.max_pool2d(conv2_3, kernel_size=[3, 3], stride=2,
                                      scope='pool2_3', padding='SAME')
            _report(pool2_3)

            # Branch 2 (from pool2_3).
            # 3*3*64 / conv3*3 / c:64,n:1,s:1
            # NOTE(review): the call uses stride=2 although the note above
            # says s:1 -- confirm which is intended.
            conv2_3_1 = conv2d(pool2_3, stride=2, channel=64, kernel=3,
                               depth=depth, scope='conv2_3.1')
            _report(conv2_3_1)

            # 3*3*64 / global_pool / c:64,n:1
            pool2_3_1 = slim.avg_pool2d(conv2_3_1, [3, 3], stride=[3, 3],
                                        scope='pool2_3.1', padding='SAME')
            _report(pool2_3_1)

            # Branch 3 (also from pool2_3).
            # 3*3*64 / conv3*3 / c:64,n:1,s:2
            conv2_4 = conv2d(pool2_3, stride=2, channel=64, kernel=3,
                             depth=depth, scope='conv2_4')
            _report(conv2_4)

            # 2*2*64 / global_pool / c:64,n:1
            pool2_4_1 = slim.avg_pool2d(conv2_4, [2, 2], stride=[2, 2],
                                        scope='pool2_4.1', padding='SAME')
            _report(pool2_4_1)

            # 1*1*64*3 / concat / 1*1*192
            concatted_2 = tf.concat([pool2_2_1, pool2_3_1, pool2_4_1], 3)
            _report(concatted_2)

            flattened = slim.flatten(concatted_2)
            _report(flattened)

            # 1*1*192 / fc / 1*136
            output_2 = slim.fully_connected(flattened,
                                            num_outputs=num_labels * 2,
                                            scope='fc_2')
            print("last layer name")
            _report(output_2)
            return output_2
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=tf.zeros_initializer(),
                biases_regularizer=None,
                do_spec_norm=False,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

    For non-spectral normed convolution, See
    tensorflow.contrib.layers.python.layers.convolution for doc.

    The layer class is chosen from the rank of ``inputs`` (3 -> 1D, 4 -> 2D,
    5 -> 3D).  With ``do_spec_norm`` set, rank-4 inputs use ``SpecNormConv2d``
    and any other rank is rejected.

    Args:
        do_spec_norm: use the spectrally normalized 2D convolution layer.
        (All other arguments are forwarded as in the contrib ``convolution``.)

    Returns:
        The layer output after the optional normalizer and activation,
        registered in ``outputs_collections`` under the scope name.

    Raises:
        ValueError: if ``data_format`` is not one of the accepted strings, or
            the input rank is not 3, 4 or 5.
        NotImplementedError: if ``do_spec_norm`` is set for a non-rank-4 input.
    """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'Conv', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        # ***Modified section***
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
        elif input_rank == 4:
            layer_class = convolutional_layers.Convolution2D
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
        else:
            # Format the rank into the message; passing it as a second
            # positional argument made str(exc) render as a tuple.
            raise ValueError(
                'Convolution not supported for input with rank %r' %
                (input_rank, ))
        if do_spec_norm:
            if input_rank != 4:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
            layer_class = SpecNormConv2d
        # ***Modified section ends***

        df = ('channels_first'
              if data_format and data_format.startswith('NC') else
              'channels_last')
        # A bias is only added when there is no normalizer and a bias
        # initializer was supplied; pass a real bool rather than the
        # initializer object (or None) itself.
        use_bias = bool(not normalizer_fn and biases_initializer)
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=use_bias,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
def get_bn(zero_init=False):
    """Return a batch-norm callable with the signature ``(x, name)``.

    Args:
        zero_init: when truthy, the returned callable passes
            ``gamma_init=tf.zeros_initializer()`` to ``BatchNorm``.

    Returns:
        A two-argument callable.  Its ``name`` argument is accepted but not
        used; the layer is always created under the name ``'bn'``.
    """
    if not zero_init:
        return lambda x, name: BatchNorm('bn', x)
    return lambda x, name: BatchNorm(
        'bn', x, gamma_init=tf.zeros_initializer())
def build_model(self, height, width, states_per_action):
    """Build the conv -> conv -> fc network with policy and value heads.

    Every tensor created here is also stored in ``self.layers`` under the
    name of its stage.  All ops are placed on ``/gpu:0``.

    Args:
        height: second dimension of the state placeholder.
        width: third dimension of the state placeholder.
        states_per_action: last dimension of the state placeholder.

    Returns:
        Tuple ``(state, policy, value)``: the input placeholder, the softmax
        policy output with ``self.action_size`` units, and the single-unit
        linear value output.
    """
    contrib = tf.contrib.layers

    def _keep(key, tensor):
        # Register the tensor under ``key`` and return it unchanged.
        self.layers[key] = tensor
        return tensor

    with tf.device('/gpu:0'):
        state = _keep('state', tf.placeholder(
            'float32',
            shape=(None, height, width, states_per_action),
            name='states'))

        # Conv stage 1: 16 filters, 8x8 kernel, stride 4.
        with tf.variable_scope('conv1'):
            conv1 = _keep('conv1', contrib.convolution2d(
                inputs=state,
                num_outputs=16,
                kernel_size=[8, 8],
                stride=[4, 4],
                padding="VALID",
                activation_fn=tf.nn.relu,
                weights_initializer=contrib.xavier_initializer_conv2d(),
                biases_initializer=tf.zeros_initializer()))

        # Conv stage 2: 32 filters, 4x4 kernel, stride 2.
        with tf.variable_scope('conv2'):
            conv2 = _keep('conv2', contrib.convolution2d(
                inputs=conv1,
                num_outputs=32,
                kernel_size=[4, 4],
                stride=[2, 2],
                padding="VALID",
                activation_fn=tf.nn.relu,
                weights_initializer=contrib.xavier_initializer_conv2d(),
                biases_initializer=tf.zeros_initializer()))

        # Collapse the feature maps before the dense layers.
        with tf.variable_scope('flatten'):
            flatten = _keep('flatten', contrib.flatten(inputs=conv2))

        # Dense hidden layer, 256 units.
        with tf.variable_scope('fc1'):
            fc1 = _keep('fc1', contrib.fully_connected(
                inputs=flatten,
                num_outputs=256,
                activation_fn=tf.nn.relu,
                weights_initializer=contrib.xavier_initializer(),
                biases_initializer=tf.zeros_initializer()))

        # Policy head: softmax over actions, created without a bias term.
        with tf.variable_scope('policy'):
            policy = _keep('policy', contrib.fully_connected(
                inputs=fc1,
                num_outputs=self.action_size,
                activation_fn=tf.nn.softmax,
                weights_initializer=contrib.xavier_initializer(),
                biases_initializer=None))

        # Value head: one linear unit, created without a bias term.
        with tf.variable_scope('value'):
            value = _keep('value', contrib.fully_connected(
                inputs=fc1,
                num_outputs=1,
                activation_fn=None,
                weights_initializer=contrib.xavier_initializer(),
                biases_initializer=None))

    return state, policy, value
# Section One: Define the layers of the neural network itself # Input Layer with tf.variable_scope('input'): X = tf.placeholder(tf.float32, shape=(None, number_of_inputs)) # Layer 1 with tf.variable_scope('layer_1'): weights = tf.get_variable( name="weights1", shape=[number_of_inputs, layer_1_nodes], initializer=tf.contrib.layers.xavier_initializer()) biases = tf.get_variable(name="biases1", shape=[layer_1_nodes], initializer=tf.zeros_initializer()) layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases) # Layer 2 with tf.variable_scope('layer_2'): weights = tf.get_variable( name="weights2", shape=[layer_1_nodes, layer_2_nodes], initializer=tf.contrib.layers.xavier_initializer()) biases = tf.get_variable(name="biases2", shape=[layer_2_nodes], initializer=tf.zeros_initializer()) layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases) # Layer 3 with tf.variable_scope('layer_3'):
def cifarnet_bn(images, num_classes=10, is_training=False, dropout_keep_prob=0.5, prediction_fn=slim.softmax, scope='CifarNet'):
    """Build a small inception-style classifier and return its logits.

    Pipeline: two 3x3 stem convolutions, two inception modules, a max pool,
    a third inception module, an average pool, flatten, dropout and a final
    linear layer with ``num_classes`` outputs.

    Args:
        images: input tensor (the original notes assume 3x28x28 images --
            TODO confirm against the caller).
        num_classes: number of units in the logits layer.
        is_training: forwarded to ``slim.dropout`` only.
        dropout_keep_prob: keep probability for the dropout layer.
        prediction_fn: callable applied to the logits to fill
            ``end_points['Predictions']``.
        scope: variable scope name.

    Returns:
        Tuple ``(logits, end_points)`` where ``end_points`` maps stage names
        to the tensors produced at those stages.
    """
    end_points = {}

    def _record(key, tensor):
        end_points[key] = tensor
        return tensor

    with tf.variable_scope(scope, 'CifarNet', [images]):
        # Stem: 32 then 96 channels, 3x3 kernels, SAME padding.
        stem = slim.conv2d(images, 32, [3, 3], padding='SAME',
                           scope='stem_conv1')
        stem = slim.conv2d(stem, 96, [3, 3], padding='SAME',
                           scope='stem_conv2')

        # Inception modules 1 and 2 (the original notes give 128 and 240
        # output channels; not checkable here without inception_module).
        net = _record('inception1',
                      inception_module(stem, [32, 96, 16, 64, 16, 16]))
        net = _record('inception2',
                      inception_module(net, [64, 128, 32, 96, 48, 32]))

        # Spatial down-sampling by 2.
        net = _record('maxpool',
                      slim.max_pool2d(net, [3, 3], stride=2, padding='SAME',
                                      scope='maxpool'))

        # Inception module 3.
        net = _record('inception3',
                      inception_module(net, [96, 96, 16, 104, 24, 32]))

        # NOTE(review): the original notes claim 14x14 -> 4x4 (4096 features
        # after flatten), but with padding='SAME' and stride 3 a 14x14 input
        # would come out 5x5.  Confirm whether 'VALID' was intended.
        net = _record('avgpool',
                      slim.avg_pool2d(net, [5, 5], stride=3, padding='SAME',
                                      scope='avgpool'))

        net = _record('flatten', slim.flatten(net))
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout1')

        # Linear classifier head.
        logits = slim.fully_connected(
            net,
            num_classes,
            biases_initializer=tf.zeros_initializer(),
            # weights_initializer=trunc_normal(1/50.0),
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            weights_regularizer=None,
            activation_fn=None,
            scope='logits')

        _record('Logits', logits)
        _record('Predictions', prediction_fn(logits, scope='Predictions'))

    return logits, end_points
def __init__(self,
             env_spec,
             name='CategoricalGRUPolicy',
             hidden_dim=32,
             hidden_nonlinearity=tf.nn.tanh,
             hidden_w_init=tf.initializers.glorot_uniform(),
             hidden_b_init=tf.zeros_initializer(),
             recurrent_nonlinearity=tf.nn.sigmoid,
             recurrent_w_init=tf.initializers.glorot_uniform(),
             output_nonlinearity=None,
             output_w_init=tf.initializers.glorot_uniform(),
             output_b_init=tf.zeros_initializer(),
             hidden_state_init=tf.zeros_initializer(),
             hidden_state_init_trainable=False,
             state_include_action=True,
             layer_normalization=False):
    """Set up a categorical GRU policy for a discrete action space.

    Stores the configuration on the instance, builds the underlying
    ``CategoricalGRUModel`` and finishes by calling ``self._initialize()``.

    Args:
        env_spec: environment spec; its ``action_space`` must be an
            ``akro.Discrete`` and its ``observation_space`` must expose
            ``flat_dim``.
        name: name passed to the parent constructor.
        hidden_dim: hidden dimension passed to the model.
        hidden_nonlinearity: passed to the model.
        hidden_w_init: passed to the model.
        hidden_b_init: passed to the model.
        recurrent_nonlinearity: passed to the model.
        recurrent_w_init: passed to the model.
        output_nonlinearity: passed to the model.
        output_w_init: passed to the model.
        output_b_init: passed to the model.
        hidden_state_init: passed to the model.
        hidden_state_init_trainable: passed to the model.
        state_include_action: when truthy, the input dimension is the
            observation dimension plus the action dimension.
        layer_normalization: passed to the model.

    Raises:
        ValueError: if ``env_spec.action_space`` is not ``akro.Discrete``.
    """
    if not isinstance(env_spec.action_space, akro.Discrete):
        # The two literals are concatenated by the parser, so the separating
        # space has to be written explicitly.
        raise ValueError('CategoricalGRUPolicy only works '
                         'with akro.Discrete action space.')

    super().__init__(name, env_spec)
    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.n
    self._hidden_dim = hidden_dim
    self._hidden_nonlinearity = hidden_nonlinearity
    self._hidden_w_init = hidden_w_init
    self._hidden_b_init = hidden_b_init
    self._recurrent_nonlinearity = recurrent_nonlinearity
    self._recurrent_w_init = recurrent_w_init
    self._output_nonlinearity = output_nonlinearity
    self._output_w_init = output_w_init
    self._output_b_init = output_b_init
    self._hidden_state_init = hidden_state_init
    self._hidden_state_init_trainable = hidden_state_init_trainable
    self._layer_normalization = layer_normalization
    self._state_include_action = state_include_action

    # The previous action is appended to the observation when requested.
    if state_include_action:
        self._input_dim = self._obs_dim + self._action_dim
    else:
        self._input_dim = self._obs_dim

    self._f_step_prob = None

    self.model = CategoricalGRUModel(
        output_dim=self._action_dim,
        hidden_dim=self._hidden_dim,
        name='prob_network',
        hidden_nonlinearity=hidden_nonlinearity,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        recurrent_nonlinearity=recurrent_nonlinearity,
        recurrent_w_init=recurrent_w_init,
        hidden_state_init=hidden_state_init,
        hidden_state_init_trainable=hidden_state_init_trainable,
        output_nonlinearity=output_nonlinearity,
        output_w_init=output_w_init,
        output_b_init=output_b_init,
        layer_normalization=layer_normalization)

    self._prev_actions = None
    self._prev_hiddens = None
    self._dist = None
    self._init_hidden = None

    self._initialize()
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
    """Build a distribution from a latent vector.

    The mean comes from an ``fc`` layer named ``'pi'`` with ``self.size``
    outputs; the log standard deviation is a ``[1, self.size]`` variable
    named ``'logstd'`` initialized to zeros.

    Args:
        latent_vector: input to the ``fc`` layer.
        init_scale: forwarded to ``fc``.
        init_bias: forwarded to ``fc``.

    Returns:
        Tuple ``(pd, mean)`` where ``pd`` is ``self.pdfromflat`` applied to
        the mean and log-std concatenated on axis 1.
    """
    mean = fc(latent_vector, 'pi', self.size,
              init_scale=init_scale, init_bias=init_bias)
    logstd = tf.get_variable(name='logstd',
                             shape=[1, self.size],
                             initializer=tf.zeros_initializer())
    # Adding logstd to a zeroed copy of mean expands it to mean's shape.
    expanded_logstd = mean * 0.0 + logstd
    flat_params = tf.concat([mean, expanded_logstd], axis=1)
    distribution = self.pdfromflat(flat_params)
    return distribution, mean
# Step 1: read the MNIST data with one-hot labels and draw one batch.
mnist = input_data.read_data_sets('data/mnist', one_hot=True)
X_batch, Y_batch = mnist.train.next_batch(batch_size)

# Step 2: create placeholders for the features (X) and the labels (Y).
# Each MNIST image has 28*28 = 784 pixels.
X = tf.placeholder(tf.float32, [batch_size, 784], name='image')
# Each image belongs to one of 10 classes, the digits 0-9.
Y = tf.placeholder(tf.int32, [batch_size, 10], name='label')

# Step 3: create the weights and the bias.
# w is randomly initialized from a normal distribution.
# NOTE(review): the original comment said "mean of 0, stddev of 0.01", but
# tf.random_normal_initializer() is called without arguments here, so the
# initializer's defaults apply -- confirm which was intended.
# w maps X (784) -> Y (10).
w = tf.get_variable(name='weight', shape=(784, 10), initializer=tf.random_normal_initializer())
# b is initialized to 0.
# b has shape (1, 10): one entry per class.
b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer())

# Step 4: build the model.
logits = tf.matmul(X, w) + b

# Step 5: define the loss function.
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
# Average the loss over the whole batch.
loss = tf.reduce_mean(entropy)
# loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.softmax(logits) * tf.log(Y), reduction_indices=[1]))

# Step 6: define the training op.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
def initialize_parameters():
    """Create every weight and bias variable of the two-view autoencoder.

    For each layer a weight ``<prefix>_h<k>`` of shape ``[fan_in, fan_out]``
    is created with a variance-scaling initializer (``seed=1``), followed by
    a bias ``<prefix>_b<k>`` of shape ``[fan_out]`` initialized to zeros.
    The graph-level random seed is set to 1 first.

    Returns:
        dict mapping each variable name to its ``tf`` variable, in creation
        order.
    """
    tf.set_random_seed(1)

    # (prefix, layer index, fan_in, fan_out), listed in creation order.
    layer_specs = [
        ("x1_encoder", 1, 144, 16),
        ("x2_encoder", 1, 21, 16),
        ("x1_encoder", 2, 16, 32),
        ("x2_encoder", 2, 16, 32),
        ("x1_encoder", 3, 32, 64),
        ("x2_encoder", 3, 32, 64),
        ("x1_encoder", 4, 64, 128),
        ("x2_encoder", 4, 64, 128),
        ("joint_encoder", 1, 256, 128),
        ("joint_encoder", 2, 128, 64),
        ("joint_encoder", 3, 64, 15),
        ("x1_decoder", 1, 128, 64),
        ("x2_decoder", 1, 128, 64),
        ("x1_decoder", 2, 64, 32),
        ("x2_decoder", 2, 64, 32),
        ("x1_decoder", 3, 32, 16),
        ("x2_decoder", 3, 32, 16),
        ("x1_decoder", 4, 16, 144),
        ("x2_decoder", 4, 16, 21),
    ]

    parameters = {}
    for prefix, index, fan_in, fan_out in layer_specs:
        weight_name = "{}_h{}".format(prefix, index)
        bias_name = "{}_b{}".format(prefix, index)
        parameters[weight_name] = tf.get_variable(
            weight_name, [fan_in, fan_out],
            initializer=tf.contrib.layers.variance_scaling_initializer(seed=1))
        parameters[bias_name] = tf.get_variable(
            bias_name, [fan_out],
            initializer=tf.zeros_initializer())

    return parameters
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, extras):
    """Creates a classification model.

    The BERT sequence output is repeated once per relation slot, the tokens
    of each entity are mean-pooled using ``extras.e1_mas`` / ``extras.e2_mas``
    and the two pooled vectors are concatenated and classified by one linear
    layer.  The loss is the masked mean cross entropy, masked by
    ``extras.cls_mask``.

    Shape legend for the comments: B = batch, R = FLAGS.max_num_relations,
    F = from_seq_length, H = hidden_size.

    Args:
        bert_config: forwarded to ``modeling.BertModel``.
        is_training: forwarded to the model; also enables dropout (keep
            probability 0.9) on the pooled features.
        input_ids: forwarded to the model.
        input_mask: forwarded to the model.
        segment_ids: forwarded to the model as ``token_type_ids``.
        labels: class ids, flattened to ``[B*R]``.
        num_labels: number of output classes.
        use_one_hot_embeddings: forwarded to the model.
        extras: object providing ``e1_mas``, ``e2_mas`` and ``cls_mask``;
            also forwarded to the model.

    Returns:
        Tuple ``(loss, per_example_loss, logits, probabilities)``.
    """
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        extras=extras)

    output_layer = model.get_sequence_output()
    from_seq_length = output_layer.shape[1].value
    hidden_size = output_layer.shape[2].value
    num_relations = FLAGS.max_num_relations

    # [B, R, F, H]: one copy of the sequence output per relation slot.
    output_layer = tf.stack([output_layer] * num_relations, axis=1)

    def _masked_mean(token_mask):
        # Mean of the token vectors selected by one entity mask -> [B*R, H].
        mask = tf.to_float(
            tf.reshape(token_mask, [-1, num_relations, from_seq_length, 1]))
        summed = tf.reduce_sum(tf.multiply(output_layer, mask), axis=-2)
        # Clamp the token count so an empty mask yields zeros, not NaN.
        count = tf.maximum(1.0, tf.reduce_sum(mask, axis=-2))
        return tf.reshape(summed / count, [-1, hidden_size])

    e1 = _masked_mean(extras.e1_mas)
    e2 = _masked_mean(extras.e2_mas)

    # [B*R, 2H]
    output_layer = tf.concat([e1, e2], axis=-1)

    output_weights = tf.get_variable(
        "cls/entity/output_weights", [num_labels, hidden_size * 2],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "cls/entity/output_bias", [num_labels],
        initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        # [B*R, num_labels]
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # [B*R]
        labels = tf.reshape(labels, [-1])
        # [B*R, num_labels]
        one_hot_labels = tf.one_hot(labels, depth=num_labels,
                                    dtype=tf.float32)

        # [B*R]
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        cls_mask = tf.reshape(tf.to_float(extras.cls_mask), [-1])
        per_example_loss = per_example_loss * cls_mask

        # Guard the denominator the same way the entity pooling does, so a
        # batch with no active relation slots gives a loss of 0, not NaN.
        # Assumes cls_mask holds 0/1 values -- TODO confirm.
        loss = tf.reduce_sum(per_example_loss) / tf.maximum(
            1.0, tf.reduce_sum(cls_mask))

        return (loss, per_example_loss, logits, probabilities)
def create_bias_initializer(unused_bias_shape, dtype=tf.float32):
    """Build the default initializer for linear/AddBias module biases.

    Args:
        unused_bias_shape: ignored.
        dtype: dtype forwarded to the initializer.

    Returns:
        A ``tf.zeros_initializer`` for ``dtype``.
    """
    del unused_bias_shape  # part of the signature, never read
    zeros = tf.zeros_initializer(dtype=dtype)
    return zeros
def batch_norm(
        x,
        center=True,
        scale=True,
        training=True,
        trainable=True,
        epsilon=1e-6,
        gamma_initializer=tf.ones_initializer(),
        beta_initializer=tf.zeros_initializer(),
):
    """Batch Norm function that is compatible with pipelining.

    The normal batch norm function does not work correctly with pipelining as
    it relies on assign ops in the forward pass to update the moving averages
    which are not allowed. This function instead represents the moving
    averages as trainable variables but with a custom gradient that defines
    its gradient as the moving average update step. This means they can be
    correctly accumulated over the pipeline micro-batches.

    To ensure the moving average updates are correctly applied the Optimizer
    class must be augmented with the 'add_bn_moving_average_updates' function.

    Args:
        x: A Tensor in NHWC format. The channel count is read from dimension
            index 3, so the tensor must be rank 4 with a defined channel
            dimension.
        center: If True, add offset of `beta` to normalized tensor. If False,
            `beta` is replaced by a constant 0.
        scale: If True, multiply by `gamma`. If False, `gamma` is replaced by
            a constant 1. When the next layer is linear (also e.g. `nn.relu`),
            this can be disabled since the scaling can be done by the next
            layer.
        training: Whether this operation is being used in a training network.
            When False the stored moving mean and variance are used.
        trainable: If `True` also add variables to the graph collection
            `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
        epsilon: Small float added to variance to avoid dividing by zero.
        gamma_initializer: Optional initializer for gamma.
        beta_initializer: Optional initializer for beta.

    Returns:
        A `Tensor` representing the output of the operation.
    """
    with tf.variable_scope('batch_norm'):
        num_channels = x.get_shape().as_list()[3]
        # A real 1-tuple: `(num_channels)` is just an int, which tf.constant
        # cannot iterate as a shape.
        channel_shape = (num_channels,)

        if center:
            beta = tf.get_variable('beta',
                                   shape=channel_shape,
                                   dtype=x.dtype,
                                   initializer=beta_initializer,
                                   trainable=trainable)
        else:
            beta = tf.constant(0.0, shape=channel_shape, dtype=x.dtype)

        if scale:
            gamma = tf.get_variable('gamma',
                                    shape=channel_shape,
                                    dtype=x.dtype,
                                    initializer=gamma_initializer,
                                    trainable=trainable)
        else:
            gamma = tf.constant(1.0, shape=channel_shape, dtype=x.dtype)

        moving_mean = tf.get_variable('moving_mean',
                                      shape=channel_shape,
                                      dtype=x.dtype,
                                      initializer=tf.zeros_initializer(),
                                      trainable=trainable)
        moving_variance = tf.get_variable('moving_variance',
                                          shape=channel_shape,
                                          dtype=x.dtype,
                                          initializer=tf.ones_initializer(),
                                          trainable=trainable)

        if training:
            x, mean, variance = tf.nn.fused_batch_norm(x,
                                                       gamma,
                                                       beta,
                                                       epsilon=epsilon,
                                                       data_format='NHWC')
        else:
            x, mean, variance = tf.nn.fused_batch_norm(
                x,
                gamma,
                beta,
                mean=moving_mean,
                variance=moving_variance,
                epsilon=epsilon,
                is_training=False,
                data_format='NHWC')

    @tf.custom_gradient
    def moving_avg_updates(X, moving_m, moving_v):
        # Forward pass is the identity on X.  The "gradients" reported for
        # the moving statistics are their differences from the statistics
        # returned by fused_batch_norm above.
        def bw(dx):
            return dx, moving_m - mean, moving_v - variance

        return X, bw

    x = moving_avg_updates(x, moving_mean, moving_variance)

    return x
# Model architecture parameters n_stocks = 500 n_neurons_1 = 1024 n_neurons_2 = 512 n_neurons_3 = 256 n_neurons_4 = 128 n_target = 1 # Placeholder X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks]) Y = tf.placeholder(dtype=tf.float32, shape=[None]) # Initializers sigma = 1 weight_initializer = tf.variance_scaling_initializer(mode="fan_avg", distribution="uniform", scale=sigma) bias_initializer = tf.zeros_initializer() # In[92]: # Layer 1: Variables for hidden weights and biases W_hidden_1 = tf.Variable(weight_initializer([n_stocks, n_neurons_1])) bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1])) # Layer 2: Variables for hidden weights and biases W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1, n_neurons_2])) bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2])) # Layer 3: Variables for hidden weights and biases W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2, n_neurons_3]))
def _build_graph(self):
    """Build the controller network, its loss and its training ops.

    The architecture is selected by ``config.controller_model_name``:
    ``'2layer'``, ``'2layer_logits_clipping'``, ``'linear'`` or
    ``'linear_logits_clipping'``.  In the ``*_logits_clipping`` variants the
    logits are divided by ``config.logit_clipping_c`` before the softmax.

    Gradients are exposed in ``self.grads`` and applied through the
    placeholders in ``self.gradient_plhs``, so the caller can feed its own
    gradient values to ``self.train_op``.

    Raises:
        Exception: if ``controller_model_name`` is not a known model.
    """
    config = self.config
    x_size = config.dim_input_ctrl
    h_size = config.dim_hidden_ctrl
    a_size = config.dim_output_ctrl
    lr = self.lr_plh

    with self.graph.as_default():
        model_name = config.controller_model_name
        initializer = tf.contrib.layers.xavier_initializer(uniform=True)

        if model_name in ('2layer', '2layer_logits_clipping'):
            hidden = slim.fully_connected(self.state_plh, h_size,
                                          weights_initializer=initializer,
                                          activation_fn=tf.nn.leaky_relu)
            self.logits = slim.fully_connected(
                hidden, a_size,
                weights_initializer=initializer,
                activation_fn=None)
        elif model_name == 'linear':
            self.logits = slim.fully_connected(
                self.state_plh, a_size,
                weights_initializer=initializer,
                activation_fn=None)
        elif model_name == 'linear_logits_clipping':
            # Explicit weight/bias variables instead of slim.fully_connected.
            w = tf.get_variable('w', shape=[x_size, a_size],
                                dtype=tf.float32,
                                initializer=initializer)
            b = tf.get_variable('b', shape=[a_size],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer())
            self.logits = tf.matmul(self.state_plh, w) + b
        else:
            raise Exception('Invalid controller_model_name')

        if model_name in ('2layer_logits_clipping',
                          'linear_logits_clipping'):
            self.output = tf.nn.softmax(
                self.logits / config.logit_clipping_c)
        else:
            self.output = tf.nn.softmax(self.logits)

        self.chosen_action = tf.argmax(self.output, 1)
        self.action = tf.cast(tf.argmax(self.action_plh, 1), tf.int32)

        # Flat index of the taken action's probability in each row.
        row_offsets = tf.range(0, tf.shape(self.output)[0])\
            * tf.shape(self.output)[1]
        self.indexes = row_offsets + self.action
        flat_output = tf.reshape(self.output, [-1])
        self.responsible_outputs = tf.gather(flat_output, self.indexes)

        self.loss = -tf.reduce_mean(
            tf.log(self.responsible_outputs) * self.reward_plh)

        # Gradients are computed here but applied via placeholders.
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.tvars = tf.trainable_variables()
        tvars = self.tvars
        self.gradient_plhs = []
        for idx in range(len(tvars)):
            self.gradient_plhs.append(
                tf.placeholder(tf.float32, name=str(idx) + '_plh'))

        gvs = optimizer.compute_gradients(self.loss, tvars)
        self.grads = [pair[0] for pair in gvs]
        self.train_op = optimizer.apply_gradients(
            zip(self.gradient_plhs, tvars))
        #self.train_op = optimizer.apply_gradients(gvs)
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
def XinNingNetwork1(input, is_training, weight_decay, batch_norm_params, num_labels, depth_multi, min_depth=8):
    """Build the first-stage ('pfld_inference1') landmark regression graph.

    The image goes through a small conv/pool trunk.  Three branches of that
    trunk are average-pooled, concatenated on axis 3, flattened and fed to
    one fully connected layer.  Its output is then turned into a heatmap by
    ``_HeatMap``.  The name and static shape of every intermediate tensor
    are printed.

    Args:
        input: image tensor (presumably NHWC 112x112 -- TODO confirm against
            the caller).
        is_training: not read by this function.
        weight_decay: scale passed to ``slim.l2_regularizer`` for conv weights.
        batch_norm_params: keyword arguments forwarded to ``slim.batch_norm``.
        num_labels: the final layer has ``num_labels * 2`` outputs.
        depth_multi: multiplier applied by the ``depth`` callback.
        min_depth: lower bound returned by the ``depth`` callback.

    Returns:
        Tuple ``(output_1, heatmap, _heat_values)``: the ``fc_1`` output and
        the two values returned by ``_HeatMap``.
    """
    print("labels; ", num_labels)
    time.sleep(3)

    def depth(d):
        return max(int(d * depth_multi), min_depth)

    def _trace(tensor):
        # Print name and static shape, then hand the tensor back.
        print(tensor.name, tensor.get_shape())
        return tensor

    def _conv(x, channel, scope):
        # Every convolution in this stage is 3x3 with stride 2.
        return _trace(conv2d(x, stride=2, channel=channel, kernel=3,
                             depth=depth, scope=scope))

    def _max_pool(x, scope):
        return _trace(slim.max_pool2d(x, kernel_size=[2, 2], stride=2,
                                      scope=scope, padding='SAME'))

    def _avg_pool(x, size, scope):
        return _trace(slim.avg_pool2d(x, [size, size], stride=[size, size],
                                      scope=scope, padding='SAME'))

    with tf.variable_scope('pfld_inference1'):
        features = {}  # never read; retained from the original
        conv_defaults = dict(
            activation_fn=tf.nn.relu6,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            padding='SAME',
            # trainable=is_training
        )
        with slim.arg_scope([slim.conv2d], **conv_defaults):
            print('PFLD input shape({}): {}'.format(input.name,
                                                    input.get_shape()))

            # Shared trunk: conv -> conv -> pool.
            conv1_1 = _conv(input, 16, 'conv1_1')
            conv1_2 = _conv(conv1_1, 32, 'conv1_2')
            pool1_2 = _max_pool(conv1_2, 'pool1_2')

            # Branch 1: conv on the trunk, then a 7x7 average pool.
            conv1_2_1 = _conv(pool1_2, 64, 'conv1_2.1')
            pool1_2_1 = _avg_pool(conv1_2_1, 7, 'pool1_2.1')

            # The trunk continues: conv -> pool.
            conv1_3 = _conv(pool1_2, 64, 'conv1_3')
            pool1_3 = _max_pool(conv1_3, 'pool1_3')

            # Branch 2: conv, then a 2x2 average pool.
            conv1_3_1 = _conv(pool1_3, 64, 'conv1_3.1')
            pool1_3_1 = _avg_pool(conv1_3_1, 2, 'pool1_3.1')

            # Branch 3: conv, then a 2x2 average pool.
            conv1_4 = _conv(pool1_3, 64, 'conv1_4')
            pool1_4_1 = _avg_pool(conv1_4, 2, 'pool1_4.1')

            # Merge the three pooled branches on axis 3 and regress.
            concatted_1 = _trace(
                tf.concat([pool1_2_1, pool1_3_1, pool1_4_1], 3))
            flattened_1 = _trace(slim.flatten(concatted_1))
            output_1 = _trace(slim.fully_connected(
                flattened_1, num_outputs=num_labels * 2, scope='fc_1'))

            # Convert the regressed values into a heatmap for stage two.
            heatmap, _heat_values = _HeatMap(output_1, input, num_labels)
            _trace(heatmap)
            print("=== finish stage 1 ===")
            return output_1, heatmap, _heat_values
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """Build the O-Net graph: four conv stages, one dense layer, three heads.

    The static shape of every intermediate tensor is printed while the graph
    is built.

    Args:
        inputs: image tensor fed to the first convolution.
        label: labels passed to the loss and accuracy helpers (training only).
        bbox_target: box targets passed to ``bbox_ohem`` (training only).
        landmark_target: landmark targets passed to ``landmark_ohem``
            (training only).
        training: selects which tuple is returned.

    Returns:
        When ``training`` is truthy:
        ``(cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy)``.
        Otherwise: ``(cls_prob, bbox_pred, landmark_pred)``.
    """
    def _report(prefix, tensor):
        # Print the tensor's static shape and hand the tensor back.
        print(prefix, tensor.get_shape())
        return tensor

    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        _report("ONet input shape: ", inputs)

        net = _report("ONet conv1 shape: ",
                      slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3],
                                  stride=1, scope='conv1'))
        # in the original model, for O net all pooling using stride of 2
        net = _report("ONet pool1 shape: ",
                      slim.max_pool2d(net, kernel_size=[3, 3], stride=2,
                                      scope='pool1', padding='SAME'))
        net = _report("ONet conv2 shape: ",
                      slim.conv2d(net, num_outputs=64, kernel_size=[3, 3],
                                  stride=1, scope='conv2'))
        # pool2 passes no padding argument, unlike pool1 and pool3.
        net = _report("ONet pool2 shape: ",
                      slim.max_pool2d(net, kernel_size=[3, 3], stride=2,
                                      scope='pool2'))
        net = _report("ONet conv3 shape: ",
                      slim.conv2d(net, num_outputs=64, kernel_size=[3, 3],
                                  stride=1, scope='conv3'))
        net = _report("ONet pool3 shape: ",
                      slim.max_pool2d(net, kernel_size=[2, 2], stride=2,
                                      scope='pool3', padding='SAME'))
        net = _report("ONet conv4 shape: ",
                      slim.conv2d(net, num_outputs=128, kernel_size=[2, 2],
                                  stride=1, scope='conv4'))

        fc_flatten = _report("ONet fc input shape: ", slim.flatten(net))
        fc1 = _report('ONet fc shape after flattening: ',
                      slim.fully_connected(fc_flatten, num_outputs=256,
                                           scope='fc1',
                                           activation_fn=tf.nn.relu))

        # Classification head (softmax over two classes).
        cls_prob = _report('ONet cls_prob fc shape ',
                           slim.fully_connected(fc1, num_outputs=2,
                                                scope='cls_fc',
                                                activation_fn=tf.nn.softmax))
        # Bounding-box head (four linear outputs).
        bbox_pred = _report('ONet bbox_pred fc shape ',
                            slim.fully_connected(fc1, num_outputs=4,
                                                 scope='bbox_fc',
                                                 activation_fn=None))
        # Landmark head (ten linear outputs).
        landmark_pred = _report('ONet landmark fc shape ',
                                slim.fully_connected(fc1, num_outputs=10,
                                                     scope='landmark_fc',
                                                     activation_fn=None))

        if not training:
            return cls_prob, bbox_pred, landmark_pred

        cls_loss = cls_ohem(cls_prob, label)
        bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
        accuracy = cal_accuracy(cls_prob, label)
        landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
        L2_loss = tf.add_n(slim.losses.get_regularization_losses())
        return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
    """Set up running-statistics state for normalizing observations.

    Keeps NumPy accumulators for locally gathered statistics, TensorFlow
    variables for the aggregated sum / sum of squares / count and the
    derived mean and standard deviation, and the ops that fold new
    statistics in and recompute mean and std from them.

    Args:
        size (int): the size of the observation to be normalized
        eps (float): small constant; its square is the lower bound applied
            to the variance before the square root is taken
        default_clip_range (float): default clipping bound for normalized
            observations, i.e. [-default_clip_range, default_clip_range];
            this constructor only stores it
        sess (object): the TensorFlow session to be used; falls back to
            the default session when None
    """
    self.size = size
    self.eps = eps
    self.default_clip_range = default_clip_range
    if sess is None:
        sess = tf.get_default_session()
    self.sess = sess

    # Host-side accumulators.
    self.local_sum = np.zeros(self.size, np.float32)
    self.local_sumsq = np.zeros(self.size, np.float32)
    self.local_count = np.zeros(1, np.float32)

    def _frozen_var(name, shape, initializer):
        # Non-trainable float32 variable; all persistent state uses this form.
        return tf.get_variable(initializer=initializer, shape=shape, name=name,
                               trainable=False, dtype=tf.float32)

    # Aggregated statistics. `count` starts at one rather than zero so the
    # divisions below are well defined before any update has been applied.
    self.sum_tf = _frozen_var('sum', self.local_sum.shape, tf.zeros_initializer())
    self.sumsq_tf = _frozen_var('sumsq', self.local_sumsq.shape, tf.zeros_initializer())
    self.count_tf = _frozen_var('count', self.local_count.shape, tf.ones_initializer())
    self.mean = _frozen_var('mean', (self.size, ), tf.zeros_initializer())
    self.std = _frozen_var('std', (self.size, ), tf.ones_initializer())

    # Feed points for the increments added by `update_op`.
    self.count_pl = tf.placeholder(name='count_pl', shape=(1, ), dtype=tf.float32)
    self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size, ), dtype=tf.float32)
    self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size, ), dtype=tf.float32)

    add_count = self.count_tf.assign_add(self.count_pl)
    add_sum = self.sum_tf.assign_add(self.sum_pl)
    add_sumsq = self.sumsq_tf.assign_add(self.sumsq_pl)
    self.update_op = tf.group(add_count, add_sum, add_sumsq)

    # mean = sum / count
    # std  = sqrt(max(eps^2, sumsq / count - (sum / count)^2))
    set_mean = tf.assign(self.mean, self.sum_tf / self.count_tf)
    variance_floor = tf.square(self.eps)
    mean_of_squares = self.sumsq_tf / self.count_tf
    square_of_mean = tf.square(self.sum_tf / self.count_tf)
    clamped_variance = tf.maximum(variance_floor, mean_of_squares - square_of_mean)
    set_std = tf.assign(self.std, tf.sqrt(clamped_variance))
    self.recompute_op = tf.group(set_mean, set_std)

    # NOTE(review): presumably guards the local accumulators against
    # concurrent access; its use is outside this method — confirm.
    self.lock = threading.Lock()