def testFunctionalDenseTwiceReuse(self):
  inputs = random_ops.random_uniform((5, 3), seed=1)
  core_layers.dense(inputs, 2, name='my_dense')
  vars1 = variables.trainable_variables()
  core_layers.dense(inputs, 2, name='my_dense', reuse=True)
  vars2 = variables.trainable_variables()
  self.assertEqual(vars1, vars2)
def testFunctionalDenseTwice(self):
  inputs = random_ops.random_uniform((5, 3), seed=1)
  core_layers.dense(inputs, 2)
  vars1 = variables.trainable_variables()
  core_layers.dense(inputs, 2)
  vars2 = variables.trainable_variables()
  self.assertEqual(len(vars1), 2)
  self.assertEqual(len(vars2), 4)
def testFunctionalDenseTwice(self):
  inputs = random_ops.random_uniform((5, 3), seed=1)
  core_layers.dense(inputs, 2)
  vars1 = _get_variable_dict_from_varstore().values()
  core_layers.dense(inputs, 2)
  vars2 = _get_variable_dict_from_varstore().values()
  self.assertEqual(len(vars1), 2)
  self.assertEqual(len(vars2), 4)
def dnn_logit_fn(features, mode):
  """Deep Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
    multiple logits in the MultiHead case.
  """
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  if isinstance(units, int):
    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)
  else:
    logits = []
    for head_index, logits_dimension in enumerate(units):
      with variable_scope.variable_scope(
          'logits_head_{}'.format(head_index), values=(net,)) as logits_scope:
        these_logits = core_layers.dense(
            net,
            units=logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope)
      _add_hidden_layer_summary(these_logits, logits_scope.name)
      logits.append(these_logits)
  return logits
def testFunctionalDenseWithCustomGetter(self):
  called = [0]

  def custom_getter(getter, *args, **kwargs):
    called[0] += 1
    return getter(*args, **kwargs)

  with tf.variable_scope('test', custom_getter=custom_getter):
    inputs = tf.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
  self.assertEqual(called[0], 2)
def dnn_logit_fn(features, mode):
  """Deep Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
    multiple logits in the MultiHead case.
  """
  is_training = mode == model_fn.ModeKeys.TRAIN
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and is_training:
        net = core_layers.dropout(net, rate=dropout, training=True)
      if batch_norm:
        # TODO(hjm): In future, if this becomes popular, we can enable
        # customization of the batch normalization params by accepting a
        # list of `BatchNormalization` instances as `batch_norm`.
        net = normalization.batch_normalization(
            net,
            # The default momentum 0.99 actually crashes on certain
            # problems, so here we use 0.999, which is the default of
            # tf.contrib.layers.batch_norm.
            momentum=0.999,
            training=is_training,
            name='batchnorm_%d' % layer_id)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
def testFunctionalDenseTwiceReuseFromScope(self):
  with self.test_session():
    with variable_scope.variable_scope('scope'):
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      vars1 = variables.trainable_variables()
    with variable_scope.variable_scope('scope', reuse=True):
      core_layers.dense(inputs, 2, name='my_dense')
      vars2 = variables.trainable_variables()
    self.assertEqual(vars1, vars2)
def testKernelRegularizerWithReuse(self):
  regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
  inputs = random_ops.random_uniform((5, 3), seed=1)
  _ = core_layers.dense(
      inputs, 2, name='my_dense', kernel_regularizer=regularizer)
  self.assertEqual(
      len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1)
  _ = core_layers.dense(
      inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True)
  self.assertEqual(
      len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1)
def testFunctionalDenseInitializerFromScope(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        'scope', initializer=init_ops.ones_initializer()):
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
      sess.run(variables.global_variables_initializer())
      weights = sess.run(variables.trainable_variables())
      self.assertEqual(len(weights), 2)
      # Check that the matrix weights got initialized to ones (from scope).
      self.assertAllClose(weights[0], np.ones((3, 2)))
      # Check that the bias still got initialized to zeros.
      self.assertAllClose(weights[1], np.zeros((2)))
def testFunctionalDenseInitializerFromScope(self):
  with variable_scope.variable_scope(
      'scope', initializer=init_ops.ones_initializer()), self.test_session():
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    variables.global_variables_initializer().run()
    weights = _get_variable_dict_from_varstore()
    self.assertEqual(len(weights), 2)
    # Check that the matrix weights got initialized to ones (from scope).
    self.assertAllClose(weights['scope/dense/kernel'].read_value().eval(),
                        np.ones((3, 2)))
    # Check that the bias still got initialized to zeros.
    self.assertAllClose(weights['scope/dense/bias'].read_value().eval(),
                        np.zeros((2)))
def testFunctionalDenseInitializerFromScope(self):
  with variable_scope.variable_scope(
      'scope', initializer=init_ops.ones_initializer()):
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    if context.in_graph_mode():
      self.evaluate(variables.global_variables_initializer())
    weights = variables.trainable_variables()
    self.assertEqual(len(weights), 2)
    # Check that the matrix weights got initialized to ones (from scope).
    self.assertAllClose(
        self.evaluate(weights[0].read_value()), np.ones((3, 2)))
    # Check that the bias still got initialized to zeros.
    self.assertAllClose(self.evaluate(weights[1].read_value()), np.zeros((2)))
def _fn(x, output_units):
  """Fully connected MLP parameterized via `real_nvp_template`."""
  for units in hidden_layers:
    x = layers.dense(
        inputs=x, units=units, activation=activation, *args, **kwargs)
  x = layers.dense(
      inputs=x,
      units=(1 if shift_only else 2) * output_units,
      activation=None,
      *args,
      **kwargs)
  if shift_only:
    return x, None
  shift, log_scale = array_ops.split(x, 2, axis=-1)
  return shift, log_scale
def fn(a, b, c):
  return core_layers.dense(
      a, 10, use_bias=False,
      kernel_initializer=lambda shape, dtype, partition_info: w
  ) + math_ops.matmul(b, c)
def testEagerExecution(self):
  with context.eager_mode():
    container = variable_scope.EagerVariableStore()
    x = constant_op.constant([[2.0]])
    with container.as_default():
      y = core_layers.dense(
          x, 1, name='my_dense',
          kernel_initializer=init_ops.ones_initializer())
    self.assertAllEqual(y, [[2.0]])
    self.assertEqual(len(container.variables()), 2)
    # Recreate the layer to test reuse.
    with container.as_default():
      core_layers.dense(
          x, 1, name='my_dense',
          kernel_initializer=init_ops.ones_initializer())
    self.assertEqual(len(container.variables()), 2)
def testFunctionalDense(self):
  inputs = tf.random_uniform((5, 3), seed=1)
  outputs = core_layers.dense(
      inputs, 2, activation=tf.nn.relu, name='my_dense')
  self.assertEqual(
      len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)), 2)
  self.assertEqual(outputs.op.name, 'my_dense/Relu')
  self.assertEqual(outputs.get_shape().as_list(), [5, 2])
def testFunctionalDense(self):
  with self.test_session():
    inputs = random_ops.random_uniform((5, 3), seed=1)
    outputs = core_layers.dense(
        inputs, 2, activation=nn_ops.relu, name='my_dense')
    self.assertEqual(
        len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
    self.assertEqual(outputs.op.name, 'my_dense/Relu')
def F(x):
  out = core_layers.dense(x, 3, use_bias=False)

  def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
    self.assertEqual(1, len(variables))
    grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad)
    return grads[0], [array_ops.ones((4, 3))]

  return out, Grad
def F(x, use_resource=False):
  with variable_scope.variable_scope("f", use_resource=use_resource):
    out = core_layers.dense(x, 4, use_bias=False)

  def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
    del out_grad
    self.assertEqual(1, len(variables))
    return (array_ops.ones((3, 2)), [array_ops.ones((2, 4))])

  return out, Grad
def layer_with_recompute(inputs, is_recomputing=False):
  kwarg_values.append(is_recomputing)
  out = core_layers.dense(inputs, 2)
  out = normalization_layers.batch_normalization(out, training=True)
  if is_recomputing:
    # Ensure that the updates are not duplicated by popping off the latest
    # 2 additions.
    update_ops = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS)
    update_ops.pop()
    update_ops.pop()
  return out
def rnn_logit_fn(features, mode):
  """Recurrent Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits.
  """
  with variable_scope.variable_scope(
      'sequence_input_layer',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    sequence_input, sequence_length = seq_fc.sequence_input_layer(
        features=features, feature_columns=sequence_feature_columns)
    summary.histogram('sequence_length', sequence_length)
    if context_feature_columns:
      context_input = feature_column_lib.input_layer(
          features=features, feature_columns=context_feature_columns)
      sequence_input = seq_fc.concatenate_context_input(
          context_input, sequence_input)

  cell = rnn_cell_fn(mode)
  # Ignore output state.
  rnn_outputs, _ = rnn.dynamic_rnn(
      cell=cell,
      inputs=sequence_input,
      sequence_length=sequence_length,
      dtype=dtypes.float32,
      time_major=False)
  last_activations = _select_last_activations(rnn_outputs, sequence_length)
  with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
    logits = core_layers.dense(
        last_activations,
        units=output_units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer())
  return logits
def testFunctionalDenseInScope(self):
  with variable_scope.variable_scope('test'):
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2, name='my_dense')
    var = variables.trainable_variables()[0]
    self.assertEqual(var.name, 'test/my_dense/weights:0')
  with variable_scope.variable_scope('test1') as scope:
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2, name=scope)
    var = variables.trainable_variables()[2]
    self.assertEqual(var.name, 'test1/weights:0')
  with variable_scope.variable_scope('test2'):
    inputs = random_ops.random_uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    var = variables.trainable_variables()[4]
    self.assertEqual(var.name, 'test2/dense/weights:0')
def testFunctionalDenseInScope(self):
  with self.test_session():
    with variable_scope.variable_scope('test'):
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      var_dict = _get_variable_dict_from_varstore()
      var_key = 'test/my_dense/kernel'
      self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
    with variable_scope.variable_scope('test1') as scope:
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name=scope)
      var_dict = _get_variable_dict_from_varstore()
      var_key = 'test1/kernel'
      self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
    with variable_scope.variable_scope('test2'):
      inputs = random_ops.random_uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
      var_dict = _get_variable_dict_from_varstore()
      var_key = 'test2/dense/kernel'
      self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
def MNIST_model(features, labels, mode):
    # Input features
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    if mode == tf.estimator.ModeKeys.EVAL:
        input_data = tf.constant(1.0) - input_layer
    else:
        input_data = input_layer
    # Convolution 1
    conv1 = conv2d(inputs=input_data, filters=32, kernel_size=[5, 5],
                   padding="same", activation=relu, use_bias=True)
    # Max pooling 1
    pool1 = max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # Convolution 2
    conv2 = conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                   padding="same", activation=relu, use_bias=True)
    # Max pooling 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Fully connected
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense_out = dense(inputs=pool2_flat, units=hidden_size, activation=relu)
    dropout_out = dropout(inputs=dense_out, rate=drop_rate,
                          training=mode == tf.estimator.ModeKeys.TRAIN)
    logits = dense(inputs=dropout_out, units=10)
    # A dictionary of prediction operators
    predictions = {
        "logits": tf.multiply(logits, tf.constant(1.0), name="logit_out"),
        # Class prediction
        "classes": tf.argmax(input=logits, axis=1),
        # Probability prediction
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    # Prediction mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Calculate loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)
    # Train mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)
    # Evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
def predict_scores(features):
    candidate = dense(features['label'], 10)
    anchor = dense(features['anchor_label'], 10)
    scores = matmul(candidate, expand_dims(anchor, axis=1), transpose_b=True)
    return squeeze(scores)
def multi_head_attention(query, key=None, n_heads=8, causalty=True,
                         keep_prob=0.8, reuse=None, is_training=True):
    """Multi-head attention.

    Args:
        query: current value
        key: memory from another computation, or the query itself -> self attention
    """

    def _split_concat(inputs, n_heads=n_heads, split_axis=2, concat_axis=0):
        return tf.concat(tf.split(inputs, n_heads, axis=split_axis),
                         axis=concat_axis)

    def _scaling(inputs, embedding_size):
        return inputs / (embedding_size**0.5)

    def _dot_product_attention():
        # dot product
        matmul = tf.matmul(scaled_q, K_, transpose_b=True)
        # mask option
        # add bias here
        bias = tf.get_variable('bias', [matmul.get_shape().as_list()[-1]],
                               initializer=tf.zeros_initializer())
        logits = matmul + bias
        if causalty:
            with tf.variable_scope('tril'):
                diagonal = tf.ones_like(logits[0, :, :])
                if tf.__version__ == '1.4.0':
                    tril_fn = tf.contrib.linalg.LinearOperatorTriL
                elif tf.__version__ == '1.5.0':
                    tril_fn = tf.contrib.linalg.LinearOperatorLowerTriangular
                tril = tril_fn(diagonal).to_dense()  # (T_q, T_k)
                # Stack the lower-triangular mask, one copy per batch entry.
                masks = tf.tile(tf.expand_dims(tril, 0),
                                [shape_list(logits)[0], 1, 1])
                # extremely small value for softmax
                paddings = tf.ones_like(masks) * (-2**32 + 1)
                logits = tf.where(tf.equal(masks, 0), paddings, logits)
        # get weights
        logits = tf.nn.softmax(logits)  # what if we swapped this for something else?
        logits = tf.nn.dropout(logits, keep_prob)
        return tf.matmul(logits, V_)

    # checking self attention
    if key is None:
        key = query
    d = query.get_shape().as_list()[-1]
    n_units = d // n_heads
    Q = dense(query, d, use_bias=False)
    K = dense(key, d, use_bias=False)
    V = dense(key, d, use_bias=False)  # (batch_size, n_target, embedding_size)
    Q_ = _split_concat(Q)
    K_ = _split_concat(K)
    V_ = _split_concat(V)  # (batch_size*n_head, n_target, embedding_size/n_head)
    # pre scaling
    scaled_q = _scaling(Q_, d)
    # dot product attention
    with tf.variable_scope('dot_product_attention'):
        outputs = _dot_product_attention()
    # restore shape to beginning
    outputs = _split_concat(outputs, split_axis=0, concat_axis=2)
    # linear projection
    outputs = dense(outputs, d, use_bias=False, name='output_transform')  # from google code
    return outputs
def dense(*args, **kargs): return layers_core.dense(*args, **kargs)
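# A minimal usage sketch for the pass-through wrapper defined above, assuming
# the same `layers_core` alias for tensorflow.python.layers.core and TF1-style
# graph mode; the input shape, unit counts, and layer names below are
# illustrative assumptions, not taken from the surrounding code.
import tensorflow as tf

x = tf.random_uniform((4, 8), seed=0)
h = dense(x, 16, activation=tf.nn.relu, name='hidden')     # -> shape (4, 16)
logits = dense(h, units=2, use_bias=False, name='logits')  # -> shape (4, 2)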
def g(x):  # pylint: disable=function-redefined
  return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
def mlp(inputs, hidden_layers):
  hidden = inputs
  for hidden_size in hidden_layers:
    hidden = core.dense(
        hidden, units=hidden_size, use_bias=False, activation=nn_ops.relu)
  return hidden
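# A brief usage sketch of the mlp helper above, assuming the same `core` alias
# for tensorflow.python.layers.core and graph-mode TF1; the placeholder shape
# and the [64, 32] hidden sizes are illustrative assumptions.
import tensorflow as tf

states = tf.placeholder(tf.float32, [None, 4], name='state')
hidden = mlp(states, [64, 32])  # two bias-free ReLU dense layers: (?, 64) -> (?, 32)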
def dnn_logit_fn(features, mode):
  """Deep Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
    multiple logits in the MultiHead case.
  """
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
    if rnn_feature_columns != None:
      rnn_features_embedding = feature_column_lib.input_layer(
          features=features, feature_columns=rnn_feature_columns)
      rnn_features_embedding = tf.reshape(
          rnn_features_embedding,
          [-1, FLAGS.rnn_length, FLAGS.rnn_input_size])
      cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.rnn_hidden_size)
      att_wrapper = tf.contrib.rnn.AttentionCellWrapper(
          cell=cell, attn_length=10)
      outputs, _ = tf.nn.dynamic_rnn(att_wrapper, rnn_features_embedding,
                                     dtype=tf.float32)
      outputs = tf.reshape(
          outputs, [-1, FLAGS.rnn_length * FLAGS.rnn_hidden_size])
      net = array_ops.concat([net, outputs], 1)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)
  with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
    logits = core_layers.dense(
        net,
        units=units,
        activation=None,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        name=logits_scope)
  _add_hidden_layer_summary(logits, logits_scope.name)
  return logits
def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given
      DNN coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas, min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)
      with variable_scope.variable_scope(
          'logits', values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  linear_parent_scope = 'linear'
  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))
    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def __init__(self, in_shape, classes, lr=0.001):
    '''
    classes: List of class names or integers corresponding to each class being
    classified by the network. ie: ['left', 'straight', 'right'] or [0, 1, 2]
    '''
    # Define classes
    self.num_bins = len(classes)
    self.classes = np.array(classes, np.float32)
    self.class_lookup = [c for c in classes]

    # Define model
    tf.reset_default_graph()
    self.x = tf.placeholder(tf.float32, shape=[None, ] + in_shape, name="input")
    self.y_steering = tf.placeholder(tf.int32, shape=(None, ))
    self.y_throttle = tf.placeholder(tf.float32, shape=(None, ))
    self._training = tf.placeholder(tf.bool)
    self.training = tf.get_variable("training", dtype=tf.bool,
                                    initializer=True, trainable=False)
    self.set_training = self.training.assign(self._training)

    relu = tf.nn.relu
    sigmoid = tf.nn.sigmoid

    with tf.name_scope("donkey"):
        #      input    num  conv    stride  pad
        conv = conv2d(self.x, 24, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="conv1")
        conv = conv2d(conv, 32, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="conv2")
        conv = conv2d(conv, 64, (5, 5), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="conv3")
        conv = conv2d(conv, 64, (3, 3), (2, 2), "same", activation=relu,
                      kernel_initializer=xavier(), name="conv4")
        conv = conv2d(conv, 64, (3, 3), (1, 1), "same", activation=relu,
                      kernel_initializer=xavier(), name="conv5")
        conv = flatten(conv)
        #      in    num
        conv = dense(conv, 100, activation=relu, kernel_initializer=xavier(),
                     name="fc1")
        conv = dropout(conv, rate=0.1, training=self.training)
        conv = dense(conv, 50, activation=relu, kernel_initializer=xavier(),
                     name="fc2")
        conv = dropout(conv, rate=0.1, training=self.training)

        # Steering
        self.logits = dense(conv, self.num_bins, activation=None,
                            kernel_initializer=xavier(), name="logits")
        self.steering_probs = tf.nn.softmax(self.logits, name="steeringi_probs")
        self.steering_prediction = tf.reduce_sum(
            tf.multiply(self.steering_probs, self.classes), axis=1,
            name="steering_prediction")

        # Throttle
        self.throttle = dense(conv, 1, sigmoid, kernel_initializer=xavier(),
                              name="throttle")

    # keep tensor names for easy freezing/loading later
    self._TENSOR_DICT = {
        common._IMAGE_INPUT: self.x.name,
        common._STEERING_PREDICTION: self.steering_prediction.name,
        common._STEERING_PROBS: self.steering_probs.name,
        common._THROTTLE_PREDICTION: self.throttle.name
    }

    with tf.name_scope("loss"):
        self.loss_steering = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y_steering, logits=self.logits)
        self.loss_steering = tf.reduce_mean(self.loss_steering)
        self.loss_throttle = tf.reduce_mean((self.throttle - self.y_throttle)**2)
        self.loss = 0.9 * self.loss_steering + 0.001 * self.loss_throttle
        tf.summary.scalar("weighted_loss", self.loss)
        tf.summary.scalar("steering_loss", self.loss_steering)
        tf.summary.scalar("throttle_loss", self.loss_throttle)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    self.train_step = optimizer.minimize(self.loss)
    self.init_vars = tf.global_variables_initializer()
    self.saver = tf.train.Saver()
def g(x):  # pylint: disable=function-redefined
  return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
def fn(a, b, c):
  return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c)
def f(x, side_input): return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + side_input[0]
def layer_with_recompute(inputs): return core_layers.dense(inputs, 2)
def f2(x): return core_layers.dense(x, self.CHANNELS // 2, activation=nn_ops.relu)
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def MNIST_model(features, labels, mode):
    # Input features
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    if mode == tf.estimator.ModeKeys.EVAL:
        #input_data = tf.constant(1.0) - input_layer
        input_data = input_layer
    else:
        input_data = input_layer
    # Convolution 1
    conv1 = conv2d(inputs=input_data, filters=32, kernel_size=[5, 5],
                   padding="same", activation=relu, use_bias=True)
    # Max pooling 1
    pool1 = max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # Convolution 2
    conv2 = conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                   padding="same", activation=relu, use_bias=True)
    # Max pooling 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Fully connected
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense_out = dense(inputs=pool2_flat, units=hidden_size, activation=relu)
    dropout_out = dropout(inputs=dense_out, rate=drop_rate,
                          training=mode == tf.estimator.ModeKeys.TRAIN)
    # Generate a [28 * 28, 10] matrix as context
    # initialize
    w_a_1 = weight_variable(name="w_a_1", shape=[hidden_size, 28 * 28 * 10])
    b_a_1 = bias_variable(name="b_a_1", shape=[28 * 28 * 10])
    context = tf.add(tf.matmul(dropout_out, w_a_1), b_a_1)
    context_matrix = tf.reshape(context, [-1, 28 * 28, 10])
    # Dot product layer
    input_data_flat = tf.reshape(input_data, [-1, 28 * 28, 1])
    input_data_tiled = tf.tile(input=input_data_flat, multiples=[1, 1, 10])
    weighted_context = tf.multiply(input_data_tiled, context_matrix)
    # Generate softmax result
    logits = tf.reduce_sum(weighted_context, axis=[1])
    # A dictionary of prediction operators
    predictions = {
        "logits": tf.multiply(logits, tf.constant(1.0), name="logit_out"),
        # Class prediction
        "classes": tf.argmax(input=logits, axis=1),
        # Probability prediction
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    # Prediction mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Regularization
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=reg_scale)
    regularization_cost = tf.contrib.layers.apply_regularization(
        l1_regularizer, [context_matrix])
    # Calculate loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    error_cost = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                                 logits=logits)
    # Total loss
    loss = regularization_cost + error_cost
    # Train mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)
    # Evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
def _tower_func(self, gpu_id):
    """Calculate the total loss on a single tower running the LSTM model."""
    data = self.data_splits[gpu_id]
    data_num = self.data_num_splits[gpu_id]
    data_len = self.data_len_splits[gpu_id]
    data_ab = self.data_ab_splits[gpu_id]
    y_weight = self.y_weight_splits[gpu_id]
    y_weight = tf.Print(y_weight, [tf.shape(y_weight)],
                        message="y_weight shape init: ")
    self.batch_size = tf.shape(data)[0]
    self.sentence_len = tf.shape(data)[2]
    self.sentence_num = tf.shape(data)[1] - 1
    self.y_weight_len = tf.shape(y_weight)[1]

    x = tf.slice(data, [0, 0, 0],
                 [self.batch_size, self.sentence_num, self.sentence_len])
    x_num = data_num - 1
    x_len = tf.slice(data_len, [0, 0], [self.batch_size, self.sentence_num])
    x_ab = tf.slice(data_ab, [0, 0], [self.batch_size, self.sentence_num])
    y_index = tf.concat([
        tf.expand_dims(tf.range(self.batch_size), axis=1),
        tf.expand_dims(x_num, axis=-1)
    ], axis=1)
    y_data = tf.gather_nd(data, y_index)
    y_in = tf.slice(y_data, [0, 0], [self.batch_size, self.sentence_len - 1])
    y_out = tf.slice(y_data, [0, 1], [self.batch_size, self.sentence_len - 1])
    y_weight = tf.slice(y_weight, [0, 1],
                        [self.batch_size, self.y_weight_len - 1])
    y_len = tf.gather_nd(data_len - 1, y_index)

    # Look up embedding, emp_inp: [max_time, batch_size, num_units]
    with tf.variable_scope("embedding"):
        self.embedding = tf.get_variable(
            "embedding", [self.vocab_size, model_config.embed_size])

    # Encoder
    with tf.variable_scope("encoder"):
        # Build sentence cell
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            raw_sentence_cell = build_rnn_cell(
                model_config.sentence_cell_layer_size,
                model_config.sentence_cell_layer_num)
            self.sentence_cell = tf.contrib.rnn.DropoutWrapper(
                cell=raw_sentence_cell,
                state_keep_prob=model_config.sentence_cell_keep_prob,
                variational_recurrent=True,
                input_size=tf.TensorShape(
                    model_config.sentence_cell_layer_size),
                dtype=tf.float32)
        else:
            self.sentence_cell = build_rnn_cell(
                model_config.sentence_cell_layer_size,
                model_config.sentence_cell_layer_num)
        # Build session cell
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            raw_session_cell = build_rnn_cell(
                model_config.session_cell_layer_size,
                model_config.session_cell_layer_num)
            self.session_cell = tf.contrib.rnn.DropoutWrapper(
                cell=raw_session_cell,
                state_keep_prob=model_config.session_cell_keep_prob,
                variational_recurrent=True,
                input_size=tf.TensorShape(
                    model_config.session_cell_layer_size),
                dtype=tf.float32)
        else:
            self.session_cell = build_rnn_cell(
                model_config.session_cell_layer_size,
                model_config.session_cell_layer_num)
        # get last hidden state from session cell
        session_state = self._build_encoder(x, x_num, x_len, x_ab)

    # select state a or b to decode
    intermediate_state = tf.reshape(
        session_state, [-1, model_config.session_cell_layer_size])
    with tf.variable_scope("intent_projection"):
        decoder_intent_state_middle = layers_core.dense(
            intermediate_state,
            model_config.decoder_cell_layer_size * model_config.intent_num,
            activation=tf.nn.relu,
            use_bias=True,
            name="intent_proj_middle")
        decoder_intent_state = layers_core.dense(
            decoder_intent_state_middle,
            model_config.decoder_cell_layer_size * model_config.intent_num,
            use_bias=True,
            name="intent_proj")
        decoder_intent_state = tf.reshape(
            decoder_intent_state, [-1, model_config.decoder_cell_layer_size])

    # Decoder
    with tf.variable_scope("decoder"):
        # Build decoder cell
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            raw_decoder_cell = build_rnn_cell(
                model_config.decoder_cell_layer_size,
                model_config.decoder_cell_layer_num)
            self.decoder_cell = tf.contrib.rnn.DropoutWrapper(
                cell=raw_decoder_cell,
                state_keep_prob=model_config.decoder_cell_keep_prob,
                variational_recurrent=True,
                input_size=tf.TensorShape(
                    model_config.decoder_cell_layer_size),
                dtype=tf.float32)
        else:
            self.decoder_cell = build_rnn_cell(
                model_config.decoder_cell_layer_size,
                model_config.decoder_cell_layer_num)
        logits = self._build_decoder(decoder_intent_state, y_in, y_len)

    loss = self._compute_loss(logits, y_out, y_len, y_weight,
                              decoder_intent_state)
    return loss
def fn(a, b, c): return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c)
def test_q_ops_quantile_dqn(self):
  env = gym.make('CartPole-v0')
  ops.reset_default_graph()
  np.random.seed(42)
  random_seed.set_random_seed(42)
  env.seed(42)

  # Setup the policy and model
  global_step = training_util.get_or_create_global_step()
  deterministic_ph = array_ops.placeholder(
      dtypes.bool, [], name='deterministic')
  exploration_op = learning_rate_decay.exponential_decay(
      QTest.hparams.initial_exploration,
      global_step,
      QTest.hparams.exploration_decay_steps,
      QTest.hparams.exploration_decay_rate)

  state_distribution, state_ph = gym_ops.distribution_from_gym_space(
      env.observation_space, name='state_space')
  action_distribution, _ = gym_ops.distribution_from_gym_space(
      env.action_space, name='action_space')

  # Setup the dataset
  stream = streams.Uniform.from_distributions(
      state_distribution, action_distribution)

  with variable_scope.variable_scope('logits'):
    action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
    action_value_op = core.dense(
        action_value_op,
        stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
        use_bias=False)
    action_value_op_shape = array_ops.shape(action_value_op)
    action_value_shape = [
        action_value_op_shape[0],
        action_value_op_shape[1],
        stream.action_value_shape[-1],
        QTest.hparams.num_quantiles]
    action_value_op = gen_array_ops.reshape(action_value_op,
                                            action_value_shape)
    mean_action_value_op = math_ops.reduce_mean(action_value_op, axis=-1)
    action_op = math_ops.argmax(mean_action_value_op, axis=-1)
    action_op = array_ops.squeeze(action_op)
  policy_variables = variables.trainable_variables(scope='logits')

  next_state_ph = shortcuts.placeholder_like(state_ph, name='next_state_space')
  with variable_scope.variable_scope('targets'):
    target_next_action_value_op = mlp(next_state_ph,
                                      QTest.hparams.hidden_layers)
    target_next_action_value_op = core.dense(
        target_next_action_value_op,
        stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
        use_bias=False)
    target_next_action_value_op_shape = array_ops.shape(
        target_next_action_value_op)
    target_next_action_value_shape = [
        target_next_action_value_op_shape[0],
        target_next_action_value_op_shape[1],
        stream.action_value_shape[-1],
        QTest.hparams.num_quantiles]
    target_next_action_value_op = gen_array_ops.reshape(
        target_next_action_value_op, target_next_action_value_shape)
    mean_target_next_action_value_op = math_ops.reduce_mean(
        target_next_action_value_op, axis=-1)
  assign_target_op = shortcuts.assign_scope('logits', 'target_logits')

  replay_dataset = dataset.ReplayDataset(
      stream, max_sequence_length=QTest.hparams.max_sequence_length)
  replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
  replay_op = replay_dataset.make_one_shot_iterator().get_next()

  action_ph = array_ops.placeholder(
      stream.action_dtype, [None, None] + stream.action_shape, name='action')
  reward_ph = array_ops.placeholder(
      stream.reward_dtype, [None, None] + stream.reward_shape, name='reward')
  terminal_ph = array_ops.placeholder(
      dtypes.bool, [None, None], name='terminal')
  sequence_length_ph = array_ops.placeholder(
      dtypes.int32, [None, 1], name='sequence_length')
  sequence_length = array_ops.squeeze(sequence_length_ph, -1)

  q_value_op, expected_q_value_op = q_ops.expected_q_value(
      array_ops.expand_dims(reward_ph, -1),
      action_ph,
      action_value_op,
      (target_next_action_value_op, mean_target_next_action_value_op),
      weights=array_ops.expand_dims(
          1 - math_ops.cast(terminal_ph, reward_ph.dtype), -1),
      discount=QTest.hparams.discount)

  u = expected_q_value_op - q_value_op
  loss_op = losses_impl.huber_loss(u, delta=QTest.hparams.huber_loss_delta)
  tau_op = (2. * math_ops.range(
      0, QTest.hparams.num_quantiles, dtype=u.dtype) + 1) / (
          2. * QTest.hparams.num_quantiles)
  loss_op *= math_ops.abs(tau_op - math_ops.cast(u < 0, tau_op.dtype))
  loss_op = math_ops.reduce_mean(loss_op, axis=-1)
  loss_op = math_ops.reduce_mean(
      math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
          sequence_length, loss_op.dtype))

  optimizer = adam.AdamOptimizer(learning_rate=QTest.hparams.learning_rate)
  train_op = optimizer.minimize(loss_op, var_list=policy_variables)
  train_op = control_flow_ops.cond(
      gen_math_ops.equal(
          gen_math_ops.mod(
              ops.convert_to_tensor(
                  QTest.hparams.assign_target_steps, dtype=dtypes.int64),
              (global_step + 1)), 0),
      lambda: control_flow_ops.group(*[train_op, assign_target_op]),
      lambda: train_op)

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(assign_target_op)

    for iteration in range(QTest.hparams.num_iterations):
      rewards = gym_test_utils.rollout_on_gym_env(
          sess, env, state_ph, deterministic_ph,
          mean_action_value_op, action_op,
          num_episodes=QTest.hparams.num_episodes,
          stream=stream)

      while True:
        try:
          replay = sess.run(replay_op)
        except (errors_impl.InvalidArgumentError, errors_impl.OutOfRangeError):
          break
        loss, _ = sess.run(
            (loss_op, train_op),
            feed_dict={
                state_ph: replay.state,
                next_state_ph: replay.next_state,
                action_ph: replay.action,
                reward_ph: replay.reward,
                terminal_ph: replay.terminal,
                sequence_length_ph: replay.sequence_length,
            })

      rewards = gym_test_utils.rollout_on_gym_env(
          sess, env, state_ph, deterministic_ph,
          mean_action_value_op, action_op,
          num_episodes=QTest.hparams.num_episodes,
          deterministic=True, save_replay=False)
      print('average_rewards = {}'.format(
          rewards / QTest.hparams.num_episodes))
def dnn_logit_fn(features, mode):
  """Deep Neural Network logit_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this is training, evaluation or prediction.
      See `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
    multiple logits in the MultiHead case.
  """
  with variable_scope.variable_scope(
      'input_from_feature_columns',
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner):
    net = feature_column_lib.input_layer(
        features=features, feature_columns=feature_columns)
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
      net = core_layers.dense(
          net,
          units=num_hidden_units,
          activation=activation_fn,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=hidden_layer_scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = core_layers.dropout(net, rate=dropout, training=True)
    _add_hidden_layer_summary(net, hidden_layer_scope.name)

  if isinstance(units, int):
    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)
  else:
    logits = []
    for head_index, logits_dimension in enumerate(units):
      with variable_scope.variable_scope(
          'logits_head_{}'.format(head_index), values=(net,)) as logits_scope:
        these_logits = core_layers.dense(
            net,
            units=logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=logits_scope)
      _add_hidden_layer_summary(these_logits, logits_scope.name)
      logits.append(these_logits)
  return logits
def build(self, img_input):
    def conv(input, filters, stride, name):
        return conv2d(input, filters, [3, 3], strides=[stride, stride],
                      name=name, padding='same', activation=None,
                      use_bias=False,
                      kernel_initializer=tf.random_normal_initializer(
                          stddev=np.sqrt(2.0 / 9 / filters)))

    def add_layer(name, input, cur_layer_idx):
        # shape = input.get_shape().as_list()
        # in_channel = shape[3]
        with tf.variable_scope(name) as scope:
            c = self._batch_norm_default(input, name)
            c = tf.nn.relu(c)
            c = conv(c, self.deps[cur_layer_idx], 1, 'conv1')
            result = tf.concat([input, c], 3)
            return result

    def add_transition(name, input, nb_filters):
        # shape = input.get_shape().as_list()
        # in_channel = shape[3]
        with tf.variable_scope(name) as scope:
            l = self._batch_norm_default(input, name)
            l = tf.nn.relu(l)
            l = conv2d(l, nb_filters, [1, 1], strides=[1, 1], name='conv1',
                       padding='same', activation=None, use_bias=False)
            l = tf.nn.relu(l)
            l = self._avgpool(l, 2)
            return l

    # tf.summary.image('input-image', img_input)
    l = conv(img_input, self.deps[0], 1, 'conv0')
    with tf.variable_scope('stage1') as scope:
        for i in range(self.N):
            l = add_layer('block{}'.format(i), l, self.N * 0 + 1 + i)
        l = add_transition('transition1', l, nb_filters=self.deps[self.N + 1])
    with tf.variable_scope('stage2') as scope:
        for i in range(self.N):
            l = add_layer('block{}'.format(i), l, self.N * 1 + 2 + i)
        l = add_transition('transition2', l,
                           nb_filters=self.deps[self.N * 2 + 2])
    with tf.variable_scope('stage3') as scope:
        for i in range(self.N):
            l = add_layer('block{}'.format(i), l, self.N * 2 + 3 + i)
    l = self._batch_norm_default(l, scope='bnlast')
    l = tf.nn.relu(l)
    l = self._gap(l)
    l = self._flatten(l)
    logits = dense(l, self.num_classes, activation=None, use_bias=True,
                   kernel_initializer=self._xavier_initializer(), name='fc10')
    return logits
def __init__(self, reader, scope=None):
    # Create local graph and use it in the session
    self.reader = reader
    self.decoding_vocab_size = reader.decoding_vocab_size
    self.vocab_size = reader.vocab_size
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph,
                           config=tf.ConfigProto(allow_soft_placement=True,
                                                 log_device_placement=False))
    self.initializer = tf.zeros_initializer()

    with self.graph.as_default():
        # Build input placeholders and ops
        with tf.name_scope("input"):
            self.x = tf.placeholder(dtype=tf.int32, shape=[None, None, None],
                                    name='x')
            self.x_num = tf.placeholder(dtype=tf.int32, shape=[None],
                                        name='x_num')
            self.x_len = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                        name='x_len')
            self.x_ab = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                       name='x_ab')
            self.batch_size = tf.shape(self.x)[0]
            self.sentence_num = tf.shape(self.x)[1]
            self.sentence_len = tf.shape(self.x)[2]

        with tf.variable_scope("model"):
            # Look up embedding, emp_inp: [max_time, batch_size, num_units]
            with tf.variable_scope("embedding"):
                self.embedding = tf.get_variable(
                    "embedding", [self.vocab_size, model_config.embed_size])
            # Encoder
            with tf.variable_scope("encoder"):
                # Build sentence cell
                self.sentence_cell = build_rnn_cell(
                    model_config.sentence_cell_layer_size,
                    model_config.sentence_cell_layer_num)
                # Build session cell
                self.session_cell = build_rnn_cell(
                    model_config.session_cell_layer_size,
                    model_config.session_cell_layer_num)
                # get last hidden state from session cell
                session_state = self._build_encoder(
                    self.x, self.x_num, self.x_len, self.x_ab)
            # select state a or b to decode
            intermediate_state = tf.reshape(
                session_state, [-1, model_config.session_cell_layer_size])
            with tf.variable_scope("intent_projection"):
                decoder_intent_state_middle = layers_core.dense(
                    intermediate_state,
                    model_config.decoder_cell_layer_size *
                    model_config.intent_num,
                    activation=tf.nn.relu,
                    use_bias=True,
                    name="intent_proj_middle")
                decoder_intent_state = layers_core.dense(
                    decoder_intent_state_middle,
                    model_config.decoder_cell_layer_size *
                    model_config.intent_num,
                    use_bias=True,
                    name="intent_proj")
                decoder_intent_state = tf.reshape(
                    decoder_intent_state,
                    [-1, model_config.decoder_cell_layer_size])
            # Decoder
            with tf.variable_scope("decoder"):
                # Build decoder cell
                self.decoder_cell = build_rnn_cell(
                    model_config.decoder_cell_layer_size,
                    model_config.decoder_cell_layer_num)
                self.sample_ids, self.scores = self._build_decoder(
                    decoder_intent_state)

        with tf.name_scope("helper"):
            self.model_vars = tf.trainable_variables()
            self.model_saver = tf.train.Saver(self.model_vars)
            self.init_op = tf.group(tf.global_variables_initializer(),
                                    tf.local_variables_initializer())
def g(x): return core_layers.dense(x, self.CHANNELS // 2)
def f2(x): return core_layers.dense(x, self.CHANNELS // 2, activation=nn_ops.relu)
def f(x, side_input):
  return core_layers.dense(x, self.CHANNELS // 2, use_bias=True) + side_input[0]
def layer_with_recompute(inputs): return core_layers.dense(inputs, 2)
def g(x): return core_layers.dense(x, self.CHANNELS // 2)
def layer_with_recompute(inputs):
  with variable_scope.variable_scope("inner", use_resource=True):
    return core_layers.dense(inputs, 2)
def layer_with_recompute(inputs):
  with variable_scope.variable_scope("inner", use_resource=True):
    return core_layers.dense(inputs, 2)
def g(x): return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
def g(x): return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)
def distribution_from_gym_space(space, logits=None, name='SpaceDistribution'):
  """Determines a parameterized `tf.distribution.Distribution` from the
  `gym.Space`.

  Arguments:
    space: a `gym.Space` instance (i.e. `env.action_space`)
    logits: optional `list` of `tf.Tensor` to be used instead of creating them.
    name: Python `str` name prefixed to Ops created.

  Raises:
    `TypeError` when space is not a `gym.Space` instance.

  Returns:
    Either a `tuple` or `dict` of `DistributionWithLogits`, or just a single
    `DistributionWithLogits`.
  """
  assert_utils.assert_true(
      isinstance(space, Space),
      '`space` must be an instance of `gym.Space`')
  with ops.name_scope(name):
    if isinstance(space, discrete.Discrete):
      if logits and isinstance(logits[0], ops.Tensor):
        logits = _dense_projection(logits[0], [space.n])
      else:
        logits = _placeholder_factory_map[discrete.Discrete](space)
      distribution = categorical.Categorical(
          logits=math_ops.cast(logits, dtypes.float32))
      return DistributionWithLogits(distribution=distribution, logits=logits)
    elif isinstance(space, multi_discrete.MultiDiscrete):
      if logits and isinstance(logits[0], ops.Tensor):
        logits = _dense_projection(logits[0], space.shape)
      else:
        logits = _placeholder_factory_map[multi_discrete.MultiDiscrete](space)
      distribution = categorical.Categorical(
          logits=math_ops.cast(logits, dtypes.float32))
      return DistributionWithLogits(distribution=distribution, logits=logits)
    elif isinstance(space, multi_binary.MultiBinary):
      if logits and isinstance(logits[0], ops.Tensor):
        logits = _dense_projection(logits[0], space.shape)
      else:
        logits = _placeholder_factory_map[multi_binary.MultiBinary](space)
      distribution = bernoulli.Bernoulli(logits=logits)
      return DistributionWithLogits(distribution=distribution, logits=logits)
    elif isinstance(space, box.Box):
      if logits and isinstance(logits[0], ops.Tensor):
        logits = _dense_projection(logits[0], space.shape)
      else:
        logits = _placeholder_factory_map[box.Box](space)
      flat_shape = array_utils.product(space.shape)
      shape = array_ops.shape(logits)
      logits = gen_array_ops.reshape(logits, [shape[0], shape[1], flat_shape])
      log_eps = math.log(distribution_utils.epsilon)

      alpha = core.dense(logits, flat_shape, use_bias=False)
      alpha = clip_ops.clip_by_value(alpha, log_eps, -log_eps)
      alpha = math_ops.log(math_ops.exp(alpha) + 1.0) + 1.0
      alpha = gen_array_ops.reshape(alpha, shape)

      beta = core.dense(logits, flat_shape, use_bias=False)
      beta = clip_ops.clip_by_value(beta, log_eps, -log_eps)
      beta = math_ops.log(math_ops.exp(beta) + 1.0) + 1.0
      beta = gen_array_ops.reshape(beta, shape)

      distribution = beta_min_max.BetaMinMax(
          concentration1=alpha,
          concentration0=beta,
          min_value=space.low,
          max_value=space.high)
      return DistributionWithLogits(distribution=distribution, logits=logits)
    elif isinstance(space, tuple_space.Tuple):
      if not logits:
        logits = [None] * len(space.spaces)
      return tuple(
          distribution_from_gym_space(val, logits=[logit],
                                      name='tuple_{}'.format(idx))
          for idx, (val, logit) in enumerate(zip(space.spaces, logits)))
    elif isinstance(space, dict_space.Dict):
      if not logits:
        logits = [None] * len(space.spaces)
      return {
          key: distribution_from_gym_space(val, logits=[logit],
                                           name='{}'.format(key))
          for (key, val), logit in zip(space.spaces.items(), logits)
      }
    raise TypeError('`space` not supported: {}'.format(type(space)))