def __init__(self, hidden_size=100, out_size=100, batch_size=300, n_node=None, lr=None, l2=None, step=1, decay=None, lr_dc=0.1, nonhybrid=False):
    """Build the GGNN graph, its training op and an initialized session.

    Args:
        hidden_size: number of columns of the node embedding table.
        out_size: forwarded to the base constructor; ``self.out_size`` sizes
            the W_in / W_out matrices and their biases.
        batch_size: forwarded to the base constructor; ``self.batch_size`` is
            the fixed leading dimension of the adjacency placeholders.
        n_node: number of rows of the node embedding table.
        lr: initial learning rate of the exponential decay schedule.
        l2: stored on ``self.L2``; not otherwise used in this constructor.
        step: stored on ``self.step``; not otherwise used in this constructor.
        decay: ``decay_steps`` of the learning-rate schedule.
        lr_dc: ``decay_rate`` of the learning-rate schedule.
        nonhybrid: forwarded to the base constructor and stored on the instance.
    """
    # NOTE(review): looks like a leftover debug print -- confirm before removing.
    print("4????\n\n\n")
    # The base constructor is relied on for self.stdv, self.out_size and
    # self.batch_size, which are read below.
    super(GGNN, self).__init__(hidden_size, out_size, batch_size, nonhybrid)
    self.embedding = tf.get_variable(shape=[n_node, hidden_size], name='embedding', dtype=tf.float32,
                                     initializer=tf.random_uniform_initializer(-self.stdv, self.stdv))  # embedding vector of every node
    # Incoming / outgoing adjacency inputs; the two trailing dimensions are
    # left dynamic.
    self.adj_in = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, None, None])
    self.adj_out = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, None, None])
    self.n_node = n_node
    self.L2 = l2
    self.step = step
    self.nonhybrid = nonhybrid
    self.W_in = tf.get_variable('W_in', shape=[self.out_size, self.out_size], dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(-self.stdv, self.stdv))
    self.b_in = tf.get_variable('b_in', [self.out_size], dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(-self.stdv, self.stdv))
    self.W_out = tf.get_variable('W_out', [self.out_size, self.out_size], dtype=tf.float32,
                                 initializer=tf.random_uniform_initializer(-self.stdv, self.stdv))
    self.b_out = tf.get_variable('b_out', [self.out_size], dtype=tf.float32,
                                 initializer=tf.random_uniform_initializer(-self.stdv, self.stdv))
    # The graph is built twice under the same scope: the first pass creates
    # the variables, the second (reuse=True) shares them for evaluation.
    with tf.variable_scope('ggnn_model', reuse=None):
        self.loss_train, _ = self.forward(self.ggnn())
    with tf.variable_scope('ggnn_model', reuse=True):
        self.loss_test, self.score_test = self.forward(self.ggnn(), train=False)
    self.global_step = tf.Variable(0)
    self.learning_rate = tf.train.exponential_decay(lr, global_step=self.global_step, decay_steps=decay,
                                                    decay_rate=lr_dc, staircase=True)  # dynamically decayed learning rate
    self.opt = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss_train, global_step=self.global_step)  # training step over the learned parameters
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    # Dump the graph for TensorBoard, then initialize all variables.
    # NOTE(review): the log directory is a hard-coded Windows path -- confirm
    # it is intended for every environment this runs in.
    writer = tf.summary.FileWriter('D:\\tf_dir\\tensorboard_study', self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
    writer.close()
def _fc(x, out_dim, var_list, name="fc", is_cifar=False):
    """Create a fully connected layer and apply it to ``x``.

    Declares ``weights`` and ``biases`` under the variable scope ``name``,
    appends both to ``var_list`` and returns ``tf.matmul(x, w) + b``.

    Args:
        x: input tensor; its second static dimension is used as the fan-in.
        out_dim: number of output units.
        var_list: list that receives the new variables (weights, then biases).
        name: variable scope of the layer.
        is_cifar: when true the bias is drawn from the same uniform range as
            the weights; otherwise it is initialized to zero.

    Returns:
        The affine transform of ``x``.
    """
    fan_in = x.get_shape().as_list()[1]
    # Uniform bound of 1 / sqrt(fan_in).
    bound = 1.0 / math.sqrt(fan_in)

    with tf.variable_scope(name):
        weights = tf.get_variable(
            'weights', [fan_in, out_dim], tf.float32,
            initializer=tf.random_uniform_initializer(-bound, bound))

        if is_cifar:
            bias_init = tf.random_uniform_initializer(-bound, bound)
        else:
            bias_init = tf.constant_initializer(0)
        biases = tf.get_variable('biases', [out_dim], tf.float32,
                                 initializer=bias_init)

        # Register the new variables with the caller's list.
        for variable in (weights, biases):
            var_list.append(variable)

        result = tf.matmul(x, weights) + biases
    return result
def layer(input_layer, num_next_neurons, is_output=False):
    """Add a dense layer on top of ``input_layer``.

    Args:
        input_layer: 2-D tensor whose second static dimension is the fan-in.
        num_next_neurons: number of units in the new layer.
        is_output: when true, weights and biases are drawn from
            U(-3e-3, 3e-3) and the pre-activation is returned; otherwise the
            range is +/- 1/sqrt(fan_in) and a ReLU is applied.

    Returns:
        The layer output tensor.
    """
    fan_in = int(input_layer.shape[1])

    # Output layers use a small fixed range, hidden layers scale with fan-in.
    limit = 3e-3 if is_output else 1 / fan_in**0.5

    kernel = tf.get_variable(
        "weights", [fan_in, num_next_neurons],
        initializer=tf.random_uniform_initializer(minval=-limit, maxval=limit))
    offset = tf.get_variable(
        "biases", [num_next_neurons],
        initializer=tf.random_uniform_initializer(minval=-limit, maxval=limit))

    pre_activation = tf.matmul(input_layer, kernel) + offset
    return pre_activation if is_output else tf.nn.relu(pre_activation)
def __init__(self, hidden_size=100, out_size=100, batch_size=100, nonhybrid=True):
    """Store the model sizes and create the shared inputs and nasr_* weights.

    Args:
        hidden_size: used to derive the uniform init bound
            ``1 / sqrt(hidden_size)`` stored on ``self.stdv``.
        out_size: size of the nasr_* weight matrices and bias.
        batch_size: stored on the instance.
        nonhybrid: stored on the instance.
    """
    self.hidden_size = hidden_size
    self.out_size = out_size
    self.batch_size = batch_size
    self.nonhybrid = nonhybrid
    self.stdv = 1.0 / math.sqrt(self.hidden_size)

    # Untyped-shape inputs, created in a fixed order.
    self.mask = tf.placeholder(dtype=tf.float32)
    # presumably the re-indexing of each input -- original note is truncated
    self.alias = tf.placeholder(dtype=tf.int32)
    # matrix built from the re-numbered sequences
    self.item = tf.placeholder(dtype=tf.int32)
    self.tar = tf.placeholder(dtype=tf.int32)

    def uniform():
        """Return a fresh U(-stdv, stdv) initializer."""
        return tf.random_uniform_initializer(-self.stdv, self.stdv)

    self.nasr_w1 = tf.get_variable(
        'nasr_w1', [self.out_size, self.out_size],
        dtype=tf.float32, initializer=uniform())
    self.nasr_w2 = tf.get_variable(
        'nasr_w2', [self.out_size, self.out_size],
        dtype=tf.float32, initializer=uniform())
    self.nasr_v = tf.get_variable(
        'nasrv', [1, self.out_size],
        dtype=tf.float32, initializer=uniform())
    self.nasr_b = tf.get_variable(
        'nasr_b', [self.out_size],
        dtype=tf.float32, initializer=tf.zeros_initializer())
def model_fn(model, features, labels, mode):
    """Model function joining a local activation with one obtained remotely.

    A dense+ReLU layer is applied to ``features['x']``; the second half of
    the hidden representation (``act1_f``) is obtained through
    ``model.recv`` in TRAIN/EVAL and from ``features`` in PREDICT. Both are
    concatenated and mapped to 10 logits.

    Args:
        model: object providing ``recv``, ``minimize`` and ``make_spec``.
        features: dict with key ``'x'`` (and ``'act1_f'`` when predicting).
        labels: dict with key ``'y'`` (unused when predicting).
        mode: one of ``tf.estimator.ModeKeys``.

    Returns:
        Whatever ``model.make_spec`` returns for the given mode.
    """
    print(features, labels, mode)
    keys = tf.estimator.ModeKeys
    local_input = features['x']

    w1l = tf.get_variable(
        'w1l', shape=[28 * 28 // 2, 128], dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    b1l = tf.get_variable(
        'b1l', shape=[128], dtype=tf.float32,
        initializer=tf.zeros_initializer())
    w2 = tf.get_variable(
        'w2', shape=[128 * 2, 10], dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    b2 = tf.get_variable(
        'b2', shape=[10], dtype=tf.float32,
        initializer=tf.zeros_initializer())

    local_act = tf.nn.relu(tf.nn.bias_add(tf.matmul(local_input, w1l), b1l))

    # Gradients are only requested for the received tensor while training.
    if mode in (keys.TRAIN, keys.EVAL):
        remote_act = model.recv('act1_f', tf.float32,
                                require_grad=(mode == keys.TRAIN))
    else:
        remote_act = features['act1_f']

    hidden = tf.concat([local_act, remote_act], axis=1)
    logits = tf.nn.bias_add(tf.matmul(hidden, w2), b2)

    if mode == keys.PREDICT:
        return model.make_spec(mode=mode, predictions=logits)

    y = labels['y']
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.math.reduce_mean(per_example)

    if mode == keys.EVAL:
        predicted = tf.argmax(logits, axis=1)
        accuracy_metric = tf.metrics.accuracy(y, predicted)
        return model.make_spec(
            mode=mode, loss=loss,
            eval_metric_ops={'accuracy': accuracy_metric})

    # Remaining case: training.
    sgd = tf.train.GradientDescentOptimizer(0.1)
    train_op = model.minimize(
        sgd, loss, global_step=tf.train.get_or_create_global_step())
    hits = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    batch_acc = tf.reduce_mean(input_tensor=tf.cast(hits, tf.float32))
    log_hook = tf.train.LoggingTensorHook(
        {"loss": loss, "acc": batch_acc}, every_n_iter=10)
    return model.make_spec(
        mode=mode, loss=loss, train_op=train_op,
        training_hooks=[log_hook])
def model_fn(model, features, labels, mode):
    """Model function that embeds feature slots and sends ``act1_f`` out.

    Registers one feature slot/column per slot id on ``model``, applies a
    dense+ReLU layer to the concatenated embeddings and, when training,
    passes the activation through ``model.send`` and trains on the value it
    returns (used as ``grad_loss``).

    Args:
        model: object providing the slot API, ``send``, ``minimize`` and
            ``make_spec``.
        features: passed to ``model.freeze_slots``.
        labels: unused here.
        mode: one of ``tf.estimator.ModeKeys``.

    Returns:
        The spec built by ``model.make_spec`` for TRAIN and PREDICT.
        NOTE(review): no branch handles EVAL, so the function falls through
        and returns None in that mode -- confirm this is intended.
    """
    global_step = tf.train.get_or_create_global_step()

    # Defaults applied to the feature slots created below.
    flt.feature.FeatureSlot.set_default_bias_initializer(
        tf.zeros_initializer())
    flt.feature.FeatureSlot.set_default_vec_initializer(
        tf.random_uniform_initializer(-0.0078125, 0.0078125))
    flt.feature.FeatureSlot.set_default_bias_optimizer(
        tf.train.FtrlOptimizer(learning_rate=0.01))
    flt.feature.FeatureSlot.set_default_vec_optimizer(
        tf.train.AdagradOptimizer(learning_rate=0.01))

    # ``args`` is a module-level object; fid version 1 uses two slots, any
    # other version switches the model to fid v2 and adds a third slot.
    if args.fid_version == 1:
        slots = [512, 1023]
    else:
        model.set_use_fid_v2(True)
        slots = [512, 1023, 32767]
    hash_size = 101
    embed_size = 16
    for slot_id in slots:
        fs = model.add_feature_slot(slot_id, hash_size)
        fc = model.add_feature_column(fs)
        fc.add_vector(embed_size)

    model.freeze_slots(features)

    embed_output = model.get_vec()

    # One ``embed_size`` vector per slot is assumed to be concatenated in
    # ``embed_output`` -- TODO confirm against model.get_vec().
    output_size = len(slots) * embed_size
    fc1_size = 64
    w1f = tf.get_variable('w1f', shape=[output_size, fc1_size], dtype=tf.float32,
                          initializer=tf.random_uniform_initializer(
                              -0.01, 0.01))
    b1f = tf.get_variable('b1f', shape=[fc1_size], dtype=tf.float32,
                          initializer=tf.zeros_initializer())
    act1_f = tf.nn.relu(tf.nn.bias_add(tf.matmul(embed_output, w1f), b1f))

    if mode == tf.estimator.ModeKeys.TRAIN:
        # The value returned by send() is fed to minimize() as grad_loss.
        gact1_f = model.send('act1_f', act1_f, require_grad=True)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(optimizer, act1_f, grad_loss=gact1_f,
                                  global_step=global_step)
        return model.make_spec(
            mode, loss=tf.math.reduce_mean(act1_f), train_op=train_op, )
    elif mode == tf.estimator.ModeKeys.PREDICT:
        return model.make_spec(mode, predictions={'act1_f': act1_f})
def test_batchnorm_bounds(self, batchnorm_class, dtype, tol, is_training):
    """Check batch-norm bound propagation against a folded linear layer.

    Bounds are pushed through a linear layer followed by batch norm, and
    separately through a single linear layer whose weights come from
    ``layer_utils.combine_with_batchnorm``; both results must agree within
    ``tol``.

    Args:
        batchnorm_class: constructor of the batch-norm module under test.
        dtype: dtype of the random tensors.
        tol: absolute and relative tolerance of the comparison.
        is_training: selects batch statistics (mean/variance) versus the
            moving statistics, and is forwarded to the module call.
    """
    batch_size = 11
    input_size = 7
    output_size = 5

    # Random interval [lb_in, ub_in], ordered element-wise, plus a nominal
    # point the relative bounds are expressed against.
    lb_in = tf.random_normal(dtype=dtype, shape=(batch_size, input_size))
    ub_in = tf.random_normal(dtype=dtype, shape=(batch_size, input_size))
    lb_in, ub_in = tf.minimum(lb_in, ub_in), tf.maximum(lb_in, ub_in)
    nominal = tf.random_normal(dtype=dtype, shape=(batch_size, input_size))

    # Linear layer.
    w = tf.random_normal(dtype=dtype, shape=(input_size, output_size))
    b = tf.random_normal(dtype=dtype, shape=(output_size, ))

    # Batch norm layer.
    epsilon = 1.e-2
    bn_initializers = {
        'beta': tf.random_normal_initializer(),
        'gamma': tf.random_uniform_initializer(.1, 3.),
        'moving_mean': tf.random_normal_initializer(),
        'moving_variance': tf.random_uniform_initializer(.1, 3.) }
    batchnorm_module = batchnorm_class(offset=True, scale=True, eps=epsilon,
                                       initializers=bn_initializers)
    # Connect the batchnorm module to the graph.
    batchnorm_module(tf.random_normal(dtype=dtype, shape=(batch_size, output_size)),
                     is_training=is_training)

    bounds_in = ibp.RelativeIntervalBounds(lb_in - nominal, ub_in - nominal, nominal)
    bounds_out = bounds_in.apply_linear(None, w, b)
    bounds_out = bounds_out.apply_batch_norm(
        batchnorm_module,
        batchnorm_module.mean if is_training else batchnorm_module.moving_mean,
        batchnorm_module.variance if is_training else batchnorm_module.moving_variance,
        batchnorm_module.gamma, batchnorm_module.beta, epsilon)
    lb_out, ub_out = bounds_out.lower, bounds_out.upper

    # Separately, compute the same bounds by folding the batch norm into the
    # linear layer's weights and bias.
    wn, bn = layer_utils.combine_with_batchnorm(w, b, batchnorm_module)
    bounds_out_lin = bounds_in.apply_linear(None, wn, bn)
    lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper

    init_op = tf.global_variables_initializer()

    with self.test_session() as session:
        session.run(init_op)
        # All four tensors are fetched in a single run call so they share
        # the same random draws.
        (lb_out_val, ub_out_val, lb_out_lin_val, ub_out_lin_val) = session.run(
            (lb_out, ub_out, lb_out_lin, ub_out_lin))
        self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol)
        self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol)
def model_fn(model, features, labels, mode):
    """Model function that embeds 512 sparse inputs and sends ``act1_f`` out.

    Each ``features['x_<i>']`` is looked up (mean-combined) in its own
    embedding table; the concatenation feeds a dense+ReLU layer whose output
    is sent through ``model.send`` in TRAIN and EVAL and returned as a
    prediction otherwise.

    Args:
        model: object providing ``send``, ``minimize`` and ``make_spec``.
        features: dict with keys ``'x_0'`` .. ``'x_511'``.
        labels: unused here.
        mode: one of ``tf.estimator.ModeKeys``.

    Returns:
        The spec built by ``model.make_spec`` for the given mode.
    """
    keys = tf.estimator.ModeKeys
    global_step = tf.train.get_or_create_global_step()

    num_slot = 512
    fid_size, embed_size = 101, 16
    sparse_inputs = [
        features['x_{0}'.format(slot)] for slot in range(num_slot)
    ]

    # All tables are created before any lookup op is added.
    tables = []
    for slot in range(num_slot):
        tables.append(
            tf.get_variable(
                'slot_emb{0}'.format(slot), shape=[fid_size, embed_size],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-0.01, 0.01)))

    looked_up = [
        tf.nn.embedding_lookup_sparse(
            table, ids, sp_weights=None, combiner='mean')
        for table, ids in zip(tables, sparse_inputs)
    ]
    embed_output = tf.concat(looked_up, axis=1)

    fc1_size = 64
    w1f = tf.get_variable(
        'w1f', shape=[num_slot * embed_size, fc1_size], dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    b1f = tf.get_variable(
        'b1f', shape=[fc1_size], dtype=tf.float32,
        initializer=tf.zeros_initializer())
    act1_f = tf.nn.relu(tf.nn.bias_add(tf.matmul(embed_output, w1f), b1f))

    if mode == keys.TRAIN:
        # The value returned by send() is fed to minimize() as grad_loss.
        upstream_grad = model.send('act1_f', act1_f, require_grad=True)
        sgd = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(
            sgd, act1_f, grad_loss=upstream_grad, global_step=global_step)
        return model.make_spec(
            mode, loss=tf.math.reduce_mean(act1_f), train_op=train_op)

    if mode == keys.EVAL:
        model.send('act1_f', act1_f, require_grad=False)
        # A placeholder loss so that the spec is well-formed.
        placeholder_loss = tf.reduce_mean(act1_f)
        return model.make_spec(mode=mode, loss=placeholder_loss)

    # Remaining case: prediction.
    return model.make_spec(mode, predictions={'act1_f': act1_f})
def __init__(self, layers, out_caps, cap_sz, batch_size, drop, inp_caps=None, name=None, tau=1.0):
    """Store the layer configuration and optionally create a linear map.

    Args:
        layers: selects which linear block to create (1 -> ``w1``/``b1``
            under scope 'Linear-1', 2 -> ``w2``/``b2`` under 'Linear-2').
        out_caps: number of output capsules; stored as ``self.k``.
        cap_sz: size of one capsule.
        batch_size: stored on the instance.
        drop: stored on the instance.
        inp_caps: number of input capsules; the linear weights have
            ``inp_caps * cap_sz`` rows.
        name: layer name; when falsy one is generated from the lower-cased
            class name and ``get_layer_id``.
        tau: stored on the instance.
    """
    if not name:
        layer = self.__class__.__name__.lower()
        name = layer + '_' + str(get_layer_id(layer))
    self.name = name
    self.batch_size = batch_size
    self.tau = tau
    self.drop = drop
    self.cap_sz = cap_sz
    # d is the flattened output width, k the number of output capsules.
    self.d, self.k = out_caps * cap_sz, out_caps
    self._cache_zero_d = tf.zeros([1, self.d])
    self._cache_zero_k = tf.zeros([1, self.k])
    # NOTE(review): the linear blocks below multiply by inp_caps, so they are
    # placed under this guard -- confirm against the original layout.
    if inp_caps is not None:
        self.inp_caps = inp_caps
        if layers == 1:
            with tf.variable_scope('Linear-1'):
                # Uniform bound of 1 / sqrt(d), computed as a tensor.
                stdv = 1. / tf.sqrt(tf.cast(self.d, tf.float32))
                self.w1 = tf.get_variable(
                    shape=[inp_caps * cap_sz, cap_sz * out_caps],
                    initializer=tf.random_uniform_initializer(minval=-stdv, maxval=stdv),
                    name='weights')
                self.b1 = tf.get_variable(
                    shape=[cap_sz * out_caps],
                    initializer=tf.random_uniform_initializer(minval=-stdv, maxval=stdv),
                    name='bias')
        if layers == 2:
            with tf.variable_scope('Linear-2'):
                stdv = 1. / tf.sqrt(tf.cast(self.d, tf.float32))
                self.w2 = tf.get_variable(
                    shape=[inp_caps * cap_sz, cap_sz * out_caps],
                    initializer=tf.random_uniform_initializer(minval=-stdv, maxval=stdv),
                    name='weights')
                self.b2 = tf.get_variable(
                    shape=[cap_sz * out_caps],
                    initializer=tf.random_uniform_initializer(minval=-stdv, maxval=stdv),
                    name='bias')
def model_fn(model, features, labels, mode):
    """Model function that computes ``act1_f`` from ``features['x']``.

    A dense+ReLU layer is applied to the input; the activation is sent
    through ``model.send`` in TRAIN and EVAL and returned as a prediction
    otherwise.

    Args:
        model: object providing ``send``, ``minimize`` and ``make_spec``.
        features: dict with key ``'x'``.
        labels: unused here.
        mode: one of ``tf.estimator.ModeKeys``.

    Returns:
        The spec built by ``model.make_spec`` for the given mode.
    """
    x = features['x']

    # Floor division keeps the dimension an int; ``28 * 28 / 2`` is the
    # float 392.0 under Python 3, which is not a valid shape entry.
    w1f = tf.get_variable('w1f',
                          shape=[28 * 28 // 2, 128],
                          dtype=tf.float32,
                          initializer=tf.random_uniform_initializer(
                              -0.01, 0.01))
    b1f = tf.get_variable('b1f',
                          shape=[128],
                          dtype=tf.float32,
                          initializer=tf.zeros_initializer())

    act1_f = tf.nn.relu(tf.nn.bias_add(tf.matmul(x, w1f), b1f))

    if mode == tf.estimator.ModeKeys.TRAIN:
        # The value returned by send() is fed to minimize() as grad_loss.
        gact1_f = model.send('act1_f', act1_f, require_grad=True)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(
            optimizer,
            act1_f,
            grad_loss=gact1_f,
            global_step=tf.train.get_or_create_global_step())
        return model.make_spec(mode,
                               loss=tf.math.reduce_mean(act1_f),
                               train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        model.send('act1_f', act1_f, require_grad=False)
        # A placeholder loss so that the spec is well-formed.
        fake_loss = tf.reduce_mean(act1_f)
        return model.make_spec(mode=mode, loss=fake_loss)

    # mode == tf.estimator.ModeKeys.PREDICT:
    return model.make_spec(mode=mode, predictions={'act1_f': act1_f})
def cnn_predictor(input_num, inputs, actions, previous_action, scope):
    """CNN predictor that outputs the Q value of the value function.

    Args:
        input_num: unused in this function.
        inputs: 4-D tensor; dimensions 1, 2 and 3 are read as asset count,
            window length and feature count (assumes NHWC layout for
            ``tf.nn.conv2d`` -- TODO confirm).
        actions: tensor reshaped to ``[-1, asset_dim, 1, 1]`` and appended as
            an extra channel.
        previous_action: tensor reshaped the same way and appended as an
            extra channel.
        scope: variable scope the layers are created in.

    Returns:
        Output of a final dense layer with a single unit.
    """
    with tf.variable_scope(scope):
        asset_dim = inputs.get_shape()[1]
        L = inputs.get_shape()[2]  # window length
        N = inputs.get_shape()[3]  # feature

        # filter shape [height, width, channels, number of filters]
        conv1_W = tf.Variable(tf.truncated_normal([1, 3, N, 3], stddev=0.05))
        # width-3 kernel with VALID padding: window axis shrinks to L - 2,
        # 3 output channels
        layer = tf.nn.conv2d(inputs, filter=conv1_W, padding='VALID', strides=[1, 1, 1, 1])
        norm1 = tf.layers.batch_normalization(layer)
        x = tf.nn.relu(norm1)

        # kernel spans the whole remaining window (L - 2), 20 output channels
        conv2_W = tf.Variable(tf.random_normal([1, L - 2, 3, 20], stddev=0.05))
        conv2 = tf.nn.conv2d(x, filter=conv2_W, strides=[1, 1, 1, 1], padding='VALID')
        norm2 = tf.layers.batch_normalization(conv2)
        x = tf.nn.relu(norm2)

        # Append the previous action and the current action as two extra
        # channels: 20 + 1 + 1 = 22, matching conv3_W below.
        previous_w = tf.reshape(previous_action, [-1, int(asset_dim), 1, 1])
        x = tf.concat([x, previous_w], axis=3)
        w = tf.reshape(actions, [-1, int(asset_dim), 1, 1])
        x = tf.concat([x, w], axis=3)

        # 1x1 convolution collapsing the 22 channels to one
        conv3_W = tf.Variable(tf.random_normal([1, 1, 22, 1], stddev=0.05))
        conv3 = tf.nn.conv2d(x, filter=conv3_W, strides=[1, 1, 1, 1], padding='VALID')
        norm3 = tf.layers.batch_normalization(conv3)
        net = tf.nn.relu(norm3)

        net = tf.layers.flatten(net)
        out = tf.layers.dense(net, 1, kernel_initializer=tf.random_uniform_initializer(-0.003, 0.003))
        return out
def build_net(self):
    """Build the convolutional policy network.

    Returns:
        A tuple ``(state, w_previous, out)``: the state placeholder of shape
        ``[None, M, L, N]``, the ``[None, M]`` placeholder that is appended
        as an extra channel, and the softmax output of size ``M``.
    """
    state = tf.placeholder(
        tf.float32,
        shape=[None, self.M, self.L, self.N],
        name='market_situation')

    net = tflearn.layers.conv_2d(
        state, 2, [1, 2], [1, 1, 1, 1], 'valid', 'relu')

    # Second convolution spans the full remaining width.
    remaining_width = net.get_shape()[2]
    net = tflearn.layers.conv_2d(
        net, 48, [1, remaining_width], [1, 1], "valid", 'relu',
        regularizer="L2", weight_decay=5e-9)

    w_previous = tf.placeholder(tf.float32, shape=[None, self.M])
    previous_channel = tf.reshape(w_previous, [-1, self.M, 1, 1])
    net = tf.concat([net, previous_channel], axis=3)

    net = tflearn.layers.conv_2d(
        net, 1, [1, net.get_shape()[2]], [1, 1], "valid", 'relu',
        regularizer="L2", weight_decay=5e-9)
    net = tf.layers.flatten(net)

    out = tf.layers.dense(
        net, self.M,
        activation=tf.nn.softmax,
        kernel_initializer=tf.random_uniform_initializer(-0.005, 0.005))
    return state, w_previous, out
def build(self, name):
    """Build a Q network taking a state and an action.

    Args:
        name: variable scope of the network and prefix of the placeholder
            names (``<name>_input`` and ``<name>_action``).

    Returns:
        A tuple ``(q, state_placeholder, action_placeholder)`` where ``q``
        is the squeezed output of the final single-unit layer.
    """
    state_in = tf.placeholder(dtype=tf.float32,
                              shape=(None, ) + self.state_dim,
                              name="%s_input" % name)
    action_in = tf.placeholder(tf.float32,
                               shape=[None, self.action_dim],
                               name="%s_action" % name)

    with tf.variable_scope(name):
        first = dense_layer(state_in, 400, use_bias=True, scope="fc1",
                            initializer=self.initializer)
        first = tf.nn.relu(first)

        # The action enters at the second layer.
        joined = tf.concat((first, action_in), 1)
        second = dense_layer(joined, 300, use_bias=True, scope="fc2",
                             initializer=self.initializer)
        second = tf.nn.relu(second)

        # Final layer weights are drawn from uniform[-3e-3, 3e-3].
        q_value = dense_layer(
            second, 1,
            initializer=tf.random_uniform_initializer(-3e-3, 3e-3),
            scope="q", use_bias=True)

    return tf.squeeze(q_value), state_in, action_in
def _conv(x, kernel_size, out_channels, stride, var_list, pad="SAME", name="conv"):
    """Create a square convolution kernel and convolve ``x`` with it.

    Args:
        x: input tensor; its last static dimension is the channel count.
        kernel_size: height and width of the kernel.
        out_channels: number of output channels.
        stride: stride applied to both spatial dimensions.
        var_list: list that receives the new kernel variable.
        pad: padding mode passed to ``tf.nn.conv2d``.
        name: variable scope of the layer.

    Returns:
        The convolution output (no bias is added).
    """
    channels_in = x.get_shape().as_list()[-1]

    with tf.variable_scope(name):
        # The uniform bound is 1 / sqrt(kernel_size * in_channels).
        bound = 1.0 / math.sqrt(kernel_size * channels_in)
        kernel_shape = [kernel_size, kernel_size, channels_in, out_channels]
        kernel = tf.get_variable(
            'kernel', kernel_shape, tf.float32,
            initializer=tf.random_uniform_initializer(-bound, bound))

        # Register the kernel with the caller's variable list.
        var_list.append(kernel)

        strides = [1, stride, stride, 1]
        convolved = tf.nn.conv2d(x, kernel, strides, padding=pad)
    return convolved
def get_variable_initializer(hparams):
    """Return the variable initializer selected by ``hparams.initializer``.

    Args:
        hparams: object with ``initializer`` (name of the scheme, or a falsy
            value for "no initializer") and ``initializer_gain``.

    Returns:
        ``None`` when ``hparams.initializer`` is falsy, otherwise the
        matching TensorFlow initializer.

    Raises:
        ValueError: if the initializer name is not recognized.
    """
    if not hparams.initializer:
        return None

    mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_INITIALIZER_GAIN,
                                 value=hparams.initializer_gain,
                                 hparams=hparams)

    if not tf.executing_eagerly():
        tf.logging.info("Using variable initializer: %s", hparams.initializer)

    kind = hparams.initializer

    if kind == "orthogonal":
        return tf.orthogonal_initializer(gain=hparams.initializer_gain)

    if kind == "uniform":
        # Symmetric range scaled by the gain.
        bound = 0.1 * hparams.initializer_gain
        return tf.random_uniform_initializer(-bound, bound)

    if kind == "normal_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="normal")

    if kind == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="uniform")

    if kind == "xavier":
        return tf.initializers.glorot_uniform()

    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
def build(self, name):
    """Build a policy network mapping a state to a tanh-squashed action.

    Args:
        name: variable scope of the network and prefix of the placeholder
            name (``<name>_input``).

    Returns:
        A tuple ``(action, state_placeholder)``.
    """
    state_in = tf.placeholder(dtype=tf.float32,
                              shape=(None, ) + self.state_dim,
                              name="%s_input" % name)

    with tf.variable_scope(name):
        first = dense_layer(state_in, 400, use_bias=True, scope="fc1",
                            initializer=self.initializer)
        first = tf.nn.relu(first)

        second = dense_layer(first, 300, use_bias=True, scope="fc2",
                             initializer=self.initializer)
        second = tf.nn.relu(second)

        # tanh keeps every action component inside [-1, 1].
        raw_action = dense_layer(
            second, self.action_dim,
            initializer=tf.random_uniform_initializer(-3e-3, 3e-3),
            scope="pi", use_bias=True)
        action = tf.nn.tanh(raw_action)

    return action, state_in
def __call__(self):
    """Build the fitting graph and return its inputs, loss and train op.

    Two log-linear functions of ``x`` are created from the variables
    ``a``/``b`` and ``s``/``t`` (each of shape ``self.n_zipfs``); the first
    is soft-maxed and used to weight the second. The loss is the mean
    squared error against ``log(y)`` plus a penalty pulling the column-wise
    maximum of the weights towards 1.

    Returns:
        A tuple ``(x, y, loss, minimize_op)``.
    """
    optimizer = RadamOptimizer.from_configurable(
        self, learning_rate=1e-1, decay_steps=500)

    x = tf.placeholder(tf.float32, shape=(None, 1), name='x')
    y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

    def log_linear(slope, intercept):
        """Return ``slope * log(x) + intercept``."""
        return slope * tf.log(x) + intercept

    def normal_variable(var_name):
        """Create a normal-initialized variable of shape ``n_zipfs``."""
        return tf.get_variable(var_name, shape=self.n_zipfs,
                               dtype=tf.float32,
                               initializer=tf.random_normal_initializer())

    weight_slope = normal_variable('a')
    weight_bias = normal_variable('b')
    # Slopes of the weighted components start in [-2, -0.5).
    comp_slope = tf.get_variable(
        's', shape=self.n_zipfs, dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-2, -.5))
    comp_bias = normal_variable('t')

    weights = tf.expand_dims(
        tf.nn.softmax(log_linear(weight_slope, weight_bias)),
        axis=1, name='w')
    components = tf.expand_dims(
        log_linear(comp_slope, comp_bias), axis=2, name='z')
    prediction = tf.squeeze(
        tf.matmul(weights, components), axis=2, name='yhat')

    fit_term = tf.reduce_mean((tf.log(y) - prediction)**2 / 2, name='ell')
    peak_term = tf.reduce_mean((tf.reduce_max(weights, axis=0) - 1)**2 / 2)
    total = fit_term + peak_term

    train_op = optimizer.minimize(total, name='minimize')
    return x, y, total, train_op
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32, collections=None, summary_tag=None):
    """Create a 2-D convolution layer with bias and apply it to ``x``.

    Args:
        x: 4-D input tensor; dimension 3 is read as the channel count.
        num_filters: number of output channels.
        name: variable scope of the layer.
        filter_size: ``(height, width)`` of the kernel.
        stride: ``(vertical, horizontal)`` stride.
        pad: padding mode passed to ``tf.nn.conv2d``.
        dtype: dtype of the kernel variable.
        collections: collections the variables are added to.
        summary_tag: when not None, an image summary of the kernel is
            emitted under this tag (at most 10 images).

    Returns:
        ``conv2d(x, W) + b``.
    """
    with tf.variable_scope(name):
        stride_shape = [1, stride[0], stride[1], 1]
        filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = intprod(filter_shape[:3])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = intprod(filter_shape[:2]) * num_filters
        # Uniform bound sqrt(6 / (fan_in + fan_out)).
        w_bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable("W", filter_shape, dtype,
                            tf.random_uniform_initializer(-w_bound, w_bound),
                            collections=collections)
        b = tf.get_variable("b", [1, 1, 1, num_filters],
                            initializer=tf.zeros_initializer(),
                            collections=collections)

        if summary_tag is not None:
            # One single-channel image per (input, output) channel pair.
            # tf.summary.image takes ``max_outputs``; the former
            # ``max_images`` keyword belongs to the old tf.image_summary API
            # and raises TypeError here.
            tf.summary.image(
                summary_tag,
                tf.transpose(
                    tf.reshape(w, [filter_size[0], filter_size[1], -1, 1]),
                    [2, 0, 1, 3]),
                max_outputs=10)

        return tf.nn.conv2d(x, w, stride_shape, pad) + b
def rnn_predictor(input_num, inputs, actions, previous_action, scope):
    """RNN predictor that outputs the Q value of the value function.

    Args:
        input_num: batch size used to build the RNN's zero initial state.
        inputs: 4-D tensor; dimensions 1, 2 and 3 are read as asset count,
            window length and feature count.
        actions: tensor reshaped to ``[-1, asset_dim, 1, 1]`` and appended as
            an extra channel.
        previous_action: tensor reshaped the same way and appended as an
            extra channel.
        scope: variable scope the layers are created in.

    Returns:
        Output of a final dense layer with a single unit.
    """
    with tf.variable_scope(scope):
        static_shape = inputs.get_shape()
        asset_dim = static_shape[1]
        L = static_shape[2]  # window length
        N = static_shape[3]  # feature

        # Each asset becomes one RNN step carrying its flattened window.
        sequence = tf.reshape(inputs, shape=[-1, asset_dim, L*N])

        hidden_size = 10
        # One BasicRNNCell per asset, stacked into a multi-layer cell.
        stacked_cells = [
            tf.nn.rnn_cell.BasicRNNCell(hidden_size)
            for _ in range(asset_dim)
        ]
        cell = tf.nn.rnn_cell.MultiRNNCell(stacked_cells)
        zero_state = cell.zero_state(input_num, tf.float32)
        rnn_out, state = tf.nn.dynamic_rnn(
            cell, sequence, initial_state=zero_state, dtype=tf.float32)

        features = tf.reshape(rnn_out, [-1, int(asset_dim), 1, hidden_size])
        previous_w = tf.reshape(previous_action, [-1, int(asset_dim), 1, 1])
        features = tf.concat([features, previous_w], axis=3)
        current_w = tf.reshape(actions, [-1, int(asset_dim), 1, 1])
        features = tf.concat([features, current_w], axis=3)

        flat = tf.layers.flatten(features)
        hidden = tf.layers.dense(flat, 64, activation=tf.nn.relu)
        out = tf.layers.dense(
            hidden, 1,
            kernel_initializer=tf.random_uniform_initializer(-0.01, 0.01))
        return out
def _build_params(self):
    """Create the controller variables and print how many parameters exist.

    All variables live under the scope ``self.name`` and default to a
    U(-0.01, 0.01) initializer. Sizes come from
    ``self.params.controller_hidden_size`` and
    ``self.params.controller_num_functions``.
    """
    hidden = self.params.controller_hidden_size
    n_funcs = self.params.controller_num_functions
    small_uniform = tf.random_uniform_initializer(minval=-0.01, maxval=0.01)

    with tf.variable_scope(self.name, initializer=small_uniform):
        with tf.variable_scope('lstm'):
            self.w_lstm = tf.get_variable('w', [2 * hidden, 4 * hidden])

        with tf.variable_scope('embedding'):
            self.g_emb = tf.get_variable('g', [1, hidden])
            self.w_emb = tf.get_variable('w', [n_funcs, hidden])

        with tf.variable_scope('attention'):
            self.attn_w_1 = tf.get_variable('w_1', [hidden, hidden])
            self.attn_w_2 = tf.get_variable('w_2', [hidden, hidden])
            self.attn_v = tf.get_variable('v', [hidden, 1])

    # Count the elements of every trainable variable whose name starts with
    # this controller's name.
    total = 0
    for variable in tf.trainable_variables():
        if variable.name.startswith(self.name):
            total = total + np.prod(variable.shape)
    print('Controller has {0} params'.format(total))
def init(n_inputs, n_outputs, uniform=True):
    """Return an initializer scaled by the layer's fan-in plus fan-out.

    Args:
        n_inputs: number of input units.
        n_outputs: number of output units.
        uniform: when true return a uniform initializer over
            +/- sqrt(6 / (n_inputs + n_outputs)); otherwise a truncated
            normal with stddev sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        spread = tf.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=spread)

    bound = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-bound, bound)
def __init__(self, layer_name, filter_size, num_hidden_in, num_hidden_out, seq_shape, forget_bias=1.0, tln=False, initializer=0.001):
    """Initialize the Causal LSTM cell.

    Args:
        layer_name: name distinguishing this LSTM layer from the others.
        filter_size: int tuple, height and width of the filter.
        num_hidden_in: number of units of the input tensor.
        num_hidden_out: number of units of the output tensor.
        seq_shape: shape of a sequence; entries 0, 2 and 3 are read as
            batch, height and width.
        forget_bias: float, bias added to the forget gates.
        tln: whether to apply tensor layer normalization.
        initializer: half-width of the symmetric uniform range used for the
            cell's variable initializer.
    """
    # Naming and filter geometry.
    self.layer_name = layer_name
    self.filter_size = filter_size

    # Channel counts.
    self.num_hidden_in = num_hidden_in
    self.num_hidden = num_hidden_out

    # Geometry taken from the sequence shape (index 1 is not used here).
    self.batch = seq_shape[0]
    self.height = seq_shape[2]
    self.width = seq_shape[3]

    self.layer_norm = tln
    self._forget_bias = forget_bias

    low, high = -initializer, initializer
    self.initializer = tf.random_uniform_initializer(low, high)
def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words, encoder_embedding_size, decoder_embedding_size, rnn_size, num_layers, questionswords2int):
    """Assemble the encoder/decoder graph of the seq2seq model.

    Args:
        inputs: integer id tensor fed to the encoder embedding.
        targets: target id tensor, preprocessed before decoding.
        keep_prob: dropout keep probability forwarded to both RNNs.
        batch_size: batch size forwarded to target preprocessing and decoder.
        sequence_length: sequence length forwarded to both RNNs.
        answers_num_words: encoder vocabulary size (one extra row is added).
        questions_num_words: decoder vocabulary size (one extra row is added).
        encoder_embedding_size: width of the encoder embeddings.
        decoder_embedding_size: width of the decoder embeddings.
        rnn_size: RNN size forwarded to both RNNs.
        num_layers: number of RNN layers forwarded to both RNNs.
        questionswords2int: word-to-id mapping used by preprocessing and
            the decoder.

    Returns:
        A tuple ``(training_predictions, test_predictions)`` as returned by
        ``decoder_rnn``.
    """
    # Embedding's first argument is the vocabulary size, and the layer must
    # be applied to ``inputs`` to yield the embedded tensor the encoder
    # consumes.
    encoder_embedding = tf.keras.layers.Embedding(
        answers_num_words + 1,
        encoder_embedding_size,
        embeddings_initializer=tf.random_uniform_initializer(0, 1))
    encoder_embedded_input = encoder_embedding(inputs)

    encoder_state = encoder_rnn(encoder_embedded_input, rnn_size, num_layers,
                                keep_prob, sequence_length)

    preprocessed_targets = preprocess_targets(targets, questionswords2int,
                                              batch_size)

    # Decoder embeddings are kept as a plain variable because the matrix is
    # also handed to the decoder itself.
    decoder_embedding_matrix = tf.Variable(
        tf.random_uniform([questions_num_words + 1, decoder_embedding_size],
                          0, 1))
    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding_matrix,
                                                    preprocessed_targets)

    training_predictions, test_predictions = decoder_rnn(
        decoder_embedded_input, decoder_embedding_matrix, encoder_state,
        questions_num_words, sequence_length, rnn_size, num_layers,
        questionswords2int, keep_prob, batch_size)
    return training_predictions, test_predictions
def mkvar(self, name, shape=[], lo=-1.0, hi=1.0, trainable=None):
    """Create (or fetch) a float64 variable initialized uniformly in [lo, hi).

    Args:
        name: variable name.
        shape: variable shape; the default ``[]`` is a scalar.
        lo: lower bound of the uniform initializer.
        hi: upper bound of the uniform initializer.
        trainable: forwarded to ``get_variable``.

    Returns:
        The variable returned by ``tf.compat.v1.get_variable``.
    """
    return tf.compat.v1.get_variable(
        name=name,
        shape=shape,
        dtype=tf.float64,
        initializer=tf.random_uniform_initializer(minval=lo, maxval=hi),
        trainable=trainable)
def _createStackBidirectionalDynamicRNN(self, use_gpu, use_shape, use_state_tuple, initial_states_fw=None, initial_states_bw=None, scope=None):
    """Build a stacked bidirectional dynamic RNN for the tests.

    Args:
        use_gpu: not read in this helper.
        use_shape: when true the input placeholders get a fixed batch
            dimension and the expected output shape does too.
        use_state_tuple: not read in this helper; the cells are always
            created with ``state_is_tuple=False``.
        initial_states_fw: forwarded to the RNN builder.
        initial_states_bw: forwarded to the RNN builder.
        scope: forwarded to the RNN builder.

    Returns:
        A tuple ``(input_value, inputs, outputs, st_fw, st_bw,
        sequence_length)``.
    """
    # Number of units of each stacked layer.
    self.layers = [2, 3]
    input_size = 5
    batch_size = 2
    max_length = 8

    # Seeded so that runs are reproducible.
    initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
    sequence_length = tf.placeholder(tf.int64)

    self.cells_fw = [
        rnn_cell.LSTMCell(  # pylint:disable=g-complex-comprehension
            num_units, input_size, initializer=initializer, state_is_tuple=False)
        for num_units in self.layers
    ]
    self.cells_bw = [
        rnn_cell.LSTMCell(  # pylint:disable=g-complex-comprehension
            num_units, input_size, initializer=initializer, state_is_tuple=False)
        for num_units in self.layers
    ]

    # List multiplication repeats the same placeholder object max_length
    # times.
    inputs = max_length * [
        tf.placeholder(tf.float32,
                       shape=(batch_size, input_size) if use_shape else (None, input_size))
    ]
    # Stack along a new leading axis, then swap the first two axes.
    inputs_c = tf.stack(inputs)
    inputs_c = tf.transpose(inputs_c, [1, 0, 2])
    outputs, st_fw, st_bw = contrib_rnn.stack_bidirectional_dynamic_rnn(
        self.cells_fw, self.cells_bw, inputs_c,
        initial_states_fw=initial_states_fw,
        initial_states_bw=initial_states_bw,
        dtype=tf.float32, sequence_length=sequence_length, scope=scope)

    # Outputs has shape (batch_size, max_length, 2 * layers[-1]).
    output_shape = [None, max_length, 2 * self.layers[-1]]
    if use_shape:
        output_shape[0] = batch_size

    self.assertAllEqual(outputs.get_shape().as_list(), output_shape)

    input_value = np.random.randn(batch_size, input_size)

    return input_value, inputs, outputs, st_fw, st_bw, sequence_length
def mf_block(self):
    """Build a biased matrix-factorization graph.

    Reads ``self.n_user``, ``self.n_item`` and the config keys
    ``'hidden_dim'`` and ``'lbda'``.

    Returns:
        A tuple ``([u, i, y, s], score, [loss], diff)``: the three
        placeholders plus the per-pair prediction, the score of the batch
        users against every item, the regularized squared-error loss in a
        list, and the prediction error ``s - y``.
    """
    n_user, n_item = self.n_user, self.n_item
    hidden_dim = self.config['hidden_dim']
    reg_weight = self.config['lbda']

    user_ids = tf.placeholder(tf.int32, [None])
    item_ids = tf.placeholder(tf.int32, [None])
    ratings = tf.placeholder(tf.float32, [None])

    user_table = tf.get_variable(
        "user_emb", [n_user, hidden_dim],
        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    item_table = tf.get_variable(
        "item_emb", [n_item, hidden_dim],
        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    user_vec = tf.nn.embedding_lookup(user_table, user_ids)
    item_vec = tf.nn.embedding_lookup(item_table, item_ids)

    # Dot product per (user, item) pair, and against the full item table.
    pair_score = tf.reduce_sum(
        tf.multiply(user_vec, item_vec), 1, keepdims=False)
    all_scores = tf.tensordot(user_vec, item_table, axes=[[1], [1]])
    penalty = tf.add_n([
        tf.reduce_mean(tf.multiply(user_vec, user_vec)),
        tf.reduce_mean(tf.multiply(item_vec, item_vec)),
    ])

    user_bias = tf.get_variable(
        "user_bias", [n_user], initializer=tf.constant_initializer(0))
    item_bias = tf.get_variable(
        "item_bias", [n_item], initializer=tf.constant_initializer(0))
    item_b = tf.nn.embedding_lookup(item_bias, item_ids)
    user_b = tf.nn.embedding_lookup(user_bias, user_ids)
    global_b = tf.get_variable(
        "global_bias", [], initializer=tf.constant_initializer(0))

    pair_score = pair_score + (item_b + user_b + global_b)
    # Only the item bias is added to the all-items score.
    all_scores = all_scores + tf.reshape(item_bias, [1, n_item])
    penalty = penalty + tf.add_n([
        tf.reduce_mean(tf.multiply(user_b, user_b)),
        tf.reduce_mean(tf.multiply(item_b, item_b)),
    ])

    error = pair_score - ratings
    loss = tf.reduce_mean(tf.multiply(error, error)) + reg_weight * penalty
    return [user_ids, item_ids, ratings, pair_score], all_scores, [loss], error
def tower_loss(scope, inputs, labels):
    """Build the NCE loss over an attention-weighted mix of embeddings.

    Relies on module-level names: ``feature_size``, ``side_info``,
    ``embedding_size``, ``item_size``, ``n_sampled`` and
    ``_variable_on_cpu``.

    Args:
        scope: not read in this function.
        inputs: ids used to index rows of ``side_info``.
        labels: labels passed to ``tf.nn.nce_loss``.

    Returns:
        A tuple ``(loss, merge_embedding)``.
    """
    embedding_list = []
    # One embedding table per side-information column; each table has
    # max(column) + 1 rows.
    for i in range(feature_size):
        embedding = _variable_on_cpu(
            'side_info_{0}_embeddings'.format(i),
            [max(side_info[:, i]) + 1, embedding_size],
            tf.random_uniform_initializer(-1.0, 1.0))
        # Map each input id to its value in column i, then embed that value.
        side_info_index = tf.nn.embedding_lookup(side_info[:, i], inputs)
        side_info_embed = tf.nn.embedding_lookup(
            embedding, tf.cast(side_info_index[:], dtype=tf.int32))
        embedding_list.append(side_info_embed)

    # One weight per (item, feature) pair.
    alpha_embedding = _variable_on_cpu(
        'alpha_embeddings', [item_size, feature_size],
        tf.random_uniform_initializer(-1.0, 1.0))
    stacked_embed = tf.stack(embedding_list, axis=-1)
    alpha_index = tf.nn.embedding_lookup(side_info[:, 0], inputs)
    alpha_embed = tf.nn.embedding_lookup(alpha_embedding, alpha_index)
    alpha_embed_expand = tf.expand_dims(alpha_embed, 1)
    # exp(alpha)-weighted sum of the per-feature embeddings, divided by the
    # sum of the exp(alpha) weights.
    alpha_i_sum = tf.reduce_sum(tf.exp(alpha_embed_expand), axis=-1)
    merge_embedding = tf.reduce_sum(stacked_embed * tf.exp(alpha_embed_expand), axis=-1) / alpha_i_sum

    # The bare string below is a no-op expression holding a disabled
    # "cold start item" variant of the merge above.
    ''' cold start item stacked_embed = tf.stack(embedding_list[1:], axis=-1) alpha_index = tf.nn.embedding_lookup(side_info[:, 1], inputs) alpha_embed = tf.nn.embedding_lookup(alpha_embedding, alpha_index[:]) alpha_embed_expand = tf.expand_dims(alpha_embed, 1) alpha_i_sum = tf.reduce_sum(tf.exp(alpha_embed_expand), axis=-1) merge_embedding = tf.reduce_sum(stacked_embed * tf.exp(alpha_embed_expand), axis=-1) / alpha_i_sum cold_start_embedding = tf.reduce_sum(stacked_embed * tf.exp(alpha_embed_expand), axis=-1) / alpha_i_sum '''

    # Output-side parameters of the NCE loss.
    weights = _variable_on_cpu(
        'w', [item_size, embedding_size],
        tf.truncated_normal_initializer(stddev=1.0 / math.sqrt(embedding_size)))
    biases = _variable_on_cpu('b', [item_size], tf.zeros_initializer())

    loss = tf.reduce_mean(
        tf.nn.nce_loss(weights=weights, biases=biases, labels=labels,
                       inputs=merge_embedding, num_sampled=n_sampled,
                       num_classes=item_size))
    return loss, merge_embedding
def __init__(self, X, batch_size, input_size, output_size, model='lstm', rnn_size=128, num_layers=2):
    """Build the embedding -> RNN -> dense -> softmax graph.

    Args:
        X: integer id tensor looked up in the embedding table.
        batch_size: batch size used to build the zero initial state.
        input_size: number of rows of the embedding table.
        output_size: number of output classes of the final dense layer.
        model: stored on ``self._model``; presumably read by
            ``self._get_layer()`` -- verify.
        rnn_size: number of units per RNN layer and embedding width.
        num_layers: stored on ``self._num_layers``.
    """
    self._model = model
    self._num_unit = rnn_size  # number of LSTM units
    self._num_layers = num_layers  # number of LSTM layers
    # original note: input size of the final dense layer; in this code it is
    # the number of rows of the embedding table
    self._input_size = input_size
    self._output_size = output_size  # output size of the final dense layer
    self._model_layers = self._get_layer()  # the model's recurrent hidden layers
    self._initial_state = self._model_layers.zero_state(
        batch_size, tf.float32)  # initial state

    # NOTE(review): scope nesting was reconstructed; confirm the embedding
    # belongs under 'rnnlm' and the dynamic_rnn call sits outside it.
    with tf.compat.v1.variable_scope('rnnlm'):
        # Uniform bound sqrt(3 / n) with n the mean of fan-in and fan-out.
        n = (self._num_unit + self._output_size) * 0.5
        scale = tf.sqrt(3 / n)
        # parameters of the dense layer
        softmax_w = tf.get_variable(
            "softmax_w", [self._num_unit, self._output_size],
            initializer=tf.random_uniform_initializer(-scale, scale))
        softmax_b = tf.get_variable(
            "softmax_b", [self._output_size],
            initializer=tf.random_uniform_initializer(-scale, scale))
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [self._input_size, self._num_unit])
            inputs = tf.nn.embedding_lookup(embedding, X)

    # run the recurrent hidden layers
    outputs, last_state = tf.nn.dynamic_rnn(
        self._model_layers, inputs, initial_state=self._initial_state,
        scope="rnnlm")
    # Flatten all time steps into rows before the dense layer.
    self._outputs = tf.reshape(outputs, [-1, self._num_unit])
    self._last_state = last_state
    # dense layer output
    self._logists = tf.matmul(self._outputs, softmax_w) + softmax_b
    # predicted probabilities
    self._probs = tf.nn.softmax(self._logists)
def testLSTMBasicToBlockCell(self):
    """Check that LSTMBlockCell matches BasicLSTMCell on identical inputs.

    A two-layer stack of each cell type is built with the same seeded
    initializer, fed the same input and state values, and every output is
    compared with ``assertAllClose``.
    """
    with self.session(use_gpu=True) as sess:
        x = tf.zeros([1, 2])
        x_values = np.random.randn(1, 2)

        # State values fed in place of the zero tensors created below.
        m0_val = 0.1 * np.ones([1, 2])
        m1_val = -0.1 * np.ones([1, 2])
        m2_val = -0.2 * np.ones([1, 2])
        m3_val = 0.2 * np.ones([1, 2])

        # The same seeded initializer is used in both scopes.
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
        with tf.variable_scope("basic", initializer=initializer):
            m0 = tf.zeros([1, 2])
            m1 = tf.zeros([1, 2])
            m2 = tf.zeros([1, 2])
            m3 = tf.zeros([1, 2])
            g, ((out_m0, out_m1), (out_m2, out_m3)) = rnn_cell.MultiRNNCell(
                [
                    rnn_cell.BasicLSTMCell(2, state_is_tuple=True)
                    for _ in range(2)
                ],
                state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
            sess.run([tf.global_variables_initializer()])
            # Tensors are overridden by name through the feed dict.
            basic_res = sess.run(
                [g, out_m0, out_m1, out_m2, out_m3], {
                    x.name: x_values,
                    m0.name: m0_val,
                    m1.name: m1_val,
                    m2.name: m2_val,
                    m3.name: m3_val
                })

        with tf.variable_scope("block", initializer=initializer):
            m0 = tf.zeros([1, 2])
            m1 = tf.zeros([1, 2])
            m2 = tf.zeros([1, 2])
            m3 = tf.zeros([1, 2])
            g, ((out_m0, out_m1), (out_m2, out_m3)) = rnn_cell.MultiRNNCell(
                [contrib_rnn.LSTMBlockCell(2) for _ in range(2)],
                state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
            sess.run([tf.global_variables_initializer()])
            block_res = sess.run(
                [g, out_m0, out_m1, out_m2, out_m3], {
                    x.name: x_values,
                    m0.name: m0_val,
                    m1.name: m1_val,
                    m2.name: m2_val,
                    m3.name: m3_val
                })

        self.assertEqual(len(basic_res), len(block_res))
        for basic, block in zip(basic_res, block_res):
            self.assertAllClose(basic, block)
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a Xavier-style initializer for a layer of the given size.

    Args:
        n_inputs: number of input units.
        n_outputs: number of output units.
        uniform: when true return a uniform initializer over
            +/- sqrt(6 / (n_inputs + n_outputs)); otherwise a truncated
            normal with stddev sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        # 3 gives roughly the same limits as the uniform case because the
        # truncated normal re-draws values beyond two standard deviations.
        spread = tf.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=spread)

    # 6 is the constant used in the paper.
    bound = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-bound, bound)