def main():
    tf.set_random_seed(10)
    with tf.Session() as sess:
        rnn_cell = tf.nn.rnn_cell.LSTMCell(10)

        # defining initial state
        initial_state = rnn_cell.zero_state(4, dtype=tf.float32)

        inputs = tf.Variable(tf.random_uniform(shape=(4, 30, 100)), name='input')
        inputs = tf.identity(inputs, "input_node")

        # 'state' is a tensor of shape [batch_size, cell_state_size]
        outputs, state = tf.nn.dynamic_rnn(rnn_cell, inputs,
                                           initial_state=initial_state,
                                           dtype=tf.float32)

        y1 = tf.identity(outputs, 'outputs')
        y2 = tf.identity(state, 'state')
        t1 = tf.ones([4, 30, 10])
        t2 = tf.ones([4, 10])

        loss = tf.reduce_sum((y1 - t1) * (y1 - t1)) + tf.reduce_sum((y2 - t2) * (y2 - t2))
        tf.identity(loss, name="lstm_loss")
        # tf.summary.FileWriter('/tmp/log', tf.get_default_graph())

        net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x),
                          argv[2].split(','))
        run_model(net_outputs, argv[1], None, argv[3] == 'True')
def addLayer(self, n, activation_function='tanh', include_bias=False, sd=0.35,
             dropout=0, normalization=None, weights=None):
    """
    :Description:
    Adds a layer to the network, including a weight tensor and an activation tensor.

    :Input parameters:
    activation_function: type of activation function to be applied to each unit (string)
    include_bias: if true, a column of ones will be added to the weights (boolean)
    sd: standard deviation of the zero-mean gaussian from which the weights will be drawn (float)
    dropout: the chance with which each weight will be set to zero for a given training step (float)
    normalization: the type of normalization imposed on the layer activations. Can be
                   a) 'softmax' for softmax normalization
                   b) 'Shift' for de-meaning
                   c) 'ShiftScale' for de-meaning and standard-deviation normalization
    weights: if provided, will be used as weights of the layer instead of drawing from a gaussian (tensor)
    """
    # initialize weights and use them to calculate layer activations
    if weights:  # if weights are provided, use those
        weights = tf.mul(tf.ones(weights.shape), weights)
        activations = tf.matmul(self.data, weights) if not self.Weights else tf.matmul(self.Activations[-1], weights)
    elif not self.Weights:  # else if first layer
        weights = tf.Variable(tf.random_normal([self.data.get_shape()[1].value, n], stddev=sd))
        weights = tf.concat(1, [weights, tf.ones([weights.get_shape()[0], 1])]) if include_bias else weights
        activations = tf.matmul(self.data, weights)
    else:  # for every other layer
        weights = tf.Variable(tf.random_normal([self.Activations[-1].get_shape()[-1].value, n], stddev=sd))
        weights = tf.concat(1, [weights, tf.ones([weights.get_shape()[0], 1])]) if include_bias else weights
        activations = tf.matmul(self.Activations[-1], weights)

    self.Weights.append(weights)
    # apply activation function on raw activations
    self.Activations.append(self.applyActivation(activations, activation_function))

    # add dropout and/or normalization
    if dropout:
        self.Activations.append(tf.nn.dropout(self.Activations[-1], dropout))
    if normalization == 'softmax':  # softmax normalization
        self.Activations.append(tf.nn.softmax(self.Activations[-1]))
    elif normalization == 'Shift':  # de-meaning
        self.Activations[-1] = tf.subtract(self.Activations[-1], tf.reduce_mean(self.Activations[-1]))
    elif normalization == 'ShiftScale':  # de-meaning and rescaling by variance
        mu = tf.reduce_mean(self.Activations[-1])
        diff = tf.subtract(self.Activations[-1], mu)
        self.Activations[-1] = tf.div(diff, tf.reduce_sum(tf.mul(diff, diff)))
def testSampleFromDiscretizedMixLogistic(self):
    batch = 2
    height = 4
    width = 4
    num_mixtures = 5
    seed = 42
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.ones([batch, height, width, num_mixtures * 3]) * -1e8
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    locs_0 = locs[..., :3]
    expected_sample = tf.clip_by_value(locs_0, -1., 1.)
    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)
    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
def bn_layer(inputs, is_training, name='BatchNorm', moving_decay=0.9, eps=1e-5):
    shape = inputs.shape
    assert len(shape) in [2, 4]
    param_shape = shape[-1]

    gamma = tf.Variable(tf.ones(param_shape), name='gamma')
    beta = tf.Variable(tf.zeros(param_shape), name='beta')
    mean = tf.Variable(tf.ones(param_shape), trainable=False, name='mean')
    var = tf.Variable(tf.ones(param_shape), trainable=False, name='var')

    # lambda1 is assumed to be a module-level L2 coefficient.
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(gamma))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(beta))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(mean))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(var))

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2], name='moments')
        mean = tf.assign(mean, batch_mean)
        var = tf.assign(var, batch_var)
        # The tiny 1e-10 terms create a data dependency on the assign ops, so
        # the stored statistics are updated whenever the output is evaluated.
        return tf.nn.batch_normalization(inputs, batch_mean + mean * 1e-10,
                                         batch_var + var * 1e-10, gamma, beta, eps)
    else:
        return tf.nn.batch_normalization(inputs, mean, var, gamma, beta, eps)
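A minimal usage sketch for the layer above (assumptions: `lambda1` is the module-level L2 coefficient the function closes over, and the input is an NHWC feature map):

lambda1 = 1e-4  # assumed module-level L2 coefficient read inside bn_layer
x = tf.placeholder(tf.float32, [None, 32, 32, 16])
y_train = bn_layer(x, is_training=True)   # normalizes with batch statistics
y_infer = bn_layer(x, is_training=False)  # normalizes with the stored mean/var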
def get_idx_map(shape):
    """Get index map for an image.

    Args:
        shape: [B, T, H, W] or [B, H, W]
    Returns:
        idx: [B, T, H, W, 2] or [B, H, W, 2]
    """
    s = shape
    ndims = tf.shape(s)
    wdim = ndims - 1
    hdim = ndims - 2
    idx_shape = tf.concat(0, [s, tf.constant([1])])
    ones_h = tf.ones(hdim - 1, dtype='int32')
    ones_w = tf.ones(wdim - 1, dtype='int32')
    h_shape = tf.concat(0, [ones_h, tf.constant([-1]), tf.constant([1, 1])])
    w_shape = tf.concat(0, [ones_w, tf.constant([-1]), tf.constant([1])])
    idx_y = tf.zeros(idx_shape, dtype='float')
    idx_x = tf.zeros(idx_shape, dtype='float')
    h = tf.slice(s, ndims - 2, [1])
    w = tf.slice(s, ndims - 1, [1])
    idx_y += tf.reshape(tf.to_float(tf.range(h[0])), h_shape)
    idx_x += tf.reshape(tf.to_float(tf.range(w[0])), w_shape)
    idx = tf.concat(ndims[0], [idx_y, idx_x])
    return idx
def ternarize(x, thresh=0.05):
    """
    Implemented Trained Ternary Quantization:
    https://arxiv.org/abs/1612.01064

    Code modified from the authors' at:
    https://github.com/czhu95/ternarynet/blob/master/examples/Ternary-Net/ternary.py
    """
    shape = x.get_shape()

    thre_x = tf.stop_gradient(tf.reduce_max(tf.abs(x)) * thresh)

    w_p = tf.get_variable('Wp', initializer=1.0, dtype=tf.float32)
    w_n = tf.get_variable('Wn', initializer=1.0, dtype=tf.float32)

    tf.summary.scalar(w_p.op.name + '-summary', w_p)
    tf.summary.scalar(w_n.op.name + '-summary', w_n)

    mask = tf.ones(shape)
    mask_p = tf.where(x > thre_x, tf.ones(shape) * w_p, mask)
    mask_np = tf.where(x < -thre_x, tf.ones(shape) * w_n, mask_p)
    mask_z = tf.where((x < thre_x) & (x > -thre_x), tf.zeros(shape), mask)

    @tf.custom_gradient
    def _sign_mask(x):
        return tf.sign(x) * mask_z, lambda dy: dy

    w = _sign_mask(x)
    w = w * mask_np

    tf.summary.histogram(w.name, w)
    return w
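A usage sketch (hypothetical shapes and scope name): since ternarize creates the scaling variables 'Wp'/'Wn' with tf.get_variable, it is meant to be called once per weight tensor inside its own variable scope.

with tf.variable_scope('conv1'):
    w = tf.get_variable('W', shape=[3, 3, 16, 32])
    w_ternary = ternarize(w, thresh=0.05)  # forward values in {-w_n, 0, +w_p}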
def _make_evaluation_dict(self, resized_groundtruth_masks=False):
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
    key = tf.constant('image1')
    detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
    detection_scores = tf.constant([[0.8]])
    detection_classes = tf.constant([[0]])
    detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
    num_detections = tf.constant([1])
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    if resized_groundtruth_masks:
        groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    return eval_util.result_dict_for_single_example(image, key, detections, groundtruth)
def loss(self, logits, labels, regularization):
    """Adds to the inference model the layers required to generate loss."""
    with tf.name_scope('loss'):
        with tf.name_scope('var_loss'):
            labels = tf.cast(labels, tf.float32)
            shape = labels.get_shape()
            same_class = tf.boolean_mask(logits, tf.equal(labels, tf.ones(shape)))
            diff_class = tf.boolean_mask(logits, tf.not_equal(labels, tf.ones(shape)))
            same_mean, same_var = tf.nn.moments(same_class, [0])
            diff_mean, diff_var = tf.nn.moments(diff_class, [0])
            var_loss = same_var + diff_var
        with tf.name_scope('mean_loss'):
            mean_loss = self.lamda * tf.where(
                tf.greater(self.mu - (same_mean - diff_mean), 0),
                self.mu - (same_mean - diff_mean), 0)
        with tf.name_scope('regularization'):
            regularization *= tf.add_n(self.regularizers)
        loss = var_loss + mean_loss + regularization

        # Summaries for TensorBoard.
        tf.summary.scalar('loss/total', loss)
        with tf.name_scope('averages'):
            averages = tf.train.ExponentialMovingAverage(0.9)
            op_averages = averages.apply([var_loss, mean_loss, regularization, loss])
            tf.summary.scalar('loss/avg/var_loss', averages.average(var_loss))
            tf.summary.scalar('loss/avg/mean_loss', averages.average(mean_loss))
            tf.summary.scalar('loss/avg/regularization', averages.average(regularization))
            tf.summary.scalar('loss/avg/total', averages.average(loss))
            with tf.control_dependencies([op_averages]):
                loss_average = tf.identity(averages.average(loss), name='control')
        return loss, loss_average
def test_sample_mvn(session_tf, cov_structure, num_samples):
    """
    Draws 10,000 samples from a distribution with known mean and covariance.
    The test checks if the mean and covariance of the samples is close to the
    true mean and covariance.
    """
    N, D = 10000, 2
    means = tf.ones((N, D), dtype=float_type)
    if cov_structure == "full":
        covs = tf.eye(D, batch_shape=[N], dtype=float_type)
    elif cov_structure == "diag":
        covs = tf.ones((N, D), dtype=float_type)

    samples = _sample_mvn(means, covs, cov_structure, num_samples=num_samples)
    value = session_tf.run(samples)

    if num_samples is None:
        assert value.shape == (N, D)
    else:
        assert value.shape == (num_samples, N, D)
        value = value.reshape(-1, D)

    samples_mean = np.mean(value, axis=0)
    samples_cov = np.cov(value, rowvar=False)
    np.testing.assert_array_almost_equal(samples_mean, [1., 1.], decimal=1)
    np.testing.assert_array_almost_equal(samples_cov, [[1., 0.], [0., 1.]], decimal=1)
def initialize_graph(self):
    self.infrate = tf.Variable(self.infrate, trainable=False)
    self.learnrate = tf.Variable(self.learnrate, trainable=False)

    self.phi = tf.Variable(tf.random_normal([self.nunits, self.stims.datasize]))
    self.acts = tf.Variable(tf.zeros([self.nunits, self.batch_size]))

    self.X = tf.placeholder(tf.float32, shape=[self.batch_size, self.stims.datasize])
    self.Xhat = tf.matmul(tf.transpose(self.acts), self.phi)
    self.resid = self.X - self.Xhat
    self.mse = tf.reduce_sum(tf.square(self.resid)) / self.batch_size / self.stims.datasize
    self.meanL1 = tf.reduce_sum(tf.abs(self.acts)) / self.batch_size
    self.loss = 0.5 * self.mse + self.lam * self.meanL1 / self.stims.datasize

    inferer = tf.train.GradientDescentOptimizer(self.infrate)
    inf_step = tf.Variable(0, name='inf_step', trainable=False)
    self.inf_op = inferer.minimize(self.loss, global_step=inf_step, var_list=[self.acts])

    learner = tf.train.GradientDescentOptimizer(self.learnrate)
    learn_step = tf.Variable(0, name='learn_step', trainable=False)
    self.learn_op = learner.minimize(self.loss, global_step=learn_step, var_list=[self.phi])

    self.ma_variances = tf.Variable(tf.ones(self.nunits), trainable=False)
    self.gains = tf.Variable(tf.ones(self.nunits), trainable=False)
    _, self.variances = tf.nn.moments(self.acts, axes=[1])
    self.update_variance = self.ma_variances.assign(
        (1. - self.var_avg_rate) * self.ma_variances + self.var_avg_rate * self.variances)
    self.update_gains = self.gains.assign(
        self.gains * tf.pow(self.var_goal / self.ma_variances, self.gain_rate))
    self.renorm_phi = self.phi.assign(
        tf.expand_dims(self.gains, dim=1) * tf.nn.l2_normalize(self.phi, dim=1))

    self.sess = tf.Session()
    self.sess.run(tf.initialize_all_variables())
    self.sess.run(self.phi.assign(tf.nn.l2_normalize(self.phi, dim=1)))
def _test_logpdf_scalar(scalar):
    x = tf.constant(scalar)
    val_true = stats.norm.logpdf(scalar)
    _assert_eq(norm.logpdf(x), val_true)
    _assert_eq(norm.logpdf(x, tf.zeros([1]), tf.constant(1.0)), val_true)
    _assert_eq(norm.logpdf(x, tf.zeros([1]), tf.ones([1])), val_true)
    _assert_eq(norm.logpdf(x, tf.zeros([1]), tf.diag(tf.ones([1]))), val_true)
def loss_layer(self, project_logits, lengths, name=None):
    with tf.variable_scope("crf_loss" if not name else name):
        small = -1000.0
        start_logits = tf.concat(
            [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]),
             tf.zeros(shape=[self.batch_size, 1, 1])], axis=-1)
        pad_logits = tf.cast(small * tf.ones([self.batch_size, self.num_steps, 1]),
                             tf.float32)
        logits = tf.concat([project_logits, pad_logits], axis=-1)
        logits = tf.concat([start_logits, logits], axis=1)
        targets = tf.concat(
            [tf.cast(self.num_tags * tf.ones([self.batch_size, 1]), tf.int32),
             self.targets], axis=-1)

        self.trans = tf.get_variable(
            "transitions",
            shape=[self.num_tags + 1, self.num_tags + 1],
            initializer=self.initializer)
        log_likelihood, self.trans = crf_log_likelihood(
            inputs=logits,
            tag_indices=targets,
            transition_params=self.trans,
            sequence_lengths=lengths + 1)
        return tf.reduce_mean(-log_likelihood)
def testDtype(self):
    with self.test_session():
        d = tf.fill([2, 3], 12., name="fill")
        self.assertEqual(d.get_shape(), [2, 3])
        # Test default type for both constant size and dynamic size
        z = tf.ones([2, 3])
        self.assertEqual(z.dtype, tf.float32)
        self.assertEqual([2, 3], z.get_shape())
        self.assertAllEqual(z.eval(), np.ones([2, 3]))
        z = tf.ones(tf.shape(d))
        self.assertEqual(z.dtype, tf.float32)
        self.assertEqual([2, 3], z.get_shape())
        self.assertAllEqual(z.eval(), np.ones([2, 3]))
        # Test explicit type control
        for dtype in (tf.float32, tf.float64, tf.int32, tf.uint8, tf.int16,
                      tf.int8, tf.complex64, tf.complex128, tf.int64, tf.bool):
            z = tf.ones([2, 3], dtype=dtype)
            self.assertEqual(z.dtype, dtype)
            self.assertEqual([2, 3], z.get_shape())
            self.assertAllEqual(z.eval(), np.ones([2, 3]))
            z = tf.ones(tf.shape(d), dtype=dtype)
            self.assertEqual(z.dtype, dtype)
            self.assertEqual([2, 3], z.get_shape())
            self.assertAllEqual(z.eval(), np.ones([2, 3]))
def _build_iid_normal_model(self, num_timesteps, latent_size, observation_size,
                            transition_variance, observation_variance):
    """Build a model whose outputs are IID normal by construction."""
    transition_variance = self._build_placeholder(transition_variance)
    observation_variance = self._build_placeholder(observation_variance)

    # Use orthogonal matrices to project a (potentially
    # high-dimensional) latent space of IID normal variables into a
    # low-dimensional observation that is still IID normal.
    random_orthogonal_matrix = lambda: np.linalg.qr(
        np.random.randn(latent_size, latent_size))[0][:observation_size, :]
    observation_matrix = self._build_placeholder(random_orthogonal_matrix())

    model = tfd.LinearGaussianStateSpaceModel(
        num_timesteps=num_timesteps,
        transition_matrix=self._build_placeholder(
            np.zeros([latent_size, latent_size])),
        transition_noise=tfd.MultivariateNormalDiag(
            scale_diag=tf.sqrt(transition_variance) *
            tf.ones([latent_size], dtype=self.dtype)),
        observation_matrix=observation_matrix,
        observation_noise=tfd.MultivariateNormalDiag(
            scale_diag=tf.sqrt(observation_variance) *
            tf.ones([observation_size], dtype=self.dtype)),
        initial_state_prior=tfd.MultivariateNormalDiag(
            scale_diag=tf.sqrt(transition_variance) *
            tf.ones([latent_size], dtype=self.dtype)),
        validate_args=True)

    return model
def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0):
    self.num_layers = num_layers
    self.grus = []
    self.inits = []
    self.dropout_mask = []
    for layer in range(num_layers):
        input_size_ = input_size if layer == 0 else 2 * num_units
        gru_fw = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])
        gru_bw = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])
        init_fw = tf.Variable(tf.zeros([num_units]))
        init_fw = tf.expand_dims(tf.tile(tf.expand_dims(init_fw, axis=0),
                                         [batch_size, 1]), axis=0)
        init_bw = tf.Variable(tf.zeros([num_units]))
        init_bw = tf.expand_dims(tf.tile(tf.expand_dims(init_bw, axis=0),
                                         [batch_size, 1]), axis=0)
        mask_fw = tf.nn.dropout(tf.ones([1, batch_size, input_size_], dtype=tf.float32),
                                keep_prob=keep_prob)
        mask_bw = tf.nn.dropout(tf.ones([1, batch_size, input_size_], dtype=tf.float32),
                                keep_prob=keep_prob)
        self.grus.append((gru_fw, gru_bw,))
        self.inits.append((init_fw, init_bw,))
        self.dropout_mask.append((mask_fw, mask_bw,))
def log_likelihood(batch):
    # batch is an NxD matrix, where N is the length of the batch and D is the
    # dimension of the samples.
    # P(D|w) = prod_n( sum_k( pi_k * N(samp_n | k) ) ), with the unnormalized
    # component density exp(-(samp - mean)^T (samp - mean)).

    # multiplying by ones replicates the matrix, becomes (N,D,K)
    tmp1 = tf.batch_matmul(tf.reshape(batch, [N, D, 1]), tf.ones([N, 1, K]))
    # same but with the means matrix
    tmp2 = tf.batch_matmul(means, tf.ones([K, 1, N]))
    tmp2 = tf.transpose(tmp2, [2, 1, 0])
    # (x - mu)
    tmp3 = tmp1 - tmp2
    # (x - mu).T (x - mu)
    tmp3 = tf.batch_matmul(tf.transpose(tmp3, [0, 2, 1]), tmp3)
    tmp3 = tf.reduce_sum(tmp3, 2)
    # exp(-(x - mu).T (x - mu))
    tmp3 = tf.exp(-tmp3)
    # multiply by mixture weights
    tmp3 = tf.matmul(tmp3, mixture_weights)
    # log
    tmp3 = tf.log(tmp3)
    # sum over all samples of the batch
    tmp3 = tf.reduce_sum(tmp3, 0)
    return tmp3
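The function closes over module-level globals; the following sketch shows shapes that make the batched matmuls line up (sizes are hypothetical, names taken from the body above):

N, D, K = 64, 2, 3  # batch length, sample dimension, number of mixture components
means = tf.Variable(tf.random_normal([K, D, 1]))         # one D-dim mean per component
mixture_weights = tf.Variable(tf.fill([K, 1], 1.0 / K))  # weights sum to one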
def build_rmsprop_optimizer(self, learning_rate, rmsprop_decay, rmsprop_constant,
                            gradient_clip, version):
    with tf.name_scope('rmsprop'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.loss)

        grads = [gv[0] for gv in grads_and_vars]
        params = [gv[1] for gv in grads_and_vars]
        if gradient_clip > 0:
            # clip_by_global_norm returns (clipped_grads, global_norm)
            grads, _ = tf.clip_by_global_norm(grads, gradient_clip)

        if version == 'rmsprop':
            return optimizer.apply_gradients(zip(grads, params))
        elif version == 'graves_rmsprop':
            square_grads = [tf.square(grad) for grad in grads]

            avg_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]
            avg_square_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]

            update_avg_grads = [
                grad_pair[0].assign((rmsprop_decay * grad_pair[0]) +
                                    ((1 - rmsprop_decay) * grad_pair[1]))
                for grad_pair in zip(avg_grads, grads)]
            update_avg_square_grads = [
                grad_pair[0].assign((rmsprop_decay * grad_pair[0]) +
                                    ((1 - rmsprop_decay) * tf.square(grad_pair[1])))
                for grad_pair in zip(avg_square_grads, grads)]
            avg_grad_updates = update_avg_grads + update_avg_square_grads

            rms = [tf.sqrt(avg_grad_pair[1] - tf.square(avg_grad_pair[0]) + rmsprop_constant)
                   for avg_grad_pair in zip(avg_grads, avg_square_grads)]

            rms_updates = [grad_rms_pair[0] / grad_rms_pair[1]
                           for grad_rms_pair in zip(grads, rms)]
            train = optimizer.apply_gradients(zip(rms_updates, params))

            return tf.group(train, tf.group(*avg_grad_updates))
def init_var_map(init_vars, init_path=None):
    if init_path is not None:
        load_var_map = pkl.load(open(init_path, 'rb'))
        print('load variable map from', init_path, load_var_map.keys())
    var_map = {}
    for var_name, var_shape, init_method, dtype in init_vars:
        if init_method == 'zero':
            var_map[var_name] = tf.Variable(tf.zeros(var_shape, dtype=dtype),
                                            name=var_name, dtype=dtype)
        elif init_method == 'one':
            var_map[var_name] = tf.Variable(tf.ones(var_shape, dtype=dtype),
                                            name=var_name, dtype=dtype)
        elif init_method == 'normal':
            var_map[var_name] = tf.Variable(
                tf.random_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'tnormal':
            var_map[var_name] = tf.Variable(
                tf.truncated_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'uniform':
            var_map[var_name] = tf.Variable(
                tf.random_uniform(var_shape, minval=MINVAL, maxval=MAXVAL, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'xavier':
            maxval = np.sqrt(6. / np.sum(var_shape))
            minval = -maxval
            var_map[var_name] = tf.Variable(
                tf.random_uniform(var_shape, minval=minval, maxval=maxval, dtype=dtype),
                name=var_name, dtype=dtype)
        elif isinstance(init_method, int) or isinstance(init_method, float):
            var_map[var_name] = tf.Variable(tf.ones(var_shape, dtype=dtype) * init_method,
                                            name=var_name, dtype=dtype)
        elif init_method in load_var_map:
            if load_var_map[init_method].shape == tuple(var_shape):
                var_map[var_name] = tf.Variable(load_var_map[init_method],
                                                name=var_name, dtype=dtype)
            else:
                print('BadParam: init method', init_method,
                      'shape', var_shape, load_var_map[init_method].shape)
        else:
            print('BadParam: init method', init_method)
    return var_map
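A usage sketch (assumes the module-level constants STDDEV, MINVAL and MAXVAL referenced above are defined, e.g. STDDEV = 0.01):

init_vars = [
    ('embed', [10000, 64], 'xavier', tf.float32),
    ('w_out', [64, 1], 'tnormal', tf.float32),
    ('b_out', [1], 'zero', tf.float32),
]
var_map = init_var_map(init_vars)  # {'embed': <tf.Variable>, 'w_out': ..., 'b_out': ...}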
def test_factored_joint_mvn_diag_full(self):
    batch_shape = [3, 2]

    mvn1 = tfd.MultivariateNormalDiag(
        loc=tf.zeros(batch_shape + [3]),
        scale_diag=tf.ones(batch_shape + [3]))

    mvn2 = tfd.MultivariateNormalFullCovariance(
        loc=tf.ones(batch_shape + [2]),
        covariance_matrix=(tf.ones(batch_shape + [2, 2]) *
                           [[5., -2], [-2, 3.1]]))

    joint = sts_util.factored_joint_mvn([mvn1, mvn2])
    self.assertEqual(self.evaluate(joint.event_shape_tensor()),
                     self.evaluate(mvn1.event_shape_tensor() +
                                   mvn2.event_shape_tensor()))

    joint_mean_ = self.evaluate(joint.mean())
    self.assertAllEqual(joint_mean_[..., :3], self.evaluate(mvn1.mean()))
    self.assertAllEqual(joint_mean_[..., 3:], self.evaluate(mvn2.mean()))

    joint_cov_ = self.evaluate(joint.covariance())
    self.assertAllEqual(joint_cov_[..., :3, :3], self.evaluate(mvn1.covariance()))
    self.assertAllEqual(joint_cov_[..., 3:, 3:], self.evaluate(mvn2.covariance()))
def get_infogan_noise(batch_size, categorical_dim, structured_continuous_dim,
                      total_continuous_noise_dims):
    """Get unstructured and structured noise for InfoGAN.

    Args:
        batch_size: The number of noise vectors to generate.
        categorical_dim: The number of categories in the categorical noise.
        structured_continuous_dim: The number of dimensions of the uniform
            continuous noise.
        total_continuous_noise_dims: The number of continuous noise dimensions.
            This number includes the structured and unstructured noise.

    Returns:
        A 2-tuple of unstructured and structured noise. The first element is the
        unstructured noise, and the second is a 2-tuple of (categorical structured
        noise, continuous structured noise).
    """
    # Get unstructured noise.
    unstructured_noise = tf.random_normal(
        [batch_size, total_continuous_noise_dims - structured_continuous_dim])

    # Get categorical noise Tensor.
    categorical_dist = ds.Categorical(logits=tf.zeros([categorical_dim]))
    categorical_noise = categorical_dist.sample([batch_size])

    # Get continuous noise Tensor.
    continuous_dist = ds.Uniform(-tf.ones([structured_continuous_dim]),
                                 tf.ones([structured_continuous_dim]))
    continuous_noise = continuous_dist.sample([batch_size])

    return [unstructured_noise], [categorical_noise, continuous_noise]
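A usage sketch with InfoGAN MNIST-style sizes (hypothetical values; `ds` is assumed to alias tf.contrib.distributions as the body implies):

unstructured, structured = get_infogan_noise(
    batch_size=32, categorical_dim=10,
    structured_continuous_dim=2, total_continuous_noise_dims=62)
# unstructured: [Tensor of shape [32, 60]]
# structured:   [categorical codes of shape [32], continuous codes of shape [32, 2]]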
def testRejectionDataListInput(self):
    batch_size = 20
    val_input_batch = [tf.zeros([2, 3, 4]), tf.ones([2, 4]), tf.ones(2) * 3]
    lbl_input_batch = tf.ones([], dtype=tf.int32)
    probs = np.array([0, 1, 0, 0, 0])
    val_list, lbls = tf.contrib.training.stratified_sample(
        val_input_batch, lbl_input_batch, probs, batch_size,
        init_probs=[0, 1, 0, 0, 0])

    # Check output types.
    self.assertTrue(isinstance(val_list, list))
    self.assertEqual(len(val_list), len(val_input_batch))
    self.assertTrue(isinstance(lbls, tf.Tensor))

    with self.test_session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        out = sess.run(val_list + [lbls])

        coord.request_stop()
        coord.join(threads)

    # Check output shapes.
    self.assertEqual(len(out), len(val_input_batch) + 1)
def p_zt(self, prev_state, t):
    """Computes the model p(z_t | z_{t-1})."""
    batch_size = tf.shape(prev_state)[0]
    if t > 0:
        z_mu_p = prev_state + self.bs[t - 1]
        p_zt = tf.contrib.distributions.Normal(
            loc=z_mu_p, scale=tf.sqrt(tf.ones_like(z_mu_p) * self.variance))
        return p_zt
    else:  # p(z_0) is a mixture of two Normals
        mu_pos = tf.ones([batch_size, self.state_size],
                         dtype=self.dtype) * self.prior_mode_mean
        mu_neg = tf.ones([batch_size, self.state_size],
                         dtype=self.dtype) * -self.prior_mode_mean
        z0_pos = tf.contrib.distributions.Normal(
            loc=mu_pos, scale=tf.sqrt(tf.ones_like(mu_pos) * self.variance))
        z0_neg = tf.contrib.distributions.Normal(
            loc=mu_neg, scale=tf.sqrt(tf.ones_like(mu_neg) * self.variance))
        mode_probs = tf.convert_to_tensor([self.mixing_coeff, 1 - self.mixing_coeff],
                                          dtype=tf.float64)
        mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :],
                             [batch_size, 1, 1])
        mode_selection_dist = tf.contrib.distributions.Categorical(probs=mode_probs)
        z0_dist = tf.contrib.distributions.Mixture(
            cat=mode_selection_dist,
            components=[z0_pos, z0_neg],
            validate_args=False)
        return z0_dist
def test_horovod_broadcast(self):
    """Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session() as session:
        dtypes = [tf.uint8, tf.int8, tf.uint16, tf.int16,
                  tf.int32, tf.int64, tf.float32, tf.float64,
                  tf.bool]
        dims = [1, 2, 3]
        root_ranks = list(range(size))
        for dtype, dim, root_rank in itertools.product(dtypes, dims, root_ranks):
            try:
                tensor = tf.ones([17] * dim) * rank
                root_tensor = tf.ones([17] * dim) * root_rank
                if dtype == tf.bool:
                    tensor = tensor % 2
                    root_tensor = root_tensor % 2
                tensor = tf.cast(tensor, dtype=dtype)
                root_tensor = tf.cast(root_tensor, dtype=dtype)
                broadcasted_tensor = hvd.broadcast(tensor, root_rank)
                self.assertTrue(
                    session.run(tf.reduce_all(tf.equal(
                        tf.cast(root_tensor, tf.int32),
                        tf.cast(broadcasted_tensor, tf.int32)))),
                    "hvd.broadcast produces incorrect broadcasted tensor")
            except Exception:
                import traceback
                traceback.print_exc()
def c_body(c, pa):
    # Zero out predictions below the confidence threshold
    with tf.variable_scope('bboxes_c_select', reuse=True):
        c_scores = b_scores[:, c]
        c_fmask = tf.cast(tf.greater(c_scores, confidence_threshold), scores.dtype)
        c_scores = c_scores * c_fmask
        c_bboxes = b_bboxes * tf.expand_dims(c_fmask, axis=-1)

    # Apply NMS
    with tf.variable_scope('bboxes_c_nms', reuse=True):
        c_indices = tf.image.non_max_suppression(c_bboxes, c_scores, top_k, nms_threshold)
        size = tf.size(c_indices)
        c_batch_ = tf.to_float(b) * tf.ones(shape=[top_k, 1], dtype=tf.float32)  # len(indices) x 1
        c_labels = tf.to_float(c) * tf.ones(shape=[top_k, 1], dtype=tf.float32)  # len(indices) x 1
        extra_size = top_k - size
        c_scores = tf.expand_dims(tf.gather(c_scores, c_indices), axis=-1)  # len(indices) x 1
        empty_c_scores = tf.zeros([extra_size, 1], dtype=tf.float32)
        c_scores = tf.concat([c_scores, empty_c_scores], axis=0)
        c_bboxes = tf.gather(c_bboxes, c_indices)  # len(indices) x 4
        empty_c_bboxes = tf.zeros([extra_size, 4], dtype=tf.float32)
        c_bboxes = tf.concat([c_bboxes, empty_c_bboxes], axis=0)
        c_predictions = tf.concat([c_batch_, c_labels, c_scores, c_bboxes], axis=1)  # len(indices) x 7
    return c + 1, pa.write(index=c - 1, value=c_predictions)
def _tf_loss(self, sim, sim_emb):
    """Define loss."""
    if self.use_max_sim_neg:
        max_sim_neg = tf.reduce_max(sim[:, 1:], -1)
        loss = tf.reduce_mean(tf.maximum(0., self.mu_pos - sim[:, 0]) +
                              tf.maximum(0., self.mu_neg + max_sim_neg))
    else:
        # create an array for mu
        mu = self.mu_neg * np.ones(self.num_neg + 1)
        mu[0] = self.mu_pos

        factors = tf.concat([-1 * tf.ones([1, 1]),
                             tf.ones([1, tf.shape(sim)[1] - 1])], 1)
        max_margin = tf.maximum(0., mu + factors * sim)
        loss = tf.reduce_mean(tf.reduce_sum(max_margin, -1))

    max_sim_emb = tf.maximum(0., tf.reduce_max(sim_emb, -1))

    loss = (loss +
            # penalize max similarity between intent embeddings
            tf.reduce_mean(max_sim_emb) * self.C_emb +
            # add regularization losses
            tf.losses.get_regularization_loss())
    return loss
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
    with tf.name_scope(name, "simple_decoder_fn_inference",
                       [time, cell_state, cell_input, cell_output, context_state]):
        if cell_input is not None:
            raise ValueError("Expected cell_input to be None, but saw: %s" % cell_input)
        if cell_output is None:
            # invariant that this is time == 0
            next_input_id = tf.ones([batch_size], dtype=dtype) * start_of_sequence_id
            done = tf.zeros([batch_size], dtype=tf.bool)
            cell_state = encoder_state
            cell_output = tf.zeros([cell_size], dtype=tf.float32)
        else:
            softmax_output = output_fn(cell_output)
            if sample:
                next_input_id = tf.squeeze(tf.multinomial(softmax_output, 1), 1)
            else:
                next_input_id = tf.argmax(softmax_output, 1)
            next_input_id = tf.cast(next_input_id, dtype=dtype)
            done = tf.equal(next_input_id, end_of_sequence_id)
        next_input = tf.gather(embeddings, next_input_id)
        # if time > maxlen, return an all-true vector
        done = tf.cond(tf.greater(time, maximum_length),
                       lambda: tf.ones([batch_size], dtype=tf.bool),
                       lambda: done)
        return (done, cell_state, next_input, next_input_id, context_state)
def __call__(self, inputs, states, scope=None):
    with tf.variable_scope(
            scope or type(self).__name__,
            initializer=tf.random_normal_initializer(stddev=0.01)):
        # get the tensor
        if self._separate_pad:
            t_shape = [self._num_outputs, self._num_outputs, self._num_inputs]
            vec_a = inputs
            vec_b = states
        else:
            t_shape = [self._num_outputs + 1, self._num_outputs, self._num_inputs + 1]
            vec_a = tf.concat(
                axis=1, values=[inputs, tf.ones([inputs.get_shape()[0].value, 1])])
            # pad the state vector, mirroring the un-padded branch above
            # (the original padded `inputs` twice, which breaks when
            # num_inputs != num_outputs)
            vec_b = tf.concat(
                axis=1, values=[states, tf.ones([states.get_shape()[0].value, 1])])
        tensor = get_tt_3_tensor(t_shape, self._ranks, name='W')
        result = bilinear_product_tt_3(vec_a, tensor, vec_b)
        if self._separate_pad:
            # TODO possible weightnorm
            D = tf.get_variable('D', [self._num_inputs, self._num_outputs],
                                initializer=tf.uniform_unit_scaling_initializer(1.2))
            E = tf.get_variable('E', [self._num_outputs, self._num_outputs],
                                initializer=tf.uniform_unit_scaling_initializer(1.2))
            b = tf.get_variable('b', [self._num_outputs],
                                initializer=tf.constant_initializer(0.0))
            z = tf.nn.bias_add(tf.matmul(inputs, D) + tf.matmul(states, E), b)
            result = result + z
        result = self._nonlin(result)
        return result, result
def benchmarkCudnnLSTMTraining(self):
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with tf.Graph().as_default(), tf.device("/gpu:0"):
            model = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers, num_units, num_units)
            params_size_t = model.params_size()
            input_data = tf.Variable(tf.ones([seq_length, batch_size, num_units]))
            input_h = tf.Variable(tf.ones([num_layers, batch_size, num_units]))
            input_c = tf.Variable(tf.ones([num_layers, batch_size, num_units]))
            params = tf.Variable(tf.ones([params_size_t]), validate_shape=False)
            output, output_h, output_c = model(
                is_training=True,
                input_data=input_data,
                input_h=input_h,
                input_c=input_c,
                params=params)
            all_grads = tf.gradients([output, output_h, output_c],
                                     [params, input_data, input_h, input_c])
            training_op = tf.group(*all_grads)
            self._BenchmarkOp(training_op, "cudnn_lstm %s %s" %
                              (config_name, self._GetConfigDesc(config)))
def _testParamShapes(self, desired_shape):
    tn_param_shapes = tfd.TruncatedNormal.param_shapes(desired_shape)
    # Check the shapes by comparison with the untruncated Normal.
    n_param_shapes = tfd.Normal.param_shapes(desired_shape)
    self.assertAllEqual(
        self.evaluate(tn_param_shapes["loc"]),
        self.evaluate(n_param_shapes["loc"]))
    self.assertAllEqual(
        self.evaluate(tn_param_shapes["scale"]),
        self.evaluate(n_param_shapes["scale"]))
    self.assertAllEqual(
        self.evaluate(tn_param_shapes["low"]),
        self.evaluate(n_param_shapes["loc"]))
    self.assertAllEqual(
        self.evaluate(tn_param_shapes["high"]),
        self.evaluate(n_param_shapes["loc"]))

    loc = tf.zeros(tn_param_shapes["loc"])
    scale = tf.ones(tn_param_shapes["scale"])
    high = tf.ones(tn_param_shapes["high"])
    low = tf.zeros(tn_param_shapes["low"])
    sample_shape = self.evaluate(
        tf.shape(tfd.TruncatedNormal(loc=loc, scale=scale, low=low,
                                     high=high).sample()))
    self.assertAllEqual(desired_shape, sample_shape)
def getStatsEigen(self, stats=None):
    if len(self.stats_eigen) == 0:
        stats_eigen = {}
        if stats is None:
            stats = self.stats

        tmpEigenCache = {}
        with tf.device('/cpu:0'):
            for var in stats:
                for key in ['fprop_concat_stats', 'bprop_concat_stats']:
                    for stats_var in stats[var][key]:
                        if stats_var not in tmpEigenCache:
                            stats_dim = stats_var.get_shape()[1].value
                            e = tf.Variable(tf.ones([stats_dim]),
                                            name='KFAC_FAC/' + stats_var.name.split(':')[0] + '/e',
                                            trainable=False)
                            Q = tf.Variable(tf.diag(tf.ones([stats_dim])),
                                            name='KFAC_FAC/' + stats_var.name.split(':')[0] + '/Q',
                                            trainable=False)
                            stats_eigen[stats_var] = {'e': e, 'Q': Q}
                            tmpEigenCache[stats_var] = stats_eigen[stats_var]
                        else:
                            stats_eigen[stats_var] = tmpEigenCache[stats_var]
        self.stats_eigen = stats_eigen
    return self.stats_eigen
def wrap(image: tf.Tensor) -> tf.Tensor:
    """Returns 'image' with an extra channel set to all 1s."""
    shape = tf.shape(image)
    extended_channel = tf.ones([shape[0], shape[1], 1], image.dtype)
    extended = tf.concat([image, extended_channel], axis=2)
    return extended
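A quick shape check as a sketch:

img = tf.zeros([224, 224, 3], tf.uint8)
wrapped = wrap(img)  # shape [224, 224, 4]; the added channel is all ones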
def density(fin):
    # rho_new = tf.get_variable("rho", initializer=tf.zeros(nx,ny), dtype=tf.float64)
    rho_new = tf.reduce_sum(fin, axis=0)
    return rho_new


def velocity(fin_cp, rho_cp):
    # u = tf.get_variable("u", initializer=tf.zeros((2,nx,ny),dtype=np.float64), dtype=tf.float64)
    # u_new = tf.einsum('ik,klm->ilm', tf.transpose(v), fin_cp)  # this is giving rise to numerical error?
    u_x = tf.einsum('k,klm->lm', v_x, fin_cp)
    u_y = tf.einsum('k,klm->lm', v_y, fin_cp)
    u_new_1 = tf.stack([u_x, u_y], axis=0)
    u_new_1 /= rho_cp
    return u_new_1


ones_1 = tf.ones([9], dtype=tf.float64)
ones_3 = tf.ones([9, nx, ny], dtype=tf.float64)
tdiag = tf.linalg.diag(t)


def equilibrium(rho, u_var):
    usqr = 3 / 2 * (u_var[0] ** 2 + u_var[1] ** 2)
    usqr_9 = tf.tensordot(ones_1, usqr, axes=0)
    # print(usqr_9.eval().shape)
    vu = 3 * (tf.tensordot(v_x, u_var[0], axes=0) + tf.tensordot(v_y, u_var[1], axes=0))
    rho_9 = tf.stack([rho] * 9)
    eq = tf.einsum('ik,klm->ilm', tdiag,
                   tf.math.multiply(rho_9, (ones_3 + vu + 0.5 * vu * vu - usqr_9)))
    return eq


def create_bc_mask_1():
    zeros = tf.zeros([3, nx - 2, ny], dtype=tf.int32)
    ones = tf.ones([3, 1, ny], dtype=tf.int32)
def renormTrend(self, t):
    return self.muScal * t * tf.ones(shape=[self.d], dtype=tf.float32)


def forwardExpect(self, t, batchSize):
    return 0. * tf.ones((batchSize, self.d))


def sig(self, t, x):
    return tf.einsum('j,i->ij',
                     tf.constant(self.sigScal, dtype=tf.float32),
                     tf.ones(shape=tf.shape(x)[0], dtype=tf.float32))


def renormSigma(self, t):
    return self.sigScal * np.sqrt(t) * tf.ones(shape=[self.d], dtype=tf.float32)
def eval_val_2(dataset, name='Validation', include_pos_loss=True):
    '''
    The evaluation will use the next mask from the output of the mask
    transform network.
    '''
    d_loss.reset_states()
    d_accuracy.reset_states()

    for element in dataset:
        if include_pos_loss:
            inp, mask, tar = element
            mask_cum = tf.cumsum(mask, axis=-2)
        else:
            inp, tar = element
        tar_real = tar  # ground truth

        batch_size = inp.shape[0]
        seq_len_p2 = inp.shape[-1]
        state_size = seq_len_p2 + 1

        enc_inp = inp[:, tf.newaxis, :]
        enc_padding_mask = tf.zeros([batch_size, 1, 1, seq_len_p2])  # initial mask for encoder
        dec_padding_mask = enc_padding_mask

        mask_list = []
        out_list = []
        for i in range(state_size - 1):
            tar_inp = tf.ones([batch_size, 1, 1], tf.int64) * start_token_dec
            combined_mask = None

            # find the next smallest number
            predictions, _, predicted_mask = transformer_2(
                enc_inp, tar_inp, False, enc_padding_mask, combined_mask,
                dec_padding_mask)
            predictions = predictions[:, :, -1:, :]
            out_list.append(tf.squeeze(predictions, axis=-2))

            # pointer output from the modified transformer
            last_mask = predicted_mask[:, -1, :]
            last_mask = tf.one_hot(tf.argmax(last_mask, axis=-1),
                                   seq_len_p2)[:, :, tf.newaxis]

            # concatenate initial mask with the pointer
            init_mask = tf.squeeze(enc_padding_mask, [1, 2])
            init_mask = init_mask[:, :, tf.newaxis]
            x = tf.concat([init_mask, last_mask], axis=-1)

            # rescale to [-1,1] and send to the mask transform network
            predict_msk = msk_transform_2(2 * x - 1, False)
            mask_list.append(predict_msk[:, tf.newaxis, :])

            predict_msk = tf.cast(tf.greater(predict_msk, 0), tf.float32)
            enc_padding_mask = predict_msk[:, tf.newaxis, tf.newaxis, :]
            dec_padding_mask = enc_padding_mask

        if include_pos_loss:
            mask_est = tf.concat(mask_list, -2)
            loss_position = loss_function(mask_cum[:, 1:, :], mask_est)
        else:
            loss_position = 0

        out_est = tf.concat(out_list, -2)
        loss_content = loss_function(
            binary_encoding(tar_real, binary_size + 2), out_est)
        loss = loss_position + loss_content

        d_loss(loss)
        out_binary = tf.cast(tf.greater(out_est, 0), tf.int64)
        out_binary = back2int(out_binary)
        d_accuracy(tar_real, out_binary)

    print('{}_Loss {:.4f} {}_Accuracy {:.4f}'.format(
        name, d_loss.result(), name, d_accuracy.result()))
    return d_accuracy.result()
def convert_image_to_target(labels, bboxes, default_boxes, threshold=0.5,
                            scaling=(0.1, 0.1, 0.2, 0.2)):
    """
    Encode one image into a training target.

    :param labels: 1D Tensor(int64), labels in the image
    :param bboxes: 2D Tensor(float32), shape [num_label, 4], the relative coords
                   of the bbox of each label (ymin, xmin, ymax, xmax)
    :param default_boxes: list of list of numpy arrays
    :param threshold: IoU threshold for a positive match
    :param scaling: scaling used for encoding
    :return:
        target_labels: list of tensors, class target of all default boxes,
                       shapes [38,38,b], [19,19,b], ..., [1,1,b]
        target_locs: list of tensors, loc-offset target of all default boxes,
                     shapes [38,38,b,4], [19,19,b,4], ..., [1,1,b,4]
    """
    target_labels_list = []
    target_locs_list = []
    # process each default box in turn
    for default_box in default_boxes:
        # compute the four corners and crop the bbox area outside of the image
        cy, cx, h, w = default_box
        ymin = tf.maximum(cy - h / 2.0, 0.0)
        xmin = tf.maximum(cx - w / 2.0, 0.0)
        ymax = tf.minimum(cy + h / 2.0, 1.0)
        xmax = tf.minimum(cx + w / 2.0, 1.0)
        shape = (cy.shape[0], cy.shape[1], h.shape[0])
        default_area = (xmax - xmin) * (ymax - ymin)

        # hold the running labels, iou, ymin, etc.; initialize to 0
        feat_labels = tf.zeros(shape, tf.int64)
        feat_iou = tf.zeros(shape)
        feat_ymin = tf.zeros(shape)
        feat_xmin = tf.zeros(shape)
        # initialize to 1 to prevent log(0)
        feat_ymax = tf.ones(shape)
        feat_xmax = tf.ones(shape)

        def iou_with_bbox(bbox):
            """
            Compute IoU with the default boxes.
            :param bbox: single bbox (ymin, xmin, ymax, xmax)
            :return: iou tensor, shape [H, W, B]
            """
            # intersection
            int_ymin = tf.maximum(ymin, bbox[0])
            int_xmin = tf.maximum(xmin, bbox[1])
            int_ymax = tf.minimum(ymax, bbox[2])
            int_xmax = tf.minimum(xmax, bbox[3])
            h = tf.maximum(int_ymax - int_ymin, 0.)
            w = tf.maximum(int_xmax - int_xmin, 0.)
            inter_area = h * w
            # union
            union_area = default_area - inter_area + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            iou = tf.div(inter_area, union_area)
            return iou

        def condition(i, feat_labels, feat_iou, feat_ymin, feat_xmin, feat_ymax, feat_xmax):
            """Loop condition: iterate over all targets in labels."""
            r = tf.less(i, tf.shape(labels))
            return r[0]

        def body(i, feat_labels, feat_iou, feat_ymin, feat_xmin, feat_ymax, feat_xmax):
            """Loop body: update values where this label improves the match."""
            label = labels[i]
            box = bboxes[i]
            iou = iou_with_bbox(box)
            # iou > threshold and better than the current best iou
            mask = tf.logical_and(tf.greater(iou, threshold), tf.greater(iou, feat_iou))
            imask = tf.cast(mask, tf.int64)
            fmask = tf.cast(mask, tf.float32)
            feat_labels = tf.where(mask, imask * label, feat_labels)
            feat_iou = tf.where(mask, iou, feat_iou)
            feat_ymin = tf.where(mask, fmask * box[0], feat_ymin)
            feat_xmin = tf.where(mask, fmask * box[1], feat_xmin)
            feat_ymax = tf.where(mask, fmask * box[2], feat_ymax)
            feat_xmax = tf.where(mask, fmask * box[3], feat_xmax)
            return [i + 1, feat_labels, feat_iou, feat_ymin, feat_xmin, feat_ymax, feat_xmax]

        # loop
        i = 0
        [i, feat_labels, feat_iou, feat_ymin, feat_xmin, feat_ymax,
         feat_xmax] = tf.while_loop(condition, body,
                                    [i, feat_labels, feat_iou, feat_ymin,
                                     feat_xmin, feat_ymax, feat_xmax])

        # encode locs and calculate offsets
        cy_offset = ((feat_ymax + feat_ymin) / 2 - cy) / h / scaling[0]
        cx_offset = ((feat_xmax + feat_xmin) / 2 - cx) / w / scaling[1]
        h_offset = tf.log((feat_ymax - feat_ymin) / h) / scaling[2]
        w_offset = tf.log((feat_xmax - feat_xmin) / w) / scaling[3]
        encode_locs = tf.stack([cx_offset, cy_offset, w_offset, h_offset], axis=-1)

        target_labels_list.append(feat_labels)
        target_locs_list.append(encode_locs)

    return target_labels_list, target_locs_list
def _db_to_amp_tensorflow(x):
    return tf.pow(tf.ones(tf.shape(x)) * 10.0, x * 0.05)
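Since the exponent base is a constant, this is the standard dB-to-amplitude mapping 10**(x / 20); broadcasting makes the explicit tf.ones unnecessary. An equivalent sketch (hypothetical name):

def _db_to_amp_tensorflow_v2(x):
    # 10 ** (x / 20), relying on scalar broadcasting
    return tf.pow(10.0, x * 0.05)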
def __init__(self, batch_env, step, is_training, should_log, config):
    """Create an instance of the PPO algorithm.

    Args:
        batch_env: In-graph batch environment.
        step: Integer tensor holding the current training step.
        is_training: Boolean tensor for whether the algorithm should train.
        should_log: Boolean tensor for whether summaries should be returned.
        config: Object containing the agent configuration as attributes.
    """
    self._batch_env = batch_env
    self._step = step
    self._is_training = is_training
    self._should_log = should_log
    self._config = config
    self._observ_filter = normalize.StreamingNormalize(
        self._batch_env.observ[0], center=True, scale=True, clip=5,
        name='normalize_observ')
    self._reward_filter = normalize.StreamingNormalize(
        self._batch_env.reward[0], center=False, scale=True, clip=10,
        name='normalize_reward')
    # Memory stores tuple of observ, action, mean, logstd, reward.
    template = (self._batch_env.observ[0], self._batch_env.action[0],
                self._batch_env.action[0], self._batch_env.action[0],
                self._batch_env.reward[0])
    self._memory = memory.EpisodeMemory(template, config.update_every,
                                        config.max_length, 'memory')
    self._memory_index = tf.Variable(0, False)
    use_gpu = self._config.use_gpu and utility.available_gpus()
    with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
        # Create network variables for later calls to reuse.
        action_size = self._batch_env.action.shape[1].value
        self._network = tf.make_template(
            'network', functools.partial(config.network, config, action_size))
        output = self._network(
            tf.zeros_like(self._batch_env.observ)[:, None],
            tf.ones(len(self._batch_env)))
        with tf.variable_scope('ppo_temporary'):
            self._episodes = memory.EpisodeMemory(
                template, len(batch_env), config.max_length, 'episodes')
            if output.state is None:
                self._last_state = None
            else:
                # Ensure the batch dimension is set.
                tf.contrib.framework.nest.map_structure(
                    lambda x: x.set_shape([len(batch_env)] + x.shape.as_list()[1:]),
                    output.state)
                # pylint: disable=undefined-variable
                self._last_state = tf.contrib.framework.nest.map_structure(
                    lambda x: tf.Variable(lambda: tf.zeros_like(x), False),
                    output.state)
            self._last_action = tf.Variable(
                tf.zeros_like(self._batch_env.action), False, name='last_action')
            self._last_mean = tf.Variable(
                tf.zeros_like(self._batch_env.action), False, name='last_mean')
            self._last_logstd = tf.Variable(
                tf.zeros_like(self._batch_env.action), False, name='last_logstd')
    self._penalty = tf.Variable(
        self._config.kl_init_penalty, False, dtype=tf.float32)
    self._optimizer = self._config.optimizer(self._config.learning_rate)
def run_optimizer(self):
    with tf.variable_scope('optimizer'):
        # target y variables for use in the loss function in algorithm 1
        self.target_y = tf.placeholder(tf.float32, shape=[None], name="target_y")

        # chosen actions for use in the loss function in algorithm 1
        self.chosen_actions = tf.placeholder(tf.int32, shape=[None], name="chosen_actions")

        # convert the chosen actions to a one-hot vector
        self.chosen_actions_one_hot = tf.one_hot(self.chosen_actions, self.num_actions,
                                                 on_value=1.0, off_value=0.0,
                                                 axis=None, dtype=None,
                                                 name="chosen_actions_one_hot")

        # The q value is the dot product of the prediction network with the one-hot
        # representation of the chosen action. This gives us a single chosen action
        # because reduce_sum adds this up over each of the indexes, all but one of
        # which are zero.
        self.predict_y = tf.reduce_sum(self.chosen_actions_one_hot * self.q_predictions,
                                       axis=1,  # reduce along the second axis because we have batches
                                       name="predict_y")

        # Loss - mean squared error between the target and prediction networks
        #self.loss = tf.reduce_mean(tf.square(tf.subtract(self.target_y, self.predict_y)), name="loss")
        #self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.predict_y, logits=self.target_y), name="loss")
        self.loss = tf.square(tf.subtract(self.target_y, self.predict_y))
        #self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.predict_y, logits=self.target_y)

        if useLSTM and maskHalfLoss:
            # mask out the first half of each trace so only the second half
            # contributes to the loss
            self.maskA = tf.zeros([self.batch_size, self.trace_length // 2])
            self.maskB = tf.ones([self.batch_size, self.trace_length // 2])
            self.mask = tf.concat([self.maskA, self.maskB], 1)
            self.mask = tf.reshape(self.mask, [-1])
            self.loss = self.loss * self.mask
            #self.loss = tf.reduce_mean(self.loss * self.mask)

        self.loss = tf.reduce_mean(self.loss, name="loss")

        # Decay learning rate
        self.learning_rate = tf.maximum(
            self.learning_rate_min,
            tf.train.exponential_decay(self.learning_rate_init, self.global_step,
                                       self.learning_rate_decay_steps,
                                       self.learning_rate_decay))

        # and pass it to the optimizer to train on this defined loss function
        #self.opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate, name="adam")
        self.opt = tf.train.RMSPropOptimizer(self.learning_rate, momentum=0.95, epsilon=0.01)
        #self.opt = tf.train.RMSPropOptimizer(self.learning_rate, 0.99, 0.0, 1e-6)

        # Clip the gradients if the option is enabled by breaking the optimizer's
        # minimize() into a (get, clip, apply) set of operations
        if self.clip_gradients_enabled:
            gradients_and_variables = self.opt.compute_gradients(self.loss)
            if self.clip_gradients_by_global_norm:
                gradients, variables = zip(*gradients_and_variables)
                gradients, _ = tf.clip_by_global_norm(gradients, self.norm_gradient)
                clipped_gradients = zip(gradients, variables)
            else:
                clipped_gradients = [(self.clip_gradients(grad), var)
                                     for grad, var in gradients_and_variables]
            self.optimizer = self.opt.apply_gradients(clipped_gradients)  # this increments global step
        else:
            self.optimizer = self.opt.minimize(self.loss)
# TODO: this pass
if __name__ == "__main__":
    tf.compat.v1.random.set_random_seed(0)
    from tensorflow_manip import silence, toggle_cpu
    silence()
    utility.unitlength = 1
    bz = 3
    utility.scalefactor = utility.epsilon_water * utility.lB_water / utility.unitlength
    ion_dict = {
        interface.ion_pos_str: tf.random.uniform((50, 3), minval=-bz / 2, maxval=bz / 2),
        interface.ion_diameters_str: tf.ones((50, 3)),
        interface.ion_charges_str: tf.random.uniform((50,), minval=-1, maxval=1),
    }
    simul_box = interface.Interface(salt_conc_in=0.5, salt_conc_out=0,
                                    salt_valency_in=1, salt_valency_out=1,
                                    bx=3, by=3, bz=bz,
                                    initial_ein=1, initial_eout=1)
    make_bins(simul_box, set_bin_width=0.05)
    print("number_of_bins", number_of_bins)
    sess = tf.compat.v1.Session()
def __init__(self, args, infer=False):  # infer is set to true during sampling.
    self.args = args
    if infer:
        # Worry about one character at a time during sampling; no batching or BPTT.
        args.batch_size = 1
        args.seq_length = 1

    # Set cell_fn to the type of network cell we're creating -- RNN, GRU or LSTM.
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # Call tensorflow library tensorflow-master/tensorflow/python/ops/rnn_cell
    # to create a layer of rnn_size cells of the specified basic type (RNN/GRU/LSTM).
    cell = cell_fn(args.rnn_size)

    # Use the same rnn_cell library to create a stack of these cells
    # of num_layers layers. Pass in a python list of these cells.
    # (The [cell] * args.num_layers syntax literally duplicates cell multiple times in
    # a list. The syntax is such that [5, 6] * 3 would return [5, 6, 5, 6, 5, 6].)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

    # Create two TF placeholder nodes of 32-bit ints (NOT floats!),
    # each of shape batch_size x seq_length. This shape matches the batches
    # (listed in x_batches and y_batches) constructed in create_batches in utils.py.
    # input_data will receive input batches, and targets will be what it compares
    # against to calculate loss.
    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

    # Using the zero_state function in the RNNCell master class in the rnn_cell library,
    # create a tensor of zeros such that we can swap it in for the network state at any
    # time to zero out the network's state.
    # State dimensions are: cell_fn state size (2 for LSTM) x rnn_size x num_layers.
    # So an LSTM network with 100 cells per layer and 3 layers would have a state size
    # of 600, and initial_state would have a dimension of none x 600.
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    # Scope our new variables to the scope identifier string "rnnlm".
    with tf.variable_scope('rnnlm'):
        # Create new variables softmax_w and softmax_b for output.
        # softmax_w is a weights matrix from the top layer of the model (of size rnn_size)
        # to the vocabulary output (of size vocab_size).
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        # softmax_b is a bias vector of the output characters (of size vocab_size).
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        # [TODO: Why specify CPU? Same as the TF translation tutorial, but don't know why.]
        with tf.device("/cpu:0"):
            # Create a new variable named 'embedding' to connect the character input to
            # the base layer of the RNN. Its role is the conceptual inverse of softmax_w.
            # It contains the trainable weights from the one-hot input vector to the
            # lowest layer of the RNN.
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            # Create an embedding tensor with tf.nn.embedding_lookup(embedding, self.input_data).
            # This tensor has dimensions batch_size x seq_length x rnn_size.
            # tf.split splits that embedding lookup tensor into seq_length tensors
            # (along dimension 1). Thus inputs is a list of seq_length different tensors,
            # each of dimension batch_size x 1 x rnn_size.
            inputs = tf.split(1, args.seq_length,
                              tf.nn.embedding_lookup(embedding, self.input_data))
            # Iterate through these resulting tensors and eliminate that degenerate
            # second dimension of 1, i.e. squeeze each from batch_size x 1 x rnn_size
            # down to batch_size x rnn_size.
            # Thus we now have a list of seq_length tensors, each with dimension
            # batch_size x rnn_size.
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    # This loop function is explicitly NOT used during training.
    # During inference, seq_length == 1, so seq2seq.rnn_decoder() only uses the loop
    # argument on sequence-length items subsequent to the first.
    # It is used as part of seq2seq.rnn_decoder only during sampling -- not training.
    # prev is a 2D Tensor of shape [batch_size x cell.output_size].
    # Returns a 2D Tensor of shape [batch_size x cell.input_size].
    def loop(prev, _):
        # prev is initially the top cell state.
        # Convert the top cell state into character logits.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        # Pull the character with the greatest logit (no sampling, just argmaxing).
        # WHY IS THIS ARGMAXING WHEN ACTUAL SAMPLING IS DONE PROBABILISTICALLY?
        # DOESN'T THIS CAUSE OUTPUTS NOT TO MATCH INPUTS DURING SEQUENCE GENERATION?
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        # Re-embed that symbol as the next step's input, and return that.
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    # Set up a seq2seq decoder from the seq2seq.py library.
    # This constructs the outputs and states nodes of the network.
    # Outputs is a list (of len seq_length, same as inputs) of tensors of shape
    # [batch_size x rnn_size]. These are the raw output values of the top layer of
    # the network at each time step. They have NOT been fed through the decoder
    # projection; they are still in network space, not character space.
    # State is a tensor of shape [batch_size x cell.state_size].
    # This is also the step where all of the trainable parameters for the LSTM
    # (weights and biases) are defined.
    outputs, self.final_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                    loop_function=loop if infer else None,
                                                    scope='rnnlm')

    # tf.concat concatenates the output tensors along the rnn_size dimension,
    # to make a single tensor of shape [batch_size x (seq_length * rnn_size)].
    # This gives the following 2D outputs matrix:
    #   [(rnn output: batch 0, seq 0) (rnn output: batch 0, seq 1) ... (rnn output: batch 0, seq seq_len-1)]
    #   [(rnn output: batch 1, seq 0) (rnn output: batch 1, seq 1) ... (rnn output: batch 1, seq seq_len-1)]
    #   ...
    #   [(rnn output: batch batch_size-1, seq 0) ... (rnn output: batch batch_size-1, seq seq_len-1)]
    # tf.reshape then reshapes it to a tensor of shape [(batch_size * seq_length) x rnn_size].
    # Output will now be the following matrix:
    #   [rnn output: batch 0, seq 0]
    #   [rnn output: batch 0, seq 1]
    #   ...
    #   [rnn output: batch 0, seq seq_len-1]
    #   [rnn output: batch 1, seq 0]
    #   ...
    #   [rnn output: batch batch_size-1, seq seq_len-1]
    # Note the following comment in rnn_cell.py:
    #   Note: in many cases it may be more efficient to not use this wrapper,
    #   but instead concatenate the whole sequence of your outputs in time,
    #   do the projection on this batch-concatenated sequence, then split it
    #   if needed or directly feed into a softmax.
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

    # Obtain the logits node by applying output weights and biases to the output tensor.
    # Logits is a tensor of shape [(batch_size * seq_length) x vocab_size].
    # Recall that output is a 2D tensor of shape [(batch_size * seq_length) x rnn_size],
    # and softmax_w is a 2D tensor of shape [rnn_size x vocab_size].
    # The matrix product is therefore a new 2D tensor of
    # [(batch_size * seq_length) x vocab_size].
    # In other words, that multiplication converts a long list of rnn_size vectors
    # to a long list of vocab_size vectors.
    # Then add softmax_b (a single vocab-sized vector) to every row of that list.
    # That gives you the logits!
    self.logits = tf.matmul(output, softmax_w) + softmax_b

    # Convert logits to probabilities. Probs isn't used during training! That node
    # is never calculated. Like logits, probs is a tensor of shape
    # [(batch_size * seq_length) x vocab_size].
    # During sampling, this means it is of shape [1 x vocab_size].
    self.probs = tf.nn.softmax(self.logits)

    # seq2seq.sequence_loss_by_example returns a 1D float Tensor containing the
    # log-perplexity for each sequence. (Size is batch_size * seq_length.)
    # Targets are reshaped from a [batch_size x seq_length] tensor to a 1D tensor,
    # of the following layout:
    #   target character (batch 0, seq 0)
    #   target character (batch 0, seq 1)
    #   ...
    #   target character (batch 0, seq seq_len-1)
    #   target character (batch 1, seq 0)
    #   ...
    # These targets are compared to the logits to generate loss.
    # Logits: instead of a list of character indices, it's a list of character index
    # probability vectors.
    # seq2seq.sequence_loss_by_example will do the work of generating losses by
    # comparing the one-hot vectors implicitly represented by the target characters
    # against the probability distributions in logits.
    # It returns a 1D float tensor (a vector) where item i is the log-perplexity of
    # the comparison of the ith logit distribution to the ith one-hot target vector.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],                     # logits: 1-item list of 2D Tensors of shape [batch_size x vocab_size]
        [tf.reshape(self.targets, [-1])],  # targets: 1-item list of 1D batch-sized int32 Tensors of the same length as logits
        [tf.ones([args.batch_size * args.seq_length])],  # weights: 1-item list of 1D batch-sized float Tensors of the same length as logits
        args.vocab_size)                   # num_decoder_symbols: integer, number of decoder symbols (output classes)

    # Cost is the arithmetic mean of the values of the loss tensor
    # (the sum divided by the total number of elements).
    # It is a single-element floating point tensor. This is what the optimizer
    # seeks to minimize.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

    # Create a summary for our cost.
    tf.scalar_summary("cost", self.cost)

    # Create a node to track the learning rate as it decays through the epochs.
    self.lr = tf.Variable(args.learning_rate, trainable=False)
    self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
    self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)

    tvars = tf.trainable_variables()  # a python list of all trainable TF Variable objects
    # tf.gradients returns a list of tensors of length len(tvars) where each tensor
    # is sum(dy/dx).
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)  # ADAM optimizer with the current learning rate

    # Zip creates a list of tuples, where each tuple is (variable tensor, gradient tensor).
    # The training op nudges the variables along the gradient, with the given learning
    # rate, using the ADAM optimizer. This is the op that a training session should be
    # instructed to perform.
self.train_op = optimizer.apply_gradients(zip(grads, tvars)) self.summary_op = tf.merge_all_summaries()
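# A minimal standalone sketch (not from the original repo) of the concat + reshape step above,
# runnable under TF 2.x eager execution, using toy sizes so the row ordering is easy to verify:
# batch_size=2, seq_length=3, rnn_size=4.
import tensorflow as tf

batch_size, seq_length, rnn_size = 2, 3, 4
# Stand-ins for the decoder's per-step outputs: a list of seq_length tensors,
# each of shape [batch_size, rnn_size], filled with the step index.
outputs = [tf.fill([batch_size, rnn_size], float(t)) for t in range(seq_length)]
# Concat along axis 1 -> [batch_size, seq_length * rnn_size],
# then reshape -> [(batch_size * seq_length), rnn_size].
flat = tf.reshape(tf.concat(outputs, axis=1), [-1, rnn_size])
print(flat.shape)  # (6, 4); rows are ordered batch 0 step 0..2, then batch 1 step 0..2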
import tensorflowvisu
import math
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data as mnist_data

# load data
mnist = mnist_data.read_data_sets("data", one_hot=True, reshape=False, validation_size=0)

# tf.placeholder(dtype, shape=None, name=None) only declares an input node in the graph;
# no data is bound to it until it is fed at sess.run time.
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers
Y_ = tf.placeholder(tf.float32, [None, 10])

# weight and bias tensors
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, 4], stddev=0.1))
B1 = tf.Variable(tf.ones([4]) / 10)
W3 = tf.Variable(tf.truncated_normal([14 * 14 * 4, 200], stddev=0.1))
B3 = tf.Variable(tf.ones([200]) / 10)
W4 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B4 = tf.Variable(tf.ones([10]) / 10)

stride = 1
# ---------------------------------------- Experiment 1 ---------------------------------------------
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
Y2 = tf.layers.max_pooling2d(inputs=Y1, pool_size=[2, 2], strides=2)
Y3 = tf.nn.relu(tf.matmul(tf.reshape(Y2, shape=[-1, 14 * 14 * 4]), W3) + B3)
# ---------------------------------------- Experiment 2 ---------------------------------------------
# Y1 = tf.nn.sigmoid(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
# Y2 = tf.layers.max_pooling2d(inputs=Y1, pool_size=[2, 2], strides=2)
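# Why W3 is [14*14*4, 200]: a quick shape walk-through of Experiment 1 on a dummy batch.
# This is an illustrative sketch, not part of the original script, using current tf.nn aliases
# (runnable under TF 2.x eager execution).
import tensorflow as tf

x = tf.zeros([8, 28, 28, 1])  # dummy MNIST batch
w1 = tf.zeros([5, 5, 1, 4])
y1 = tf.nn.conv2d(x, w1, strides=[1, 1, 1, 1], padding='SAME')  # -> [8, 28, 28, 4]
y2 = tf.nn.max_pool2d(y1, ksize=2, strides=2, padding='VALID')  # -> [8, 14, 14, 4]
print(y1.shape, y2.shape)  # (8, 28, 28, 4) (8, 14, 14, 4); flattening gives 14*14*4 features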
def __init__(self, mode, batch_size, audio_context_radius, audio_nbands, audio_nchannels,
             nfeats, cnn_filter_shapes, cnn_init, cnn_pool, cnn_rnn_zack,
             rnn_cell_type, rnn_size, rnn_nlayers, rnn_init, rnn_nunroll, rnn_keep_prob,
             dnn_sizes, dnn_init, dnn_keep_prob, dnn_nonlin,
             target_weight_strategy,  # 'rect', 'last', 'pos', 'seq'
             grad_clip, opt, export_feat_name=None, zack_hack=0):
    audio_context_len = audio_context_radius * 2 + 1

    mode = mode
    do_cnn = len(cnn_filter_shapes) > 0
    do_rnn = rnn_size > 0 and rnn_nlayers > 0
    do_dnn = len(dnn_sizes) > 0

    if not do_rnn:
        assert rnn_nunroll == 1

    if cnn_rnn_zack:
        assert audio_context_len == 1
        assert zack_hack > 0 and zack_hack % 2 == 0

    export_feat_tensors = {}

    # Input tensors
    feats_audio_nunroll = tf.placeholder(dtype, shape=[batch_size, rnn_nunroll + zack_hack, audio_context_len, audio_nbands, audio_nchannels], name='feats_audio')
    feats_other_nunroll = tf.placeholder(dtype, shape=[batch_size, rnn_nunroll, nfeats], name='feats_other')
    print 'feats_audio: {}'.format(feats_audio_nunroll.get_shape())
    print 'feats_other: {}'.format(feats_other_nunroll.get_shape())

    if mode != 'gen':
        targets_nunroll = tf.placeholder(dtype, shape=[batch_size, rnn_nunroll])
        # TODO: tf.ones acts as an overridable placeholder but this is still awkward
        target_weights_nunroll = tf.ones([batch_size, rnn_nunroll], dtype)

    # Reshape input tensors to remove nunroll dim; will briefly restore later during RNN if necessary
    if cnn_rnn_zack:
        feats_audio = tf.reshape(feats_audio_nunroll, shape=[batch_size, rnn_nunroll + zack_hack, audio_nbands, audio_nchannels])
    else:
        feats_audio = tf.reshape(feats_audio_nunroll, shape=[batch_size * rnn_nunroll, audio_context_len, audio_nbands, audio_nchannels])
    feats_other = tf.reshape(feats_other_nunroll, shape=[batch_size * rnn_nunroll, nfeats])
    if mode != 'gen':
        targets = tf.reshape(targets_nunroll, shape=[batch_size * rnn_nunroll])
        target_weights = tf.reshape(target_weights_nunroll, shape=[batch_size * rnn_nunroll])

    # CNN
    cnn_output = feats_audio
    if do_cnn:
        layer_last = feats_audio
        nfilt_last = audio_nchannels
        for i, ((ntime, nband, nfilt), (ptime, pband)) in enumerate(zip(cnn_filter_shapes, cnn_pool)):
            layer_name = 'cnn_{}'.format(i)
            with tf.variable_scope(layer_name):
                filters = tf.get_variable('filters', [ntime, nband, nfilt_last, nfilt], initializer=cnn_init, dtype=dtype)
                biases = tf.get_variable('biases', [nfilt], initializer=tf.constant_initializer(0.1), dtype=dtype)
            if cnn_rnn_zack:
                padding = 'SAME'
            else:
                padding = 'VALID'

            conv = tf.nn.conv2d(layer_last, filters, [1, 1, 1, 1], padding=padding)
            biased = tf.nn.bias_add(conv, biases)
            convolved = tf.nn.relu(biased)

            pool_shape = [1, ptime, pband, 1]
            pooled = tf.nn.max_pool(convolved, ksize=pool_shape, strides=pool_shape, padding='SAME')
            print '{}: {}'.format(layer_name, pooled.get_shape())

            export_feat_tensors[layer_name] = pooled

            # TODO: CNN dropout?

            layer_last = pooled
            nfilt_last = nfilt

        cnn_output = layer_last

    # Flatten CNN and concat with other features
    zack_hack_div_2 = 0
    if cnn_rnn_zack:
        zack_hack_div_2 = zack_hack // 2
        cnn_output = tf.slice(cnn_output, [0, zack_hack_div_2, 0, 0], [-1, rnn_nunroll, -1, -1])
        nfeats_conv = reduce(lambda x, y: x * y, [int(x) for x in cnn_output.get_shape()[-2:]])
    else:
        nfeats_conv = reduce(lambda x, y: x * y, [int(x) for x in cnn_output.get_shape()[-3:]])
    feats_conv = tf.reshape(cnn_output, [batch_size * rnn_nunroll, nfeats_conv])
    nfeats_tot = nfeats_conv + nfeats
    feats_all = tf.concat([feats_conv, feats_other], 1)
    print 'feats_cnn: {}'.format(feats_conv.get_shape())
    print 'feats_all: {}'.format(feats_all.get_shape())

    # Project to RNN size
    rnn_output = feats_all
    rnn_output_size = nfeats_tot
    if do_rnn:
        with tf.variable_scope('rnn_proj'):
            rnn_proj_w = tf.get_variable('W', [nfeats_tot, rnn_size], initializer=tf.uniform_unit_scaling_initializer(factor=1.0, dtype=dtype), dtype=dtype)
            rnn_proj_b = tf.get_variable('b', [rnn_size], initializer=tf.constant_initializer(0.0), dtype=dtype)

        rnn_inputs = tf.nn.bias_add(tf.matmul(feats_all, rnn_proj_w), rnn_proj_b)
        rnn_inputs = tf.reshape(rnn_inputs, [batch_size, rnn_nunroll, rnn_size])
        rnn_inputs = tf.split(rnn_inputs, rnn_nunroll, axis=1)
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

        if rnn_cell_type == 'rnn':
            cell_fn = tf.nn.rnn_cell.BasicRNNCell
        elif rnn_cell_type == 'gru':
            cell_fn = tf.nn.rnn_cell.GRUCell
        elif rnn_cell_type == 'lstm':
            cell_fn = tf.nn.rnn_cell.BasicLSTMCell
        else:
            raise NotImplementedError()
        cell = cell_fn(rnn_size)

        if mode == 'train' and rnn_keep_prob < 1.0:
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=rnn_keep_prob)

        if rnn_nlayers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([cell] * rnn_nlayers)

        initial_state = cell.zero_state(batch_size, dtype)

        # RNN
        # TODO: weight init
        with tf.variable_scope('rnn_unroll'):
            state = initial_state
            outputs = []
            for i in xrange(rnn_nunroll):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(rnn_inputs[i], state)
                outputs.append(cell_output)
            final_state = state

        rnn_output = tf.reshape(tf.concat(outputs, axis=1), [batch_size * rnn_nunroll, rnn_size])
        rnn_output_size = rnn_size
    print 'rnn_output: {}'.format(rnn_output.get_shape())

    # Dense NN
    dnn_output = rnn_output
    dnn_output_size = rnn_output_size
    if do_dnn:
        last_layer = rnn_output
        last_layer_size = rnn_output_size
        for i, layer_size in enumerate(dnn_sizes):
            layer_name = 'dnn_{}'.format(i)
            with tf.variable_scope(layer_name):
                dnn_w = tf.get_variable('W', shape=[last_layer_size, layer_size], initializer=dnn_init, dtype=dtype)
                dnn_b = tf.get_variable('b', shape=[layer_size], initializer=tf.constant_initializer(0.0), dtype=dtype)
            projected = tf.nn.bias_add(tf.matmul(last_layer, dnn_w), dnn_b)
            # TODO: argument nonlinearity, change bias to 0.1 if relu
            if dnn_nonlin == 'tanh':
                last_layer = tf.nn.tanh(projected)
            elif dnn_nonlin == 'sigmoid':
                last_layer = tf.nn.sigmoid(projected)
            elif dnn_nonlin == 'relu':
                last_layer = tf.nn.relu(projected)
            else:
                raise NotImplementedError()
            if mode == 'train' and dnn_keep_prob < 1.0:
                last_layer = tf.nn.dropout(last_layer, dnn_keep_prob)
            last_layer_size = layer_size
            print '{}: {}'.format(layer_name, last_layer.get_shape())

            export_feat_tensors[layer_name] = last_layer

        dnn_output = last_layer
        dnn_output_size = last_layer_size

    # Logistic regression
    with tf.variable_scope('logit') as scope:
        logit_w = tf.get_variable('W', shape=[dnn_output_size, 1], initializer=tf.truncated_normal_initializer(stddev=1.0 / dnn_output_size, dtype=dtype), dtype=dtype)
        logit_b = tf.get_variable('b', shape=[1], initializer=tf.constant_initializer(0.0), dtype=dtype)
    logits = tf.squeeze(tf.nn.bias_add(tf.matmul(dnn_output, logit_w), logit_b), squeeze_dims=[1])
    prediction = tf.nn.sigmoid(logits)
    prediction_inspect = tf.reshape(prediction, [batch_size, rnn_nunroll])
    prediction_final = tf.squeeze(tf.slice(prediction_inspect, [0, rnn_nunroll - 1], [-1, 1]), squeeze_dims=[1])
    print 'logit: {}'.format(logits.get_shape())

    # Compute loss
    if mode != 'gen':
        neg_log_lhoods = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
        if target_weight_strategy == 'rect':
            avg_neg_log_lhood = tf.reduce_mean(neg_log_lhoods)
        else:
            neg_log_lhoods = tf.multiply(neg_log_lhoods, target_weights)
            # be careful to have at least one nonzero weight
            # should we be taking the mean element-wise by batch? this looks like a big bug
            avg_neg_log_lhood = tf.reduce_sum(neg_log_lhoods) / tf.reduce_sum(target_weights)
        neg_log_lhoods_inspect = tf.reshape(neg_log_lhoods, [batch_size, rnn_nunroll])

    # Train op
    if mode == 'train':
        lr = tf.Variable(0.0, trainable=False)
        self._lr = lr
        self._lr_summary = tf.summary.scalar('learning_rate', self._lr)

        tvars = tf.trainable_variables()
        grads = tf.gradients(avg_neg_log_lhood, tvars)
        if grad_clip > 0.0:
            grads, _ = tf.clip_by_global_norm(grads, grad_clip)

        if opt == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(lr)
        else:
            raise NotImplementedError()

        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=tf.contrib.framework.get_or_create_global_step())

    # Tensor exports
    self.feats_audio = feats_audio_nunroll
    self.feats_other = feats_other_nunroll
    if export_feat_name:
        self.feats_export = export_feat_tensors[export_feat_name]
    self.prediction = prediction_inspect
    self.prediction_final = prediction_final
    if mode != 'gen':
        self.neg_log_lhoods = neg_log_lhoods_inspect
        self.avg_neg_log_lhood = avg_neg_log_lhood
        self.targets = targets_nunroll
        self.target_weights = target_weights_nunroll
    if mode == 'train':
        self.train_op = train_op
    if mode != 'train' and do_rnn:
        self.initial_state = initial_state
        self.final_state = final_state
    self.zack_hack_div_2 = zack_hack_div_2

    self.mode = mode
    self.batch_size = batch_size
    self.rnn_nunroll = rnn_nunroll
    self.do_rnn = do_rnn
    self.target_weight_strategy = target_weight_strategy
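# On the "mean element-wise by batch" worry in the loss above: a tiny standalone sketch
# (TF 2.x eager, illustrative values) contrasting a single global weighted mean with a
# per-sequence weighted mean -- they generally differ when weights are uneven across sequences.
import tensorflow as tf

nll = tf.constant([[1.0, 2.0], [3.0, 4.0]])  # [batch, nunroll] per-step losses
w = tf.constant([[1.0, 0.0], [1.0, 1.0]])    # target weights
global_mean = tf.reduce_sum(nll * w) / tf.reduce_sum(w)              # 8/3 ~= 2.667
per_seq = tf.reduce_sum(nll * w, axis=1) / tf.reduce_sum(w, axis=1)  # [1.0, 3.5]
per_seq_mean = tf.reduce_mean(per_seq)                               # 2.25
print(global_mean.numpy(), per_seq_mean.numpy())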
def _get_start_token_ids(self, tensor_for_shape):
    start_token_id = 2  # id assumed for the start-of-sequence token in this model's vocabulary
    batch_size = utils.get_shape_list(tensor_for_shape)[0]
    return tf.ones([batch_size], dtype=tf.int32) * start_token_id
def _one_step(inputs, targets, batch_idx):
    probs, value = agent(inputs)

    # Deterministic policy: threshold the probabilities at 0.5.
    # (PyTorch original: policy_map[policy_map < 0.5] = 0.0; policy_map[policy_map >= 0.5] = 1.0)
    policy_map = tf.cast(probs >= 0.5, tf.float32)

    # Stochastic policy: temper the probabilities toward 0.5 with alpha, then sample.
    probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)
    distr = tfp.distributions.Bernoulli(probs=probs)
    policy = distr.sample()

    if args.cl_step < num_blocks:
        # Curriculum learning: force all but the last cl_step blocks on, i.e. the PyTorch
        # slice assignments
        #     policy[:, :-args.cl_step] = 1
        #     policy_map[:, :-args.cl_step] = 1
        # emulated here with tensor_scatter_nd_update on the transposed tensors
        # (scatter updates address whole rows).
        policy_ = tf.transpose(policy)
        p_idx = tf.constant([[i] for i in range(policy_.shape[0] - args.cl_step)])
        p_update = tf.ones_like(policy_[:-args.cl_step, :])
        policy = tf.transpose(tf.tensor_scatter_nd_update(policy_, p_idx, p_update))

        policy_map_ = tf.transpose(policy_map)
        pm_idx = tf.constant([[i] for i in range(policy_map_.shape[0] - args.cl_step)])
        pm_update = tf.ones_like(policy_map_[:-args.cl_step, :])
        policy_map = tf.transpose(tf.tensor_scatter_nd_update(policy_map_, pm_idx, pm_update))

        # Mask for the curriculum loss below. A plain tensor suffices: there is no need to
        # wrap it in tf.Variable as the PyTorch original did with autograd Variables
        # (policy_mask = Variable(torch.ones(inputs.size(0), policy.size(1))).cuda()).
        policy_mask = tf.ones([inputs.shape[0], policy.shape[1]])
        policy_mask_ = tf.transpose(policy_mask)
        pmask_idx = tf.constant([[i] for i in range(policy_mask_.shape[0] - args.cl_step)])
        pmask_update = tf.ones_like(policy_mask_[:-args.cl_step, :])
        policy_mask = tf.transpose(tf.tensor_scatter_nd_update(policy_mask_, pmask_idx, pmask_update))
    else:
        policy_mask = None

    # inputs can be fed to the network directly; no Variable wrapper is needed.
    preds_map = rnet.forward(inputs, policy_map)
    preds_sample = rnet.forward(inputs, policy)

    reward_map, _ = get_reward(preds_map, targets, policy_map)
    reward_sample, match = get_reward(preds_sample, targets, policy)

    # REINFORCE with a self-critical baseline: the deterministic policy's reward.
    advantage = reward_sample - reward_map

    loss = -distr.log_prob(policy)
    # loss = loss * Variable(advantage).expand_as(policy) in the PyTorch original
    loss = loss * tf.broadcast_to(advantage, policy.shape)
    if policy_mask is not None:
        loss = policy_mask * loss  # mask for curriculum learning
    loss = tf.reduce_sum(loss)

    probs = tf.clip_by_value(probs, clip_value_min=1e-15, clip_value_max=1 - 1e-15)  # probs.clamp(1e-15, 1-1e-15)
    entropy_loss = tf.math.negative(probs) * tf.math.log(probs)  # -probs * torch.log(probs)
    entropy_loss = args.beta * tf.math.reduce_sum(entropy_loss)

    loss = (loss - entropy_loss) / inputs.shape[0]
    return loss, match, reward_sample, policy
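# The transpose + tensor_scatter_nd_update dance above emulates the PyTorch slice assignment
# policy[:, :-cl_step] = 1. A minimal standalone sketch (TF 2.x eager, toy shapes):
import tensorflow as tf

policy = tf.zeros([2, 5])  # [batch, num_blocks]
cl_step = 2
p_t = tf.transpose(policy)  # scatter updates address whole rows, so work on [num_blocks, batch]
idx = tf.constant([[i] for i in range(p_t.shape[0] - cl_step)])
upd = tf.ones_like(p_t[:-cl_step, :])
forced = tf.transpose(tf.tensor_scatter_nd_update(p_t, idx, upd))
print(forced.numpy())  # the first num_blocks - cl_step columns are now 1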
def labels_real(self):
    return slim.one_hot_encoding(
        tf.ones(shape=(self.batch_size,), dtype=tf.int64), 2)
def create_look_ahead_mask(self, seq):
    seq_len = tf.shape(seq)[1]
    look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    return look_ahead_mask
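# What create_look_ahead_mask produces, illustrated standalone for a length-4 sequence:
# position i may attend to positions <= i, so the mask is 1 strictly above the diagonal.
import tensorflow as tf

seq_len = 4
mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
print(mask.numpy())
# [[0. 1. 1. 1.]
#  [0. 0. 1. 1.]
#  [0. 0. 0. 1.]
#  [0. 0. 0. 0.]]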
def __call__(self, x, prev_state):
    # We feed the controller the current input together with what was read from memory at the
    # previous step, so the NTM keeps track of what it did: the last input's interaction with
    # the memory comes back in through the read vectors.
    prev_read_vector_list = prev_state['read_vector_list']  # read vectors of Sec 3.1 (the content
                                                            # read out, length = memory_vector_dim)
    prev_controller_state = prev_state['controller_state']  # state of the controller (LSTM hidden state)

    # x + prev_read_vector -> controller (RNN) -> controller_output
    controller_input = tf.concat([x] + prev_read_vector_list, axis=1)
    # concat the input vector with the vectors read from memory at the previous step
    with tf.variable_scope('controller', reuse=self.reuse):
        controller_output, controller_state = self.controller(controller_input, prev_controller_state)
        # run through the LSTM; controller_output has shape (batch_size, rnn_hidden_num)

    # controller_output -> k (dim = memory_vector_dim, compared to each vector in M, Sec 3.1)
    #                   -> beta (positive scalar, key strength, Sec 3.1) -> w^c
    #                   -> g (scalar in (0, 1), blend between w_prev and w^c, Sec 3.2) -> w^g
    #                   -> s (dim = shift_range * 2 + 1, shift weighting, Sec 3.2) -> w^~
    #                      (not memory_size; that would be too wide)
    #                   -> gamma (scalar >= 1, sharpens the final result, Sec 3.2) -> w    * num_heads
    # controller_output -> erase, add vectors (dim = memory_vector_dim, in (0, 1), Sec 3.2) * write_head_num
    num_parameters_per_head = self.memory_vector_dim + 1 + 1 + (self.shift_range * 2 + 1) + 1
    # memory_vector_dim is the size of the key k_t used for content-based matching
    num_heads = self.read_head_num + self.write_head_num  # every head predicts its own parameter set
    total_parameter_num = num_parameters_per_head * num_heads + self.memory_vector_dim * 2 * self.write_head_num
    with tf.variable_scope("o2p", reuse=(self.step > 0) or self.reuse):
        o2p_w = tf.get_variable('o2p_w', [controller_output.get_shape()[1], total_parameter_num],
                                # output-to-parameters projection weight
                                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.5))
        o2p_b = tf.get_variable('o2p_b', [total_parameter_num],
                                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.5))
        parameters = tf.nn.xw_plus_b(controller_output, o2p_w, o2p_b)
        # compute the outputs: multiply by the weights and add the bias
    head_parameter_list = tf.split(parameters[:, :num_parameters_per_head * num_heads], num_heads, axis=1)
    # split off the per-head addressing parameters
    erase_add_list = tf.split(parameters[:, num_parameters_per_head * num_heads:], 2 * self.write_head_num, axis=1)
    # erase and add vectors, used when writing: the erase vector clears content before the memory
    # matrix is finally updated. This is an element-wise operation, so both vectors have the same
    # dimension as a single memory vector.

    # k, beta, g, s, gamma -> w
    prev_w_list = prev_state['w_list']  # previous weightings (blurred addresses): softmax-normalized
                                        # scores over memory locations, used for the interpolation
    prev_M = prev_state['M']            # previous memory matrix
    w_list = []  # weighting list
    p_list = []  # parameter list
    for i, head_parameter in enumerate(head_parameter_list):
        # One iteration per head: with one read head and one write head this loop runs twice
        # (with two of each it would run four times), and read and write heads share the same
        # parameter layout.
        # Functions used to constrain each parameter to its proper range:
        #   exp(x)              -> x > 0
        #   sigmoid(x)          -> x in (0, 1)
        #   softmax(x)          -> sum_i x_i = 1
        #   log(exp(x) + 1) + 1 -> x > 1
        k = tf.tanh(head_parameter[:, 0:self.memory_vector_dim])
        # key k for content-based addressing, compared to memory via cosine similarity
        beta = tf.sigmoid(head_parameter[:, self.memory_vector_dim]) * 10
        # key strength, scales the sharpness of the content-based softmax
        # (do not use exp here -- it will explode!)
        g = tf.sigmoid(head_parameter[:, self.memory_vector_dim + 1])
        # interpolation gate: blends the current content-based weights with the previous weights
        s = tf.nn.softmax(
            head_parameter[:, self.memory_vector_dim + 2:self.memory_vector_dim + 2 + (self.shift_range * 2 + 1)]
        )
        # shift weighting: decides whether, and by how much, to rotate the weights via circular
        # convolution; with shift_range == 1 the head can stay put or move one slot left or right,
        # hence 3 parameters
        gamma = tf.log(tf.exp(head_parameter[:, -1]) + 1) + 1  # sharpening exponent
        with tf.variable_scope('addressing_head_%d' % i):
            w = self.addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            # Figure 2: completes the addressing and updates the weighting
        w_list.append(w)  # one weighting per read/write head
        p_list.append({'k': k, 'beta': beta, 'g': g, 's': s, 'gamma': gamma})  # the parameter list

    # Reading (Sec 3.1)
    read_w_list = w_list[:self.read_head_num]
    # final read weightings, after all content- and location-based addressing
    read_vector_list = []
    for i in range(self.read_head_num):
        # One read vector per read head; at the next step these are concatenated with x
        # as the controller's input.
        read_vector = tf.reduce_sum(tf.expand_dims(read_w_list[i], dim=2) * prev_M, axis=1)
        # weight the previous memory by the read weighting; this is the NTM cell's read-out
        read_vector_list.append(read_vector)

    # Writing (Sec 3.2)
    write_w_list = w_list[self.read_head_num:]
    # the write weightings, produced by the same addressing pipeline as the read weightings
    M = prev_M
    for i in range(self.write_head_num):  # update the memory matrix (one write head here)
        w = tf.expand_dims(write_w_list[i], axis=2)
        erase_vector = tf.expand_dims(tf.sigmoid(erase_add_list[i * 2]), axis=1)  # erase vector in (0, 1)
        add_vector = tf.expand_dims(tf.tanh(erase_add_list[i * 2 + 1]), axis=1)   # add vector in (-1, 1)
        M = M * (tf.ones(M.get_shape()) - tf.matmul(w, erase_vector)) + tf.matmul(w, add_vector)
        # erase, then add: this updates the memory matrix

    # controller_output -> NTM output
    if not self.output_dim:
        output_dim = x.get_shape()[1]  # input size at one time step
    else:
        output_dim = self.output_dim
    with tf.variable_scope("o2o", reuse=(self.step > 0) or self.reuse):
        o2o_w = tf.get_variable('o2o_w', [controller_output.get_shape()[1], output_dim],
                                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.5))
        o2o_b = tf.get_variable('o2o_b', [output_dim],
                                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.5))
        NTM_output = tf.nn.xw_plus_b(controller_output, o2o_w, o2o_b)

    state = {
        'controller_state': controller_state,  # hidden state
        'read_vector_list': read_vector_list,  # the read vectors of Sec 3.1
        'w_list': w_list,                      # the normalized weightings
        'p_list': p_list,                      # the updated parameter list
        'M': M                                 # the updated memory matrix
    }

    self.step += 1
    return NTM_output, state
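# A hedged sketch of the content-based half of self.addressing (the cosine-similarity focusing
# of Sec 3.3.1 of the NTM paper); the real method additionally applies interpolation, shifting
# and sharpening. The function name and shapes here are illustrative, not from the code above.
import tensorflow as tf

def content_weights(k, beta, M, eps=1e-8):
    # k: [batch, mem_dim] key; beta: [batch] key strength; M: [batch, mem_size, mem_dim]
    k = tf.expand_dims(k, axis=1)                                 # [batch, 1, mem_dim]
    cos = tf.reduce_sum(k * M, axis=2) / (
        tf.norm(k, axis=2) * tf.norm(M, axis=2) + eps)            # [batch, mem_size]
    return tf.nn.softmax(tf.expand_dims(beta, 1) * cos, axis=1)   # focus sharpened by beta

w = content_weights(tf.random.normal([2, 8]), tf.ones([2]) * 5.0,
                    tf.random.normal([2, 16, 8]))
print(w.shape)  # (2, 16); each row sums to 1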
def reconstruction_loss(self, gt, fake):
    # Charbonnier-style smooth L1: sqrt((fake - gt)^2 + eps).
    # (A scalar eps would broadcast just as well as this tf.ones tensor.)
    eps = 1e-6 * tf.ones(gt.get_shape())
    loss = tf.reduce_mean(tf.sqrt(tf.square(fake - gt) + eps))
    return loss
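# Behavior of that loss, checked standalone (TF 2.x eager): away from zero it tracks the mean
# absolute error, but the eps keeps it differentiable at zero. Values here are illustrative.
import tensorflow as tf

gt = tf.constant([0.0, 1.0])
fake = tf.constant([0.0, 0.5])
eps = 1e-6
print(tf.reduce_mean(tf.sqrt(tf.square(fake - gt) + eps)).numpy())  # ~0.25, like mean |diff|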
# Y = tf.compat.v1.placeholder(tf.float32, (None, sequence_length))
Y = tf.compat.v1.placeholder(tf.int32, (None, sequence_length))  # (None, 6)
print(X)  # prints the tensor's shape; to get actual values you need sess.run
print(Y)

# 2. Build the model
# model.add(LSTM(output, input_dim=(6, 5)))
# cell = tf.nn.rnn_cell.BasicLSTMCell(output)
cell = tf.keras.layers.LSTMCell(output)
hypothesis, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)  # model.add(LSTM)
print(hypothesis)  # (?, 6, 5)

# 3-1. Compile
weights = tf.ones([batch_size, sequence_length])  # [1, 6]
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=hypothesis, targets=Y, weights=weights)
print('weights : ', weights)  # Tensor("ones:0", shape=(1, 6), dtype=float32)
cost = tf.reduce_mean(sequence_loss)
# train = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)
train = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1).minimize(cost)
print("done")

prediction = tf.argmax(hypothesis, axis=2)
print(prediction)
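# Roughly what tf.contrib.seq2seq.sequence_loss computes above under its default averaging,
# written out by hand for one toy batch (standalone sketch, TF 2.x eager): per-step sparse
# softmax cross-entropy, weighted, then divided by the total weight.
import tensorflow as tf

logits = tf.random.normal([1, 6, 5])  # [batch, time, vocab]
targets = tf.zeros([1, 6], dtype=tf.int32)
weights = tf.ones([1, 6])
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
manual = tf.reduce_sum(xent * weights) / tf.reduce_sum(weights)
print(manual.numpy())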
def GGXtf(maps):
    def GGXpxl(V, L, N, albedo, metallic, rough):
        metallic = tf.reduce_mean(metallic, axis=-1)  # collapse to a single value per pixel
        rough = rough ** 2

        H = normalisation(V + L)
        VdotH = tf.maximum(tf.reduce_sum(V * H, axis=-1), 0)
        NdotH = tf.maximum(tf.reduce_sum(N * H, axis=-1), 0)
        NdotV = tf.maximum(tf.reduce_sum(V * N, axis=-1), 0)
        NdotL = tf.maximum(tf.reduce_sum(N * L, axis=-1), 0)

        # Schlick's Fresnel approximation, with metallic standing in for F0
        F = metallic + (1 - metallic) * (1 - VdotH) ** 5
        # Beckmann-style normal distribution function
        NDF = 1 / (PI * rough * pow(NdotH, 4.0)) * tf.exp((NdotH * NdotH - 1.0) / (rough * NdotH * NdotH))
        # Cook-Torrance geometry term
        G = tf.minimum(2 * NdotH * NdotV / VdotH, 2 * NdotH * NdotL / VdotH)
        G = tf.minimum(tf.cast(1, dtype=tf.float32), G)

        numerator = NDF * G * F
        denominator = 4 * NdotV * NdotL + 0.001
        specular = numerator / denominator

        diffuse = (1 - metallic)[:, :, None] * albedo / PI * NdotL[:, :, None]
        reflection = specular * NdotL * 4  # * radiance
        reflection = tf.reshape(reflection, (256, 256, 1))
        color = tf.concat([reflection, reflection, reflection], -1) + diffuse * 1
        return color ** (1 / 1.8)  # rough gamma correction

    maps = tf.squeeze(maps)
    lightpos = tf.constant([288, 288, 200], dtype=tf.float32)
    viewpos = tf.constant([143, 143, 288], dtype=tf.float32)
    albedomap, specularmap, normalinmap, roughnessmap = process(maps)

    shapex = 256
    shapey = 256
    x = np.linspace(0, shapex - 1, shapex)
    y = np.linspace(0, shapey - 1, shapey)
    xx, yy = tf.meshgrid(x, y)
    xx = tf.cast(tf.reshape(xx, (shapex, shapey, 1)), dtype=tf.float32)
    yy = tf.cast(tf.reshape(yy, (shapex, shapey, 1)), dtype=tf.float32)
    padd0 = tf.reshape(tf.zeros([shapex, shapey], dtype=tf.float32), (shapex, shapey, 1))
    padd1 = tf.reshape(tf.ones([shapex, shapey], dtype=tf.float32), (shapex, shapey, 1))
    fragpos = tf.concat([xx, yy, padd0], axis=-1)
    N = normalisation(tf.concat([normalinmap, padd1], axis=-1))
    V = normalisation(viewpos - fragpos)
    L = normalisation(lightpos - fragpos)
    '''
    rough = tf.expand_dims(roughnessmap, axis=-1)
    title = ['view', 'light', 'albedo', 'specular', 'normal', 'roughness']
    display_list = [V, L, albedomap, specularmap, N, rough]
    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i + 1)
        plt.title(title[i])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(display_list[i]))
        plt.axis('off')
    plt.show()
    '''
    imgout = GGXpxl(V, L, N, albedomap, specularmap, roughnessmap)
    return imgout
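# Schlick's Fresnel approximation used above, F = F0 + (1 - F0) * (1 - V.H)^5, checked
# standalone at a few angles (illustrative sketch; F0 plays the role of 'metallic' here).
import tensorflow as tf

def fresnel_schlick(f0, vdoth):
    return f0 + (1.0 - f0) * (1.0 - vdoth) ** 5

print(fresnel_schlick(0.04, tf.constant([1.0, 0.5, 0.0])).numpy())
# at grazing angles (V.H -> 0) reflectance rises toward 1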
def mask_attn_weights(w):
    # w has shape [batch, heads, n, n]; b is a lower-triangular causal mask
    n = shape_list(w)[-1]
    b = tf.matrix_band_part(tf.ones([n, n]), -1, 0)
    b = tf.reshape(b, [1, 1, n, n])
    w = w * b + -1e9 * (1 - b)
    return w
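# Effect of the additive mask above: positions after the query row get ~ -1e9, which softmax
# turns into ~0 attention probability. A tiny standalone demo (TF 2.x eager; tf.linalg.band_part
# is the current alias of tf.matrix_band_part):
import tensorflow as tf

n = 3
w = tf.zeros([1, 1, n, n])
b = tf.reshape(tf.linalg.band_part(tf.ones([n, n]), -1, 0), [1, 1, n, n])
masked = w * b - 1e9 * (1 - b)
print(tf.nn.softmax(masked, axis=-1).numpy()[0, 0])
# row i is uniform over the first i+1 positions, 0 elsewhere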
dx = 5
dy = 3
N = 10
np.random.seed(0)
X_data = np.random.uniform(size=[N, dx]).astype('float32')
W_gt = np.random.uniform(size=[dx, dy]).astype('float32')
Y_data = np.matmul(X_data, W_gt) + 0.001 * np.random.randn(N, dy)

X = tf.placeholder(tf.float32, [None, dx])
Y = tf.placeholder(tf.float32, [None, dy])

# Set up the first estimate: W is factored as the product of two matrices
W1_hat = tf.Variable(tf.ones([dx, dx]))
W2_hat = tf.Variable(tf.ones([dx, dy]))
# Cost function is the squared test error
W_hat = tf.matmul(W1_hat, W2_hat)
Y_hat = tf.matmul(X, W_hat)
cost = tf.reduce_mean(tf.square(Y - Y_hat))

# Set up the second estimate the same way
W1_hatA = tf.Variable(tf.ones([dx, dx]))
W2_hatA = tf.Variable(tf.ones([dx, dy]))
# Cost function is the squared test error
W_hatA = tf.matmul(W1_hatA, W2_hatA)
Y_hatA = tf.matmul(X, W_hatA)
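# A minimal training loop for the first estimate, continuing the TF1 graph built above
# (it reuses X, Y, cost, X_data and Y_data from that snippet; the optimizer choice,
# learning rate and step count here are arbitrary, illustrative settings).
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(cost)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(500):
        _, c = sess.run([train_op, cost], feed_dict={X: X_data, Y: Y_data})
    print('final cost:', c)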
                          'weights')
    biases = tf.Variable(tf.zeros([output_dim]), 'biases')
    pre_activation = tf.matmul(inputs, weights) + biases
    if dropout:
        outputs = tf.nn.dropout(pre_activation, keep_prob)
    else:
        outputs = pre_activation
    return outputs

# In[8]:

# reshape the flat data to 3-D and add the data augmentation
n_inputs = tf.reshape(inputs, [50, 32, 32, 3])  # reshape image

# In[9]:

scale1 = tf.Variable(tf.ones([32]))   # unused below; sized for an earlier 32-channel layer
beta1 = tf.Variable(tf.zeros([32]))
scale = tf.Variable(tf.ones([64]))    # batch-norm scale
beta = tf.Variable(tf.zeros([64]))    # batch-norm offset
epsilon = 1e-3

with tf.name_scope('conv-layer-1'):
    W_conv1 = weight_variable('w_conv71', [5, 5, 3, 64])  # patch = 5, in size = 3, out size = 64
    conv1 = conv_layer(n_inputs, W_conv1, [1, 1, 1, 1], 64, dropout=True, keep_prob=keep_prob_conv)  # 32*32*64
    batch_mean1, batch_var1 = tf.nn.moments(conv1, [0, 1, 2])
    norm1 = tf.nn.batch_normalization(conv1, batch_mean1, batch_var1, beta, scale, epsilon)
    activ1 = tf.nn.elu(norm1)
    pool1 = tf.nn.avg_pool(activ1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

with tf.name_scope('conv-layer-2'):
    W_conv2 = weight_variable('w_conv72', [5, 5, 64, 64])  # patch = 5, in size = 64, out size = 64
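# tf.nn.moments + tf.nn.batch_normalization as used above, standalone (TF 2.x eager):
# normalize a toy activation map over batch/height/width, then apply offset and scale.
import tensorflow as tf

conv = tf.random.normal([4, 8, 8, 16])
mean, var = tf.nn.moments(conv, axes=[0, 1, 2])
beta, gamma, eps = tf.zeros([16]), tf.ones([16]), 1e-3
norm = tf.nn.batch_normalization(conv, mean, var, beta, gamma, eps)
m2, v2 = tf.nn.moments(norm, axes=[0, 1, 2])
print(m2.numpy().round(3), v2.numpy().round(3))  # ~0 mean, ~1 variance per channel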
def ones(shape, dtype=tf.float32, scope='default'):
    with tf.variable_scope(scope):
        init = tf.ones(shape, dtype=dtype)
        return tf.Variable(init)
def write_metric(self, metric, step):
    tf.summary.scalar("loss/actor_loss", metric['actor_loss'], step)
    tf.summary.scalar("loss/critic_loss", metric['critic_loss'], step)


if __name__ == "__main__":
    from crazycar.algos_tf.encoder import Combine
    from crazycar.utils import set_seed
    from crazycar.agents.constants import SENSOR_SHAPE, CAMERA_SHAPE

    set_seed()
    tmp = {
        "sensor": tf.ones((1, ) + SENSOR_SHAPE),
        "image": tf.ones((1, ) + CAMERA_SHAPE)
    }

    agent = DDPG(Combine, 2)
    # agent.actor_target.hard_update(agent.actor)
    # p1 = agent.critic(tmp, np.zeros((1, 2), dtype='float32'))
    # p2 = agent.critic_target(tmp, np.zeros((1, 2), dtype='float32'))
    # print(p1)
    # print(p2)
    print(len(agent.actor.trainable_variables))
    print(len(agent.actor_target.trainable_variables))
# Get the model's input, target and learning-rate nodes; these are all tf placeholders
input_text, targets, lr = build_inputs()
# Shape of the input data
input_data_shape = tf.shape(input_text)  # get the shape of this placeholder
cell, initial_state = build_lstm(input_data_shape[0], rnn_size)
# input_data_shape[0] is the batch_size, i.e. how many strings of characters are in one batch
logits, final_state = build_outputs(cell, rnn_size, input_text, vocab_size, embed_dim)
# rnn_size here is the number of hidden units
out_value = tf.nn.softmax(logits, name='probs')

cost = seq2seq.sequence_loss(
    logits,
    targets,
    tf.ones([input_data_shape[0], input_data_shape[1]]))  # loss function

optimizer = tf.train.AdamOptimizer(lr)  # gradient descent
gradients = optimizer.compute_gradients(cost)
# Clip the gradients so every resulting gradient lies in the range [-1, 1]
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)  # training op

batches = get_batches(int_text, batch_size, seq_length)  # get all the mini-batches

# Open a session and start training, passing the graph object created above to the session
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())  # initialize all variables
    for epoch_i in range(num_epochs):
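# compute_gradients + clip_by_value, shown standalone on a toy variable so the [-1, 1] clamp
# above is concrete (TF1 graph mode; the loss and values here are illustrative).
import tensorflow as tf

x = tf.Variable(3.0)
loss = 5.0 * x  # d(loss)/dx = 5
opt = tf.train.AdamOptimizer(0.01)
grads = opt.compute_gradients(loss)
capped = [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in grads if g is not None]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([g for g, _ in capped]))  # [1.0], clipped from 5.0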
def main():
    """Entrypoint.
    """
    # Load data
    train_data, dev_data, test_data = data_utils.load_data_numpy(
        config_data.input_dir, config_data.filename_prefix)
    with open(config_data.vocab_file, 'rb') as f:
        id2w = pickle.load(f)
    vocab_size = len(id2w)

    beam_width = config_model.beam_width

    # Create logging
    tx.utils.maybe_create_dir(FLAGS.model_dir)
    logging_file = os.path.join(FLAGS.model_dir, 'logging.txt')
    logger = utils.get_logger(logging_file)
    print('logging file is saved in: %s' % logging_file)

    # Build model graph
    encoder_input = tf.placeholder(tf.int64, shape=(None, None))
    decoder_input = tf.placeholder(tf.int64, shape=(None, None))
    batch_size = tf.shape(encoder_input)[0]
    # (text sequence length excluding padding)
    encoder_input_length = tf.reduce_sum(
        1 - tf.cast(tf.equal(encoder_input, 0), tf.int32), axis=1)
    labels = tf.placeholder(tf.int64, shape=(None, None))
    is_target = tf.cast(tf.not_equal(labels, 0), tf.float32)

    global_step = tf.Variable(0, dtype=tf.int64, trainable=False)
    learning_rate = tf.placeholder(tf.float64, shape=(), name='lr')

    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,
                                                hparams=config_model.emb)
    src_word_embeds = src_word_embedder(encoder_input)
    src_word_embeds = src_word_embeds * config_model.hidden_dim**0.5

    # Position embedding (shared b/w source and target)
    pos_embedder = tx.modules.SinusoidsPositionEmbedder(
        position_size=config_data.max_decoding_length,
        hparams=config_model.position_embedder_hparams)
    src_seq_len = tf.ones([batch_size], tf.int32) * tf.shape(encoder_input)[1]
    src_pos_embeds = pos_embedder(sequence_length=src_seq_len)
    src_input_embedding = src_word_embeds + src_pos_embeds

    encoder = TransformerEncoder(hparams=config_model.encoder)
    encoder_output = encoder(inputs=src_input_embedding,
                             sequence_length=encoder_input_length)

    # The decoder ties the input word embedding with the output logit layer.
    # The decoder masks out <PAD>'s embedding, which in effect means <PAD> has
    # an all-zero embedding, so here we explicitly set <PAD>'s embedding to all-zero.
    tgt_embedding = tf.concat([
        tf.zeros(shape=[1, src_word_embedder.dim]),
        src_word_embedder.embedding[1:, :]
    ], axis=0)
    tgt_embedder = tx.modules.WordEmbedder(tgt_embedding)
    tgt_word_embeds = tgt_embedder(decoder_input)
    tgt_word_embeds = tgt_word_embeds * config_model.hidden_dim**0.5

    tgt_seq_len = tf.ones([batch_size], tf.int32) * tf.shape(decoder_input)[1]
    tgt_pos_embeds = pos_embedder(sequence_length=tgt_seq_len)
    tgt_input_embedding = tgt_word_embeds + tgt_pos_embeds

    _output_w = tf.transpose(tgt_embedder.embedding, (1, 0))

    decoder = TransformerDecoder(vocab_size=vocab_size,
                                 output_layer=_output_w,
                                 hparams=config_model.decoder)
    # For training
    outputs = decoder(memory=encoder_output,
                      memory_sequence_length=encoder_input_length,
                      inputs=tgt_input_embedding,
                      decoding_strategy='train_greedy',
                      mode=tf.estimator.ModeKeys.TRAIN)
    mle_loss = transformer_utils.smoothing_cross_entropy(
        outputs.logits, labels, vocab_size, config_model.loss_label_confidence)
    mle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)

    train_op = tx.core.get_train_op(mle_loss,
                                    learning_rate=learning_rate,
                                    global_step=global_step,
                                    hparams=config_model.opt)

    tf.summary.scalar('lr', learning_rate)
    tf.summary.scalar('mle_loss', mle_loss)
    summary_merged = tf.summary.merge_all()

    # For inference (beam search)
    start_tokens = tf.fill([batch_size], bos_token_id)

    def _embedding_fn(x, y):
        x_w_embed = tgt_embedder(x)
        y_p_embed = pos_embedder(y)
        return x_w_embed * config_model.hidden_dim**0.5 + y_p_embed

    predictions = decoder(memory=encoder_output,
                          memory_sequence_length=encoder_input_length,
                          beam_width=beam_width,
                          length_penalty=config_model.length_penalty,
                          start_tokens=start_tokens,
                          end_token=eos_token_id,
                          embedding=_embedding_fn,
                          max_decoding_length=config_data.max_decoding_length,
                          mode=tf.estimator.ModeKeys.PREDICT)
    # Use the best sample from beam search
    beam_search_ids = predictions['sample_id'][:, :, 0]

    saver = tf.train.Saver(max_to_keep=5)
    best_results = {'score': 0, 'epoch': -1}

    def _eval_epoch(sess, epoch, mode):
        if mode == 'eval':
            eval_data = dev_data
        elif mode == 'test':
            eval_data = test_data
        else:
            raise ValueError('`mode` should be either "eval" or "test".')

        references, hypotheses = [], []
        bsize = config_data.test_batch_size
        for i in range(0, len(eval_data), bsize):
            sources, targets = zip(*eval_data[i:i + bsize])
            x_block = data_utils.source_pad_concat_convert(sources)
            feed_dict = {
                encoder_input: x_block,
                tx.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            fetches = {
                'beam_search_ids': beam_search_ids,
            }
            fetches_ = sess.run(fetches, feed_dict=feed_dict)
            hypotheses.extend(h.tolist() for h in fetches_['beam_search_ids'])
            references.extend(r.tolist() for r in targets)
        hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
        references = utils.list_strip_eos(references, eos_token_id)

        if mode == 'eval':
            # Write results to files to evaluate BLEU.
            # For 'eval' mode, the BLEU is based on token ids (rather than
            # text tokens) and serves only as a surrogate metric to monitor
            # the training process.
            fname = os.path.join(FLAGS.model_dir, 'tmp.eval')
            hypotheses = tx.utils.str_join(hypotheses)
            references = tx.utils.str_join(references)
            hyp_fn, ref_fn = tx.utils.write_paired_text(hypotheses,
                                                        references,
                                                        fname,
                                                        mode='s')
            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
            eval_bleu = 100. * eval_bleu
            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)
            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))

            if eval_bleu > best_results['score']:
                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)
                best_results['score'] = eval_bleu
                best_results['epoch'] = epoch
                model_path = os.path.join(FLAGS.model_dir, 'best-model.ckpt')
                logger.info('saving model to %s', model_path)
                print('saving model to %s' % model_path)
                saver.save(sess, model_path)

        elif mode == 'test':
            # For 'test' mode, together with the cmds in README.md, BLEU
            # is evaluated based on text tokens, which is the standard metric.
            fname = os.path.join(FLAGS.model_dir, 'test.output')
            hwords, rwords = [], []
            for hyp, ref in zip(hypotheses, references):
                hwords.append([id2w[y] for y in hyp])
                rwords.append([id2w[y] for y in ref])
            hwords = tx.utils.str_join(hwords)
            rwords = tx.utils.str_join(rwords)
            hyp_fn, ref_fn = tx.utils.write_paired_text(hwords,
                                                        rwords,
                                                        fname,
                                                        mode='s',
                                                        src_fname_suffix='hyp',
                                                        tgt_fname_suffix='ref')
            logger.info('Test output written to file: %s', hyp_fn)
            print('Test output written to file: %s' % hyp_fn)

    def _train_epoch(sess, epoch, step, smry_writer):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            config_data.batch_size,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=utils.batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())

        for _, train_batch in enumerate(train_iter):
            in_arrays = data_utils.seq2seq_pad_concat_convert(train_batch)
            feed_dict = {
                encoder_input: in_arrays[0],
                decoder_input: in_arrays[1],
                labels: in_arrays[2],
                learning_rate: utils.get_lr(step, config_model.lr)
            }
            fetches = {
                'step': global_step,
                'train_op': train_op,
                'smry': summary_merged,
                'loss': mle_loss,
            }

            fetches_ = sess.run(fetches, feed_dict=feed_dict)
            step, loss = fetches_['step'], fetches_['loss']
            if step and step % config_data.display_steps == 0:
                logger.info('step: %d, loss: %.4f', step, loss)
                print('step: %d, loss: %.4f' % (step, loss))
                smry_writer.add_summary(fetches_['smry'], global_step=step)
            if step and step % config_data.eval_steps == 0:
                _eval_epoch(sess, epoch, mode='eval')
        return step

    # Run the graph
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        smry_writer = tf.summary.FileWriter(FLAGS.model_dir, graph=sess.graph)

        if FLAGS.run_mode == 'train_and_evaluate':
            logger.info('Begin running with train_and_evaluate mode')

            if tf.train.latest_checkpoint(FLAGS.model_dir) is not None:
                logger.info('Restore latest checkpoint in %s' % FLAGS.model_dir)
                saver.restore(sess, tf.train.latest_checkpoint(FLAGS.model_dir))

            step = 0
            for epoch in range(config_data.max_train_epoch):
                step = _train_epoch(sess, epoch, step, smry_writer)

        elif FLAGS.run_mode == 'test':
            logger.info('Begin running with test mode')
            logger.info('Restore latest checkpoint in %s' % FLAGS.model_dir)
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.model_dir))
            _eval_epoch(sess, 0, mode='test')

        else:
            raise ValueError('Unknown mode: {}'.format(FLAGS.run_mode))
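# How the <PAD>-masked MLE above averages: zero out positions where label == 0, then divide
# the summed loss by the number of real targets. A standalone toy version (TF 2.x eager;
# the per-token losses here are made up for illustration):
import tensorflow as tf

labels = tf.constant([[5, 2, 0, 0]], dtype=tf.int64)  # 0 is <PAD>
per_tok_loss = tf.constant([[0.4, 0.6, 9.9, 9.9]])    # pretend per-token losses
is_target = tf.cast(tf.not_equal(labels, 0), tf.float32)
mle = tf.reduce_sum(per_tok_loss * is_target) / tf.reduce_sum(is_target)
print(mle.numpy())  # 0.5 -- the padded 9.9s contribute nothing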