def _partition_and_stitch(self, args, func_name):
    """Route every data row to its own likelihood and re-assemble the output.

    Args:
        args: sequence of tensors to be passed to the method called
            ``func_name`` on each entry of ``self.likelihood_list``.
            ``args[-1]`` is the 'Y' argument; its last column holds, for
            every row, the index of the likelihood that row belongs to.
        func_name: name of the likelihood method to call.

    Returns:
        The per-likelihood results stitched back into the original row
        order with ``tf.dynamic_stitch``.

    The caller's ``args`` sequence is not modified.
    """
    # Work on a private copy: the original code overwrote args[-1] in the
    # caller's list (and raised for tuples).
    args = list(args)

    # Split Y into the likelihood index (last column) and the actual data.
    Y = args[-1]
    ind = tf.cast(Y[:, -1], tf.int32)
    args[-1] = Y[:, :-1]

    # Split every argument into chunks, one chunk per likelihood, then
    # regroup so each element holds all the arguments of one likelihood.
    args = zip(*[tf.dynamic_partition(X, ind, self.num_likelihoods)
                 for X in args])

    # Apply the requested likelihood function to each section of the data.
    with params_as_tensors_for(self, convert=False):
        funcs = [getattr(lik, func_name) for lik in self.likelihood_list]
        results = [f(*args_i) for f, args_i in zip(funcs, args)]

    # Stitch the results back together in the original row order.
    partitions = tf.dynamic_partition(tf.range(0, tf.size(ind)), ind,
                                      self.num_likelihoods)
    return tf.dynamic_stitch(partitions, results)
def loop(q_, mask, mass_, found_):
    """Run one rescale-and-clip step against ``u`` from the enclosing scope.

    Entries of ``q_`` whose ``mask`` is 1 are rescaled so that they sum to
    ``1 - mass_``; entries with mask 0 are kept.  Wherever ``u`` is smaller
    than the rescaled value the output takes ``u`` instead, that amount is
    added to ``mass_`` and the position is cleared in ``mask``.

    Returns:
        (alpha, mask, mass_, found_) where ``found_`` is True when at least
        one position had ``u`` below the rescaled value in this step.
    """
    # Partition 0 holds the mask == 0 entries, partition 1 the mask == 1 ones.
    q_parts = tf.dynamic_partition(q_, mask, 2)
    q_positions = tf.dynamic_partition(tf.range(tf.shape(q_)[0]), mask, 2)

    active = q_parts[1]
    rescaled = active * (1.0 - mass_) / tf.reduce_sum(active)
    p_new = tf.dynamic_stitch(q_positions, [q_parts[0], rescaled])

    # less_mask is 1 where u is below p_new and 0 elsewhere.
    less_mask = tf.cast(tf.less(u, p_new), tf.int32)
    clip_positions = tf.dynamic_partition(
        tf.range(tf.shape(p_new)[0]), less_mask, 2)
    p_parts = tf.dynamic_partition(p_new, less_mask, 2)
    u_parts = tf.dynamic_partition(u, less_mask, 2)

    # Keep p_new where u is not smaller, otherwise take u.
    alpha = tf.dynamic_stitch(clip_positions, [p_parts[0], u_parts[1]])

    mass_ += tf.reduce_sum(u_parts[1])
    # Positions replaced by u are removed from the mask.
    mask = mask * (tf.ones_like(less_mask) - less_mask)
    nothing_clipped = tf.equal(tf.reduce_sum(less_mask), 0)
    found_ = tf.cond(nothing_clipped, lambda: False, lambda: True)

    alpha = tf.reshape(alpha, q_.shape)
    return alpha, mask, mass_, found_
def __call__(self, X):
    """Evaluate, for every row of X, the mean function chosen by its last column.

    The last column of ``X`` is cast to int32 and used as an index into
    ``self.meanfunction_list``; the remaining columns are the inputs handed
    to the selected mean function.  Results are returned in the original
    row order.
    """
    # Last column, written with gather/transpose; equivalent to X[:, -1].
    selector = tf.gather(tf.transpose(X), tf.shape(X)[1] - 1)
    selector = tf.cast(selector, tf.int32)

    # All but the last column; equivalent to X[:, :-1].
    transposed = tf.transpose(X)
    feature_columns = tf.range(0, tf.shape(X)[1] - 1)
    X = tf.transpose(tf.gather(transposed, feature_columns))

    # One chunk of rows per mean function.
    chunks = tf.dynamic_partition(X, selector, len(self.meanfunction_list))

    # Apply each mean function to its own chunk.
    outputs = []
    for chunk, mean_function in zip(chunks, self.meanfunction_list):
        outputs.append(mean_function(chunk))

    # Row positions of each chunk, used to restore the original order.
    row_positions = tf.dynamic_partition(
        tf.range(0, tf.size(selector)), selector,
        len(self.meanfunction_list))
    return tf.dynamic_stitch(row_positions, outputs)
def split_apply_merge(inp, partitions, fns):
    """Split input according to partitions. Pass results through fns and merge.

    Args:
        inp: the input vector
        partitions: tensor of same length as input vector, with integer
            values in [0, len(fns))
        fns: the functions, one per partition.

    Returns:
        the vector routed, where routed[i] = fns[partitions[i]](inp[i])
    """
    num_fns = len(fns)
    new_inputs = tf.dynamic_partition(inp, partitions, num_fns)
    new_outputs = [fn(x) for fn, x in zip(fns, new_inputs)]
    # Use the runtime length rather than the static shape so that inputs
    # whose leading dimension is unknown at graph-construction time work.
    new_indices = tf.dynamic_partition(
        tf.range(0, tf.shape(inp)[0]), partitions, num_fns)
    return tf.dynamic_stitch(new_indices, new_outputs)
def add_loss(graph, locations, confidences, batched_bboxes,
             batched_num_bboxes, bbox_priors, cfg):
    """Build the location and confidence losses inside the "loss" name scope.

    Predictions are flattened, the priors are added to the predicted
    residuals, and ``compute_assignments`` (run through ``tf.py_func``)
    decides which predictions are matched to a ground-truth box.

    Returns:
        (location_loss, confidence_loss, matching)
    """
    with graph.name_scope("loss"):
        # Flatten predictions to one row per predicted box.
        locations = tf.reshape(locations, [-1, 4])
        confidences = tf.reshape(confidences, [-1])

        # Predicted locations are residuals relative to the priors.
        locations += tf.tile(bbox_priors, [cfg.BATCH_SIZE, 1])

        # Small epsilon added to the confidences before the logs below.
        confidences += small_epsilon

        # matching: 0/1 per prediction, used as the partition index below.
        # stacked_gt_bboxes: the ground-truth boxes of the whole batch.
        matcher_inputs = [locations, confidences, batched_bboxes,
                          batched_num_bboxes, cfg.BATCH_SIZE,
                          cfg.LOCATION_LOSS_ALPHA]
        matching, stacked_gt_bboxes = tf.py_func(
            compute_assignments, matcher_inputs, [tf.int32, tf.float32],
            name="bipartite_matching")

        # Partition 0 holds the unmatched predictions, partition 1 the
        # matched ones.
        unmatched_locations, matched_locations = tf.dynamic_partition(
            locations, matching, 2)
        unmatched_confidences, matched_confidences = tf.dynamic_partition(
            confidences, matching, 2)

        # Weighted L2 loss between matched boxes and their ground truth.
        residuals = matched_locations - stacked_gt_bboxes
        location_loss = cfg.LOCATION_LOSS_ALPHA * tf.nn.l2_loss(residuals)

        # Negative log of matched confidences, minus the log of one minus
        # the unmatched confidences.
        matched_term = -1. * tf.reduce_sum(tf.log(matched_confidences))
        unmatched_term = tf.reduce_sum(
            tf.log((1. - unmatched_confidences) + small_epsilon))
        confidence_loss = matched_term - unmatched_term

        return location_loss, confidence_loss, matching
def apply_factor(tensor, *args, **kwargs):
    """Apply a factor, given as a lookup tensor, to its argument marginals.

    Args:
        tensor: triple ``(tensor, output_size, error_symbol)``.
        *args: the factor's arguments; they are first passed through
            ``slice_out_int_literals`` and ``make_batch_consistent``.
        **kwargs: only ``scope`` (name scope, default "") is read.

    Returns:
        ``one_hot(tensor, output_size)`` when there are no arguments or the
        sliced tensor has no dimensions left.  Otherwise the stacked
        ``logsumexp`` of the joint over every output value, squeezed when
        the arguments were not batched.
    """
    scope = kwargs.pop("scope", "")
    with tf.name_scope(scope):
        # No arguments: the factor is a constant.
        if not args:
            tensor, output_size, error_symbol = tensor
            return one_hot(tensor, output_size, scope=scope)

        tensor, args = slice_out_int_literals(tensor, list(args))
        args, is_batched = make_batch_consistent(args)
        tensor, output_size, error_symbol = tensor

        # handle the case where all arguments were int literals
        tensor_dim_sizes = [dim.value for dim in tensor.get_shape()]
        if not tensor_dim_sizes:
            return one_hot(tensor, output_size, scope=scope)

        # Each arg is batch size x arg dim. Add dimensions to enable
        # broadcasting: argument i gets a size-1 axis for every other
        # argument.
        n_args = len(args)
        for i in range(n_args):
            for j in range(n_args):
                if j != i:
                    args[i] = tf.expand_dims(args[i], j + 1)

        # compute joint before tensor is applied
        joint = 0
        for arg in args:
            joint = joint + arg

        # Flatten to one row per tensor entry: |tensor| x batch_size.
        joint = tf.reshape(joint, (-1, np.prod(tensor_dim_sizes)))
        joint = tf.transpose(joint, [1, 0])
        flat_tensor = tf.reshape(tensor, [-1])

        # Group the joint entries by the output value the tensor maps them
        # to; the group of the error symbol, if any, is dropped.
        if error_symbol is not None:
            to_logsumexp = tf.dynamic_partition(joint, flat_tensor,
                                                output_size + 1)
            del to_logsumexp[error_symbol]
        else:
            to_logsumexp = tf.dynamic_partition(joint, flat_tensor,
                                                output_size)

        # A real list (not a lazy map object) so this also works on Python 3.
        result = tf.pack([logsumexp(group, reduction_indices=0)
                          for group in to_logsumexp])
        result = tf.transpose(result, [1, 0])
        if not is_batched:
            result = tf.squeeze(result)
        return result
def mmd_objective(z, s, sdim):
    """Compute the MMD from latent space and nuisance_id.

    ``z`` is partitioned by ``s`` into ``sdim`` groups.  One random-normal
    row is appended to every group so that a group without samples is never
    empty.  With exactly two groups the MMD between them is returned;
    otherwise the MMD of every padded group against the full ``z`` is
    summed.

    Notes:
        Reimplementation in tensorflow of the Variational Fair Autoencoder
        https://arxiv.org/abs/1511.00830
    """
    compute_mmd = mmd_fourier
    latent_dim = z.get_shape().as_list()[1]

    def pad_with_noise(group):
        # Guards against a category that has no samples at all.
        return tf.concat([group, tf.random_normal([1, latent_dim])], 0)

    # STEP 1: split the samples by nuisance category.
    groups = tf.dynamic_partition(z, s, sdim)

    # STEP 2: add noise to the groups and measure the discrepancy.
    if len(groups) == 2:
        first = pad_with_noise(groups[0])
        second = pad_with_noise(groups[1])
        return compute_mmd(first, second)

    total = 0
    for group in groups:
        total += compute_mmd(pad_with_noise(group), z)
    return total
def mol_conv_layer(atoms, cH_params, aux_params, layer):
    """Apply one degree-wise affine + ReLU layer to the atom features.

    Neighbour features are summed with ``sum_neigh``, the atoms are
    partitioned by degree (``deg_list_ph`` - 1), a per-degree affine map
    from ``cH_params`` is applied for degrees 1..5, and the pieces are
    concatenated in degree order before the ReLU.
    """
    # Sum all neighbors using adjacency matrix
    neighbour_sum = sum_neigh(atoms, aux_params, layer)

    # Partition the atom matrix by degree of atoms (degrees are 1-based).
    # NOTE: the original author flagged that this partition creates
    # problems with gradients and that slicing should be used instead.
    degree_index = tf.sub(deg_list_ph, tf.constant(1, dtype=tf.int32))
    by_degree = tf.dynamic_partition(neighbour_sum, degree_index, max_deg)

    transformed = []
    for deg in range(1, 6):
        # Parameters are keyed per degree, and per layer as well when
        # separate depths are enabled.
        if bool_separate_conv_depths:
            suffix = str(deg) + '_' + str(layer)
        else:
            suffix = str(deg)
        transformed.append(
            affine(by_degree[deg - 1],
                   cH_params['W' + suffix],
                   cH_params['b' + suffix]))

    # Concatenation means the output is ordered by degree, so an arbitrary
    # deg_list order is not preserved.
    hidden_atoms = tf.concat(0, transformed)
    return tf.nn.relu(hidden_atoms)
def testScalarIndexOutOfRange(self):
    """A scalar partition index outside [0, num_partitions) fails at run time."""
    out_of_range = 17
    expected_error = r"partitions = 17 is not in \[0, 7\)"
    with self.test_session() as sess:
        zeros = np.zeros(5)
        partitioned = tf.dynamic_partition(
            zeros, out_of_range, num_partitions=7)
        with self.assertRaisesOpError(expected_error):
            sess.run(partitioned)
def smoothed_l1_loss(input_tensor):
    """Sum a piecewise loss over all elements of ``input_tensor``.

    Elements with absolute value at most 1 contribute ``0.5 * x**2``;
    larger ones contribute ``|x| - 0.5``.
    """
    magnitude = tf.abs(input_tensor)
    # 1 where |x| is above 1, 0 otherwise; used as the partition index.
    is_large = tf.to_int32(magnitude > 1)
    small, large = tf.dynamic_partition(magnitude, is_large, 2)
    quadratic_part = tf.reduce_sum(0.5 * tf.square(small))
    linear_part = tf.reduce_sum(large - 0.5)
    return quadratic_part + linear_part
def __call__(self, *parents):
    """Gather per-atom features into one feature row per molecule.

    Args:
        *parents: parent layers, laid out as
            [atom_features, deg_slice, membership, deg_adj_list
            placeholders...]; only ``parents[0].out_tensor`` and
            ``parents[2].out_tensor`` are read.

    Returns:
        Tensor with, for every molecule, the sum and the max over its atoms
        concatenated along axis 1, passed through ``self.activation_fn``
        when one is set.  Also stored in ``self.out_tensor``.

    Raises:
        AssertionError: if ``self.batch_size`` is not larger than 1.
    """
    atom_features = parents[0].out_tensor

    # Extract graph topology
    membership = parents[2].out_tensor

    # The original wrote ``assert (cond, msg)``, which asserts a non-empty
    # tuple and therefore never fired.
    assert self.batch_size > 1, "graph_gather requires batches larger than 1"

    # Obtain the partitions for each of the molecules
    activated_par = tf.dynamic_partition(atom_features, membership,
                                         self.batch_size)

    # Sum over atoms for each molecule
    sparse_reps = [
        tf.reduce_sum(activated, 0, keep_dims=True)
        for activated in activated_par
    ]
    # Max over atoms for each molecule
    max_reps = [
        tf.reduce_max(activated, 0, keep_dims=True)
        for activated in activated_par
    ]

    # Stack molecules along axis 0, then join sum and max features.
    sparse_reps = tf.concat(axis=0, values=sparse_reps)
    max_reps = tf.concat(axis=0, values=max_reps)
    mol_features = tf.concat(axis=1, values=[sparse_reps, max_reps])

    if self.activation_fn is not None:
        mol_features = self.activation_fn(mol_features)
    self.out_tensor = mol_features
    return mol_features
def _build_graph(self):
    """Build the tensorflow nodes for one round of clustering.

    Sets ``self.centroids_in`` (variable), ``self.clusters``,
    ``self.centroids`` and ``self.update_roids`` (the assign op).
    """
    # Start from self.k randomly chosen rows of the data.  Keeping them in a
    # tf.Variable matters: the original author notes that without it the
    # result is glitchy clustered images.
    shuffled = tf.random_shuffle(self.arr)
    initial_centroids = tf.slice(shuffled, [0, 0], [self.k, -1])
    self.centroids_in = tf.Variable(initial_centroids, name="centroids_in")

    # One copy of every pixel per centroid:
    # shape (self.n_pixels, self.k, size_data = 2 + self.channels)
    tiled_pix = tf.tile(tf.expand_dims(self.arr, 1),
                        multiples=[1, self.k, 1],
                        name="tiled_pix")

    # Squared differences only; the author skips the square root because it
    # is not needed to pick the nearest centroid.
    with tf.name_scope("radical_euclidean"):
        squared_diff = tf.square(tf.sub(tiled_pix, self.centroids_in))

    # shape (self.n_pixels, self.k)
    distances = tf.reduce_sum(squared_diff, reduction_indices=2,
                              name="distances")
    # shape (self.n_pixels)
    nearest = tf.to_int32(tf.argmin(distances, 1), name="nearest")

    # list of self.k tensors, each of shape (size_cluster, size_data)
    self.clusters = tf.dynamic_partition(self.arr, nearest, self.k)

    # shape (self.k, size_data)
    cluster_means = []
    for cluster in self.clusters:
        cluster_means.append(tf.reduce_mean(cluster, 0))
    self.centroids = tf.pack(cluster_means, name="centroids_out")

    self.update_roids = tf.assign(self.centroids_in, self.centroids)
def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Perform M steps of set2set gather.

    Detailed description in https://arxiv.org/abs/1511.06391.  The first
    input layer provides the atom features and the second the molecule
    index of every atom.  The model using this layer must set
    pad_batches=True.

    Returns:
        The ``q_star`` tensor of the last step; it is also stored in
        ``self.out_tensor`` when ``set_tensors`` is true.
    """
    layers = self.in_layers if in_layers is None else in_layers
    layers = convert_to_layers(layers)
    self.build()

    # Extract atom_features
    atom_features = layers[0].out_tensor
    atom_split = layers[1].out_tensor

    state_shape = (self.batch_size, self.n_hidden)
    self.c = tf.zeros(state_shape)
    self.h = tf.zeros(state_shape)

    for _ in range(self.M):
        # Broadcast every molecule's query vector to its atoms.
        per_atom_query = tf.gather(self.h, atom_split)
        energies = tf.reduce_sum(atom_features * per_atom_query, 1)
        energies_per_mol = tf.dynamic_partition(energies, atom_split,
                                                self.batch_size)

        # Add another value(~-Inf) to prevent error in softmax
        padded = [
            tf.concat([mol_energy, tf.constant([-1000.])], 0)
            for mol_energy in energies_per_mol
        ]
        # Softmax per molecule; the padding entry is dropped again.
        attention = tf.concat(
            [tf.nn.softmax(mol_energy)[:-1] for mol_energy in padded], 0)

        weighted_atoms = tf.reshape(attention, [-1, 1]) * atom_features
        readout = tf.segment_sum(weighted_atoms, atom_split)

        q_star = tf.concat([self.h, readout], axis=1)
        self.h, self.c = self.LSTMStep(q_star, self.c)

    out_tensor = q_star
    if set_tensors:
        self.variables = self.trainable_weights
        self.out_tensor = out_tensor
    return out_tensor
def call(self, x, mask=None):
    """Execute this layer on input tensors.

    x = [atom_features, membership]

    Parameters
    ----------
    x: list
      Tensors as listed above
    mask: bool, optional
      Ignored. Present only to shadow superclass call() method.

    Returns
    -------
    outputs: Tensor
      Tensor of molecular features
    """
    # Add trainable weights
    self.build()

    atom_features, membership = x[0], x[1]

    if self.gaussian_expand:
        atom_features = self.gaussian_histogram(atom_features)

    # Sum the atom rows of every molecule.
    per_molecule = tf.dynamic_partition(atom_features, membership,
                                        self.batch_size)
    molecule_sums = []
    for molecule in per_molecule:
        molecule_sums.append(tf.reduce_sum(molecule, 0))
    output_molecules = tf.stack(molecule_sums)

    # The linear map and activation only apply to the expanded features.
    if self.gaussian_expand:
        output_molecules = tf.matmul(output_molecules, self.W) + self.b
        output_molecules = self.activation(output_molecules)
    return output_molecules
def graph_gather(atoms, membership_placeholder, batch_size):
    """Sum atom features per molecule.

    Parameters
    ----------
    atoms: tf.Tensor
      Of shape (n_atoms, n_feat)
    membership_placeholder: tf.Placeholder
      Of shape (n_atoms,). Molecule each atom belongs to.
    batch_size: int
      Batch size for deep model. Must be larger than 1.

    Returns
    -------
    tf.Tensor
      Of shape (batch_size, n_feat)

    Raises
    ------
    AssertionError
      If batch_size is not larger than 1.
    """
    # WARNING: Does not work for Batch Size 1! If batch_size = 1, then use
    # reduce_sum!  The original wrote ``assert (cond, msg)``, which asserts
    # a non-empty tuple and therefore never fired.
    assert batch_size > 1, "graph_gather requires batches larger than 1"

    # Obtain the partitions for each of the molecules
    activated_par = tf.dynamic_partition(atoms, membership_placeholder,
                                         batch_size)

    # Sum over atoms for each molecule
    sparse_reps = [
        tf.reduce_sum(activated, 0, keep_dims=True)
        for activated in activated_par
    ]

    # Get the final sparse representations
    return tf.concat(axis=0, values=sparse_reps)
def testErrorIndexOutOfRange(self):
    """A partition index of 99 with 4 partitions is reported at run time."""
    rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14]]
    row_partitions = [0, 2, 99, 2, 2]
    expected_error = r"partitions\[2\] = 99 is not in \[0, 4\)"
    with self.test_session() as sess:
        data = tf.constant(rows)
        indices = tf.constant(row_partitions)
        partitioned = tf.dynamic_partition(data, indices, num_partitions=4)
        with self.assertRaisesOpError(expected_error):
            sess.run(partitioned)
def pair_loss(y_true, y_pred):
    """Mean sigmoid of (negative - positive) over all negative/positive pairs.

    ``y_true`` is cast to int32 and used to split ``y_pred`` into the
    predictions labelled 0 (negatives) and 1 (positives).
    """
    labels = tf.cast(y_true, tf.int32)
    negatives, positives = tf.dynamic_partition(y_pred, labels, 2)
    # New leading axis on positives, new trailing axis on negatives, so the
    # subtraction broadcasts over every negative/positive pair.
    positives_row = tf.expand_dims(positives, 0)
    negatives_col = tf.expand_dims(negatives, -1)
    return K.mean(K.sigmoid(negatives_col - positives_row))
def scatter_update(cls, factor, indices, values, sharding_func):
    """Helper function for doing sharded scatter update.

    ``factor`` is a list of shards.  With a single shard the update is
    applied directly, colocated with that shard.  Otherwise
    ``sharding_func`` maps ``indices`` to (shard assignment, id within
    shard), and one scatter update is issued per shard.

    Returns:
        The op of the single update, or a group op over all shard updates.
    """
    assert isinstance(factor, list)

    if len(factor) == 1:
        only_shard = factor[0]
        with ops.colocate_with(only_shard):
            # TODO(agarwal): assign instead of scatter update for full batch
            # update.
            return tf.scatter_update(only_shard, indices, values).op

    num_shards = len(factor)
    assignments, new_ids = sharding_func(indices)
    assert assignments is not None
    assignments = tf.cast(assignments, tf.int32)

    # Route ids and values to the shard they were assigned to.
    ids_per_shard = tf.dynamic_partition(new_ids, assignments, num_shards)
    values_per_shard = tf.dynamic_partition(values, assignments, num_shards)

    updates = [
        tf.scatter_update(shard, shard_ids, shard_values)
        for shard, shard_ids, shard_values in zip(
            factor, ids_per_shard, values_per_shard)
    ]
    return tf.group(*updates)
def update_centroids(samples, centroids, num_clusters):
    """Return new centroids: the mean of the samples nearest to each centroid.

    Every sample is assigned to the centroid with the smallest squared
    distance; the result stacks the mean of each of the ``num_clusters``
    groups along axis 0.
    """
    # Broadcast to every (centroid, sample) pair.
    samples_row = tf.expand_dims(samples, 0)
    centroids_col = tf.expand_dims(centroids, 1)
    offsets = tf.sub(samples_row, centroids_col)
    distances = tf.reduce_sum(tf.square(offsets), 2)

    # Index of the closest centroid for every sample.
    assignment = tf.to_int32(tf.argmin(distances, 0))
    clusters = tf.dynamic_partition(samples, assignment, num_clusters)

    means = []
    for cluster in clusters:
        means.append(tf.expand_dims(tf.reduce_mean(cluster, 0), 0))
    return tf.concat(0, means)
def hsplit(x):
    """Split ``x`` column-wise into its I and Q parts.

    ``partIQ`` (from the enclosing scope) is the partition index: partition
    0 becomes Q and partition 1 becomes I.  Both are reshaped to
    [1, 505, 422, 1].

    Returns:
        (I, Q)
    """
    # dynamic_partition splits along the first axis, so a horizontal split
    # is done on the transpose.
    transposed = tf.transpose(x)
    q_part, i_part = tf.dynamic_partition(transposed, partIQ, 2)

    # Undo the transpose and shape each half into a half-width image.
    image_shape = [1, 505, 422, 1]
    Q = tf.reshape(tf.transpose(q_part), image_shape)
    I = tf.reshape(tf.transpose(i_part), image_shape)
    return I, Q
def _add_beam_tag_dynamic(self, feat_x, beam_path, cur_size):
    """Tag ``feat_x`` with every path currently in the beam.

    ``beam_path`` is split so that row i lands in partition i (out of
    ``self.beam_size`` partitions).  Non-empty partitions are passed to
    ``self._add_tag_dynamic``; empty ones contribute a zero-row tensor.
    The pieces are concatenated along axis 0.
    """
    beam_slots = self.beam_size
    paths = tf.dynamic_partition(beam_path, tf.range(cur_size), beam_slots)

    tagged = []
    for path in paths:
        is_empty = tf.less(tf.shape(path)[0], 1)
        tagged.append(tf.cond(
            is_empty,
            lambda: tf.zeros(shape=[0, self.window_size, self.dim_feat_x]),
            lambda: self._add_tag_dynamic(feat_x, tf.reshape(path, [-1]))))
    return tf.concat(0, tagged)
def testHigherRank(self):
    """Partitioning with higher-rank indices matches numpy boolean indexing."""
    np.random.seed(7)
    partition_shapes = ((4,), (4, 5), (4, 5, 2))
    trailing_shapes = ((), (6,), (6, 7))
    with self.test_session() as sess:
        for n in (2, 3):
            for shape in partition_shapes:
                flat = np.random.randint(n, size=np.prod(shape))
                partitions = flat.reshape(shape)
                for extra_shape in trailing_shapes:
                    data = np.random.randn(*(shape + extra_shape))
                    outputs = tf.dynamic_partition(
                        data, partitions, num_partitions=n)
                    self.assertEqual(n, len(outputs))
                    output_values = sess.run(outputs)
                    for i, output in enumerate(output_values):
                        expected = data[partitions == i]
                        self.assertAllEqual(output, expected)
def testHigherRankIndexOutOfRange(self):
    """An out-of-range index is reported with its full multi-dim position."""
    shape = (2, 3)
    with self.test_session() as sess:
        indices = tf.placeholder(shape=shape, dtype=np.int32)
        data = np.zeros(shape + (5,))
        partitioned = tf.dynamic_partition(data, indices, num_partitions=7)
        # Put the bad value at every position in turn, in row-major order.
        for i, j in np.ndindex(*shape):
            bad = np.zeros(shape, dtype=np.int32)
            bad[i, j] = 17
            expected_error = (
                r"partitions\[%d,%d\] = 17 is not in \[0, 7\)" % (i, j))
            with self.assertRaisesOpError(expected_error):
                sess.run(partitioned, feed_dict={indices: bad})
def triplet_loss(infer, labels, batch_size, radius = 1.0):
    """Add a pairwise distance loss to the 'losses' collection and total it.

    ``infer`` is split in two halves along axis 0 and the squared distance
    between matching rows is computed.  Pairs with label 1 contribute that
    distance; pairs with label 0 contribute
    ``max(0, radius * radius - distance)``.  ``batch_size`` is not used.

    Returns:
        The sum of everything in the 'losses' collection, named
        'total_loss'.
    """
    first_half, second_half = tf.split(0, 2, infer)
    squared_distance = tf.reduce_sum(tf.square(first_half - second_half), 1)

    # Partition 0: label 0 pairs, partition 1: label 1 pairs.
    by_label = tf.dynamic_partition(squared_distance, labels, 2)
    positive_terms = by_label[1]
    negative_terms = tf.maximum(0.0, radius * radius - by_label[0])

    all_terms = tf.concat(0, [positive_terms, negative_terms])
    loss = tf.reduce_mean(all_terms)

    tf.add_to_collection('losses', loss)
    collected = tf.get_collection('losses')
    return tf.add_n(collected, name = 'total_loss')
def testSimpleOneDimensional(self):
    """Vector data is split into the expected per-partition vectors."""
    expected_values = [[0, 13], [17], [2, 4], [39]]
    with self.test_session() as sess:
        data = tf.constant([0, 13, 2, 39, 4, 17])
        indices = tf.constant([0, 0, 2, 3, 2, 1])
        partitions = tf.dynamic_partition(data, indices, num_partitions=4)
        partition_vals = sess.run(partitions)

    for expected, actual in zip(expected_values, partition_vals):
        self.assertAllEqual(expected, actual)

    # Vector data input to DynamicPartition results in
    # `num_partitions` vectors of unknown length.
    for partition in partitions:
        self.assertEqual([None], partition.get_shape().as_list())
def testSimpleTwoDimensional(self):
    """Matrix data is split row-wise into the expected per-partition matrices."""
    rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
            [9, 10, 11], [12, 13, 14], [15, 16, 17]]
    expected_values = [
        [[0, 1, 2], [3, 4, 5]],
        [[15, 16, 17]],
        [[6, 7, 8], [12, 13, 14]],
        [[9, 10, 11]],
    ]
    with self.test_session() as sess:
        data = tf.constant(rows)
        indices = tf.constant([0, 0, 2, 3, 2, 1])
        partitions = tf.dynamic_partition(data, indices, num_partitions=4)
        partition_vals = sess.run(partitions)

    for expected, actual in zip(expected_values, partition_vals):
        self.assertAllEqual(expected, actual)

    # Matrix data input to DynamicPartition results in `num_partitions`
    # matrices with an unknown number of rows, and 3 columns.
    for partition in partitions:
        self.assertEqual([None, 3], partition.get_shape().as_list())
def triplet_loss(infer, labels, radius = 2.0):
    """Pairwise distance loss over the two halves of ``infer``.

    Args:
        infer: inference concatenate together with 2 * batch_size
        labels: 0 or 1 with batch_size
        radius: label 0 pairs contribute
            ``max(0, radius * radius - squared distance)``
    Return:
        loss: mean over the label 1 squared distances and the label 0 terms
    """
    first_half, second_half = tf.split(0, 2, infer)
    squared_distance = tf.reduce_sum(tf.square(first_half - second_half), 1)

    # Partition 0: label 0 pairs, partition 1: label 1 pairs.
    by_label = tf.dynamic_partition(squared_distance, labels, 2)
    positive_terms = by_label[1]
    negative_terms = tf.maximum(0.0, radius * radius - by_label[0])

    all_terms = tf.concat(0, [positive_terms, negative_terms])
    return tf.reduce_mean(all_terms)
def testHigherRank(self):
    """Higher-rank partitioning matches numpy, and gradients flow to data only."""
    np.random.seed(7)
    partition_shapes = ((4,), (4, 5), (4, 5, 2))
    trailing_shapes = ((), (6,), (6, 7))
    with self.test_session() as sess:
        for n in (2, 3):
            for shape in partition_shapes:
                flat = np.random.randint(n, size=np.prod(shape))
                partitions = flat.reshape(shape)
                for extra_shape in trailing_shapes:
                    data = np.random.randn(*(shape + extra_shape))
                    partitions_t = tf.constant(partitions, dtype=tf.int32)
                    data_t = tf.constant(data)
                    outputs = tf.dynamic_partition(
                        data_t, partitions_t, num_partitions=n)
                    self.assertEqual(n, len(outputs))

                    outputs_val = sess.run(outputs)
                    for i, output in enumerate(outputs_val):
                        expected = data[partitions == i]
                        self.assertAllEqual(output, expected)

                    # Test gradients: feeding 7 * output as the upstream
                    # gradient must give 7 * data back.
                    upstream = [7 * output for output in outputs_val]
                    data_grad, partitions_grad = tf.gradients(
                        outputs, [data_t, partitions_t], upstream)
                    # Partitions has no gradients
                    self.assertEqual(partitions_grad, None)
                    self.assertAllEqual(7 * data, sess.run(data_grad))
def update_centroids(self, nearest_indices):
    """Return the mean of ``self.v_data`` per cluster, stacked along axis 0.

    ``nearest_indices`` gives the cluster of every data row and is cast to
    int32 before partitioning into ``self.n_clusters`` groups.
    """
    assignment = tf.to_int32(nearest_indices)
    clusters = tf.dynamic_partition(self.v_data, assignment, self.n_clusters)
    means = []
    for cluster in clusters:
        # Keep a leading axis of size 1 so the means can be concatenated.
        means.append(tf.expand_dims(tf.reduce_mean(cluster, 0), 0))
    return tf.concat(means, 0)
def assign_new_model_parameters(params_1d):
    """Write a flat parameter vector back into the model's variables.

    ``params_1d`` is split with the partition index ``part`` into
    ``n_tensors`` pieces; piece i is reshaped to ``shapes[i]`` and assigned
    to ``model.trainable_variables[i]``.  ``part``, ``n_tensors``,
    ``shapes`` and ``model`` come from the enclosing scope.
    """
    pieces = tf.dynamic_partition(params_1d, part, n_tensors)
    for position, (target_shape, flat_piece) in enumerate(zip(shapes, pieces)):
        reshaped = tf.reshape(flat_piece, target_shape)
        model.trainable_variables[position].assign(reshaped)
def batch_normalization(batch_data_list, types_list, miss_list):
    """Normalize the observed entries of every variable in the batch.

    For each variable, column i of ``miss_list`` splits the rows into
    missing (0) and observed (1).  Depending on ``types_list[i]['type']``:

    * 'real':  observed values are standardized with their batch moments;
    * 'pos':   the same, applied to ``log(1 + value)``;
    * 'count': observed values are replaced by their log;
    * other:   the variable is passed through unchanged.

    Missing rows are kept as they are and put back at their original
    position.

    Returns:
        (normalized_data, normalization_parameters): two lists with one
        entry per variable; the parameters are ``[mean, variance]`` for
        'real' and 'pos' and ``[0.0, 1.0]`` otherwise.
    """
    normalized_data = []
    normalization_parameters = []

    for i, column in enumerate(batch_data_list):
        # Partition 0: missing rows, partition 1: observed rows.
        missing, observed = tf.dynamic_partition(
            column, miss_list[:, i], num_partitions=2)
        # Row positions of both groups, needed to stitch them back.
        row_positions = tf.dynamic_partition(
            tf.range(tf.shape(column)[0]), miss_list[:, i], num_partitions=2)

        kind = types_list[i]['type']

        if kind in ('real', 'pos'):
            # 'pos' variables are standardized in log space.
            if kind == 'real':
                values = observed
            else:
                values = tf.math.log(1.0 + observed)
            mean, variance = tf.nn.moments(values, 0)
            # Avoid zero values
            variance = tf.clip_by_value(variance, 1e-6, 1e20)
            standardized = tf.nn.batch_normalization(
                values, mean, variance,
                offset=0.0, scale=1.0, variance_epsilon=1e-6)
            result = tf.dynamic_stitch(row_positions, [missing, standardized])
            parameters = [mean, variance]
        elif kind == 'count':
            # Input log of the data
            logged = tf.math.log(observed)
            result = tf.dynamic_stitch(row_positions, [missing, logged])
            parameters = [0.0, 1.0]
        else:
            # Don't normalize the categorical and ordinal variables
            result = column
            parameters = [0.0, 1.0]

        normalized_data.append(result)
        normalization_parameters.append(parameters)

    return normalized_data, normalization_parameters
def __update_center(self, data, nearest):
    """Return the new centers: the mean of the rows assigned to each cluster.

    ``nearest`` gives the cluster of every row of ``data`` and is cast to
    int32 before partitioning into ``self._k`` groups.
    """
    assignment = tf.to_int32(nearest)
    clusters = tf.dynamic_partition(data, assignment, self._k)
    means = []
    for cluster in clusters:
        # Keep a leading axis of size 1 so the means can be concatenated.
        means.append(tf.expand_dims(tf.reduce_mean(cluster, 0), 0))
    return tf.concat(means, 0)
def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
        features: Dictionary of feature tensors, returned from `input_fn`.
            The 'query', 'query_box', 'query_idx' and true-image-shape
            entries are overwritten in place.
        labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
            otherwise None.
        mode: Mode key from tf.estimator.ModeKeys.
        params: Parameter dictionary passed from the estimator.

    Returns:
        An `EstimatorSpec` that encapsulates the model and its serving
        configurations (a `TPUEstimatorSpec` when `use_tpu` is set and the
        mode is not EVAL).
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Make sure to set the Keras learning phase. True during training,
    # False for inference.
    tf.keras.backend.set_learning_phase(is_training)
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold = None
    # Must exist on every path: it is read when building the
    # TPUEstimatorSpec but only assigned when fine-tuning on TPU.
    scaffold_fn = None

    batch_size = features['hash']._shape_as_list()[0]

    # Drop the padded part of the query features.
    mask = tf.sequence_mask(features['query_sec'] * 24,
                            tf.shape(features['query'])[1])
    features['query'] = tf.boolean_mask(features['query'], mask)

    # Same for the query boxes; idx records the batch element every kept
    # box came from, so the boxes can be regrouped per batch element.
    mask = tf.sequence_mask(features['query_sec'] * 3,
                            tf.shape(features['query_box'])[1])
    idx = tf.range(batch_size)
    idx = tf.reshape(idx, [-1, 1])
    idx = tf.tile(idx, [1, tf.shape(features['query_box'])[1]])
    features['query_box'] = tf.boolean_mask(features['query_box'], mask)
    idx = tf.boolean_mask(idx, mask)
    features['query_box'] = tf.dynamic_partition(features['query_box'], idx,
                                                 batch_size)
    features['query_idx'] = idx

    # Fold the per-example reference frames into the leading dimension.
    d0 = batch_size * FLAGS.ref_sec * 3
    # labels is None outside TRAIN/EVAL; subscripting it would raise.
    if labels is not None:
        labels[fields.InputDataFields.num_groundtruth_boxes] = tf.reshape(
            labels[fields.InputDataFields.num_groundtruth_boxes], [-1])
        labels[fields.InputDataFields.groundtruth_boxes] = tf.reshape(
            labels[fields.InputDataFields.groundtruth_boxes], [d0, -1, 4])
        labels[fields.InputDataFields.groundtruth_classes] = tf.reshape(
            labels[fields.InputDataFields.groundtruth_classes], [d0, -1, 2])

    # Repeat the image shape once per reference frame.
    true_im_shape = features[fields.InputDataFields.true_image_shape]
    true_im_shape = tf.expand_dims(true_im_shape, axis=1)
    true_im_shape = tf.tile(true_im_shape, [1, FLAGS.ref_sec * 3, 1])
    features[fields.InputDataFields.true_image_shape] = tf.reshape(
        true_im_shape, [-1, 3])

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        labels = unstack_batch(
            labels,
            unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
        gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
        gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
        gt_masks_list = None
        if fields.InputDataFields.groundtruth_instance_masks in labels:
            gt_masks_list = labels[
                fields.InputDataFields.groundtruth_instance_masks]
        gt_keypoints_list = None
        if fields.InputDataFields.groundtruth_keypoints in labels:
            gt_keypoints_list = labels[
                fields.InputDataFields.groundtruth_keypoints]
        gt_weights_list = None
        if fields.InputDataFields.groundtruth_weights in labels:
            gt_weights_list = labels[
                fields.InputDataFields.groundtruth_weights]
        # NOTE(review): gt_is_crowd_list is assigned but never used in this
        # function; kept as in the original.
        if fields.InputDataFields.groundtruth_is_crowd in labels:
            gt_is_crowd_list = labels[
                fields.InputDataFields.groundtruth_is_crowd]
        detection_model.provide_groundtruth(
            groundtruth_boxes_list=gt_boxes_list,
            groundtruth_classes_list=gt_classes_list,
            groundtruth_masks_list=gt_masks_list,
            groundtruth_keypoints_list=gt_keypoints_list,
            groundtruth_weights_list=gt_weights_list)

    prediction_dict = detection_model.predict(features)
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
        detections = detection_model.postprocess(
            prediction_dict,
            features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
        if train_config.fine_tune_checkpoint and hparams.load_pretrained:
            if not train_config.fine_tune_checkpoint_type:
                # train_config.from_detection_checkpoint field is
                # deprecated. For backward compatibility, set
                # train_config.fine_tune_checkpoint_type based on
                # train_config.from_detection_checkpoint.
                if train_config.from_detection_checkpoint:
                    train_config.fine_tune_checkpoint_type = 'detection'
                else:
                    train_config.fine_tune_checkpoint_type = 'classification'
            asg_map = detection_model.restore_map(
                fine_tune_checkpoint_type=(
                    train_config.fine_tune_checkpoint_type),
                load_all_detection_checkpoint_vars=(
                    train_config.load_all_detection_checkpoint_vars))
            available_var_map = (
                get_variables_available_in_checkpoint(
                    asg_map, FLAGS.i3d_ckpt, include_global_step=False))
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)
                    return tf.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else:
                # Restore through a Saver with reshape=True instead of
                # tf.train.init_from_checkpoint.
                saver = tf.train.Saver(var_list=available_var_map,
                                       reshape=True)

                def init_fn(scaffold, session):
                    saver.restore(session, FLAGS.i3d_ckpt)
                scaffold = tf.train.Scaffold(init_fn=init_fn)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        losses_dict = detection_model.loss(
            prediction_dict,
            features[fields.InputDataFields.true_image_shape],
            features['ref_sec'])
        losses = [loss_tensor for loss_tensor in losses_dict.values()]
        if train_config.add_regularization_loss:
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            if regularization_losses:
                regularization_loss = tf.add_n(regularization_losses,
                                               name='regularization_loss')
                losses.append(regularization_loss)
                losses_dict['Loss/regularization_loss'] = regularization_loss
        total_loss = tf.add_n(losses, name='total_loss')
        losses_dict['Loss/total_loss'] = total_loss

        if 'graph_rewriter_config' in configs:
            graph_rewriter_fn = graph_rewriter_builder.build(
                configs['graph_rewriter_config'], is_training=is_training)
            graph_rewriter_fn()

        # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode
        # once we can write learning rate summaries on TPU without host
        # calls.
        global_step = tf.train.get_or_create_global_step()
        training_optimizer, optimizer_summary_vars = optimizer_builder.build(
            train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if use_tpu:
            training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
                training_optimizer)
        if FLAGS.multi_gpu:
            training_optimizer = tf.contrib.estimator.TowerOptimizer(
                training_optimizer)

        # Optionally freeze some layers by setting their gradients to be
        # zero.
        trainable_variables = None
        include_variables = (
            train_config.update_trainable_variables
            if train_config.update_trainable_variables else None)
        exclude_variables = (
            train_config.freeze_variables
            if train_config.freeze_variables else None)
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            include_patterns=include_variables,
            exclude_patterns=exclude_variables)

        clip_gradients_value = None
        if train_config.gradient_clipping_by_norm > 0:
            clip_gradients_value = train_config.gradient_clipping_by_norm

        if not use_tpu:
            for var in optimizer_summary_vars:
                tf.summary.scalar(var.op.name, var)
        summaries = [] if use_tpu else None
        train_op = tf.contrib.layers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=None,
            clip_gradients=clip_gradients_value,
            optimizer=training_optimizer,
            variables=trainable_variables,
            summaries=summaries,
            name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
        }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
        scaffold = None
        class_agnostic = (fields.DetectionResultFields.detection_classes
                          not in detections)
        groundtruth = _prepare_groundtruth_for_eval(
            detection_model, class_agnostic)
        use_original_images = (
            fields.InputDataFields.original_image in features)
        eval_images = (
            features[fields.InputDataFields.original_image]
            if use_original_images
            else features[fields.InputDataFields.image])
        eval_dict = eval_util.result_dict_for_single_example(
            eval_images[0:1],
            features[inputs.HASH_KEY][0],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=True)

        if class_agnostic:
            category_index = (
                label_map_util.create_class_agnostic_category_index())
        else:
            category_index = (
                label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path))
        img_summary = None
        if not use_tpu and use_original_images:
            detection_and_groundtruth = (
                vis_utils.draw_side_by_side_evaluation_image(
                    eval_dict, category_index,
                    max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                    min_score_thresh=eval_config.min_score_threshold,
                    use_normalized_coordinates=False))
            img_summary = tf.summary.image(
                'Detections_Left_Groundtruth_Right',
                detection_and_groundtruth)

        # Eval metrics on a single example.
        eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
            eval_config,
            category_index.values(),
            eval_dict)
        for loss_key, loss_tensor in iter(losses_dict.items()):
            eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
        for var in optimizer_summary_vars:
            eval_metric_ops[var.op.name] = (var, tf.no_op())
        if img_summary is not None:
            eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
                img_summary, tf.no_op())
        eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

        if eval_config.use_moving_averages:
            variable_averages = tf.train.ExponentialMovingAverage(0.0)
            variables_to_restore = variable_averages.variables_to_restore()
            keep_checkpoint_every_n_hours = (
                train_config.keep_checkpoint_every_n_hours)
            saver = tf.train.Saver(
                variables_to_restore,
                keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
            scaffold = tf.train.Scaffold(saver=saver)

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            scaffold_fn=scaffold_fn,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metric_ops,
            export_outputs=export_outputs)
    else:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs=export_outputs,
            scaffold=scaffold)
def update_centroids(samples, nearest_indices, n_clusters):
    """Recompute every centroid as the mean of the samples assigned to it.

    Args:
        samples: Tensor of samples; it is partitioned along its first axis.
        nearest_indices: Tensor holding, for each sample, the index of the
            cluster it belongs to. It is cast to int32 before partitioning.
        n_clusters: Number of clusters (Python int), i.e. number of partitions.

    Returns:
        A tensor with one row per cluster: the per-cluster means concatenated
        along axis 0.
    """
    # tf.cast replaces the deprecated tf.to_int32 helper.
    nearest_indices = tf.cast(nearest_indices, tf.int32)
    partitions = tf.dynamic_partition(samples, nearest_indices, n_clusters)
    # NOTE: a cluster that received no samples produces a mean over an empty
    # tensor, which evaluates to NaN; callers must handle empty clusters.
    new_centroids = tf.concat(
        [tf.expand_dims(tf.reduce_mean(partition, 0), 0)
         for partition in partitions], 0)
    return new_centroids
def run_graph(self, num_features, train_data, val_data, test_data, sample_weights=None):
    """Build the survival-model graph, train it and evaluate it after each epoch.

    The scale of ``self.distribution`` is softplus(linear term [+ factorized
    term when ``self.k > 0``]); the shape is one trainable scalar. The loss is
    a log-loss on the left-censoring probability plus two header-bidding
    regularizers and an L2 penalty on the looked-up embeddings.

    Whenever the validation loss improves, the test set is evaluated, the test
    predictions are written to ``output/all_predictions_factorized.csv`` and
    the parameters are pickled to ``output/params_k<k>.pkl``.

    :param num_features: number of rows of the embedding tables.
    :param train_data: dataset providing ``num_instances`` and
        ``make_sparse_batch``.
    :param val_data: validation dataset providing ``make_sparse_batch``.
    :param test_data: test dataset providing ``make_sparse_batch``.
    :param sample_weights: falsy for an unweighted loss/accuracy, or ``'time'``
        to weight samples by ``times``.
    :raises ValueError: if ``sample_weights`` is neither falsy nor ``'time'``.
    :return: None
    """
    if sample_weights and sample_weights != 'time':
        # Without this check the graph construction fails later with an
        # obscure error because no loss tensor is ever built.
        raise ValueError(
            "sample_weights must be None or 'time', got %r" % (sample_weights,))

    # INPUTs
    feature_indice = tf.placeholder(tf.int32, name='feature_indice')
    feature_values = tf.placeholder(tf.float32, name='feature_values')
    min_hbs = tf.placeholder(tf.float32, name='min_headerbids')  # for regularization
    max_hbs = tf.placeholder(tf.float32, name='max_headerbids')  # for regularization
    times = tf.placeholder(tf.float32, shape=[None], name='times')
    events = tf.placeholder(tf.int32, shape=[None], name='events')

    # shape: (batch_size, max_nonzero_len)
    embeddings_linear = tf.Variable(
        tf.truncated_normal(shape=(num_features, ), mean=0.0, stddev=1e-5))
    filtered_embeddings_linear = tf.nn.embedding_lookup(
        embeddings_linear, feature_indice) * feature_values
    intercept = tf.Variable(1e-5)
    linear_term = self.linear_function(filtered_embeddings_linear, intercept)
    scale = linear_term

    embeddings_factorized = None
    filtered_embeddings_factorized = None
    if self.k > 0:
        # shape: (batch_size, max_nonzero_len, k)
        embeddings_factorized = tf.Variable(
            tf.truncated_normal(shape=(num_features, self.k), mean=0.0, stddev=1e-5))
        filtered_embeddings_factorized = tf.nn.embedding_lookup(embeddings_factorized, feature_indice) * \
            tf.tile(tf.expand_dims(feature_values, axis=-1), [1, 1, 1])
        factorized_term = self.factorization_machines(
            filtered_embeddings_factorized)
        scale += factorized_term
    scale = tf.nn.softplus(scale)

    # if event == 0, right-censoring
    # if event == 1, left-censoring
    shape = tf.Variable(0.2, trainable=True)
    not_survival_proba = self.distribution.left_censoring(
        times, scale, shape)  # the left area
    not_survival_bin = tf.where(tf.greater_equal(not_survival_proba, 0.5),
                                tf.ones(tf.shape(not_survival_proba)),
                                tf.zeros(tf.shape(not_survival_proba)))

    running_acc, acc_update = None, None
    if not sample_weights:
        running_acc, acc_update = tf.metrics.accuracy(
            labels=events, predictions=not_survival_bin)
    elif sample_weights == 'time':
        running_acc, acc_update = tf.metrics.accuracy(
            labels=events, predictions=not_survival_bin, weights=times)

    batch_loss = None
    if not sample_weights:
        batch_loss = tf.losses.log_loss(labels=events,
                                        predictions=not_survival_proba,
                                        reduction=tf.losses.Reduction.MEAN)
    elif sample_weights == 'time':
        batch_loss = tf.losses.log_loss(labels=events,
                                        predictions=not_survival_proba,
                                        weights=times,
                                        reduction=tf.losses.Reduction.MEAN)
    running_loss, loss_update = tf.metrics.mean(batch_loss)

    # Header Bidding Regularization
    hb_adxwon_partitions = tf.cast(
        tf.logical_and(
            tf.equal(events, 0),  # adx won
            tf.logical_and(
                tf.not_equal(0.0, max_hbs),  # the max_hb is not missing
                tf.less(times, max_hbs))),
        tf.int32)
    hb_adxlose_partitions = tf.cast(
        tf.logical_and(
            tf.equal(events, 1),  # adx lose
            tf.logical_and(
                tf.not_equal(0.0, min_hbs),  # the min_hb is not missing
                tf.less(min_hbs, times))),  # the min hb < the floor
        tf.int32)

    # Using boolean_mask instead of dynamic_partition leads to:
    # "UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape.
    # This may consume a large amount of memory."
    # https://stackoverflow.com/questions/44380727/get-userwarning-while-i-use-tf-boolean-mask?noredirect=1&lq=1
    regable_hb_adxwon = tf.dynamic_partition(max_hbs, hb_adxwon_partitions, 2)[1]
    regable_hb_adxlose = tf.dynamic_partition(min_hbs, hb_adxlose_partitions, 2)[1]
    regable_scale_adxwon = tf.dynamic_partition(scale, hb_adxwon_partitions, 2)[1]
    regable_scale_adxlose = tf.dynamic_partition(scale, hb_adxlose_partitions, 2)[1]

    hb_adxwon_pred = self.distribution.left_censoring(
        regable_hb_adxwon, regable_scale_adxwon, shape)
    hb_adxlose_pred = self.distribution.left_censoring(
        regable_hb_adxlose, regable_scale_adxlose, shape)

    hb_reg_adxwon, hb_reg_adxlose = None, None
    if not sample_weights:
        hb_reg_adxwon = tf.losses.log_loss(
            labels=tf.zeros(tf.shape(hb_adxwon_pred)),
            predictions=hb_adxwon_pred)
        hb_reg_adxlose = tf.losses.log_loss(
            labels=tf.zeros(tf.shape(hb_adxlose_pred)),
            predictions=hb_adxlose_pred)
    elif sample_weights == 'time':
        regable_time_adxwon = tf.dynamic_partition(times, hb_adxwon_partitions, 2)[1]
        regable_time_adxlose = tf.dynamic_partition(times, hb_adxlose_partitions, 2)[1]
        # NOTE(review): the adx-won labels are ones here but zeros in the
        # unweighted branch above; confirm which of the two is intended.
        hb_reg_adxwon = tf.losses.log_loss(
            labels=tf.ones(tf.shape(hb_adxwon_pred)),
            predictions=hb_adxwon_pred,
            weights=1.0 / regable_time_adxwon)
        hb_reg_adxlose = tf.losses.log_loss(
            labels=tf.zeros(tf.shape(hb_adxlose_pred)),
            predictions=hb_adxlose_pred,
            weights=1.0 / regable_time_adxlose)
    mean_hb_reg_adxwon = tf.reduce_mean(hb_reg_adxwon)
    mean_hb_reg_adxlose = tf.reduce_mean(hb_reg_adxlose)

    # L2 penalty over the embeddings that were actually looked up in the batch
    l2_norm = self.lambda_linear * tf.nn.l2_loss(filtered_embeddings_linear)
    if embeddings_factorized is not None:
        l2_norm += self.lambda_factorized * tf.nn.l2_loss(
            filtered_embeddings_factorized)

    loss_mean = batch_loss + \
        tf.constant(self.lambda_hb_adxwon) * mean_hb_reg_adxwon + \
        tf.constant(self.lambda_hb_adxlose) * mean_hb_reg_adxlose + \
        l2_norm

    # Adam with global-norm gradient clipping
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    gradients, variables = zip(*optimizer.compute_gradients(loss_mean))
    gradients_clipped, _ = tf.clip_by_global_norm(gradients, 5.0)
    training_op = optimizer.apply_gradients(zip(gradients_clipped, variables))

    # Isolate the variables stored behind the scenes by the metric operation
    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES)
    # Define initializer to initialize/reset running variables
    running_vars_initializer = tf.variables_initializer(var_list=running_vars)

    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())

    with tf.Session() as sess:
        init.run()

        min_loss_val = None  # lowest validation loss seen so far
        num_total_batches = int(
            np.ceil(train_data.num_instances / self.batch_size))
        for epoch in range(1, self.num_epochs + 1):
            sess.run(running_vars_initializer)

            # model training
            num_batch = 0
            start = nowtime()
            for time_batch, event_batch, featidx_batch, featval_batch, minhbs_batch, maxhbs_batch, _max_nz_len \
                    in train_data.make_sparse_batch(self.batch_size, only_freq=ONLY_FREQ_TRAIN):
                num_batch += 1
                # Only the loss value is consumed; acc_update is run for its
                # side effect on the running accuracy.
                _, loss_batch, _ = sess.run(
                    [training_op, loss_mean, acc_update],
                    feed_dict={
                        'feature_indice:0': featidx_batch,
                        'feature_values:0': featval_batch,
                        'min_headerbids:0': minhbs_batch,
                        'max_headerbids:0': maxhbs_batch,
                        'times:0': time_batch,
                        'events:0': event_batch
                    })

                if epoch == 1:
                    print("Epoch %d - Batch %d/%d: batch loss = %.4f" %
                          (epoch, num_batch, num_total_batches, loss_batch))
                    print(" time: %.4fs" % (nowtime() - start))
                    start = nowtime()

            # evaluation on training data
            eval_nodes_update = [
                loss_update, acc_update, not_survival_proba, scale, max_hbs
            ]
            eval_nodes_metric = [running_loss, running_acc]
            print()
            print("========== Evaluation at Epoch %d ==========" % epoch)
            print('*** On Training Set:')
            (loss_train, acc_train), _, _, _, _, _ = self.evaluate(
                train_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                running_vars_initializer, sess, eval_nodes_update,
                eval_nodes_metric, sample_weights)

            # evaluation on validation data
            print('*** On Validation Set:')
            (loss_val, acc_val), not_survival_val, _, _, events_val, times_val = self.evaluate(
                val_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                running_vars_initializer, sess, eval_nodes_update,
                eval_nodes_metric, sample_weights)
            print("Validation C-Index = %.4f" %
                  c_index(events_val, not_survival_val, times_val))

            if min_loss_val is None or loss_val < min_loss_val:
                print("!!! GET THE LOWEST VAL LOSS !!!")
                min_loss_val = loss_val

                # evaluation on test data
                print('*** On Test Set:')
                (loss_test, acc_test), not_survival_test, scale_test, max_hbs_test, events_test, times_test = \
                    self.evaluate(
                        test_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                        running_vars_initializer, sess, eval_nodes_update,
                        eval_nodes_metric, sample_weights)
                print("TEST C-Index = %.4f" %
                      c_index(events_test, not_survival_test, times_test))

                # Store prediction results
                with open('output/all_predictions_factorized.csv', 'w',
                          newline="\n") as outfile:
                    csv_writer = csv.writer(outfile)
                    csv_writer.writerow(
                        ('NOT_SURV_PROB', 'EVENTS', 'MAX(RESERVE, REVENUE)',
                         'MAX_HB', 'SCALE', 'SHAPE'))
                    sh = shape.eval()
                    for p, e, t, h, sc in zip(not_survival_test, events_test,
                                              times_test, max_hbs_test,
                                              scale_test):
                        csv_writer.writerow((p, e, t, h, sc, sh))
                print('All predictions are outputted for error analysis')

                # Store parameters
                params = {
                    'embeddings_linear': embeddings_linear.eval(),
                    'intercept': intercept.eval(),
                    'shape': shape.eval(),
                    'distribution_name': type(self.distribution).__name__
                }
                if embeddings_factorized is not None:
                    # Store the array itself; a trailing comma here used to
                    # wrap it in a 1-tuple.
                    params['embeddings_factorized'] = embeddings_factorized.eval()
                # The context manager closes the file even if dumping fails.
                with open('output/params_k%d.pkl' % self.k, 'wb') as params_file:
                    pickle.dump(params, params_file)
def get_output(self, train=False):
    """Run the forward and reverse layers and merge their outputs.

    The input is reversed along the time axis (axis 1) before it is fed to
    ``self.reverse``. When a mask is present the padding is re-aligned so that
    forward and reverse outputs for the same timestep line up; the handling
    differs between the Theano and TensorFlow backends. The trailing number
    comments trace a toy sequence through each step.

    Args:
        train: Forwarded to ``get_input`` / ``get_input_mask``.

    Returns:
        The merged tensor for ``merge_mode`` in {'concat', 'sum', 'ave',
        'mul'}; ``None`` for any other merge mode.
    """
    X = self.get_input(train)  # 0,0,0,1,2,3,4
    mask = self.get_input_mask(train)  # 0,0,0,1,1,1,1

    # X_rev = reverse(X)
    X_rev = K.permute_dimensions(X, (1, 0, 2))
    X_rev = X_rev[::-1]
    X_rev = K.permute_dimensions(X_rev, (1, 0, 2))  # 4,3,2,1,0,0,0

    Y = self.forward(X, mask)  # 0,0,0,1,3,6,10
    Y_rev = None

    # Compare against None: the truth value of a symbolic tensor is not a
    # reliable (or even permitted) presence test.
    if mask is not None:
        if K._BACKEND == 'theano':
            # convert right padding to left padding by rolling
            shifts = K.sum(mask, axis=1)
            import theano
            X_rev, _ = theano.scan(
                lambda x, i: theano.tensor.roll(x, -i, 0),
                sequences=[X_rev, shifts])  # 0,0,0,4,3,2,1
            # Get reverse output
            Y_rev = self.reverse(
                X_rev, mask
            )  # 0,0,0,4,7,9,10 or just 10 if return_sequences = False
            if self.return_sequences:
                # Fix alignment:
                # When return_sequences = True, outputs corresponding to the
                # same input should be merged.
                # Reverse Y_rev.
                # Note: reversing converts left padding to right padding.
                Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
                Y_rev = Y_rev[::-1]
                Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))  # 10,9,7,4,0,0,0
                # Convert right padding back to left padding
                Y_rev, _ = theano.scan(
                    lambda x, i: theano.tensor.roll(x, -i, 0),
                    sequences=[Y_rev, shifts])  # 0,0,0,10,9,7,4
        else:
            import tensorflow as tf
            # mask_rev = reverse(mask)
            mask_rev = K.permute_dimensions(mask, (1, 0))
            mask_rev = mask_rev[::-1]
            mask_rev = K.permute_dimensions(mask_rev, (1, 0))  # 1,1,1,1,0,0,0
            # X_rev = 4,3,2,1,0,0,0
            # Get reverse output:
            Y_rev = self.reverse(
                X_rev, mask_rev)  # 4,7,9,10,g,g,g (g = Garbage value)
            # Reverse Y_rev
            Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
            Y_rev = Y_rev[::-1]
            Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))  # g,g,g,10,9,7,4
            # Trim off garbage values
            [garbage, Y_rev] = tf.dynamic_partition(Y_rev, mask, 2)  # [g,g,g] [10,9,7,4]
            if self.return_sequences:
                # pad left
                zeros = K.zeros_like(garbage)  # 0,0,0
                Y_rev = K.concatenate([zeros, Y_rev], axis=1)  # 0,0,0,10,9,7,4
            else:
                Y_rev = Y_rev[:, 0]  # 10
    else:
        self.reverse.return_sequences = self.return_sequences
        Y_rev = self.reverse(X_rev)
        if self.return_sequences:
            Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
            Y_rev = Y_rev[::-1]
            Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
        if K._BACKEND != 'theano':
            # Restore the setting the non-Theano masked path relies on
            # (it slices Y_rev[:, 0] from a full sequence).
            self.reverse.return_sequences = True

    if self.merge_mode == 'concat':
        return K.concatenate([Y, Y_rev])
    elif self.merge_mode == 'sum':
        return Y + Y_rev
    elif self.merge_mode == 'ave':
        return (Y + Y_rev) / 2
    elif self.merge_mode == 'mul':
        return Y * Y_rev
def testNeuralLinUCBUpdateNumTrainSteps0(self, batch_size=1, context_dim=10):
    """With zero encoder train steps the agent must update exactly like LinUCB.

    Trains the agent on one synthetic trajectory, then recomputes the expected
    per-arm covariance matrices and data vectors from the same experience and
    compares them with the agent's ``cov_matrix`` / ``data_vector``.
    """
    # Build a `Trajectory` from a random action plus the synthetic steps.
    num_actions = 5
    initial_step, final_step = _get_initial_and_final_steps(
        batch_size, context_dim)
    action = np.random.randint(num_actions, size=batch_size, dtype=np.int32)
    action_step = _get_action_step(action)
    experience = _get_experience(initial_step, action_step, final_step)

    # Create the agent and run a single training update.
    observation_spec = tensor_spec.TensorSpec([context_dim], tf.float32)
    time_step_spec = time_step.time_step_spec(observation_spec)
    action_spec = tensor_spec.BoundedTensorSpec(
        dtype=tf.int32, shape=(), minimum=0, maximum=num_actions - 1)
    encoder = DummyNet(observation_spec)
    encoding_dim = 10
    agent = neural_linucb_agent.NeuralLinUCBAgent(
        time_step_spec=time_step_spec,
        action_spec=action_spec,
        encoding_network=encoder,
        encoding_network_num_train_steps=0,
        encoding_dim=encoding_dim,
        optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-2))

    loss_info = agent.train(experience)
    self.evaluate(agent.initialize())
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(loss_info)
    final_a = self.evaluate(agent.cov_matrix)
    final_b = self.evaluate(agent.data_vector)

    def split_by_arm(values, new_shape):
        # Cast to float64, reshape, and bucket the rows by the chosen arm.
        return tf.dynamic_partition(
            data=tf.reshape(tf.cast(values, tf.float64), new_shape),
            partitions=tf.convert_to_tensor(action),
            num_partitions=num_actions)

    # Recompute the statistics the agent is expected to hold.
    obs_per_arm = split_by_arm(experience.observation,
                               [batch_size, context_dim])
    rewards_per_arm = split_by_arm(experience.reward, [batch_size])

    expected_a, expected_b = [], []
    for arm_obs, arm_rewards in zip(obs_per_arm, rewards_per_arm):
        encoded, _ = encoder(arm_obs)
        encoded = tf.cast(encoded, dtype=tf.float64)
        arm_sample_count = tf.cast(tf.shape(arm_rewards)[0], tf.float64)

        # pylint: disable=cell-var-from-loop
        def stats_from_samples():
            cov = tf.matmul(encoded, encoded, transpose_a=True)
            vec = bandit_utils.sum_reward_weighted_observations(
                arm_rewards, encoded)
            return cov, vec

        def zero_stats():
            return (tf.zeros([encoding_dim, encoding_dim], dtype=tf.float64),
                    tf.zeros([encoding_dim], dtype=tf.float64))

        arm_cov, arm_vec = tf.cond(
            tf.squeeze(arm_sample_count) > 0, stats_from_samples, zero_stats)
        expected_a.append(self.evaluate(arm_cov))
        expected_b.append(self.evaluate(arm_vec))

    # The agent's estimates must match the recomputed ones.
    self.assertAllClose(expected_a, final_a)
    self.assertAllClose(expected_b, final_b)
# Input pipeline: list the image files, split them randomly 80/20 into train
# and test sets, and decode one PNG per set into a flat column vector.
dataset_path = "../data/"
HEIGHT = 300
WIDTH = 300
CHANNEL = 3
DIMENSIONS = HEIGHT * WIDTH * CHANNEL

all_filepaths = [dataset_path + fp for fp in listdir(dataset_path)]
all_images = ops.convert_to_tensor(all_filepaths, dtype=dtypes.string)

# Random partition vector: 1 marks a test file, 0 a training file.
test_set_size = int(0.2 * len(all_filepaths))
paritions = [0] * len(all_filepaths)
paritions[:test_set_size] = [1] * test_set_size
random.shuffle(paritions)

train_images, test_images = tf.dynamic_partition(all_images, paritions, 2)

train_input_queue = tf.train.slice_input_producer([train_images], shuffle=False)
test_input_queue = tf.train.slice_input_producer([test_images], shuffle=False)

file_content = tf.read_file(train_input_queue[0])
train_image = tf.image.decode_png(file_content, channels=CHANNEL)
# Read the test image from the test queue; reading the train queue here
# made the test pipeline serve training files.
file_content = tf.read_file(test_input_queue[0])
test_image = tf.image.decode_png(file_content, channels=CHANNEL)

train_image = tf.reshape(train_image, [DIMENSIONS, 1])
test_image = tf.reshape(test_image, [DIMENSIONS, 1])

BATCH_SIZE = 100
# Turn the full file/label lists into tensors.
all_images = ops.convert_to_tensor(value=all_filepaths, dtype=dtypes.string)
all_labels = ops.convert_to_tensor(value=all_labels, dtype=dtypes.int32)

# Build a custom random partition vector: 1 = test sample, 0 = train sample.
TEST_SET_SIZE = int(FLAGS.TEST_DATASET_RATE * len(all_filepaths))
partitions = [0] * len(all_filepaths)
partitions[:TEST_SET_SIZE] = [1] * TEST_SET_SIZE
random.shuffle(partitions)

# Split images and labels with the same partition vector so they stay aligned.
train_images, test_images = tf.dynamic_partition(
    data=all_images, partitions=partitions, num_partitions=2)
train_labels, test_labels = tf.dynamic_partition(
    data=all_labels, partitions=partitions, num_partitions=2)

# Create the input queues.
train_input_queue = tf.train.slice_input_producer(
    tensor_list=[train_images, train_labels], shuffle=True)
test_input_queue = tf.train.slice_input_producer(
    tensor_list=[test_images, test_labels], shuffle=True)

# Read the image and preprocess it to the size the network definition expects.
file_content = tf.read_file(train_input_queue[0])
train_image = tf.image.decode_jpeg(file_content,
                                   channels=FLAGS.NUM_CHANNELS)
train_image = inception_preprocessing.preprocess_image(
    train_image,
    FLAGS.NET_IMAGE_SIZE_H,
    FLAGS.NET_IMAGE_SIZE_W,
    is_training=False)
def prediction(self):
    """Build the piecewise-pooled CNN and return its sigmoid output tensor.

    Word ids in ``self.words`` are looked up in the fixed embedding matrix
    ``self.embs``. For each filter size a convolution + ReLU is applied, the
    result is split in two along the sequence axis according to
    ``self.partitions[i]``, and each piece is max-pooled. The pooled features
    feed a 100-unit ReLU layer followed by a sigmoid layer of ``self.out_size``
    units. Tensor shapes are printed along the way.

    NOTE: the result is stored in ``self.prediction``, which replaces this
    method on the instance after the first call.
    """
    embedding_table = tf.constant(self.embs, tf.float32)
    word_vectors = tf.nn.embedding_lookup(embedding_table, self.words)
    print(word_vectors.get_shape())
    # Add a trailing channel axis for conv2d.
    conv_input = tf.expand_dims(word_vectors, -1)
    print(conv_input.get_shape())

    pooled_outputs = []
    for idx, width in enumerate(self.filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % width):
            kernel_shape = [width, self.word_embed_size, 1, self.num_filters]
            kernel = tf.Variable(
                tf.truncated_normal(kernel_shape, stddev=0.1), name="W")
            kernel_bias = tf.Variable(
                tf.constant(0.1, shape=[self.num_filters]), name="b")
            conv = tf.nn.conv2d(conv_input,
                                kernel,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            activated = tf.nn.relu(tf.nn.bias_add(conv, kernel_bias),
                                   name="relu")
            print(activated.get_shape())

            # Drop the collapsed width axis, then go time-major so that
            # dynamic_partition splits along the sequence axis.
            time_major = tf.reshape(
                activated,
                [-1, self.max_length - width + 1, self.num_filters])
            time_major = tf.transpose(time_major, [1, 0, 2])
            print(time_major.get_shape())

            pieces = tf.dynamic_partition(time_major, self.partitions[idx], 2)
            print(pieces[0].get_shape())

            # Back to batch-major, then max over the sequence axis per piece.
            first_piece, second_piece = [
                tf.transpose(piece, [1, 0, 2]) for piece in pieces
            ]
            first_pool = tf.reduce_max(first_piece, 1)
            second_pool = tf.reduce_max(second_piece, 1)
            print(second_pool.get_shape())

            pooled_outputs.append(tf.stack([first_pool, second_pool], 1))

    num_filters_total = self.num_filters * len(self.filter_sizes) * 2
    pooled_concat = tf.concat(pooled_outputs, 2)
    pooled_flat = tf.reshape(pooled_concat, [-1, num_filters_total])
    print(pooled_flat.get_shape())

    out_size = self.out_size
    with tf.variable_scope('first_layer_weights'):
        hidden_weight = tf.Variable(
            tf.truncated_normal([num_filters_total, 100], stddev=0.1))
    with tf.variable_scope('first_layer_Bias'):
        hidden_bias = tf.Variable(tf.constant(0.1, shape=[100]))
    hidden = tf.nn.relu(tf.matmul(pooled_flat, hidden_weight) + hidden_bias)

    with tf.variable_scope('second_layer_weights'):
        out_weight = tf.Variable(
            tf.truncated_normal([100, out_size], stddev=0.1))
    with tf.variable_scope('Bias'):
        out_bias = tf.Variable(tf.constant(0.1, shape=[out_size]))

    self.prediction = tf.sigmoid(tf.matmul(hidden, out_weight) + out_bias)
    return self.prediction
def watch_movie(story, mem, l):
    """Split ``story`` with a sequence mask built from ``l``.

    A mask of length ``tf.shape(story)[0]`` is created with
    ``tf.sequence_mask(l, ...)``; the entries of ``story`` where the mask is 1
    are bound to ``clips``. ``mem`` is not used, and the function returns
    nothing.
    """
    keep_mask = tf.sequence_mask(l, tf.shape(story)[0], dtype=tf.int32)
    # Partition 1 holds the entries selected by the mask.
    clips = tf.dynamic_partition(story, keep_mask, 2)[1]
def deeplabv3_model_fn(features, labels, mode, params):
    """Estimator model function for DeepLab v3 on PASCAL VOC.

    Builds the network, the predictions dict, a cross-entropy + weight-decay
    loss over the pixels whose label is a valid class id, the training op
    (TRAIN mode only) and the pixel-accuracy / mean-IoU metrics.

    Args:
        features: Batch of input images fed to the network.
        labels: Ground-truth label maps; the channel axis (3) is squeezed.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict of hyper-parameters read by key in the body.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        ValueError: in TRAIN mode, if ``params['learning_rate_policy']`` is
            neither ``'piecewise'`` nor ``'poly'``.
    """
    num_classes = params['num_classes']
    batch_size = params['batch_size']

    images = tf.cast(tf.map_fn(preprocessing.mean_image_addition, features),
                     tf.uint8)

    network = deeplab_v3_generator(num_classes, params['output_stride'],
                                   params['base_architecture'],
                                   params['pre_trained_model'],
                                   params['batch_norm_decay'])
    logits = network(features, mode == tf.estimator.ModeKeys.TRAIN)

    pred_classes = tf.expand_dims(
        tf.argmax(logits, axis=3, output_type=tf.int32), axis=3)
    pred_decoded_labels = tf.py_func(preprocessing.decode_labels,
                                     [pred_classes, batch_size, num_classes],
                                     tf.uint8)

    predictions = {
        'classes': pred_classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
        'decoded_labels': pred_decoded_labels
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    gt_decoded_labels = tf.py_func(preprocessing.decode_labels,
                                   [labels, batch_size, num_classes],
                                   tf.uint8)

    labels = tf.squeeze(labels, axis=3)  # reduce the channel dimension.

    logits_by_num_classes = tf.reshape(logits, [-1, num_classes])
    labels_flat = tf.reshape(labels, [-1])

    # 1 for pixels whose label is a real class id, 0 otherwise.
    valid_indices = tf.to_int32(labels_flat <= num_classes - 1)

    def keep_valid(tensor):
        # Partition 1 contains the rows flagged as valid.
        return tf.dynamic_partition(tensor, valid_indices,
                                    num_partitions=2)[1]

    valid_logits = keep_valid(logits_by_num_classes)
    valid_labels = keep_valid(labels_flat)
    preds_flat = tf.reshape(pred_classes, [-1])
    valid_preds = keep_valid(preds_flat)

    confusion_matrix = tf.confusion_matrix(valid_labels,
                                           valid_preds,
                                           num_classes=num_classes)
    predictions.update({
        'valid_preds': valid_preds,
        'valid_labels': valid_labels,
        'confusion_matrix': confusion_matrix,
    })

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        logits=valid_logits, labels=valid_labels)
    # Named tensor + summary so the value can be logged.
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    if params['freeze_batch_norm']:
        # Leave the batch-norm beta/gamma parameters out of training.
        train_var_list = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]
    else:
        train_var_list = list(tf.trainable_variables())

    # Add weight decay to the loss.
    with tf.variable_scope("total_loss"):
        weight_decay = params.get('weight_decay', _WEIGHT_DECAY)
        l2_penalty = tf.add_n([tf.nn.l2_loss(v) for v in train_var_list])
        loss = cross_entropy + weight_decay * l2_penalty

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Input, ground truth and prediction concatenated along axis 2.
        tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images, gt_decoded_labels, pred_decoded_labels]),
            max_outputs=params['tensorboard_images_max_outputs'])

        global_step = tf.train.get_or_create_global_step()

        policy = params['learning_rate_policy']
        if policy == 'piecewise':
            # Scale the learning rate linearly with the batch size: 0.1 at a
            # batch size of 128.
            initial_learning_rate = 0.1 * batch_size / 128
            batches_per_epoch = params['num_train'] / batch_size
            # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
            boundaries = [
                int(batches_per_epoch * epoch) for epoch in [100, 150, 200]
            ]
            values = [
                initial_learning_rate * decay
                for decay in [1, 0.1, 0.01, 0.001]
            ]
            learning_rate = tf.train.piecewise_constant(
                tf.cast(global_step, tf.int32), boundaries, values)
        elif policy == 'poly':
            learning_rate = tf.train.polynomial_decay(
                params['initial_learning_rate'],
                tf.cast(global_step, tf.int32) - params['initial_global_step'],
                params['max_iter'],
                params['end_learning_rate'],
                power=params['power'])
        else:
            raise ValueError(
                'Learning rate policy must be "piecewise" or "poly"')

        # Named tensor + summary so the value can be logged.
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=params['momentum'])

        # Batch norm requires update ops to run together with the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss,
                                          global_step,
                                          var_list=train_var_list)
    else:
        train_op = None

    accuracy = tf.metrics.accuracy(valid_labels, valid_preds)
    mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds, num_classes)
    metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou}

    # Named tensor + summary for the running pixel accuracy.
    tf.identity(accuracy[1], name='train_px_accuracy')
    tf.summary.scalar('train_px_accuracy', accuracy[1])

    def compute_mean_iou(total_cm, name='mean_iou'):
        """Compute the mean intersection-over-union via the confusion matrix."""
        sum_over_row = tf.to_float(tf.reduce_sum(total_cm, 0))
        sum_over_col = tf.to_float(tf.reduce_sum(total_cm, 1))
        cm_diag = tf.to_float(tf.diag_part(total_cm))
        denominator = sum_over_row + sum_over_col - cm_diag

        # Only classes that occur in the labels or the predictions count
        # towards the mean; a zero denominator marks an absent class.
        num_valid_entries = tf.reduce_sum(
            tf.cast(tf.not_equal(denominator, 0), dtype=tf.float32))

        # Replace zero denominators by one to avoid dividing by zero.
        denominator = tf.where(tf.greater(denominator, 0), denominator,
                               tf.ones_like(denominator))
        iou = tf.div(cm_diag, denominator)

        for class_id in range(params['num_classes']):
            tf.identity(iou[class_id],
                        name='train_iou_class{}'.format(class_id))
            tf.summary.scalar('train_iou_class{}'.format(class_id),
                              iou[class_id])

        # With no valid class at all the result is 0.
        result = tf.where(tf.greater(num_valid_entries, 0),
                          tf.reduce_sum(iou, name=name) / num_valid_entries, 0)
        return result

    train_mean_iou = compute_mean_iou(mean_iou[1])
    tf.identity(train_mean_iou, name='train_mean_iou')
    tf.summary.scalar('train_mean_iou', train_mean_iou)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metrics)
def z_proposal_GMM_factorized(X, samples_s, miss_list, batch_size, z_dim, reuse):
    """Sample z from a product of per-attribute Gaussian proposals.

    For each element of ``X`` the observed rows (``miss_list[:, i] == 1``) go
    through two dense layers producing a mean and a log-variance. Missing rows
    get mean 0 and log-variance 15, so they barely influence the product. A
    zero-mean, zero-log-variance term is appended before combining all terms
    as a product of Gaussians.

    Returns:
        ``(samples_z, [mean_qz_joint, log_var_qz_joint])``.
    """
    mean_qz = []
    log_var_qz = []

    for i, d in enumerate(X):

        def split(tensor):
            # Partition 0: missing rows, partition 1: observed rows.
            return tf.dynamic_partition(tensor,
                                        miss_list[:, i],
                                        num_partitions=2)

        missing_data, observed_data = split(d)
        missing_s, observed_s = split(samples_s)
        row_indices = split(tf.range(tf.shape(d)[0]))

        # Number of observed rows
        nObs = tf.shape(observed_data)[0]

        # Mean layer
        mean_observed = tf.layers.dense(
            inputs=tf.concat([observed_data, observed_s], 1),
            units=z_dim,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(stddev=0.05),
            name='layer_1_' + 'mean_enc_z' + str(i),
            reuse=reuse)
        # Fill the missing rows with zeros so they don't affect the joint mean.
        mean_full = tf.dynamic_stitch(row_indices, [
            tf.zeros([batch_size - nObs, z_dim], dtype=tf.float32),
            mean_observed
        ])

        # Log-variance layer
        logvar_observed = tf.layers.dense(
            inputs=tf.concat([observed_data, observed_s], 1),
            units=z_dim,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(stddev=0.05),
            name='layer_1_' + 'logvar_enc_z' + str(i),
            reuse=reuse)
        # A large log-variance makes the missing rows' contribution negligible.
        logvar_full = tf.dynamic_stitch(
            row_indices,
            [tf.fill([batch_size - nObs, z_dim], 15.0), logvar_observed])

        mean_qz.append(mean_full)
        log_var_qz.append(logvar_full)

    # Input prior
    log_var_qz.append(tf.zeros([batch_size, z_dim]))
    mean_qz.append(tf.zeros([batch_size, z_dim]))

    # Joint parameters of the product of Gaussians
    precisions = tf.exp(tf.negative(log_var_qz))
    log_var_qz_joint = -tf.reduce_logsumexp(tf.negative(log_var_qz), 0)
    mean_qz_joint = tf.multiply(
        tf.exp(log_var_qz_joint),
        tf.reduce_sum(tf.multiply(mean_qz, precisions), 0))

    # Avoid numerical problems
    # NOTE(review): the clipped value is never read afterwards; the joint
    # log-variance used below is not clipped. Confirm the intent.
    log_var_qz = tf.clip_by_value(log_var_qz, -15.0, 15.0)

    # Reparameterization trick
    eps = tf.random.normal((batch_size, z_dim), 0, 1, dtype=tf.float32)
    samples_z = mean_qz_joint + tf.multiply(tf.exp(log_var_qz_joint / 2), eps)

    return samples_z, [mean_qz_joint, log_var_qz_joint]
def gmm(data, numClusters, iterations=400):
    """Fit a Gaussian mixture model with Adam and cluster ``data``.

    Args:
        data: 2-D array of shape (N, d); fed to a float32 placeholder.
        numClusters: Number of mixture components K.
        iterations: Number of optimizer steps (default 400).

    Returns:
        ``(clustered, trained_centroids, trained_log_pi, trained_sigma,
        loss_history)`` where ``clustered`` is the list of per-cluster groups
        of data points and ``loss_history`` holds the loss at each step.
    """
    # Number of data points
    N = data.shape[0]
    # Size of dimension
    d = data.shape[1]
    # Number of clusters/centroids
    K = numClusters

    ### Build Graph ###
    # Placeholder for the data points
    X = tf.placeholder(dtype=tf.float32, shape=(N, d), name="X")

    # Cluster centres, sampled from a standard normal distribution
    MU = tf.Variable(initial_value=tf.random.normal(shape=[K, d],
                                                    mean=0,
                                                    stddev=math.sqrt(1),
                                                    dtype=tf.float32),
                     trainable=True,
                     name="MU")

    # Unconstrained sigma parameter, sampled from a standard normal
    sigma = tf.Variable(initial_value=tf.random_normal(shape=[K, 1],
                                                       mean=0,
                                                       stddev=math.sqrt(1)),
                        trainable=True)
    # exp() keeps the effective sigma positive without a constraint
    sigma = tf.math.exp(sigma)

    # Unconstrained mixing parameter, sampled from a standard normal
    log_pi = tf.Variable(initial_value=tf.random.normal(shape=[K, 1],
                                                        mean=0,
                                                        stddev=math.sqrt(1)),
                         trainable=True)
    # logsoftmax turns it into normalized log mixing weights
    log_pi = logsoftmax(log_pi)

    # log probability: P(x, z)
    log_PDF = log_GaussPDF(X, MU, sigma)

    # loss: L = - logsumexp(log_PDF, log_pi)
    loss = -1 * tf.reduce_sum(reduce_logsumexp(log_PDF + tf.squeeze(log_pi)))

    # Adam Optimizer
    opt = tf.train.AdamOptimizer(learning_rate=0.1,
                                 beta1=0.9,
                                 beta2=0.99,
                                 epsilon=1e-5).minimize(loss)

    # Index of the most probable cluster for each point in X
    assign_to_cluster = tf.math.argmax(log_posterior(log_PDF, log_pi),
                                       axis=1,
                                       output_type=tf.int32)

    # Split the data set into per-cluster groups (for output)
    output = tf.dynamic_partition(X,
                                  assign_to_cluster,
                                  num_partitions=numClusters)

    init = tf.global_variables_initializer()

    loss_history = []
    clustered = None

    with tf.Session() as sess:
        sess.run(init)

        # Training loop: only the loss is needed from each step.
        for _ in range(iterations):
            step_loss, _ = sess.run([loss, opt], feed_dict={X: data})
            loss_history.append(step_loss)

        # Trained parameters
        trained_centroids = MU.eval()
        trained_log_pi = log_pi.eval()
        trained_sigma = sigma.eval()

        # Group the points by their most probable cluster
        clustered = sess.run(output, feed_dict={X: data})

    return clustered, trained_centroids, trained_log_pi, trained_sigma, loss_history
def __init__(self):
    """Assemble the data-loading, training and validation graphs.

    The constructor builds everything on the default graph and exposes three
    callables on the instance:

    * ``initialize_images(sess, images, heatmaps)`` runs the global variable
      initializer while feeding the encoded images and heatmaps.
    * ``train_on_batch(sess, batch_begin)`` runs one optimizer step on the
      batch starting at ``batch_begin`` and then returns ``[cost, accuracy]``
      from a second session call on the same batch start.
    * ``validate(sess)`` evaluates the validation loss and accuracy and
      prints them.
    """
    super(Model, self).__init__()

    with tf.name_scope('input'):
        # String placeholders that receive the encoded JPEG bytes.
        images_initializer = tf.placeholder(
            dtype=tf.string, shape=[DATA_SET_SIZE])
        heatmaps_initializer = tf.placeholder(
            dtype=tf.string, shape=[DATA_SET_SIZE])

        def decode(image):
            # ratio=2 asks the decoder to downscale while decoding.
            return tf.image.decode_jpeg(image, ratio=2)

        decoded_images = tf.map_fn(decode, images_initializer, dtype=tf.uint8)
        decoded_heatmaps = tf.map_fn(decode, heatmaps_initializer, dtype=tf.uint8)

        square_size = [IMAGE_SIZE, IMAGE_SIZE]
        resized_images = tf.image.resize_images(decoded_images, square_size)
        resized_heatmaps = tf.image.resize_images(decoded_heatmaps, square_size)

        # Partition 0 becomes the training set, partition 1 the validation set.
        split_vector = create_partition_vector()
        train_images_value, validate_images_value = tf.dynamic_partition(
            resized_images, split_vector, 2)
        train_heatmaps_value, validate_heatmaps_value = tf.dynamic_partition(
            resized_heatmaps, split_vector, 2)

        def data_var(init):
            # Non-trainable storage; the static shape is attached afterwards.
            return tf.Variable(init, trainable=False, validate_shape=False)

        train_images = data_var(train_images_value)
        train_heatmaps = data_var(train_heatmaps_value)
        validate_images = data_var(validate_images_value)
        validate_heatmaps = data_var(validate_heatmaps_value)

        train_shape = [TRAIN_SET_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS]
        validation_shape = [VALIDATION_SET_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS]
        train_images.set_shape(train_shape)
        train_heatmaps.set_shape(train_shape)
        validate_images.set_shape(validation_shape)
        validate_heatmaps.set_shape(validation_shape)

        validate_images_augmented = augment_many(validate_images)

    def initialize_images(sess, images, heatmaps):
        # The initializer op is created at call time and fed the raw data
        # that the data variables are derived from.
        feeds = {
            images_initializer: images,
            heatmaps_initializer: heatmaps
        }
        sess.run(tf.global_variables_initializer(), feed_dict=feeds)

    self.initialize_images = initialize_images

    with tf.name_scope('batch'):
        batch_start = tf.placeholder(tf.int32, shape=[])
        slice_begin = [batch_start, 0, 0, 0]
        slice_size = [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS]
        batch_images = tf.slice(train_images, slice_begin, slice_size)
        batch_heatmaps = tf.slice(train_heatmaps, slice_begin, slice_size)

        augmented_batch_images = augment_many(batch_images)
        augmented_batch_heatmaps = augment_many(batch_heatmaps)

    # Training objective and metric.
    pred = conv_net(augmented_batch_images)
    ground_truth = tf.div(augmented_batch_heatmaps, 256)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=pred, labels=ground_truth)
    cost = tf.reduce_mean(per_example_loss)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE).minimize(cost)

    correct_pred = tf.equal(tf.argmax(pred, 3), tf.argmax(ground_truth, 3))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def train_on_batch(sess, batch_begin):
        feeds = {batch_start: batch_begin}
        sess.run(optimizer, feed_dict=feeds)
        # Metrics come from a separate run issued after the update step.
        return sess.run([cost, accuracy], feed_dict=feeds)

    self.train_on_batch = train_on_batch

    # validation
    tf.get_variable_scope().reuse_variables()
    validation_pred = conv_net(validate_images_augmented)
    grouped_shape = [
        -1, IMAGE_TRANSFORMATION_NUMBER, IMAGE_SIZE, IMAGE_SIZE, CHANNELS
    ]
    validation_pred = tf.reshape(validation_pred, grouped_shape)
    validation_pred = tf.map_fn(gather_transformations, validation_pred)
    validation_ground_truth = tf.div(validate_heatmaps, 256)

    validation_losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=validation_pred, labels=validation_ground_truth)
    validation_cost = tf.reduce_mean(validation_losses)

    validation_correct_pred = tf.equal(
        tf.argmax(validation_pred, 3),
        tf.argmax(validation_ground_truth, 3))
    validation_accuracy = tf.reduce_mean(
        tf.cast(validation_correct_pred, tf.float32))

    def validate(sess):
        loss, acc = sess.run([validation_cost, validation_accuracy])
        print("Validation loss %g" % loss)
        print("Validation accuracy %g" % acc)

    self.validate = validate
def kmeans(data, numClusters, iterations=200):
    """Run gradient-descent k-means on ``data`` using Adam.

    The centroids are a trainable variable initialised from a standard
    normal distribution; the loss is the sum over all points of the smallest
    entry in that point's row of ``distanceFunc(X, MU)`` (``distanceFunc`` is
    defined elsewhere in this file).

    Args:
        data: 2-D array of shape ``(N, d)`` holding the points to cluster.
        numClusters: number of centroids ``K``.
        iterations: number of Adam steps to run (defaults to 200).

    Returns:
        A tuple ``(clustered, trained_centroids, loss_history)`` where
        ``clustered`` is a list of ``numClusters`` arrays (the rows of
        ``data`` grouped by the index of their minimum-distance centroid),
        ``trained_centroids`` is the evaluated ``MU`` variable after training
        and ``loss_history`` holds the loss fetched at every training step.
    """
    # Number of data points
    N = data.shape[0]
    # Size of dimension
    d = data.shape[1]
    # Number of clusters/centroids
    K = numClusters

    ### Build Graph ###

    # Create placeholder for data points
    X = tf.placeholder(dtype=tf.float32, shape=(N, d), name="X")

    # Initialize centre of clusters with standard normal distribution
    MU = tf.Variable(
        initial_value=tf.random.normal(shape=[K, d], mean=0, stddev=1.0, dtype=tf.float32),
        trainable=True,
        name="MU")

    # Calculate distance of each point to each cluster centre
    distances = distanceFunc(X, MU)

    # Calculate loss: L(MU) = sigma(n=1 to N) min(k=1 to K) ||X-MU||^2
    loss = tf.math.reduce_sum(tf.math.reduce_min(distances, axis=1), name="loss")

    # Adam Optimizer
    opt = tf.train.AdamOptimizer(
        learning_rate=0.1, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Assign the index of the minimum distance centroid to each point in X
    assign_to_cluster = tf.math.argmin(distances, axis=1, output_type=tf.int32)

    # Transform data set by splitting into groups (for output)
    output = tf.dynamic_partition(X, assign_to_cluster, num_partitions=numClusters)

    # Initialize Tensorflow variables
    init = tf.global_variables_initializer()

    loss_history = []
    with tf.Session() as sess:
        sess.run(init)

        # Training loop: only the loss is fetched alongside the update op,
        # the centroids are read once after training.
        for _ in range(iterations):
            step_loss, _ = sess.run([loss, opt], feed_dict={X: data})
            loss_history.append(step_loss)

        # Read the trained centroids and group every point by its closest
        # centroid in a single explicit session call.
        trained_centroids, clustered = sess.run(
            [MU, output], feed_dict={X: data})

    return clustered, trained_centroids, loss_history
def image_classifier(input_tensor, label_tensor, is_training, FLAGS):
    """Build the classification graph and return its tensors in a dict.

    Samples whose label equals ``-1`` are dropped before the network is
    applied. The network itself is ``modelFn`` (defined elsewhere in this
    file), called under ``slim.arg_scope(FLAGS["argscope"])``.

    Args:
        input_tensor: batch of inputs handed to ``modelFn`` after filtering.
        label_tensor: integer labels; ``-1`` marks a sample to be discarded.
        is_training: Python bool selecting the training or inference graph.
        FLAGS: mapping that provides ``"argscope"``, ``"class_count"``,
            ``"dropout_rate"`` and, when training, ``"modelScope"`` and
            ``"learning_rate"``.

    Returns:
        dict with the keys ``global_step``, ``prediction``, ``softmax``,
        ``onehot_labels``, ``group_sample_number``,
        ``group_correct_prediction`` and ``accuracy``; plus ``total_loss``
        and ``train_op`` when ``is_training`` is true, or
        ``output_tensor_name``, ``input_tensor`` and ``output_tensor``
        otherwise.
    """
    return_dict = {}
    global_step = tf.Variable(0, name='global_step', trainable=False)
    return_dict["global_step"] = global_step

    # Partition 0 keeps the samples whose label is not -1.
    is_bad_file = tf.cast(tf.equal(label_tensor, -1), tf.int32)
    filtered_imgs_tensor = tf.dynamic_partition(input_tensor, is_bad_file, 2)[0]
    filtered_label_tensor = tf.dynamic_partition(label_tensor, is_bad_file, 2)[0]

    with slim.arg_scope(FLAGS["argscope"]):
        # NOTE(review): FLAGS["dropout_rate"] is passed as a keep
        # probability -- confirm the flag really holds a keep probability.
        logits, end_points = modelFn(filtered_imgs_tensor,
                                     num_classes=FLAGS["class_count"],
                                     is_training=is_training,
                                     reuse=not is_training,
                                     dropout_keep_prob=FLAGS["dropout_rate"])

    onehot_tensor = tf.one_hot(filtered_label_tensor, FLAGS["class_count"])
    prediction = tf.argmax(logits, 1)
    return_dict["prediction"] = prediction
    return_dict["softmax"] = end_points["Predictions"]
    return_dict["onehot_labels"] = onehot_tensor

    with tf.name_scope('evaluation'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.cast(
                tf.equal(prediction, tf.argmax(onehot_tensor, 1)), tf.int32)
            # Per-class sample counts and per-class correct counts.
            return_dict["group_sample_number"] = tf.reduce_sum(
                onehot_tensor, 0, keepdims=True)
            group_correct_prediction = tf.reduce_sum(
                tf.one_hot(prediction, FLAGS["class_count"]) * onehot_tensor, 0)
            return_dict["group_correct_prediction"] = group_correct_prediction
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_sum(correct_prediction) / tf.shape(prediction)[0]
            # print(accuracy)
            return_dict["accuracy"] = accuracy
    tf.summary.scalar('accuracy', accuracy)

    if is_training:
        if 'AuxLogits' in end_points:
            tf.losses.softmax_cross_entropy(onehot_tensor,
                                            end_points['AuxLogits'],
                                            weights=np.exp(1) * 0.1,
                                            scope='aux_loss')
        tf.losses.softmax_cross_entropy(onehot_tensor, logits,
                                        weights=np.exp(2) * 0.1)

        Logits_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES,
            scope="{}/Logits".format(FLAGS["modelScope"]))
        Logits_weights = [v for v in Logits_variables
                          if v.name.find("weight") != -1]

        # sum(log(1 + w^2)) penalty on the logits-layer weights. It is
        # accumulated per variable so that weight tensors of different
        # shapes are supported; an empty selection contributes zero.
        log_penalties = [tf.reduce_sum(tf.log(1 + tf.square(w)))
                         for w in Logits_weights]
        if log_penalties:
            regularizer = tf.add_n(log_penalties)
        else:
            regularizer = tf.constant(0.0)
        tf.losses.add_loss(regularizer * 0.001)

        makeLog("losses weights\t{}\t{}\t{}".format(np.exp(1) * 0.1, np.exp(2) * 0.1, 0.001))

        total_loss = tf.losses.get_total_loss()
        return_dict["total_loss"] = total_loss
        tf.summary.scalar('total_loss', total_loss)

        optimizer = tf.train.RMSPropOptimizer(learning_rate=FLAGS["learning_rate"],
                                              momentum=0.9, decay=0.9, epsilon=1.0)
        # Run the collected update ops before every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies([tf.group(*update_ops)]):
            train_op = optimizer.minimize(loss=total_loss, global_step=global_step)
        return_dict["train_op"] = train_op
    else:
        output_tensor = tf.nn.softmax(logits)
        # Op name without the ":<index>" output suffix.
        output_name = output_tensor.name.split(":")[0]
        return_dict["output_tensor_name"] = output_name
        makeLog("output_tensor_name: {}".format(output_name))
        return_dict["input_tensor"] = input_tensor
        return_dict["output_tensor"] = output_tensor

    return return_dict
def loss_crf_scan(self, _, current_input):
    """
    Scan step computing the CRF log likelihood of one sequence.

    Two extra classes ("start" and "end") are appended to the unary scores,
    a start row is prepended, the padded tail is cut off, an end row is
    appended, and the label sequence is wrapped with the matching start and
    end labels before calling ``tf.contrib.crf.crf_log_likelihood``.

    :param _: previous scan output (ignored)
    :param current_input: indexable triple; element 0 holds the unary scores,
        element 1 the sequence length and element 2 the labels
    :return: summed log likelihood of the sequence
    """
    unary_scores = current_input[0]
    sequence_length = current_input[1]
    labels = current_input[2]
    blocked = -10000.0

    # -----------------------------------------------------------
    # TILING: two extra score columns for the "start" and "end" classes
    extra_columns = tf.tile(
        tf.constant(blocked, shape=[1, 2], dtype=tf.float32),
        [tf.shape(unary_scores)[0], 1])
    widened_scores = tf.concat([unary_scores, extra_columns], 1)

    # -----------------------------------------------------------
    # ADDING START TOKEN
    cur_nb_class = unary_scores.get_shape().as_list()[1]

    # Rows that only allow the start (resp. end) class
    start_row = [[blocked] * cur_nb_class + [0.0, blocked]]
    end_row = [[blocked] * cur_nb_class + [blocked, 0.0]]

    with_start = tf.concat([start_row, widened_scores], 0)

    # -----------------------------------------------------------
    # ADDING END TOKEN
    # Mask over the flattened scores keeping the first (length + 1) rows
    keep_mask = tf.sequence_mask(
        (tf.cast(tf.reshape(sequence_length, [-1]), dtype=tf.int32) + 1)
        * tf.shape(with_start)[1],
        tf.shape(with_start)[1] * tf.shape(with_start)[0],
        dtype=tf.int32)

    flat_scores = tf.reshape(with_start, [1, -1])
    score_parts = tf.dynamic_partition(flat_scores, keep_mask, 2)

    # Partition 1 holds the kept values; restore the row layout
    kept_rows = tf.reshape(score_parts[1], [-1, tf.shape(with_start)[1]])

    with_start_end = tf.concat([kept_rows, end_row], 0)
    batched_scores = tf.reshape(
        with_start_end,
        [1, tf.shape(with_start_end)[0], tf.shape(with_start_end)[1]])

    # Re-attach the statically known number of classes
    batched_scores.set_shape([1, None, cur_nb_class + 2])

    # -----------------------------------------------------------
    # ADDING START AND END LABELS
    label_mask = tf.sequence_mask(
        (tf.cast(tf.reshape(sequence_length, [-1]), dtype=tf.int32)),
        tf.shape(unary_scores)[0],
        dtype=tf.int32)

    flat_labels = tf.reshape(labels, [1, -1])
    label_parts = tf.dynamic_partition(flat_labels, label_mask, 2)

    # Start label is cur_nb_class, end label is cur_nb_class + 1
    wrapped_labels = tf.concat(
        [[cur_nb_class], label_parts[1], [cur_nb_class + 1]], axis=0)
    batched_labels = tf.reshape(wrapped_labels, [1, -1])

    # -----------------------------------------------------------
    # COMPUTING LOG LIKELIHOOD
    # NOTE(review): the length passed here is the original one, although two
    # tokens were added to the sequence -- confirm this is intended.
    crf_result = tf.contrib.crf.crf_log_likelihood(
        batched_scores, batched_labels, sequence_length,
        transition_params=self.transition_params)
    log_likelihood = crf_result[0]

    return tf.reduce_sum(log_likelihood)
def model(self, seq_length, img_ph, pnt_ph, aud_ph, partitions_ph, train_ph, prompts_ph, variable_scope,
          variable_scope2, var_img, var_pnt, var_aud, var_lstm, incep_reuse=True):
    """Build the per-modality encoders, the LSTM and the output layers.

    Each modality enabled in ``self.graphbuild`` is reshaped to
    ``[batch, time, h, w, c]`` and encoded frame by frame (image frames by
    ``inception_resnet_v2``, the other two inputs by three-layer convolution
    stacks). The encodings are concatenated along the feature axis, fed to an
    LSTM, the time steps selected by ``partitions_ph`` are kept, and two
    affine layers produce the result.

    Args:
        seq_length: sequence lengths handed to ``tf.nn.dynamic_rnn``.
        img_ph: image input (used when ``self.graphbuild[0]`` is true).
        pnt_ph: second input stream (used when ``self.graphbuild[1]`` is true).
        aud_ph: third input stream (used when ``self.graphbuild[2]`` is true).
        partitions_ph: 0/1 partition ids; entries marked 1 are kept from the
            LSTM outputs.
        train_ph: forwarded to the convolution helpers, which do not use it.
        prompts_ph: currently unused.
        variable_scope: scope entered while building the convolution stacks.
        variable_scope2: scope entered while building the LSTM and the
            output layers.
        var_img: currently unused.
        var_pnt: dict with ``W1..W3`` / ``b1..b3`` for the ``pnt`` stack.
        var_aud: dict with ``W1..W3`` / ``b1..b3`` for the ``aud`` stack.
        var_lstm: dict with ``W_lstm``, ``b_lstm``, ``W_fc`` and ``b_fc``.
        incep_reuse: ``reuse`` flag forwarded to ``inception_resnet_v2``.

    Returns:
        Output tensor of the final affine layer (after a finiteness check).
    """

    #
    def process_vars(seq, data_type):
        # cast inputs to the correct data type
        seq_inp = tf.cast(seq, tf.float32)
        return tf.reshape(seq_inp, (self.__batch_size, -1, data_type["cmp_h"], data_type["cmp_w"],
                                    data_type["num_c"]))

    def convolve_data_inception(input_data, val, n, dtype):
        # Only the logits of the Inception-ResNet are used as features.
        data = tf.reshape(input_data, [-1, 299, 299, 3])
        logits, end_points = inception_resnet_v2(
            data,
            num_classes=output_sizes[-1] * output_sizes[-1] * layer_elements[-2],
            is_training=False,
            reuse=incep_reuse)
        return logits

    def convolve_data_3layer_pnt(input_data, val, variables, n, dtype):
        def pad_tf(x, p):
            # Symmetric zero padding of both spatial axes.
            return tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], "CONSTANT")

        def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
            conv = tf.nn.conv2d(sequence, W, strides=[1, stride, stride, 1], padding=padding) + b
            return tf.nn.relu(conv)

        input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                name=n + "_inp_reshape")

        # Layer 1: explicit padding followed by a VALID convolution.
        input_data = pad_tf(input_data, padding_size[0])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], stride_sizes[0],
                                          layer_elements[1], output_sizes[0], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv1")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - ",
            name="conv1_" + n
        )

        # Layer 2
        input_data = pad_tf(input_data, padding_size[1])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], stride_sizes[1],
                                          layer_elements[2], output_sizes[1], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv2")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - ",
            name="conv2_" + n
        )

        # Layer 3
        input_data = pad_tf(input_data, padding_size[2])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], stride_sizes[-1],
                                          layer_elements[-2], output_sizes[-1], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv3")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - ",
            name="conv3_" + n
        )

        return input_data

    def convolve_data_3layer_aud(input_data, val, variables, n, dtype):
        def pad_tf(x, padding):
            # Padding is given separately for the two spatial axes.
            return tf.pad(x, [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]], "CONSTANT")

        def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
            conv = tf.nn.conv2d(sequence, W, strides=[1, stride[0], stride[1], 1], padding=padding) + b
            return tf.nn.relu(conv)

        input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                name=n + "_inp_reshape")

        # Layer 1: explicit padding followed by a VALID convolution.
        input_data = pad_tf(input_data, aud_padding_size[0])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], aud_stride_sizes[0],
                                          aud_layer_elements[1], aud_output_sizes[0], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv1")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - conv1_" + n,
            name="conv1_" + n
        )

        # Layer 2
        input_data = pad_tf(input_data, aud_padding_size[1])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], aud_stride_sizes[1],
                                          aud_layer_elements[2], aud_output_sizes[1], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv2")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - conv2_" + n,
            name="conv2_" + n
        )

        # Layer 3
        input_data = pad_tf(input_data, aud_padding_size[2])
        padding = "VALID"
        input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], aud_stride_sizes[2],
                                          aud_layer_elements[3], aud_output_sizes[2], train_ph, padding)
        self.variable_summaries(input_data, dtype["name"] + "_conv3")
        input_data = tf.verify_tensor_all_finite(
            input_data,
            "ERR: Tensor not finite - conv3_" + n,
            name="conv3_" + n
        )

        return input_data

    # pass different data types through conv networks
    inp_data = [0] * TOTAL_PARAMS
    conv_inp = [0] * TOTAL_PARAMS

    with tf.device('/gpu:1'):
        if self.graphbuild[0]:
            val = 0
            inp_data[val] = process_vars(img_ph, img_dtype)
            conv_inp[val] = convolve_data_inception(inp_data[val], val, "img", img_dtype)

    with variable_scope as scope:
        if self.graphbuild[1]:
            val = 1
            inp_data[val] = process_vars(pnt_ph, pnt_dtype)
            conv_inp[val] = convolve_data_3layer_pnt(inp_data[val], val, var_pnt, "pnt", pnt_dtype)
        if self.graphbuild[2]:
            val = 2
            inp_data[val] = process_vars(aud_ph, aud_dtype)
            conv_inp[val] = convolve_data_3layer_aud(inp_data[val], val, var_aud, "aud", aud_dtype)

    # combine different inputs together
    combined_data = None
    for i in range(TOTAL_PARAMS):
        if self.graphbuild[i]:
            if i < 2:
                conv_inp[i] = tf.reshape(
                    conv_inp[i],
                    [self.__batch_size, -1,
                     output_sizes[-1] * output_sizes[-1] * layer_elements[-2]],
                    name="combine_reshape")
            else:
                # NOTE(review): the first component of aud_output_sizes[-1]
                # is used for both spatial factors -- confirm the last audio
                # feature map is square.
                conv_inp[i] = tf.reshape(
                    conv_inp[i],
                    [self.__batch_size, -1,
                     aud_output_sizes[-1][0] * aud_output_sizes[-1][0] * aud_layer_elements[-2]],
                    name="combine_reshape_aud")

            # Identity test: the accumulator is either None or a tensor.
            if combined_data is None:
                combined_data = conv_inp[i]
            else:
                combined_data = tf.concat([combined_data, conv_inp[i]], 2)

    W_lstm = var_lstm["W_lstm"]
    b_lstm = var_lstm["b_lstm"]
    W_fc = var_lstm["W_fc"]
    b_fc = var_lstm["b_fc"]

    combined_data = tf.verify_tensor_all_finite(
        combined_data,
        "ERR: Tensor not finite - combined_data",
        name="combined_data"
    )

    with variable_scope2 as scope:
        lstm_cell = tf.contrib.rnn.LSTMCell(layer_elements[-2],
                                            use_peepholes=False,
                                            cell_clip=None,
                                            initializer=None,
                                            num_proj=None,
                                            proj_clip=None,
                                            forget_bias=1.0,
                                            state_is_tuple=True,
                                            activation=None,
                                            reuse=None
                                            )

        outputs, states = tf.nn.dynamic_rnn(
            cell=lstm_cell,
            inputs=combined_data,
            dtype=tf.float32,
            sequence_length=seq_length,
            time_major=False
        )

        # Replace NaN outputs by zeros before the finiteness check.
        outputs = tf.where(tf.is_nan(outputs), tf.zeros_like(outputs), outputs)
        outputs = tf.verify_tensor_all_finite(
            outputs,
            "ERR: Tensor not finite - outputs",
            name="outputs"
        )

        # Keep only the entries that partitions_ph marks with 1.
        num_partitions = 2
        res_out = tf.dynamic_partition(outputs, partitions_ph, num_partitions)[1]

        rnn_x = tf.matmul(res_out, W_lstm) + b_lstm
        self.variable_summaries(rnn_x, "lstm")
        rnn_x = tf.verify_tensor_all_finite(
            rnn_x,
            "ERR: Tensor not finite - fc1",
            name="fc1"
        )

        x_tensor = rnn_x
        rnn_x = tf.matmul(x_tensor, W_fc) + b_fc
        self.variable_summaries(rnn_x, "fc")
        rnn_x = tf.verify_tensor_all_finite(
            rnn_x,
            "ERR: Tensor not finite - fc2",
            name="fc2"
        )

        return rnn_x
def testLinearThompsonSamplingUpdateWithForgetting(self, batch_size, context_dim, dtype):
    """Verify the agent's update when a forgetting factor ``gamma`` is set.

    The agent is trained once on a random batch; the resulting per-arm
    statistics must equal ``gamma`` times the initial statistics plus the
    contribution of the observations and rewards routed to that arm.
    """
    gamma = 0.9
    num_actions = 5

    # Build the experience (a `Trajectory`) from random actions.
    initial_step, final_step = _get_initial_and_final_steps(
        batch_size, context_dim)
    action = np.random.randint(num_actions, size=batch_size, dtype=np.int32)
    experience = _get_experience(
        initial_step, _get_action_step(action), final_step)

    # Create the agent and snapshot its statistics before training.
    observation_spec = tensor_spec.TensorSpec([context_dim], tf.float32)
    agent = lin_ts_agent.LinearThompsonSamplingAgent(
        time_step_spec=time_step.time_step_spec(observation_spec),
        action_spec=tensor_spec.BoundedTensorSpec(
            dtype=tf.int32, shape=(), minimum=0, maximum=num_actions - 1),
        gamma=gamma,
        dtype=dtype)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    covariances_before = self.evaluate(agent._weight_covariances)
    estimators_before = self.evaluate(agent._parameter_estimators)

    # Train once and read the statistics back.
    self.evaluate(agent.train(experience))
    covariances_after = self.evaluate(agent.weight_covariances)
    estimators_after = self.evaluate(agent.parameter_estimators)

    def split_by_arm(flat_data):
        # One tensor per arm, holding the rows whose action chose that arm.
        return tf.dynamic_partition(
            data=flat_data,
            partitions=tf.convert_to_tensor(action),
            num_partitions=num_actions)

    # Reference values for the update.
    per_arm_observations = split_by_arm(
        tf.reshape(experience.observation, [batch_size, context_dim]))
    per_arm_rewards = split_by_arm(
        tf.reshape(experience.reward, [batch_size]))

    expected_covariances = []
    expected_estimators = []
    for arm in range(num_actions):
        arm_observations = per_arm_observations[arm]
        arm_rewards = per_arm_rewards[arm]
        gram_matrix = tf.matmul(
            arm_observations, arm_observations, transpose_a=True)
        expected_covariances.append(
            self.evaluate(gamma * covariances_before[arm] + gram_matrix))
        weighted_sum = bandit_utils.sum_reward_weighted_observations(
            arm_rewards, arm_observations)
        expected_estimators.append(
            self.evaluate(gamma * estimators_before[arm] + weighted_sum))

    self.assertAllClose(expected_covariances, covariances_after)
    self.assertAllClose(expected_estimators, estimators_after)
def update_centroids(samples, nearest_indices, n_clusters):
    """Recompute every centroid as the mean of the samples assigned to it.

    Args:
        samples: tensor whose first axis enumerates the samples.
        nearest_indices: for each sample, the index of the centroid it is
            assigned to; cast to ``int32`` before use.
        n_clusters: number of centroids (Python int).

    Returns:
        Tensor with one row per cluster, stacked along axis 0, holding the
        per-cluster mean over the sample axis.
    """
    nearest_indices = tf.cast(nearest_indices, tf.int32)
    partitions = tf.dynamic_partition(samples, nearest_indices, n_clusters)
    # NOTE(review): a cluster that receives no samples presumably yields a
    # NaN row here (mean over an empty partition) -- confirm callers cope.
    cluster_means = [tf.expand_dims(tf.reduce_mean(partition, 0), 0)
                     for partition in partitions]
    # tf.concat takes (values, axis), matching its other uses in this file.
    new_centroids = tf.concat(cluster_means, 0)
    return new_centroids
# word_embedding_uniform = tf.concat([word_embedding_0, word_embedding_1], axis=0) fact_description = tf.nn.embedding_lookup(word_embedding, fact_input) law_description = tf.nn.embedding_lookup(word_embedding, law_input) max_graph = len(graph_list_1) deg_list = [len(neigh_index[i]) for i in range(n_law)] graph_list = list(zip(*graph_membership))[1] gold_matrix_law = tf.one_hot(law_labels, 118, dtype=tf.float32) gold_matrix_accu = tf.one_hot(accu_labels, 130, dtype=tf.float32) gold_matrix_time = tf.one_hot(time_labels, 12, dtype=tf.float32) #############----------------------################### graph_label = tf.dynamic_partition( tf.transpose(gold_matrix_law, [1, 0]), graph_list, max_graph) # size: [batch_size, graph_num, N_each_graph]) label = [] for i in range(max_graph): label.append(tf.reduce_sum(graph_label[i], 0, keepdims=True)) graph_label = tf.transpose(tf.concat(label, 0), [1, 0]) # size: [batch_size, graph_num] #############----------------------################### neigh_index = sorted(neigh_index.items(), key=lambda x: len(x[1])) max_deg = len(neigh_index[-1][1]) t = 0 adj_list = [[]] for i in range(n_law): each = neigh_index[i]
def model_fn(features, labels, mode, params):
    """Estimator model function for semantic segmentation (PASCAL VOC).

    Builds the DeepLab v3 network, the predictions dict, a cross-entropy loss
    restricted to pixels whose label is a valid class id, an L2 weight-decay
    term, the training op (Momentum) and the pixel-accuracy / mean-IoU
    metrics.

    Args:
        features: input images, or a dict holding them under ``'data'``.
        labels: label tensor with a channel axis at position 3.
        mode: one of the ``tf.estimator.ModeKeys`` values.
        params: dict providing ``'num_classes'``, ``'learning_rate'``,
            ``'tensorboard_images_max_outputs'`` and optionally
            ``'weight_decay'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    if isinstance(features, dict):
        features = features['data']
    images = features

    in_train_mode = mode == tf.estimator.ModeKeys.TRAIN

    network = deeplab_v3_generator(params['num_classes'],
                                   _OUTPUT_STRIDE,
                                   _BASE_ARCHITECTURE,
                                   None,
                                   _BATCH_NORM_DECAY)
    logits = network(features, in_train_mode)

    # Per-pixel arg-max class, kept with a trailing channel axis.
    pred_classes = tf.expand_dims(
        tf.argmax(logits, axis=3, output_type=tf.int32), axis=3)
    pred_decoded_labels = tf.cast(pred_classes, tf.uint8)

    predictions = {
        'classes': pred_classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
        'decoded_labels': pred_decoded_labels
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        # 'decoded_labels' is left out of the export signature because custom
        # functions produce an error when used with saved_model.
        exported = {key: value for key, value in predictions.items()
                    if key != 'decoded_labels'}
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'preds': tf.estimator.export.PredictOutput(exported)
            })

    gt_decoded_labels = tf.cast(labels, tf.uint8)

    # Drop the channel axis and flatten everything to one row per pixel.
    labels = tf.squeeze(labels, axis=3)
    flat_logits = tf.reshape(logits, [-1, params['num_classes']])
    flat_labels = tf.reshape(labels, [-1])

    # 1 for pixels whose label is a valid class id, 0 otherwise.
    valid_indices = tf.to_int32(flat_labels <= params['num_classes'] - 1)

    def keep_valid(flat_tensor):
        # Partition 1 holds the entries flagged as valid.
        return tf.dynamic_partition(flat_tensor, valid_indices, num_partitions=2)[1]

    valid_logits = keep_valid(flat_logits)
    valid_labels = keep_valid(flat_labels)

    flat_preds = tf.reshape(pred_classes, [-1])
    valid_preds = keep_valid(flat_preds)

    confusion_matrix = tf.confusion_matrix(
        valid_labels, valid_preds, num_classes=params['num_classes'])

    predictions['valid_preds'] = valid_preds
    predictions['valid_labels'] = valid_labels
    predictions['confusion_matrix'] = confusion_matrix

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        logits=valid_logits, labels=tf.cast(valid_labels, tf.int32))

    # Named copy of the loss for logging, plus a summary.
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    # With frozen batch norm, variables named beta/gamma are not trained.
    if _FREEZE_BATCH_NORM:
        train_var_list = [var for var in tf.trainable_variables()
                          if 'beta' not in var.name and 'gamma' not in var.name]
    else:
        train_var_list = list(tf.trainable_variables())

    # Add weight decay to the loss.
    with tf.variable_scope("total_loss"):
        weight_decay = params.get('weight_decay', _WEIGHT_DECAY)
        l2_terms = [tf.nn.l2_loss(var) for var in train_var_list]
        loss = cross_entropy + weight_decay * tf.add_n(l2_terms)

    if in_train_mode:
        max_outputs = params['tensorboard_images_max_outputs']

        # Channels 0-2 are shown as RGB, channel 3 as a separate image.
        rgb = images[:, :, :, 0:3]
        # NOTE(review): this divides by the maximum, not by (max - min) --
        # confirm that is the intended scaling for the summary image.
        rgb_norm = ((rgb - tf.reduce_min(rgb)) / tf.reduce_max(rgb)) * 255
        ir = tf.expand_dims(images[:, :, :, 3], -1) * 255

        tf.summary.image('images', rgb_norm, max_outputs=max_outputs)
        tf.summary.image('ir_near', ir, max_outputs=max_outputs)
        tf.summary.image('labels', gt_decoded_labels * 255, max_outputs=max_outputs)
        tf.summary.image('output', pred_decoded_labels * 255, max_outputs=max_outputs)

        global_step = tf.train.get_or_create_global_step()

        # The learning rate is taken from params as-is.
        learning_rate = params['learning_rate']

        # Named copy of the learning rate for logging, plus a summary.
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=_MOMENTUM)

        # Batch norm requires update ops to be added as a dependency to the train_op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step, var_list=train_var_list)
    else:
        train_op = None

    accuracy = tf.metrics.accuracy(valid_labels, valid_preds)
    mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds, params['num_classes'])
    metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou}

    # Element 1 of a metric pair is its update op; expose it for logging.
    tf.identity(accuracy[1], name='train_px_accuracy')
    tf.summary.scalar('train_px_accuracy', accuracy[1])

    def compute_mean_iou(total_cm, name='mean_iou'):
        """Compute the mean intersection-over-union via the confusion matrix."""
        column_totals = tf.to_float(tf.reduce_sum(total_cm, 0))
        row_totals = tf.to_float(tf.reduce_sum(total_cm, 1))
        diagonal = tf.to_float(tf.diag_part(total_cm))
        union = column_totals + row_totals - diagonal

        # Classes that appear neither in the labels nor in the predictions
        # have a zero denominator and are excluded from the mean.
        num_valid_entries = tf.reduce_sum(
            tf.cast(tf.not_equal(union, 0), dtype=tf.float32))

        # Replace zero denominators by one to avoid dividing by zero.
        union = tf.where(tf.greater(union, 0), union, tf.ones_like(union))
        iou = tf.div(diagonal, union)

        for class_id in range(params['num_classes']):
            tag = 'train_iou_class{}'.format(class_id)
            tf.identity(iou[class_id], name=tag)
            tf.summary.scalar(tag, iou[class_id])

        # With no valid class at all the result is 0.
        return tf.where(
            tf.greater(num_valid_entries, 0),
            tf.reduce_sum(iou, name=name) / num_valid_entries,
            0)

    train_mean_iou = compute_mean_iou(mean_iou[1])

    tf.identity(train_mean_iou, name='train_mean_iou')
    tf.summary.scalar('train_mean_iou', train_mean_iou)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics
    )
def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step):
    """
    Build the PPO loss and the op that minimises it.

    Creates the placeholders ``returns_holder``, ``advantage`` and
    ``old_value``, the decayed ``learning_rate``, the masked ``value_loss``
    and ``policy_loss``, the combined ``loss`` and ``update_batch``; all of
    them are stored on ``self``.

    :param probs: Current policy probabilities
    :param old_probs: Past policy probabilities
    :param value: Current value estimate
    :param entropy: Current policy entropy
    :param beta: Entropy regularization strength
    :param epsilon: Value for policy-divergence threshold
    :param lr: Learning rate
    :param max_step: Total number of training steps.
    """

    def masked_mean(tensor):
        # Mean over the entries that self.mask assigns to partition 1.
        return tf.reduce_mean(tf.dynamic_partition(tensor, self.mask, 2)[1])

    self.returns_holder = tf.placeholder(
        shape=[None], dtype=tf.float32, name='discounted_rewards')
    self.advantage = tf.placeholder(
        shape=[None, 1], dtype=tf.float32, name='advantages')
    self.learning_rate = tf.train.polynomial_decay(
        lr, self.global_step, max_step, 1e-10, power=1.0)
    self.old_value = tf.placeholder(
        shape=[None], dtype=tf.float32, name='old_value_estimates')

    # Clip range and entropy weight decay linearly (power=1.0) to their floors.
    decay_epsilon = tf.train.polynomial_decay(
        epsilon, self.global_step, max_step, 0.1, power=1.0)
    decay_beta = tf.train.polynomial_decay(
        beta, self.global_step, max_step, 1e-5, power=1.0)
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

    # Value loss: the larger of the plain and the clipped squared error.
    value_delta = tf.reduce_sum(value, axis=1) - self.old_value
    clipped_value_estimate = self.old_value + tf.clip_by_value(
        value_delta, -decay_epsilon, decay_epsilon)
    unclipped_value_error = tf.squared_difference(
        self.returns_holder, tf.reduce_sum(value, axis=1))
    clipped_value_error = tf.squared_difference(
        self.returns_holder, clipped_value_estimate)
    self.value_loss = masked_mean(
        tf.maximum(unclipped_value_error, clipped_value_error))

    # Policy loss: the ratio exp(probs - old_probs) is weighted by the
    # advantage, once as-is and once clipped to [1 - eps, 1 + eps]; the
    # element-wise minimum of the two is averaged over the masked entries.
    r_theta = tf.exp(probs - old_probs)
    unclipped_objective = r_theta * self.advantage
    clipped_ratio = tf.clip_by_value(
        r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon)
    clipped_objective = clipped_ratio * self.advantage
    self.policy_loss = -masked_mean(
        tf.minimum(unclipped_objective, clipped_objective))

    # Total loss: policy term, half the value term, minus the entropy bonus.
    entropy_bonus = decay_beta * masked_mean(entropy)
    self.loss = self.policy_loss + 0.5 * self.value_loss - entropy_bonus

    if self.use_curiosity:
        self.loss += 10 * (0.2 * self.forward_loss + 0.8 * self.inverse_loss)
    self.update_batch = optimizer.minimize(self.loss)
def one_dimensional_calibration_layer(uncalibrated_tensor,
                                      num_keypoints,
                                      signal_name,
                                      keypoints_initializers=None,
                                      keypoints_initializer_fns=None,
                                      bound=False,
                                      monotonic=None,
                                      missing_input_value=None,
                                      missing_output_value=None,
                                      **regularizer_amounts):
    """Creates a calibration layer for one single continuous signal.

    Returns a calibrated tensor of the uncalibrated continuous signal and a
    list of projections ops.

    Args:
      uncalibrated_tensor: Tensor of shape [batch_size] of one single signal.
      num_keypoints: Number of keypoints to use.
      signal_name: (Required) Used as a suffix to the variable names.
      keypoints_initializers: For evaluation or inference (or when resuming
        training from a checkpoint) the values will be loaded from disk, so
        they don't need to be given -- but in this case num_keypoints need to
        be accurate. Two tensors of shape [num_keypoints]. See
        load_keypoints_from_quantiles or uniform_keypoints_for_signal on how
        to generate these (module keypoints_initialization).
      keypoints_initializer_fns: Like keypoints_initializers but using lambda
        initializers. They should be compatible with
        tf.compat.v1.get_variable. If this is set, then
        keypoints_initializers must be None.
      bound: boolean whether output of calibration must be bound.
        NOTE(review): this function uses the value directly as a truth value
        (`if bound:`); a per-feature dict is not unpacked here.
      monotonic: whether calibration has to be kept monotonic: None or 0 means
        no monotonicity. Positive or negative values mean increasing or
        decreasing monotonicity respectively.
        NOTE(review): this function compares the value with 0 directly; a
        per-feature dict is not unpacked here.
      missing_input_value: If set, and if the input has this value it is
        assumed to be missing and the output will either be calibrated to some
        value between `[calibration_output_min, calibration_output_max]` or
        set to a fixed value set by missing_output_value. Limitation: it only
        works for scalars. The value is converted to the dtype of
        `uncalibrated_tensor` before the comparison.
      missing_output_value: Requires missing_input_value also to be set. If
        set it will convert missing input to this value.
      **regularizer_amounts: Keyword args of regularization amounts passed to
        regularizers.calibrator_regularization(). Keyword names should be
        among supported regularizers.CALIBRATOR_REGULARIZERS and values should
        be float.

    Returns:
      A tuple of:
      * calibrated tensor of shape [batchsize]
      * None or projection ops, that must be applied at each step (or every so
        many steps) to project the model to a feasible space: used for
        bounding the outputs or for imposing monotonicity.
      * None or a regularization loss, if regularization is configured.

    Raises:
      ValueError: if dtypes are incompatible.
      ValueError: if keypoints_initializers and keypoints_initializer_fns are
        both set.
      ValueError: if uncalibrated_tensor is not rank 1, or if
        missing_output_value is given without missing_input_value.
    """
    if (keypoints_initializers is not None and
            keypoints_initializer_fns is not None):
        raise ValueError(
            'keypoints_initializers and keypoints_initializer_fns '
            'cannot both be set.')
    with tf.compat.v1.variable_scope('pwl_calibration'):
        # Sanity checks.
        if uncalibrated_tensor.get_shape().ndims != 1:
            raise ValueError(
                'one_dimensional_calibration_layer can only be used for a single '
                'signal, so uncalibrated shape must be of form (batchsize), got %s'
                % uncalibrated_tensor.get_shape())
        if missing_output_value is not None and missing_input_value is None:
            raise ValueError(
                'missing_output_value can only be set if a missing_input_value is '
                'also set, missing_input_value=None, missing_output_values=%s' %
                missing_output_value)

        # Create variables: only uses initializer if they are given.
        kp_in_name = signal_name + '_keypoints_inputs'
        kp_out_name = signal_name + '_keypoints_outputs'
        missing_out_calibrated_name = signal_name + '_calibrated_missing_output'

        if keypoints_initializers is not None:
            kp_in, kp_out = keypoints_initializers[0], keypoints_initializers[1]
            if (uncalibrated_tensor.dtype != kp_in.dtype or
                    uncalibrated_tensor.dtype != kp_out.dtype):
                raise ValueError(
                    'incompatible types for signal \'%s\': uncalibrated=%s, '
                    'keypoints_initializers[input=%s, output=%s]' %
                    (signal_name, uncalibrated_tensor.dtype, kp_in.dtype,
                     kp_out.dtype))
            tools.assert_shape(kp_in, [num_keypoints],
                               'keypoints_initializers[input]')
            tools.assert_shape(kp_out, [num_keypoints],
                               'keypoints_initializers[output]')
            keypoints_inputs = tf.compat.v1.get_variable(kp_in_name,
                                                         initializer=kp_in)
            keypoints_outputs = tf.compat.v1.get_variable(kp_out_name,
                                                          initializer=kp_out)

            if missing_input_value is not None:
                # Value to be taken by missing features.
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    # Learned missing value, initialized by the first value of
                    # kp_out.
                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name, initializer=kp_out[0])
        elif keypoints_initializer_fns is not None:
            kp_in, kp_out = (keypoints_initializer_fns[0],
                             keypoints_initializer_fns[1])
            keypoints_inputs = tf.compat.v1.get_variable(kp_in_name,
                                                         shape=[num_keypoints],
                                                         initializer=kp_in)
            keypoints_outputs = tf.compat.v1.get_variable(
                kp_out_name, shape=[num_keypoints], initializer=kp_out)

            if missing_input_value is not None:
                # Value to be taken by missing features.
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    # Learned missing value, initialized by the first value of
                    # kp_out.
                    def first_kp_out(*args, **kwargs):
                        return kp_out(*args, **kwargs)[0]

                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name,
                        shape=[],
                        initializer=first_kp_out)
        else:
            # When loading a model, no initializer.
            keypoints_inputs = tf.compat.v1.get_variable(
                kp_in_name, shape=[num_keypoints],
                dtype=uncalibrated_tensor.dtype)
            keypoints_outputs = tf.compat.v1.get_variable(
                kp_out_name, shape=[num_keypoints],
                dtype=uncalibrated_tensor.dtype)
            if missing_input_value is not None:
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name,
                        shape=[],
                        dtype=uncalibrated_tensor.dtype)

        # Split missing values from normal values.
        # FutureWork: move handling of missing values to C++ land.
        if missing_input_value is not None:
            # Build the sentinel with the input's dtype: letting tf.constant
            # infer the dtype from the Python value (e.g. int -1, or a float
            # against a float64 tensor) makes tf.equal fail on mismatched
            # operand types.
            missing_mask = tf.equal(
                uncalibrated_tensor,
                tf.constant(missing_input_value,
                            dtype=uncalibrated_tensor.dtype))
            mask_indices = tf.range(tf.shape(uncalibrated_tensor)[0])
            mask_indices = tf.dynamic_partition(
                mask_indices, tf.cast(missing_mask, tf.int32), 2)
            (uncalibrated_tensor, missing_values) = tf.dynamic_partition(
                uncalibrated_tensor, tf.cast(missing_mask, tf.int32), 2)

            # Assign value to missing_values.
            missing_values = tf.ones_like(missing_values)
            missing_values *= missing_out_calibrated

        # Dense implementation.
        interpolation = pwl_calibration_ops.pwl_indexing_calibrator(
            uncalibrated_tensor, keypoints_inputs)
        calibrated = tf.reduce_sum(interpolation * keypoints_outputs, 1)
        projection_ops = None

        # Re-join missing values.
        if missing_input_value is not None:
            calibrated = tf.dynamic_stitch(mask_indices,
                                           [calibrated, missing_values])

        # Boundness.
        projected_keypoints_outputs = None
        if bound:
            bound_min_name = signal_name + '_bound_min'
            bound_max_name = signal_name + '_bound_max'
            # Set bound_min/max from min/max values initialized.
            if keypoints_initializers is not None:
                # Store bound_min and bound_max in variables because their
                # values (from kp_out) are only available during train (when
                # keypoints_initializers is available). During inference the
                # value is not available. Storing them in variables make them
                # available during inference.
                bound_min = tf.compat.v1.get_variable(
                    bound_min_name,
                    dtype=uncalibrated_tensor.dtype,
                    initializer=tf.reduce_min(kp_out))
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name,
                    dtype=uncalibrated_tensor.dtype,
                    initializer=tf.reduce_max(kp_out))
            elif keypoints_initializer_fns is not None:
                # Same reasoning as above, but the bounds are derived lazily
                # from the kp_out initializer function.
                def min_kp_out(*args, **kwargs):
                    return tf.reduce_min(kp_out(*args, **kwargs))

                def max_kp_out(*args, **kwargs):
                    return tf.reduce_max(kp_out(*args, **kwargs))

                bound_min = tf.compat.v1.get_variable(
                    bound_min_name,
                    dtype=uncalibrated_tensor.dtype,
                    shape=[],
                    initializer=min_kp_out)
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name,
                    dtype=uncalibrated_tensor.dtype,
                    shape=[],
                    initializer=max_kp_out)
            else:
                # No need to initialize, since presumably their values will be
                # read from some checkpoint.
                bound_min = tf.compat.v1.get_variable(
                    bound_min_name, dtype=uncalibrated_tensor.dtype, shape=[])
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name, dtype=uncalibrated_tensor.dtype, shape=[])
            projected_keypoints_outputs = tf.minimum(
                tf.maximum(keypoints_outputs, bound_min), bound_max)

        # Monotonicity.
        if monotonic:
            # First a soft-enforcement: might not break indirect constraints.
            if projected_keypoints_outputs is None:
                projected_keypoints_outputs = keypoints_outputs
            projected_keypoints_outputs = pwl_calibration_ops.monotonic_projection(
                increasing=bool(monotonic > 0),
                values=projected_keypoints_outputs,
                name='project_calibration_to_monotonic')

        # Make assign_add op to projected output.
        if projected_keypoints_outputs is not None:
            constrained_diff = projected_keypoints_outputs - keypoints_outputs
            projection_ops = tf.compat.v1.assign_add(keypoints_outputs,
                                                     constrained_diff,
                                                     use_locking=None,
                                                     name='project_feasible')
            if (bound and missing_input_value is not None and
                    missing_output_value is None):
                # Include op bounding calibrated missing value.
                projected_missing_out_calibrated = tf.minimum(
                    tf.maximum(missing_out_calibrated, bound_min), bound_max)
                projected_missing_out_calibrated_diff = (
                    projected_missing_out_calibrated - missing_out_calibrated)
                projected_missing_out_calibrated_op = tf.compat.v1.assign_add(
                    missing_out_calibrated,
                    projected_missing_out_calibrated_diff,
                    use_locking=None,
                    name='project_missing_calibration_to_bounds')
                projection_ops = tf.group(projection_ops,
                                          projected_missing_out_calibrated_op)

        # Regularization
        regularization = regularizers.calibrator_regularization(
            keypoints_outputs,
            name=signal_name + '_calibrator_regularization',
            **regularizer_amounts)
        return calibrated, projection_ops, regularization
def build(self):
    """Assemble the model graph and store its tensors/ops on ``self``.

    The graph is built section by section, in this order:
      * annealed softmax temperature derived from ``global_step``;
      * inputs read from ``self.t_variables`` and their masks;
      * word embeddings and the sentence encoder (scope ``emb``);
      * the topic posterior projection (scope ``enc``);
      * topic probabilities and per-topic posterior moments (scope ``disc``);
      * sentence/topic decoders, sampled and beam-searched (scope ``dec``);
      * re-encoding of the generated topic sentences;
      * losses, annealing weight ``beta`` and the optimizers;
      * loss lists used for monitoring.

    NOTE(review): several attributes read here are not assigned in this
    method (e.g. ``means_sent_posterior``, ``logvars_sent_posterior``,
    ``latents_sent_posterior``, and with ``config.topic_model`` also
    ``probs_doc_topic_posterior`` / ``logits_bow``). Presumably they are set
    by the helper functions that receive ``self`` -- confirm against them.

    Raises:
        ValueError: if ``self.config.opt`` is neither ``'adam'`` nor
            ``'adagrad'``.
    """
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    # Temperature decreases with global_step / linear_steps, floored at
    # min_temperature.
    self.softmax_temperature = tf.maximum(
        self.config.max_temperature - tf.cast(
            tf.divide(self.global_step,
                      tf.constant(self.config.linear_steps)),
            dtype=tf.float32),
        self.config.min_temperature)

    with tf.name_scope('t_variables'):
        self.sample = self.t_variables['sample']
        self.batch_l = self.t_variables['batch_l']
        self.doc_l = self.t_variables['doc_l']
        self.sent_l = self.t_variables['sent_l']
        self.dec_sent_l = self.t_variables[
            'dec_sent_l']  # batch_l x max_doc_l

        self.max_doc_l = tf.reduce_max(self.doc_l)
        self.max_sent_l = tf.reduce_max(self.sent_l)
        self.max_dec_sent_l = tf.reduce_max(
            self.dec_sent_l)  # = max_sent_l + 1

        self.mask_doc = tf.sequence_mask(self.doc_l, dtype=tf.float32)
        self.mask_sent = tf.sequence_mask(self.sent_l, dtype=tf.float32)

        # 1 at the vocabulary positions listed in config.bow_idxs, else 0.
        mask_bow = np.zeros(self.config.n_vocab)
        mask_bow[self.config.bow_idxs] = 1.
        self.mask_bow = tf.constant(mask_bow, dtype=tf.float32)

        self.enc_keep_prob = self.t_variables['enc_keep_prob']

    # ------------------------------Encoder ------------------------------
    with tf.variable_scope('emb'):
        with tf.variable_scope('word', reuse=False):
            # Row 0 is a fixed all-zero embedding; the remaining rows are
            # trainable.
            pad_embedding = tf.zeros([1, self.config.dim_emb],
                                     dtype=tf.float32)
            nonpad_embeddings = tf.get_variable(
                'emb', [self.config.n_vocab - 1, self.config.dim_emb],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.embeddings = tf.concat(
                [pad_embedding, nonpad_embeddings], 0)  # n_vocab x dim_emb
            self.bow_embeddings = tf.nn.embedding_lookup(
                self.embeddings,
                self.config.bow_idxs)  # dim_bow x dim_emb

            # get sentence embeddings
            self.enc_input_idxs = tf.one_hot(
                self.t_variables['enc_input_idxs'],
                depth=self.config.n_vocab
            )  # batch_l x max_doc_l x max_sent_l x n_vocab
            self.enc_inputs = tf.tensordot(
                self.enc_input_idxs, self.embeddings,
                axes=[[-1],
                      [0]])  # batch_l x max_doc_l x max_sent_l x dim_emb

        with tf.variable_scope('sent', reuse=False):
            self.sent_outputs, self.sent_state = encode_inputs(
                self, enc_inputs=self.enc_inputs,
                sent_l=self.sent_l)  # batch_l x max_doc_l x dim_hidden*2

    with tf.variable_scope('enc'):
        # get sentence latents
        with tf.variable_scope('latents_sent', reuse=False):
            self.w_topic_posterior = tf.get_variable(
                'topic_posterior/kernel', [
                    self.config.n_topic, self.sent_state.shape[-1],
                    self.config.dim_hidden
                ],
                dtype=tf.float32)
            self.b_topic_posterior = tf.get_variable(
                'topic_posterior/bias',
                [1, self.config.n_topic, self.config.dim_hidden],
                dtype=tf.float32)

            # Masked mean of the sentence states over the document axis.
            self.topic_state = tf.reduce_sum(
                self.sent_state * tf.expand_dims(self.mask_doc, -1),
                -2) / tf.reduce_sum(self.mask_doc, -1, keepdims=True)
            self.hidden_topic_posterior = tf.tensordot(
                self.topic_state, self.w_topic_posterior, axes=[[1], [1]]
            ) + self.b_topic_posterior  # batch_l x n_topic x dim_hidden

    # ------------------------------Discriminator------------------------------
    with tf.variable_scope('disc'):
        with tf.variable_scope('prob_topic', reuse=False):
            # encode by TSNTM
            self.probs_sent_topic_posterior, _, _ = \
                encode_gsm_probs_topic_posterior(
                    self, self.hidden_topic_posterior.get_shape()[-1],
                    self.hidden_topic_posterior, self.mask_doc,
                    self.config)  # batch_l x max_doc_l x n_topic

        with tf.name_scope('latents_topic'):
            # get topic sentence posterior distribution for each document
            self.probs_topic_posterior = tf.reduce_sum(
                self.probs_sent_topic_posterior, 1)  # batch_l x n_topic

            self.means_sent_topic_posterior = tf.multiply(
                tf.expand_dims(self.probs_sent_topic_posterior, -1),
                tf.expand_dims(self.means_sent_posterior, -2)
            )  # batch_l x max_doc_l x n_topic x dim_latent
            self.means_topic_posterior_ = tf.reduce_sum(
                self.means_sent_topic_posterior, 1) / tf.expand_dims(
                    self.probs_topic_posterior,
                    -1)  # batch_l x n_topic x dim_latent
            self.means_topic_posterior = tf_clip_means(
                self.means_topic_posterior_, self.probs_topic_posterior)

            diffs_sent_topic_posterior = tf.expand_dims(
                self.means_sent_posterior, 2) - tf.expand_dims(
                    self.means_topic_posterior,
                    1)  # batch_l x max_doc_l x n_topic x dim_latent
            self.covs_sent_topic_posterior = tf.multiply(
                tf.expand_dims(
                    tf.expand_dims(self.probs_sent_topic_posterior, -1), -1),
                tf.matrix_diag(
                    tf.expand_dims(tf.exp(self.logvars_sent_posterior), 2)) +
                tf.matmul(
                    tf.expand_dims(diffs_sent_topic_posterior, -1),
                    tf.expand_dims(diffs_sent_topic_posterior, -2))
            )  # batch_l x max_doc_l x n_topic x dim_latent x dim_latent
            self.covs_topic_posterior_ = tf.reduce_sum(
                self.covs_sent_topic_posterior, 1) / tf.expand_dims(
                    tf.expand_dims(self.probs_topic_posterior, -1),
                    -1)  # batch_l x n_topic x dim_latent x dim_latent
            self.covs_topic_posterior = tf_clip_covs(
                self.covs_topic_posterior_, self.probs_topic_posterior)

            self.latents_topic_posterior = sample_latents_fullcov(
                self.means_topic_posterior,
                self.covs_topic_posterior,
                seed=self.config.seed,
                sample=self.sample)

            self.means_topic_prior = tf.zeros(
                [
                    self.batch_l, self.config.n_topic,
                    self.config.dim_latent
                ],
                dtype=tf.float32)  # batch_l x n_topic x dim_latent
            self.covs_topic_prior = tf.eye(
                self.config.dim_latent,
                batch_shape=[self.batch_l, self.config.n_topic],
                dtype=tf.float32) * self.config.cov_root

    # ------------------------------Decoder----------------------------------
    with tf.variable_scope('dec'):
        # decode for training sent
        with tf.variable_scope(
                'sent',
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32,
                reuse=False):
            self.dec_cell = tf.contrib.rnn.GRUCell(self.config.dim_hidden)
            self.dec_cell = tf.contrib.rnn.DropoutWrapper(
                self.dec_cell,
                output_keep_prob=self.t_variables['dec_keep_prob'])
            self.dec_sent_cell = self.dec_cell

            self.latent_hidden_layer = tf.layers.Dense(
                units=self.config.dim_hidden,
                activation=tf.nn.relu,
                name='latent_hidden_linear')
            self.dec_sent_initial_state = self.latent_hidden_layer(
                self.latents_sent_posterior
            )  # batch_l x max_doc_l x dim_hidden
            self.output_layer = tf.layers.Dense(self.config.n_vocab,
                                                use_bias=False,
                                                name='out')

            if self.config.attention:
                self.sent_outputs_flat = tf.reshape(
                    self.sent_outputs, [
                        self.batch_l * self.max_doc_l, self.max_sent_l,
                        self.config.dim_hidden * 2
                    ])
                self.att_sent_l_flat = tf.reshape(
                    tf.maximum(self.sent_l, 1),
                    [self.batch_l * self.max_doc_l])
                self.att_sent_mechanism = tf.contrib.seq2seq.LuongAttention(
                    num_units=self.config.dim_hidden,
                    memory=self.sent_outputs_flat,
                    memory_sequence_length=self.att_sent_l_flat)
                self.att_cell = tf.contrib.seq2seq.AttentionWrapper(
                    self.dec_cell,
                    attention_mechanism=self.att_sent_mechanism,
                    attention_layer_size=self.config.dim_hidden)
                self.dec_sent_cell = self.att_cell

            # teacher forcing
            self.dec_input_idxs = self.t_variables[
                'dec_input_idxs']  # batch_l x max_doc_l x max_dec_sent_l
            self.dec_inputs = tf.nn.embedding_lookup(
                self.embeddings, self.dec_input_idxs
            )  # batch_l x max_doc_l x max_dec_sent_l x dim_emb

            # output_sent_l == dec_sent_l
            self.output_logits_flat, self.output_sent_l_flat = \
                decode_output_logits_flat(
                    self,
                    dec_cell=self.dec_sent_cell,
                    dec_initial_state=self.dec_sent_initial_state,
                    dec_inputs=self.dec_inputs,
                    dec_sent_l=self.dec_sent_l,
                    latents_input=self.latents_sent_posterior
                )  # batch_l*max_doc_l x max_output_sent_l x n_vocab

            self.output_sent_l = tf.reshape(self.output_sent_l_flat,
                                            [self.batch_l, self.max_doc_l])
            self.max_output_sent_l = tf.reduce_max(self.output_sent_l)
            self.output_logits = tf.reshape(
                self.output_logits_flat, [
                    self.batch_l, self.max_doc_l, self.max_output_sent_l,
                    self.config.n_vocab
                ],
                name='output_logits')
            if self.config.disc_gumbel:
                self.output_input_idxs = sample_gumbels(
                    self.output_logits, self.softmax_temperature,
                    self.config.seed, self.sample
                )  # batch_l x max_doc_l x max_output_sent_l x n_vocab
            else:
                self.output_input_idxs = self.output_logits

        # decode for training topic probs
        with tf.variable_scope(
                'sent',
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32,
                reuse=True):
            self.dec_topic_cell = self.dec_cell
            if self.config.attention:
                self.topic_outputs_flat = tf.contrib.seq2seq.tile_batch(
                    tf.reshape(self.sent_outputs, [
                        self.batch_l, self.max_doc_l * self.max_sent_l,
                        self.sent_outputs.get_shape()[-1]
                    ]),
                    multiplier=self.config.n_topic
                )  # batch_l*n_topic x max_doc_l*max_sent_l x dim_hidden*2

                self.score_mask = tf.contrib.seq2seq.tile_batch(
                    tf.reshape(
                        tf.sequence_mask(self.sent_l),
                        [self.batch_l, self.max_doc_l * self.max_sent_l]),
                    multiplier=self.config.n_topic
                )  # batch_l*n_topic x max_doc_l*max_sent_l
                self.hier_score = tf.reshape(
                    tf.transpose(self.probs_sent_topic_posterior, [0, 2, 1]),
                    [self.batch_l * self.config.n_topic, self.max_doc_l
                     ])  # batch_l*n_topic x max_doc_l

                self.att_topic_mechanism = HierarchicalAttention(
                    num_units=self.config.dim_hidden,
                    memory=self.topic_outputs_flat,
                    score_mask=self.score_mask,
                    hier_score=self.hier_score)
                self.att_topic_cell = AttentionWrapper(
                    self.dec_cell,
                    attention_mechanism=self.att_topic_mechanism,
                    attention_layer_size=self.config.dim_hidden)
                self.dec_topic_cell = self.att_topic_cell

            # Decode either from sampled topic latents or from their means.
            if not self.config.disc_mean:
                self.dec_topic_initial_state = self.latent_hidden_layer(
                    self.latents_topic_posterior)
                dec_topic_outputs, self.summary_sent_l_flat = \
                    decode_output_sample_flat(
                        self,
                        dec_cell=self.dec_topic_cell,
                        dec_initial_state=self.dec_topic_initial_state,
                        softmax_temperature=self.softmax_temperature,
                        sample=self.sample,
                        latents_input=self.latents_topic_posterior
                    )  # batch_l*max_doc_l x max_summary_sent_l x n_vocab
            else:
                self.dec_topic_initial_state = self.latent_hidden_layer(
                    self.means_topic_posterior)
                dec_topic_outputs, self.summary_sent_l_flat = \
                    decode_output_sample_flat(
                        self,
                        dec_cell=self.dec_topic_cell,
                        dec_initial_state=self.dec_topic_initial_state,
                        softmax_temperature=self.softmax_temperature,
                        sample=self.sample,
                        latents_input=self.means_topic_posterior
                    )  # batch_l*max_doc_l x max_summary_sent_l x n_vocab

            self.summary_sent_l = tf.reshape(
                self.summary_sent_l_flat,
                [self.batch_l, self.config.n_topic])
            self.max_summary_sent_l = tf.reduce_max(self.summary_sent_l)
            if self.config.disc_gumbel:
                summary_input_idxs_flat = dec_topic_outputs.sample_id
            else:
                summary_input_idxs_flat = dec_topic_outputs.rnn_output

            self.summary_input_idxs = tf.reshape(
                summary_input_idxs_flat, [
                    self.batch_l, self.config.n_topic,
                    self.max_summary_sent_l, self.config.n_vocab
                ],
                name='summary_input_idxs')

            # re-encode topic sentence outputs
            self.summary_inputs = tf.tensordot(
                self.summary_input_idxs, self.embeddings,
                axes=[[-1], [0]
                      ])  # batch_l x n_topic x max_summary_sent_l x dim_emb
            self.summary_input_sent_l = self.summary_sent_l - 1  # to remove EOS
            self.mask_summary_sent = tf.sequence_mask(
                self.summary_input_sent_l,
                maxlen=self.max_summary_sent_l,
                dtype=tf.float32)  # batch_l x n_topic x max_summary_sent_l
            self.mask_summary_doc = tf.ones(
                [self.batch_l, self.config.n_topic], dtype=tf.float32)

        # beam decode for inference of original sentences
        with tf.variable_scope(
                'sent',
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32,
                reuse=True):
            self.beam_dec_sent_cell = self.dec_cell
            if self.config.attention:
                self.beam_sent_outputs_flat = tf.contrib.seq2seq.tile_batch(
                    self.sent_outputs_flat,
                    multiplier=self.config.beam_width)
                self.beam_att_sent_l_flat = tf.contrib.seq2seq.tile_batch(
                    self.att_sent_l_flat, multiplier=self.config.beam_width)
                self.beam_att_sent_mechanism = \
                    tf.contrib.seq2seq.LuongAttention(
                        num_units=self.config.dim_hidden,
                        memory=self.beam_sent_outputs_flat,
                        memory_sequence_length=self.beam_att_sent_l_flat)
                self.beam_dec_sent_cell = tf.contrib.seq2seq.AttentionWrapper(
                    self.beam_dec_sent_cell,
                    attention_mechanism=self.beam_att_sent_mechanism,
                    attention_layer_size=self.config.dim_hidden)

            # infer original sentences
            self.beam_output_idxs, _, _ = decode_beam_output_token_idxs(
                self,
                beam_dec_cell=self.beam_dec_sent_cell,
                dec_initial_state=self.dec_sent_initial_state,
                latents_input=self.means_sent_posterior,
                name='beam_output_idxs')

        # beam decode for inference of topic sentences
        with tf.variable_scope(
                'sent',
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32,
                reuse=True):
            self.beam_dec_topic_cell = self.dec_cell
            if self.config.attention:
                self.beam_topic_outputs_flat = tf.contrib.seq2seq.tile_batch(
                    self.topic_outputs_flat,
                    multiplier=self.config.beam_width)
                self.beam_score_mask = tf.contrib.seq2seq.tile_batch(
                    self.score_mask, multiplier=self.config.beam_width)
                self.beam_hier_score = tf.contrib.seq2seq.tile_batch(
                    self.hier_score, multiplier=self.config.beam_width)
                self.beam_att_topic_mechanism = HierarchicalAttention(
                    num_units=self.config.dim_hidden,
                    memory=self.beam_topic_outputs_flat,
                    score_mask=self.beam_score_mask,
                    hier_score=self.beam_hier_score)
                self.beam_dec_topic_cell = AttentionWrapper(
                    self.beam_dec_topic_cell,
                    attention_mechanism=self.beam_att_topic_mechanism,
                    attention_layer_size=self.config.dim_hidden)

            # infer topic sentences
            self.beam_summary_idxs, _, _ = decode_beam_output_token_idxs(
                self,
                beam_dec_cell=self.beam_dec_topic_cell,
                dec_initial_state=self.dec_topic_initial_state,
                latents_input=self.latents_topic_posterior,
                name='beam_summary_idxs')
            self.beam_mask_summary_sent = tf.logical_not(
                tf.equal(self.beam_summary_idxs, self.config.EOS_IDX)
            )  # batch_l x n_topic x max_summary_sent_l
            self.beam_summary_input_sent_l = tf.reduce_sum(
                tf.cast(self.beam_mask_summary_sent, tf.int32),
                -1)  # batch_l x n_topic
            # EOS positions are replaced by index 0 before one-hot encoding.
            beam_summary_soft_idxs = tf.one_hot(
                tf.where(self.beam_mask_summary_sent, self.beam_summary_idxs,
                         tf.zeros_like(self.beam_summary_idxs)),
                depth=self.config.n_vocab)
            self.beam_summary_inputs = tf.tensordot(
                beam_summary_soft_idxs, self.embeddings, [[-1], [0]]
            )  # batch_l x n_topic x max_beam_summary_sent_l x dim_emb

    # ------------------------------Discriminator------------------------------
    # encode by MLP
    if self.config.enc == 'mlp':
        with tf.variable_scope('disc'):
            with tf.variable_scope('prob_topic', reuse=True):
                self.summary_state = encode_states(
                    self,
                    enc_inputs=self.summary_inputs,
                    mask_sent=self.mask_summary_sent,
                    enc_keep_prob=self.enc_keep_prob,
                    config=self.config)  # batch_l x n_topic x dim_hidden
    elif self.config.enc == 'bow':
        with tf.variable_scope('disc'):
            with tf.variable_scope('prob_topic', reuse=True):
                self.bow_summary_input_idxs = tf.multiply(
                    self.summary_input_idxs, self.mask_bow)
                self.bow_summary_inputs = tf.tensordot(
                    self.bow_summary_input_idxs, self.embeddings,
                    axes=[[-1], [0]
                          ])  # batch_l x max_doc_l x max_sent_l x dim_emb
                self.mask_summary_bow = tf.reduce_sum(
                    self.bow_summary_input_idxs, -1)
                self.summary_state = encode_states(
                    self,
                    enc_inputs=self.bow_summary_inputs,
                    mask_sent=self.mask_summary_bow,
                    enc_keep_prob=self.enc_keep_prob,
                    config=self.config)  # batch_l x max_doc_l x dim_hidden
    elif self.config.enc == 'rnn':
        with tf.variable_scope('emb'):
            with tf.variable_scope('sent', reuse=True):
                _, self.summary_state = encode_inputs(
                    self,
                    enc_inputs=self.summary_inputs,
                    sent_l=self.summary_input_sent_l
                )  # batch_l x max_doc_l x dim_hidden*2
                _, self.beam_summary_state = encode_inputs(
                    self,
                    enc_inputs=self.beam_summary_inputs,
                    sent_l=self.beam_summary_input_sent_l
                )  # batch_l x max_doc_l x dim_hidden*2

    with tf.variable_scope('disc'):
        with tf.variable_scope('prob_topic', reuse=True):
            self.probs_summary_topic_posterior, _, _ = \
                encode_gsm_probs_topic_posterior(
                    self, self.summary_state.get_shape()[-1],
                    self.summary_state, self.mask_summary_doc, self.config)
            self.logits_summary_topic_posterior_ = tf_log(
                tf.matrix_diag_part(self.probs_summary_topic_posterior)
            )  # batch_l x n_topic
            self.logits_summary_topic_posterior = tf_clip_vals(
                self.logits_summary_topic_posterior_,
                self.probs_topic_posterior)

    # ------------------------------Optimizer and Loss------------------------------
    with tf.name_scope('opt'):
        # Partition index per sentence slot: 1 for real sentences, 0 for
        # padding (cast of mask_doc), used with tf.dynamic_partition below.
        partition_doc = tf.cast(self.mask_doc, dtype=tf.int32)
        self.n_sents = tf.cast(tf.reduce_sum(self.doc_l), dtype=tf.float32)
        self.n_tokens = tf.reduce_sum(self.dec_sent_l)

        # ------------------------------Reconstruction Loss of Language Model------------------------------
        # target and mask
        self.dec_target_idxs = self.t_variables[
            'dec_target_idxs']  # batch_l x max_doc_l x max_dec_sent_l
        self.dec_sent_l = self.t_variables[
            'dec_sent_l']  # batch_l x max_doc_l
        self.max_dec_sent_l = tf.reduce_max(
            self.dec_sent_l)  # = max_sent_l + 1
        self.dec_mask_sent = tf.sequence_mask(self.dec_sent_l,
                                              maxlen=self.max_dec_sent_l,
                                              dtype=tf.float32)
        self.dec_target_idxs_flat = tf.reshape(
            self.dec_target_idxs,
            [self.batch_l * self.max_doc_l, self.max_dec_sent_l])
        self.dec_mask_sent_flat = tf.reshape(
            self.dec_mask_sent,
            [self.batch_l * self.max_doc_l, self.max_dec_sent_l])

        # nll for each token (summed over sentence)
        # With config.sample unset the slice bound is None, i.e. no cropping.
        self.recon_max_sent_l = tf.minimum(
            self.max_dec_sent_l,
            self.max_output_sent_l) if self.config.sample else None
        losses_recon_flat = tf.reduce_sum(
            tf.contrib.seq2seq.sequence_loss(
                self.output_logits_flat[:, :self.recon_max_sent_l, :],
                self.dec_target_idxs_flat[:, :self.recon_max_sent_l],
                self.dec_mask_sent_flat[:, :self.recon_max_sent_l],
                average_across_timesteps=False,
                average_across_batch=False), -1)  # batch_l*max_doc_l
        self.losses_recon = tf.reshape(losses_recon_flat,
                                       [self.batch_l, self.max_doc_l])
        self.loss_recon = tf.reduce_mean(
            tf.dynamic_partition(
                self.losses_recon, partition_doc,
                num_partitions=2)[1])  # average over doc x batch

        # ------------------------------KL divergence Loss of Topic Probability Distribution------------------------------
        if self.config.topic_model:
            self.probs_sent_topic_prior = tf.expand_dims(
                self.probs_doc_topic_posterior, 1)  # batch_l x 1 x n_topic
        else:
            self.probs_sent_topic_prior = tf.ones_like(
                self.probs_sent_topic_posterior, dtype=tf.float32
            ) / self.config.n_topic  # batch_l x max_doc_l x n_topic, uniform distribution over topics
        self.losses_kl_prob = tf.reduce_sum(
            tf.multiply(self.probs_sent_topic_posterior,
                        (tf_log(self.probs_sent_topic_posterior) -
                         tf_log(self.probs_sent_topic_prior))), -1)
        self.loss_kl_prob = tf.reduce_mean(
            tf.dynamic_partition(
                self.losses_kl_prob, partition_doc,
                num_partitions=2)[1])  # average over doc x batch

        # ------------------------------KL divergence Loss of Sentence Latents Distribution------------------------------
        self.losses_kl_sent_gauss = compute_kl_losses_sent_gauss(
            self
        )  # batch_l x max_doc_l x n_topic, sum over latent dimension
        self.losses_kl_sent_gmm = tf.reduce_sum(
            tf.multiply(self.probs_sent_topic_posterior,
                        self.losses_kl_sent_gauss),
            -1)  # batch_l x max_doc_l, sum over topics
        self.loss_kl_sent_gmm = tf.reduce_mean(
            tf.dynamic_partition(
                self.losses_kl_sent_gmm, partition_doc,
                num_partitions=2)[1])  # average over doc x batch

        # ------------------------------KL divergence Loss of Topic Latents Distribution------------------------------
        if self.config.reverse_kl:
            self.losses_kl_topic_pairs_gauss = \
                compute_kl_losses_topic_paris_gauss(self)
            self.losses_kl_topic_gauss_reverse = tf.reduce_sum(
                self.losses_kl_topic_pairs_gauss *
                self.config.mask_tree[None, None, :, :], -1) / np.maximum(
                    np.sum(self.config.mask_tree[None, None, :, :], -1),
                    1)  # batch_l x 1 x n_topic, mean over other child topics
            self.losses_kl_topic_gmm_reverse = tf.reduce_sum(
                tf.multiply(self.probs_sent_topic_posterior,
                            self.losses_kl_topic_gauss_reverse),
                -1)  # batch_l x max_doc_l, sum over topics
            self.loss_kl_topic_gmm_reverse = tf.reduce_mean(
                tf.dynamic_partition(self.losses_kl_topic_gmm_reverse,
                                     partition_doc,
                                     num_partitions=2)[1])
        else:
            self.loss_kl_topic_gmm_reverse = tf.constant(0.,
                                                         dtype=tf.float32)

        # for monitor
        self.losses_kl_topic_gauss = compute_kl_losses_topic_gauss(
            self)  # batch_l x 1 x n_topic, sum over latent dimension
        self.losses_kl_topic_gmm = tf.reduce_sum(
            tf.multiply(self.probs_sent_topic_posterior,
                        self.losses_kl_topic_gauss),
            -1)  # batch_l x max_doc_l, sum over topics
        self.loss_kl_topic_gmm = tf.reduce_mean(
            tf.dynamic_partition(self.losses_kl_topic_gmm,
                                 partition_doc,
                                 num_partitions=2)[1])

        # ------------------------------KL divergence Loss of Root State Distribution------------------------------
        if self.config.prior_root:
            self.losses_kl_root = compute_kl_losses(
                self.means_state_root_posterior,
                self.logvars_state_root_posterior)  # batch_l x max_doc_l
            self.loss_kl_root = tf.reduce_sum(
                self.losses_kl_root) / tf.cast(
                    tf.reduce_sum(self.doc_l),
                    dtype=tf.float32)  # average over doc x batch
        else:
            self.loss_kl_root = tf.constant(0, dtype=tf.float32)

        # ------------------------------Discriminator Loss------------------------------
        if self.config.disc_topic:
            self.losses_disc_topic = -tf.reduce_sum(
                self.logits_summary_topic_posterior,
                -1)  # batch_l, sum over topic
            self.loss_disc_topic = tf.reduce_sum(
                self.losses_disc_topic
            ) / self.n_sents  # average over doc x batch
        else:
            self.loss_disc_topic = tf.constant(0, dtype=tf.float32)

        # ------------------------------Loss of Topic Model------------------------------
        if self.config.topic_model:
            # recon
            self.topic_losses_recon = -tf.reduce_sum(
                tf.multiply(self.t_variables['doc_bows'], self.logits_bow),
                -1)  # n_batch, sum over n_bow
            self.topic_loss_recon = tf.reduce_mean(
                self.topic_losses_recon)  # average over doc x batch

            # kl_bow
            self.means_topic_bow_prior = tf.squeeze(
                get_params_topic_prior(
                    self, tf.expand_dims(self.means_topic_bow_posterior, 0),
                    tf.zeros([1, self.config.dim_latent],
                             dtype=tf.float32)), 0)  # n_topic x dim_latent
            self.logvars_topic_bow_prior = tf.squeeze(
                get_params_topic_prior(
                    self,
                    tf.expand_dims(self.logvars_topic_bow_posterior, 0),
                    tf.zeros([1, self.config.dim_latent],
                             dtype=tf.float32)), 0)  # n_topic x dim_latent
            self.topic_losses_kl_bow = compute_kl_losses(
                self.means_topic_bow_posterior,
                self.logvars_topic_bow_posterior,
                means_prior=self.means_topic_bow_prior,
                logvars_prior=self.logvars_topic_bow_prior)  # n_topic
            self.topic_loss_kl_bow = tf.reduce_mean(
                self.topic_losses_kl_bow)  # average over doc x batch

            # kl_prob
            self.topic_losses_kl_prob = compute_kl_losses(
                self.means_probs_doc_topic_posterior,
                self.logvars_probs_doc_topic_posterior)  # batch_l
            self.topic_loss_kl_prob = tf.reduce_mean(
                self.topic_losses_kl_prob)  # average over doc x batch
        else:
            self.topic_loss_recon = tf.constant(0, dtype=tf.float32)
            self.topic_loss_kl_bow = tf.constant(0, dtype=tf.float32)
            self.topic_loss_kl_prob = tf.constant(0, dtype=tf.float32)

        # ------------------------------Topic Regularization Loss------------------------------
        if self.config.reg != '':
            if self.config.reg == 'mean':
                self.topic_dots = self.get_topic_dots(
                    self.means_topic_posterior
                )  # batch_l x n_topic-1 x n_topic-1
            elif self.config.reg == 'bow':
                self.topic_dots = self.get_topic_dots(
                    tf.expand_dims(
                        self.topic_bow,
                        0))  # batch_l(=1) x n_topic-1 x n_topic-1

            self.losses_reg = tf.reduce_sum(
                tf.square(self.topic_dots -
                          tf.eye(len(self.config.all_child_idxs))) *
                self.config.mask_tree_reg, [1, 2]) / tf.reduce_sum(
                    self.config.mask_tree_reg)  # batch_l
            self.loss_reg = tf.reduce_mean(
                self.losses_reg)  # average over batch
        else:
            self.loss_reg = tf.constant(0, dtype=tf.float32)

        # ------------------------------Optimizer------------------------------
        # beta weights the KL terms; it is annealed from config.beta_init up
        # to at most 1, or fixed to 1 when no annealing mode matches.
        if self.config.anneal == 'linear':
            self.tau = tf.cast(tf.divide(
                self.global_step, tf.constant(self.config.linear_steps)),
                               dtype=tf.float32)
            self.beta = tf.minimum(1., self.config.beta_init + self.tau)
        elif self.config.anneal == 'cycle':
            self.tau = tf.cast(tf.divide(
                tf.mod(self.global_step,
                       tf.constant(self.config.cycle_steps)),
                tf.constant(self.config.cycle_steps)),
                               dtype=tf.float32)
            self.beta = tf.minimum(
                1., self.config.beta_init + self.tau /
                (1. - self.config.r_cycle))
        else:
            self.beta = tf.constant(1.)

        self.beta_disc = self.beta if self.config.beta_disc else tf.constant(
            1.)

        def get_opt(loss, var_list, lr, global_step=None):
            """Return (train_op, grad_vars, clipped_grad_vars) for `loss`.

            Gradients are clipped element-wise to
            [-config.grad_clip, config.grad_clip]; variables without a
            gradient are skipped.
            """
            if self.config.opt == 'adam':
                Optimizer = tf.train.AdamOptimizer
            elif self.config.opt == 'adagrad':
                Optimizer = tf.train.AdagradOptimizer
            else:
                # Previously fell through and failed later with an
                # UnboundLocalError on `Optimizer`.
                raise ValueError(
                    "Unsupported optimizer %r: expected 'adam' or 'adagrad'"
                    % (self.config.opt,))
            optimizer = Optimizer(lr)
            grad_vars = optimizer.compute_gradients(loss=loss,
                                                    var_list=var_list)
            clipped_grad_vars = [
                (tf.clip_by_value(grad, -self.config.grad_clip,
                                  self.config.grad_clip), var)
                for grad, var in grad_vars if grad is not None
            ]
            opt = optimizer.apply_gradients(clipped_grad_vars,
                                            global_step=global_step)
            return opt, grad_vars, clipped_grad_vars

        # ------------------------------Loss Setting------------------------------
        if self.config.turn:
            # Separate objectives/optimizers: the main loss updates
            # emb/enc/dec variables, the discriminator loss updates emb/disc.
            self.loss = (
                self.loss_recon +
                self.beta * tf.maximum(
                    tf.maximum(self.loss_kl_sent_gmm,
                               self.config.capacity_gmm) -
                    self.loss_kl_topic_gmm_reverse,
                    self.config.margin_gmm) +
                self.beta * self.loss_kl_root +
                self.topic_loss_recon +
                self.beta * self.topic_loss_kl_bow +
                self.beta * self.topic_loss_kl_prob +
                self.config.lam_reg * self.loss_reg)
            self.opt, self.grad_vars, self.clipped_grad_vars = get_opt(
                self.loss,
                var_list=list(
                    tf.trainable_variables('emb') +
                    tf.trainable_variables('enc') +
                    tf.trainable_variables('dec')),
                lr=self.config.lr,
                global_step=self.global_step)

            self.loss_disc = (
                self.beta_disc * self.config.lam_disc *
                self.loss_disc_topic +
                self.beta * tf.maximum(self.loss_kl_prob,
                                       self.config.capacity_prob))
            self.opt_disc, self.grad_vars_disc, self.clipped_grad_vars_disc = \
                get_opt(
                    self.loss_disc,
                    var_list=list(
                        tf.trainable_variables('emb') +
                        tf.trainable_variables('disc')),
                    lr=self.config.lr_disc)
        else:
            # Single joint objective over all trainable variables.
            self.loss = (
                self.loss_recon +
                self.beta * tf.maximum(
                    tf.maximum(self.loss_kl_sent_gmm,
                               self.config.capacity_gmm) -
                    self.loss_kl_topic_gmm_reverse,
                    self.config.margin_gmm) +
                self.beta * self.loss_kl_root +
                self.topic_loss_recon +
                self.beta * self.topic_loss_kl_bow +
                self.beta * self.topic_loss_kl_prob +
                self.beta_disc * self.config.lam_disc *
                self.loss_disc_topic +
                self.beta * tf.maximum(self.loss_kl_prob,
                                       self.config.capacity_prob) +
                self.config.lam_reg * self.loss_reg)
            self.loss_disc = tf.constant(0, dtype=tf.float32)
            self.opt, self.grad_vars, self.clipped_grad_vars = get_opt(
                self.loss,
                var_list=tf.trainable_variables(),
                lr=self.config.lr,
                global_step=self.global_step)
            self.opt_disc = tf.constant(0, dtype=tf.float32)

    # ------------------------------Evaluation------------------------------
    self.loss_list_train = [
        self.loss, self.loss_disc, self.loss_recon, self.loss_kl_prob,
        self.loss_kl_sent_gmm, self.loss_kl_topic_gmm_reverse,
        self.loss_kl_root, self.loss_disc_topic, self.topic_loss_recon,
        self.topic_loss_kl_bow, self.topic_loss_kl_prob, self.loss_reg,
        tf.constant(0)
    ]
    self.loss_list_eval = [
        self.loss, self.loss_disc, self.loss_recon, self.loss_kl_prob,
        self.loss_kl_sent_gmm, self.loss_kl_topic_gmm_reverse,
        self.loss_kl_root, self.loss_disc_topic, self.topic_loss_recon,
        self.topic_loss_kl_bow, self.topic_loss_kl_prob, self.loss_reg,
        self.loss_kl_topic_gmm
    ]
    self.loss_sum = (self.loss_recon + self.loss_kl_prob +
                     self.loss_kl_sent_gmm + self.loss_kl_root +
                     self.loss_disc_topic + self.topic_loss_recon +
                     self.topic_loss_kl_bow +
                     self.topic_loss_kl_prob) * self.n_sents
def _sample_n(self, n, seed=None):
    """Draw `n` samples from the mixture.

    A draw from `self.cat` selects one component for every sample/batch
    entry, and the returned value for that entry comes from the selected
    component. Two code paths exist:

    * Static-graph path (`self._use_static_graph`): every component is
      sampled `n` times, the draws are stacked along a new component axis,
      and a one-hot mask built from the categorical draw keeps only the
      selected component.
    * Dynamic path: flat sample indices are split per component with
      `tf.dynamic_partition`, each component is sampled only as many times
      as it was selected, and the pieces are put back in order with
      `tf.dynamic_stitch`.

    Both paths derive one new seed per component, in component order, with
    `distribution_util.gen_new_seed(seed, "mixture")`.

    Args:
      n: Number of samples. Converted to a tensor on the dynamic path and
        turned back into a Python int when its value is statically known.
      seed: Seed passed to `self.cat.sample`; also the start of the
        per-component seed chain.

    Returns:
      The sampled tensor. On the dynamic path it is reshaped to the shape of
      the categorical draw followed by the event shape.
    """
    if self._use_static_graph:
        # Almost the same approach as `MixtureSameFamily`. The differences
        # come from holding a list of `Distribution` objects instead of a
        # single one, and from keeping seed handling consistent with the
        # non-static path below.
        cat_samples = self.cat.sample(n, seed=seed)
        component_draws = []
        for c in range(self.num_components):
            seed = distribution_util.gen_new_seed(seed, "mixture")
            component_draws.append(self.components[c].sample(n, seed=seed))

        event_ndims = self._static_event_shape.ndims
        component_axis = -1 - event_ndims
        stacked = tf.stack(component_draws, component_axis)  # [n, B, k, E]

        np_dtype = stacked.dtype.as_numpy_dtype
        one = np.ones([], dtype=np_dtype)
        zero = np.zeros([], dtype=np_dtype)
        selector = tf.one_hot(
            indices=cat_samples,  # [n, B]
            depth=self._num_components,  # == k
            on_value=one,
            off_value=zero)  # [n, B, k]
        selector = distribution_utils.pad_mixture_dimensions(
            selector, self, self._cat, event_ndims)  # [n, B, k, [1]*e]

        # Summing over the component axis keeps the selected draw only.
        return tf.reduce_sum(
            stacked * selector, axis=component_axis)  # [n, B, E]

    with tf.control_dependencies(self._assertions):
        n = tf.convert_to_tensor(n, name="n")
        static_n = tensor_util.constant_value(n)
        if static_n is not None:
            n = int(static_n)
        cat_samples = self.cat.sample(n, seed=seed)

        # Prefer static shape information and fall back to graph ops.
        cat_static_shape = cat_samples.get_shape()
        if not cat_static_shape.is_fully_defined():
            samples_shape = tf.shape(cat_samples)
            samples_size = tf.size(cat_samples)
        else:
            samples_shape = cat_static_shape.as_list()
            samples_size = cat_static_shape.num_elements()

        batch_static_shape = self.batch_shape
        if not batch_static_shape.is_fully_defined():
            batch_size = tf.reduce_prod(self.batch_shape_tensor())
        else:
            batch_size = batch_static_shape.num_elements()

        event_static_shape = self.event_shape
        if not event_static_shape.is_fully_defined():
            event_shape = self.event_shape_tensor()
        else:
            event_shape = np.array(
                event_static_shape.as_list(), dtype=np.int32)

        # Flat positions of every entry of the categorical draw. They are
        # partitioned by component here so that dynamic_stitch can later put
        # the per-component samples back where they belong.
        flat_positions = tf.reshape(
            tf.range(0, samples_size), samples_shape)
        partitioned_samples_indices = tf.dynamic_partition(
            data=flat_positions,
            partitions=cat_samples,
            num_partitions=self.num_components)

        # The batch indices repeated n times; needed to pick the right rows
        # inside each component partition.
        repeated_batch_ids = tf.reshape(
            tf.tile(tf.range(0, batch_size), [n]), samples_shape)

        # How the partitioning below works:
        #   batch indices are e.g.   [0, 1, 0, 1, 0, 1]
        #   suppose partitions are   [1, 1, 0, 0, 1, 1]
        # After partitioning the batch indices are cut as
        #   [batch_indices[x] for x in 2, 3]
        #   [batch_indices[x] for x in 0, 1, 4, 5]
        # i.e. [1 1] and [0 0 0 0].
        # Part 0 is then sampled n=2 times and part 1 n=4 times. For part 0
        # we want batch entries 1, 1 (samples 0, 1); for part 1 we want
        # batch entries 0, 0, 0, 0 (samples 0, 1, 2, 3).
        partitioned_batch_indices = tf.dynamic_partition(
            data=repeated_batch_ids,
            partitions=cat_samples,
            num_partitions=self.num_components)

        per_component = []
        for c in range(self.num_components):
            n_class = tf.size(partitioned_samples_indices[c])
            seed = distribution_util.gen_new_seed(seed, "mixture")
            draws = self.components[c].sample(n_class, seed=seed)

            # Pick the right batch entry out of each draw. With the sample
            # and batch dimensions flattened, sample s / batch element b of
            # component c lives at
            #   lookup[c, i] = batch_size * s[i] + b[c, i]
            # for i = 0 ... n_class[c] - 1, i.e. a gather along (s, b) where
            # the n_class rows are samples of this component and the
            # batch_size columns are batch elements.
            row_lookup = (
                batch_size * tf.range(n_class) + partitioned_batch_indices[c])
            flat_shape = tf.concat([[n_class * batch_size], event_shape], 0)
            draws = tf.reshape(draws, flat_shape)
            draws = tf.gather(
                draws, row_lookup, name="samples_class_c_gather")
            per_component.append(draws)

        # Stitch the per-component samples back together.
        flat_result = tf.dynamic_stitch(
            indices=partitioned_samples_indices, data=per_component)

        # Restore the sample, batch and event dimensions.
        full_shape = tf.concat(
            [samples_shape, self.event_shape_tensor()], 0)
        ret = tf.reshape(flat_result, full_shape)
        ret.set_shape(
            tf.TensorShape(cat_static_shape).concatenate(self.event_shape))
        return ret
def generate_dynamic_mask(inputs, lengths, present_rate, mask_id, boa_id,
                          eoa_id, pad_id, partition_num):
    """Mask `partition_num` segments per sequence and build the answers.

    The mask layout is computed in numpy inside a `tf.py_func`; it depends
    only on `lengths`, `present_rate` and `partition_num`. The masked tokens
    of each segment become an "answer" framed by `boa_id` / `eoa_id`, and the
    concatenated answers are split back into one tensor per segment with
    `tf.dynamic_partition`.

    Note: `eoa_id`, `present_rate`, `partition_num`, `mask_id` and `pad_id`
    are wrapped in `tf.Variable`s, so the caller has to run a variable
    initializer before evaluating the returned tensors.

    :param inputs: token-id tensor, indexed as [batch, position].
    :param lengths: per-example sequence lengths.
    :param present_rate: fraction of the `length - 2` tokens left unmasked.
    :param mask_id: id handed to `_prepare_squeezed_template`.
    :param boa_id: id prepended to every answer.
    :param eoa_id: id appended to every answer before padding.
    :param pad_id: id used to pad answers (and handed to
        `_prepare_squeezed_template`).
    :param partition_num: number of masked segments per example (Python int).
    :return: tuple (masks, answers, after_pad_ans_lens, true_ans_lens,
        templates, template_masks, start_positions, end_positions) where
        `answers` is a list of `partition_num` tensors.
    """

    def _fill_mask(inputs, lengths, present_rate, eoa_id, pad_id,
                   partition_num):
        """
        Wrap the numpy mask computation in a `tf.py_func`.

        :param inputs:
        :param lengths:
        :param present_rate:
        :return: the seven outputs of `_fill_mask_py_func`; `answers` has
            shape [batch_size, sum(padded answer length of each segment)],
            and start/end positions mark out the range of every answer.
        """

        def _fill_mask_py_func(inputs, lengths, present_rate, eoa_id, pad_id,
                               partition_num):
            # TODO(wanrong): bound check
            def _get_split_pos(masked_num):
                # Cumulative boundaries cutting masked_num tokens into
                # partition_num segments, with a leading 0.
                if masked_num <= 1:
                    return [1] * (partition_num - 1)

                chunks = np.array_split(range(masked_num), partition_num)
                boundaries = [chunk.size for chunk in chunks]
                for k in range(1, partition_num):
                    boundaries[k] += boundaries[k - 1]
                return np.insert(boundaries, 0, 0, axis=0)

            batch_size = inputs.shape[0]
            masked_nums = ((lengths - 2) * (1 - present_rate)).astype(
                np.int64)  # [batch_size]
            # [batch_size, partition_num+1]
            split_positions = [_get_split_pos(num) for num in masked_nums]

            # Length of each mask segment, and for each segment the room
            # that must stay free to its right.
            mask_lengths = np.zeros(
                shape=(batch_size, partition_num), dtype=np.int64)
            left_len = np.zeros(
                shape=(batch_size, partition_num + 1),
                dtype=np.int64)  # one extra column at the end
            for bid, boundaries in enumerate(split_positions):
                segment_pairs = zip(boundaries[:-1], boundaries[1:])
                for idx, (lower, upper) in enumerate(segment_pairs):
                    mask_lengths[bid, idx] = upper - lower
                left_len[bid, -1] = 0  # leave <EOS> unmasked
                for idx in reversed(range(mask_lengths.shape[1])):
                    left_len[bid, idx] = (
                        left_len[bid, idx + 1] + mask_lengths[bid, idx] + 1)
            left_len = left_len[:, :-1]  # remove last column

            # Splitting. start/end_positions begin with a padding column of
            # zeros that is dropped again before returning.
            start_positions = np.zeros(shape=(batch_size, 1))
            end_positions = np.zeros(shape=(batch_size, 1))
            answers = np.zeros((batch_size, 0))
            partitions = np.array([])
            masks = np.full_like(inputs, 0)
            after_pad_ans_lens = np.zeros(shape=partition_num)
            boa = np.full(shape=(batch_size, 1), fill_value=boa_id)
            for idx in range(partition_num):
                # start and end position of the current mask segment
                cur_start_pos = np.zeros(
                    shape=(batch_size, 1), dtype=np.int64)
                cur_end_pos = np.zeros(shape=(batch_size, 1), dtype=np.int64)
                cur_answers = []
                for bid in range(batch_size):
                    # column idx holds the end of the previous segment
                    lo = end_positions[bid][idx] + 1
                    hi = lengths[bid] - left_len[bid][idx] + 1
                    cur_start_pos[bid][0] = \
                        lo + (hi - lo) / (partition_num + 1)
                    cur_end_pos[bid][0] = \
                        cur_start_pos[bid][0] + mask_lengths[bid][idx]
                    begin = cur_start_pos[bid][0]
                    end = cur_end_pos[bid][0]
                    cur_answers.append(
                        np.append(inputs[bid][begin:end], eoa_id))
                    # update mask
                    for j in range(begin, end):
                        masks[bid][j] = 1  # set masked element to 1
                start_positions = np.concatenate(
                    (start_positions, cur_start_pos), axis=1)
                end_positions = np.concatenate(
                    (end_positions, cur_end_pos), axis=1)

                # pad the current answers to a common length
                cur_padded_ans, cur_max_len = _pad_array_list(
                    cur_answers, mask_lengths[:, idx], pad_id)
                cur_padded_ans = np.concatenate((boa, cur_padded_ans), axis=1)
                after_pad_ans_lens[idx] = cur_max_len
                answers = np.concatenate((answers, cur_padded_ans), axis=1)

                # every answer column is tagged with its segment index
                cur_idx = np.full_like(cur_padded_ans[0], idx)
                partitions = np.concatenate((partitions, cur_idx), axis=0)

            return (masks,
                    start_positions[:, 1:].astype(np.int64),
                    end_positions[:, 1:].astype(np.int64),
                    answers.astype(np.int64),
                    after_pad_ans_lens.astype(np.int64),
                    mask_lengths.astype(np.int32),
                    partitions.astype(np.int32))

        eoa_id = tf.Variable(eoa_id, dtype=tf.int64)
        present_rate = tf.Variable(present_rate, dtype=tf.float32)
        partition_num = tf.Variable(partition_num, dtype=tf.int64)
        py_func_inputs = [
            inputs, lengths, present_rate, eoa_id, pad_id, partition_num
        ]
        py_func_dtypes = [
            tf.int64, tf.int64, tf.int64, tf.int64, tf.int64, tf.int32,
            tf.int32
        ]
        return tf.py_func(_fill_mask_py_func, py_func_inputs, py_func_dtypes)

    (masks, start_positions, end_positions, answers, after_pad_ans_lens,
     true_ans_lens, partitions) = _fill_mask(
         inputs, lengths, present_rate, eoa_id, pad_id, partition_num)

    # Split the concatenated answers column-wise into one tensor per segment.
    answers_by_column = tf.transpose(
        answers, perm=[1, 0])  # [sum(lens), batch_size]
    answers = tf.dynamic_partition(
        data=answers_by_column,
        partitions=partitions,
        num_partitions=partition_num)
    answers = [tf.transpose(ans, perm=[1, 0]) for ans in answers]

    mask_id = tf.Variable(mask_id, dtype=tf.int64)
    pad_id = tf.Variable(pad_id, dtype=tf.int64)
    templates, template_masks = _prepare_squeezed_template(
        inputs, masks, start_positions, end_positions, mask_id, pad_id)

    return (masks, answers, after_pad_ans_lens, true_ans_lens, templates,
            template_masks, start_positions, end_positions)
centroids = tf.Variable(tf.gather(vector_values, centroid_indices)) expanded_vectors = tf.expand_dims(vectors, 0) expanded_centroids = tf.expand_dims(centroids, 1) vectors_subtration = tf.sub(expanded_vectors,expanded_centroids) euclidean_distances = \ tf.reduce_sum(tf.square(vectors_subtration), 2) assignments = tf.to_int32(tf.argmin(euclidean_distances, 0)) partitions = [0, 0, 1, 1, 0] num_partitions = 2 data = [10, 20, 30, 40, 50] outputs[0] = [10, 20, 50] outputs[1] = [30, 40] partitions = tf.dynamic_partition(vectors, assignments, num_clusters) update_centroids = tf.concat(0, \ [tf.expand_dims\ (tf.reduce_mean(partition, 0), 0)\ for partition in partitions]) init_op = tf.initialize_all_variables() sess = tf.Session() sess.run(init_op) for step in xrange(num_steps): _, centroid_values, assignment_values =\ sess.run([update_centroids,\ centroids,\
def step_m(self, x):
    """Run one controller + memory step and return the output for `x`.

    The controller (two tanh layers) sees `x` concatenated with the read
    vectors of the previous step. Its interface vector is split into the
    read/write parameters, memory is written, the temporal link matrix is
    updated, and memory is read again.

    Updates `self.nn_out`, `self.interface_vec`, `self.usage_vec`,
    `self.write_weights`, `self.mem_mat`, `self.link_mat`,
    `self.precedence_weight`, `self.read_weights` and `self.read_vecs`.

    Args:
        x: input for this step; it is concatenated along axis 1 with the
            read vectors reshaped to [1, num_heads * word_size], so it is
            presumably a single row -- confirm against the caller.

    Returns:
        `self.nn_out` plus the read vectors projected through
        `self.read_vecs_out_weight` (annotated as 1*Y below).
    """
    # reshape input (named controller_input so the builtin `input` is not
    # shadowed)
    controller_input = tf.concat(
        [x, tf.reshape(self.read_vecs,
                       [1, self.num_heads * self.word_size])], 1)

    # forward propagation
    l1_out = tf.matmul(controller_input, self.W1) + self.b1
    l1_act = tf.nn.tanh(l1_out)
    l2_out = tf.matmul(l1_act, self.W2) + self.b2
    l2_act = tf.nn.tanh(l2_out)

    # output vector
    self.nn_out = tf.matmul(l2_act, self.nn_out_weights)  # (1*eta+Y, eta+Y*Y)->(1*Y)
    # interaction vector - how to interact with memory
    self.interface_vec = tf.matmul(l2_act, self.interface_weights)  # (1*eta+Y, eta+Y*eta)->(1*eta)

    # Partition ids, one per interface-vector entry:
    #   0 read keys (R*W)   1 read strengths (R)   2 write key (W)
    #   3 write strength    4 erase vector (W)     5 write vector (W)
    #   6 free gates (R)    7 allocation gate      8 write gate
    #   9 read modes (3*R)
    partition = tf.constant([[0]*(self.num_heads*self.word_size) + [1]*(self.num_heads) + [2]*(self.word_size) + [3] +
                             [4]*(self.word_size) + [5]*(self.word_size) +
                             [6]*(self.num_heads) + [7] + [8] + [9]*(self.num_heads*3)], dtype=tf.int32)

    # convert interface vector into a set of read write vectors
    # (tf.dynamic_partition splits interface_vec into 10 tensors using the
    # ids in `partition`)
    (read_keys, read_str, write_key, write_str,
     erase_vec, write_vec, free_gates, alloc_gate, write_gate, read_modes) = \
        tf.dynamic_partition(self.interface_vec, partition, 10)

    # read vectors
    read_keys = tf.reshape(read_keys, [self.num_heads, self.word_size])  # R*W
    read_str = 1 + tf.nn.softplus(tf.expand_dims(read_str, 0))  # 1*R

    # write vectors
    write_key = tf.expand_dims(write_key, 0)  # 1*W
    # help init our write weights
    write_str = 1 + tf.nn.softplus(tf.expand_dims(write_str, 0))  # 1*1
    erase_vec = tf.nn.sigmoid(tf.expand_dims(erase_vec, 0))  # 1*W
    write_vec = tf.expand_dims(write_vec, 0)  # 1*W

    # the degree to which locations at read heads will be freed
    free_gates = tf.nn.sigmoid(tf.expand_dims(free_gates, 0))  # 1*R
    # the fraction of writing that is being allocated in a new location
    alloc_gate = tf.nn.sigmoid(alloc_gate)  # 1
    # the amount of information to be written to memory
    write_gate = tf.nn.sigmoid(write_gate)  # 1

    # The softmax distribution between the three read modes (backward,
    # lookup, forward). Read heads use these gates to switch between content
    # lookup using a read key and reading out locations forwards or
    # backwards in the order they were written.
    # The softmax must normalise over the mode axis (axis 0 of the 3*R
    # matrix) so each head gets a distribution over its three modes. The
    # default last axis would normalise across heads instead, making every
    # weight 1 when num_heads == 1. The axis is passed positionally because
    # the keyword was renamed (`dim` -> `axis`) between TensorFlow releases.
    read_modes = tf.nn.softmax(
        tf.reshape(read_modes, [3, self.num_heads]), 0)  # 3*R

    # used to calculate usage vector, what's available to write to?
    retention_vec = tf.reduce_prod(1-free_gates*self.read_weights, reduction_indices=1)
    # used to dynamically allocate memory
    self.usage_vec = (self.usage_vec + self.write_weights - self.usage_vec * self.write_weights) * retention_vec

    # retrieves the writing allocation weighting
    alloc_weights = self.allocation_weighting()  # N*1
    # where to write to?
    write_lookup_weights = self.content_lookup(write_key, write_str)  # N*1
    # define our write weights now that we know how much space to allocate
    # for them and where to write to
    self.write_weights = write_gate*(alloc_gate*alloc_weights + (1-alloc_gate)*write_lookup_weights)

    # write: erase first, then add the new content to memory
    self.mem_mat = self.mem_mat*(1-tf.matmul(self.write_weights, erase_vec)) + \
        tf.matmul(self.write_weights, write_vec)

    # As well as writing, the controller can read from multiple locations in
    # memory. Memory can be searched based on the content of each location,
    # or the associative temporal links can be followed forward and backward
    # to recall information written in sequence or in reverse.
    # (3rd attention mechanism)

    # updates the temporal link matrix for the latest write, given the
    # precedence vector and the link matrix from the previous step
    nnweight_vec = tf.matmul(self.write_weights, tf.ones([1, self.num_words]))  # N*N
    self.link_mat = (1 - nnweight_vec - tf.transpose(nnweight_vec))*self.link_mat + \
        tf.matmul(self.write_weights, self.precedence_weight, transpose_b=True)
    # zero the diagonal of the link matrix
    self.link_mat *= tf.ones([self.num_words, self.num_words]) - tf.constant(np.identity(self.num_words, dtype=np.float32))

    self.precedence_weight = (1-tf.reduce_sum(self.write_weights, reduction_indices=0)) * \
        self.precedence_weight + self.write_weights

    # 3 modes - forward, backward, content lookup
    forw_w = read_modes[2]*tf.matmul(self.link_mat, self.read_weights)  # (N*N,N*R)->N*R
    look_w = read_modes[1]*self.content_lookup(read_keys, read_str)  # N*R
    back_w = read_modes[0]*tf.matmul(self.link_mat, self.read_weights, transpose_a=True)  # N*R

    # use them to initialize read weights
    self.read_weights = back_w + look_w + forw_w  # N*R
    # create read vectors by applying read weights to memory matrix
    self.read_vecs = tf.transpose(tf.matmul(self.mem_mat, self.read_weights, transpose_a=True))  # (W*N,N*R)^T->R*W

    # project the flattened read vectors to the output size
    read_vec_mut = tf.matmul(tf.reshape(self.read_vecs, [1, self.num_heads * self.word_size]),
                             self.read_vecs_out_weight)  # (1*RW, RW*Y)-> (1*Y)

    # return output + read vecs product
    return self.nn_out+read_vec_mut