def parse_learning_rate(step, learning_rate):
    """Returns the learning rate for `step` from a constant or a schedule string.

    Args:
        step: Current training step. It is only used when `learning_rate` is a
            multi-token schedule string; it is combined with an int64 tensor,
            so it is presumably an int64 tensor (confirm against callers).
        learning_rate: Either a Python number, which is returned unchanged, or
            a comma-separated schedule string
            ``initial[,<value><sep><boundary>]*`` where ``<sep>`` is one of:
            ``@`` or ``S``: hold the previous value up to ``boundary``, then
            step to ``value``;
            ``L``: ramp linearly from the previous value so that ``value`` is
            reached at ``boundary``.
            Examples: ``"1e-3"``, ``"1e-3,1e-4@15000,1e-5@25000"``.

    Returns:
        The number itself for numeric input, a float32 constant tensor for a
        single-token string, or a float32 scalar tensor interpolated from the
        schedule otherwise.

    Raises:
        ValueError: If a schedule token contains none of ``@``, ``S``, ``L``.
    """
    # Plain Python numbers (ints as well as floats) are passed straight through.
    if isinstance(learning_rate, (int, float)):
        return learning_rate

    # SECURITY NOTE: eval is used so that values can be written as fractions
    # (e.g., 2/255). The schedule string must therefore come from a trusted
    # source (flags/config), never from untrusted input.
    tokens = learning_rate.split(',')
    first_lr = float(eval(tokens[0]))  # pylint: disable=eval-used
    if len(tokens) == 1:
        return tf.constant(first_lr, dtype=tf.float32)

    # Segment i runs from init_step[i] to final_step[i] and moves the rate
    # from init_values[i] to final_values[i].
    init_values = [first_lr]
    final_values = []
    init_step = [0]
    final_step = []
    for t in tokens[1:]:
        if '@' in t:
            lr, boundary = t.split('@', 1)
            is_linear = False
        elif 'S' in t:  # Syntactic sugar to indicate a step.
            lr, boundary = t.split('S', 1)
            is_linear = False
        elif 'L' in t:
            lr, boundary = t.split('L', 1)
            is_linear = True
        else:
            raise ValueError('Unknown specification.')
        lr = float(eval(lr))  # pylint: disable=eval-used
        init_values.append(lr)
        if is_linear:
            # The previous segment ramps towards the new value.
            final_values.append(lr)
        else:
            # The previous segment stays flat at its own starting value.
            final_values.append(init_values[-2])
        boundary = int(boundary)
        init_step.append(boundary)
        final_step.append(boundary)
    # Close the last segment one step past the largest boundary; it is flat.
    large_step = max(final_step) + 1
    final_step.append(large_step)
    final_values.append(lr)

    # Find current index: the first segment whose end lies beyond `step`.
    boundaries = list(final_step) + [large_step + 2]
    boundaries = tf.convert_to_tensor(boundaries, dtype=tf.int64)
    b = boundaries - tf.minimum(step + 1, large_step + 1)
    large_step = tf.constant(
        large_step, shape=boundaries.shape, dtype=step.dtype)
    # Boundaries already passed get a large value so argmin skips them.
    b = tf.where(b < 0, large_step, b)
    idx = tf.minimum(tf.argmin(b), len(init_values) - 1)

    init_step = tf.convert_to_tensor(init_step, dtype=tf.float32)
    final_step = tf.convert_to_tensor(final_step, dtype=tf.float32)
    init_values = tf.convert_to_tensor(init_values, dtype=tf.float32)
    final_values = tf.convert_to_tensor(final_values, dtype=tf.float32)
    x1 = tf.gather(init_step, idx)
    x2 = tf.gather(final_step, idx)
    y1 = tf.gather(init_values, idx)
    y2 = tf.gather(final_values, idx)
    # Linear interpolation inside the selected segment.
    return (tf.cast(step, tf.float32) - x1) / (x2 - x1) * (y2 - y1) + y1
def __call__(self, codes):
    """Maps every latent vector to its closest codebook entry.

    Args:
        codes: A `float`-like `Tensor` of latent vectors, rank-3 with shape
            `[batch_size, latent_size, code_size]`.

    Returns:
        nearest_codebook_entries: For each code, the codebook row with the
            smallest Euclidean distance.
        one_hot_assignments: One-hot vectors (depth `num_codes`) marking the
            selected codebook row for each code.
    """
    # Shape the codebook so it broadcasts against [batch, latent, 1, code].
    broadcast_codebook = tf.reshape(
        self.codebook, [1, 1, self.num_codes, self.code_size])

    offsets = tf.expand_dims(codes, 2) - broadcast_codebook
    distances = tf.norm(tensor=offsets, axis=3)

    winners = tf.argmin(input=distances, axis=2)
    one_hot_assignments = tf.one_hot(winners, depth=self.num_codes)

    # Summing the one-hot-weighted codebook selects exactly one row per code.
    selected = tf.expand_dims(one_hot_assignments, -1) * broadcast_codebook
    nearest_codebook_entries = tf.reduce_sum(input_tensor=selected, axis=2)
    return nearest_codebook_entries, one_hot_assignments
def GetDiscreteActionLoss(logits, action_labels, bin_centers, num_bins):
    """Convert labels to one-hot, compute cross-entropy loss, and return loss.

    Args:
        logits: Tensor corresponding to the predicted action logits, with size
            [batch_size, timesteps, action_dim*num_bins]
        action_labels: Tensor corresponding to the real valued action labels,
            with size [batch_size, 1, timesteps, action_dim]
        bin_centers: numpy array of size [num_bins, action_dim] corresponding
            to the centers of each bin for each dimension.
        num_bins: number of discrete bins.

    Returns:
        Scalar tensor, cross entropy loss between the predicted actions and
        labels.
    """
    # Insert a bin axis so labels broadcast against [num_bins, action_dim].
    action_labels = tf.expand_dims(action_labels, -2)
    bin_centers = tf.constant(bin_centers, dtype=tf.float32)
    # Left-pad bin_centers with singleton axes up to the rank of the labels.
    while len(bin_centers.shape) < len(action_labels.shape):
        bin_centers = tf.expand_dims(bin_centers, 0)
    # Nearest bin (squared distance) along the bin axis, per action dimension.
    discrete_labels = tf.argmin((action_labels - bin_centers)**2, -2)
    onehot_labels = tf.one_hot(discrete_labels, num_bins)
    onehot_labels = tf.reshape(onehot_labels, (-1, num_bins))
    logits = tf.reshape(logits, (-1, num_bins))
    # Pass labels/logits by keyword: the two are easy to swap silently when
    # given positionally.
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits)
    loss = tf.reduce_mean(loss)
    return loss
def _choose_sample(sampled_latent, sampled_keypoints, sample_losses):
    """Picks the lowest-loss sample in training and the first one otherwise.

    The selection between "best" and "first" is delegated to
    `tf.keras.backend.in_train_phase`.

    Args:
        sampled_latent: [num_samples, batch_size, latent_code_size] tensor.
        sampled_keypoints: [num_samples, batch_size, 3 * num_keypoints] tensor.
        sample_losses: [num_samples, batch_size] tensor.

    Returns:
        A two-element list: the chosen latent and keypoint tensors.
    """
    # Per batch element, the sample index with the smallest loss: [batch_size].
    winner = tf.cast(tf.argmin(sample_losses, axis=0), tf.int32)
    batch_positions = tf.range(tf.shape(sampled_latent)[1], dtype=tf.int32)
    # (sample, batch) index pairs for gather_nd.
    gather_index = tf.stack([winner, batch_positions], axis=-1)

    chosen = []
    for samples in (sampled_latent, sampled_keypoints):
        lowest_loss = tf.gather_nd(samples, gather_index)
        # Training: lowest-loss sample. Inference: sample 0.
        chosen.append(
            tf.keras.backend.in_train_phase(lowest_loss, samples[0]))
    return chosen
def testArgMaxMin(self):
    """Checks argmax/argmin results when the axis is given as `dimension`."""
    expectations = (
        ([1], tf.argmax([[1, 3, 2]], name='abc', dimension=1)),
        ([0, 0, 0], tf.argmax([[1, 3, 2]], dimension=0)),
        ([0], tf.argmin([[1, 3, 2]], name='abc', dimension=1)),
    )
    for expected, actual in expectations:
        self.assertAllClose(expected, actual)
def nearest_centroid(X, MU):
    """Returns, for each row of the distance matrix, the index of its minimum.

    `distanceFunc(X, MU)` is expected to yield an N x K tensor of distances
    (points by centroids), so the argmin over axis 1 is the index of the
    closest centroid for every point.
    """
    return tf.argmin(distanceFunc(X, MU), 1)
def k(self):
    """Returns the index of the closest embedding for every encoding.

    While `self.is_training` is true the index is the argmin over
    `self.z_dist_flat`; otherwise the value of `self.prediction_input` is used
    directly. A "clusters" histogram summary of the result is recorded.
    """
    nearest = tf.argmin(self.z_dist_flat, axis=-1)
    supplied = self.prediction_input
    cluster_index = tf.cond(
        self.is_training,
        lambda: nearest,
        lambda: supplied)
    tf.summary.histogram("clusters", cluster_index)
    return cluster_index
def argmin_grad(x, y):
    """Returns argmin(|x - y|) as float32 plus a pass-through gradient fn.

    The returned gradient function forwards the upstream gradient to `x` and
    reports no gradient (`None`) for `y`.
    """
    gap = tf.abs(tf.subtract(x, y))
    closest = tf.cast(tf.argmin(gap), tf.float32)

    def passthrough(dy):
        """Let gradients pass through."""
        return dy, None

    return closest, passthrough
def get_bmu(self):
    """Returns the [row, col] grid location (float) of the best matching unit.

    The best matching unit is the weight row with the smallest root-mean-
    squared difference to `self.input`; its flat index is converted to grid
    coordinates using `self.width`.
    """
    mean_sq = tf.reduce_mean(tf.square(self.input - self.weight), axis=1)
    flat_index = tf.argmin(tf.sqrt(mean_sq))
    row = tf.div(flat_index, self.width)
    col = tf.mod(flat_index, self.width)
    return tf.to_float([row, col])
def _find_interval_containing_new_value(x, new_value):
    """Finds, per entry of new_value, the index in ascending x it falls after."""
    num_queries = shape_utils.combined_static_and_dynamic_shape(new_value)[0]
    num_knots = shape_utils.combined_static_and_dynamic_shape(x)[0]

    queries = tf.reshape(new_value, shape=(num_queries, 1))
    knots = tf.reshape(x, shape=(1, num_knots))
    # reached[i, j] is 1 when new_value[i] >= x[j], else 0.
    reached = tf.cast(queries >= knots, dtype=tf.int32)
    # A 1 -> 0 transition between neighbouring knots shows up as -1, which the
    # argmin then locates.
    transition = reached[:, 1:] - reached[:, :-1]
    return tf.argmin(transition, axis=1)
def grad_hbar(self, v, gradbs, reuse=True):
    """Compute gradient of hbar function for Wasserstein iteration.

    Args:
        v: Tensor subtracted from the pairwise cost before the argmin.
        gradbs: Unused here; `self.gradbs` is read instead.
        reuse: Unused here.

    Returns:
        `1 / self.gradbs` minus the per-column mean of the one-hot match
        matrix, with the reduced axis kept (leading dimension 1).
    """
    # NOTE(review): the one-hot depth is self.gradbs, which only matches the
    # width of the cost matrix if the target batch also has self.gradbs
    # entries -- confirm against callers.
    cost = self.basedist(self.source, self.target)
    cost -= v
    best_match = tf_v1.argmin(cost, axis=1)  # index of subgradient per row
    match_matrix = tf_v1.one_hot(best_match, self.gradbs)
    match_rate = tf_v1.reduce_mean(match_matrix, axis=0, keep_dims=True)
    return 1. / self.gradbs - match_rate
def get_pulling_indices(self, weight):
    """Returns, for each entry of a 1-D weight, its nearest centroid index.

    Both the weight and the centroids are tiled to a
    [len(weight), num_centroids] grid; the argmin of the absolute difference
    along the centroid axis is returned.
    """
    num_centroids = self.cluster_centroids.shape[0]
    weight_grid = tf.tile(tf.expand_dims(weight, axis=1), [1, num_centroids])
    centroid_row = tf.reshape(self.cluster_centroids, [1, num_centroids])
    centroid_grid = tf.tile(centroid_row, [weight.shape[0], 1])
    gaps = tf.abs(weight_grid - centroid_grid)
    return tf.argmin(gaps, axis=1)
def _compute_head_weights_with_position_prior(weights, masks, paddings,
                                              num_heads, attn_size):
    """Computes head-specific attention weights with position prior.

    Head i keeps only the weights of the i-th most recent, non-overlapping
    chunk of `attn_size` items relative to each query; every other position
    takes its value from `paddings`.

    Args:
        weights: A 3d tensor with shape of [h*N, T_q, T_k].
        masks: A 3d tensor with shape of [h*N, T_q, T_k].
        paddings: A 3d tensor with shape of [h*N, T_q, T_k].
        num_heads: An integer denoting number of chunks.
        attn_size: An integer denoting the size of the sliding window.

    Returns:
        A list of h tensors (each shaped [N, T_q, T_k]) where tensors
        correspond to chunk specific weights.
    """
    # masks is lower triangular (ones below, zeros above), so the position of
    # the first zero in a row equals the number of items available to that
    # query. argmin returns the smallest index on ties, which yields that
    # position -- except for a row of all ones, hence the block of zeros
    # appended first.
    padded_masks = tf.concat([masks, tf.zeros_like(masks)], axis=-1)
    available = tf.argmin(padded_masks, 2)  # (h*N, T_q)

    # Split for heads.
    available_per_head = tf.split(available, num_heads, axis=0)
    weights_per_head = tf.split(weights, num_heads, axis=0)
    paddings_per_head = tf.split(paddings, num_heads, axis=0)

    def chunk_weights(head):
        """Builds the [N, T_q, T_k] weights restricted to chunk `head`."""
        counts = available_per_head[head]
        head_weights = weights_per_head[head]
        # True for keys that come before this head's chunk starts.
        before_chunk = tf.sequence_mask(
            tf.maximum(counts - (attn_size * (head + 1)), 0),
            tf.shape(head_weights)[2])
        # True for keys up to the end of this head's chunk.
        through_chunk = tf.sequence_mask(
            tf.maximum(counts - (attn_size * head), 0),
            tf.shape(head_weights)[2])
        inside_chunk = tf.logical_and(
            tf.logical_not(before_chunk), through_chunk)
        return tf.where(inside_chunk, head_weights, paddings_per_head[head])

    return [chunk_weights(head) for head in range(num_heads)]
def getBMU(self):
    """Returns the [row, col] grid position (float) of the best matching unit.

    The best matching unit is the weight row with the smallest Euclidean
    distance to `self.input`; its flat index is split into grid coordinates
    using `self.width`.
    """
    # Euclidean distance from the input to every weight row.
    sum_sq = tf.reduce_sum(tf.square(self.input - self.weight), axis=1)
    euclidean = tf.sqrt(sum_sq)

    # Flat index of the closest unit, then its row and column.
    winner = tf.argmin(euclidean)
    row = tf.div(winner, self.width)
    col = tf.mod(winner, self.width)
    return tf.to_float([row, col])
def add_summary_images(self, num=9):
    """Adds image summaries of source images beside their nearest targets.

    Montages are created for the source and target batches, then each source
    item is matched to the target with the lowest `basedist` cost and the two
    montages are concatenated along axis 1 into the 'matches_ims' summary.

    Args:
        num: Forwarded to `add_summary_montage` for every montage.
    """
    source_montage = self.add_summary_montage(self.source, 'source_ims', num)
    self.add_summary_montage(self.target, 'target_ims', num)

    pairwise_cost = self.basedist(self.source, self.target)
    nearest = tf_v1.argmin(pairwise_cost, axis=1)
    neighbors = tf_v1.gather(self.target, nearest)
    neighbor_montage = self.add_summary_montage(
        neighbors, 'neighbors_ims', num)

    side_by_side = tf_v1.concat([source_montage, neighbor_montage], axis=1)
    tf_v1.summary.image('matches_ims', side_by_side)
def get_pulling_indices(self, weight):
    """Returns, for each entry of a 2-D weight, its nearest centroid index.

    The weight and the centroids are tiled to a common
    [rows, cols, num_centroids] volume; the argmin of the absolute difference
    along the centroid axis is returned so ops can build their kernels on it.
    """
    num_centroids = self.cluster_centroids.shape[0]
    weight_volume = tf.tile(
        tf.expand_dims(weight, axis=2), [1, 1, num_centroids])
    centroid_line = tf.reshape(self.cluster_centroids, [1, 1, num_centroids])
    centroid_volume = tf.tile(
        centroid_line, [weight.shape[0], weight.shape[1], 1])
    gaps = tf.abs(weight_volume - centroid_volume)
    return tf.argmin(gaps, axis=2)
def setup_v_rot_(links):
    """Computes and stores `links.v_rot` from link points and thickness.

    For each pair of consecutive points a half-difference vector and its
    normalised direction are formed; two further vectors are derived by cross
    products and scaled by the thickness. The result is the batched product
    of the scaled and the unscaled column triples (second one transposed).
    """
    def as_columns(first, second, third):
        """Places three [n, 3] tensors side by side as a [n, 3, 3] tensor."""
        return tf.concat(
            (first[:, :, newaxis], second[:, :, newaxis],
             third[:, :, newaxis]),
            axis=2)

    thickness = links.thickness[:-1, newaxis]
    half_delta = (links.points[:-1] - links.points[1:]) / 2
    # The epsilon guards against division by zero for coincident points.
    direction = half_delta / (tf.norm(half_delta, axis=1)[:, newaxis] + 1e-10)

    # One-hot vector along the smallest component of the direction.
    seed_axis = tf.one_hot(tf.argmin(direction, 1), 3)
    normal_a = tf.cross(direction, seed_axis)
    normal_b = tf.cross(direction, normal_a)

    scaled = as_columns(
        half_delta, thickness * normal_a, thickness * normal_b)
    unscaled = as_columns(direction, normal_a, normal_b)
    links.v_rot = tf.matmul(scaled, unscaled, transpose_b=True)
def _resolve_permutation(loss_matrix):
    """Finds the lowest-total-loss permutation from an all-pairs loss matrix.

    Every permutation of `range(source)` is scored by summing, over rows i,
    the entry `loss_matrix[b, i, perm[i]]`; the best one is returned per batch
    element.

    Args:
        loss_matrix: tensor of shape [batch, source, source]. As built by
            `_apply` in this file, axis 1 indexes the reference and axis 2
            the estimate.

    Returns:
        permutation: tensor of shape [batch, source, 1] such that
            tf.gather_nd(estimates, permutation, batch_dims=1) returns the
            permuted estimates that achieve the lowest loss.
    """
    batch = loss_matrix.shape[0]
    source = loss_matrix.shape[1]

    # Candidate permutations as column indices, shaped
    # [batch, source!, source, 1] so they can index the last loss axis.
    candidates = tf.constant(list(itertools.permutations(range(source))))
    candidates = tf.expand_dims(tf.expand_dims(candidates, 0), 3)
    candidates = tf.tile(candidates, [batch, 1, 1, 1])

    # Replicate the losses per candidate: (batch, source!, source, source).
    replicated = tf.tile(
        tf.expand_dims(loss_matrix, 1), [1, candidates.shape[1], 1, 1])

    # Pick one loss per row for every candidate and total them:
    # (batch, source!, source) -> (batch, source!).
    picked = tf.gather_nd(replicated, candidates, batch_dims=3)
    totals = tf.reduce_sum(picked, axis=2)

    # Index of the cheapest candidate, shaped (batch, 1) for gather_nd.
    cheapest = tf.expand_dims(tf.argmin(totals, axis=1), 1)

    # Resulting shape: (batch, source, 1).
    return tf.gather_nd(candidates, cheapest, batch_dims=1)
def get_accuracy(final_layer, ground_truth, onehot=False, inverted=False):
    """Computes accuracy for the tensor.

    Args:
        final_layer: Model output; its third-from-last dimension is used as
            the batch size.
        ground_truth: Labels compared against the prediction.
        onehot: If true, prediction and label are argmax indices over the last
            axis; otherwise they are sign tests (`> 0`).
        inverted: If true, the prediction is flipped (argmin instead of
            argmax, or logical negation of the sign test).

    Returns:
        Number of agreeing predictions summed over the last axis, divided by
        the batch size.
    """
    batch_size = final_layer.shape[-3]

    if not onehot:
        assert final_layer.shape[-2] == 1
        pred = final_layer[Ellipsis, 0, 0] > 0
        if inverted:
            pred = tf.logical_not(pred)
        gt = ground_truth[Ellipsis, 0] > 0
    else:
        select = tf.argmin if inverted else tf.argmax
        pred = select(final_layer[Ellipsis, 0], axis=-1)
        gt = tf.argmax(ground_truth, axis=-1)

    matches = tf.cast(tf.equal(pred, gt), tf.float32)
    # The conversion below exists because tf1 uses Dimension for shape
    # entries while tf2 uses plain tuples.
    batch_size = tf.cast(tf.convert_to_tensor(batch_size), tf.float32)
    return tf.reduce_sum(matches, axis=-1) / batch_size
def __init__(self, iSquaredMapDim, iInputLength, learning_rate, maxIter):
    """Builds the training graph of a square self-organising map and starts a session.

    Args:
        iSquaredMapDim: Side length of the map; the map has
            iSquaredMapDim * iSquaredMapDim neurons.
        iInputLength: Length of each input vector.
        learning_rate: Base learning rate, scaled down by the decay term.
        maxIter: Iteration count used as the denominator of the decay term.
    """
    self.iSquaredMapDim = iSquaredMapDim
    iNeurons = self.iSquaredMapDim * self.iSquaredMapDim
    self.maxIter = maxIter
    # Initial neighbourhood radius: 60% of the map side length.
    self.dRadius = self.iSquaredMapDim * .6
    self.tfGraph = tf.Graph()
    # Initialize a TensorFlow computation graph
    with self.tfGraph.as_default():
        # Initializing variables
        tf.set_random_seed(0)
        self.NeuronWeights = tf.Variable(
            tf.random_normal([iNeurons, iInputLength]))
        # NOTE(review): presumably an [iNeurons, 2] grid-coordinate matrix;
        # generateIndexMatrix is defined outside this block -- confirm.
        self.NeuronLocation = self.generateIndexMatrix()
        # Input placeholders
        self.inputPlaceholder = tf.placeholder("float", [iInputLength])
        self.iterInputPlaceholder = tf.placeholder("float")
        # Calculating best mapping unit (BMU) and its location
        # The input is repeated once per neuron so it can be subtracted
        # row-wise from the weight matrix.
        input_matix = tf.stack(
            [self.inputPlaceholder for i in range(iNeurons)])
        euclidenDistances = tf.sqrt(
            tf.reduce_sum(
                tf.pow(tf.subtract(self.NeuronWeights, input_matix), 2), 1))
        bmu = tf.argmin(euclidenDistances, 0)
        # Slice start [bmu, 0] and size [1, 2]: selects row `bmu` of the
        # location matrix.
        mask = tf.pad(tf.reshape(bmu, [1]), np.array([[0, 1]]))
        size = tf.cast(tf.constant(np.array([1, 2])), dtype=tf.int64)
        bmu_location = tf.reshape(
            tf.slice(self.NeuronLocation, mask, size), [2])
        # Calculate learning rate and radius
        # Linear decay: 1 - iteration / maxIter.
        decay_function = tf.subtract(
            1.0, tf.div(self.iterInputPlaceholder, self.maxIter))
        _current_learning_rate = tf.multiply(learning_rate, decay_function)
        _current_radius = tf.multiply(self.dRadius, decay_function)
        # Adapt learning rate to each neuron based on position
        bmu_matrix = tf.stack([bmu_location for i in range(iNeurons)])
        # Squared grid distance from every neuron to the BMU.
        bmu_distance = tf.reduce_sum(
            tf.pow(tf.subtract(self.NeuronLocation, bmu_matrix), 2), 1)
        # Gaussian distribution: exp(-distance^2 / radius^2)
        gaussianNeighbourhood = tf.exp(
            tf.negative(
                tf.div(tf.cast(bmu_distance, "float32"),
                       tf.pow(_current_radius, 2))))
        learning_rate_matrix = tf.multiply(_current_learning_rate,
                                           gaussianNeighbourhood)
        # Update all the weights
        # Each neuron's scalar rate is tiled across the input length, giving
        # an [iNeurons, iInputLength] multiplier.
        multiplytiplier = tf.stack([
            tf.tile(
                tf.slice(learning_rate_matrix, np.array([i]), np.array([1])),
                [iInputLength]) for i in range(iNeurons)
        ])
        # delta = rate * (input - weights), applied to every neuron.
        delta = tf.multiply(
            multiplytiplier,
            tf.subtract(
                tf.stack([self.inputPlaceholder for i in range(iNeurons)]),
                self.NeuronWeights))
        new_weights = tf.add(self.NeuronWeights, delta)
        self._training = tf.assign(self.NeuronWeights, new_weights)
        # Initialize the session and run the variable initializer
        self._sess = tf.Session()
        self._sess.run(tf.global_variables_initializer())
        return
def test_argmin_reduce(self):
    """Runs the conversion test on an argmin over the last axis of a 4-D input."""
    # NOTE(review): the test name string below looks like a typo of
    # 'argmin_reduce'; left untouched because it is a runtime value.
    image_batch = tf.placeholder(shape=(4, 32, 32, 3), dtype=tf.float32)
    argmin_index = tf.argmin(image_batch, axis=-1)
    self._test_conversion('axgmin_reduce', [image_batch], [argmin_index])
import tensorflow.compat.v1 as tf

# Build a 2-D tensor.
t2d = tf.constant([[0, 2, 5, 6], [7, 4, 9, 1]], tf.float32)
# Along axis=1 (across the columns of each row), get the column index of the
# maximum value.
result_max = tf.argmax(t2d, axis=1)
# Along axis=1 (across the columns of each row), get the column index of the
# minimum value.
result_min = tf.argmin(t2d, axis=1)

# NOTE(review): session.run on these tensors needs graph mode; with a TF2
# installation eager execution may have to be disabled first -- confirm.
# The context manager releases the session's resources once the results have
# been printed.
with tf.Session() as session:
    # Print the results.
    print("最大值位置索引:", session.run(result_max))
    print("最小值位置索引:", session.run(result_min))
def k(self):
    """Returns the closest-centroid index for every embedding.

    The index is the argmin over the last axis of `self.z_dist_flat`; the
    resulting op is named "k".
    """
    return tf.argmin(self.z_dist_flat, axis=-1, name="k")
def build(self):
    """Builds the separation graph: encoder, conv blocks, separator, decoder, loss.

    Creates the `input_signals` placeholder, sums the speakers into a mixture,
    encodes it (learned 1-D convolution plus STFT magnitude), runs stacked
    dilated convolution blocks, derives per-speaker attractors from anchor
    combinations, decodes back to waveforms and finally attaches the
    permutation-invariant loss.

    Sets `self.audios`, `self.mix_input`, `self.data_out`, `self.loss`,
    `self.pred_output`, `self.sdr`, `self.perm_idxs` and their `*_fix`
    counterparts.
    """
    # NOTE(review): the extent of each variable_scope block was reconstructed
    # from a whitespace-collapsed source -- confirm against the original file.
    self.audios = tf.placeholder(tf.float32,
                                 [self.batch_size, self.n_speaker, None],
                                 name='input_signals')
    # Mixture = sum over the speaker axis.
    self.mix_input = tf.reduce_sum(self.audios, axis=1)
    with tf.variable_scope("encoder"):
        # [batch, encode_len, channels]
        encoded_input = tf.layers.Conv1D(
            filters=self.config["model"]["filters"]["ae"],
            kernel_size=self.fft_len,
            strides=self.fft_hop,
            activation=tf.nn.relu,
            name="conv1d_relu")(tf.expand_dims(self.mix_input, -1))
    # STFT of the mixture with the same frame length/hop as the conv encoder;
    # magnitude and phase are kept separately.
    stfts_mix = tf.signal.stft(self.mix_input,
                               frame_length=self.fft_len,
                               frame_step=self.fft_hop,
                               fft_length=self.fft_len,
                               window_fn=self.fft_wnd)
    magni_mix = tf.abs(stfts_mix)
    phase_mix = tf.atan2(tf.imag(stfts_mix), tf.real(stfts_mix))
    with tf.variable_scope("bottle_start"):
        # Learned features and log1p STFT magnitudes are concatenated on the
        # channel axis, normalised, then projected by a 1x1 convolution.
        norm_input = self.cLN(
            tf.concat([encoded_input, tf.log1p(magni_mix)], axis=-1),
            "layer_norm")
        block_input = tf.layers.Conv1D(
            filters=self.config["model"]["filters"]["1*1-conv"],
            kernel_size=1)(norm_input)
    for stack_i in range(self.num_stacks):
        for dilation in self.dilations:
            with tf.variable_scope("conv_block_{}_{}".format(
                    stack_i, dilation)):
                # 1x1 conv -> PReLU -> norm -> depthwise dilated conv ->
                # PReLU -> norm -> 1x1 conv, added back as a residual.
                block_output = tf.layers.Conv1D(
                    filters=self.config["model"]["filters"]["d-conv"],
                    kernel_size=1)(block_input)
                block_output = self.prelu(block_output,
                                          name='1st-prelu',
                                          shared_axes=[1])
                block_output = self.gLN(block_output, "first")
                block_output = self._depthwise_conv1d(
                    block_output, dilation)
                block_output = self.prelu(block_output,
                                          name='2nd-prelu',
                                          shared_axes=[1])
                block_output = self.gLN(block_output, "second")
                block_output = tf.layers.Conv1D(
                    filters=self.config["model"]["filters"]["1*1-conv"],
                    kernel_size=1)(block_output)
                block_input += block_output
    # output_ratio selects what the masks are applied to: 1 -> learned
    # features only, 0 -> STFT magnitudes only, otherwise both concatenated.
    if self.output_ratio == 1:
        embed_channel = self.config["model"]["filters"]["ae"]
        feature_map = encoded_input
    elif self.output_ratio == 0:
        embed_channel = self.stft_ch
        feature_map = magni_mix
    else:
        embed_channel = self.concat_channels
        feature_map = tf.concat([encoded_input, magni_mix], axis=-1)
    with tf.variable_scope('separator'):
        # One embedding vector per (frame, channel):
        # [batch, frames, embed_channel, embed_size].
        s_embed = tf.layers.Dense(
            embed_channel * self.config["model"]["embed_size"])(block_input)
        s_embed = tf.reshape(s_embed, [
            self.batch_size, -1, embed_channel,
            self.config["model"]["embed_size"]
        ])
        # Estimate attractor from best combination from anchors
        v_anchors = tf.get_variable(
            'anchors', [self.n_anchor, self.config["model"]["embed_size"]],
            dtype=tf.float32)
        # Every n_speaker-sized subset of anchor indices.
        c_combs = tf.constant(list(
            itertools.combinations(range(self.n_anchor), self.n_speaker)),
                              name='combs')
        s_anchor_sets = tf.gather(v_anchors, c_combs)
        # Soft assignment of each embedding to the anchors of each subset
        # (softmax over the last, per-subset anchor axis).
        s_anchor_assignment = tf.einsum('btfe,pce->bptfc', s_embed,
                                        s_anchor_sets)
        s_anchor_assignment = tf.nn.softmax(s_anchor_assignment)
        # Attractors = assignment-weighted mean of the embeddings.
        s_attractor_sets = tf.einsum('bptfc,btfe->bpce',
                                     s_anchor_assignment, s_embed)
        s_attractor_sets /= tf.expand_dims(
            tf.reduce_sum(s_anchor_assignment, axis=(2, 3)), -1)
        # Pairwise inner products between the attractors of each subset; the
        # diagonal is set to -inf so self-similarity is ignored by the max.
        sp = tf.matmul(s_attractor_sets,
                       tf.transpose(s_attractor_sets, [0, 1, 3, 2]))
        diag = tf.fill(sp.shape[:-1], float("-inf"))
        sp = tf.linalg.set_diag(sp, diag)
        s_in_set_similarities = tf.reduce_max(sp, axis=(-1, -2))
        # Choose the subset whose most similar attractor pair is least
        # similar.
        s_subset_choice = tf.argmin(s_in_set_similarities, axis=1)
        s_subset_choice_nd = tf.transpose(
            tf.stack([
                tf.range(self.batch_size, dtype=tf.int64), s_subset_choice
            ]))
        s_attractors = tf.gather_nd(s_attractor_sets, s_subset_choice_nd)
        # Per-speaker logits from embedding/attractor inner products, applied
        # multiplicatively to the feature map.
        s_logits = tf.einsum('btfe,bce->bctf', s_embed, s_attractors)
        output_code = s_logits * tf.expand_dims(feature_map, 1)
    with tf.variable_scope("decoder"):
        # Either branch may be skipped; 0 keeps the weighted sum below valid.
        conv_out = pred_istfts = 0
        if self.output_ratio != 0:
            # Learned-feature branch: the leading "ae" channels are projected
            # to frames and overlap-added.
            output_frame = tf.layers.Dense(
                self.config["model"]["kernel_size"]["ae"])(output_code[
                    ..., :self.config["model"]["filters"]["ae"]])
            conv_out = tf.signal.overlap_and_add(signal=output_frame,
                                                 frame_step=self.fft_hop)
        if self.output_ratio != 1:
            # STFT branch: the trailing stft_ch channels are combined with
            # the mixture phase and inverted.
            phase_mix_expand = tf.expand_dims(phase_mix, 1)
            pred_stfts = tf.complex(
                tf.cos(phase_mix_expand) * output_code[..., -self.stft_ch:],
                tf.sin(phase_mix_expand) * output_code[..., -self.stft_ch:])
            pred_istfts = tf.signal.inverse_stft(
                pred_stfts,
                frame_length=self.fft_len,
                frame_step=self.fft_hop,
                fft_length=self.fft_len,
                window_fn=tf.signal.inverse_stft_window_fn(
                    self.fft_hop, forward_window_fn=self.fft_wnd))
        # Blend of the two reconstructions, weighted by output_ratio.
        self.data_out = conv_out * self.output_ratio + pred_istfts * (
            1 - self.output_ratio)
    self.loss, self.pred_output, self.sdr, self.perm_idxs = loss.pit_loss(
        self.audios, self.data_out, self.config, self.batch_size,
        self.n_speaker, self.n_output)
    ### fixed loss not implemented yet !!!!!! ###
    # Until it is, the *_fix attributes come from an identical pit_loss call.
    self.loss_fix, self.pred_output_fix, self.sdr_fix, self.perm_idxs_fix = loss.pit_loss(
        self.audios, self.data_out, self.config, self.batch_size,
        self.n_speaker, self.n_output)
cluster_labels = tf.Variable(tf.zeros([num_samples], dtype=tf.int64))

# Define initial centroids: num_clusters points drawn at random from the
# dataset (drawn independently, so the same point can be picked twice).
initial_centroids = np.array(
    [dataset[np.random.choice(len(dataset))] for _ in range(num_clusters)])
centroids = tf.Variable(initial_centroids)

# Tile both operands to [num_samples, num_clusters, dimensions] so they can be
# subtracted element-wise.
expanded_centroids = tf.reshape(tf.tile(centroids, [num_samples, 1]),
                                [num_samples, num_clusters, dimensions])
expanded_points = tf.reshape(tf.tile(data_points, [1, num_clusters]),
                             [num_samples, num_clusters, dimensions])

# Squared Euclidean distance of every point to every centroid.
distances = tf.reduce_sum(tf.square(expanded_points - expanded_centroids),
                          axis=2)

# Assign each point to its closest centroid.
assignments = tf.argmin(distances, 1)


# Update new cluster
def data_group_avg(assignments, data):
    """Returns the mean of `data` rows per cluster id in `assignments`.

    The number of segments follows `num_clusters` rather than a fixed count,
    so the result always has one row per centroid. A cluster with no members
    divides zero by zero.
    """
    sum_total = tf.unsorted_segment_sum(data, assignments, num_clusters)
    num_total = tf.unsorted_segment_sum(tf.ones_like(data), assignments,
                                        num_clusters)
    avg_by_group = sum_total / num_total
    return avg_by_group


means = data_group_avg(assignments, data_points)
# One step: move the centroids to the new means and store the labels.
update = tf.group(centroids.assign(means), cluster_labels.assign(assignments))

# Calculate objective
def _tower_som(self):
    """Build a single SOM tower on the TensorFlow graph.

    Returns:
        A `(numerator, denominator)` pair for the batch SOM weight update:
        the numerator has one row per neuron and one column per input
        dimension, the denominator one row per neuron and a single column.
        Also sets `self._weights`, `self._location_vects`, `self._input` and
        `self._merged`, and appends to `self._summary_list`.
    """
    # NOTE(review): the nesting of the name scopes below was reconstructed
    # from a whitespace-collapsed source -- confirm against the original.
    # Randomly initialized weights for all neurons, stored together
    # as a matrix Variable of shape [num_neurons, input_dims]
    with tf.name_scope('Weights'):
        # Each tower will get its own copy of the weights variable. Since the
        # towers are constructed sequentially, the handle to the Tensors will
        # be different for each tower even if we reference "self"
        self._weights = tf.get_variable(
            name='weights',
            shape=[self._m * self._n, self._dim],
            initializer=tf.random_uniform_initializer(maxval=1))

        with tf.name_scope('summaries'):
            # All summary ops are added to a list and then the merge()
            # function is called at the end of this method
            mean = tf.reduce_mean(self._weights)
            self._summary_list.append(tf.summary.scalar('mean', mean))
            with tf.name_scope('stdev'):
                stdev = tf.sqrt(
                    tf.reduce_mean(
                        tf.squared_difference(self._weights, mean)))
            self._summary_list.append(tf.summary.scalar('stdev', stdev))
            self._summary_list.append(
                tf.summary.scalar('max', tf.reduce_max(self._weights)))
            self._summary_list.append(
                tf.summary.scalar('min', tf.reduce_min(self._weights)))
            self._summary_list.append(
                tf.summary.histogram('histogram', self._weights))

    # Matrix of size [m*n, 2] for SOM grid locations of neurons.
    # Maps an index to an (x,y) coordinate of a neuron in the map for
    # calculating the neighborhood distance
    self._location_vects = tf.constant(np.array(
        list(self._neuron_locations())),
                                       name='Location_Vectors')

    with tf.name_scope('Input'):
        self._input = tf.identity(self._input_tensor)

    # Start by computing the best matching units / winning units for each
    # input vector in the batch. Basically calculates the Euclidean distance
    # between every neuron's weight vector and the inputs, and returns the
    # index of the neurons which give the least value. Since we are doing
    # batch processing of the input, we need to calculate a BMU for each of
    # the individual inputs in the batch. Will have the shape [batch_size].
    # Any time expand_dims is called here it is to make TF broadcast properly.
    with tf.name_scope('BMU_Indices'):
        # Distance between weights and the input vector.
        # The reduction is over the last (feature) axis, so we end up with a
        # tensor of [batch_size, num_neurons] corresponding to the distance
        # between a particular input and each neuron in the map.
        # The squared distance is used because there's no point calling sqrt
        # or tf.norm if we're just doing a strict comparison.
        squared_distance = tf.reduce_sum(
            tf.pow(
                tf.subtract(tf.expand_dims(self._weights, axis=0),
                            tf.expand_dims(self._input, axis=1)), 2), 2)

        # Get the index of the minimum distance for each input item, shape
        # will be [batch_size]
        bmu_indices = tf.argmin(squared_distance, axis=1)

    # This will extract the location of the BMU in the map for each input
    # based on the BMU's indices
    with tf.name_scope('BMU_Locations'):
        # Using tf.gather we can use `bmu_indices` to index the location
        # vectors directly
        bmu_locs = tf.reshape(tf.gather(self._location_vects, bmu_indices),
                              [-1, 2])

    with tf.name_scope('Learning_Rate'):
        # With each epoch, the radius decreases linearly from the initial
        # radius, by (initial_radius - 1) / (max_epochs - 1) per epoch
        radius = tf.subtract(
            self._initial_radius,
            tf.multiply(
                self._epoch,
                tf.divide(
                    tf.cast(tf.subtract(self._initial_radius, 1),
                            tf.float32),
                    tf.cast(tf.subtract(self._max_epochs, 1), tf.float32))))

        # alpha = initial_learning_rate * (1 - epoch / max_epochs)
        alpha = tf.multiply(
            self._initial_learning_rate,
            tf.subtract(
                1.0,
                tf.divide(tf.cast(self._epoch, tf.float32),
                          tf.cast(self._max_epochs, tf.float32))))

        # Construct the op that will generate a matrix with learning rates
        # for all neurons and all inputs, based on iteration number and
        # location to BMU.
        # Start by getting the squared difference between each BMU location
        # and every other unit in the map.
        # bmu_locs is [batch_size, 2], i.e. the coordinates of the BMU for
        # each input vector.
        # location vects shape should be [1, num_neurons, 2]
        # bmu_locs should be [batch_size, 1, 2]
        # Output needs to be [batch_size, num_neurons], i.e. a row vector of
        # distances for each input item
        bmu_distance_squares = tf.reduce_sum(
            tf.pow(
                tf.subtract(tf.expand_dims(self._location_vects, axis=0),
                            tf.expand_dims(bmu_locs, axis=1)), 2), 2)

        # Using the distances between each BMU, construct the Gaussian
        # neighborhood function: exp(-d^2 / (2 * (radius * std_coeff)^2)).
        # Neurons which are close to the winner will move more than those
        # further away. The radius tensor decreases the width of the Gaussian
        # over time, so early in training more neurons will be affected by
        # the winner than late in training.
        # This tensor will be of shape [batch_size, num_neurons] as well and
        # will be the value multiplied to each neuron based on its distance
        # from the BMU for each input vector
        neighbourhood_func = tf.exp(
            tf.divide(
                tf.negative(tf.cast(bmu_distance_squares, "float32")),
                tf.multiply(
                    tf.square(tf.multiply(radius, self._std_coeff)), 2)))

        # Finally multiply by the learning rate to decrease overall neuron
        # movement over time
        learning_rate_op = tf.multiply(neighbourhood_func, alpha)

    # The batch formula for SOMs multiplies a neuron's neighborhood by all of
    # the input vectors in the batch, then divides that by just the sum of
    # the neighborhood function for each of the inputs.
    # We are writing this in a way that performs that operation for each of
    # the neurons in the map.
    with tf.name_scope('Update_Weights'):
        # The numerator needs to be shaped [num_neurons, dimensions] to
        # represent the new weights for each of the neurons. At this point,
        # the learning rate tensor is shaped [batch_size, neurons]; it is
        # broadcast against the inputs and summed over the batch axis.
        numerator = tf.reduce_sum(tf.multiply(
            tf.expand_dims(learning_rate_op, axis=-1),
            tf.expand_dims(self._input, axis=1)),
                                  axis=0)

        # The denominator is the per-neuron sum of the learning rate tensor
        # over the batch axis (axis 0), giving [num_neurons]; a small epsilon
        # avoids division by zero and a trailing axis is added so it
        # broadcasts in the division.
        denominator = tf.expand_dims(
            tf.reduce_sum(learning_rate_op, axis=0) + float(1e-12), axis=-1)

    # We only really care about summaries from one of the tower SOMs, so
    # assign the merge op to the last tower we make. Otherwise there's way too
    # many on Tensorboard.
    self._merged = tf.summary.merge(self._summary_list)

    # With multi-gpu training we collect the results and do the weight
    # assignment on the CPU
    return numerator, denominator
def _apply(loss_fn: typing.Callable[..., tf.Tensor], reference: tf.Tensor, estimate: tf.Tensor, allow_repeated: bool, enable: bool) -> typing.Any: """Return permutation invariant loss. Note that loss_fn must in general handle an arbitrary number of sources, since this function may expand in that dimention to get losses on all reference-estimate pairs. Args: loss_fn: function with the following signature: Args reference [batch, source', ...] tensor estimate [batch, source', ...] tensor Returns A [batch, source'] tensor of dtype=tf.float32 reference: [batch, source, ...] tensor. estimate: [batch, source, ...] tensor. allow_repeated: If true, allow the same estimate to be used to match multiple references. enable: If False, apply the loss function in fixed order and return its value and the unpermuted estimates. Returns: loss, A [batch, source] tensor of dtype=tf.float32 permuted_estimate, A tensor like estimate. """ if not enable: return loss_fn(reference, estimate), estimate assert reference.shape[:2] == estimate.shape[:2] batch = reference.shape[0] source = reference.shape[1] # Replicate estimate on axis 1 # estimate.shape will be (batch, source * source, ...) multiples = np.ones_like(estimate.shape) multiples[1] = source multiples = tf.convert_to_tensor(multiples) estimate_tiled = tf.tile(estimate, multiples) # Replicate reference on new axis 2, then combine axes [1, 2]. # reference.shape will be (batch, source * source, ...) reference_tiled = tf.expand_dims(reference, 2) multiples = np.ones_like(reference_tiled.shape, dtype=np.int32) multiples[2] = source multiples = tf.convert_to_tensor(multiples) reference_tiled = tf.tile(reference_tiled, multiples) reference_tiled = tf.reshape(reference_tiled, estimate_tiled.shape) # Compute the loss matrix. # loss_matrix.shape will be (batch, source, source). # Axis 1 is the estimate. Axis 2 is the reference. 
loss_matrix = tf.reshape(loss_fn(reference_tiled, estimate_tiled), [batch, source, source]) # Get the best permutation. # permutation.shape will be (batch, source, 1) if allow_repeated: permutation = tf.argmin(loss_matrix, axis=2, output_type=tf.int32) permutation = tf.expand_dims(permutation, 2) else: permutation = _resolve_permutation(loss_matrix) assert permutation.shape == (batch, source, 1), permutation.shape # Permute the estimates according to the best permutation. estimate_permuted = tf.gather_nd(estimate, permutation, batch_dims=1) loss_permuted = tf.gather_nd(loss_matrix, permutation, batch_dims=2) return loss_permuted, estimate_permuted
0, ] size = [ num_clusters, ] size[0] = num_clusters centroid_indices = tf.slice(random_indices, begin, size) centroids = tf.Variable(tf.gather(vector_values, centroid_indices)) expanded_vectors = tf.expand_dims(vectors, 0) expanded_centroids = tf.expand_dims(centroids, 1) vectors_subtration = tf.subtract(expanded_vectors, expanded_centroids) euclidean_distances = tf.reduce_sum(tf.square(vectors_subtration), 2) assignments = tf.to_int32(tf.argmin(euclidean_distances, 0)) partitions = [0, 0, 1, 1, 0] num_partitions = 2 data = [10, 20, 30, 40, 50] #outputs[0] = [10, 20, 50] #outputs[1] = [30, 40] partitions = tf.dynamic_partition(vectors, assignments, num_clusters) update_centroids = tf.concat(0, [ tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitions ]) init_op = tf.initialize_all_variables() sess = tf.Session()
def k(self):
    """Return, per row of `self.z_dist_flat`, the index of the smallest distance.

    The argmin is taken over the last axis and the resulting op is named "k".
    As a side effect, a histogram summary called "clusters" is registered for
    the selected indices.
    """
    closest_index = tf.argmin(self.z_dist_flat, axis=-1, name="k")
    # Record how the chosen cluster indices are distributed.
    tf.summary.histogram("clusters", closest_index)
    return closest_index
def execute(configs):
    """Build the model graph, train it per dataset, and report test metrics.

    The graph is a convolutional encoder, a nearest-embedding lookup on a
    `som_dim x som_dim` grid of embeddings, and a convolutional decoder. For
    every dataset in `configs["DATASETS"]` the model is trained from a fresh
    initialisation, evaluated on the test split, and the reconstruction MSE,
    NMI and cluster purity are printed. Unless `configs["debug_mode"]` is
    truthy, the first run's metrics are also written to
    "../results/vqvae_<dataset>_<random_state>_somdim_<som_dim>.tsv".

    Args:
      configs: mapping that provides the keys "random_state", "latent_dim",
        "som_dim", "conv_size", "batch_size", "n_epochs", "DATASETS"
        (iterable of "mnist" / "fashion") and "debug_mode".

    Raises:
      ValueError: if a dataset name is neither "mnist" nor "fashion".
    """
    tf.reset_default_graph()
    random.seed(configs["random_state"])
    nprand.seed(configs["random_state"])

    DECAY_FACTOR = 0.80
    decay_steps = 1000
    latent_dim = configs["latent_dim"]
    som_dim = [configs["som_dim"], configs["som_dim"]]

    global_step = tf.Variable(0, trainable=False, name="global_step")
    embeddings = tf.get_variable(
        "embeddings",
        som_dim + [latent_dim],
        initializer=tf.truncated_normal_initializer(stddev=0.05))

    x = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    # Label placeholder; not referenced anywhere else in this function.
    y = tf.placeholder(tf.int32, shape=[None])
    # Fed on every run; no op built in this function consumes it (the batch
    # normalisation that would have used it is commented out below).
    train = tf.placeholder(tf.bool, name="train")
    batch_size = tf.shape(x)[0]

    with tf.variable_scope("encoder"):
        h_conv1 = tf.nn.relu(
            conv2d(x_image, [4, 4, 1, configs["conv_size"]], "conv1"))
        h_pool1 = max_pool_2x2(h_conv1)
        h_conv2 = tf.nn.relu(
            conv2d(h_pool1,
                   [4, 4, configs["conv_size"], configs["conv_size"]],
                   "conv2"))
        h_pool2 = max_pool_2x2(h_conv2)
        flat_size = 7 * 7 * configs["conv_size"]
        h_flat = tf.reshape(h_pool2, [batch_size, flat_size])
        # h_flat_norm = tf.layers.batch_normalization(h_flat, training=train, renorm=True)
        z_e = tf.keras.layers.Dense(latent_dim)(h_flat)

    # Squared distance from every encoding to every embedding on the grid.
    z_dist = tf.squared_difference(
        tf.expand_dims(tf.expand_dims(z_e, 1), 1),
        tf.expand_dims(embeddings, 0))
    z_dist_red = tf.reduce_sum(z_dist, axis=-1)
    z_dist_flat = tf.reshape(z_dist_red, [batch_size, -1])

    # Flat index of the nearest embedding, then its (row, col) on the grid.
    k = tf.argmin(z_dist_flat, axis=-1)
    k_1 = k // som_dim[1]
    k_2 = k % som_dim[1]
    k_stacked = tf.stack([k_1, k_2], axis=1)
    z_q = tf.gather_nd(embeddings, k_stacked)

    def decoder(z_tensor):
        with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
            h_flat_dec = tf.keras.layers.Dense(flat_size)(z_tensor)
            h_reshaped = tf.reshape(h_flat_dec, tf.shape(h_pool2))
            h_unpool1 = tf.keras.layers.UpSampling2D((2, 2))(h_reshaped)
            h_deconv1 = tf.nn.relu(
                conv2d(h_unpool1,
                       [4, 4, configs["conv_size"], configs["conv_size"]],
                       "deconv1"))
            h_unpool2 = tf.keras.layers.UpSampling2D((2, 2))(h_deconv1)
            h_deconv2 = tf.nn.sigmoid(
                conv2d(h_unpool2, [4, 4, configs["conv_size"], 1], "deconv2"))
            x_hat = h_deconv2
            return x_hat

    x_hat = decoder(z_q)

    beta = 0.25
    loss_rec_mse = tf.losses.mean_squared_error(x_image, x_hat)
    loss_vq = tf.reduce_mean(
        tf.squared_difference(tf.stop_gradient(z_e), z_q))
    loss_commit = tf.reduce_mean(
        tf.squared_difference(z_e, tf.stop_gradient(z_q)))
    loss = loss_rec_mse + loss_vq + beta * loss_commit

    learning_rate = tf.placeholder_with_default(0.001, [])
    lr_decay = tf.train.exponential_decay(learning_rate,
                                          global_step,
                                          decay_steps,
                                          DECAY_FACTOR,
                                          staircase=True)

    decoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     "decoder")
    decoder_grads = list(zip(tf.gradients(loss, decoder_vars), decoder_vars))

    encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     "encoder")
    # The embedding lookup is not differentiable w.r.t. z_e, so the
    # reconstruction gradient taken at z_q is back-propagated through z_e
    # instead, and the commitment term is added on top.
    grad_z = tf.gradients(loss_rec_mse, z_q)
    encoder_grads = [
        (tf.gradients(z_e, var, grad_z)[0] +
         beta * tf.gradients(loss_commit, var)[0], var)
        for var in encoder_vars
    ]

    embed_grads = list(zip(tf.gradients(loss_vq, embeddings), [embeddings]))

    optimizer = tf.train.AdamOptimizer(lr_decay)
    # Pass global_step so it is incremented on every update; without it the
    # counter stays at 0 and the exponential decay above never takes effect.
    train_step = optimizer.apply_gradients(
        decoder_grads + encoder_grads + embed_grads,
        global_step=global_step)

    BATCH_SIZE = configs["batch_size"]
    EPOCHS = configs["n_epochs"]
    NUM_TESTS = 1
    EVAL_BATCH = 100

    for data_set in configs["DATASETS"]:
        if data_set == "mnist":
            ds_train, ds_test = tf.keras.datasets.mnist.load_data()
        elif data_set == "fashion":
            ds_train, ds_test = tf.keras.datasets.fashion_mnist.load_data()
        else:
            # Fail loudly instead of reusing the previous dataset (or hitting
            # an unbound local on the first iteration).
            raise ValueError("Unknown dataset: {!r}".format(data_set))

        # Flatten the images to vectors.
        # NOTE(review): pixel values are used exactly as loaded (no rescaling
        # happens here) while the decoder ends in a sigmoid - confirm the
        # intended input range.
        data_train = ds_train[0]
        data_train = np.reshape(
            data_train,
            (data_train.shape[0], data_train.shape[1] * data_train.shape[2]))
        data_test = ds_test[0]
        data_test = np.reshape(
            data_test,
            (data_test.shape[0], data_test.shape[1] * data_test.shape[2]))
        labels_test = ds_test[1]
        labels_train = ds_train[1]

        aggregated_mses = []
        aggregated_NMIs = []
        aggregated_purities = []

        for _ in range(NUM_TESTS):
            with tf.Session() as sess:
                # Re-initialises all variables (including global_step) so
                # every run starts from scratch.
                sess.run(tf.global_variables_initializer())
                indices_unsup = np.arange(data_train.shape[0])
                with tqdm(total=EPOCHS *
                          (data_train.shape[0] // BATCH_SIZE)) as pbar:
                    for epoch in range(EPOCHS):
                        np.random.shuffle(indices_unsup)
                        test_mse = sess.run(loss_rec_mse,
                                            feed_dict={
                                                x: data_test[:EVAL_BATCH],
                                                train: False
                                            })
                        for i in range(indices_unsup.shape[0] // BATCH_SIZE):
                            batch_data = data_train[indices_unsup[
                                BATCH_SIZE * i:BATCH_SIZE * (i + 1)]]
                            # Refresh the displayed training metrics every
                            # 100 steps (always on step 0, so they are defined
                            # before the first set_postfix call).
                            if i % 100 == 0:
                                train_mse, train_commit = sess.run(
                                    [loss_rec_mse, loss_commit],
                                    feed_dict={
                                        x: batch_data,
                                        train: False
                                    })
                            train_step.run(feed_dict={
                                x: batch_data,
                                train: True
                            })
                            pbar.set_postfix(epoch=epoch,
                                             train_mse=train_mse,
                                             train_commit=train_commit,
                                             test_mse=test_mse,
                                             refresh=False)
                            pbar.update(1)

                # Evaluate on the test split in full batches of EVAL_BATCH;
                # a trailing partial batch is not evaluated.
                test_k_all = []
                test_x_hat_all = []
                for i in trange(data_test.shape[0] // EVAL_BATCH):
                    batch_data = data_test[EVAL_BATCH * i:EVAL_BATCH * (i + 1)]
                    test_k_all.extend(
                        sess.run(k, feed_dict={
                            x: batch_data,
                            train: False
                        }))
                    test_x_hat_all.extend(
                        sess.run(x_hat,
                                 feed_dict={
                                     x: batch_data,
                                     train: False
                                 }))
                test_x_hat_all = np.array(test_x_hat_all)
                test_k_all = np.array(test_k_all)

                # Compare against exactly the samples that were evaluated,
                # with sizes taken from the data rather than hard-coded.
                n_eval = len(test_x_hat_all)
                aggregated_mses.append(
                    mean_squared_error(
                        data_test[:n_eval],
                        np.reshape(test_x_hat_all,
                                   [n_eval, data_test.shape[1]])))
                aggregated_NMIs.append(
                    normalized_mutual_info_score(
                        test_k_all, labels_test[:len(test_k_all)]))
                aggregated_purities.append(
                    cluster_purity(test_k_all,
                                   labels_test[:len(test_k_all)]))

        print("Results for {}".format(data_set))
        print("Test MSE: {} +- {}\nTest NMI: {} +- {}\nTest purity: {} +- {}".
              format(np.mean(aggregated_mses),
                     np.std(aggregated_mses) / np.sqrt(NUM_TESTS),
                     np.mean(aggregated_NMIs),
                     np.std(aggregated_NMIs) / np.sqrt(NUM_TESTS),
                     np.mean(aggregated_purities),
                     np.std(aggregated_purities) / np.sqrt(NUM_TESTS)))

        if not configs["debug_mode"]:
            # newline='' lets the csv writer control line endings itself.
            with open("../results/vqvae_{}_{}_somdim_{}.tsv".format(
                    data_set, configs["random_state"], configs["som_dim"]),
                      'w',
                      newline='') as fp:
                csv_fp = csv.writer(fp, delimiter='\t')
                csv_fp.writerow(["model", "mse", "nmi", "purity"])
                csv_fp.writerow([
                    "vqvae",
                    str(aggregated_mses[0]),
                    str(aggregated_NMIs[0]),
                    str(aggregated_purities[0])
                ])