def sample(prob, alias, rng_state):
    """Given data for a Walker alias sampler, perform sampling.

    Parameters
    ----------
    prob: array of shape (n_items_for_sampling,)
        The probabilities of selecting an element or its alias.

    alias: array of shape (n_items_for_sampling,)
        The alternate choice if the element is not to be selected.

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    Returns
    -------
    The index of the sampled item.
    """
    k = tau_rand_int(rng_state) % prob.shape[0]
    u = tau_rand(rng_state)

    if u < prob[k]:
        return k
    else:
        return alias[k]
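
# Illustration only: a minimal pure-NumPy sketch of how the ``prob``/``alias``
# tables consumed by ``sample`` can be built with Walker's method.
# ``make_alias_table`` is a hypothetical name introduced for this demo; the
# library builds its tables elsewhere, and this version uses plain Python
# lists instead of the numba/tau_rand machinery above.
import numpy as np

def make_alias_table(weights):
    """Build Walker alias tables (prob, alias) from unnormalised weights."""
    weights = np.asarray(weights, dtype=np.float64)
    n = weights.shape[0]
    scaled = weights * n / weights.sum()  # mean of the scaled entries is 1.0
    prob = np.zeros(n)
    alias = np.zeros(n, dtype=np.int64)
    small = [i for i in range(n) if scaled[i] < 1.0]
    large = [i for i in range(n) if scaled[i] >= 1.0]
    while small and large:
        s, l = small.pop(), large.pop()
        prob[s] = scaled[s]
        alias[s] = l
        scaled[l] = scaled[l] - (1.0 - scaled[s])
        (small if scaled[l] < 1.0 else large).append(l)
    for i in small + large:  # leftovers are numerically ~1.0
        prob[i] = 1.0
    return prob, alias

# e.g. weights [1, 3] give prob = [0.5, 1.0], alias = [1, 0]: index 0 is drawn
# with probability (1/2) * 0.5 = 0.25, index 1 with the remaining 0.75.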
def select_side(hyperplane, offset, point, rng_state):
    margin = offset
    for d in range(point.shape[0]):
        margin += hyperplane[d] * point[d]

    if abs(margin) < EPS:
        side = tau_rand_int(rng_state) % 2
        if side == 0:
            return 0
        else:
            return 1
    elif margin > 0:
        return 0
    else:
        return 1
def sparse_select_side(hyperplane, offset, point_inds, point_data, rng_state):
    margin = offset

    hyperplane_inds = arr_unique(hyperplane[0])
    hyperplane_data = hyperplane[1, :hyperplane_inds.shape[0]]

    _, aux_data = sparse_mul(hyperplane_inds, hyperplane_data,
                             point_inds, point_data)

    for d in range(aux_data.shape[0]):
        margin += aux_data[d]

    # Use the same EPS tolerance as the dense select_side above; an exact
    # floating-point comparison against zero would almost never trigger.
    if abs(margin) < EPS:
        side = abs(tau_rand_int(rng_state)) % 2
        if side == 0:
            return 0
        else:
            return 1
    elif margin > 0:
        return 0
    else:
        return 1
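
# Illustration only: both select_side variants compute the same signed margin
# offset + <hyperplane, point>; the sparse version merely does the dot product
# on (indices, data) pairs. A pure NumPy/SciPy cross-check of that identity
# (EPS, sparse_mul and the tau_rand helpers are library internals, so this
# recomputes the margin directly rather than calling either function):
import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.default_rng(42)
point = rng.normal(size=8).astype(np.float32)
point[point < 0] = 0.0                       # make the point genuinely sparse
hyperplane_vec = rng.normal(size=8).astype(np.float32)
offset = np.float32(0.25)

dense_margin = offset + float(hyperplane_vec @ point)
sparse_point = csr_matrix(point)             # shape (1, 8)
sparse_margin = offset + float((sparse_point @ hyperplane_vec)[0])
assert np.isclose(dense_margin, sparse_margin)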
def _optimize_layout_aligned_euclidean_single_epoch(
    head_embeddings,
    tail_embeddings,
    heads,
    tails,
    epochs_per_sample,
    a,
    b,
    regularisation_weights,
    relations,
    rng_state,
    gamma,
    lambda_,
    dim,
    move_other,
    alpha,
    epochs_per_negative_sample,
    epoch_of_next_negative_sample,
    epoch_of_next_sample,
    n,
):
    n_embeddings = len(heads)
    window_size = (relations.shape[1] - 1) // 2

    max_n_edges = 0
    for e_p_s in epochs_per_sample:
        if e_p_s.shape[0] >= max_n_edges:
            max_n_edges = e_p_s.shape[0]

    embedding_order = np.arange(n_embeddings).astype(np.int32)
    np.random.shuffle(embedding_order)

    for i in range(max_n_edges):
        for m in embedding_order:
            if i < epoch_of_next_sample[m].shape[0] and epoch_of_next_sample[m][i] <= n:
                j = heads[m][i]
                k = tails[m][i]

                current = head_embeddings[m][j]
                other = tail_embeddings[m][k]

                dist_squared = rdist(current, other)

                if dist_squared > 0.0:
                    grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0)
                    grad_coeff /= a * pow(dist_squared, b) + 1.0
                else:
                    grad_coeff = 0.0

                for d in range(dim):
                    grad_d = clip(grad_coeff * (current[d] - other[d]))

                    for offset in range(-window_size, window_size):
                        neighbor_m = m + offset
                        if neighbor_m >= 0 and neighbor_m < n_embeddings and offset != 0:
                            identified_index = relations[m, offset + window_size, j]
                            if identified_index >= 0:
                                grad_d -= clip(
                                    (lambda_ * np.exp(-(np.abs(offset) - 1)))
                                    * regularisation_weights[m, offset + window_size, j]
                                    * (
                                        current[d]
                                        - head_embeddings[neighbor_m][identified_index, d]
                                    )
                                )

                    current[d] += clip(grad_d) * alpha

                    if move_other:
                        other_grad_d = clip(grad_coeff * (other[d] - current[d]))

                        for offset in range(-window_size, window_size):
                            neighbor_m = m + offset
                            if neighbor_m >= 0 and neighbor_m < n_embeddings and offset != 0:
                                identified_index = relations[m, offset + window_size, k]
                                if identified_index >= 0:
                                    # Accumulate into other_grad_d (not grad_d):
                                    # this regulariser applies to ``other``.
                                    other_grad_d -= clip(
                                        (lambda_ * np.exp(-(np.abs(offset) - 1)))
                                        * regularisation_weights[m, offset + window_size, k]
                                        * (
                                            other[d]
                                            - head_embeddings[neighbor_m][identified_index, d]
                                        )
                                    )

                        other[d] += clip(other_grad_d) * alpha

                epoch_of_next_sample[m][i] += epochs_per_sample[m][i]

                if epochs_per_negative_sample[m][i] > 0:
                    n_neg_samples = int(
                        (n - epoch_of_next_negative_sample[m][i])
                        / epochs_per_negative_sample[m][i]
                    )
                else:
                    n_neg_samples = 0

                for p in range(n_neg_samples):
                    k = tau_rand_int(rng_state) % tail_embeddings[m].shape[0]

                    other = tail_embeddings[m][k]

                    dist_squared = rdist(current, other)

                    if dist_squared > 0.0:
                        grad_coeff = 2.0 * gamma * b
                        grad_coeff /= (0.001 + dist_squared) * (
                            a * pow(dist_squared, b) + 1
                        )
                    elif j == k:
                        continue
                    else:
                        grad_coeff = 0.0

                    for d in range(dim):
                        if grad_coeff > 0.0:
                            grad_d = clip(grad_coeff * (current[d] - other[d]))
                        else:
                            grad_d = 4.0

                        for offset in range(-window_size, window_size):
                            neighbor_m = m + offset
                            if neighbor_m >= 0 and neighbor_m < n_embeddings and offset != 0:
                                identified_index = relations[m, offset + window_size, j]
                                if identified_index >= 0:
                                    grad_d -= clip(
                                        (lambda_ * np.exp(-(np.abs(offset) - 1)))
                                        * regularisation_weights[m, offset + window_size, j]
                                        * (
                                            current[d]
                                            - head_embeddings[neighbor_m][identified_index, d]
                                        )
                                    )

                        current[d] += clip(grad_d) * alpha

                epoch_of_next_negative_sample[m][i] += (
                    n_neg_samples * epochs_per_negative_sample[m][i]
                )
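
# Illustration: the alignment regulariser above pulls each point toward its
# counterpart in nearby embeddings with strength lambda_ * exp(-(|offset| - 1)),
# so the immediate neighbours (offset +/-1) contribute at full lambda_ and more
# distant embeddings contribute exponentially less. For example:
import numpy as np

lambda_ = 1.0  # arbitrary demo value; the caller supplies the real one
for offset in (-3, -2, -1, 1, 2, 3):
    print(offset, lambda_ * np.exp(-(np.abs(offset) - 1)))
# offsets +/-1 -> 1.0, +/-2 -> ~0.368, +/-3 -> ~0.135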
def _optimize_layout_euclidean_single_epoch(
    head_embedding,
    tail_embedding,
    head,
    tail,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    dim,
    move_other,
    push_tail,
    alpha,
    epochs_per_negative_sample,
    epoch_of_next_negative_sample,
    epoch_of_next_sample,
    negative_sample_rate,
    n,
    densmap_flag,
    dens_phi_sum,
    dens_re_sum,
    dens_re_cov,
    dens_re_std,
    dens_re_mean,
    dens_lambda,
    dens_R,
    dens_mu,
    dens_mu_tot,
    log_samples=False,
    log_losses=None,
) -> object:
    # Set up buffers for logging samples and losses. The second dimension is
    # twice the negative sample rate because n_neg_samples (defined below) can
    # exceed the nominal rate for edges that have fallen behind schedule.
    pos_samples = np.zeros_like(epochs_per_sample)
    neg_samples = -np.ones((len(epochs_per_sample), negative_sample_rate * 2))
    loss_a = 0
    loss_r = 0

    for i in numba.prange(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= n:
            j = head[i]
            k = tail[i]

            if log_samples or log_losses == "after":
                pos_samples[i] = 1

            current = head_embedding[j]
            other = tail_embedding[k]

            dist_squared = rdist(current, other)

            if densmap_flag:
                phi = 1.0 / (1.0 + a * pow(dist_squared, b))
                dphi_term = (a * b * pow(dist_squared, b - 1)
                             / (1.0 + a * pow(dist_squared, b)))

                q_jk = phi / dens_phi_sum[k]
                q_kj = phi / dens_phi_sum[j]

                drk = q_jk * ((1.0 - b * (1 - phi)) / np.exp(dens_re_sum[k]) + dphi_term)
                drj = q_kj * ((1.0 - b * (1 - phi)) / np.exp(dens_re_sum[j]) + dphi_term)

                re_std_sq = dens_re_std * dens_re_std
                weight_k = (dens_R[k]
                            - dens_re_cov * (dens_re_sum[k] - dens_re_mean) / re_std_sq)
                weight_j = (dens_R[j]
                            - dens_re_cov * (dens_re_sum[j] - dens_re_mean) / re_std_sq)

                grad_cor_coeff = (dens_lambda * dens_mu_tot
                                  * (weight_k * drk + weight_j * drj)
                                  / (dens_mu[i] * dens_re_std)
                                  / n_vertices)

            if dist_squared > 0.0:
                grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0)
                grad_coeff /= a * pow(dist_squared, b) + 1.0
            else:
                grad_coeff = 0.0

            for d in range(dim):
                grad_d = clip(grad_coeff * (current[d] - other[d]))

                if densmap_flag:
                    grad_d += clip(2 * grad_cor_coeff * (current[d] - other[d]))

                current[d] += grad_d * alpha
                if move_other:
                    other[d] += -grad_d * alpha

            if log_losses == "during":
                loss_a += my_log(low_dim_sim_dist(dist_squared, a, b, squared=True))

            epoch_of_next_sample[i] += epochs_per_sample[i]

            n_neg_samples = int((n - epoch_of_next_negative_sample[i])
                                / epochs_per_negative_sample[i])

            # Use a dedicated loop index for the logging buffer so that the
            # head index j is not shadowed (it is needed for the j == k check).
            for p in range(n_neg_samples):
                k = tau_rand_int(rng_state) % n_vertices

                if log_samples or log_losses == "after":
                    neg_samples[i, p] = k

                other = tail_embedding[k]

                dist_squared = rdist(current, other)

                if dist_squared > 0.0:
                    grad_coeff = 2.0 * gamma * b
                    grad_coeff /= (0.001 + dist_squared) * (
                        a * pow(dist_squared, b) + 1
                    )
                elif j == k:
                    continue
                else:
                    grad_coeff = 0.0

                for d in range(dim):
                    if grad_coeff > 0.0:
                        grad_d = clip(grad_coeff * (current[d] - other[d]))
                    else:
                        grad_d = 4.0
                    current[d] += grad_d * alpha
                    if push_tail:
                        other[d] += -grad_d * alpha

                if log_losses == "during":
                    loss_r += my_log(
                        1.0 - low_dim_sim_dist(dist_squared, a, b, squared=True))

            epoch_of_next_negative_sample[i] += (n_neg_samples
                                                 * epochs_per_negative_sample[i])

    return pos_samples, neg_samples, -loss_a, -loss_r
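
# The loss logging above calls two helpers that are defined elsewhere in this
# codebase. Plausible minimal versions, reconstructed purely from how they are
# called -- treat these as assumptions, not the canonical definitions:
import numpy as np

def low_dim_sim_dist(dist, a, b, squared=False):
    """Low-dimensional membership strength 1 / (1 + a * d**(2 * b))."""
    dist_sq = dist if squared else dist * dist
    return 1.0 / (1.0 + a * pow(dist_sq, b))

def my_log(x, eps=1e-12):
    """Clamped log so a degenerate sample (similarity 0 or 1) stays finite."""
    return np.log(np.maximum(x, eps))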
def optimize_layout_inverse(
    head_embedding,
    tail_embedding,
    head,
    tail,
    weight,
    sigmas,
    rhos,
    n_epochs,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma=1.0,
    initial_alpha=1.0,
    negative_sample_rate=5.0,
    output_metric=dist.euclidean,
    output_metric_kwds=(),
    verbose=False,
):
    """Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    weight: array of shape (n_1_simplices)
        The membership weights of the 1-simplices.

    sigmas: array of shape (n_vertices,)
        The per-point normalisation factors (bandwidths) from the high
        dimensional fuzzy simplicial set construction.

    rhos: array of shape (n_vertices,)
        The distance to each point's nearest neighbor, as used in the high
        dimensional fuzzy simplicial set construction.

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    output_metric: function (optional, default dist.euclidean)
        The metric to use in the output space; it must return both the
        distance and its gradient with respect to the first argument.

    output_metric_kwds: tuple (optional, default ())
        Additional arguments passed through to ``output_metric``.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
""" dim = head_embedding.shape[1] move_other = head_embedding.shape[0] == tail_embedding.shape[0] alpha = initial_alpha epochs_per_negative_sample = epochs_per_sample / negative_sample_rate epoch_of_next_negative_sample = epochs_per_negative_sample.copy() epoch_of_next_sample = epochs_per_sample.copy() for n in range(n_epochs): for i in range(epochs_per_sample.shape[0]): if epoch_of_next_sample[i] <= n: j = head[i] k = tail[i] current = head_embedding[j] other = tail_embedding[k] dist_output, grad_dist_output = output_metric( current, other, *output_metric_kwds) w_l = weight[i] grad_coeff = -(1 / (w_l * sigmas[k] + 1e-6)) for d in range(dim): grad_d = clip(grad_coeff * grad_dist_output[d]) current[d] += grad_d * alpha if move_other: other[d] += -grad_d * alpha epoch_of_next_sample[i] += epochs_per_sample[i] n_neg_samples = int((n - epoch_of_next_negative_sample[i]) / epochs_per_negative_sample[i]) for p in range(n_neg_samples): k = tau_rand_int(rng_state) % n_vertices other = tail_embedding[k] dist_output, grad_dist_output = output_metric( current, other, *output_metric_kwds) # w_l = 0.0 # for negative samples, the edge does not exist w_h = np.exp(-max(dist_output - rhos[k], 1e-6) / (sigmas[k] + 1e-6)) grad_coeff = -gamma * ((0 - w_h) / ((1 - w_h) * sigmas[k] + 1e-6)) for d in range(dim): grad_d = clip(grad_coeff * grad_dist_output[d]) current[d] += grad_d * alpha epoch_of_next_negative_sample[i] += ( n_neg_samples * epochs_per_negative_sample[i]) alpha = initial_alpha * (1.0 - (float(n) / float(n_epochs))) if verbose and n % int(n_epochs / 10) == 0: print("\tcompleted ", n, " / ", n_epochs, "epochs") return head_embedding
def _optimize_layout_euclidean_single_epoch(
    head_embedding,
    tail_embedding,
    head,
    tail,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    dim,
    move_other,
    alpha,
    epochs_per_negative_sample,
    epoch_of_next_negative_sample,
    epoch_of_next_sample,
    n,
):
    for i in numba.prange(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= n:
            j = head[i]
            k = tail[i]

            current = head_embedding[j]
            other = tail_embedding[k]

            dist_squared = rdist(current, other)

            if dist_squared > 0.0:
                grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0)
                grad_coeff /= a * pow(dist_squared, b) + 1.0
            else:
                grad_coeff = 0.0

            for d in range(dim):
                grad_d = clip(grad_coeff * (current[d] - other[d]))
                current[d] += grad_d * alpha
                if move_other:
                    other[d] += -grad_d * alpha

            epoch_of_next_sample[i] += epochs_per_sample[i]

            n_neg_samples = int((n - epoch_of_next_negative_sample[i])
                                / epochs_per_negative_sample[i])

            for p in range(n_neg_samples):
                k = tau_rand_int(rng_state) % n_vertices

                other = tail_embedding[k]

                dist_squared = rdist(current, other)

                if dist_squared > 0.0:
                    grad_coeff = 2.0 * gamma * b
                    grad_coeff /= (0.001 + dist_squared) * (
                        a * pow(dist_squared, b) + 1
                    )
                elif j == k:
                    continue
                else:
                    grad_coeff = 0.0

                for d in range(dim):
                    if grad_coeff > 0.0:
                        grad_d = clip(grad_coeff * (current[d] - other[d]))
                    else:
                        grad_d = 4.0
                    current[d] += grad_d * alpha

            epoch_of_next_negative_sample[i] += (n_neg_samples
                                                 * epochs_per_negative_sample[i])
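
# Sanity check of the attractive coefficient used above: it is -2 times the
# derivative of the per-edge cross-entropy term log(1 + a * t**b) with respect
# to t = dist_squared. Pure NumPy, independent of the numba kernel:
import numpy as np

a, b, t = 1.577, 0.895, 0.3          # a, b near UMAP defaults; t arbitrary
loss = lambda t: np.log(1.0 + a * t ** b)
h = 1e-7
finite_diff = (loss(t + h) - loss(t - h)) / (2 * h)
analytic = a * b * t ** (b - 1.0) / (1.0 + a * t ** b)
assert np.isclose(finite_diff, analytic, rtol=1e-5)
# The kernel's grad_coeff equals -2 * analytic; the remaining factor
# (current[d] - other[d]) is the chain rule through t.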
def _optimize_layout_euclidean_single_epoch(
    head_embedding,
    tail_embedding,
    head,
    tail,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    dim,
    move_other,
    alpha,
    epochs_per_negative_sample,
    epoch_of_next_negative_sample,
    epoch_of_next_sample,
    n,
    densmap_flag,
    dens_phi_sum,
    dens_re_sum,
    dens_re_cov,
    dens_re_std,
    dens_re_mean,
    dens_lambda,
    dens_R,
    dens_mu,
    dens_mu_tot,
):
    for i in numba.prange(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= n:
            j = head[i]
            k = tail[i]

            current = head_embedding[j]
            other = tail_embedding[k]

            dist_squared = rdist(current, other)

            if densmap_flag:
                phi = 1.0 / (1.0 + a * pow(dist_squared, b))
                dphi_term = (a * b * pow(dist_squared, b - 1)
                             / (1.0 + a * pow(dist_squared, b)))

                q_jk = phi / dens_phi_sum[k]
                q_kj = phi / dens_phi_sum[j]

                drk = q_jk * ((1.0 - b * (1 - phi)) / np.exp(dens_re_sum[k]) + dphi_term)
                drj = q_kj * ((1.0 - b * (1 - phi)) / np.exp(dens_re_sum[j]) + dphi_term)

                re_std_sq = dens_re_std * dens_re_std
                weight_k = (dens_R[k]
                            - dens_re_cov * (dens_re_sum[k] - dens_re_mean) / re_std_sq)
                weight_j = (dens_R[j]
                            - dens_re_cov * (dens_re_sum[j] - dens_re_mean) / re_std_sq)

                grad_cor_coeff = (dens_lambda * dens_mu_tot
                                  * (weight_k * drk + weight_j * drj)
                                  / (dens_mu[i] * dens_re_std)
                                  / n_vertices)

            if dist_squared > 0.0:
                grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0)
                grad_coeff /= a * pow(dist_squared, b) + 1.0
            else:
                grad_coeff = 0.0

            for d in range(dim):
                grad_d = clip(grad_coeff * (current[d] - other[d]))

                if densmap_flag:
                    grad_d += clip(2 * grad_cor_coeff * (current[d] - other[d]))

                current[d] += grad_d * alpha
                if move_other:
                    other[d] += -grad_d * alpha

            epoch_of_next_sample[i] += epochs_per_sample[i]

            n_neg_samples = int((n - epoch_of_next_negative_sample[i])
                                / epochs_per_negative_sample[i])

            for p in range(n_neg_samples):
                k = tau_rand_int(rng_state) % n_vertices

                other = tail_embedding[k]

                dist_squared = rdist(current, other)

                if dist_squared > 0.0:
                    grad_coeff = 2.0 * gamma * b
                    grad_coeff /= (0.001 + dist_squared) * (
                        a * pow(dist_squared, b) + 1
                    )
                elif j == k:
                    continue
                else:
                    grad_coeff = 0.0

                for d in range(dim):
                    if grad_coeff > 0.0:
                        grad_d = clip(grad_coeff * (current[d] - other[d]))
                    else:
                        grad_d = 4.0
                    current[d] += grad_d * alpha

            epoch_of_next_negative_sample[i] += (n_neg_samples
                                                 * epochs_per_negative_sample[i])
def angular_random_projection_split(data, indices, rng_state):
    """Given a set of ``indices`` for data points from ``data``, create
    a random hyperplane to split the data, returning two arrays of indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses cosine distance to determine the hyperplane
    and which side each data sample falls on.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The original data to be split

    indices: array of shape (tree_node_size,)
        The indices of the elements in the ``data`` array that are to
        be split in the current operation.

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    Returns
    -------
    indices_left: array
        The elements of ``indices`` that fall on the "left" side of the
        random hyperplane.

    indices_right: array
        The elements of ``indices`` that fall on the "right" side of the
        random hyperplane.
    """
    dim = data.shape[1]

    # Select two random points, set the hyperplane between them
    left_index = tau_rand_int(rng_state) % indices.shape[0]
    right_index = tau_rand_int(rng_state) % indices.shape[0]
    right_index += left_index == right_index
    right_index = right_index % indices.shape[0]
    left = indices[left_index]
    right = indices[right_index]

    left_norm = norm(data[left])
    right_norm = norm(data[right])

    if abs(left_norm) < EPS:
        left_norm = 1.0
    if abs(right_norm) < EPS:
        right_norm = 1.0

    # Compute the normal vector to the hyperplane (the vector between
    # the two normalized points)
    hyperplane_vector = np.empty(dim, dtype=np.float32)

    for d in range(dim):
        hyperplane_vector[d] = (data[left, d] / left_norm) - (
            data[right, d] / right_norm
        )

    hyperplane_norm = norm(hyperplane_vector)
    if abs(hyperplane_norm) < EPS:
        hyperplane_norm = 1.0

    for d in range(dim):
        hyperplane_vector[d] = hyperplane_vector[d] / hyperplane_norm

    # For each point compute the margin (project onto the normal vector).
    # If we are on the lower side of the hyperplane put it in one pile,
    # otherwise put it in the other pile (if we hit the hyperplane on the
    # nose, flip a coin)
    n_left = 0
    n_right = 0
    side = np.empty(indices.shape[0], np.int8)
    for i in range(indices.shape[0]):
        margin = 0.0
        for d in range(dim):
            margin += hyperplane_vector[d] * data[indices[i], d]

        if abs(margin) < EPS:
            side[i] = tau_rand_int(rng_state) % 2
            if side[i] == 0:
                n_left += 1
            else:
                n_right += 1
        elif margin > 0:
            side[i] = 0
            n_left += 1
        else:
            side[i] = 1
            n_right += 1

    # Now that we have the counts allocate arrays
    indices_left = np.empty(n_left, dtype=np.int64)
    indices_right = np.empty(n_right, dtype=np.int64)

    # Populate the arrays with indices according to which side they fell on
    n_left = 0
    n_right = 0
    for i in range(side.shape[0]):
        if side[i] == 0:
            indices_left[n_left] = indices[i]
            n_left += 1
        else:
            indices_right[n_right] = indices[i]
            n_right += 1

    return indices_left, indices_right, hyperplane_vector, None
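
# Toy illustration of the same splitting idea in pure NumPy (the numba
# function above depends on the library's tau_rand_int/EPS internals, so this
# demo re-derives the geometry directly): normalise two anchor points, use
# their difference as the hyperplane normal, and split on the projection sign.
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(20, 5)).astype(np.float32)
left = data[3] / np.linalg.norm(data[3])
right = data[7] / np.linalg.norm(data[7])
normal = left - right
normal /= np.linalg.norm(normal)
margins = data @ normal
indices_left = np.where(margins > 0)[0]
indices_right = np.where(margins <= 0)[0]
# exact-zero margins are broken with a coin flip in the implementation above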
def sparse_euclidean_random_projection_split(inds, indptr, data, indices, rng_state):
    """Given a set of ``indices`` for data points from a sparse data set
    presented in csr sparse format as inds, indptr and data, create
    a random hyperplane to split the data, returning two arrays of indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses euclidean distance to determine the hyperplane
    and which side each data sample falls on.

    Parameters
    ----------
    inds: array
        CSR format index array of the matrix

    indptr: array
        CSR format index pointer array of the matrix

    data: array
        CSR format data array of the matrix

    indices: array of shape (tree_node_size,)
        The indices of the elements in the ``data`` array that are to
        be split in the current operation.

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    Returns
    -------
    indices_left: array
        The elements of ``indices`` that fall on the "left" side of the
        random hyperplane.

    indices_right: array
        The elements of ``indices`` that fall on the "right" side of the
        random hyperplane.
    """
    # Select two random points, set the hyperplane between them
    left_index = tau_rand_int(rng_state) % indices.shape[0]
    right_index = tau_rand_int(rng_state) % indices.shape[0]
    right_index += left_index == right_index
    right_index = right_index % indices.shape[0]
    left = indices[left_index]
    right = indices[right_index]

    left_inds = inds[indptr[left]:indptr[left + 1]]
    left_data = data[indptr[left]:indptr[left + 1]]
    right_inds = inds[indptr[right]:indptr[right + 1]]
    right_data = data[indptr[right]:indptr[right + 1]]

    # Compute the normal vector to the hyperplane (the vector between
    # the two points) and the offset from the origin
    hyperplane_offset = 0.0
    hyperplane_inds, hyperplane_data = sparse_diff(left_inds, left_data,
                                                   right_inds, right_data)
    offset_inds, offset_data = sparse_sum(left_inds, left_data,
                                          right_inds, right_data)
    offset_data = offset_data / 2.0
    offset_inds, offset_data = sparse_mul(hyperplane_inds, hyperplane_data,
                                          offset_inds, offset_data)

    for d in range(offset_data.shape[0]):
        hyperplane_offset -= offset_data[d]

    # For each point compute the margin (project onto the normal vector and
    # add the offset). If we are on the lower side of the hyperplane put it
    # in one pile, otherwise put it in the other pile (if we hit the
    # hyperplane on the nose, flip a coin)
    n_left = 0
    n_right = 0
    side = np.empty(indices.shape[0], np.int8)
    for i in range(indices.shape[0]):
        margin = hyperplane_offset
        i_inds = inds[indptr[indices[i]]:indptr[indices[i] + 1]]
        i_data = data[indptr[indices[i]]:indptr[indices[i] + 1]]

        mul_inds, mul_data = sparse_mul(hyperplane_inds, hyperplane_data,
                                        i_inds, i_data)
        for d in range(mul_data.shape[0]):
            margin += mul_data[d]

        if abs(margin) < EPS:
            side[i] = tau_rand_int(rng_state) % 2
            if side[i] == 0:
                n_left += 1
            else:
                n_right += 1
        elif margin > 0:
            side[i] = 0
            n_left += 1
        else:
            side[i] = 1
            n_right += 1

    # Now that we have the counts allocate arrays
    indices_left = np.empty(n_left, dtype=np.int64)
    indices_right = np.empty(n_right, dtype=np.int64)

    # Populate the arrays with indices according to which side they fell on
    n_left = 0
    n_right = 0
    for i in range(side.shape[0]):
        if side[i] == 0:
            indices_left[n_left] = indices[i]
            n_left += 1
        else:
            indices_right[n_right] = indices[i]
            n_right += 1

    hyperplane = np.vstack((hyperplane_inds, hyperplane_data))

    return indices_left, indices_right, hyperplane, hyperplane_offset
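
# The sparse helpers above implement ordinary vector algebra on (inds, data)
# pairs: sparse_diff gives the hyperplane normal, sparse_sum / 2 the midpoint,
# and sparse_mul plus the summation loop a dot product. Dense equivalent of
# the construction, showing that the midpoint lies exactly on the hyperplane:
import numpy as np

left = np.array([0.0, 2.0, 0.0, 1.0])
right = np.array([1.0, 0.0, 0.0, 3.0])
normal = left - right
offset = -np.dot(normal, (left + right) / 2.0)
midpoint_margin = offset + np.dot(normal, (left + right) / 2.0)
assert np.isclose(midpoint_margin, 0.0)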
def optimize_layout(
    embedding,
    positive_head,
    positive_tail,
    n_edge_samples,
    n_vertices,
    prob,
    alias,
    a,
    b,
    rng_state,
    gamma=1.0,
    initial_alpha=1.0,
    negative_sample_rate=5,
    verbose=False,
):
    """Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    positive_head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    positive_tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    n_edge_samples: int
        The total number of edge samples to use in the optimization step.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    prob: array of shape (n_1_simplices)
        Walker alias sampler data.

    alias: array of shape (n_1_simplices)
        Walker alias sampler data.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    """
    dim = embedding.shape[1]
    alpha = initial_alpha

    for i in range(n_edge_samples):
        # One positive (edge) sample for every negative_sample_rate draws;
        # the rest are uniform negative samples over vertex pairs.
        is_negative_sample = i % negative_sample_rate != 0

        if is_negative_sample:
            edge = tau_rand_int(rng_state) % (n_vertices ** 2)
            j = edge // n_vertices
            k = edge % n_vertices
        else:
            edge = sample(prob, alias, rng_state)
            j = positive_head[edge]
            k = positive_tail[edge]

        current = embedding[j]
        other = embedding[k]

        dist_squared = rdist(current, other)

        if is_negative_sample:
            grad_coeff = 2.0 * gamma * b
            grad_coeff /= (0.001 + dist_squared) * (a * pow(dist_squared, b) + 1)

            if not np.isfinite(grad_coeff):
                grad_coeff = 8.0
        else:
            grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0)
            grad_coeff /= a * pow(dist_squared, b) + 1.0

        for d in range(dim):
            grad_d = clip(grad_coeff * (current[d] - other[d]))
            current[d] += grad_d * alpha
            other[d] += -grad_d * alpha

        if i % 10000 == 0:
            # alpha = np.exp(
            #     -0.69314718055994529 * (
            #         (3 * i) / n_edge_samples) ** 2) * initial_alpha
            alpha = (1.0 - np.sqrt(float(i) / n_edge_samples)) * initial_alpha
            if alpha < (initial_alpha * 0.000001):
                alpha = initial_alpha * 0.000001

        # max(1, ...) guards against modulo-by-zero when n_edge_samples < 10
        if verbose and i % max(1, n_edge_samples // 10) == 0:
            print("\t", i, " / ", n_edge_samples)

    return embedding
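
# Illustration of the learning-rate schedule used above: alpha decays with
# the square root of progress and is floored at one millionth of the initial
# rate (arbitrary demo settings below):
import numpy as np

initial_alpha, n_edge_samples = 1.0, 100_000
for i in (0, 25_000, 50_000, 100_000):
    alpha = (1.0 - np.sqrt(float(i) / n_edge_samples)) * initial_alpha
    alpha = max(alpha, initial_alpha * 0.000001)
    print(i, alpha)   # 1.0, 0.5, ~0.293, then clamped at 1e-06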
def _optimize_layout_inverse_single_epoch(
    epochs_per_sample,
    epoch_of_next_sample,
    head,
    tail,
    head_embedding,
    tail_embedding,
    output_metric,
    output_metric_kwds,
    weight,
    sigmas,
    dim,
    alpha,
    move_other,
    n,
    epoch_of_next_negative_sample,
    epochs_per_negative_sample,
    rng_state,
    n_vertices,
    rhos,
    gamma,
):
    for i in range(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= n:
            j = head[i]
            k = tail[i]

            current = head_embedding[j]
            other = tail_embedding[k]

            dist_output, grad_dist_output = output_metric(
                current, other, *output_metric_kwds)

            w_l = weight[i]
            grad_coeff = -(1 / (w_l * sigmas[k] + 1e-6))

            for d in range(dim):
                grad_d = clip(grad_coeff * grad_dist_output[d])

                current[d] += grad_d * alpha
                if move_other:
                    other[d] += -grad_d * alpha

            epoch_of_next_sample[i] += epochs_per_sample[i]

            n_neg_samples = int((n - epoch_of_next_negative_sample[i])
                                / epochs_per_negative_sample[i])

            for p in range(n_neg_samples):
                k = tau_rand_int(rng_state) % n_vertices

                other = tail_embedding[k]

                dist_output, grad_dist_output = output_metric(
                    current, other, *output_metric_kwds)

                # w_l = 0.0 # for negative samples, the edge does not exist
                w_h = np.exp(-max(dist_output - rhos[k], 1e-6) / (sigmas[k] + 1e-6))
                grad_coeff = -gamma * ((0 - w_h) / ((1 - w_h) * sigmas[k] + 1e-6))

                for d in range(dim):
                    grad_d = clip(grad_coeff * grad_dist_output[d])
                    current[d] += grad_d * alpha

            epoch_of_next_negative_sample[i] += (n_neg_samples
                                                 * epochs_per_negative_sample[i])
def _optimize_layout_generic_single_epoch(
    epochs_per_sample,
    epoch_of_next_sample,
    head,
    tail,
    head_embedding,
    tail_embedding,
    output_metric,
    output_metric_kwds,
    dim,
    alpha,
    move_other,
    n,
    epoch_of_next_negative_sample,
    epochs_per_negative_sample,
    rng_state,
    n_vertices,
    a,
    b,
    gamma,
):
    for i in range(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= n:
            j = head[i]
            k = tail[i]

            current = head_embedding[j]
            other = tail_embedding[k]

            dist_output, grad_dist_output = output_metric(
                current, other, *output_metric_kwds)
            _, rev_grad_dist_output = output_metric(
                other, current, *output_metric_kwds)

            if dist_output > 0.0:
                w_l = pow((1 + a * pow(dist_output, 2 * b)), -1)
            else:
                w_l = 1.0
            grad_coeff = 2 * b * (w_l - 1) / (dist_output + 1e-6)

            for d in range(dim):
                grad_d = clip(grad_coeff * grad_dist_output[d])

                current[d] += grad_d * alpha
                if move_other:
                    grad_d = clip(grad_coeff * rev_grad_dist_output[d])
                    other[d] += grad_d * alpha

            epoch_of_next_sample[i] += epochs_per_sample[i]

            n_neg_samples = int((n - epoch_of_next_negative_sample[i])
                                / epochs_per_negative_sample[i])

            for p in range(n_neg_samples):
                k = tau_rand_int(rng_state) % n_vertices

                other = tail_embedding[k]

                dist_output, grad_dist_output = output_metric(
                    current, other, *output_metric_kwds)

                if dist_output > 0.0:
                    w_l = pow((1 + a * pow(dist_output, 2 * b)), -1)
                elif j == k:
                    continue
                else:
                    w_l = 1.0
                grad_coeff = gamma * 2 * b * w_l / (dist_output + 1e-6)

                for d in range(dim):
                    grad_d = clip(grad_coeff * grad_dist_output[d])
                    current[d] += grad_d * alpha

            epoch_of_next_negative_sample[i] += (n_neg_samples
                                                 * epochs_per_negative_sample[i])

    return epoch_of_next_sample, epoch_of_next_negative_sample
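
# How the epoch scheduling shared by all of these kernels works: an edge with
# epochs_per_sample[i] = s is updated roughly once every s epochs, so update
# counts are proportional to 1 / s (i.e. to edge weight). Counting over 100
# epochs for s in {1, 2, 5}:
import numpy as np

epochs_per_sample = np.array([1.0, 2.0, 5.0])
epoch_of_next_sample = epochs_per_sample.copy()
counts = np.zeros(3, dtype=np.int64)
for n in range(100):
    for i in range(3):
        if epoch_of_next_sample[i] <= n:
            counts[i] += 1
            epoch_of_next_sample[i] += epochs_per_sample[i]
print(counts)   # [99 49 19], roughly 100 / s for each edge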
def optimize_layout_generic(
    head_embedding,
    tail_embedding,
    head,
    tail,
    n_epochs,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma=1.0,
    initial_alpha=1.0,
    negative_sample_rate=5.0,
    output_metric=dist.euclidean,
    output_metric_kwds=(),
    verbose=False,
    sleep_duration=None,
):
    """Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    output_metric: function (optional, default dist.euclidean)
        The metric to use in the output space; it must return both the
        distance and its gradient with respect to the first argument.

    output_metric_kwds: tuple (optional, default ())
        Additional arguments passed through to ``output_metric``.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    sleep_duration: float or None (optional, default None)
        If given, sleep this many seconds in object mode at the end of each
        epoch (temporarily releasing the GIL).

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
""" dim = head_embedding.shape[1] move_other = head_embedding.shape[0] == tail_embedding.shape[0] alpha = initial_alpha epochs_per_negative_sample = epochs_per_sample / negative_sample_rate epoch_of_next_negative_sample = epochs_per_negative_sample.copy() epoch_of_next_sample = epochs_per_sample.copy() for n in range(n_epochs): for i in range(epochs_per_sample.shape[0]): if epoch_of_next_sample[i] <= n: j = head[i] k = tail[i] current = head_embedding[j] other = tail_embedding[k] dist_output, grad_dist_output = output_metric( current, other, *output_metric_kwds) _, rev_grad_dist_output = output_metric( other, current, *output_metric_kwds) if dist_output > 0.0: w_l = pow((1 + a * pow(dist_output, 2 * b)), -1) else: w_l = 1.0 grad_coeff = 2 * b * (w_l - 1) / (dist_output + 1e-6) for d in range(dim): grad_d = clip(grad_coeff * grad_dist_output[d]) current[d] += grad_d * alpha if move_other: grad_d = clip(grad_coeff * rev_grad_dist_output[d]) other[d] += grad_d * alpha epoch_of_next_sample[i] += epochs_per_sample[i] n_neg_samples = int((n - epoch_of_next_negative_sample[i]) / epochs_per_negative_sample[i]) for p in range(n_neg_samples): k = tau_rand_int(rng_state) % n_vertices other = tail_embedding[k] dist_output, grad_dist_output = output_metric( current, other, *output_metric_kwds) if dist_output > 0.0: w_l = pow((1 + a * pow(dist_output, 2 * b)), -1) elif j == k: continue else: w_l = 1.0 grad_coeff = gamma * 2 * b * w_l / (dist_output + 1e-6) for d in range(dim): grad_d = clip(grad_coeff * grad_dist_output[d]) current[d] += grad_d * alpha epoch_of_next_negative_sample[i] += ( n_neg_samples * epochs_per_negative_sample[i]) alpha = initial_alpha * (1.0 - (float(n) / float(n_epochs))) if sleep_duration is not None: with numba.objmode(): # Call into object mode to temporarily sleep (and thus release GIL) logging.info("(obj mode) Fit epoch iteration.") time.sleep(sleep_duration) if verbose and n % int(n_epochs / 10) == 0: print("\tcompleted ", n, " / ", n_epochs, "epochs") return head_embedding