def run_one_exp(n, k, dim, ampl, type_matrix, scaled, n_avrg, type_lap=None,
                type_noise='gaussian'):
    """
    Repeat the same seriation experiment n_avrg times and summarise the
    scores returned by evaluate_ordering (described as a Kendall-tau score
    by the original author).

    The i-th repetition seeds numpy's global RNG with i, draws a permuted
    noisy matrix of type `type_matrix`, reorders it with SpectralOrdering
    and scores the recovered permutation against the true one.

    Returns
    -------
    (mean, std, scores) : the mean and standard deviation of the n_avrg
        scores, followed by the array of individual scores.

    Raises
    ------
    ValueError
        If `type_matrix` does not start with 'L' (linear) or 'C' (circular).
    """
    # Fall back on the random-walk Laplacian when none is requested.
    type_lap = type_lap or 'random_walk'

    # The first letter of the matrix type tells linear from circular.
    leading = type_matrix[0]
    if leading not in ('L', 'C'):
        raise ValueError("type matrix must be in ['LinearBanded',"
                         "'CircularBanded', 'LinearStrongDecrease',"
                         "'CircularStrongDecrease']")
    circular = leading == 'C'

    generator = MatrixGenerator()
    solver = SpectralOrdering(dim=dim, k_nbrs=k, circular=circular,
                              scaled=scaled, type_laplacian=type_lap,
                              verb=1)

    scores = np.zeros(n_avrg)
    for seed in range(n_avrg):
        # One seed per repetition keeps every run reproducible.
        np.random.seed(seed)
        generator.gen_matrix(n, type_matrix=type_matrix, apply_perm=True,
                             noise_ampl=ampl, law=type_noise)
        found_perm = solver.fit_transform(generator.sim_matrix)
        scores[seed] = evaluate_ordering(found_perm, generator.true_perm,
                                         circular=circular)
        print('.', end='')  # progress dot
    print('')

    return (scores.mean(), scores.std(), scores)
def clusterize_mat(X, n_clusters, reord_mat=False, reord_method='eta-trick'):
    """
    Project X onto a block-diagonal matrix made of contiguous blocks and,
    optionally, reorder the items inside each block.

    The blocks are cut where the first column of the spectral embedding of
    `X - X.min()` jumps the most between consecutive rows (presumably X is
    already approximately ordered -- confirm with callers).

    Parameters
    ----------
    X : square 2-D array (N x N)
        Similarity matrix.
    n_clusters : int
        Requested number of blocks (>= 1). At most N blocks can be produced.
    reord_mat : bool
        If True, also compute an ordering of the items inside each block.
    reord_method : str
        'eta-trick' uses SpectralEtaTrick(n_iter=10), 'mdso' uses
        SpectralOrdering(), anything else uses SpectralBaseline().

    Returns
    -------
    S_clus : array (N x N)
        X with every entry outside the diagonal blocks set to zero (X itself
        when n_clusters == 1).
    permu : 1-D int array, only when reord_mat is True
        Concatenation of the per-block orderings, in global indices.

    Raises
    ------
    ValueError
        If n_clusters < 1.
    """
    if n_clusters < 1:
        raise ValueError("n_clusters must be >= 1")

    N = X.shape[0]
    if n_clusters == 1:
        # Single block: nothing to cut, so skip the embedding altogether.
        if reord_mat:
            return (X, np.arange(N))
        return X

    if reord_mat:
        if reord_method == 'eta-trick':
            my_method = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            my_method = SpectralOrdering()
        else:
            my_method = SpectralBaseline()

    ebd = spectral_embedding(X - X.min(), norm_laplacian='random_walk',
                             norm_adjacency=False)
    fied_vec = ebd[:, 0]
    fied_diff = abs(fied_vec[1:] - fied_vec[:-1])
    # fied_diff[i] is the jump between rows i and i + 1, so the block that
    # follows the jump starts at i + 1 (not at i).
    cuts = np.argsort(-fied_diff)[:n_clusters - 1] + 1
    bps = np.sort(np.concatenate(([0], cuts, [N])))

    S_clus = np.zeros((N, N))
    permu = np.zeros(0, dtype='int32')
    # Iterate over the blocks actually found: there can be fewer than
    # n_clusters of them when n_clusters > N.
    for start, stop in zip(bps[:-1], bps[1:]):
        # Projection on block matrices
        S_clus[start:stop, start:stop] = X[start:stop, start:stop]
        if reord_mat:
            in_clst = np.arange(start, stop)
            if len(in_clst) < 3:
                # Too small to reorder; same guard as clusterize_from_bps.
                sub_perm = np.arange(len(in_clst))
            else:
                sub_mat = X[start:stop, start:stop]
                # Shift so that the solver receives a matrix whose minimum
                # is zero.
                sub_perm = my_method.fit_transform(sub_mat - sub_mat.min())
            permu = np.append(permu, in_clst[sub_perm])

    if reord_mat:
        return (S_clus, permu)
    return S_clus
def clusterize_from_bps(X, bps, reord_clusters=True, reord_method=None):
    """
    Keep only the diagonal blocks of the square matrix X delimited by the
    break-points `bps`, and optionally reorder the items inside each block.

    Block number k covers the indices bps[k], ..., bps[k + 1] - 1. An empty
    block is reported with a printed message and skipped.

    Parameters
    ----------
    X : square 2-D array (N x N)
    bps : sequence of int break-points (len(bps) - 1 blocks)
    reord_clusters : bool
        If True, each block with at least 3 items is reordered by the chosen
        solver; smaller blocks keep their current order.
    reord_method : str or None
        'eta-trick' uses SpectralEtaTrick(n_iter=10), 'mdso' uses
        SpectralOrdering(), anything else uses SpectralBaseline().

    Returns
    -------
    S_clus : array (N x N), zero outside the diagonal blocks.
    permu : 1-D int array of global indices, only when reord_clusters is
        True.
    """
    n_rows, n_cols = X.shape
    assert n_rows == n_cols
    N = n_rows

    if reord_clusters:
        permu = np.zeros(0, dtype='int32')
        if reord_method == 'eta-trick':
            solver = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            solver = SpectralOrdering()
        else:
            solver = SpectralBaseline()

    flat_in = X.flatten()
    flat_out = np.zeros(N**2)

    for lo, hi in zip(bps[:-1], bps[1:]):
        members = np.arange(lo, hi)
        if members.size == 0:
            print("empty cluster!")
            continue
        size = len(members)

        # Flat indices of every (row, col) pair inside the block.
        rows = np.repeat(members, size)
        cols = np.tile(members, size)
        block_idx = np.ravel_multi_index((rows, cols), (N, N))
        flat_out[block_idx] = flat_in[block_idx]

        if not reord_clusters:
            continue

        if size < 3:
            local_order = np.arange(size)
        else:
            block = X[members, :][:, members]
            lowest = block.min()
            # Only shift when the block has negative entries.
            if lowest < 0:
                local_order = solver.fit_transform(block - block.min())
            else:
                local_order = solver.fit_transform(block)
        permu = np.append(permu, members[local_order])

    S_clus = np.reshape(flat_out, (N, N))
    if reord_clusters:
        return (S_clus, permu)
    return S_clus
def ser_dupli_alt_clust2(A, C, seriation_solver='eta-trick', n_iter=100,
                         n_clusters=8, do_strong=False,
                         include_main_diag=True, do_show=True, Z_true=None):
    """
    Alternating loop for seriation with duplications (variant 2).

    Every iteration reorders the current N x N matrix S_t with the chosen
    spectral solver, passes the reordered matrix through proj2Rmat, shifts
    the result so that its minimum is zero, keeps its diagonal blocks and
    maps the outcome back with proj2dupli.

    Parameters
    ----------
    A : square array (n_ x n_)
    C : array of n_ integer counts (used as slice lengths and in 1. / C);
        N = sum(C) is the size of the expanded problem.
    seriation_solver : str
        'mdso' uses SpectralOrdering(norm_laplacian='random_walk'),
        'eta-trick' uses SpectralEtaTrick(n_iter=10), anything else uses
        SpectralBaseline().
    n_iter : int
        Number of iterations.
    n_clusters : int
        Kept for backward compatibility. It currently has no effect:
        break-point detection is disabled, so a single block [0, N) is used.
    do_strong, include_main_diag :
        Forwarded to proj2Rmat (include_main_diag also to proj2dupli).
    do_show : bool
        If True, call visualize_mat at every iteration.
    Z_true : array or None
        If given (and do_show is True), the mean distance returned by
        eval_assignments(Z, Z_true) is appended to the plot title.

    Returns
    -------
    (S_t, Z, R_t) : the last iterate, the assignment matrix, and the last
        output of proj2Rmat (None when n_iter == 0).
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:
        # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    # Initialization: row ii of Z holds C[ii] consecutive ones.
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z
    max_val = A.max()

    # Accumulated permutation; tracked but not returned.
    perm_tot = np.arange(N)
    # Defined up front so that n_iter == 0 returns cleanly.
    R_t = None

    for it in range(n_iter):
        # Reorder the matrix
        permu = my_solver.fit_transform(S_t)
        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        R_t = proj2Rmat(S_tp, do_strong=do_strong,
                        include_main_diag=include_main_diag, verbose=0,
                        u_b=max_val)
        print(R_t.min())
        R_t -= R_t.min()

        ebd = spectral_embedding(R_t, norm_laplacian=False)

        # Break-point detection is disabled: one block covering everything.
        bps = np.array([0, N])
        print(bps)

        Z = Z[:, permu]

        # Keep the diagonal blocks of R_t.
        s_clus = np.zeros(N**2)  # TODO: adapt to the sparse case
        s_flat = R_t.flatten()
        permu2 = np.zeros(0, dtype='int32')
        # Loop over the blocks that exist in bps; looping over n_clusters
        # indexed past the end of bps as soon as n_clusters > 1.
        for k_ in range(len(bps) - 1):
            in_clst = np.arange(bps[k_], bps[k_ + 1])
            # Blocks are not reordered internally in this variant.
            permu2 = np.append(permu2, in_clst)

            iis = np.repeat(in_clst, len(in_clst))
            jjs = np.tile(in_clst, len(in_clst))
            sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
            s_clus[sub_idx] = s_flat[sub_idx]  # Projection on block matrices

        # alpha_ mixes the block projection with S_t; 0 keeps only the
        # block projection.
        alpha_ = 0.
        S_clus = (1 - alpha_) * np.reshape(s_clus, (N, N)) + alpha_ * S_t

        S_tp = S_clus.copy()[permu2, :]
        S_tp = S_tp.T[permu2, :].T

        double_perm = permu[permu2]
        Z = Z[:, permu2]
        perm_tot = perm_tot[double_perm]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, _ = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
            visualize_mat(S_t, S_tp, R_t, Z, ebd, title, Z_true=Z_true)

        S_t = proj2dupli(S_tp, Z, A, u_b=max_val, k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z, R_t)
def ser_dupli_alt_clust(A, C, seriation_solver='eta-trick', n_iter=100,
                        n_clusters=8, do_strong=False,
                        include_main_diag=True, do_show=True, Z_true=None):
    """
    Alternating loop for seriation with duplications, with a periodic
    clustering step.

    Every iteration reorders S_t (N x N, N = sum(C)) with the chosen
    spectral solver. At iterations 10, 20, 30, ... the items are also split
    into n_clusters groups with SpectralClustering, each group is reordered
    on its own and only the within-group entries of S_t are kept. The
    result goes through proj2Rmat and then proj2dupli to produce the next
    S_t.

    Parameters
    ----------
    A : square array (n_ x n_)
    C : array of n_ integer counts (used as slice lengths and in 1. / C)
    seriation_solver : 'mdso', 'eta-trick', or anything else for the
        baseline spectral solver
    n_iter : number of iterations
    n_clusters : number of groups requested from SpectralClustering
    do_strong, include_main_diag : forwarded to proj2Rmat
        (include_main_diag also to proj2dupli)
    do_show : if True, call visualize_mat at every iteration
    Z_true : optional reference passed to eval_assignments / visualize_mat

    Returns
    -------
    (S_t, Z) : the last iterate and the assignment matrix.
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:
        # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    cluster_solver = SpectralClustering(n_clusters=n_clusters,
                                        affinity='precomputed')

    # Initialization: row ii of Z holds C[ii] consecutive ones.
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO : make this faster ?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z

    max_val = A.max()

    # Accumulated permutation; tracked but not returned.
    perm_tot = np.arange(N)

    # Iterate
    for it in range(n_iter):
        # Reorder S_t itself (rows, then columns) with the global ordering.
        permu1 = my_solver.fit_transform(S_t)
        S_t = S_t[permu1, :]
        S_t = S_t.T[permu1, :].T

        # Cluster the similarity matrix (every 10th iteration, from 10 on)
        if (it % 10 == 0) and (it > 9):
            # R_t is the proj2Rmat output of the previous iteration.
            # NOTE(review): `R_t.max() - R_t` is fed to a solver built with
            # affinity='precomputed' -- confirm this is the intended input.
            labels_ = cluster_solver.fit_predict(R_t.max() - R_t)

            # Reorder each cluster
            s_clus = np.zeros(N**2)  # TODO: adapt to the sparse case
            s_flat = S_t.flatten()
            permu = np.zeros(0, dtype='int32')

            for k_ in range(n_clusters):
                in_clst = np.where(labels_ == k_)[0]
                # Sub-matrix of S_t restricted to this cluster.
                sub_mat = S_t[in_clst, :]
                sub_mat = sub_mat.T[in_clst, :].T
                sub_perm = my_solver.fit_transform(sub_mat)
                sub_cc = in_clst[sub_perm]
                permu = np.append(permu, sub_cc)

                # Flat indices of every (row, col) pair inside the cluster.
                iis = np.repeat(in_clst, len(in_clst))
                jjs = np.tile(in_clst, len(in_clst))
                sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
                s_clus[sub_idx] = s_flat[
                    sub_idx]  # Projection on block matrices

            # Computed but not used (the early exit below is disabled).
            is_identity = (np.all(permu == np.arange(N)) or
                           np.all(permu == np.arange(N)[::-1]))
            # if is_identity:
            #     break

            # alpha_ mixes the block projection with S_t; 0 keeps only the
            # block projection.
            alpha_ = 0.
            S_clus = (1 - alpha_) * np.reshape(s_clus, (N, N)) + alpha_ * S_t

            S_tp = S_clus.copy()[permu, :]
            S_tp = S_tp.T[permu, :].T
        else:
            permu = np.arange(N)
            S_tp = S_t

        # Compose the global ordering with the per-cluster ordering.
        permu = permu1[permu]

        R_t = proj2Rmat(S_tp, do_strong=do_strong,
                        include_main_diag=include_main_diag, verbose=0,
                        u_b=max_val)

        Z = Z[:, permu]
        perm_tot = perm_tot[permu]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
            visualize_mat(S_t, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_t, Z, A, u_b=max_val, k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z)
def ser_dupli_alt_clust3(A, C, seriation_solver='eta-trick', n_iter=100,
                         n_clusters=1, do_strong=False,
                         include_main_diag=True, do_show=True, Z_true=None,
                         cluster_interval=10, enforce_sparsity=False):
    """
    Alternating loop for seriation with duplications (variant 3), with a
    clustering step every `cluster_interval` iterations.

    Every iteration reorders S_t with the chosen spectral solver and passes
    the result through proj2Rmat. When `it` is a positive multiple of
    `cluster_interval`, simple_clusters is applied to that output. If Z_true
    is given and break-points are available, each cluster is flipped when
    that lowers the mean distance reported by eval_assignments. proj2dupli
    then produces the next S_t.

    Parameters
    ----------
    A : square array (n_ x n_)
    C : array of n_ integer counts (used as slice lengths and in 1. / C)
    seriation_solver : 'mdso', 'eta-trick', or anything else for the
        baseline spectral solver
    n_iter : number of iterations
    n_clusters : number of clusters passed to simple_clusters; overwritten
        with len(bps) - 1 once break-points exist and Z_true is given
    do_strong, include_main_diag : forwarded to proj2Rmat
        (include_main_diag also to proj2dupli)
    do_show : if True, call visualize_mat at every iteration
    Z_true : optional reference assignment matrix
    cluster_interval : period of the clustering step
    enforce_sparsity : forwarded to proj2dupli as k_sparse

    Returns
    -------
    (S_t, Z, R_clus, S_tp)
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=20)
    else:
        # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    # Initialization: row ii of Z holds C[ii] consecutive ones.
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO : make this faster ?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z

    max_val = A.max()

    # Accumulated permutation; tracked but not returned.
    perm_tot = np.arange(N)
    # Becomes True once simple_clusters has produced break-points.
    bps_exists = False

    # Iterate
    for it in range(n_iter):
        # Shift S_t so that the solver receives a matrix whose minimum is 0.
        permu = my_solver.fit_transform(S_t - S_t.min())
        # Computed but not used (the early exit below is disabled).
        is_identity = (np.all(permu == np.arange(N)) or
                       np.all(permu == np.arange(N)[::-1]))
        # if is_identity:
        #     break

        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        # The `it > 10000` threshold means this branch only runs for very
        # long runs; with the default n_iter the else branch is always taken.
        if (it % cluster_interval == 0) and (it > 10000):
            R_clus, p2 = simple_clusters(S_tp, n_clusters,
                                         reord_clusters=True)
            R_clus = R_clus[p2, :]
            R_clus = R_clus.T[p2, :].T
        else:
            R_clus = S_tp
            p2 = np.arange(N)
        permu = permu[p2]

        R_t = proj2Rmat(R_clus, do_strong=do_strong,
                        include_main_diag=include_main_diag, verbose=0,
                        u_b=max_val)
        print(R_t.min())

        if (it % cluster_interval == 0) and (it > 0):
            # Hard-coded switch: the else branch below is currently dead.
            reord_clusters = True
            if reord_clusters:
                (R_clus, p2, bps) = simple_clusters(R_t, n_clusters,
                                                    reord_clusters=True,
                                                    return_breakpoints=True)
            else:
                R_clus, bps = simple_clusters(R_t, n_clusters,
                                              reord_clusters=False,
                                              return_breakpoints=True)
                p2 = np.arange(N)
            # NOTE(review): unlike the branch above, R_clus is not permuted
            # by p2 here -- presumably simple_clusters already returns it
            # reordered; confirm.
            permu = permu[p2]
            bps_exists = True
        else:
            R_clus = R_t

        Z = Z[:, permu]

        # Flip sub-orderings in clusters if Z_true provided
        # NOTE(review): this call runs even when Z_true is None (the
        # default) -- confirm eval_assignments accepts None.
        mean_dist, _, is_inv = eval_assignments(Z, Z_true)
        print("before rearranging clusters, mean dist : %1.2f" % (mean_dist))
        if (Z_true is not None) and (bps_exists):
            # From here on n_clusters follows the break-points found so far.
            n_clusters = len(bps) - 1
            for k_ in range(n_clusters):
                Zbis = Z.copy()
                in_clst = np.arange(bps[k_], bps[k_ + 1])
                if not in_clst.size:
                    print("empty cluster!")
                    continue
                # Compare the mean distance with and without reversing the
                # columns of this cluster; keep the reversal if it helps.
                mean_dist1, _, _ = eval_assignments(Zbis, Z_true)
                Zbis[:, in_clst] = Zbis[:, in_clst[::-1]]
                mean_dist2, _, _ = eval_assignments(Zbis, Z_true)
                if mean_dist2 < mean_dist1:
                    Z[:, in_clst] = Z[:, in_clst[::-1]]
                    permu[in_clst] = permu[in_clst[::-1]]

            mean_dist, _, is_inv = eval_assignments(Z, Z_true)
            print("after rearranging clusters, mean dist : %1.2f" %
                  (mean_dist))

        perm_tot = perm_tot[permu]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
                # NOTE(review): this flip only happens when do_show is True,
                # yet the flipped Z is used by proj2dupli below.
                if is_inv:
                    Z = Z[:, ::-1]
            visualize_mat(R_clus, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_clus, Z, A, u_b=max_val, k_sparse=enforce_sparsity,
                         include_main_diag=include_main_diag)

    return (S_t, Z, R_clus, S_tp)
def ser_dupli_alt(A, C, seriation_solver='eta-trick', n_iter=100,
                  do_strong=False, include_main_diag=True, do_show=True,
                  Z_true=None):
    """
    Plain alternating loop for seriation with duplications.

    Starting from S_t = Z.T @ D @ A @ D @ Z (D = diag(1 / C), Z the initial
    n_ x N assignment matrix, N = sum(C)), every iteration reorders S_t with
    the chosen spectral solver, passes the reordered matrix through
    proj2Rmat, permutes the columns of Z accordingly and obtains the next
    S_t from proj2dupli.

    Parameters
    ----------
    A : square array (n_ x n_)
    C : array of n_ integer counts (used as slice lengths and in 1. / C)
    seriation_solver : 'mdso', 'eta-trick', or anything else for the
        baseline spectral solver
    n_iter : number of iterations
    do_strong, include_main_diag : forwarded to proj2Rmat
        (include_main_diag also to proj2dupli)
    do_show : if True, call visualize_mat at every iteration
    Z_true : optional reference; when given and do_show is True, the mean
        distance from eval_assignments is added to the plot title and Z is
        column-reversed whenever eval_assignments reports an inversion.

    Returns
    -------
    (S_t, Z) : the last iterate and the assignment matrix.
    """
    n_, n1 = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:
        # Basic spectral algorithm (Atkins et al.).
        my_solver = SpectralBaseline()

    # Row `row` of Z gets C[row] consecutive ones, blocks laid side by side.
    Z = np.zeros((n_, N))
    start = 0
    for row in range(n_):
        stop = start + C[row]
        Z[row, start:stop] = 1
        start = stop

    inv_counts = np.diag(1. / C)
    S_t = Z.T @ inv_counts @ A @ inv_counts @ Z
    max_val = A.max()

    # Accumulated permutation; tracked but not returned.
    perm_tot = np.arange(N)

    for it in range(n_iter):
        permu = my_solver.fit_transform(S_t)
        # Computed but not used (no early exit is taken on it).
        is_identity = (np.all(permu == np.arange(N)) or
                       np.all(permu == np.arange(N)[::-1]))

        # Apply the ordering to both rows and columns.
        S_tp = S_t[permu, :][:, permu]

        R_t = proj2Rmat(S_tp, do_strong=do_strong,
                        include_main_diag=include_main_diag, verbose=0,
                        u_b=max_val)
        print(R_t.min())

        Z = Z[:, permu]
        perm_tot = perm_tot[permu]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
                if is_inv:
                    Z = Z[:, ::-1]
            visualize_mat(S_t, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_t, Z, A, u_b=max_val, k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z)
# Scatter plot of the true positions of the index pairs (iis, jjs).
ax = fig.add_subplot(111)
plt.scatter(true_inv_perm[iis], true_inv_perm[jjs])

# Parameters for Spectral Ordering
apply_perm = False  # whether to randomly permute the matrix, so that the
# ground truth is not the trivial permutation (1, ..., n).
circular = True

# Call Spectral Ordering method
reord_method = SpectralOrdering(dim=dim, k_nbrs=k_nbrs, circular=circular,
                                scaled=scaled, type_laplacian=type_lap,
                                verb=1, type_new_sim='exp',
                                norm_local_diss=False, norm_sim=False,
                                merge_if_ccs=False, min_cc_len=min_cc_len,
                                do_eps_graph=False, eps_val=95)
# Run the spectral ordering method on the DNA reads similarity matrix
global_perm = reord_method.fit_transform(new_mat)

# sim_mat = new_mat
if do_plots:
    # 3-D scatter of the first three coordinates of the embedding.
    ebd = reord_method.embedding
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(ebd[:, 0], ebd[:, 1], ebd[:, 2])
circular = True if type_similarity[0] == 'C' else False # circular or linear scaled = 'heuristic' # whether or not to scale the coordinates of the # embedding so that the larger dimensions have fewer importance # Build data matrix data_gen = SimilarityMatrix() data_gen.gen_matrix(n, type_matrix=type_similarity, apply_perm=apply_perm, noise_ampl=ampl_noise, law=type_noise) # Call Spectral Ordering method reord_method = SpectralOrdering(n_components=n_components, k_nbrs=k_nbrs, circular=circular, scale_embedding=scaled, norm_laplacian='random_walk') my_perm = reord_method.fit_transform(data_gen.sim_matrix) reord_method.new_sim = reord_method.new_sim.toarray() # reord_method.fit(data_gen.sim_matrix) score = evaluate_ordering(my_perm, data_gen.true_perm, circular=circular) print("Kendall-Tau score = {}".format(score)) inv_perm = np.argsort(data_gen.true_perm) # Display some results fig, axes = plt.subplots(2, 2) axes[0, 0].tick_params( axis='x', # changes apply to the x-axis which='both', # both major and minor ticks are affected
circular = False # whether we are running Circular or Linear Seriation scaled = True # whether or not to scale the coordinates of the embedding so # that the larger dimensions have fewer importance # Build data matrix data_gen = MatrixGenerator() data_gen.gen_matrix(n, type_matrix=type_similarity, apply_perm=apply_perm, noise_ampl=ampl_noise, law=type_noise) # Call Spectral Ordering method reord_method = SpectralOrdering(dim=dim, k_nbrs=k_nbrs, circular=circular, scaled=scaled, type_laplacian='random_walk') my_perm = reord_method.fit_transform(data_gen.sim_matrix) # reord_method.fit(data_gen.sim_matrix) score = evaluate_ordering(my_perm, data_gen.true_perm, circular=circular) print("Kendall-Tau score = {}".format(score)) inv_perm = inverse_perm(data_gen.true_perm) # Display some results fig, axes = plt.subplots(2, 2) axes[0, 0].tick_params( axis='x', # changes apply to the x-axis which='both', # both major and minor ticks are affected bottom=False, # ticks along the bottom edge are off
def run_one_exp(n, k, dim, ampl, type_matrix, n_avrg, type_noise='gaussian',
                norm_laplacian='unnormalized', norm_adjacency=False,
                scale_embedding='heuristic', embedding_method='spectral'):
    """
    Repeat the same seriation experiment n_avrg times and summarise the
    scores returned by evaluate_ordering (described as a Kendall-tau score
    by the original author).

    The i-th repetition seeds numpy's global RNG with i, draws a permuted
    noisy matrix of type `type_matrix`, reorders it with SpectralOrdering
    and scores the recovered permutation against the true one. With
    embedding_method == 'TSNE' the embedding always has 2 components,
    otherwise it has `dim` components.

    Returns
    -------
    (mean, std, scores) : the mean and standard deviation of the n_avrg
        scores, followed by the array of individual scores.

    Raises
    ------
    ValueError
        If `type_matrix` does not start with 'L' (linear) or 'C' (circular).
    """
    # The first letter of the matrix type tells linear from circular.
    leading = type_matrix[0]
    if leading not in ('L', 'C'):
        raise ValueError("type matrix must be in ['LinearBanded',"
                         "'CircularBanded', 'LinearStrongDecrease',"
                         "'CircularStrongDecrease']")
    circular = leading == 'C'

    generator = SimilarityMatrix()

    # The two solver configurations differ only in the embedding dimension.
    n_components = 2 if embedding_method == 'TSNE' else dim
    solver = SpectralOrdering(n_components=n_components, k_nbrs=k,
                              norm_adjacency=norm_adjacency,
                              norm_laplacian=norm_laplacian,
                              scale_embedding=scale_embedding,
                              circular=circular,
                              merge_if_ccs=True,
                              embedding_method=embedding_method)

    scores = np.zeros(n_avrg)
    for seed in range(n_avrg):
        # One seed per repetition keeps every run reproducible.
        np.random.seed(seed)
        generator.gen_matrix(n, type_matrix=type_matrix, apply_perm=True,
                             noise_ampl=ampl, law=type_noise)
        found_perm = solver.fit_transform(generator.sim_matrix)
        scores[seed] = evaluate_ordering(found_perm, generator.true_perm,
                                         circular=circular)

    return (scores.mean(), scores.std(), scores)
dim = 5 # number of dimensions of the embedding circular = True # whether we are running Circular or Linear Seriation scaled = 'CTD' # whether or not to scale the coordinates of the embedding so # that the larger dimensions have fewer importance type_lap = 'unnormalized' # Remark : we have observed stranged (and poor) # results with the normalized Laplacians min_cc_len = 10 # Drop the tiny connected components # Call Spectral Ordering method reord_method = SpectralOrdering(dim=dim, k_nbrs=k_nbrs, circular=circular, scaled=scaled, type_laplacian=type_lap, verb=1, type_new_sim='exp', norm_local_diss=False, norm_sim=False, merge_if_ccs=True, min_cc_len=min_cc_len, do_eps_graph=True, preprocess_only=True) # Run the spectral ordering method on the DNA reads similarity matrix t0 = time() reord_method.fit(new_mat) my_ebd = reord_method.embedding tme = time() - t0 print("my embedding in {}s".format(tme)) skl_method = SpectralEmbedding(n_components=dim, affinity='precomputed')
true_inv_perm = np.argsort(true_perm) # Set parameters for Spectral Ordering method scale_embedding = False k_nbrs = 20 circular = True eigen_solver = 'amg' # faster than arpack on large sparse matrices. # requires pyamg package (conda install pyamg or pip install pyamg) norm_adjacency = 'coifman' # yields better results in practice norm_laplacian = False # normalization of the laplacian seems to mess things # up for large sparse matrices merge_if_ccs = True # the new similarity matrix may be disconnected reord_method = SpectralOrdering(scale_embedding=scale_embedding, k_nbrs=k_nbrs, circular=circular, eigen_solver=eigen_solver, norm_adjacency=norm_adjacency, norm_laplacian=norm_laplacian, merge_if_ccs=merge_if_ccs, n_components=8) # Run the method reord_method.fit(new_mat) # Plot the laplacian embedding embedding = reord_method.embedding fig = plt.figure() ax = Axes3D(fig) ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=true_inv_perm) # plt.title("3d embedding of DNA overlap based similarity matrix") ax.set_xlabel(r'$f_1$', fontsize=18) ax.set_ylabel(r'$f_2$', fontsize=18) ax.set_zlabel(r'$f_3$', fontsize=18)