def test(G, verbose=False, compare_parafac=False):
    print("rank of G:", G.rank)
    print("G:")
    print(G.get_full_tensor())
    print("")
    c = Compressor(accuracy=0.0001, n_iter_max=1000, min_error_dec=1e-2,
                   display_progress=verbose)
    F, info = c.compress(G)
    print("rank of F:", F.rank)
    print("number of iterations:", info.n_iter)
    print("iterations where the rank was increased:", info.iter_with_rank_inc)
    print("F:")
    print(F.get_full_tensor())
    print("")
    if verbose:
        print("lambdas in F:")
        print(F.lambdas)
        print("factors in F:")
        for f in F.factors:
            print(f)
        print("")
    if compare_parafac:
        G_tl = tl.tensor(G.get_full_tensor())
        factors_tl = parafac(G_tl, rank=2)
        for f in factors_tl:
            print(f)
        print("")
        print(tl.kruskal_to_tensor(factors_tl))
        print("")
def CPDMWUTime(X, F, sketching_rates, lamb, eps, nu, Hinit, max_time, sample_interval=0.5): weights = np.array([1] * len(sketching_rates)) / (len(sketching_rates)) dim_1, dim_2, dim_3 = X.shape A, B, C = Hinit[0], Hinit[1], Hinit[2] X_unfold = [tl.unfold(X, m) for m in range(3)] norm_x = norm(X) I = np.eye(F) PP = tl.kruskal_to_tensor((np.ones(F), [A, B, C])) error = np.linalg.norm(X - PP)**2 / norm_x NRE_A = {0: error} start = time.time() sketching_rates_selected = {} now = time.time() itr = 1 with tqdm(position=0) as pbar: while now - start < max_time: s = sketching_weight(sketching_rates, weights) # Solve Ridge Regression for A,B,C A, B, C = update_factors(A, B, C, X_unfold, I, lamb, s, F) # Update weights p = np.random.binomial(n=1, p=eps) if p == 1 and len(sketching_rates) > 1: update_weights(A, B, C, X_unfold, I, norm_x, lamb, weights, sketching_rates, F, nu, eps) now = time.time() PP = tl.kruskal_to_tensor((np.ones(F), [A, B, C])) error = np.linalg.norm(X - PP)**2 / norm_x elapsed = now - start NRE_A[elapsed] = error sketching_rates_selected[elapsed] = s pbar.set_description( "iteration: {} t: {:.5f} s: {} error: {:.5f} rates: {}". format(itr, elapsed, s, error, sketching_rates)) itr += 1 pbar.update(1) return A, B, C, NRE_A, sketching_rates_selected
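# Hedged usage sketch for CPDMWUTime (not from the source): the tensor size, rank and
# hyper-parameter values below are illustrative, and the helpers sketching_weight,
# update_factors and update_weights are assumed to be importable in the same module.
F = 5
X = tl.tensor(np.random.rand(50, 50, 50))
Hinit = [np.random.rand(X.shape[d], F) for d in range(3)]
sketching_rates = list(np.linspace(1e-3, 1e-1, 4)) + [1]
A, B, C, NRE, rates_selected = CPDMWUTime(X, F, sketching_rates,
                                          lamb=0.001, eps=0.0001, nu=2,
                                          Hinit=Hinit, max_time=30)
# NRE maps elapsed seconds to the normalized residual error at that time
print("final normalized residual error:", list(NRE.values())[-1])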
def _calculate_elbo(self, X, M): Lx = tl.kruskal_to_tensor(self.L) krL = khatri_rao(self.L) temp = (krL * np.log(krL)).sum(axis=-1).reshape(X.shape) bound = - np.sum(M * tl.kruskal_to_tensor(self.E)) \ - np.sum(M * sp.gammaln(X + 1, dtype='float')) \ - np.sum(M * X * ((temp / Lx) - np.log(Lx))) for n in range(self.n_modes): bound += - np.sum((self.A[n] / self.B[n]) * self.E[n]) \ - np.sum(sp.gammaln(self.A[n], dtype='float')) \ - np.sum(self.A[n] * np.log(self.B[n] / self.A[n])) \ + np.sum(self.C[n] * (np.log(self.D[n]) + 1) + sp.gammaln(self.C[n], dtype='float')) return bound
def recompose_conv2d_rcp(dense_factors, conv_factor, params): input_shape, output_shape, rank = params["input_shape"], params[ "output_shape"], params["rank"] order = len(input_shape) for l in range(order): dense_factors[l] = np.transpose( np.reshape(dense_factors[l], [rank, input_shape[l] * output_shape[l]])) conv_factor = np.transpose( np.reshape(conv_factor, [rank, conv_factor.shape[0] * conv_factor.shape[1]])) factors = [conv_factor] + dense_factors tensor = tl.kruskal_to_tensor(factors) newshape = [tensor.shape[0]] + input_shape + output_shape tensor = np.reshape(tensor, newshape) axes = [0] + [ val for pair in zip(range(1, 1 + order), range(1 + order, 1 + 2 * order)) for val in pair ] tensor = np.transpose(tensor, axes=axes) newshape = [ int(np.sqrt(tensor.shape[0])), int(np.sqrt(tensor.shape[0])), np.prod(input_shape), np.prod(output_shape) ] tensor = np.reshape(tensor, newshape) return tensor
def __init__(self, num_filters, filter_h, filter_w, image_channels, rank): self.image_channels = image_channels self.num_filters = num_filters self.filter_h = filter_h self.filter_w = filter_w self.rank = rank self.filters = np.random.randn(num_filters, filter_h, filter_w, image_channels) / (filter_h * filter_w) tensor = tl.tensor(self.filters) ##initialize the cp-decomposed convolutional factors self.factors = parafac(tensor, rank) self.filters_recon = tl.kruskal_to_tensor((self.factors)) ##initialize moments and parameters for adam self.v0 = np.zeros(self.factors[0].shape) self.v1 = np.zeros(self.factors[1].shape) self.v2 = np.zeros(self.factors[2].shape) self.v3 = np.zeros(self.factors[3].shape) self.v = [self.v0, self.v1, self.v2, self.v3] self.s0 = np.zeros(self.factors[0].shape) self.s1 = np.zeros(self.factors[1].shape) self.s2 = np.zeros(self.factors[2].shape) self.s3 = np.zeros(self.factors[3].shape) self.s = [self.s0, self.s1, self.s2, self.s3] self.beta1 = 0.99 self.beta2 = 0.999
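# Hedged sketch (an assumption, not the author's code): one possible companion method that
# consumes the Adam state initialized above (self.v, self.s, self.beta1, self.beta2) to
# update the CP factors. `grads`, `lr`, `t` and `eps` are hypothetical names.
def adam_update(self, grads, lr=1e-3, t=1, eps=1e-8):
    for i in range(len(self.factors)):
        # first and second moment estimates of the gradient for factor i
        self.v[i] = self.beta1 * self.v[i] + (1 - self.beta1) * grads[i]
        self.s[i] = self.beta2 * self.s[i] + (1 - self.beta2) * grads[i] ** 2
        v_hat = self.v[i] / (1 - self.beta1 ** t)   # bias correction
        s_hat = self.s[i] / (1 - self.beta2 ** t)
        self.factors[i] -= lr * v_hat / (np.sqrt(s_hat) + eps)
    # keep the reconstructed full kernel in sync with the updated factors
    self.filters_recon = tl.kruskal_to_tensor(self.factors)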
def get_factorization_error(ndarray, args): factorizer = nn_parafac if args.nonnegative else parafac (weights, factors), errors = \ factorizer(ndarray, rank=args.dimensions, return_errors=True) rec = kruskal_to_tensor((weights, factors)) max_err = np.max(np.absolute(rec - ndarray)) mean_abs_err = np.mean(np.absolute(rec - ndarray)) mean_sq_err = np.mean(np.multiply(rec - ndarray, rec - ndarray)) return errors[-1], max_err, mean_abs_err, mean_sq_err
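# Hedged usage sketch for get_factorization_error: `args` is mocked here with
# types.SimpleNamespace, which is an assumption about the real CLI arguments
# (only .nonnegative and .dimensions are needed by this function).
import types
args = types.SimpleNamespace(nonnegative=False, dimensions=4)
data = np.random.rand(10, 12, 8)
final_err, max_err, mean_abs_err, mean_sq_err = get_factorization_error(data, args)
print(final_err, max_err, mean_abs_err, mean_sq_err)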
def rank_search_parafac(tensor, rank_range): AIC = [] for rank in range(1, rank_range + 1): factors = parafac(tensor, rank=rank) recon = tl.kruskal_to_tensor(factors) err = tensor - recon rank_AIC = 2 * tl.tenalg.inner(err, err) + 2 * rank AIC.append(rank_AIC) return AIC
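# Hedged usage sketch: score ranks 1..rank_range with the AIC above and keep the rank
# with the lowest score. The synthetic tensor is illustrative only.
tensor = tl.tensor(np.random.rand(8, 8, 8))
aic_scores = rank_search_parafac(tensor, rank_range=5)
best_rank = int(np.argmin(aic_scores)) + 1   # +1 because ranks start at 1
print("AIC per rank:", aic_scores, "best rank:", best_rank)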
def MVLVM(X1, X2, X3, k=2, sigma=0.2, n_iter=100, reg=0): rank = k Xa = np.vstack((X1, X2)) Xb = np.vstack((X2, X1)) K = gauss_kernal_mat(Xa, Xa, sigma) # [1st elmts of pair, 2nd elmts of pair] K = 0.5 * (K + K.T) + reg * np.eye(Xa.shape[0]) L = gauss_kernal_mat(Xb, Xb, sigma) # [2nd elmts of pair, 1st elmts of pair] L = 0.5 * (L + L.T) + reg * np.eye(Xa.shape[0]) n = K.shape[0] // 2 R = scipy.linalg.cholesky(K) ### todo: why is K not full rank?? R = R.T #account for their convention s, beta_tilde = scipy.sparse.linalg.eigs((1 / (4 * n**2)) * R @ L @ R.T, k=rank) s = np.real(s) beta_tilde = np.real(beta_tilde) S12 = np.diag(s**0.5) Sn12 = np.diag(s**-0.5) beta = pinv(R) @ beta_tilde #form the tensor z1 = (Sn12 @ beta.T) @ K[:, :n] z2 = (Sn12 @ beta.T) @ K[:, n:] z3 = (Sn12 @ beta.T) @ gauss_kernal_mat(Xa, X3, sigma) weights = np.ones((n, )) factors = [z3, z1, z2] #different order to make tensor power method T = tl.kruskal_to_tensor((weights, factors)) T = (1 / (3 * n)) * tl.to_numpy(T) #tensor power method M = np.zeros((rank, rank)) lams = np.zeros((rank, )) for j in range(rank): v = np.random.randn(rank, ) v = v / np.inner(v, v)**0.5 vold = v for i in range(n_iter): v = np.dot(T, v) @ v lam = np.inner(v, v)**0.5 v = v / lam # print(np.linalg.norm(v-vold)) vold = v M[:, j] = v lams[j] = lam ws = np.ones((1, )) facs = [v, v, v] V = np.einsum("i,j,k ->ijk", v, v, v) T = T - lam * V A = beta @ S12 @ M @ np.diag(lams) pi = lams**-2 return A, pi
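# Hedged usage sketch for MVLVM on synthetic two-component data (an assumption, not from
# the source). gauss_kernal_mat is assumed to be the Gaussian kernel helper already used
# above; a small positive reg is passed so the Cholesky factorization of K succeeds.
n, d, k = 100, 2, 2
means = np.array([[0.0, 0.0], [3.0, 3.0]])
z = np.random.randint(k, size=n)                 # hidden state behind each triple
X1 = means[z] + 0.2 * np.random.randn(n, d)      # three conditionally independent views
X2 = means[z] + 0.2 * np.random.randn(n, d)
X3 = means[z] + 0.2 * np.random.randn(n, d)
A, pi = MVLVM(X1, X2, X3, k=k, sigma=0.2, n_iter=100, reg=1e-6)
print("estimated mixing weights:", pi)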
def test_symmetric_parafac_power_iteration(): """Test for symmetric Parafac optimized with robust tensor power iterations""" rng = check_random_state(1234) tol_norm_2 = 10e-1 tol_max_abs = 10e-1 size = 5 rank = 4 true_factor = tl.tensor(rng.random_sample((size, rank))) true_weights = tl.ones(rank) tensor = tl.kruskal_to_tensor((true_weights, [true_factor]*3)) weights, factor = symmetric_parafac_power_iteration(tensor, rank=10, n_repeat=10, n_iteration=10) rec = tl.kruskal_to_tensor((weights, [factor]*3)) error = tl.norm(rec - tensor, 2) error /= tl.norm(tensor, 2) assert_(error < tol_norm_2, 'norm 2 of reconstruction higher than tol') # Test the max abs difference between the reconstruction and the tensor assert_(tl.max(tl.abs(rec - tensor)) < tol_max_abs, 'abs norm of reconstruction error higher than tol')
def reco(acti, factors, rank, nidx=None, kidx=None):
    """Show reconstruction of an activity trace, true vs. predicted.

    Arguments:
        acti {array} -- 3-dimensional activity array
        factors {list} -- list of 3 arrays containing the TCA factors
        rank {scalar} -- component that the reconstructed trace most resembles

    Keyword Arguments:
        nidx {scalar} -- ROI whose trace will be reconstructed (default: {None})
                         if None, nidx is chosen to maximize the neuron factor of the given rank
        kidx {scalar} -- trial whose trace will be reconstructed (default: {None})
                         if None, kidx is chosen to maximize the trial factor of the given rank
    """
    N, T, K = acti.shape
    norm_acti = norm_tensor(acti)
    pred_tensor = tl.kruskal_to_tensor(factors)
    factors = ord_fact(factors, give_order(factors))
    if nidx is None:
        _, nidx = max((factors[0][:, rank][i], i) for i in range(N))
    if kidx is None:
        _, kidx = max((factors[2][:, rank][i], i) for i in range(K))
    plt.plot(pred_tensor[nidx, :, kidx], color='orangered', linewidth=2, label='Model')
    plt.plot(norm_acti[nidx, :, kidx], color='blue', linewidth=1, label='True')
    plt.xlabel('Time (s)', {'fontsize': 'medium', 'fontweight': 'bold'})
    plt.ylabel('Normalized df/f0', {'fontsize': 'medium', 'fontweight': 'bold'})
    plt.locator_params(nbins=T // 30, steps=[1, 3, 5, 10], min_n_ticks=T // 30)
    plt.fill_betweenx([0, 1], 105, 135, facecolor='red', alpha=0.3, label='Odor Pres.')
    time_index = list(np.arange(0, T // 15 + 1, 2))
    plt.xticks([0, 30, 60, 90, 120, 150, 180, 210, 240, 270], time_index)
    plt.legend(loc=1)
    r2 = round(r2_score(pred_tensor[nidx, :, kidx], norm_acti[nidx, :, kidx]), 3)
    plt.text(x=0, y=0.9, s='R2 = {0}'.format(r2))
def test_kruskal_norm(): """Test for kruskal_norm """ shape = (8, 5, 6, 4) rank = 25 kruskal_tensor = random_kruskal(shape=shape, rank=rank, full=False, normalise_factors=True) tol = 10e-5 rec = tl.kruskal_to_tensor(kruskal_tensor) true_res = tl.norm(rec, 2) res = kruskal_norm(kruskal_tensor) assert_(tl.to_numpy(tl.abs(true_res - res)) <= tol)
def test_masked_parafac(linesearch): """Test for the masked CANDECOMP-PARAFAC decomposition. This checks that a mask of 1's is identical to the unmasked case. """ tensor = random_kruskal((4, 4, 4), rank=1, full=True) mask = np.ones((4, 4, 4)) mask[1, :, 3] = 0 mask[:, 2, 3] = 0 mask = tl.tensor(mask) tensor_mask = tensor * mask - 10000.0 * (1 - mask) fac = parafac(tensor_mask, svd_mask_repeats=0, mask=mask, n_iter_max=0, rank=1, init="svd") fac_resvd = parafac(tensor_mask, svd_mask_repeats=10, mask=mask, n_iter_max=0, rank=1, init="svd") err = tl.norm(tl.kruskal_to_tensor(fac) - tensor, 2) err_resvd = tl.norm(tl.kruskal_to_tensor(fac_resvd) - tensor, 2) assert_(err_resvd < err, 'restarting SVD did not help') # Check that we get roughly the same answer with the full tensor and masking mask_fact = parafac(tensor, rank=1, mask=mask, init='random', random_state=1234, linesearch=linesearch) fact = parafac(tensor, rank=1) diff = kruskal_to_tensor(mask_fact) - kruskal_to_tensor(fact) assert_(T.norm(diff) < 0.001, 'norm 2 of reconstruction higher than 0.001')
def recompose_conv2d_cp(factors, params): rank = params["rank"] in_channels = factors[0].shape[2] ksz = factors[1].shape[0] out_channels = factors[2].shape[3] H0 = factors[0].reshape((in_channels, rank)) H1 = factors[1].reshape((ksz * ksz, rank)) H2 = factors[2].reshape((rank, out_channels)).transpose([1, 0]) K = tl.kruskal_to_tensor([H0, H1, H2]) K = np.reshape(K, (in_channels, ksz, ksz, out_channels)) K = np.moveaxis(K, 0, 2) return K
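# Hedged usage sketch: rebuild a (ksz, ksz, in_channels, out_channels) kernel from CP
# factors. The factor shapes below are chosen only so the reshapes above work out
# (16 input channels, 3x3 kernel, 32 output channels, rank 4) and are illustrative.
params = {"rank": 4}
factors = [np.random.rand(1, 1, 16, 4),    # input-channel factor, reshaped to (16, 4)
           np.random.rand(3, 3, 1, 4),     # spatial factor, reshaped to (9, 4)
           np.random.rand(4, 1, 1, 32)]    # output-channel factor, reshaped to (4, 32)
K = recompose_conv2d_cp(factors, params)
print(K.shape)   # expected (3, 3, 16, 32)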
def test_kruskal_mode_dot(): """Test for kruskal_mode_dot We will compare kruskal_mode_dot (which operates directly on decomposed tensors) with mode_dot (which operates on full tensors) and check that the results are the same. """ rng = check_random_state(12345) shape = (5, 4, 6) rank = 3 kruskal_ten = random_kruskal(shape, rank=rank, orthogonal=True, full=False) full_tensor = tl.kruskal_to_tensor(kruskal_ten) # matrix for mode 1 matrix = tl.tensor(rng.random_sample((7, shape[1]))) # vec for mode 2 vec = tl.tensor(rng.random_sample(shape[2])) # Test kruskal_mode_dot with matrix res = kruskal_mode_dot(kruskal_ten, matrix, mode=1, copy=True) # Note that if copy=True is not respected, factors will be changes # And the next test will fail res = tl.kruskal_to_tensor(res) true_res = mode_dot(full_tensor, matrix, mode=1) assert_array_almost_equal(true_res, res) # Check that the data was indeed copied rec = tl.kruskal_to_tensor(kruskal_ten) assert_array_almost_equal(full_tensor, rec) # Test kruskal_mode_dot with vec res = kruskal_mode_dot(kruskal_ten, vec, mode=2, copy=True) res = tl.kruskal_to_tensor(res) true_res = mode_dot(full_tensor, vec, mode=2) assert_equal(res.shape, true_res.shape) assert_array_almost_equal(true_res, res)
def _update_factors(self, X, M): # Update components for n in range(self.n_modes): self.C[n] = self.A[n] + self.L[n] * self._uttkrp( M * (X / tl.kruskal_to_tensor(self.L)), self.L, n) self.D[n] = 1 / (self.A[n] / self.B[n] + self._uttkrp(M, self.E, n)) self.E[n] = self.C[n] * self.D[n] self._check_component(n) # Calculate lower bound elbo = self._calculate_elbo(X, M) # Update exp(log(<Z>) values: for n in range(self.n_modes): self.L[n] = np.exp(sp.psi(self.C[n])) * self.D[n] return elbo
def CPD_MWU(X, F, sketching_rates, lamb, eps, nu, Hinit, mttkrps=30): weights = np.array([1] * len(sketching_rates)) / (len(sketching_rates)) dim_1, dim_2, dim_3 = X.shape A, B, C = Hinit[0], Hinit[1], Hinit[2] X_unfold = [tl.unfold(X, m) for m in range(3)] norm_x = norm(X) I = np.eye(F) errors = {} res_time = {} iter_mttkrp = 0 i = 0 mttkrp = [] last = -1 start = time.time() while iter_mttkrp < mttkrps: s = sketching_weight(sketching_rates, weights) iter_mttkrp += s mttkrp.append(iter_mttkrp) # Solve Ridge Regression for A,B,C A, B, C = update_factors(A, B, C, X_unfold, I, lamb, s, F) # Update weights if np.random.binomial(n=1, p=eps) == 1 and len(sketching_rates) > 1: update_weights(A, B, C, X_unfold, I, norm_x, lamb, weights, sketching_rates, F, nu, eps) if iter_mttkrp > last + 1: PP = tl.kruskal_to_tensor((np.ones(F), [A, B, C])) error = np.linalg.norm(X - PP)**2 print("error: {}, mttkrps: {}".format(error / norm(X), int(iter_mttkrp))) now = time.time() errors[iter_mttkrp] = error / norm(X) end = time.time() res_time[iter_mttkrp] = now - start last += 1 i += 1 return A, B, C, errors, res_time
def forward(ctx, imgs): # Tucker_reconstructions = np.zeros_like(imgs) Cp_reconstructions = torch.zeros_like(imgs).cpu() cp_rank = rank for j, img in enumerate(imgs): factors = tl.decomposition.parafac( img, rank=cp_rank, init='random', tol=1e-4, random_state=np.random.RandomState()) cp_reconstruction = tl.kruskal_to_tensor(factors) Cp_reconstructions[j] = cp_reconstruction # Tucker_reconstructions = torch.from_numpy(Tucker_reconstructions) return Cp_reconstructions
def prepare_ndarrays(ndarray, iterations, fixed, noise): tensors, base = [], None if noise.startswith('gen'): cuda = noise[3:6] == 'gpu' dim = int(noise[6:]) # Factorizing the tensor ndarray = tl.tensor(ndarray, device="cuda:0") if cuda else ndarray weights, factors = parafac(ndarray, rank=dim, init='random') base = tl.kruskal_to_tensor((weights, factors)) noise_tensor = (ndarray - base).flatten() print('Estimated noise mean: {}'.format(np.mean(noise_tensor))) print('Estimated noise std: {}'.format(np.std(noise_tensor))) curr_tensor = get_noisy_tensor(ndarray, noise, base) for _ in range(iterations): if not fixed: curr_tensor = get_noisy_tensor(ndarray, noise, base) tensors.append(curr_tensor) return tensors
def parafac(self, tensor, rank, n_iter_max=100, tol=1e-8): factors = initialize_factors(tensor, rank) rec_errors = [] norm_tensor = tl.norm(tensor, 2) for iteration in range(n_iter_max): for mode in range(tl.ndim(tensor)): # No reverse of factors, because tensorly's unfold works different # First frontal slice: # array([[ 1, 2, 3, 4], # [ 5, 6, 7, 8], # [ 9, 10, 11, 12]]) # # Second frontal slice: # array([[13, 14, 15, 16], # [17, 18, 19, 20], # [21, 22, 23, 24]]) # # 0th unfolding: # array([[ 1, 13, 2, 14, 3, 15, 4, 16], # [ 5, 17, 6, 18, 7, 19, 8, 20], # [ 9, 21, 10, 22, 11, 23, 12, 24]]) mode_factors = [f for i, f in enumerate(factors) if i != mode] mode_sq_factors = [f.T @ f for f in mode_factors] unfold = tl.unfold(tensor, mode) m1 = khatri_rao(mode_factors) # Fix for tensorly's singular trouble m2 = np.linalg.pinv(reduce(lambda x, y: x * y, mode_sq_factors)) factor = unfold @ m1 @ m2 factors[mode] = factor rec_error = tl.norm(tensor - tl.kruskal_to_tensor((None, factors)), order=2) rec_error = rec_error / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = abs(rec_errors[-2] - rec_errors[-1]) stop_flag = rec_error_decrease < tol if stop_flag: break return kruskal_normalise(KruskalTensor((None, factors)))
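# Hedged usage sketch for the ALS routine above. It is written as a method, so `solver`
# below is a hypothetical instance of whatever class it lives on; the tensor size and
# rank are illustrative only.
X = tl.tensor(np.random.rand(6, 7, 8))
weights, factors = solver.parafac(X, rank=3, n_iter_max=200, tol=1e-8)
approx = tl.kruskal_to_tensor((weights, factors))
print("relative error:", tl.norm(X - approx, 2) / tl.norm(X, 2))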
def recompose_dense_cp(factors, params): input_shape, output_shape, rank = params["input_shape"], params[ "output_shape"], params["rank"] order = len(input_shape) tensors = [] for f in factors: newshape = [rank, np.prod(f.shape[1:])] tensors.append(np.transpose(np.reshape(f, newshape))) tensor = tl.kruskal_to_tensor(tensors) tensor = np.reshape(tensor, input_shape + output_shape) axes = [ val for pair in zip(range(order), range(order, 2 * order)) for val in pair ] axes[1:-1] = axes[1:-1][::-1] tensor = np.transpose(tensor, axes=axes) tensor = np.reshape(tensor, (np.prod(input_shape), np.prod(output_shape))) return tensor
def _point_estimate(self): self.Z = self.E self.X = tl.kruskal_to_tensor(self.Z)
import numpy as np
import tensorly as tl
from tensorly.random import random_kruskal
from tensorly.decomposition import parafac
import matplotlib.pyplot as plt
from time import time

tol = np.logspace(-1, -9)
err = np.empty_like(tol)
err_ls = np.empty_like(tol)
tt = np.empty_like(tol)
tt_ls = np.empty_like(tol)
tensor = random_kruskal((10, 10, 10), 3, random_state=1234, full=True)

# Get a high-accuracy decomposition for comparison
fac = parafac(tensor, rank=3, n_iter_max=2000000, tol=1.0e-15, linesearch=True)
err_min = tl.norm(tl.kruskal_to_tensor(fac) - tensor)

for ii, toll in enumerate(tol):
    # Run PARAFAC decomposition without line search and time
    start = time()
    fac = parafac(tensor, rank=3, n_iter_max=2000000, tol=toll)
    tt[ii] = time() - start

    # Run PARAFAC decomposition with line search and time
    start = time()
    fac_ls = parafac(tensor, rank=3, n_iter_max=2000000, tol=toll, linesearch=True)
    tt_ls[ii] = time() - start
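    # Plausible continuation (an assumption, not shown in the source): record the excess
    # error of each run relative to the high-accuracy reference err_min computed above.
    err[ii] = tl.norm(tl.kruskal_to_tensor(fac) - tensor) - err_min
    err_ls[ii] = tl.norm(tl.kruskal_to_tensor(fac_ls) - tensor) - err_min

# Illustrative plot of accuracy versus runtime for the two variants.
plt.loglog(tt, err, '.', label="No line search")
plt.loglog(tt_ls, err_ls, '.r', label="Line search")
plt.xlabel("Time (s)")
plt.ylabel("Error relative to the best solution")
plt.legend()
plt.show()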
im = tl.to_numpy(tensor) im -= im.min() im /= im.max() im *= 255 return im.astype(np.uint8) # Rank of the CP decomposition cp_rank = 25 # Rank of the Tucker decomposition tucker_rank = [100, 100, 2] # Perform the CP decomposition weights, factors = parafac(image, rank=cp_rank, init='random', tol=10e-6) # Reconstruct the image from the factors cp_reconstruction = tl.kruskal_to_tensor((weights, factors)) # Tucker decomposition core, tucker_factors = tucker(image, ranks=tucker_rank, init='random', tol=10e-5, random_state=random_state) tucker_reconstruction = tl.tucker_to_tensor((core, tucker_factors)) # Plotting the original and reconstruction from the decompositions fig = plt.figure() ax = fig.add_subplot(1, 3, 1) ax.set_axis_off() ax.imshow(to_image(image)) ax.set_title('original')
im = tl.to_numpy(tensor) im -= im.min() im /= im.max() im *= 255 return im.astype(np.uint8) # Rank of the CP decomposition cp_rank = 25 # Rank of the Tucker decomposition tucker_rank = [100, 100, 2] # Perform the CP decomposition factors = parafac(image, rank=cp_rank, init='random', tol=10e-6) # Reconstruct the image from the factors cp_reconstruction = tl.kruskal_to_tensor(factors) # Tucker decomposition core, tucker_factors = tucker(image, ranks=tucker_rank, init='random', tol=10e-5, random_state=random_state) tucker_reconstruction = tl.tucker_to_tensor(core, tucker_factors) # Plotting the original and reconstruction from the decompositions fig = plt.figure() ax = fig.add_subplot(1, 3, 1) ax.set_axis_off() ax.imshow(to_image(image)) ax.set_title('original')
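# Plausible continuation of the plotting code above (an assumption, not from the source):
# show the CP and Tucker reconstructions next to the original image, reusing the
# to_image helper and the fig created above.
ax = fig.add_subplot(1, 3, 2)
ax.set_axis_off()
ax.imshow(to_image(cp_reconstruction))
ax.set_title('CP')

ax = fig.add_subplot(1, 3, 3)
ax.set_axis_off()
ax.imshow(to_image(tucker_reconstruction))
ax.set_title('Tucker')

plt.tight_layout()
plt.show()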
X = X / 256
Hinit = []
for d in range(3):
    Hinit.append(np.random.random((X.shape[d], F)))
iters = 30
sketching_rates = list(np.linspace(10**(-3), 10**(-1), 4)) + [1]
lamb = 0.001
shape = (300, 300, 300)
nu = 2
A, B, C, error, res_time = CPD_MWU(X, F, sketching_rates, lamb, 0.0001, nu, Hinit, mttkrps=3)
Hinit = [A, B, C]
time_A, NRE_A, MSE_A, A = AdaCPD(X, 1, 100, 10, Hinit, None)
reconstructed = tl.kruskal_to_tensor((np.ones(F), A))
tensorToVideo(reconstructed, "reconstructed")
frobeniusNormOfreconstructedPCA = norm(reconstructedPCA, ord='fro') L1NormOfreconstructedPCA = norm(reconstructedPCA, ord=1) L2NormOfreconstructedPCA = norm(reconstructedPCA, ord=2) # ------- X - W*H ------- numpyW = WSpectraMatrixDataFrame.to_numpy() numpyH = HSpectraMatrixDataFrame.to_numpy() numpySpectra = SpectraMatrix.to_numpy() reconstructedNMF = np.matmul(numpyW, numpyH) frobeniusNormOfreconstructedNMF = norm(reconstructedNMF, ord='fro') L1NormOfreconstructedNMF = norm(reconstructedNMF, ord=1) L2NormOfreconstructedNMF = norm(reconstructedNMF, ord=2) # ------- CPT reconstruction reconstructedCPT = tl.kruskal_to_tensor(theCPT) frobeniusNormOfreconstructedCPT = norm(reconstructedCPT, ord='fro', axis=(1, 2)) L1NormOfreconstructedCPT = norm(reconstructedCPT, ord=1, axis=(1, 2)) L2NormOfreconstructedCPT = norm(reconstructedCPT, ord=2, axis=(1, 2)) # ------- All the norms ------- print(f'----- Frobenius Norms -----') print( f'Before PCA (after scaling): {frobeniusNormAfterScaling}, After PCA: {frobeniusNormOfreconstructedPCA}\n' f'Before NMF (original data): {frobeniusNormOfOriginalData}, After NMF: {frobeniusNormOfreconstructedNMF}\n' f'Before CPT (as tensor): {frobeniusNormOftheTensor}, After CPT: {frobeniusNormOfreconstructedCPT}\n' f'Before CPT (from MATLAB): 1.5889e-05, After CPT (rank 5): 1.5695e-05, After CPT (rank 8): 1.5777e-05\n' ) print(f'----- L1 Norms -----')
def main(): w2v = gensim.models.Word2Vec.load( '../data/skip_w2v_model_stemmed') # pre-trained word embedding idf = pickle.load( open('../data/my_idf', 'rb')) # pre-trained idf value of all words in the w2v dictionary records = pickle.load(open("../data/records_final.pkl", 'rb')) print(len(records)) #获取需要推荐的问题 experiments = util.get_class_experiments() print(len(experiments)) csvfile_path = os.path.join(args.output_path, "topclass_expand11-10.csv") #输出结果 csvfile = open(csvfile_path, 'w', newline="") writer = csv.writer(csvfile) writer.writerow( ["question_title", "top5", "ground_truth_intersection", "true_apis"]) #所有问题的api的集合,看这个集合里面是否有答案存在 #统计能进行推荐的问题个数,推荐出来的问题的个数 recommend_num = 0 recommend_success_num = 0 processnum = 0 #统计指标 mrr = 0.0 map = 0.0 precision = 0 recall = 0 ndcg = 0.0 rec_num = args.rec_num start = time.clock() for experiment in experiments: experiment_method_annotation = experiment.method_annotation # print(experiment_method_annotation) experiment_now_method_flat = experiment.now_method_flat experiment_true_api = experiment.true_api experiment_now_api = experiment.now_api # 求差,取出交集 experiment_true_api = set(experiment_true_api) - set( experiment_now_api) query = experiment_method_annotation query_words = WordPunctTokenizer().tokenize(query.lower()) query_words = [ SnowballStemmer('english').stem(word) for word in query_words ] query_matrix = similarity.init_doc_matrix(query_words, w2v) query_idf_vector = similarity.init_doc_idf_vector(query_words, idf) #获取相似的TOP-N问题 top_questions = similarity.get_topk_questions(query_words, query_matrix, query_idf_vector, records, 11, 0.0) #获取得到问题的长度 # print(top_questions) similar_questions_length = len(top_questions) # print("similar_questions_length:",similar_questions_length) #查看现有问题是否在相似问题中,如果不在则加入,否则直接根据相似问题构建张量 flag = False similar_records_list = list(top_questions.keys()) for record in similar_records_list: if (record.title_words == query_words): flag = True processnum += 1 #现有问题在相似问题里面 record_method_annotation_words = list() record_method_flat = list() record_api = list() for record in similar_records_list: if record.title_words not in record_method_annotation_words: record_method_annotation_words.append(record.title_words) if record.method_block_flat not in record_method_flat: record_method_flat.append(record.method_block_flat) for api in record.method_api_sequence: if api not in record_api: record_api.append(api) #加入编程环境中出现的api for now_api in experiment_now_api: if now_api not in record_api: record_api.append(now_api) api_rec_all = [] if flag == True: recommend_num += 1 #构建张量 print(len(record_method_annotation_words), len(record_method_flat), len(record_api)) record_method_annotation_words_dict = dict( zip(range(len(record_method_annotation_words)), record_method_annotation_words)) record_method_flat_dict = dict( zip(range(len(record_method_flat)), record_method_flat)) record_api_dict = dict(zip(range(len(record_api)), record_api)) tensor = np.zeros((len(record_method_annotation_words), len(record_method_flat), len(record_api)), dtype=int) for record in similar_records_list: for concrete_api in record.method_api_sequence: tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(record.title_words)], list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values()). 
index(record.method_block_flat)], list(record_api_dict.keys( ))[list(record_api_dict.values()).index(concrete_api )]] = 1 for api in experiment_now_api: if api in record_api_dict.values(): tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(query_words)], :, list(record_api_dict.keys( ))[list(record_api_dict.values()).index(api)]] = 1 #处理不是张量的情况 one = query_words if len(record_api) == 0: continue if (len(record_method_annotation_words) == 1 or len(record_method_flat) == 1 or len(record_api) == 1): if (len(record_method_annotation_words) == 1 and len(record_method_flat) == 1 or len(record_method_flat) == 1 and len(record_api) == 1 or len(record_api) == 1 and len(record_method_annotation_words) == 1): api_rec_all = record_api for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) elif (len(record_api) == 1): api_rec_all = record_api for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) else: if (len(record_method_annotation_words) == 1): matrix = tl.unfold(tensor, mode=1) nmf = nimfa.Nmf(matrix, max_iter=200, rank=round(min(matrix.shape) / 2), update='euclidean', objective='fro') nmf_fit = nmf() W = nmf_fit.basis() H = nmf_fit.coef() matrix = np.dot(W, H) two = list( similarity.get_topk_method_flat( experiment_now_method_flat, list(record_method_flat_dict.values()), 1, 1, -1, 1).values())[0] rec_combine_api_key = np.argsort( -matrix[list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values( )).index(two)], :]).tolist()[0] api_rec_all = [ record_api_dict[i] for i in rec_combine_api_key ] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) elif (len(record_method_flat) == 1): matrix = tl.unfold(tensor, mode=0) nmf = nimfa.Nmf(matrix, max_iter=200, rank=round(min(matrix.shape) / 2), update='euclidean', objective='fro') nmf_fit = nmf() W = nmf_fit.basis() H = nmf_fit.coef() matrix = np.dot(W, H) rec_combine_api_key = np.argsort(-matrix[ list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values( )).index(one)], :]).tolist()[0] api_rec_all = [ record_api_dict[i] for i in rec_combine_api_key ] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) else: #张量分解 tf.reset_default_graph() tensor = tl.tensor(tensor).astype(np.float32) data_provider = Provider() data_provider.full_tensor = lambda: tensor env = Environment(data_provider, summary_path='/tensor/ncp_ml') ncp = NCP_BCU(env) arg = NCP_BCU.NCP_Args(rank=round( min(len(record_method_annotation_words), len(record_method_flat), len(record_api)) / 2), validation_internal=1) ncp.build_model(arg) loss_hist = ncp.train(100) factor_matrices = ncp.factors full_tensor = tl.kruskal_to_tensor(factor_matrices) two = list( similarity.get_topk_method_flat( experiment_now_method_flat, list(record_method_flat_dict.values()), 1, 1, -1, 1).values())[0] rec_combine_api_key = np.argsort( -full_tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(one)], list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values()). 
index(two)], :]).tolist() # 推荐的API列表,去除情境中已经含有的api api_rec_all = [record_api_dict[i] for i in rec_combine_api_key] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) #现有问题不在相似问题里面 else: similar_questions_length += 1 #去除找不到相似问题的问题 if similar_questions_length == 1: continue recommend_num += 1 #添加新来的query record_method_annotation_words.append(query_words) print(len(record_method_annotation_words), len(record_method_flat), len(record_api)) #构建张量 record_method_annotation_words_dict = dict( zip(range(len(record_method_annotation_words)), record_method_annotation_words)) record_method_flat_dict = dict( zip(range(len(record_method_flat)), record_method_flat)) record_api_dict = dict(zip(range(len(record_api)), record_api)) tensor = np.zeros((len(record_method_annotation_words), len(record_method_flat), len(record_api)), dtype=int) for record in similar_records_list: for concrete_api in record.method_api_sequence: tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(record.title_words)], list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values()). index(record.method_block_flat)], list(record_api_dict.keys( ))[list(record_api_dict.values()).index(concrete_api )]] = 1 for api in experiment_now_api: if api in record_api_dict.values(): tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(query_words)], :, list(record_api_dict.keys( ))[list(record_api_dict.values()).index(api)]] = 1 #处理不是张量分解 one = query_words if len(record_api) == 0: continue if (len(record_method_annotation_words) == 1 or len(record_method_flat) == 1 or len(record_api) == 1): if (len(record_method_annotation_words) == 1 and len(record_method_flat) == 1 or len(record_method_flat) == 1 and len(record_api) == 1 or len(record_api) == 1 and len(record_method_annotation_words) == 1): api_rec_all = record_api for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) elif (len(record_api) == 1): api_rec_all = record_api for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) else: if (len(record_method_annotation_words) == 1): matrix = tl.unfold(tensor, mode=1) nmf = nimfa.Nmf(matrix, max_iter=200, rank=round(min(matrix.shape) / 2), update='euclidean', objective='fro') nmf_fit = nmf() W = nmf_fit.basis() H = nmf_fit.coef() matrix = np.dot(W, H) two = list( similarity.get_topk_method_flat( experiment_now_method_flat, list(record_method_flat_dict.values()), 1, 1, -1, 1).values())[0] rec_combine_api_key = np.argsort( -matrix[list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values( )).index(two)], :]).tolist()[0] api_rec_all = [ record_api_dict[i] for i in rec_combine_api_key ] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) elif (len(record_method_flat) == 1): matrix = tl.unfold(tensor, mode=0) nmf = nimfa.Nmf(matrix, max_iter=200, rank=round(min(matrix.shape) / 2), update='euclidean', objective='fro') nmf_fit = nmf() W = nmf_fit.basis() H = nmf_fit.coef() matrix = np.dot(W, H) rec_combine_api_key = np.argsort(-matrix[ list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values( )).index(one)], :]).tolist()[0] api_rec_all = [ record_api_dict[i] for i in rec_combine_api_key ] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) else: # 张量分解 tf.reset_default_graph() tensor = tl.tensor(tensor).astype(np.float32) data_provider = Provider() 
data_provider.full_tensor = lambda: tensor env = Environment(data_provider, summary_path='/tensor/ncp_ml') ncp = NCP_BCU(env) arg = NCP_BCU.NCP_Args(rank=round( min(len(record_method_annotation_words), len(record_method_flat), len(record_api)) / 2), validation_internal=1) ncp.build_model(arg) loss_hist = ncp.train(100) factor_matrices = ncp.factors full_tensor = tl.kruskal_to_tensor(factor_matrices) # one = query_words two = list( similarity.get_topk_method_flat( experiment_now_method_flat, list(record_method_flat_dict.values()), 1, 1, -1, 1).values())[0] rec_combine_api_key = np.argsort( -full_tensor[list(record_method_annotation_words_dict.keys( ))[list(record_method_annotation_words_dict.values()). index(one)], list(record_method_flat_dict.keys() )[list(record_method_flat_dict.values()). index(two)], :]).tolist() #推荐的API列表 api_rec_all = [record_api_dict[i] for i in rec_combine_api_key] for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) #判断结果在相似的问题中有没有出现 # print(experiment_true_api) # print('----------------------------------') experiment_true_api = [ true_api.split('.')[-2] for true_api in experiment_true_api ] experiment_true_api = removelist(experiment_true_api) experiment_now_api = [ true_api.split('.')[-2] for true_api in experiment_now_api ] experiment_now_api = removelist(experiment_now_api) #去除experiment_now_api experiment_true_api = set(experiment_true_api) - set( experiment_now_api) record_api = [true_api.split('.')[-2] for true_api in record_api] record_api = removelist(record_api) api_rec_all = [true_api.split('.')[-2] for true_api in api_rec_all] api_rec_all = removelist(api_rec_all) for m in set(experiment_now_api): if m in api_rec_all: api_rec_all.remove(m) api_rec = api_rec_all[:rec_num] pos = -1 tmp_map = 0.0 hits = 0.0 vector = list() for i, api in enumerate(api_rec_all[:rec_num]): if api in set(experiment_true_api) and pos == -1: pos = i + 1 if api in set(experiment_true_api): vector.append(1) hits += 1 tmp_map += hits / (i + 1) else: vector.append(0) tmp_map /= len(set(experiment_true_api)) tmp_mrr = 0.0 if pos != -1: tmp_mrr = 1.0 / pos map += tmp_map mrr += tmp_mrr ndcg += calculateNDCG.ndcg_at_k(vector[:rec_num], rec_num) ground_truth_intersection = set(api_rec).intersection( set(experiment_true_api)) if (len(ground_truth_intersection) > 0): recommend_success_num += 1 precision += len(ground_truth_intersection) / rec_num recall += len(ground_truth_intersection) / len( set(experiment_true_api)) writer.writerow([ experiment_method_annotation, api_rec, ground_truth_intersection, experiment_true_api ]) writer.writerow(["recommend_num", "recommend_success_num"]) writer.writerow([recommend_num, recommend_success_num]) writer.writerow([ "mrr/recommend_num", "recommend_num", "map/recommend_num", "success_rate@N", "precision@N/recommend_num", "recall@N/recommend_num", "ndcg/recommend_num" ]), writer.writerow([ mrr / recommend_num, recommend_num, map / recommend_num, recommend_success_num / recommend_num, precision / recommend_num, recall / recommend_num, ndcg / recommend_num ]) csvfile.close() end = time.clock() print('Running time: %s Seconds' % (end - start)) logging.info("Finish")
from tensorly.decomposition import parafac
import tensorly as tl
import matplotlib.pyplot as plt

# ----------------------------------------------------
# Lecture 4, slide 22
# ----------------------------------------------------

# Load the image
lena = plt.imread("../data/lena.png")

# Tensor factorization
w_, fac = parafac(lena, 256)  # The smaller the rank, the worse the image (but the smaller the representation)
print(w_)
lena_rec = tl.kruskal_to_tensor((w_, fac))

plt.imshow(lena_rec, cmap="gray")
plt.show()
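# Hedged follow-up (not from the source): report how close the rank-256 reconstruction is
# to the original image in relative Frobenius norm.
rel_error = tl.norm(tl.tensor(lena) - lena_rec, 2) / tl.norm(tl.tensor(lena), 2)
print("relative reconstruction error:", rel_error)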
fig = mpl.pyplot.gcf() fig.set_size_inches(18.5, 10.5) plt.savefig(f"{testname}{Size},{Rank},{trial}.svg") ax2.set_yscale("log") plt.savefig(f"{testname}loj{Size},{Rank},{trial}.svg") pickle.dump( ( A, B, C, NRE_A, weights, X, Rank, sketching_rates, lamb, eps, eta_cpd, A_init, max_time, b0, eta_ada, ), open(f"{testname}{Size},{Rank},{trial}.dat", "wb"), ) X_reconstruction = tl.kruskal_to_tensor((np.ones(Rank), [A, B, C])) saveTensorVideo(X_reconstruction, f"{trial}{testname}after.mp4")
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', normalize_factors=False, tol=1e-8, orthogonalise=False, random_state=None, verbose=0, return_errors=False, non_negative=False, mask=None): """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS) Computes a rank-`rank` decomposition of `tensor` [1]_ such that, ``tensor = [|weights; factors[0], ..., factors[-1] |]``. Parameters ---------- tensor : ndarray rank : int Number of components. n_iter_max : int Maximum number of iteration init : {'svd', 'random'}, optional Type of factor matrix initialization. See `initialize_factors`. svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor of shape (rank, ), which will contain the norms of the factors tol : float, optional (Default: 1e-6) Relative reconstruction error tolerance. The algorithm is considered to have found the global minimum when the reconstruction error is less than `tol`. random_state : {None, int, np.random.RandomState} verbose : int, optional Level of verbosity return_errors : bool, optional Activate return of iteration errors non_negative : bool, optional Perform non_negative PARAFAC. See :func:`non_negative_parafac`. mask : ndarray array of booleans with the same shape as ``tensor`` should be 0 where the values are missing and 1 everywhere else. Note: if tensor is sparse, then mask should also be sparse with a fill value of 1 (or True). Allows for missing values [2]_ Returns ------- KruskalTensor : (weight, factors) * weights : 1D array of shape (rank, ) all ones if normalize_factors is False (default), weights of the (normalized) factors otherwise * factors : List of factors of the CP decomposition element `i` is of shape (tensor.shape[i], rank) errors : list A list of reconstruction errors at each iteration of the algorithms. References ---------- .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009. .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values." Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180. 
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=non_negative, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [ tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors) ] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) if non_negative: accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.conj(tl.transpose(factor)), factor) if mask is not None: tensor = tensor * mask + tl.kruskal_to_tensor( (None, factors), mask=1 - mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) if non_negative: numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator else: factor = tl.transpose( tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where( tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor / (tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: if verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor