def cp_als(X, rank, random_state=None, init='randn', **options):
    """Fit a CP decomposition with Alternating Least Squares (ALS).

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with ``X.ndim >= 3``.
    rank : integer
        Number of components to compute.
    random_state : integer, ``RandomState``, or ``None``, optional (default ``None``)
        If integer, sets the seed of the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, use the RandomState instance used by ``numpy.random``.
    init : str, or KTensor, optional (default ``'randn'``).
        Initial guess for the KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.
    options : dict, fitting options forwarded to ``FitResult``.
        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.
        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.
        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.
        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.
        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor
        matrices in form of a KTensor, ``result.factors``.

    Notes
    -----
    Each sweep visits every mode in turn and solves the normal equations
    for that mode's factor matrix with all other factors held fixed.

    References
    ----------
    Kolda, T. G. & Bader, B. W.
    "Tensor Decompositions and Applications."
    SIAM Rev. 51 (2009): 455-500
    http://epubs.siam.org/doi/pdf/10.1137/07070111X

    Comon, Pierre & Xavier Luciani & Andre De Almeida.
    "Tensor decompositions, alternating least squares and other tales."
    Journal of chemometrics 23 (2009): 393-405.
    http://onlinelibrary.wiley.com/doi/10.1002/cem.1236/abstract

    Examples
    --------
    ```
    import tensortools as tt
    I, J, K, R = 20, 20, 20, 4
    X = tt.randn_tensor(I, J, K, rank=R)
    tt.cp_als(X, rank=R)
    ```
    """
    # Validate the data tensor / rank combination.
    optim_utils._check_cpd_inputs(X, rank)

    # Starting factors, norm of the data, and the convergence tracker.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'CP_ALS', **options)

    n_modes = X.ndim

    while result.still_optimizing:

        for mode in range(n_modes):

            # Rebalance the factors before every solve to prevent
            # singularities.
            U.rebalance()

            # Every factor matrix except the one being updated.
            others = [U[j] for j in range(n_modes) if j != mode]

            # Elementwise product of the N-1 gram matrices.
            gram_list = [sci.dot(factor.T, factor) for factor in others]
            grams = sci.multiply.reduce(gram_list)

            # Khatri-Rao product of the held-fixed factors.
            kr = khatri_rao(others)

            # Normal equations: grams @ U[mode].T = (X_(mode) @ kr).T
            rhs = unfold(X, mode).dot(kr).T
            U[mode] = linalg.solve(grams, rhs).T

        # Relative reconstruction error drives the convergence check.
        rel_err = linalg.norm(U.full() - X) / normX
        result.update(rel_err)

    # Finalize and return the optimization result.
    return result.finalize()
def fit_disengaged_sated(mouse,
                         trace_type='zscore_day',
                         method='mncp_hals',
                         cs='',
                         warp=False,
                         word=None,
                         group_by='all',
                         nan_thresh=0.85,
                         score_threshold=None,
                         random_state=None,
                         init='rand',
                         rank=18,
                         verbose=False):
    """
    Use an existing TCA decomposition to fit trials from disengaged and
    sated days, then compare disengaged vs engaged trials per component.

    For every day tagged 'disengaged' or 'sated', the cell and time factors
    of the stored rank-``rank`` model are held fixed and a single HALS
    update is run on the trial factor (mode 2). For each component the
    index is

        dis_index = log2(mean(disengaged trials) / mean(engaged trials))

    Parameters
    ----------
    mouse : object with a ``.mouse`` attribute
        Only ``mouse.mouse`` is used (as the mouse identifier).
    trace_type : str
        Currently not used by this function.
    method, cs, warp, word, group_by, nan_thresh, score_threshold, rank :
        Forwarded unchanged to ``load.groupday_tca_model`` to select the
        stored decomposition. ``rank`` also selects which entry of
        ``ensemble.results`` is used and how many components are scored.
    random_state, init :
        Forwarded to ``optim_utils._get_initial_ktensor`` to initialize the
        trial factor before the HALS update.
    verbose : bool
        Forwarded to ``FitResult``. (Previously the argument was ignored
        and progress was always printed.)

    Returns
    -------
    dfdis : pandas.DataFrame
        One row per (day, component), indexed by mouse, with columns
        'rank', 'date', 'component' (1-based), 'day_type' and 'dis_index'.
    """

    # load full-size TCA results
    mouse = mouse.mouse
    load_kwargs = {
        'mouse': mouse,
        'method': method,
        'cs': cs,
        'warp': warp,
        'word': word,
        'group_by': group_by,
        'nan_thresh': nan_thresh,
        'score_threshold': score_threshold,
        'rank': rank
    }
    ensemble, ids2, _ = load.groupday_tca_model(**load_kwargs)

    # get all days with disengaged or sated trials, disengaged days first
    dis_dates = flow.DateSorter.frommeta(
        mice=[mouse], tags='disengaged', exclude_tags=['bad'])
    sated_dates = flow.DateSorter.frommeta(
        mice=[mouse], tags='sated', exclude_tags=['bad'])
    labelled_days = [(day, 'disengaged') for day in dis_dates]
    labelled_days += [(day, 'sated') for day in sated_dates]

    # accumulators, one entry per (day, component)
    fits_vec = []
    comp_vec = []
    day_vec = []
    day_type_vec = []

    # the trial mode is the only mode that gets fit
    n = 2

    for day, day_kind in labelled_days:

        # load single day tensor with dis trials
        X, meta, ids = load.singleday_tensor(mouse, day.date)

        # only include matched cells, no empties
        good_ids = ids[np.isin(ids, ids2)]
        X_indexer = np.isin(ids, good_ids)
        X = X[X_indexer, :, :]

        # only keep indices that exist in the single day tensor
        factors = ensemble.results[rank][0].factors
        A_indexer = np.isin(ids2, good_ids)
        A = factors[0][A_indexer, :]
        B = factors[1]

        # make sure X is in the order of the sorted TCA results
        kept_ids = ids[X_indexer]
        X_sorter = [np.where(kept_ids == s)[0][0] for s in ids2[A_indexer]]
        X = X[X_sorter, :, :]

        # create a mask for TCA
        # (all True here since there are no empties, so the masked
        # imputation below is a no-op)
        mask = np.ones(np.shape(X), dtype=bool)

        # Check inputs.
        optim_utils._check_cpd_inputs(X, rank)

        # Initialize problem.
        U, _ = optim_utils._get_initial_ktensor(
            init, X, rank, random_state, scale_norm=False)
        result = FitResult(
            U, 'NCP_HALS', tol=0.000001, max_iter=500, verbose=verbose)

        # Norm of the observed entries, used to normalize the error.
        normX = linalg.norm(X[mask].ravel())

        # add in known dimensions to Ktensor
        U[0] = np.ascontiguousarray(A)
        U[1] = np.ascontiguousarray(B)

        # --- a single HALS update of the trial factor --------------------
        # Select all components, but U_n
        components = [U[j] for j in range(X.ndim) if j != n]

        # i) compute the N-1 gram matrices
        grams = np.multiply.reduce([arr.T.dot(arr) for arr in components])

        # ii) Compute Khatri-Rao product
        kr = khatri_rao(components)
        p = unfold(X, n).dot(kr)

        # iii) Update component U_n (in place)
        _hals_update(U[n], grams, p)

        # Then, update masked elements.
        pred = U.full()
        X[~mask] = pred[~mask]

        # Record the relative reconstruction error.
        resid = X - pred
        result.update(linalg.norm(resid.ravel()) / normX)

        # boolean selector of the disengaged / sated trials for this day
        if day_kind == 'disengaged':
            dise = meta['tag'].isin(['disengaged']).values
        else:
            dise = meta['hunger'].isin(['sated']).values
        notdise = ~dise

        # calculate the index for each component
        trial_factor = U[n]
        for comp in range(rank):
            ri = np.log2(
                np.nanmean(trial_factor[dise, comp]) /
                np.nanmean(trial_factor[notdise, comp]))
            fits_vec.append(ri)
            comp_vec.append(comp + 1)
            day_vec.append(day.date)
            day_type_vec.append(day_kind)

    # make dataframe of data
    # create your index out of relevant variables
    index = pd.MultiIndex.from_arrays([[mouse] * len(fits_vec)],
                                      names=['mouse'])

    data = {
        'rank': [rank] * len(fits_vec),
        'date': day_vec,
        'component': comp_vec,
        'day_type': day_type_vec,
        'dis_index': fits_vec
    }

    dfdis = pd.DataFrame(data, index=index)

    return dfdis
def ncp_bcd(X, rank, random_state=None, init='rand', skip_modes=[], **options):
    """
    Fits nonnegative CP Decomposition using the Block Coordinate Descent (BCD)
    Method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    init : str, or KTensor, optional (default ``'rand'``).
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    skip_modes : iterable, optional (default ``[]``).
        Specifies modes of the tensor that are not fit. This can be
        used to fix certain factor matrices that have been previously
        fit. The argument is only read, never mutated.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.

    Notes
    -----
    This implementation uses the Block Coordinate Descent Method with
    extrapolation. The extrapolation weight for mode ``n`` is capped by
    ``sqrt(L_prev[n] / L[n])``, where ``L_prev`` holds the Lipschitz
    constants of the previous outer iteration.

    References
    ----------
    Xu, Yangyang, and Wotao Yin.
    "A block coordinate descent method for regularized multiconvex
    optimization with applications to nonnegative tensor factorization
    and completion."
    SIAM Journal on imaging sciences 6.3 (2013): 1758-1789.
    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Number of tensor modes.
    N = X.ndim

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_BCD', **options)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Block coordinate descent
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Um = U.copy()  # Extrapolations of components
    extraw = 1  # Used for extrapolation weight update
    L = np.ones(N)  # Lipschitz constants
    obj_bcd = 0.5 * normX**2  # Initial objective value

    # Main optimization loop.
    while result.still_optimizing:
        obj_bcd_old = obj_bcd  # Old objective value
        U_old = U.copy()
        extraw_old = extraw

        # Snapshot the previous Lipschitz constants. This must be a copy:
        # binding ``L0 = L`` would alias the array that is overwritten
        # below and make every ratio L0[n] / L[n] equal to 1.
        L0 = L.copy()

        for n in range(N):

            # Skip modes that are specified as fixed.
            if n in skip_modes:
                continue

            # Select all components, but U_n
            components = [U[j] for j in range(N) if j != n]

            # i) compute the N-1 gram matrices
            grams = np.multiply.reduce([arr.T.dot(arr) for arr in components])

            # Update gradient Lipschitz constant (spectral norm of grams).
            L[n] = linalg.norm(grams, 2)

            # ii) Compute Khatri-Rao product
            kr = khatri_rao(components)
            p = unfold(X, n).dot(kr)

            # Compute Gradient at the extrapolated point.
            grad = Um[n].dot(grams) - p

            # Enforce nonnegativity (project onto nonnegative orthant).
            U[n] = np.maximum(0.0, Um[n] - grad / L[n])

        # Compute objective function and update optimization result.
        resid_norm = linalg.norm(X - U.full())
        result.update(resid_norm / normX)

        # Correction and extrapolation.
        # 0.5 * ||X - [[U]]||^2, taken from the residual computed above so
        # that it stays correct when trailing modes are in ``skip_modes``.
        obj_bcd = 0.5 * resid_norm**2

        extraw = (1 + np.sqrt(1 + 4 * extraw_old**2)) / 2.0

        if obj_bcd >= obj_bcd_old:
            # restore previous A to make the objective nonincreasing
            Um = U_old.copy()

        else:
            # apply extrapolation
            w = (extraw_old - 1.0) / extraw  # Extrapolation weight
            for n in range(N):
                # choose smaller weights for convergence
                weight = min(w, np.sqrt(L0[n] / L[n]))
                Um[n] = U[n] + weight * (U[n] - U_old[n])  # extrapolation

    # Finalize and return the optimization result.
    return result.finalize()
def ncp_hals(X, rank, random_state=None, init='rand', **options):
    """
    Fit a nonnegative CP decomposition with the Hierarchical Alternating
    Least Squares (HALS) method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        Number of components to compute.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    init : str, or KTensor, optional (default ``'rand'``).
        Initial guess for the KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, fitting options forwarded to ``FitResult``.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.

    Notes
    -----
    Each sweep updates every factor matrix once via ``_hals_update``, then
    records the relative reconstruction error.

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.
    """

    # Validate the data tensor / rank combination.
    optim_utils._check_cpd_inputs(X, rank)

    # Starting factors and the convergence tracker.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_HALS', **options)

    # Norm of the data, used to normalize the reconstruction error.
    normX = linalg.norm(X)

    mode_ids = range(X.ndim)

    while result.still_optimizing:
        # Accumulates the values returned by _hals_update over the sweep.
        violation = 0.0

        for n in mode_ids:

            # All factor matrices except the one being updated.
            others = [U[j] for j in mode_ids if j != n]

            # i) elementwise product of the N-1 gram matrices
            gram_list = [factor.T.dot(factor) for factor in others]
            grams = sci.multiply.reduce(gram_list)

            # ii) Khatri-Rao product and its product with the unfolding
            kr = khatri_rao(others)
            p = unfold(X, n).dot(kr)

            # iii) update U_n
            violation += _hals_update(U[n], grams, p)

        # Relative reconstruction error drives the convergence check.
        rel_err = linalg.norm(X - U.full()) / normX
        result.update(rel_err)

    # end optimization loop, return result.
    return result.finalize()
def model_forecast(
        X,
        exog_input,
        model,
        fit_dict={
            'method': '{}-Divergence'.format(u'\u03B2'),
            'tol': 1e-5,
            'min_iter': 1,
            'max_iter': 500,
            'verbose': True
        }):
    """
    Use a trained NN-LDS model to forecast future states and observations.

    The temporal factor of the model is first re-estimated on ``X`` with
    multiplicative updates (all other factors are left untouched), then the
    LDS is rolled forward for as many samples as ``exog_input`` extends
    beyond ``X`` along the LDS axis.

    Parameters
    ----------
    X : np.ndarray, tensor_like with shape: [I_1, I_2, ..., I_N]
        Tensor containing dimensionality of the system output.
        Each Tensor fiber, I, is considered a mode of the system.
        Example modes are channels, time, trials, spectral frequency, etc.

    exog_input: np.ndarray, shape: [t, p]
        The p-dimensional input signal, or control input, over time t.
        ``t`` must be at least the length of ``X`` along the LDS axis; the
        number of forecast steps is ``t`` minus that length (it may be 0,
        in which case the returned temporal factor has zero rows).

    model : FitModel object
        Model that was created using the init_model function.
        The `model` must explicitly contain an LDS component.
        Side effects: its fit parameters and status are reset, and
        ``model.model_param['NTF']['W']`` is replaced by a KTensor whose
        temporal factor was re-estimated on ``X``.

    fit_dict: dict, specifying fitting options.
        Only unpacked into ``model.set_fit_param``; never mutated here.

        tol: float, Stopping tolerance for reconstruction error.

        max_iter: int, Max number of iterations to perform before exiting.

        min_iter: int, Min number of iterations to perform before exiting.

        verbose : bool, Display progress.

    Returns
    -------
    XP : KTensor
        The model factors with the temporal factor replaced by the
        forecasted state coefficients (one row per forecast step).

    Raises
    ------
    Exception
        If the model lacks an NTF or LDS component, if ``exog_input`` is
        shorter than ``X`` along the LDS axis, or if its second dimension
        does not match the control-input matrix.
    """

    # Check model
    if 'NTF' not in model.model_param:
        raise Exception('Model does not have a observation component.')
    if 'LDS' not in model.model_param:
        raise Exception('Model does not have a dynamical system component.')

    # Check input matrix
    optim_utils._check_cpd_inputs(X, model.model_param['rank'])

    forecast_steps = (exog_input.shape[0] -
                      X.shape[model.model_param['LDS']['axis']])
    if forecast_steps < 0:
        raise Exception('Length of exogeneous input must be geq than ' +
                        'length of data tensor in order to filter/forecast.')
    if exog_input.shape[1] != model.model_param['LDS']['AB'].B.shape[-1]:
        raise Exception('Shape of input signal does not match shape of ' +
                        'control-input matrix.')

    # Update model fit parameters
    model.set_fit_param(**fit_dict)

    # Reset the status of the model
    model.reset_status()

    # Set pointers to commonly used objects
    mp = model.model_param
    dAB = mp['LDS']['AB']
    ax_t = mp['LDS']['axis']
    Xn = unfold(X, ax_t)
    n_obs = X.shape[ax_t]

    # Initialize temporal state coefficients
    assert mp['NTF']['init'] in ['rand', 'randn']
    if mp['NTF']['init'] == 'randn':
        H = np.random.randn(n_obs, mp['rank'])
    else:
        H = np.random.rand(n_obs, mp['rank'])

    # Create a new model tensor with the temporal state mode replaced
    W = KTensor(
        [mp['NTF']['W'][j] if j != ax_t else H for j in range(X.ndim)])
    mp['NTF']['W'] = W

    # Portion of the input signal that overlaps the observed data. Slicing
    # by length (rather than ``[:-forecast_steps]``) also covers the case
    # of zero forecast steps.
    Ufilter = exog_input[:n_obs]

    # Use observation model to estimate the current temporal state mode
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Iterate algorithm until convergence or maxiter is reached
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    while model.still_optimizing:

        # i) Compute Khatri-Rao product of all non-temporal factors
        kr = khatri_rao([W[j] for j in range(X.ndim) if j != ax_t])

        # ii) Compute unfolded prediction of X
        p = W[ax_t].dot(kr.T)

        # iii) Compute gradient for the observation model
        neg, pos = calc_div_grad(Xn, p, kr, mp['NTF']['beta'])

        # iv) Compute gradient for the dynamical model
        dAB.as_ord_1()
        WL = dAB.conv_state_to_lagged(W[ax_t].T)
        UL = dAB.conv_exog_to_lagged(Ufilter.T)

        # Trim the longer-lagged signal so both cover the same samples.
        lag_diff = dAB.lag_state - dAB.lag_exog
        if lag_diff > 0:
            UL = UL[:, int(np.abs(lag_diff)):]
        elif lag_diff < 0:
            WL = WL[:, int(np.abs(lag_diff)):]

        neg1, pos1 = calc_time_grad(dAB.A, WL, dAB.B, UL, mp['LDS']['beta'])
        neg1 = dAB.conv_state_to_unlagged(neg1)
        pos1 = dAB.conv_state_to_unlagged(pos1)
        neg += neg1.T
        pos += pos1.T
        dAB.as_ord_p()

        # v) Multiplicative update of the temporal factor
        W[ax_t] *= (neg / pos)**mm_gamma_func(mp['NTF']['beta'])

        # Cost of the observation model, used for the convergence check
        cost_obs = calc_cost(X, W.full(), mp['NTF']['beta'])

        # Update the model
        model.update(cost_obs)

    # end optimization loop.
    # Current temporal state mode is inferred, coefficients have been updated
    model.finalize()

    # Use LDS and current temporal state mode to forecast future state mode
    dAB.as_ord_p()
    Wn = list(W[ax_t])
    Un = list(exog_input)
    for _ in range(forecast_steps):
        # Indices of the most recent samples, newest first.
        newest = len(Wn) - 1
        W_ix = range(newest, newest - dAB.lag_state, -1)
        U_ix = range(newest, newest - dAB.lag_exog, -1)

        AX = np.array([
            dAB.A[ii, :, :].dot(Wn[ij].reshape(-1, 1))
            for ii, ij in enumerate(W_ix)
        ])[:, :, 0].sum(axis=0)
        BU = np.array([
            dAB.B[ii, :, :].dot(Un[ij].reshape(-1, 1))
            for ii, ij in enumerate(U_ix)
        ])[:, :, 0].sum(axis=0)
        Wn.append(AX + BU)

    # Keep only the forecasted rows. ``Wn[-forecast_steps:]`` would return
    # the entire state when there are zero forecast steps.
    Wn = np.array(Wn)
    Wn = Wn[n_obs:, :]

    # Re-mix forecasted state mode coefs through NTF
    XP = KTensor([W[j] if j != ax_t else Wn for j in range(X.ndim)])

    return XP
def _lagged_state_exog(ab, state, exog_input, Mn):
    """Return lag-aligned state/input matrices and their sample masks."""
    # A time sample counts as observed only if its whole unfolded row is.
    Mlag = Mn.all(axis=-1).reshape(-1, 1)

    WL = ab.conv_state_to_lagged(state.T)
    UL = ab.conv_exog_to_lagged(exog_input.T)
    MWL = ab.conv_state_to_lagged(np.repeat(Mlag, ab.rank_state, axis=1).T)
    MUL = ab.conv_exog_to_lagged(np.repeat(Mlag, ab.rank_exog, axis=1).T)

    # Trim the longer-lagged signal so both cover the same samples.
    lag_diff = ab.lag_state - ab.lag_exog
    if lag_diff > 0:
        UL = UL[:, int(np.abs(lag_diff)):]
        MUL = MUL[:, int(np.abs(lag_diff)):]
    elif lag_diff < 0:
        WL = WL[:, int(np.abs(lag_diff)):]
        MWL = MWL[:, int(np.abs(lag_diff)):]

    return WL, UL, MWL, MUL


def model_update(
        X,
        model,
        mask=None,
        exog_input=None,
        fixed_axes=[],
        fit_dict={
            'method': '{}-Divergence'.format(u'\u03B2'),
            'tol': 1e-5,
            'min_iter': 1,
            'max_iter': 500,
            'LDS_iter': 0,
            'verbose': True
        }):
    """
    Update the model parameters by optimizing Beta-Divergence Cost Functions
    using Multiplicative Updates (MU) method.

    Parameters
    ----------
    X : np.ndarray, tensor_like with shape: [I_1, I_2, ..., I_N]
        Tensor containing dimensionality of the system output.
        Each Tensor fiber, I, is considered a mode of the system.
        Example modes are channels, time, trials, spectral frequency, etc.

    model : FitModel object
        Model that was created using the init_model function.

    mask : np.ndarray with the same shape as ``X``, or None
        Nonzero / True entries mark observed elements of ``X``. The mask
        multiplies the data and prediction in the gradient, and its boolean
        version selects the entries used for the cost. If None, every
        entry is treated as observed.

    exog_input: np.ndarray, shape: [t, p]
        If LDS_dict is used, then exogeneous input specifies the
        p-dimensional input signal, or control input, over time t.

    fixed_axes: None, int, or list[int]
        Modes of the model to keep constant during the update. Typically used
        to test the model on new data by keeping "basis modes" fixed and
        updating "activation" coefficients. If list[int], fix modes
        corresponding to axes in X for each int. ``None`` or an empty list
        implies that all modes get updated. The argument is never mutated.

    fit_dict: dict, specifying fitting options.
        Only unpacked into ``model.set_fit_param``; never mutated here.

        tol: float, Stopping tolerance for reconstruction error.

        max_iter: int, Max number of iterations to perform before exiting.

        min_iter: int, Min number of iterations to perform before exiting.

        LDS_iter: int, iteration count from which the LDS matrices A and B
            are updated as well.

        verbose : bool, Display progress.

    Returns
    -------
    The value of ``model.finalize()``. The original documentation describes
    it as the fitted FitModel instance, which provides the factor matrices
    in form of a KTensor, ``model.factors``.

    Raises
    ------
    Exception
        On shape mismatches between ``X``, ``mask``, ``exog_input`` and the
        model, or when ``fixed_axes`` is not a valid set of axis indices.

    References
    ----------
    Févotte, Cédric, and Jérôme Idier. "Algorithms for nonnegative matrix
    factorization with the β-divergence." Neural computation 23.9 (2011):
    2421-2456.
    """

    # Check input matrix
    optim_utils._check_cpd_inputs(X, model.model_param['rank'])
    if X.shape != model.model_param['NTF']['W'].shape:
        raise Exception('Shape of input X does not match shape expected by ' +
                        'initialized model.')

    if mask is not None:
        if mask.shape != X.shape:
            raise Exception(
                'Size of mask array does not match size of data tensor.')
    else:
        mask = np.ones_like(X)

    # Boolean view of the mask for element selection. Indexing with the
    # float mask built above (or any non-boolean mask) would raise or
    # silently fancy-index instead of selecting observed entries.
    observed = np.asarray(mask, dtype=bool)

    if exog_input is not None:
        if exog_input.shape[0] != X.shape[model.model_param['LDS']['axis']]:
            raise Exception(
                'Length of exogeneous input does not match length of ' +
                'data tensor.')
        if exog_input.shape[1] != model.model_param['LDS']['AB'].B.shape[-1]:
            raise Exception('Shape of input signal does not match shape of ' +
                            'control-input matrix.')

    # Check fixed axes; accept the None / single-int forms the docs promise.
    if fixed_axes is None:
        fixed_axes = []
    elif (isinstance(fixed_axes, (int, np.integer))
          and not isinstance(fixed_axes, bool)):
        fixed_axes = [fixed_axes]
    if not isinstance(fixed_axes, list):
        raise Exception('Fixed axes must be list of axis indices')
    if not all((int(a) == a) and (0 <= a < X.ndim) for a in fixed_axes):
        raise Exception('Fixed axes not integers or exceed dimensions of X.')
    fixed_axes = [int(a) for a in fixed_axes]

    # Update model fit parameters
    model.set_fit_param(**fit_dict)

    # Reset the status of the model
    model.reset_status()

    # Set pointers to commonly used objects
    mp = model.model_param
    W = mp['NTF']['W']
    X_unfold = [unfold(X, n) for n in range(X.ndim)]
    M_unfold = [unfold(mask, n) for n in range(mask.ndim)]

    # Set flags for conditional operations
    flag_lds = mp['LDS'] is not None
    flag_reg = mp['REG'] is not None

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Iterate algorithm until convergence or maxiter is reached
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    while model.still_optimizing:

        for n in range(X.ndim):

            # If n corresponds to one of the fixed axes then don't update
            if n in fixed_axes:
                continue

            # i) Compute Khatri-Rao product of all factors but W[n]
            kr = khatri_rao([W[j] for j in range(W.ndim) if j != n])

            # ii) Compute unfolded prediction of X
            p = W[n].dot(kr.T)

            # iii) Compute gradient for the observation model
            Xn = X_unfold[n]
            Mn = M_unfold[n]
            neg, pos = calc_div_grad(Xn * Mn, p * Mn, kr, mp['NTF']['beta'])

            # iv) Add a regularizer
            if flag_reg and n == mp['REG']['axis']:
                pos += (mp['REG']['alpha'] *
                        (2 * (1 - mp['REG']['l1_ratio']) * W[n] +
                         mp['REG']['l1_ratio']))

            # v) Compute gradient for the dynamical model
            is_lds_axis = flag_lds and n == mp['LDS']['axis']
            if is_lds_axis:
                ab = mp['LDS']['AB']
                ab.as_ord_1()

                WL, UL, MWL, MUL = _lagged_state_exog(
                    ab, W[n], exog_input, Mn)

                neg1, pos1 = calc_time_grad(ab.A, WL * MWL, ab.B, UL * MUL,
                                            mp['LDS']['beta'])
                neg1 = ab.conv_state_to_unlagged(neg1)
                pos1 = ab.conv_state_to_unlagged(pos1)
                neg += neg1.T
                pos += pos1.T

                ab.as_ord_p()

            # vi) Update the observational component weights; zero out
            # entries that became NaN/inf (e.g. from a zero denominator).
            W[n] *= (neg / pos)**mm_gamma_func(mp['NTF']['beta'])
            W[n][~np.isfinite(W[n])] = 0

            # vii) Update the dynamical state weights, once enough
            # iterations have passed.
            if is_lds_axis and (model.status['iterations'] >=
                                model.fit_param['LDS_iter']):
                ab.as_ord_1()

                # Rebuild the lagged views from the freshly updated W[n].
                WL, UL, MWL, MUL = _lagged_state_exog(
                    ab, W[n], exog_input, Mn)

                state_prev = WL[:, :-1] * MWL[:, :-1]
                input_prev = UL[:, :-1] * MUL[:, :-1]
                state_next = WL[:, 1:] * MWL[:, 1:]

                # One-step prediction; shared by the A and B updates.
                AX = ab.A.dot(state_prev)
                BU = ab.B.dot(input_prev)

                # Update A
                neg, pos = calc_div_grad(state_next, AX + BU, state_prev.T,
                                         mp['LDS']['beta'])
                ab.A *= (neg / pos)**mm_gamma_func(mp['LDS']['beta'])
                ab.A[~np.isfinite(ab.A)] = 0

                # Update B
                neg, pos = calc_div_grad(state_next, AX + BU, input_prev.T,
                                         mp['LDS']['beta'])
                ab.B *= (neg / pos)**mm_gamma_func(mp['LDS']['beta'])
                ab.B[~np.isfinite(ab.B)] = 0

                ab.as_ord_p()

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization model, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Cost of the observation model over the observed entries only
        cost_obs = calc_cost(X[observed], W.full()[observed],
                             mp['NTF']['beta'])

        # Update the model
        model.update(cost_obs)

    # end optimization loop, return model.
    return model.finalize()
def ncp_hals(
        X, rank, mask=None, random_state=None, init='rand',
        skip_modes=[], negative_modes=[], **options):
    """
    Fits nonnegative CP Decomposition using the Hierarchical Alternating
    Least Squares (HALS) Method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    mask : boolean array with the same shape as ``X``, or None
        If given, entries where ``mask`` is False are treated as missing:
        ``X`` is copied, the missing entries start at the mean of the
        observed entries and are overwritten with the model prediction
        after each factor update. It must be boolean because it is used
        with ``~mask`` indexing.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    init : str, or KTensor, optional (default ``'rand'``).
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    skip_modes : iterable, optional (default ``[]``).
        Specifies modes of the tensor that are not fit. This can be
        used to fix certain factor matrices that have been previously
        fit. Only read, never mutated.

    negative_modes : iterable, optional (default ``[]``).
        Specifies modes of the tensor whose factors are not constrained
        to be nonnegative. Only read, never mutated.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.

    Notes
    -----
    Without a mask, the reconstruction error is obtained from the gram
    matrices and the matricized-tensor product of the mode that was fit
    last in the sweep, which avoids forming the full reconstruction.

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.
    """

    # Mask missing elements (work on a copy so the caller's X is untouched).
    if mask is not None:
        X = np.copy(X)
        X[~mask] = np.mean(X[mask])

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_HALS', **options)

    # Store problem dimensions.
    normX = np.linalg.norm(X)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Iterate the HALS algorithm until convergence or maxiter is reached
    # i)   compute the N gram matrices and multiply
    # ii)  Compute Khatri-Rao product
    # iii) Update component U_1, U_2, ... U_N
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    while result.still_optimizing:

        # Mode fit last in this sweep (None if every mode is skipped).
        last_fit = None

        for n in range(X.ndim):

            # Skip modes that are specified as fixed.
            if n in skip_modes:
                continue

            # Select all components, but U_n
            components = [U[j] for j in range(X.ndim) if j != n]

            # i) compute the N-1 gram matrices
            grams = np.multiply.reduce([arr.T @ arr for arr in components])

            # ii)  Compute Khatri-Rao product
            kr = khatri_rao(components)
            Xmkr = unfold(X, n).dot(kr)

            # iii) Update component U_n
            _hals_update(U[n], grams, Xmkr, n not in negative_modes)

            # iv) Update masked elements.
            if mask is not None:
                pred = U.full()
                X[~mask] = pred[~mask]

            last_fit = n

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization result, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        if last_fit is None:
            # Nothing was fit this sweep; use the direct residual.
            result.update(np.linalg.norm(X - U.full()) / normX)

        elif mask is None:
            # ``grams`` and ``Xmkr`` belong to the last fitted mode, so that
            # mode's factor (not necessarily U[-1]) completes the identity
            # ||X - [[U]]||^2 = sum(G) - 2 <U_n, X_(n) kr> + ||X||^2.
            grams *= U[last_fit].T @ U[last_fit]
            residsq = (np.sum(grams) - 2 * np.sum(U[last_fit] * Xmkr)
                       + (normX ** 2))
            # Rounding can push a near-zero residual slightly negative.
            result.update(np.sqrt(max(residsq, 0.0)) / normX)

        else:
            result.update(np.linalg.norm(X - pred) / normX)

    # end optimization loop, return result.
    return result.finalize()
def mcp_als(X, rank, mask, random_state=None, init='randn', **options):
    """Fits CP Decomposition with missing data using Alternating Least Squares (ALS).

    Each factor matrix is updated in turn by solving, for every row of the
    mode-n unfolding, a least-squares problem restricted to the observed
    entries of that row (the "censored least squares" approach described in
    the reference below).

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A tensor with ``X.ndim >= 3``. Values at held-out positions are never
        used: they are zeroed in an internal copy, so they may contain
        arbitrary placeholders such as ``NaN``. ``X`` itself is not modified.

    rank : integer
        The `rank` sets the number of components to be computed.

    mask : (I_1, ..., I_N) array_like
        A binary tensor with the same shape as ``X``. All entries equal to
        zero correspond to held out or missing data in ``X``. All entries
        equal to one correspond to observed entries in ``X`` and the
        decomposition is fit to these datapoints.

    random_state : integer, ``RandomState``, or ``None``, optional (default ``None``)
        If integer, sets the seed of the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, use the RandomState instance used by ``numpy.random``.

    init : str, or KTensor, optional (default ``'randn'``).
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool ``{'True', 'False'}``, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.

    Raises
    ------
    ValueError
        If ``mask`` does not have the same shape as ``X``.
    numpy.linalg.LinAlgError
        If one of the per-row linear systems is singular, e.g. when a slice
        of ``X`` along some mode has no observed entries at all.

    Notes
    -----
    Fitting CP decompositions with missing data can be exploited to perform
    cross-validation.

    The objective reported to the optimizer is the norm of the residual over
    observed entries, divided by the norm of the observed data.

    References
    ----------
    Williams, A. H.
    "Solving Least-Squares Regression with Missing Data."
    http://alexhwilliams.info/itsneuronalblog/2018/02/26/censored-lstsq/
    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Fail early with a clear message rather than with an obscure
    # broadcasting error inside the optimization loop.
    mask = np.asarray(mask)
    if mask.shape != X.shape:
        raise ValueError(
            "mask must have the same shape as X; got {} and {}.".format(
                mask.shape, X.shape))

    # Zero the held-out entries in a private copy. Multiplying by the mask
    # alone is not enough to discard them, because NaN * 0 is NaN: a NaN
    # placeholder at a masked position would otherwise propagate into the
    # norm, the right-hand sides and the objective.
    Xobs = np.copy(X)
    Xobs[mask == 0] = 0

    # Initialize problem.
    U, _ = optim_utils._get_initial_ktensor(init, X, rank, random_state, scale_norm=False)
    result = FitResult(U, 'MCP_ALS', **options)
    normX = np.linalg.norm(Xobs * mask)

    # Main optimization loop.
    while result.still_optimizing:

        # Iterate over each tensor mode.
        for n in range(X.ndim):

            # i) Normalize factors to prevent singularities.
            U.rebalance()

            # ii) Unfold data and mask along the nth mode.
            #     P denotes the product of all dimensions except the nth.
            unf = unfold(Xobs, n)  # I_n x P
            m = unfold(mask, n)  # I_n x P

            # iii) Form Khatri-Rao product of the remaining factor matrices.
            components = [U[j] for j in range(X.ndim) if j != n]
            krt = khatri_rao(components).T  # R x P

            # iv) Build one R x R linear system per row of the unfolding,
            #     keeping only the columns observed in that row.
            #     Left hand sides,  I_n x R x R
            #     Right hand sides, I_n x R x 1
            lhs_stack = np.matmul(m[:, None, :] * krt[None, :, :], krt.T[None, :, :])
            rhs_stack = np.dot(unf * m, krt.T)[:, :, None]

            # v) Solve all systems in one broadcasted call and update the
            #    factor for mode n.
            U[n] = np.linalg.solve(lhs_stack, rhs_stack).reshape(X.shape[n], rank)

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization result, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # Relative reconstruction error measured on observed entries only.
        obj = linalg.norm(mask * (U.full() - Xobs)) / normX

        # Update result
        result.update(obj)

    # Finalize and return the optimization result.
    return result.finalize()