def glrm_impute(dct_data, dct_param):
    """Impute missing values in a mixed numeric/character data frame via a GLRM.

    Splits the raw data frame into a character matrix (Hinge loss) and a
    numeric matrix (Quadratic loss), records the missing-entry positions of
    each, and fits a rank-k Generalized Low Rank Model over both.

    Parameters
    ----------
    dct_data : dict
        Must provide 'df_raw_data' (pandas DataFrame) and the column-name
        lists 'lst_char_cols', 'lst_year_cols', 'lst_num_cols'.
    dct_param : dict
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    tuple
        (X, Y, A_hat) — the two factor matrices and the model's
        reconstruction. A_hat is a horizontally concatenated matrix,
        not a list.
    """
    df_raw_data = dct_data['df_raw_data']
    # df_raw_data, df_col_mu_std = scale(df_raw_data, dct_data['lst_num_cols'])

    # NOTE(review): rank is set to the number of rows — confirm this is the
    # intended rank for the factorization rather than a smaller constant.
    k = df_raw_data.shape[0]

    lst_char_cols = dct_data['lst_char_cols']
    lst_year_cols = dct_data['lst_year_cols']

    # Character (categorical) columns, restricted to those actually present.
    np_char = df_raw_data[[
        col for col in lst_char_cols if col in df_raw_data.columns
    ]].values
    # Numeric columns: anything in the year or numeric column lists,
    # in data-frame column order.
    np_num = df_raw_data[[
        col for col in df_raw_data.columns
        if col in lst_year_cols + dct_data['lst_num_cols']
    ]].values

    # Positions of missing entries in each matrix, as [row, col] pairs.
    lst_missing_num = np.argwhere(np.isnan(np_num)).tolist()
    lst_missing_char = np.argwhere(pd.isnull(np_char)).tolist()

    dat_list = [np_num, np_char]
    loss_list = [QuadraticLoss, HingeLoss]
    regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
    missing_list = [lst_missing_num, lst_missing_char]
    c = Convergence(TOL=1e-3, max_iters=10)

    model = GLRM(dat_list, loss_list, regX, regY, k, missing_list, converge=c)
    model.fit()

    X, Y = model.factors()
    A_hat = model.predict()  # a horizontally concatenated matrix, not a list
    return X, Y, A_hat
# Generate problem data: draw a "smiley" image out of -1's and 1's,
# then recover it with a rank-k GLRM under Hinge loss.
m, n, k = 500, 500, 8
data = -ones((m, n))

# Eyes: two filled circles of radius 35, mirrored about the vertical axis.
for i, j in product(range(120, 190), range(120, 190)):
    d = (155 - i) ** 2 + (155 - j) ** 2
    if d <= 35 ** 2:
        data[i, j] = 1
        data[i, m - j] = 1

# Mouth: an annulus (ring) between radii 150 and 200, mirrored likewise.
for i, j in product(range(300, 451), range(100, 251)):
    d = (250 - i) ** 2 + (250 - j) ** 2
    if 150 ** 2 <= d <= 200 ** 2:
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
from glrm.util import pplot
from numpy.random import randn, choice, seed
from itertools import product
from numpy import sign

seed(1)

# Generate problem data: a noisy rank-k matrix.
m, n, k = 50, 50, 10
eta = 0.1  # noise power
data = randn(m, k).dot(randn(k, n)) + eta * randn(m, n)  # noisy rank k

# Initialize model.
# NOTE(review): GLRM, QuadraticLoss, and QuadraticReg are used but not
# imported in this chunk — confirm they are brought into scope elsewhere
# in the file (e.g. from the glrm package).
A = data
loss = QuadraticLoss
regX, regY = QuadraticReg(0.0001), QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()