def GLRMfit(A, k, missing=None):
    """Fit a rank-k GLRM with quadratic loss and linear regularization to A.

    Parameters
    ----------
    A : data matrix to factor.
    k : target rank of the factorization.
    missing : optional collection of (i, j) entries to treat as unobserved.

    Returns
    -------
    The (X, Y) factors of the fitted model.

    Side effect: plots the convergence history via ``model.converge.plot()``.
    """
    loss = QuadraticLoss
    regX, regY = LinearReg(0.001), LinearReg(0.001)
    model = GLRM(A, loss, regX, regY, k, missing)
    # Tighter tolerance / higher iteration cap than the library defaults.
    model.fit(eps=1e-4, max_iters=1000)
    model.converge.plot()
    return model.factors()
# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))

# Eyes: a disc centered at (155, 155), mirrored about the vertical axis.
for i, j in product(list(range(120, 190)), list(range(120, 190))):
    d = (155 - i)**2 + (155 - j)**2
    if d <= 35**2:
        data[i, j] = 1
        data[i, m - j] = 1

# Mouth: a ring segment (annulus) centered at (250, 250), also mirrored.
for i, j in product(list(range(300, 451)), list(range(100, 251))):
    d = (250 - i)**2 + (250 - j)**2
    if d <= 200**2 and d >= 150**2:
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
from numpy.random import randn, choice, seed
from itertools import product
from numpy import sign

# Generate problem data
m, n, k = 20, 20, 5
eta = 0.1  # noise power
# abs() makes both true factors elementwise nonnegative.
X_true, Y_true = abs(randn(m, k)), abs(randn(k, n))
data = X_true.dot(Y_true) + eta * randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = NonnegativeReg(0.1), NonnegativeReg(0.1)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit()

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data: hide the central block of entries.
missing = list(product(range(int(0.25 * m), int(0.75 * m)),
                       range(int(0.25 * n), int(0.75 * n))))
from glrm import GLRM
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign, exp

seed(2)

# Generate problem data
m, n, k = 50, 50, 5
eta = 0.1  # noise power
# exp() keeps entries positive (required for fractional loss); noise is
# added both inside and outside the exponential.
data = exp(randn(m, k).dot(randn(k, n)) + eta * randn(m, n)) + eta * randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = FractionalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_frac = GLRM(A, loss, regX, regY, k)

# Fit
glrm_frac.fit()

# Results
X, Y = glrm_frac.factors()
A_hat = glrm_frac.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_frac.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
# from numpy.random import choice
# from itertools import product
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
#
n = n1 + n2 + n3 data = randn(m, k).dot(randn(k, n)) data_real = data[:, :n1] # numerical data data_ord = data[:, n1:n1 + n2] data_ord = data_ord - data_ord.min() data_ord = (data_ord / data_ord.max() * 6 + 1).round() # ordinal data, e.g., Likert scale data_bool = sign(data[:, n1 + n2:]) # Initialize model A = [data_real, data_ord, data_bool] loss = [QuadraticLoss, OrdinalLoss, HingeLoss] regX, regY = QuadraticReg(0.01), QuadraticReg(0.01) converge = Convergence(TOL=1e-2, max_iters=1000) # optional (default TOL = 1e-3) glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge) # Fit glrm_mix.fit() # Results X, Y = glrm_mix.factors() A_hat = glrm_mix.predict( ) # glrm_pca.predict(X, Y) works too; returns decode(XY) ch = glrm_mix.convergence() # convergence history pplot([hstack(A), A_hat, hstack(A) - A_hat], ["original", "glrm", "error"]) # Now with missing data missing = [ list(product(range(35, 50), range(n1 - 5, n1))), list(product(range(35, 50), range(0, n2))),
from numpy.random import randn, choice, seed
from itertools import product
from numpy import sign

seed(1)

# Generate problem data
m, n, k = 50, 50, 10
eta = 0.1  # noise power
data = randn(m, k).dot(randn(k, n)) + eta * randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = QuadraticReg(0.0001), QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil

seed(1)

# Generate problem data
m, n, k = 100, 100, 10
data = randn(m, k).dot(randn(k, n))
data = data - data.min()
# Rescale and round into ordinal levels 1..7.
data = (data / data.max() * 6).round() + 1  # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
from numpy.random import choice
from itertools import product
from numpy import sign

# Generate problem data
# NOTE(review): randn is used below but not imported here — presumably
# imported earlier in this file; confirm.
m, n, k = 100, 100, 10
eta = 0.1  # noise power
X_true, Y_true = randn(m, k), randn(k, n)
data = sign(X_true.dot(Y_true) + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000)  # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = model.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# NOTE(review): seed, randn, sqrt, sample, product are used below but not
# imported here — presumably imported earlier in this file; confirm.
seed(1)

# Generate problem data
m, n, k = 50, 50, 5
sym_noise = 0.2 * sqrt(k) * randn(m, n)
asym_noise = sqrt(k) * randn(m, n) + 3 * abs(sqrt(k) * randn(m, n))  # large, sparse noise
rate = 0.3  # percent of entries that are corrupted by large, outlier noise
corrupted_entries = sample(list(product(list(range(m)), list(range(n)))), int(m * n * rate))
data = randn(m, k).dot(randn(k, n))
A = data + sym_noise
# Add the large asymmetric noise only to the sampled entries.
for ij in corrupted_entries:
    A[ij] += asym_noise[ij]

# Initialize model
loss = HuberLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict()  # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_huber.convergence()  # convergence history
pplot([data, A, A_hat, data - A_hat], ["original", "corrupted", "glrm", "error"])

# Now with missing data: hide the central block of entries.
from numpy.random import choice
missing = list(product(list(range(int(0.25 * m), int(0.75 * m))),
                       list(range(int(0.25 * n), int(0.75 * n)))))