# Example: boolean GLRM on a synthetic "smiley" image.
# NOTE(review): this script uses ones, product, HingeLoss, QuadraticReg,
# Convergence, GLRM and pplot without a visible import header — confirm the
# header lines were not lost when this file was assembled.

# Generate problem data (draw smiley with -1's, 1's).
# Eyes are filled disks; the mouth is an annulus arc. Each write is mirrored
# across the vertical axis via data[i, m - j].
m, n, k = 500, 500, 8
data = -ones((m, n))
for i, j in product(range(120, 190), range(120, 190)):
    d = (155 - i)**2 + (155 - j)**2
    if d <= 35**2:  # inside the eye's radius
        data[i, j] = 1
        data[i, m - j] = 1  # mirrored second eye
for i, j in product(range(300, 451), range(100, 251)):
    d = (250 - i)**2 + (250 - j)**2
    if 150**2 <= d <= 200**2:  # annulus: between inner and outer radius
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model: +-1 data modeled with hinge loss, quadratic regularization.
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # glrm_binary.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example: ordinal GLRM on synthetic Likert-style data (values 1..7).
from glrm import GLRM
from glrm.loss import OrdinalLoss      # was missing: used below — confirm module path
from glrm.reg import QuadraticReg      # was missing: used below — confirm module path
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil

seed(1)

# Generate problem data: a rank-k product, shifted/scaled and rounded onto
# the ordinal levels 1..7 (so the data is approximately rank k).
m, n, k = 100, 100, 10
data = randn(m, k).dot(randn(k, n))
data = data - data.min()
data = (data / data.max() * 6).round() + 1  # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict()  # glrm_ord.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example: nonnegative matrix factorization via GLRM (quadratic loss with
# nonnegativity regularizers on both factors).
# NOTE(review): randn, product, QuadraticLoss, NonnegativeReg, GLRM and pplot
# are used without a visible import header — confirm the header was not lost.

# Generate problem data: noisy nonnegative rank-k matrix.
m, n, k = 20, 20, 5
eta = 0.1  # noise power
X_true, Y_true = abs(randn(m, k)), abs(randn(k, n))
data = X_true.dot(Y_true) + eta * randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = NonnegativeReg(0.1), NonnegativeReg(0.1)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit()

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # glrm_nn.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data: hide the central quarter-to-three-quarter square
# of entries and refit; the model imputes the hidden block.
missing = list(product(range(int(0.25*m), int(0.75*m)),
                       range(int(0.25*n), int(0.75*n))))

glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_nn_missing.fit()
A_hat = glrm_nn_missing.predict()
pplot([A, missing, A_hat, A - A_hat],
      ["original", "missing", "glrm", "error"])
# Example: boolean GLRM (hinge loss) on noisy sign data.
# NOTE(review): randn, sign, HingeLoss, QuadraticReg, Convergence, GLRM and
# pplot are used without a visible import header — confirm it was not lost.

# Generate problem data: signs of a noisy rank-k matrix.
m, n, k = 100, 100, 10
eta = 0.1  # noise power
X_true, Y_true = randn(m, k), randn(k, n)
data = sign(X_true.dot(Y_true) + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000)  # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict()  # model.predict(X, Y) works too; returns decode(XY)
ch = model.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
#
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
# Example (tail of a mixed-data GLRM script): real + ordinal + boolean columns,
# each with its own loss, factored jointly.
# NOTE(review): data_real, data_ord, data_bool, k, n1, n2, n3, hstack and
# unroll_missing are defined/imported before this chunk — the script's head is
# outside this view; confirm before running standalone.

# Initialize model: one loss per column block, matching A's blocks in order.
A = [data_real, data_ord, data_bool]
loss = [QuadraticLoss, OrdinalLoss, HingeLoss]
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
converge = Convergence(TOL=1e-2, max_iters=1000)  # optional (default TOL = 1e-3)
glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_mix.fit()

# Results
X, Y = glrm_mix.factors()
A_hat = glrm_mix.predict()  # glrm_mix.predict(X, Y) works too; returns decode(XY)
ch = glrm_mix.convergence()  # convergence history
pplot([hstack(A), A_hat, hstack(A) - A_hat], ["original", "glrm", "error"])

# Now with missing data: one missing-entry list per column block.
missing = [
    list(product(range(35, 50), range(n1 - 5, n1))),
    list(product(range(35, 50), range(0, n2))),
    list(product(range(35, 50), range(0, n3 - 5))),
]

glrm_mix_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_mix_missing.fit()
A_hat = glrm_mix_missing.predict()

# translate missing list into something that we can plot
new_missing = unroll_missing(missing, [n1, n2, n3])
# Example: GLRM with fractional loss on positive, roughly low-rank data.
# NOTE(review): seed, exp, randn, FractionalLoss, QuadraticReg, GLRM and pplot
# are used without a visible import header — confirm it was not lost.
seed(2)

# Generate problem data: exponentiated noisy rank-k matrix, plus additive noise,
# so entries are positive (as fractional loss expects — confirm against glrm docs).
m, n, k = 50, 50, 5
eta = 0.1  # noise power
data = exp(randn(m, k).dot(randn(k, n)) + eta*randn(m, n)) + eta*randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = FractionalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_frac = GLRM(A, loss, regX, regY, k)

# Fit
glrm_frac.fit()

# Results
X, Y = glrm_frac.factors()
A_hat = glrm_frac.predict()  # glrm_frac.predict(X, Y) works too; returns decode(XY)
ch = glrm_frac.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
# from numpy.random import choice
# from itertools import product
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
#
# glrm_pca_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_pca_nn_missing.fit()
# glrm_pca_nn_missing.compare()
# Example (tail of a robust-PCA script): Huber loss is robust to sparse,
# large-magnitude outlier corruption.
# NOTE(review): sqrt, sample, m, n, k and sym_noise come from earlier lines of
# this script that are outside this view — confirm before running standalone.

# Large, asymmetric, sparse noise applied to a random subset of entries.
asym_noise = sqrt(k)*randn(m, n) + 3*abs(sqrt(k)*randn(m, n))  # large, sparse noise
rate = 0.3  # percent of entries that are corrupted by large, outlier noise
corrupted_entries = sample(list(product(range(m), range(n))), int(m*n*rate))
data = randn(m, k).dot(randn(k, n))
A = data + sym_noise
for ij in corrupted_entries:
    A[ij] += asym_noise[ij]

# Initialize model
loss = HuberLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict()  # glrm_huber.predict(X, Y) works too; returns decode(XY)
ch = glrm_huber.convergence()  # convergence history
pplot([data, A, A_hat, data - A_hat],
      ["original", "corrupted", "glrm", "error"])

# Now with missing data
from numpy.random import choice
missing = list(product(range(int(0.25*m), int(0.75*m)),
                       range(int(0.25*n), int(0.75*n))))

glrm_huber_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_huber_missing.fit()
A_hat = glrm_huber_missing.predict()
pplot([data, A, missing, A_hat, data - A_hat],
      ["original", "corrupted", "missing", "glrm", "error"])
# Example: plain PCA-like GLRM (quadratic loss, tiny quadratic regularization).
# NOTE(review): seed, randn, QuadraticLoss, QuadraticReg, GLRM and pplot are
# used without a visible import header — confirm it was not lost.
seed(1)

# Generate problem data: noisy rank-k matrix.
m, n, k = 50, 50, 10
eta = 0.1  # noise power
data = randn(m, k).dot(randn(k, n)) + eta*randn(m, n)  # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = QuadraticReg(0.0001), QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # glrm_nn.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
#
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
# Example: ordinal GLRM on synthetic Likert-style data (values 1..7).
# (Duplicate of the other ordinal example in this file.)
from glrm import GLRM                  # was missing: used below
from glrm.loss import OrdinalLoss      # was missing: used below — confirm module path
from glrm.reg import QuadraticReg      # was missing: used below — confirm module path
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil

seed(1)

# Generate problem data: a rank-k product, shifted/scaled and rounded onto
# the ordinal levels 1..7 (so the data is approximately rank k).
m, n, k = 100, 100, 10
data = randn(m, k).dot(randn(k, n))
data = data - data.min()
data = (data / data.max() * 6).round() + 1  # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict()  # glrm_ord.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example: boolean GLRM (hinge loss) on noisy sign data.
# (Duplicate of the other hinge example in this file.)
# NOTE(review): randn, sign, HingeLoss, QuadraticReg, Convergence, GLRM and
# pplot are used without a visible import header — confirm it was not lost.

# Generate problem data: signs of a noisy rank-k matrix.
m, n, k = 100, 100, 10
eta = 0.1  # noise power
X_true, Y_true = randn(m, k), randn(k, n)
data = sign(X_true.dot(Y_true) + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000)  # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict()  # model.predict(X, Y) works too; returns decode(XY)
ch = model.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
#
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
# Example (tail of a robust-PCA script): Huber loss is robust to sparse,
# large-magnitude outlier corruption. (Duplicate of the other Huber example.)
# NOTE(review): sqrt, sample, m, n, k and sym_noise come from earlier lines of
# this script that are outside this view — confirm before running standalone.

# Large, asymmetric, sparse noise applied to a random subset of entries.
asym_noise = sqrt(k)*randn(m, n) + 3*abs(sqrt(k)*randn(m, n))  # large, sparse noise
rate = 0.3  # percent of entries that are corrupted by large, outlier noise
corrupted_entries = sample(list(product(range(m), range(n))), int(m*n*rate))
data = randn(m, k).dot(randn(k, n))
A = data + sym_noise
for ij in corrupted_entries:
    A[ij] += asym_noise[ij]

# Initialize model
loss = HuberLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict()  # glrm_huber.predict(X, Y) works too; returns decode(XY)
ch = glrm_huber.convergence()  # convergence history
pplot([data, A, A_hat, data - A_hat],
      ["original", "corrupted", "glrm", "error"])

# Now with missing data
from numpy.random import choice
missing = list(product(range(int(0.25*m), int(0.75*m)),
                       range(int(0.25*n), int(0.75*n))))

glrm_huber_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_huber_missing.fit()
A_hat = glrm_huber_missing.predict()
pplot([data, A, missing, A_hat, data - A_hat],
      ["original", "corrupted", "missing", "glrm", "error"])
# Example: boolean GLRM on a synthetic "smiley" image.
# (Duplicate of the other smiley example in this file.)
# NOTE(review): ones, product, HingeLoss, QuadraticReg, Convergence, GLRM and
# pplot are used without a visible import header — confirm it was not lost.

# Generate problem data (draw smiley with -1's, 1's).
# Eyes are filled disks; the mouth is an annulus arc. Each write is mirrored
# across the vertical axis via data[i, m - j].
m, n, k = 500, 500, 8
data = -ones((m, n))
for i, j in product(range(120, 190), range(120, 190)):
    d = (155 - i)**2 + (155 - j)**2
    if d <= 35**2:  # inside the eye's radius
        data[i, j] = 1
        data[i, m - j] = 1  # mirrored second eye
for i, j in product(range(300, 451), range(100, 251)):
    d = (250 - i)**2 + (250 - j)**2
    if 150**2 <= d <= 200**2:  # annulus: between inner and outer radius
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model: +-1 data modeled with hinge loss, quadratic regularization.
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # glrm_binary.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example (tail of a mixed-data GLRM script): real + ordinal + boolean columns,
# each with its own loss, factored jointly.
# NOTE(review): data, data_real, data_ord (initial value), k, n1, n2, n3,
# hstack and unroll_missing come from earlier lines of this script that are
# outside this view — confirm before running standalone.

# Finish building the three data blocks.
data_ord = (data_ord / data_ord.max() * 6 + 1).round()  # ordinal data, e.g., Likert scale
data_bool = sign(data[:, n1 + n2:])

# Initialize model: one loss per column block, matching A's blocks in order.
A = [data_real, data_ord, data_bool]
loss = [QuadraticLoss, OrdinalLoss, HingeLoss]
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
converge = Convergence(TOL=1e-2, max_iters=1000)  # optional (default TOL = 1e-3)
glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_mix.fit()

# Results
X, Y = glrm_mix.factors()
A_hat = glrm_mix.predict()  # glrm_mix.predict(X, Y) works too; returns decode(XY)
ch = glrm_mix.convergence()  # convergence history
pplot([hstack(A), A_hat, hstack(A) - A_hat], ["original", "glrm", "error"])

# Now with missing data: one missing-entry list per column block.
missing = [
    list(product(range(35, 50), range(n1 - 5, n1))),
    list(product(range(35, 50), range(0, n2))),
    list(product(range(35, 50), range(0, n3 - 5))),
]

glrm_mix_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_mix_missing.fit()
A_hat = glrm_mix_missing.predict()

# translate missing list into something that we can plot
new_missing = unroll_missing(missing, [n1, n2, n3])
pplot([hstack(A), new_missing, A_hat, hstack(A) - A_hat],
      ["original", "missing", "glrm", "error"])