from glrm import GLRM
from glrm.loss import QuadraticLoss
from glrm.reg import LinearReg # assumed to live in glrm.reg alongside QuadraticReg

def GLRMfit(A, k, missing=None):
    # Fit a rank-k GLRM with quadratic loss and light linear regularization.
    loss = QuadraticLoss
    regX, regY = LinearReg(0.001), LinearReg(0.001)
    model = GLRM(A, loss, regX, regY, k, missing)
    model.fit(eps=1e-4, max_iters=1000)
    model.converge.plot() # plot convergence history
    return model.factors()
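# Usage sketch (illustrative): fit a rank-5 model to noisy synthetic data and
# recover the factors. The demo data below is made up for this example.
from numpy.random import randn, seed
seed(0)
m, n, k = 50, 50, 5
A_demo = randn(m, k).dot(randn(k, n)) + 0.1*randn(m, n) # noisy rank-k matrix
X, Y = GLRMfit(A_demo, k)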
from glrm import GLRM
from glrm.loss import OrdinalLoss
from glrm.reg import QuadraticReg
from glrm.util import pplot
from numpy.random import randn, choice, seed
seed(1)

# Generate problem data
m, n, k = 100, 100, 10
data = randn(m,k).dot(randn(k,n))
data = data - data.min()
data = (data/data.max()*6).round() + 1 # approx rank k, ordinal levels 1..7
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict() # glrm_ord.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence() # convergence history
pplot([A, A_hat, A-A_hat], ["original", "glrm", "error"])
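# Quick sanity check (illustrative, not part of the glrm API): OrdinalLoss
# decodes to the integer levels used above, so we can count how often the
# fit recovers an entry exactly.
exact = (A_hat == A).mean()
print('fraction of entries recovered exactly: %.3f' % exact)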
from glrm import GLRM
from glrm.loss import HingeLoss
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy import ones
from itertools import product

# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))
for i,j in product(range(120, 190), range(120, 190)): # eyes (mirrored)
    d = (155-i)**2 + (155-j)**2
    if d <= 35**2:
        data[i,j] = 1
        data[i, m-j] = 1
for i,j in product(range(300, 451), range(100, 251)): # mouth (annulus, mirrored)
    d = (250-i)**2 + (250-j)**2
    if d <= 200**2 and d >= 150**2:
        data[i,j] = 1
        data[i,m-j] = 1

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict() # glrm_binary.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
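# Illustrative check: HingeLoss decodes entries to -1/+1, so the pixel
# misclassification rate is just the fraction of disagreements.
err_rate = (A_hat != A).mean()
print('pixel error rate: %.4f' % err_rate)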
from glrm import GLRM
from glrm.loss import HingeLoss
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn
from itertools import product
from numpy import sign

# Generate problem data
m, n, k = 100, 100, 10
eta = 0.1 # noise power
X_true, Y_true = randn(m,k), randn(k,n)
data = sign(X_true.dot(Y_true) + eta*randn(m,n)) # noisy rank k, entries in {-1, +1}

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000) # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict() # model.predict(X, Y) works too; returns decode(XY)
ch = model.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_missing.fit()
# A_hat = glrm_missing.predict()
from glrm import GLRM
from glrm.loss import QuadraticLoss, HingeLoss
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from numpy import hstack
from numpy.linalg import norm

# df (a DataFrame of the observations) and A_list (its blocks as a list of
# arrays) are assumed to be defined in earlier notebook cells.

regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
converge = Convergence(TOL=1e-5, max_iters=100)
model = GLRM(df.values, QuadraticLoss, regX, regY, k=2, converge=converge)
model.fit()

X, Y = model.factors()
A_hat = model.predict() # a horizontally concatenated matrix, not a list
norm(A_hat - hstack(A_list)) # reconstruction error, computed by hand
from glrm import GLRM
from glrm.loss import QuadraticLoss
from glrm.reg import NonnegativeReg
from glrm.util import pplot
from numpy.random import randn
from itertools import product

# Generate problem data
m, n, k = 20, 20, 5
eta = 0.1 # noise power
X_true, Y_true = abs(randn(m,k)), abs(randn(k,n))
data = X_true.dot(Y_true) + eta*randn(m,n) # noisy nonnegative rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = NonnegativeReg(0.1), NonnegativeReg(0.1)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit()

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict() # glrm_nn.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_nn_missing.fit()
A_hat = glrm_nn_missing.predict()
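# Illustrative check on the held-out block: compare the imputed values
# against the original entries that were hidden from the model.
from numpy import array
idx = tuple(array(missing).T) # row/column index arrays for the hidden block
impute_err = abs(A_hat[idx] - A[idx]).mean()
print('mean absolute imputation error: %.3f' % impute_err)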
import time
import numpy as np
from glrm import GLRM
from glrm.loss import QuadraticLoss
from glrm.reg import QuadraticReg

# sgdata_matrix, regC1, regC2, find_missing_entries, fbnorm, and rmse are
# assumed to be defined elsewhere in the surrounding project.

k = 5 # decomposition rank
N, M = sgdata_matrix.shape

loss = [QuadraticLoss]
regX, regY = QuadraticReg(regC1), QuadraticReg(regC2)
A, A_miss, v_miss = find_missing_entries(sgdata_matrix)
A_list = [A]
miss = [A_miss]

start_time = time.time()
model = GLRM(A_list, loss, regX, regY, k, miss)
model.fit()
end_time = time.time()
print('time: ' + str(round(end_time - start_time, 1)) + ' seconds')

X, Y = model.factors()
A_hat = model.predict()
error = fbnorm(A_hat - np.hstack(A_list), v_miss)
print('Frobenius error: ' + str(round(error, 2)))
error2 = rmse(A, A_hat, v_miss)
print('RMSE: ' + str(round(error2, 2)))
print(A[~v_miss][:10])
print(A_hat[~v_miss][:10].round())
from glrm import GLRM
from glrm.loss import FractionalLoss
from glrm.reg import QuadraticReg
from glrm.util import pplot
from numpy.random import randn, seed
from numpy import exp
seed(2)

# Generate problem data
m, n, k = 50, 50, 5
eta = 0.1 # noise power
data = exp(randn(m,k).dot(randn(k,n)) + eta*randn(m,n)) + eta*randn(m,n) # noisy rank k

# Initialize model
A = data
loss = FractionalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_frac = GLRM(A, loss, regX, regY, k)

# Fit
glrm_frac.fit()

# Results
X, Y = glrm_frac.factors()
A_hat = glrm_frac.predict() # glrm_frac.predict(X, Y) works too; returns decode(XY)
ch = glrm_frac.convergence() # convergence history
pplot([A, A_hat, A-A_hat], ["original", "glrm", "error"])

# Now with missing data
# from numpy.random import choice
# from itertools import product
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
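# Illustrative check: fractional loss penalizes relative rather than absolute
# error, so the natural figure of merit here is the mean relative deviation.
rel_err = (abs(A_hat - A) / abs(A)).mean()
print('mean relative error: %.3f' % rel_err)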
from glrm import GLRM
from glrm.loss import QuadraticLoss
from glrm.reg import QuadraticReg
from glrm.util import pplot
from numpy.random import randn, seed
from itertools import product
seed(1)

# Generate problem data
m, n, k = 50, 50, 10
eta = 0.1 # noise power
data = randn(m,k).dot(randn(k,n)) + eta*randn(m,n) # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = QuadraticReg(0.0001), QuadraticReg(0.0001)
glrm_pca = GLRM(A, loss, regX, regY, k)

# Fit
glrm_pca.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_pca.factors()
A_hat = glrm_pca.predict() # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_pca.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_pca_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_pca_missing.fit()
# A_hat = glrm_pca_missing.predict()
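# Baseline comparison (sketch): with quadratic loss and near-zero
# regularization, the GLRM fit should track the best rank-k approximation,
# which numpy's truncated SVD gives directly.
from numpy.linalg import svd, norm
U, s, Vt = svd(A, full_matrices=False)
A_svd = (U[:, :k] * s[:k]).dot(Vt[:k, :]) # rank-k truncated SVD
print('glrm error: %.4f' % (norm(A - A_hat) / norm(A)))
print('svd error:  %.4f' % (norm(A - A_svd) / norm(A)))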