# Example 1
def GLRMfit(A, k, missing=None):
    """Fit a rank-k GLRM to A with quadratic loss and light linear
    regularization, plot the convergence history, and return the factors.

    A        -- data matrix to factor
    k        -- target rank of the factorization
    missing  -- optional collection of (i, j) entries treated as unobserved
    """
    reg_x = LinearReg(0.001)
    reg_y = LinearReg(0.001)
    glrm_model = GLRM(A, QuadraticLoss, reg_x, reg_y, k, missing)
    glrm_model.fit(eps=1e-4, max_iters=1000)
    glrm_model.converge.plot()
    return glrm_model.factors()
# Example 2
# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))

# Eyes: a filled disc of radius 35 around (155, 155), mirrored across the
# vertical axis via the column index m - j.
for i in range(120, 190):
    for j in range(120, 190):
        if (155 - i) ** 2 + (155 - j) ** 2 <= 35 ** 2:
            data[i, j] = 1
            data[i, m - j] = 1

# Mouth: an annulus segment around (250, 250), also mirrored.
for i in range(300, 451):
    for j in range(100, 251):
        dist2 = (250 - i) ** 2 + (250 - j) ** 2
        if 150 ** 2 <= dist2 <= 200 ** 2:
            data[i, j] = 1
            data[i, m - j] = 1

# Initialize model: hinge loss for the +/-1 data, quadratic regularization
# on both factors, and a looser stopping tolerance.
A = data
loss = HingeLoss
regX = QuadraticReg(0.1)
regY = QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example 3
from numpy.random import randn, choice, seed
from itertools import product
from numpy import sign

# Fix: the original imported `choice` twice and imported `seed` without ever
# calling it, so the example was not reproducible; seed as the other examples do.
seed(1)

# Generate problem data: a noisy matrix with nonnegative rank-k structure.
m, n, k = 20, 20, 5
eta = 0.1  # noise power
X_true, Y_true = abs(randn(m, k)), abs(randn(k, n))  # nonnegative true factors
data = X_true.dot(Y_true) + eta * randn(m, n)  # noisy rank k

# Initialize model: quadratic loss with nonnegativity regularization on
# both factors (a nonnegative matrix factorization).
A = data
loss = QuadraticLoss
regX = NonnegativeReg(0.1)
regY = NonnegativeReg(0.1)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit()

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data: treat the central window of entries (rows and
# columns from 25% to 75%) as unobserved.
missing = [(i, j)
           for i in range(int(0.25 * m), int(0.75 * m))
           for j in range(int(0.25 * n), int(0.75 * n))]
from glrm import GLRM
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign, exp

seed(2)  # reproducible example

# Generate problem data: the entrywise exponential of a noisy rank-k
# matrix, with additional additive noise on top.
m, n, k = 50, 50, 5
eta = 0.1  # noise power
data = (exp(randn(m, k).dot(randn(k, n)) + eta * randn(m, n))
        + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = FractionalLoss
regX = QuadraticReg(0.1)
regY = QuadraticReg(0.1)
glrm_frac = GLRM(A, loss, regX, regY, k)

# Fit
glrm_frac.fit()

# Results
X, Y = glrm_frac.factors()
A_hat = glrm_frac.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_frac.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
# from numpy.random import choice
# from itertools import product
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# 
# Example 5
# Split the columns of a random rank-k matrix into three typed groups.
# NOTE(review): n1, n2, n3, m, k are defined earlier, outside this excerpt.
n = n1 + n2 + n3
data = randn(m, k).dot(randn(k, n))

# First n1 columns: raw numerical data.
data_real = data[:, :n1]

# Next n2 columns: rescaled onto a 1..7 ordinal (e.g. Likert) scale.
data_ord = data[:, n1:n1 + n2]
data_ord = data_ord - data_ord.min()
data_ord = (data_ord / data_ord.max() * 6 + 1).round()

# Last n3 columns: signs only, i.e. boolean data.
data_bool = sign(data[:, n1 + n2:])

# Initialize model: one loss per column block -- numerical, ordinal, boolean.
A = [data_real, data_ord, data_bool]
loss = [QuadraticLoss, OrdinalLoss, HingeLoss]
regX = QuadraticReg(0.01)
regY = QuadraticReg(0.01)
converge = Convergence(TOL=1e-2, max_iters=1000)  # optional (default TOL = 1e-3)
glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_mix.fit()

# Results
X, Y = glrm_mix.factors()
A_hat = glrm_mix.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_mix.convergence()  # convergence history
pplot([hstack(A), A_hat, hstack(A) - A_hat], ["original", "glrm", "error"])

# Now with missing data
missing = [
    list(product(range(35, 50), range(n1 - 5, n1))),
    list(product(range(35, 50), range(0, n2))),
# Example 6
from numpy.random import randn, choice, seed
from itertools import product
from numpy import sign

# Fix: the original imported `choice` a second time on its own line;
# the duplicate import is removed.
seed(1)

# Generate problem data: a noisy rank-k matrix.
m, n, k = 50, 50, 10
eta = 0.1  # noise power
data = randn(m, k).dot(randn(k, n)) + eta * randn(m, n)  # noisy rank k

# Initialize model: quadratic loss with very light quadratic regularization.
A = data
loss = QuadraticLoss
regX = QuadraticReg(0.0001)
regY = QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit with a tight tolerance and a generous iteration cap.
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_nn.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# Example 7
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil
seed(1)

# Generate problem data: a random rank-k matrix quantized onto the
# ordinal scale {1, ..., 7}.
m, n, k = 100, 100, 10
data = randn(m, k).dot(randn(k, n))
data = data - data.min()                     # shift so the minimum is 0
data = (data / data.max() * 6).round() + 1   # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX = QuadraticReg(0.1)
regY = QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_ord.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# Example 8
from numpy.random import choice
from itertools import product
from numpy import sign

# Generate problem data: the sign pattern of a noisy rank-k matrix,
# i.e. binary (+/-1) observations.
# NOTE(review): `randn` comes from an earlier example's import in this file.
m, n, k = 100, 100, 10
eta = 0.1  # noise power
X_true = randn(m, k)
Y_true = randn(k, n)
data = sign(X_true.dot(Y_true) + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX = QuadraticReg(0.01)
regY = QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit -- extra precision requested for the hinge loss problem.
model.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = model.factors()
A_hat = model.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = model.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
#
# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# Example 9
seed(1)

# Generate problem data: a rank-k matrix observed through small symmetric
# noise everywhere, plus large one-sided outlier noise on a random
# fraction of the entries.
# NOTE(review): sqrt, sample, randn, product are imported before this excerpt.
m, n, k = 50, 50, 5
sym_noise = 0.2 * sqrt(k) * randn(m, n)
asym_noise = sqrt(k) * randn(m, n) + 3 * abs(sqrt(k) * randn(m, n))  # large, sparse noise
rate = 0.3  # fraction of entries corrupted by large, outlier noise
corrupted_entries = sample(list(product(range(m), range(n))), int(m * n * rate))
data = randn(m, k).dot(randn(k, n))
A = data + sym_noise
for entry in corrupted_entries:
    A[entry] += asym_noise[entry]

# Initialize model
loss = HuberLoss
regX = QuadraticReg(0.1)
regY = QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict()  # equivalently predict(X, Y); returns decode(XY)
ch = glrm_huber.convergence()  # convergence history
pplot([data, A, A_hat, data - A_hat], ["original", "corrupted", "glrm", "error"])


# Now with missing data: treat the central window of entries (rows and
# columns from 25% to 75%) as unobserved.
from numpy.random import choice
missing = [(i, j)
           for i in range(int(0.25 * m), int(0.75 * m))
           for j in range(int(0.25 * n), int(0.75 * n))]