Code example #1
File: smiley.py  Project: wlattner/GLRM
from glrm import GLRM
from glrm.loss import HingeLoss          # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy import ones
from itertools import product

# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))
for i, j in product(range(120, 190), range(120, 190)):
    d = (155 - i)**2 + (155 - j)**2
    if d <= 35**2:
        data[i, j] = 1
        data[i, m - j] = 1
for i, j in product(range(300, 451), range(100, 251)):
    d = (250 - i)**2 + (250 - j)**2
    if 150**2 <= d <= 200**2:
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict() # glrm_binary.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
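With HingeLoss, predict() should decode XY to ±1 labels, so the misclassification rate gives a quick sanity check on the recovered drawing. A minimal sketch in plain NumPy, using only names defined above:

from numpy import sign
misclassified = (sign(A_hat) != A).mean()  # fraction of pixels whose sign disagrees with the original
print("misclassification rate: %.4f" % misclassified)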
Code example #2
File: likert.py  Project: JuergenNeubauer/GLRM
from glrm import GLRM
from glrm.loss import OrdinalLoss        # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil
seed(1)

# Generate problem data
m, n, k = 100, 100, 10
data = randn(m,k).dot(randn(k,n))
data = data - data.min()
data = (data/data.max()*6).round() + 1 # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict() # glrm_ord.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence() # convergence history
pplot([A, A_hat, A-A_hat], ["original", "glrm", "error"])
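Since OrdinalLoss decodes to the same 1-7 scale as the input, reconstruction quality can be read directly in Likert levels. A minimal check, assuming only the arrays defined above:

mae = abs(A - A_hat).mean()    # mean absolute error, in Likert levels
exact = (A == A_hat).mean()    # fraction of entries recovered exactly
print("MAE: %.3f levels, exact matches: %.1f%%" % (mae, 100 * exact))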
Code example #3
File: pca_nonneg.py  Project: wlattner/GLRM
from glrm import GLRM
from glrm.loss import QuadraticLoss      # loss/reg module paths assumed from the glrm package layout
from glrm.reg import NonnegativeReg
from glrm.util import pplot
from numpy.random import randn
from itertools import product

# Generate problem data
m, n, k = 20, 20, 5
eta = 0.1 # noise power
X_true, Y_true = abs(randn(m,k)), abs(randn(k,n))
data = X_true.dot(Y_true) + eta*randn(m,n) # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = NonnegativeReg(0.1), NonnegativeReg(0.1)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit()

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict() # glrm_nn.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# Now with missing data
missing = list(product(range(int(0.25*m), int(0.75*m)),
                       range(int(0.25*n), int(0.75*n))))
glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_nn_missing.fit()
A_hat = glrm_nn_missing.predict()
pplot([A, missing, A_hat, A - A_hat], ["original", "missing", "glrm", "error"])

Code example #4
File: hinge.py  Project: JuergenNeubauer/GLRM
from glrm import GLRM
from glrm.loss import HingeLoss          # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy import sign
from numpy.random import randn
from itertools import product

# Generate problem data
m, n, k = 100, 100, 10
eta = 0.1 # noise power
X_true, Y_true = randn(m,k), randn(k,n)
data = sign(X_true.dot(Y_true) + eta*randn(m,n)) # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000) # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict() # model.predict(X, Y) works too; returns decode(XY)
ch = model.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
# 
# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
# 
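Because the observed labels are themselves noisy (the sign of a perturbed rank-k product), it is worth scoring the fit against the noiseless labels too. A minimal sketch using only the variables defined above:

clean = sign(X_true.dot(Y_true))  # labels before the noise was added
print("agreement with clean labels:    %.4f" % (sign(A_hat) == clean).mean())
print("agreement with observed labels: %.4f" % (sign(A_hat) == A).mean())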
Code example #5
File: mixed.py  Project: pmnyc/Source_Codes_Collected
# (Excerpt: data_real, data_ord, data_bool, k, and the block widths n1, n2, n3 are
# defined earlier in mixed.py, along with the glrm and numpy imports.)

# Initialize model
A = [data_real, data_ord, data_bool]
loss = [QuadraticLoss, OrdinalLoss, HingeLoss]
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
converge = Convergence(TOL=1e-2,
                       max_iters=1000)  # optional (default TOL = 1e-3)
glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_mix.fit()

# Results
X, Y = glrm_mix.factors()
A_hat = glrm_mix.predict()  # glrm_mix.predict(X, Y) works too; returns decode(XY)
ch = glrm_mix.convergence()  # convergence history
pplot([hstack(A), A_hat, hstack(A) - A_hat], ["original", "glrm", "error"])

# Now with missing data
missing = [
    list(product(range(35, 50), range(n1 - 5, n1))),
    list(product(range(35, 50), range(0, n2))),
    list(product(range(35, 50), range(0, n3 - 5)))
]

glrm_mix_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_mix_missing.fit()
A_hat = glrm_mix_missing.predict()

# translate missing list into something that we can plot
new_missing = unroll_missing(missing, [n1, n2, n3])
pplot([hstack(A), new_missing, A_hat, hstack(A) - A_hat], ["original", "missing", "glrm", "error"])
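The unroll_missing helper translates the per-block missing lists into indices on the horizontally stacked matrix so pplot can shade them. Conceptually it only has to offset each block's column indices by the widths of the blocks to its left; a hypothetical re-implementation, for illustration only:

def unroll_missing_sketch(missing, ns):
    unrolled, offset = [], 0
    for block, width in zip(missing, ns):
        # shift this block's column indices past the blocks already placed
        unrolled += [(i, j + offset) for i, j in block]
        offset += width
    return unrolled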
Code example #6
from glrm import GLRM
from glrm.loss import FractionalLoss     # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.util import pplot
from numpy import exp
from numpy.random import randn, seed
seed(2)

# Generate problem data
m, n, k = 50, 50, 5
eta = 0.1 # noise power
data = exp(randn(m,k).dot(randn(k,n)) + eta*randn(m,n)) + eta*randn(m,n) # noisy rank k

# Initialize model
A = data
loss = FractionalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_frac = GLRM(A, loss, regX, regY, k)

# Fit
glrm_frac.fit()

# Results
X, Y = glrm_frac.factors()
A_hat = glrm_frac.predict() # glrm_frac.predict(X, Y) works too; returns decode(XY)
ch = glrm_frac.convergence() # convergence history
pplot([A, A_hat, A-A_hat], ["original", "glrm", "error"])

# Now with missing data
# from numpy.random import choice
# from itertools import product
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# 
# glrm_pca_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_pca_nn_missing.fit()
# glrm_pca_nn_missing.compare()
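FractionalLoss targets relative rather than absolute error, which suits the positive, wide-dynamic-range data generated above. A quick look at the entrywise relative deviation, in plain NumPy:

from numpy import median
rel = abs(A - A_hat) / abs(A)  # entrywise relative deviation
print("median relative error: %.4f" % median(rel))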
Code example #7
File: huber_pca.py  Project: JuergenNeubauer/GLRM
# (Excerpt: m, n, k and sym_noise are defined earlier in huber_pca.py, along with
# the glrm and numpy imports; sample comes from the random module.)
asym_noise = sqrt(k)*randn(m,n) + 3*abs(sqrt(k)*randn(m,n)) # large, sparse noise
rate = 0.3 # fraction of entries corrupted by large, outlier noise
corrupted_entries = sample(list(product(range(m), range(n))), int(m*n*rate))
data = randn(m,k).dot(randn(k,n))
A = data + sym_noise
for ij in corrupted_entries: A[ij] += asym_noise[ij]

# Initialize model
loss = HuberLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict() # glrm_huber.predict(X, Y) works too; returns decode(XY)
ch = glrm_huber.convergence() # convergence history
pplot([data, A, A_hat, data-A_hat], ["original", "corrupted", "glrm", "error"])


# Now with missing data
from numpy.random import choice
missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))

glrm_huber_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_huber_missing.fit()
A_hat = glrm_huber_missing.predict()
pplot([data, A, missing, A_hat, data-A_hat], ["original", "corrupted", "missing", "glrm", "error"])
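data holds the clean low-rank matrix and A its corrupted version, so comparing A_hat to each shows how strongly HuberLoss resists the sparse outliers. A minimal check with numpy.linalg:

from numpy.linalg import norm
print("relative error vs clean data:     %.4f" % (norm(data - A_hat) / norm(data)))
print("relative error vs corrupted data: %.4f" % (norm(A - A_hat) / norm(A)))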
Code example #8
from glrm import GLRM
from glrm.loss import QuadraticLoss      # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.util import pplot
from numpy.random import randn, seed
from itertools import product
seed(1)

# Generate problem data
m, n, k = 50, 50, 10
eta = 0.1 # noise power
data = randn(m,k).dot(randn(k,n)) + eta*randn(m,n) # noisy rank k

# Initialize model
A = data
loss = QuadraticLoss
regX, regY = QuadraticReg(0.0001), QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict() # glrm_nn.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
# 
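With QuadraticLoss and a tiny QuadraticReg this is essentially regularized PCA, so the truncated SVD gives a useful baseline: by Eckart-Young it is the best rank-k approximation in Frobenius norm. A sketch using only NumPy:

from numpy.linalg import svd, norm
U, s, Vt = svd(A, full_matrices=False)
A_svd = (U[:, :k] * s[:k]).dot(Vt[:k])  # best rank-k approximation (Eckart-Young)
print("GLRM relative error: %.4f" % (norm(A - A_hat) / norm(A)))
print("SVD  relative error: %.4f" % (norm(A - A_svd) / norm(A)))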
Code example #9
from glrm import GLRM
from glrm.loss import OrdinalLoss        # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy import sign
from itertools import product
from math import ceil
seed(1)

# Generate problem data
m, n, k = 100, 100, 10
data = randn(m, k).dot(randn(k, n))
data = data - data.min()
data = (data / data.max() * 6).round() + 1  # approx rank k
#data = choice(range(7), (m,n)) + 1 # not inherently rank k

# Initialize model
A = data
loss = OrdinalLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_ord = GLRM(A, loss, regX, regY, k)

# Fit
glrm_ord.fit(eps=1e-3, max_iters=1000)

# Results
X, Y = glrm_ord.factors()
A_hat = glrm_ord.predict()  # glrm_ord.predict(X, Y) works too; returns decode(XY)
ch = glrm_ord.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
Code example #10
from glrm import GLRM
from glrm.loss import HingeLoss          # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy import sign
from numpy.random import randn
from itertools import product

# Generate problem data
m, n, k = 100, 100, 10
eta = 0.1  # noise power
X_true, Y_true = randn(m, k), randn(k, n)
data = sign(X_true.dot(Y_true) + eta * randn(m, n))  # noisy rank k

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
c = Convergence(TOL=1e-2)
model = GLRM(A, loss, regX, regY, k, converge=c)

# Fit
model.fit(eps=1e-4, max_iters=1000)  # want more precision for hinge loss problem

# Results
X, Y = model.factors()
A_hat = model.predict()  # model.predict(X, Y) works too; returns decode(XY)
ch = model.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
#
# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()
# A_hat = glrm_nn_missing.predict()
# pplot([A, missing, A_hat, A - A_hat], \
#         ["original", "missing", "glrm", "error"])
#
Code example #11
File: huber_pca.py  Project: wlattner/GLRM
# (Excerpt: m, n, k and sym_noise are defined earlier in huber_pca.py, along with
# the glrm and numpy imports; sample comes from the random module.)
asym_noise = sqrt(k)*randn(m,n) + 3*abs(sqrt(k)*randn(m,n)) # large, sparse noise
rate = 0.3 # fraction of entries corrupted by large, outlier noise
corrupted_entries = sample(list(product(range(m), range(n))), int(m*n*rate))
data = randn(m,k).dot(randn(k,n))
A = data + sym_noise
for ij in corrupted_entries: A[ij] += asym_noise[ij]

# Initialize model
loss = HuberLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
glrm_huber = GLRM(A, loss, regX, regY, k)

# Fit
glrm_huber.fit()

# Results
X, Y = glrm_huber.factors()
A_hat = glrm_huber.predict() # glrm_huber.predict(X, Y) works too; returns decode(XY)
ch = glrm_huber.convergence() # convergence history
pplot([data, A, A_hat, data-A_hat], ["original", "corrupted", "glrm", "error"])


# Now with missing data
from numpy.random import choice
missing = list(product(range(int(0.25*m), int(0.75*m)),
                       range(int(0.25*n), int(0.75*n))))

glrm_huber_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_huber_missing.fit()
A_hat = glrm_huber_missing.predict()
pplot([data, A, missing, A_hat, data-A_hat], ["original", "corrupted", "missing", "glrm", "error"])
Code example #12
from glrm import GLRM
from glrm.loss import HingeLoss          # loss/reg module paths assumed from the glrm package layout
from glrm.reg import QuadraticReg
from glrm.convergence import Convergence
from glrm.util import pplot
from numpy import ones
from itertools import product

# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))
for i, j in product(range(120, 190), range(120, 190)):
    d = (155 - i)**2 + (155 - j)**2
    if d <= 35**2:
        data[i, j] = 1
        data[i, m - j] = 1
for i, j in product(range(300, 451), range(100, 251)):
    d = (250 - i)**2 + (250 - j)**2
    if 150**2 <= d <= 200**2:
        data[i, j] = 1
        data[i, m - j] = 1

# Initialize model
A = data
loss = HingeLoss
regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_binary.fit()

# Results
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict()  # glrm_binary.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence()  # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
Code example #13
File: mixed.py  Project: wlattner/GLRM
# (Excerpt: data, data_real, data_ord, k, and the block widths n1, n2, n3 are
# defined earlier in mixed.py, along with the glrm and numpy imports.)
data_ord = (data_ord/data_ord.max()*6 + 1).round() # ordinal data, e.g., Likert scale
data_bool = sign(data[:, n1+n2:])

# Initialize model
A = [data_real, data_ord, data_bool]
loss = [QuadraticLoss, OrdinalLoss, HingeLoss]
regX, regY = QuadraticReg(0.01), QuadraticReg(0.01)
converge = Convergence(TOL=1e-2, max_iters=1000) # optional (default TOL = 1e-3)
glrm_mix = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit
glrm_mix.fit()

# Results
X, Y = glrm_mix.factors()
A_hat = glrm_mix.predict() # glrm_mix.predict(X, Y) works too; returns decode(XY)
ch = glrm_mix.convergence() # convergence history
pplot([hstack(A), A_hat, hstack(A)-A_hat], ["original", "glrm", "error"])

# Now with missing data
missing = [list(product(range(35, 50), range(n1-5, n1))),
           list(product(range(35, 50), range(0, n2))),
           list(product(range(35, 50), range(0, n3-5)))]

glrm_mix_missing = GLRM(A, loss, regX, regY, k, missing)
glrm_mix_missing.fit()
A_hat = glrm_mix_missing.predict()

# translate missing list into something that we can plot
new_missing = unroll_missing(missing, [n1, n2, n3]) 
pplot([hstack(A), new_missing, A_hat, hstack(A)-A_hat], ["original", "missing", "glrm", "error"])
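Because the three blocks live on different scales (real, 1-7 ordinal, ±1 boolean), a single error number is hard to interpret; scoring each block separately is more informative. A sketch assuming the block widths n1, n2, n3 from earlier in the file:

A_full = hstack(A)
offsets = [0, n1, n1 + n2, n1 + n2 + n3]
for name, lo, hi in zip(["real", "ordinal", "boolean"], offsets[:-1], offsets[1:]):
    err = abs(A_full[:, lo:hi] - A_hat[:, lo:hi]).mean()
    print("%s block mean abs error: %.3f" % (name, err))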