Beispiel #1
0
def glrm_impute(dct_data, dct_param):
    """Impute missing values in mixed numeric/character data with a GLRM.

    Splits the raw frame into a character block (Hinge loss) and a
    numeric block (Quadratic loss), records the coordinates of missing
    entries in each, and fits a Generalized Low Rank Model that fills
    them in.

    Args:
        dct_data: dict with keys 'df_raw_data' (DataFrame) and
            'lst_char_cols', 'lst_year_cols', 'lst_num_cols'
            (lists of column names).
        dct_param: unused here; kept for interface compatibility.

    Returns:
        Tuple (X, Y, A_hat): the two factor matrices and the
        reconstructed (imputed) data matrix.
    """
    df_raw_data = dct_data['df_raw_data']
    # df_raw_data, df_col_mu_std = scale(df_raw_data, dct_data['lst_num_cols'])

    # NOTE(review): rank k is set to the number of rows, which makes the
    # factorization full-rank along that dimension -- confirm this is the
    # intended model capacity.
    k = df_raw_data.shape[0]
    lst_char_cols = dct_data['lst_char_cols']
    lst_year_cols = dct_data['lst_year_cols']

    # Character (categorical) block and numeric block, restricted to the
    # columns actually present in the frame.
    np_char = df_raw_data[[
        col for col in lst_char_cols if col in df_raw_data.columns
    ]].values
    np_num = df_raw_data[[
        col for col in df_raw_data.columns
        if col in lst_year_cols + dct_data['lst_num_cols']
    ]].values

    # Coordinates of missing entries, per block, as [row, col] pairs.
    lst_missing_num = np.argwhere(np.isnan(np_num)).tolist()
    lst_missing_char = np.argwhere(pd.isnull(np_char)).tolist()

    dat_list = [np_num, np_char]
    loss_list = [QuadraticLoss, HingeLoss]
    regX, regY = QuadraticReg(0.1), QuadraticReg(0.1)
    missing_list = [lst_missing_num, lst_missing_char]

    c = Convergence(TOL=1e-3, max_iters=10)
    model = GLRM(dat_list, loss_list, regX, regY, k, missing_list, converge=c)
    model.fit()
    X, Y = model.factors()
    A_hat = model.predict()  # a horizontally concatenated matrix, not a list
    # Return the results instead of discarding them (original ended with a
    # dead `x = 0` and returned None).
    return X, Y, A_hat
Beispiel #2
0
# Generate problem data (draw smiley with -1's, 1's)
m, n, k = 500, 500, 8
data = -ones((m, n))

# Eye: disc of radius 35 centred at (155, 155), mirrored across the
# vertical midline to give the second eye.
for i, j in product(range(120, 190), repeat=2):
    if (155 - i) ** 2 + (155 - j) ** 2 <= 35 ** 2:
        data[i, j] = 1
        data[i, m - j] = 1

# Mouth: annulus between radii 150 and 200 centred at (250, 250),
# restricted to the lower-left window, mirrored as well.
for i in range(300, 451):
    for j in range(100, 251):
        dist_sq = (250 - i) ** 2 + (250 - j) ** 2
        if 150 ** 2 <= dist_sq <= 200 ** 2:
            data[i, j] = 1
            data[i, m - j] = 1

# Initialize model: Hinge loss for the +/-1 entries, quadratic
# regularization on both factors.
A = data
loss = HingeLoss
regX = QuadraticReg(0.1)
regY = QuadraticReg(0.1)
converge = Convergence(TOL=1e-2)
glrm_binary = GLRM(A, loss, regX, regY, k, converge=converge)

# Fit the model.
glrm_binary.fit()

# Results: factors, reconstruction, convergence history, and a plot of
# the reconstruction error.
X, Y = glrm_binary.factors()
A_hat = glrm_binary.predict() # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_binary.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])
Beispiel #3
0
# Imports for the noisy low-rank recovery example; seed fixed for
# reproducible random draws.
from glrm.util import pplot
from numpy.random import randn, choice, seed
from numpy.random import choice  # NOTE(review): duplicate -- `choice` is already imported on the line above
from itertools import product
from numpy import sign
seed(1)

# Generate problem data: a rank-k signal corrupted by dense Gaussian noise.
m, n, k = 50, 50, 10
eta = 0.1  # noise power
# Same three randn draws, in the same order, as the original.
data = randn(m, k) @ randn(k, n) + eta * randn(m, n)

# Initialize model: quadratic loss with light quadratic regularization
# on both factor matrices.
A = data
loss = QuadraticLoss
regX = QuadraticReg(0.0001)
regY = QuadraticReg(0.0001)
glrm_nn = GLRM(A, loss, regX, regY, k)

# Fit with a tight tolerance and a generous iteration cap.
glrm_nn.fit(eps=1e-4, max_iters=1000)

# Results: factors, reconstruction, convergence history, and a plot of
# the reconstruction error.
X, Y = glrm_nn.factors()
A_hat = glrm_nn.predict() # glrm_pca.predict(X, Y) works too; returns decode(XY)
ch = glrm_nn.convergence() # convergence history
pplot([A, A_hat, A - A_hat], ["original", "glrm", "error"])

# # Now with missing data
# missing = list(product(range(int(0.25*m), int(0.75*m)), range(int(0.25*n), int(0.75*n))))
# glrm_nn_missing = GLRM(A, loss, regX, regY, k, missing)
# glrm_nn_missing.fit()