Exemple #1
0
def train_fm_model():
    """Train an MCMC factorization-machine classifier on the Avazu CV split.

    Loads the processed train/validation CSVs, one-hot encodes the features
    with an encoder fitted on the training split only, rewrites ``0`` labels
    to ``-1``, fits the model while predicting validation probabilities, logs
    AUC and log loss, pickles the model and returns it.

    Returns:
        The ``mcmc.FMClassification`` instance that was fitted.
    """
    train_csv = pathify('data', 'processed', 'avazu-cv-train.csv')
    X_train, y_train = load_processed_data(train_csv, label_col='click')
    val_csv = pathify('data', 'processed', 'avazu-cv-val.csv')
    X_val, y_val = load_processed_data(val_csv, label_col='click')

    # The encoder only ever sees the training split; handle_unknown='ignore'
    # lets the validation split contain categories that were absent there.
    encoder = OneHotEncoder(handle_unknown='ignore')
    encoder.fit(X_train)
    X_train = csr_matrix(encoder.transform(X_train))
    X_val = csr_matrix(encoder.transform(X_val))

    # Relabel the negative class from 0 to -1 (in place), then hand plain
    # NumPy arrays to the model.
    y_train[y_train == 0] = -1
    y_val[y_val == 0] = -1
    y_train, y_val = np.array(y_train), np.array(y_val)

    fm = mcmc.FMClassification(n_iter=50, init_stdev=0.1, random_state=123, rank=2)
    # Fitting and validation-set scoring happen in a single call.
    y_pred = fm.fit_predict_proba(X_train, y_train, X_val)

    log.info("auc_score: {:.4f}".format(cal_auc(y_val, y_pred)))
    log.info("log_loss: {:.4f}".format(cal_logloss(y_val, y_pred)))

    save_pickle(fm, pathify('models', 'avazu-fm.pickle'))
    return fm
Exemple #2
0
def test_clone():
    """Cloning an MCMC estimator must preserve its constructor parameters."""
    from sklearn.base import clone

    # Same check for the regression and the classification estimator.
    for estimator_cls in (mcmc.FMRegression, mcmc.FMClassification):
        original = estimator_cls()
        duplicate = clone(original)
        assert original.get_params() == duplicate.get_params()
Exemple #3
0
def test_fm_classification_proba():
    """Thresholding predicted probabilities at 0.5 must give the predicted labels."""
    w0, w, V, y, X = get_test_problem()
    # Turn the targets into -1/+1 labels split at their mean; this is an
    # easier problem than the default one.
    y_labels = np.where(y < np.mean(y), -1, 1).astype(y.dtype)

    fm = mcmc.FMClassification(n_iter=1000, init_stdev=0.1, rank=2)
    proba = fm.fit_predict_proba(X, y_labels, X)
    labels = fm.fit_predict(X, y_labels, X)

    # Probabilities below 0.5 map to class -1, everything else to class +1.
    labels_from_proba = np.where(proba < .5, -1.0, 1.0)
    assert_array_equal(labels, labels_from_proba)
Exemple #4
0
def test_linear_fm_classification():
    """A rank-0 MCMC classifier must reach AUC > 0.95 on the test problem."""
    w0, w, V, y, X = get_test_problem()
    # Turn the targets into -1/+1 labels split at their mean; this is an
    # easier problem than the default one.
    y_labels = np.where(y < np.mean(y), -1, 1).astype(y.dtype)

    fm = mcmc.FMClassification(n_iter=1000, init_stdev=0.1, rank=0)
    proba = fm.fit_predict_proba(X, y_labels, X)

    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y_labels, proba)
    assert metrics.auc(false_pos_rate, true_pos_rate) > 0.95

    # Smoke check: predicting on a two-row slice after fitting must not fail.
    fm.predict(X[:2, ])
Exemple #5
0
 def __init__(self,
              learning_method='mcmc',
              num_iter=100,
              init_stdev=0.1,
              k2=8,
              learn_rate=0,
              r0_regularization=0.1,
              r1_regularization=0.1,
              r2_regularization=0.1,
              seed=123,
              model_path=None):
     """Build the fastFM classifier selected by ``learning_method``.

     Args:
         learning_method: 'mcmc', 'als' or 'sgd' (compared case-insensitively).
         num_iter: forwarded as ``n_iter``.
         init_stdev: forwarded as ``init_stdev``.
         k2: forwarded as ``rank``.
         learn_rate: forwarded as ``step_size``; used by SGD only.
         r0_regularization: forwarded as ``l2_reg``; ALS and SGD only.
         r1_regularization: forwarded as ``l2_reg_w``; ALS and SGD only.
         r2_regularization: forwarded as ``l2_reg_V``; ALS and SGD only.
         seed: forwarded as ``random_state``.
         model_path: stored on the instance as a private attribute.

     Raises:
         TypeError: if ``learning_method`` is not one of the three solvers.
     """
     method = learning_method.upper()

     # Resolve the solver first so an unknown method fails before any
     # estimator is constructed.
     if method == 'MCMC':
         estimator_cls = mcmc.FMClassification
     elif method == 'ALS':
         estimator_cls = als.FMClassification
     elif method == 'SGD':
         estimator_cls = sgd.FMClassification
     else:
         raise TypeError('method should be one of {sgd, als, mcmc}')

     # Arguments shared by all three solvers.
     params = dict(n_iter=num_iter,
                   init_stdev=init_stdev,
                   rank=k2,
                   random_state=seed)
     # Only ALS and SGD receive the explicit L2 penalties.
     if method != 'MCMC':
         params.update(l2_reg=r0_regularization,
                       l2_reg_w=r1_regularization,
                       l2_reg_V=r2_regularization)
     # Only SGD receives a step size.
     if method == 'SGD':
         params['step_size'] = learn_rate

     self.fm = estimator_cls(**params)
     self.__method = method  # __* means private attribute
     self.__model_path = model_path
def demo_fastfm():
    """Fit an MCMC FM classifier on the module-level data and print test probabilities.

    Reads ``X_tr``, ``y_tr`` and ``X_te`` from the enclosing module scope and
    prints the predicted probabilities together with their type.
    """
    fm = mcmc.FMClassification(n_iter=100, init_stdev=0.1, rank=16, random_state=123, copy_X=True)

    # Features are passed as CSR sparse matrices and labels as a NumPy array.
    train_features = sparse.csr_matrix(X_tr)
    train_labels = np.array(y_tr)
    test_features = sparse.csr_matrix(X_te)

    y_pred = fm.fit_predict_proba(train_features, train_labels, test_features)
    print(y_pred, type(y_pred))
Exemple #7
0
# Report the mean squared error of predictions made earlier in the script
# (y_test and y_pred are not defined in the visible part of this excerpt).
print('mse:', mean_squared_error(y_test, y_pred))

import numpy as np
# Convert dataset to binary classification task.
# Targets below the mean become -1, all others stay +1.
y_labels = np.ones_like(y)
y_labels[y < np.mean(y)] = -1
X_train, X_test, y_train, y_test = train_test_split(X, y_labels)

# --- SGD solver: explicit fit, then separate predict / predict_proba calls.
from fastFM import sgd
fm = sgd.FMClassification(n_iter=1000, init_stdev=0.1, l2_reg_w=0,
                          l2_reg_V=0, rank=2, step_size=0.1)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)

y_pred_proba = fm.predict_proba(X_test)

# Accuracy is scored on the predicted labels, AUC on the probabilities.
from sklearn.metrics import accuracy_score, roc_auc_score
print('acc:', accuracy_score(y_test, y_pred))
print('auc:', roc_auc_score(y_test, y_pred_proba))

# --- MCMC solver: training and test data are passed together in one call.
from fastFM import mcmc
fm = mcmc.FMClassification(n_iter=1000, rank=2, init_stdev=0.1)

# NOTE(review): presumably each of these two calls runs its own fit -- confirm.
y_pred = fm.fit_predict(X_train, y_train, X_test)
y_pred_proba = fm.fit_predict_proba(X_train, y_train, X_test)

# Same two metrics as for the SGD model above.
from sklearn.metrics import accuracy_score, roc_auc_score
print('acc:', accuracy_score(y_test, y_pred))
print('auc:', roc_auc_score(y_test, y_pred_proba))

##INITIAL TEST OF MCMC WITHOUT OPTIMIZATION##
#fm.fit(trainX, trainY)
#testY = fm.predict(testX)
#print(testY)
###y_pred = fm.fit_predict(trainX, trainY, testX)
###y_pred_proba = fm.fit_predict_proba(trainX, trainY, testX)
##INITIAL TEST OF MCMC WITHOUT OPTIMIZATION##

# Drop the reference to any previously built model before creating the tuned one.
fm = None
#TESTS (value grids that were tried by hand; kept for reference)
#n_iter = [10,20,30,40,50,60,70,80,90, 100,500,1000,2000,3000]
#rank_iter = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,50]
#stdev_iter= [0,0.05,0.1,0.5,1.0]
#for i in stdev_iter:
##TESTS

# Configuration used for the run: n_iter=80, rank=25.
fm = mcmc.FMClassification(n_iter=80, rank=25, init_stdev=0.1)
# Labels and probabilities for the test set.
y_pred = fm.fit_predict(trainX, trainY, testX)
y_pred_proba = fm.fit_predict_proba(trainX, trainY, testX)
# Probabilities for the validation split; only these feed the AUC below.
y_pred_proba_auc = fm.fit_predict_proba(X_t, y_t, X_val)

fpr, tpr, thresholds = metrics.roc_curve(y_val, y_pred_proba_auc, pos_label=1)
auc = metrics.auc(fpr, tpr)
# The previous format string had no conversion specifier, so applying `%` to
# it raised TypeError. Call-style print with one argument behaves the same on
# Python 2 and Python 3.
print("AUC: %s" % auc)


#submission = pd.DataFrame({'id':test_df['id'], 'ACTION':y_pred_proba_auc})
#submission.to_csv('/Users/admin/Dropbox/EE379K/project/ee379k_project/submissions/submission_FMoptimized.csv', index = False)
# NOTE(review): the export above is commented out, so no file is written even
# though the message below is still printed.
print("Saving results at submission_FMoptimized.csv")

Exemple #9
0
# coding:utf-8
#
# Fit an MCMC factorization-machine classifier on the training CSV and print
# the predicted probabilities for the validation CSV.

import pandas as pd
import numpy as np
from scipy import sparse
from fastFM import mcmc

train = pd.read_csv('../data/dup/train_xgb11U.csv')
valid = pd.read_csv('../data/dup/valid_xgb11U.csv')
test = pd.read_csv('../data/dup/test_xgb11U.csv')

# Missing values are replaced by 0 in all three splits.
train.fillna(0, inplace=True)
valid.fillna(0, inplace=True)
test.fillna(0, inplace=True)

# Separate the 'label' column from the feature columns of each split.
train_Y = train['label']
train.drop('label', axis=1, inplace=True)
valid_Y = valid['label']
valid.drop('label', axis=1, inplace=True)
test_Y = test['label']
test.drop('label', axis=1, inplace=True)

# fastFM works on scipy sparse matrices, so the DataFrames are converted to
# float64 CSC matrices instead of being passed in directly.
train_X = sparse.csc_matrix(train.values.astype(np.float64))
valid_X = sparse.csc_matrix(valid.values.astype(np.float64))

# Labels go in as a NumPy array (a copy, so train_Y is left untouched) with
# 0 mapped to -1, matching the -1/+1 encoding used for fastFM classifiers
# elsewhere in this file.
# NOTE(review): assumes 'label' is binary 0/1 -- confirm against the data.
train_labels = np.array(train_Y, dtype=np.float64)
train_labels[train_labels == 0] = -1

fm = mcmc.FMClassification(n_iter=50, random_state=133)

# Training and validation-set scoring happen in a single call.
y = fm.fit_predict_proba(train_X, train_labels, valid_X)

# Call-style print with one argument works on Python 2 and Python 3.
print(y)