Ejemplo n.º 1
0
 def create_fm_model_by_file(cls,
                             train_path,
                             model_output,
                             valid_path=None,
                             iterations=100,
                             thread_count=4,
                             task='binary',
                             k=16,
                             lr=0.1,
                             metric='auc',
                             stop_window=100):
     """Train an xLearn factorization machine from data files on disk.

     The data is handed to xLearn by path.  Suitable libsvm files can be
     written beforehand with, e.g.,
     ``sklearn.datasets.dump_svmlight_file(X, y, 'train.libsvm')``.

     Args:
         train_path: Training file; converted with ``str()`` before use.
         model_output: Where the fitted model is written (``str()``-ed).
         valid_path: Optional validation file; skipped when falsy.
         iterations: Passed to xLearn as ``epoch``.
         thread_count: Passed to xLearn as ``nthread``.
         task: Passed to xLearn as ``task``.
         k: Passed to xLearn as ``k``.
         lr: Passed to xLearn as ``lr``.
         metric: Passed to xLearn as ``metric``.
         stop_window: Passed to xLearn as ``stop_window``.

     Returns:
         The xLearn FM model object after ``fit`` has been called.
     """
     # 'lambda' is fixed at 0.002; everything else comes from the arguments.
     hyper_params = {
         'task': task,
         'lr': lr,
         'k': k,
         'lambda': 0.002,
         'metric': metric,
         'epoch': iterations,
         'stop_window': stop_window,
         'nthread': thread_count
     }

     learner = xl.create_fm()
     learner.setTrain(str(train_path))
     if valid_path:
         learner.setValidate(str(valid_path))

     learner.fit(hyper_params, str(model_output))
     return learner
Ejemplo n.º 2
0
    def create_model(self):
        """Train a binary FM model, score the test file and pass the model on.

        ``train.txt``, ``validate.txt`` and ``test.txt`` are looked up under
        ``self.OPT_HOME``.  The model is written to ``model.out`` and the
        predictions to ``output.txt`` (both relative paths).  Finally the
        path ``<self.MY_HOME>/model.out`` is printed and given to
        ``self.move_to_opt``.
        """
        print("model start")

        learner = xl.create_fm()

        def in_opt_home(file_name):
            # Same '/'-joined path construction for every data file.
            return "/".join([self.OPT_HOME, file_name])

        train_file = in_opt_home('train.txt')
        validate_file = in_opt_home('validate.txt')
        test_file = in_opt_home('test.txt')

        hyper_params = {
            'task': 'binary',
            'epoch': 10,
            'lr': 0.2,
            'lambda': 0.002,
            'metric': 'auc'
        }

        # Training: the validation file is registered alongside the train file.
        learner.setTrain(train_file)
        learner.setValidate(validate_file)
        learner.fit(hyper_params, 'model.out')

        # Prediction with sigmoid-transformed scores.
        learner.setTest(test_file)
        learner.setSigmoid()
        learner.predict('model.out', "output.txt")

        model_file = "/".join([self.MY_HOME, 'model.out'])
        print(model_file)
        self.move_to_opt(model_file)
Ejemplo n.º 3
0
def run():
    """Train a binary FM model on ./dataset and write predictions.

    Outputs (all relative to the working directory):
      * ``./model.out``  - binary model written by ``fit``
      * ``./model.txt``  - text dump of the model
      * ``./output.txt`` - sigmoid-transformed predictions for the test file

    The test file is also used as the validation set during training.
    """
    fm_model = xl.create_fm()

    train_path = './dataset/train.txt'
    test_path = './dataset/test.txt'

    fm_model.setTrain(train_path)
    fm_model.setValidate(test_path)

    # Parameters:
    param = {
        'task': 'binary',
        'epoch': 10,
        'lr': 0.2,
        'k': 4,
        'lambda': 0.002,
        'metric': 'auc'
    }

    # The text-model path has to be registered before fit(): the text dump
    # is produced as part of training, so setting it afterwards is too late.
    fm_model.setTXTModel('./model.txt')

    # Start to train
    # The trained model will be stored in model.out
    fm_model.fit(param, './model.out')

    # Prediction task
    fm_model.setTest(test_path)  # Set the path of test dataset
    fm_model.setSigmoid()  # Convert output to 0-1

    # Start to predict
    # The output result will be stored in output.txt
    fm_model.predict("./model.out", "./output.txt")
Ejemplo n.º 4
0
def demo_xlearn_1():
    """Train an FM model on libsvm dumps of the module-level data and print
    the predictions for the test split.

    ``X_tr``/``y_tr`` and ``X_te``/``y_te`` are dumped to
    ``BASE_DIR + '/tmp/'`` in libsvm format, the model is fitted with the
    test dump as validation set, and the predictions are read back from a
    temporary file.
    """
    train_libsvm = BASE_DIR+'/tmp/train.libsvm'
    test_libsvm = BASE_DIR+'/tmp/test.libsvm'
    model_path = BASE_DIR+'/tmp/model.out'

    fm_model = xl.create_fm()

    # Convert train and test data to libSVM format.
    dump_svmlight_file(X_tr, y_tr, train_libsvm)
    dump_svmlight_file(X_te, y_te, test_libsvm)

    # Set training and validation data.
    fm_model.setTrain(train_libsvm)
    fm_model.setValidate(test_libsvm)

    # Define params and train.
    param = {'task': 'binary',
             'lr': 0.1,
             'k': 16,
             'lambda': 0.0002,
             'metric': 'auc',
             'epoch': 100}
    fm_model.fit(param, model_path)
    fm_model.setTest(test_libsvm)

    # Predict.  The context manager guarantees the temporary file is closed
    # (and therefore removed) even if predict() or read_csv() raises.
    with tempfile.NamedTemporaryFile(delete=True) as temp_output_file:
        fm_model.predict(model_path, temp_output_file.name)
        y_pred = pd.read_csv(temp_output_file.name, header=None)[0].values

    print(y_pred, type(y_pred))
Ejemplo n.º 5
0
 def val(self, path_trn, val_prop=0.2, xlfm_params=XLFM_PARAMS_DEFAULT):
     """Fit an FM model on a train/validation split of ``path_trn``.

     ``self._split_features(path_trn, val_prop)`` supplies the training and
     validation paths.  The model is written to
     ``<self.artifacts_dir>/model-ffm-best-val.out``.

     Args:
         path_trn: Path handed to ``self._split_features``.
         val_prop: Second argument to ``self._split_features``
             (presumably the validation proportion - confirm there).
         xlfm_params: Parameter dict forwarded to ``fit``.
     """
     self.logger.info('Preparing datasets')
     train_file, valid_file = self._split_features(path_trn, val_prop)

     learner = xl.create_fm()
     learner.setTrain(train_file)
     learner.setValidate(valid_file)

     model_out = '%s/model-ffm-best-val.out' % self.artifacts_dir
     learner.fit(xlfm_params, model_out)
Ejemplo n.º 6
0
    def __init__(self, model_type):
        """Create the xLearn model matching ``model_type``.

        Args:
            model_type: One of ``"FM"``, ``"FFM"`` or ``"linear"``.
        """
        # Only these three model kinds are accepted.
        assert model_type in ["FM","FFM","linear"]
        self.model_type = model_type

        # Start from the default (linear) constructor and override it for
        # the two factorization variants.
        build_model = xl.create_linear
        if self.model_type == "FM":
            build_model = xl.create_fm
        elif self.model_type == "FFM":
            build_model = xl.create_ffm

        self.model = build_model()
Ejemplo n.º 7
0
def creat_model(model_type):
    """Create and return an xLearn model object.

    ``"FM"`` yields a factorization machine, ``"FFM"`` a field-aware
    factorization machine; any other value falls back to the linear model.
    """
    if model_type == "FM":
        return xl.create_fm()
    if model_type == "FFM":
        return xl.create_ffm()
    # Default: linear model.
    return xl.create_linear()
Ejemplo n.º 8
0
    def simple_predict2(self):
        """Score this user's prediction file with the stored v0.3 model.

        Reads ``xlearn_dataset/predict_<user_id>.txt``, writes the raw
        scores to ``xlearn_dataset/output_<user_id>.txt`` and returns them
        as column ``xlearn_stars`` next to ``business_id`` from
        ``self._xrestaurants``.
        """
        scorer = xl.create_fm()

        # Register the per-user file as the test dataset.
        predict_path = 'xlearn_dataset/predict_'+self._user_id+'.txt'
        scorer.setTest(predict_path)

        # Run the stored model; the scores land in the per-user output file.
        output_path = 'xlearn_dataset/output_'+self._user_id+'.txt'
        scorer.predict('final_model/xLearn_v0.3.out', output_path)

        scores = pd.read_csv(output_path, header=None, names=['xlearn_stars'])
        business_ids = self._xrestaurants['business_id']
        return pd.concat([business_ids, scores], axis=1)
Ejemplo n.º 9
0
    def train(self):
        """Fit the FM model on ``self.fp_train`` and score ``self.fp_valid``.

        Training parameters come from ``CONFIG['fm']['params']``; the model
        is written to ``self.fp_fm_model`` and the sigmoid-transformed
        predictions to ``../data/fm/output_fm.txt``.
        """
        # %% fm
        fm_model = xl.create_fm()
        fm_model.setTrain(self.fp_train)
        fm_model.setSigmoid()  # binary classification
        param = CONFIG['fm']['params']
        fp_pred_fm = "../data/fm/output_fm.txt"
        print('starting training...')
        fm_model.fit(param, self.fp_fm_model)

        # predict() scores the dataset registered through setTest(); a
        # validation set registered after fit() is never read, so the
        # hold-out file has to be supplied as the test set here.
        fm_model.setTest(self.fp_valid)
        fm_model.predict(self.fp_fm_model, fp_pred_fm)
Ejemplo n.º 10
0
    def predict(self, num=10):
        """Return the ``num`` highest-scored rows for this user.

        Reads ``xlearn_dataset/predict_<user_id>.txt``, scores it with the
        stored v0.1 model into ``xlearn_dataset/output_<user_id>.txt`` and
        joins the scores (column ``final_stars``) onto
        ``self._xrestaurants``, sorted by score in descending order.

        Args:
            num: Upper bound of the slice taken from the sorted frame.
        """
        scorer = xl.create_fm()

        # Register the per-user file as the test dataset.
        predict_path = 'xlearn_dataset/predict_'+self._user_id+'.txt'
        scorer.setTest(predict_path)

        # Run the stored model; the scores land in the per-user output file.
        output_path = 'xlearn_dataset/output_'+self._user_id+'.txt'
        scorer.predict('final_model/xLearn_v0.1.out', output_path)

        scores = pd.read_csv(output_path, header=None, names=['final_stars'])
        combined = pd.concat([self._xrestaurants, scores], axis=1)
        ranked = combined.sort_values('final_stars', ascending=False)
        return ranked[:num]
Ejemplo n.º 11
0
    def predict(cls, model_path, test_path, set_sigmoid=True) -> np.ndarray:
        """Score ``test_path`` with a stored FM model and return the scores.

        Args:
            model_path: Path of the fitted model (``str()``-ed before use).
            test_path: Path of the data to score (``str()``-ed before use).
            set_sigmoid: When true, ``setSigmoid()`` is enabled so scores
                are mapped into the (0, 1) range.

        Returns:
            The first column of the prediction file as a NumPy array.
        """
        fm_model = xl.create_fm()
        if set_sigmoid:
            # Map the raw scores into (0, 1) via the setSigmoid() API.
            fm_model.setSigmoid()
        fm_model.setTest(str(test_path))

        # Predict.  The context manager closes (and thereby removes) the
        # temporary file even if predict() or read_csv() raises.
        with tempfile.NamedTemporaryFile(delete=True) as temp_output_file:
            fm_model.predict(str(model_path), temp_output_file.name)
            y_pred = pd.read_csv(temp_output_file.name, header=None)[0].values

        return y_pred
Ejemplo n.º 12
0
 def __init__(self, X_train, y_train, categorical_cols, categorical_universe):
     """Set up file locations, the one-hot encoder and the FM model.

     Args:
         X_train: Training features, forwarded to ``self.transform_input``.
         y_train: Training labels, forwarded to ``self.transform_input``.
         categorical_cols: Stored on the instance unchanged.
         categorical_universe: Values the ``OneHotEncoder`` is fitted on;
             must support ``reshape(-1, 1)``.
     """
     # Categorical metadata.
     self.categorical_cols = categorical_cols
     self.categorical_universe = categorical_universe

     # Fixed on-disk locations used by this wrapper.
     self.x_train_file = "data/fm/x_train.txt"
     self.x_test_file = "data/fm/x_test.txt"
     self.model_file = "data/fm/model.txt"
     self.weights_file = "data/fm/model_wt.out"

     # The encoder is fitted on the universe reshaped to a single column.
     universe_column = self.categorical_universe.reshape(-1, 1)
     self.enc = OneHotEncoder().fit(universe_column)

     # FM model with its text-model path registered.
     self.model = xl.create_fm()
     self.model.setTXTModel(self.model_file)

     self.transform_input(X_train, y_train, 'train')
Ejemplo n.º 13
0
    def __init__(self,
                 URM_train,
                 train_svm_file_path,
                 approximate_recommender: BaseRecommender,
                 ICM_train=None,
                 UCM_train=None,
                 item_feature_fields=None,
                 user_feature_fields=None,
                 valid_svm_file_path=None,
                 max_items_to_predict=1000,
                 model_filename="model.out",
                 model_type="ffm",
                 temp_relative_folder="temp/",
                 verbose=True):
        """Configure the field layout, file locations and xLearn model.

        Args:
            URM_train: Interaction matrix; ``shape[0]`` entries get field 0
                and ``shape[1]`` entries get field 1.
            train_svm_file_path: Training file registered with the model.
            approximate_recommender: Stored on the instance unchanged.
            ICM_train: Stored on the instance unchanged.
            UCM_train: Stored on the instance unchanged.
            item_feature_fields: Optional field ids of the item features;
                shifted by 2 so they follow the user/item fields.
                Assumes a NumPy integer array - TODO confirm with callers.
            user_feature_fields: Optional field ids of the user features;
                shifted to start after the last item-feature field.
                Assumes a NumPy integer array - TODO confirm with callers.
            valid_svm_file_path: Optional validation file.
            max_items_to_predict: Stored on the instance unchanged.
            model_filename: File name of the model inside the model folder.
            model_type: ``"ffm"`` or ``"fm"``.
            temp_relative_folder: Temp folder, relative to the FFM data dir.
            verbose: Forwarded to the parent constructor.

        Raises:
            ValueError: If ``model_type`` is neither ``"ffm"`` nor ``"fm"``.
        """
        self.ICM_train = ICM_train
        self.UCM_train = UCM_train

        # Field 0 is shared by all users, field 1 by all items.
        field_blocks = [
            np.full(shape=URM_train.shape[0], fill_value=0),
            np.full(shape=URM_train.shape[1], fill_value=1),
        ]

        # Feature fields are optional.  Only the blocks that were actually
        # supplied are concatenated: np.concatenate rejects None entries, so
        # the default arguments used to crash here.
        next_free_field = 2
        if item_feature_fields is not None:
            item_feature_fields = item_feature_fields + next_free_field
            field_blocks.append(item_feature_fields)
            next_free_field = np.max(item_feature_fields) + 1
        if user_feature_fields is not None:
            user_feature_fields = user_feature_fields + next_free_field
            field_blocks.append(user_feature_fields)
        self.fields = np.concatenate(field_blocks)

        self.approximate_recommender = approximate_recommender
        self.max_items_to_predict = max_items_to_predict

        # Set path of temp folder and model_path
        root_path = get_project_root_path()
        fm_data_path = os.path.join(root_path, "resources", "ffm_data")
        self.temp_folder = os.path.join(fm_data_path, temp_relative_folder)
        self.model_folder = os.path.join(fm_data_path, "model")
        self.model_path = os.path.join(self.model_folder, model_filename)

        if model_type == "ffm":
            self.model = xl.create_ffm()
        elif model_type == "fm":
            self.model = xl.create_fm()
        else:
            raise ValueError(
                "model_type is inexistent, choose between ffm and fm")
        self.model.setTrain(train_svm_file_path)
        if valid_svm_file_path is not None:
            self.model.setValidate(valid_svm_file_path)

        super().__init__(URM_train, verbose)
Ejemplo n.º 14
0
    def xlearn(self):
        """Train an FM regressor with xLearn and return the test RMSE.

        ``train.libsvm`` and ``test.libsvm`` are read from
        ``self._data_path``; the model and the predictions are written to
        ``model.out`` and ``output.txt`` under ``self._xlearn_path``.  The
        test file doubles as validation set during training.

        NOTE(review): ``setSigmoid()`` is applied to the predictions even
        though the task is ``'reg'`` - confirm the targets lie in [0, 1].

        Returns:
            Root mean squared error between ``self.y_test`` and the
            predictions.
        """
        # Training task
        learner = xl.create_fm()

        train_path = join(self._data_path, 'train.libsvm')
        learner.setTrain(train_path)
        valid_path = join(self._data_path, 'test.libsvm')
        learner.setValidate(valid_path)

        if self.onlyResults:
            learner.setQuiet()
        hyper_params = {'task':'reg', 'lr':0.1, 'lambda':0.002, 'metric':'rmse', 'opt':'ftrl'}

        model_path = join(self._xlearn_path, 'model.out')
        learner.fit(hyper_params, model_path)

        # Prediction task
        test_path = join(self._data_path, 'test.libsvm')
        learner.setTest(test_path)
        learner.setSigmoid()
        output_path = join(self._xlearn_path, 'output.txt')
        learner.predict(join(self._xlearn_path, 'model.out'), output_path)

        raw_output = pd.read_csv(join(self._xlearn_path, 'output.txt'), header=None)
        predictions = raw_output.values.flatten()
        if self.onlyResults:
            print("Completed xLearn evaluation.")

        mse = mean_squared_error(self.y_test, predictions)
        return np.sqrt(mse)
Ejemplo n.º 15
0
    def fit(self,
            df,
            label,
            eva_df=None,
            eva_label=None,
            path='datasource/train.ffm',
            overwrite_path=True,
            eva_path='datasource/valid.ffm',
            model_path='datasource/ffm_model.out',
            overwrite_eva_path=True):
        """Encode the data to FFM text files and train the configured model.

        Args:
            df: Training features; its index is reset in place to 0..n-1.
            label: Training labels; its index is reset in place to 0..n-1.
            eva_df: Optional evaluation features (index reset in place).
            eva_label: Optional evaluation labels (index reset in place).
                Must be given together with ``eva_df``.
            path: File the encoded training data is written to.
            overwrite_path: Currently unused by this method.
            eva_path: File the encoded evaluation data is written to.
            model_path: File the fitted model is written to; also stored as
                ``self.model_path``.
            overwrite_eva_path: Currently unused by this method.

        Raises:
            ValueError: If only one of ``eva_df``/``eva_label`` is given, or
                if ``self.model_type`` is not ``'lr'``, ``'fm'`` or ``'ffm'``.
        """
        if (eva_df is None) ^ (eva_label is None):
            raise ValueError(
                'params eva_df, eva_label must be all None or all have value.')

        df.index = range(df.shape[0])
        label.index = range(label.shape[0])

        # Each model type maps to its own constructor ('lr' -> linear,
        # 'ffm' -> field-aware FM); the two used to be swapped.
        if self.model_type == 'lr':
            self.clf = xl.create_linear()
        elif self.model_type == 'fm':
            self.clf = xl.create_fm()
        elif self.model_type == 'ffm':
            self.clf = xl.create_ffm()
        else:
            raise ValueError(
                "{!r} is an invalid value for model_type; "
                "expected 'lr', 'fm' or 'ffm'.".format(self.model_type))

        self.fe = FFMEncoder(df)
        self.fe.fit(df, self.cutoff)
        self.fe.transform(df, label, path)
        if eva_df is not None:
            eva_df.index = range(eva_df.shape[0])
            eva_label.index = range(eva_label.shape[0])
            self.fe.transform(eva_df, eva_label, eva_path)

        self.clf.setTrain(path)
        if eva_df is not None:
            self.clf.setValidate(eva_path)

        self.clf.fit(self.params, model_path)
        self.model_path = model_path
Ejemplo n.º 16
0
def xl_fm(train_file, test_file, trainY, testY, rank, reg, epoch):
    """Train an FM regressor with xLearn and return the test RMSE.

    The module-level ``model_file`` and ``output_file`` paths are used for
    the model and the predictions; existing files at those paths are
    removed first.  No validation set is configured.

    Args:
        train_file: Training data file.
        test_file: Test data file.
        trainY: Not used by this function.
        testY: True targets of the test data.
        rank: Passed to xLearn as ``k``.
        reg: Passed to xLearn as ``lambda``.
        epoch: Passed to xLearn as ``epoch``.

    Returns:
        Root mean squared error of the predictions against ``testY``.
    """
    # Start from a clean slate: drop leftovers of a previous run.
    for stale_path in (model_file, output_file):
        if os.path.exists(stale_path):
            os.unlink(stale_path)

    hyper_params = {
        'task': 'reg',
        'metric': 'rmse',
        'epoch': epoch,
        'k': rank,
        'lambda': reg
    }

    learner = xl.create_fm()
    learner.setTrain(train_file)
    learner.fit(hyper_params, model_file)

    learner.setTest(test_file)
    learner.predict(model_file, output_file)

    pred = pd.read_csv(output_file, header=None).values.flatten()
    squared_error = (testY - pred)**2
    return np.mean(squared_error)**0.5
Ejemplo n.º 17
0
def run_xlearn():
    """Train the xLearn model selected by the module-level settings.

    ``MODEL`` picks the model kind (``'LM'``, ``'FM'`` or ``'FFM'``),
    ``TRAIN``/``TEST`` are the training and validation files, and the
    remaining upper-case settings fill the parameter dict.  Early stopping
    is disabled when ``WINDOW`` is 0.  The model is written to
    ``./xlearn.model``.
    """
    if MODEL == 'LM':
        learner = xl.create_linear()
    elif MODEL == 'FM':
        learner = xl.create_fm()
    else:
        # Anything that is neither 'LM' nor 'FM' has to be 'FFM'.
        assert MODEL == 'FFM'
        learner = xl.create_ffm()

    learner.setTrain(TRAIN)
    learner.setValidate(TEST)

    if WINDOW == 0:
        learner.disableEarlyStop()

    hyper_params = {
        'task': TASK,
        'epoch': EPOCH,
        'opt': OPT,
        'metric': METRIC,
        'k': K,
        'lr': LEARNING_RATE,
        'lambda': LAMBDA,
    }
    learner.fit(hyper_params, './xlearn.model')
Ejemplo n.º 18
0
    def __init__(self, X_train, X_test, y_train, y_test, categorical_cols):
        """Create the FM model and dump both data splits in libsvm format.

        Args:
            X_train: Training features, one-hot encoded before dumping.
            X_test: Test features, one-hot encoded before dumping.
            y_train: Training labels.
            y_test: Test labels.
            categorical_cols: Stored on the instance unchanged.
        """
        # Fixed on-disk locations used by this wrapper.
        self.model_file = "data/fm/model.txt"
        self.weights_file = "data/fm/model_wt.out"
        self.x_train_file = "data/fm/x_train.txt"
        self.x_test_file = "data/fm/x_test.txt"
        self.categorical_cols = categorical_cols

        # FM model with its text-model path registered.
        self.model = xl.create_fm()
        self.model.setTXTModel(self.model_file)

        encoded_train = self.one_hot_categorical_col(X_train)
        encoded_test = self.one_hot_categorical_col(X_test)

        # Train split first, then test split, with identical dump options.
        splits = (
            (encoded_train, y_train, self.x_train_file),
            (encoded_test, y_test, self.x_test_file),
        )
        for features, labels, out_path in splits:
            dump_svmlight_file(features,
                               labels,
                               out_path,
                               zero_based=True,
                               multilabel=False)
Ejemplo n.º 19
0
    def _build_model(self):
        """Train the FM regressor and record the parameters used.

        Training data is ``<self._dataset_path>/fm_libsvm.csv``.  The
        parameter dict is saved to ``fm_parameter.json`` in the working
        directory; the model goes to ``self._out_model_file`` with a text
        dump at ``self._txt_model_file``.
        """
        print("Build Model")
        training_file = self._dataset_path + '/fm_libsvm.csv'

        learner = xl.create_fm()
        learner.setTrain(training_file)

        hyper_params = {
            'task': 'reg',
            'lr': 0.15,
            'epoch': 10,
            'lambda': 0.002,
            'metric': 'rmse',
            'nthread': os.cpu_count(),
            'k': self._num_dim
        }

        # Persist the exact parameters used for this run.
        with open('fm_parameter.json', 'w') as param_file:
            json.dump(hyper_params, param_file, indent=4)

        learner.setTXTModel(self._txt_model_file)
        learner.fit(hyper_params, self._out_model_file)
Ejemplo n.º 20
0
def xl_objective(params, method="fm"):
    """Objective function: mean validation RMSE over all data folds.

    For every (train, valid, target) triple from the module-level
    ``train_fpaths``, ``valid_fpaths`` and ``valid_target_fpaths`` a model
    is fitted on the training file, the validation file is scored, and the
    RMSE against the targets loaded from the target file is recorded.

    The call counter ``xl_objective.i`` is incremented on every call and
    must be initialised by the caller beforehand.

    Args:
        params: xLearn parameter mapping.  ``task`` and ``metric`` are
            forced to regression/RMSE, and ``epoch``/``k`` are cast to int.
            The mapping passed in is not modified.
        method: ``"linear"`` or ``"fm"``.

    Returns:
        The mean RMSE across folds.

    Raises:
        ValueError: If ``method`` is neither ``"linear"`` nor ``"fm"``.
    """
    xl_objective.i += 1

    # Work on a copy so the caller's dict is left untouched.
    params = dict(params)
    params['task'] = 'reg'
    params['metric'] = 'rmse'

    # remember hyperopt casts as floats
    params['epoch'] = int(params['epoch'])
    params['k'] = int(params['k'])

    # Compare strings by value: identity (`is`) only works by accident for
    # interned literals and fails for strings built at runtime.
    if method == "linear":
        xl_model = xl.create_linear()
    elif method == "fm":
        xl_model = xl.create_fm()
    else:
        raise ValueError(
            "unknown method {!r}; expected 'linear' or 'fm'".format(method))

    xl_model.setQuiet()

    # The scratch files are the same for every fold.
    preds_fname = os.path.join(XLEARN_DIR, 'tmp_output.txt')
    model_fname = os.path.join(XLEARN_DIR, "tmp_model.out")

    results = []
    for train, valid, target in zip(train_fpaths, valid_fpaths, valid_target_fpaths):
        xl_model.setTrain(train)
        xl_model.setTest(valid)
        xl_model.fit(params, model_fname)
        xl_model.predict(model_fname, preds_fname)

        y_valid = np.loadtxt(target)
        predictions = np.loadtxt(preds_fname)
        results.append(np.sqrt(mean_squared_error(y_valid, predictions)))

    error = np.mean(results)
    print("INFO: iteration {} error {:.3f}".format(xl_objective.i, error))

    return error
# Convert the labelled test data with the convert_to_ffm helper (defined
# outside this fragment; presumably it writes Test_ffm.txt - confirm).
convert_to_ffm(test_data_Label, 'Test', list(num_col_te), list(cat_col_te),
               list(all_col_te))

# # 2. Package Comparison
# ## (1) xlearn
# Supports FFM, LR and FM.

# In[31]:

import xlearn as xl

# - FM

# In[32]:

fm_model = xl.create_fm()  # Use factorization machine (FM)
fm_model.setTrain("Train_ffm.txt")  # Training data
fm_model.setValidate("Test_ffm.txt")  # Validation data
# param:
#  0. binary classification
#  1. learning rate : 0.2
#  2. regular lambda : 0.002
#  3. evaluation metric : acc
param = {'task': 'binary', 'lr': 0.2, 'lambda': 0.002, 'metric': 'acc'}
# Train model; the fitted model is written to ./model_fm.out
fm_model.fit(param, "./model_fm.out")

# Prediction task
fm_model.setTest("Test_ffm.txt")  # Test data
fm_model.setSigmoid()  # Convert output to 0-1

# Start to predict
Ejemplo n.º 22
0
import xlearn as xl
import pandas as pd
from sklearn.datasets import dump_svmlight_file

# Load the engineered feature table that is going to be scored.
df = pd.read_csv("data/ts_FE.csv")

# Keep the first 30 features of the feature-importance ranking.
features = pd.read_csv('feature.csv')
x_columns = features.head(30)['feature'].tolist()

y = df.click  # y holds the label column of the data
dummy = pd.get_dummies(df[x_columns])
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# NumPy array and is available in every pandas version.
mat = dummy.values
dump_svmlight_file(mat, y, 'test.libsvm', zero_based=False)


xfm = xl.create_fm()
# Training step, kept for reference; prediction below expects the
# resulting model.out to exist already:
# xfm.setTrain("train.libsvm")
# param = {'task':'binary', 'lr':0.0001, 'lambda':0.01, 'k':8, 'epoch':150}
# xfm.fit(param, 'model.out')
# xfm.setTXTModel("model.txt")

# Score test.libsvm with the stored model; sigmoid maps scores into (0, 1).
xfm.setSigmoid()
xfm.setTest("test.libsvm")
xfm.predict('model.out', "output.txt")

# Pair the predicted click scores with the ids from data/test.csv
# (ids are read as strings) and write the result file.
train = pd.read_csv('data/test.csv', dtype={'id': 'U'})
click = pd.read_csv('output.txt', names=['click'])
out = pd.DataFrame({'id': train['id'], 'click': click['click']})
out.to_csv('Y_out.csv', index=False, sep=',')
Ejemplo n.º 23
0
import xlearn as xl

# Cross-validation run of a factorization machine on the house-price data.
# (The variable is called ffm_model, but a plain FM model is created.)
ffm_model = xl.create_fm()
ffm_model.setTrain("./house_price_train.txt")  # training data

# Parameters:
#   task    - regression
#   lr      - learning rate, 0.2
#   lambda  - regularisation strength, 0.002
#   metric  - evaluation metric, rmse
param = {
    'task': 'reg',
    'lr': 0.2,
    'lambda': 0.002,
    'metric': 'rmse',
}

# Run cross-validation instead of a single fit.
ffm_model.cv(param)
Ejemplo n.º 24
0
import os
import xlearn as xl
from config import Config
from utils import GenResultFromTxt

conf = Config()

if __name__ == "__main__":
    # Sparse and dense feature names (not used further in this script).
    sparse_features = [
        'uid', 'user_city', 'item_id', 'author_id', 'item_city', 'channel',
        'music_id', 'did', 'gender',
    ]
    dense_features = ['video_duration', 'beauty']
    target = ['finish', 'like']

    # Score the test file of every target with its stored model.
    for tar in target:
        test_path = os.path.join(conf.data_path, "FFM_{}_test.txt".format(tar))
        model_path = os.path.join(conf.model_save_path, "FFM_{}.model".format(tar))
        save_path = os.path.join(conf.result_save_path, "FFM_{}.output".format(tar))
        print("==> Predict model")
        fm_model = xl.create_fm()
        fm_model.setTest(test_path)
        fm_model.setSigmoid()
        fm_model.predict(model_path, save_path)

    # Merge both prediction files into the final result CSV.
    print("==> Generate result file")
    finish_output_path = os.path.join(conf.result_save_path, "FFM_finish.output")
    like_output_path = os.path.join(conf.result_save_path, "FFM_like.output")
    result_save_path = os.path.join(conf.result_save_path, "FFM_result322.csv")
    GenResultFromTxt(
        conf.test_data_path,
        finish_output_path,
        like_output_path,
        result_save_path,
    )
    print("==> Finish generating result file")
Ejemplo n.º 25
0
            for i, x in enumerate(catdict.keys()):
                if(catdict[x]==0):  # numerical
                    datastring = datastring + " "+str(i)+":"+ str(i)+":"+ str(datarow[x])
                else:  # categorical
                    datastring = datastring + " "+str(i)+":"+ str(int(datarow[x]))+":1"
            datastring += '\n'
            
            if n < n1: file_train.write(datastring)
            elif n < n_train: file_valid.write(datastring)
            else: file_test.write(datastring)

# Convert the frame with every column treated as categorical; the label
# column is 'click'.
convert_to_ffm(
    df,
    numerics=[],
    categories=cols,
    features=cols,
    Label='click',
    n_train=n_train,
    train_size=0.8,
)

# ---------------- FM ---------------- #
# Model setup: training and validation files, sigmoid enabled.
fm_model = xl.create_fm()
fm_model.setTrain(fp_train)
fm_model.setValidate(fp_valid)
fm_model.setSigmoid()

# Binary task with 20 latent factors, optimised with adagrad.
param = {
    'task': 'binary',
    'k': 20,
    'lr': 0.02,
    'lambda': 0.002,
    'epoch': 100,
    'opt': 'adagrad',
}

# Training; the model is written to fp_model_fm.
fm_model.fit(param, fp_model_fm)
Ejemplo n.º 26
0
Archivo: fm.py Proyecto: freygit/36
import xlearn as xl

# Shared training parameters for all three models.
param = {
    'task': 'binary',
    'lr': 0.2,
    'epoch': 20,
    'k': 2,
    'lambda': 0.002,
    'metric': 'auc',
}

train_data = "../../data/criteo_conversion_logs/small_train.txt"
test_data = "../../data/criteo_conversion_logs/small_test.txt"


def _train(create_model, model_out):
    """Create a model, register the shared data files and fit it."""
    model = create_model()
    model.setTrain(train_data)
    model.setValidate(test_data)
    model.setTest(test_data)
    model.setSigmoid()
    model.fit(param, model_out)
    return model


# Linear model, factorization machine and field-aware FM, trained in turn
# on the same data with the same parameters.
lr_model = _train(xl.create_linear, './lr_model.out')
fm_model = _train(xl.create_fm, './fm_model.out')
ffm_model = _train(xl.create_ffm, './ffm_model.out')