def __init__(self, model_type):
    """Remember the requested model type and build the matching xlearn model.

    Args:
        model_type: One of "FM", "FFM" or "linear". The restriction is
            enforced with an ``assert``, so it is not checked when Python
            runs with assertions disabled; in that case any other value
            ends up with the linear model.
    """
    # Only these three model types are accepted.
    assert model_type in ["FM", "FFM", "linear"]
    self.model_type = model_type

    # Start from the default (linear) factory and override it for FM / FFM.
    factory = xl.create_linear
    if self.model_type == "FM":
        factory = xl.create_fm
    elif self.model_type == "FFM":
        factory = xl.create_ffm
    self.model = factory()
def creat_model(model_type):
    """Create and return the xlearn model object selected by ``model_type``.

    "FM" returns ``xl.create_fm()``, "FFM" returns ``xl.create_ffm()``;
    every other value returns the default linear model from
    ``xl.create_linear()``.
    """
    if model_type == "FM":
        return xl.create_fm()
    if model_type == "FFM":
        return xl.create_ffm()
    # Anything else falls back to the default linear model.
    return xl.create_linear()
def fit(self, df, label, eva_df=None, eva_label=None,
        path='datasource/train.ffm', overwrite_path=True,
        eva_path='datasource/valid.ffm',
        model_path='datasource/ffm_model.out', overwrite_eva_path=True):
    """Encode the data through ``FFMEncoder`` and train an xlearn model.

    The model is chosen from ``self.model_type``: ``'lr'`` -> linear,
    ``'fm'`` -> FM, ``'ffm'`` -> FFM.

    Args:
        df: Training features. Its index is replaced in place with
            ``range(df.shape[0])`` (presumably a pandas DataFrame - confirm
            against callers).
        label: Training labels; index replaced in place the same way.
        eva_df: Optional validation features; index is reset in place too.
        eva_label: Optional validation labels. Must be supplied together
            with ``eva_df`` (both or neither).
        path: File path handed to ``self.fe.transform`` for the training
            data and then to ``setTrain``.
        overwrite_path: Accepted for interface compatibility; not used here.
        eva_path: File path handed to ``self.fe.transform`` for the
            validation data and then to ``setValidate``.
        model_path: Passed to the model's ``fit`` call and stored on
            ``self.model_path`` afterwards.
        overwrite_eva_path: Accepted for interface compatibility; not used
            here.

    Raises:
        Exception: If exactly one of ``eva_df`` / ``eva_label`` is given.
        ValueError: If ``self.model_type`` is not 'lr', 'fm' or 'ffm'.
    """
    if (eva_df is None) != (eva_label is None):
        raise Exception(
            'params eva_df, eva_label must be all None or all have value.')

    # Pick the model before touching the caller's data so that an invalid
    # model_type fails without side effects.
    if self.model_type == 'lr':
        self.clf = xl.create_linear()
    elif self.model_type == 'fm':
        self.clf = xl.create_fm()
    elif self.model_type == 'ffm':
        self.clf = xl.create_ffm()
    else:
        raise ValueError(
            "{!r} is an invalid value for model_type; "
            "expected 'lr', 'fm' or 'ffm'.".format(self.model_type))

    # Normalise the indices to 0..n-1 (mutates the caller's objects).
    df.index = range(df.shape[0])
    label.index = range(label.shape[0])

    self.fe = FFMEncoder(df)
    self.fe.fit(df, self.cutoff)
    self.fe.transform(df, label, path)

    has_validation = eva_df is not None
    if has_validation:
        eva_df.index = range(eva_df.shape[0])
        eva_label.index = range(eva_label.shape[0])
        self.fe.transform(eva_df, eva_label, eva_path)

    self.clf.setTrain(path)
    if has_validation:
        self.clf.setValidate(eva_path)
    self.clf.fit(self.params, model_path)
    self.model_path = model_path
def run_xlearn():
    """Train an xlearn model configured entirely by module-level settings.

    Reads ``MODEL`` ('LM', 'FM' or 'FFM'), ``TRAIN``, ``TEST``, ``WINDOW``,
    ``TASK``, ``EPOCH``, ``OPT``, ``METRIC``, ``K``, ``LEARNING_RATE`` and
    ``LAMBDA``. Early stopping is disabled when ``WINDOW == 0``. The model
    output path passed to ``fit`` is ``./xlearn.model``.
    """
    if MODEL not in ('LM', 'FM'):
        # FFM is the only remaining valid choice; anything else trips the
        # assertion.
        assert MODEL == 'FFM'
        build = xl.create_ffm
    elif MODEL == 'LM':
        build = xl.create_linear
    else:
        build = xl.create_fm
    model = build()

    model.setTrain(TRAIN)
    model.setValidate(TEST)
    if WINDOW == 0:
        model.disableEarlyStop()

    hyper_params = {
        'task': TASK,
        'epoch': EPOCH,
        'opt': OPT,
        'metric': METRIC,
        'k': K,
        'lr': LEARNING_RATE,
        'lambda': LAMBDA,
    }
    model.fit(hyper_params, './xlearn.model')
def xl_objective(params, method="fm"):
    """Objective function: mean validation RMSE of an xlearn regressor.

    For every (train, valid, target) triple taken from the module-level
    ``train_fpaths``, ``valid_fpaths`` and ``valid_target_fpaths``, the
    model is fitted, predictions are written to a temporary file under
    ``XLEARN_DIR``, and the RMSE against the target file is computed. The
    mean of those RMSE values is returned.

    Args:
        params: Hyper-parameter dict. It is updated in place: ``task`` and
            ``metric`` are set to 'reg' / 'rmse', and ``epoch`` and ``k``
            are cast to ``int``.
        method: "linear" or "fm".

    Returns:
        The mean RMSE over all folds.

    Raises:
        ValueError: If ``method`` is neither "linear" nor "fm".
    """
    # Compare by value, not identity, and reject unknown methods up front
    # instead of failing later on an unbound model variable.
    if method not in ("linear", "fm"):
        raise ValueError(
            "unknown method {!r}; expected 'linear' or 'fm'".format(method))

    # Iteration counter kept on the function object; start at 0 if the
    # caller never initialised it.
    xl_objective.i = getattr(xl_objective, "i", 0) + 1

    params['task'] = 'reg'
    params['metric'] = 'rmse'
    # remember hyperopt casts as floats
    params['epoch'] = int(params['epoch'])
    params['k'] = int(params['k'])

    if method == "linear":
        xl_model = xl.create_linear()
    else:
        xl_model = xl.create_fm()

    # The scratch files are the same for every fold, so build the paths once.
    preds_fname = os.path.join(XLEARN_DIR, 'tmp_output.txt')
    model_fname = os.path.join(XLEARN_DIR, "tmp_model.out")

    results = []
    for train, valid, target in zip(train_fpaths, valid_fpaths,
                                    valid_target_fpaths):
        xl_model.setTrain(train)
        xl_model.setTest(valid)
        xl_model.setQuiet()
        xl_model.fit(params, model_fname)
        xl_model.predict(model_fname, preds_fname)

        y_valid = np.loadtxt(target)
        predictions = np.loadtxt(preds_fname)
        loss = np.sqrt(mean_squared_error(y_valid, predictions))
        results.append(loss)

    error = np.mean(results)
    print("INFO: iteration {} error {:.3f}".format(xl_objective.i, error))
    return error
import xlearn as xl

# Data files for this demo; the test file doubles as the validation set.
agaricus_train = "./agaricus_train.txt"
agaricus_test = "./agaricus_test.txt"

# ---------------------------------------------------------------------
# Training task: linear model.
# ---------------------------------------------------------------------
linear_model = xl.create_linear()
linear_model.setTrain(agaricus_train)
linear_model.setValidate(agaricus_test)

# Hyper-parameters:
#   task   - binary classification
#   lr     - learning rate 0.2
#   lambda - 0.002
#   metric - accuracy
#   opt    - sgd optimization method
param = {
    'task': 'binary',
    'lr': 0.2,
    'lambda': 0.002,
    'metric': 'acc',
    'opt': 'sgd',
}

# Train; the trained model is stored in ./model.out.
linear_model.fit(param, './model.out')

# ---------------------------------------------------------------------
# Prediction task.
# ---------------------------------------------------------------------
linear_model.setTest(agaricus_test)
linear_model.setSigmoid()  # convert output to the 0-1 range
# NOTE(review): the original comments announce a predict step that writes
# output.txt, but no predict() call is present in this section - confirm
# it exists further down.
'epoch': 20 }) param.append({ 'task': 'binary', 'lr': 0.001, 'lambda': 0.001, 'metric': 'auc', 'epoch': 20 }) out_path = '../Input/train/FFM_result' if not os.path.exists(out_path): os.makedirs(out_path) # Training task ffm_model = xl.create_linear() # Use field-aware factorization machine for k, fv in enumerate(featrue_version): if submission == 'y': fte = train_path + '/encode/' + fv + 'test_fullFFM.txt' else: fte = train_path + '/encode/' + fv + 'test_sample10wFFM.txt' for i, pa in enumerate(param): # Prediction task ffm_model.setTest(fte) # Test data ffm_model.setSigmoid() # Convert output to 0-1 if submission == 'y':
import xlearn as xl

# Data files; the test file is also used as the validation set.
agaricus_train = "./agaricus_train.txt"
agaricus_test = "./agaricus_test.txt"

# --- Training task ---------------------------------------------------
linear_model = xl.create_linear()
linear_model.setTrain(agaricus_train)
linear_model.setValidate(agaricus_test)
param = {
    'task': 'binary',
    'lr': 0.2,
    'lambda': 0.002,
    'metric': 'acc',
    'opt': 'sgd',
}
linear_model.fit(param, './model.out')

# --- Prediction task -------------------------------------------------
linear_model.setTest(agaricus_test)
linear_model.setSigmoid()  # convert output to 0-1
linear_model.predict("./model.out", "./output.txt")
import xlearn as xl

# Hyper-parameters shared by all three models.
param = {
    'task': 'binary',
    'lr': 0.2,
    'epoch': 20,
    'k': 2,
    'lambda': 0.002,
    'metric': 'auc',
}

train_data = "../../data/criteo_conversion_logs/small_train.txt"
test_data = "../../data/criteo_conversion_logs/small_test.txt"


def _configure_and_fit(model, model_out):
    """Point ``model`` at the shared data files, fit it, and return it."""
    model.setTrain(train_data)
    model.setValidate(test_data)
    model.setTest(test_data)
    model.setSigmoid()
    model.fit(param, model_out)
    return model


# Each model is created and fully trained before the next one is created.
lr_model = _configure_and_fit(xl.create_linear(), './lr_model.out')
fm_model = _configure_and_fit(xl.create_fm(), './fm_model.out')
ffm_model = _configure_and_fit(xl.create_ffm(), './ffm_model.out')