def check_classification_losses(loss, degree):
    y = np.sign(_poly_predict(X, P, lams, kernel="anova", degree=degree))
    clf = FactorizationMachineClassifier(degree=degree, loss=loss, beta=1e-3,
                                         fit_lower=None, fit_linear=False,
                                         tol=1e-3, random_state=0)
    clf.fit(X, y)
    assert_equal(1.0, clf.score(X, y))
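# A minimal driver for the check above, assuming the module-level X, P and
# lams fixtures used by _poly_predict; the particular loss/degree grid here
# is an assumption for illustration, not taken from the original test module.
def test_classification_losses():
    for loss in ("squared_hinge", "logistic"):
        for degree in (2, 3):
            check_classification_losses(loss, degree)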
def test_augment():
    # The following linearly separable dataset cannot be modeled with just an FM
    X_evil = np.array([[-1, -1], [1, 1]])
    y_evil = np.array([-1, 1])
    clf = FactorizationMachineClassifier(fit_linear=False, fit_lower=None,
                                         random_state=0)
    clf.fit(X_evil, y_evil)
    assert_equal(0.5, clf.score(X_evil, y_evil))

    # However, by adding a dummy feature (a column of all ones), the linear
    # effect can be captured.
    clf = FactorizationMachineClassifier(fit_linear=False, fit_lower='augment',
                                         random_state=0)
    clf.fit(X_evil, y_evil)
    assert_equal(1.0, clf.score(X_evil, y_evil))
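# A minimal sketch of the manual equivalent of fit_lower='augment', assuming
# plain NumPy: appending a constant column of ones lets the degree-2
# interaction weights involving that column act as linear terms. The helper
# name below is ours, not part of the original test suite.
def check_manual_augment():
    X_evil = np.array([[-1., -1.], [1., 1.]])
    y_evil = np.array([-1, 1])
    # add the dummy column by hand instead of relying on fit_lower='augment'
    X_aug = np.hstack([X_evil, np.ones((X_evil.shape[0], 1))])
    clf = FactorizationMachineClassifier(fit_linear=False, fit_lower=None,
                                         random_state=0)
    clf.fit(X_aug, y_evil)
    return clf.score(X_aug, y_evil)  # expected to reach 1.0, as in test_augment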
def ModelParamSearch(Param, Model, NumbModel, N_splits, X_train, y_train):
    N_iter_search = np.maximum(10, NumbModel * 4)
    ss = ShuffleSplit(n_splits=N_splits, test_size=0.2, random_state=0)

    if Model == 'XGB':
        clf = XGBoostClassifier(eval_metric='auc', num_class=2, nthread=4, silent=1)
    elif Model == 'NN':
        clf = MLPClassifier(max_iter=500)
    elif Model == 'FM':
        clf = FactorizationMachineClassifier(max_iter=500)
    elif Model == 'LR':
        clf = LogisticRegression(max_iter=500)
    elif Model == 'KNN':
        clf = KNeighborsClassifier()
    elif Model == 'RF':
        clf = RandomForestClassifier(oob_score=True, bootstrap=True)

    ParamDict = RandomParamSearch(clf, X_train, y_train, N_iter_search,
                                  NumbModel, Param, ss, 0, Model)
    return ParamDict, clf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import NuSVC
from polylearn import FactorizationMachineClassifier

xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
                     np.linspace(-3, 3, 500))

rng = np.random.RandomState(42)
X = rng.randn(300, 2)
y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# XOR is too easy for factorization machines, so add noise :)
flip = rng.randint(300, size=15)
y[flip] = ~y[flip]

# fit the model
fm = FactorizationMachineClassifier(n_components=1, fit_linear=False,
                                    random_state=0)
fm.fit(X, y)

# fit a NuSVC for comparison
svc = NuSVC(kernel='poly', degree=2)
svc.fit(X, y)

# plot the decision function for each datapoint on the grid
Z = fm.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

Z_svc = svc.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_svc = Z_svc.reshape(xx.shape)

plt.imshow(Z,
           interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           aspect='auto',
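           # The remaining arguments and the rest of the figure below are an
           # assumption-based sketch; the origin, contour, and scatter styling
           # are illustrative only.
           origin='lower')
plt.contour(xx, yy, Z_svc, levels=[0], linewidths=2, linestyles='dashed')
plt.scatter(X[:, 0], X[:, 1], s=30, c=y, edgecolors='k')
plt.title("FM decision function (image) vs. NuSVC boundary (dashed)")
plt.show()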
def ModelStacking(Params, Models, NumbModels, X_train, y_train, X):
    N_splits = 5  # number of splits for shuffle splitting
    NotDone = 1
    while NotDone:
        NewX = pd.Series().reindex_like(y_train)  # create temp pandas Series
        ResultParams = [None] * len(Models)
        for i in range(len(NumbModels)):
            print('## Searching parameter set for model: {} ... '.format(Models[i]))
            ResultParams[i], ThisClf = ModelParamSearch(Params[i], Models[i],
                                                        NumbModels[i], N_splits,
                                                        X_train, y_train)
            print('## Creating classifiers and predicting results of model: {} ...'.format(Models[i]))
            for ii in range(len(ResultParams[i])):
                # create classifiers and fit them
                ThisClf.set_params(**ResultParams[i][ii]).fit(X_train, y_train)
                ThisPred = ThisClf.predict(X_train)  # prediction of this model
                ThisTempPredSeries = pd.Series(ThisPred, index=y_train.index)
                NewX = pd.concat([NewX, ThisTempPredSeries], axis=1)

        # drop the empty seed column
        ColumnNumbers = [x for x in range(NewX.shape[1])]
        ColumnNumbers.remove(0)
        NewX = NewX.iloc[:, ColumnNumbers]

        ColNames = []
        for i in range(len(NumbModels)):
            for ii in range(NumbModels[i]):
                ColNames.append(Models[i] + str(ii + 1))
        NewX.columns = ColNames  # rename columns

        # Let's remove some features that show high correlation
        NewX = RemoveCorrFeat(NewX, 0.9, np.around(NewX.shape[1] * .8).astype(int))
        if NewX.shape[1] != 0:
            NotDone = 0

    NewNewX = pd.Series()
    for i in range(NewX.columns.shape[0]):
        ThisModelName = NewX.columns[i]
        # Split the column name into the model name and its index
        # (e.g. 'XGB3' -> 'XGB', 3); written without Python 2's filter()
        # so it also runs on Python 3.
        Digits = ''.join(c for c in ThisModelName if c.isdigit())
        ThisModelNameStr = ThisModelName[:len(ThisModelName) - len(Digits)]
        ThisModelNumber = int(Digits)
        WhichModel = Models.index(ThisModelNameStr)

        if ThisModelNameStr == 'XGB':
            ThisClf = XGBoostClassifier(eval_metric='auc', num_class=2, nthread=4, silent=1)
        elif ThisModelNameStr == 'NN':
            ThisClf = MLPClassifier(max_iter=500)
        elif ThisModelNameStr == 'FM':
            ThisClf = FactorizationMachineClassifier(max_iter=500)
        elif ThisModelNameStr == 'LR':
            ThisClf = LogisticRegression(max_iter=500)
        elif ThisModelNameStr == 'KNN':
            ThisClf = KNeighborsClassifier()
        elif ThisModelNameStr == 'RF':
            ThisClf = RandomForestClassifier(oob_score=True, bootstrap=True)

        ThisClf.set_params(**ResultParams[WhichModel][ThisModelNumber - 1]).fit(X_train, y_train)
        ThisPred = ThisClf.predict(X)  # prediction of this model
        ThisTempPredSeries = pd.Series(ThisPred)
        NewNewX = pd.concat([NewNewX, ThisTempPredSeries], axis=1)

    # drop the empty seed column
    ColumnNumbers = [x for x in range(NewNewX.shape[1])]
    ColumnNumbers.remove(0)
    NewNewX = NewNewX.iloc[:, ColumnNumbers]
    NewNewX.columns = NewX.columns  # rename columns

    return NewX, NewNewX
import numpy as np
import scipy.sparse as sp

from sklearn.base import clone
from sklearn.metrics import accuracy_score, f1_score
from sklearn.datasets import fetch_20newsgroups_vectorized

from polylearn import (FactorizationMachineClassifier,
                       PolynomialNetworkClassifier)

estimators = {
    'fm-2': FactorizationMachineClassifier(n_components=30, fit_linear=False,
                                           fit_lower=None, degree=2,
                                           random_state=0, max_iter=10),
    'polynet-2': PolynomialNetworkClassifier(n_components=15, degree=2,
                                             fit_lower=None, max_iter=10,
                                             random_state=0)
}

estimators['fm-3'] = clone(estimators['fm-2']).set_params(degree=3)
estimators['polynet-3'] = (clone(estimators['polynet-2'])
                           .set_params(degree=3, n_components=10))

if __name__ == '__main__':
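    # A minimal sketch of what the benchmark body under __main__ might do,
    # assuming the standard scikit-learn 20 newsgroups vectorized loader; the
    # binary one-vs-rest target, timing, and reporting below are illustrative
    # assumptions, not the original script.
    from time import time

    data_train = fetch_20newsgroups_vectorized(subset='train')
    data_test = fetch_20newsgroups_vectorized(subset='test')
    X_train = sp.csc_matrix(data_train.data)
    X_test = sp.csc_matrix(data_test.data)
    y_train = data_train.target == 0  # one-vs-rest binary task (assumption)
    y_test = data_test.target == 0

    for name, est in sorted(estimators.items()):
        tic = time()
        est.fit(X_train, y_train)
        train_time = time() - tic
        y_pred = est.predict(X_test)
        print(name,
              "train time: {:.2f}s".format(train_time),
              "accuracy: {:.3f}".format(accuracy_score(y_test, y_pred)),
              "f1: {:.3f}".format(f1_score(y_test, y_pred)))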
class FMBidModel(BidModelInterface):
    # _regressionFormulaY = ''
    # _regressionFormulaX = ''
    _model = None
    _cBudget = 0
    _modelType = None

    def __init__(self, cBudget=6250 * 1000, modelType="fmclassificationsgd"):
        """
        # :param regressionFormulaY:
        # :param regressionFormulaX:
        :param cBudget:
        # :param avgCTR:
        :param modelType: Options ['fmclassificationsgd', 'fmclassificationals', 'polylearn']
        """
        # self._regressionFormulaY = regressionFormulaY
        # self._regressionFormulaX = regressionFormulaX
        # self._defaultBid = 0
        self._cBudget = cBudget
        # self._avgCTR = avgCTR
        self._modelType = modelType

    def getThreshold(self):
        return 0.5

    def __computeBidPrice(self, pCTR=None):
        """
        The default computation of the bid price.
        The implemented model should have its own way to gather the necessary parameters:
        :param basebid: Using the budget in this case
        :param pCTR: The predicted probability that click=1 for that bid request
        :param avgCTR: Consider this as the avgCTR for the training set
        :return: bid
        """
        bid = BidEstimator().linearBidPrice_mConfi(y_pred=pCTR,
                                                   base_bid=self._cBudget,
                                                   m_conf=0.8,
                                                   variable_bid=10)
        print("Bid type:", type(bid))
        return bid

    def __predictClickOneProb(self, testDF):
        """
        Perform prediction for the click label.
        Take the predicted click=1 probability as the CTR.
        :param oneBidRequest:
        :return:
        """
        print("Setting up X test for prediction")
        xTest = testDF

        print("Converting to sparse matrix")
        xTest = scipy.sparse.csc_matrix(xTest.as_matrix())

        # predict click labels for the test set
        print("Predicting test set...")
        # FastFM only gives the probability of click=1
        predictedClickOneProb = self._model.predict_proba(xTest)

        return predictedClickOneProb

    def __predictClickOne(self, testDF):
        """
        Perform prediction for the click label.
        Take the predicted click=0 or 1 as the CTR.
        :param oneBidRequest:
        :return:
        """
        print("Setting up X test for prediction")
        xTest = testDF

        print("Converting to sparse matrix")
        xTest = scipy.sparse.csc_matrix(xTest.as_matrix())

        # predict click labels for the test set
        print("Predicting test set...")
        # FastFM only gives the probability of click=1
        predictedClick = self._model.predict(xTest, self.getThreshold())

        return predictedClick

    def trimToBudget(self, bidpriceDF, budget):
        """
        In case the bidding process exceeds the budget, trim down the bidding.
        :param bidpriceDF:
        :param budget:
        :return:
        """
        print("Trimming....")
        totalspend = np.sum(bidpriceDF)
        overspend = totalspend - budget
        print("bidpriceDF:", bidpriceDF.shape)
        print("budget:", budget)
        print("totalspend:", totalspend)
        print("overspend:", overspend)

        # zero out bids from the end until the total spend fits the budget
        i = -1
        while overspend > 0 and len(bidpriceDF) + i > 0:
            overspend += -bidpriceDF[i]
            bidpriceDF[i] = 0
            i += -1

        print("bidpriceDF:", bidpriceDF)
        print("np.sum(bidpriceDF):", np.sum(bidpriceDF))
        assert (np.sum(bidpriceDF) < budget)

        return bidpriceDF

    def getBidPrice(self, xTestOneHotDF, yValDF, noBidThreshold=0.2833333,
                    minBid=200, bidRange=90, sigmoidDegree=-10):
        """
        Retrieve the bidding price.
        :param xTestOneHotDF:
        :param yValDF:
        :param noBidThreshold:
        :param minBid:
        :param bidRange:
        :param sigmoidDegree:
        :return:
        """
        print("Computing bid price")
        print("xTestOneHotDF:", xTestOneHotDF.shape, list(xTestOneHotDF))
        print("yValDF:", yValDF.shape, list(yValDF))

        if self._model is None:
            raise ModelNotTrainedException(
                "Model must be trained prior to prediction!")

        pCTR = self.__predictClickOneProb(xTestOneHotDF)[:, 1]  # Prob of click==1
        bidprice = BidEstimator().thresholdSigmoid(predOneProb=pCTR,
                                                   noBidThreshold=noBidThreshold,
                                                   minBid=minBid,
                                                   bidRange=bidRange,
                                                   sigmoidDegree=sigmoidDegree)
        print("bidprice:", bidprice)
        bidprice = self.trimToBudget(bidprice, self._cBudget)
        print("bidprice after trim:", bidprice)

        # merge with bidid
        bidpriceDF = pd.DataFrame(bidprice, columns=['bidprice'])
        print("bidpriceDF:", bidpriceDF.shape, list(bidpriceDF))
        bididDF = pd.DataFrame(yValDF['bidid'], columns=['bidid'])
        print("bididDF:", bididDF.shape, list(bididDF))
        bidIdPriceDF = pd.concat([bididDF, bidpriceDF], axis=1, ignore_index=True)
        print("bidIdPriceDF:", bidIdPriceDF.shape, list(bidIdPriceDF))

        return bidIdPriceDF

    # def getBidPrice(self, allBidRequest):
    #     """
    #     1. Predict click=1 prob for entire test/validation set
    #        Considered as pCTR for each impression
    #     2. Use the bid=base_price*(pCTR/avgCTR) formula
    #     :param oneBidRequest:
    #     :return:
    #     """
    #     if (self._model == None):
    #         raise ModelNotTrainedException("Model must be trained prior to prediction!")
    #
    #     # Compute the CTR of this BidRequest
    #     pCTR = self.__predictClickOneProb(allBidRequest)[:, 1]
    #     print("General sensing of pCTR ranges")
    #     print(pCTR)
    #
    #     # Compute the bid price
    #     bids = np.apply_along_axis(self.__computeBidPrice, axis=0, arr=pCTR)
    #     print("General sensing of bids ranges")
    #     print(bids)
    #
    #     # Extract the corresponding bidid
    #     allBidRequestMatrix = allBidRequest.as_matrix(columns=['bidid'])
    #
    #     # Merging bidid and bids into a table (Needed for eval)
    #     bidid_bids = np.column_stack((allBidRequestMatrix, bids))
    #
    #     bids = pd.DataFrame(bidid_bids, columns=['bidid', 'bidprice'])
    #     return bids

    def trainModel(self, X, y, retrain=True, modelFile=None):
        """
        Train the click model using an FM against a set of features.
        The trained model will be saved to disk (no need to retrain or reload the
        training data in future runs if not required).
        :param allTrainData:
        :param retrain: If False, will load self._modelFile instead of training on the dataset.
        :param modelFile: To save the trained model into a physical file.
        :return:
        """
        self._modelFile = modelFile

        print("Getting xTrain")
        xTrain = X
        yTrain = y
        print("xTrain:", xTrain.shape, list(xTrain))
        print("yTrain:", yTrain.shape, set(yTrain['click']), "ListL", list(yTrain))
        yTrain['click'] = yTrain['click'].map({0: -1, 1: 1})

        xTrain.to_csv("data.pruned/xTrain.csv")
        yTrain.to_csv("data.pruned/yTrain.csv")

        print("xTrain:", list(xTrain))
        xTrain = xTrain.as_matrix()
        yTrain = yTrain['click'].as_matrix()

        if (retrain):
            print("Performing oversampling to even out")
            xTrain, yTrain = ImbalanceSampling().oversampling_SMOTE(X=xTrain, y=yTrain)
            # ADASYN is slower and doesn't offer better model performance, choose SMOTE instead.
            # xTrain, yTrain = ImbalanceSampling().oversampling_ADASYN(X=xTrain, y=yTrain)

        # instantiate the selected factorisation machine model
        if (self._modelType == 'fmclassificationals'):  # Don't use this
            print("Factorisation Machine with ALS solver will be used for training")
            print("Converting X to sparse matrix, required by FastFM")
            xTrain = scipy.sparse.csc_matrix(xTrain)
            self._model = als.FMClassification(n_iter=3000, rank=2, verbose=1)

        elif (self._modelType == 'fmclassificationsgd'):  # Use this, best results
            print("Factorisation Machine with SGD solver will be used for training")
            print("Converting X to sparse matrix, required by FastFM")
            xTrain = scipy.sparse.csc_matrix(xTrain)
            print("Training with n_iter=200000, rank=16, l2_reg_w=0.0005, "
                  "l2_reg_V=0.0005, l2_reg=0.0005, step_size=0.01")
            # Best Training set score: 0.9121148444887212
            # Best Param: {'n_iter': 200000, 'l2_reg_w': 0.0005, 'step_size': 0.004, 'l2_reg_V': 0.005, 'rank': 16}
            self._model = SGDFMClassification(n_iter=200000, rank=16,
                                              l2_reg_w=0.0005, l2_reg_V=0.0005,
                                              l2_reg=0.0005, step_size=0.01)

        elif (self._modelType == 'polylearn'):  # Don't use this
            print("Factorisation Machine from scikit-learn-contrib polylearn will be used for training")
            self._model = FactorizationMachineClassifier(degree=2,
                                                         loss='squared_hinge',
                                                         n_components=2,
                                                         alpha=1,
                                                         beta=1,
                                                         tol=1e-3,
                                                         fit_lower='explicit',
                                                         fit_linear=True,
                                                         warm_start=False,
                                                         init_lambdas='ones',
                                                         max_iter=5000,
                                                         verbose=True,
                                                         random_state=None)
        else:
            raise ModelNotTrainedException(
                'Selected model not available',
                'Valid models are polylearn, fmclassificationsgd, fmclassificationals')

        if (retrain):
            print("Setting up Y and X for training")
            print(datetime.datetime.now())
            print("Training Model...")
            print(datetime.datetime.now())
            self._model = self._model.fit(xTrain, yTrain)
            super(FMBidModel, self).saveModel(self._model, self._modelFile)
        else:
            self._model = super(FMBidModel, self).loadSavedModel(self._modelFile)

        print("Training completed")
        print(datetime.datetime.now())

    def optimiseBid(self, xTestDF, yTestDF):
        """
        Perform bid optimisation based on params.
        :param xTestDF:
        :param yTestDF:
        :return:
        """
        print(" xTestDF:", xTestDF.shape, "\n", list(xTestDF))
        print(" yTestDF:", yTestDF.shape, "\n", list(yTestDF))
        result = pd.concat([xTestDF, yTestDF], axis=1)
        print(" result:", result.shape, "\n", list(result))

        predProb = self.__predictClickOneProb(xTestDF)
        be = BidEstimator()
        be.gridSearch_bidPrice(predProb[:, 1], 0, 0, result,
                               bidpriceest_model='thresholdsigmoid')

    def gridSearchandCrossValidateFastSGD(self, X, y, retrain=True):
        """
        Perform grid search on the FM model.
        :param X:
        :param y:
        :param retrain:
        :return:
        """
        # n_iter=100000, rank=2, l2_reg_w=0.01, l2_reg_V=0.01, l2_reg=0.01, step_size=0.004
        print("Getting xTrain")
        xTrain = X
        yTrain = y
        print("xTrain:", xTrain.shape, list(xTrain))
print("yTrain:", yTrain.shape, set(yTrain['click']), "ListL", list(yTrain)) yTrain['click'] = yTrain['click'].map({0: -1, 1: 1}) # xTrain.to_csv("data.pruned/xTrain.csv") # yTrain.to_csv("data.pruned/yTrain.csv") print("xTrain:", list(xTrain)) xTrain = xTrain.as_matrix() yTrain = yTrain['click'].as_matrix() print("Performing oversampling to even out") xTrain, yTrain = ImbalanceSampling().oversampling_SMOTE(X=xTrain, y=yTrain) print( "Factorisation Machine with SGD solver will be used for training") print("Converting X to sparse matrix, required by FastFM") xTrain = scipy.sparse.csc_matrix(xTrain) param_grid = [{ 'n_iter': [150000, 200000, 250000], 'l2_reg_w': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1], 'l2_reg_V': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1], # 'l2_reg': [0.0001,0.0005,0.001,0.005,0.01,0.05,0.1], 'step_size': [0.0005, 0.004, 0.007], 'rank': [32, 36, 42, 46, 52, 56, 64] # 'n_iter': [5000], # 'l2_reg_w': [0.0005, 0.001], # 'l2_reg_V': [0.0005, 0.001], # 'l2_reg': [0.0005], # 'step_size': [ 0.004] }] optimized_LR = GridSearchCV( SGDFMClassification(), param_grid=param_grid, scoring='roc_auc', cv=5, # n_jobs=-1, error_score='raise', verbose=1) print("Training model..") print(datetime.datetime.now()) if (retrain): self._model = optimized_LR.fit(xTrain, yTrain) print("Training complete") print(datetime.datetime.now()) print("Best Score: ", optimized_LR.best_score_) print("Best Param: ", optimized_LR.best_params_) def validateModel(self, xVal, yVal): """ Changelog: - 1/4 KS Return PredictProb for emsemble Perform validation of model with different metrics and graphs for analysis :param xVal: :param yVal: :return: predictedProb[:,1] Prob of all click=1 """ if (self._model != None): print("Setting up X Y validation for prediction") xValidate = xVal yVal['click'] = yVal['click'].map({0: -1, 1: 1}) xVal = xVal.reset_index(drop=True) yVal = yVal.reset_index(drop=True) click1list = yVal[yVal['click'] == 1].index.tolist() click0list = yVal[yVal['click'] == -1].index.tolist() print("yVal:", (yVal).shape) print("click1list:", len(click1list)) print("click0list:", len(click0list)) print("Converting to sparse matrix") xValidate = scipy.sparse.csc_matrix(xValidate.as_matrix()) # predict click labels for the validation set print("Predicting validation set...") predicted = self._model.predict(xValidate) predictedProb = self._model.predict_proba(xValidate) predictedOneProbForclick1 = predictedProb[click1list][:, 1] predictedOneProbForclick0 = predictedProb[click0list][:, 1] print("predictedProbclick1:", (predictedOneProbForclick1).shape) print("predictedProbclick0:", (predictedOneProbForclick0).shape) print("yVal['click']", yVal['click'].shape) print("predictedProb:", predictedProb.shape) print("roc_auc", roc_auc_score(yVal['click'], predictedProb[:, 1])) #Get the Goldclick==1 and retrieve the predictedProb1 for it if (False): #Set this to True if want to see plots Evaluator.ClickEvaluator().clickProbHistogram( predictedOneProbForclick1, title='Click=1', showGraph=False) # Get the Goldclick==0 and retrieve the predictedProb1 for it Evaluator.ClickEvaluator().clickProbHistogram( predictedOneProbForclick0, title='Click=0', showGraph=False) Evaluator.ClickEvaluator().clickROC(yVal['click'], predictedProb[:, 1], showGraph=False) #Convert -1 to 0 as Evaluator printClickPredictionScore cannot handle -1 predicted[predicted == -1] = 0 yVal['click'] = yVal['click'].map({-1: 0, 1: 1}) Evaluator.ClickEvaluator().printClickPredictionScore( predicted, yVal['click']) # cnf_matrix = 
confusion_matrix(yVal['click'], predicted) # Evaluator.ClickEvaluator().plot_confusion_matrix(cm=cnf_matrix,classes=set(yVal['click']),plotgraph=False,printStats=False) #Change back, just in case predicted[predicted == 0] = -1 yVal['click'] = yVal['click'].map({0: -1, 1: 1}) print("Gold label: ", yVal['click']) print("predicted label: ", predicted) print("Writing to validated prediction csv") valPredictionWriter = ResultWriter() valPredictionWriter.writeResult( filename="data.pruned/FastFMpredictValidate.csv", data=predicted) else: print("Error: No model was trained in this instance....") return predictedProb[:, 1]