def validateModel(self, xValidate, yValidate, validateDF):
    """Evaluate the fitted model on a validation set.

    Prints the RMSE, computes the ROC, plots one histogram of predicted
    probabilities for gold clicks and one for gold non-clicks, and finally
    prints the click prediction score at a 0.5 decision threshold.

    :param xValidate: feature matrix handed to ``self._model.predict_proba``.
    :param yValidate: gold click labels aligned with ``xValidate``.
    :param validateDF: validation frame exposing a ``click`` column; used
        to split the predictions by gold label.
    """
    # Column 1 of predict_proba is used throughout as the click=1
    # probability (presumably the positive class -- confirm against the
    # model's class ordering).
    click_prob = self._model.predict_proba(xValidate)[:, 1]

    # One evaluator instance serves every metric below; the original mixed
    # Evaluator.ClickEvaluator() with a bare ClickEvaluator() name.
    ce = Evaluator.ClickEvaluator()
    ce.printRMSE(click_prob, yValidate)
    ce.clickROC(yValidate, click_prob, False)

    # Predicted click=1 probability for rows whose gold label is click=1.
    ce.clickProbHistogram(
        pred_prob=click_prob[validateDF.click == 1],
        color='g',
        title='Predicted probabilities for clicks=1',
        showGraph=True)

    # Predicted click=1 probability for rows whose gold label is click=0.
    ce.clickProbHistogram(
        pred_prob=click_prob[validateDF.click == 0],
        color='r',
        title='Predicted probabilities for clicks=0',
        showGraph=True)

    # Harden the probabilities into 0/1 labels at the 0.5 threshold.
    predicted_labels = [1 if p >= 0.5 else 0 for p in click_prob]
    ce.printClickPredictionScore(predicted_labels, yValidate)
def validateModel(self, allValidateData):
    """Predict click labels for a validation set and report metrics.

    Builds the design matrices from the instance's regression formulas,
    predicts with the trained model, writes the predictions to
    ``predictValidate.csv`` and prints accuracy, the click prediction
    score, RMSE and ROC.

    :param allValidateData: validation data passed to ``patsy.dmatrices``
        together with the formula
        ``self._regressionFormulaY ~ self._regressionFormulaX``.
    :return: None. When no model has been trained an error is printed and
        nothing else happens.
    """
    if self._model is None:
        print("Error: No model was trained in this instance....")
        return

    print("Setting up X Y validation for prediction")
    formula = self._regressionFormulaY + ' ~ ' + self._regressionFormulaX
    yValidate, xValidate = patsy.dmatrices(
        formula, allValidateData, return_type="dataframe")
    print("No of features in input matrix: %d" % len(xValidate.columns))

    # predict() yields hard labels (the original notes a 0.5 probability
    # threshold).
    print("Predicting validation set...")
    predicted = self._model.predict(xValidate)

    print("Writing to csv")
    valPredictionWriter = ResultWriter()
    valPredictionWriter.writeResult(filename="predictValidate.csv",
                                    data=predicted)

    # "%5.3f" is the intended float format; the original "%f5.3" printed
    # the full float followed by the literal text "5.3".
    print("\n\nPrediction acc on validation set: %5.3f"
          % metrics.accuracy_score(yValidate, predicted))

    ce = Evaluator.ClickEvaluator()
    ce.printClickPredictionScore(predicted, yValidate)
    ce.printRMSE(predicted, yValidate)
    ce.clickROC(yValidate, predicted, False)
def exeEnsemble_v1(trainDF, targetDF, trainPath, validationPath, targetPath,
                   writeResult2CSV=False):
    """Combine the XGBoost and CNN click predictions and bid on ``targetDF``.

    The XGBoost model acts as a veto: wherever its prediction is below
    0.75 the ensemble predicts 0, otherwise the CNN prediction is used.
    Bid prices are derived from the ensemble prediction, evaluated against
    ``targetDF`` and optionally written to ``resultEnsemble_v1.csv``.

    :param trainDF: training frame for the XGBoost model.
    :param targetDF: frame to predict and bid on; must expose ``bidid``.
    :param trainPath: training data path for the CNN model.
    :param validationPath: validation data path for the CNN model.
    :param targetPath: test data path for the CNN model.
    :param writeResult2CSV: when true, write the bids to
        ``resultEnsemble_v1.csv``.
    """
    xg_y_pred = exeXGBoostBidModel(validationData=targetDF,
                                   trainData=trainDF,
                                   writeResult2CSV=False)
    # trainPath is forwarded here; the original referenced ``trainset``,
    # which is not defined in this function, and left trainPath unused.
    cnn_y_pred = exeCNNBidModel(validationDataPath=validationPath,
                                trainDataPath=trainPath,
                                testDataPath=targetPath,
                                writeResult2CSV=False)

    # Use XG's 0 when its prediction is below 0.75, else trust the CNN.
    y_pred = [0 if xg < 0.75 else cnn
              for xg, cnn in zip(xg_y_pred, cnn_y_pred)]

    # Predictions are binarised at this threshold below; it is also passed
    # to the bid estimator.
    prune_thresh = 0.2

    be = BidEstimator()
    bidprice = be.linearBidPrice_mConfi(y_pred, 230, 100, prune_thresh)

    # Pair every bid id with its bid price, one row per impression.
    bids = np.stack([targetDF['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultEnsemble_v1.csv", bids)

    # 6250*1000 is presumably the campaign budget -- TODO confirm units.
    myEvaluator = Evaluator.Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250*1000, bids, targetDF)

    # Force the ensemble output to 1/0 for the F1 score.
    y_pred = [1 if i >= prune_thresh else 0 for i in y_pred]
    ce = Evaluator.ClickEvaluator()
    # NOTE(review): the whole targetDF is passed as the gold labels here,
    # whereas other callers pass a label vector -- confirm this is intended.
    ce.printClickPredictionScore(y_pred, targetDF)
def trainModel(self, xTrain, yTrain):
    """Fit an SGD classifier with log loss and report training metrics.

    The fitted classifier is stored on ``self._model``. RMSE, ROC and the
    click prediction score (at a 0.5 threshold) are then reported on the
    training data itself.

    :param xTrain: training feature matrix.
    :param yTrain: training click labels aligned with ``xTrain``.
    """
    # Alternative configuration kept for reference:
    #   alpha=0.0015, penalty='l1', loss='log', n_iter=100
    sgd_settings = {
        'alpha': 0.0005,
        'penalty': 'l2',
        'loss': 'log',
        'n_iter': 200,
    }
    self._model = SGDClassifier(**sgd_settings)
    self._model = self._model.fit(xTrain, yTrain)

    # Second probability column is used as the click=1 probability.
    train_prob = self._model.predict_proba(xTrain)[:, 1]

    evaluator = Evaluator.ClickEvaluator()
    evaluator.printRMSE(train_prob, yTrain)
    evaluator.clickROC(yTrain, train_prob, False)

    # Threshold the probabilities into hard 0/1 labels.
    train_labels = []
    for prob in train_prob:
        train_labels.append(1 if prob >= 0.5 else 0)
    evaluator.printClickPredictionScore(train_labels, yTrain)
def validateModel(self, xVal, yVal):
    """Validate the trained model and return the click=1 probabilities.

    Prints shape diagnostics, the ROC AUC and the click prediction score,
    and writes the hard predictions to
    ``data.pruned/FastFMpredictValidate.csv``.

    Changelog:
    - 1/4 KS: return the predicted probabilities for use in the ensemble.

    :param xVal: validation features as a DataFrame; converted to a sparse
        CSC matrix before prediction.
    :param yVal: DataFrame with a ``click`` column of 0/1 labels. It is
        not modified; relabelling happens on an internal copy.
    :return: ``predictedProb[:, 1]``, the probability column for click=1,
        or ``None`` when no model has been trained.
    """
    if self._model is None:
        print("Error: No model was trained in this instance....")
        # Nothing was predicted, so there is no probability vector to hand
        # back (the original fell through to an unbound ``predictedProb``).
        return None

    print("Setting up X Y validation for prediction")
    # reset_index returns a new frame, so the -1/1 relabelling below stays
    # local. Relabelling the caller's frame in place would turn a second
    # call's -1 labels into NaN.
    yVal = yVal.reset_index(drop=True)
    yVal['click'] = yVal['click'].map({0: -1, 1: 1})

    # Positional row indices of gold clicks / non-clicks.
    click1list = yVal[yVal['click'] == 1].index.tolist()
    click0list = yVal[yVal['click'] == -1].index.tolist()
    print("yVal:", (yVal).shape)
    print("click1list:", len(click1list))
    print("click0list:", len(click0list))

    print("Converting to sparse matrix")
    # .values replaces DataFrame.as_matrix(), which newer pandas removed.
    xValidate = scipy.sparse.csc_matrix(xVal.values)

    # predict click labels for the validation set
    print("Predicting validation set...")
    predicted = self._model.predict(xValidate)
    predictedProb = self._model.predict_proba(xValidate)

    # Click=1 probability split by gold label.
    predictedOneProbForclick1 = predictedProb[click1list][:, 1]
    predictedOneProbForclick0 = predictedProb[click0list][:, 1]

    print("predictedProbclick1:", (predictedOneProbForclick1).shape)
    print("predictedProbclick0:", (predictedOneProbForclick0).shape)
    print("yVal['click']", yVal['click'].shape)
    print("predictedProb:", predictedProb.shape)
    print("roc_auc", roc_auc_score(yVal['click'], predictedProb[:, 1]))

    show_plots = False  # Set this to True if want to see plots
    if show_plots:
        # Predicted click=1 probability for the gold click=1 rows.
        Evaluator.ClickEvaluator().clickProbHistogram(
            predictedOneProbForclick1, title='Click=1', showGraph=False)
        # Predicted click=1 probability for the gold click=0 rows.
        Evaluator.ClickEvaluator().clickProbHistogram(
            predictedOneProbForclick0, title='Click=0', showGraph=False)
        Evaluator.ClickEvaluator().clickROC(
            yVal['click'], predictedProb[:, 1], showGraph=False)

    # Evaluator printClickPredictionScore cannot handle -1, so score on 0/1
    # copies and leave ``predicted`` / ``yVal`` in their -1/1 form.
    predicted_binary = predicted.copy()
    predicted_binary[predicted_binary == -1] = 0
    gold_binary = yVal['click'].map({-1: 0, 1: 1})
    Evaluator.ClickEvaluator().printClickPredictionScore(
        predicted_binary, gold_binary)

    print("Gold label: ", yVal['click'])
    print("predicted label: ", predicted)

    print("Writing to validated prediction csv")
    valPredictionWriter = ResultWriter()
    valPredictionWriter.writeResult(
        filename="data.pruned/FastFMpredictValidate.csv", data=predicted)

    return predictedProb[:, 1]