def exeEnsemble_v1(trainDF, targetDF, trainPath, validationPath, targetPath, writeResult2CSV=False):
    """
    Gate the CNN click prediction with the XGBoost prediction, then price and
    evaluate the resulting bids.

    Wherever the XGBoost prediction is below 0.75 the ensemble prediction is
    forced to 0; everywhere else the CNN prediction is used unchanged.

    :param trainDF: training frame handed to the XGBoost model
    :param targetDF: frame that is predicted and evaluated; its 'bidid' column
                     is paired with the computed bid prices
    :param trainPath: training data path handed to the CNN model
    :param validationPath: validation data path handed to the CNN model
    :param targetPath: path handed to the CNN model as its test data
    :param writeResult2CSV: when True, the bids are written to
                            "resultEnsemble_v1.csv"
    :return: None
    """
    xg_gate_thresh = 0.75
    prune_thresh = 0.2

    xg_y_pred = exeXGBoostBidModel(validationData=targetDF, trainData=trainDF, writeResult2CSV=False)
    # The original passed the undefined name `trainset` here while leaving the
    # `trainPath` parameter unused; the parameter is what the CNN model needs.
    cnn_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath,
                                testDataPath=targetPath, writeResult2CSV=False)

    # Use XG's 0 when its prediction is below the gate, otherwise trust CNN.
    y_pred = [0 if xg < xg_gate_thresh else cnn for xg, cnn in zip(xg_y_pred, cnn_y_pred)]

    be = BidEstimator()
    bidprice = be.linearBidPrice_mConfi(y_pred, 230, 100, prune_thresh)

    bids = np.stack([targetDF['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultEnsemble_v1.csv", bids)

    myEvaluator = Evaluator.Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250 * 1000, bids, targetDF)

    # Force the blended result to 1 and 0 for the F1 score.
    y_pred = [1 if i >= prune_thresh else 0 for i in y_pred]
    ce = Evaluator.ClickEvaluator()
    ce.printClickPredictionScore(y_pred, targetDF)
def optimiseBid(self, xTestDF, yTestDF):
    """
    Run the bid-price grid search with the 'linearBidPrice' estimator.

    Prints the shape and column list of both inputs and of their column-wise
    concatenation, then hands the click=1 probabilities from
    self._model.predict_proba to BidEstimator.gridSearch_bidPrice.

    :param xTestDF: feature frame passed to the model
    :param yTestDF: frame concatenated column-wise next to xTestDF
    :return: None
    """
    for label, frame in ((" xTestDF:", xTestDF), (" yTestDF:", yTestDF)):
        print(label, frame.shape, "\n", list(frame))

    combined = pd.concat([xTestDF, yTestDF], axis=1)
    print(" result:", combined.shape, "\n", list(combined))

    class_probabilities = self._model.predict_proba(xTestDF)
    estimator = BidEstimator()
    # Column 1 of predict_proba holds the probability of click == 1.
    estimator.gridSearch_bidPrice(
        class_probabilities[:, 1],
        0.2,
        0,
        combined,
        bidpriceest_model='linearBidPrice',
    )
def gridSearchBidPrice(self, y_prob, slotprices):
    """
    Grid-search the 'linearBidPrice' parameters and save the tuning results.

    The performance frame returned by the grid search is written twice, to
    file names derived from self.bids_tuning_perf_filepath by inserting a
    suffix in front of the trailing ".csv".

    :param y_prob: predicted click probabilities handed to the grid search
    :param slotprices: slot prices handed to the grid search
    :return: tuple (best_pred_thresh, best_base_bid) from the grid search
    """
    print("=== Get best bid prices")
    #avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
    avg_ctr = 0.00075  # use fixed ctr from full train set
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()
    # TODO: could add option for alternate bid strats
    best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(
        y_prob, avg_ctr, slotprices, self.gold_val, bidpriceest_model='linearBidPrice')

    # The pattern is escaped and anchored: the original '.csv' let '.' match
    # any character and rewrote every occurrence, not only the extension.
    ipinyouWriter.ResultWriter().writeResult(
        re.sub(r'\.csv$', '-linearBidPrice.csv', self.bids_tuning_perf_filepath), perf_df)

    # NOTE(review): the 'linearBidPrice_variation' grid search is disabled, so
    # the file written below holds the same linearBidPrice results as above.
    # best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(y_prob, avg_ctr, slotprices,self.gold_val,bidpriceest_model='linearBidPrice_variation')
    ipinyouWriter.ResultWriter().writeResult(
        re.sub(r'\.csv$', '-linearBidPrice_variation.csv', self.bids_tuning_perf_filepath), perf_df)

    return best_pred_thresh, best_base_bid
def tuneConfidenceBaseBid(self, testDF):
    """
    Try confidence-based bid prices and report the one with the best CTR.

    Click estimates for testDF are binarised at 0.7, priced with
    BidEstimator.confidenceBidPrice for each candidate value, and evaluated
    against testDF with a budget of 6250 * 1000.

    :param testDF: frame to predict on and evaluate against; its 'bidid'
                   column is paired with the computed bid prices
    :return: None; the best CTR and its bid price are printed
    """
    print("Setting up XGBoost for Test set")
    y_pred = self.__estimateClick(testDF)
    y_pred = [1 if i >= 0.7 else 0 for i in y_pred]

    myEvaluator = Evaluator.Evaluator()
    bestCTR = -1
    bestBidPrice = -1
    # Currently a single candidate (300); widen the range to search more.
    for i in range(300, 301):
        bidprice = BidEstimator().confidenceBidPrice(y_pred, -1, i)

        bids = np.stack([testDF['bidid'], bidprice], axis=1)
        bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

        # Evaluate against the frame the bids were built from. The original
        # referenced `validateDF`, which is not defined in this function.
        resultDict = myEvaluator.computePerformanceMetricsDF(6250 * 1000, bids, testDF)
        myEvaluator.printResult()

        # Guard against a run in which no auction was won.
        won = resultDict['won']
        ctr = resultDict['click'] / won if won else 0.0

        if ctr > bestCTR:
            bestCTR = ctr
            bestBidPrice = i

    print("Best CTR: %.5f \nPrice: %d" % (bestCTR, bestBidPrice))
def optimiseBid(self, xTestDF, yTestDF):
    """
    Run the bid-price grid search with the 'thresholdsigmoid' estimator.

    Prints the shape and column list of both inputs and of their column-wise
    concatenation, then hands the click=1 probabilities to
    BidEstimator.gridSearch_bidPrice.

    :param xTestDF: feature frame passed to the click-probability predictor
    :param yTestDF: frame concatenated column-wise next to xTestDF
    :return: None
    """
    for label, frame in ((" xTestDF:", xTestDF), (" yTestDF:", yTestDF)):
        print(label, frame.shape, "\n", list(frame))

    combined = pd.concat([xTestDF, yTestDF], axis=1)
    print(" result:", combined.shape, "\n", list(combined))

    class_probabilities = self.__predictClickOneProb(xTestDF)
    estimator = BidEstimator()
    # Column 1 is taken as the probability of click == 1.
    estimator.gridSearch_bidPrice(
        class_probabilities[:, 1],
        0,
        0,
        combined,
        bidpriceest_model='thresholdsigmoid',
    )
def getBidPrice(self,y_prob,bidids,base_bid,slotprices,pred_thresh=0.5):
    """
    Compute linear bid prices and pair them with their bid ids.

    The original body returned `bids_df` without ever assigning it, so every
    call raised NameError. The bids are now computed with
    BidEstimator.linearBidPrice, following the call that was left commented
    out in the original body.
    NOTE(review): confirm that this is the intended bid strategy.

    :param y_prob: predicted click probabilities
    :param bidids: iterable of bid ids, in the same order as y_prob
    :param base_bid: base bid handed to the linear bid estimator
    :param slotprices: currently unused
    :param pred_thresh: currently unused
    :return: DataFrame with columns 'bidid' and 'bidprice'
    """
    # Local import so this block does not depend on a file-level alias.
    import pandas as pd

    #avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
    avg_ctr = 0.00075  # use fixed ctr from full train set
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()
    # TODO: could add option for alternate bid strats
    bids = bid_estimator.linearBidPrice(y_prob, base_bid, avg_ctr)

    # Build the frame positionally so the ids and prices line up row by row.
    bids_df = pd.DataFrame(
        {'bidid': list(bidids), 'bidprice': list(bids)},
        columns=['bidid', 'bidprice'],
    )
    return bids_df
def getBidPrice(self, testDF):
    """
    Price every row of testDF with BidEstimator.linearBidPrice_mConfi.

    :param testDF: frame to predict on; its 'bidid' column is paired with the
                   computed prices
    :return: DataFrame with columns 'bidid' and 'bidprice'
    """
    print("Setting up XGBoost for Test set")
    click_estimates = self.getY_Pred(testDF)

    estimator = BidEstimator()
    prices = estimator.linearBidPrice_mConfi(
        click_estimates,
        base_bid=240,
        variable_bid=70,
        m_conf=0.95,
    )

    # Stack ids and prices as two columns before wrapping them in a frame.
    id_price_pairs = np.stack([testDF['bidid'], prices], axis=1)
    return pd.DataFrame(id_price_pairs, columns=['bidid', 'bidprice'])
def __computeBidPrice(self, pCTR=None):
    """
    Default bid-price computation.

    Delegates to BidEstimator.linearBidPrice_mConfi, using self._cBudget as
    the base bid together with a fixed m_conf of 0.8 and variable_bid of 10.

    :param pCTR: predicted click probability, forwarded as `y_pred`
    :return: the bid produced by the estimator
    """
    estimator = BidEstimator()
    computed_bid = estimator.linearBidPrice_mConfi(
        y_pred=pCTR,
        base_bid=self._cBudget,
        m_conf=0.8,
        variable_bid=10,
    )
    print("Bid type:", type(computed_bid))
    return computed_bid
def getBidPrice(self, xTestOneHotDF, yValDF, noBidThreshold=0.2833333, minBid=200, bidRange=90, sigmoidDegree=-10):
    """
    Compute threshold-sigmoid bid prices, trim them to the budget and pair
    them with their bid ids.

    :param xTestOneHotDF: feature frame passed to the click-probability predictor
    :param yValDF: frame providing the 'bidid' column
    :param noBidThreshold: forwarded to BidEstimator.thresholdSigmoid
    :param minBid: forwarded to BidEstimator.thresholdSigmoid
    :param bidRange: forwarded to BidEstimator.thresholdSigmoid
    :param sigmoidDegree: forwarded to BidEstimator.thresholdSigmoid
    :return: DataFrame built by concatenating the bid-id and bid-price frames
             column-wise
    :raises ModelNotTrainedException: if self._model is None
    """
    print("Computing bid price")
    print("xTestOneHotDF:", xTestOneHotDF.shape, list(xTestOneHotDF))
    print("yValDF:", yValDF.shape, list(yValDF))

    if self._model is None:
        raise ModelNotTrainedException(
            "Model must be trained prior to prediction!")

    pCTR = self.__predictClickOneProb(xTestOneHotDF)[:, 1]  # Prob of click==1

    # Forward the caller's tuning parameters. The original hard-coded the
    # default values here, so any argument supplied by a caller was ignored.
    bidprice = BidEstimator().thresholdSigmoid(predOneProb=pCTR,
                                               noBidThreshold=noBidThreshold,
                                               minBid=minBid,
                                               bidRange=bidRange,
                                               sigmoidDegree=sigmoidDegree)
    print("bidprice:", bidprice)

    bidprice = self.trimToBudget(bidprice, self._cBudget)
    print("bidprice after trim:", bidprice)

    # merge with bidid
    bidpriceDF = pd.DataFrame(bidprice, columns=['bidprice'])
    print("bidpriceDF:", bidpriceDF.shape, list(bidpriceDF))

    bididDF = pd.DataFrame(yValDF['bidid'], columns=['bidid'])
    print("bididDF:", bididDF.shape, list(bididDF))

    # NOTE(review): with axis=1, ignore_index=True relabels the columns as
    # 0 and 1, and the concat aligns rows on index -- confirm that yValDF
    # carries a 0..n-1 index and that callers expect positional columns.
    bidIdPriceDF = pd.concat([bididDF, bidpriceDF], axis=1, ignore_index=True)
    print("bidIdPriceDF:", bidIdPriceDF.shape, list(bidIdPriceDF))

    return bidIdPriceDF
def getBidPrice(self, allBidRequest, v_df):
    """
    Predict click probabilities and derive linear bid prices for all requests.

    1. Predict the click=1 probability for every row of allBidRequest;
       this is treated as the pCTR of each impression.
    2. Price the bids with BidEstimator.linearBidPrice, passing
       self._cBudget and self._avgCTR
       (docstring formula: bid = base_price * (pCTR / avgCTR)).

    :param allBidRequest: features passed to self._model.predict_proba
    :param v_df: frame providing the 'bidid' column paired with the prices
    :return: tuple (y_pred, bids) where y_pred holds the click=1
             probabilities and bids is a DataFrame with columns 'bidid' and
             'bidprice'
    :raises ModelNotTrainedException: if self._model is None
    """
    if self._model is None:
        raise ModelNotTrainedException(
            "Model must be trained prior to prediction!")

    # Compute the CTR of every bid request (column 1 == click=1 probability).
    y_pred = self._model.predict_proba(allBidRequest)
    y_pred = y_pred[:, 1]

    bidprice = BidEstimator().linearBidPrice(y_pred, self._cBudget, self._avgCTR)

    bids = np.stack([v_df['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    # DataFrame.info() prints its summary itself and returns None, so wrapping
    # it in print() only added a stray "None" line to the output.
    bids.info()

    return y_pred, bids
def exeEnsemble_Weighted(trainDF, validateDF, testDF, trainPath, validationPath, testPath, trainReader, validateReader, testReader, writeResult2CSV=False):
    '''
    Blend the XGBoost and CNN click predictions with fixed weights, report
    AUC and probability histograms on the validation set, tune the linear
    bid price on the validation set and write the resulting test-set bids.

    The blend is 0.7 * XGBoost + 0.3 * CNN (a weighted sum, not a plain
    average). All artefacts are written under ./SavedEnsembleInfo/ with a
    timestamp in the file name.

    :param trainDF: training frame handed to the XGBoost model
    :param validateDF: validation frame; 'click' and 'slotprice' are read
    :param testDF: test frame; 'bidid' is read
    :param trainPath: training data path handed to the CNN model
    :param validationPath: validation data path handed to the CNN model
    :param testPath: test data path handed to the CNN model
    :param trainReader: currently unused (only referenced by disabled models)
    :param validateReader: currently unused
    :param testReader: currently unused
    :param writeResult2CSV: currently unused; results are always written
    :return: None
    '''
    # Weights recorded from earlier experiments (xg / cnn / lr / fm -> AUC):
    #   0.4 / 0.4 / 0.05 / 0.15 -> 87.80
    #   0.6 /  -  / 0.10 / 0.30 -> 0.874 (xg/lr/fm only, previously 0.861)
    #   0.5 / 0.5 / 0.05 / 0.15 -> 0.8760
    #   0.6 / 0.4 / 0    / 0    -> 0.8810
    #   0.5 / 0.5 / 0    / 0    -> 0.8797
    #   0.7 / 0.3 / 0    / 0    -> 0.8840  (current choice)
    #   0.8 / 0.2 / 0    / 0    -> 0.8836
    xg_weight = 0.7
    cnn_weight = 0.3
    out_dir = "./SavedEnsembleInfo/"

    xg_val_y_pred, xg_test_y_pred = exeXGBoostBidModel(validationData=validateDF, trainData=trainDF, testData=testDF, writeResult2CSV=False)
    cnn_val_y_pred, cnn_test_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=testPath, writeResult2CSV=False)
    # Logistic regression and FastFM models are currently disabled:
    #lr_y_pred = exeLogisticRegressionBidModel_v2(validationReader=validationReader, trainReader=trainReader, writeResult2CSV=False)
    #fm_y_pred=exeFMBidModel(trainReader=trainReader, validationReader=validateReader, testReader=testReader, writeResult2CSV=False)

    val_y_pred = [(xg * xg_weight + cnn * cnn_weight) for xg, cnn in zip(xg_val_y_pred, cnn_val_y_pred)]

    timestamp = time.strftime("%Y%m%d-%H%M%S")

    ### Click prediction evaluation on the validation set
    print("XGBoost AUC:")
    ClickEvaluator().clickROC(validateDF['click'], xg_val_y_pred, imgpath=out_dir + "XGBoost_AUC-" + timestamp + ".jpg")
    print("CNN AUC:")
    ClickEvaluator().clickROC(validateDF['click'], cnn_val_y_pred, imgpath=out_dir + "CNN_AUC-" + timestamp + ".jpg")
    print("Ensemble AUC:")
    ClickEvaluator().clickROC(validateDF['click'], val_y_pred, imgpath=out_dir + "ensemble_weighted_AUC-" + timestamp + ".jpg", showGraph=False)

    # Boolean-mask indexing below needs an array, not a list.
    val_y_pred = np.array(val_y_pred)

    # click=1 prediction as click=1 probabilities
    click1 = val_y_pred[validateDF.click == 1]
    ClickEvaluator().clickProbHistogram(pred_prob=click1, color='g',
                                        title='Predicted probabilities for clicks=1',
                                        imgpath=out_dir + "ensemble_weighted-click1-" + timestamp + ".jpg",
                                        showGraph=False)

    # click=0 prediction as click=1 probabilities
    click0 = val_y_pred[validateDF.click == 0]
    ClickEvaluator().clickProbHistogram(pred_prob=click0, color='r',
                                        title='Predicted probabilities for clicks=0',
                                        imgpath=out_dir + "ensemble_weighted-click0-" + timestamp + ".jpg",
                                        showGraph=False)

    ### Bid price model evaluations
    test_y_pred = [(xg * xg_weight + cnn * cnn_weight) for xg, cnn in zip(xg_test_y_pred, cnn_test_y_pred)]

    # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
    slotprices_val = validateDF['slotprice'].values.astype(int)

    print("=== Get best bid prices on validation set")
    #avg_ctr = ClickEvaluator().compute_avgCTR(trainDF.click)
    # TODO override with complete train set avg ctr
    avg_ctr = 0.00075
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()

    print("== linearBidPrice")
    best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(val_y_pred, avg_ctr, slotprices_val, validateDF, bidpriceest_model='linearBidPrice')
    ipinyouWriter.ResultWriter().writeResult(out_dir + "ensemble_weighted-linearBidPrice-" + timestamp + ".csv", perf_df)

    # linearBidPrice estimate of the test bids, using the tuned base bid
    bids = bid_estimator.linearBidPrice(test_y_pred, best_base_bid, avg_ctr)

    # format bids into bidids pandas frame (share the index so rows align)
    bids_df = pd.concat([testDF['bidid'], pd.DataFrame(bids, columns=['bidprice'], index=testDF['bidid'].index)], axis=1)
    ipinyouWriter.ResultWriter().writeResult(out_dir + "ensemble_weighted-testbids-" + timestamp + ".csv", bids_df)
def tunelinearBaseBid(self, testDF):
    """
    Grid-search the 'linearBidPrice_mConfi' bid parameters on testDF.

    :param testDF: frame used both for the click estimates and as the frame
                   handed to the grid search
    :return: None
    """
    print("Setting up XGBoost for Test set")
    click_estimates = self.__estimateClick(testDF)

    estimator = BidEstimator()
    estimator.gridSearch_bidPrice(
        click_estimates,
        0,
        0,
        testDF,
        budget=(6250*1000),
        bidpriceest_model='linearBidPrice_mConfi',
    )