def optimiseBid(self, xTestDF, yTestDF):
    """Run a 'linearBidPrice' grid search over the model's predictions.

    :param xTestDF: feature frame fed to ``self._model.predict_proba``.
    :param yTestDF: frame appended column-wise to ``xTestDF``; the combined
        frame is what the bid estimator receives.
    :return: None. Whatever ``gridSearch_bidPrice`` returns is discarded.
    """
    # Debug dump: shape and column names of each input frame.
    for label, frame in ((" xTestDF:", xTestDF), (" yTestDF:", yTestDF)):
        print(label, frame.shape, "\n", list(frame))

    combined = pd.concat([xTestDF, yTestDF], axis=1)
    print(" result:", combined.shape, "\n", list(combined))

    probabilities = self._model.predict_proba(xTestDF)

    # Only column 1 of the probability matrix is searched over
    # (presumably P(click=1) -- confirm against the model's class order).
    # NOTE(review): other call sites pass avg CTR and slot prices in the
    # 2nd/3rd positions; here they are the literals 0.2 and 0 -- confirm.
    BidEstimator().gridSearch_bidPrice(
        probabilities[:, 1],
        0.2,
        0,
        combined,
        bidpriceest_model='linearBidPrice',
    )
Exemple #2
0
    def gridSearchBidPrice(self, y_prob, slotprices):
        """Grid-search bid parameters for each linear bid strategy and save the results.

        Runs ``BidEstimator.gridSearch_bidPrice`` once per strategy
        ('linearBidPrice', then 'linearBidPrice_variation') against
        ``self.gold_val`` and writes each performance table next to
        ``self.bids_tuning_perf_filepath``, with ``-<strategy>`` inserted
        before the trailing ``.csv`` extension.

        :param y_prob: predicted click probabilities passed to the estimator.
        :param slotprices: slot prices passed to the estimator.
        :return: ``(best_pred_thresh, best_base_bid)`` from the LAST strategy
            run, i.e. 'linearBidPrice_variation'; the values found for plain
            'linearBidPrice' are only persisted in its CSV.
        """
        print("=== Get best bid prices")
        #avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
        avg_ctr = 0.00075  # use fixed ctr from full train set
        print("Train avgCTR = {}".format(avg_ctr))

        bid_estimator = BidEstimator()
        # TODO: could add option for alternate  bid strats
        for strategy in ('linearBidPrice', 'linearBidPrice_variation'):
            best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(
                y_prob, avg_ctr, slotprices, self.gold_val,
                bidpriceest_model=strategy)
            # Escape the dot and anchor at the end: the previous pattern
            # '.csv' matched ANY character followed by "csv" anywhere in the
            # path (e.g. a "/csv" directory component), corrupting the name.
            perf_path = re.sub(r'\.csv$', '-{}.csv'.format(strategy),
                               self.bids_tuning_perf_filepath)
            ipinyouWriter.ResultWriter().writeResult(perf_path, perf_df)

        return best_pred_thresh, best_base_bid
Exemple #3
0
 def optimiseBid(self, xTestDF, yTestDF):
     """
     Run a 'thresholdsigmoid' bid-price grid search on the predicted probabilities.

     :param xTestDF: feature frame passed to the click-probability predictor
     :param yTestDF: frame joined column-wise onto xTestDF for the estimator
     :return: None (the grid-search result is not captured)
     """
     def _describe(label, frame):
         # Debug dump: shape followed by the frame's column names.
         print(label, frame.shape, "\n", list(frame))

     _describe(" xTestDF:", xTestDF)
     _describe(" yTestDF:", yTestDF)

     merged = pd.concat([xTestDF, yTestDF], axis=1)
     _describe(" result:", merged)

     click_probabilities = self.__predictClickOneProb(xTestDF)

     # Only column 1 of the prediction matrix is handed to the search.
     estimator = BidEstimator()
     estimator.gridSearch_bidPrice(
         click_probabilities[:, 1],
         0,
         0,
         merged,
         bidpriceest_model='thresholdsigmoid',
     )
Exemple #4
0
def _blend_predictions(xg_pred, cnn_pred, xg_weight=0.7, cnn_weight=0.3):
    """Return the element-wise weighted sum of two prediction sequences as a list.

    Pairs are formed with ``zip``, so the result is as long as the shorter input.
    """
    return [xg * xg_weight + cnn * cnn_weight for xg, cnn in zip(xg_pred, cnn_pred)]


def exeEnsemble_Weighted(trainDF, validateDF, testDF,
                   trainPath, validationPath, testPath,
                   trainReader, validateReader, testReader,
                   writeResult2CSV=False):
    '''
    Blend XGBoost and CNN click predictions (0.7 / 0.3 weighted sum), report
    validation AUCs and histograms, tune the linear bid price on the
    validation set and write the resulting test-set bids.

    :param trainDF: training frame, forwarded to the XGBoost model.
    :param validateDF: validation frame; its 'click' and 'slotprice' columns are read here.
    :param testDF: test frame; its 'slotprice' and 'bidid' columns are read here.
    :param trainPath: training data path, forwarded to the CNN model.
    :param validationPath: validation data path, forwarded to the CNN model.
    :param testPath: test data path, forwarded to the CNN model.
    :param trainReader: currently unused (only the disabled LR/FM models referenced readers).
    :param validateReader: currently unused.
    :param testReader: currently unused.
    :param writeResult2CSV: currently unused; the sub-models are always called with
        writeResult2CSV=False and the ensemble outputs below are always written.
    :return: None. Images and CSV files are written under ./SavedEnsembleInfo/.
    '''
    xg_val_y_pred, xg_test_y_pred = exeXGBoostBidModel(validationData=validateDF, trainData=trainDF, testData=testDF, writeResult2CSV=False)
    cnn_val_y_pred, cnn_test_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=testPath, writeResult2CSV=False)
    # The logistic-regression and FastFM models are disabled; only XGBoost and
    # CNN contribute to the ensemble.

    # Weight experiments recorded by earlier runs (validation AUC):
    #   xg 0.4 / cnn 0.4 / lr 0.05 / fm 0.15 -> 87.80
    #   xg 0.6 / lr 0.1 / fm 0.3 (no CNN)    -> 0.874 (previously 0.861)
    #   xg 0.5 / cnn 0.5 / lr 0.05 / fm 0.15 -> 0.8760
    #   xg 0.6 / cnn 0.4                     -> 0.8810
    #   xg 0.5 / cnn 0.5                     -> 0.8797
    #   xg 0.7 / cnn 0.3                     -> 0.8840  (current)
    #   xg 0.8 / cnn 0.2                     -> 0.8836
    val_y_pred = _blend_predictions(xg_val_y_pred, cnn_val_y_pred)

    timestamp = time.strftime("%Y%m%d-%H%M%S")

    print("XGBoost AUC:")
    ClickEvaluator().clickROC(validateDF['click'], xg_val_y_pred, imgpath="./SavedEnsembleInfo/XGBoost_AUC-" + timestamp + ".jpg")
    print("CNN AUC:")
    ClickEvaluator().clickROC(validateDF['click'], cnn_val_y_pred, imgpath="./SavedEnsembleInfo/CNN_AUC-" + timestamp + ".jpg")

    print("Ensemble AUC:")
    ClickEvaluator().clickROC(validateDF['click'], val_y_pred, imgpath="./SavedEnsembleInfo/ensemble_weighted_AUC-" + timestamp + ".jpg",
                              showGraph=False)

    # Convert to an array so the boolean click masks below can index it.
    val_y_pred = np.array(val_y_pred)

    # Predicted click=1 probabilities for impressions that were clicked.
    click1 = val_y_pred[validateDF.click == 1]
    ClickEvaluator().clickProbHistogram(pred_prob=click1, color='g',
                                        title='Predicted probabilities for clicks=1',
                                        imgpath="./SavedEnsembleInfo/ensemble_weighted-click1-" + timestamp + ".jpg",
                                        showGraph=False)

    # click=0 prediction as click=1 probabilities
    click0 = val_y_pred[validateDF.click == 0]
    ClickEvaluator().clickProbHistogram(pred_prob=click0, color='r',
                                        title='Predicted probabilities for clicks=0',
                                        imgpath="./SavedEnsembleInfo/ensemble_weighted-click0-" + timestamp + ".jpg",
                                        showGraph=False)

    ### Bid price model evaluations
    test_y_pred = _blend_predictions(xg_test_y_pred, cnn_test_y_pred)

    # Series.as_matrix() was removed in pandas 1.0; .values yields the same
    # ndarray and also works on older pandas releases.
    slotprices_val = validateDF['slotprice'].values.astype(int)
    slotprices_test = testDF['slotprice'].values.astype(int)  # not used below; kept for parity with the validation set

    print("=== Get best bid prices on validation set")
    #avg_ctr = ClickEvaluator().compute_avgCTR(trainDF.click)
    #TODO override with complete train set avg ctr
    avg_ctr = 0.00075
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()
    print("== linearBidPrice")
    best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(val_y_pred, avg_ctr, slotprices_val,
                                                                                 validateDF,
                                                                                 bidpriceest_model='linearBidPrice')
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-linearBidPrice-" + timestamp + ".csv", perf_df)

    print("= linearBidPrice estimate test bids")
    bids = bid_estimator.linearBidPrice(test_y_pred, best_base_bid, avg_ctr)
    # Pair each bid with its bidid; reuse the bidid index so concat aligns
    # the two columns row-for-row.
    bidids = testDF['bidid']
    bids_df = pd.concat([bidids, pd.DataFrame(bids, columns=['bidprice'], index=bidids.index)], axis=1)
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-testbids-" + timestamp + ".csv", bids_df)
    def tunelinearBaseBid(self, testDF):
        """Grid-search the 'linearBidPrice_mConfi' bid model on ``testDF``.

        :param testDF: frame passed both to the click estimator and, as the
            fourth argument, to the bid grid search.
        :return: None; the grid-search result is not captured.
        """
        print("Setting up XGBoost for Test set")
        click_scores = self.__estimateClick(testDF)

        total_budget = 6250 * 1000
        BidEstimator().gridSearch_bidPrice(
            click_scores,
            0,
            0,
            testDF,
            budget=total_budget,
            bidpriceest_model='linearBidPrice_mConfi',
        )