Example #1
def exeEnsemble_v1(trainDF, targetDF, trainPath, validationPath, targetPath, writeResult2CSV=False):
    xg_y_pred = exeXGBoostBidModel(validationData=targetDF, trainData=trainDF, writeResult2CSV=False)
    cnn_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=targetPath, writeResult2CSV=False)
    # fm_y_pred = exeFM_SGDBidModel(validationDataOneHot=validateDFonehot, trainDataOneHot=trainDFonehot, validationData=validateDF, writeResult2CSV=True)

    # Gate on XGBoost: predict 0 when its click probability is below 0.75,
    # otherwise use the CNN's probability.
    y_pred = [0 if xg < 0.75 else cnn for xg, cnn in zip(xg_y_pred, cnn_y_pred)]

    # Probabilities above this threshold are kept as bids / counted as clicks.
    prune_thresh = 0.2

    be = BidEstimator()
    bidprice = be.linearBidPrice_mConfi(y_pred, 230, 100, prune_thresh)
    # bidprice = be.linearBidPrice_variation(y_pred, 80, 0.2, slotprices=slotprices, prune_thresh=prune_thresh)
    bids = np.stack([targetDF['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultEnsemble_v1.csv", bids)

    myEvaluator = Evaluator.Evaluator()
    myEvaluator.computePerformanceMetricsDF(6250*1000, bids, targetDF)

    # Binarise the ensemble probabilities at prune_thresh for precision/recall/F1 scoring
    y_pred = [1 if i >= prune_thresh else 0 for i in y_pred]
    ce = Evaluator.ClickEvaluator()
    ce.printClickPredictionScore(y_pred, targetDF)
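A minimal usage sketch for exeEnsemble_v1 above. The CSV paths and the pandas-based loading are assumptions for illustration; the project normally prepares these frames through its own reader classes.

import pandas as pd

# Hypothetical paths to iPinYou-format CSVs
trainPath = "data/train.csv"
validationPath = "data/validation.csv"

trainDF = pd.read_csv(trainPath)
validateDF = pd.read_csv(validationPath)

# Bid against the validation set (used here as the target) and write the bids out
exeEnsemble_v1(trainDF, validateDF, trainPath, validationPath,
               targetPath=validationPath, writeResult2CSV=True)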
Example #2
    def optimiseBid(self, xTestDF, yTestDF):
        print(" xTestDF:", xTestDF.shape, "\n", list(xTestDF))
        print(" yTestDF:", yTestDF.shape, "\n", list(yTestDF))
        result = pd.concat([xTestDF, yTestDF], axis=1)
        print(" result:", result.shape, "\n", list(result))
        predProb = self._model.predict_proba(xTestDF)

        # Grid-search the prediction threshold and base bid for the linear
        # bidding strategy, evaluated against the combined features + gold labels.
        be = BidEstimator()
        be.gridSearch_bidPrice(predProb[:, 1],
                               0.2,
                               0,
                               result,
                               bidpriceest_model='linearBidPrice')
Example #3
    def gridSearchBidPrice(self, y_prob, slotprices):
        print("=== Get best bid prices")
        #avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
        avg_ctr = 0.00075  # use fixed ctr from full train set
        print("Train avgCTR = {}".format(avg_ctr))

        bid_estimator = BidEstimator()
        # TODO: could add an option for alternate bid strategies
        best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(
            y_prob, avg_ctr, slotprices, self.gold_val, bidpriceest_model='linearBidPrice')
        ipinyouWriter.ResultWriter().writeResult(
            re.sub('.csv', '-linearBidPrice.csv', self.bids_tuning_perf_filepath), perf_df)
        # NOTE: the second search overwrites the first, so the values returned below
        # come from the linearBidPrice_variation run.
        best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(
            y_prob, avg_ctr, slotprices, self.gold_val, bidpriceest_model='linearBidPrice_variation')
        ipinyouWriter.ResultWriter().writeResult(
            re.sub('.csv', '-linearBidPrice_variation.csv', self.bids_tuning_perf_filepath), perf_df)

        return best_pred_thresh, best_base_bid
Example #4
    def tuneConfidenceBaseBid(self, testDF):
        print("Setting up XGBoost for Test set")
        y_pred = self.__estimateClick(testDF)

        y_pred = [1 if i >= 0.7 else 0 for i in y_pred]

        # print("number of 1 here: ", sum(y_pred))
        # avgCTR = np.count_nonzero(testDF.click) / testDF.shape[0]
        myEvaluator = Evaluator.Evaluator()

        bestCTR = -1
        bestBidPrice = -1
        # NOTE: range(300, 301) sweeps a single base bid (300); widen the range to actually tune.
        for i in range(300, 301):
            bidprice = BidEstimator().confidenceBidPrice(y_pred, -1, i)

            # print("total bid price: ", sum(bidprice))
            # print("total bid submitted: ", np.count_nonzero(bidprice))
            # print("Number of $0 bid", bidprice.count(0))

            bids = np.stack([testDF['bidid'], bidprice], axis=1)

            bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

            # print("Estimated bid price: ", bids.bidprice.ix[0])

            resultDict = myEvaluator.computePerformanceMetricsDF(6250 * 1000, bids, testDF)
            myEvaluator.printResult()
            ctr = resultDict['click'] / resultDict['won']

            if ctr > bestCTR:
                bestCTR = ctr
                bestBidPrice = i

        print("Best CTR: %.5f \nPrice: %d" % (bestCTR, bestBidPrice))
Example #5
    def optimiseBid(self, xTestDF, yTestDF):
        """
        Perform bid optimisation based on params
        :param xTestDF: feature DataFrame to predict click probabilities for
        :param yTestDF: gold labels / metadata for the same rows
        :return:
        """
        print(" xTestDF:", xTestDF.shape, "\n", list(xTestDF))
        print(" yTestDF:", yTestDF.shape, "\n", list(yTestDF))
        result = pd.concat([xTestDF, yTestDF], axis=1)
        print(" result:", result.shape, "\n", list(result))
        predProb = self.__predictClickOneProb(xTestDF)
        be = BidEstimator()
        # Grid-search bid parameters for the threshold-sigmoid strategy
        be.gridSearch_bidPrice(predProb[:, 1],
                               0,
                               0,
                               result,
                               bidpriceest_model='thresholdsigmoid')
Example #6
    def getBidPrice(self, y_prob, bidids, base_bid, slotprices, pred_thresh=0.5):
        #avg_ctr = ClickEvaluator().compute_avgCTR(self.Y_train)
        avg_ctr = 0.00075  # use fixed ctr from full train set
        print("Train avgCTR = {}".format(avg_ctr))

        bid_estimator = BidEstimator()
        # TODO: could add an option for alternate bid strategies
        #bids = bid_estimator.linearBidPrice(y_pred, 50, avg_ctr)
        # NOTE: assumed completion of the truncated snippet, using the linear
        # strategy from the commented-out call above.
        bids = bid_estimator.linearBidPrice(y_prob, base_bid, avg_ctr)
        bids_df = pd.DataFrame({'bidid': bidids, 'bidprice': bids})
        return bids_df
Example #7
    def getBidPrice(self, testDF):
        print("Setting up XGBoost for Test set")
        y_pred = self.getY_Pred(testDF)

        # y_pred = [1 if i >= 0.07 else 0 for i in y_pred]

        # bidprice = BidEstimator().linearBidPrice(y_pred, base_bid=220, avg_ctr=0.2)
        bidprice = BidEstimator().linearBidPrice_mConfi(y_pred, base_bid=240, variable_bid=70, m_conf=0.95)

        bids = np.stack([testDF['bidid'], bidprice], axis=1)
        bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

        return bids
Example #8
    def __computeBidPrice(self, pCTR=None):
        """
        The default bid price computation.
        The implementing model gathers the necessary parameters itself: the
        allocated budget is used as the base bid, and pCTR is the predicted
        probability that click=1 for the bid request.
        :param pCTR: predicted click probability for each bid request
        :return: bid
        """
        bid = BidEstimator().linearBidPrice_mConfi(y_pred=pCTR,
                                                   base_bid=self._cBudget,
                                                   m_conf=0.8,
                                                   variable_bid=10)
        print("Bid type:", type(bid))
        return bid
Example #9
    def getBidPrice(self,
                    xTestOneHotDF,
                    yValDF,
                    noBidThreshold=0.2833333,
                    minBid=200,
                    bidRange=90,
                    sigmoidDegree=-10):
        """
        Retrieve the bidding price
        :param xTestOneHotDF:
        :param yValDF:
        :param noBidThreshold:
        :param minBid:
        :param bidRange:
        :param sigmoidDegree:
        :return:
        """
        print("Computing bid price")
        print("xTestOneHotDF:", xTestOneHotDF.shape, list(xTestOneHotDF))
        print("yValDF:", yValDF.shape, list(yValDF))
        if self._model is None:
            raise ModelNotTrainedException(
                "Model must be trained prior to prediction!")

        # Probability of click == 1
        pCTR = self.__predictClickOneProb(xTestOneHotDF)[:, 1]
        # Pass the method parameters through instead of repeating their defaults as literals
        bidprice = BidEstimator().thresholdSigmoid(predOneProb=pCTR,
                                                   noBidThreshold=noBidThreshold,
                                                   minBid=minBid,
                                                   bidRange=bidRange,
                                                   sigmoidDegree=sigmoidDegree)
        print("bidprice:", bidprice)
        bidprice = self.trimToBudget(bidprice, self._cBudget)
        print("bidprice after trim:", bidprice)

        #merge with bidid
        bidpriceDF = pd.DataFrame(bidprice, columns=['bidprice'])
        print("bidpriceDF:", bidpriceDF.shape, list(bidpriceDF))
        bididDF = pd.DataFrame(yValDF['bidid'], columns=['bidid'])
        print("bididDF:", bididDF.shape, list(bididDF))
        # Keep the 'bidid'/'bidprice' column names when concatenating
        bidIdPriceDF = pd.concat([bididDF, bidpriceDF], axis=1)
        print("bidIdPriceDF:", bidIdPriceDF.shape, list(bidIdPriceDF))
        return bidIdPriceDF
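BidEstimator.thresholdSigmoid itself is not shown on this page. Purely to illustrate how the four parameters above could combine (an assumption, not the project's implementation), a threshold-gated sigmoid bid curve might look like this:

import numpy as np

def threshold_sigmoid_sketch(pCTR, noBidThreshold=0.2833333, minBid=200,
                             bidRange=90, sigmoidDegree=-10):
    # No bid below the threshold; above it, scale a sigmoid of the predicted
    # click probability into [minBid, minBid + bidRange].
    pCTR = np.asarray(pCTR, dtype=float)
    sig = 1.0 / (1.0 + np.exp(sigmoidDegree * (pCTR - noBidThreshold)))
    return np.where(pCTR < noBidThreshold, 0.0, minBid + bidRange * sig)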
Example #10
    def getBidPrice(self, allBidRequest, v_df):
        """
        1. Predict click=1 prob for entire test/validation set
            Considered as pCTR for each impression
        2. Use the bid=base_price*(pCTR/avgCTR) formula
        :param oneBidRequest:
        :return:
        """

        if self._model is None:
            raise ModelNotTrainedException(
                "Model must be trained prior to prediction!")

        # Predict the click=1 probability (pCTR) of every bid request
        y_pred = self._model.predict_proba(allBidRequest)
        y_pred = y_pred[:, 1]

        bidprice = BidEstimator().linearBidPrice(y_pred, self._cBudget,
                                                 self._avgCTR)

        bids = np.stack([v_df['bidid'], bidprice], axis=1)
        bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])
        print(bids.info())
        return y_pred, bids
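The docstring above spells out the linear bidding rule used here. A minimal stand-alone version of that formula, for reference (the project's BidEstimator.linearBidPrice may additionally round, clip, or prune the result):

import numpy as np

def linear_bid_price_sketch(pCTR, base_bid, avg_ctr):
    # bid = base_bid * (pCTR / avgCTR), as stated in the docstring above
    return base_bid * (np.asarray(pCTR, dtype=float) / avg_ctr)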
Example #11
def exeEnsemble_Weighted(trainDF, validateDF, testDF,
                         trainPath, validationPath, testPath,
                         trainReader, validateReader, testReader,
                         writeResult2CSV=False):
    '''
    Takes a weighted average of the y_pred probabilities from the individual models.
    '''
    xg_val_y_pred, xg_test_y_pred = exeXGBoostBidModel(validationData=validateDF, trainData=trainDF, testData=testDF, writeResult2CSV=False)
    cnn_val_y_pred, cnn_test_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=testPath, writeResult2CSV=False)
    #lr_y_pred = exeLogisticRegressionBidModel_v2(validationReader=validationReader, trainReader=trainReader, writeResult2CSV=False)
    #fm_y_pred=exeFMBidModel(trainReader=trainReader, validationReader=validateReader, testReader=testReader, writeResult2CSV=False)

    # Average them
    # y_pred = [(xg+ lr) / 2.0 for xg, lr in zip(xg_y_pred, lr_y_pred)]
    # y_pred = [(xg + cnn + lr)/3.0 for xg, cnn, lr in zip(xg_y_pred, cnn_y_pred, lr_y_pred)]
    #y_pred = [(xg*0.4 + cnn*0.4 + lr*0.05 + fm*0.15)  for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)] # AUC 87.80

    # This one hits 0.874 for the xg/lr/fm ensemble, previously 0.861 (can't run CNN on my mac yet, convolution op missing error)
    # y_pred = [(xg * 0.6 + lr * 0.1 + fm * 0.3) for xg, lr, fm in zip(xg_y_pred, lr_y_pred, fm_y_pred)]

    #ongmin testing
    # y_pred = [(xg * 0.5 + cnn * 0.5 + lr * 0.05 + fm * 0.15) for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)] # AUC 0.8760
    # y_pred = [(xg * 0.6 + cnn * 0.4 + lr * 0.00 + fm * 0.00) for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)] # AUC 0.8810
    # y_pred = [(xg*0.5 + cnn*0.5 + lr*0.00 + fm*0.00)  for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)] # AUC 0.8797
    #y_pred = [(xg * 0.7 + cnn * 0.3 + lr * 0.00 + fm * 0.00) for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)]  # AUC 0.8840
    #y_pred = [(xg * 0.8 + cnn * 0.2 + lr * 0.00 + fm * 0.00) for xg, cnn, lr, fm in zip(xg_y_pred, cnn_y_pred, lr_y_pred, fm_y_pred)]  # AUC 0.8836
    val_y_pred = [(xg * 0.7 + cnn * 0.3 ) for xg, cnn in zip(xg_val_y_pred, cnn_val_y_pred)]  # AUC 0.8840
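    # The 0.7/0.3 weighting matched the best validation AUC (0.8840) recorded in the sweeps above.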

    timestamp=str(time.strftime("%Y%m%d-%H%M%S"))

    print("XGBoost AUC:")
    ClickEvaluator().clickROC(validateDF['click'], xg_val_y_pred, imgpath="./SavedEnsembleInfo/XGBoost_AUC-" + timestamp + ".jpg")
    print("CNN AUC:")
    ClickEvaluator().clickROC(validateDF['click'], cnn_val_y_pred, imgpath="./SavedEnsembleInfo/CNN_AUC-" + timestamp + ".jpg")
    # print("Logistic AUC:")
    # ClickEvaluator().clickROC(validateDF['click'], lr_y_pred, imgpath="./SavedEnsembleInfo/LogisticR_AUC-" + timestamp + ".jpg")
    # print("FastFM AUC:")
    # ClickEvaluator().clickROC(validateDF['click'], fm_y_pred, imgpath="./SavedEnsembleInfo/FastFM_AUC-" + timestamp + ".jpg")

    print("Ensemble AUC:")
    ClickEvaluator().clickROC(validateDF['click'], val_y_pred, imgpath="./SavedEnsembleInfo/ensemble_weighted_AUC-" + timestamp + ".jpg",
                                                           showGraph=False)

    val_y_pred = np.array(val_y_pred)
    click1 = val_y_pred[validateDF.click == 1]
    n, bins, patches = ClickEvaluator().clickProbHistogram(pred_prob=click1, color='g',
                                                           title='Predicted probabilities for clicks=1',
                                                           imgpath="./SavedEnsembleInfo/ensemble_weighted-click1-" + timestamp + ".jpg",
                                                           showGraph=False)

    # click=0 prediction as click=1 probabilities
    click0 = val_y_pred[validateDF.click == 0]
    n, bins, patches = ClickEvaluator().clickProbHistogram(pred_prob=click0, color='r',
                                                           title='Predicted probabilities for clicks=0',
                                                           imgpath="./SavedEnsembleInfo/ensemble_weighted-click0-" + timestamp + ".jpg",
                                                           showGraph=False)


    ### Bid price model evaluations
    test_y_pred = [(xg * 0.7 + cnn * 0.3 ) for xg, cnn in zip(xg_test_y_pred, cnn_test_y_pred)]

    slotprices_val = validateDF['slotprice'].values.astype(int)
    slotprices_test = testDF['slotprice'].values.astype(int)

    print("=== Get best bid prices on validation set")
    #avg_ctr = ClickEvaluator().compute_avgCTR(trainDF.click)
    #TODO override with complete train set avg ctr
    avg_ctr = 0.00075
    print("Train avgCTR = {}".format(avg_ctr))

    bid_estimator = BidEstimator()
    print("== linearBidPrice")
    best_pred_thresh, best_base_bid, perf_df = bid_estimator.gridSearch_bidPrice(val_y_pred, avg_ctr, slotprices_val,
                                                                                 validateDF,
                                                                                 bidpriceest_model='linearBidPrice')
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-linearBidPrice-"+ timestamp +".csv",perf_df)  #
    print("= linearBidPrice estimate test bids")
    bids = bid_estimator.linearBidPrice(test_y_pred, best_base_bid, avg_ctr)
    # format bids into bidids pandas frame
    bids_df = pd.concat([testDF['bidid'], pd.DataFrame(bids, columns=['bidprice'], index=testDF['bidid'].index)],axis=1)
    ipinyouWriter.ResultWriter().writeResult("./SavedEnsembleInfo/ensemble_weighted-testbids-"+ timestamp +".csv", bids_df)
Example #12
    def tunelinearBaseBid(self, testDF):
        print("Setting up XGBoost for Test set")
        y_pred = self.__estimateClick(testDF)

        be = BidEstimator()
        be.gridSearch_bidPrice(y_pred, 0, 0, testDF, budget=(6250*1000), bidpriceest_model='linearBidPrice_mConfi')