예제 #1
0
    def validateModel(self, xValidate, yValidate, validateDF):
        """
        Evaluate the trained model on a validation set and report diagnostics.

        Prints the RMSE, calls ``clickROC``, shows one histogram of the
        predicted click probability per gold label, and finally prints the
        click prediction score for labels thresholded at 0.5.

        :param xValidate: features passed to ``self._model.predict_proba``
        :param yValidate: gold labels passed to the evaluator helpers
        :param validateDF: object whose ``click`` attribute is compared with
            1 and 0 to mask the predictions (presumably the validation
            DataFrame, row-aligned with xValidate -- confirm with caller)
        :return: None
        """
        # Column 1 of predict_proba is used as the click=1 probability.
        pred = self._model.predict_proba(xValidate)[:, 1]

        ce = Evaluator.ClickEvaluator()
        ce.printRMSE(pred, yValidate)
        ce.clickROC(yValidate, pred, False)

        # One histogram per gold label: green for click=1, red for click=0.
        # The evaluator is reached through the Evaluator module, the same way
        # as above, instead of relying on a separately imported bare name.
        for goldLabel, colour in ((1, 'g'), (0, 'r')):
            Evaluator.ClickEvaluator().clickProbHistogram(
                pred_prob=pred[validateDF.click == goldLabel],
                color=colour,
                title='Predicted probabilities for clicks=%d' % goldLabel,
                showGraph=True)

        # Hard 0/1 labels at the 0.5 threshold for the classification score.
        pred = [1 if i >= 0.5 else 0 for i in pred]
        ce.printClickPredictionScore(pred, yValidate)
예제 #2
0
    def validateModel(self, allValidateData):
        """
        Predict labels for the validation data and report accuracy metrics.

        Builds the Y/X design matrices from ``self._regressionFormulaY`` and
        ``self._regressionFormulaX`` with patsy, predicts with ``self._model``,
        writes the predictions to ``predictValidate.csv`` and prints accuracy,
        click prediction score and RMSE, then calls ``clickROC``.

        :param allValidateData: data handed to ``patsy.dmatrices``; it must
            provide the variables named in the two regression formulas
        :return: None. When no model has been trained only an error message
            is printed.
        """
        if self._model is None:
            print("Error: No model was trained in this instance....")
            return

        print("Setting up X Y validation for prediction")
        yValidate, xValidate = patsy.dmatrices(
            self._regressionFormulaY + ' ~ ' + self._regressionFormulaX,
            allValidateData,
            return_type="dataframe")
        print("No of features in input matrix: %d" % len(xValidate.columns))

        # predict click labels for the validation set
        print("Predicting validation set...")
        predicted = self._model.predict(xValidate)  # 0.5 prob threshold

        print("Writing to csv")
        valPredictionWriter = ResultWriter()
        valPredictionWriter.writeResult(filename="predictValidate.csv",
                                        data=predicted)

        # "%5.3f" (width 5, 3 decimals); the previous "%f5.3" printed the
        # float with default precision followed by a literal "5.3".
        print("\n\nPrediction acc on validation set: %5.3f" %
              metrics.accuracy_score(yValidate, predicted))

        ce = Evaluator.ClickEvaluator()
        ce.printClickPredictionScore(predicted, yValidate)
        ce.printRMSE(predicted, yValidate)
        ce.clickROC(yValidate, predicted, False)
예제 #3
0
def exeEnsemble_v1(trainDF, targetDF, trainPath, validationPath, targetPath, writeResult2CSV=False):
    """
    Combine the XGBoost and CNN click predictions, derive bids and evaluate.

    Each ensemble prediction is 0 where the XGBoost prediction is below 0.75
    and the CNN prediction otherwise. Bid prices are computed from those
    predictions, optionally written to ``resultEnsemble_v1.csv``, and then
    scored together with the thresholded click labels.

    :param trainDF: training data passed to the XGBoost bid model
    :param targetDF: data to predict and evaluate on; must have a 'bidid'
        column
    :param trainPath: training data path passed to the CNN bid model
    :param validationPath: validation data path passed to the CNN bid model
    :param targetPath: test data path passed to the CNN bid model
    :param writeResult2CSV: when true, write the bids to
        ``resultEnsemble_v1.csv``
    :return: None
    """
    xg_y_pred = exeXGBoostBidModel(validationData=targetDF, trainData=trainDF, writeResult2CSV=False)
    # Pass the caller's trainPath; the previous code ignored this parameter
    # and read a name (`trainset`) that is not defined in this function.
    cnn_y_pred = exeCNNBidModel(validationDataPath=validationPath, trainDataPath=trainPath, testDataPath=targetPath, writeResult2CSV=False)

    # Use XG's 0 when its threshold is below 0.75.
    y_pred = [0 if xg < 0.75 else cnn for xg, cnn in zip(xg_y_pred, cnn_y_pred)]

    # Use CNN's 1 when its threshold is above 0.2?
    prune_thresh = 0.2

    be = BidEstimator()
    # NOTE(review): 230 and 100 are undocumented tuning constants for the
    # bid-price helper -- confirm their meaning against BidEstimator.
    bidprice = be.linearBidPrice_mConfi(y_pred, 230, 100, prune_thresh)
    bids = np.stack([targetDF['bidid'], bidprice], axis=1)
    bids = pd.DataFrame(bids, columns=['bidid', 'bidprice'])

    if writeResult2CSV:
        ipinyouWriter.ResultWriter().writeResult("resultEnsemble_v1.csv", bids)

    myEvaluator = Evaluator.Evaluator()
    # NOTE(review): 6250*1000 is presumably the bidding budget -- confirm.
    myEvaluator.computePerformanceMetricsDF(6250*1000, bids, targetDF)

    # Force CNN result to 1 and 0 for F1 score
    y_pred = [1 if i >= prune_thresh else 0 for i in y_pred]
    ce = Evaluator.ClickEvaluator()
    ce.printClickPredictionScore(y_pred, targetDF)
예제 #4
0
    def trainModel(self, xTrain, yTrain):
        """
        Fit an SGD classifier on the training data and print training metrics.

        The fitted estimator is stored on ``self._model``. After fitting, the
        training set is scored again: RMSE and ``clickROC`` use the predicted
        probabilities, and the click prediction score uses labels thresholded
        at 0.5.

        :param xTrain: training features
        :param yTrain: training labels
        :return: None
        """
        # An alternative configuration tried earlier: alpha=0.0015,
        # penalty='l1', loss='log', n_iter=100.
        hyperParams = dict(alpha=0.0005, penalty='l2', loss='log', n_iter=200)
        self._model = SGDClassifier(**hyperParams)
        self._model = self._model.fit(xTrain, yTrain)

        # Column 1 of predict_proba is used as the positive-class probability.
        clickProb = self._model.predict_proba(xTrain)[:, 1]

        evaluator = Evaluator.ClickEvaluator()
        evaluator.printRMSE(clickProb, yTrain)
        evaluator.clickROC(yTrain, clickProb, False)

        hardLabels = [int(p >= 0.5) for p in clickProb]
        evaluator.printClickPredictionScore(hardLabels, yTrain)
예제 #5
0
    def validateModel(self, xVal, yVal):
        """
        Validate the trained model and return its click=1 probabilities.

        Changelog:
        - 1/4 KS Return PredictProb for ensemble

        Prints shape information and the ROC AUC of the predicted
        probabilities. Extra plots, scores and a CSV dump are available by
        switching ``showDiagnostics`` below to True.

        NOTE: ``yVal['click']`` is relabelled in place from {0, 1} to
        {-1, 1}, so the caller's frame is modified by this call.

        :param xVal: validation features; ``xVal.values`` is converted to a
            sparse CSC matrix before prediction
        :param yVal: frame with a 'click' column holding 0/1 gold labels
        :return: predictedProb[:, 1]  Prob of all click=1, or None when no
            model has been trained
        """
        if self._model is None:
            print("Error: No model was trained in this instance....")
            # Previously this path fell through to the final return and
            # raised UnboundLocalError because nothing had been predicted.
            return None

        print("Setting up X Y validation for prediction")

        yVal['click'] = yVal['click'].map({0: -1, 1: 1})
        # Positional index so the label positions line up with the rows of
        # the probability array.
        yVal = yVal.reset_index(drop=True)

        click1list = yVal[yVal['click'] == 1].index.tolist()
        click0list = yVal[yVal['click'] == -1].index.tolist()
        print("yVal:", (yVal).shape)
        print("click1list:", len(click1list))
        print("click0list:", len(click0list))

        print("Converting to sparse matrix")
        # .values replaces DataFrame.as_matrix(), which newer pandas removed.
        xValidate = scipy.sparse.csc_matrix(xVal.values)

        # predict click labels for the validation set
        print("Predicting validation set...")
        predicted = self._model.predict(xValidate)
        predictedProb = self._model.predict_proba(xValidate)

        predictedOneProbForclick1 = predictedProb[click1list][:, 1]
        predictedOneProbForclick0 = predictedProb[click0list][:, 1]
        print("predictedProbclick1:", (predictedOneProbForclick1).shape)
        print("predictedProbclick0:", (predictedOneProbForclick0).shape)
        print("yVal['click']", yVal['click'].shape)
        print("predictedProb:", predictedProb.shape)
        print("roc_auc", roc_auc_score(yVal['click'], predictedProb[:, 1]))

        showDiagnostics = False  # Set this to True if want to see plots
        if showDiagnostics:
            # Get the Goldclick==1 and retrieve the predictedProb1 for it
            Evaluator.ClickEvaluator().clickProbHistogram(
                predictedOneProbForclick1,
                title='Click=1',
                showGraph=False)

            # Get the Goldclick==0 and retrieve the predictedProb1 for it
            Evaluator.ClickEvaluator().clickProbHistogram(
                predictedOneProbForclick0,
                title='Click=0',
                showGraph=False)

            Evaluator.ClickEvaluator().clickROC(yVal['click'],
                                                predictedProb[:, 1],
                                                showGraph=False)

            # Convert -1 to 0 as Evaluator printClickPredictionScore cannot
            # handle -1
            predicted[predicted == -1] = 0
            yVal['click'] = yVal['click'].map({-1: 0, 1: 1})
            Evaluator.ClickEvaluator().printClickPredictionScore(
                predicted, yVal['click'])

            # Change back, just in case
            predicted[predicted == 0] = -1
            yVal['click'] = yVal['click'].map({0: -1, 1: 1})

            print("Gold label: ", yVal['click'])
            print("predicted label: ", predicted)

            print("Writing to validated prediction csv")
            valPredictionWriter = ResultWriter()
            valPredictionWriter.writeResult(
                filename="data.pruned/FastFMpredictValidate.csv",
                data=predicted)

        return predictedProb[:, 1]