コード例 #1
0
ファイル: Predict.py プロジェクト: praneshvyas11/evolveML
def predict_proba(rf_model, data):
    '''
    Average the per-tree votes of a RandomForestModel into a soft score.

    This wrapper overcomes the "binary" nature of predictions in the native
    RandomForestModel.

    rf_model -- RandomForestModel; its underlying Java trees are queried.
    data     -- RDD of rows carrying SearchID, AdID, Position, ObjectType
                and HistCTR attributes (each converted to a float feature
                vector).

    Returns an RDD of floats: the mean per-tree prediction for each row.
    '''
    # Collect the individual decision tree models by calling the underlying
    # Java model. These are returned as JavaArray defined by py4j.
    trees = rf_model._java_model.trees()
    ntrees = rf_model.numTrees()

    # Build the feature RDD once and reuse it for every tree; the original
    # recomputed this map on each loop iteration.
    features = data.map(lambda row: [
        float(row.SearchID),
        float(row.AdID),
        float(row.Position),
        float(row.ObjectType),
        float(row.HistCTR)
    ])

    scores = DecisionTreeModel(trees[0]).predict(features)

    # For each remaining decision tree, apply its prediction to the entire
    # dataset and accumulate the results using 'zip'.
    for i in range(1, ntrees):
        dtm = DecisionTreeModel(trees[i])
        scores = scores.zip(dtm.predict(features))
        scores = scores.map(lambda x: x[0] + x[1])

    # Divide the accumulated scores over the number of trees
    return scores.map(lambda x: x / ntrees)
コード例 #2
0
ファイル: stargalaxy.py プロジェクト: beatriceliang/POPREU
def get_probs_classify(model, data):
    """Return an RDD of per-example scores: the mean prediction taken over
    every decision tree inside a random-forest model."""
    # Pull the per-tree Java models out of the wrapped forest.
    java_trees = model._java_model.trees()
    tree_count = model.numTrees()

    # Seed the running total with the first tree's predictions.
    totals = DecisionTreeModel(java_trees[0]).predict(data)

    # Fold each remaining tree's predictions into the running total.
    for idx in range(1, tree_count):
        tree_scores = DecisionTreeModel(java_trees[idx]).predict(data)
        totals = totals.zip(tree_scores).map(lambda pair: pair[0] + pair[1])

    # Normalise by the number of trees to obtain an average score.
    return totals.map(lambda total: total / tree_count)
コード例 #3
0
ファイル: stargalaxy.py プロジェクト: bbw7561135/POPREU
def get_probs_classify(model, data):
    """Average the votes of every decision tree in *model* over *data*.

    Returns an RDD of floats (sum of per-tree predictions / tree count).
    """
    java_trees = model._java_model.trees()
    n = model.numTrees()

    def tree_predict(k):
        # Wrap the k-th Java tree and score the full dataset with it.
        return DecisionTreeModel(java_trees[k]).predict(data)

    # Accumulate tree votes pairwise via zip, starting from tree 0.
    acc = tree_predict(0)
    for k in range(1, n):
        acc = acc.zip(tree_predict(k)).map(lambda ab: ab[0] + ab[1])

    # Convert the vote totals into an average.
    return acc.map(lambda s: s / n)
コード例 #4
0
def predict_proba(rf_model, testRDD, num_classes=10):
    '''
    Count per-class votes across all trees of a random forest.

    rf_model    -- RandomForestModel; its underlying Java trees are polled.
    testRDD     -- RDD of feature vectors accepted by
                   DecisionTreeModel.predict.
    num_classes -- number of class labels; vote dicts are keyed
                   0..num_classes-1.  Defaults to 10, matching the value
                   previously hard-coded in the body.

    Returns an RDD of dicts mapping class index -> number of trees that
    voted for that class.
    '''
    trees = rf_model._java_model.trees()

    # Start every example with a zeroed vote counter.  `.copy()` gives each
    # record its own dict instead of sharing one mutable object.
    template = {i: 0 for i in range(num_classes)}
    scoresRDD = testRDD.map(lambda x: template.copy())

    for tree in trees:
        dtm = DecisionTreeModel(tree)
        currentScoreRDD = dtm.predict(testRDD)
        scoresRDD = scoresRDD.zip(currentScoreRDD)

        def addVote(pair):
            # pair = (vote_dict, predicted_label): bump the predicted class.
            pair[0][int(pair[1])] += 1
            return pair[0]

        scoresRDD = scoresRDD.map(addVote)
    return scoresRDD
コード例 #5
0
ファイル: Predict.py プロジェクト: abhishek-ch/evolveML
def predict_proba(rf_model, data):
    '''
    This wrapper overcomes the "binary" nature of predictions in the native
    RandomForestModel: it averages the individual tree votes into a soft
    score per record.

    rf_model -- RandomForestModel; its Java-level trees are queried directly.
    data     -- RDD of rows carrying SearchID, AdID, Position, ObjectType
                and HistCTR attributes.

    Returns an RDD of floats: the mean per-tree prediction for each row.
    '''
    # Collect the individual decision tree models by calling the underlying
    # Java model. These are returned as JavaArray defined by py4j.
    trees = rf_model._java_model.trees()
    ntrees = rf_model.numTrees()

    # Extract the feature vectors once; the original rebuilt this map for
    # every tree in the loop below.
    feats = data.map(lambda row: [float(row.SearchID), float(row.AdID),
                                  float(row.Position), float(row.ObjectType),
                                  float(row.HistCTR)])

    scores = DecisionTreeModel(trees[0]).predict(feats)

    # For each decision tree, apply its prediction to the entire dataset and
    # accumulate the results using 'zip'.
    for i in range(1, ntrees):
        dtm = DecisionTreeModel(trees[i])
        scores = scores.zip(dtm.predict(feats)).map(lambda x: x[0] + x[1])

    # Divide the accumulated scores over the number of trees
    return scores.map(lambda x: x / ntrees)
コード例 #6
0
def predict_proba(model, data):
    '''
    Input: A PySpark RandomForestModel object, RDD of LabeledPoints
    Output: List of probabilities
    This wrapper exposes the probabilities (i.e. confidences) for a given
    prediction.
    '''
    # Collect the individual decision tree models by calling the underlying
    # Java model. These are returned as JavaArray defined by py4j.
    trees = model._java_model.trees()
    ntrees = model.numTrees()

    # Map to raw feature vectors once and reuse for every tree; the original
    # re-created this RDD on each loop iteration.
    features = data.map(lambda x: x.features)

    scores = DecisionTreeModel(trees[0]).predict(features)

    # For each decision tree, apply its prediction to the entire dataset and
    # accumulate the results using 'zip'.  Python 2-only `xrange` replaced
    # with `range` for Python 3 compatibility.
    for i in range(1, ntrees):
        dtm = DecisionTreeModel(trees[i])
        scores = scores.zip(dtm.predict(features))
        scores = scores.map(lambda x: x[0] + x[1])

    # Divide the accumulated scores over the number of trees and pull the
    # result back to the driver as a plain Python list.
    probabilities = scores.map(lambda x: float(x) / ntrees).collect()
    return probabilities
コード例 #7
0
def predict_proba(rf_model, data):
    '''
    This wrapper overcomes the "binary" nature of predictions in the native
    RandomForestModel by summing per-tree predictions keyed by the feature
    vector and dividing by the tree count.

    rf_model -- RandomForestModel whose Java trees are queried directly.
    data     -- RDD of LabeledPoints; `lp.features` serves both as the
                prediction input and as the reduce key.

    Returns an RDD of (features, mean_prediction) pairs.

    NOTE(review): relies on a module-level `sc` (SparkContext) being in
    scope, and on the feature vectors being hashable so reduceByKey can
    group them -- confirm both hold for the caller's data.
    '''
    # Collect the individual decision tree models by calling the underlying
    # Java model. These are returned as JavaArray defined by py4j.
    trees = rf_model._java_model.trees()
    ntrees = rf_model.numTrees()

    # Build the feature RDD once; it is reused as both predictor input and
    # join key below (the original re-mapped `data` twice per iteration).
    features = data.map(lambda lp: lp.features)

    # For each decision tree, pair every feature vector with that tree's
    # prediction and pool all the pairs into a single RDD.
    featsAndPredictions = sc.parallelize([])  # empty RDD to union onto
    for i in range(ntrees):
        dtm = DecisionTreeModel(trees[i])
        predictions = dtm.predict(features)
        featsAndPredictions = featsAndPredictions.union(
            features.zip(predictions))

    # Sum the per-tree predictions for each feature vector and divide by the
    # number of trees.  The original used a Python 2-only tuple-parameter
    # lambda (`lambda (key, val): ...`), which is a SyntaxError on Python 3.
    return (featsAndPredictions
            .reduceByKey(lambda a, b: a + b)
            .map(lambda kv: (kv[0], kv[1] / ntrees)))