Python splitDataSetの例

プログラミング言語: Python

名前空間/パッケージ名: split_dataset

メソッド/関数: splitDataSet

hotexamples.comのコード掲載数: 4

Python splitDataSet - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsplit_dataset.splitDataSetの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: main.py プロジェクト: omeroot/decision-tree

def chooseBestFeatureToSplit(dataset):
    """Return the index of the feature column with the highest information gain.

    Every row of ``dataset`` is expected to end with its class label, so only
    the columns before the last one are considered as candidate features.

    Args:
        dataset: Non-empty list of equally long rows (feature values followed
            by the class label).

    Returns:
        The index of the column whose split gives the largest strictly
        positive information gain, or -1 when no column improves on the
        entropy of the unsplit dataset.
    """
    # The last column holds the class label; scoring it as a feature would
    # let the tree "split" on the very value it is trying to predict.
    numFeatures = len(dataset[0]) - 1
    totalRows = float(len(dataset))

    baseEntropy = entropy_shannon.calcShannonEnt(dataset)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("baseEntropy " + str(baseEntropy))

    bestInfoGain = 0.0
    bestFeature = -1

    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)

        # Entropy after the split: each subset's entropy weighted by the
        # fraction of rows that fall into it.
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = split_dataset.splitDataSet(dataset, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * entropy_shannon.calcShannonEnt(subDataSet)

        infoGain = baseEntropy - newEntropy
        print("infoGain & entropy for " + str(i) + " => " + str(infoGain)
              + " " + str(newEntropy))
        # Strict comparison: on ties the earliest column wins.
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i

    return bestFeature

コード例 #2

ファイルを表示

ファイル: main.py プロジェクト: omeroot/decision-tree

def chooseBestFeatureToSplit(dataset):
    """Pick the feature column whose split yields the most information gain.

    Rows of ``dataset`` are expected to carry their class label as the last
    element; that column is excluded from the candidates.

    Args:
        dataset: Non-empty list of equally long rows (feature values followed
            by the class label).

    Returns:
        Index of the best feature column, or -1 if no column produces a
        strictly positive information gain.
    """
    # Exclude the trailing class-label column from the candidate features.
    numFeatures = len(dataset[0]) - 1
    totalRows = float(len(dataset))

    baseEntropy = entropy_shannon.calcShannonEnt(dataset)
    # Parenthesised single-string form prints identically on Python 2 and 3.
    print("baseEntropy " + str(baseEntropy))

    bestInfoGain = 0.0
    bestFeature = -1

    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)

        # Weighted sum of the entropies of the subsets produced by column i.
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = split_dataset.splitDataSet(dataset, i, value)
            prob = len(subDataSet) / totalRows
            newEntropy += prob * entropy_shannon.calcShannonEnt(subDataSet)

        infoGain = baseEntropy - newEntropy
        print("infoGain & entropy for " + str(i) + " => " + str(infoGain) + " " + str(newEntropy))
        # Only a strictly larger gain replaces the current best, so ties keep
        # the lower column index.
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i

    return bestFeature

コード例 #3

ファイルを表示

ファイル: main.py プロジェクト: omeroot/decision-tree

def createTree(dataset, labels):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        dataset: Non-empty list of rows; the last element of each row is the
            class label, the preceding elements are feature values.
        labels: List of feature names, indexed like the feature columns.
            The list is not modified.

    Returns:
        A class label when the node is a leaf, otherwise a dict of the form
        ``{feature_label: {feature_value: subtree, ...}}``.
    """
    classList = [example[-1] for example in dataset]
    # Every row has the same class: this node is a pure leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the class column is left, so there is nothing to split on; the
    # result of majorityCnt must be returned, not discarded.
    if len(dataset[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataset)
    # chooseBestFeatureToSplit returns -1 when no feature gives a positive
    # gain; using -1 as an index would silently select the class column.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # Build the reduced label list without touching the caller's list.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]

    branches = {}
    for value in set(example[bestFeat] for example in dataset):
        branches[value] = createTree(
            split_dataset.splitDataSet(dataset, bestFeat, value), subLabels)

    return {bestFeatLabel: branches}

コード例 #4

ファイルを表示

ファイル: main.py プロジェクト: omeroot/decision-tree

def createTree(dataset, labels):
    """Build a decision tree (nested dicts) from labelled rows.

    Args:
        dataset: Non-empty list of rows whose last element is the class label
            and whose other elements are feature values.
        labels: List of feature names aligned with the feature columns.
            It is left unchanged.

    Returns:
        The class label for a leaf node; otherwise
        ``{feature_label: {feature_value: subtree, ...}}``.
    """
    classList = [example[-1] for example in dataset]
    # All rows agree on the class, so stop here.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # No feature columns remain; hand back majorityCnt's answer instead of
    # dropping it and trying to split again.
    if len(dataset[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataset)
    # -1 means "no useful feature"; indexing with it would pick the last
    # label and split on the class column.
    if bestFeat < 0:
        return majorityCnt(classList)

    bestFeatLabel = labels[bestFeat]
    # A new list is created so the caller's labels are not mutated.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]

    branches = {}
    for value in set(example[bestFeat] for example in dataset):
        branches[value] = createTree(
            split_dataset.splitDataSet(dataset, bestFeat, value), subLabels)

    return {bestFeatLabel: branches}