Python DT Examples, DecisionTree.DT Python Examples

Example #1

0

Show file

File: dt_project_fileload2.py Project: anbangx/Data-Mining

def decisionTree(training_list, testing_list, fileTestBelongCategory, words_name, use_version2=True):
    """Print a banner and run one of the two decision-tree entry points of ``DT``.

    Args:
        training_list: Training data, forwarded to ``DT`` unchanged.
        testing_list: Test data, forwarded to ``DT`` unchanged.
        fileTestBelongCategory: Forwarded to ``DT.decisionTree_version2`` only.
        words_name: Forwarded to ``DT.decisionTree_version1`` only.
        use_version2: When true (the default) ``DT.decisionTree_version2`` is
            called, otherwise ``DT.decisionTree_version1``.
    """
    # A parenthesised, single-argument print produces identical output under
    # the Python 2 print statement and the Python 3 print function.
    print("-----------------------------------------------------------------------------------------")
    print("\nDecision Tree Algorithm\n")
    if use_version2:
        # Disabled max-depth sweep, kept for reference:
        # adjust_depth_dict = {}
        # for max_depth in range(10, 121, 10):
        #     DT.decisionTree_version2(training_list, testing_list, max_depth=max_depth, adjust_depth_dict=adjust_depth_dict)
        # save_obj(adjust_depth_dict, 'adjust_depth')
        DT.decisionTree_version2(training_list, testing_list, fileTestBelongCategory)
    else:
        # Called with hard-coded sizes; the author's noted alternative is
        # num_trainning_file=len(training_list), num_features=len(training_list[0]) - 1
        DT.decisionTree_version1(training_list, testing_list, words_name, num_trainning_file=200, num_features=1000)

Example #2

0

Show file

def decisionTree(training_list, testing_list, fileTestBelongCategory, words_name, use_version2=True):
    """Announce the decision-tree run, then delegate to the ``DT`` module.

    Args:
        training_list: Training data, passed straight through to ``DT``.
        testing_list: Test data, passed straight through to ``DT``.
        fileTestBelongCategory: Used only by the version-2 call.
        words_name: Used only by the version-1 call.
        use_version2: Selects ``DT.decisionTree_version2`` when true
            (default) and ``DT.decisionTree_version1`` when false.
    """
    # print(...) with exactly one string argument behaves the same on
    # Python 2 and Python 3, so the banner no longer breaks Python 3 parsing.
    print("-----------------------------------------------------------------------------------------")
    print("\nDecision Tree Algorithm\n")
    if use_version2:
        # Disabled experiment that swept max_depth from 10 to 120:
        # adjust_depth_dict = {}
        # for max_depth in range(10, 121, 10):
        #     DT.decisionTree_version2(training_list, testing_list, max_depth=max_depth, adjust_depth_dict=adjust_depth_dict)
        # save_obj(adjust_depth_dict, 'adjust_depth')
        DT.decisionTree_version2(training_list, testing_list, fileTestBelongCategory)
    else:
        # Sizes are hard-coded here; the author's note gives the alternative
        # num_trainning_file=len(training_list), num_features=len(training_list[0]) - 1
        DT.decisionTree_version1(training_list, testing_list, words_name, num_trainning_file=200, num_features=1000)

Example #3

0

Show file

File: test_decision_tree.py Project: CarveTheFuture/SupervisedLearning

def test_gini():
    """Check that ``DT._gini`` returns 0.48 for the labels [1, 1, 2, 1, 2]."""
    labels = [1, 1, 2, 1, 2]
    expected = 0.48
    computed = DT()._gini(np.array(labels))
    failure_text = 'Gini value for {}: Got {:.2f}. Should be {:.2f}'.format(
        labels, computed, expected)
    # Third positional argument (4) is the comparison precision.
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #4

0

Show file

File: test_decision_tree.py Project: CarveTheFuture/SupervisedLearning

def test_information_gain():
    """Check ``DT._information_gain`` on the ``fake_data`` labels against 0.019973."""
    # Only the three label arrays are needed; the feature matrices are ignored.
    _, y, _, y1, _, y2 = fake_data()
    expected = 0.019973
    computed = DT()._information_gain(y, y1, y2)
    failure_text = 'Information gain for:\n{}, {}, {}:\nGot {:.3f}. Should be {:.3f}'.format(
        y, y1, y2, computed, expected)
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #5

0

Show file

File: test_decision_tree.py Project: CarveTheFuture/SupervisedLearning

def test_entropy():
    """Check that ``DT._entropy`` returns 0.97095 for the labels [1, 1, 2, 1, 2]."""
    labels = [1, 1, 2, 1, 2]
    expected = 0.97095
    computed = DT()._entropy(np.array(labels))
    failure_text = 'Entropy value for {}: Got {:.2f}. Should be {:.2f}'.format(
        labels, computed, expected)
    # Third positional argument (4) is the comparison precision.
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #6

0

Show file

def test_entropy():
    """Check that ``DT._entropy`` returns 0.67301 for the labels [1, 1, 2, 1, 2]."""
    labels = [1, 1, 2, 1, 2]
    expected = 0.67301
    computed = DT()._entropy(np.array(labels))
    failure_text = (
        'Entropy value for %r: Got %.2f. Should be %.2f'
        % (labels, computed, expected)
    )
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #7

0

Show file

def test_information_gain():
    """Check ``DT._information_gain`` on the ``fake_data`` labels against 0.01384."""
    # The feature matrices returned by fake_data() are not used here.
    _, y, _, y1, _, y2 = fake_data()
    expected = 0.01384
    computed = DT()._information_gain(y, y1, y2)
    failure_text = (
        'Information gain for:\n%r, %r, %r:\nGot %.3f. Should be %.3f'
        % (y, y1, y2, computed, expected)
    )
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #8

0

Show file

def test_gini():
    """Check that ``DT._gini`` returns 0.48 for the labels [1, 1, 2, 1, 2]."""
    labels = [1, 1, 2, 1, 2]
    expected = 0.48
    computed = DT()._gini(np.array(labels))
    failure_text = (
        'Gini value for %r: Got %.2f. Should be %.2f'
        % (labels, computed, expected)
    )
    n.assert_almost_equal(computed, expected, 4, failure_text)

Example #9

0

Show file

File: dt_project_fileload2.py Project: anbangx/Data-Mining

def create_CategoryAUC(categoryList):
    """Build a dict mapping each category to its ``DT.getTopCategory`` result.

    ``DT.getTopCategory`` is called once per category with the category and
    the number of keys in the module-level
    ``fileTestAlphaNumericStrStemmedDict``.

    Shape of the result, per the original author's note:
    {'acq':['1', '2'], 'cad':['3', '4'] ...}
    """
    return {
        category: DT.getTopCategory(
            category, len(fileTestAlphaNumericStrStemmedDict))
        for category in categoryList
    }

Example #10

0

Show file

def create_CategoryAUC(categoryList):
    """Return ``{category: DT.getTopCategory(category, key_count)}``.

    ``key_count`` is the number of keys in the module-level
    ``fileTestAlphaNumericStrStemmedDict``; it is re-read for every category.

    Example of the result given by the original author:
    {'acq':['1', '2'], 'cad':['3', '4'] ...}
    """
    return {
        category: DT.getTopCategory(
            category, len(fileTestAlphaNumericStrStemmedDict))
        for category in categoryList
    }

Example #11

0

Show file

def test_make_split():
    """Check that ``DT._make_split`` on column 1 / value 'bat' matches ``fake_data``.

    The result must unpack into four parts (X1, y1, X2, y2), each equal to
    the corresponding array returned by ``fake_data``.
    """
    X, y, X1, y1, X2, y2 = fake_data()
    column, threshold = 1, 'bat'
    tree = DT()
    tree.categorical = np.array([False, True])
    outcome = tree._make_split(X, y, column, threshold)
    try:
        X1_result, y1_result, X2_result, y2_result = outcome
    except ValueError:
        # Wrong number of parts: fail with a readable message.
        n.assert_true(False, 'result not in correct form: (X1, y1, X2, y2)')
    expected = (X1, y1, X2, y2)
    message = '_make_split got results\n%r\nShould be\n%r' % (outcome, expected)
    received = (X1_result, y1_result, X2_result, y2_result)
    # Compared in the order X1, y1, X2, y2; the first mismatch fails the test.
    for wanted, got in zip(expected, received):
        n.ok_(np.array_equal(wanted, got), message)

Example #12

0

Show file

def test_choose_split_index():
    """Check that ``DT._choose_split_index`` picks column 1 with value 'cat'.

    The result must unpack into (split_index, split_value, splits).
    """
    # Only the full feature matrix and labels are used by this test.
    X, y, _, _, _, _ = fake_data()
    expected_index, expected_value = 1, 'cat'
    tree = DT()
    tree.categorical = np.array([False, True])
    outcome = tree._choose_split_index(X, y)
    try:
        split_index, split_value, _ = outcome
    except ValueError:
        n.assert_true(False, ('result not in correct form. Should be:\n'
                              '    split_index, split_value, splits'))
    template = ('choose split for data:\n%r\n%r\n'
                'split index, split value should be: %r, %r\n'
                'not: %r, %r')
    message = template % (
        X, y, expected_index, expected_value, split_index, split_value)
    n.eq_(split_index, expected_index, message)
    n.eq_(split_value, expected_value, message)

Example #13

0

Show file

File: test_decision_tree.py Project: CarveTheFuture/SupervisedLearning

def test_choose_split_index():
    """Check that ``DT._choose_split_index`` picks column 1 with value 'cat'.

    The result must unpack into (split_index, split_value, splits).
    """
    # The pre-split halves returned by fake_data() are not needed here.
    X, y, _, _, _, _ = fake_data()
    expected_index, expected_value = 1, 'cat'
    tree = DT()
    tree.categorical = np.array([False, True])
    outcome = tree._choose_split_index(X, y)
    try:
        split_index, split_value, _ = outcome
    except ValueError:
        n.assert_true(False, ('result not in correct form. Should be:\n'
                              '    split_index, split_value, splits'))
    message = ''.join([
        'choose split for data:\n{}\n{}\n'.format(X, y),
        'split index, split value should be: {}, {}\n'.format(
            expected_index, expected_value),
        'not: {}, {}'.format(split_index, split_value),
    ])
    n.eq_(split_index, expected_index, message)
    n.eq_(split_value, expected_value, message)

Example #14

0

Show file

File: GBDT.py Project: Wlgls/machine_learning

    def fit(self, X, y, maxIter=3):
        """Fit up to ``maxIter`` ``DT`` models, each on the current residual.

        Every round builds a ``DT`` on ``X`` with the residual
        ``y - <sum of previous predictions>`` appended as the last column,
        appends the model to ``self.ModelArr`` and adds its prediction to the
        running fit. Fitting stops early when a built model has ``T is None``.

        Args:
            X: 2-D feature array (it is concatenated column-wise with the
                residual).
            y: Target array; reshaped to a single column.
            maxIter: Maximum number of models to build.

        Returns:
            ``self.ModelArr`` (the same list object that is appended to).
        """
        targets = y.reshape((-1, 1))
        fitted = np.zeros(targets.shape)

        for _ in range(maxIter):
            residual = targets - fitted

            # Join X and the residual and hand them to the decision-tree
            # algorithm.
            stage = DT()
            stage.BuildTree(np.concatenate((X, residual), axis=1))
            # The decision tree applies a threshold, so it may not build
            # anything at all; in that case stop.
            if stage.T is None:
                break
            self.ModelArr.append(stage)
            # Update the running fit with this stage's prediction.
            fitted = fitted + stage.predict(X).reshape((-1, 1))

        return self.ModelArr

Example #15

0

Show file

File: AI.py Project: tomhettinger/simplestone

    def play_turn(self, board):
        """Perform actions on the board until none remain, then end the turn.

        Each pass refreshes the display, pauses 0.3 s, builds a ``DT`` for the
        current board with ``self.strategy`` and performs its ``bestAction``.
        The loop stops when there is no best action or after performing an
        ``Action.DoNothingAction``; ``loop.end_turn(board)`` is then called.
        """
        while True:
            loop.refresh(board)
            time.sleep(0.3)

            # Disabled lethal check, kept for reference:
            #lethalActions = check_for_lethal(board)
            #if lethalActions is not None:
            #    lethalActions[0].perform(board)
            #    continue

            # The tree is only needed for its chosen action, so it is not
            # kept around.
            chosen = DT(board, strategy=self.strategy).bestAction
            if chosen is None:  # No more available actions.
                break
            chosen.perform(board)
            if isinstance(chosen, Action.DoNothingAction):
                break
        loop.end_turn(board)

Example #16

0

Show file

File: DTwithScikitLearn.py Project: BroG-Chen/Machine-Learning-in-Action

from DecisionTree import DT
from sklearn import tree
import numpy as np


if __name__ == '__main__':
    # Fit a scikit-learn decision tree on the data set from DT.createDataSet()
    # and print its prediction for one hand-written sample.
    dataset, feature_names = DT.createDataSet()
    # Each row holds the feature values followed by the class label.
    feature_rows = []
    targets = []
    for row in dataset:
        feature_rows.append(row[:-1])
        targets.append(row[-1])
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(feature_rows, targets)
    query = [1, 0]
    # predict() is given a list containing the single sample.
    prediction = classifier.predict([query])
    # Alternative kept from the original:
    # r = classify.predict(np.array(tv).reshape(1, -1))
    print(prediction)

Example #17

0

Show file

File: dt_project_fileload2.py Project: anbangx/Data-Mining

def naiveBayes(list):
    """Print the Naive Bayes banner.

    The argument is accepted but not used. Its name shadows the ``list``
    builtin; it is left unchanged so the signature stays the same.
    """
    # A parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print("\nNaive Bayes Algorithm\n")

# Define Naive Bayes algorithm in detail
def naiveBayesDetail(list):
    """Print the Naive Bayes banner.

    The argument is accepted but not used. Its name shadows the ``list``
    builtin; it is left unchanged so the signature stays the same.
    """
    # A parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print("\nNaive Bayes Algorithm\n")

# Execute TF-IDF based Cosine Similarity algorithm
# tfidfCosineSimilarity(termFrequencyPerCategoryList)

# Execute Decision Tree algorithm
# decisionTree(frequencyInFilePerCategoryInTrainingSetList, frequencyInFilePerCategoryInTestSetList, fileTestBelongCategory, wholeVocabularyFromTrainingAndTestSetList)


# clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList, max_depth=80)
# Train the decision tree on the training-set frequencies and get the top-1
# predicted class for the first test-set entry.
clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList)
top_k_categories = DT.get_top_k_prediction_class(clf, frequencyInFilePerCategoryInTestSetList[0], k=1)
# print top_k_categories
# print len(frequencyInFilePerCategoryInTestSetList)

# Confusion Matrix - row : true test category, column - true column category
# NOTE(review): the column is presumably the *predicted* category - confirm.
# One row and one column per key of categoryAlphaNumericStrStemmedDict.
realCategorySize = len(categoryAlphaNumericStrStemmedDict)
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24; the builtin selects the same dtype on every NumPy version.
confusionMatrix = np.zeros((realCategorySize, realCategorySize), dtype=int)
categoryTestToIndexDict = {}
idx = 0

# Give every category a consecutive row/column index in dict iteration order.
for key in categoryAlphaNumericStrStemmedDict:
    categoryTestToIndexDict[key] = idx
    idx += 1

confusionTable = np.zeros((2, 2), dtype=int)

Example #18

0

Show file

def naiveBayes(list):
    """Write the "Naive Bayes Algorithm" banner to standard output.

    The parameter is unused. It keeps its original name (which shadows the
    ``list`` builtin) so the signature is unchanged.
    """
    # Written as a call with one argument so it parses and prints the same
    # way on Python 2 and Python 3.
    print("\nNaive Bayes Algorithm\n")

# Define Naive Bayes algorithm in detail
def naiveBayesDetail(list):
    """Write the "Naive Bayes Algorithm" banner to standard output.

    The parameter is unused. It keeps its original name (which shadows the
    ``list`` builtin) so the signature is unchanged.
    """
    # Written as a call with one argument so it parses and prints the same
    # way on Python 2 and Python 3.
    print("\nNaive Bayes Algorithm\n")

# Execute TF-IDF based Cosine Similarity algorithm
# tfidfCosineSimilarity(termFrequencyPerCategoryList)

# Execute Decision Tree algorithm
# decisionTree(frequencyInFilePerCategoryInTrainingSetList, frequencyInFilePerCategoryInTestSetList, fileTestBelongCategory, wholeVocabularyFromTrainingAndTestSetList)


# clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList, max_depth=80)
# Build the classifier from the training-set frequencies, then ask for the
# single best predicted class of the first test-set entry.
clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList)
top_k_categories = DT.get_top_k_prediction_class(clf, frequencyInFilePerCategoryInTestSetList[0], k=1)
# print top_k_categories
# print len(frequencyInFilePerCategoryInTestSetList)

# Confusion Matrix - row : true test category, column - true column category
# NOTE(review): the column is presumably the *predicted* category - confirm.
# The matrix is square, sized by the number of keys in
# categoryAlphaNumericStrStemmedDict.
realCategorySize = len(categoryAlphaNumericStrStemmedDict)
# dtype=int replaces dtype=np.int: the alias was removed in NumPy 1.24 and
# always meant the builtin int, so the resulting array dtype is unchanged.
confusionMatrix = np.zeros((realCategorySize, realCategorySize), dtype=int)
categoryTestToIndexDict = {}
idx = 0

# Map each category name to a consecutive index in dict iteration order.
for key in categoryAlphaNumericStrStemmedDict:
    categoryTestToIndexDict[key] = idx
    idx += 1

confusionTable = np.zeros((2, 2), dtype=int)

Example #19

0

Show file

    print "\nNaive Bayes Algorithm\n"


# Define Naive Bayes algorithm in detail
def naiveBayesDetail(list):
    """Print the "Naive Bayes Algorithm" banner; the argument is not used.

    The parameter name shadows the ``list`` builtin and is kept only so the
    signature stays the same.
    """
    # One-argument print call: same output on Python 2 and Python 3.
    print("\nNaive Bayes Algorithm\n")


# Execute TF-IDF based Cosine Similarity algorithm
# tfidfCosineSimilarity(termFrequencyPerCategoryList)

# Execute Decision Tree algorithm
# decisionTree(frequencyInFilePerCategoryInTrainingSetList, frequencyInFilePerCategoryInTestSetList, fileTestBelongCategory, wholeVocabularyFromTrainingAndTestSetList)

# clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList, max_depth=80)
# Train on the training-set frequencies and fetch the top-1 predicted class
# for the first test-set entry.
clf = DT.create_decision_tree(frequencyInFilePerCategoryInTrainingSetList)
top_k_categories = DT.get_top_k_prediction_class(
    clf, frequencyInFilePerCategoryInTestSetList[0], k=1)
# print top_k_categories
# print len(frequencyInFilePerCategoryInTestSetList)

# Confusion Matrix - row : true test category, column - true column category
# NOTE(review): the column is presumably the *predicted* category - confirm.
realCategorySize = len(categoryAlphaNumericStrStemmedDict)
# Builtin int instead of np.int: the alias was removed in NumPy 1.24 and was
# always the builtin, so the array dtype is the same.
confusionMatrix = np.zeros((realCategorySize, realCategorySize), dtype=int)
categoryTestToIndexDict = {}
idx = 0

# Number the categories consecutively in dict iteration order.
for key in categoryAlphaNumericStrStemmedDict:
    categoryTestToIndexDict[key] = idx
    idx += 1

Example #20

0

Show file

def main():
    """Run the classifier comparison script and print each result.

    In order: load two saved SVM models ('clf_rbf.pkl', 'clf_linear.pkl') and
    print their prediction for 'test3.png' plus their test() result; train a
    ``train_neigh(183)`` model on the 'train' images; train and show a ``DT``
    model; then train two ``train_neigh`` models restricted to the feature
    pairs ('ecc', 'extent') and ('x0', 'y0').
    NOTE(review): the printed labels are Hungarian; translations are given in
    the comments below.
    """

    # SVM:
    t = train()
    # Training is disabled; saved models are loaded below instead.
    #im = image.ProcessImages('train')
    #X, Y = im.make_all()
    #t.train(X, Y)
    # RBF model:

    print('SVM clf:')
    t.load(name='clf_rbf.pkl')
    print(t.predict(measure.process('test3.png')))
    print(t.test())

    # Linear model:
    print('SVM linear:')
    t.load(name='clf_linear.pkl')
    print(t.predict(measure.process('test3.png')))
    print(t.test())

    # k nearest neighbours:
    # using all features
    #getBestKValue(400)
    print('osszes feature k neighbours:')  # "all features, k neighbours"
    im = image.ProcessImages('train')
    X, Y = im.make_all()
    # Result recorded on the training set:
    #maximum:  0.7032590051457976  neighbours:  183
    t2 = train_neigh(183)
    t2.train(X, Y)
    # Result recorded on the test set:
    # achieved result:  0.5483870967741935
    print('elert eredmeny: ', t2.test())  # "achieved result"

    # Decision tree:
    print('dontesi fa: ')  # "decision tree"
    tree = DT()
    tree.train(X, Y)
    print('elert eredmeny: ', tree.test())
    print(tree.valid())
    tree.show(X, Y)

    # Feature pair ecc / extent:
    print('ket feature: ecc, extent:')  # "two features"
    #getBestKValue(100, two_feature=True, features=['ecc', 'extent'])
    t3 = train_neigh(80, first_two=True, features=['ecc', 'extent'])
    t3.train(X, Y)
    print('elert eredmeny: ', t3.test())
    # Recorded achieved result:  0.4946236559139785
    t3.train_first_two_features(X, Y)
    #_train_neigh(80, feauters=['ecc', 'extent'])

    # Feature pair x0 / y0:
    print('ket feature: x0, y0:')
    #getBestKValue(100, two_feature=True, features=['x0', 'y0'])
    t4 = train_neigh(54, first_two=True, features=['x0', 'y0'])
    t4.train(X, Y)
    print('elert eredmeny: ', t4.test())
    # Recorded achieved result:  0.3333333333333333
    t4.train_first_two_features(X, Y)
    # NOTE(review): the keyword is spelled `feauters`; presumably it matches
    # the parameter name of _train_neigh - confirm before renaming.
    _train_neigh(54, feauters=['x0', 'y0'])

    #train_neigh_()
    # NOTE(review): im2 is not used in the visible part of this function.
    im2 = image.ProcessImages('test')
    _train_neigh(80, feauters=['ecc', 'extent'])