Beispiel #1
0
def handle_predict(argv):
    """Classify the examples in ``argv[4]`` using the hypothesis stored in ``argv[3]``.

    The model file is read as two lines: the first names the model type and
    the second holds the hypothesis as a Python literal.  This variant always
    builds a ``DecisionTree``, so the model-type line is read only to advance
    past it.

    Args:
        argv: Indexable command-line arguments; ``argv[3]`` is the model file
            path and ``argv[4]`` is the path of the file to classify.

    Returns:
        Whatever ``tree.classify(examples, hypothesis)`` returns.
    """
    # The context manager closes the file on exit; no explicit close() needed.
    with open(argv[3], "r") as f:
        f.readline()  # model-type line, unused here
        raw_hypothesis = f.readline()

    # NOTE(review): assuming this is ast.literal_eval, it only evaluates
    # literals (unlike eval), but the model file is still trusted to contain a
    # well-formed hypothesis -- confirm the file comes from a trusted source.
    hypothesis = literal_eval(raw_hypothesis)

    tree = DecisionTree()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[4], training=False)
    examples = tree.create_examples(examples)
    return tree.classify(examples, hypothesis)
Beispiel #2
0
def handle_predict(argv):
    """Print a classification for every example in ``argv[3]``.

    The model file ``argv[2]`` is read as two lines: the model type (``"dt"``
    selects ``DecisionTree``; anything else selects ``Adaboost``) followed by
    the hypothesis written as a Python literal.

    Args:
        argv: Indexable command-line arguments; ``argv[2]`` is the model file
            path and ``argv[3]`` is the path of the file to classify.
    """
    # The context manager closes the file on exit; no explicit close() needed.
    with open(argv[2], "r") as f:
        model = f.readline().strip('\n')
        raw_hypothesis = f.readline()

    hypothesis = literal_eval(raw_hypothesis)

    tree = DecisionTree() if model == "dt" else Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[3], training=False)
    examples = tree.create_examples(examples)
    for classification in tree.classify(examples, hypothesis):
        print(classification)
Beispiel #3
0
    # Parse the already-open ``csvfile`` as comma-separated rows.
    reader = csv.reader(csvfile, delimiter=',')

    # Split the rows by position: the first 299 rows become training data and
    # every later row becomes testing data (cnt is incremented before the
    # ``cnt < 300`` check).
    trainingRecords = []
    testingRecords = []
    cnt = 0
    for row in reader:
        record = Record(row)
        cnt = cnt + 1
        if cnt < 300:
            trainingRecords.append(record)
        else:
            testingRecords.append(record)

    # Build both classifiers from the same training records.
    # NOTE(review): the meaning of the positional arguments 200, 3, 3 is not
    # visible here -- confirm against the RandomForest constructor.
    forest = RandomForest(trainingRecords, 200, 3, 3)
    tree = DecisionTree(trainingRecords)

    print("Printing Random Forest")
    print(str(forest))
    print("--------------------------------------------")
    # Spot-check both classifiers on the first training record.
    print("Feature for a single classification : "+str(trainingRecords[0].feature))
    print("True result for a single classification : " + str(trainingRecords[0].party))
    print("Est. result from random forest : " + str(forest.classify(trainingRecords[0].feature)))
    print("Est. result from decision tree : " + str(tree.classify(trainingRecords[0].feature)))
    print("--------------------------------------------")
    # NOTE(review): this local name shadows the builtin ``eval`` for the rest
    # of the enclosing scope.
    eval = Evaluator()
    # Report what Evaluator.evaluation returns for each classifier on the
    # held-out testing records.
    print("Est. average accuracy from random forest : " + str(eval.evaluation(forest,testingRecords)))
    print("Est. average accuracy from decision tree : " + str(eval.evaluation(tree,testingRecords)))



Beispiel #4
0
def _format_example_row(labels, attributes):
    """Return one tab-aligned output row holding the value of every label.

    Each value after the first is preceded by ``len(previous_label) // 8 + 1``
    tab characters, where ``previous_label`` is the label of the column to its
    left.
    """
    cells = [str(attributes[labels[0]])]
    for previous, current in zip(labels, labels[1:]):
        cells.append('\t' * ((len(previous) // 8) + 1))
        cells.append(str(attributes[current]))
    return "".join(cells)


def main():
    """Run repeated train/test trials and print per-trial and mean accuracy.

    Command-line arguments (from ``get_parser()``) supply the input file, the
    number of trials, the training-set size and a verbose flag.  Each trial
    re-parses the input file, partitions it, builds a ``DecisionTree`` and a
    ``PriorClassifier`` and records the integer percentage of test cases each
    one classifies correctly.  With the verbose flag, the training and testing
    examples are printed as tab-aligned tables.

    When the number of trials is zero the summary reports a testing-set size
    and mean performances of 0 instead of failing on unbound variables.
    """
    parser = get_parser()
    args = parser.parse_args()
    input_file_name = args.input_file_name
    number_of_trials = args.number_of_trials
    training_set_size = args.training_set_size
    verbose = args.verbose

    decision_tree_performance = []
    prior_classifier_performance = []
    # Defined up front so the final summary works even with zero trials.
    testing_set_size = 0

    for trial in range(number_of_trials):
        labels, dataset = parse_file(input_file_name)
        training_set, testing_set = partition(dataset, training_set_size)
        testing_set_size = len(testing_set)

        t = DecisionTree(labels, training_set, testing_set)
        p = PriorClassifier(labels, training_set, testing_set)

        # Evaluating testing-set performance: accuracy = (TP + TN) / total,
        # expressed as a rounded integer percentage.
        c = t.test()
        tree_prob = int(
            round(100 * float(c['TP'] + c['TN']) / len(t.testing_set)))
        decision_tree_performance.append(tree_prob)

        k = p.test()
        prior_prob = int(
            round(100. * float(k['TP'] + k['TN']) / len(p.testing_set)))
        prior_classifier_performance.append(prior_prob)

        print("""
TRIAL NUMBER: %d
--------------------

DECISION TREE STRUCTURE: """ % trial)
        print(t.tree)
        # Written with explicit escapes so the trailing spaces that are part
        # of the message cannot be lost to whitespace trimming.
        print("\n\n"
              "Percent of test cases correctly classified by a decision tree built \n"
              "\twith ID3 = %d%%\n"
              "\n"
              "Percent of test cases correctly classified by using prior \n"
              "\tprobabilities from the training set = %d%% "
              % (tree_prob, prior_prob))

        if verbose:
            print("Examples in the training set\n--------------------")
            print("\t".join(t.labels) + '\n')
            for vector in t.training_set:
                print(_format_example_row(t.labels, vector.attributes))

            print("Examples in the testing set\n--------------------")
            header_labels = t.labels + ["CLASS", "PRIOR RESULT", "ID3 RESULT"]
            print("\t".join(header_labels) + '\n')

            last_tab = '\t' * ((len(t.labels[-1]) // 8) + 1)
            for vector in t.testing_set:
                example_string = _format_example_row(t.labels,
                                                     vector.attributes)
                example_string += last_tab + str(vector.CLASS) + '\t'
                # NOTE(review): the "PRIOR RESULT" column is hard-coded to
                # False rather than taken from the prior classifier -- confirm
                # whether that is intended.
                example_string += str(False) + '\t\t'
                example_string += str(t.classify(vector.attributes))
                print(example_string)

    # Means are computed once, after all trials; 0 when no trial was run.
    if decision_tree_performance:
        tree_mean = sum(decision_tree_performance) / len(
            decision_tree_performance)
        prior_mean = sum(prior_classifier_performance) / len(
            prior_classifier_performance)
    else:
        tree_mean = 0
        prior_mean = 0

    print("""
Example file used = %s
Number of trials = %d
Training set size for each trial = %d
Testing set size for each trial = %d
Mean performance of decision tree over all trials = %d%%
Mean performance of using prior probability derived from training set = %d%%
    correct classification
""" % (input_file_name, number_of_trials, training_set_size, testing_set_size,
       tree_mean, prior_mean))
Beispiel #5
0
# Deep copy: keep an independent copy of the labels before building the tree.
# NOTE(review): presumably createTree modifies the list passed to it, which is
# why the copy is used for classification below -- confirm.
newLabel = copy.deepcopy(labels)
print(labels)
tree = DecisionTree.createTree(myData, labels)
# print(myData)

# print(DecisionTree.calcShannonEntropy(myData))
# print(DecisionTree.splitDataSet(myData,0,1))
# print(DecisionTree.chooseBestFeatureToSplit(myData))

# print(tree)

print(myData)
print(newLabel)

# Classify the feature vector [1, 1] with the freshly built tree.
classLabel = DecisionTree.classify(tree, newLabel, [1, 1])
print(classLabel)

# Store the tree to a file, load it back and print what was loaded.
DecisionTree.storeTree(tree, 'classifierStorage.txt')

treeTxt = DecisionTree.grabTree('classifierStorage.txt')

print(treeTxt)

# Read the lenses data set: one tab-separated instance per line.  The context
# manager makes sure the file handle is closed once the rows are parsed.
with open('../resource/lenses/lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]

lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']

lensesTree = DecisionTree.createTree(lenses, lensesLables)
Beispiel #6
0
def main():
    """Build an ID3 decision tree from a CSV file and optionally classify tests.

    Command-line options:
        -e/--examples: CSV file with the training examples.  The first row is
            the header; its last column is used as the class attribute.
        -p/--print: print the resulting tree.
        -t/--testes: CSV file with the rows to classify.  The first row is the
            header naming each column.

    With no command-line arguments at all, the help text is printed and the
    program exits with status 0.  If arguments are given but ``--examples`` is
    missing, an argparse usage error is reported instead of failing later when
    opening the file.
    """
    parser = ArgumentParser(description='Porcessador de dados utilizando ID3.')

    parser.add_argument(
        '-e',
        '--examples',
        type=str,
        help='Documento com os dados que queremeos que a máquina aprenda.')
    parser.add_argument('-p',
                        '--print',
                        action='store_true',
                        help='Imprimir a árvore.')
    parser.add_argument(
        '-t',
        '--testes',
        type=str,
        help='Documentos onde se encontram os dados que se prentende avaliar.')

    args = parser.parse_args()

    if len(argv) == 1:
        parser.print_help()
        parser.exit(0)

    if args.examples is None:
        # Without this check open(None) below would raise a TypeError.
        parser.error('the -e/--examples argument is required')

    # Read the examples CSV: header row first, then one example per row.
    with open(args.examples, 'rt') as fd:
        exemplosBuf = csv.reader(fd)
        firstRow = next(exemplosBuf)
        exemplos = list(exemplosBuf)  # type: list(list(str))

    # Map each attribute name to its column index.
    atributos = {nome: indice
                 for indice, nome in enumerate(firstRow)}  # type: dict(str,int)
    classe = firstRow[-1]

    arvore = DecisionTree(exemplos, atributos, classe)

    if args.print:
        print(arvore)

    if args.testes is not None:
        # Read the tests CSV and print one answer per row.
        with open(args.testes, 'rt') as fd:
            exemplosBuf = csv.reader(fd)
            firstRow = next(exemplosBuf)

            for aux in exemplosBuf:
                # An empty row ends processing; later rows are not read.
                if not aux:
                    break

                # Column name -> value for this row.  Indexing (rather than
                # zip) keeps the IndexError for rows shorter than the header.
                dicio = {nome: aux[indice]
                         for indice, nome in enumerate(firstRow)}  # type: dict(str,str)

                # Look up the answer; fall back to arvore.mostCommon() when
                # the tree returns None.
                resul = arvore.classify(dicio)
                if resul is None:
                    print(arvore.mostCommon())
                else:
                    print(resul)