# Example 1
def p3():
    """Train random forests with varying numbers of estimators on the
    Minecraft dataset and record train/test accuracies.

    Returns:
        tuple: (last fitted model, list of
        (n_estimators, accuracy_train, accuracy_test) tuples, one per
        forest size tried).
    """
    results = []
    model = None

    # Get the Minecraft dataset (histogram features).
    data_train, data_test, target_train, target_test = get_minecraft(
        'histogram')

    for n_estimators in [2, 5, 10, 20, 30]:
        # Create a random forest classifier with n_estimators estimators.
        model = create_random_forest(n_estimators)

        # Fit the model and predict labels for the training and test data.
        model = model.fit(data_train, target_train)
        predict_train = model.predict(data_train)
        predict_test = model.predict(data_test)

        # Record the accuracies for this forest size.
        accuracy_train, accuracy_test = calculate_model_accuracy(
            predict_train, predict_test, target_train, target_test)

        results.append((n_estimators, accuracy_train, accuracy_test))

    # Was the Python-2 statement `print results`; use the print() function
    # for consistency with the rest of the file.
    print(results)
    return model, results
# Example 2
def p2():
    """Learning-curve experiment: fit a decision tree on growing subsets
    of the Minecraft training data and collect train/test accuracies.

    Returns:
        tuple: (fitted model, list of (n, train_accuracy, test_accuracy)
        tuples, one per subset size).
    """
    scores = []
    tree = create_decision_tree()

    # Minecraft dataset with histogram features.
    X_train, X_test, y_train, y_test = get_minecraft('histogram')

    for size in [50, 100, 150, 200, 250]:
        # Take the first `size` training samples (helper from data.py).
        X_sub, y_sub = get_first_n_samples(X_train, y_train, size)

        # Fit on the subset, then predict on the subset and the test set.
        tree = tree.fit(X_sub, y_sub)
        train_pred = tree.predict(X_sub)
        test_pred = tree.predict(X_test)

        # Training accuracy is measured on the subset actually fitted.
        train_acc, test_acc = calculate_model_accuracy(
            train_pred, test_pred, y_sub, y_test)

        scores.append((size, train_acc, test_acc))

    print(scores)
    return tree, scores
# Example 3
def p0(featuretype='histogram'):
    """Train a decision tree on the Minecraft dataset, report overall
    accuracies and a confusion matrix, then print one-vs-one accuracies
    for each pair of classes in 0..2.

    The original body left the TODOs unfinished: the decision tree was
    never fit, `predict_train`/`predict_test` were taken from an unrelated
    SVC experiment (so both reported accuracies were meaningless), the
    inner loop referenced undefined names (`features`, `labels`), and
    `accuracy_pq` was a prediction array rather than an accuracy.
    Rebuilt to match the working `log_regression` implementation.

    Args:
        featuretype: feature representation passed to get_minecraft().

    Returns:
        tuple: (fitted model, training predictions, test predictions,
        training accuracy, test accuracy).
    """
    data_train, data_test, target_train, target_test = get_minecraft(
        featuretype)
    model = create_decision_tree()

    # Fit the model to the data using its fit method.
    model = model.fit(data_train, target_train)

    # Predict labels for the training and test sets.
    predict_train = model.predict(data_train)
    predict_test = model.predict(data_test)

    accuracy_train, accuracy_test = calculate_model_accuracy(
        predict_train, predict_test, target_train, target_test)
    print('Training accuracy: {0:3f}, Test accuracy: {1:3f}'.format(
        accuracy_train, accuracy_test))

    cfm = calculate_confusion_matrix(predict_test, target_test)
    print("Confusion matrix")
    print(cfm)

    for q in range(1, 3):
        for p in range(0, q):
            # Compute confusion between classes p and q.
            index_pq = [i for i, v in enumerate(target_train) if v in [p, q]]
            modelpq = create_decision_tree()
            # Fit a fresh tree on only the samples of classes p and q.
            modelpq.fit([data_train[i] for i in index_pq],
                        [target_train[i] for i in index_pq])
            testindex_pq = [
                i for i, v in enumerate(target_test) if v in [p, q]
            ]
            # Accuracy on the p/q test subset ([1] selects test accuracy).
            predict_train_pq = modelpq.predict(
                [data_train[i] for i in index_pq])
            predict_test_pq = modelpq.predict(
                [data_test[i] for i in testindex_pq])
            accuracy_pq = calculate_model_accuracy(
                predict_train_pq, predict_test_pq,
                [target_train[i] for i in index_pq],
                [target_test[i] for i in testindex_pq])[1]
            print("One-vs-one accuracy between classes", p, "and", q, ":",
                  accuracy_pq)

    return model, predict_train, predict_test, accuracy_train, accuracy_test
def minecraft_decision_tree():
    """Measure decision-tree accuracy as a function of training-set size
    on the Minecraft dataset (histogram features).

    Returns:
        tuple: (fitted model, list of (n, train_accuracy, test_accuracy)
        tuples for n in 50..250).
    """
    history = []

    # Fetch the dataset and build the classifier.
    X_train, X_test, y_train, y_test = get_minecraft('histogram')
    clf = create_decision_tree()

    for size in (50, 100, 150, 200, 250):
        # Restrict training to the first `size` samples.
        X_sub, y_sub = get_first_n_samples(X_train, y_train, size)
        clf.fit(X_sub, y_sub)

        # Predict on the fitted subset and on the full test set.
        train_pred = clf.predict(X_sub)
        test_pred = clf.predict(X_test)

        # Train accuracy uses the same subset that fit the model.
        train_acc, test_acc = calculate_model_accuracy(
            train_pred, test_pred, y_sub, y_test)

        history.append((size, train_acc, test_acc))

    print(history)
    return clf, history
def log_regression(featuretype='histogram'):
    """Train a logistic-regression classifier on the Minecraft dataset and
    report overall plus one-vs-one accuracies.

    Args:
        featuretype: feature representation passed to get_minecraft().

    Returns:
        tuple: (fitted model, training predictions, test predictions,
        training accuracy, test accuracy).
    """
    data_train, data_test, target_train, target_test = get_minecraft(
        featuretype)
    model = LogisticRegression()

    # Fit the model to the data using its fit method.
    model.fit(data_train, target_train)

    # Predict labels for the training and test sets.
    predict_train = model.predict(data_train)
    predict_test = model.predict(data_test)

    accuracy_train, accuracy_test = calculate_model_accuracy(
        predict_train, predict_test, target_train, target_test)
    print('Training accuracy: {0:3f}, Test accuracy: {1:3f}'.format(
        accuracy_train, accuracy_test))

    cfm = calculate_confusion_matrix(predict_test, target_test)
    # Was Python-2 `print` statements; converted to print() calls for
    # consistency with the print() usage above.
    print("Confusion matrix")
    print(cfm)

    for q in range(1, 3):
        for p in range(0, q):
            # Compute confusion between classes p and q.
            index_pq = [i for i, v in enumerate(target_train) if v in [p, q]]
            modelpq = create_decision_tree()
            # Fit a fresh tree on only the samples of classes p and q.
            modelpq.fit([data_train[i] for i in index_pq],
                        [target_train[i] for i in index_pq])
            testindex_pq = [
                i for i, v in enumerate(target_test) if v in [p, q]
            ]
            # Accuracy on the p/q test subset ([1] selects test accuracy).
            predict_train_pq = modelpq.predict(
                [data_train[i] for i in index_pq])
            predict_test_pq = modelpq.predict(
                [data_test[i] for i in testindex_pq])
            accuracy_pq = calculate_model_accuracy(
                predict_train_pq, predict_test_pq,
                [target_train[i] for i in index_pq],
                [target_test[i] for i in testindex_pq])[1]

            print("One-vs-one accuracy between classes", p, "and", q, ":",
                  accuracy_pq)

    return model, predict_train, predict_test, accuracy_train, accuracy_test
# Example 6
def p0(featuretype='histogram'):
    """Train a decision tree on the Minecraft dataset, report overall
    accuracies and a confusion matrix, then print one-vs-one accuracies
    for each pair of classes in 0..2.

    Args:
        featuretype: feature representation passed to get_minecraft().

    Returns:
        tuple: (full-data fitted model, training predictions, test
        predictions, training accuracy, test accuracy).
    """
    data_train, data_test, target_train, target_test = get_minecraft(
        featuretype)
    model = create_decision_tree()

    # Fit the model to the data using its fit method.
    model = model.fit(data_train, target_train)

    # Predict labels for the training and test sets.
    predict_train = model.predict(data_train)
    predict_test = model.predict(data_test)

    accuracy_train, accuracy_test = calculate_model_accuracy(
        predict_train, predict_test, target_train, target_test)
    print('Training accuracy: {0:3f}, Test accuracy: {1:3f}'.format(
        accuracy_train, accuracy_test))

    cfm = calculate_confusion_matrix(predict_test, target_test)
    # Was Python-2 `print` statements; converted to print() calls.
    print("Confusion matrix")
    print(cfm)

    for q in range(1, 3):
        for p in range(0, q):
            # Compute confusion between classes p and q.
            index_pq = [i for i, v in enumerate(target_train) if v in [p, q]]
            modelpq = create_decision_tree()
            # Restrict training data to the samples of classes p and q.
            relevant_data_train = [data_train[c] for c in index_pq]
            relevant_target_train = [target_train[c] for c in index_pq]

            # BUG FIX: the original called model.fit(...) here, which
            # refit the full-data tree on the p/q subset and left the
            # returned `model` trained only on the last pair. Fit the
            # fresh per-pair tree instead.
            modelpq = modelpq.fit(relevant_data_train, relevant_target_train)

            testindex_pq = [
                i for i, v in enumerate(target_test) if v in [p, q]
            ]
            # Accuracy of the pairwise model on the p/q test subset.
            relevant_data_test = [data_test[c] for c in testindex_pq]
            relevant_target_test = [target_test[c] for c in testindex_pq]

            accuracy_pq = modelpq.score(relevant_data_test,
                                        relevant_target_test)
            print("One-vs-one accuracy between classes", p, "and", q, ":",
                  accuracy_pq)

    return model, predict_train, predict_test, accuracy_train, accuracy_test