Ejemplo n.º 1
0
    def Validate(self, subset):
        """Estimate accuracy for a feature subset with leave-one-out validation.

        Each data point is held out in turn, a fresh ``Classifier`` is trained
        on the remaining points, and the held-out point is classified. Column 0
        of every row is what the prediction is compared against, so it is never
        removed; feature columns are numbered from 1.

        Args:
            subset: List of feature column numbers to keep. ``[0]`` keeps
                every feature.

        Returns:
            The fraction of held-out points classified correctly, or ``0``
            when the subset is invalid or there are no points to validate.
        """
        num_features = self.classifier.num_features
        num_points = self.classifier.num_points

        # Ensure that the given subset is valid.
        for feat in subset:
            if (0 > feat) or (feat > num_features):
                print("ERROR: Subset does not match features.")
                return 0

        # Feature columns that are NOT part of the subset get removed.
        to_delete = []
        if subset != [0]:
            wanted = set(subset)
            to_delete = [col for col in range(1, num_features + 1)
                         if col not in wanted]

        # Without data the accuracy ratio below would divide by zero.
        if num_points == 0:
            print("ERROR: No data points to validate.")
            return 0

        print("\n--------------------------------------")
        print("\nPerforming k-fold cross validation.\n")

        # The column trimming does not depend on which point is held out, so
        # do it once up front instead of once per fold. np.asarray also lets
        # this work whether the data is a list of rows or already an ndarray.
        trimmed = np.delete(np.asarray(self.classifier.normalized),
                            to_delete, axis=1)

        num_correct = 0
        for leave_out in range(num_points):

            # Fresh copies per fold so the classifier cannot alias `trimmed`.
            fold_list = np.delete(trimmed, leave_out, axis=0)
            folded = trimmed[leave_out].copy()

            # Create classifier for folded data.
            temp_c = Classifier()
            temp_c.Train(fold_list)
            result = temp_c.Test(folded)

            # Increment if classification accurate.
            if result == folded[0]:
                num_correct += 1

        return num_correct / num_points
Ejemplo n.º 2
0
def intro():
    """Print the project banner and run the selected algorithm.

    The algorithm choice is currently hard-coded to option 3 (nearest
    neighbor classifier); the interactive menu is kept below as comments.
    For option 3 the function loads the dataset with ``read_dataset()``,
    asks the user for a feature subset, trains a ``Classifier`` on the data
    and prints the value returned by ``Validator.Validate`` for that subset.
    """

    print("\n--------------------------------------")
    print("CS170 Project 02 / Part 02\n")
    print("Otniel Thehumury 862029595")
    print("Matthew Walsh    862088280")
    print("--------------------------------------")

    # Collect the number of features from the user.
    # num_features = input("Please enter total number of features: ")

    # print("\nType the number of the algorithm you want to run.")
    # print("\n1) Forward Selection")
    # print("\n2) Backward Elimination")
    # print("\n3) NN Classifier")

    # choice = input("\nHere: ")
    # choice = int(choice)

    # Project 2, Part 2:
    choice = 3

    print("")
    # Only the (currently disabled) search algorithms below use `p`.
    p = Problem()

    if choice == 1:
        print("Chosen Forward Selection")
        # p.forward_selection(num_features)

    elif choice == 2:
        print("Chosen Backward Elimination")
        # p.backward_elimination(num_features)

    elif choice == 3:
        print("Nearest Neighbor Classifier:\n")

        training_data = read_dataset()

        print("Please select a feature subset.\n")
        print("For example, type: 1 3 5 7 9")
        print("Enter zero or blank for all features.\n")
        sub_input = input("Enter your choice here: ")

        # Separate input into int list.
        try:
            subset = [int(feat) for feat in sub_input.split()]
        except ValueError:
            print("ERROR: Feature subset must be integers separated by spaces.")
            return

        # Zero indicates all features. An empty list covers both an empty
        # answer and a whitespace-only one, which would otherwise reach the
        # validator as "keep no features".
        if not subset:
            subset = [0]

        # Train the classifier.
        c = Classifier()
        c.Train(training_data)

        # Implement validator.
        v = Validator(c)
        print(v.Validate(subset))

    else:
        print("Incorrect choice - closing program.")
Ejemplo n.º 3
0
    Q.queryRanker()

# Clustering stage: runs for the 'cluster', 'clusterclassify' and 'all' modes.
if arguments.run in ('cluster', 'clusterclassify', 'all'):
    # Parenthesised single-argument print emits the same text on Python 2.
    print("-- Clustering --")
    # Rankers file handed to KMeans, named after the dataset and iteration count.
    bestRankersFile = 'QueryData/' + dataset + str(iterations) + '.data'
    KM = KMeans(
        arguments.fromrangek,
        arguments.torangek,
        bestRankersFile,
        dataset,
        iterations,
    )
    queryToCluster, clusterToRanker = KM.runScript()
    #print 'queryToCluster', queryToCluster

# Classification stage: runs for the 'classify', 'clusterclassify' and 'all' modes.
if arguments.run in ('classify', 'clusterclassify', 'all'):
    # Parenthesised single-argument print emits the same text on Python 2.
    print("-- Classification --")
    rankerPath = "QueryData/generalRanker.data"
    # Cluster file path is named after the dataset and iteration count.
    clusterPath = "ClusterData/" + dataset + str(iterations) + ".data"
    C = Classifier(clusterPath, path_train, rankerPath, iterations)
    C.Train()

# Comparison stage: only runs when explicitly requested with the 'compare' mode.
if arguments.run == 'compare':
    # Parenthesised single-argument print emits the same text on Python 2.
    print("-- Comparison --")
    basic_ranker_path = "QueryData/generalRanker.data"
    # Classifier and cluster files are both named after the dataset and
    # iteration count.
    classifierPath = "Classifier/" + dataset + str(iterations) + ".data"
    clusterPath = "ClusterData/" + dataset + str(iterations) + ".data"
    compare.compareSystems(
        path_validate,
        classifierPath,
        basic_ranker_path,
        clusterPath,
        click,
    )

if arguments.run == "fake":
    print "--Fake clustering and learning--"
    rankerPath = "QueryData/generalRanker.data"
    bestRankersFile = 'QueryData/' + dataset + str(iterations) + '.data'
    clusterPath = "ClusterData/" + dataset + str(iterations) + ".data"
    rankerPath = "QueryData/generalRanker.data"