Beispiel #1
0
def main(argv):
    usage = "\tusage: --dataset=[file_name] --split=[1-100] --classifier=[algorithm_name] --target=[index] \
            --ordered=[column:val0|val1|val2,column2:val0|val1],etc --ignore=[col1,col2,etc] \
            --mapped=[1,2,3,etc] --bins=[col:num_bins,col:num_bins] --normalize"

    # get args
    opts, args = getopt.getopt(argv[1:], "h", ["help", "dataset=", "split=", "classifier=", "target=", "ordered=",
                                               "ignore=", "mapped=", "normalize", "bins="])
    # handle args
    data_set = class_name = "none"
    split = 70
    target = -1
    ordered = {}
    ignore = []
    mapped = []
    bins = {}
    norm = False

    for key, val in opts:
        if key in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif key == "--dataset":
            data_set = val
        elif key == "--split":
            split = int(val)
        elif key == "--classifier":
            class_name = val
        elif key == "--target":
            target = int(val)
        elif key == "--ignore":
            ignore = val.split(",")
            for idx, s in enumerate(ignore):
                ignore[idx] = int(s)
        elif key == "--ordered":
            entries = val.split(",")
            for entry in entries:
                entry_split = entry.split(":")
                ordered[int(entry_split[0])] = entry_split[1].split("|")
        elif key == "--mapped":
            entries = val.split(",")
            for entry in entries:
                mapped.append(int(entry))
        elif key == "--bins":
            entries = val.split(",")
            for entry in entries:
                entry_split = entry.split(":")
                bins[int(entry_split[0])] = int(entry_split[1])
        elif key == "--normalize":
            norm = True

        else:
            assert False, "unhandled option"

    # Load the data set
    data = None

    # default load iris data
    if data_set == "none":
        print("No data set specified")
        sys.exit()
    # load from a csv file
    else:
        data = preprocessor.DataSet(data_set, ordered=ordered, target=target, split=split, ignore=ignore,
                                    mapped=mapped, norm=norm, bins=bins)

    # select the network to use
    net = None
    predict_targets = None

    if class_name == "HardCoded":
        net = classifier.HardCoded()
        # train
        net.train(data.train_attributes, data.train_targets, data.mapped_columns)
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    elif class_name == "KNearestNeighbors":
        net = classifier.KNearestNeighbor(k=1)
        # train
        net.train(data.train_attributes, data.train_targets, data.mapped_columns)
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    elif class_name == "KNearestNeighbors_alt":
        net = KNeighborsClassifier(n_neighbors=5)
        # train
        net.fit(data.train_attributes, data.train_targets.ravel())
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    elif class_name == "DecisionTree":
        net = classifier.ID3()
        # train
        net.train(data.train_attributes, data.train_targets)
        net.output_tree(net.root, 0)
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    elif class_name == "DecisionTree_alt":
        net = tree.DecisionTreeClassifier()
        # train
        net.fit(data.train_attributes, data.train_targets.ravel())
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    elif class_name == "Perceptron":
        net = classifier.Perceptron(.1, [4, 4])
        # train
        net.train(data.train_attributes, data.train_targets)
        # predict
        predictions = net.predict(data.test_attributes)
        predict_targets = data.test_targets
    else:
        print("Unrecognized classifier")
        sys.exit(1)

    # test the predictions
    i = 0
    num_right = 0
    while i < len(predictions):
        if predictions[i] == predict_targets[i]:
            num_right += 1
        i += 1

    print("The number of correct predictions is: ", str(num_right / len(predictions) * 100), "%")