def main(max, classifyMetric):
    """Run the kNN classifier once for every k in 1..max, one result file per k.

    Each run writes to ``<path><k>_<X>_<Y>_<classifyMetric>_result_2.txt``.
    ``path``, ``X`` and ``Y`` are names defined outside this function, and the
    first argument handed to ``knn.main`` is ``sys.argv[1]``.

    Args:
        max: Largest neighbour count to evaluate (inclusive). The name shadows
            the builtin but is kept so existing keyword callers still work.
        classifyMetric: Metric identifier; used in the file name and forwarded
            to ``knn.main``.
    """
    for k in range(1, max + 1):
        result_name = (path + str(k) + "_" + X + "_" + Y + "_" +
                       classifyMetric + "_result_2.txt")
        # Context manager guarantees every result file is flushed and closed,
        # even if knn.main raises part-way through a run.
        with open(result_name, "w") as foutModel:
            knn.main(sys.argv[1], k, foutModel, classifyMetric)
Beispiel #2
0
def main():
  """Run the numbered experiments (2.1-2.5) listed in ``current_problem``.

  Every experiment trains a network through ``TrainNN`` and plots the returned
  error curves with ``DisplayErrorPlot``; experiment 2.5 instead imports the
  ``knn`` module and calls ``knn.main()``.  Written for Python 2 (print
  statements).
  """
  # Baseline hyper-parameters, used as-is by 2.1/2.2 and partly overridden below.
  num_hiddens = 10
  eps = 0.1
  momentum = 0.0
  num_epochs = 1000

  # Remove an entry from this list to skip the corresponding experiment.
  current_problem = [2.1, 2.2, 2.3, 2.4, 2.5]
  print "Running problems: ", current_problem

  # 2.1 and 2.2: a single run with the baseline settings (only when both are listed).
  if 2.2 in current_problem and 2.1 in current_problem:
      W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
      DisplayErrorPlot(train_error, valid_error, mode='cross_entropy')
      DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error') 

  # 2.3: sweep eps first, then momentum, keeping the other settings fixed.
  if 2.3 in current_problem:
      for eps in [0.5, 0.2, 0.01]:
          W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
          # Tag the plots with the eps value; '.' is swapped for ',' in the tag
          # (presumably because the mode string ends up in a file name - TODO confirm).
          suffix = '_at_eps_' + str(eps)
          suffix = suffix.replace('.', ',')
          DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix)
          DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix) 

      # Restore the baseline eps before sweeping momentum.
      eps = 0.1
      for momentum in [0.9, 0.5, 0.0]:
          W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
          # Tag the plots with the momentum value, same '.' -> ',' substitution as above.
          suffix = '_at_momentum_' + str(momentum)
          suffix = suffix.replace('.', ',')
          DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix)
          DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix) 

  # 2.4: sweep the number of hidden units with eps=0.02 and momentum=0.5,
  # timing each run and also collecting the test-set curves.
  if 2.4 in current_problem:
      eps = 0.02
      momentum = 0.5
      for num_hiddens in [2, 5, 10, 30, 100]:
          start = time.time()
          # With run_test=True, TrainNN is unpacked into ten values instead of eight
          # (the two extra ones are the test-set curves).
          (
              W1, W2, b1, b2,
              train_error, valid_error, test_error,
              train_class_error, valid_class_error, test_class_error,
          ) = TrainNN(num_hiddens, eps, momentum, num_epochs, run_test=True)
          elapsed = time.time() - start
          print "Training time at hidden_unit=", num_hiddens, " is ", elapsed, "seconds"
          # Tag the plots with the hidden-unit count.
          suffix = '_at_hidden_unit_' + str(num_hiddens)
          DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix, test=test_error)
          DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix, test=test_class_error) 

  # 2.5: hand over to the kNN script; imported here so it is only loaded when needed.
  if 2.5 in current_problem:
      import knn
      knn.main()
def main(testfile=None):
    """Classify the samples of *testfile*, or fall back to the default kNN run.

    With a truthy *testfile*, its samples are read through ``rd.read`` and each
    one is classified from its 3 nearest neighbours in ``train``; the list of
    responses is returned.  Otherwise ``knn.main(train, tests)`` is executed
    and nothing is returned.
    """
    if not testfile:
        knn.main(train, tests)
        return None

    samples = rd.read(testfile)
    neighbour_count = 3
    return [
        knn.getResponse(knn.getNeighbors(train, samples[idx], neighbour_count))
        for idx in range(len(samples))
    ]
def main(i=0, j=62, testfile=None):
    """Classify *testfile* against the ``(i, j)`` training split, or run kNN on it.

    The training/test split is always loaded via ``rt.read2(i, j)``.  With a
    truthy *testfile*, its samples (read via ``rd.read2``) are classified from
    their 3 nearest training neighbours and the responses are returned as a
    list; otherwise ``knn.main`` is run on the split and nothing is returned.
    """
    # NOTE(review): the split comes from `rt` while the test file is read
    # through `rd` - confirm both module names are intended.
    training_rows, holdout_rows = rt.read2(i, j)
    neighbour_count = 3

    if not testfile:
        knn.main(training_rows, holdout_rows)
        return None

    samples = rd.read2(testfile)
    return [
        knn.getResponse(
            knn.getNeighbors(training_rows, samples[idx], neighbour_count))
        for idx in range(len(samples))
    ]
Beispiel #5
0
def main(kInKnn):
    """Run kNN on the ex2data1 train/test CSV files and print accuracy and timing.

    Args:
        kInKnn: Neighbour count forwarded to ``knn.main``.
    """
    started = time.time()
    trainingX, trainingY, testX, testY = loadDataset(
        "ex2data1train.csv", "ex2data1test.csv")
    predicted = knn.main(trainingX, trainingY, testX, kInKnn)
    finished = time.time()

    # Count the positions where the prediction matches the expected label.
    hits = sum(
        1 for idx in range(len(predicted)) if predicted[idx] == testY[idx])
    accuracy_pct = round(hits / float(len(testY)) * 100, 2)
    elapsed = round(finished - started, 1)

    print("accuracy: " + str(accuracy_pct) + "%")
    print("training time: " + str(elapsed) + " seconds")
Beispiel #6
0
def main(kInKnn):
    """Evaluate kNN with ``kInKnn`` neighbours on the ex2data1 CSV pair.

    Loads the train/test files, predicts the test labels through ``knn.main``
    and prints the accuracy (percent, two decimals) and the elapsed time of the
    load-and-predict step.
    """
    t_begin = time.time()
    dataset = loadDataset('ex2data1train.csv', 'ex2data1test.csv')
    [trainingX, trainingY, testX, testY] = dataset
    guesses = knn.main(trainingX, trainingY, testX, kInKnn)
    t_end = time.time()

    matches = [guesses[pos] == testY[pos] for pos in range(len(guesses))]
    correct = sum(1 for matched in matches if matched)

    percent = round(correct / float(len(testY)) * 100, 2)
    print('accuracy: ' + str(percent) + '%')
    print('training time: ' + str(round(t_end - t_begin, 1)) + ' seconds')
Beispiel #7
0
def _accuracy_percent(predicted, expected):
    """Return the percentage (2 decimals) of positions where the two agree."""
    hits = sum(1 for guess, truth in zip(predicted, expected) if guess == truth)
    return round(hits / float(len(expected)) * 100, 2)


def _report_trial(label, trial_no, predicted, expected, elapsed):
    """Print accuracy and elapsed time of one classifier for one trial."""
    print(label + ", trial #" + str(trial_no) + ": ")
    print('\taccuracy: ' + str(_accuracy_percent(predicted, expected)) + '%')
    print('\ttraining time: ' + str(round(elapsed, 1)) + ' seconds')


def bagging(N):
    """Combine a random forest and kNN over N bootstrap rounds and print accuracies.

    Each round draws ``int(0.6 * len(RFData.x))`` row indices with replacement.
    The random forest is trained on the distinct drawn rows with their last
    column dropped; kNN is trained on every drawn row (duplicates kept) of the
    kNN feature matrix.  Both classifiers predict the test set, their
    predictions are added to a per-sample tally, and after all rounds a sample
    is labelled 1 when its tally is at least ``N`` (i.e. half of the ``2 * N``
    votes), otherwise 0.  Assumes predictions are 0/1 labels - TODO confirm.

    Args:
        N: Number of bootstrap rounds.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt

    # Load the kNN dataset once and take both matrices from the same result.
    knn_dataset = knnData.main()
    xKNN = knn_dataset[0]  # training X
    xtKNN = knn_dataset[2]  # test X

    countYPredict = [0] * len(yt)

    for k in range(N):  # number of bootstrapping
        x_RF = []
        y_RF = []
        x_KNN = []
        y_KNN = []
        seen_rf_rows = set()
        # bootstrapping
        for _ in range(int(len(xRF) * 0.6)):
            r = randint(0, len(xRF) - 1)
            tRF = [xRF[r][j] for j in range(len(xRF[0]) - 1)]
            # for RF, data duplicates are not allowed; the set keeps the
            # membership test O(1) instead of rescanning x_RF for every draw
            row_key = tuple(tRF)
            if row_key not in seen_rf_rows:
                seen_rf_rows.add(row_key)
                x_RF.append(tRF)
                y_RF.append(y[r])
            x_KNN.append(xKNN[r])
            y_KNN.append(y[r])

        # RF
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        _report_trial("RF", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

        # KNN
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        _report_trial("KNN", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

    # Majority vote over the 2 * N predictions; ties count as class 1.
    finalPredict = [1 if votes >= N else 0 for votes in countYPredict]

    print()
    print('After combining the classifiers by bagging: ')
    print('\taccuracy: ' + str(_accuracy_percent(finalPredict, yt)) + '%')
            while choice2 != 4:
                print('\nkNN Classification:')
                print(
                    '1 -> Classify Dataset. (Splits Dataset into training and test set)'
                )
                print(
                    '2 -> Classify Testset. (Test kNN accuracy using Dataset as training set)'
                )
                print('3 -> Classify New Emails.\n4 -> Back.')
                choice2 = int(input('Enter your choice: '))

                if choice2 == 1:
                    # Classify Dataset
                    dataset_name = input(
                        'Enter the name of dataset folder to be classified: ')
                    knn.main(dataset_name)

                elif choice2 == 2:
                    # Classify Testset
                    dataset_name = input(
                        'Enter the name of dataset folder to be used as training set: '
                    )
                    testset_name = input('Enter the name of test set folder: ')
                    knn_classify.main(dataset_name, testset_name)

                elif choice2 == 3:
                    # Classify New Emails
                    dataset_name = input(
                        'Enter the name of dataset folder to be used as training set: '
                    )
                    reply = input(
def hello():
    """Feed the ``details`` entry of the request's JSON body to ``main``.

    Returns whatever ``main`` returns for that payload.
    """
    return main(request.json['details'])
Beispiel #10
0
def main(argv):
    """Average per-datatype kNN scores into one ranked list of ids.

    Expected arguments, after the optional ``-d`` / ``--debug`` flag::

        FEATURE_DIR OUTTYPE_FILE (DATATYPE_FILE DIST_METRIC WEIGHT)...

    For every DATATYPE_FILE the datatype name is the base name of the path with
    everything after its last ``-`` removed; ``<FEATURE_DIR>/<datatype>.tmp``
    must exist and is passed to ``knn.main`` together with OUTTYPE_FILE, the
    datatype file, its distance metric and the literal ``"false"``.  Scores
    returned for the same id are summed and divided by the number of datatypes.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        A list of ``[id, mean_score]`` entries, highest score first.

    Raises:
        SystemExit: With status 2 when the arguments are malformed or a
            required file or directory is missing.
    """
    try:
        opts, args = getopt.getopt(argv, 'd', ['debug'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Compare against a real tuple of both spellings: a bare ('-d') is just the
    # string '-d', which would never match the long option '--debug'.
    debug = any(opt in ('-d', '--debug') for opt, _ in opts)

    def trace(label, value):
        """Print a debug line to stderr when debugging is enabled."""
        if debug:
            print("*** DEBUG: " + sys.argv[0] + ": " + label + ":",
                  value,
                  file=sys.stderr)

    def fail(message, subject):
        """Report a bad argument on stderr, show usage and exit with status 2."""
        print(message, subject, file=sys.stderr)
        usage()
        sys.exit(2)

    # `args` already excludes the parsed options, so positions are the same
    # with or without the debug flag.  At least the two fixed paths plus one
    # complete (file, metric, weight) triple are required.
    if len(args) < 5 or not args[4]:
        usage()
        sys.exit(2)

    feature_files_dir = args[0]
    if not os.path.isdir(feature_files_dir):
        fail("Feature directory does not exist: ", feature_files_dir)

    outtype_dataset_file = args[1]
    if not os.path.isfile(outtype_dataset_file):
        fail("Out type datafile does not exist: ", outtype_dataset_file)

    datatype_filenames = []
    datatypes = []
    dist_metrics = []
    weights = []
    for i in range(2, len(args), 3):
        datatype_filename = args[i]
        if not os.path.isfile(datatype_filename):
            fail("Input datatype file does not exist: ", datatype_filename)
        # A missing trailing metric/weight is reported like an empty one
        # instead of surfacing as an IndexError.
        if i + 1 >= len(args) or not args[i + 1]:
            fail("No distance metric provided for ", datatype_filename)
        if i + 2 >= len(args) or not args[i + 2]:
            fail("No weight provided for ", datatype_filename)
        datatype_filenames.append(datatype_filename)
        datatypes.append(
            os.path.basename(datatype_filename.rsplit('-', 1)[0]))
        dist_metrics.append(args[i + 1])
        # NOTE(review): weights are collected and traced but never applied -
        # the final score below is a plain mean.  Confirm this is intended.
        weights.append(args[i + 2])

    trace("datatypes", datatypes)
    trace("dist_metrics", dist_metrics)
    trace("weights", weights)

    total_ids_scores = []
    for datatype_num, datatype in enumerate(datatypes):
        trace("datatype_num", datatype_num)
        features_file = feature_files_dir + "/" + datatype + ".tmp"
        if not os.path.isfile(features_file):
            print("Features file does not exist: ",
                  features_file,
                  file=sys.stderr)
            sys.exit(2)
        # Pass the argument list directly: joining with spaces and splitting
        # again would break any path that contains whitespace.
        knn_args = [
            features_file,
            outtype_dataset_file,
            datatype_filenames[datatype_num],
            dist_metrics[datatype_num],
            "false",
        ]
        ids_scores = knn.main(knn_args)
        trace("ids_scores", ids_scores)
        total_ids_scores.extend(ids_scores)
        trace("total_ids_scores", total_ids_scores)

    # Sum the scores of entries sharing an id, keeping first-seen order.
    new_total_ids_scores = []
    for id_score in total_ids_scores:
        trace("id_score", id_score)
        for merged in new_total_ids_scores:
            if merged[0] == id_score[0]:
                merged[1] = merged[1] + id_score[1]
                break
        else:
            # Copy so the lists returned by knn.main are not modified.
            new_total_ids_scores.append(list(id_score))
        trace("new_total_ids_scores", new_total_ids_scores)

    for id_score in new_total_ids_scores:
        id_score[1] = id_score[1] / len(datatypes)
    new_total_ids_scores.sort(key=lambda entry: entry[1], reverse=True)
    trace("new_total_ids_score", new_total_ids_scores)
    return new_total_ids_scores
Beispiel #11
0
def bagging(N):
    """Run N bootstrap rounds of random forest plus kNN and print the accuracies.

    Every round samples ``int(0.6 * len(RFData.x))`` row indices with
    replacement, trains the forest on the distinct sampled rows (last column
    dropped) and kNN on all sampled rows, prints each classifier's accuracy and
    elapsed time, and adds both prediction vectors to a per-sample tally.  The
    combined label of a test sample is 1 when its tally reaches ``N`` and 0
    otherwise; the accuracy of that combined labelling is printed last.
    """
    # training data
    rf_train_x = RFData.x
    train_y = RFData.y
    # testing data
    rf_test_x = RFData.xt
    test_y = RFData.yt

    knn_train_x = knnData.main()[0]  # training X
    knn_test_x = knnData.main()[2]  # test X

    tally = [0] * len(test_y)

    def score_and_tally(title, predicted, elapsed):
        # Report one classifier's result, then add its predictions to the tally.
        hits = sum(1 for idx in range(len(predicted))
                   if predicted[idx] == test_y[idx])
        print(title)
        print("\taccuracy: " + str(round(hits / float(len(test_y)) * 100, 2)) + "%")
        print("\ttraining time: " + str(round(elapsed, 1)) + " seconds")
        for idx in range(len(predicted)):
            tally[idx] = tally[idx] + predicted[idx]

    for trial in range(N):  # number of bootstrapping
        rf_rows, rf_labels = [], []
        knn_rows, knn_labels = [], []
        # bootstrapping
        for _ in range(int(len(rf_train_x) * 0.6)):
            pick = randint(0, len(rf_train_x) - 1)
            trimmed = [rf_train_x[pick][col]
                       for col in range(len(rf_train_x[0]) - 1)]
            # for RF, data duplicates are not allowed
            if trimmed not in rf_rows:
                rf_rows.append(trimmed)
                rf_labels.append(train_y[pick])
            knn_rows.append(knn_train_x[pick])
            knn_labels.append(train_y[pick])

        # RF
        started = time.time()
        forest = RF(B=TreeNum, Bagging=isBagging)
        forest.train(rf_rows, rf_labels)
        rf_pred = forest.predict(rf_test_x)
        finished = time.time()
        score_and_tally("RF, trial #" + str(trial + 1) + ": ", rf_pred,
                        finished - started)

        # KNN
        started = time.time()
        knn_pred = knn.main(knn_rows, knn_labels, knn_test_x, kInKnn)
        finished = time.time()
        score_and_tally("KNN, trial #" + str(trial + 1) + ": ", knn_pred,
                        finished - started)

    combined = [1 if tally[idx] >= N else 0 for idx in range(len(test_y))]
    agreeing = sum(1 for idx in range(len(combined))
                   if combined[idx] == test_y[idx])

    print()
    print("After combining the classifiers by bagging: ")
    print("\taccuracy: " + str(round(agreeing / float(len(test_y)) * 100, 2)) + "%")
Beispiel #12
0
import knn
import matplotlib.pyplot as plt
from pylab import *

# Experiment script (Python 2 print statements): runs knn.main on the
# ALL.dat / AML.dat files for several k, then plots hard-coded
# sensitivity / specificity values.

# Part 1: vary k with p fixed at 0.5.
k_values = [1,5,10,15,30,50]
p = 0.5
for k in k_values:
	knn.main("ALL.dat", "AML.dat", k, p)
	print "\n"


# Disabled plot of accuracy against k (the values are hard-coded).
# plot(k_values, [0.93, 0.94, 0.83, 0.79, 0.61, 0.61])
# xlabel('k')
# ylabel('accuracy')
# savefig('question1.jpg')

print "\n\n---------------------------------\n\n"

# Part 2: vary p with k fixed at 30.
p_values = [0, 0.05, 0.20, 0.50, 0.75, 0.95, 1.00]
results = []
k = 30
for p in p_values:
	# The kNN call is disabled, so this loop currently only prints blank lines
	# and `results` stays empty.
	#knn.main("ALL.dat", "AML.dat", k, p)
	print "\n"

# One hard-coded value per entry of p_values
# (presumably transcribed from an earlier run - TODO confirm).
fig, ax = plt.subplots()
sensitivity = [1.0, 1.0, 1.0, 1.0, 0.77, 0.0, 0.0]
specificity = [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]
to_plot_spec = [1 - x for x in specificity]
# NOTE(review): sensitivity is on the x axis and 1 - specificity on the y axis;
# a conventional ROC curve has them the other way round - confirm the intent.
ax.plot(sensitivity, to_plot_spec)
Beispiel #13
0
    print(
        '\tk           - [OPTIONAL] number of neighbours for knn subsection \'a\''
    )


if __name__ == '__main__':
    # Command line: <script> MODULE SUBSECTION [extra arguments for knn ...]
    if len(sys.argv) < 3:
        usage()
        sys.exit(1)

    mod, sub = sys.argv[1], sys.argv[2]
    ret = 0

    # Reject unknown module names before importing anything.
    if mod not in ('knn', 'perceptron', 'svm'):
        usage()
        sys.exit(1)

    # Import lazily so only the requested module is loaded.
    if mod == 'knn':
        import knn
        ret = knn.main(sub, sys.argv[3:])
    elif mod == 'perceptron':
        import perceptron
        ret = perceptron.main(sub)
    else:
        import svm
        ret = svm.main(sub)

    # Any result other than 0 is answered with the usage text.
    if ret != 0:
        usage()
				print('Invalid Input.')

		elif choice == 2:
			# Classify using kNN
			choice2 = 0
			while choice2 != 4:
				print('\nkNN Classification:')
				print('1 -> Classify Dataset. (Splits Dataset into training and test set)')
				print('2 -> Classify Testset. (Test kNN accuracy using Dataset as training set)')
				print('3 -> Classify New Emails.\n4 -> Back.')
				choice2 = int(input('Enter your choice: '))

				if choice2 == 1:
					# Classify Dataset
					dataset_name = input('Enter the name of dataset folder to be classified: ')
					knn.main(dataset_name)

				elif choice2 == 2:
					# Classify Testset
					dataset_name = input('Enter the name of dataset folder to be used as training set: ')
					testset_name = input('Enter the name of test set folder: ')
					knn_classify.main(dataset_name, testset_name)

				elif choice2 == 3:
					# Classify New Emails
					dataset_name = input('Enter the name of dataset folder to be used as training set: ')
					reply = input('Do you want get new unread emails from your email account? (y/n): ')[0].lower()
					
					if reply == 'y':
						usr = input('Email: ')
						pwd = getpass('Password: ')
def process_message(msg):
    """Advance the chatbot conversation by one user message and return the reply.

    The conversation position is kept in the module-level ``qno`` and the
    answers gathered so far in the module-level list ``ans``:

    * ``qno`` 0-4: station / date / time questions driven by ``ke.startKE``
      and ``readfacts()``; once step 4 succeeds the reply comes from
      ``webScraper.main()``.
    * ``qno`` 11-15: delay questions; step 15 calls ``knn.main`` with the
      collected answers and resets ``qno`` to 0.

    Args:
        msg: Raw user text; it is lower-cased before any matching.

    Returns:
        The reply text.  NOTE(review): step 4 returns ``temp[0]`` from
        ``webScraper.main()`` or a one-element list rather than a plain
        string - confirm what the caller expects.

    Raises:
        SystemExit: When the message is one of the goodbye phrases.
    """
    global qno
    global ans
    msg = msg.lower()

    # Reset the link label to the default site on every message.
    hlink = "nationalrail.co.uk"
    label_link.configure(text=hlink, fg="blue", cursor="hand2")
    label_link.bind("<Button-1>", openLink)

    hiList = [
        "hello", "hi", "hey", "heya", "hiya", "hai", "howdy", "ciao", "ni hao"
    ]
    biList = [
        "goodbye", "bye", "bi", "exit", "see you", "cya", "see ya", "byebye",
        "ciao", "bye bye"
    ]
    # "ciao" appears in both lists; the greeting check runs first and wins.
    if msg in hiList:
        return "Hello to you too user!"
    elif msg in biList:
        sys.exit()
    if msg == "good":
        return "That's very good"

    delayList = ["delay", "late", "delayed"]
    print(qno)  # debug trace of the current conversation step
    # Only a message that IS one of the delay words starts the delay dialogue.
    # NOTE(review): a substring match may have been intended - confirm.
    is_delay_request = msg in delayList

    if is_delay_request:
        if qno == 0 or 4 < qno < 10:
            qno = 11
            print()
            return "How long has your train been delayed by?"

    if qno == 11:
        digits = re.sub(r'[^\d]', "", msg)
        if not digits:
            # No number in the answer: ask again instead of failing in int('').
            return "How long has your train been delayed by?"
        ans[0] = int(digits)
        qno += 1
        return "Where did your train depart from and arriving at?"
    elif qno == 12:
        x = nlp.findStation(msg)
        ke.startKE(x[1], x[0], "", "", "")
        facts = readfacts()
        if facts[0] != "null":
            qno += 1

            # Replace a station matching the first field of a `stations` entry
            # with that entry's second field.
            for s in stations:
                if x[1] == s[0]:
                    x[1] = s[1]
                if x[0] == s[0]:
                    x[0] = s[1]
            ans[1] = x[1]
            ans[2] = x[0]
        # NOTE(review): this reply is sent even when the stations were not
        # accepted (qno unchanged), so the next message is parsed as stations
        # again - confirm.
        return "What time did you depart?"
    elif qno == 13:
        x = nlp.findTime(msg)
        x = re.sub(r'[^\d]', "", x)
        ans[3] = int(x)
        qno += 1
        return "What was you orginal estimated arrival?"
    elif qno == 14:
        x = nlp.findTime(msg)
        x = re.sub(r'[^\d]', "", x)
        ans[4] = int(x)
        qno += 1
        return "OK 1 sec!"
    elif qno == 15:
        s1 = " ".join(ans[1])
        s2 = " ".join(ans[2])
        x = knn.main(ans[4], ans[3], stationsDict[s1], stationsDict[s2],
                     ans[0])
        x = "The estimated time of arrival is  " + x
        resetfacts()
        qno = 0
        return x

    if qno == 0:

        x = nlp.findStation(msg)
        facts = readfacts()
        print(facts)  # debug trace
        print("asdaf")  # debug trace
        ke.startKE(x[1], x[0], "", "", "")
        facts = readfacts()
        print(facts)  # debug trace
        # Each recognised station advances the dialogue by one step.
        if facts[0] != "null":
            qno += 1
            ans[0] = x[1]
        if facts[1] != "null":
            qno += 1
            ans[1] = x[0]
        return facts[5]
    elif qno == 1:
        x = nlp.finddplace(msg)
        facts = readfacts()
        ke.startKE(x, ans[0], "", "", "")
        facts = readfacts()
        if facts[0] != "null":
            qno += 1
            ans[0] = x
        return facts[5]
    elif qno == 2:
        x = nlp.findDate(msg)
        ke.startKE(ans[0], ans[1], "", x, "")
        facts = readfacts()
        if facts[2] != "null":
            qno += 1
            ans[2] = x
        return facts[5]
    elif qno == 3:
        x = nlp.findTime(msg)
        ke.startKE(ans[0], ans[1], x, ans[2], "")
        facts = readfacts()
        if facts[3] != "null":
            qno += 1
            ans[3] = x
        return facts[5]
    elif qno == 4:
        x = nlp.checkDepart(msg)
        ke.startKE(ans[0], ans[1], ans[3], ans[2], x)
        facts = readfacts()
        if facts[4] != "null":
            qno += 1
            ans[4] = x
            temp = webScraper.main()
            # Point the link label at the address returned by the scraper.
            hlink = temp[1]
            label_link.configure(text=hlink, fg="blue", cursor="hand2")
            return temp[0]
        return ["arrival or departure?"]

    # Only reached when qno is outside the steps handled above.
    if msg == " ":
        return "Hello?"

    facts = readfacts()
    print(facts)  # debug trace

    return "Sorry! I don't understand!"
Beispiel #16
0
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    fgmask = fgbg.apply(frame)

    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)

    #fgmask = cv2.filter2D(fgmask,-1,kernel)
    #fgmask = cv2.dilate(fgmask,kernel,iterations = 1)

    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_CLOSE, kernel2)

    res = cv2.bitwise_and(fgmask, mask)

    cv2.imshow("frame", res)

    k = cv2.waitKey(1) & 0xFF

    #if k == ord('a'):
    feature = cleaner.do(res)

    if feature != None:
        knn.main(float(feature[0]), float(feature[1]))

    if k == ord('q'):
        break

    time.sleep(0.2)

cap.release()
cv2.destroyAllWindows()
Beispiel #17
0
    with open('recent.file', 'w') as myfile:
        myfile.write(feature_data)
    print(feature_data)



# Live loop: show each camera frame with the previous prediction drawn on it,
# then classify the frame to obtain the prediction for the next iteration.
while True:

    # Capture a frame in real time.
    (ret, frame) = cap.read()
    if not ret:
        # No frame was delivered (camera unplugged / stream ended); stop
        # instead of handing an empty frame to the drawing calls below.
        break

    cv2.putText(
        frame,
        'Prediksi: ' + prediction,
        (15, 60),
        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # window output
    cv2.imshow('Sistem Pendeteksi warna kematangan tomat', frame)

    # NOTE(review): the frame already carries the red text overlay when it is
    # passed on here - confirm the overlay does not distort the colour features.
    color_histogram_of_test_image(frame)

    prediction = knn.main('training.csv', 'recent.file')
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


# Release the capture device and close the preview window.
cap.release()
cv2.destroyAllWindows()