class KNN_strings(object): ''' classdocs ''' def __init__(self, n_neighbors=1): ''' Constructor ''' self.dsr = DatasetReader() self.fenc = FreemanEncoder() self.data = [] self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='auto', metric=self.lev_metric) def lev_metric(self, x, y): i, j = int(x[0]), int(y[0]) # extract indices # if self.data[i] == self.data[j]: # print self.data[i], self.data[j], edit_dist(self.data[i], self.data[j]) return edit_dist(self.data[i], self.data[j]) def knn_train(self, dataset, cv=1, datasplit=0.7): images_dataset= self.dsr.read_dataset_images(dataset) freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset) _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict) self.data = codes X = np.arange(len(self.data)).reshape(-1, 1) if cv <= 1: self.knn.fit(X, labels) elif cv > 1: cv_result = cross_validation.cross_val_score(self.knn, X, labels, cv=cv) print cv_result print 'Training Done!' def knn_predict(self, test_data, score=False): images_dataset= self.dsr.read_dataset_images(test_data) freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset) _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict) X_pred = np.arange(len(codes)).reshape(-1, 1) predictions = self.knn.predict(X_pred) if score == True: accuracy = self.knn.score(X_pred, labels) print "Test Accuracy: ", accuracy return predictions def knn_predict_one(self, test_image): image_code = self.fenc.encode_freeman(test_image) print image_code data = [image_code] X_pred = np.arange(len(data)).reshape(-1, 1) prediction = self.knn.predict(X_pred) return prediction
def __init__(self):
    """Initialise the classifier helpers and an empty training store."""
    # Dataset reader and Freeman chain-code encoder used by the other methods.
    self.dsr = DatasetReader()
    self.fenc = FreemanEncoder()
    # Populated later by the training routine.
    self.training_data = []
def __init__(self, n_neighbors=1):
    """Build an sklearn k-NN classifier driven by a string edit-distance metric."""
    self.dsr = DatasetReader()
    self.fenc = FreemanEncoder()
    # String samples; the metric receives row *indices* into this list
    # (index trick for string-valued samples), hence the bound-method callback.
    self.data = []
    self.knn = KNeighborsClassifier(
        n_neighbors=n_neighbors,
        algorithm='auto',
        metric=self.lev_metric,
    )
def _init_classifiers(self): # Initialize classifier objects self.fenc = FreemanEncoder() self.knn = KNN.KNN() self.HMM = HMM.HMM() self.NaiveBayes = NaiveBayes.NaiveBayes() self.RandomForest = RandomForest.RandomForests() self.SVM = svm.SVM_SVC() self.LogisticReg = LogisticReg.LogisticReg() self.AdaBoost = adaboost.AdaBoost() self.GBRT = gbrt.GBRT() #Train initially on the default data set, if no model saved already # Initialize KNN, no saved model for KNN self.knn.knn_train(CharRecognitionGUI_support.training_dataset, 1.0) # Initialize HMM self.HMM.training(CharRecognitionGUI_support.training_dataset) # Initialize Naive Bayes try: pickle.load( open( "./Models/naivebayes_model.p", "rb" ) ) except IOError: self.NaiveBayes.training(CharRecognitionGUI_support.training_dataset) # Initialize Random Forest try: pickle.load( open( "./Models/random_forest.p", "rb" ) ) except IOError: self.RandomForest.training(CharRecognitionGUI_support.training_dataset) # Initialize SVM try: pickle.load( open( "./Models/svm.p", "rb" ) ) except IOError: self.SVM.training(CharRecognitionGUI_support.training_dataset) # Initialize Logistic Regression try: pickle.load( open( "./Models/logistic_model.p", "rb" ) ) except IOError: self.LogisticReg.training(CharRecognitionGUI_support.training_dataset) # Initialize AdaBoost try: pickle.load( open( "./Models/AdaBoostClassifier.p", "rb" ) ) except IOError: self.AdaBoost.training(CharRecognitionGUI_support.training_dataset) # Initialize GBRT try: pickle.load( open( "./Models/GradientBoostingClassifier.p", "rb" ) ) except IOError: self.GBRT.training(CharRecognitionGUI_support.training_dataset)
def __init__(self):
    """Create the HMM trainer over digit states and Freeman-direction symbols."""
    self.dsr = DatasetReader()
    self.fenc = FreemanEncoder()
    # Hidden states: the ten digit classes; symbols: the 8 chain-code directions.
    digit_states = [str(d) for d in range(10)]
    direction_symbols = [str(d) for d in range(8)]
    self.learning_model = HiddenMarkovModelTrainer(states=digit_states,
                                                   symbols=direction_symbols)
    # Trained model; filled in by the training routine.
    self.model = None
class HMM(object):
    """Hidden-Markov-Model classifier for Freeman chain-code sequences.

    The hidden states are the ten digit classes and the observation symbols
    are the eight Freeman directions, so a cleanly classified sequence is one
    the tagger labels with a single repeated state (see predict()).
    """

    def __init__(self):
        ''' Constructor '''
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        # hidden states = digit classes, symbols = chain-code directions
        states = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        symbols = ['0', '1', '2', '3', '4', '5', '6', '7']
        self.learning_model = HiddenMarkovModelTrainer(states=states, symbols=symbols)
        self.model = None  # set by training()

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Build supervised-training structures from {label: [code, ...]}.

        Returns:
            labeled_symbols: per sequence, a list of (symbol, label) pairs --
                the shape the HMM trainer consumes.
            labeled_sequences: (code, label) pairs, one per whole sequence.
            codes, labels: the same two columns as parallel numpy arrays.
        """
        labeled_sequences = []
        labeled_symbols = []
        codes_list = freeman_codes_dict.items()
        for tup in codes_list:
            for code in tup[1]:
                temp = []
                for symbol in code:
                    # every symbol of a sequence carries the sequence's label
                    temp.append((symbol, tup[0]))
                labeled_symbols.append(temp)
        for tup in codes_list:
            for code in tup[1]:
                labeled_sequences.append((code, tup[0]))
        codes = numpy.array([x[0] for x in labeled_sequences])
        labels = numpy.array([y[1] for y in labeled_sequences])
        return labeled_symbols, labeled_sequences, codes, labels

    def learning_curve(self, dataset, n_iter, train_sizes=numpy.linspace(0.1, 1.0, 5)):
        """Collect CV and training scores at several training-set sizes.

        NOTE(review): the truncated slice `data` is computed but `dataset` is
        what gets passed to training(), so every size actually trains on the
        full set -- looks like a bug; confirm intent before trusting curves.
        """
        cv_scores = []
        train_scores = []
        for i in train_sizes:
            data = dataset[:int(len(dataset) * i)]
            cv_score = []
            t_score = []
            for j in range(n_iter):
                cv_score.extend(self.training(dataset, cv=10, n_iter=1))
                train_score, test_score = self.training(dataset, n_iter=1)
                t_score.extend(train_score)
            cv_scores.append(cv_score)
            train_scores.append(t_score)
        cv_scores = numpy.array(cv_scores)
        train_scores = numpy.array(train_scores)
        print cv_scores.shape
        print train_scores.shape
        return train_scores, cv_scores

    def get_data(self, dataset_path):
        """Read a dataset directory and return the labelled structures."""
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        labeled_symbols, labeled_sequence, codes, labels = self.generate_labelled_sequences(freeman_codes_dict)
        return labeled_symbols, labeled_sequence, codes, labels

    def training(self, dataset, cv=1, n_iter=1):
        """Train the HMM; `dataset` is a directory path or a pre-built tuple.

        cv > 1: returns a list of n_iter mean 10-fold CV scores.
        cv <= 1: returns (training_score, test_score) lists over n_iter
        80/20 shuffle splits; with n_iter == 1 a ConfusionMatrix for the
        held-out fold is stored on self.ConfusionMatrix.
        """
        if isinstance(dataset, basestring):  # a path -> load from disk (Py2 str check)
            labeled_symbols, labeled_sequence, codes, labels = self.get_data(dataset)
        else:
            labeled_symbols, labeled_sequence, codes, labels = dataset
        self.model = self.learning_model.train(labeled_symbols)
        if cv > 1:
            cv_scores = []
            for i in range(n_iter):
                skf = cross_validation.KFold(len(labels), n_folds=10, shuffle=True)
                iter_score = []
                for train_index, test_index in skf:
                    train_data = list(numpy.array(labeled_symbols)[train_index])
                    test_data = list(numpy.array(labeled_symbols)[test_index])
                    # retrain on the fold's training part only
                    self.model = self.learning_model.train(train_data)
                    fold_score = self.model.evaluate(test_data)
                    iter_score.append(fold_score)
                cv_scores.append(numpy.mean(iter_score))
            return cv_scores
        else:
            skf = cross_validation.ShuffleSplit(len(labels), n_iter=n_iter, test_size=0.2, random_state=0)
            training_score = []
            test_score = []
            for train_index, test_index in skf:
                train_data = list(numpy.array(labeled_symbols)[train_index])
                test_data = list(numpy.array(labeled_symbols)[test_index])
                self.model = self.learning_model.train(train_data)
                training_score.append(self.model.evaluate(train_data))
                test_score.append(self.model.evaluate(test_data))
                if n_iter == 1:
                    # confusion matrix for the single held-out fold
                    predict_labels = []
                    for i in range(len(list(codes[test_index]))):
                        predicted_states = self.model.tag(list(codes[test_index])[i])
                        # the first tagged symbol's label stands for the sequence
                        predict_labels.append(predicted_states[0][1])
                    self.ConfusionMatrix = ConfusionMatrix(list(labels[test_index]), predict_labels)
            return training_score, test_score

    def predict(self, image_path):
        """Classify one image (file path) or an already-encoded chain code.

        Returns the class only when the tagger assigns a single consistent
        state to the whole sequence; otherwise falls through (returns None).
        """
        if os.path.isfile(image_path):
            image_array = self.dsr.read_img_bw(image_path)
            freeman_code = self.fenc.encode_freeman(image_array)
        else:
            freeman_code = image_path
        predicted_states = self.model.tag(freeman_code)
        predicted_states = [x[1] for x in predicted_states]
        if len(set(predicted_states)) == 1:
            predicted_class = list(set(predicted_states))[0]
            return predicted_class

## TESTING CODE (WILL BE REMOVED) ##
# from HMM import HMM
# hmm = HMM()
# cv_scores = hmm.training('I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset', cv=10, n_iter=50)
# train_score, test_score = hmm.training('I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset', n_iter=1)
# with open('hmm_confusion_matrix.txt', 'w') as fp:
#     fp.write(hmm.ConfusionMatrix.__str__())
#
# with open("./Results/hmm.txt", 'w') as fp:
#     for i in range(len(cv_scores)):
#         text = str(cv_scores[i]) + ',' + str(train_score[i]) + ',' + str(test_score[i]) + '\n'
#         print text
#         print '--------------------------------'
#         fp.write(text)
class HMM(object):
    """Hidden-Markov-Model classifier for Freeman chain-code sequences.

    NOTE(review): this is a verbatim duplicate of an earlier HMM definition in
    this file (likely a bad merge/concatenation); being later, it is the one
    that takes effect at import time.  Consider removing one copy.
    """

    def __init__(self):
        ''' Constructor '''
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        # hidden states = digit classes, symbols = chain-code directions
        states = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        symbols = ['0', '1', '2', '3', '4', '5', '6', '7']
        self.learning_model = HiddenMarkovModelTrainer(states=states, symbols=symbols)
        self.model = None  # set by training()

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Build supervised-training structures from {label: [code, ...]}."""
        labeled_sequences = []
        labeled_symbols = []
        codes_list = freeman_codes_dict.items()
        for tup in codes_list:
            for code in tup[1]:
                temp = []
                for symbol in code:
                    # every symbol of a sequence carries the sequence's label
                    temp.append((symbol, tup[0]))
                labeled_symbols.append(temp)
        for tup in codes_list:
            for code in tup[1]:
                labeled_sequences.append((code,tup[0]))
        codes = numpy.array([x[0] for x in labeled_sequences])
        labels = numpy.array([y[1] for y in labeled_sequences])
        return labeled_symbols, labeled_sequences, codes, labels

    def learning_curve(self, dataset, n_iter, train_sizes=numpy.linspace(0.1, 1.0, 5)):
        """Collect CV and training scores at several training-set sizes.

        NOTE(review): `data` is computed but `dataset` is what gets passed to
        training() -- every size trains on the full set; confirm intent.
        """
        cv_scores = []
        train_scores = []
        for i in train_sizes:
            data = dataset[:int(len(dataset)*i)]
            cv_score = []
            t_score = []
            for j in range(n_iter):
                cv_score.extend(self.training(dataset, cv=10, n_iter=1))
                train_score, test_score = self.training(dataset, n_iter=1)
                t_score.extend(train_score)
            cv_scores.append(cv_score)
            train_scores.append(t_score)
        cv_scores = numpy.array(cv_scores)
        train_scores = numpy.array(train_scores)
        print cv_scores.shape
        print train_scores.shape
        return train_scores, cv_scores

    def get_data(self, dataset_path):
        """Read a dataset directory and return the labelled structures."""
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        labeled_symbols, labeled_sequence, codes, labels = self.generate_labelled_sequences(freeman_codes_dict)
        return labeled_symbols, labeled_sequence, codes, labels

    def training(self, dataset, cv=1, n_iter=1):
        """Train the HMM; `dataset` is a directory path or a pre-built tuple.

        cv > 1: returns n_iter mean 10-fold CV scores; otherwise returns
        (training_score, test_score) lists over n_iter 80/20 shuffle splits
        and, when n_iter == 1, stores a ConfusionMatrix on self.
        """
        if isinstance(dataset, basestring):  # a path -> load from disk (Py2 str check)
            labeled_symbols, labeled_sequence, codes, labels = self.get_data(dataset)
        else:
            labeled_symbols, labeled_sequence, codes, labels = dataset
        self.model = self.learning_model.train(labeled_symbols)
        if cv > 1:
            cv_scores = []
            for i in range(n_iter):
                skf = cross_validation.KFold(len(labels), n_folds=10, shuffle=True)
                iter_score = []
                for train_index, test_index in skf:
                    train_data = list(numpy.array(labeled_symbols)[train_index])
                    test_data = list(numpy.array(labeled_symbols)[test_index])
                    # retrain on the fold's training part only
                    self.model = self.learning_model.train(train_data)
                    fold_score = self.model.evaluate(test_data)
                    iter_score.append(fold_score)
                cv_scores.append(numpy.mean(iter_score))
            return cv_scores
        else:
            skf = cross_validation.ShuffleSplit(len(labels), n_iter=n_iter, test_size=0.2, random_state=0)
            training_score = []
            test_score = []
            for train_index, test_index in skf:
                train_data = list(numpy.array(labeled_symbols)[train_index])
                test_data = list(numpy.array(labeled_symbols)[test_index])
                self.model = self.learning_model.train(train_data)
                training_score.append(self.model.evaluate(train_data))
                test_score.append(self.model.evaluate(test_data))
                if n_iter==1:
                    # confusion matrix for the single held-out fold
                    predict_labels = []
                    for i in range(len(list(codes[test_index]))):
                        predicted_states = self.model.tag(list(codes[test_index])[i])
                        # the first tagged symbol's label stands for the sequence
                        predict_labels.append(predicted_states[0][1])
                    self.ConfusionMatrix = ConfusionMatrix(list(labels[test_index]), predict_labels)
            return training_score, test_score

    def predict(self, image_path):
        """Classify one image (file path) or an already-encoded chain code.

        Returns the class only when the tagger assigns a single consistent
        state to the whole sequence; otherwise falls through (returns None).
        """
        if os.path.isfile(image_path):
            image_array = self.dsr.read_img_bw(image_path)
            freeman_code = self.fenc.encode_freeman(image_array)
        else:
            freeman_code = image_path
        predicted_states = self.model.tag(freeman_code)
        predicted_states = [x[1] for x in predicted_states]
        if len(set(predicted_states)) == 1:
            predicted_class = list(set(predicted_states))[0]
            return predicted_class

## TESTING CODE (WILL BE REMOVED) ##
# from HMM import HMM
# hmm = HMM()
# cv_scores = hmm.training('I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset', cv=10, n_iter=50)
# train_score, test_score = hmm.training('I:\\eclipse_workspace\\CharacterRecognition\\teams_dataset', n_iter=1)
# with open('hmm_confusion_matrix.txt', 'w') as fp:
#     fp.write(hmm.ConfusionMatrix.__str__())
#
# with open("./Results/hmm.txt", 'w') as fp:
#     for i in range(len(cv_scores)):
#         text = str(cv_scores[i]) + ',' + str(train_score[i]) + ',' + str(test_score[i]) + '\n'
#         print text
#         print '--------------------------------'
#         fp.write(text)
class KNN_statistic(object):
    """k-NN over Freeman chain codes (edit distance), for accuracy statistics.

    knn_train() returns 5-fold CV average, training-set accuracy and held-out
    test accuracy for a dataset directory.
    """

    def __init__(self):
        """Set up the dataset reader and Freeman encoder."""
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        self.training_data = []

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Flatten {label: [code, ...]} into a list of (label, code) pairs."""
        labelled_sequences = []
        codes_list = freeman_codes_dict.items()
        for tup in codes_list:
            for code in tup[1]:
                labelled_sequences.append((tup[0], code))
        return labelled_sequences

    def prepare_data(self, arrays_data=None, arrays_labels=None, split=0.2):
        """Split data/labels into train/test sets; `split` is the test fraction.

        Fixed: the original used mutable default arguments ([]), which Python
        shares between calls.
        """
        if arrays_data is None:
            arrays_data = []
        if arrays_labels is None:
            arrays_labels = []
        ad_train, ad_test, al_train, al_test = train_test_split(arrays_data, arrays_labels, test_size=split, random_state=42)
        return ad_train, ad_test, al_train, al_test

    def get_neighbors(self, data, data_label, test_instance, k):
        """Return the k [sample, label] pairs closest to test_instance by edit distance."""
        distances = []
        for i in range(len(data)):
            dist = edit_dist(test_instance, data[i])
            distances.append((data[i], data_label[i], dist))
        distances.sort(key=operator.itemgetter(2))
        neighbors = []
        for x in range(0, k):
            neighbors.append([distances[x][0], distances[x][1]])
        return neighbors

    def get_label(self, neighbors):
        """Majority vote over the neighbours' labels (index 1 of each pair)."""
        labels = {}
        for neighbor in neighbors:
            if neighbor[1] not in labels:
                labels[neighbor[1]] = 1
            else:
                labels[neighbor[1]] += 1
        sorted_labels = sorted(labels.items(), key=operator.itemgetter(1), reverse=True)
        return sorted_labels[0][0]

    def evaluation(self, data, data_for_distance_caculation, data_label, data_for_distance_calculation_label, k=3):
        """Return classification accuracy of `data` against the reference set.

        Fixed: the original iterated range(0, len(data) - 1) and therefore
        silently skipped the last instance while still dividing by len(data).
        """
        correct_count = 0
        for instance in range(len(data)):
            neighbors = self.get_neighbors(data_for_distance_caculation, data_for_distance_calculation_label, data[instance], k)
            label = self.get_label(neighbors)
            if int(label) == int(data_label[instance]):
                correct_count += 1
        return float(correct_count) / len(data)

    def knn_train(self, dataset_path, train_test_split=0.2):
        """Load/encode the dataset; return (cv_average, train_acc, test_acc).

        NOTE: the parameter name shadows the module-level train_test_split
        function, kept for interface compatibility.
        """
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        _, arrays_data, arrays_label = self.dsr.gen_labelled_arrays(freeman_codes_dict)
        arrays_data, arrays_label = shuffle(arrays_data, arrays_label)
        ad_train, ad_test, al_train, al_test = self.prepare_data(arrays_data, arrays_label, split=train_test_split)
        # Cross validation with 5 folds on the training part
        kf = KFold(len(ad_train), 5)
        result = 0
        for train_index, test_index in kf:
            ad_train_kfold, ad_test_kfold = ad_train[train_index], ad_train[test_index]
            al_train_kfold, al_test_kfold = al_train[train_index], al_train[test_index]
            result += self.evaluation(ad_test_kfold, ad_train_kfold, al_test_kfold, al_train_kfold, k=2)
        result_average = result / 5
        # Accuracy on the training set itself
        result_training = self.evaluation(ad_train, ad_train, al_train, al_train, k=2)
        # Accuracy on the held-out test set
        result_test = self.evaluation(ad_test, ad_train, al_test, al_train, k=2)
        return result_average, result_training, result_test

# knn = KNN_strings(n_neighbors=1)
# knn = KNN_statistic()
# results = []
# for x in range(50):
#     result_average, result_training, result_test = knn.knn_train("/home/thovo/PycharmProjects/CharacterRecognition/digits_dataset", 0.2)
#     text = result_average.__str__() + " , " + result_training.__str__() + " , " + result_test.__str__() + "\n"
#     results.append(text)
#
# f = open("Results/knn.txt", "w")
# for item in results:
#     f.write(item)
#
# f.close()
class KNN(object):
    """k-NN classifier on Freeman chain codes using string edit distance."""

    def __init__(self):
        """Set up dataset reader, Freeman encoder and empty training store."""
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        self.training_data = []

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Flatten {label: [code, ...]} into a list of (label, code) pairs."""
        labelled_sequences = []
        codes_list = freeman_codes_dict.items()
        for tup in codes_list:
            for code in tup[1]:
                labelled_sequences.append((tup[0], code))
        return labelled_sequences

    def prepare_data(self, datas, training=None, test=None, split=0.80):
        """Randomly split `datas` into `training`/`test` (mutated in place).

        Fixed: mutable default arguments (shared across calls) and an
        off-by-one -- range(len(datas) - 1) always dropped the last sample.
        """
        if training is None:
            training = []
        if test is None:
            test = []
        for data in range(len(datas)):
            if random.random() < split:
                training.append(datas[data])
            else:
                test.append(datas[data])

    def get_neighbors(self, training, test_instance, k):
        """Return the k training tuples (label, code) nearest to test_instance.

        Fixed: the original ranged over len(training) - 1 and so never
        considered the last training sample.
        """
        distances = []
        for i in range(len(training)):
            dist = edit_dist(test_instance, training[i][1])
            distances.append((training[i], dist))
        distances.sort(key=operator.itemgetter(1))
        neighbors = []
        for x in range(0, k):
            neighbors.append(distances[x][0])
        return neighbors

    def get_label(self, neighbors):
        """Majority vote over the labels (index 0) of the neighbour tuples."""
        labels = {}
        for neighbor in neighbors:
            if neighbor[0] not in labels:
                labels[neighbor[0]] = 1
            else:
                labels[neighbor[0]] += 1
        sorted_labels = sorted(labels.items(), key=operator.itemgetter(1), reverse=True)
        return sorted_labels[0][0]

    def evaluation(self, training, test):
        """Print the percentage of `test` tuples classified correctly (k=1)."""
        correct_count = 0
        # k = int(math.ceil(len(training)/10))
        k = 1
        for test_data in test:
            neighbors = self.get_neighbors(training, test_data[1], k)
            label = self.get_label(neighbors)
            if int(label) == int(test_data[0]):
                correct_count += 1
        print((float(correct_count) / len(test)) * 100)

    def knn_train(self, dataset_path, train_test_split=0.8):
        """Load, encode and split the dataset directory; keep the training part.

        With train_test_split == 1.0 everything is kept for training and no
        evaluation output is printed.
        """
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        labelled_sequences = self.generate_labelled_sequences(freeman_codes_dict)
        training = []
        test = []
        self.prepare_data(labelled_sequences, training, test, split=train_test_split)
        self.training_data = training
        if train_test_split != 1.0:
            print("Training:" + str(len(training)))
            print("Test:" + str(len(test)))
            self.evaluation(training, test)

    def knn_predict_one(self, image, k=1):
        """Classify one image (file path or already-encoded chain code)."""
        if os.path.isfile(image):
            image_array = self.dsr.read_img_bw(image)
            test = self.fenc.encode_freeman(image_array)
        else:
            test = image
        neighbors = self.get_neighbors(self.training_data, test, k)
        label = self.get_label(neighbors)
        return label
class New_Toplevel_1:
    """Tkinter GUI window for drawing/selecting a character and classifying it.

    Builds a two-tab notebook ("Recognize" / "Train"), a drawing canvas
    mirrored into an off-screen PIL image, a histogram canvas, a Freeman-code
    text box and the classifier-selection comboboxes.  All classifiers are
    constructed and (if necessary) trained at startup via _init_classifiers().
    """

    def __init__(self, master=None):
        # Widget colour palette (generated-GUI style constants).
        _bgcolor = '#d9d9d9'  # X11 color: 'gray85'
        _fgcolor = '#000000'  # X11 color: 'black'
        _compcolor = '#d9d9d9'  # X11 color: 'gray85'
        _ana1color = '#d9d9d9'  # X11 color: 'gray85'
        _ana2color = '#d9d9d9'  # X11 color: 'gray85'
        self.style = ttk.Style()
        if sys.platform == "win32":
            self.style.theme_use('winnative')
        self.style.configure('.', background=_bgcolor)
        self.style.configure('.', foreground=_fgcolor)
        self.style.configure('.', font="TkDefaultFont")
        self.style.map('.', background=[('selected', _compcolor), ('active', _ana2color)])
        master.configure(background="#d9d9d9")
        self.style.configure('TNotebook.Tab', background=_bgcolor)
        self.style.configure('TNotebook.Tab', foreground=_fgcolor)
        self.style.map('TNotebook.Tab', background=[('selected', _compcolor), ('active', _ana2color)])
        # Two-tab notebook: tab 0 = Recognize, tab 1 = Train.
        self.TNotebook1 = ttk.Notebook(master)
        self.TNotebook1.place(relx=0.02, rely=0.02, relheight=0.95, relwidth=0.96)
        self.TNotebook1.configure(width=574)
        self.TNotebook1.configure(takefocus="")
        self.TNotebook1_predict = ttk.Frame(self.TNotebook1)
        self.TNotebook1.add(self.TNotebook1_predict, padding=3)
        self.TNotebook1.tab(0, text="Recognize", underline="-1",)
        self.TNotebook1_train = ttk.Frame(self.TNotebook1)
        self.TNotebook1.add(self.TNotebook1_train, padding=3)
        self.TNotebook1.tab(1, text="Train", underline="-1",)
        # user defined variables
        self.thumbnails = self.load_thumbnails(CharRecognitionGUI_support.thumbnails_path)
        # Off-screen 1-bit PIL image mirroring what is drawn on Canvas1.
        self.PIL_image = Image.new("1", (300, 300), "white")
        self.hidden_canvas = ImageDraw.Draw(self.PIL_image)
        # Last mouse position while dragging (None = stroke not started).
        self.x = None
        self.y = None
        # Initialize and train all classifiers
        self._init_classifiers()
        self.Clear = ttk.Button(self.TNotebook1_predict)
        self.Clear.place(relx=0.44, rely=0.14, height=24, width=78)
        self.Clear.configure(text='''Clear Canvas''')
        self.Clear.bind("<Button-1>", self.clear)
        # Drawing canvas.
        self.Canvas1 = Canvas(self.TNotebook1_predict)
        self.Canvas1.place(relx=0.01, rely=0.02, relheight=0.61, relwidth=0.35)
        self.Canvas1.configure(background="white")
        self.Canvas1.configure(borderwidth="2")
        self.Canvas1.configure(highlightbackground="#d9d9d9")
        self.Canvas1.configure(highlightcolor="black")
        self.Canvas1.configure(insertbackground="black")
        self.Canvas1.configure(relief=RIDGE)
        self.Canvas1.configure(selectbackground="#c4c4c4")
        self.Canvas1.configure(selectforeground="black")
        self.Canvas1.configure(width=378)
        self.Canvas1.bind("<B1-Motion>", self.drag)
        self.Canvas1.bind("<ButtonRelease-1>", self.drag_end)
        self.Save = ttk.Button(self.TNotebook1_predict)
        self.Save.place(relx=0.5, rely=0.04, height=24, width=77)
        self.Save.configure(text='''Save''')
        self.Save.bind("<Button-1>", self.save)
        self.Select = ttk.Button(self.TNotebook1_predict)
        self.Select.place(relx=0.44, rely=0.24, height=24, width=77)
        self.Select.configure(text='''Select Image''')
        self.Select.bind("<Button-1>", self.select)
        # self.Frame1 = Frame(self.TNotebook1_predict)
        # self.Frame1.place(relx=0.61, rely=0.02, relheight=0.6, relwidth=0.36)
        # self.Frame1.configure(relief=GROOVE)
        # self.Frame1.configure(borderwidth="2")
        # self.Frame1.configure(relief=GROOVE)
        # self.Frame1.configure(background=_bgcolor)
        # self.Frame1.configure(highlightbackground="#d9d9d9")
        # self.Frame1.configure(highlightcolor="black")
        # self.Frame1.configure(width=305)
        # Canvas displaying the Freeman-code histogram plot.
        self.CanvasHist = Canvas(self.TNotebook1_predict)
        self.CanvasHist.place(relx=0.61, rely=0.02, relheight=0.6, relwidth=0.36)
        self.CanvasHist.configure(background="white")
        self.CanvasHist.configure(borderwidth="2")
        self.CanvasHist.configure(highlightbackground="#d9d9d9")
        self.CanvasHist.configure(highlightcolor="black")
        self.CanvasHist.configure(insertbackground="black")
        self.CanvasHist.configure(relief=RIDGE)
        self.CanvasHist.configure(selectbackground="#c4c4c4")
        self.CanvasHist.configure(selectforeground="black")
        self.CanvasHist.configure(width=378)
        # Text box showing the encoded Freeman chain code / status messages.
        self.freeman_textbox = Text(self.TNotebook1_predict)
        self.freeman_textbox.place(relx=0.01, rely=0.69, height=131, width=514)
        self.freeman_textbox.configure(background=_bgcolor)
        self.freeman_textbox.configure(foreground="#000000")
        self.freeman_textbox.configure(highlightbackground="#d9d9d9")
        self.freeman_textbox.configure(highlightcolor="black")
        self.scrollbar = Scrollbar(self.freeman_textbox)
        self.scrollbar.pack(side=RIGHT, fill=Y)
        self.scrollbar.config(command=self.freeman_textbox.yview)
        self.freeman_textbox.configure(yscrollcommand=self.scrollbar.set)
        # Classifier selection combobox.
        self.TCombobox1 = ttk.Combobox(self.TNotebook1_predict)
        self.TCombobox1.place(relx=0.41, rely=0.35, relheight=0.06, relwidth=0.16)
        self.value_list = ['kNN (Freeman Code)', 'HMM (Freeman Code)', 'RandomForest (EFD)', 'NaiveBayes (RAW)', 'SVM (EFD)', 'LogisticReg (EFD)', 'AdaBoost (EFD)', 'GradientBoosting (EFD)']
        self.TCombobox1.configure(values=self.value_list)
        self.TCombobox1.configure(textvariable=CharRecognitionGUI_support.combobox)
        self.TCombobox1.configure(takefocus="")
        # Digit-label combobox used when saving training samples.
        self.TCombobox2 = ttk.Combobox(self.TNotebook1_predict)
        self.TCombobox2.place(relx=0.4, rely=0.04, height=24, width=77)
        self.value_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',]
        self.TCombobox2.configure(values=self.value_list)
        self.TCombobox2.configure(textvariable=CharRecognitionGUI_support.combobox2)
        self.TCombobox2.configure(takefocus="")
        self.TCombobox2.current(0)
        # Frame + label showing the thumbnail of the predicted class.
        self.Frame2 = Frame(self.TNotebook1_predict)
        self.Frame2.place(relx=0.737, rely=0.715, relheight=0.26, relwidth=0.15)
        self.Frame2.configure(relief=GROOVE)
        self.Frame2.configure(borderwidth="2")
        self.Frame2.configure(relief=GROOVE)
        self.Frame2.configure(background=_bgcolor)
        self.Frame2.configure(highlightbackground="#d9d9d9")
        self.Frame2.configure(highlightcolor="black")
        self.Frame2.configure(width=150, height=150)
        self.Thumbnail = Label(self.TNotebook1_predict)
        self.Thumbnail.place(relx=0.75, rely=0.73, relheight=0.225, relwidth=0.125)
        self.Thumbnail.configure(background=_bgcolor)
        self.Thumbnail.configure(disabledforeground="#a3a3a3")
        self.Thumbnail.configure(foreground="#000000")
        # Keep a reference to the PhotoImage so Tk does not garbage-collect it.
        self._img1 = PhotoImage(file='./thumbnails/blank.gif')
        self.Thumbnail.configure(image=self._img1)
        self.Thumbnail.configure(text='''Label''')
        self.Thumbnail.configure(width=94)
        self.Recognize = ttk.Button(self.TNotebook1_predict)
        self.Recognize.place(relx=0.44, rely=0.45, height=24, width=87)
        self.Recognize.configure(text='''Recognize''')
        self.Recognize.bind("<Button-1>", self.recognize)
        self.Clear_Results = ttk.Button(self.TNotebook1_predict)
        self.Clear_Results.place(relx=0.44, rely=0.55, height=24, width=87)
        self.Clear_Results.configure(text='''Clear Results''')
        self.Clear_Results.bind("<Button-1>", self.clear_results)

    def _init_classifiers(self):
        """Create all classifier objects and train any without a saved model."""
        # Initialize classifier objects
        self.fenc = FreemanEncoder()
        self.knn = KNN.KNN()
        self.HMM = HMM.HMM()
        self.NaiveBayes = NaiveBayes.NaiveBayes()
        self.RandomForest = RandomForest.RandomForests()
        self.SVM = svm.SVM_SVC()
        self.LogisticReg = LogisticReg.LogisticReg()
        self.AdaBoost = adaboost.AdaBoost()
        self.GBRT = gbrt.GBRT()
        # Train initially on the default data set, if no model saved already.
        # The pickle.load calls only probe whether a saved model exists; the
        # loaded object is discarded.
        # Initialize KNN, no saved model for KNN
        self.knn.knn_train(CharRecognitionGUI_support.training_dataset, 1.0)
        # Initialize HMM
        self.HMM.training(CharRecognitionGUI_support.training_dataset)
        # Initialize Naive Bayes
        try:
            pickle.load( open( "./Models/naivebayes_model.p", "rb" ) )
        except IOError:
            self.NaiveBayes.training(CharRecognitionGUI_support.training_dataset)
        # Initialize Random Forest
        try:
            pickle.load( open( "./Models/random_forest.p", "rb" ) )
        except IOError:
            self.RandomForest.training(CharRecognitionGUI_support.training_dataset)
        # Initialize SVM
        try:
            pickle.load( open( "./Models/svm.p", "rb" ) )
        except IOError:
            self.SVM.training(CharRecognitionGUI_support.training_dataset)
        # Initialize Logistic Regression
        try:
            pickle.load( open( "./Models/logistic_model.p", "rb" ) )
        except IOError:
            self.LogisticReg.training(CharRecognitionGUI_support.training_dataset)
        # Initialize AdaBoost
        try:
            pickle.load( open( "./Models/AdaBoostClassifier.p", "rb" ) )
        except IOError:
            self.AdaBoost.training(CharRecognitionGUI_support.training_dataset)
        # Initialize GBRT
        try:
            pickle.load( open( "./Models/GradientBoostingClassifier.p", "rb" ) )
        except IOError:
            self.GBRT.training(CharRecognitionGUI_support.training_dataset)

    def load_thumbnails(self, thumbnails_path):
        """Map thumbnail base name (the class label) to its image file path."""
        images = {}
        for thumb in os.listdir(thumbnails_path):
            thumb_name = os.path.splitext(thumb)[0]
            images[thumb_name] = thumbnails_path + '/' + thumb
        return images

    def quit(self, event):
        ''' Event function to quit the drawer window '''
        sys.exit()

    def select(self, event):
        """Let the user pick an image file and display it on the canvas."""
        Tk().withdraw()  # we don't want a full GUI, so keep the root window from appearing
        self.clear("<Button-1>")
        image_path = askopenfilename()
        self.PIL_image = Image.open(image_path)
        # Keep a reference so the Tk image is not garbage-collected.
        self.selected_image = ImageTk.PhotoImage(self.PIL_image)
        self.Canvas1.create_image(150, 150, image=self.selected_image)

    def clear(self, event):
        ''' Event function to clear the drawing canvas (draw white fill) '''
        self.Canvas1.delete("all")
        # Reset the off-screen mirror image as well.
        self.PIL_image = Image.new("1", (300, 300), "white")
        self.hidden_canvas = ImageDraw.Draw(self.PIL_image)

    def drag(self, event):
        ''' Event function to start drawing on canvas when left mouse drag happens '''
        newx, newy = event.x, event.y
        if self.x is None:
            # First point of a stroke: just remember the position.
            self.x, self.y = newx, newy
            return
        # Draw both on the visible canvas and the hidden PIL mirror.
        self.Canvas1.create_line((self.x, self.y, newx, newy), width=5, smooth=True)
        self.hidden_canvas.line((self.x, self.y, newx, newy), width=12)
        self.x, self.y = newx, newy

    def drag_end(self, event):
        ''' Event function to stop drawing on canvas when mouse drag stops '''
        self.x, self.y = None, None

    def save(self, event):
        ''' Event function to save the current canvas image in JPG format '''
        image_cnt = 1
        if not os.path.exists(CharRecognitionGUI_support.save_dir):
            os.makedirs(CharRecognitionGUI_support.save_dir)
        # File name pattern: <label>_<n>.jpg; bump n until it is unused.
        file_name = CharRecognitionGUI_support.save_dir + self.TCombobox2.get() + '_' + str(image_cnt) + ".jpg"
        while os.path.isfile(file_name):
            image_cnt += 1
            file_name = CharRecognitionGUI_support.save_dir + self.TCombobox2.get() + '_' + str(image_cnt) + ".jpg"
        self.PIL_image.save(file_name)
        self.freeman_textbox.delete("1.0", END)
        self.freeman_textbox.insert(END, 'SAVED!')
        self.freeman_textbox.see(END)

    def clear_results(self, event):
        """Reset the text box, thumbnail and histogram canvas."""
        self.freeman_textbox.delete("1.0", END)
        self._img1 = PhotoImage(file='./thumbnails/blank.gif')
        self.Thumbnail.configure(image=self._img1)
        self.CanvasHist.delete("all")

    def recognize(self, event):
        """Encode the drawn image, plot its histogram and run the selected classifier."""
        # Invert greyscale: drawn strokes become foreground.
        image = ~numpy.array(self.PIL_image.convert('L'))
        try:
            code = self.fenc.encode_freeman(image)
            # Plotting the histogram
            figure = plt.figure()
            hist = list(map(int, list(code)))
            plt_hist = plt.hist(hist)
            plt.savefig('hist.png')
            self.hist_im = ImageTk.PhotoImage(Image.open('hist.png').resize((280,280), Image.LANCZOS))
            os.remove('hist.png')
            self.CanvasHist.create_image(290, 265, image=self.hist_im, anchor=SE)
        except ValueError:
            self.freeman_textbox.delete("1.0", END)
            self.freeman_textbox.insert(END, 'Please redraw the image')
            self.freeman_textbox.see(END)
        # NOTE(review): if encode_freeman raised above, `code` is unbound here
        # and str(code) raises NameError -- confirm whether the early error
        # path should return instead.
        self.freeman_textbox.delete("1.0", END)
        self.freeman_textbox.insert(END, str(code))
        self.freeman_textbox.see(END)
        # Dispatch on the classifier chosen in TCombobox1 (first word of entry).
        if self.TCombobox1.get() == '':
            pass
        elif self.TCombobox1.get().split(" ")[0] == 'kNN':
            pred = self.knn.knn_predict_one(code, 1)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'HMM':
            pred = self.HMM.predict(code)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'NaiveBayes':
            # Image-based classifiers: resize to 100x100 and binarise
            # (ink -> 255, background -> 0).
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.NaiveBayes.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'RandomForest':
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.RandomForest.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'SVM':
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.SVM.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'LogisticReg':
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.LogisticReg.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'AdaBoost':
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.AdaBoost.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        elif self.TCombobox1.get().split(" ")[0] == 'GradientBoosting':
            image = numpy.array(self.PIL_image.convert('L').resize((100,100), Image.LANCZOS))
            image[image < 128] = 1
            image[image >= 128] = 0
            image[image == 1] = 255
            pred = self.GBRT.predict(image)
            pred_thumb = self.thumbnails[pred[0]]
            self._image = PhotoImage(file=pred_thumb)
            self.Thumbnail.configure(image=self._image)
        else:
            self.freeman_textbox.delete("1.0", END)
            self.freeman_textbox.insert(END, 'Not Implemented yet')
            self.freeman_textbox.see(END)
class KNN_statistic(object):
    """Hand-rolled k-nearest-neighbour classifier over Freeman chain codes.

    The distance between two samples is the Levenshtein (edit) distance
    between their chain-code strings.  Unlike the sklearn-backed variants
    in this module, neighbour search, majority voting and evaluation are
    implemented directly so that cross-validation / training / test
    accuracies can be computed and returned as plain floats.
    """

    def __init__(self):
        '''
        Constructor
        '''
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        self.training_data = []

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Flatten a {label: [code, ...]} dict into (label, code) tuples."""
        labelled_sequences = []
        for label, codes in freeman_codes_dict.items():
            for code in codes:
                labelled_sequences.append((label, code))
        return labelled_sequences

    def prepare_data(self, arrays_data=None, arrays_labels=None, split=0.2):
        """Split data into train/test sets; `split` is the TEST fraction.

        Returns (data_train, data_test, labels_train, labels_test).
        """
        # None-sentinels instead of mutable default arguments.
        if arrays_data is None:
            arrays_data = []
        if arrays_labels is None:
            arrays_labels = []
        ad_train, ad_test, al_train, al_test = train_test_split(
            arrays_data, arrays_labels, test_size=split, random_state=42)
        return ad_train, ad_test, al_train, al_test

    def get_neighbors(self, data, data_label, test_instance, k):
        """Return the k nearest [code, label] pairs to `test_instance`,
        ranked by edit distance against every sample in `data`."""
        distances = []
        for i in range(len(data)):
            dist = edit_dist(test_instance, data[i])
            distances.append((data[i], data_label[i], dist))
        distances.sort(key=operator.itemgetter(2))
        neighbors = []
        for x in range(0, k):
            neighbors.append([distances[x][0], distances[x][1]])
        return neighbors

    def get_label(self, neighbors):
        """Majority vote over the labels of `neighbors` ([code, label] pairs)."""
        labels = {}
        for neighbor in neighbors:
            if neighbor[1] not in labels:
                labels[neighbor[1]] = 1
            else:
                labels[neighbor[1]] += 1
        sorted_labels = sorted(labels.items(), key=operator.itemgetter(1),
                               reverse=True)
        return sorted_labels[0][0]

    def evaluation(self, data, data_for_distance_caculation, data_label,
                   data_for_distance_calculation_label, k=3):
        """Return the accuracy (0..1) of kNN on `data`, using the second
        dataset as the neighbour pool.

        Parameter names (including the 'caculation' typo) are kept so any
        keyword-argument callers elsewhere keep working.
        """
        if len(data) == 0:
            # Guard: empty input previously raised ZeroDivisionError.
            return 0.0
        correct_count = 0
        # BUG FIX: the original looped over range(0, len(data) - 1), so the
        # last instance was never scored even though the denominator below
        # is len(data) -- accuracy was systematically under-reported.
        for instance in range(len(data)):
            neighbors = self.get_neighbors(
                data_for_distance_caculation,
                data_for_distance_calculation_label, data[instance], k)
            label = self.get_label(neighbors)
            if int(label) == int(data_label[instance]):
                correct_count += 1
        return float(correct_count) / len(data)

    def knn_train(self, dataset_path, train_test_split=0.2):
        """Train and evaluate on the images at `dataset_path`.

        `train_test_split` is the test fraction.  (The parameter name
        shadows the module-level train_test_split function, which is only
        called inside prepare_data, so the shadowing is harmless.)
        Returns (cv_average_accuracy, training_accuracy, test_accuracy).
        """
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        _, arrays_data, arrays_label = self.dsr.gen_labelled_arrays(
            freeman_codes_dict)
        arrays_data, arrays_label = shuffle(arrays_data, arrays_label)
        ad_train, ad_test, al_train, al_test = self.prepare_data(
            arrays_data, arrays_label, split=train_test_split)
        # 5-fold cross validation (old sklearn KFold(n, n_folds) API).
        # NOTE(review): assumes ad_train / al_train support numpy fancy
        # indexing, i.e. gen_labelled_arrays returns arrays -- TODO confirm.
        kf = KFold(len(ad_train), 5)
        result = 0
        for train_index, test_index in kf:
            ad_train_kfold, ad_test_kfold = ad_train[train_index], ad_train[
                test_index]
            al_train_kfold, al_test_kfold = al_train[train_index], al_train[
                test_index]
            result += self.evaluation(ad_test_kfold, ad_train_kfold,
                                      al_test_kfold, al_train_kfold, k=2)
        result_average = result / 5
        # Accuracy on the training set (neighbour pool == scored set).
        result_training = self.evaluation(ad_train, ad_train, al_train,
                                          al_train, k=2)
        # Accuracy on the held-out test set.
        result_test = self.evaluation(ad_test, ad_train, al_test, al_train,
                                      k=2)
        return result_average, result_training, result_test


# knn = KNN_strings(n_neighbors=1)
# knn = KNN_statistic()
# results = []
# for x in range(50):
#     result_average, result_training, result_test = knn.knn_train("/home/thovo/PycharmProjects/CharacterRecognition/digits_dataset", 0.2)
#     text = result_average.__str__() + " , " + result_training.__str__() + " , " + result_test.__str__() + "\n"
#     results.append(text)
#
#
# f = open("Results/knn.txt", "w")
# for item in results:
#     f.write(item)
#
# f.close()
class KNN(object):
    """Hand-rolled k-nearest-neighbour classifier over Freeman chain codes.

    Training data is a list of (label, code) tuples; the distance between
    samples is the Levenshtein (edit) distance of their code strings.
    """

    def __init__(self):
        '''
        Constructor
        '''
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        self.training_data = []

    def generate_labelled_sequences(self, freeman_codes_dict):
        """Flatten a {label: [code, ...]} dict into (label, code) tuples."""
        labelled_sequences = []
        for label, codes in freeman_codes_dict.items():
            for code in codes:
                labelled_sequences.append((label, code))
        return labelled_sequences

    def prepare_data(self, datas, training=None, test=None, split=0.80):
        """Randomly partition `datas` into `training` and `test` in place.

        `split` is the probability that an item lands in `training`
        (default 0.80).  The passed-in lists are mutated, as before;
        (training, test) is also returned for convenience.
        """
        # BUG FIX: the mutable default arguments ([]) were shared across
        # calls, so repeated calls silently accumulated earlier data.
        if training is None:
            training = []
        if test is None:
            test = []
        # BUG FIX: the original iterated range(len(datas) - 1), which
        # always dropped the last sample from both sets.
        for i in range(len(datas)):
            if random.random() < split:
                training.append(datas[i])
            else:
                test.append(datas[i])
        return training, test

    def get_neighbors(self, training, test_instance, k):
        """Return the k nearest (label, code) tuples to `test_instance`."""
        distances = []
        # BUG FIX: range(len(training) - 1) ignored the last training sample.
        for i in range(len(training)):
            dist = edit_dist(test_instance, training[i][1])
            distances.append((training[i], dist))
        distances.sort(key=operator.itemgetter(1))
        neighbors = []
        for x in range(0, k):
            neighbors.append(distances[x][0])
        return neighbors

    def get_label(self, neighbors):
        """Majority vote over the labels of `neighbors` ((label, code) tuples)."""
        # (removed an unused `max = 0` that shadowed the builtin)
        labels = {}
        for neighbor in neighbors:
            if neighbor[0] not in labels:
                labels[neighbor[0]] = 1
            else:
                labels[neighbor[0]] += 1
        sorted_labels = sorted(labels.items(), key=operator.itemgetter(1),
                               reverse=True)
        return sorted_labels[0][0]

    def evaluation(self, training, test):
        """Print (and return) the accuracy, in percent, of 1-NN on `test`."""
        if not test:
            # Guard: empty test set previously raised ZeroDivisionError.
            print(0.0)
            return 0.0
        correct_count = 0
        # k = int(math.ceil(len(training)/10))
        k = 1
        for test_data in test:
            neighbors = self.get_neighbors(training, test_data[1], k)
            label = self.get_label(neighbors)
            if int(label) == int(test_data[0]):
                correct_count += 1
        accuracy = float(correct_count) / len(test) * 100
        # Single parenthesised argument: identical output under Py2 and Py3.
        print(accuracy)
        return accuracy

    def knn_train(self, dataset_path, train_test_split=0.8):
        """Build the training set from the images at `dataset_path`.

        `train_test_split` is the fraction kept for training; when it is
        not 1.0 the held-out remainder is evaluated and the accuracy printed.
        """
        dataset = self.dsr.read_dataset_images(dataset_path)
        freeman_codes_dict = self.fenc.encode_freeman_dataset(dataset)
        labelled_sequences = self.generate_labelled_sequences(
            freeman_codes_dict)
        training = []
        test = []
        self.prepare_data(labelled_sequences, training, test,
                          split=train_test_split)
        self.training_data = training
        if train_test_split != 1.0:
            print("Training:" + str(len(training)))
            print("Test:" + str(len(test)))
            self.evaluation(training, test)

    def knn_predict_one(self, image, k=1):
        """Predict the label for one image, given either a file path or an
        already-encoded Freeman code string."""
        if os.path.isfile(image):
            image_array = self.dsr.read_img_bw(image)
            test = self.fenc.encode_freeman(image_array)
        else:
            test = image
        neighbors = self.get_neighbors(self.training_data, test, k)
        label = self.get_label(neighbors)
        return label
class KNN_strings(object): ''' classdocs ''' def __init__(self, n_neighbors=1): ''' Constructor ''' self.dsr = DatasetReader() self.fenc = FreemanEncoder() self.data = [] self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='auto', metric=self.lev_metric) def lev_metric(self, x, y): i, j = int(x[0]), int(y[0]) # extract indices # if self.data[i] == self.data[j]: # print self.data[i], self.data[j], edit_dist(self.data[i], self.data[j]) return edit_dist(self.data[i], self.data[j]) def knn_train(self, dataset, cv=1, datasplit=0.7): images_dataset = self.dsr.read_dataset_images(dataset) freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset) _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict) self.data = codes X = np.arange(len(self.data)).reshape(-1, 1) if cv <= 1: self.knn.fit(X, labels) elif cv > 1: cv_result = cross_validation.cross_val_score(self.knn, X, labels, cv=cv) print cv_result print 'Training Done!' def knn_predict(self, test_data, score=False): images_dataset = self.dsr.read_dataset_images(test_data) freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset) _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict) X_pred = np.arange(len(codes)).reshape(-1, 1) predictions = self.knn.predict(X_pred) if score == True: accuracy = self.knn.score(X_pred, labels) print "Test Accuracy: ", accuracy return predictions def knn_predict_one(self, test_image): image_code = self.fenc.encode_freeman(test_image) print image_code data = [image_code] X_pred = np.arange(len(data)).reshape(-1, 1) prediction = self.knn.predict(X_pred) return prediction