class ImageScore:
    """Score an image on a 50-100 scale using a pretrained DeepConvNet.

    The net outputs one logit per score bucket; `predict_score` converts the
    softmax distribution into an expected-value style scalar score.
    """

    def __init__(self, input_dim=(3, 32, 32)):
        # Keep only the spatial size (H, W); the channel count stays inside
        # input_dim for the network itself.
        self.image_size = input_dim[1:]
        self.net = DeepConvNet(input_dim)
        self.net.load_params('./neural_net/network_params.pkl')

    def _predict(self, file_path):
        """Return the softmax class-probability vector for one image file."""
        features = im2mat(file_path, self.image_size)
        # Add a leading batch axis of 1, run the net, then drop it again.
        logits = self.net.predict(np.expand_dims(features, 0))
        return softmax(np.squeeze(logits))

    def predict_score(self, file_path):
        """Map class probabilities to a scalar score.

        Each bucket i (1-based) contributes probability * i * 10, on top of a
        base of 50 — so a 5-bucket net yields scores in roughly [60, 100].
        """
        probs = self._predict(file_path)
        weighted = sum(p * (rank + 1) * (50 / 5) for rank, p in enumerate(probs))
        return round(50 + weighted, 3)
# Evaluate the loaded network over the test set in mini-batches, collect the
# predicted class ids, then set up the figure used by the viewer code below.
network.load_params("deep_convnet_params.pkl")
print("calculating test accuracy ... ")
#sampled = 1000
#x_test = x_test[:sampled]
#t_test = t_test[:sampled]

batch_size = 100
num_batches = int(x_test.shape[0] / batch_size)
batch_preds = []
correct = 0.0
# NOTE(review): any final partial batch is skipped, yet the denominator below
# is the full test-set size — accuracy is slightly understated unless
# len(x_test) divides evenly by batch_size.
for b in range(num_batches):
    lo, hi = b * batch_size, (b + 1) * batch_size
    # argmax turns per-class scores into predicted class ids.
    pred = np.argmax(network.predict(x_test[lo:hi], train_flg=False), axis=1)
    batch_preds.append(pred)
    # assumes t_test holds integer class labels (not one-hot) — TODO confirm
    correct += np.sum(pred == t_test[lo:hi])
acc = correct / x_test.shape[0]
print("test accuracy:" + str(acc))

# Flatten the per-batch prediction arrays into one flat id array for the viewer.
classified_ids = np.array(batch_preds).flatten()

max_view = 20
current_view = 1
fig = plt.figure()
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.2, wspace=0.2)
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # make the parent directory importable
import numpy as np
from deep_convnet import DeepConvNet
from PIL import Image
from common.functions import softmax

# Load one test image, feed it to the pretrained network as an
# (N=1, C=1, H, W) tensor built from the green channel, and print the
# predicted class.
size = 28
test = np.zeros((1, 1, size, size))
img = Image.open("../dataset/test.png")
img = img.resize((size, size))
img_rgb = img.convert('RGB')
# Vectorized replacement for the original per-pixel getpixel() double loop:
# np.asarray(img)[y, x, 1] equals getpixel((x, y))'s green component, so one
# slice assignment fills the whole (H, W) plane identically.
test[0, 0] = np.asarray(img_rgb)[:, :, 1]
network = DeepConvNet()
network.load_params("deep_convnet_params.pkl")
# NOTE(review): softmax is monotonic, so argmax(softmax(z)) == argmax(z);
# kept for parity with the original output path. Pixel values here are raw
# 0-255 — confirm the network was trained on the same scale.
print(np.argmax(softmax(network.predict(test))))
# cmap='gray'でグレースケールで表示 # plt.imshow(img_data.reshape(28, 28), cmap='gray') # plt.savefig("/Users/ryuto/works/judge-num/mnist_edited_numbers/edited_" + filename.replace(".png", "") + ".png") img_test = np.r_[img_test, img_data.reshape(1, -1)] # 画像データの正解を配列にしておく img_ans = [] for filename in filenames: img_ans += [int(filename[:1])] img_ans = np.array(img_ans) img_test = img_test.reshape(-1, 1, 28, 28) pred = network.predict(img_test) pred_label = np.argmax(pred, axis=1) pred_score = list(map(lambda x: round(x, 2), np.max(softmax(pred), axis=1))) # 結果の出力 print("判定結果") print("観測:", img_ans) print("予測:", pred_label) # print("信頼度:", pred_score) print("正答率:", np.sum(pred_label == img_ans) / (img_test.shape[0])) img_advs = [] for i, x in enumerate(img_test):
def recognizeVideo(self):
    """Live-camera digit calculator.

    Grabs webcam frames, extracts digit candidates from a fixed region via a
    morphology pipeline, classifies each 28x28 crop with a pretrained
    DeepConvNet, and overlays the result of applying the on-screen button's
    operator to the first two recognized digits. Press 'q' to quit.
    """
    network = DeepConvNet()
    network.load_params("deep_convnet_params.pkl")
    # create button instance
    button = Button('QUIT', 0, 0, 100, 30)
    # Preprocessing pipeline 1: Grayscale -> Erosion -> Resize -> Binary
    # Preprocessing pipeline 2:
    # Grayscale -> Morph Gradient -> Adaptive Threshold -> Morph Close -> HoughLinesP
    capture = cv2.VideoCapture(0)
    capture.set(3, 640)   # property 3 = frame width
    capture.set(4, 480)   # property 4 = frame height
    print('image width %d' % capture.get(3))
    print('image height %d' % capture.get(4))
    while (1):
        ret, frame = capture.read()
        # val1 rectangle
        cv2.rectangle(frame, (40, 140), (40 + 250, 140 + 80), (0, 0, 255), 3)
        # NOTE(review): the x-slice starts at 140 but the drawn rectangle
        # starts at 40 — ROI and box do not match; looks like a typo. Confirm.
        va1 = frame[140:140 + 80, 140:40 + 250]
        # val2 rectangle
        cv2.rectangle(frame, (390, 140), (390 + 250, 140 + 80), (0, 0, 255), 3)
        #tesserocrFrame = self.doTesserectOCR(frame)
        # 1. Convert to grayscale
        grayFrame = cv2.cvtColor(va1, cv2.COLOR_BGR2GRAY)
        # 2. Erosion
        kernel = np.ones((5, 5), np.uint8)
        erosion = cv2.erode(grayFrame, kernel, iterations=1)
        # 3. Morph Gradient: extract the boundary image
        kernel = np.ones((5, 5), np.uint8)
        gradient = cv2.morphologyEx(erosion, cv2.MORPH_GRADIENT, kernel)
        # 4. Adaptive Threshold: remove noise
        thresh = cv2.adaptiveThreshold(gradient, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY_INV, 15, 8)
        # 5. Morph Close: fill small holes and strengthen boundaries
        kernel = np.ones((10, 10), np.uint8)
        closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
        # 5. Long-line removal (HoughLinesP): drop elements that interfere
        #    with glyph extraction
        threshold = 100  # line-detection accuracy
        minLength = 80   # minimum length of lines to extract
        lineGap = 5      # lines overlapping within 5 px are merged
        rho = 1
        lines = cv2.HoughLinesP(closing, rho, np.pi / 180, threshold,
                                minLength, lineGap)
        limit = 10
        if lines is not None:
            for line in lines:
                # line[0] = (x1, y1, x2, y2); only near-vertical spans
                # (|dy| > limit) are erased. gapX is computed but unused.
                gapY = np.abs(line[0][3] - line[0][1])
                gapX = np.abs(line[0][2] - line[0][0])
                if gapY > limit and limit > 0:
                    # remove line by painting it black
                    cv2.line(closing, (line[0][0], line[0][1]),
                             (line[0][2], line[0][3]), (0, 0, 0), 10)
        # 6. Contour extraction
        # contours is a list of point lists; a rectangle yields one contour
        # whose values are its corner coordinates.
        # hierachy is the nesting hierarchy of the contour lines.
        # NOTE(review): 3-value unpacking matches OpenCV 3.x; OpenCV 4.x
        # returns only (contours, hierarchy).
        contourFrame, contours, hierachy = cv2.findContours(
            closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        padding = 5
        if len(contours) > 0:
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                #print(w, h)
                #cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 5)
                # Keep only plausibly digit-sized boxes; store the bounding
                # rect plus a padded 28x28 crop of the thresholded image.
                if h > 30 and w > 10:
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 5)
                    self.boxes.append([
                        cv2.boundingRect(contour),
                        cv2.resize(
                            thresh[y:y + h + padding, x:x + w + padding],
                            (28, 28))
                    ])
        ##Buble Sort on python — order boxes left-to-right
        for i in range(len(self.boxes)):
            for j in range(len(self.boxes) - (i + 1)):
                # compare by x coordinate
                if self.boxes[j][0][0] > self.boxes[j + 1][0][0]:
                    temp = self.boxes[j]
                    self.boxes[j] = self.boxes[j + 1]
                    self.boxes[j + 1] = temp
        # show boxes...
        for box in self.boxes:
            # Wrap the 28x28 crop as a (1, 1, 28, 28) tensor for the net.
            npbox = np.array([[box[1]]])
            y = network.predict(npbox)
            #print(np.argmax(y, axis=1))
            self.cnnStr.append(np.argmax(y, axis=1))
            cv2.putText(
                frame, str(np.argmax(y, axis=1)),
                (box[0][0] + box[0][2] % 2,
                 box[0][1] + box[0][3] * 2),  # text output position
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        print(self.cnnStr)
        # add button to frame; draw() returns the current operator string
        calculation = button.draw(frame)
        if len(self.cnnStr) >= 2:
            # NOTE(review): eval() on a constructed string — inputs are
            # limited to predicted digits and the button's operator, but
            # eval is still risky; consider an explicit operator dispatch.
            cv2.putText(
                frame,
                str(
                    eval(
                        str(self.cnnStr[0][0]) + calculation +
                        str(self.cnnStr[1][0]))), (100, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow('VideoCalculator', frame)
        # assign mouse click to method in button instance
        cv2.setMouseCallback("VideoCalculator", button.handle_event)
        # Reset per-frame state.
        self.boxes = []
        self.cnnStr = []
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    capture.release()
    cv2.destroyAllWindows()
sampled = 10000 # 고속화를 위한 표본추출 x_test = x_test[:sampled] t_test = t_test[:sampled] #print("caluculate accuracy (float64) ... ") #print(network.accuracy(x_test, t_test)) print("predict (float64) ... ") print(x_test[0][0][0][0]) #print(network.predict(x_train[0])) original = cv2.imread('images/digits3.jpg', cv2.IMREAD_COLOR) gray = cv2.imread('image_result/gray.jpg', cv2.IMREAD_COLOR) print(original.shape) print(gray.shape) arr = np.array([original,gray]) print(arr.shape) print(network.predict(arr)) # float16(반정밀도)로 형변환 #x_test = x_test.astype(np.float16) #for param in network.params.values(): # param[...] = param.astype(np.float16) #print("caluculate accuracy (float16) ... ") #print(network.accuracy(x_test, t_test))
def OrganizeImage(self, img_src):
    """Preprocess a photo of digits and return a cleaned binary PIL image.

    Finds digit-candidate contours via Canny edges, crops each to a 28x28
    patch, sorts the patches left-to-right, runs a DeepConvNet prediction on
    each for debugging display, then writes and returns an eroded binary
    version of the grayscale input.

    Args:
        img_src: path of the image file to load with cv2.imread.
    Returns:
        PIL.Image of the eroded, thresholded result re-read from disk.
    """
    img = cv2.imread(img_src, cv2.IMREAD_COLOR)
    # convert to gray image
    convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #cv2.imwrite('image_result/gray.jpg', convert_img)
    # smoothing to remove noise (a (1, 1) kernel is effectively a no-op —
    # NOTE(review): confirm this was intended rather than e.g. (3, 3))
    blur = cv2.GaussianBlur(convert_img, (1, 1), 0)
    #cv2.imwrite('image_result/blur.jpg',blur)
    canny_img = cv2.Canny(blur, 0, 255)
    #cv2.imwrite('image_result/canny.jpg',canny_img)
    # apply edge-detection contour extraction
    # NOTE(review): 3-value unpacking matches OpenCV 3.x; OpenCV 4.x returns
    # only (contours, hierarchy).
    cnts, contours, hierarchy = cv2.findContours(canny_img, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)
    box1 = []  # bounding rects (x, y, w, h)
    box2 = []  # matching 28x28 grayscale crops
    # preprocessing split: filter out wide/large boxes and border artifacts
    for i in range(len(contours)):
        cnt = contours[i]
        area = cv2.contourArea(cnt)
        x, y, w, h = cv2.boundingRect(cnt)
        rect_area = w * h  # area size
        aspect_ratio = float(w) / h  # ratio = width/height
        if (aspect_ratio >= 2.0) and (rect_area >= 1000):
            continue
        elif (x == 0) or (y >= 900):
            continue
        else:
            #print(img[y:y+h, x:x+w])
            box2.append(cv2.resize(convert_img[y:y + h, x:x + w], (28, 28)))
            #print(box2)
            #print(img[y:y+h, x:x+w])
            #cv2.imshow('adf', img[y:y+h, x:x+w])
            #cv2.waitKey(0)
            box1.append(cv2.boundingRect(cnt))
    ##Buble Sort on python — keep box1 and box2 paired while ordering by x
    for i in range(len(box1)):
        for j in range(len(box1) - (i + 1)):
            if box1[j][0] > box1[j + 1][0]:
                temp = box1[j]
                temp2 = box2[j]
                box1[j] = box1[j + 1]
                box2[j] = box2[j + 1]
                box1[j + 1] = temp
                box2[j + 1] = temp2
    # to find number measuring length between rectangles
    # NOTE(review): `count` is computed but never used — dead analysis code?
    for m in range(len(box1)):
        count = 0
        for n in range(m + 1, (len(box1) - 1)):
            delta_x = abs(box1[n + 1][0] - box1[m][0])
            if delta_x > 150:
                break
            delta_y = abs(box1[n + 1][1] - box1[m][1])
            if delta_x == 0:
                delta_x = 1
            if delta_y == 0:
                delta_y = 1
            gradient = float(delta_y) / float(delta_x)
            if gradient < 0.25:
                count += 1
    cv2.imwrite('image_result/snake.jpg', img)
    # edge handling complete
    network = DeepConvNet()
    network.load_params("deep_convnet_params.pkl")
    #print(box1)
    nine = cv2.imread('images/9.jpg', cv2.IMREAD_GRAYSCALE)
    nine = cv2.resize(nine, (28, 28))
    #print(nine.shape)
    #cv2.imwrite('image_result/nine.jpg', nine)
    nine = np.array([[nine]])
    # BUG FIX: the assignment target was missing here ("= np.array(box2)"),
    # leaving a syntax error; restored as `ndarr` per the commented print.
    ndarr = np.array(box2)
    #print(ndarr.shape)
    #print(nine.shape)
    # Debug pass: classify and display each candidate crop.
    for box in box2:
        #print(box)
        #print(box.shape)
        npbox = np.array([[box]])  # (1, 1, 28, 28) tensor for the net
        y = network.predict(npbox)
        print(np.argmax(y, axis=1))
        cv2.imshow('adf', box)
        cv2.waitKey(0)
    # the appropriate length for our project is still undecided
    ret, number_th = cv2.threshold(convert_img, 100, 255, cv2.THRESH_BINARY)
    cv2.imwrite('image_result/number_th.jpg', number_th)
    kernel = np.ones((3, 3))
    er_number = cv2.erode(number_th, kernel, iterations=2)
    cv2.imwrite('image_result/er_number.jpg', er_number)
    return (Image.open('image_result/er_number.jpg'))
sys.path.append(os.pardir)  # make the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.layers import *
from deep_convnet import DeepConvNet
import random
from PIL import Image
import cv2

# Load the pretrained network, pick ONE random MNIST test sample, display it,
# and print its true (one-hot) label alongside the predicted class.
net = DeepConvNet()
net.load_params("deep_convnet_params.pkl")

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

# img = cv2.imread("tmp.jpg")
# img = cv2.resize(img, (28,28))
# img = img.astype(np.float)
# img /= 255
# img = np.array(img).reshape(1,1,28,28)

# BUG FIX: the original drew the label and the image with two independent
# random.choice() calls, so the printed label did not belong to the displayed
# image. Use a single random index for both.
idx = random.randrange(len(x_test))
print(t_test[idx])  # one-hot ground-truth label for the displayed sample
img = np.reshape(x_test[idx], (1, 1, 28, 28))

# BUG FIX: plt.imshow() cannot render a (1, 1, 28, 28) tensor (invalid image
# shape); show the 2-D image in grayscale and actually render the figure.
# img = np.array(img)
# cv2.imshow('image', img)
plt.imshow(img.reshape(28, 28), cmap='gray')
plt.show()

num = net.predict(img)
# BUG FIX: num is a plain ndarray; `num.data` is its memoryview buffer, so
# argmax over it was at best accidental — take the argmax of the array itself.
print(np.argmax(num))
cv2.waitKey(0)
cv2.destroyAllWindows()
# Fragment: tail of a per-file feature-building loop plus the evaluation
# report. `crop`, `img_data256`, `img_test`, `filenames`, and `network` are
# defined in code outside this chunk.
# Brightness-style feature appended after the 256 pixel values — presumably a
# bias/brightness correction term; verify against the training-side code.
bright = 255 - crop.mean()**2 / 255
img_data256 = np.append(img_data256, bright)
img_data = img_data256 / 255  # scale to [0, 1]
# write the processed image data out to mnist_edited_images
# show in grayscale with cmap='gray'
# plt.imshow(img_data.reshape(28, 28), cmap='gray')
# plt.savefig("/Users/ryuto/works/judge-num/mnist_edited_numbers/edited_" + filename.replace(".png", "") + ".png")
# Append this image as one flattened row of the test matrix.
img_test = np.r_[img_test, img_data.reshape(1, -1)]

# Collect ground-truth answers: the first character of each filename is
# taken to be the digit label.
img_ans = []
for filename in filenames:
    img_ans += [int(filename[:1])]
img_ans = np.array(img_ans)

# Reshape flat rows into (N, C=1, 28, 28) tensors, classify, take argmax.
img_test = img_test.reshape(-1, 1, 28, 28)
pred = network.predict(img_test)
pred = np.argmax(pred, axis=1)

# Print the results (labels are Japanese UI strings; left untranslated on
# purpose — they are runtime output).
print("判定結果")
print("観測:", img_ans)
print("予測:", pred)
print("正答率:", np.sum(pred == img_ans) / (img_test.shape[0]))
# Evaluate the loaded network on the test set in mini-batches and remember
# every predicted class id for the visualization code that follows.
network.load_params("deep_convnet_params.pkl")
print("calculating test accuracy ... ")
#sampled = 1000
#x_test = x_test[:sampled]
#t_test = t_test[:sampled]

classified_ids = []
acc = 0.0
batch_size = 100
# NOTE(review): integer division drops any final partial batch, yet the
# denominator below is the full test-set size — accuracy is slightly
# understated unless len(x_test) is a multiple of batch_size.
for i in range(int(x_test.shape[0] / batch_size)):
    tx = x_test[i*batch_size:(i+1)*batch_size]
    tt = t_test[i*batch_size:(i+1)*batch_size]
    y = network.predict(tx, train_flg=False)
    y = np.argmax(y, axis=1)  # per-class scores -> predicted class ids
    classified_ids.append(y)
    # assumes t_test holds integer class labels (not one-hot) — TODO confirm
    acc += np.sum(y == tt)
acc = acc / x_test.shape[0]
print("test accuracy:" + str(acc))

# Flatten the list of per-batch prediction arrays into one flat id array.
classified_ids = np.array(classified_ids)
classified_ids = classified_ids.flatten()

# Figure scaffolding for browsing samples; the plotting loop itself lies
# outside this chunk.
max_view = 20
current_view = 1
fig = plt.figure()
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.2, wspace=0.2)