def __init__(self, max_experience_buffer_len=120, param_dict=None,
             restore_params=False, pickle_file_path=""):
    # Avoid a mutable default argument: fall back to a fresh dict per call.
    if param_dict is None:
        param_dict = {}
    CNN_Model.__init__(self, param_dict, restore_params, pickle_file_path)
    self.max_experience_buffer_len = max_experience_buffer_len
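The snippet does not show how the buffer itself is stored; below is a minimal sketch of one common approach, assuming a `collections.deque` capped at `max_experience_buffer_len`. The buffer variable and helper below are hypothetical, not part of the original code.

from collections import deque

# Hypothetical sketch: deque(maxlen=...) evicts the oldest transition
# automatically once the buffer reaches capacity.
experience_buffer = deque(maxlen=120)

def add_experience(transition):
    experience_buffer.append(transition)  # oldest entry dropped when full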
import numpy as np
import torch
import torch.nn as nn


def train():
    from configuration import get_config

    config = get_config()
    cnn = CNN_Model(**config)
    code2idx, idx2code = get_top_50_code(config['top_50_code_file'])  ### 1.
    word2idx, idx2word, embed_mat = load_embedding(config['embed_file'])  ### 2.
    embed_mat = torch.FloatTensor(embed_mat)
    embedding = nn.Embedding.from_pretrained(embed_mat)

    trainData = CSV_Data_Reader(batch_size=config['batch_size'],
                                filename=config['train_file'],
                                max_length=config['max_length'],
                                code2idx=code2idx, word2idx=word2idx)
    opt_ = torch.optim.SGD(cnn.parameters(), lr=config['learning_rate'])

    for i in range(1000):
        batch_embed, batch_label = trainData.next(embedding)
        loss, _, __ = cnn(batch_embed, batch_label)
        opt_.zero_grad()
        loss.backward()
        opt_.step()
        # loss.item() replaces the deprecated loss.data[0] indexing.
        loss_value = loss.item()
        print('iteration {}, loss value: {}'.format(i, loss_value))

    print('============begin testing===========')
    testData = CSV_Data_Reader(batch_size=config['batch_size'],
                               filename=config['test_file'],
                               max_length=config['max_length'],
                               code2idx=code2idx, word2idx=word2idx)
    test_num = config['test_size']
    for i in range(int(np.ceil(test_num / config['batch_size']))):
        batch_embed, batch_label = testData.next(embedding)
        _, y_pred, _ = cnn(batch_embed, batch_label)
        # Accumulate predictions and labels across test batches.
        Y_pred = np.concatenate([Y_pred, y_pred.data.numpy()], 0) if i > 0 else y_pred.data.numpy()
        Batch_label = np.concatenate([Batch_label, batch_label], 0) if i > 0 else batch_label

    Y_pred = Y_pred > 0.5
    all_macro(Y_pred, Batch_label)
def produce_model(lr):  # lr is the learning rate
    # Simply a function to get the model, loss function, and optimizer.
    # Honestly, the function isn't even necessary; this can just be done
    # right before the training loop.
    model = CNN_Model(instance_train_data.shape[1])
    loss_function = torch.nn.MSELoss()
    # Notice how the model parameters are passed to the optimizer; this is
    # what lets it update the CNN's weights during training.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    return model, loss_function, optimizer
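For context, here is a hedged sketch of the training loop that would consume the returned triple. `instance_train_data` is taken from the snippet above; `instance_train_targets` is a hypothetical name for the matching target tensor.

model, loss_function, optimizer = produce_model(lr=1e-3)

for epoch in range(10):
    optimizer.zero_grad()                                # clear old gradients
    preds = model(instance_train_data)                   # forward pass
    loss = loss_function(preds, instance_train_targets)  # MSE against assumed targets
    loss.backward()                                      # backpropagate
    optimizer.step()                                     # Adam updates model.parameters()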
from collections import defaultdict

import numpy as np


def get_answers(list_answers):
    results = defaultdict(list)
    model = CNN_Model('weight.h5').build_model(rt=True)
    list_answers = np.array(list_answers)
    scores = model.predict_on_batch(list_answers / 255.0)
    for idx, score in enumerate(scores):
        question = idx // 4  # four choices per question
        # score = [unchoiced_cf, choiced_cf]
        if score[1] > 0.9:  # "choiced" confidence score > 0.9
            chosen_answer = map_answer(idx)
            results[question + 1].append(chosen_answer)
    return results
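`map_answer` is not shown in this snippet. Given the `idx // 4` grouping, a plausible (hypothetical) implementation maps the index within each group of four to a choice letter:

def map_answer(idx):
    # Hypothetical helper: position within the question's group of four choices.
    return ['A', 'B', 'C', 'D'][idx % 4]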
def predict(dir_path, data):
    # Earlier experiments (kept for reference): averaging predictions from
    # several saved models before taking the argmax.
    # model = load_model(model_name)
    # model_aug = load_model('64753.h5')
    # pred_list = model.predict(data)
    # pred_list = pred_list + model_aug.predict(data)
    # pred = np.argmax(pred_list, axis=1)
    # file_list = glob.glob(os.path.join(dir_path, '*.h5'))
    # print(file_list)
    # file_list.sort()
    # input()
    model = CNN_Model()
    model.load_weights('./model_best_62998.h5')
    preds = model.predict(data)
    # for i in range(1, 10):
    #     print(i)
    #     modelname = 'model_' + str(i) + '.h5'
    #     model = load_model(modelname)
    #     preds += model.predict(data)
    pred = np.argmax(preds, axis=1)
    return pred
print('Build Graph')
from model import CNN_Model

graph_cnn = tf.Graph()
# Create models for training and testing data
with graph_cnn.as_default():
    initializer = tf.random_uniform_initializer(-0.02, 0.02)
    with tf.name_scope('train'):
        train_data = tf.placeholder(tf.int32, [trainConfig.batch_size, MAX_DOCUMENT_LENGTH])
        train_label = tf.placeholder(tf.int32, [trainConfig.batch_size])
        train_lengths = tf.placeholder(tf.int32, [trainConfig.batch_size])
        # Set different models for different buckets
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            train_model = CNN_Model(trainConfig, train_data, train_label, train_lengths)
        saver = tf.train.Saver()
    with tf.name_scope('test'):
        test_data = tf.placeholder(tf.int32, [testConfig.batch_size, None])
        test_label = tf.placeholder(tf.int32, [testConfig.batch_size])
        test_lengths = tf.placeholder(tf.int32, [testConfig.batch_size])
        single_data = tf.placeholder(tf.int32, [singleConfig.batch_size, None])
        single_label = tf.placeholder(tf.int32, [singleConfig.batch_size])
        single_lengths = tf.placeholder(tf.int32, [singleConfig.batch_size])
        # Set different models for different buckets, reusing the variables
        # created under the "Model" scope above.
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            test_model = CNN_Model(testConfig, test_data, test_label, test_lengths)
test_image = tf.reshape(test_image_decode, [IMAGE_WIDTH, IMAGE_HEIGHT, 1])

# make batch
image_batch, label_batch, test_batch_x = tf.train.shuffle_batch(
    [image, label_decoded, test_image],
    batch_size=BATCH_SIZE, num_threads=4,
    capacity=50000, min_after_dequeue=10000)

# start session
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    models = CNN_Model(sess, "cnn_model")
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(TB_SUMMARY_DIR)
    writer.add_graph(sess.graph)
    global_step = 0
    # saver = tf.train.Saver()
    print("Learning Start")

    # train model
    for epoch in range(10):
        avg_cost = 0
        total_batch = int(25000 / BATCH_SIZE)
        print(total_batch)
        avg_accuracy = 0
if os.path.exists('data/train_%s_set' % (Args.task)):
    train_set = pickle.load(open('data/train_%s_set' % (Args.task), 'rb'))
else:
    train_set = SemEval_DataSet('train', 'oc', save=True)
    pickle.dump(train_set, open('data/train_%s_set' % (Args.task), 'wb+'))
if os.path.exists('data/valid_%s_set' % (Args.task)):
    valid_set = pickle.load(open('data/valid_%s_set' % (Args.task), 'rb'))
else:
    valid_set = SemEval_DataSet('dev', 'oc', wordict=train_set.wordict)
    pickle.dump(valid_set, open('data/valid_%s_set' % (Args.task), 'wb+'))
if os.path.exists('data/test_%s_set' % (Args.task)):
    # Load into test_set; the original rebound valid_set here, silently
    # overwriting the validation split.
    test_set = pickle.load(open('data/test_%s_set' % (Args.task), 'rb'))
else:
    test_set = SemEval_DataSet('test', 'oc', wordict=train_set.wordict)
    pickle.dump(test_set, open('data/test_%s_set' % (Args.task), 'wb+'))

print('Wordict size: %d' % (len(train_set.wordict)))

if Args.phase == 'train':
    # Training
    model_generator = lambda wordict_size, weight: \
        CNN_Model(wordict_size, Args.emb_len, Args.max_len, weight)

    def collate_fn(entry):
        return entry[0], entry[1].long().unsqueeze(1)

    trainer = Trainer(model_generator, train_set, valid_set,
                      max_epoch=Args.epoch, use_cuda=True,
                      collate_fn=collate_fn,
                      use_tensorboard=Args.tensorboard,
                      save_best_model=Args.save)
    trainer.train()
else:
    # Testing
    pass
# (the following else pairs with an outer `if` not shown in this excerpt)
else:
    parser.print_help()
vocab_token_to_id, vocab_id_to_token = build_vocabulary(
    train_instances, VOCAB_SIZE, glove_common_words)
# The vocabulary size is the number of entries in the token-to-id map;
# the original np.unique(vocab_token_to_id.keys())[0] expression did not
# compute this correctly.
vocab_size = len(vocab_token_to_id)

train_instances = index_instances(train_instances, vocab_token_to_id)
val_instances = index_instances(val_instances, vocab_token_to_id)

# make a config dict here as expected by your CNN_Model
config = {'vocab_size': vocab_size, 'embed_dim': args.embed_dim, 'training': True}
model = CNN_Model(**config)
config['type'] = 'advanced'

optimizer = optimizers.Adam()
embeddings = load_glove_embeddings(args.embed_file, args.embed_dim, vocab_id_to_token)
model.embeddings.assign(tf.convert_to_tensor(embeddings))

save_serialization_dir = os.path.join('serialization_dirs', 'advanced')
if not os.path.exists(save_serialization_dir):
    os.makedirs(save_serialization_dir)

train_output = train(model, optimizer, train_instances, val_instances,
                     args.epochs, args.batch_size, save_serialization_dir)
import argparse
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

from model import CNN_Model

Net = CNN_Model()
model_set = []
a = []
for worker_id in range(5):
    model_set.append(Net)    # every slot references the SAME instance
    a.append(CNN_Model())    # each slot gets its OWN instance
print(model_set[0] is model_set[1])  # True: shared object
print(a[0] is a[1])                  # False: distinct objects
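If the intent is five independent workers, each slot needs its own model (or a deep copy of an already-trained one). A small sketch under that assumption:

import copy

workers = [CNN_Model() for _ in range(5)]        # five independent parameter sets
clones = [copy.deepcopy(Net) for _ in range(5)]  # independent copies of one network
print(workers[0] is workers[1])  # False: distinct objects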
class E2E(object):
    def __init__(self):
        self.image = np.empty((28, 28, 1))
        self.detectLP = detectNumberPlate()
        self.recogChar = CNN_Model(trainable=False).model
        self.recogChar.load_weights('./weights/weight.h5')
        self.candidates = []
        self.preLpCnt = None

    def extractLP(self):
        coordinates = self.detectLP.detect(self.image)
        if len(coordinates) == 0:
            raise ValueError('No images detected')
        for coordinate in coordinates:
            yield coordinate

    def predict(self, image):
        # Input image or frame
        self.image = image

        for coordinate in self.extractLP():  # detect license plates with YOLOv3
            self.candidates = []
            x_min, y_min, width, height = coordinate
            LpRegion = self.image[y_min:y_min + height, x_min:x_min + width]
            # segmentation
            self.segmentation(LpRegion)
            # recognize characters
            self.recognizeChar()
            # format and display license plate
            license_plate = self.format()
            if len(license_plate) < 8:
                continue
            # draw labels
            self.image = draw_labels_and_boxes(self.image, license_plate, coordinate)

        # cv2.imwrite('example.png', self.image)
        return self.image

    def check_four_corners(self, rec):
        topLeft = topRight = bottomLeft = bottomRight = 0
        w, h = rec.shape[:2]
        for corner in rec:
            if corner[0] < w / 2 and corner[1] < h / 2:
                topLeft += 1
            elif corner[0] > w / 2 and corner[1] < h / 2:
                topRight += 1
            elif corner[0] < w / 2 and corner[1] > h / 2:
                bottomLeft += 1
            else:
                bottomRight += 1
        return topLeft == topRight == bottomLeft == bottomRight == 1

    def clean_border(self, LpRegion):
        LpRegion = imutils.resize(LpRegion, width=400)
        # CLAHE on the L channel to even out the lighting
        lab = cv2.cvtColor(LpRegion, cv2.COLOR_BGR2LAB)
        lab_planes = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
        lab_planes[0] = clahe.apply(lab_planes[0])
        lab = cv2.merge(lab_planes)
        LpRegion = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
        gray = cv2.cvtColor(LpRegion, cv2.COLOR_BGR2GRAY)
        # clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        # gray = clahe.apply(gray)
        blur1 = cv2.GaussianBlur(gray, (11, 11), cv2.BORDER_CONSTANT)
        blur2 = cv2.GaussianBlur(gray, (25, 25), cv2.BORDER_CONSTANT)
        difference = blur2 - blur1
        # _, difference = cv2.threshold(difference, 127, 255, 0)
        difference = clear_border(difference)
        difference = cv2.adaptiveThreshold(difference, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 9, 9)
        # cv2.imshow("gray", difference)
        # edged = cv2.Canny(gray, 50, 125)
        # cv2.imshow("edged", edged)
        cnts = cv2.findContours(difference.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[1:3]
        # print(cnts)
        lpCnt = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            # TODO: Playing around with this precision value
            approx = cv2.approxPolyDP(c, 0.05 * peri, True)
            if len(approx) == 4:
                lpCnt = approx
                self.preLpCnt = lpCnt
                break
        if lpCnt is not None and self.check_four_corners(self.preLpCnt):
            return LpRegion
        # NOTE: falls through (returns None) when no four-corner contour is found.
        # cv2.drawContours(LpRegion, [self.preLpCnt], -1, (255, 255, 0), 3)
        # cv2.imshow("ROI", LpRegion)
        # cv2.waitKey(0)

    def segmentation(self, LpRegion):
        LpRegion = self.clean_border(LpRegion)
        # cv2.imshow("edge", edged)
        V = cv2.split(cv2.cvtColor(LpRegion, cv2.COLOR_BGR2HSV))[2]
        # adaptive threshold
        T = threshold_local(V, 15, offset=10, method="gaussian")
        thresh = (V > T).astype("uint8") * 255
        # convert black digit pixels to white pixels
        thresh = cv2.bitwise_not(thresh)
        thresh = imutils.resize(thresh, width=400)
        thresh = clear_border(thresh)
        # cv2.imwrite("step2_2.png", thresh)
        cv2.imshow("thresh", thresh)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        # Earlier deskew experiment (kept for reference): estimate the plate's
        # rotation from Hough lines, then rotate the image back.
        # try:
        #     lines = cv2.HoughLinesP(image=thresh, rho=1, theta=np.pi / 180,
        #                             threshold=200, lines=np.array([]),
        #                             minLineLength=200, maxLineGap=20)
        #     angle = 0
        #     num = 0
        #     thresh = cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR)
        #     for line in lines:
        #         my_degree = math.degrees(math.atan2(line[0][3] - line[0][1],
        #                                             line[0][2] - line[0][0]))
        #         if -45 < my_degree < 45:
        #             angle += my_degree
        #             num += 1
        #         cv2.line(thresh, (line[0][0], line[0][1]),
        #                  (line[0][2], line[0][3]), (255, 0, 0))
        #     angle /= num
        #     # cv2.imshow("draw", thresh)
        #     # cv2.waitKey(0)
        #     # cv2.destroyAllWindows()
        #     # cv2.imwrite("draw.png", thresh)
        #     # Rotate image to deskew
        #     (h, w) = thresh.shape[:2]
        #     center = (w // 2, h // 2)
        #     M = cv2.getRotationMatrix2D(center, angle, 1.0)
        #     thresh = cv2.warpAffine(thresh, M, (w, h), flags=cv2.INTER_CUBIC,
        #                             borderMode=cv2.BORDER_REPLICATE)
        # except:
        #     pass
        # edges = cv2.Canny(thresh, 100, 200)
        # thresh = cv2.medianBlur(thresh, 5)
        # cv2.imshow("thresh", edges)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        # cv2.imwrite("thresh.png", thresh)

        # connected components analysis
        labels = measure.label(thresh, connectivity=2, background=0)

        # loop over the unique components
        for label in np.unique(labels):
            # if this is the background label, ignore it
            if label == 0:
                continue
            # init mask to store the location of the character candidates
            mask = np.zeros(thresh.shape, dtype="uint8")
            mask[labels == label] = 255

            # find contours from mask
            contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                contour = max(contours, key=cv2.contourArea)
                (x, y, w, h) = cv2.boundingRect(contour)

                # rules to decide whether the component is a character
                aspectRatio = w / float(h)
                solidity = cv2.contourArea(contour) / float(w * h)
                heightRatio = h / float(LpRegion.shape[0])

                if h * w > MIN_PIXEL_AREA and 0.25 < aspectRatio < 1.0 and solidity > 0.2 and 0.35 < heightRatio < 2.0:
                    # extract characters
                    candidate = np.array(mask[y:y + h, x:x + w])
                    square_candidate = convert2Square(candidate)
                    square_candidate = cv2.resize(square_candidate, (28, 28), cv2.INTER_AREA)
                    # cv2.imwrite('./characters/' + str(y) + "_" + str(x) + ".png",
                    #             cv2.resize(square_candidate, (56, 56), cv2.INTER_AREA))
                    square_candidate = square_candidate.reshape((28, 28, 1))
                    # cv2.imshow("square_candidate", square_candidate)
                    # cv2.waitKey(0)
                    # cv2.destroyAllWindows()
                    self.candidates.append((square_candidate, (y, x)))

    def recognizeChar(self):
        characters = []
        coordinates = []
        for char, coordinate in self.candidates:
            characters.append(char)
            coordinates.append(coordinate)
        characters = np.array(characters)
        result = self.recogChar.predict_on_batch(characters)
        result_idx = np.argmax(result, axis=1)
        self.candidates = []
        for i in range(len(result_idx)):
            if result_idx[i] == 31:  # background or noise: ignore it
                continue
            self.candidates.append((ALPHA_DICT[result_idx[i]], coordinates[i]))

    def format(self):
        first_line = []
        second_line = []
        for candidate, coordinate in self.candidates:
            # characters within 40px of the first candidate's y belong to line 1
            if self.candidates[0][1][0] + 40 > coordinate[0]:
                first_line.append((candidate, coordinate[1]))
            else:
                second_line.append((candidate, coordinate[1]))

        def take_second(s):
            return s[1]

        first_line = sorted(first_line, key=take_second)
        second_line = sorted(second_line, key=take_second)

        if len(second_line) == 0:  # license plate has one line
            license_plate = "".join([str(ele[0]) for ele in first_line])
        else:  # license plate has two lines
            license_plate = "".join([str(ele[0]) for ele in first_line]) + "-" + \
                            "".join([str(ele[0]) for ele in second_line])
        return license_plate
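A minimal usage sketch for the pipeline above, assuming the detector and recognizer weights are already on disk; `car.jpg` is a placeholder path, not a file from the original project:

import cv2

pipeline = E2E()
frame = cv2.imread('car.jpg')       # placeholder input image
result = pipeline.predict(frame)    # detect, segment, recognize, annotate
cv2.imwrite('example.png', result)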
from Game import *
from setup import *
from model import CNN_Model
from conf import *
from train import trainNetwork
import torch

if __name__ == "__main__":
    # choose cpu/cuda
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Device:", device)

    # open Chrome to set up a game environment
    game = Game()
    # set up the agent and initialize/start the game
    dino = DinoAgent(game)
    game_state = Game_sate(dino, game)

    model = CNN_Model()  # returns a resnet18-based model
    # model.achitect_summary((4, 80, 80))
    try:
        # train function
        process = trainNetwork(model.to(device), game_state, observe=False, device=device)
        process.start()
    except StopIteration:
        game.end()
def __get_data__():
    _, fr = rgb.read()
    gray = cv2.cvtColor(fr, cv2.COLOR_BGR2GRAY)
    faces = facec.detectMultiScale(gray, 1.3, 5)
    return faces, fr, gray


def start_app(cnn):
    skip_frame = 10
    data = []
    flag = False
    ix = 0
    while True:
        ix += 1
        faces, fr, gray_fr = __get_data__()
        for (x, y, w, h) in faces:
            fc = gray_fr[y:y + h, x:x + w]
            roi = cv2.resize(fc, (48, 48))
            res, acc = cnn.predict_result(roi[np.newaxis, :, :, np.newaxis])
            # str(acc) guards against acc being returned as a number
            cv2.putText(fr, res + ', ' + str(acc) + '%', (x, y), font, 1, (255, 255, 0), 2)
            cv2.rectangle(fr, (x, y), (x + w, y + h), (255, 0, 0), 2)
        if cv2.waitKey(1) == 27:
            break
        cv2.imshow('Filter', fr)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    model = CNN_Model("face_model.json", "face_model.h5")
    start_app(model)
def model_generator(wordict_size, weight):
    model = CNN_Model(wordict_size, 100, 100, 'reg', weight)
    model.build_model(lr=5e-5)
    return model
def model_generator(wordict_size, weight):
    model = CNN_Model(wordict_size, 100, 100, 'oc', weight)
    model.build_model()
    return model
X_train = X_train.reshape(-1, 48, 48, 1)

#### rescale
X_train = X_train / 255.
print('X_train shape : ', X_train.shape)
print('Y_train shape : ', Y_train.shape)

#### convert class vectors to binary class matrices (one-hot encoding)
Y_train = np_utils.to_categorical(Y_train, 7)

#### build model
for i in range(1):
    print('No.' + str(i))
    SaveModel_name = 'model_best_62998.h5'
    model = CNN_Model()
    # model = fit(model, X_train, Y_train, epochs=50, val_split=0.2)
    model = fit(model, X_train, Y_train, epochs=400, d_set=D_SET,
                remander_value=i, SaveModel_name=SaveModel_name)
    # plt.clf()
    # plt.plot(model.history.history['acc'])
    # plt.plot(model.history.history['val_acc'])
    # plt.title('Training Process_CNN')
    # plt.ylabel('accuracy')
    # plt.xlabel('epoch')
    # plt.legend(['acc', 'val_acc'], loc='upper left')
class E2E(object):
    def __init__(self):
        self.image = np.empty((28, 28, 1))
        self.detectLP = detectNumberPlate()
        self.recogChar = CNN_Model(trainable=False).model
        self.recogChar.load_weights('./weight.h5')
        self.candidates = []

    def extractLP(self):
        coordinates = self.detectLP.detect(self.image)
        if len(coordinates) == 0:
            raise ValueError('No images detected')
        for coordinate in coordinates:
            yield coordinate

    def segmentation(self, LpRegion):
        # apply thresholding to the extracted license plate
        V = cv2.split(cv2.cvtColor(LpRegion, cv2.COLOR_BGR2HSV))[2]
        T = threshold_local(V, 15, offset=10, method="gaussian")
        thresh = (V > T).astype("uint8") * 255

        # convert black digit pixels to white pixels
        thresh = cv2.bitwise_not(thresh)
        thresh = imutils.resize(thresh, width=400)
        thresh = cv2.medianBlur(thresh, 5)

        # connected components analysis
        # (connectivity=2 replaces the neighbors=8 argument removed from skimage)
        labels = measure.label(thresh, connectivity=2, background=0)

        # loop over the unique components
        for label in np.unique(labels):
            # if this is the background label, ignore it
            if label == 0:
                continue
            # init mask to store the location of the character candidates
            mask = np.zeros(thresh.shape, dtype="uint8")
            mask[labels == label] = 255

            # find contours from mask
            contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                contour = max(contours, key=cv2.contourArea)
                (x, y, w, h) = cv2.boundingRect(contour)

                # rules to decide whether the component is a character
                aspectRatio = w / float(h)
                solidity = cv2.contourArea(contour) / float(w * h)
                heightRatio = h / float(LpRegion.shape[0])

                if 0.1 < aspectRatio < 1.0 and solidity > 0.1 and 0.35 < heightRatio < 2.0:
                    # extract characters
                    candidate = np.array(mask[y:y + h, x:x + w])
                    square_candidate = convert2Square(candidate)
                    square_candidate = cv2.resize(square_candidate, (28, 28), cv2.INTER_AREA)
                    # cv2.imwrite('./characters/' + str(y) + "_" + str(x) + ".png", square_candidate)
                    square_candidate = square_candidate.reshape((28, 28, 1))
                    self.candidates.append((square_candidate, (y, x)))

    def recognizeChar(self):
        characters = []
        coordinates = []
        for char, coordinate in self.candidates:
            characters.append(char)
            coordinates.append(coordinate)
        characters = np.array(characters)
        result = self.recogChar.predict_on_batch(characters)
        result_idx = np.argmax(result, axis=1)
        self.candidates = []
        for i in range(len(result_idx)):
            if result_idx[i] == 31:  # background or noise: ignore it
                continue
            self.candidates.append((ALPHA_DICT[result_idx[i]], coordinates[i]))

    def format(self):
        first_line = []
        second_line = []
        for candidate, coordinate in self.candidates:
            # characters within 40px of the first candidate's y belong to line 1
            if self.candidates[0][1][0] + 40 > coordinate[0]:
                first_line.append((candidate, coordinate[1]))
            else:
                second_line.append((candidate, coordinate[1]))

        def take_second(s):
            return s[1]

        first_line = sorted(first_line, key=take_second)
        second_line = sorted(second_line, key=take_second)

        if len(second_line) == 0:  # license plate has one line
            license_plate = "".join([str(ele[0]) for ele in first_line])
        else:  # license plate has two lines
            license_plate = "".join([str(ele[0]) for ele in first_line]) + "-" + \
                            "".join([str(ele[0]) for ele in second_line])
        return license_plate

    def predict(self, image):
        # input image or frame
        self.image = image

        for coordinate in self.extractLP():
            self.candidates = []
            # convert (x_min, y_min, width, height) to (top left, top right, bottom left, bottom right)
            pts = order_points(coordinate)
            # crop the number plate with a four-point perspective transform
            LpRegion = perspective.four_point_transform(self.image, pts)
            # segmentation
            self.segmentation(LpRegion)
            # recognize characters
            self.recognizeChar()
            # format and display license plate
            license_plate = self.format()
            # draw labels
            self.image = draw_labels_and_boxes(self.image, license_plate, coordinate)

        cv2.imwrite('example.png', self.image)
        return self.image
class E2E(object):
    def __init__(self):
        self.image = np.empty((28, 28, 1))
        self.detectLP = detectNumberPlate()
        self.recogChar = CNN_Model(trainable=False).model
        self.recogChar.load_weights('./weights/weight.h5')
        self.candidates = []
        self.lpNumber = None

    def extractLP(self):
        """Extract the regions containing license plates.
        :return:
        """
        coordinates = self.detectLP.detect(self.image)
        if len(coordinates) == 0:
            raise ValueError('No images detected')
        for coordinate in coordinates:
            yield coordinate

    def predict(self, image):
        """Predict the license plate value.
        :param image:
        :return: the image annotated with the plate region and value,
                 plus the license plate string
        """
        # input image
        self.image = image

        # iterate over the plate regions detected by YOLOv3 Tiny
        for coordinate in self.extractLP():
            # reset candidates, which stores plate characters and the
            # coordinates to annotate in the image
            self.candidates = []

            # convert (x_min, y_min, width, height) to
            # (top left, top right, bottom left, bottom right)
            pts = order_points(coordinate)

            # crop the plate with a bird's-eye view transform
            LpRegion = perspective.four_point_transform(self.image, pts)

            # handle one-line vs two-line plates using a width/height ratio
            # threshold of 1.5: a ratio > 1.5 means a one-line plate,
            # otherwise a two-line plate
            if LpRegion.shape[1] / LpRegion.shape[0] > 1.5:
                # scale so the plate height becomes 40px
                scale_ratio = 40 / LpRegion.shape[0]
                (w, h) = (int(LpRegion.shape[1] * scale_ratio),
                          int(LpRegion.shape[0] * scale_ratio))
            else:
                # scale so the plate height becomes 100px
                scale_ratio = 100 / LpRegion.shape[0]
                (w, h) = (int(LpRegion.shape[1] * scale_ratio),
                          int(LpRegion.shape[0] * scale_ratio))

            # resize the plate region to a standard size
            LpRegion = cv2.resize(LpRegion, (w, h))

            # segment each character
            self.segmentation(LpRegion)

            # recognize the characters
            self.recognizeChar()

            # format the plate characters
            self.lpNumber = self.format()

            # draw the bounding box and plate value onto the image
            self.image = draw_labels_and_boxes(self.image, self.lpNumber, coordinate)

        # return the annotated image and the license plate value
        return self.image, self.lpNumber

    def segmentation(self, LpRegion):
        """Segment the plate region into character candidates.
        :param LpRegion:
        :return:
        """
        # apply thresholding to extract the plate area
        V = cv2.split(cv2.cvtColor(LpRegion, cv2.COLOR_BGR2HSV))[2]

        # global threshold (unused; the adaptive threshold below is what's applied)
        retval, threshold = cv2.threshold(V, 128, 255, cv2.THRESH_BINARY)
        T = threshold_local(V, 15, offset=10, method="gaussian")
        thresh = (V > T).astype("uint8") * 255

        # convert black digit pixels to white pixels
        thresh = cv2.bitwise_not(thresh)

        # resize the thresholded image to width 400px
        thresh = imutils.resize(thresh, width=400)
        # thresh = cv2.medianBlur(thresh, 5)

        # remove noise with a morphological opening (erode then dilate)
        kernel = np.ones((2, 2), np.uint8)
        thresh = cv2.erode(thresh, kernel)
        thresh = cv2.dilate(thresh, kernel)

        # connected components analysis
        labels = measure.label(thresh, connectivity=2, background=0)

        # loop over the unique components
        for label in np.unique(labels):
            # if this is the background label, ignore it
            if label == 0:
                continue
            # init a mask holding the location of the character candidates
            mask = np.zeros(thresh.shape, dtype="uint8")
            mask[labels == label] = 255

            # find contours in the mask
            contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                contour = max(contours, key=cv2.contourArea)
                (x, y, w, h) = cv2.boundingRect(contour)

                # character heuristics
                aspectRatio = w / float(h)
                solidity = cv2.contourArea(contour) / float(w * h)
                heightRatio = h / float(LpRegion.shape[0])

                if 0.1 < aspectRatio < 1.0 and solidity > 0.1 and 0.35 < heightRatio < 2.0:
                    # extract the character
                    candidate = np.array(mask[y:y + h, x:x + w])
                    square_candidate = convert2Square(candidate)
                    square_candidate = cv2.resize(square_candidate, (28, 28), cv2.INTER_AREA)
                    square_candidate = square_candidate.reshape((28, 28, 1))
                    self.candidates.append((square_candidate, (y, x)))

    def recognizeChar(self):
        """Recognize the plate characters with the CNN.
        :return:
        """
        # lists of character crops and their coordinates
        characters = []
        coordinates = []

        # populate them from the current candidates
        for char, coordinate in self.candidates:
            characters.append(char)
            coordinates.append(coordinate)
        characters = np.array(characters)

        # reset candidates
        self.candidates = []

        # classify the candidate characters of the current plate region
        if len(characters):
            result = self.recogChar.predict_on_batch(characters)
            result_idx = np.argmax(result, axis=1)
            for i in range(len(result_idx)):
                if result_idx[i] == 31:
                    # skip background
                    continue
                # store the recognized character
                self.candidates.append(
                    (ALPHA_DICT[result_idx[i]], coordinates[i]))

    def format(self):
        """Assemble the recognized characters into the final plate string.
        :return:
        """
        # characters on line 1 and line 2
        first_line = []
        second_line = []

        # assign each character to a line
        for candidate, coordinate in self.candidates:
            # within 40px of the first candidate's y => first line
            if self.candidates[0][1][0] + 40 > coordinate[0]:
                first_line.append((candidate, coordinate[1]))
            # otherwise => second line
            else:
                second_line.append((candidate, coordinate[1]))

        def take_second(s):
            return s[1]

        first_line = sorted(first_line, key=take_second)
        second_line = sorted(second_line, key=take_second)

        # build the final string
        if len(second_line) == 0:  # if license plate has 1 line
            license_plate = "".join([str(ele[0]) for ele in first_line])
        else:  # if license plate has 2 lines
            license_plate = "".join([str(ele[0]) for ele in first_line]) + "".join(
                [str(ele[0]) for ele in second_line])
        return license_plate
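Unlike the earlier E2E variants, this `predict` returns both the annotated image and the plate string. A minimal, hedged usage sketch (the image path is a placeholder; the YOLO and CNN weight files are assumed to be in place):

pipeline = E2E()
annotated, lp_number = pipeline.predict(cv2.imread('car.jpg'))  # placeholder path
print(lp_number)
cv2.imwrite('annotated.jpg', annotated)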