def _fscale(data, split=False, load=False, verbose=False):
    """Min-max scale ``data`` per feature and return it as a float64 array.

    The scaling parameters live in ``<root_data_path>scaled.npy`` (path taken
    from ``_configinfo()``) as a two-row array: row 0 is the per-feature
    minimum, row 1 the per-feature range.

    Modes, checked in this order:

    * ``load`` is true and the parameter file exists: the saved minimum and
      range are applied as ``(data - min) / range``. Zero ranges are replaced
      by 1 so constant features do not divide by zero.
    * ``split`` is true: a ``MinMaxScaler`` is fitted on the first
      ``config['train_d']`` rows only and then applied to both parts, which
      are stacked back together in their original order.
    * otherwise: a ``MinMaxScaler`` is fitted on, and applied to, all rows.

    Args:
        data: array-like of samples; converted with ``np.array(..., 'float64')``.
        split: fit on the leading ``config['train_d']`` rows only.
        load: reuse saved parameters instead of fitting (when the file exists).
        verbose: print a progress line.

    Returns:
        The scaled data as a NumPy array.

    Side effects:
        Unless ``load`` is true, the fitted minimum and range are written to
        the parameter file. If ``load`` is true but the file is missing, the
        data is fitted as in the other modes and nothing is saved.
    """
    config = _configinfo()
    sdpath = config['root_data_path'] + 'scaled.npy'

    if verbose:
        print('scaling features............... ', end='')
    data = np.array(data, dtype='float64')

    if load and os.path.isfile(sdpath):
        # Read the parameter file once; row 0 = minima, row 1 = ranges.
        saved = np.load(sdpath)
        m = saved[0]
        r = saved[1]
        # A constant feature has range 0; dividing by 1 leaves it at data - min.
        r[r == 0] = 1
        data = (data - m) / r
    elif split:
        scaler = pp.MinMaxScaler()
        n_train = config['train_d']
        train = data[:n_train]
        valid = data[n_train:]
        # Fit on the training rows only so the remaining rows do not
        # influence the scaling parameters.
        scaler.fit(train)
        m = scaler.data_min_
        r = scaler.data_range_
        data = np.vstack((scaler.transform(train), scaler.transform(valid)))
    else:
        scaler = pp.MinMaxScaler()
        data = scaler.fit_transform(data)
        m = scaler.data_min_
        r = scaler.data_range_

    if verbose:
        print('done')

    # Persist freshly fitted parameters so a later load=True call can reuse them.
    if not load:
        np.save(sdpath, np.vstack((m, r)))

    return data
def main():
    """Detect objects in an image, classify each one and show the result.

    Steps, in order:

    1. Parse arguments with ``_args()`` and read settings from
       ``_configinfo()``.
    2. Load the classifier from ``config['clfr_data_path']`` and the image
       from ``args.filepath``.
    3. Convert to grayscale, apply a 3x3 Gaussian blur and an inverse binary
       threshold at 95.
    4. Find external contours and their bounding rectangles; rectangles with
       height <= 8 are skipped as noise.
    5. For each remaining rectangle: cut the region out of the thresholded
       image, centre it on a black square 1.8 times the longer side, resize
       to ``(shape_x, shape_y)``, dilate, turn it into a feature vector
       according to ``config['preprocess']`` and predict a label, which is
       drawn above the rectangle.
    6. If ``args.save`` is set, write the intermediate and final images next
       to ``config['root_data_path']``; finally display the annotated image
       until a key is pressed.

    Status messages are printed only when ``args.verbose`` is set.

    The function calls ``sys.exit()`` when the classifier file or the image
    file is missing, when the image extension is not supported, or when the
    image cannot be decoded.
    """
    args = _args()
    verbose = args.verbose

    def _say(*parts, **kwargs):
        # All status output is opt-in via the verbose flag.
        if verbose:
            print(*parts, **kwargs)

    # begin time
    t_beg = time.time()

    # load config
    config = _configinfo()
    preprocess = config['preprocess']
    shape_x = config['shape_x']
    shape_y = config['shape_y']
    clfrpath = config['clfr_data_path']
    rootpath = config['root_data_path']
    filepath = args.filepath
    # Output files are named after the input file's base name (text before
    # the first dot), prefixed with the data root.
    savename = rootpath + os.path.split(filepath)[-1].split('.')[0]
    _say('\ninitializing configurations.... done')

    # load classifier
    _say('loading classifier............. ', end='')
    if not os.path.isfile(clfrpath):
        _say('not found\n')
        sys.exit()
    classifier = skex.joblib.load(clfrpath)
    _say('done')

    # load image
    _say('loading image.................. ', end='')
    if not os.path.isfile(filepath):
        _say('not found\n')
        sys.exit()
    (_, extn) = os.path.splitext(filepath)
    # Compare case-insensitively so e.g. '.PNG' is accepted as well.
    if extn.lower() not in ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff'):
        _say('unsupported format\n')
        sys.exit()
    image = cv.imread(filepath)
    # cv.imread signals a decode failure by returning None instead of raising.
    if image is None:
        _say('unreadable\n')
        sys.exit()
    _say('done')
    image_input = image.copy()

    # convert image to grayscale
    _say('converting colorspace.......... ', end='')
    image_gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    _say('done')

    # apply gaussian blurring to remove noise
    _say('removing noise from image...... ', end='')
    image_blur = cv.GaussianBlur(image_gray, (3, 3), 0, 0)
    _say('done')

    # threshold image (inverted: dark strokes become white foreground)
    _say('thresholding image............. ', end='')
    (_, image_th) = cv.threshold(image_blur, 95, 255, cv.THRESH_BINARY_INV)
    _say('done')
    image_rois = image_th.copy()

    # find contours
    _say('finding contours in image..... ', end='')
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the contours are second-to-last in both.
    contours = cv.findContours(image_th.copy(), cv.RETR_EXTERNAL,
                               cv.CHAIN_APPROX_NONE)[-2]
    _say('done')

    # find bounding rectangle around each contour
    bn_rects = [cv.boundingRect(cntr) for cntr in contours]
    _say('number of objects detected..... %d' % (len(bn_rects)))

    # process each bounding rectangle
    total = len(bn_rects)
    for index, (x, y, w, h) in enumerate(bn_rects):
        # Progress is based on the position in the list so that skipped
        # objects still advance the percentage.
        _say('\rprocessing objects............. %d%%' % (index * 100 // total),
             end='')

        # ignore tiny objects assuming them as noise
        if h <= 8:
            continue

        # draw bounding rectangle on images
        cv.rectangle(image_rois, (x, y), (x + w, y + h), (255, 255, 255), 1)
        cv.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # extract region of interest from the thresholded image
        image_roi = image_th[y:y + h, x:x + w]

        # centre the region on a black square 1.8x its longer side
        length = int(1.8 * max(w, h))
        image_pad = np.zeros((length, length), dtype='uint8')
        i = (length - h) // 2
        j = (length - w) // 2
        image_pad[i:i + h, j:j + w] = image_roi

        # resize padded image to the classifier's input size
        image_roi = cv.resize(image_pad, (shape_x, shape_y),
                              interpolation=cv.INTER_AREA)

        # perform dilation as morphological transformation
        # NOTE(review): the second argument of cv.dilate is the kernel itself,
        # so (3, 3) is not a 3x3 structuring element. Kept as-is because
        # changing it would alter the features fed to the classifier -
        # confirm against the training pipeline.
        image_roi = cv.dilate(image_roi, (3, 3))

        # preprocess roi into a single-row feature vector
        image_roi = image_roi.reshape(1, -1)
        if preprocess == 'haar':
            X = _haar(image_roi, load=False)
        elif preprocess == 'hogs':
            X = _hogs(image_roi, load=False)
        elif preprocess == 'sift':
            X = _sift(image_roi, load=False)
        elif preprocess == 'surf':
            X = _surf(image_roi, load=False)
        else:
            # Plain pixel features: scale with the saved parameters.
            X = _fscale(image_roi, load=True)
        X = X.reshape(1, -1)

        # predict label
        pred = classifier.predict(X)[0]

        # put predicted label just above the bounding rectangle
        cv.putText(image, str(pred), (x, y - 8), cv.FONT_HERSHEY_SIMPLEX, 1,
                   (0, 0, 255), 2)
    _say('\rprocessing objects............. done')

    # save images
    if args.save:
        _say('saving images.................. ', end='')
        cv.imwrite(savename + '_1_input.png', image_input)
        cv.imwrite(savename + '_2_gray.png', image_gray)
        cv.imwrite(savename + '_3_blur.png', image_blur)
        cv.imwrite(savename + '_4_th.png', image_th)
        cv.imwrite(savename + '_5_rois.png', image_rois)
        cv.imwrite(savename + '_6_output.png', image)
        _say('done')

    # end time
    t_end = time.time()
    _say('processing time................ %.2f sec\n' % (t_end - t_beg))

    # display image with predictions until a key is pressed
    cv.imshow('DIGIT RECOGNIZER', image)
    cv.waitKey(0)
    cv.destroyAllWindows()