import math

import cv2
import pytesseract


def textRecognize(frame, mask1, mask2, angleError, truAz, truEl):
    # Crop regions of interest. The frame is now an explicit parameter
    # rather than an undefined module-level global.
    roi1 = frame[mask1]
    roi2 = frame[mask2]
    # Convert to grayscale
    roi1 = cv2.cvtColor(roi1, cv2.COLOR_BGR2GRAY)
    roi2 = cv2.cvtColor(roi2, cv2.COLOR_BGR2GRAY)
    # Double the image size, linear interpolation
    roi1 = cv2.resize(roi1, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    roi2 = cv2.resize(roi2, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    # Binarize with Otsu's method (inverted binary threshold)
    level, roi1 = cv2.threshold(roi1, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    level, roi2 = cv2.threshold(roi2, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Run Tesseract on each ROI (--psm 7 treats the image as a single text line)
    config = '--oem 1 --psm 7'
    text1 = pytesseract.image_to_string(roi1, config=config)
    text2 = pytesseract.image_to_string(roi2, config=config)
    try:
        # Parse degrees and minutes out of the OCR strings, then compare
        # against the true azimuth/elevation
        textNum1 = int(text1[1:3]) + int(text1[4:6]) / 60 - truAz
        textNum2 = int(text2[1:2]) + int(text2[3:5]) / 60 - truEl  # was text1: copy-paste bug
        textDiff = round(math.sqrt(textNum1 ** 2 + textNum2 ** 2), 2)
        pixel2truth = round(textDiff - angleError, 2)
        print(pixel2truth)
    except (ValueError, IndexError):
        # OCR output did not match the expected angle format
        print('failed')
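# A minimal usage sketch for textRecognize. The slices, truth angles,
# and file name below are hypothetical placeholders; adapt them to the
# actual text overlay layout in your frames.
frame = cv2.imread('telescope_frame.png')   # hypothetical input frame
azMask = (slice(10, 40), slice(20, 120))    # assumed azimuth text ROI
elMask = (slice(50, 80), slice(20, 120))    # assumed elevation text ROI
textRecognize(frame, azMask, elMask, angleError=0.05, truAz=123.5, truEl=45.25)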
import cv2


def apply_portrait(frame):
    frame = apply_alpha_convert(frame)  # frame carries an alpha channel from here on
    # The frame is BGRA at this point, so the grayscale conversion must
    # use the BGRA variant rather than BGR2GRAY
    gray = cv2.cvtColor(frame, cv2.COLOR_BGRA2GRAY)
    _, mask = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGRA)
    blurred = cv2.GaussianBlur(frame, (21, 21), 0)
    blended = apply_blend(frame, blurred, mask)
    frame = cv2.cvtColor(blended, cv2.COLOR_BGRA2BGR)
    return frame
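# apply_portrait depends on two helpers that are not defined in this
# snippet. The sketches below are plausible minimal implementations,
# assuming apply_alpha_convert adds an opaque alpha channel and
# apply_blend mixes two images using the mask intensity as the blend
# weight; the original project may implement them differently.
import numpy as np


def apply_alpha_convert(frame):
    # Add a fully opaque alpha channel to a BGR frame
    return cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)


def apply_blend(img_a, img_b, mask):
    # Per-pixel linear blend: a bright mask keeps img_a, a dark mask
    # falls back to img_b
    alpha = mask.astype(np.float32) / 255.0
    return (img_a * alpha + img_b * (1.0 - alpha)).astype(img_a.dtype)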
import cv2
import numpy as np


def load_image(filepath):
    """Load a dataset image into a numpy array."""
    image = cv2.imread(filepath)
    # cv2 loads images as BGR, so convert to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    return image
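# Usage sketch; the path is a placeholder. Note that cv2.imread
# returns None for unreadable paths, so a guard before cvtColor is
# worth adding in practice.
img = load_image('dataset/sample_0001.png')  # hypothetical path
print(img.shape, img.dtype)                  # e.g. (H, W, 3) float32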
import cv2


def strokeEdges(src, dst, blurKsize=7, edgeKsize=5):
    if blurKsize >= 3:
        # Kernel is large enough: median-blur first to suppress noise
        blurredSrc = cv2.medianBlur(src, blurKsize)
        graySrc = cv2.cvtColor(blurredSrc, cv2.COLOR_BGR2GRAY)
    else:
        # Kernel too small to blur; convert straight to grayscale
        graySrc = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
    # Laplacian edge detection, written in place into graySrc
    cv2.Laplacian(graySrc, cv2.CV_8U, graySrc, ksize=edgeKsize)
    # Invert the edge map and normalize it to [0, 1]
    normalizedInverseAlpha = (1.0 / 255) * (255 - graySrc)
    channels = cv2.split(src)  # split the channels
    for channel in channels:
        # Darken every channel where edges were detected
        channel[:] = channel * normalizedInverseAlpha
    cv2.merge(channels, dst)  # merge the processed channels into dst
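# Usage sketch for strokeEdges: it writes into a preallocated
# destination buffer of the same shape as the source. The file names
# are placeholders.
import numpy as np

src = cv2.imread('input.jpg')   # hypothetical input
dst = np.empty_like(src)        # destination buffer
strokeEdges(src, dst)
cv2.imwrite('stroked.jpg', dst)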
import cv2
from cv2 import aruco


def findArucoMarkers(img, markerSize=6, totalMarkers=250, draw=True):
    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Dictionary constants are named like DICT_6X6_250: the size
    # components are joined by 'X', not '*'
    key = getattr(aruco, f'DICT_{markerSize}X{markerSize}_{totalMarkers}')
    # Dictionary_get / DetectorParameters_create are the legacy
    # cv2.aruco API (OpenCV contrib, pre-4.7)
    arucoDict = aruco.Dictionary_get(key)
    arucoParam = aruco.DetectorParameters_create()
    bboxs, ids, rejected = aruco.detectMarkers(imgGray, arucoDict,
                                               parameters=arucoParam)
    if draw:
        aruco.drawDetectedMarkers(img, bboxs)
    return [bboxs, ids]
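# Usage sketch: detect and draw markers on a webcam stream. The
# camera index is a placeholder.
cap = cv2.VideoCapture(0)
while True:
    ok, img = cap.read()
    if not ok:
        break
    bboxs, ids = findArucoMarkers(img)
    cv2.imshow('aruco', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()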
import cv2
import imutils


def preprocess_frame(self, frame):
    """Compute the gradient image of the frame."""
    # Use the frame that was passed in rather than re-reading an image
    # from disk (the original read args["image"] and ignored frame)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # cv2.cv only exists in OpenCV 2.x; both branches resolve to a
    # 32-bit float depth
    ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
    gradX = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)  # was ddpeth: typo
    gradY = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
    # Subtract the y-gradient from the x-gradient to emphasize
    # vertical structure
    gradient = cv2.subtract(gradX, gradY)
    # Return the gradient, as the docstring promises (the original
    # returned gray, leaving gradient unused)
    return gradient
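# Usage sketch: the gradient comes back as float32, so convert it to
# 8-bit before displaying. The method is called unbound here purely
# for illustration; in the real class it would be
# self.preprocess_frame(frame). The path is a placeholder.
frame = cv2.imread('frame.png')
gradient = preprocess_frame(None, frame)
cv2.imshow('gradient', cv2.convertScaleAbs(gradient))
cv2.waitKey(0)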
import cv2
import numpy as np
from scipy.interpolate import interp1d

# LFRotz and SliceXYImage are project-local helpers defined elsewhere


def LFDecodeLensletImageSimple(LensletImage, WhiteImage, LensletGridModel,
                               DecodeOptions, return_LFWeight=False,
                               return_DecodeOptions=False,
                               return_DebayerLensletImage=False,
                               return_CorrectedLensletImage=False):
    # Set default parameters
    DecodeOptions["LevelLimits"] = [np.min(WhiteImage), np.max(WhiteImage)]
    DecodeOptions["ResampMethod"] = "fast"
    DecodeOptions["Precision"] = np.float32
    DecodeOptions["DoDehex"] = True
    DecodeOptions["DoSquareST"] = True
    intmax_uint16 = float(2 ** 16 - 1)

    # Rescale image values and remove the black level
    DecodeOptions["LevelLimits"] = DecodeOptions["Precision"](DecodeOptions["LevelLimits"])
    BlackLevel = DecodeOptions["LevelLimits"][0]
    WhiteLevel = DecodeOptions["LevelLimits"][1]
    WhiteImage = WhiteImage.astype(DecodeOptions["Precision"])
    WhiteImage = (WhiteImage - BlackLevel) / (WhiteLevel - BlackLevel)
    LensletImage = LensletImage.astype(DecodeOptions["Precision"])
    LensletImage = (LensletImage - BlackLevel) / (WhiteLevel - BlackLevel)
    LensletImage = LensletImage / WhiteImage

    # Clip -- this is aggressive and throws away bright areas; there is
    # potential for an HDR approach here
    LensletImage = np.minimum(1, np.maximum(0, LensletImage))

    nargout = 1 + np.sum([return_CorrectedLensletImage,
                          return_DebayerLensletImage,
                          return_DecodeOptions,
                          return_LFWeight])
    if nargout < 2:
        del WhiteImage

    # Demosaic. OpenCV's Bayer conversions require an integer image, so
    # scale up and cast to uint16 first. THIS LINE IS NOT SURE TO WORK;
    # find something concrete that works with LFs.
    LensletImage = (LensletImage * intmax_uint16).astype(np.uint16)
    LensletImage = cv2.cvtColor(LensletImage, cv2.COLOR_BAYER_BG2RGB)
    LensletImage = LensletImage.astype(DecodeOptions["Precision"])
    LensletImage = LensletImage / intmax_uint16

    DecodeOptions["NColChans"] = 3
    DecodeOptions["NWeightChans"] = 1 if nargout >= 2 else 0
    if nargout > 3:
        DebayerLensletImage = LensletImage

    # Transform to an integer-spaced grid
    print("\nAligning image to lenslet array...")
    InputSpacing = np.array([LensletGridModel["HSpacing"],
                             LensletGridModel["VSpacing"]])
    NewLensletSpacing = np.ceil(InputSpacing)
    # Force even so the hex shift is a whole pixel multiple
    NewLensletSpacing = np.ceil(NewLensletSpacing / 2) * 2
    XformScale = NewLensletSpacing / InputSpacing
    NewOffset = np.array([LensletGridModel["HOffset"],
                          LensletGridModel["VOffset"]]) * XformScale
    RoundedOffset = np.round(NewOffset)
    XformTrans = RoundedOffset - NewOffset

    NewLensletGridModel = {'HSpacing': NewLensletSpacing[0],
                           'VSpacing': NewLensletSpacing[1],
                           'HOffset': RoundedOffset[0],
                           'VOffset': RoundedOffset[1],
                           'Rot': 0,
                           'UMax': LensletGridModel["UMax"],
                           'VMax': LensletGridModel["VMax"],
                           'Orientation': LensletGridModel["Orientation"],
                           'FirstPosShiftRow': LensletGridModel["FirstPosShiftRow"]}

    RRot = LFRotz(LensletGridModel["Rot"])
    RScale = np.eye(3)
    RScale[0, 0] = XformScale[0]
    RScale[1, 1] = XformScale[1]
    # DecodeOptions is a dict, so OutputScale must be a key rather than
    # an attribute; initialize it (assumed default) before slicing into it
    DecodeOptions["OutputScale"] = np.ones(4)
    DecodeOptions["OutputScale"][:2] = XformScale
    DecodeOptions["OutputScale"][2:4] = np.array([1, 2 / np.sqrt(3)])
    RTrans = np.eye(3)
    RTrans[-1, :2] = XformTrans  # Change this later!!
    ''' This part of the code is not complete: maketform and
    imtransform are MATLAB functions with no direct Python equivalent
    in this port. '''
    FixAll = maketform('affine', RRot @ RScale @ RTrans)
    temp = XformScale[0]  # unused in this incomplete section
    # Scale the image *shape*; the original multiplied the image itself
    # by the scale vector, a porting bug
    NewSize = np.array(np.shape(LensletImage[:, :, 0])) * XformScale[::-1]
    LensletImage = imtransform(LensletImage, FixAll,
                               'YData', np.array([1, NewSize[0]]),
                               'XData', np.array([1, NewSize[1]]))
    if nargout >= 2:
        WhiteImage = imtransform(WhiteImage, FixAll,
                                 'YData', np.array([1, NewSize[0]]),
                                 'XData', np.array([1, NewSize[1]]))
    if nargout >= 4:
        CorrectedLensletImage = LensletImage

    # NOTE: WhiteImage was deleted above when nargout < 2, so this call
    # fails in that case
    LF = SliceXYImage(NewLensletGridModel, LensletImage, WhiteImage,
                      DecodeOptions)
    del WhiteImage, LensletImage
    ''' Till this much '''

    # Correct for the hex grid and resize to square u,v pixels
    LFSize = list(np.shape(LF))
    HexAspect = 2 / np.sqrt(3)
    if DecodeOptions["ResampMethod"] == "fast":
        print("\nResampling (1D approximation) to square u,v pixels")
        n_steps = int(np.ceil(LFSize[3] + 1))
        NewUVec = HexAspect * np.arange(n_steps)
        NewUVec = NewUVec[:int(np.ceil(LFSize[3] * HexAspect))]
        OrigUSize = LFSize[3]
        LFSize[3] = len(NewUVec)
        # Allocate dest and copy orig LF into it (memory saving vs.
        # keeping both separately)
        LF2 = np.zeros(LFSize, dtype=DecodeOptions["Precision"])
        LF2[:, :, :, :OrigUSize, :] = LF
        LF = LF2
        del LF2

        if DecodeOptions["DoDehex"]:
            ShiftUVec = -0.5 + NewUVec
            print('removing hex sampling...')
        else:
            ShiftUVec = NewUVec
            print("...")

        for ColChan in range(np.shape(LF)[4]):
            CurUVec = ShiftUVec
            for RowIter in range(2):
                # Check if this works!!! (possible off-by-one when
                # porting MATLAB's 1-based row indexing)
                RowIdx = np.mod(NewLensletGridModel["FirstPosShiftRow"]
                                + RowIter, 2) + 1
                ShiftRows = np.squeeze(
                    LF[:, :, RowIdx:-1:2, :OrigUSize, ColChan])
                SliceSize = list(np.shape(ShiftRows))
                SliceSize[3] = len(NewUVec)
                ShiftRows = ShiftRows.reshape(
                    SliceSize[0] * SliceSize[1] * SliceSize[2],
                    np.shape(ShiftRows)[3])
                ShiftRows_func = interp1d(
                    np.arange(np.shape(ShiftRows)[1]), ShiftRows,
                    kind='linear')
                ShiftRows = ShiftRows_func(CurUVec)
                ShiftRows[~np.isfinite(ShiftRows)] = 0
                LF[:, :, RowIdx:-1:2, :, ColChan] = ShiftRows.reshape(SliceSize)
                CurUVec = NewUVec
        del ShiftRows, ShiftRows_func
        DecodeOptions["OutputScale"][2] = DecodeOptions["OutputScale"][2] * HexAspect
    elif DecodeOptions["ResampMethod"] == "triangulation":
        pass
    else:
        print('\nNo valid dehex / resampling selected\n')

    # Resize to square s,t pixels. Assumes only a very slight
    # resampling is required, resulting in an identically-sized output
    # light field
    if DecodeOptions["DoSquareST"]:
        print('\nResizing to square s,t pixels using 1D linear interp...')
        ResizeScale = (DecodeOptions["OutputScale"][0]
                       / DecodeOptions["OutputScale"][1])
        ResizeDim1 = 0
        ResizeDim2 = 1
        if ResizeScale < 1:
            ResizeScale = 1 / ResizeScale
            ResizeDim1 = 1
            ResizeDim2 = 0
        OrigSize = np.shape(LF)[ResizeDim1]
        OrigVec = np.arange(OrigSize) - OrigSize // 2
        NewVec = OrigVec / ResizeScale
        OrigDims = np.arange(5)
        OrigDims = np.delete(OrigDims, ResizeDim1)
        UBlkSize = 32
        USize = np.shape(LF)[3]
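# The incomplete block above leans on MATLAB's maketform/imtransform.
# Below is an untested sketch of an OpenCV stand-in, assuming the
# combined 3x3 transform (RRot @ RScale @ RTrans) follows MATLAB's
# row-vector convention, i.e. the transpose of what cv2.warpAffine
# expects. This is a suggested substitution, not the original authors'
# method; apply_affine_cv2 and its arguments are hypothetical names.
import cv2
import numpy as np


def apply_affine_cv2(image, T, new_size):
    # Convert a MATLAB-style 3x3 transform (points as row vectors) to
    # OpenCV's 2x3 forward matrix (points as column vectors)
    M = T.T[:2, :]
    # warpAffine takes the destination size as (width, height)
    return cv2.warpAffine(image, M, (int(new_size[1]), int(new_size[0])),
                          flags=cv2.INTER_LINEAR)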
import glob

import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder

# RGBHistogram is the project's local descriptor class, defined elsewhere

# grab paths from args
imagePaths = sorted(glob.glob(args["images"] + "/*.png"))
maskPaths = sorted(glob.glob(args["masks"] + "/*.png"))

data = []
target = []

# yields a 512-dimensional feature vector (8 x 8 x 8 bins) used to
# characterize the color of the flower
desc = RGBHistogram([8, 8, 8])

for (imagePath, maskPath) in zip(imagePaths, maskPaths):
    image = cv2.imread(imagePath)
    mask = cv2.imread(maskPath)
    # convert the mask to grayscale
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    features = desc.describe(image, mask)
    data.append(features)
    target.append(imagePath.split("_")[-2])

# encode labels: np.unique finds the unique species names, which are
# fed to the LabelEncoder
targetNames = np.unique(target)
le = LabelEncoder()
# fit_transform maps each unique species name to an integer category
# and converts the string labels into those integers, so target now
# contains one integer class per data point
target = le.fit_transform(target)
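# With features and encoded labels in hand, a classifier can be fit.
# The estimator choice below (a random forest) is illustrative, not
# dictated by the code above.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

trainData, testData, trainTarget, testTarget = train_test_split(
    data, target, test_size=0.3, random_state=42)
model = RandomForestClassifier(n_estimators=25, random_state=42)
model.fit(trainData, trainTarget)
print(model.score(testData, testTarget))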
# Assumes the surrounding script's imports: argparse, glob, logging,
# os, os.path as osp, time, numpy as np, cv2, torch, plus the
# project-local modules (option, util, data_util, EDVR_arch,
# AverageMeter)


def main():
    #################
    # configurations
    #################
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_path", type=str, required=True)
    parser.add_argument("--gt_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    parser.add_argument("--model_path", type=str, required=True)
    parser.add_argument("--gpu_id", type=str, required=True)
    # parser.add_argument("--screen_notation", type=str, required=True)
    parser.add_argument('--opt', type=str, required=True,
                        help='Path to option YAML file.')
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=False)

    PAD = 32
    total_run_time = AverageMeter()
    print("GPU ", torch.cuda.device_count())

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device('cuda')

    data_mode = 'sharp_bicubic'
    flip_test = False

    Input_folder = args.input_path
    GT_folder = args.gt_path
    Result_folder = args.output_path
    Model_path = args.model_path

    # create the results folder
    if not os.path.exists(Result_folder):
        os.makedirs(Result_folder, exist_ok=True)

    model_path = Model_path
    N_in = 5
    model = EDVR_arch.EDVR(nf=opt['network_G']['nf'],
                           nframes=opt['network_G']['nframes'],
                           groups=opt['network_G']['groups'],
                           front_RBs=opt['network_G']['front_RBs'],
                           back_RBs=opt['network_G']['back_RBs'],
                           predeblur=opt['network_G']['predeblur'],
                           HR_in=opt['network_G']['HR_in'],
                           w_TSA=opt['network_G']['w_TSA'])

    #### dataset
    test_dataset_folder = Input_folder
    GT_dataset_folder = GT_folder

    #### evaluation
    crop_border = 0
    border_frame = N_in // 2  # border frames when evaluating
    # temporal padding mode
    padding = 'new_info'
    save_imgs = True
    save_folder = os.path.join(Result_folder, data_mode)
    util.mkdirs(save_folder)
    util.setup_logger('base', save_folder, 'test', level=logging.INFO,
                      screen=True, tofile=True)
    logger = logging.getLogger('base')

    #### log info
    logger.info('Data: {} - {}'.format(data_mode, test_dataset_folder))
    logger.info('Padding mode: {}'.format(padding))
    logger.info('Model path: {}'.format(model_path))
    logger.info('Save images: {}'.format(save_imgs))
    logger.info('Flip test: {}'.format(flip_test))

    #### set up the model
    model.load_state_dict(torch.load(model_path), strict=True)
    model.eval()
    model = model.to(device)

    avg_psnr_l, avg_psnr_center_l, avg_psnr_border_l = [], [], []
    avg_rgb_psnr_l, avg_rgb_psnr_center_l, avg_rgb_psnr_border_l = [], [], []
    subfolder_name_l = []

    subfolder_l = sorted(glob.glob(osp.join(test_dataset_folder, '*')))
    subfolder_GT_l = sorted(glob.glob(osp.join(GT_dataset_folder, '*')))

    end = time.time()
    for subfolder in subfolder_l:
        input_subfolder = os.path.split(subfolder)[1]
        subfolder_GT = os.path.join(GT_dataset_folder, input_subfolder)
        if not os.path.exists(subfolder_GT):
            continue
        print("Evaluate Folders: ", input_subfolder)

        subfolder_name = osp.basename(subfolder)
        subfolder_name_l.append(subfolder_name)
        save_subfolder = osp.join(save_folder, subfolder_name)

        img_path_l = sorted(glob.glob(osp.join(subfolder, '*')))
        max_idx = len(img_path_l)
        if save_imgs:
            util.mkdirs(save_subfolder)

        #### read LQ and GT images; note we load YUV images here
        imgs_LQ = data_util.read_img_seq_yuv(subfolder)  # Num x 3 x H x W
        img_GT_l = []
        for img_GT_path in sorted(glob.glob(osp.join(subfolder_GT, '*'))):
            img_GT_l.append(data_util.read_img_yuv(None, img_GT_path))

        avg_psnr, avg_psnr_border, avg_psnr_center, N_border, N_center = 0, 0, 0, 0, 0
        avg_rgb_psnr, avg_rgb_psnr_border, avg_rgb_psnr_center = 0, 0, 0

        # process each image
        for img_idx, img_path in enumerate(img_path_l):
            img_name = osp.splitext(osp.basename(img_path))[0]
            select_idx = data_util.index_generation(img_idx, max_idx, N_in,
                                                    padding=padding)
            imgs_in = imgs_LQ.index_select(
                0, torch.LongTensor(select_idx)).unsqueeze(0).to(device)  # 960 x 540

            gtWidth = 3840
            gtHeight = 2160
            intWidth_ori = imgs_in.shape[4]   # 960
            intHeight_ori = imgs_in.shape[3]  # 540
            scale = 4

            intPaddingRight = PAD
            intPaddingLeft = PAD
            intPaddingTop = PAD
            intPaddingBottom = PAD

            # replication-pad the 960x540 input on all sides before inference
            pader = torch.nn.ReplicationPad2d(
                [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom])
            imgs_in = torch.squeeze(imgs_in, 0)  # N C H W
            imgs_in = pader(imgs_in)             # N C 604 1024

            # todo: output 4K
            X0 = imgs_in
            X0 = torch.unsqueeze(X0, 0)
            if flip_test:
                output = util.flipx4_forward(model, X0)
            else:
                output = util.single_forward(model, X0)

            # remove the padding
            output = output[0, :,
                            intPaddingTop * scale:(intPaddingTop + intHeight_ori) * scale,
                            intPaddingLeft * scale:(intPaddingLeft + intWidth_ori) * scale]
            output = util.tensor2img(output.squeeze(0))

            print("*****************current image process time \t " +
                  str(time.time() - end) + "s ******************")
            total_run_time.update(time.time() - end, 1)

            # calculate PSNR on YUV
            y_all = output / 255.
            GT = np.copy(img_GT_l[img_idx])
            y_all, GT = util.crop_border([y_all, GT], crop_border)
            crt_psnr = util.calculate_psnr(y_all * 255, GT * 255)
            logger.info('{:3d} - {:25} \tYUV_PSNR: {:.6f} dB'.format(
                img_idx + 1, img_name, crt_psnr))

            # also calculate PSNR on RGB
            y_all_rgb = data_util.ycbcr2rgb(output / 255.)
            GT_rgb = data_util.ycbcr2rgb(np.copy(img_GT_l[img_idx]))
            y_all_rgb, GT_rgb = util.crop_border([y_all_rgb, GT_rgb], crop_border)
            crt_rgb_psnr = util.calculate_psnr(y_all_rgb * 255, GT_rgb * 255)
            logger.info('{:3d} - {:25} \tRGB_PSNR: {:.6f} dB'.format(
                img_idx + 1, img_name, crt_rgb_psnr))

            if save_imgs:
                im_out = np.round(y_all_rgb * 255.).astype(np.uint8)
                # note: we have an RGB image here, but cv2 needs BGR when saving
                cv2.imwrite(osp.join(save_subfolder, '{}.png'.format(img_name)),
                            cv2.cvtColor(im_out, cv2.COLOR_RGB2BGR))

            # accumulate PSNR, for YUV and RGB respectively
            if img_idx >= border_frame and img_idx < max_idx - border_frame:
                # center frames
                avg_psnr_center += crt_psnr
                avg_rgb_psnr_center += crt_rgb_psnr
                N_center += 1
            else:
                # border frames
                avg_psnr_border += crt_psnr
                avg_rgb_psnr_border += crt_rgb_psnr
                N_border += 1

        # for YUV
        avg_psnr = (avg_psnr_center + avg_psnr_border) / (N_center + N_border)
        avg_psnr_center = avg_psnr_center / N_center
        avg_psnr_border = 0 if N_border == 0 else avg_psnr_border / N_border
        avg_psnr_l.append(avg_psnr)
        avg_psnr_center_l.append(avg_psnr_center)
        avg_psnr_border_l.append(avg_psnr_border)
        logger.info('Folder {} - Average YUV PSNR: {:.6f} dB for {} frames; '
                    'Center YUV PSNR: {:.6f} dB for {} frames; '
                    'Border YUV PSNR: {:.6f} dB for {} frames.'.format(
                        subfolder_name, avg_psnr, (N_center + N_border),
                        avg_psnr_center, N_center, avg_psnr_border, N_border))

        # for RGB
        avg_rgb_psnr = (avg_rgb_psnr_center + avg_rgb_psnr_border) / (N_center + N_border)
        avg_rgb_psnr_center = avg_rgb_psnr_center / N_center
        avg_rgb_psnr_border = 0 if N_border == 0 else avg_rgb_psnr_border / N_border
        avg_rgb_psnr_l.append(avg_rgb_psnr)
        avg_rgb_psnr_center_l.append(avg_rgb_psnr_center)
        avg_rgb_psnr_border_l.append(avg_rgb_psnr_border)
        logger.info('Folder {} - Average RGB PSNR: {:.6f} dB for {} frames; '
                    'Center RGB PSNR: {:.6f} dB for {} frames; '
                    'Border RGB PSNR: {:.6f} dB for {} frames.'.format(
                        subfolder_name, avg_rgb_psnr, (N_center + N_border),
                        avg_rgb_psnr_center, N_center, avg_rgb_psnr_border, N_border))

    logger.info('################ Tidy Outputs ################')
    # for YUV
    for subfolder_name, psnr, psnr_center, psnr_border in zip(
            subfolder_name_l, avg_psnr_l, avg_psnr_center_l, avg_psnr_border_l):
        logger.info('Folder {} - Average YUV PSNR: {:.6f} dB. '
                    'Center YUV PSNR: {:.6f} dB. '
                    'Border YUV PSNR: {:.6f} dB.'.format(
                        subfolder_name, psnr, psnr_center, psnr_border))
    # for RGB
    for subfolder_name, psnr, psnr_center, psnr_border in zip(
            subfolder_name_l, avg_rgb_psnr_l, avg_rgb_psnr_center_l,
            avg_rgb_psnr_border_l):
        logger.info('Folder {} - Average RGB PSNR: {:.6f} dB. '
                    'Center RGB PSNR: {:.6f} dB. '
                    'Border RGB PSNR: {:.6f} dB.'.format(
                        subfolder_name, psnr, psnr_center, psnr_border))

    logger.info('################ Final Results ################')
    logger.info('Data: {} - {}'.format(data_mode, test_dataset_folder))
    logger.info('Padding mode: {}'.format(padding))
    logger.info('Model path: {}'.format(model_path))
    logger.info('Save images: {}'.format(save_imgs))
    logger.info('Flip test: {}'.format(flip_test))
    logger.info('Total Average YUV PSNR: {:.6f} dB for {} clips. '
                'Center YUV PSNR: {:.6f} dB. Border YUV PSNR: {:.6f} dB.'.format(
                    sum(avg_psnr_l) / len(avg_psnr_l), len(subfolder_l),
                    sum(avg_psnr_center_l) / len(avg_psnr_center_l),
                    sum(avg_psnr_border_l) / len(avg_psnr_border_l)))
    logger.info('Total Average RGB PSNR: {:.6f} dB for {} clips. '
                'Center RGB PSNR: {:.6f} dB. Border RGB PSNR: {:.6f} dB.'.format(
                    sum(avg_rgb_psnr_l) / len(avg_rgb_psnr_l), len(subfolder_l),
                    sum(avg_rgb_psnr_center_l) / len(avg_rgb_psnr_center_l),
                    sum(avg_rgb_psnr_border_l) / len(avg_rgb_psnr_border_l)))
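# Example invocation of the evaluation script above. The script name,
# paths, and YAML option file are placeholders; substitute your own
# dataset and checkpoint locations.
#
#   python test_edvr.py \
#       --input_path ./datasets/LQ \
#       --gt_path ./datasets/GT \
#       --output_path ./results \
#       --model_path ./checkpoints/EDVR.pth \
#       --gpu_id 0 \
#       --opt ./options/test/test_EDVR.yml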
import cv2
import numpy as np

cap = cv2.VideoCapture('vtest.avi')

ret, frame1 = cap.read()
ret, frame2 = cap.read()

while cap.isOpened():
    # the difference between consecutive frames highlights motion
    diff = cv2.absdiff(frame1, frame2)
    gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    # cv2.threshold requires a threshold type as its fourth argument
    _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)

    cv2.imshow("inter", thresh)

    # slide the frame pair forward so the diff advances each iteration
    frame1 = frame2
    ret, frame2 = cap.read()
    if not ret or cv2.waitKey(40) == 27:
        break

cv2.destroyAllWindows()
cap.release()
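# An optional follow-up, sketched here for illustration only (it is
# not part of the original snippet): turn the thresholded diff into
# bounding boxes around moving regions. Inside the loop, after
# computing `thresh`:
#
#     dilated = cv2.dilate(thresh, None, iterations=3)
#     contours, _ = cv2.findContours(dilated, cv2.RETR_TREE,
#                                    cv2.CHAIN_APPROX_SIMPLE)
#     for contour in contours:
#         if cv2.contourArea(contour) < 700:
#             continue
#         x, y, w, h = cv2.boundingRect(contour)
#         cv2.rectangle(frame1, (x, y), (x + w, y + h), (0, 255, 0), 2)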