def elastic_transform(image, label, alpha=1000, sigma=30, spline_order=1,
                      mode='nearest', random_state=np.random):
    """Apply one random elastic deformation to an image and its label.

    Elastic deformation of images as described in [Simard2003]_: two random
    displacement fields are smoothed with a Gaussian filter, scaled by
    ``alpha`` and used to resample both inputs, so that image and label are
    warped identically.

    Args:
        image: Array-like image (e.g. PIL image or ndarray), either ``(H, W)``
            or ``(H, W, C)``.
        label: Array-like label whose first two dimensions match ``image``;
            either ``(H, W)`` or ``(H, W, C)``.
        alpha: Scale factor applied to the smoothed displacement fields.
        sigma: Standard deviation of the Gaussian smoothing filter.
        spline_order: Interpolation order forwarded to ``map_coordinates``.
        mode: Boundary mode forwarded to ``map_coordinates``.
        random_state: Object exposing ``rand(*shape)``; defaults to the
            ``np.random`` module.

    Returns:
        Tuple ``(image, label)`` of deformed ``PIL.Image`` objects.

    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
       Convolutional Neural Networks applied to Visual Document Analysis", in
       Proc. of the International Conference on Document Analysis and
       Recognition, 2003.
    """
    image = np.array(image)
    label = np.array(label)
    shape = image.shape[:2]

    # One displacement field per spatial axis; drawn in this order (dx, dy).
    dx = gaussian_filter(
        (random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
    dy = gaussian_filter(
        (random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha

    x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
    indices = [np.reshape(x + dx, (-1, 1)), np.reshape(y + dy, (-1, 1))]

    def _warp(arr):
        """Resample ``arr`` at the displaced coordinates."""
        if arr.ndim == 3:
            # map_coordinates needs one coordinate array per input axis, so a
            # channel axis is handled by warping every channel separately.
            planes = [
                map_coordinates(arr[..., c], indices,
                                order=spline_order, mode=mode).reshape(shape)
                for c in range(arr.shape[-1])
            ]
            return np.stack(planes, axis=-1)
        return map_coordinates(arr, indices,
                               order=spline_order, mode=mode).reshape(shape)

    # NOTE(review): the label is interpolated with the same spline_order as
    # the image; with order > 0 neighbouring label values are blended.
    return Image.fromarray(_warp(image)), Image.fromarray(_warp(label))
def pad_and_bg(self, img):
    """Randomly translate ``img`` and fill the exposed border with background.

    A background is picked at random from ``self.bg_buffer`` (encoded image
    bytes), the input is shifted by a normally distributed offset, and the
    area uncovered by the shift is replaced by the background picture.

    Args:
        img: PIL image. Assumed to have three channels so that it can be
            blended with the 3-channel mask -- TODO confirm against callers.

    Returns:
        PIL image of the same size as ``img``.
    """
    bg_img = np.asarray(Image.open(io.BytesIO(np.random.choice(self.bg_buffer))))

    # PIL reports size as (width, height); NumPy arrays are (rows, cols) and
    # OpenCV's dsize is (width, height). Mixing these up only works for
    # square images.
    width, height = img.size

    s_x = int(np.random.normal(0, 6))
    s_y = int(np.random.normal(0, 10))
    M = np.float32([[1, 0, s_x], [0, 1, s_y]])

    white = (255, 255, 255)
    ret_img = cv2.warpAffine(np.asarray(img), M, (width, height),
                             borderMode=cv2.BORDER_CONSTANT, borderValue=white)

    # Mask is 0 where the shifted image has content and 255 on the border the
    # shift exposed; it goes through the same warp to stay aligned.
    mask = np.zeros((height, width, 3))
    mask = cv2.warpAffine(mask, M, (width, height),
                          borderMode=cv2.BORDER_CONSTANT, borderValue=white)

    bg_img = cv2.resize(bg_img, dsize=(width, height))

    mask = mask / 255
    blended = ret_img * (1 - mask) + bg_img * mask
    return Image.fromarray(blended.astype('uint8'))
def cv2_imshow(img):
    """Display an OpenCV-style image with matplotlib.

    The first three channels are reordered (index 2, 1, 0 -- i.e. BGR to RGB
    for OpenCV arrays) before the picture is shown in a 20x20 inch figure
    with the axes hidden.
    """
    channel_order = [2, 1, 0]
    picture = Image.fromarray(img[:, :, channel_order])

    plt.figure(figsize=(20, 20))
    plt.imshow(picture)
    plt.axis('off')
    plt.show()
def pad_and_bg(self, img):
    """Pad ``img`` by random margins and fill the margins with background.

    A background is picked at random from ``self.bg_buffer`` (encoded image
    bytes). The input gets a white border whose four widths are drawn from
    ``|N(0, self.pad_param)|``, is resized back to its original size, and the
    border area is then replaced by the background picture.

    Args:
        img: PIL image. Assumed to have three channels so that it can be
            blended with the 3-channel mask -- TODO confirm against callers.

    Returns:
        PIL image of the same size as ``img``.
    """
    bg_img = np.asarray(Image.open(io.BytesIO(np.random.choice(self.bg_buffer))))

    pad_top = int(abs(np.random.normal(0, self.pad_param)))
    pad_bottom = int(abs(np.random.normal(0, self.pad_param)))
    pad_left = int(abs(np.random.normal(0, self.pad_param)))
    pad_right = int(abs(np.random.normal(0, self.pad_param)))

    # PIL reports size as (width, height); NumPy arrays are (rows, cols).
    width, height = img.size
    white = (255, 255, 255)

    ret_img = cv2.copyMakeBorder(np.asarray(img), pad_top, pad_bottom,
                                 pad_left, pad_right,
                                 cv2.BORDER_CONSTANT, value=white)

    # The mask must have the same (rows, cols) layout as the image array so
    # that the added border covers the same fraction of both after resizing.
    mask = np.zeros((height, width, 3))
    mask = cv2.copyMakeBorder(mask, pad_top, pad_bottom, pad_left, pad_right,
                              cv2.BORDER_CONSTANT, value=white)

    ret_img = cv2.resize(ret_img, dsize=(width, height))
    bg_img = cv2.resize(bg_img, dsize=(width, height))
    mask = cv2.resize(mask, dsize=(width, height))

    mask = mask / 255
    blended = ret_img * (1 - mask) + bg_img * mask
    return Image.fromarray(blended.astype('uint8'))
def checkValidateCode():
    """OCR the image stored in ``code.png`` and return the stripped text.

    The picture is converted to greyscale, binarised at threshold 125 and
    handed to Tesseract, whose executable path is set to the fixed Windows
    install location just before the call.
    """
    grey = np.array(Image.open('code.png').convert('L'))
    _, binary = cv2.threshold(grey, 125, 255, cv2.THRESH_BINARY)
    ocr_input = Image.fromarray(binary.astype(np.uint8))

    pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'
    text = pytesseract.image_to_string(ocr_input)
    return text.strip()
def __getitem__(self, idx):
    """Return the transformed image stored at position ``idx``.

    The file ``self.images_name[idx]`` under ``self.images_path`` is opened,
    reduced to three bands if it has four, resized to 215x215 with bilinear
    resampling and passed through ``self.transform``.
    """
    picture = Image.open(os.path.join(self.images_path, self.images_name[idx]))

    has_four_bands = len(picture.getbands()) == 4
    if has_four_bands:
        # Fix for PNG files: drop the last band.
        picture = Image.fromarray(np.array(picture)[:, :, :-1])

    resized = picture.resize((215, 215), Image.BILINEAR)
    return self.transform(resized)
def extract_face(filename, required_size=(224, 224)):
    """Crop the first detected face from an image file and resize it.

    Args:
        filename: Path of the image to read with ``plt.imread``.
        required_size: ``(width, height)`` passed to ``PIL.Image.resize``.

    Returns:
        ndarray holding the resized face crop.

    Raises:
        IndexError: If the detector finds no face in the image.
    """
    pixels = plt.imread(filename)
    results = detector.detect_faces(pixels)
    if len(results) == 0:
        # Same exception type the bare ``results[0]`` lookup would raise, but
        # with a message that names the actual problem.
        raise IndexError('no face detected in {!r}'.format(filename))

    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    # A box that starts outside the picture has negative corner coordinates;
    # used as slice bounds they would wrap around and yield a wrong or empty
    # crop, so clamp them to the image origin.
    x1, y1 = max(x1, 0), max(y1, 0)

    face = pixels[y1:y2, x1:x2]
    image = Image.fromarray(np.asarray(face)).resize(required_size)
    return np.array(image)
def to_image(self, data, index_axial=0, index_azimuthal=0, scale=None, absolute_scale=False):
    """Render one 2-D plane of a 4-D array as an RGB PIL image.

    The plane ``data[index_axial, index_azimuthal]`` is converted to float32
    and scaled in one of three ways:

    * ``scale is None``: normalised so that its maximum maps to 255;
    * ``absolute_scale`` true: multiplied by ``scale`` only;
    * otherwise: normalised to 255 and additionally multiplied by ``scale``.

    The resulting image is rotated by 90 degrees before it is returned.
    """
    from PIL import Image

    plane_shape = (data.shape[2], data.shape[3])
    a = float32(data[index_axial, index_azimuthal, :, :].reshape(plane_shape))

    if scale is None:
        a = 255.0 * (a) / (a.max() + 1e-12)
    elif absolute_scale:
        a = scale * (a)
    else:
        a = scale * 255.0 * (a) / (a.max() + 1e-12)

    return Image.fromarray(a).convert("RGB").rotate(90)
def create_image(rectangles_in):
    """
    Assemble the cut and transformed rectangles into one resulting image.

    The first 100 rectangles are read as a 10x10 grid: strip ``i`` is the
    horizontal concatenation of the rectangles at indices ``j * 10 + i`` for
    ``j`` in 0..9, and the ten strips are then joined by ``append_images``.

    :param rectangles_in: list: list of cut and transformed rectangles
    :return: PIL.Image: resulting Image
    """
    grid = 10
    strips = []
    for row in range(grid):
        tiles = tuple(rectangles_in[col * grid + row] for col in range(grid))
        strips.append(Image.fromarray(np.hstack(tiles)))
    return append_images(strips)
def make_greyscale_white_bg(im, r, b, g):
    """Turn every pixel of colour ``(r, g, b)`` white and return a greyscale image.

    Note the parameter order is ``r, b, g``. The alpha band is left untouched
    during the replacement and is dropped by the final conversion to mode
    ``'L'``.
    """
    pixels = np.array(im.convert('RGBA'))  # height x width x 4

    # Per-pixel match on the three colour bands.
    matches = (pixels[..., 0] == r) & (pixels[..., 2] == b) & (pixels[..., 1] == g)
    pixels[matches, :3] = (255, 255, 255)

    return Image.fromarray(pixels).convert('L')
def show_img(img_arr, label_arr, meta, index, label_fn=default_label_fn):
    """Display one CIFAR-10 image together with its label.

    A row of ``img_arr`` stores a 32 x 32 picture as three consecutive planes
    of 1024 values each (red, then green, then blue). The planes are stacked
    into an RGB array that PIL can read and show. For the CIFAR encoding see
    http://www.cs.toronto.edu/~kriz/cifar.html
    """
    flat = img_arr[index, :]

    plane_bounds = ((None, 1024), (1024, 2048), (2048, None))
    planes = [flat[start:stop].reshape(32, 32) for start, stop in plane_bounds]

    picture = Image.fromarray(np.array(np.dstack(planes)), 'RGB')
    display(picture)
    print(label_fn(index, meta[label_arr[index][0]].decode('utf-8')))
def make_gif_slices(self, vol, name='test', timestr=None, length=None):
    """Write the leading slices of ``vol`` as an animated GIF.

    Each slice ``vol[i]`` is mapped from ``(v + 1) / 2`` through the
    ``gist_earth`` colormap, resized to 512x512 and appended as one frame.

    Args:
        vol: Array indexed as ``vol[i, :, :]``.
        name: Base name of the GIF file (without extension).
        timestr: Optional sub-directory of ``figures/gif_dir/``; created if
            missing.
        length: Number of slices to render; defaults to
            ``len(nbody.redshift_bins)``.
    """
    gifdir = 'figures/gif_dir/'
    if timestr is not None:
        if not os.path.isdir(gifdir + timestr):
            # makedirs also creates 'figures/gif_dir/' itself when absent,
            # where a plain mkdir would fail.
            os.makedirs(gifdir + timestr)
        gifdir += timestr + '/'
    print('Saving to ', gifdir)

    if length is None:
        length = len(nbody.redshift_bins)

    images = []
    # range (not the Python-2-only xrange) works on both Python 2 and 3.
    for i in range(length):
        frame = (cm.gist_earth((vol[i, :, :] + 1) / 2) * 255).astype('uint8')
        images.append(Image.fromarray(frame).resize((512, 512)))

    imageio.mimsave(gifdir + name + '.gif', images, fps=2)
def to_image(self, data, index_axial=0, index_azimuthal=0, scale=None,
             absolute_scale=False):
    """Convert the plane ``data[index_axial, index_azimuthal]`` to an RGB image.

    Without ``scale`` the plane is normalised so its maximum becomes 255.
    With ``scale`` it is either multiplied by ``scale`` directly
    (``absolute_scale`` true) or normalised to 255 and then multiplied by
    ``scale``. The image is returned rotated by 90 degrees.
    """
    from PIL import Image

    def _relative(values, gain):
        # Normalise by the plane maximum; the epsilon avoids division by zero.
        return gain * (values) / (values.max() + 1e-12)

    plane = data[index_axial, index_azimuthal, :, :]
    a = float32(plane.reshape((data.shape[2], data.shape[3])))

    if scale is None:
        a = _relative(a, 255.0)
    elif absolute_scale:
        a = scale * (a)
    else:
        a = _relative(a, scale * 255.0)

    picture = Image.fromarray(a).convert("RGB")
    return picture.rotate(90)
def make_zslice_gif(self, model, timestr, epoch, zs=None, fps=2):
    """Render one generated slice per conditioning value into a GIF.

    A single latent vector is sampled once; for every value in ``zs`` (or in
    ``nbody.redshift_bins`` when ``zs`` is None) its last entry along axis 1
    is overwritten with that value, the model is evaluated, and slice
    ``[0][0][10]`` of the output is colour-mapped with ``gist_earth`` and
    added as a 512x512 frame.

    Args:
        model: Callable taking the latent tensor and returning a tensor.
        timestr: Sub-directory of ``figures/gif_dir/`` to write into.
        epoch: Epoch number used in the output file name.
        zs: Optional sequence of conditioning values.
        fps: Frames per second of the GIF.
    """
    fixed_z = torch.randn(1, 201, 1, 1, 1).float()
    iteration = nbody.redshift_bins if zs is None else zs

    images = []
    # Iterating the values directly replaces the Python-2-only
    # ``xrange(len(...))`` index loop.
    for z_value in iteration:
        fixed_z[:, -1] = z_value
        gen_samp = model(fixed_z).detach().numpy()
        frame = (cm.gist_earth((gen_samp[0][0][10, :, :] + 1) / 2) * 255).astype('uint8')
        images.append(Image.fromarray(frame).resize((512, 512)))

    imageio.mimsave('figures/gif_dir/' + timestr + '/zslicegif_epoch_' + str(epoch) + '.gif',
                    images, fps=fps)
def invert_colors(im):
    """Swap pure black and pure white in ``im`` and return a greyscale image.

    Three in-place passes run in sequence, each one seeing the result of the
    previous pass: black becomes red (a temporary marker), white becomes
    black, and finally red becomes white. Pixels that were pure red to begin
    with therefore also end up white. Alpha is left alone until the final
    conversion to mode ``'L'``.
    """
    pixels = np.array(im.convert('RGBA'))  # height x width x 4
    rgb = pixels[..., :3]                  # view: writes land in ``pixels``

    def recolour(old, new):
        # The mask is evaluated against the current, already modified data.
        rgb[np.all(rgb == old, axis=-1)] = new

    recolour((0, 0, 0), (255, 0, 0))
    recolour((255, 255, 255), (0, 0, 0))
    recolour((255, 0, 0), (255, 255, 255))

    return Image.fromarray(pixels).convert('L')
def upload_PIL_s3(image, bucket, file_name):
    """Upload a NumPy image array to S3 as an encoded image file.

    NumPy images cannot be uploaded directly, so the array is converted to a
    PIL image and encoded into an in-memory buffer first.

    Args:
        image: NumPy image array.
        bucket: Name of the destination S3 bucket.
        file_name: Object key; the file name that ends with PNG or JPEG. The
            encoding format is derived from it by ``get_safe_ext``.

    Raises:
        S3ImagesUploadFailed: If S3 answers with a status code other than 200.
    """
    img = Image.fromarray(image)
    s3_client = boto3.client('s3')

    buffer = BytesIO()
    img.save(buffer, get_safe_ext(file_name))
    buffer.seek(0)  # rewind so the upload reads from the start

    sent_data = s3_client.put_object(Bucket=bucket, Key=file_name, Body=buffer)
    if sent_data['ResponseMetadata']['HTTPStatusCode'] != 200:
        # The message previously referenced an undefined name ``key``, which
        # turned every failed upload into a NameError.
        raise S3ImagesUploadFailed(
            'Failed to upload image {} to bucket {}'.format(file_name, bucket))
# train(self, start_epoch, max_epoch, batch_size, viz_interval)
#
# Training loop over epochs ``start_epoch .. max_epoch - 1``. The number of
# steps per epoch is the total length of the entries of ``self.pathlist``
# divided by ``batch_size``. For every step a batch is assembled by cycling
# the class index (``batch_index % self.numclass``) and drawing a random
# sample of that class through ``self.gen_data``; one random batch image is
# written to 'temppp.png' as a preview; the model is trained with
# ``train_on_batch`` and the first two returned values are logged as the
# 'loss' and 'accuracy' summary scalars.
#
# ``viz_interval`` is only referenced by commented-out code, and
# ``real_index`` is counted but never read.
#
# NOTE(review): the original line structure of this definition has been lost
# (it is collapsed onto one line), so the nesting level of ``self.test()``,
# ``os.makedirs('models', ...)`` and ``self.model.save(...)`` -- per step, per
# epoch or once after training -- cannot be determined from here. Restore the
# formatting from version control before changing the logic.
def train(self,start_epoch, max_epoch, batch_size, viz_interval): max_step = sum([len(i) for i in self.pathlist]) // batch_size # permu_ind = list(range(len(self.pathlist))) step = 0 for epoch in range(start_epoch,max_epoch): # permu_ind = np.random.permutation(permu_ind) real_index = 0 for step_index in range(max_step): batch_img = np.zeros((batch_size,self.input_shape[0],self.input_shape[1],self.input_shape[2] )) batch_target = np.zeros((batch_size,7)) for batch_index in range(batch_size): tget = batch_index % self.numclass random_index = np.random.randint(self.train_data_count[tget]) img,target = self.gen_data(io.BytesIO(self.image_buffer[tget][random_index]),self.path_buffer[tget][random_index]) batch_img[batch_index] = img batch_target[batch_index] = target real_index = real_index+1 save_img = (batch_img[np.random.randint(batch_size)]+1)*127.5 save_img = Image.fromarray(save_img.astype('uint8')) save_img.save('temppp.png') # print(batch_target) train_loss = self.model.train_on_batch(batch_img,batch_target) with train_summary_writer.as_default(): tf.summary.scalar('loss', train_loss[0], step=step) tf.summary.scalar('accuracy', train_loss[1], step=step) step = step + 1 # train_summary_writer = tf.summary.create_file_writer(train_log_dir) print('\r epoch ' + str(epoch) + ' / ' + str(max_epoch) + ' ' + 'step ' + str(step_index) + ' / ' + str(max_step) + ' loss = ' + str(train_loss)) # if(step_index%viz_interval==0): # # rand_index = permu_ind[np.random.randint(0,len(permu_ind))] # # img,target = self.gen_data(io.BytesIO(self.image_buffer[rand_index]),self.path_buffer[rand_index]) # # img = np.expand_dims(img, axis=0) # # test_result = self.model.predict(img) # # # print((test_result),(target)) self.test() # # print(np.argmax(test_result),np.argmax(target)) os.makedirs('models',exist_ok=True) self.model.save('countscoreMobile_64_class7_tv6_transAll_rotate.h5')
def print_images(x_data, num_samples):
    """Save the first ``num_samples`` images as PNG and move them to a folder.

    The working directory is changed to ``/home/kayque/LENSLOAD/``. Each
    sample ``x_data[i]`` is converted with ``toimage`` and written as
    ``img_Y_<i>.png``; the folder ``lens_yes_2`` is then recreated from
    scratch and all written files are moved into it.

    Args:
        x_data: Array indexed as ``x_data[i, :, :, :]``.
        num_samples: Number of leading samples to export.
    """
    source = '/home/kayque/LENSLOAD/'
    os.chdir(source)

    for xe in range(num_samples):
        x_line = x_data[xe, :, :, :]
        print("x_line shape:", x_line.shape)
        rgb = np.array(toimage(x_line))
        Image.fromarray(rgb).save("img_Y_%s.png" % xe)

    print(' ** Does an old folder exists?')
    if os.path.exists(source + 'lens_yes_2'):
        print(' ** Yes, it does! Trying to delete... ')
        shutil.rmtree(source + 'lens_yes_2', ignore_errors=True)
        print(" ** Supposedly done. Checking if there's an RNCV folder...")
    else:
        print(" ** None found. Creating one.")
    # Both branches end by (re)creating the folder.
    os.mkdir('lens_yes_2')
    print(' ** Done!')

    dest1 = '/home/kayque/LENSLOAD/lens_yes_2/'
    counter = 0
    for bu in range(num_samples):
        png_path = source + './img_Y_%s.png' % bu
        if os.path.exists(png_path):
            shutil.move(png_path, dest1)
            counter += 1
    print("\n ** Done. %s files moved." % counter)
def invert_colors(im):
    """Exchange pure black and pure white, then convert to greyscale.

    The replacement happens in three sequential in-place passes over the
    RGBA data (alpha values are left alone):

    1. black -> red, used as a temporary marker;
    2. white -> black;
    3. red -> white, which covers the originally black pixels as well as any
       pixel that was pure red in the input.
    """
    pixels = np.array(im.convert('RGBA'))  # height x width x 4
    # Band views: they reflect every modification made to ``pixels`` below.
    band_r, band_g, band_b = pixels[..., 0], pixels[..., 1], pixels[..., 2]

    passes = (
        ((0, 0, 0), (255, 0, 0)),
        ((255, 255, 255), (0, 0, 0)),
        ((255, 0, 0), (255, 255, 255)),
    )
    for (r, g, b), replacement in passes:
        hit = (band_r == r) & (band_b == b) & (band_g == g)
        pixels[hit, :3] = replacement

    greyscale = Image.fromarray(pixels).convert('L')
    return greyscale
def extract_read_tags(model, prediction, image, height, width):
    """Crop every 'Tag' detection out of ``image`` and read it.

    Args:
        model: Passed through to ``read_tags``.
        prediction: Mapping with a ``'CustomLabels'`` list; entries named
            ``'Tag'`` carry a relative ``Geometry.BoundingBox``
            (``Left``/``Top``/``Width``/``Height``).
        image: Image array accepted by ``Image.fromarray``.
        height: Image height in pixels, used to scale the relative box.
        width: Image width in pixels, used to scale the relative box.

    Returns:
        The ``read_tags`` result for the first 'Tag' detection.

    Raises:
        IndexError: If ``prediction`` contains no 'Tag' label.
    """
    tag_names = []
    for pred in prediction['CustomLabels']:
        if pred['Name'] != 'Tag':
            continue  # we don't really need the other labels

        bounding_box = pred['Geometry']['BoundingBox']
        left = width * bounding_box['Left']
        top = height * bounding_box['Top']
        # Keep the box size in its own variables: overwriting ``width`` and
        # ``height`` here would scale every later box by the previous box
        # instead of by the image. Recognition cannot work with images less
        # than 80 pixels, hence the floor.
        box_width = max(width * bounding_box['Width'], 80)
        box_height = max(height * bounding_box['Height'], 80)

        # crop() returns a new image, so the source picture is not modified.
        img = Image.fromarray(image)
        cropped = img.crop((left, top, left + box_width, top + box_height))
        display(cropped)
        tag_names.append(read_tags(model, cropped))

    return tag_names[0]
def frame(self, idx, frame):
    """Run detection on a frame and display it with labelled boxes.

    Inference is expensive, so only frames whose index is a multiple of
    ``self.in_fps`` are processed; every other call returns immediately.
    """
    is_due = idx % (self.in_fps * 1) == 0
    if not is_due:
        return

    # Inference
    results = self.yolo(frame)
    detections = results.pred[0]

    # Colour-converted copy (BGR -> RGB) routed through PIL and back into a
    # contiguous array for the annotator.
    rgb_picture = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    annotator = Annotator(np.ascontiguousarray(rgb_picture))

    for *box, confidence, cls in detections:
        caption = f"{results.names[int(cls)]} {confidence:.2f}"
        annotator.box_label(box, caption, color=colors(cls))

    cv_helper.display_jupyter(np.asarray(annotator.im))
def showarray(a, fmt='jpeg'):
    """Display an array as an image.

    Values are clipped to ``[0, 255]`` and converted to ``uint8`` first.

    Args:
        a: Array-like image data.
        fmt: Unused; kept so existing callers that pass it keep working.
    """
    a = np.uint8(np.clip(a, 0, 255))
    display(Image.fromarray(a))
def predict_video(video_name, enc_model, clf_model):
    """Select frames of a video with a two-stage classifier cascade.

    Frame 0 is always selected. Every later frame is first judged by
    ``enc_model`` on encoder and motion-vector features only; frames it
    predicts as 0 are dropped without reading any image. The remaining frames
    are compared against the last selected frame (frame difference, perceptual
    hash, colour histogram, SIFT and SURF matching) and judged by
    ``clf_model``; a prediction of 1 selects the frame.

    Args:
        video_name: Name used to look up metadata, CV log, motion-vector
            pickle and the frame images.
        enc_model: First-stage model exposing ``predict``.
        clf_model: Second-stage model exposing ``predict``.

    Returns:
        dict with ``'picked_f'`` (selected frame ids), ``'total_frame'``,
        ``'picked_rate'`` and ``'proc_stage'`` (per frame: -1 for frame 0,
        0 = rejected by stage one, 1 = rejected by stage two, 2 = selected).
    """
    n_frames = get_video_frame_num(video_name)
    encdata = getMetadata(video_name)
    cvdata = getCVInfoFromLog(video_name)

    mv_file = os.path.join(MV_INPUT_FOLDER, video_name + '.pickle')
    # Pickles are binary data, so the file must be opened in 'rb' mode.
    # NOTE(security): pickle.load can execute arbitrary code -- only load
    # files produced by a trusted pipeline.
    with open(mv_file, 'rb') as fh:
        mv_features = pickle.load(fh)

    w = encdata['metadata']['w']
    h = encdata['metadata']['h']

    selected_fids = [0]
    prev_fid = 0
    stages = [-1]

    # range (not the Python-2-only xrange) works on both Python 2 and 3.
    for fid in range(1, n_frames):
        frame_name = str(fid) + '.jpg'
        prev_frame_name = str(prev_fid) + '.jpg'

        # ---- single-frame features -------------------------------------
        enc = encdata[frame_name]
        x = {}
        x[ENC_TYPE] = 0 if enc['type'] == 'P' else 1
        cv = cvdata[frame_name]
        mv = mv_features[fid]

        x[IMG_WIDTH] = w
        x[IMG_HEIGHT] = h
        x[ENC_SIZE] = enc['size']
        x[MV_SIZE] = mv[0]
        x[MV_MEAN] = mv[1]
        x[MV_MAX] = mv[2]
        x[MV_MIN] = mv[3]
        x[DIST_FROM_PFID] = fid - prev_fid

        # Stage one: predict if this frame is worth a closer look.
        fst_layer_f = [x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE],
                       x[MV_SIZE], x[MV_MEAN], x[MV_MAX], x[MV_MIN],
                       x[DIST_FROM_PFID]]
        pred_y = enc_model.predict(fst_layer_f)
        if int(pred_y) == 0:
            stages += [0]
            continue

        # ---- two-frame features ----------------------------------------
        cur_img = cv2.imread(os.path.join('/home/t-yuche/frames', video_name, frame_name))
        prev_img = cv2.imread(os.path.join('/home/t-yuche/frames', video_name, prev_frame_name))
        if h * w > 320 * 240:
            cur_img = cv2.resize(cur_img, (320, 240))
            prev_img = cv2.resize(prev_img, (320, 240))

        framediff = getFrameDiff(prev_img, cur_img)
        # Normalised by the metadata resolution, also when the frames were
        # downscaled above.
        framediff_prec = framediff / (h * w * 1.0)

        pilcur_img = cv2.cvtColor(cur_img, cv2.COLOR_BGR2RGB)
        pilprev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2RGB)
        pil_cur = Image.fromarray(pilcur_img)
        pil_prev = Image.fromarray(pilprev_img)
        phash_v = phash(pil_prev, pil_cur)

        hist_score = colorHistSim(prev_img, cur_img)

        sprev_img = cv2.resize(prev_img, (160, 120))
        scur_img = cv2.resize(cur_img, (160, 120))
        sift_score = getSIFTMatchingSim(sprev_img, scur_img)
        surf_score = getSURFMatchingSim(sprev_img, scur_img)

        x[SOBEL] = cv['sobel'][0]
        x[ILLU] = cv['illu'][0]
        x[FRAME_DIFF] = framediff_prec
        x[PHASH] = phash_v
        x[COLORHIST] = hist_score
        x[SIFTMATCH] = sift_score
        x[SURFMATCH] = surf_score

        # Stage two: final decision on all features.
        snd_layer_f = [x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE],
                       x[MV_SIZE], x[MV_MEAN], x[MV_MAX], x[MV_MIN],
                       x[SOBEL], x[ILLU], x[FRAME_DIFF], x[PHASH],
                       x[COLORHIST], x[SIFTMATCH], x[SURFMATCH],
                       x[DIST_FROM_PFID]]
        pred_y = clf_model.predict(snd_layer_f)
        if int(pred_y) == 1:
            stages += [2]
            selected_fids += [fid]
            prev_fid = fid
        else:
            stages += [1]

    pred_trace = {
        'picked_f': selected_fids,
        'total_frame': n_frames,
        'picked_rate': len(selected_fids) / (n_frames * 1.0),
        'proc_stage': stages,
    }
    return pred_trace
def horizontal_plus_vertical_edge_detection(self, image):
    """Add the horizontal and vertical edge responses of ``image``.

    Both responses are truncated to integers, summed element-wise and cast to
    ``uint8``; sums outside 0..255 wrap around modulo 256.

    Returns:
        2-D ``uint8`` array with the shape of the horizontal response.
    """
    horizontal = np.asarray(self.horizontal_edge_detection(image))
    vertical = np.asarray(self.vertical_edge_detection(image))

    # Only the area covered by the horizontal response is combined.
    rows, columns = horizontal.shape[:2]
    total = (horizontal.astype(np.int64)
             + vertical[:rows, :columns].astype(np.int64))
    return total.astype(np.uint8)


image_processing.horizontal_plus_vertical_edge_detection = horizontal_plus_vertical_edge_detection

impr = image_processing()
im = Image.fromarray(impr.horizontal_plus_vertical_edge_detection(image))
display(im)


# #gaussian blur

# In[174]:


def gaussian_filter(self, image):
    """Convolve ``image`` with a fixed 5x5 kernel.

    The kernel is passed to ``self.convolution`` as ``[273, weights]``; 273 is
    the sum of the weights.
    """
    kernel = [273, [[1, 4, 7, 4, 1],
                    [4, 16, 26, 16, 4],
                    [7, 26, 41, 26, 7],
                    [4, 16, 26, 16, 4],
                    [1, 4, 7, 4, 1]]]
    processed_image = self.convolution(image, kernel)
    return processed_image
def predict_video(video_name, enc_model, clf_model):
    """Pick frames of a video using a two-stage prediction cascade.

    Frame 0 is always picked. For each following frame, ``enc_model`` decides
    from encoder and motion-vector features alone whether the frame deserves
    further inspection. Frames that pass are compared with the most recently
    picked frame (frame difference, perceptual hash, colour histogram, SIFT
    and SURF matching) and ``clf_model`` makes the final decision.

    Args:
        video_name: Name used to look up metadata, CV log, motion-vector
            pickle and the frame images.
        enc_model: First-stage model exposing ``predict``.
        clf_model: Second-stage model exposing ``predict``.

    Returns:
        dict with ``'picked_f'`` (picked frame ids), ``'total_frame'``,
        ``'picked_rate'`` and ``'proc_stage'`` (per frame: -1 for frame 0,
        0 = rejected by stage one, 1 = rejected by stage two, 2 = picked).
    """
    n_frames = get_video_frame_num(video_name)
    encdata = getMetadata(video_name)
    cvdata = getCVInfoFromLog(video_name)

    mv_file = os.path.join(MV_INPUT_FOLDER, video_name + '.pickle')
    # Binary mode is required for pickle data.
    # NOTE(security): pickle.load can execute arbitrary code -- only load
    # files produced by a trusted pipeline.
    with open(mv_file, 'rb') as fh:
        mv_features = pickle.load(fh)

    w = encdata['metadata']['w']
    h = encdata['metadata']['h']

    selected_fids = [0]
    prev_fid = 0
    stages = [-1]

    # range replaces the Python-2-only xrange and works on both versions.
    for fid in range(1, n_frames):
        frame_name = str(fid) + '.jpg'
        prev_frame_name = str(prev_fid) + '.jpg'

        # generate features
        #### single frame features
        enc = encdata[frame_name]
        x = {}
        x[ENC_TYPE] = 0 if enc['type'] == 'P' else 1
        cv = cvdata[frame_name]
        mv = mv_features[fid]

        x[IMG_WIDTH] = w
        x[IMG_HEIGHT] = h
        x[ENC_SIZE] = enc['size']
        x[MV_SIZE] = mv[0]
        x[MV_MEAN] = mv[1]
        x[MV_MAX] = mv[2]
        x[MV_MIN] = mv[3]
        x[DIST_FROM_PFID] = fid - prev_fid

        # predict if we should pick this frame
        fst_layer_f = [
            x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE],
            x[MV_SIZE], x[MV_MEAN], x[MV_MAX], x[MV_MIN],
            x[DIST_FROM_PFID]
        ]
        pred_y = enc_model.predict(fst_layer_f)
        if int(pred_y) == 0:
            stages.append(0)
            continue

        ### two-frame features
        cur_img = cv2.imread(
            os.path.join('/home/t-yuche/frames', video_name, frame_name))
        prev_img = cv2.imread(
            os.path.join('/home/t-yuche/frames', video_name, prev_frame_name))
        if h * w > 320 * 240:
            cur_img = cv2.resize(cur_img, (320, 240))
            prev_img = cv2.resize(prev_img, (320, 240))

        framediff = getFrameDiff(prev_img, cur_img)
        # Normalised by the metadata resolution, also when the frames were
        # downscaled above.
        framediff_prec = framediff / (h * w * 1.0)

        pilcur_img = cv2.cvtColor(cur_img, cv2.COLOR_BGR2RGB)
        pilprev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2RGB)
        pil_cur = Image.fromarray(pilcur_img)
        pil_prev = Image.fromarray(pilprev_img)
        phash_v = phash(pil_prev, pil_cur)

        hist_score = colorHistSim(prev_img, cur_img)

        sprev_img = cv2.resize(prev_img, (160, 120))
        scur_img = cv2.resize(cur_img, (160, 120))
        sift_score = getSIFTMatchingSim(sprev_img, scur_img)
        surf_score = getSURFMatchingSim(sprev_img, scur_img)

        x[SOBEL] = cv['sobel'][0]
        x[ILLU] = cv['illu'][0]
        x[FRAME_DIFF] = framediff_prec
        x[PHASH] = phash_v
        x[COLORHIST] = hist_score
        x[SIFTMATCH] = sift_score
        x[SURFMATCH] = surf_score

        snd_layer_f = [
            x[ENC_TYPE], x[IMG_WIDTH], x[IMG_HEIGHT], x[ENC_SIZE],
            x[MV_SIZE], x[MV_MEAN], x[MV_MAX], x[MV_MIN],
            x[SOBEL], x[ILLU], x[FRAME_DIFF], x[PHASH],
            x[COLORHIST], x[SIFTMATCH], x[SURFMATCH],
            x[DIST_FROM_PFID]
        ]
        pred_y = clf_model.predict(snd_layer_f)
        if int(pred_y) == 1:
            stages.append(2)
            selected_fids.append(fid)
            prev_fid = fid
        else:
            stages.append(1)

    return {
        'picked_f': selected_fids,
        'total_frame': n_frames,
        'picked_rate': len(selected_fids) / (n_frames * 1.0),
        'proc_stage': stages,
    }
box = cv2.boxPoints(rect) box = np.int0(box) #check for 90 degrees rotation if ((width / float(height)) > 1): # crop a particular rectangle from the source image cropped_image = cv2.getRectSubPix(carsample_mask, (width, height), centre) else: # crop a particular rectangle from the source image cropped_image = cv2.getRectSubPix(carsample_mask, (height, width), centre) # convert into grayscale cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY) # equalize the histogram cropped_image = cv2.equalizeHist(cropped_image) # resize to 260 cols and 63 rows. (Just something I have set as standard here) cropped_image = cv2.resize(cropped_image, (260, 180)) cropped_images.append(cropped_image) _ = plt.subplots_adjust(hspace=0.000) number_of_subplots = len(cropped_images) for i, v in enumerate(range(number_of_subplots)): v = v + 1 #ax1 = plt.subplot(number_of_subplots,1,v) showfig(cropped_images[i], plt.get_cmap('gray')) plt.show() for img in cropped_images: data = pytesseract.image_to_data(Image.fromarray(img), output_type='dict') print(data)
def remove_noise_and_smooth(file_name):
    """Binarise and smooth the image stored at ``file_name``.

    The file is read as greyscale, adaptively thresholded (mean method, block
    size 41, constant 3) and passed through a morphological opening followed
    by a closing with a 1x1 kernel. The result is OR-combined with the output
    of ``image_smoothening`` applied to the greyscale image.
    """
    grey = cv2.imread(file_name, 0)
    binarised = cv2.adaptiveThreshold(grey.astype(np.uint8), 255,
                                      cv2.ADAPTIVE_THRESH_MEAN_C,
                                      cv2.THRESH_BINARY, 41, 3)

    unit_kernel = np.ones((1, 1), np.uint8)
    cleaned = binarised
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        cleaned = cv2.morphologyEx(cleaned, operation, unit_kernel)

    smoothed = image_smoothening(grey)
    return cv2.bitwise_or(smoothed, cleaned)


# Preprocess the input; the result is an ndarray.
img = process_image_for_ocr(path)

# ndarray -> PIL image, colour-mapped with gist_earth
from matplotlib import cm
img2 = Image.fromarray(np.uint8(cm.gist_earth(img) * 255))
display(img2)

# OCR, first on the untouched file ...
import pytesseract
print('Result without any preprocessing..')
img = Image.open(path)
print(type(img))
result = pytesseract.image_to_string(img)
print(result)

# ... then on the preprocessed picture.
print('Result with #1 preprocessing...')
print(type(img2))
result = pytesseract.image_to_string(img2, lang='eng')
print(result)

import time
# Rectify the first detected area of 'image.jpg' to a 600x300 picture, show
# it, run OCR on it and store the recognised text in 'test.txt'.
import sys

img = cv2.imread('image.jpg')

# Map the corners in areas[0] onto the corners of the 600x300 output.
pts1 = np.float32(areas[0])
pts2 = np.float32([[600, 300], [600, 0], [0, 0], [0, 300]])
M = cv2.getPerspectiveTransform(pts1, pts2)
dst = cv2.warpPerspective(img, M, (600, 300))

cv2.imshow("dst", dst)
cv2.waitKey(0)
cv2.destroyAllWindows()

import pyocr
from PIL import Image

tools = pyocr.get_available_tools()
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
tool = tools[0]

txt = tool.image_to_string(Image.fromarray(dst), lang="eng")
print(txt)

# The context manager closes the file even if the write fails.
with open('test.txt', 'w') as out_file:
    out_file.write(txt)
def checkValidateCode():
    """Run OCR on ``code.png`` and return the recognised text unmodified.

    The picture is converted to greyscale and binarised at threshold 125
    before it is handed to Tesseract.
    """
    grey = np.array(Image.open('code.png').convert('L'))
    _, binary = cv2.threshold(grey, 125, 255, cv2.THRESH_BINARY)
    ocr_input = Image.fromarray(binary.astype(np.uint8))
    return pytesseract.image_to_string(ocr_input)
from IPython.display import Image, display display(Image(filename="filippo.jpg")) import face_recognition my_photo = face_recognition.load_image_file("filippo.jpg") my_face_locations = face_recognition.face_locations(my_photo) my_face_locations top, right, bottom, left = my_face_locations[0] from PIL import Image, ImageDraw my_face = my_photo[top:bottom, left:right] my_face_img = Image.fromarray(my_face) display(my_face_img) my_face_encoding = face_recognition.face_encodings(my_photo)[0] from IPython.display import Image, display display(Image(filename="teamphoto.jpg")) group_photo = face_recognition.load_image_file("teamphoto.jpg") group_face_locations = face_recognition.face_locations(group_photo) print(len(group_face_locations)) counter = 0 for face_location in group_face_locations: top, right, bottom, left = face_location group_face_encoding = face_recognition.face_encodings(group_photo)[counter] result = face_recognition.compare_faces([my_face_encoding],
image = image.transpose(0, 1, 2) return image WINDOW_SIZE = 200 STRIDE = 64 K = 36 counter = 0 import matplotlib from PIL import Image fig, ax = plt.subplots(3, 2, figsize=(20, 25)) for i, img in enumerate(images): url = data_dir + img + '.tiff' image, _, best_regions = generate_patches(url, window_size=WINDOW_SIZE, stride=STRIDE, k=K) if np.sum(image) == None: continue glued_image = glue_images_one(tiles=best_regions, image_size=WINDOW_SIZE, n_tiles=K) #Rescale to 0-255 and convert to uint8 rescaled = (255.0 / glued_image.max() * (glued_image - glued_image.min())).astype(np.uint8) im = Image.fromarray(rescaled) im.save(f'./test/{img}.png') print(counter) counter += 1
def imresize(img, size):
    """Resize an image array with PIL and return the result as an ndarray.

    ``size`` is forwarded unchanged to ``PIL.Image.resize``.
    """
    resized = Image.fromarray(img).resize(size)
    return np.array(resized)
# --- Ground-truth mask of one training sample: halve, plot, save as PNG ----
bgfg = train_a['mask']
upd = bgfg.squeeze(0)
# upd.size()
bgfg_np = upd / 2
# bgfg_np
np_img = bgfg_np.cpu().numpy()
# type(np_img)
plt.imshow(np_img)
np_img.shape

from PIL import Image
# NOTE(review): mode 'L' expects 8-bit data; if np_img is a float array the
# saved picture will not show the mask values -- confirm dtype and range.
pil_img = Image.fromarray(np_img, mode='L')
pil_img.save('n.png')

# --- Predicted mask of sample 1: move to CPU and display -------------------
mask_t = _samples[1]['pred_mask']
mask = mask_t.cpu()
# type(bgfg_cpu)
print("mask size is: ", mask.size())

from torchvision import transforms
im = transforms.ToPILImage()(mask)  # .convert("RGB")
display(im)
print(im)
print(im.size)

a = train[9]
bgfg = _samples[1]['mask']