def extract_detections(frame, min_confidence=0.6, labels=None):
    """Extract detections from frame.

    frame: numpy array HxWx3
    returns: numpy int array Cx5 [[label_id, xmin, ymin, xmax, ymax]]
    """
    # Save the original size before resizing, then convert the frame
    # to float and resize to the 300x300 input the SSD expects.
    h, w = np.shape(frame)[:2]
    frame = resize(frame.astype(float), (300, 300, 3))

    # Preprocess, run the model, and decode the raw output into boxes.
    x = preprocess_input(frame)
    x = np.expand_dims(x, axis=0)
    y = model.predict(x)
    results = bbox_util.detection_out(y)[0]

    # Keep only detections with confidence above the threshold.
    results = results[results[:, 1] > min_confidence]

    # If a label set is given, keep only detections of those classes.
    if labels is not None:
        result_labels = results[:, 0].astype(np.int32)
        indices = [
            i for i, l in enumerate(result_labels)
            if VOC_CLASSES[l - 1] in labels
        ]
        results = results[indices]
    else:
        results[:, 0] = -1

    # Remove the confidence column from the result.
    results = np.delete(results, 1, 1)

    # Rescale normalized detection coords back to the input image size.
    results[:, 1] *= w   # xmin
    results[:, 2] *= h   # ymin
    results[:, 3] *= w   # xmax
    results[:, 4] *= h   # ymax

    return detection_cast(results.astype(int))
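# A minimal usage sketch for extract_detections above. It assumes the SSD
# model, its bbox_util, and VOC_CLASSES are already set up elsewhere in
# the project; the file name and label set here are purely illustrative.
from skimage.io import imread

frame = imread("street.jpg")  # HxWx3 uint8 image
detections = extract_detections(frame, min_confidence=0.6,
                                labels=["car", "person"])
for label_id, xmin, ymin, xmax, ymax in detections:
    print(label_id, (xmin, ymin), (xmax, ymax))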
def predict_result(model, file_path):
    global cnt
    img = image.load_img(file_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    preds = model.predict(x)
    label = decode_predictions(preds)[0][0][1]
    if label == 'toilet_tissue':
        print('Predicted:', decode_predictions(preds))
        print(file_path)
        cnt += 1
    else:
        print("+++++++++++++++++++++++++++++++++++++++")
        print('Predicted:', decode_predictions(preds))
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(150, 150))

    # Preprocessing the image
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    # Be careful how your trained model deals with the input,
    # otherwise it won't make a correct prediction!
    x = preprocess_input(x)

    preds = model.predict(x)
    # Note: comparing a float model output to 0 exactly is fragile;
    # this assumes the model emits a hard 0/1 for the two classes.
    if preds[0][0] == 0:
        preds = "This is Defective Insulator"
    else:
        preds = "This is Normal Insulator"
    return preds
def model_predict(img_path):
    img = image.load_img(img_path, target_size=(200, 200))
    model = load_model('MobileNet_model.h5')

    # Preprocessing the image
    x = image.img_to_array(img)
    x = x / 255
    x = np.expand_dims(x, axis=0)
    # Note: scaling to [0, 1] and then calling preprocess_input may
    # normalize twice; make sure this matches how the model was trained.
    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "The Person is not Infected With Pneumonia"
    else:
        preds = "The Person is Infected With Pneumonia"
    return preds
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = x / 255
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Bug fix: the original called model_predict(x) here, which would
    # recurse forever; model.predict(x) is what was intended.
    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "The Car is Audi !!!"
    elif preds == 1:
        preds = "The Car is Lamborghini !!!"
    else:
        preds = "The Car is Mercedes !!!"
    return preds
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))

    # Preprocessing the image
    x = image.img_to_array(img)
    x = x / 255
    x = np.expand_dims(x, axis=0)
    # Be careful how your trained model deals with the input,
    # otherwise it won't make a correct prediction!
    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "Normal"
    else:
        preds = "Covid"
    return preds
def prepare_train_data(vgg_face):
    # Prepare train data: one VGG-Face embedding per face photo.
    x_train = []
    y_train = []
    person_rep = dict()
    person_folders = os.listdir('./faces/')
    for i, person in enumerate(person_folders):
        print("preparing train set for person: " + person)
        person_rep[i] = person
        # Use os.path.join instead of mixing Unix and Windows separators.
        image_names = os.listdir(os.path.join('./faces', person))
        for image_name in image_names:
            print("\tphoto: " + image_name)
            img = load_img(os.path.join('./faces', person, image_name),
                           target_size=(224, 224))
            img = img_to_array(img)
            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)
            img_encode = vgg_face(img)
            x_train.append(np.squeeze(K.eval(img_encode)).tolist())
            y_train.append(i)
    return person_rep, np.array(x_train), np.array(y_train)
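# A hedged follow-up sketch: fit a small softmax classifier on the
# embeddings returned by prepare_train_data. The layer sizes and training
# settings are illustrative assumptions, not part of the source.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

person_rep, x_train, y_train = prepare_train_data(vgg_face)
clf = Sequential([
    Dense(128, activation='relu', input_shape=(x_train.shape[1],)),
    Dense(len(person_rep), activation='softmax'),
])
clf.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=10, batch_size=8)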
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))

    # Preprocessing the image. The original scaled by 255 twice
    # (np.true_divide and then x / 255); a single division is enough.
    x = image.img_to_array(img)
    x = x / 255
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "The Car is Audi"
    elif preds == 1:
        preds = "The Car is Lamborghini"
    else:
        preds = "The Car is Mercedes"
    return preds
def get_integrated_gradients(model, img_input, top_pred_idx,
                             baseline=None, num_steps=50):
    # Use a black image as the baseline unless one is supplied.
    if baseline is None:
        baseline = np.zeros(img_size).astype(np.float32)
    else:
        baseline = baseline.astype(np.float32)

    # 1. Interpolate between the baseline and the input.
    img_input = img_input.astype(np.float32)
    interpolated_image = [
        baseline + (step / num_steps) * (img_input - baseline)
        for step in range(num_steps + 1)
    ]
    interpolated_image = np.array(interpolated_image).astype(np.float32)

    # 2. Preprocess the interpolated images.
    interpolated_image = preprocess_input(interpolated_image, mode="tf")

    # 3. Get the gradients of the top predicted class for each image.
    grads = []
    for i, img in enumerate(interpolated_image):
        img = tf.expand_dims(img, axis=0)
        grad = compute_gradients(model, img, top_pred_idx=top_pred_idx)
        grads.append(grad[0])
    grads = tf.convert_to_tensor(grads, dtype=tf.float32)

    # 4. Approximate the integral using the trapezoidal rule.
    grads = (grads[:-1] + grads[1:]) / 2.0
    avg_grads = tf.reduce_mean(grads, axis=0)

    # 5. Scale by the input difference and return.
    integrated_grads = (img_input - baseline) * avg_grads
    return integrated_grads
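# compute_gradients is called above but not defined here. A minimal
# sketch of what such a helper typically looks like for a Keras
# classifier; the body is an assumption that mirrors the call signature.
import tensorflow as tf

def compute_gradients(model, img, top_pred_idx):
    img = tf.convert_to_tensor(img)
    with tf.GradientTape() as tape:
        tape.watch(img)
        preds = model(img)
        top_class_score = preds[:, top_pred_idx]
    # Gradient of the top class score with respect to the input pixels.
    return tape.gradient(top_class_score, img)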
def generate(self, train=True):
    while True:
        if train:
            # Shuffle the training set.
            shuffle(self.train_lines)
            lines = self.train_lines
        else:
            shuffle(self.val_lines)
            lines = self.val_lines
        inputs = []
        targets = []
        for annotation_line in lines:
            img, y = self.get_random_data(annotation_line, self.image_size[0:2])
            if len(y) != 0:
                # Normalize box coordinates to [0, 1].
                boxes = np.array(y[:, :4], dtype=np.float32)
                boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                boxes[:, 3] = boxes[:, 3] / self.image_size[0]
                one_hot_label = np.eye(self.num_classes)[np.array(y[:, 4], np.int32)]
                # Skip samples with degenerate (zero-area) boxes.
                if ((boxes[:, 3] - boxes[:, 1]) <= 0).any() and ((boxes[:, 2] - boxes[:, 0]) <= 0).any():
                    continue
                y = np.concatenate([boxes, one_hot_label], axis=-1)
            # Match ground-truth boxes to the prior boxes.
            y = self.bbox_util.assign_boxes(y)
            inputs.append(img)
            targets.append(y)
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = np.array(targets)
                inputs = []
                targets = []
                yield preprocess_input(tmp_inp), tmp_targets
def predict_with_gradient(dirs):
    '''
    Classify each image, compute a Grad-CAM heatmap, and save the
    stacked combination of original image, heatmap, and overlay.
    :param dirs: list of paths to all data items
    :return: nothing
    '''
    # Loop through all photos in dirs and predict.
    for item in dirs:
        orig = cv2.imread(path + item)
        image = load_img(path + item)
        image = img_to_array(image)
        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)

        # Use the network to make predictions.
        preds = model.predict(image)
        i = np.argmax(preds[0])
        label = "Gradient"

        # Initialize the gradient class activation map and build
        # the heatmap for the predicted class.
        cam = GradCAM(model, i)
        heatmap = cam.compute_heatmap(image)

        # Resize the heatmap to the original input image dimensions
        # and overlay it on top of the image.
        heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
        (heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5)

        # Draw the predicted label on the output image.
        cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
        cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 2)

        # Stack original image, heatmap, and overlay, then save.
        output = np.vstack([orig, heatmap, output])
        output = imutils.resize(output, height=700)
        cv2.imwrite(my_path_results_pos + item, output)
def recognize(img, outputs, class_names, vgg_face):
    # Load the stored reference embedding for each known person.
    person_rep = dict()
    person_names = ["angelamerkel", "jinping", "trump"]
    for person in person_names:
        embed = np.loadtxt(person + ".txt")
        person_rep[person] = embed

    boxes, objectness, classes, nums = outputs
    boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0]
    wh = np.flip(img.shape[0:2])
    for i in range(nums):
        x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
        x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
        if class_names[int(classes[i])] == "face":
            # Crop the face, embed it, and match against known people.
            img_crop = img[x1y1[1]:x2y2[1], x1y1[0]:x2y2[0]]
            crop_img = img_to_array(img_crop)
            crop_img = np.expand_dims(crop_img, axis=0)
            crop_img = preprocess_input(crop_img)
            img_encode = vgg_face(transform_images(crop_img, 224))
            embed = K.eval(img_encode)
            name, score = get_match(person_rep, embed, 0.3)
            img = cv2.rectangle(img, x1y1, x2y2, (205, 0, 0), 2)
            img = cv2.putText(img, '{} {:.4f}'.format(name, score), x1y1,
                              cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 255), 2)
        else:
            img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
            img = cv2.putText(
                img, '{} {:.4f}'.format(class_names[int(classes[i])], objectness[i]),
                x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
def main():
    parser = argparser()
    args = parser.parse_args()
    image_path = args.image
    layer_name = args.layer_name
    feature_to_visualize = args.feature
    visualize_mode = args.mode

    model = vgg16.VGG16(weights='imagenet', include_top=True)
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    if layer_name not in layer_dict:
        print('Wrong layer name')
        sys.exit()

    # Load data and preprocess.
    img = Image.open(image_path)
    img = img.resize((224, 224))
    img_array = np.array(img)
    img_array = img_array[np.newaxis, :]
    # np.float is deprecated; use the builtin float dtype instead.
    img_array = img_array.astype(float)
    img_array = imagenet_utils.preprocess_input(img_array)

    deconv = visualize(model, img_array, layer_name,
                       feature_to_visualize, visualize_mode)

    # Postprocess and save the image.
    deconv = deconv - deconv.min()
    deconv *= 1.0 / (deconv.max() + 1e-8)
    deconv = deconv[:, :, ::-1]
    uint8_deconv = (deconv * 255).astype(np.uint8)
    img = Image.fromarray(uint8_deconv, 'RGB')
    out_path = 'results/{}_{}_{}.png'.format(layer_name, feature_to_visualize,
                                             visualize_mode)
    img.save(out_path)
    print('Saved: ' + out_path)
def get_model_in_blocks(self, model_function, include_top=True):
    # Load the vanilla pretrained model.
    model = model_function(pretrained=True)

    # Instantiate consecutive blocks.
    blocks = []

    # Add preprocess_input as the first block.
    blocks.append(
        Lambda(
            lambda x: preprocess_input(x, mode=self.preprocess_input_mode),
            name="preprocess_input"))

    # Discard the last pooling layer.
    blocks += model.features.children[:-1]

    if include_top:
        # Add a GlobalAveragePooling2D layer.
        blocks.append(GlobalAveragePooling2D())
        # Add the dense layer with softmax.
        blocks += [model.output1, Softmax()]
    return blocks
def recognize(self, draw):
    height, width, _ = np.shape(draw)
    draw_rgb = cv.cvtColor(draw, cv.COLOR_BGR2RGB)
    # Detect faces with MTCNN.
    rectangles = self.mtcnn_model.detectFace(draw_rgb, self.threshold)
    if len(rectangles) == 0:
        return

    # Clip the detections to the image bounds.
    rectangles = np.array(rectangles, dtype=np.int32)
    rectangles[:, 0] = np.clip(rectangles[:, 0], 0, width)
    rectangles[:, 1] = np.clip(rectangles[:, 1], 0, height)
    rectangles[:, 2] = np.clip(rectangles[:, 2], 0, width)
    rectangles[:, 3] = np.clip(rectangles[:, 3], 0, height)

    rectangles_temp = utils.rect2square(np.array(rectangles, dtype=np.int32))
    rectangles_temp[:, 0] = np.clip(rectangles_temp[:, 0], 0, width)
    rectangles_temp[:, 1] = np.clip(rectangles_temp[:, 1], 0, height)
    rectangles_temp[:, 2] = np.clip(rectangles_temp[:, 2], 0, width)
    rectangles_temp[:, 3] = np.clip(rectangles_temp[:, 3], 0, height)

    for rectangle in rectangles_temp:
        # Landmark coordinates relative to the cropped face.
        landmark = (np.reshape(rectangle[5:15], (5, 2)) -
                    np.array([int(rectangle[0]), int(rectangle[1])])) / \
                   (rectangle[3] - rectangle[1]) * 160
        # Crop the face region.
        crop_img = draw_rgb[int(rectangle[1]):int(rectangle[3]),
                            int(rectangle[0]):int(rectangle[2])]
        crop_img = cv.resize(crop_img, (self.Crop_HEIGHT, self.Crop_WIDTH))
        # Align the face using the landmarks.
        new_img, _ = utils.Alignment_1(crop_img, landmark)
        # Normalize.
        new_img = preprocess_input(
            np.reshape(np.array(new_img, np.float64),
                       [1, self.Crop_HEIGHT, self.Crop_WIDTH, 3]))

    rectangles = rectangles[:, 0:4]
    for (left, top, right, bottom) in rectangles:
        cv.rectangle(draw, (left, top), (right, bottom), (255, 0, 0), 2)
    return draw
def generate(self, train=True):
    while True:
        if train:
            shuffle(self.train_keys)
            keys = self.train_keys
        else:
            shuffle(self.val_keys)
            keys = self.val_keys
        inputs = []
        targets = []
        for key in keys:
            img_path = self.path_prefix + key
            img = imread(img_path).astype('float32')
            y = self.gt[key].copy()
            if train and self.do_crop:
                img, y = self.random_sized_crop(img, y)
            # Note: scipy's imresize is deprecated in newer versions;
            # kept here as in the source.
            img = imresize(img, self.image_size).astype('float32')
            # Apply augmentation only on the training set.
            if train:
                shuffle(self.color_jitter)
                for jitter in self.color_jitter:
                    img = jitter(img)
                if self.lighting_std:
                    img = self.lighting(img)
                if self.hflip_prob > 0:
                    img, y = self.horizontal_flip(img, y)
                if self.vflip_prob > 0:
                    img, y = self.vertical_flip(img, y)
            y = self.bbox_util.assign_boxes(y)
            inputs.append(img)
            targets.append(y)
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = np.array(targets)
                inputs = []
                targets = []
                yield preprocess_input(tmp_inp), tmp_targets
def load_paired_img_wrd(folder):
    class_names = [fold for fold in os.listdir(folder) if ".DS" not in fold]
    image_list = []
    labels_list = []
    paths_list = []
    for cl in class_names:
        splits = cl.split("_")
        subfiles = [f for f in os.listdir(folder + "/" + cl) if ".DS" not in f]
        for subf in subfiles:
            full_path = os.path.join(folder, cl, subf)
            # Note: 229x229 is unusual; ResNet50 conventionally takes
            # 224x224 inputs, so this size may be a typo in the source.
            img = image_utils.load_img(full_path, target_size=(229, 229))
            x_raw = image_utils.img_to_array(img)
            # Add a batch dimension for the model.
            x_expand = np.expand_dims(x_raw, axis=0)
            x = preprocess_input(x_expand)  # normalize to [-1, 1]
            image_list.append(x)
            paths_list.append(full_path)
    img_data = np.array(image_list)
    img_data = np.rollaxis(img_data, 1, 0)
    img_data = img_data[0]
    # Note: labels_list is never populated here, so an empty array is
    # returned for the labels.
    return img_data, np.array(labels_list), paths_list
def generate(self):
    while True:
        # Shuffle the training set.
        shuffle_index = np.arange(len(self.imgs_path))
        shuffle(shuffle_index)
        # np.object is deprecated; use the builtin object dtype.
        self.imgs_path = np.array(self.imgs_path, dtype=object)[shuffle_index]
        self.words = np.array(self.words, dtype=object)[shuffle_index]

        inputs = []
        target0 = []
        target1 = []
        target2 = []
        for i, image_path in enumerate(self.imgs_path):
            # Open the image and fetch its labels.
            img = Image.open(image_path)
            labels = self.words[i]
            annotations = np.zeros((0, 15))

            for idx, label in enumerate(labels):
                annotation = np.zeros((1, 15))
                # bbox: ground-truth box position.
                annotation[0, 0] = label[0]             # x1
                annotation[0, 1] = label[1]             # y1
                annotation[0, 2] = label[0] + label[2]  # x2
                annotation[0, 3] = label[1] + label[3]  # y2
                # landmarks: facial keypoint positions.
                annotation[0, 4] = label[4]    # l0_x
                annotation[0, 5] = label[5]    # l0_y
                annotation[0, 6] = label[7]    # l1_x
                annotation[0, 7] = label[8]    # l1_y
                annotation[0, 8] = label[10]   # l2_x
                annotation[0, 9] = label[11]   # l2_y
                annotation[0, 10] = label[13]  # l3_x
                annotation[0, 11] = label[14]  # l3_y
                annotation[0, 12] = label[16]  # l4_x
                annotation[0, 13] = label[17]  # l4_y
                # Flag whether landmarks are present.
                if annotation[0, 4] < 0:
                    annotation[0, 14] = -1
                else:
                    annotation[0, 14] = 1
                annotations = np.append(annotations, annotation, axis=0)

            target = np.array(annotations)
            img, target = get_random_data(img, target, [self.img_size, self.img_size])

            # Match ground-truth boxes to anchors and build the
            # expected prediction for each anchor.
            assignment = self.bbox_util.assign_boxes(target)
            regression = assignment[:, :5]
            classification = assignment[:, 5:8]
            landms = assignment[:, 8:]

            inputs.append(img)
            target0.append(np.reshape(regression, [-1, 5]))
            target1.append(np.reshape(classification, [-1, 3]))
            target2.append(np.reshape(landms, [-1, 10 + 1]))
            if len(target0) == self.batch_size:
                tmp_inp = np.array(inputs)
                yield preprocess_input(tmp_inp), \
                      np.array(target0, dtype=np.float32), \
                      np.array(target1, dtype=np.float32), \
                      np.array(target2, dtype=np.float32)
                inputs = []
                target0 = []
                target1 = []
                target2 = []
def detect_image(self, image_id, image, bgr_img, write_down=False):
    if write_down:
        self.detect_txtfile = open(
            "./input/detection-results/" + image_id + ".txt", "w")
    image_shape = np.array(np.shape(image)[0:2])
    crop_img, x_offset, y_offset = self.letterbox_image(
        image, (self.model_image_size[1], self.model_image_size[0]))
    photo = np.array(crop_img, dtype=np.float64)

    # Image preprocessing
    photo = preprocess_input(
        np.reshape(photo,
                   [1, self.model_image_size[0], self.model_image_size[1], 3]))

    start = time.time()
    preds = self.ssd_model.predict(photo)
    end = time.time()
    ti = np.round((end - start) * 1000)
    print('Execution time: {} ms'.format(ti))

    # Decode the predictions.
    self.bbox_util = BBoxUtility(self.num_classes,
                                 overlap_threshold=self.IOU_thresh,
                                 nms_thresh=self.nms_thresh)
    results = self.bbox_util.detection_out(
        preds, background_label_id=0, keep_top_k=200,
        confidence_threshold=self.confidence)

    if len(results[0]) <= 0:
        if write_down:
            self.detect_txtfile.close()
        return bgr_img

    det_label = results[0][:, 0]
    det_conf = results[0][:, 1]
    det_xmin, det_ymin, det_xmax, det_ymax = (results[0][:, 2], results[0][:, 3],
                                              results[0][:, 4], results[0][:, 5])
    top_indices = [i for i, conf in enumerate(det_conf)
                   if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the gray letterbox bars from the box coordinates.
    boxes = self.ssd_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.model_image_size[0], self.model_image_size[1]]),
        image_shape)

    img_ori = bgr_img
    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c) - 1]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        if write_down:
            self.detect_txtfile.write(
                "%s %s %s %s %s %s\n" %
                (predicted_class, score[:6], str(int(left)), str(int(top)),
                 str(int(right)), str(int(bottom))))
        self.plot_one_box(
            img_ori, [int(left), int(top), int(right), int(bottom)],
            label=predicted_class +
            ', {:.2f}%'.format(np.round(float(score) * 100, 2)),
            color=self.colors[int(c) - 1])

    cv2.putText(bgr_img,
                'FPS: {0} Execution time: {1}ms'.format(
                    str(int(1000 / ti)), str(int(ti))),
                (20, 50), 0, 0.5, [20, 150, 255],
                thickness=1, lineType=cv2.LINE_AA)
    cv2.imshow('draw', img_ori)
    cv2.waitKey(self.gap_time)
    if write_down:
        self.detect_txtfile.close()
    return bgr_img
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(224, 224))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img
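# A short usage sketch for preprocess_image above. It assumes the
# preprocess_input imported in this file matches the chosen model;
# VGG16 and the file name here are illustrative assumptions.
from tensorflow.keras.applications.vgg16 import VGG16, decode_predictions

model = VGG16(weights="imagenet")
batch = preprocess_image("elephant.jpg")  # shape (1, 224, 224, 3)
preds = model.predict(batch)
print(decode_predictions(preds, top=3)[0])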
# load the pre-trained CNN from disk
print("[INFO] loading model...")
model = Model(weights="imagenet")

# load the original image from disk (in OpenCV format) and then
# resize the image to its target dimensions
orig = cv2.imread(args["image"])
resized = cv2.resize(orig, (224, 224))

# load the input image from disk (in Keras/TensorFlow format) and
# preprocess it
image = load_img(args["image"], target_size=(224, 224))
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
image = imagenet_utils.preprocess_input(image)

# use the network to make predictions on the input image and find
# the class label index with the largest corresponding probability
preds = model.predict(image)
i = np.argmax(preds[0])

# decode the ImageNet predictions to obtain the human-readable label
decoded = imagenet_utils.decode_predictions(preds)
(imagenetID, label, prob) = decoded[0][0]
label = "{}: {:.2f}%".format(label, prob * 100)
print("[INFO] {}".format(label))

# initialize our gradient class activation map and build the heatmap
cam = GradCAM(model, i)
heatmap = cam.compute_heatmap(image)
def predict_quad(model, img, pixel_threshold=cfg.pixel_threshold,
                 quiet=False, img_name=None):
    """
    Args:
        model: detection model (weights already loaded)
        img: input image (PIL Image)
        pixel_threshold: activation threshold
        quiet: suppress warnings about invalid quads
        img_name: name of the image, used to save intermediate results
    Returns:
        text_recs_all: a list whose elements are the quad coordinates
            of each detected text boundary
        text_recs_len: the length of text_recs_all, i.e. how many
            regions were detected
        img_all: a 4-D array; img_all[0] is the result of img_to_array
    """
    if not os.path.exists(root_temp):
        os.makedirs(root_temp)
    if not os.path.exists(root_predict):
        os.makedirs(root_predict)

    # Compute the resized width and height used for prediction.
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.BILINEAR).convert('RGB')
    img = image.img_to_array(img)

    num_img = 1
    # A 4-D tensor holding a single 3-D image tensor.
    img_all = np.zeros((num_img, d_height, d_wight, 3))
    img_all[0] = img

    # Scale the tensor values into [-1, 1].
    img_ori = imagenet_utils.preprocess_input(img, mode='tf')

    x = np.zeros((num_img, d_height, d_wight, 3))
    x[0] = img_ori

    # (sample, h, w, channels)
    y_pred = model.predict(x)

    text_recs_all = []
    text_recs_len = []
    for n in range(num_img):
        # (sample, rows, cols, 7_points_pred)
        y = y_pred[n]
        y[:, :, :3] = sigmoid(y[:, :, :3])
        cond = np.greater_equal(y[:, :, 0], pixel_threshold)
        # np.where returns a tuple: [0] holds row (y) indices,
        # [1] holds column (x) indices.
        activation_pixels = np.where(cond)
        quad_scores, quad_after_nms = nms(y, activation_pixels)

        text_recs = []
        x[n] = np.uint8(x[n])
        with image.array_to_img(img_all[n]) as im:
            # Note: the image handed to CRNN recognition is the
            # rescaled one.
            im_array = x[n]
            scale_ratio_w = 1
            scale_ratio_h = 1

            quad_im = im.copy()
            # Draw lines on a copy of the image wherever text is found.
            draw = ImageDraw.Draw(im)
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'blue'
                if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                    if y[i, j, 2] < cfg.trunc_threshold:
                        line_width, line_color = 2, 'yellow'
                    elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                        line_width, line_color = 2, 'green'
                draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                          width=line_width, fill=line_color)
            if img_name is not None:
                im.save(root_temp + img_name + '.jpg')

            quad_draw = ImageDraw.Draw(quad_im)
            for score, geo, s in zip(quad_scores, quad_after_nms,
                                     range(len(quad_scores))):
                if np.amin(score) > 0:
                    quad_draw.line([tuple(geo[0]), tuple(geo[1]),
                                    tuple(geo[2]), tuple(geo[3]),
                                    tuple(geo[0])],
                                   width=2, fill='blue')
                    if cfg.predict_cut_text_line:
                        cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                      im_array, img_name, s)
                    rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                    text_rec = np.reshape(rescaled_geo, (8,)).tolist()
                    text_recs.append(text_rec)
                elif not quiet:
                    print('quad invalid with vertex num less than 4.')
            if img_name is not None:
                quad_im.save(root_predict + img_name + '.jpg')

        for t in range(len(text_recs)):
            text_recs_all.append(text_recs[t])
        text_recs_len.append(len(text_recs))
    return text_recs_all, text_recs_len, img_all
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    # Scale the tensor values into [-1, 1].
    img = imagenet_utils.preprocess_input(img, mode='tf')
    # Add a batch dimension (4-D tensor).
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)

    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                       (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                       (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                       (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                       (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for score, geo, s in zip(quad_scores, quad_after_nms,
                                 range(len(quad_scores))):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]), tuple(geo[1]),
                                tuple(geo[2]), tuple(geo[3]),
                                tuple(geo[0])],
                               width=2, fill='red')
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                  im_array, img_path, s)
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
                txt_item = ','.join(map(str, rescaled_geo_list))
                txt_items.append(txt_item + '\n')
            elif not quiet:
                print('quad invalid with vertex num less than 4.')
        quad_im.save(img_path + '_predict.jpg')
    if cfg.predict_write2txt and len(txt_items) > 0:
        with open(img_path[:-4] + '.txt', 'w') as f_txt:
            f_txt.writelines(txt_items)
def detect_image(self, image):
    old_image = image.copy()

    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)
    scale = [im_width, im_height, im_width, im_height]
    scale_for_landmarks = [im_width, im_height, im_width, im_height,
                           im_width, im_height, im_width, im_height,
                           im_width, im_height]

    # Preprocess and normalize the image.
    photo = np.expand_dims(preprocess_input(image), 0)
    anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    preds = self.get_pred(photo)
    preds = [pred.numpy() for pred in preds]

    # Decode the predictions and apply non-maximum suppression.
    results = self.bbox_util.detection_out(
        preds, anchors, confidence_threshold=self.confidence)

    if len(results) <= 0:
        return old_image, 0, 0

    results = np.array(results)
    results[:, :4] = results[:, :4] * scale
    results[:, 5:] = results[:, 5:] * scale_for_landmarks

    for b in results:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)

        # Crop the detected face region (converted to BGR so it can be
        # saved with OpenCV if needed).
        global cnt, t0, t1
        t1 = time.time()
        image_clip = old_image[b[1]:b[3], b[0]:b[2]]
        image_clip = cv2.cvtColor(image_clip, cv2.COLOR_RGB2BGR)

        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                    0.5, (255, 255, 255))

        # Landmarks.
        cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

    return old_image, image_clip, len(results)
print(folder, z)
pathn = path + folder
os.chdir(pathn)
filenames = os.listdir(pathn)
for f in filenames:
    x = cv2.imread(f)
    x = cv2.resize(x, (224, 224), interpolation=cv2.INTER_CUBIC)
    # Augment only classes 1 and 2.
    if z == 1 or z == 2:
        D = d_aug(x)
    else:
        D = [x]
    for x in D:
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        # Note: dividing by 255 after preprocess_input normalizes
        # twice; make sure this matches how the model was trained.
        x = x / 255
        img_data.append(x)
        # Classes 1 and 2 are merged into label 1.
        if z == 2:
            labels.append(1)
        else:
            labels.append(z)

img_data = np.array(img_data)
img_data = img_data.astype('float32')
print(img_data.shape)
# Drop the singleton batch axis introduced by expand_dims.
img_data = np.rollaxis(img_data, 1, 0)
print(img_data.shape)
img_data = img_data[0]
print(img_data.shape)
pil_input_images = []  # Store resized versions of the images here.
input_images = []      # Fix: this list was used below but never initialized.

# We'll only load one image in this example.
img_dir = './example_images'
write_out_dir = './results'
os.makedirs(write_out_dir, exist_ok=True)

for image_path in glob.glob(os.path.join(img_dir, '*.jpg')):
    pil_image = image.load_img(image_path,
                               target_size=(config.height, config.width))
    pil_input_images.append(pil_image)
    img = np.array(pil_image)
    input_images.append(img)

input_images = preprocess_input(np.array(input_images))
bboxes, scores, labels = prediction_model.predict(input_images)

confidence_threshold = 0.75
for index in range(input_images.shape[0]):
    bbox = bboxes[index]
    confidence = scores[index]
    label = labels[index]
    print(bbox.shape)
    annotated_image = image_annotator(image=pil_input_images[index],
def upload_file():
    if request.method == 'POST':
        # Check that the POST request has the file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            global filename
            filename = secure_filename(file.filename)
            img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(img_path)

            # Get features of the uploaded image. First check whether we
            # already have this file (by name) in our database.
            have_file = database_df[database_df.loc[:, 'Filename'] == filename]
            if have_file.shape[0] > 0:
                # The features are already in the database.
                img_features = have_file.image_features.tolist()
            else:
                img_data = image.load_img(img_path,
                                          target_size=(pwidth, pheight))
                img_vector = image.img_to_array(img_data)
                img_vector = np.expand_dims(img_vector, axis=0)
                # Note: the original called preprocess_input twice here
                # (once directly, once via imagenet_utils); a single call
                # should be enough.
                img_vector = imagenet_utils.preprocess_input(img_vector)
                img_features = network_model.predict(img_vector)

            # Get the nearest neighbors of the query image.
            dist, nn_index = neighs.kneighbors(img_features,
                                               return_distance=True)
            distance = dist.tolist()[0]

            # Fix the path to the database entries.
            neighbors = database_df.iloc[nn_index.tolist()[0]].copy()
            neighbors.loc[:, 'db_path'] = neighbors.loc[:, 'path'].astype(str).copy()
            # This is where filtering should occur if we add handles
            # on the website.
            npath = neighbors['db_path'][0]
            header_copy = 'your example:'
            return render_template('album.html',
                                   header_copy=header_copy,
                                   image_name=filename,
                                   neighbors=neighbors,
                                   dist=distance)
vgg_face = Model(inputs=model.layers[0].input,
                 outputs=model.layers[-2].output)

# Prepare Train Data
x_train = []
y_train = []
# Prepare Test Data (collected across all persons, so the lists are
# initialized once, outside the loop)
x_test = []
y_test = []
person_rep = dict()
person_folders = os.listdir('Images_crop')
for i, person in enumerate(person_folders):
    person_rep[i] = person
    image_names = os.listdir('Images_crop/' + person + '/')
    for image_name in image_names:
        img = load_img('Images_crop/' + person + '/' + image_name,
                       target_size=(224, 224))
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        img_encode = vgg_face(img)
        x_train.append(np.squeeze(K.eval(img_encode)).tolist())
        y_train.append(i)

    test_image_names = os.listdir('Test_Images_crop/' + person + '/')
    for image_name in test_image_names:
        img = load_img('Test_Images_crop/' + person + '/' + image_name,
                       target_size=(224, 224))
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        img_encode = vgg_face(img)
def get_FPS(self, image, test_interval):
    # Convert to RGB so grayscale images can be predicted as well.
    image = image.convert("RGB")
    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]

    # Resize the input image so that its short side is 600.
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    # Preprocess and normalize the image.
    photo = preprocess_input(np.expand_dims(photo, 0))
    rpn_pred = self.model_rpn.predict(photo)

    # Decode the region proposal network's predictions.
    base_feature_width, base_feature_height = self.get_img_output_length(width, height)
    anchors = get_anchors([base_feature_width, base_feature_height], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    # Pass the proposals and the shared feature map to the classifier.
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    temp_ROIs = np.zeros_like(proposal_box)
    temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

    # Decode the classifier's predictions to obtain the final boxes.
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)

    if len(results[0]) > 0:
        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    # Time test_interval repetitions of the same pipeline.
    t1 = time.time()
    for _ in range(test_interval):
        rpn_pred = self.model_rpn.predict(photo)
        # Decode the region proposal network's predictions.
        base_feature_width, base_feature_height = self.get_img_output_length(width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        # Pass the proposals and the shared feature map to the classifier.
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])

        # Decode the classifier's predictions to obtain the final boxes.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) > 0:
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def prepare_input(file_path, keypoints, image_size=IMAGE_SIZE):
    img = tf.io.read_file(file_path)
    img = decode_img(img, image_size)
    # Torch-style normalization: scale to [0, 1], then subtract the
    # ImageNet mean and divide by the ImageNet std.
    img = preprocess_input(img, mode='torch')
    group_keypoints = tf.cast(tf.reshape(keypoints, shape=(n_keypoints, 2)),
                              tf.float32)
    return img, group_keypoints
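# A minimal sketch of wiring prepare_input into a tf.data pipeline.
# file_paths and keypoints are assumed to be parallel arrays prepared
# elsewhere; the batch size is illustrative.
import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices((file_paths, keypoints))
dataset = (dataset
           .map(prepare_input, num_parallel_calls=tf.data.AUTOTUNE)
           .batch(32)
           .prefetch(tf.data.AUTOTUNE))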