def show_tfms(learn, rows=3, cols=3, figsize=(8, 8)):
    """Show the effect of the dataset transforms (or augmentation callbacks) on a batch."""
    xb, yb = learn.data.one_batch()
    rand_int = np.random.randint(len(xb))
    rand_img = Image(xb[rand_int])
    tfms = learn.data.train_ds.tfms
    # Apply the dataset transforms to every image in the batch
    for i in range(len(xb)):
        xb[i] = Image(xb[i]).apply_tfms(tfms).data
    cb_tfms = 0
    # Look for a callback that transforms the batch in `on_batch_begin` (e.g. MixUp)
    for cb in learn.callback_fns:
        if hasattr(cb, 'keywords') and hasattr(get_fn(cb), 'on_batch_begin'):
            cb_fn = partial(get_fn(cb), **cb.keywords)
            try:
                axes = plt.subplots(rows, cols, figsize=figsize)[1].flatten()
                plt.suptitle(get_fn(cb).__name__, size=14)
                for ax in axes:
                    Image(cb_fn(learn).on_batch_begin(xb, yb, True)['last_input'][0]).show(ax=ax)
                plt.show()
                cb_tfms += 1
                break
            except Exception:
                plt.close('all')
    # Fall back to showing the plain dataset transforms
    if cb_tfms == 0:
        if tfms is not None:
            t_ = [get_fn(t).__name__ for t in learn.data.train_ds.tfms]
            title = f"{str(t_)[1:-1]} transforms applied"
        else:
            title = 'No transform applied'
        axes = plt.subplots(rows, cols, figsize=figsize)[1].flatten()
        plt.suptitle(title, size=14)
        for ax in axes:
            rand_img.apply_tfms(tfms).show(ax=ax)
        plt.show()
    return learn
def open_4_channel(fname):
    """Open the four single-channel stain images for `fname` as one 4-channel Image."""
    fname = str(fname)
    # Strip the extension before adding the color suffix
    if fname.endswith('.png'):
        fname = fname[:-4]
    colors = ['red', 'green', 'blue', 'yellow']
    flags = cv2.IMREAD_GRAYSCALE
    img = [cv2.imread(fname + '_' + color + '.png', flags).astype(np.float32) / 255
           for color in colors]
    x = np.stack(img, axis=-1)
    return Image(pil2tensor(x, np.float32).float())

## DEBUGGING variant: prints diagnostics when OpenCV fails to open a file
# def open_4_channel(fname):
#     fname = str(fname)
#     if fname.endswith('.png'):
#         fname = fname[:-4]
#     colors = ['red', 'green', 'blue', 'yellow']
#     flags = cv2.IMREAD_GRAYSCALE
#     img = []
#     for color in colors:
#         try:
#             im = cv2.imread(fname + '_' + color + '.png', flags).astype(np.float32) / 255
#             img.append(im)
#         except AttributeError:
#             print(f"color: {color}")
#             print(f"fname: {fname}")
#             print(f"flags: {flags}")
#             print(f"Error: OpenCV was unable to open: {fname + '_' + color + '.png'}")
#     x = np.stack(img, axis=-1)
#     return Image(pil2tensor(x, np.float32).float())
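# Usage sketch, assuming fastai v1: images are loaded through `ImageList.open`,
# so the 4-channel loader can be patched in via a small subclass. The class
# name and the `train_df`/`path` variables below are hypothetical.
class MultiChannelImageList(ImageList):
    def open(self, fn):
        return open_4_channel(fn)

# src = MultiChannelImageList.from_df(train_df, path, folder='train')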
def run(self):
    with verrou:  # `verrou` is a threading lock shared across worker threads
        H, W, C = self.image.shape
        X, Y, w, h = self.coord
        # Crop a region twice the detection size, clipped to the image bounds
        X_1, X_2 = max(0, X - int(w)), min(X + int(w), W)
        Y_1, Y_2 = max(0, Y - int(h)), min(Y + int(h), H)
        img_cp = self.image[Y_1:Y_2, X_1:X_2].copy()
        img_cp1 = cv2.cvtColor(img_cp, cv2.COLOR_BGR2RGB)
        prediction = str(
            learn.predict(Image(pil2tensor(img_cp1, np.float32).div_(255)))[0]
        ).split(";")
        label = " ".join(prediction)
        label_list = label.split(" ")
        self.image = cv2.rectangle(self.image, (X, Y), (X + w, Y + h), (150, 0, 100), 2)
        # Draw one attribute per line, stacked above the face box
        for idx, lbl in enumerate(label_list, start=1):
            cv2.putText(
                self.image,
                LABEL_MAP[lbl],
                (X, Y - 14 * idx),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.45,
                (80, 100, 50),
                2,
            )
        print("Label :", label)
def image_to_vec(url_img, hook, learner):
    """Convert an image into a feature vector via a forward hook on the model."""
    print("Convert image to vec")
    # Run a prediction only for its side effect: the hook captures the activations
    _ = learner.predict(Image(pil2tensor(url_img, np.float32).div_(255)))
    vect = hook.features[-1]
    return vect
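# A minimal sketch of the hook object this function assumes: it registers a
# forward hook on a chosen module and stores each output in `features`. The
# class name and the choice of hooked module are assumptions, not part of the
# original code.
class FeatureHook:
    def __init__(self, module):
        self.features = []
        self.handle = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, inp, outp):
        # Detach so stored activations don't keep the graph alive
        self.features.append(outp.detach().cpu().squeeze())

    def remove(self):
        self.handle.remove()

# hook = FeatureHook(learner.model[1])  # e.g. the head of a fastai cnn_learner
# vec = image_to_vec(url_img, hook, learner)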
def show_multi_img_tfms(learn, rows=3, cols=3, figsize=(8, 8)):
    xb, yb = learn.data.one_batch()
    tfms = learn.data.train_ds.tfms
    for i in range(len(xb)):
        xb[i] = Image(xb[i]).apply_tfms(tfms).data
    # Try each callback until one provides a usable `on_batch_begin` transform
    for cb in learn.callback_fns:
        try:
            cb_fn = partial(cb.func, **cb.keywords)
            axes = plt.subplots(rows, cols, figsize=figsize)[1].flatten()
            for ax in axes:
                Image(cb_fn(learn).on_batch_begin(xb, yb, True)['last_input'][0]).show(ax=ax)
            plt.show()
            break
        except Exception:
            plt.close('all')
    return learn
def open(self, fn):
    """
    fn: path to the image file
    return: Image containing a FloatTensor with values in [0, 1]
    """
    x = open_image(fn)
    x = pil2tensor(x, np.float32)
    # Replicate the single channel three times to get a 3-channel image
    x = torch.cat((x, x, x))
    return Image(x / 255)
def feed(self):
    def extractFaceCoords(img, cascade, tolerance):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        face_coords = cascade.detectMultiScale(gray, 1.2, tolerance, minSize=(60, 60))
        return face_coords

    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        face_coords = extractFaceCoords(frame, self.cascade, self.tolerance)
        if face_coords is not None:
            for coords in face_coords:
                x, y, w, h = coords
                # The x-range of the crop must extend by the width `w`, not `h`
                face_rgb = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2RGB)
                img_fastai = Image(pil2tensor(face_rgb, np.float32).div_(255))
                prediction = self.learn.predict(img_fastai)
                if int(prediction[0]) == 1:
                    result = self.Feature + ': True'
                else:
                    result = self.Feature + ': False'
                p = prediction[2].tolist()
                prob = 'probability: ' + str(round(p[1], 3))
                cv2.rectangle(img=frame, pt1=(x, y), pt2=(x + w, y + h),
                              color=(255, 255, 0), thickness=1)  # color in BGR
                cv2.putText(img=frame, text=prob, org=(x, y - 13),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5,
                            color=(0, 255, 0), thickness=1)
                cv2.putText(img=frame, text=result, org=(x, y - 26),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5,
                            color=(0, 255, 0), thickness=1)
        # Encode the annotated frame as a JPEG chunk of an MJPEG stream
        ret, jpeg = cv2.imencode('.jpg', frame)
        output = jpeg.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + output + b'\r\n\r\n')
def arr2image(arr, do_normalize=False, stats=imagenet_stats):
    """Converts a numpy array to a 3-channel fastai Image."""
    # Drop empty dimensions
    data = np.squeeze(arr)
    # Convert to tensor
    data = torch.from_numpy(data)
    # Stack 3 copies as new channels
    data = torch.stack([data, data, data])
    # Normalize if necessary
    if do_normalize:
        nml = Normalize(stats[0], stats[1], inplace=True)
        nml(data)
    # Return as image
    return Image(data)
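# Usage sketch: wrap a single-channel array (e.g. a spectrogram or depth map)
# so it can be fed to a model trained on 3-channel ImageNet inputs. The dummy
# array below is an illustration only.
arr = np.random.rand(224, 224).astype(np.float32)
img = arr2image(arr, do_normalize=True)
# img.show()  # or: learn.predict(img)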
ret = count % 5
if ret == 0:
    H, W, C = img.shape
    gray = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY)
    Traffic_sign = Traffic.detectMultiScale(gray, scaleFactor=2, minNeighbors=5,
                                            minSize=(90, 90), maxSize=(120, 120))  # 1.05
    if len(Traffic_sign) < 1:
        print("NOTHING FOUND")
    elif len(Traffic_sign) < 2:
        print("Found 1")
        X, Y, w, h = Traffic_sign[0]
        X_1, X_2 = max(0, X - int(w)), min(X + int(w), W)
        Y_1, Y_2 = max(0, Y - int(h)), min(Y + int(h), H)
        img_cp = img[Y_1:Y_2, X_1:X_2].copy()
        img_cp1 = cv2.cvtColor(img_cp, cv2.COLOR_BGR2RGB)
        prediction = str(
            learn.predict(Image(pil2tensor(img_cp1, np.float32).div_(255)))[0]
        ).split(";")
        img = cv2.rectangle(img, (X, Y), (X + w, Y + h), (150, 0, 100), 2)
        label = " ".join(prediction)
        label_list = label.split(" ")
        for idx, lbl in enumerate(label_list, start=1):
            cv2.putText(
                img,
                LABEL_MAP[lbl],
                (X, Y - 14 * idx),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.45,
                (80, 100, 50),  # color/thickness taken from the identical call in the threaded variant; the original was truncated here
                2,
            )
def predict(self,
            image_path,
            threshold=0.1,
            nms_overlap=0.1,
            return_scores=True,
            visualize=False,
            resize=False):
    """
    Predicts and displays the results of a trained model on a single image.

    =====================   ===========================================
    **Argument**            **Description**
    ---------------------   -------------------------------------------
    image_path              Required. Path to the image file to make
                            the predictions on.
    ---------------------   -------------------------------------------
    threshold               Optional float. The probability above which
                            a detection will be considered valid.
    ---------------------   -------------------------------------------
    nms_overlap             Optional float. The intersection over union
                            threshold with other predicted bounding
                            boxes, above which the box with the highest
                            score will be considered a true positive.
    ---------------------   -------------------------------------------
    return_scores           Optional boolean. Will return the
                            probability scores of the bounding box
                            predictions if True.
    ---------------------   -------------------------------------------
    visualize               Optional boolean. Displays the image with
                            predicted bounding boxes if True.
    ---------------------   -------------------------------------------
    resize                  Optional boolean. Resizes the image to the
                            same size (chip_size parameter in
                            prepare_data) that the model was trained
                            on, before detecting objects. Note that if
                            the resize_to parameter was used in
                            prepare_data, the image is resized to that
                            size instead.

                            By default, this parameter is False and the
                            detections are run in a sliding window
                            fashion by applying the model on cropped
                            sections of the image (of the same size as
                            the model was trained on).
    =====================   ===========================================

    :returns: 'List' of xmin, ymin, width, height of predicted bounding
              boxes on the given image
    """
    if not HAS_OPENCV:
        raise Exception(
            "This function requires opencv 4.0.1.24. "
            "Install it using pip install opencv-python==4.0.1.24"
        )

    if not HAS_PIL:
        raise Exception(
            "This function requires PIL. Please install it via pip or conda"
        )

    if isinstance(image_path, str):
        image = cv2.imread(image_path)
    else:
        image = image_path

    orig_height, orig_width, _ = image.shape
    orig_frame = image.copy()

    if resize and self._data.resize_to is None \
            and self._data.chip_size is not None:
        image = cv2.resize(image, (self._data.chip_size, self._data.chip_size))

    if self._data.resize_to is not None:
        if isinstance(self._data.resize_to, tuple):
            image = cv2.resize(image, self._data.resize_to)
        else:
            image = cv2.resize(image, (self._data.resize_to, self._data.resize_to))

    height, width, _ = image.shape

    if self._data.chip_size is not None:
        chips = _get_image_chips(image, self._data.chip_size)
    else:
        chips = [{
            'width': width,
            'height': height,
            'xmin': 0,
            'ymin': 0,
            'chip': image,
            'predictions': []
        }]

    # Disable validation transforms while predicting on raw chips
    valid_tfms = self._data.valid_ds.tfms
    self._data.valid_ds.tfms = []

    include_pad_detections = False
    if len(chips) == 1:
        include_pad_detections = True

    for chip in chips:
        frame = Image(
            pil2tensor(
                PIL.Image.fromarray(cv2.cvtColor(chip['chip'], cv2.COLOR_BGR2RGB)),
                dtype=np.float32
            ).div_(255))
        self.learn.predicting = True
        bbox = self.learn.predict(frame,
                                  thresh=threshold,
                                  nms_overlap=nms_overlap,
                                  ret_scores=True,
                                  model=self)[0]
        if bbox:
            scores = bbox.scores
            bboxes, lbls = bbox._compute_boxes()
            # Boxes come back in [-1, 1] coordinates; map them to chip pixels
            bboxes.add_(1).mul_(
                torch.tensor([
                    chip['height'] / 2, chip['width'] / 2,
                    chip['height'] / 2, chip['width'] / 2
                ])).long()
            for index, bbox in enumerate(bboxes):
                if lbls is not None:
                    label = lbls[index]
                else:
                    label = 'Default'
                data = bb2hw(bbox)
                if include_pad_detections or not _exclude_detection(
                        (data[0], data[1], data[2], data[3]),
                        chip['width'], chip['height']):
                    chip['predictions'].append({
                        'xmin': data[0],
                        'ymin': data[1],
                        'width': data[2],
                        'height': data[3],
                        'score': float(scores[index]),
                        'label': label
                    })

    self._data.valid_ds.tfms = valid_tfms

    predictions, labels, scores = _get_transformed_predictions(chips)

    # Scale the predictions to the original image and clip them to the image dims
    y_ratio = orig_height / height
    x_ratio = orig_width / width

    for index, prediction in enumerate(predictions):
        prediction[0] = prediction[0] * x_ratio
        prediction[1] = prediction[1] * y_ratio
        prediction[2] = prediction[2] * x_ratio
        prediction[3] = prediction[3] * y_ratio

        # Clip xmin
        if prediction[0] < 0:
            prediction[2] = prediction[2] + prediction[0]
            prediction[0] = 1

        # Clip width when xmax is greater than the original width
        if prediction[0] + prediction[2] > orig_width:
            prediction[2] = (prediction[0] + prediction[2]) - orig_width

        # Clip ymin
        if prediction[1] < 0:
            prediction[3] = prediction[3] + prediction[1]
            prediction[1] = 1

        # Clip height when ymax is greater than the original height
        if prediction[1] + prediction[3] > orig_height:
            prediction[3] = (prediction[1] + prediction[3]) - orig_height

        predictions[index] = [
            prediction[0], prediction[1], prediction[2], prediction[3]
        ]

    if visualize:
        image = _draw_predictions(orig_frame, predictions, labels,
                                  color=(255, 0, 0), fontface=2, thickness=1)
        import matplotlib.pyplot as plt
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if getattr(self._data, "_is_coco", "") == True:
            figsize = (20, 20)
        else:
            figsize = (4, 4)
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax.imshow(image)

    if return_scores:
        return predictions, labels, scores
    else:
        return predictions, labels
def reconstruct(self, t):
    return Image(t)
def np_to_image(abs: np.ndarray, phase: np.ndarray) -> Image:
    """Pack magnitude and phase arrays into the first two channels of an Image."""
    a = np.array([abs, phase, np.zeros(abs.shape)])
    a = torch.Tensor(a)  # ndarray -> Tensor
    return Image(a)      # Tensor -> Image
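# Usage sketch: this pairs naturally with magnitude/phase of a 2D spectrum
# (the FFT source below is an assumption for illustration, not from the original).
spec = np.fft.fft2(np.random.rand(128, 128))
img = np_to_image(np.abs(spec), np.angle(spec))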
for c in os.listdir('flowers/' + i):
    flower_files.append('flowers/' + i + '/' + c)

# Split the file list in half and compare predictions pairwise
p = len(flower_files)
first_half = flower_files[0:int(p / 2)]
second_half = flower_files[int(p / 2):p]

for c in range(int(p / 2)):
    img1 = cv2.imread(first_half[c])
    img2 = cv2.imread(second_half[c])
    size = (400, 400)
    img1 = cv2.resize(img1, size)
    img2 = cv2.resize(img2, size)
    img = np.concatenate((img1, img2), axis=1)
    # OpenCV loads BGR; convert to RGB before prediction
    img1_pred = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    img2_pred = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
    pred_1 = str(learn.predict(Image(pil2tensor(img1_pred, np.float32).div_(255)))[0])
    pred_2 = str(learn.predict(Image(pil2tensor(img2_pred, np.float32).div_(255)))[0])
    print(pred_1, i)
    # Overlay a check or cross depending on whether the prediction matches the folder label
    if pred_1 == str(i):
        src = cv2.imread('true.png', -1)
    else:
        src = cv2.imread('wrong.png', -1)
    img = overlay_transparent(img, src, 150, 100, (150, 150))
    if pred_2 == str(i):
        src = cv2.imread('true.png', -1)
    else:
        src = cv2.imread('wrong.png', -1)
    img = overlay_transparent(img, src, 530, 100, (150, 150))
def detect_facial_attributes(input_path, output_path, save_video):
    path = Path(input_path)

    # Creating a databunch
    imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    data = (ImageItemList.from_csv(path, csv_name="labels.csv")
            .no_split()
            .label_from_df(label_delim=" ")
            .transform(None, size=128)
            .databunch(no_check=True)
            .normalize(imagenet_stats))

    # Loading our model
    learn = create_cnn(data, models.resnet50, pretrained=False)
    learn.load("ff_stage-2-rn50")

    # Loading HAAR cascade
    face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

    cap = cv2.VideoCapture(0)
    if save_video:
        out = cv2.VideoWriter(output_path + "output.avi", -1, 20.0, (640, 480))

    while True:
        # Capture frame-by-frame
        _, frame = cap.read()

        # Our operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Find faces using Haar cascade
        face_coord = face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(30, 30))

        ## Looping through each face
        for coords in face_coord:
            ## Finding co-ordinates of face
            X, Y, w, h = coords
            ## Finding frame size
            H, W, _ = frame.shape

            ## Computing larger face co-ordinates
            X_1, X_2 = (max(0, X - int(w * 0.35)), min(X + int(1.35 * w), W))
            Y_1, Y_2 = (max(0, Y - int(0.35 * h)), min(Y + int(1.35 * h), H))

            ## Cropping face and changing BGR To RGB
            img_cp = frame[Y_1:Y_2, X_1:X_2].copy()
            img_cp1 = cv2.cvtColor(img_cp, cv2.COLOR_BGR2RGB)

            ## Prediction of facial features
            prediction = str(
                learn.predict(Image(pil2tensor(img_cp1, np.float32).div_(255)))[0]
            ).split(";")
            ## Prefix the attributes that are only labelled by absence of a tag;
            ## the second prefix must build on the first rather than overwrite it
            label = (" ".join(prediction) if "Male" in prediction
                     else "Female " + " ".join(prediction))
            if "No_Beard" not in prediction:
                label = "Beard " + label

            ## Drawing facial boundaries
            cv2.rectangle(
                img=frame,
                pt1=(X, Y),
                pt2=(X + w, Y + h),
                color=(128, 128, 0),
                thickness=2,
            )

            ## Drawing facial attributes identified
            label_list = label.split(" ")
            for idx in range(1, len(label_list) + 1):
                cv2.putText(
                    frame,
                    label_list[idx - 1],
                    (X, Y - 14 * idx),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.45,
                    (0, 128, 0),
                    2,
                )

        # Display the resulting frame
        cv2.imshow("frame", frame)

        ## Save the resulting frame
        if save_video:
            out.write(frame)

        ## Escape key
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture
    cap.release()
    if save_video:
        out.release()
    cv2.destroyAllWindows()
def gen_image(embedding):
    with torch.no_grad():
        _, img = gan_gen(embedding[None])  # add a batch dimension
    return Image(img[0].float().clamp(min=0, max=1))
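# Usage sketch: `gan_gen` is assumed to be a generator returning a
# (loss, image-batch) pair; the embedding size below is hypothetical.
z = torch.randn(512)
img = gen_image(z)
# img.show()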
def fastai_img(img):
    """Wrap a PIL image as a fastai Image, for getting preds directly from a fastai model."""
    from fastai.vision.image import Image
    import torchvision.transforms as tfms
    img_tensor = tfms.ToTensor()(img)  # HWC uint8 [0, 255] -> CHW float [0, 1]
    return Image(img_tensor)
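# Usage sketch (the file name and `learn` learner are hypothetical):
from PIL import Image as PILImage

pil_img = PILImage.open('some_photo.jpg').convert('RGB')
pred_class, pred_idx, probs = learn.predict(fastai_img(pil_img))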
def predict(self,
            image_path,
            threshold=0.5,
            nms_overlap=0.1,
            return_scores=False,
            visualize=False):
    """
    Predicts and displays the results of a trained model on a single image.

    =====================   ===========================================
    **Argument**            **Description**
    ---------------------   -------------------------------------------
    image_path              Required. Path to the image file to make
                            the predictions on.
    ---------------------   -------------------------------------------
    threshold               Optional float. The probability above which
                            a detection will be considered valid.
    ---------------------   -------------------------------------------
    nms_overlap             Optional float. The intersection over union
                            threshold with other predicted bounding
                            boxes, above which the box with the highest
                            score will be considered a true positive.
    ---------------------   -------------------------------------------
    return_scores           Optional boolean. Will return the
                            probability scores of the bounding box
                            predictions if True.
    ---------------------   -------------------------------------------
    visualize               Optional boolean. Displays the image with
                            predicted bounding boxes if True.
    =====================   ===========================================

    :returns: 'List' of xmin, ymin, width, height of predicted bounding
              boxes on the given image
    """
    if not HAS_OPENCV:
        raise Exception(
            "This function requires opencv 4.0.1.24. "
            "Install it using pip install opencv-python==4.0.1.24"
        )

    if isinstance(image_path, str):
        image = cv2.imread(image_path)
    else:
        image = image_path

    orig_height, orig_width, _ = image.shape
    orig_frame = image.copy()

    if self._data.resize_to is not None:
        if isinstance(self._data.resize_to, tuple):
            image = cv2.resize(image, self._data.resize_to)
        else:
            image = cv2.resize(image, (self._data.resize_to, self._data.resize_to))

    height, width, _ = image.shape

    if self._data.chip_size is not None:
        chips = _get_image_chips(image, self._data.chip_size)
    else:
        chips = [{
            'width': width,
            'height': height,
            'xmin': 0,
            'ymin': 0,
            'chip': image,
            'predictions': []
        }]

    # Disable validation transforms while predicting on raw chips
    valid_tfms = self._data.valid_ds.tfms
    self._data.valid_ds.tfms = []

    for chip in chips:
        frame = Image(
            pil2tensor(
                PIL.Image.fromarray(cv2.cvtColor(chip['chip'], cv2.COLOR_BGR2RGB)),
                dtype=np.float32
            ).div_(255))
        bbox = self.learn.predict(frame,
                                  thresh=threshold,
                                  nms_overlap=nms_overlap,
                                  ret_scores=True,
                                  ssd=self)[0]
        if bbox:
            scores = bbox.scores
            bboxes, lbls = bbox._compute_boxes()
            # Boxes come back in [-1, 1] coordinates; map them to chip pixels
            bboxes.add_(1).mul_(
                torch.tensor([
                    chip['height'] / 2, chip['width'] / 2,
                    chip['height'] / 2, chip['width'] / 2
                ])).long()
            for index, bbox in enumerate(bboxes):
                if lbls is not None:
                    label = lbls[index]
                else:
                    label = 'Default'
                data = bb2hw(bbox)
                if not _exclude_detection((data[0], data[1], data[2], data[3]),
                                          chip['width'], chip['height']):
                    chip['predictions'].append({
                        'xmin': data[0],
                        'ymin': data[1],
                        'width': data[2],
                        'height': data[3],
                        'score': float(scores[index]),
                        'label': label
                    })

    self._data.valid_ds.tfms = valid_tfms

    predictions, labels, scores = _get_transformed_predictions(chips)

    # Scale the predictions to the original image and clip them to the image dims
    y_ratio = orig_height / height
    x_ratio = orig_width / width

    for index, prediction in enumerate(predictions):
        prediction[0] = prediction[0] * x_ratio
        prediction[1] = prediction[1] * y_ratio
        prediction[2] = prediction[2] * x_ratio
        prediction[3] = prediction[3] * y_ratio

        # Clip xmin
        if prediction[0] < 0:
            prediction[2] = prediction[2] + prediction[0]
            prediction[0] = 1

        # Clip width when xmax is greater than the original width
        if prediction[0] + prediction[2] > orig_width:
            prediction[2] = (prediction[0] + prediction[2]) - orig_width

        # Clip ymin
        if prediction[1] < 0:
            prediction[3] = prediction[3] + prediction[1]
            prediction[1] = 1

        # Clip height when ymax is greater than the original height
        if prediction[1] + prediction[3] > orig_height:
            prediction[3] = (prediction[1] + prediction[3]) - orig_height

        predictions[index] = [
            prediction[0], prediction[1], prediction[2], prediction[3]
        ]

    if visualize:
        image = _draw_predictions(orig_frame, predictions, labels)
        import matplotlib.pyplot as plt
        plt.xticks([])
        plt.yticks([])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.imshow(PIL.Image.fromarray(image))

    if return_scores:
        return predictions, labels, scores
    else:
        return predictions, labels
def open(self, fn):
    # Load a pre-saved tensor from disk and wrap it as a fastai Image
    return Image(torch.load(fn, map_location='cpu').type(torch.float))
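# Usage sketch, assuming this `open` lives on a custom fastai v1 ImageList
# subclass; the class name and paths below are hypothetical.
class TensorImageList(ImageList):
    def open(self, fn):
        return Image(torch.load(fn, map_location='cpu').type(torch.float))

# torch.save(tensor_chw, 'train/0001.pt')  # written during preprocessing
# il = TensorImageList.from_folder('train', extensions=['.pt'])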
    index = i
    x, y, w, h = face_coords[index]
    return x, y, w, h

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    # H, W, D = frame.shape
    face_coords = extractFaceCoords(frame, fc, 1)
    if face_coords is not None:
        x, y, w, h = face_coords
        # The x-range of the crop must extend by the width `w`, not `h`
        face_rgb = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2RGB)
        img_fastai = Image(pil2tensor(face_rgb, np.float32).div_(255))
        prediction = str(learn.predict(img_fastai))
        cv2.rectangle(img=frame, pt1=(x, y), pt2=(x + w, y + h),
                      color=(255, 255, 255), thickness=1)
        cv2.putText(img=frame, text=prediction, org=(x, y - 13),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5,
                    color=(255, 255, 255))
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break