def masks(people, objects, im, im_num):
    maxArea = -1
    maxAreaIndex = -1
    for i in range(1, people.number + 1):
        if people.getarea(i) > maxArea:
            maxArea = people.getarea(i)
            maxAreaIndex = i
    print('max area {}'.format(maxAreaIndex))
    wanted = [maxAreaIndex]
    wanted, wantedset = overlappeep(people, wanted, people)
    wantedobj, wantedobjset = overlappeep(objects, wanted, people)
    print('wanted people {}'.format(wanted))
    mask = np.zeros(im.shape[:-1])
    mask = maskadd(people, wantedset, mask)
    mask = maskadd(objects, wantedobjset, mask)
    m = np.zeros(im.shape)
    incomp = np.zeros(im.shape)
    for i in range(3):
        incomp[:, :, i] = np.where(mask == 0, im[:, :, i], 255)
        m[:, :, i] = np.where(mask == 0, m[:, :, i], 255)
    incomp = cv2.resize(incomp, (256, 256))
    m = cv2.resize(m, (256, 256))
    cv2.imwrite(os.path.join('/content/drive/My Drive/Masks', '{}.png'.format(str(im_num))), m)
    cv2.imwrite(os.path.join('/content/drive/My Drive/Images', '{}.png'.format(str(im_num))), incomp)
    cv2_imshow(incomp)
    cv2_imshow(m)
def image_refiner(gray):
    org_size = 22
    img_size = 28
    rows, cols = gray.shape
    if rows > cols:
        factor = org_size / rows
        rows = org_size
        cols = int(round(cols * factor))
    else:
        factor = org_size / cols
        cols = org_size
        rows = int(round(rows * factor))
    gray = cv2.resize(gray, (cols, rows))
    cv2_imshow(gray)
    # compute the padding needed to centre the resized image in a 28x28 canvas
    colsPadding = (int(math.ceil((img_size - cols) / 2.0)), int(math.floor((img_size - cols) / 2.0)))
    rowsPadding = (int(math.ceil((img_size - rows) / 2.0)), int(math.floor((img_size - rows) / 2.0)))
    # apply padding
    gray = np.lib.pad(gray, (rowsPadding, colsPadding), 'constant')
    cv2_imshow(gray)
    return gray
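# Minimal usage sketch for image_refiner (the file name 'digit.png' is an
# assumption for illustration; the input must be a single-channel image):
digit = cv2.imread('digit.png', cv2.IMREAD_GRAYSCALE)  # hypothetical crop of one digit
refined = image_refiner(digit)
print(refined.shape)  # (28, 28), ready for an MNIST-style model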
def show_images(img_array, denorm=True, deprcs=True):
    shape = img_array.shape
    img_array = img_array.reshape((-1, shape[-4], shape[-3], shape[-2], shape[-1]))
    # read grid dimensions and channel count from the reshaped array
    channels = img_array.shape[-1]
    resolution = img_array.shape[2]
    img_rows = img_array.shape[0]
    img_cols = img_array.shape[1]
    # tile the grid of images into one large canvas
    img = np.full([resolution * img_rows, resolution * img_cols, channels], 0.0)
    for r in range(img_rows):
        for c in range(img_cols):
            img[(resolution * r):(resolution * (r + 1)),
                (resolution * (c % 10)):(resolution * ((c % 10) + 1)), :] = img_array[r, c]
    if denorm:
        img = de_norm(img)
    if deprcs:
        img = deprocess(img)
    cv2_imshow(img)
def transform_image(imgFileName, height, width, show=False):
    image = cv2.imread(imgFileName)
    imageH, imageW, _ = image.shape
    if show:
        print(image.shape)
        cv2_imshow(image)
    if width <= 0 or height <= 0:
        return image

    # Step 1: change (portrait/landscape) orientation of the image if necessary
    if (width > height and imageW < imageH) or (width < height and imageW > imageH):
        rotated_image = cv2.transpose(image)
        image = cv2.flip(rotated_image, flipCode=0)
        imageH, imageW, _ = image.shape
        if show:
            print(image.shape)
            cv2_imshow(image)

    # Step 2: find the scale and centre-crop the image to the target aspect ratio
    scale = min(float(imageW) / width, float(imageH) / height)
    newWidth = int(width * scale)
    newHeight = int(height * scale)
    if abs(scale - 1.0) > sys.float_info.epsilon:
        image = image[int((imageH - newHeight) / 2):int((imageH + newHeight) / 2),
                      int((imageW - newWidth) / 2):int((imageW + newWidth) / 2)]
        imageH, imageW, _ = image.shape
        if show:
            print(image.shape)
            cv2_imshow(image)

    # Step 3: resize to the requested width and height
    if image.shape[0] != height or image.shape[1] != width:
        image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
        imageH, imageW, _ = image.shape
        if show:
            print(image.shape)
            cv2_imshow(image)
    return image
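# A minimal usage sketch for transform_image (the file name 'input.jpg' and the
# 640x480 target are assumptions for illustration):
thumb = transform_image('input.jpg', height=480, width=640, show=False)
cv2.imwrite('thumb.jpg', thumb)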
def visualise_landmarks(test, shape, alpha):
    output = test.copy()
    overlay = test.copy()
    (i, j) = facial_landmarks_index["jaw"]  # coordinates for the jaw only
    points = shape[i:j]
    for k in range(1, len(points)):
        x = tuple(points[k - 1])
        y = tuple(points[k])
        cv2.line(overlay, x, y, (0, 0, 255), 2)  # draw the line between consecutive (x, y) coordinates
    cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)  # apply the transparent overlay
    return cv2_imshow(output)  # final output
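# Hedged usage sketch: visualise_landmarks expects a BGR image and an (N, 2)
# landmark array. The dlib detector/predictor flow below is an assumption;
# 'shape_predictor_68_face_landmarks.dat' is dlib's standard 68-point model.
import dlib
from imutils import face_utils

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
img = cv2.imread('face.jpg')  # hypothetical input image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for rect in detector(gray, 1):
    shape = face_utils.shape_to_np(predictor(gray, rect))
    visualise_landmarks(img, shape, alpha=0.5)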
def Threshold(plates):
    for i, plate in enumerate(plates):
        img = cv2.imread(plate)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        cv2_imshow(gray)
        ret, thresh = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)
        print('Threshold')
        cv2_imshow(thresh)
        threshMean = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 5, 10)
        print('Adaptive_Threshold')
        cv2_imshow(threshMean)
        threshGauss = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 51, 27)
        print('Thresh_Gauss')
        cv2_imshow(threshGauss)
        cv2.imwrite("/content/processed/plate{}.png".format(i), threshGauss)
        cv2.waitKey(0)
def doSeg(inputImgPath=defaultImgPath, sigma=0.1, k=300, min_size=1000):
    """
    Run OpenCV's graph-based image segmentation on an image.
    @input:
        sigma: float: smoothing parameter applied to the image.
        k: int: the k parameter of the algorithm.
        min_size: int: the minimum size of segments.
    @output:
        None.
    """
    # check file path validity
    if not os.path.isfile(inputImgPath):
        print(f"ERROR: bad input image path. (got '{inputImgPath}')")
        return

    # enable multi-threading to optimize performance
    cv2.setUseOptimized(True)
    cv2.setNumThreads(8)

    # create segmenter instance
    mySeg = cv2.ximgproc.segmentation.createGraphSegmentation(sigma=sigma, k=k, min_size=min_size)

    # load input image
    inputImg = cv2.imread(inputImgPath)

    # process the input image using the segmenter; the result is a 2-D array
    # holding the segment label of each pixel
    segment = mySeg.processImage(inputImg)
    mask = segment.reshape(list(segment.shape) + [1]).repeat(3, axis=2)
    masked = np.ma.masked_array(inputImg, fill_value=0)
    for i in range(np.max(segment) + 1):  # labels run 0..max, so include the last one
        masked.mask = mask != i
        rows, cols = np.where(segment == i)
        row_min, row_max = min(rows), max(rows)
        col_min, col_max = min(cols), max(cols)
        imgUT = masked.filled()[row_min:row_max + 1, col_min:col_max + 1]
        # cv2.imwrite('segment_{num}.jpg'.format(num=i), imgUT)
        cv2_imshow(imgUT)
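# Minimal usage sketch (the path and parameter values are illustrative guesses;
# larger k and min_size generally produce coarser segmentations):
doSeg('example.jpg', sigma=0.5, k=500, min_size=2000)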
def mask(segmentation, target_image, effect_image, id, opacity):
    category_id = next(filter(lambda x: x['category_id'] == id, segments_info))['id']
    effect_h, effect_w, _ = effect_image.shape
    print(effect_w, effect_h)
    target_h, target_w, _ = target_image.shape
    crop_effect = effect_image[0:target_h, 0:target_w]
    crop_effect = add_channel(crop_effect)
    prediction = segmentation.cpu().numpy()
    # boolean mask of the pixels whose predicted id matches the requested category
    target_prediction = np.array([[prediction[j][i] == category_id
                                   for i in range(len(prediction[j]))]
                                  for j in range(len(prediction))])
    crop_effect[~target_prediction, :] = [0, 0, 0, 0]
    cv2_imshow(crop_effect)
    target_image = add_channel(target_image)
    cv2_imshow(crop_effect + target_image)
    dst = cv2.addWeighted(crop_effect, opacity, target_image, 1, 0)
    cv2_imshow(dst)
    cv2_imshow(target_image)
    cv2.imwrite("result.jpg", dst)
    cv2.imwrite("input.jpg", target_image)
    return dst
from PIL import Image
from google.colab.patches import cv2_imshow
import cv2
import numpy as np
import matplotlib.pyplot as plt

im = cv2.imread("citlogo.png", 1)

# sharpening with a high-boost kernel
kernel = np.array([[0, -1, 0],
                   [-1, 12, -1],
                   [0, -1, 0]])
sharpened = cv2.filter2D(im, -1, kernel)
# display the sharpened image
cv2_imshow(sharpened)

# another way: unsharp masking via a weighted blur
aw = cv2.addWeighted(im, 4, cv2.blur(im, (30, 30)), -4, 128)
cv2_imshow(aw)

# Laplacian filter for sharpening
new_image = cv2.Laplacian(im, cv2.CV_64F)
plt.figure(figsize=(11, 6))
plt.subplot(131), plt.imshow(im, cmap='gray'), plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(132), plt.imshow(new_image, cmap='gray'), plt.title('Laplacian')
plt.xticks([]), plt.yticks([])
plt.subplot(133), plt.imshow(im + new_image, cmap='gray'), plt.title('Resulting image')
plt.xticks([]), plt.yticks([])
plt.show()
print('---' + str(modelo) + '---')
print('Loss: ' + str(scores[0]))
print('Accuracy: ' + str(scores[1]))
modelos[modelo] = str(scores[1])
print('\n')

# Sort the models in descending order of accuracy
# (compare as floats, since the values were stored as strings)
modelos_ordenados = sorted(modelos.items(), key=lambda kv: float(kv[1]), reverse=True)
print(modelos_ordenados)
modelos_ordenados[0]  # the best model

"""## Tests with the loaded model"""

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array

imagem = cv2.imread('Material/testes/teste_gabriel.png')
cv2_imshow(imagem)

cascade_faces = 'Material/haarcascade_frontalface_default.xml'
caminho_modelo = 'Material/' + str(modelos_ordenados[0][0])

# Load the face detector and the best emotion classifier
face_detection = cv2.CascadeClassifier(cascade_faces)
classificador_emocoes = load_model(caminho_modelo, compile=False)
expressoes = ['Raiva', 'Nojo', 'Medo', 'Feliz', 'Triste', 'Surpreso', 'Neutro']
results = np.array(Q).mean(axis=0)
i = np.argmax(results)
label = labels[i]

# draw the activity on the output frame
text = "activity: {}".format(label)
cv2.putText(output, text, (35, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 255, 0), 5)

# check if the video writer is None
if writer is None:
    # initialize our video writer
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    writer = cv2.VideoWriter(output_vid, fourcc, 30, (W, H), True)

# write the output frame to disk
writer.write(output)

# show the output image
cv2_imshow(output)
key = cv2.waitKey(1) & 0xFF

# if the `q` key was pressed, break from the loop
if key == ord("q"):
    break

# release the file pointers
print("[INFO] cleaning up...")
writer.release()
vs.release()
        if imag[i + 1, j] == 255:
            st[l].append([i + 1, j])
        if imag[i + 1, j + 1] == 255:
            st[l].append([i + 1, j + 1])
    p1 = list(st.keys())
    f = list(range(50, 200))
    # set each component to a random intensity for the purpose of visualization
    i = 0
    ra = random.sample(f, len(st))
    for g in p1:
        for x in range(0, len(st[g])):
            imag[st[g][x][0], st[g][x][1]] = ra[i]
        i = i + 1
    imag = imag[1:-1, 1:-1]
    return (imag, len(st))  # return the image and the number of labels (keys)

# Run the following code and upload the image to be segmented.
from google.colab import files

uploaded = files.upload()  # lets you upload the image
in_img = Image.open([*uploaded][0])
start = time.time()
out_img, n = ccGroup(in_img)
print("Time taken:", time.time() - start, "seconds")
in_img = np.array(in_img, dtype=float)
cv2_imshow(in_img)
cv2_imshow(out_img)
print("Number of connected components", n)
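# For comparison, OpenCV ships a fast connected-components routine; a minimal
# sketch, assuming the uploaded image can be treated as a single-channel uint8
# image whose nonzero pixels are foreground:
binary = np.array(Image.open([*uploaded][0]).convert('L'), dtype=np.uint8)
num_labels, label_img = cv2.connectedComponents(binary)
print("Number of connected components (OpenCV):", num_labels - 1)  # label 0 is the background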
# -*- coding: utf-8 -*-
"""Sharpening_image.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1kFBznAB525FXLt9ZepnnYPzUbLIxhmVT
"""

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

img1 = cv2.imread('/content/reticulated_python.jpg', 1)
filter = np.array([[-1, -1, -1],
                   [-1, 10, -1],
                   [-1, -1, -1]])
sharpen_img_1 = cv2.filter2D(img1, -1, filter)
cv2_imshow(sharpen_img_1)

img2 = cv2.imread('/content/rottweiler.jpg', 1)
filter = np.array([[-1, -1, -1],
                   [-1, 12, -1],
                   [-1, -1, -1]])
sharpen_img_2 = cv2.filter2D(img2, -1, filter)
cv2_imshow(sharpen_img_2)
def show_img(img):
    cv2_imshow(load_image(img))
    The formula for this conversion is given in the 'additional considerations'
    section of the paper.
    """
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth

from google.colab.patches import cv2_imshow

im = res[0].reshape(192, 640)
im = cv2.resize(im, (img.shape[1], img.shape[0]))
# im = res[1].reshape(96, 320)
disp, depth = disp_to_depth(im)
cv2_imshow(img)
cv2_imshow((1.0 / depth) * 32)
# cv2_imshow(disp * 32)
print(res[3])

!dot monodepth.dot -Tpng -omono.png

import cv2
im = cv2.imread("mono.png")
from google.colab.patches import cv2_imshow
cv2_imshow(im)

!cp monodepth2.onnx "/content/drive/My Drive"
!cp "/content/drive/My Drive/monodepth2.onnx" .
!cp "/content/drive/My Drive/cv2_cuda/cv2.cpython-36m-x86_64-linux-gnu.so" .
    copy_image_from_bytes(darknet_image, img_resized.tobytes())
    detections = detect_image(network, class_names, darknet_image)
    free_image(darknet_image)
    return detections, width_ratio, height_ratio

image = cv2.imread("data/giraffe.jpg")
detections, width_ratio, height_ratio = darknet_helper(image, width, height)

for label, confidence, bbox in detections:
    left, top, right, bottom = bbox2points(bbox)
    left, top, right, bottom = (int(left * width_ratio), int(top * height_ratio),
                                int(right * width_ratio), int(bottom * height_ratio))
    cv2.rectangle(image, (left, top), (right, bottom), class_colors[label], 2)
    cv2.putText(image, "{} [{:.2f}]".format(label, float(confidence)),
                (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, class_colors[label], 2)
cv2_imshow(image)

def js_to_image(js_reply):
    """
    Params:
        js_reply: JavaScript object containing image from webcam
    Returns:
        img: OpenCV BGR image
    """
    # decode base64 image
    image_bytes = b64decode(js_reply.split(',')[1])
    # convert bytes to numpy array
    jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
    # decode numpy array into OpenCV BGR image
    img = cv2.imdecode(jpg_as_np, flags=1)
    return img
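# Minimal sketch of js_to_image on a handmade data URL (the tiny synthetic JPEG
# here is an assumption purely so the snippet runs standalone):
from base64 import b64decode, b64encode
ok, buf = cv2.imencode('.jpg', np.zeros((8, 8, 3), dtype=np.uint8))
fake_reply = 'data:image/jpeg;base64,' + b64encode(buf).decode()
frame = js_to_image(fake_reply)
print(frame.shape)  # (8, 8, 3)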
def visualize_video(model, postprocessors):
    """
    Unfinished, but manages to label selected frames from a video and log them to W&B.
    Ideally this would produce an output video with labels.
    """
    model.eval()

    # COCO classes
    class_id_to_label = {
        1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane',
        6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light',
        11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench',
        16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow',
        22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack',
        28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee',
        35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite',
        39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard',
        42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass',
        47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl',
        52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli',
        57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake',
        62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
        67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
        75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave',
        79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book',
        85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear',
        89: 'hair drier', 90: 'toothbrush'
    }

    vid_frame_count = 0
    max_vid_frames = 10

    # Try reading the video
    # path = "../project-video.mp4"
    path = 'NEW YORK CITY 2019 - BUSY TIMES SQUARE! [4K]-WPvMlyr4H_Y.mp4'
    assert os.path.exists(path)
    video = cv2.VideoCapture(path)
    # output_video = cv2.VideoWriter
    assert video.isOpened()
    print(video)

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    batch_size = 1

    for frame_idx in range(10000):
        ret, frame = video.read()
        if not ret:
            break
        if frame_idx % 30:  # sample every 30th frame to get further through the video
            continue
        frames = [frame]
        # ToTensor converts the [0, 255] HWC frame to a [0, 1] CHW tensor
        inputs = torchvision.transforms.ToTensor()(frame).to(device)
        inputs = torchvision.transforms.Resize((height // 4, width // 4))(inputs)
        # normalize with ImageNet statistics
        normalize = torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                                     [0.229, 0.224, 0.225])
        inputs = normalize(inputs)
        outputs = model([inputs])
        orig_target_sizes = torch.stack([torch.tensor([height, width])], dim=0).to(device)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        result = results[0]
        scores = result['scores']
        labels = result['labels']
        boxes = result['boxes'].cpu()

        box_data = []
        # each box
        for j in range(len(labels)):
            score = scores[j].item()
            label = labels[j].item()
            box = boxes[j]
            if score < 0.1:
                continue
            box_data_j = {
                "position": {
                    "minX": box[0].item(),
                    "maxX": box[2].item(),
                    "minY": box[1].item(),
                    "maxY": box[3].item()
                },
                "class_id": label,
                "box_caption": "%s (%.3f)" % (class_id_to_label.get(label, f"unknown-label-{label}"), score),
                "domain": "pixel",
                "scores": {
                    "score": score
                }
            }
            box_data.append(box_data_j)

        boxes = {
            "predictions": {
                "box_data": box_data,
                "class_labels": class_id_to_label
            },
            # "ground_truth": {}  # would be nice to have
        }
        # print("boxes:", boxes)

        if not args.no_wb:
            img = wandb.Image(frame, boxes=boxes)
            wandb.log({f"video{vid_frame_count}": img})
        else:
            from google.colab.patches import cv2_imshow
            cv2_imshow(frame)  # draw bboxes

        vid_frame_count += batch_size
        if vid_frame_count >= max_vid_frames:
            return
temp_photo = np.array(X[sample])
temp_faces = detector.detect_faces(temp_photo)
faces = len(temp_faces)
for i in range(faces):
    x = temp_faces[i]['box'][0]
    y = temp_faces[i]['box'][1]
    w = temp_faces[i]['box'][2]
    h = temp_faces[i]['box'][3]
    croppedFace = temp_photo[y:y + h, x:x + w, :]
    croppedFace = cv2.resize(croppedFace, (100, 100))
    temp = detector.detect_faces(croppedFace)
    if len(temp) != 0:
        x1, y1 = temp[0]['keypoints']['left_eye']
        croppedL = croppedFace[y1 - sq:y1 + sq, x1 - sq:x1 + sq, :]
        x, y = temp[0]['keypoints']['right_eye']
        croppedR = croppedFace[y - sq:y + sq, x - sq:x + sq, :]
        croppedL = croppedL.reshape((1, 32, 32, 3))
        croppedR = croppedR.reshape((1, 32, 32, 3))
        # both eyes must be predicted closed for the face to count as 'Closed'
        if model.predict(croppedL) == 1 and model.predict(croppedR) == 1:
            print('Closed')
        else:
            print('Open')
        croppedFace = cv2.rectangle(croppedFace, (x - sq, y - sq), (x + sq, y + sq), (255, 0, 0), 1)
        croppedFace = cv2.rectangle(croppedFace, (x1 - sq, y1 - sq), (x1 + sq, y1 + sq), (255, 0, 0), 1)
        cv2_imshow(croppedFace)
def backtrack1(Bounds, Base):
    All_statfn = list()
    data = {}
    data['videos'] = []
    for i in range(0, len(Bounds)):
        print(i)
        base2 = Base + str(Bounds[i][1]) + "/"
        files = natsorted(os.listdir(base2))
        stat = list()
        back_track = int(Bounds[i][0][4] * 100)
        x = int(Bounds[i][0][0])
        y = int(Bounds[i][0][1])
        h = int(Bounds[i][0][2] / 2)
        w = int(Bounds[i][0][3] / 2)
        last_frame = int(str(files[len(files) - 1]).split(".")[0])
        if back_track > last_frame:
            back_track = int(back_len / 10)
        if back_len <= last_frame:
            img0 = cv2.imread(base2 + str(back_track) + ".jpg")[y - h:y + h, x - w:x + w]
            img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
            for idx in range(np.min((26750, 2 * back_track)), 90, -5):
                if idx % 10 == 0:
                    img1 = cv2.imread(base2 + str(idx) + ".jpg")[y - h:y + h, x - w:x + w]
                    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
                    data['videos'].append({
                        'name': videos[(Bounds[i][1]) - 1]['name'].split('.')[0],
                        'img0': str(back_len) + ".jpg",
                        'imgs1': [],
                        'stat': []
                    })
                    stat.append(measure.compare_ssim(img0, img1, multichannel=True, win_size=3))
            for idx in range(0, len(stat) - 35):
                stat[idx] = np.max(stat[idx:idx + 35])
                data['videos'][0]["stat"][idx] = np.max(stat[idx:idx + 35])
            All_statfn.append(stat)

    Times = {}
    count = 0
    for stat in All_statfn:
        Times[Bounds[count][1]] = 999
        count += 1
    count = 0
    image_counter = 0
    for stat in All_statfn:
        video = data['videos'][count]
        video_name = video['name']
        stat = reject_outliers(stat)
        if np.max(stat) > 0.5 and np.min(stat) < 0.55:
            stat = savgol_filter(stat, 21, 1)
            nstat = (list(reversed(stat)) - min(stat)) / (max(stat) - min(stat))
            Found = check_continual(nstat, 150)
            if Found:
                frame_image = video["img0"]
                print("video name:", video_name)
                print("frame_image:", frame_image)
                img = cv2.imread(Base + video_name + "/" + frame_image, cv2.IMREAD_UNCHANGED)
                scale_percent = 50  # percent of original size
                width = int(img.shape[1] * scale_percent / 100)
                height = int(img.shape[0] * scale_percent / 100)
                dim = (width, height)
                resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
                cv2_imshow(resized)
                if np.where(np.array(nstat) >= 0.4)[0][0] != 0:
                    Times[Bounds[count][1]] = np.min((
                        Times[Bounds[count][1]],
                        np.min(((np.where(np.array(nstat) >= 0.5)[0][0]) * 5 / 30,
                                Times[Bounds[count][1]]))
                    ))
                image_counter += 1
        count += 1
    return Times, All_statfn
from imutils.object_detection import non_max_suppression  # prunes overlapping detections, keeping the highest-scoring box
import numpy as np
import argparse
import time
import cv2
import imutils
from google.colab.patches import cv2_imshow  # change this to cv2.imshow if not running on Colab
import tensorflow as tf
from tensorflow.keras import layers

image = cv2.imread("Akshat_white.jpg")
orig = image.copy()
orig = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY)
cv2_imshow(orig)

# First step is to find contours; use Canny edge detection.
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Do Otsu thresholding before Canny edge detection to get a data-driven threshold
high_thresh, thresh_im = cv2.threshold(orig, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
print(high_thresh)
image = cv2.Canny(image, 50, 136)
cv2_imshow(image)

image_copy = image.copy()
edges = cv2.findContours(image_copy, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
edges = imutils.grab_contours(edges)
edges = sorted(edges, key=cv2.contourArea, reverse=True)[0]
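# Since the import above notes uncertainty about non_max_suppression: the idea
# is to keep the highest-scoring box and discard boxes whose overlap (IoU) with
# it exceeds a threshold. A minimal greedy sketch of the technique (not
# imutils' exact implementation):
def nms_sketch(boxes, scores, iou_thresh=0.3):
    """boxes: list of (x1, y1, x2, y2); returns indices of kept boxes."""
    order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
    keep = []
    for i in order:
        x1, y1, x2, y2 = boxes[i]
        ok = True
        for j in keep:
            a1, b1, a2, b2 = boxes[j]
            iw = max(0, min(x2, a2) - max(x1, a1))
            ih = max(0, min(y2, b2) - max(y1, b1))
            inter = iw * ih
            union = (x2 - x1) * (y2 - y1) + (a2 - a1) * (b2 - b1) - inter
            if union > 0 and inter / union > iou_thresh:
                ok = False  # overlaps too much with an already-kept box
                break
        if ok:
            keep.append(i)
    return keep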
oy = 0  # define the rotation centre
srci, srcj = np.shape(image[:, :, 0])  # source image size
corx = [0, srci, srci, 0]
cory = [0, 0, srcj, srcj]
cor_newx = []
cor_newy = []
# Compute the positions of the four corners in the new image
# (note this maps source --> dst, so the angle takes a negative sign)
for x, y in zip(corx, cory):
    newx, newy = back_revol(x, y, -angle, ox, oy)
    cor_newx.append(newx)
    cor_newy.append(newy)

# Allocate the new image
newi = int(max(cor_newx) - min(cor_newx)) + 1  # new image size
newj = int(max(cor_newy) - min(cor_newy)) + 1
new_img = np.zeros((newi, newj, 3))

# Pad the source image for the bicubic interpolation below
pad_img = pad_cubic(image)

# If ix, iy (dst --> source) fall inside pad_img, assign the bicubically
# interpolated value to the new image
for k in range(3):
    for j in range(newj):
        for i in range(newi):
            # backward mapping
            ix, iy = back_revol(i + min(cor_newx), j + min(cor_newy), angle, ox, oy)
            if (0 <= ix) & (ix < srci) & (0 <= iy) & (iy < srcj):
                new_img[i, j, k] = Bicubic_convol(pad_img[:, :, k], ix, iy)

cv2_imshow(new_img)
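# For reference, OpenCV can do the same rotation-with-expanded-canvas far
# faster; a sketch assuming `image` is the BGR source and `angle` is in degrees:
(h, w) = image.shape[:2]
M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
cos, sin = abs(M[0, 0]), abs(M[0, 1])
new_w, new_h = int(h * sin + w * cos), int(h * cos + w * sin)
M[0, 2] += new_w / 2 - w / 2  # shift so the rotated image stays centred
M[1, 2] += new_h / 2 - h / 2
rotated = cv2.warpAffine(image, M, (new_w, new_h), flags=cv2.INTER_CUBIC)
cv2_imshow(rotated)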
args = vars(ap.parse_args())'''

# Arguments
dir_path = '/content/drive/My Drive/cityscapes/results/video_results/'
ext = 'png'
output = dir_path + 'output.mp4'

images = []
for f in os.listdir(dir_path):
    if f.endswith(ext):
        images.append(f)

# Determine the width and height from the first image
image_path = os.path.join(dir_path, images[0])
frame = cv2.imread(image_path)
cv2_imshow(frame)
height, width, channels = frame.shape

# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # be sure to use lower case
out = cv2.VideoWriter(output, fourcc, 20.0, (width, height))

for image in images:
    image_path = os.path.join(dir_path, image)
    frame = cv2.imread(image_path)
    out.write(frame)  # write out frame to video

'''cv2.imshow('video', frame)
if (cv2.waitKey(1) & 0xFF) == ord('q'):  # Hit `q` to exit
    https://colab.research.google.com/drive/11RMvMejR-cCDb7YfxH3YAmGc50a6tlb2

Import libraries
"""

from google.colab.patches import cv2_imshow
import cv2

cap = cv2.VideoCapture('cars.mp4')
car_cascade = cv2.CascadeClassifier('cars.xml')

while True:
    # read frames from the video
    ret, frames = cap.read()
    if not ret:  # stop when the video ends
        break

    # convert each frame to gray scale
    gray = cv2.cvtColor(frames, cv2.COLOR_BGR2GRAY)

    # detect cars of different sizes in the input frame
    cars = car_cascade.detectMultiScale(gray, 1.1, 1)

    # draw a rectangle around each car
    for (x, y, w, h) in cars:
        cv2.rectangle(frames, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # display frames in a window
    cv2_imshow(frames)

    # wait for the Esc key to stop
    if cv2.waitKey(33) == 27:
        break

cv2.destroyAllWindows()
        'vmap_img_v': vmap_img_v,
        'vmap_v': vmap_v,
    })

main()

"""## Sample code to test"""

# load the image at 640 * 420
url = 'https://images.unsplash.com/photo-1456132022829-e771cbd1f7da?ixid=MXwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHw%3D&ixlib=rb-1.2.1&auto=format&fit=crop&w=1498&q=80'
img = url2img(url)
cv2_imshow(img)

# create the corresponding vmaps; the scale between 0 and 1 indicates how much to scale the image
# (takes approximately 50 seconds for both horizontal and vertical)
t = time.time()
vmap_img_h, vmap_h = create_vmap_h(img, 0.8)
# vmap_img_v, vmap_v = create_vmap_v(img, 0.6)
print('Finished creating the vmaps after {} seconds'.format(time.time() - t))

# scale the image
# pass scale, vmap_img, vmap; True for horizontal and False for vertical
scale_img = scale_img_(img, 1.5, vmap_img_h, vmap_h, True)
# Commented out IPython magic to ensure Python compatibility.
# %cd /content/drive/My Drive/STAT6000/ResearchProject/train2

# If your dataset is in COCO format, this cell can be replaced by the following three lines:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("pods_train", {}, "instances_default.json",
                        "/content/drive/My Drive/STAT6000/ResearchProject/train2")
register_coco_instances("pods_test", {}, "instances_default2.json",
                        "/content/drive/My Drive/STAT6000/ResearchProject/train2")

pods_metadata = MetadataCatalog.get("pods_train")
dataset_dicts = DatasetCatalog.get("pods_train")
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=pods_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2_imshow(out.get_image()[:, :, ::-1])

from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("pods_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0025
cfg.SOLVER.MAX_ITER = 300
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
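# With the config above, the usual next step in the Detectron2 tutorial flow is
# to launch training; a sketch (the output-directory handling is an assumption):
import os
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()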
model.fit(X_train, Y_train, batch_size=10, epochs=2)
model.save_weights('content/My Drive/Sem6/DL/A3/model_q2.hdf5')
model.load_weights('content/My Drive/Sem6/DL/A3/model_q2.hdf5')

input_folder = "content/My Drive/Sem6/DL/A3/rgb_sample"
# input_folder = ""
X_test = np.load(input_folder + "/x_test.npy")
Y_test = np.load(input_folder + "/y_test.npy")
print(X_test.shape)
print(Y_test.shape)

X_test = X_test / 255
# Y_test = Y_test[:, :, :, 0:1]
Y_test = Y_test / 255

model.evaluate(X_test, Y_test, verbose=1)
preds_val = model.predict(X_test, verbose=1)
print(preds_val.shape)

x = preds_val[100][:, :, 0] * 255
gd = Y_test[100] * 255
print(x.shape)

from google.colab.patches import cv2_imshow
cv2_imshow(x)
cv2_imshow(gd)
# -*- coding: utf-8 -*-
"""OCR_Bangalore_Electricity_Bill.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/srinivasanibmbangalore/Deep-Learning2/blob/master/OCR_Bangalore_Electricity_Bill.ipynb
"""

!apt install tesseract-ocr
!apt install libtesseract-dev
!pip install pytesseract

import cv2
import numpy as np
import pytesseract
from google.colab.patches import cv2_imshow
from PIL import Image, ImageEnhance, ImageFilter

!curl -o bill.jpg https://www.aamindia.com/wp-content/uploads/2014/02/Bescom-Bill-Payment-Find-Account-ID.jpg?x97180

img = cv2.imread('bill.jpg', cv2.IMREAD_UNCHANGED)
type(img)
cv2_imshow(img)

text = pytesseract.image_to_string(img, lang='eng')
print(text)
output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
output[:, 1:5] /= scaling_factor

for i in range(output.shape[0]):
    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

###### ---------------------- Image Printing ---------------------- ######
tim = list(map(lambda x: writeim(x, loaded_ims), output))
det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format('det', x.split("/")[-1]))
list(map(cv2.imwrite, det_names, loaded_ims))

from google.colab.patches import cv2_imshow
cv2_imshow(tim[2])

###### ---------------------- VIDEO DETECTION ---------------------- ######
batch_size = 1
confidence = 0.5
nms_thesh = 0.4
start = 0
CUDA = torch.cuda.is_available()

cap = cv2.VideoCapture('UHD Ultra HD 4K Video Stock Footage Dubai Busy City Street Highway Freeway Traffic Rush Hour Day.mp4')

print("Loading network.....")
model = Darknet('yolov3.cfg')
model.load_weights('yolov3.weights')
print("Network successfully loaded")

classes = load_classes("coco.names")
model.net_info["height"] = 416
# Apply the Hough transform to the image
circles = cv2.HoughCircles(img_blur, cv2.HOUGH_GRADIENT, 1, img.shape[0] / 64,
                           param1=200, param2=10, minRadius=10, maxRadius=20)

# Draw detected circles
if circles is not None:
    circles = np.uint16(np.around(circles))
    for i in circles[0, :]:
        # Draw the outer circle
        cv2.circle(img, (i[0], i[1]), i[2], (0, 0, 0), 10)
        # crop_img = img[i[1]:(i[1] + 2 * i[2]), i[0]:(i[0] + 2 * i[2])]
        crop_img = img[i[1] - i[2]:i[1] + i[2], i[0] - i[2]:i[0] + i[2], :]
        cropped.append(np.array(crop_img))

cv2_imshow(cropped[1])

read = cv2.cvtColor(cropped[0], cv2.COLOR_BGR2GRAY)
# print(read.shape)
dim = (28, 28)
resized = cv2.resize(read, dim)
# print(resized.shape)
imge = np.resize(resized, (28, 28, 1))
arr = np.array(imge)
im2arr = arr.reshape(1, 28, 28, 1)
y_pred = model.predict_classes(im2arr)
print(y_pred)

plt.imshow(resized, cmap='gray')
plt.show()
print("\n\nFinal Output: {}".format(y_pred[0]))
def borderless(table, image, res_cells):
    cells = []
    x_lines = []
    y_lines = []
    table[0], table[1], table[2], table[3] = table[0] - 15, table[1] - 15, table[2] + 15, table[3] + 15
    for cell in res_cells:
        if cell[0] > table[0] - 50 and cell[1] > table[1] - 50 and cell[2] < table[2] + 50 and cell[3] < table[3] + 50:
            cells.append(cell)
            # print(cell)
    cells = sorted(cells, key=lambda x: x[3])
    row = []
    last = -1111
    row.append(table[1])
    y_lines.append([table[0], table[1], table[2], table[1]])
    temp = -1111
    prev = None
    im2 = image.copy()
    for i, cell in enumerate(cells):
        if i == 0:
            last = cell[1]
            temp = cell[3]
        elif (cell[1] < last + 15 and cell[1] > last - 15) or (cell[3] < temp + 15 and cell[3] > temp - 15):
            if cell[3] > temp:
                temp = cell[3]
        else:
            last = cell[1]
            if last > temp:
                row.append((last + temp) // 2)
                if prev is not None:
                    if ((last + temp) // 2) < prev + 10 or ((last + temp) // 2) < prev - 10:
                        row.pop()
                prev = (last + temp) // 2
            temp = cell[3]
    row.append(table[3] + 50)

    i = 1
    rows = []
    for r in range(len(row)):
        rows.append([])
    final_rows = rows
    maxr = -111
    # print(len(row))
    for cell in cells:
        if cell[3] < row[i]:
            rows[i - 1].append(cell)
        else:
            i += 1
            rows[i - 1].append(cell)
    # print(row)
    for n, r1 in enumerate(rows):
        if n == len(rows):
            r1 = r1[:-1]
        # print(r1)
        r1 = sorted(r1, key=lambda x: x[0])
        prevr = None
        for no, r in enumerate(r1):
            if prevr is not None:
                # print(r[0], prevr[0])
                if (r[0] <= prevr[0] + 5 and r[0] >= prevr[0] - 5) or (r[2] <= prevr[2] + 5 and r[2] >= prevr[2] - 5):
                    if r[4] < prevr[4]:
                        r1.pop(no)
                    else:
                        r1.pop(no - 1)
            prevr = r
        # print(len(r1))
        final_rows[n] = r1

    lasty = []
    for x in range(len(final_rows)):
        lasty.append([99999999, 0])
    prev = None
    for n, r1 in enumerate(final_rows):
        for r in r1:
            if prev is None:
                prev = r
            else:
                if r[1] < prev[3]:
                    continue
            if r[1] < lasty[n][0]:
                lasty[n][0] = r[1]
            if r[3] > lasty[n][1]:
                lasty[n][1] = r[3]
    # print("last y:", lasty)

    row = []
    row.append(table[1])
    prev = None
    pr = None
    for x in range(len(lasty) - 1):
        if x == 0 and prev is None:
            prev = lasty[x]
        else:
            if pr is not None:
                if abs(((lasty[x][0] + prev[1]) // 2) - pr) <= 10:
                    row.pop()
                    row.append((lasty[x][0] + prev[1]) // 2)
                else:
                    row.append((lasty[x][0] + prev[1]) // 2)
            else:
                row.append((lasty[x][0] + prev[1]) // 2)
            pr = (lasty[x][0] + prev[1]) // 2
            prev = lasty[x]
    row.append(table[3])

    maxr = 0
    for r2 in final_rows:
        print(r2)
        if len(r2) > maxr:
            maxr = len(r2)
    lastx = []
    for n in range(maxr):
        lastx.append([999999999, 0])
    for r2 in final_rows:
        if len(r2) == maxr:
            for n, col in enumerate(r2):
                # print(col)
                if col[2] > lastx[n][1]:
                    lastx[n][1] = col[2]
                if col[0] < lastx[n][0]:
                    lastx[n][0] = col[0]
    print(lastx)
    for r2 in final_rows:
        if len(r2) != 0:
            r = 0
            for n, col in enumerate(r2):
                while r != len(r2) - 1 and lastx[n][0] > r2[r][0]:
                    r += 1
                if n != 0:
                    if r2[r - 1][0] > lastx[n - 1][1]:
                        if r2[r - 1][0] < lastx[n][0]:
                            lastx[n][0] = r2[r - 1][0]
    for r2 in final_rows:
        for n, col in enumerate(r2):
            if n != len(r2) - 1:
                if col[2] < lastx[n + 1][0]:
                    if col[2] > lastx[n][1]:
                        lastx[n][1] = col[2]
    print(lastx)

    col = np.zeros(maxr + 1)
    col[0] = table[0]
    prev = 0
    i = 1
    for x in range(len(lastx)):
        if x == 0:
            prev = lastx[x]
        else:
            col[i] = (lastx[x][0] + prev[1]) // 2
            i += 1
            prev = lastx[x]
    col = col.astype(int)
    col[maxr] = table[2]

    _row_ = sorted(row, key=lambda x: x)
    _col_ = sorted(col, key=lambda x: x)
    for no, c in enumerate(_col_):
        x_lines.append([c, table[1], c, table[3]])
        cv2.line(im2, (c, table[1]), (c, table[3]), (255, 0, 0), 1)
    for no, c in enumerate(_row_):
        y_lines.append([table[0], c, table[2], c])
        cv2.line(im2, (table[0], c), (table[2], c), (255, 0, 0), 1)
    # cv2_imshow(im2)
    print("table:", table)
    # for r in row:
    #     cv2.line(im2, (r, table[1]), (r, table[3]), (0, 255, 0), 1)
    # for c in col:
    #     cv2.line(im2, (c, table[1]), (c, table[3]), (0, 255, 0), 1)
    final = extract_table(image[table[1]:table[3], table[0]:table[2]], 0, (y_lines, x_lines))
    cellBoxes = []
    img4 = image.copy()
    for box in final:
        cellBox = extractTextBless(image[box[1]:box[3], box[0]:box[4]])
        for cell in cellBox:
            cellBoxes.append([box[0] + cell[0], box[1] + cell[1], cell[2], cell[3]])
            cv2.rectangle(img4, (box[0] + cell[0], box[1] + cell[1]),
                          (box[0] + cell[0] + cell[2], box[1] + cell[1] + cell[3]), (255, 0, 0), 2)
    # cv2_imshow(img4)

    the_last_y = -1
    cellBoxes = sorted(cellBoxes, key=lambda x: x[1])
    cellBoxes2BeMerged = []
    cellBoxes2BeMerged.append([])
    rowCnt = 0
    for cell in cellBoxes:
        if the_last_y == -1:
            the_last_y = cell[1]
            cellBoxes2BeMerged[rowCnt].append(cell)
            continue
        if abs(cell[1] - the_last_y) < 8:
            cellBoxes2BeMerged[rowCnt].append(cell)
        else:
            the_last_y = cell[1]
            rowCnt += 1
            cellBoxes2BeMerged.append([])
            cellBoxes2BeMerged[rowCnt].append(cell)

    MergedBoxes = []
    for cellrow in cellBoxes2BeMerged:
        cellrow = sorted(cellrow, key=lambda x: x[0])
        cur_cell = -1
        for c, cell in enumerate(cellrow):
            if cur_cell == -1:
                cur_cell = cell
                continue
            if len(cellrow) == 1:
                MergedBoxes.append(cell)
                break
            if abs((cur_cell[0] + cur_cell[2]) - cell[0]) < 10:
                cur_cell[2] = cur_cell[2] + cell[2] + (cell[0] - (cur_cell[0] + cur_cell[2]))
                if cur_cell[3] < cell[3]:
                    cur_cell[3] = cell[3]
            else:
                cur_cell[2] = cur_cell[0] + cur_cell[2]
                cur_cell[3] = cur_cell[1] + cur_cell[3]
                MergedBoxes.append(cur_cell)
                cur_cell = cell
        cur_cell[2] = cur_cell[0] + cur_cell[2]
        cur_cell[3] = cur_cell[1] + cur_cell[3]
        MergedBoxes.append(cur_cell)

    im3 = image.copy()
    for bx in MergedBoxes:
        cv2.rectangle(im3, (bx[0], bx[1]), (bx[2], bx[3]), (255, 0, 0), 2)
    # cv2_imshow(im3)

    TextChunks = []
    TextChunks.append([])
    rcnt = 0
    ycnt = -1
    final = sorted(final, key=lambda x: x[1])
    for box in final:
        if ycnt == -1:
            ycnt = box[1]
        tcurcell = []
        mcurcell = []
        for mbox in MergedBoxes:
            if mbox[0] >= box[0] and mbox[1] >= box[1] and mbox[2] <= box[4] and mbox[3] <= box[3]:
                if len(tcurcell) == 0:
                    tcurcell = mbox
                else:
                    if mbox[0] < tcurcell[0]:
                        tcurcell[0] = mbox[0]
                    if mbox[1] < tcurcell[1]:
                        tcurcell[1] = mbox[1]
                    if mbox[2] > tcurcell[2]:
                        tcurcell[2] = mbox[2]
                    if mbox[3] > tcurcell[3]:
                        tcurcell[3] = mbox[3]
        for i, frow in enumerate(final_rows):
            for j, fbox in enumerate(frow):
                if fbox[0] >= box[0] and fbox[0] <= box[4] and fbox[1] >= box[1] and fbox[1] <= box[3]:
                    mcurcell = fbox
                    final_rows[i].pop(j)
                    break
        if abs(ycnt - box[1]) > 10:
            rcnt += 1
            TextChunks.append([])
            ycnt = box[1]
        if len(tcurcell) == 0:
            if len(mcurcell) == 0:
                continue
            else:
                TextChunks[rcnt].append(mcurcell)
        else:
            if len(mcurcell) == 0:
                TextChunks[rcnt].append(tcurcell)
            else:
                if (abs(mcurcell[0] - tcurcell[0]) <= 20 and abs(mcurcell[1] - tcurcell[1]) <= 20
                        and abs(mcurcell[2] - tcurcell[2]) <= 20 and abs(mcurcell[3] - tcurcell[3]) <= 20):
                    TextChunks[rcnt].append(tcurcell)
                elif ((abs(mcurcell[0] - tcurcell[0]) <= 20 and abs(mcurcell[2] - tcurcell[2]) <= 20)
                      or (abs(mcurcell[1] - tcurcell[1]) <= 20 or abs(mcurcell[3] - tcurcell[3]) <= 20)):
                    TextChunks[rcnt].append(mcurcell)
                else:
                    TextChunks[rcnt].append(tcurcell)

    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (125, 125, 0), (0, 255, 255)]
    for no, r in enumerate(TextChunks):
        for tbox in r:
            cv2.rectangle(im2, (tbox[0], tbox[1]), (tbox[2], tbox[3]), colors[no % len(colors)], 1)
            # print(tbox)
    # cv2.imshow("text chunks", im2)
    cv2_imshow(im2)
    cv2.waitKey(0)

    def rowstart(val):
        r = 0
        while r < len(_row_) and val > _row_[r]:
            r += 1
        if r - 1 == -1:
            return r
        else:
            return r - 1

    def rowend(val):
        r = 0
        while r < len(_row_) and val > _row_[r]:
            r += 1
        if r - 1 == -1:
            return r
        else:
            return r - 1

    def colstart(val):
        r = 0
        while r < len(_col_) and val > _col_[r]:
            r += 1
        if r - 1 == -1:
            return r
        else:
            return r - 1

    def colend(val):
        r = 0
        while r < len(_col_) and val > _col_[r]:
            r += 1
        if r - 1 == -1:
            return r
        else:
            return r - 1

    tableXML = etree.Element("table")
    Tcoords = etree.Element(
        "Coords",
        points=str(table[0]) + "," + str(table[1]) + " " + str(table[0]) + "," + str(table[3]) + " "
               + str(table[2]) + "," + str(table[3]) + " " + str(table[2]) + "," + str(table[1]))
    tableXML.append(Tcoords)
    for final in TextChunks:
        for box in final:
            cell = etree.Element("cell")
            end_col, end_row, start_col, start_row = colend(box[2]), rowend(box[3]), colstart(box[0]), rowstart(box[1])
            cell.set("end-col", str(end_col))
            cell.set("end-row", str(end_row))
            cell.set("start-col", str(start_col))
            cell.set("start-row", str(start_row))
            # print(cellBox)
            one = str(box[0]) + "," + str(box[1])
            two = str(box[0]) + "," + str(box[3])
            three = str(box[2]) + "," + str(box[3])
            four = str(box[2]) + "," + str(box[1])
            # print(one)
            coords = etree.Element("Coords", points=one + " " + two + " " + three + " " + four)
            cell.append(coords)
            tableXML.append(cell)
    return tableXML
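# The four helpers above (rowstart/rowend/colstart/colend) all perform the same
# "index of the grid line just below val" search over a sorted list; a compact
# standard-library equivalent, offered only as a sketch:
from bisect import bisect_left

def grid_index(lines, val):
    """Index of the last entry of sorted `lines` strictly below val, clamped at 0."""
    return max(bisect_left(lines, val) - 1, 0)

# e.g. rowstart(v) == grid_index(_row_, v) and colend(v) == grid_index(_col_, v)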