def predict1(image):
    """Removes the background of the given image.

    :param image: numpy array
    """
    print("predict1")
    height, width = image.shape[0], image.shape[1]
    resized_image = imresize(image, (224, 224)) / 255.0  # Model input shape = (224, 224, 3)
    # [0:3] - Take only the first 3 RGB channels and drop the 4th ALPHA channel in case this is a PNG
    prediction = ml_predict(resized_image[:, :, 0:3])
    print('PREDICTION COUNT', (prediction[:, :, 1] > 0.5).sum())
    # Resize back to original image size
    # [:, :, 1] = Take predicted class 1 - currently in our model = Person class. Class 0 = Background
    prediction = imresize(prediction[:, :, 1], (height, width))
    prediction[prediction < THRESHOLD * 255] = 0
    prediction[prediction >= THRESHOLD * 255] = 1
    # return prediction
    # Apply the binary mask to each colour channel; the mask scaled to 0-255 serves as the
    # alpha channel (assumption: res4, undefined in the snippet, was meant to be that mask).
    res1 = prediction * image[:, :, 0]
    res2 = prediction * image[:, :, 1]
    res3 = prediction * image[:, :, 2]
    res4 = prediction * 255
    img2 = np.dstack([res1, res2, res3, res4])
    return img2
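# A minimal call sketch for predict1, assuming ml_predict, THRESHOLD and imresize are
# defined at module level as the function expects; the file names are hypothetical and
# imageio is used here only for I/O.
import imageio
import numpy as np

img = imageio.imread('person.jpg')                             # hypothetical RGB input
cutout = predict1(img)                                         # RGBA array: background zeroed, mask as alpha
imageio.imwrite('person_cutout.png', cutout.astype(np.uint8))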
def im2vgginput(im, shaping_mode='squeeze'):
    """
    :param im: A (size_y, size_x, 3) array representing a RGB image on a [0, 255] scale, or a
        (n_samples, size_y, size_x, 3) array representing an array of such images.
    :returns: A (1, 3, size_y, size_x) array representing the BGR image that's ready to feed into VGGNet
    """
    if im.shape[-3:-1] != (224, 224):  # compare the spatial (y, x) dims
        if shaping_mode == 'squeeze':
            from scipy.misc.pilutil import imresize  # TODO: Test!
            if im.ndim == 3:
                im = imresize(im, size=(224, 224))
            elif im.ndim == 4:
                im = np.array([imresize(x, size=(224, 224)) for x in im])
        elif shaping_mode == 'crop':
            current_shape = im.shape[-3:-1]
            assert current_shape[0] >= 224 and current_shape[1] >= 224, "Don't currently have padding implemented"
            row_start, col_start = [(c - 224) // 2 for c in current_shape]
            im = im[..., row_start:row_start + 224, col_start:col_start + 224, :]
        else:
            raise Exception('Unknown shaping mode: "%s"' % (shaping_mode, ))
    centered_bgr_im = im[..., ::-1] - np.array([103.939, 116.779, 123.68])
    feature_map_im = centered_bgr_im.dimshuffle('x', 2, 0, 1) if isinstance(centered_bgr_im, Variable) \
        else np.rollaxis(centered_bgr_im, 2, 0)[None, :, :, :]
    return feature_map_im.astype(theano.config.floatX)
def picEnhance(file_name, autoencoder, shape=theShape, F = 4, save_path="enhanced"): Sh, Sw = shape[0], shape[1] # Shape height and shape width. Sh0, Sw0 = int(Sh/2), int(Sw/2) # number of pixels to move the window in each patch. Not implemented. Sh2, Sw2 = Sh*F, Sw*F # The new shape height and shape width. file = imread(file_name, flatten=False, mode='RGB') if file.shape[0] > 5000 or file.shape[1] > 5000: print(file_name,': File too big.', file.shape) return save_path = os.path.join(os.path.split(file_name)[0], save_path) os.makedirs(save_path, exist_ok=True) _, file_name = os.path.split(file_name) name = file_name.replace('.jpg', '-Enhanced.jpg') name2 = file_name.replace('.jpg', '-nearest.jpg') name3 = file_name.replace('.jpg', '-bicubic.jpg') name4 = file_name.replace('.jpg', '-4together.jpg') h, w, colorChannel = file.shape p, q = h // Sh, w // Sw t = min(p, q) ran = random.randint(0, t-2) X, Y = ran*Sh2, ran*Sw2 picEnh = np.zeros((p * Sh2, q * Sw2, 3)) x = np.zeros((p*q, Sh, Sw, 3)) pic4 = np.zeros((4*Sh2+3, 4*Sw2+3, 3)) for i in range(p): for j in range(q): row1, row2 = (i*Sh), (i + 1)*Sh col1, col2 = j*Sw, (j+1)*Sw x[i * q + j] = file[row1: row2, col1: col2, :] x = x.astype('float32') # / 255. ## For some reason, doing this step makes the picture black. # (not in relu(seems like even in relu)) y = autoencoder.predict(x, 150, verbose=1)* 255 # So does omitting this part. y_two = y.astype('uint8') for i in range(p): for j in range(q): row1, row2 = i*Sh2, (i+1)*Sh2 col1, col2 = j*Sw2, (j+1)*Sw2 picEnh[row1: row2, col1: col2, :] = y_two[i * q + j] nearest = imresize(file, size=400, interp='nearest') bicubic = imresize(file, size=400, interp='bicubic') bilinear = imresize(file, size=400, interp='bilinear') pic4[0:2*Sh2, 0:2*Sw2, :] = nearest[X:X + 2 * Sh2, Y: Y + 2 * Sw2, :] pic4[3+2*Sh2:, 0:2*Sw2, :] = bicubic[X:X + 2 * Sh2, Y: Y + 2 * Sw2, :] pic4[0:2*Sh2, 3+2*Sw2:, :] = bilinear[X:X + 2 * Sh2, Y: Y + 2 * Sw2, :] pic4[3+2*Sh2:, 3+2*Sw2:, :] = picEnh[X:X + 2 * Sh2, Y: Y + 2 * Sw2, :] imsave(os.path.join(save_path, name), picEnh) imsave(os.path.join(save_path, name2), nearest) imsave(os.path.join(save_path, name3), bicubic) imsave(os.path.join(save_path, name4), pic4) del bicubic, picEnh, pic4, bilinear, nearest
def proc_user_img(fn,model): print 'loading "%s for digit recognition" ...' % fn im = cv2.imread(fn) im_original = cv2.imread(fn) blank_image = np.zeros((im.shape[0],im.shape[1],3), np.uint8) blank_image.fill(255) imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY) kernel = np.ones((5,5),np.uint8) ret,thresh = cv2.threshold(imgray,127,255,0) thresh = cv2.erode(thresh,kernel,iterations = 1) thresh = cv2.dilate(thresh,kernel,iterations = 1) thresh = cv2.erode(thresh,kernel,iterations = 1) #for opencv 3.0.x #_,contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE) #for opencv 2.4.x contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE) digits_rect = get_digits(contours) #rectangles of bounding the digits in user image for rect in digits_rect: x,y,w,h = rect _ = cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2) im_digit = im_original[y:y+h,x:x+w] sz = 28 im_digit = imresize(im_digit,(sz,sz)) for i in range(sz): #need to remove border pixels im_digit[i,0] = 255 im_digit[i,1] = 255 im_digit[0,i] = 255 im_digit[1,i] = 255 thresh = 210 im_digit = cv2.cvtColor(im_digit,cv2.COLOR_BGR2GRAY) im_digit = cv2.threshold(im_digit, thresh, 255, cv2.THRESH_BINARY)[1] #im_digit = cv2.adaptiveThreshold(im_digit,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C ,cv2.THRESH_BINARY,11,2) im_digit = (255-im_digit) im_digit = imresize(im_digit,(20,20)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) _ = cv2.putText(im, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3) _ = cv2.putText(blank_image, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5) cv2.imwrite("original_overlay.png",im) cv2.imwrite("final_digits.png",blank_image) cv2.destroyAllWindows()
def proc_user_img(fn,model): print 'loading "%s for digit recognition" ...' % fn im = cv2.imread(fn) im_original = cv2.imread(fn) blank_image = np.zeros((im.shape[0],im.shape[1],3), np.uint8) blank_image.fill(255) imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY) kernel = np.ones((5,5),np.uint8) ret,thresh = cv2.threshold(imgray,127,255,0) thresh = cv2.erode(thresh,kernel,iterations = 1) thresh = cv2.dilate(thresh,kernel,iterations = 1) thresh = cv2.erode(thresh,kernel,iterations = 1) #for opencv 3.0.x #_,contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE) #for opencv 2.4.x contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE) digits_rect = get_digits(contours) #rectangles of bounding the digits in user image for rect in digits_rect: x,y,w,h = rect _ = cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2) im_digit = im_original[y:y+h,x:x+w] sz = 28 im_digit = imresize(im_digit,(sz,sz)) for i in range(sz): #need to remove border pixels im_digit[i,0] = 255 im_digit[i,1] = 255 im_digit[0,i] = 255 im_digit[1,i] = 255 thresh = 230 #210 im_digit = cv2.cvtColor(im_digit,cv2.COLOR_BGR2GRAY) im_digit = cv2.threshold(im_digit, thresh, 255, cv2.THRESH_BINARY)[1] #im_digit = cv2.adaptiveThreshold(im_digit,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C ,cv2.THRESH_BINARY,11,2) im_digit = (255-im_digit) im_digit = imresize(im_digit,(20,20)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) _ = cv2.putText(im, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 2) _ = cv2.putText(blank_image, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 2) cv2.imwrite("original_overlay.png",im) cv2.imwrite("final_digits.png",blank_image) cv2.destroyAllWindows()
def proc_user_img(fn,model): print('loading "%s for digit recognition...' % fn) im = cv2.imread(fn) print(im.size) im_original = cv2.imread(fn) clone = im_original.copy() blank_image = np.zeros((im.shape[0],im.shape[1],3), np.uint8) blank_image.fill(255) imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY) kernel = np.ones((5,5),np.uint8) ret,thresh = cv2.threshold(imgray,127,255,0) thresh = cv2.erode(thresh,kernel,iterations = 1) thresh = cv2.dilate(thresh,kernel,iterations = 1) thresh = cv2.erode(thresh,kernel,iterations = 1) #cv2.imshow("thresh", thresh) #for opencv 3.0.x #_,contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE) #for opencv 2.4.x im2, contours,hierarchy = cv2.findContours(thresh,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_NONE) print(contours) cv2.drawContours(clone, contours, -1, (0,255,0), 3) #cv2.imshow("cont", clone) digits_rect = get_digits(contours) #rectangles of bounding the digits in user image for rect in digits_rect: x,y,w,h = rect print(rect) _ = cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2) #cv2.imshow("im", im) im_digit = im_original[y:y+h,x:x+w] sz = 28 im_digit = imresize(im_digit,(sz,sz)) ## ## for i in range(sz): #need to remove border pixels ## im_digit[i,0] = 255 ## im_digit[i,1] = 255 ## im_digit[0,i] = 255 ## im_digit[1,i] = 255 thresh = 210 im_digit = cv2.cvtColor(im_digit,cv2.COLOR_BGR2GRAY) im_digit = cv2.threshold(im_digit, thresh, 255, cv2.THRESH_BINARY)[1] #im_digit = cv2.adaptiveThreshold(im_digit,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C ,cv2.THRESH_BINARY,11,2) im_digit = (255-im_digit) im_digit = imresize(im_digit,(20,20)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) _ = cv2.putText(im, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3) _ = cv2.putText(blank_image, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5)
def process_test_image(dataset, model, model_type='dnn'): logging.getLogger('regular.time').info('loading "{0} for digit recognition" ...'.format(dataset)) im = cv2.imread(dataset) im_original = cv2.imread(dataset) blank_image = np.zeros((im.shape[0], im.shape[1], 3), np.uint8) blank_image.fill(255) imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) kernel = np.ones((5, 5), np.uint8) ret, thresh = cv2.threshold(imgray, 127, 255, 0) thresh = cv2.erode(thresh, kernel, iterations=1) thresh = cv2.dilate(thresh, kernel, iterations=1) thresh = cv2.erode(thresh, kernel, iterations=1) _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) digits_rect = get_digits(contours) # rectangles of bounding the digits in user image for rect in digits_rect: x, y, w, h = rect _ = cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2) im_digit = im_original[y: y + h, x: x + w] sz = 28 im_digit = imresize(im_digit, (sz, sz)) for i in range(sz): # need to remove border pixels im_digit[i, 0] = 255 im_digit[i, 1] = 255 im_digit[0, i] = 255 im_digit[1, i] = 255 thresh = 210 im_digit = cv2.cvtColor(im_digit, cv2.COLOR_BGR2GRAY) im_digit = cv2.threshold(im_digit, thresh, 255, cv2.THRESH_BINARY)[1] # im_digit = cv2.adaptiveThreshold(im_digit,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C ,cv2.THRESH_BINARY,11,2) im_digit = (255 - im_digit) im_digit = imresize(im_digit, (20, 20)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) if pred.shape[1] == 10: pred = pred.ravel() _ = cv2.putText(im, str(pd.Series(pred).idxmax()), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3) _ = cv2.putText(blank_image, str(int(pred[0])), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5) cv2.imwrite("original_overlay.png", im) cv2.imwrite("final_digits.png", blank_image) cv2.destroyAllWindows()
def find_template_in_image(fig_im: np.ndarray, page_im: np.ndarray, scales: List[float], use_canny: bool) -> \ Optional[Tuple[datamodels.BoxClass, float, float]]: """ Find the position of the best match for fig_im on page_im by checking at each of a list of scales. Each scale is a float in (0,1] representing the ratio of the size of fig_im to page_im (maximum of height ratio and width ratio). """ try: template = imresize(fig_im, SCALE_FACTOR) except ValueError: # This may cause some very small images to have size 0 which causes a ValueError return None (template_height, template_width) = template.shape[:2] (page_height, page_width) = page_im.shape[:2] if use_canny: template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY) template = cv2.Canny(template, 100, 200) page_im = cv2.cvtColor(page_im, cv2.COLOR_BGR2GRAY) found = None best_scale = None template_page_size_ratio = max( template_height / page_height, template_width / page_width ) # loop over the scales of the image for scale in (scales)[::-1]: # resize the image according to the scale, and keep track of the ratio of the resizing. page_resized = imresize( page_im, template_page_size_ratio / scale ) r = page_im.shape[1] / float(page_resized.shape[1]) assert ( page_resized.shape[0] >= template_height and page_resized.shape[1] >= template_width ) if use_canny: page_resized = cv2.Canny(page_resized, 50, 200) result = cv2.matchTemplate( page_resized, template, cv2.TM_CCOEFF_NORMED ) (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result) if found is None or maxVal > found[0]: found = (maxVal, maxLoc, r) best_scale = scale logging.debug('Scale: %.03f, Score: %.03f' % (scale, maxVal)) assert found is not None (score, maxLoc, r) = found (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r)) (endX, endY) = ( int((maxLoc[0] + template_width) * r), int((maxLoc[1] + template_height) * r) ) fig_box = datamodels.BoxClass(x1=startX, y1=startY, x2=endX, y2=endY) return fig_box, score, best_scale
def resizeImage(self, inImage): (ny, nx, nf) = inImage.shape if (self.resizeMethod == "crop"): if (ny > nx): #Get percentage of scale scale = float(self.inputShape[1]) / nx targetNy = int(round(ny * scale)) scaleImage = imresize(inImage, (targetNy, self.inputShape[1])) cropTop = (targetNy - self.inputShape[0]) / 2 outImage = scaleImage[cropTop:cropTop + self.inputShape[0], :, :] elif (ny <= nx): #Get percentage of scale scale = float(self.inputShape[0]) / ny targetNx = int(round(nx * scale)) scaleImage = imresize(inImage, (self.inputShape[0], targetNx)) cropLeft = (targetNx - self.inputShape[1]) / 2 outImage = scaleImage[:, cropLeft:cropLeft + self.inputShape[1], :] elif (self.resizeMethod == "pad"): if (ny > nx): #Get percentage of scale scale = float(self.inputShape[0]) / ny targetNx = int(round(nx * scale)) scaleImage = imresize(inImage, (self.inputShape[0], targetNx)) padLeft = (self.inputShape[1] - targetNx) / 2 padRight = self.inputShape[1] - (padLeft + targetNx) outImage = np.pad(scaleImage, ((0, 0), (padLeft, padRight), (0, 0)), 'constant') elif (ny <= nx): #Get percentage of scale scale = float(self.inputShape[1]) / nx targetNy = int(round(ny * scale)) scaleImage = imresize(inImage, (targetNy, self.inputShape[1])) padTop = (self.inputShape[0] - targetNy) / 2 padBot = self.inputShape[0] - (padTop + targetNy) outImage = np.pad(scaleImage, ((padTop, padBot), (0, 0), (0, 0)), 'constant') elif (self.resizeMethod == "max"): #We pad entire image with 0 assert (ny <= self.inputShape[0]) assert (nx <= self.inputShape[1]) padTop = (self.inputShape[0] - ny) / 2 padBot = self.inputShape[0] - (padTop + ny) padLeft = (self.inputShape[1] - nx) / 2 padRight = self.inputShape[1] - (padLeft + nx) outImage = np.pad(inImage, ((padTop, padBot), (padLeft, padRight), (0, 0)), 'constant') else: print "Method ", resizeMethod, "not supported" assert (0) return outImage
def proc_user_img(img_file, model): print('loading "%s for digit recognition" ...' % img_file) im = cv2.imread(img_file) blank_image = np.zeros((im.shape[0], im.shape[1], 3), np.uint8) blank_image.fill(255) imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) plt.imshow(imgray) kernel = np.ones((5, 5), np.uint8) ret, thresh = cv2.threshold(imgray, 127, 255, 0) thresh = cv2.erode(thresh, kernel, iterations=1) thresh = cv2.dilate(thresh, kernel, iterations=1) thresh = cv2.erode(thresh, kernel, iterations=1) _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) digits_rectangles = get_digits( contours, hierarchy) #rectangles of bounding the digits in user image for rect in digits_rectangles: x, y, w, h = rect if w > 130: a = math.floor(w / 22) h = math.floor(h / 2) - 5 for sub in range(0, a): cv2.rectangle(im, (x + sub * 22, y), (x + (sub + 1) * 22, y + h), (0, 255, 0), 2) im_digit = imgray[y:y + h, x + sub * 22:x + (sub + 1) * 22] im_digit = (255 - im_digit) im_digit = imresize(im_digit, (IMG_WIDTH, IMG_HEIGHT)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) print('sub_num', pred) cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2) im_digit = imgray[y:y + h, x:x + w] im_digit = (255 - im_digit) im_digit = imresize(im_digit, (IMG_WIDTH, IMG_HEIGHT)) hog_img_data = pixels_to_hog_20([im_digit]) pred = model.predict(hog_img_data) # cv2.putText(im, str(int(pred[0])), (x,y),cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3) cv2.putText(blank_image, str(int(pred[0])), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5) plt.imshow(im) cv2.imwrite("original_overlay.png", im) # cv2.imwrite("final_digits.png",blank_image) cv2.destroyAllWindows()
def generator_train_img(real_list_dir, white_list_dir, resize, batch_size):
    batch_real_img = []
    batch_white_img = []
    for _ in range(batch_size):
        random_index = np.random.randint(len(real_list_dir))
        real_img = imresize(imread(real_list_dir[random_index], mode='L'), resize)
        white_img = imresize(imread(white_list_dir[random_index], mode='L'), resize)
        batch_real_img.append(real_img)
        batch_white_img.append(white_img)
    batch_real_img = np.array(batch_real_img) / 127.5 - 1
    batch_real_img = np.expand_dims(batch_real_img, axis=1)
    batch_white_img = np.array(batch_white_img) / 127.5 - 1
    batch_white_img = np.expand_dims(batch_white_img, axis=1)
    return batch_real_img, batch_white_img
def resize_and_crop(im, width, height):
    im_aspect = float(im.shape[0]) / im.shape[1]
    new_aspect = float(height) / width
    if im_aspect > new_aspect:
        # Need to chop the top and bottom
        new_height = int(width * im_aspect)
        resized_im = imresize(im, (new_height, width))
        start = (new_height - height) // 2
        output_im = resized_im[start:start + height, :]
    else:
        # Need to chop the left and right.
        new_width = int(height / im_aspect)
        resized_im = imresize(im, (height, new_width))
        start = (new_width - width) // 2
        output_im = resized_im[:, start:start + width]
    assert output_im.shape[:2] == (height, width)
    return output_im
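# Usage sketch for resize_and_crop with a synthetic image, assuming imresize from
# scipy.misc is importable in this environment (SciPy < 1.3).
import numpy as np

im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)   # synthetic 640x480 RGB image
thumb = resize_and_crop(im, width=224, height=224)           # scale, then crop the overhang
assert thumb.shape[:2] == (224, 224)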
def find_PCAKmeans(imagepath1, imagepath2): print("Operating") image1 = imread(imagepath1) image2 = imread(imagepath2) image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY) image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY) # print(image1.shape,image2.shape) new_size = np.asarray(image1.shape) / 5 new_size = new_size.astype(np.int) * 5 image1 = imresize(image1, (new_size)).astype(np.int16) image2 = imresize(image2, (new_size)).astype(np.int16) diff_image = np.abs(image1 - image2) imsave("diff_%s" % imagepath1.split("/")[1], diff_image) print("\nBoth images resized to ", new_size) vector_set, mean_vec = find_vector_set(diff_image, new_size) pca = PCA() pca.fit(vector_set) EVS = pca.components_ FVS = find_FVS(EVS, diff_image, mean_vec, new_size) print("\ncomputing k means") components = 3 least_index, change_map = clustering(FVS, components, new_size) change_map[change_map == least_index] = 255 change_map[change_map != 255] = 0 change_map = change_map.astype(np.uint8) kernel = np.asarray( ( (0, 0, 1, 0, 0), (0, 1, 1, 1, 0), (1, 1, 1, 1, 1), (0, 1, 1, 1, 0), (0, 0, 1, 0, 0), ), dtype=np.uint8, ) cleanChangeMap = cv2.erode(change_map, kernel) imsave("changemap_%s" % imagepath1.split("/")[1], change_map) imsave("cleanchangemap_%s" % imagepath1.split("/")[1], cleanChangeMap)
def read_image(filename):
    img = imread(filename)
    # handle grayscale: replicate the single channel into 3 channels
    if img.ndim == 2:
        img = img[:, :, None][:, :, [0, 0, 0]]
    img = imresize(img, (224, 224))
    return img
def crop_and_resize(input_image, outdir):
    # detect face -> crop -> resize -> save
    im = cv2.imread(input_image)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    faces = faceCascade.detectMultiScale(im, scaleFactor=1.5, minNeighbors=5, minSize=(30, 30))
    face_color = None
    for (x, y, w, h) in faces:
        face_color = im[y:y + h, x:x + w]
    try:
        small = cv2.resize(face_color, (64, 64))
        file_name = input_image.split('\\')[-1]
        imsave("{}/{}".format(outdir, file_name), small)
    except Exception:
        # if face is not detected, fall back to a 108x108 center crop
        im = imread(input_image)
        height, width, color = im.shape
        edge_h = int(round((height - 108) / 2.0))
        edge_w = int(round((width - 108) / 2.0))
        cropped = im[edge_h:(edge_h + 108), edge_w:(edge_w + 108)]
        small = imresize(cropped, (64, 64))
        file_name = input_image.split('\\')[-1]
        imsave("{}/{}".format(outdir, file_name), small)
def process(folder, shape_predictor='shape_predictor_68_face_landmarks.dat'): for p, person in enumerate(os.listdir(folder)): for w, word in enumerate(os.listdir(folder + person)): for k, instance in enumerate( os.listdir(folder + person + '/' + word)): features = [] for j, image in enumerate( os.listdir(folder + '/' + person + '/' + word + '/' + instance)): path = folder + '/' + person + '/' + word + '/' + instance + '/' + image # print(path) if image[0] == 'c' and j < 16: image = cv2.imread(path, 0) detector = dlib.get_frontal_face_detector() predictor = dlib.shape_predictor(shape_predictor) rects = detector(image, 1) for (i, rect) in enumerate(rects): shape = predictor(image, rect) shape = face_utils.shape_to_np(shape) mouth = shape[48:68] x1 = min(mouth[i][0] for i in range(0, 20)) - 5 y1 = min(mouth[i][1] for i in range(0, 20)) - 5 x2 = max(mouth[i][0] for i in range(0, 20)) + 5 y2 = max(mouth[i][1] for i in range(0, 20)) + 5 crop_image = image[y1:y2, x1:x2] image = imresize(crop_image, [40, 40]) # cv2.imwrite(str(k)+str(j)+".jpg", image) features.append(image) if len(features) < 16: for hosa in range(16 - len(features)): features.append(features[-1]) print(instance, person) save_images(person, word, k, features)
def worker():
    while True:
        i, filename = q.get()
        img = imread(filename)
        # handle grayscale
        if img.ndim == 2:
            img = img[:, :, None][:, :, [0, 0, 0]]
        H0, W0 = img.shape[0], img.shape[1]
        img = imresize(img, float(args.image_size) / max(H0, W0))
        H, W = img.shape[0], img.shape[1]
        # swap rgb to bgr. Is this the best way?
        r = img[:, :, 0].copy()
        img[:, :, 0] = img[:, :, 2]
        img[:, :, 2] = r
        lock.acquire()
        if i % 1000 == 0:
            print('Writing image %d / %d' % (i, len(data)))
        original_heights[i] = H0
        original_widths[i] = W0
        image_heights[i] = H
        image_widths[i] = W
        image_dset[i, :, :H, :W] = img.transpose(2, 0, 1)
        lock.release()
        q.task_done()
def myImResize(image, imSize): arrayImage = np.reshape(image, image.shape) image = extractImportantSubset(arrayImage) sh = calcShapeToResize(image.shape, imSize=imSize) image = imresize(image, sh) arrayImage = np.reshape(image, image.shape) sh_final = arrayImage.shape if (max(sh_final) != imSize): print('shape resulting: ', sh_final, ' expected: ', sh) top = False row = np.ones((1, imSize)) * 255 col = np.ones((imSize, 1)) * 255 while (arrayImage.shape)[0] < imSize: if top: arrayImage = np.vstack([row, arrayImage]) else: arrayImage = np.vstack([arrayImage, row]) top = not top while (arrayImage.shape)[1] < imSize: if top: arrayImage = np.append(arrayImage, col, axis=1) else: arrayImage = np.append(col, arrayImage, axis=1) top = not top return arrayImage
def hashObj(feat, params_sun09):
    dim = 25
    numClasses = 21

    # COMPUTE THE FEATURE REPRESENTATION
    temp_pred = feat['max_classes2']
    temp_pred = temp_pred[0:-1, 0:-1]
    temp_pred = smp.imresize(temp_pred, (dim, dim), interp='nearest', mode='F')
    temp_pred = np.int8(temp_pred)
    temp_pred = temp_pred.reshape(dim * dim, order='F').copy()
    tempMask = np.zeros((numClasses * dim * dim))
    for k in range(numClasses):
        tempClassMask = np.zeros((dim * dim, 1))
        tempClassMask[np.nonzero(temp_pred == k)] = 1
        tempMask[k::numClasses] = tempClassMask.squeeze()

    # COMPUTE THE HASHCODES FROM THE FEATURE
    tempMask = tempMask - np.tile(np.transpose(params_sun09['meanY']), (tempMask.shape[0], 1))
    tempMask = np.dot(tempMask, params_sun09['Wy'])
    tempMask = np.dot(tempMask, params_sun09['R'])
    Y = np.zeros(tempMask.shape)
    Y[tempMask >= 0] = 1
    Z = hl.compactbit(Y > 0)
    return Z
def preprocessImage(self, image_path): img = imread(image_path) # handle grayscale if img.ndim == 2: img = img[:, :, None][:, :, [0, 0, 0]] H0, W0 = img.shape[0], img.shape[1] img = imresize(img, float(720) / max(H0, W0)) img2ret = img.copy() H, W = img.shape[0], img.shape[1] # swap rgb to bgr. Is this the best way? r = img[:, :, 0].copy() img[:, :, 0] = img[:, :, 2] img[:, :, 2] = r img = np.expand_dims(img.transpose(2, 0, 1), axis=0) # todo: save a file that lua can read. filename = image_path.split('/')[-1].split('.')[0] # save to h5file f = h5py.File(os.path.join(self.dataPipePath, filename + '.h5'), 'w') imdb = f.create_dataset('image', img.shape, dtype=np.uint8) imdb[0] = img[0] f.close() # np.save(os.path.join(self.dataPipePath, filename+'.npy'),img) return img2ret
def im2vgginput(im, shaping_mode = 'squeeze', already_bgr = False): """ :param im: A (size_y, size_x, 3) array representing a RGB image on a [0, 255] scale, or a (n_samples, size_y, size_x, 3) array representing an array of such images. :param shaping_mode: 'squeeze': Squeezes the image into the desired shape. 'crop': Crops the center region (of the desired shape) out. :returns: A (n_samples, 3, 224, 224) array representing the BGR image that's ready to feed into VGGNet """ if not isinstance(im, np.ndarray): return np.concatenate([im2vgginput(m, shaping_mode = shaping_mode) for m in im]) if len(im)>0 else np.zeros((0, 3, 224, 224)) if im.ndim==2: im = np.repeat(im[:, :, None], repeats=3, axis=2) if any(m.shape[-2:-1] != (224, 224) for m in im): if shaping_mode == 'squeeze': from scipy.misc.pilutil import imresize # TODO: Test! im = imresize(im, size=(224, 224)) elif shaping_mode == 'crop': current_shape = im.shape[-3:-1] assert current_shape[0]>=224 and current_shape[1]>=224, "Don't currently have padding implemented" row_start, col_start = [(c-224)/2 for c in current_shape] im = im[..., row_start:row_start+224, col_start:col_start+224, :] else: raise Exception('Unknown shaping mode: "%s"' % (shaping_mode, )) bgr_im = im if already_bgr else im[..., ::-1] centered_bgr_im = bgr_im - np.array([103.939, 116.779, 123.68]) feature_map_im = np.rollaxis(centered_bgr_im, -1, -3) if feature_map_im.ndim==3: feature_map_im = feature_map_im[None, ...] return feature_map_im.astype(theano.config.floatX)
def predict():
    imgData = request.get_data()
    convertImage(imgData)
    print("debug")
    x = imread('output.png', mode='L')
    x = preprocess(x)
    x = imresize(x, (28, 28))
    x = x.astype('float32')
    x /= 255
    x = x.reshape(1, 28, 28, 1)
    print("debug2")
    with graph.as_default():
        out = model.predict(x)
        # out = model.predict_proba(x, verbose=1)
        print(out)
        print(np.argmax(out, axis=1))
        index = np.array(np.argmax(out, axis=1))
        index = index[0]
        sketch = txt_name_list[index]
        print("debug3")
        return sketch
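# predict() reads like a Flask view: a hedged sketch of how it might be registered,
# assuming the usual Flask app object and that model, graph, txt_name_list, convertImage
# and preprocess are module-level state as the function expects.
from flask import Flask, request

app = Flask(__name__)
app.add_url_rule('/predict', view_func=predict, methods=['POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)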
def make_raw_dataset(video_list, categories, dataset="train"): data = [] for category in categories: print(category) video_list = sorted(video_list) for filepath in video_list: print(filepath, end='\r') filename = os.path.basename(filepath) vid = imageio.get_reader(filepath, "ffmpeg") frames = [] # Add each frame to correct list. for i, frame in enumerate(vid): # Convert to grayscale. frame = Image.fromarray(np.array(frame)) frame = frame.resize((160, 120)) frame = frame.convert("L") frame = np.array(frame.getdata(), dtype=np.uint8).reshape( (120, 160)) frame = imresize(frame, (60, 80)) frames.append(frame) data.append({ "filename": filename, "category": category, "frames": frames }) pickle.dump(data, open("data/%s.p" % dataset, "wb"))
def imresize_multichannel(im: np.ndarray, target_size: typing.Tuple[int, int], **kwargs) -> np.ndarray:
    n_channels = im.shape[2]
    resized_channels = [
        imresize(im[:, :, n], target_size, **kwargs) for n in range(n_channels)
    ]
    return np.stack(resized_channels, axis=2)
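# Usage sketch for imresize_multichannel: imresize goes through PIL, which only accepts
# arrays it can represent as an image, so stacks with arbitrary channel counts are resized
# channel by channel. The array here is synthetic.
import numpy as np

stack = np.random.rand(64, 64, 5).astype(np.float32)              # e.g. a 5-channel feature map
resized = imresize_multichannel(stack, (128, 128), interp='bilinear', mode='F')
assert resized.shape == (128, 128, 5)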
def resize_image_for_speed(im):
    # resize the image for speed
    nr0 = im.shape[0]
    nc0 = im.shape[1]
    if RESIZE > min(nr0, nc0):
        nc_resize_ratio = 1.
        nr_resize_ratio = 1.
        nc = nc0
        nr = nr0
    else:
        if nr0 < nc0:
            nc = nc0 * RESIZE / nr0
            nr = RESIZE
            # multiply by nc_resize_ratio to go from real coordinates to smaller, resized
            # coordinates
            nc_resize_ratio = float(nc) / float(nc0)
            nr_resize_ratio = float(nr) / float(nr0)
        else:
            nr = nr0 * RESIZE / nc0
            nc = RESIZE
            nc_resize_ratio = float(nc) / float(nc0)
            nr_resize_ratio = float(nr) / float(nr0)
    im = pilutil.imresize(im, [nr, nc])
    return (im, nr_resize_ratio, nc_resize_ratio)
def load_resized_mnist(new_size, from_to_digits=(0, 2), randomize=False):
    """ Load resized mnist digits from 0 to 5 """
    samples, labels = mnist()
    print('Resizing...')
    samples = samples[np.in1d(labels, np.arange(from_to_digits[0], from_to_digits[1] + 1))]
    labels = labels[np.in1d(labels, np.arange(from_to_digits[0], from_to_digits[1] + 1))]
    if new_size != 28:
        resized_imgs = [
            imresize(img.reshape([28, 28]), [new_size, new_size],
                     interp='lanczos').ravel()[np.newaxis].T
            for img in samples
        ]
        resized_imgs = np.array(resized_imgs)
        resized_imgs = resized_imgs.astype(float)
        resized_imgs /= 255.0
        resized_imgs = 2 * resized_imgs - 1
        np.save('./data/resized_mnist_' + str(from_to_digits[0]) + '_' +
                str(from_to_digits[1]) + '_5_samples.npy', resized_imgs)
        np.save('./data/resized_mnist_' + str(from_to_digits[0]) + '_' +
                str(from_to_digits[1]) + '_labels.npy', labels)
        return resized_imgs, labels
    else:
        return samples, labels
def score_frame(model, history, ix, r, d, interp_func, mode='actor'):
    # r: radius of blur
    # d: density of scores (if d==1, then get a score for every pixel...
    #    if d==2 then every other, which is 25% of total pixels for a 2D image)
    assert mode in ['actor', 'critic'], 'mode must be either "actor" or "critic"'
    L = run_through_model(model, history, ix, interp_func, mask=None, mode=mode)
    scores = np.zeros((int(80 / d) + 1, int(80 / d) + 1))  # saliency scores S(t,i,j)
    for i in range(0, 80, d):
        for j in range(0, 80, d):
            mask = get_mask(center=[i, j], size=[80, 80], r=r)
            l = run_through_model(model, history, ix, interp_func, mask=mask, mode=mode)
            # Greydanus metric
            scores[int(i / d), int(j / d)] = (L - l).pow(2).sum().mul_(.5).data[0]
    pmax = scores.max()
    scores = imresize(scores, size=[80, 80], interp='bilinear').astype(np.float32)
    return pmax * scores / scores.max()
def generator_training_Img(real_list_dir, white_list_dir, resize=None, batch_size=32):
    batch_real_img = []
    batch_white_img = []
    for _ in range(batch_size):
        real_img = imread(np.random.choice(real_list_dir), mode='L')
        white_img = imread(np.random.choice(white_list_dir), mode='L')
        if resize:
            real_img = imresize(real_img, resize)
            white_img = imresize(white_img, resize)
        batch_real_img.append(real_img)
        batch_white_img.append(white_img)
    batch_real_img = np.array(batch_real_img) / 127.5 - 1
    batch_real_img = np.expand_dims(batch_real_img, axis=1)
    batch_white_img = np.array(batch_white_img) / 127.5 - 1
    batch_white_img = np.expand_dims(batch_white_img, axis=1)
    return batch_real_img, batch_white_img
def detectfaceemotion(): cap = cv2.VideoCapture(0) modelWeights = keras.models.load_model('emotion_weightsGoogleNet.h5') while cap.isOpened(): ret, img = cap.read() if ret: x=[] gray = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY) flag,face = detect_face(gray) if flag: gray = imresize(face, [height, width], 'bilinear') gray = np.dstack((gray,) * 3) x.append(gray) x = np.asarray(x) print(x.shape) result=modelWeights.predict( x, batch_size=8, verbose=0) for index,emotion in enumerate(EMOTIONS): cv2.putText(img, emotion, (10,index * 20 + 20), cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 255, 0), 1); cv2.rectangle(img, (130, index * 20 + 10), (130 + int(result[0][index] * 100), (index + 1) * 20 + 4), (255, 0, 0), -1) print(result) cv2.imshow('TestingResult',img) if cv2.waitKey(1) &0xff==ord('q'): break else: print("Can't open camera") cv2.destroyAllWindows()
def double_patch_sz(l): from scipy.misc.pilutil import imresize new_patch_sz = 2*l.model.patch_sz newN = 2*l.model.N A = l.model.A.get_value() A = np.dot(l.model.dewhitenmatrix,A) upA = np.zeros((new_patch_sz**2,l.model.N)) for j in range(A.shape[1]): a = imresize(A[:,j].reshape(l.model.patch_sz,l.model.patch_sz),(new_patch_sz,new_patch_sz),mode='F') upA[:,j] = a.ravel() newA = np.zeros((new_patch_sz**2,newN)) avgA = np.zeros_like(upA) avgA[:,:-1] = .5*(upA[:,:-1] + upA[:,1:]) avgA[:,-1] = .5*(upA[:,-1] + upA[:,0]) newA[:,::2] = upA newA[:,1::2] = avgA l.model.N = newN l.model.NN = newN l.model.patch_sz = new_patch_sz l.model.D = new_patch_sz**2 databatch = l.get_databatch(whitenpatches) l.model.learn_whitening(databatch) newA = np.dot(l.model.whitenmatrix,newA) newA = l.model.normalize_A(newA) l.model.A.set_value(newA.astype(hdl.models.theano.config.floatX)) l.model.reset_functions() return l
def load_digits_custom(img_file):
    train_data = []
    train_target = []
    start_class = 1
    im = cv2.imread(img_file)
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    plt.imshow(imgray)
    kernel = np.ones((5, 5), np.uint8)
    ret, thresh = cv2.threshold(imgray, 127, 255, 0)
    thresh = cv2.erode(thresh, kernel, iterations=1)
    thresh = cv2.dilate(thresh, kernel, iterations=1)
    thresh = cv2.erode(thresh, kernel, iterations=1)
    _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    digits_rectangles = get_digits(contours, hierarchy)  # rectangles bounding the digits in user image

    # sort rectangles according to x,y pos so that we can label them
    digits_rectangles.sort(key=lambda x: get_contour_precedence(x, im.shape[1]))

    for index, rect in enumerate(digits_rectangles):
        x, y, w, h = rect
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        im_digit = imgray[y:y + h, x:x + w]
        im_digit = (255 - im_digit)
        im_digit = imresize(im_digit, (IMG_WIDTH, IMG_HEIGHT))
        train_data.append(im_digit)
        train_target.append(start_class % 10)
        if index > 0 and (index + 1) % 10 == 0:
            start_class += 1

    cv2.imwrite("training_box_overlay.png", im)
    return np.array(train_data), np.array(train_target)
def zion(findstr, catfile, outstr, EXTEN=0): file_list = glob.glob(findstr) catData = np.loadtxt(catfile, skiprows=1) sort_idx = np.argsort(catData[:,0]) catData = catData[sort_idx] for i in range(len(file_list)): print(file_list[i]) hdu = pyfits.open(file_list[i]) image = hdu[EXTEN].data imAngle = hdu[EXTEN].header['ANGLE'] idx = np.where(catData[:,0] == imAngle)[0] radius = np.average(catData[:,3][idx]) width = np.average(catData[:,4][idx]) cent = ADE.centroid(image) dims = image.shape imRad = radius + width cutIm = image[cent[0] - imRad:cent[0] + imRad,\ cent[1] - imRad:cent[1] + imRad] bigIm = pl.imresize(cutIm, (1028,1028)) frameNumber = idx[0] name = str(frameNumber).zfill(3)+'_'+str(imAngle)+outstr+'.fits' if len(glob.glob(name)) == 0: pyfits.PrimaryHDU(bigIm).writeto(name) return
def plot_embedding(X, X_orig, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)
    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        plt.scatter(X[i, 0], X[i, 1])
    if hasattr(offsetbox, 'AnnotationBbox'):
        # only print thumbnails with matplotlib > 1.0
        shown_images = np.array([[1., 1.]])  # just something big
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 12e-3:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(
                    imresize(greyscale_to_rgb(X_orig[i].reshape((720, 1280))), (36, 64))
                ),
                X[i])
            ax.add_artist(imagebox)
    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)
def resizeImage(self, inImage): (ny, nx, nf) = inImage.shape if(self.resizeMethod == "crop"): if(ny > nx): #Get percentage of scale scale = float(self.inputShape[1])/nx targetNy = int(round(ny * scale)) scaleImage = imresize(inImage, (targetNy, self.inputShape[1])) cropTop = (targetNy-self.inputShape[0])/2 outImage = scaleImage[cropTop:cropTop+self.inputShape[0], :, :] elif(ny <= nx): #Get percentage of scale scale = float(self.inputShape[0])/ny targetNx = int(round(nx * scale)) scaleImage = imresize(inImage, (self.inputShape[0], targetNx)) cropLeft = (targetNx-self.inputShape[1])/2 outImage = scaleImage[:, cropLeft:cropLeft+self.inputShape[1], :] elif(self.resizeMethod == "pad"): if(ny > nx): #Get percentage of scale scale = float(self.inputShape[0])/ny targetNx = int(round(nx * scale)) scaleImage = imresize(inImage, (self.inputShape[0], targetNx)) padLeft = (self.inputShape[1]-targetNx)/2 padRight = self.inputShape[1] - (padLeft + targetNx) outImage = np.pad(scaleImage, ((0, 0), (padLeft, padRight), (0, 0)), 'constant') elif(ny <= nx): #Get percentage of scale scale = float(self.inputShape[1])/nx targetNy = int(round(ny * scale)) scaleImage = imresize(inImage, (targetNy, self.inputShape[1])) padTop = (self.inputShape[0]-targetNy)/2 padBot = self.inputShape[0] - (padTop + targetNy) outImage = np.pad(scaleImage, ((padTop, padBot), (0, 0), (0, 0)), 'constant') elif(self.resizeMethod=="max"): #We pad entire image with 0 assert(ny <= self.inputShape[0]) assert(nx <= self.inputShape[1]) padTop = (self.inputShape[0]-ny)/2 padBot = self.inputShape[0]-(padTop+ny) padLeft = (self.inputShape[1]-nx)/2 padRight = self.inputShape[1]-(padLeft+nx) outImage = np.pad(inImage, ((padTop, padBot), (padLeft, padRight), (0, 0)), 'constant') else: print "Method ", resizeMethod, "not supported" assert(0) return outImage
def step2(yChannels):
    preprocessedImage = ndarray((84, 84, 4))
    for imgCounter in xrange(len(yChannels)):
        # TODO: look into bilinear reduction
        preprocessedImage[:, :, imgCounter] = imresize(yChannels[imgCounter], (84, 84))
    return preprocessedImage
def imrescale_multichannel(im: np.ndarray, scale_factor: float, **kwargs) -> np.ndarray:
    n_channels = im.shape[2]
    resized_channels = [
        imresize(im[:, :, n], scale_factor, **kwargs) for n in range(n_channels)
    ]
    return np.stack(resized_channels, axis=2)
def KNN_MachineLearning(img_file, model): print("START TO PROCESSING INPUT IMAGE:\n") img = cv2.imread(img_file) blank_image = np.zeros((img.shape[0], img.shape[1], 3), np.int32) # blank_image = np.zeros((img.shape[0],img.shape[1],3),dtype=int) # the above line is my original solution # while it cause a problem in the following output there is an uncompatible error # switch to another data type -- np.int32 could solve this problem blank_image.fill(255) # 0 is color black and 255 is color white # here we create a new image of the input size img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) plt.imshow(img_gray) # kernel = np.ones((5,5),dtype=int) kernel = np.ones((5, 5), np.uint8) # pre-processing the input images ret, thresh = cv2.threshold(img_gray, 127, 255, 0) thresh = cv2.erode(thresh, kernel, iterations=1) thresh = cv2.dilate(thresh, kernel, iterations=1) thresh = cv2.erode(thresh, kernel, iterations=1) _, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) rectangles = rect_from_contoursHierarchy(contours, hierarchy) #rectangles of bounding the digits in user image for r in rectangles: x, y, w, h = r cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) # cv2.rectangle(img,(380,0),(511,111),(255,0,0),3) # the parameters are two points of the rectangle color and the type of the line img_digit = img_gray[y:y + h, x:x + w] img_digit = (255 - img_digit) img_digit = imresize(img_digit, (IMG_WIDTH, IMG_HEIGHT)) hog_img_data = pixels_to_hog([img_digit]) # for each rectangle area we obtained, we use our trained model to do the prediction pred = model.predict(hog_img_data) print("Now the predict value is %i" % pred[0]) # put the result into the orginal image cv2.putText(img, str(int(pred[0])), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3) # put the result into the new blank image cv2.putText(blank_image, str(int(pred[0])), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5) plt.imshow(img) cv2.imwrite(INPUT_IMAGE_AFTERLOAD, img) cv2.imwrite(OUTPUT_IMAGE, blank_image) cv2.destroyAllWindows()
def hashAttrib(feat, params_imgNet_color, imgNet_texture): dim = 25 numColorClasses = 11 numTextureClasses = 8 colorIdx = np.array([[0, 1, 2, 7, 10, 11, 13, 14, 15, 16, 18]], dtype='int8') textureIdx = np.array([[3, 4, 5, 6, 8, 12, 9, 17]], dtype='int8') # COMPUTE THE COLOR FEATURE REPRESENTATION tempMask = np.zeros((numColorClasses*dim*dim)) prob_map = feat['all_probs2'] prob_map = prob_map[0:-1, 0:-1, colorIdx] for k in range(numColorClasses): p1 = prob_map[:, :, k] p1 = smp.imresize(p1, (dim, dim), interp='bilinear', mode='F') p1 = p1.reshape(dim*dim, order='F').copy() tempMask[k::numColorClasses] = p1.squeeze() # COMPUTE THE HASHCODES FROM THE FEATURE tempMask = tempMask - np.tile(np.transpose(params_imgNet_color['meanY']), (tempMask.shape[0], 1)) tempMask = np.dot(tempMask, params_imgNet_color['Wy']) tempMask = np.dot(tempMask, params_imgNet_color['R']) Y = np.zeros(tempMask.shape) Y[tempMask>=0] = 1 Zc = hl.compactbit(Y>0) # COMPUTE THE TEXTURE FEATURE REPRESENTATION tempMask = np.zeros((numTextureClasses*dim*dim)) prob_map = feat['all_probs2'] prob_map = prob_map[0:-1, 0:-1, textureIdx] for k in range(numTextureClasses): p1 = prob_map[:, :, k] p1 = smp.imresize(p1, (dim, dim), interp='bilinear', mode='F') p1 = p1.reshape(dim*dim, order='F').copy() tempMask[k::numTextureClasses] = p1.squeeze() # COMPUTE THE HASHCODES FROM THE FEATURE tempMask = tempMask - np.tile(np.transpose(params_imgNet_texture['meanY']), (tempMask.shape[0], 1)) tempMask = np.dot(tempMask, params_imgNet_texture['Wy']) tempMask = np.dot(tempMask, params_imgNet_texture['R']) Y = np.zeros(tempMask.shape) Y[tempMask>=0] = 1 Zt = hl.compactbit(Y>0) Z = np.hstack([Zc[0:127], Zt[0:127]]) return Z
def load_digits(fn):
    print('loading "%s for training" ...' % fn)
    digits_img = cv2.imread(fn, 0)
    digits = split2d(digits_img, (DIGIT_WIDTH, DIGIT_HEIGHT))
    resized_digits = []
    for digit in digits:
        resized_digits.append(imresize(digit, (IMG_WIDTH, IMG_HEIGHT)))
    labels = np.repeat(np.arange(CLASS_N), len(digits) // CLASS_N)
    return np.array(resized_digits), labels
def resize_while_preserving_aspect_ratio(im, x_dim=None, y_dim=None):
    """
    Resize an image, while preserving the aspect ratio.  For this you need to specify either x_dim or y_dim.

    :param im: The image: a 2D or 3D array.
    :param x_dim: An integer indicating the desired size, or None, to leave it loose.
    :param y_dim: An integer indicating the desired size, or None, to leave it loose.
    :return: A new image whose x_dim or y_dim matches the constraint
    """
    assert not (x_dim is None and y_dim is None), 'You can not leave both constraints at None!'
    x_dim = float('inf') if x_dim is None else x_dim
    y_dim = float('inf') if y_dim is None else y_dim

    box_aspect_ratio = x_dim / float(y_dim)
    image_aspect_ratio = im.shape[1] / float(im.shape[0])
    if image_aspect_ratio > box_aspect_ratio:  # Active constraint is width
        return imresize(im, size=(int(x_dim / image_aspect_ratio + .5), x_dim))
    else:  # Active constraint is height
        return imresize(im, size=(y_dim, int(y_dim * image_aspect_ratio + .5)))
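# Quick usage sketch for resize_while_preserving_aspect_ratio with a synthetic image,
# constraining only the width.
import numpy as np

im = (np.random.rand(300, 600, 3) * 255).astype(np.uint8)   # 600x300 image (2:1 aspect ratio)
out = resize_while_preserving_aspect_ratio(im, x_dim=200)    # constrain width only
assert out.shape[1] == 200                                    # height follows at ~100 pixels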
def draw_image(img,windowsize=None,zoomaxes=None): # get axes if not input if zoomaxes is None: zoomaxes = [0,img.shape[1]-1,0,img.shape[0]-1] # check to make sure valid if (int(zoomaxes[3]) < int(zoomaxes[2])) or (int(zoomaxes[1]) < int(zoomaxes[0])): raise ValueError('Invalid zoom axes input') # crop image scale_img = img.copy() scale_img = scale_img[int(zoomaxes[2]):int(zoomaxes[3]+1),int(zoomaxes[0]):int(zoomaxes[1]+1)] # resize image so that it fits in the window if windowsize is not None: scale_x = float(windowsize[1])/float(scale_img.shape[1]) scale_y = float(windowsize[0])/float(scale_img.shape[0]) if scale_x < scale_y: size_x = windowsize[1] size_y = scale_img.shape[0]*scale_x resize = scale_x else: size_y = windowsize[0] size_x = scale_img.shape[1]*scale_y resize = scale_y scale_img = imresize(scale_img,(int(size_y),int(size_x))) # create an rgb image out of img if scale_img.ndim == 3: if not (scale_img.dtype == 'uint8'): scale_img = scale_img.astype('uint8') elif scale_img.dtype == 'uint8': scale_img = to_rgb8('MONO8',scale_img) elif scale_img.dtype == 'double': # scale to between 0 and 255 scale_img = num.astype(img_normalize(scale_img)*255.,'uint8') scale_img = to_rgb8('MONO8',scale_img) #print 'image shape:' #print scale_img.shape #print 'resize: %f'%resize # create a bitmap out of image h,w,three = scale_img.shape img_size = [h,w] image = wx.EmptyImage(w,h) image.SetData( scale_img.tostring() ) bmp = wx.BitmapFromImage(image) return (bmp,resize,img_size)
def dhash(image, hash_size=8):
    image = imresize(image, (hash_size + 1, hash_size))
    # image = image.convert("L").resize((hash_size + 1, hash_size), Image.ANTIALIAS)
    # pixels = numpy.array(image.getdata(), dtype=numpy.float).reshape((hash_size + 1, hash_size))
    # pixels = image.reshape((hash_size + 1, hash_size))

    # compute differences between adjacent rows
    diff = image[1:, :] > image[:-1, :]

    # sample the bits
    one = 0x0000000000000001
    h = 0x0000000000000000
    for v in diff.flat[:63]:
        if v:
            h |= one
        one = one << 1
    return h
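# Hedged usage sketch for dhash: two hashes can be compared with a Hamming distance on the
# 64-bit values (small distances mean visually similar images). The file names are
# hypothetical and to_gray is a helper introduced here just for the example.
import imageio
import numpy as np

def to_gray(path):
    img = imageio.imread(path)
    return img.mean(axis=2) if img.ndim == 3 else img

distance = bin(dhash(to_gray('frame_a.png')) ^ dhash(to_gray('frame_b.png'))).count('1')
print('near-duplicate' if distance <= 10 else 'different')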
def resize_image(im, width=None, height=None, mode='squeeze'):
    assert isinstance(im, np.ndarray) and im.ndim in (2, 3)
    if mode == 'squeeze':
        im = imresize(im, size=(height, width))
    elif mode == 'preserve_aspect':
        im = resize_while_preserving_aspect_ratio(im, x_dim=width, y_dim=height)
    elif mode == 'crop':
        current_height, current_width = im.shape[:2]
        assert current_height >= height and current_width >= width, "Crop size must be smaller than image size"
        row_start = (current_height - height) // 2
        col_start = (current_width - width) // 2
        im = im[row_start:row_start + height, col_start:col_start + width]
    elif mode in ('resize_and_crop', 'scale_crop'):
        assert height is not None and width is not None, "You need to specify both height and width for 'scale_crop' mode"
        return resize_and_crop(im, width=width, height=height)
    else:
        raise Exception("Unknown resize mode: '{}'".format(mode))
    return im
def imresize(self, data, new_wd, new_ht, method='bilinear'): """Scale an image in numpy array _data_ to the specified width and height. A smooth scaling is preferred. """ old_ht, old_wd = data.shape[:2] start_time = time.time() if have_qtimage: # QImage method is slightly faster and gives a smoother looking # result than PIL means = 'QImage' qimage = numpy2qimage(data) if (old_wd != new_wd) or (old_ht != new_ht): # NOTE: there is a strange bug in qimage.scaled if the new # dimensions are exactly the same--so we check and only # scale if there is some difference qimage = qimage.scaled(new_wd, new_ht, transformMode=QtCore.Qt.SmoothTransformation) newdata = qimage2numpy(qimage) else: newdata = data elif have_pilutil: means = 'PIL' zoom_x = float(new_wd) / float(old_wd) zoom_y = float(new_ht) / float(old_ht) if (old_wd >= new_wd) or (old_ht >= new_ht): # data size is bigger, skip pixels zoom = max(zoom_x, zoom_y) else: zoom = min(zoom_x, zoom_y) newdata = pilutil.imresize(data, zoom, interp=method) else: raise ImageError("No way to scale image smoothly") end_time = time.time() self.logger.debug("scaling (%s) time %.4f sec" % ( means, end_time - start_time)) return newdata
def draw_annotated_image(img,pointlists=None,linelists=None,circlelists=None, windowsize=None,zoomaxes=None, pointcolors=None,linecolors=None,circlecolors=None, pointsizes=None,linewidths=None,circlewidths=None): #print 'in draw_annotated_image' # get axes if not input if zoomaxes is None: zoomaxes = [0,img.shape[1]-1,0,img.shape[0]-1] # check to make sure valid if (int(zoomaxes[3]) < int(zoomaxes[2])) or (int(zoomaxes[1]) < int(zoomaxes[0])): raise ValueError('Invalid zoom axes input') # crop image scale_img = img.copy() scale_img = scale_img[int(zoomaxes[2]):int(zoomaxes[3]+1),int(zoomaxes[0]):int(zoomaxes[1]+1)] xoffset = -zoomaxes[0] yoffset = -zoomaxes[2] #print 'zoomaxes = ' + str(zoomaxes) # resize image so that it fits in the window if windowsize is not None: scale_x = float(windowsize[1])/float(scale_img.shape[1]) scale_y = float(windowsize[0])/float(scale_img.shape[0]) if scale_x < scale_y: size_x = windowsize[1] size_y = scale_img.shape[0]*scale_x resize = scale_x else: size_y = windowsize[0] size_x = scale_img.shape[1]*scale_y resize = scale_y scale_img = imresize(scale_img,(int(size_y),int(size_x))) #print 'current size of scale_img = ' + str(scale_img.shape) # create an rgb image out of img if scale_img.ndim == 3: if not (scale_img.dtype == 'uint8'): scale_img = scale_img.astype('uint8') elif scale_img.dtype == 'uint8': scale_img = to_rgb8('MONO8',scale_img) elif scale_img.dtype == 'double': # scale to between 0 and 255 scale_img = num.astype(img_normalize(scale_img)*255.,'uint8') scale_img = to_rgb8('MONO8',scale_img) #print 'image shape after converting to rgb:' #print scale_img.shape #print 'resize: %f'%resize # create a bitmap out of image h,w,three = scale_img.shape img_size = [h,w] image = wx.EmptyImage(w,h) #print 'created empty image of size (%d,%d)'%(w,h) image.SetData( scale_img.tostring() ) #print 'set the data' bmp = wx.BitmapFromImage(image) #print 'created bmp' # draw into bmp drawDC = wx.MemoryDC() drawDC.SelectObject( bmp ) # draw into bmp # set default point color drawDC.SetPen(wx.Pen('GREEN')) drawDC.SetBrush(wx.Brush(wx.Colour(255,255,255), wx.TRANSPARENT)) # by default set point radius to 8 point_radius=8 #print 'starting to draw stuff' if pointlists is not None: pointcolor = 'GREEN' for i,points in enumerate(pointlists): # set color if (pointcolors is not None) and (len(pointcolors) > i): pointcolor = wx.Colour(pointcolors[i][0],pointcolors[i][1],pointcolors[i][2]) if (pointsizes is not None) and (len(pointsizes) > i): point_radius = pointsizes[i] drawDC.SetPen(wx.Pen(colour=pointcolor,width=point_radius)) # set radius for j,pt in enumerate(points): # draw a circle x = int((xoffset+pt[0])*resize) y = int((yoffset+pt[1])*resize) if (x >= 0) and (x < img_size[1]) and \ (y >= 0) and (y < img_size[0]): drawDC.DrawCircle(x,y,point_radius) #print 'finished drawing points' if linelists is not None: # set default line color linecolor = 'GREEN' # set default line width linewidth = 1 for i,lines in enumerate(linelists): #print i # create a list of wxPoints points = [] for j,pt in enumerate(lines): x = int((xoffset+pt[0])*resize) y = int((yoffset+pt[1])*resize) newpoint = wx.Point(x,y) if (j < 1) or not (newpoint == lastpoint): points.append(newpoint) lastpoint = newpoint if len(points) == 0: continue if len(points) == 1: points.append(newpoint) # set color if (linecolors is not None) and (len(linecolors) > i): linecolor = wx.Colour(linecolors[i][0],linecolors[i][1], linecolors[i][2]) # set width if (linewidths is not None) and (len(linewidths) > i): linewidth = 
linewidths[i] drawDC.SetPen(wx.Pen(colour=linecolor,width=linewidth)) #print 'drawing line with color' #print linecolor #print 'width' #print linewidth #print 'points' #print points # draw the lines drawDC.DrawLines(points) #print 'finished drawing lines' if circlelists is not None: circlecolor = 'GREEN' for i,circles in enumerate(circlelists): # set color if (circlecolors is not None) and (len(circlecolors) > i): circlecolor = wx.Colour(circlecolors[i][0],circlecolors[i][1],circlecolors[i][2]) if (circlewidths is not None) and (len(circlewidths) > i): circlewidth = circlewidths[i] drawDC.SetPen(wx.Pen(colour=circlecolor,width=circlewidth)) # set radius if (circlewidths is not None) and (len(circlewidths) > i): circlewidth = circlewidths[i] for j,circle in enumerate(circles): # draw a circle x = int((xoffset+circle[0])*resize) y = int((yoffset+circle[1])*resize) r = int(circle[2]*resize) if (x >= 0) and (x < img_size[1]) and \ (y >= 0) and (y < img_size[0]): drawDC.DrawCircle(x,y,r) #print 'finished drawing circles' #print 'leaving draw_annotated_image' return (bmp,resize,img_size)
# ALLOCATE THE REQUIRED AMOUNT OF DATA X_gt_tr = np.zeros((num_files, numClasses * dim * dim)) # LOAD EACH FILE, RESIZE IT AND COPY IT INTO X idx = 0 fList = glob.glob(os.path.join(input_path1, "*.pkl.gz")) fList.sort() for infile in fList: print idx temp = pickle.load(gzip.GzipFile(infile)) tempMask = np.zeros((numClasses * dim * dim)) for k in temp["gt_masks"]: if k in temp["classes"]: temp_pred = temp["gt_masks"][k] temp_pred = temp_pred / 255 temp_pred = smp.imresize(temp_pred, (dim, dim), interp="nearest", mode="F") temp_pred = np.int8(temp_pred) temp_pred = temp_pred.reshape(dim * dim, order="F").copy() st = temp["classes"][k] tempMask[st::numClasses] = temp_pred X_gt_tr[idx, :] = tempMask idx = idx + 1 X = {} X["X"] = X_gt_tr sio.savemat(output_path1, X) # GET THE NUMBER OF FILES num_files = 0 for infile in glob.glob(os.path.join(input_path2, "*.pkl.gz")): num_files = num_files + 1
def test_imresize3(self):
    im = np.random.random((15, 30))
    im2 = pilutil.imresize(im, (30, 60), interp='nearest')
    assert_equal(im2.shape, (30, 60))
def test_imresize2(self):
    im = np.random.random((20, 30))
    im2 = pilutil.imresize(im, (30, 40), interp='bicubic')
    assert_equal(im2.shape, (30, 40))
def test_imresize(self):
    im = np.random.random((10, 20))
    for T in np.sctypes['float'] + [float]:
        im1 = pilutil.imresize(im, T(1.1))
        assert_equal(im1.shape, (11, 22))
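# The three tests above exercise scipy.misc.pilutil.imresize with both a float scale factor
# and an explicit (rows, cols) size. imresize was deprecated in SciPy 1.0 and removed in 1.3;
# a minimal Pillow-based stand-in for those two call patterns (a sketch, not a drop-in for
# every mode/interp combination used elsewhere in this file) might look like this.
import numpy as np
from PIL import Image

def imresize_pil(arr, size, resample=Image.BILINEAR):
    """Resize a 2-D or 3-D uint8 array; size is a float scale factor or a (rows, cols) tuple."""
    if np.isscalar(size):
        rows, cols = int(arr.shape[0] * size), int(arr.shape[1] * size)
    else:
        rows, cols = size
    img = Image.fromarray(np.asarray(arr, dtype=np.uint8))
    return np.array(img.resize((cols, rows), resample))  # PIL expects (width, height)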
def startProfile(self): self.profRangeStart_Pulse = int(self.profRange[0]*100) self.profRangeStop_Pulse = int(self.profRange[1]*100) CurrentPos = int(self.controller.getpos(inpulses=True))#returns absolute location in pulses self.controller.go(self.profRangeStart_Pulse) time.sleep((abs(CurrentPos-self.profRangeStart_Pulse)/4500) + 2) #Live Camera View appears to be paused because the entire program is stopped with time.sleep. print "Image one about to be taken" self.imgPos1 = self.cameraDevice._cameraDevice.read()[1] cv2.imwrite("testImgPos1.png", self.imgPos1) print "Image one written, stage about to move to position 2" CurrentPos2 = int(self.controller.getpos(inpulses=True)) self.controller.go(self.profRangeStop_Pulse) time.sleep((abs(CurrentPos2-self.profRangeStop_Pulse)/4500) + 2) print "Image two about to be taken" self.imgPos2 = self.cameraDevice._cameraDevice.read()[1] cv2.imwrite("testImgPos2.png", self.imgPos2) #X and Y values may be switched, but the final roi image is correct #Using the moments function to get the roi widthFactor = 13.0 gaussParams1 = moments(self.imgPos1.astype(float), circle = 0., rotate = 0., vheight = 0.) print str(gaussParams1) + "\n" xCenter1 = float(gaussParams1[1]) yCenter1 = float(gaussParams1[2]) xWidthReal1 = float(gaussParams1[3]) yWidthReal1 = float(gaussParams1[4]) xWidth1 = xWidthReal1*widthFactor yWidth1 = yWidthReal1*widthFactor #Using the gaussfit function to get the beam paramaters self.imgroi1 = self.imgPos1.astype(float)[yCenter1-yWidth1:yCenter1+yWidth1, xCenter1-xWidth1:xCenter1+xWidth1] cv2.imwrite("TESTimgroi1.png", self.imgroi1)#These images are stored in the python file in C, or in the git file self.NEWimgroi1 = imresize(self.imgroi1.astype(float), (170, 170)) cv2.imwrite("DO_NOT_DELETE_NEWimgroi1.png", self.NEWimgroi1)#MUST WRITE BEFORE setScene scene1 = QGraphicsScene() scene1.addPixmap(QPixmap("DO_NOT_DELETE_NEWimgroi1.png")) self.ui.graphicsView.setScene(scene1) self.fit1 = gaussfit(self.imgroi1.astype(float)) #Displaying the calculated parameters to the interface x1Diamfwhm = float((self.fit1[4]))#in mm NOTE, it is not fwhm, The name hasn't been changed yet y1Diamfwhm = float((self.fit1[5]))#mm x1 = self.fit1[2] y1 = self.fit1[3] self.ui.label_26.setText("%.5f" % (float(x1Diamfwhm*5.5*2))) self.ui.label_27.setText("%.5f" % (float(y1Diamfwhm*5.5*2)))#converts to micrometers self.ui.label_30.setText(str(self.profRangeStart_Pulse/100.)) print self.fit1.astype(float) #Repeat for position 2 gaussParams2 = moments(self.imgPos2.astype(float), circle = 0., rotate = 0., vheight = 0.) 
print str(gaussParams2) + "\n" xCenter2 = float(gaussParams2[1]) yCenter2 = float(gaussParams2[2]) xWidthReal2 = float(gaussParams2[3]) yWidthReal2 = float(gaussParams2[4]) xWidth2 = xWidthReal2*widthFactor yWidth2 = yWidthReal2*widthFactor self.imgroi2 = self.imgPos2.astype(float)[yCenter2-yWidth2:yCenter2+yWidth2, xCenter2-xWidth2:xCenter2+xWidth2] cv2.imwrite("TESTimgroi2.png", self.imgroi2)#These images are stored in the python file in C or in the git file self.NEWimgroi2 = imresize(self.imgroi2.astype(float), (170, 170)) cv2.imwrite("DO_NOT_DELETE_NEWimgroi2.png", self.NEWimgroi2) scene2 = QGraphicsScene() scene2.addPixmap(QPixmap("DO_NOT_DELETE_NEWimgroi2.png")) self.ui.graphicsView_2.setScene(scene2) self.fit2 = gaussfit(self.imgroi2.astype(float))#gaussfit returns height, amplitude, x, y, width_x, width_y, rota x2Diamfwhm = float((self.fit2[4]))#pixels y2Diamfwhm = float((self.fit2[5]))#pixels x2 = self.fit2[2] y2 = self.fit2[3] self.ui.label_33.setText("%.4f" % (float(x2Diamfwhm*5.5*2)))#converts to micrometers self.ui.label_35.setText("%.4f" % (float(y2Diamfwhm*5.5*2))) self.ui.label_32.setText(str(self.profRangeStop_Pulse/100.)) print self.fit2.astype(float) ########################################################################### #Getting the data deltaZ = float(self.profRange[1] - self.profRange[0]) * 1000. #micrometers #print deltaZ w1 = float(x1Diamfwhm*5.5*4)#Diameter #print w1 w2 = float(x2Diamfwhm*5.5*4) #print w2 wavelength = .663#in micrometers y1 = float(y1Diamfwhm*5.5*4) #print y1 y2 = float(y2Diamfwhm*5.5*4) #print y2 ####################################### solution to 3rd position ambiguity(using only the major axis) secondSolution = beamParam2(deltaZ, w1, w2, wavelength) Testz1 = int(secondSolution[0]/1000.) print "Test z1 in mm: " + str(Testz1) CurrentPos3 = int(self.controller.getpos(inpulses=True)) waistPos = ((self.profRangeStart_Pulse)/100-Testz1)# in mm self.controller.go(waistPos*100)#in pulses time.sleep((abs(CurrentPos3-waistPos*100)/4500) + 2) print "3rd image about to be taken" self.imgPos3 = self.cameraDevice._cameraDevice.read()[1] gaussParams3 = moments(self.imgPos3.astype(float), circle = 0., rotate = 0., vheight = 0.) 
xCenter3 = gaussParams3[1] yCenter3 = gaussParams3[2] xWidthReal3 = gaussParams3[3] yWidthReal3 = gaussParams3[4] xWidth3 = xWidthReal3*widthFactor yWidth3 = yWidthReal3*widthFactor self.imgroi3 = self.imgPos3.astype(float)[yCenter3-yWidth3:yCenter3+yWidth3, xCenter3-xWidth3:xCenter3+xWidth3] cv2.imwrite("TESTimgroi3.png", self.imgroi3)#These images are stored in the python file in C, OR in the git file self.fit3 = gaussfit(self.imgroi3.astype(float)) x3Diamfwhm = float((self.fit3[4]))#pixels, not fwhm y3Diamfwhm = float((self.fit3[5]))#pixels, not fwhm # print "x3Diamfwhm: " + str(x3Diamfwhm) # print "x2Diamfwhm: " + str(x2Diamfwhm) # print "x1Diamfwhm: " + str(x1Diamfwhm)#compare 3rd image width to 1st image width, see if it's larger or not # print "y3Diamfwhm: " + str(y3Diamfwhm) # print "y2Diamfwhm: " + str(y2Diamfwhm) # print "y1Diamfwhm: " + str(y1Diamfwhm) ###################################### if x3Diamfwhm > x1Diamfwhm: majorAxisParamaters = beamParam(deltaZ, w1, w2, wavelength) minorAxisParamaters = beamParam(deltaZ, y1, y2, wavelength) print "finished with the if statement" else: majorAxisParamaters = beamParam2(deltaZ, w1, w2, wavelength) minorAxisParamaters = beamParam2(deltaZ, y1, y2, wavelength) print "finished with the else statement" majorAxisParamaters = beamParam(deltaZ, w1, w2, wavelength) minorAxisParamaters = beamParam(deltaZ, y1, y2, wavelength) majorAngle = ((wavelength/((math.pi)*0.5*float(majorAxisParamaters[2])))*0.0174532925*1000.) minorAngle = ((wavelength/((math.pi)*0.5*float(minorAxisParamaters[2])))*0.0174532925*1000.) print majorAxisParamaters#a list of z1, z0, w0 print minorAxisParamaters self.ui.label_5.setText("%.5f" % float(majorAxisParamaters[2])) self.ui.label_7.setText("%.5f" % float((majorAxisParamaters[1])/1000.))#mm self.ui.label_6.setText("%.5f" % float(minorAxisParamaters[2])) self.ui.label_8.setText("%.5f" % float((minorAxisParamaters[1])/1000.))#mm self.ui.label_9.setText("%.5f" % (((majorAxisParamaters[0]/1000) + (self.profRangeStart_Pulse/100.))*-1)) self.ui.label_10.setText("%.5f" % (((minorAxisParamaters[0]/1000) + (self.profRangeStart_Pulse/100.))*-1)) self.ui.label_14.setText("%.7f" % float(majorAngle)) self.ui.label_16.setText("%.7f" % float(minorAngle)) print "End of start profile"
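# A minimal sketch (hypothetical helper, not part of the original module) of the
# ROI crop used in startProfile above. The snippet slices with float centres and
# widths (yCenter1-yWidth1, ...), which NumPy >= 1.12 rejects; this version casts
# and clamps the bounds explicitly. The name crop_roi and the 170x170 default are
# illustrative only.
import numpy as np
from scipy.misc import imresize  # as used above; removed in SciPy >= 1.3

def crop_roi(img, x_center, y_center, x_half_width, y_half_width, out_shape=(170, 170)):
    h, w = img.shape[:2]
    r1 = max(int(round(y_center - y_half_width)), 0)
    r2 = min(int(round(y_center + y_half_width)), h)
    c1 = max(int(round(x_center - x_half_width)), 0)
    c2 = min(int(round(x_center + x_half_width)), w)
    roi = img[r1:r2, c1:c2].astype(float)
    return imresize(roi, out_shape)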
def spectrogram(request, recordingId): recording = Recording.objects.get(pk=int(recordingId)) audioFile = "/data/django/openmir/audio/%s.wav" % (str(recording.name)) startSec = float(request.GET.get('startSec', '0')) endSec = float(request.GET.get('endSec', '1.000')) lowHz = int(request.GET.get('lowHz', '0')) highHz = int(request.GET.get('highHz', 44100 / 2)) # Variables from request winSize = int(request.GET.get('winSize', '1024')) # TODO(sness) - Make hopSize work hopSize = winSize # hopSize = int(request.GET.get('hopSize', '1024')) width = request.GET.get('width', 'native') height = request.GET.get('height', 'native') spectrumType = request.GET.get('spectrumType', 'decibels') #spectrumType = request.GET.get('spectrumType', 'magnitude') # Marsyas network mng = marsyas.MarSystemManager() net = mng.create("Series","series") net.addMarSystem(mng.create("SoundFileSource", "src")) net.addMarSystem(mng.create("Stereo2Mono", "s2m")); net.addMarSystem(mng.create("ShiftInput", "si")); net.addMarSystem(mng.create("Windowing", "win")); net.addMarSystem(mng.create("Spectrum","spk")); net.addMarSystem(mng.create("PowerSpectrum","pspk")) # Update Marsyas controls net.updControl("PowerSpectrum/pspk/mrs_string/spectrumType", marsyas.MarControlPtr.from_string(str(spectrumType))) net.updControl("SoundFileSource/src/mrs_string/filename", marsyas.MarControlPtr.from_string(audioFile)) net.updControl("SoundFileSource/src/mrs_natural/inSamples", hopSize) net.updControl("ShiftInput/si/mrs_natural/winSize", winSize) net.updControl("mrs_natural/inSamples", int(hopSize)) # Sample rate and samples per tick networkSampleRate = net.getControl("mrs_real/osrate").to_real() soundFileSampleRate = net.getControl("SoundFileSource/src/mrs_real/osrate").to_real() insamples = net.getControl("SoundFileSource/src/mrs_natural/inSamples").to_natural() # Calculate values samplesToSkip = int(soundFileSampleRate * (startSec)) durationSec = (endSec - startSec) ticksToRun = int(durationSec * networkSampleRate) _height = winSize / 2 # Move to the correct position in the file net.updControl("SoundFileSource/src/mrs_natural/moveToSamplePos", samplesToSkip) # The array to be displayed to the user out = np.zeros( (_height,ticksToRun), dtype=np.double ) # Tick the network until we are done for x in range(0,ticksToRun): net.tick() data = net.getControl("mrs_realvec/processedData").to_realvec() for y in range(0,_height): out[(_height - y - 1),x] = data[y] # Normalize and make black on white out /= np.max(np.abs(out)) out = 1.0 - out nyquist = 44100 / 2.; bins = out.shape[0] lowBin = int((bins / nyquist) * lowHz); highBin = int((bins / nyquist) * highHz); halfWinSize = int(hopSize / 2) out = out[halfWinSize - highBin:halfWinSize - lowBin, :] # Resize and convert the array to an image if (height == "native") and (width == "native"): height = winSize / 2 width = hopSize * durationSec if (height != "native") and (width == "native"): pxPerItem = int(height) / float(winSize / 2.) # TODO(sness) - Why do we have to multiply this by 4? Check the math above width = int(ticksToRun * pxPerItem) * 4 out = smp.imresize(out,(int(height),int(width))) im = smp.toimage(out) # Output a png response = HttpResponse(mimetype="image/png") im.save(response, "PNG") return response
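# A small sketch (hypothetical helper, not part of the view) of the Hz -> row
# mapping used above. The spectrogram rows are written top-down, so row 0 holds
# the highest-frequency bin and keeping the band [lowHz, highHz] means keeping
# rows [bins - highBin : bins - lowBin]; halfWinSize equals bins here only
# because hopSize is pinned to winSize.
def band_rows(bins, nyquist_hz, low_hz, high_hz):
    low_bin = int((bins / float(nyquist_hz)) * low_hz)
    high_bin = int((bins / float(nyquist_hz)) * high_hz)
    return bins - high_bin, bins - low_bin

# With winSize = 1024 (so bins = 512) and a 22050 Hz Nyquist:
#   band_rows(512, 22050.0, 0, 22050) == (0, 512)    # keep everything
#   band_rows(512, 22050.0, 0, 11025) == (256, 512)  # keep the lower half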
def test_imresize(self): im = np.random.random((10,20)) for T in np.sctypes['float'] + [float]: # 1.1 rounds down in float16 (to ~1.0996), so use 1.101, which scales (10,20) to (11,22) for every float type im1 = pilutil.imresize(im,T(1.101)) assert_equal(im1.shape,(11,22))
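# For illustration only: the scalar `size` semantics the test above relies on.
# An int is read as a percentage of the current size, a float as a fraction of
# it, and a tuple as the exact output shape (imresize is deprecated and removed
# in SciPy >= 1.3, so this only runs against older SciPy).
import numpy as np
from scipy.misc import imresize

im = np.random.random((10, 20))
assert imresize(im, 1.101).shape == (11, 22)     # float: fraction of current size
assert imresize(im, 200).shape == (20, 40)       # int: percent of current size
assert imresize(im, (15, 30)).shape == (15, 30)  # tuple: exact output shape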
#X = {} #X['X'] = X_gt #sio.savemat(output_path1, X) # ALLOCATE THE REQUIRED AMOUNT OF DATA X_pred = np.zeros((num_files, numClasses*dim*dim)) # LOAD EACH FILE, RESIZE IT AND COPY IT INTO X idx = 0 for infile in fList: print idx temp = pickle.load(gzip.GzipFile(infile)) tempMask = np.zeros((numClasses*dim*dim)) prob_map = temp['all_probs2'] prob_map = prob_map[0:-1, 0:-1, attribIdx] prob_map = prob_map.argmax(2) prob_map = smp.imresize(prob_map, (dim, dim), interp='nearest', mode='F') prob_map = np.int8(prob_map) prob_map = prob_map.reshape(dim*dim, order='F').copy() for k in range(numClasses): tempClassMask = np.zeros((dim*dim, 1)) tempClassMask[np.nonzero(prob_map==k)] = 1 tempMask[k::numClasses] = tempClassMask.squeeze() X_pred[idx, :] = tempMask idx = idx+1 X = {} X['X'] = X_pred sio.savemat(output_path2, X)
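# A sketch (hypothetical helper) of the layout produced by
# `tempMask[k::numClasses] = ...` above: the per-pixel class indicators are
# interleaved, so pixel j / class k lands at index j*num_classes + k rather
# than in one contiguous block per class.
import numpy as np

def interleaved_one_hot(labels_flat, num_classes):
    mask = np.zeros(num_classes * labels_flat.size)
    for k in range(num_classes):
        mask[k::num_classes] = (labels_flat == k)
    return mask

# e.g. interleaved_one_hot(np.array([0, 2, 1]), 3) -> [1,0,0, 0,0,1, 0,1,0]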
def _onNewFrame(self, frame): self._frame = imresize(frame,(800,800)) self.newFrame.emit(self._frame) self.update()
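# scipy.misc.imresize was removed in SciPy 1.3. A minimal Pillow-based shim for
# the fixed-shape call above could look like this; it assumes a uint8 frame and
# does not reproduce the byte-scaling scipy applied to float inputs. The name
# imresize_compat is hypothetical.
import numpy as np
from PIL import Image

def imresize_compat(frame, shape):
    # Pillow expects (width, height); the snippets above pass (rows, cols).
    return np.array(Image.fromarray(frame).resize((shape[1], shape[0]), Image.BILINEAR))

# usage in _onNewFrame: self._frame = imresize_compat(frame, (800, 800))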
for infile in glob.glob(os.path.join(input_path1, '*.pkl.gz')): num_files = num_files + 1 # ALLOCATE THE REQUIRED AMOUNT OF DATA X_pred_tr = np.zeros((num_files, numClasses*dim*dim)) # LOAD EACH FILE, RESIZE IT AND COPY IT INTO X idx = 0 fList = glob.glob(os.path.join(input_path1, '*.pkl.gz')) fList.sort() for infile in fList: print idx temp = pickle.load(gzip.GzipFile(infile)) temp_pred = temp['max_classes2'] temp_pred = temp_pred[0:-1, 0:-1] temp_pred = smp.imresize(temp_pred, (dim, dim), interp='nearest', mode='F') temp_pred = np.int8(temp_pred) temp_pred = temp_pred.reshape(dim*dim, order='F').copy() tempMask = np.zeros((numClasses*dim*dim)) for k in range(numClasses): tempClassMask = np.zeros((dim*dim, 1)) tempClassMask[np.nonzero(temp_pred==k)] = 1 tempMask[k::numClasses] = tempClassMask.squeeze() X_pred_tr[idx, :] = tempMask idx = idx+1 X = {} X['X'] = X_pred_tr sio.savemat(output_path1, X) # GET THE NUMBER OF FILES
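# Why the snippet above passes interp='nearest' and mode='F': nearest-neighbour
# resizing keeps class ids intact (bilinear would blend, say, 0s and 2s into
# spurious 1s), and mode='F' avoids imresize's 0-255 byte scaling so the ids
# survive the round trip. A hedged equivalent without scipy, using Pillow's
# 32-bit float mode (resize_label_map is an illustrative name):
import numpy as np
from PIL import Image

def resize_label_map(label_map, dim):
    img = Image.fromarray(label_map.astype(np.float32), mode='F')
    return np.int8(np.array(img.resize((dim, dim), Image.NEAREST)))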
# GET THE NUMBER OF FILES num_files = 0 for infile in glob.glob(os.path.join(input_path, '*.pkl.gz')): num_files = num_files + 1 # LOAD EACH FILE, RESIZE IT AND COPY IT INTO X idx = 0 fList = glob.glob(os.path.join(input_path, '*.pkl.gz')) fList.sort() for infile in fList: temp = pickle.load(gzip.GzipFile(infile)) masks = np.zeros(temp['all_probs2'].shape) image = temp['image'] for k in temp['gt_masks']: if k in temp['classes']: idx = temp['classes'][k] masks[:, :, idx] = smp.imresize(temp['gt_masks'][k]/255, (temp['all_probs2'].shape[0], temp['all_probs2'].shape[1]), interp='nearest', mode='F') print os.path.join(output_path, infile[-12:]) file = gzip.GzipFile(os.path.join(output_path, infile[-12:]), 'wb') data1 = {'all_probs2': masks, 'image': image} file.write(pickle.dumps(data1, 0)) file.close() idx = idx+1 #print os.path.join(output_path, infile[-12:-3]) #output = open(os.path.join(output_path, infile[-12:-3]), 'wb') #data1 = {'all_probs2': masks, 'image': image} #pickle.dump(data1, output) #idx = idx+1
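# The step above, condensed into a hypothetical helper: each ground-truth mask
# (stored as 0/255) becomes a 0/1 map and is nearest-resized onto the
# probability-map grid before being written into its class channel. The names
# gt_masks and class_to_channel are illustrative, not from the original script.
import numpy as np
from PIL import Image

def build_mask_volume(gt_masks, class_to_channel, out_h, out_w, num_channels):
    volume = np.zeros((out_h, out_w, num_channels), dtype=np.float32)
    for name, mask in gt_masks.items():
        if name in class_to_channel:
            binary = (mask / 255).astype(np.float32)  # 0/255 -> 0/1
            resized = Image.fromarray(binary, mode='F').resize((out_w, out_h), Image.NEAREST)
            volume[:, :, class_to_channel[name]] = np.array(resized)
    return volume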
def update_image_and_drawings( self, id_val, image, format=None, points=None, linesegs=None, point_colors=None, point_radii=None, lineseg_colors=None, lineseg_widths=None, xoffset=0, yoffset=0, doresize=None, ): # create bitmap, don't paint on screen if points is None: points = [] if linesegs is None: linesegs = [] if format is None: raise ValueError("must specify format") # if doresize is not input, then use the default value if doresize is None: doresize = self.doresize rgb8 = imops.to_rgb8(format, image) if doresize: # how much should we resize the image windowwidth = self.GetRect().GetWidth() windowheight = self.GetRect().GetHeight() imagewidth = rgb8.shape[1] imageheight = rgb8.shape[0] resizew = float(windowwidth) / float(imagewidth) resizeh = float(windowheight) / float(imageheight) self.resize = min(resizew, resizeh) # resize the image rgb8 = imresize(rgb8, self.resize) # scale all the points and lines pointscp = [] for pt in points: pointscp.append([pt[0] * self.resize, pt[1] * self.resize]) points = pointscp linesegscp = [] for line in linesegs: linesegscp.append( [line[0] * self.resize, line[1] * self.resize, line[2] * self.resize, line[3] * self.resize] ) linesegs = linesegscp if self.id_val is None: self.id_val = id_val if id_val != self.id_val: raise NotImplementedError("only 1 image source currently supported") h, w, three = rgb8.shape # get full image if self.full_image_numpy is not None: full_h, full_w, tmp = self.full_image_numpy.shape if h < full_h or w < full_w: self.full_image_numpy[yoffset : yoffset + h, xoffset : xoffset + w, :] = rgb8 rgb8 = self.full_image_numpy h, w = full_h, full_w else: self.full_image_numpy = rgb8 image = wx.EmptyImage(w, h) # XXX TODO could eliminate data copy here? image.SetData(rgb8.tostring()) bmp = wx.BitmapFromImage(image) # now draw into bmp drawDC = wx.MemoryDC() # assert drawDC.Ok(), "drawDC not OK" drawDC.SelectObject(bmp) # draw into bmp drawDC.SetBrush(wx.Brush(wx.Colour(255, 255, 255), wx.TRANSPARENT)) if self.do_draw_points and points is not None and len(points) > 0: if point_radii is None: point_radii = [8] * len(points) if point_colors is None: point_colors = [(0, 1, 0)] * len(points) if self.do_draw_points and linesegs is not None and len(linesegs) > 0: if lineseg_widths is None: lineseg_widths = [1] * len(linesegs) if lineseg_colors is None: lineseg_colors = [(0, 1, 0)] * len(linesegs) # point_radius=8 # fixing drawing point colors!!! 
if self.do_draw_points: for i in range(len(points)): # point pt = points[i] # point color ptcolor = point_colors[i] wxptcolor = wx.Colour(round(ptcolor[0] * 255), round(ptcolor[1] * 255), round(ptcolor[2] * 255)) # radius of point ptradius = point_radii[i] # draw it drawDC.SetPen(wx.Pen(colour=wxptcolor, width=ptradius)) drawDC.DrawCircle(int(pt[0]), int(pt[1]), ptradius) for i in range(len(linesegs)): lineseg = linesegs[i] linesegcolor = lineseg_colors[i] wxlinesegcolor = wx.Colour( round(linesegcolor[0] * 255), round(linesegcolor[1] * 255), round(linesegcolor[2] * 255) ) linesegwidth = lineseg_widths[i] drawDC.SetPen(wx.Pen(colour=wxlinesegcolor, width=linesegwidth)) drawDC.DrawLine(*lineseg) if id_val in self.lbrt: drawDC.SetPen(wx.Pen("GREEN", width=1)) l, b, r, t = self.lbrt[id_val] drawDC.DrawLine(l, b, r, b) drawDC.DrawLine(r, b, r, t) drawDC.DrawLine(r, t, l, t) drawDC.DrawLine(l, t, l, b) img = wx.ImageFromBitmap(bmp) if self.mirror_display: if not self.display_rotate_180: img = img.Rotate90() img = img.Rotate90() else: img = img.Mirror(True) if not self.display_rotate_180: img = img.Rotate90() img = img.Rotate90() bmp = wx.BitmapFromImage(img) self.bitmap = bmp
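# A sketch (hypothetical refactor, not from the original widget) of the
# per-point / per-segment scaling loops in update_image_and_drawings above,
# using NumPy broadcasting. It assumes points as (x, y) pairs and line segments
# as (x0, y0, x1, y1) tuples, matching the snippets.
import numpy as np

def scale_overlays(points, linesegs, factor):
    points = (np.asarray(points, dtype=float) * factor).tolist() if points else []
    linesegs = (np.asarray(linesegs, dtype=float) * factor).tolist() if linesegs else []
    return points, linesegs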
def run(audioFile, outFile, startSec, endSec, lowHz, highHz, winSize, hopSize, spectrumType, widthPx, heightPx): # Marsyas network mng = marsyas.MarSystemManager() net = mng.create("Series","series") net.addMarSystem(mng.create("SoundFileSource", "src")) net.addMarSystem(mng.create("Stereo2Mono", "s2m")); net.addMarSystem(mng.create("ShiftInput", "si")); net.addMarSystem(mng.create("Windowing", "win")); net.addMarSystem(mng.create("Spectrum","spk")); net.addMarSystem(mng.create("PowerSpectrum","pspk")) # Update Marsyas controls net.updControl("PowerSpectrum/pspk/mrs_string/spectrumType", marsyas.MarControlPtr.from_string(str(spectrumType))) net.updControl("SoundFileSource/src/mrs_string/filename", marsyas.MarControlPtr.from_string(audioFile)) net.updControl("SoundFileSource/src/mrs_natural/inSamples", hopSize) net.updControl("ShiftInput/si/mrs_natural/winSize", winSize) net.updControl("mrs_natural/inSamples", int(hopSize)) # Sample rate and samples per tick networkSampleRate = net.getControl("mrs_real/osrate").to_real() soundFileSampleRate = net.getControl("SoundFileSource/src/mrs_real/osrate").to_real() insamples = net.getControl("SoundFileSource/src/mrs_natural/inSamples").to_natural() # Calculate values samplesToSkip = int(soundFileSampleRate * (startSec)) durationSec = (endSec - startSec) ticksToRun = int(durationSec * networkSampleRate) _height = winSize / 2 # Move to the correct position in the file net.updControl("SoundFileSource/src/mrs_natural/moveToSamplePos", samplesToSkip) # The array to be displayed to the user out = np.zeros( (_height,ticksToRun), dtype=np.double ) # Tick the network until we are done for x in range(0,ticksToRun): net.tick() data = net.getControl("mrs_realvec/processedData").to_realvec() for y in range(0,_height): out[(_height - y - 1),x] = data[y] # Normalize and make black on white out /= np.max(np.abs(out)) out = 1.0 - out nyquist = 44100 / 2.; bins = out.shape[0] lowBin = int((bins / nyquist) * lowHz); highBin = int((bins / nyquist) * highHz); halfWinSize = int(hopSize / 2) out = out[halfWinSize - highBin:halfWinSize - lowBin, :] # Resize and convert the array to an image if (heightPx == 0) and (widthPx == 0): heightPx = winSize / 2 widthPx = hopSize * durationSec if (heightPx != 0) and (widthPx == 0): pxPerItem = int(heightPx) / float(winSize / 2.) # TODO(sness) - Why do we have to multiply this by 4? Check the math above widthPx = int(ticksToRun * pxPerItem) * 4 out = smp.imresize(out,(int(heightPx),int(widthPx))) im = smp.toimage(out) im.save(outFile, "PNG")
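# scipy.misc.toimage is gone along with imresize in SciPy >= 1.3. A minimal
# sketch of the "resize, convert, save as PNG" tail of run() without scipy,
# assuming `out` is already normalised to [0, 1] as above; the helper name is
# hypothetical.
import numpy as np
from PIL import Image

def save_spectrogram_png(out, width_px, height_px, out_file):
    img = Image.fromarray((out * 255).astype(np.uint8))
    img = img.resize((int(width_px), int(height_px)), Image.BILINEAR)
    img.save(out_file, "PNG")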
def update_image_and_drawings(self, id_val, image, format=None, points=None, linesegs=None, point_colors=None, point_radii=None, lineseg_colors=None, lineseg_widths=None, xoffset=0, yoffset=0, doresize=None): """update the displayed image **Arguments** id_val : string An identifier for the particular source being updated image : numpy array The image data to update **Optional keyword arguments** format : string The image format (e.g. 'MONO8', 'RGB8', or 'YUV422') points : list of points Points to display (e.g. [(x0,y0),(x1,y1)]) linesegs : list of line segments Line segments to display (e.g. [(x0,y0,x1,y1),(x1,y1,x2,y2)]) """ # create bitmap, don't paint on screen if points is None: points = [] if linesegs is None: linesegs = [] if format is None: format='MONO8' warnings.warn('format unspecified - assuming MONO8') # if doresize is not input, then use the default value if doresize is None: doresize = self.doresize rgb8 = imops.to_rgb8(format,image) if doresize: from scipy.misc.pilutil import imresize # how much should we resize the image windowwidth = self.GetRect().GetWidth() windowheight = self.GetRect().GetHeight() imagewidth = rgb8.shape[1] imageheight = rgb8.shape[0] resizew = float(windowwidth) / float(imagewidth) resizeh = float(windowheight) / float(imageheight) self.resize = min(resizew,resizeh) # resize the image rgb8 = imresize(rgb8,self.resize) # scale all the points and lines pointscp = [] for pt in points: pointscp.append([pt[0]*self.resize,pt[1]*self.resize]) points = pointscp linesegscp = [] for line in linesegs: linesegscp.append([line[0]*self.resize,line[1]*self.resize,line[2]*self.resize,line[3]*self.resize]) linesegs = linesegscp if self.id_val is None: self.id_val = id_val if id_val != self.id_val: raise NotImplementedError("only 1 image source currently supported") h,w,three = rgb8.shape # get full image if self.full_image_numpy is not None: full_h, full_w, tmp = self.full_image_numpy.shape if h<full_h or w<full_w: self.full_image_numpy[yoffset:yoffset+h,xoffset:xoffset+w,:] = rgb8 rgb8 = self.full_image_numpy h,w = full_h, full_w else: self.full_image_numpy = rgb8 image = wx.EmptyImage(w,h) # XXX TODO could eliminate data copy here? image.SetData( rgb8.tostring() ) bmp = wx.BitmapFromImage(image) # now draw into bmp drawDC = wx.MemoryDC() #assert drawDC.Ok(), "drawDC not OK" drawDC.SelectObject( bmp ) # draw into bmp drawDC.SetBrush(wx.Brush(wx.Colour(255,255,255), wx.TRANSPARENT)) if self.do_draw_points and points is not None and len(points) > 0: if point_radii is None: point_radii = [ 8 ] * len(points) if point_colors is None: point_colors = [ (0,1,0) ]*len(points) if self.do_draw_points and linesegs is not None and len(linesegs) > 0: if lineseg_widths is None: lineseg_widths = [ 1 ] * len(linesegs) if lineseg_colors is None: lineseg_colors = [ (0,1,0) ]*len(linesegs) # fixing drawing point colors!!! 
if self.do_draw_points: for i in range(len(points)): # point pt = points[i] # point color ptcolor = point_colors[i] wxptcolor = wx.Colour(round(ptcolor[0]*255), round(ptcolor[1]*255), round(ptcolor[2]*255)) # radius of point ptradius = point_radii[i] # draw it drawDC.SetPen(wx.Pen(colour=wxptcolor, width=1)) drawDC.DrawCircle(int(pt[0]),int(pt[1]),ptradius) for i in range(len(linesegs)): lineseg = linesegs[i] linesegcolor = lineseg_colors[i] wxlinesegcolor = wx.Colour(round(linesegcolor[0]*255), round(linesegcolor[1]*255), round(linesegcolor[2]*255)) linesegwidth = lineseg_widths[i] drawDC.SetPen(wx.Pen(colour=wxlinesegcolor, width=linesegwidth)) if len(lineseg)<=4: drawDC.DrawLine(*lineseg) else: for start_idx in range(0, len(lineseg)-3, 2): this_seg = lineseg[start_idx:start_idx+4] drawDC.DrawLine(*this_seg) if id_val in self.lbrt: drawDC.SetPen(wx.Pen('GREEN',width=1)) l,b,r,t = self.lbrt[id_val] drawDC.DrawLine(l,b, r,b) drawDC.DrawLine(r,b, r,t) drawDC.DrawLine(r,t, l,t) drawDC.DrawLine(l,t, l,b) img = wx.ImageFromBitmap(bmp) if self.mirror_display: if not self.display_rotate_180: img = img.Rotate90() img = img.Rotate90() else: img = img.Mirror(True) if not self.display_rotate_180: img = img.Rotate90() img = img.Rotate90() bmp = wx.BitmapFromImage(img) self.bitmap = bmp
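# The window-fitting rule shared by both versions of update_image_and_drawings,
# isolated as an illustrative helper: scale by the smaller of the width and
# height ratios so the resized frame fits the window without distorting its
# aspect ratio. The name fit_factor is hypothetical.
def fit_factor(window_w, window_h, image_w, image_h):
    return min(float(window_w) / image_w, float(window_h) / image_h)

# e.g. an 800x600 frame in a 400x400 window -> factor 0.5 -> shown at 400x300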