def estimate_angle(raw, maxskew=2, skewsteps=8, perc=80, range=20, zoom=0.5, bignore=0.1):
    comment = ""
    rawF = read_image_gray(raw)
    # perform image normalization
    image = rawF - amin(rawF)
    if amax(image) == amin(image):
        print("# image is empty", raw)
        return
    image /= amax(image)
    extreme = (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape)
    if extreme > 0.95:
        comment += " no-normalization"
        flat = image
    else:
        # check whether the image is already effectively binarized
        # if not, we need to flatten it by estimating the local whitelevel
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        m = interpolation.zoom(m, 1.0 / zoom)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
    # estimate skew angle and rotate
    d0, d1 = flat.shape
    o0, o1 = int(bignore * d0), int(bignore * d1)
    flat = amax(flat) - flat
    flat -= amin(flat)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    ma = maxskew
    ms = int(2 * maxskew * skewsteps)
    angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
    return angle
def daubechiesX(x):
    # apply the x-axis transformation to the given image
    daubechies_Low = copy.deepcopy(x).astype(int)   # copy of the image to gather low-pass information
    daubechies_High = copy.deepcopy(x).astype(int)  # copy of the image to gather high-pass information
    daubechies_Low_Array = np.array([-0.1294095226, 0.2241438680, 0.8365163037, 0.4829629131])    # low-pass filter coefficients
    daubechies_High_Array = np.array([-0.4829629131, 0.8365163037, -0.2241438680, -0.1294095226])  # high-pass filter coefficients
    for a in range(x.shape[0]):          # loop over the x-axis
        for b in range(x.shape[1]):      # loop over the y-axis
            if b == x.shape[1] - 1:      # prevent array overflow: replicate the last pixel
                daubechies_Low[a][b] = (daubechies_Low_Array[0] * x[a][b] + daubechies_Low_Array[1] * x[a][b] +
                                        daubechies_Low_Array[2] * x[a][b] + daubechies_Low_Array[3] * x[a][b])
                daubechies_High[a][b] = (daubechies_High_Array[0] * x[a][b] + daubechies_High_Array[1] * x[a][b] +
                                         daubechies_High_Array[2] * x[a][b] + daubechies_High_Array[3] * x[a][b])
            elif b == x.shape[1] - 2:    # prevent array overflow
                daubechies_Low[a][b] = (daubechies_Low_Array[0] * x[a][b] + daubechies_Low_Array[1] * x[a][b + 1] +
                                        daubechies_Low_Array[2] * x[a][b + 1] + daubechies_Low_Array[3] * x[a][b + 1])
                daubechies_High[a][b] = (daubechies_High_Array[0] * x[a][b] + daubechies_High_Array[1] * x[a][b + 1] +
                                         daubechies_High_Array[2] * x[a][b + 1] + daubechies_High_Array[3] * x[a][b + 1])
            elif b == x.shape[1] - 3:    # prevent array overflow
                daubechies_Low[a][b] = (daubechies_Low_Array[0] * x[a][b] + daubechies_Low_Array[1] * x[a][b + 1] +
                                        daubechies_Low_Array[2] * x[a][b + 2] + daubechies_Low_Array[3] * x[a][b + 2])
                daubechies_High[a][b] = (daubechies_High_Array[0] * x[a][b] + daubechies_High_Array[1] * x[a][b + 1] +
                                         daubechies_High_Array[2] * x[a][b + 2] + daubechies_High_Array[3] * x[a][b + 2])
            else:
                daubechies_Low[a][b] = (daubechies_Low_Array[0] * x[a][b] + daubechies_Low_Array[1] * x[a][b + 1] +
                                        daubechies_Low_Array[2] * x[a][b + 2] + daubechies_Low_Array[3] * x[a][b + 3])  # apply the low-pass transform
                daubechies_High[a][b] = (daubechies_High_Array[0] * x[a][b] + daubechies_High_Array[1] * x[a][b + 1] +
                                         daubechies_High_Array[2] * x[a][b + 2] + daubechies_High_Array[3] * x[a][b + 3])  # apply the high-pass transform
    daubechies_Low = down.zoom(daubechies_Low, [.5, 1], order=0)    # halve the image along the x-axis
    daubechies_High = down.zoom(daubechies_High, [.5, 1], order=0)  # halve the image along the x-axis
    return daubechies_Low, daubechies_High
def pattern_match(template, image, upsampling=16, func=match_template):
    """
    Call an arbitrary pattern matcher

    Parameters
    ----------
    template : ndarray
               The input search template used to 'query' the destination image
    image : ndarray
            The image or sub-image to be searched
    upsampling : int
                 The multiplier to upsample the template and image.
    func : object
           The function to be used to perform the template based matching

    Returns
    -------
    x : float
        The x offset
    y : float
        The y offset
    strength : float
               The strength of the correlation in the range [-1, 1].
    """
    if upsampling < 1:
        raise ValueError('upsampling must be >= 1')

    u_template = zoom(template, upsampling)
    u_image = zoom(image, upsampling)

    # Find the upper left origin of the template in the image
    match = func(u_image, u_template)
    y, x = np.unravel_index(np.argmax(match), match.shape)

    # Resample the match back to the native image resolution
    x /= upsampling
    y /= upsampling

    # Offset from the UL origin to the image center
    x += (template.shape[1] / 2)
    y += (template.shape[0] / 2)

    # Compute the offset to adjust the image match point location
    ideal_y = image.shape[0] / 2
    ideal_x = image.shape[1] / 2
    x = ideal_x - x
    y = ideal_y - y

    # Find the maximum correlation
    strength = np.max(match)

    return x, y, strength
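
# Hedged usage sketch (not part of the original source): run a synthetic image/template
# pair through pattern_match above. It assumes numpy and skimage.feature.match_template
# are available, as the snippet's default func suggests; the template is cut from the
# image itself so a strong correlation peak is guaranteed.
import numpy as np

rng = np.random.default_rng(0)
image = rng.random((61, 61))
image[28:33, 30:35] += 2.0               # a bright block off the image centre
template = image[26:37, 28:39].copy()    # an 11x11 patch containing the block
x_off, y_off, strength = pattern_match(template, image, upsampling=2)
print(x_off, y_off, strength)            # offset of the best match from the image centre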
def zoom_rot(ii, dd):
    """ Rotate and zoom an image around a given angle"""
    a = np.random.randint(-10, 10)
    ddr = rotate(dd, a, order=0, prefilter=False)
    iir = rotate(ii.transpose((1, 2, 0)), a, order=0, prefilter=False)
    f = np.random.randint(10000, 15100) / 10000.
    h = int(dd.shape[0] / f)
    w = int(dd.shape[1] / f)
    s_fh = float(dd.shape[0]) / float(h)
    s_fw = float(dd.shape[1]) / float(w)
    s_f = (s_fh + s_fw) / 2.
    offset = 0
    cy = np.random.randint(offset, dd.shape[0] - h - offset + 1)
    cx = np.random.randint(offset, dd.shape[1] - w - offset + 1)
    ddc = ddr[cy:cy + h, cx:cx + w]
    iic = iir[cy:cy + h, cx:cx + w, :]
    dd_s = zoom(ddc, (s_fh, s_fw), order=0, prefilter=False)
    dd_s /= s_f
    ii_s = iic.transpose((2, 0, 1))
    ii_s = zoom(ii_s, (1, s_fh, s_fw), order=0, prefilter=False)
    return ii_s.astype(np.float32), dd_s.astype(np.float32)
def extract(self, ar, br):
    times = self.faces
    a = np.array(ar)
    b = np.array(br)
    diff = np.subtract(b, a)
    m = np.max(diff)
    op = np.zeros([a.shape[0], m, 3])
    im = np.array(self.img)
    for i in range(a.shape[0]):
        r1 = self.screen2im(a[i])
        r2 = self.screen2im(b[i])
        k = im[r1[1], r1[0]:r2[0]]
        interpolation.zoom(k, [float(m) / k.shape[0], 1], output=op[i, :, :])
    exp = math.ceil(math.log(op.shape[1], 2))
    exp2 = math.ceil(math.log(op.shape[0], 2))
    zoomY = float(pow(2, exp)) / op.shape[1]
    zoomX = float(pow(2, exp2)) / op.shape[0]
    o = interpolation.zoom(op, [2 * zoomX, zoomY, 1])
    p = np.zeros([o.shape[0], o.shape[1] * times, o.shape[2]])
    for k in range(1, times + 1):
        if k % 2 == 0:
            p[:, (k - 1) * o.shape[1]:k * o.shape[1], :] = o
        else:
            p[:, (k - 1) * o.shape[1]:k * o.shape[1], :] = o[:, ::-1, :]
    p = np.roll(p, o.shape[1] // 2, 1)  # np.roll needs an integer shift
    scipy.misc.imsave('temp.jpg', p)
    img = cv2.imread('temp.jpg')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img
def image_subsample(self, channels, target_resolution, order=3):
    """
    :param channels: list of strings with channel names
    :param target_resolution: float, target spatial sampling; None keeps the grid of the first channel
    :param order: interpolation order, integer
    :return: numpy array of shape (rows, cols, len(channels)) resampled to the target resolution
    """
    assert self.target_resolution is None

    if target_resolution is None:
        shape = list(self.data[channels[0]].shape)
    else:
        shape = [self.metadata["spatial_samplings"][target_resolution][ii] for ii in ["NCOLS", "NROWS"]]
    shape.append(len(channels))

    dtype_internal = np.float32
    data = np.zeros(shape, dtype=dtype_internal)
    for ich, ch in enumerate(channels):
        zoom_fac = [shape[0] / self.data[ch].shape[0],
                    shape[1] / self.data[ch].shape[1]]
        bf = np.array(self.data[ch], dtype=dtype_internal)
        bf_nan = np.isnan(bf)
        bf[bf_nan] = 0.0
        data[:, :, ich] = zoom(input=bf, zoom=zoom_fac, order=order)
        bf_nan = zoom(input=np.array(bf_nan, dtype=np.float32), zoom=zoom_fac, order=0)
        data[:, :, ich][bf_nan > 0.0] = np.NaN
    return np.array(data, dtype=self.dtype_float)
def nlbin(im, threshold=0.5, zoom=0.5, escale=1.0, border=0.1, perc=80,
          range=20, low=5, high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image
    """
    if im.mode == '1':
        return im
    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    raw = raw / float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    m = interpolation.zoom(image, zoom)
    m = filters.percentile_filter(m, perc, size=(range, 2))
    m = filters.percentile_filter(m, perc, size=(2, range))
    m = interpolation.zoom(m, 1.0 / zoom)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v ** 2, escale * 20.0) ** 0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
def plot_u(u_x, u_y):
    assert u_x.shape == u_y.shape
    x = np.linspace(gp.xmin, gp.xmax, gp.nx)
    y = np.linspace(gp.ymin, gp.ymax, gp.ny)
    X, Y = np.meshgrid(x, y, indexing='ij')
    from scipy.ndimage.interpolation import zoom
    a = .3
    plt.quiver(zoom(X, a), zoom(Y, a), zoom(u_x, a), zoom(u_y, a),
               angles='xy', scale_units='xy', scale=1.)
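
# Hedged sketch of the same idea without the `gp` globals (which are defined elsewhere
# in the original module): thin a dense vector field with ndimage.zoom before handing
# it to quiver so the arrows stay legible.
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import zoom

x = np.linspace(0.0, 1.0, 200)
y = np.linspace(0.0, 1.0, 150)
X, Y = np.meshgrid(x, y, indexing='ij')
u_x, u_y = -Y, X                      # a simple rotational field
a = 0.1                               # keep roughly 10% of the samples per axis
plt.quiver(zoom(X, a), zoom(Y, a), zoom(u_x, a), zoom(u_y, a),
           angles='xy', scale_units='xy')
plt.show()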
def fit2dArrayToFn(arr, fn, mask=None, down_scale_factor=None,
                   output_shape=None, guess=None, outgrid=None):
    """Fit a 2d array to a 2d function USING ONLY MASKED VALUES

    * [down_scale_factor] scales the map to speed up the fitting procedure;
      set a value smaller than 1
    * [output_shape] shape of the output array
    * [guess] must be scaled using [scale_factor]

    Returns:
        fitted map, fitting params (scaled), error
    """
    if mask is None:
        # assert outgrid is not None
        mask = np.ones(shape=arr.shape, dtype=bool)

    if down_scale_factor is None:
        if mask.sum() > 1000:
            down_scale_factor = 0.3
        else:
            down_scale_factor = 1

    if down_scale_factor != 1:
        # SCALE TO DECREASE AMOUNT OF POINTS TO FIT:
        arr2 = zoom(arr, down_scale_factor)
        mask = zoom(mask, down_scale_factor, output=bool)
    else:
        arr2 = arr

    # USE ONLY VALID POINTS:
    x, y = np.where(mask)
    z = arr2[mask]
    # FIT:
    print(guess, 111)
    parameters, cov_matrix = curve_fit(fn, (x, y), z, p0=guess)
    # ERROR:
    perr = np.sqrt(np.diag(cov_matrix))

    if outgrid is not None:
        yy, xx = outgrid
        rebuilt = fn((yy, xx), *parameters)
    else:
        if output_shape is None:
            output_shape = arr.shape
        fx = arr2.shape[0] / output_shape[0]
        fy = arr2.shape[1] / output_shape[1]
        rebuilt = np.fromfunction(lambda x, y: fn((x * fx, y * fy), *parameters),
                                  output_shape)
    return rebuilt, parameters, perr
def contour_plot(self, parameter1, parameter2, num_cells=50, spec_query=None,
                 statistic='mean', show=False, smoothing=None, **plot_args):
    specs = self._query_specs(spec_query)

    shape = len(specs), 1
    xs = np.empty(shape, float)
    ys = np.empty(shape, float)
    values = np.empty(shape, float)
    for i, spec in enumerate(specs):
        xs[i] = float(getattr(spec, parameter1))
        ys[i] = float(getattr(spec, parameter2))
        values[i] = self._get_statistic(spec, statistic)

    xMin, xMax = xs.min(), xs.max()
    yMin, yMax = ys.min(), ys.max()
    assert xMin != xMax
    assert yMin != yMax

    grid = np.mgrid[xMin:xMax:num_cells * 1j, yMin:yMax:num_cells * 1j]
    interp = interpolate.griddata(np.hstack((xs, ys)), values,
                                  np.vstack((grid[0].flat, grid[1].flat)).T,
                                  'cubic')
    valueGrid = np.reshape(interp, grid[0].shape)

    # try:
    #     valueGrid.clip(plot_args['vmin'], plot_args['vmax'], out=valueGrid)
    # except KeyError:
    #     pass

    if smoothing is not None:
        # from scipy.ndimage.filters import gaussian_filter
        # gaussian_filter(valueGrid, smoothing, output=valueGrid)
        from scipy.ndimage.interpolation import zoom
        gx = zoom(grid[0], smoothing)
        gy = zoom(grid[1], smoothing)
        valueGrid = zoom(valueGrid, smoothing)
    else:
        gx, gy = grid[0], grid[1]

    contour = plt.contour(gx, gy, valueGrid, **plot_args)
    plt.clabel(contour, inline=True, fontsize=10)
    plt.grid(True)
    plt.xlim(xMin, xMax)
    plt.ylim(yMin, yMax)
    plt.xlabel(parameter1)
    plt.ylabel(parameter2)
    # plt.colorbar()
    plt.title(self.path)

    if show:
        plt.show()
def test_crown_visualiser_on_a_image(pipeline_results, bees_image, outdir):
    vis = ResultCrownVisualizer()
    res = pipeline_results
    img = res[Image]
    overlay, = vis(res[Image], res[LocalizerPositions], res[Orientations], res[IDs])
    overlay = zoom(overlay, (0.5, 0.5, 1), order=1)
    img = zoom(img, 0.5, order=3) / 255.
    img_with_overlay = ResultCrownVisualizer.add_overlay(img, overlay)

    name, _ = os.path.splitext(os.path.basename(bees_image))
    imsave(str(outdir.join(name + "_overlay.png")), overlay)
    imsave(str(outdir.join(name + "_added_overlay.jpeg")), img_with_overlay)
def data(self, raw=False, bgr2rgb=True, resize=True, order=1):
    """Read image data from file and return as numpy array."""
    self._fh.seek(self.data_offset)
    if raw:
        return self._fh.read(self.data_size)
    elif self.compression:
        if self.compression not in DECOMPRESS:
            raise ValueError("compression unknown or not supported")
        # TODO: test this
        data = self._fh.read(self.data_size)
        data = DECOMPRESS[self.compression](data)
        if self.compression == 2:
            # LZW
            data = numpy.fromstring(data, self.dtype)
    else:
        dtype = numpy.dtype(self.dtype)
        data = self._fh.read_array(dtype, self.data_size // dtype.itemsize)

    data = data.reshape(self.stored_shape)
    if self.stored_shape == self.shape or not resize:
        if bgr2rgb and self.stored_shape[-1] in (3, 4):
            tmp = data[..., 0].copy()
            data[..., 0] = data[..., 2]
            data[..., 2] = tmp
        return data

    # sub / supersampling
    factors = [j / i for i, j in zip(self.stored_shape, self.shape)]
    factors = [(1.0 if abs(1.0 - f) < 0.0001 else f) for f in factors]
    shape = list(self.stored_shape)
    # remove leading dimensions with factor 1.0 for speed
    for factor in factors:
        if factor != 1.0:
            break
        shape = shape[1:]
        factors = factors[1:]
    data.shape = shape

    # resize RGB components separately for speed
    if shape[-1] in (3, 4) and factors[-1] == 1.0:
        factors = factors[:-1]
        old = data
        data = numpy.empty(self.shape, self.dtype[-2:])
        for i in range(shape[-1]):
            j = {0: 2, 1: 1, 2: 0, 3: 3}[i] if bgr2rgb else i
            data[..., i] = zoom(old[..., j], zoom=factors, order=order)
    else:
        data = zoom(data, zoom=factors, order=order)

    data.shape = self.shape
    return data
def mpl_img(innd, zoom_factor=None, out_shape=None):
    if out_shape:
        w, h = innd.shape
        ow, oh = out_shape
        zf_1 = float(ow) / w
        zf_2 = float(oh) / h
        zf = min(zf_1, zf_2)
        nd = zoom(innd, zf, prefilter=False)
    elif zoom_factor:
        nd = zoom(innd, zoom_factor, prefilter=False)
    else:
        nd = innd
    a = plt.imshow(nd, interpolation="none")
    return a
def mask_polar_to_cart(mask, center, min_radius, max_radius, output_shape,
                       zoom_factor=1):
    '''Converts a polar binary mask to Cartesian and places in an image of zeros'''
    # Account for upsampling
    if zoom_factor != 1:
        center = (center[0] * zoom_factor + zoom_factor / 2,
                  center[1] * zoom_factor + zoom_factor / 2)
        min_radius = min_radius * zoom_factor
        max_radius = max_radius * zoom_factor
        output_shape = tuple(int(a * zoom_factor) for a in output_shape)

    # new image
    image = np.zeros(output_shape)

    # coordinate conversion
    theta, r = np.meshgrid(np.linspace(0, 2 * np.pi, mask.shape[1]),
                           np.arange(0, max_radius))
    x, y = coord_polar_to_cart(r, theta, center)
    x, y = np.round(x), np.round(y)
    x, y = x.astype(int), y.astype(int)

    x = np.clip(x, 0, image.shape[0] - 1)
    y = np.clip(y, 0, image.shape[1] - 1)
    ix, iy = np.meshgrid(np.arange(0, mask.shape[1]), np.arange(0, mask.shape[0]))
    image[x, y] = mask

    # downsample image
    if zoom_factor != 1:
        zf = 1 / float(zoom_factor)
        image = zoom(image, (zf, zf), order=4)

    # ensure image remains a filled binary mask
    image = (image > 0.5).astype(int)
    image = binary_fill_holes(image)
    return image
def modulatePF_unwrapped(self):
    # geometry = self._control.slm.getGeometry()
    geometry = self._getGeo()
    MOD = -1 * self.unwrap()
    MOD = np.flipud(MOD)
    MOD = np.rot90(MOD)
    cx, cy, d = geometry.cx, geometry.cy, geometry.d
    # Diameter of phase retrieval output [pxl]:
    dPhRt = (self._pupil.k_max / self._pupil.kx.max()) * self._pupil.nx
    # Zoom needed to fit onto SLM map:
    zoom = d / dPhRt
    MOD = interpolation.zoom(MOD, zoom, order=0, mode='nearest')
    # Flip up down:
    # MOD = np.flipud(MOD)
    # Flip left right:
    # MOD = np.fliplr(MOD)
    # MOD = np.rot90(MOD)
    MOD = np.rot90(-1.0 * MOD)  # Invert and rot90
    # Shift center:
    MOD = interpolation.shift(MOD, (cy - 255.5, cx - 255.5), order=0,
                              mode='nearest')
    # Cut out center 512x512:
    c = MOD.shape[0] // 2
    MOD = MOD[c - 256:c + 256, c - 256:c + 256]

    # Add an 'Other' modulation using the SLM API. Store the index in _modulations:
    # index = self._control.slm.addOther(MOD)
    index = self._addMOD(MOD)
    self._modulations.append(index)
    return index
def local_piesno(data, N, size=5, return_mask=True):
    m_out = np.zeros(data.shape[:-1], dtype=bool)
    reshaped_maps = sliding_window(data, (size, size, size, data.shape[-1]))

    sigma = np.zeros(reshaped_maps.shape[0], dtype=np.float32)
    mask = np.zeros((reshaped_maps.shape[0], size**3), dtype=bool)

    for i in range(reshaped_maps.shape[0]):
        cur_map = reshaped_maps[i].reshape(size**3, 1, -1)
        sigma[i], m = piesno(cur_map, N=N, return_mask=True)
        mask[i] = np.squeeze(m)

    s_out = sigma.reshape(data.shape[0] // size, data.shape[1] // size, data.shape[2] // size)

    for n, i in enumerate(np.ndindex(s_out.shape)):
        i = np.array(i) * size
        j = i + size
        m_out[i[0]:j[0], i[1]:j[1], i[2]:j[2]] = mask[n].reshape(size, size, size)

    interpolated = np.zeros_like(data[..., 0], dtype=np.float32)
    x, y, z = np.array(s_out.shape) * size
    interpolated[:x, :y, :z] = zoom(s_out, size, order=1)

    if return_mask:
        return interpolated, m_out

    return interpolated
def image_cart_to_polar(image, center, min_radius, max_radius, phase_width,
                        zoom_factor=1):
    '''Converts an image from cartesian to polar coordinates around center'''
    # Upsample image
    if zoom_factor != 1:
        image = zoom(image, (zoom_factor, zoom_factor), order=4)
        center = (center[0] * zoom_factor + zoom_factor / 2,
                  center[1] * zoom_factor + zoom_factor / 2)
        min_radius = min_radius * zoom_factor
        max_radius = max_radius * zoom_factor

    # pad if necessary
    max_x, max_y = image.shape[0], image.shape[1]
    pad_dist_x = np.max([(center[0] + max_radius) - max_x,
                         -(center[0] - max_radius)])
    pad_dist_y = np.max([(center[1] + max_radius) - max_y,
                         -(center[1] - max_radius)])
    pad_dist = int(np.max([0, pad_dist_x, pad_dist_y]))
    if pad_dist != 0:
        image = np.pad(image, pad_dist, 'constant')

    # coordinate conversion
    theta, r = np.meshgrid(np.linspace(0, 2 * np.pi, phase_width),
                           np.arange(min_radius, max_radius))
    x, y = coord_polar_to_cart(r, theta, center)
    x, y = np.round(x), np.round(y)
    x, y = x.astype(int), y.astype(int)

    x = np.maximum(x, 0)
    y = np.maximum(y, 0)
    x = np.minimum(x, max_x - 1)
    y = np.minimum(y, max_y - 1)

    polar = image[x, y]
    polar = polar.reshape((max_radius - min_radius, phase_width))

    return polar
def run_color(image, image_out):
    caffe.set_mode_cpu()
    net = caffe.Net('colorization_deploy_v0.prototxt', 'colorization_release_v0.caffemodel', caffe.TEST)

    (H_in, W_in) = net.blobs['data_l'].data.shape[2:]        # get input shape
    (H_out, W_out) = net.blobs['class8_ab'].data.shape[2:]   # get output shape
    net.blobs['Trecip'].data[...] = 6 / np.log(10)           # 1/T, set annealing temperature

    img_rgb = caffe.io.load_image(image)
    img_lab = color.rgb2lab(img_rgb)       # convert image to lab color space
    img_l = img_lab[:, :, 0]               # pull out L channel
    (H_orig, W_orig) = img_rgb.shape[:2]   # original image size

    # resize image to network input size
    img_rs = caffe.io.resize_image(img_rgb, (H_in, W_in))
    img_lab_rs = color.rgb2lab(img_rs)
    img_l_rs = img_lab_rs[:, :, 0]

    net.blobs['data_l'].data[0, 0, :, :] = img_l_rs - 50     # subtract 50 for mean-centering
    net.forward()                                            # run network

    ab_dec = net.blobs['class8_ab'].data[0, :, :, :].transpose((1, 2, 0))  # this is our result
    ab_dec_us = sni.zoom(ab_dec, (1. * H_orig / H_out, 1. * W_orig / W_out, 1))  # upsample to match size of original image L
    img_lab_out = np.concatenate((img_l[:, :, np.newaxis], ab_dec_us), axis=2)   # concatenate with original image L
    img_rgb_out = np.clip(color.lab2rgb(img_lab_out), 0, 1)                      # convert back to rgb

    scipy.misc.imsave(image_out, img_rgb_out)
def downScaleFn(s, img):
    maxSize = math.sqrt(dim[0] * dim[1])
    size = math.sqrt(img.shape[0] * img.shape[1])
    factor = maxSize / size
    if factor < 1:
        img = sp.zoom(img, (factor, factor, 1))
    return img
def draw_ellipsoid(shape, radius, center, FWHM, noise=0):
    sigma = FWHM / 2.35482
    cutoff = 2 * FWHM

    # draw a sphere
    R = max(radius)
    zoom_factor = np.array(radius) / R
    size = int((R + cutoff) * 2)
    c = size // 2
    z, y, x = np.meshgrid(*([np.arange(size)] * 3), indexing='ij')
    h = np.sqrt((z - c)**2 + (y - c)**2 + (x - c)**2) - R
    mask = np.abs(h) < cutoff
    im = np.zeros((size,) * 3, dtype=float)
    im[mask] += np.exp((h[mask] / sigma)**2 / -2) / (sigma * np.sqrt(2 * np.pi))

    # zoom so that radii are ok
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        im = zoom(im, zoom_factor)

    # shift and make correct shape
    center_diff = center - np.array(center_of_mass(im))
    left_padding = np.round(center_diff).astype(int)
    subpx_shift = center_diff - left_padding

    im = shift(im, subpx_shift)
    im = crop_pad(im, -left_padding, shape)
    im[im < 0] = 0

    assert_almost_equal(center_of_mass(im), center, decimal=2)

    if noise > 0:
        im += np.random.random(shape) * noise * im.max()
    return (im / im.max() * 255).astype(np.uint8)
def get_sun_image(time, wavelength, image_size=1023):
    try:
        time_str = time.strftime("%Y/%m/%d/%H%M.fits")
        if wavelength == 'hmi':
            filename = "/work1/t2g-16IAS/hmi/" + time_str
        else:
            filename = "/work1/t2g-16IAS/aia{:04}/".format(wavelength) + time_str
        aia_image = fits.open(filename)
        aia_image.verify("fix")

        if wavelength == 'hmi':
            exptime = 1
        else:
            exptime = aia_image[1].header['EXPTIME']
        if exptime <= 0:
            print(time, "non-positive exposure", file=sys.stderr)
            return None

        quality = aia_image[1].header['QUALITY']
        if quality != 0:
            print(time, "bad quality", file=sys.stderr)
            return None

        original_width = aia_image[1].data.shape[0]
        return interpolation.zoom(np.nan_to_num(aia_image[1].data),
                                  image_size / float(original_width)) / exptime
    except Exception as e:
        print(e, file=sys.stderr)
        return None
def _plot_annotation_on_ax(self, signal, ax, autoscale=False, colourmap="flag"):
    if autoscale:
        xstart = 0
        xdelta = signal.duration.total_seconds()
    else:
        xstart, ystart, xdelta, ydelta = ax.viewLim.bounds

    if xstart < 0:
        start_time = timedelta()
    else:
        start_time = timedelta(seconds=xstart)
    stop_time = timedelta(seconds=xdelta) + timedelta(seconds=xstart)

    sub_sig = signal[start_time:stop_time]

    xs = np.linspace(0, sub_sig.duration.total_seconds(), sub_sig.size) + start_time.total_seconds()
    ys = sub_sig.values
    probs = sub_sig.probas

    ys = ys.reshape((1, ys.size))
    zoom_f = float(self.max_point_amplitude_plot) / sub_sig.size
    ys = zoom(ys, [1, zoom_f], order=0)

    ax.imshow(ys, extent=[np.min(xs), np.max(xs), 1.5, -0.5], aspect="auto",
              cmap=colourmap, vmin=0, vmax=255, origin='lower')

    ax.plot(xs, probs, "-", color="k", linewidth=3)
    ax.plot(xs, probs, "-", color="y", linewidth=1, alpha=0.5)

    jet = cm = pl.get_cmap(colourmap)
    cNorm = colors.Normalize(vmin=0, vmax=255)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)

    states = np.unique(ys)
    boxes = [pl.Rectangle((0, 0), 1, 1, fc=scalarMap.to_rgba(col)) for col in states]
    labels = [chr(s) for s in states]
    pl.legend(boxes, labels, loc='lower right')

    n_labels = 8  # FIXME: magic number
    if len(xs) > n_labels:
        trimming = int(float(len(xs)) / float(n_labels))
        xs_trimmed = np.round(xs[::trimming])
    else:
        xs_trimmed = xs
    time_strings = [str(timedelta(seconds=s)) for s in xs_trimmed]

    ax.set_xticks(xs_trimmed)
    ax.set_xticklabels(time_strings, rotation=70)
    return
def zoom(image, factor, dimension, hdr=False, order=3):
    """
    Zooms the provided image by the supplied factor in the supplied dimension.
    The factor is an integer determining how many slices should be put between
    each existing pair.
    If an image header (hdr) is supplied, its voxel spacing gets updated.
    Returns the image and the updated header or False.
    """
    # check if supplied dimension is valid
    if dimension >= image.ndim:
        raise argparse.ArgumentError(
            None,
            'The supplied zoom-dimension {} exceeds the image dimensionality of 0 to {}.'.format(
                dimension, image.ndim - 1))

    # get logger
    logger = Logger.getInstance()
    logger.debug('Old shape = {}.'.format(image.shape))

    # perform the zoom
    zoom = [1] * image.ndim
    zoom[dimension] = (image.shape[dimension] + (image.shape[dimension] - 1) * factor) / float(image.shape[dimension])
    logger.debug('Reshaping with = {}.'.format(zoom))
    image = interpolation.zoom(image, zoom, order=order)

    logger.debug('New shape = {}.'.format(image.shape))

    if hdr:
        new_spacing = list(header.get_pixel_spacing(hdr))
        new_spacing[dimension] = new_spacing[dimension] / float(factor + 1)
        logger.debug('Setting pixel spacing from {} to {}....'.format(
            header.get_pixel_spacing(hdr), new_spacing))
        header.set_pixel_spacing(hdr, tuple(new_spacing))

    return image, hdr
def downsample(self, target_resolution):
    '''Obtain a smaller reference space by downsampling

    Parameters
    ----------
    target_resolution : tuple of numeric
        Resolution in microns of the output space.
    interpolator : string
        Method used to interpolate the volume. Currently only 'nearest'
        is supported

    Returns
    -------
    ReferenceSpace :
        A new ReferenceSpace with the same structure tree and a
        downsampled annotation.
    '''
    factors = [float(ii / jj) for ii, jj in zip(self.resolution, target_resolution)]
    target = zoom(self.annotation, factors, order=0)
    return ReferenceSpace(self.structure_tree, target, target_resolution)
def load_glyph(self, face, charcode, hires_size=512, lowres_size=32, padding=0.125):
    face.set_char_size(hires_size * 64)
    face.load_char(charcode, FT_LOAD_RENDER | FT_LOAD_NO_HINTING | FT_LOAD_NO_AUTOHINT)

    bitmap = face.glyph.bitmap
    width = face.glyph.bitmap.width
    height = face.glyph.bitmap.rows
    pitch = face.glyph.bitmap.pitch

    # Get glyph into a numpy array
    G = np.array(bitmap.buffer).reshape(height, pitch)
    G = G[:, :width].astype(np.ubyte)

    # Pad high resolution glyph with a blank border and normalize values
    # between 0 and 1
    hires_width = int((1 + 2 * padding) * width)
    hires_height = int((1 + 2 * padding) * height)
    hires_data = np.zeros((hires_height, hires_width), np.double)
    ox, oy = int(padding * width), int(padding * height)
    hires_data[oy:oy + height, ox:ox + width] = G / 255.0

    # Compute distance field at high resolution
    compute_sdf(hires_data)

    # Scale down glyph to low resolution size
    ratio = lowres_size / float(hires_size)
    lowres_data = 1 - zoom(hires_data, ratio, cval=1.0)

    # Compute information at low resolution size
    # size = (lowres_data.shape[1], lowres_data.shape[0])
    offset = ((face.glyph.bitmap_left - padding * width) * ratio,
              (face.glyph.bitmap_top + padding * height) * ratio)
    advance = ((face.glyph.advance.x / 64.0) * ratio,
               (face.glyph.advance.y / 64.0) * ratio)
    return lowres_data, offset, advance
def load_image():
    ret = np.zeros((args.batchsize, 1, img_h, img_w), dtype=np.float32)
    i = 0
    while i < args.batchsize:
        try:
            year = 2011 + np.random.randint(4)
            month = 1 + np.random.randint(12)
            day = 1 + np.random.randint(32)
            hour = np.random.randint(24)
            minu = np.random.randint(5) * 12

            subprocess.call("rm {}/*".format(work_image_dir), shell=True)
            local_fn = work_image_dir + "/image.fits"
            cmd = 'aws s3 cp "s3://sdo/aia193/720s/{:04}/{:02}/{:02}/{:02}{:02}.fits" {} --region us-west-2 --quiet'.format(
                year, month, day, hour, minu, local_fn)
            subprocess.call(cmd, shell=True)

            h = fits.open(local_fn)
            h[1].verify("fix")
            exptime = h[1].header["EXPTIME"]
            if exptime <= 0:
                print("EXPTIME <= 0")
                continue
            img = intp.zoom(h[1].data.astype(np.float32), zoom=img_w / 4096.0, order=0)
            img = scale_brightness(img / exptime)
            ret[i, :, :, :] = np.reshape(img, (1, 1, img_h, img_w))
            i += 1
        except:
            continue
    return ret
def dat2mhd(fn):
    with open(fn) as fd:
        D = np.fromfile(file=fd, dtype=np.uint8).reshape((256, 256, 120)).astype("float32") / 255.0

    D = np.log(D + 1)

    from scipy.ndimage.interpolation import zoom
    D = zoom(D, [1, 1, 256.0 / 120.0])

    flat_d = D.transpose(2, 1, 0).flatten()
    vtk_d_array = ns.numpy_to_vtk(flat_d)

    image = vtk.vtkImageData()
    points = image.GetPointData()
    points.SetScalars(vtk_d_array)
    image.SetDimensions(D.shape)
    image.Update()

    w = vtk.vtkMetaImageWriter()
    w.SetFileName("bla.hdr")
    w.SetInput(image)
    w.Write()
def _zoomit(self, img, factors):
    assert img.ndim == 2, "TODO: Currently not implemented for 3D images."

    zimg = _spint.zoom(img, factors, **self.kw)
    out = _np.full_like(img, self.kw['cval'])

    if zimg.shape[0] < out.shape[0]:
        dst_y0 = (out.shape[0] - zimg.shape[0]) // 2
        dst_y1 = dst_y0 + zimg.shape[0]
        src_y0 = 0
        src_y1 = zimg.shape[0]
    else:
        dst_y0 = 0
        dst_y1 = out.shape[0]
        src_y0 = (zimg.shape[0] - out.shape[0]) // 2
        src_y1 = src_y0 + out.shape[0]

    if zimg.shape[1] < out.shape[1]:
        dst_x0 = (out.shape[1] - zimg.shape[1]) // 2
        dst_x1 = dst_x0 + zimg.shape[1]
        src_x0 = 0
        src_x1 = zimg.shape[1]
    else:
        dst_x0 = 0
        dst_x1 = out.shape[1]
        src_x0 = (zimg.shape[1] - out.shape[1]) // 2
        src_x1 = src_x0 + out.shape[1]

    out[dst_y0:dst_y1, dst_x0:dst_x1] = zimg[src_y0:src_y1, src_x0:src_x1]
    return out
def BuildS1FromRetina(self, retina):
    """Apply S1 processing to some existing retinal layer data.

    retina -- (2-D array) result of retinal layer processing

    RETURNS list of (4-D) S1 activity arrays, with one array per scale
    """
    # Create scale pyramid of retinal map
    p = self.params
    retina_scales = [zoom(retina, 1 / p.scale_factor ** scale)
                     for scale in range(p.num_scales)]
    # Reshape kernel array to be 3-D: index, 1, y, x
    s1_kernels = self.s1_kernels.reshape((-1, 1, p.s1_kwidth, p.s1_kwidth))
    s1s = []
    for scale in range(p.num_scales):
        # Reshape retina to be 3D array
        retina = retina_scales[scale]
        retina_ = retina.reshape((1,) + retina.shape)
        s1_ = self.backend.NormRbf(retina_, s1_kernels, bias=p.s1_bias,
                                   beta=p.s1_beta, scaling=p.s1_scaling)
        # Reshape S1 to be 4D array
        s1 = s1_.reshape((p.s1_num_orientations, p.s1_num_phases) + s1_.shape[-2:])
        # Pool over phase.
        s1 = s1.max(1)
        s1s.append(s1)
    return s1s
def ring_wedge(image, dim=_DIM):
    # perform fft and scale its intensities to dim x dim
    amp_trans = fftshift(fft2(image))
    int_trans = np.real(amp_trans * np.conj(amp_trans))
    z = (1. * dim / image.shape[0], 1. * dim / image.shape[1])
    int_trans = zoom(int_trans, z, order=1)  # bilinear

    # now compute stats of filtered intensities
    mask, filt = filter_masks(dim)
    filter_img = mask * int_trans

    # intensities inside central area
    inner_int = np.sum(filter_img)
    # total intensity
    total_int = np.sum(int_trans)
    # ratio between central intensity and total intensity
    pwr_ratio = inner_int / total_int

    # now mask the intensities for wedge and ring calculations
    wedge_int_trans = int_trans * filt  # wedges exclude center

    # only use the bottom half
    half = np.vstack((np.zeros(((dim // 2) + 1, dim)),
                      np.ones((dim // 2, dim)))).astype(bool)
    wedge_half = wedge_int_trans * half
    ring_half = int_trans * half

    # now compute unscaled wedge and ring vectors for all wedges and rings
    # these represent the total power found in each ring / wedge
    wedge_vector = np.array([np.sum(wedge_mask(i) * wedge_half) for i in range(48)])
    ring_vector = np.array([np.sum(ring_mask(i) * ring_half) for i in range(50)])

    # compute power integral over wedge vectors and scale vectors by it
    pwr_integral = np.sum(wedge_vector)
    wedges = wedge_vector / pwr_integral
    rings = ring_vector / pwr_integral

    # return all features
    return pwr_integral, pwr_ratio, wedges, rings
def calculate_metric_percase(pred, gt):
    gt[gt > 0] = 1
    dice = metric.binary.dc(pred, gt)
    asd = metric.binary.asd(pred, gt)
    hd95 = metric.binary.hd95(pred, gt)
    return dice, hd95, asd


def test_single_volume(case, net, test_save_path):
    h5f = h5py.File(FLAGS.root_path + "/data/{}.h5".format(case), 'r')
    image = h5f['image'][:]
    label = h5f['label'][:]
    prediction = np.zeros_like(label)
    for ind in range(image.shape[0]):
        slice = image[ind, :, :]
        x, y = slice.shape[0], slice.shape[1]
        slice = zoom(slice, (256 / x, 256 / y), order=0)
        input = torch.from_numpy(slice).unsqueeze(0).unsqueeze(0).float().cuda()
        net.eval()
        with torch.no_grad():
            out_main = net(input)
            out = torch.argmax(torch.softmax(out_main, dim=1), dim=1).squeeze(0)
            out = out.cpu().detach().numpy()
            pred = zoom(out, (x / 256, y / 256), order=0)
            prediction[ind] = pred
    first_metric = calculate_metric_percase(prediction == 1, label == 1)
    second_metric = calculate_metric_percase(prediction == 2, label == 2)
    third_metric = calculate_metric_percase(prediction == 3, label == 3)
def similarity(im0, im1):
    """Return similarity transformed image im1 and transformation parameters.

    Transformation parameters are: isotropic scale factor, rotation angle (in
    degrees), and translation vector.

    A similarity transformation is an affine transformation with isotropic
    scale and without shear.

    Limitations:
    Image shapes must be equal and square.
    All image areas must have same scale, rotation, and shift.
    Scale change must be less than 1.8.
    No subpixel precision.
    """
    if im0.shape != im1.shape:
        raise ValueError("Images must have same shapes.")
    elif len(im0.shape) != 2:
        raise ValueError("Images must be 2 dimensional.")

    f0 = fftshift(abs(fft2(im0)))
    f1 = fftshift(abs(fft2(im1)))

    h = highpass(f0.shape)
    f0 *= h
    f1 *= h
    del h

    f0, log_base = logpolar(f0)
    f1, log_base = logpolar(f1)

    f0 = fft2(f0)
    f1 = fft2(f1)
    r0 = abs(f0) * abs(f1)
    ir = abs(ifft2((f0 * f1.conjugate()) / r0))
    i0, i1 = numpy.unravel_index(numpy.argmax(ir), ir.shape)
    angle = 180.0 * i0 / ir.shape[0]
    scale = log_base ** i1

    if scale > 1.8:
        ir = abs(ifft2((f1 * f0.conjugate()) / r0))
        i0, i1 = numpy.unravel_index(numpy.argmax(ir), ir.shape)
        angle = -180.0 * i0 / ir.shape[0]
        scale = 1.0 / (log_base ** i1)
        if scale > 1.8:
            raise ValueError("Images are not compatible. Scale change > 1.8")

    if angle < -90.0:
        angle += 180.0
    elif angle > 90.0:
        angle -= 180.0

    im2 = ndii.zoom(im1, 1.0 / scale)
    im2 = ndii.rotate(im2, angle)

    if im2.shape < im0.shape:
        t = numpy.zeros_like(im0)
        t[:im2.shape[0], :im2.shape[1]] = im2
        im2 = t
    elif im2.shape > im0.shape:
        im2 = im2[:im0.shape[0], :im0.shape[1]]

    f0 = fft2(im0)
    f1 = fft2(im2)
    ir = abs(ifft2((f0 * f1.conjugate()) / (abs(f0) * abs(f1))))
    t0, t1 = numpy.unravel_index(numpy.argmax(ir), ir.shape)

    if t0 > f0.shape[0] // 2:
        t0 -= f0.shape[0]
    if t1 > f0.shape[1] // 2:
        t1 -= f0.shape[1]

    im2 = ndii.shift(im2, [t0, t1])

    # correct parameters for ndimage's internal processing
    if angle > 0.0:
        d = int(int(im1.shape[1] / scale) * math.sin(math.radians(angle)))
        t0, t1 = t1, d + t0
    elif angle < 0.0:
        d = int(int(im1.shape[0] / scale) * math.sin(math.radians(angle)))
        t0, t1 = d + t1, d + t0
    scale = (im1.shape[1] - 1) / (int(im1.shape[1] / scale) - 1)

    return im2, scale, angle, [-t0, -t1]
def decrease_dimensionality(image):
    # downsample size = 80 x 80
    downsampled = interpolation.zoom(image[20:-30, ], [0.5, 0.5, 1])
    r, g, b = downsampled[:, :, 0], downsampled[:, :, 1], downsampled[:, :, 2]
    # convert to grayscale
    return 0.299 * r + 0.587 * g + 0.114 * b
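
# Hedged usage sketch (not in the original): feed a synthetic 210x160 RGB frame,
# the Atari screen size the 20:-30 crop suggests, through the function above.
import numpy as np

frame = np.random.rand(210, 160, 3)
gray = decrease_dimensionality(frame)
print(gray.shape)   # (80, 80): cropped to 160 rows, halved in both axes, channels merged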
def evaluate(upsampling_factor, residual_blocks, feature_size, checkpoint_dir_restore,
             path_volumes, nn, subpixel_NN, img_height, img_width, img_depth):
    traindataset = Train_dataset(1)
    iterations = math.ceil((len(traindataset.subject_list) * 0.2))
    print(len(traindataset.subject_list))
    print(iterations)
    totalpsnr = 0
    totalssim = 0
    array_psnr = np.empty(iterations)
    array_ssim = np.empty(iterations)
    batch_size = 1
    div_patches = 4
    num_patches = traindataset.num_patches

    # define model
    t_input_gen = tf.compat.v1.placeholder('float32', [1, None, None, None, 1],
                                           name='t_image_input_to_SRGAN_generator')
    srgan_network = generator(input_gen=t_input_gen, kernel=3, nb=residual_blocks,
                              upscaling_factor=upsampling_factor,
                              feature_size=feature_size, subpixel_NN=subpixel_NN,
                              img_height=img_height, img_width=img_width,
                              img_depth=img_depth, nn=nn, is_train=False, reuse=False)

    # restore g
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
    saver = tf.train.Saver(tf.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope="SRGAN_g"))
    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir_restore))

    for i in range(0, iterations):
        # extract volumes
        xt_total = traindataset.data_true(654 + i)
        xt_mask = traindataset.mask(654 + i)
        normfactor = (np.amax(xt_total[0])) / 2
        x_generator = ((xt_total[0] - normfactor) / normfactor)
        res = 1 / upsampling_factor
        x_generator = x_generator[:, :, :, np.newaxis]
        x_generator = gaussian_filter(x_generator, sigma=1)
        x_generator = zoom(x_generator, [res, res, res, 1], prefilter=False)
        xg_generated = sess.run(srgan_network.outputs, {t_input_gen: x_generator[np.newaxis, :]})
        xg_generated = ((xg_generated + 1) * normfactor)
        volume_real = xt_total[0]
        volume_real = volume_real[:, :, :, np.newaxis]
        volume_generated = xg_generated[0]
        volume_mask = aggregate(xt_mask)

        # compute metrics
        max_gen = np.amax(volume_generated)
        max_real = np.amax(volume_real)
        if max_gen > max_real:
            val_max = max_gen
        else:
            val_max = max_real
        min_gen = np.amin(volume_generated)
        min_real = np.amin(volume_real)
        if min_gen < min_real:
            val_min = min_gen
        else:
            val_min = min_real
        val_psnr = psnr(np.multiply(volume_real, volume_mask),
                        np.multiply(volume_generated, volume_mask),
                        dynamic_range=val_max - val_min)
        array_psnr[i] = val_psnr
        totalpsnr += val_psnr
        val_ssim = ssim(np.multiply(volume_real, volume_mask),
                        np.multiply(volume_generated, volume_mask),
                        dynamic_range=val_max - val_min, multichannel=True)
        array_ssim[i] = val_ssim
        totalssim += val_ssim
        print(val_psnr)
        print(val_ssim)

        # save volumes
        filename_gen = os.path.join(path_volumes, str(i) + 'gen.nii.gz')
        img_volume_gen = nib.Nifti1Image(volume_generated, np.eye(4))
        img_volume_gen.to_filename(filename_gen)
        filename_real = os.path.join(path_volumes, str(i) + 'real.nii.gz')
        img_volume_real = nib.Nifti1Image(volume_real, np.eye(4))
        img_volume_real.to_filename(filename_real)

    print('{}{}'.format('PSNR: ', array_psnr))
    print('{}{}'.format('SSIM: ', array_ssim))
    print('{}{}'.format('Mean PSNR: ', array_psnr.mean()))
    print('{}{}'.format('Mean SSIM: ', array_ssim.mean()))
    print('{}{}'.format('Variance PSNR: ', array_psnr.var()))
    print('{}{}'.format('Variance SSIM: ', array_ssim.var()))
    print('{}{}'.format('Max PSNR: ', array_psnr.max()))
    print('{}{}'.format('Min PSNR: ', array_psnr.min()))
    print('{}{}'.format('Max SSIM: ', array_ssim.max()))
    print('{}{}'.format('Min SSIM: ', array_ssim.min()))
    print('{}{}'.format('Median PSNR: ', np.median(array_psnr)))
    print('{}{}'.format('Median SSIM: ', np.median(array_ssim)))
def nlbin(im, threshold=0.5, zoom=0.5, escale=1.0, border=0.1, perc=80,
          range=20, low=5, high=90):
    """
    Performs binarization using non-linear processing.

    Args:
        im (PIL.Image):
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image
    """
    if im.mode == '1':
        return im
    raw = pil2array(im)
    # rescale image to between -1 or 0 and 1
    raw = raw / float(np.iinfo(raw.dtype).max)
    if raw.ndim == 3:
        raw = np.mean(raw, 2)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        raise KrakenInputException('Image is empty')
    image = raw - np.amin(raw)
    image /= np.amax(image)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
        m = interpolation.zoom(m, 1.0 / zoom)
    w, h = np.minimum(np.array(image.shape), np.array(m.shape))
    flat = np.clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate low and high thresholds
    d0, d1 = flat.shape
    o0, o1 = int(border * d0), int(border * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    # by default, we use only regions that contain
    # significant variance; this makes the percentile
    # based low and high estimates more reliable
    v = est - filters.gaussian_filter(est, escale * 20.0)
    v = filters.gaussian_filter(v ** 2, escale * 20.0) ** 0.5
    v = (v > 0.3 * np.amax(v))
    v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
    v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
    est = est[v]
    lo = np.percentile(est.ravel(), low)
    hi = np.percentile(est.ravel(), high)

    flat -= lo
    flat /= (hi - lo)
    flat = np.clip(flat, 0, 1)
    bin = np.array(255 * (flat > threshold), 'B')
    return array2pil(bin)
def resample_np(data, output_shape, order):
    assert len(data.shape) == len(output_shape)
    factor = [float(o) / i for i, o in zip(data.shape, output_shape)]
    return interpolation.zoom(data, zoom=factor, order=order)
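
# Minimal usage sketch (not in the original): resample a synthetic 3-D volume onto
# a 64^3 grid with cubic interpolation via resample_np above.
import numpy as np

vol = np.random.rand(91, 109, 91)
resampled = resample_np(vol, (64, 64, 64), order=3)
print(resampled.shape)   # (64, 64, 64)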
# create the transform matrix
rzoom = 0.2 * (np.random.rand(3) - .5) + 1.   # [0.9, 1.1)
# rzoom = [1, 1, 1]
rotation_deg = 0.3 * (np.random.rand(3) - 0.5)
# rotation_deg = [0, 0, 0.5]
rroto = get_affine_rotation(*rotation_deg)
rshift = 10 * (np.random.rand(3) - 0.5)

# apply the transform
cdm = np.array(image.shape) / 2
offset = cdm - np.dot(rroto, cdm)
img = affine_transform(image, rroto, offset=offset, order=1)
img = shift(img, rshift, order=1)
img = zoom(img, rzoom, order=1)
img1 = pad(img, image)
img2 = crop(img1, image)

# save
save_tiff(patht, 'transformed.tiff', img2, i)
conf = np.vstack((rzoom, rotation_deg, rshift))
fname_ = "parameters.txt"
input_ = os.path.join(patht, str(i + 1), fname_)
np.savetxt(input_, conf, fmt="%f")

# permute indices
imgp = np.random.permutation(image.flatten()).reshape(image.shape)

# save
save_tiff(pathp, 'permuted.tiff', imgp, i)
def resize3D(timg, newShape=(256, 256, 64)):
    zoomScales = np.array(newShape, float) / np.array(timg.shape, float)
    ret = scInterpolation.zoom(timg, zoomScales)
    return ret
def rescale(in_slice, target_shape=[224, 224]):
    factors = [t / s for s, t in zip(in_slice.shape, target_shape)]
    resized = zoom(in_slice, zoom=factors, order=1, prefilter=False)
    return resized
def generator(input_gen, kernel, nb, upscaling_factor, reuse, feature_size,
              img_width, img_height, img_depth, subpixel_NN, nn, is_train=True):
    w_init = tf.random_normal_initializer(stddev=0.02)
    w_init_subpixel1 = np.random.normal(scale=0.02, size=[3, 3, 3, 64, feature_size])
    w_init_subpixel1 = zoom(w_init_subpixel1, [2, 2, 2, 1, 1], order=0)
    w_init_subpixel1_last = tf.constant_initializer(w_init_subpixel1)
    w_init_subpixel2 = np.random.normal(scale=0.02, size=[3, 3, 3, 64, 64])
    w_init_subpixel2 = zoom(w_init_subpixel2, [2, 2, 2, 1, 1], order=0)
    w_init_subpixel2_last = tf.constant_initializer(w_init_subpixel2)

    with tf.compat.v1.variable_scope("SRGAN_g", reuse=reuse):
        tl.layers.set_name_reuse(reuse)
        x = InputLayer(input_gen, name='in')
        x = Conv3dLayer(x, shape=[kernel, kernel, kernel, 1, feature_size],
                        strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                        name='conv1')
        x = BatchNormLayer(x, act=lrelu1, is_train=is_train, name='BN-conv1')
        inputRB = x
        inputadd = x

        # residual blocks
        for i in range(nb):
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, feature_size, feature_size],
                            strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                            name='conv1-rb/%s' % i)
            x = BatchNormLayer(x, act=lrelu1, is_train=is_train, name='BN1-rb/%s' % i)
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, feature_size, feature_size],
                            strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                            name='conv2-rb/%s' % i)
            x = BatchNormLayer(x, is_train=is_train, name='BN2-rb/%s' % i)
            # short skip connection
            x = ElementwiseLayer([x, inputadd], tf.add, name='add-rb/%s' % i)
            inputadd = x

        # large skip connection
        x = Conv3dLayer(x, shape=[kernel, kernel, kernel, feature_size, feature_size],
                        strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                        name='conv2')
        x = BatchNormLayer(x, is_train=is_train, name='BN-conv2')
        x = ElementwiseLayer([x, inputRB], tf.add, name='add-conv2')

        # ____________SUBPIXEL-NN______________#
        if subpixel_NN:
            # upscaling block 1
            if upscaling_factor == 4:
                img_height_deconv = int(img_height / 2)
                img_width_deconv = int(img_width / 2)
                img_depth_deconv = int(img_depth / 2)
            else:
                img_height_deconv = img_height
                img_width_deconv = img_width
                img_depth_deconv = img_depth
            x = DeConv3dLayer(x, shape=[kernel * 2, kernel * 2, kernel * 2, 64, feature_size],
                              act=lrelu1, strides=[1, 2, 2, 2, 1],
                              output_shape=[tf.shape(input_gen)[0], img_height_deconv,
                                            img_width_deconv, img_depth_deconv, 64],
                              padding='SAME', W_init=w_init_subpixel1_last,
                              name='conv1-ub-subpixelnn/1')
            # upscaling block 2
            if upscaling_factor == 4:
                x = DeConv3dLayer(x, shape=[kernel * 2, kernel * 2, kernel * 2, 64, 64],
                                  act=lrelu1, strides=[1, 2, 2, 2, 1], padding='SAME',
                                  output_shape=[tf.shape(input_gen)[0], img_height,
                                                img_width, img_depth, 64],
                                  W_init=w_init_subpixel2_last,
                                  name='conv1-ub-subpixelnn/2')
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, 64, 1],
                            strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                            name='convlast-subpixelnn')
        # ____________RC______________#
        elif nn:
            # upscaling block 1
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, feature_size, 64],
                            act=lrelu1, strides=[1, 1, 1, 1, 1], padding='SAME',
                            W_init=w_init, name='conv1-ub/1')
            x = UpSampling3D(name='UpSampling3D_1')(x.outputs)
            x = Conv3dLayer(InputLayer(x, name='in ub1 conv2'),
                            shape=[kernel, kernel, kernel, 64, 64], act=lrelu1,
                            strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                            name='conv2-ub/1')
            # upscaling block 2
            if upscaling_factor == 4:
                x = Conv3dLayer(x, shape=[kernel, kernel, kernel, 64, 64], act=lrelu1,
                                strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                                name='conv1-ub/2')
                x = UpSampling3D(name='UpSampling3D_1')(x.outputs)
                x = Conv3dLayer(InputLayer(x, name='in ub2 conv2'),
                                shape=[kernel, kernel, kernel, 64, 64], act=lrelu1,
                                strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                                name='conv2-ub/2')
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, 64, 1],
                            strides=[1, 1, 1, 1, 1], act=tf.nn.tanh, padding='SAME',
                            W_init=w_init, name='convlast')
        # ____________SUBPIXEL - BASELINE______________#
        else:
            if upscaling_factor == 4:
                steps_to_end = 2
            else:
                steps_to_end = 1
            # upscaling block 1
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, feature_size, 64],
                            act=lrelu1, strides=[1, 1, 1, 1, 1], padding='SAME',
                            W_init=w_init, name='conv1-ub/1')
            arguments = {'img_width': img_width, 'img_height': img_height,
                         'img_depth': img_depth, 'stepsToEnd': steps_to_end,
                         'n_out_channel': int(64 / 8)}
            x = LambdaLayer(x, fn=subPixelConv3d, fn_args=arguments, name='SubPixel1')
            # upscaling block 2
            if upscaling_factor == 4:
                x = Conv3dLayer(x, shape=[kernel, kernel, kernel, int(64 / 8), 64],
                                act=lrelu1, strides=[1, 1, 1, 1, 1], padding='SAME',
                                W_init=w_init, name='conv1-ub/2')
                arguments = {'img_width': img_width, 'img_height': img_height,
                             'img_depth': img_depth, 'stepsToEnd': 1,
                             'n_out_channel': int(64 / 8)}
                x = LambdaLayer(x, fn=subPixelConv3d, fn_args=arguments, name='SubPixel2')
            x = Conv3dLayer(x, shape=[kernel, kernel, kernel, int(64 / 8), 1],
                            strides=[1, 1, 1, 1, 1], padding='SAME', W_init=w_init,
                            name='convlast')

    return x
def generate_flat_matrix(n=8):
    return zoom(asarray(TO_FLAT_MATRIX), n / 8, order=1, mode='nearest')
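
# Hedged illustration (not in the original): the same zoom pattern with a stand-in
# 8x8 array, since TO_FLAT_MATRIX is defined elsewhere in the module. Bilinear
# interpolation with mode='nearest' replicates the edges while scaling 8x8 -> 16x16.
import numpy as np
from scipy.ndimage import zoom

matrix_8x8 = np.arange(64, dtype=float).reshape(8, 8)   # stand-in for TO_FLAT_MATRIX
matrix_16x16 = zoom(matrix_8x8, 16 / 8, order=1, mode='nearest')
print(matrix_16x16.shape)   # (16, 16)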
# process_parameter_id = np.argmax(abs(y_pred[i, :]))
cop_input = test_input_conv_data[0:1, :, :, :, :]
fmap_eval, grad_wrt_fmap_eval = camviz.grad_cam_3d(cop_input, process_parameter_id)

alpha_k_c = grad_wrt_fmap_eval.mean(axis=(0, 1, 2, 3)).reshape((1, 1, 1, -1))
Lc_Grad_CAM = np.maximum(np.sum(fmap_eval * alpha_k_c, axis=-1), 0).squeeze()
scale_factor = np.array(cop_input.shape[1:4]) / np.array(Lc_Grad_CAM.shape)

from scipy.ndimage.interpolation import zoom
import tensorflow.keras.backend as K

_grad_CAM = zoom(Lc_Grad_CAM, scale_factor)
arr_min, arr_max = np.min(_grad_CAM), np.max(_grad_CAM)
grad_CAM = (_grad_CAM - arr_min) / (arr_max - arr_min + K.epsilon())
# print(grad_CAM.shape)

grad_cam_plot_matlab[i, :] = get_point_cloud.getcopdev_gradcam(grad_CAM, point_index, nominal_cop)

# Saving File
np.savetxt((logs_path + '/grad_cam_pred_' + layer_name + '.csv'),
           grad_cam_plot_matlab, delimiter=",")

if (deploy_output == 0):
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework

    # print("device :", device)
    disp_net_enc = SharedEncoder.SharedEncoderMain().double().to(device)
    weights = torch.load(args.pretrained_dispnet_enc, map_location=lambda storage, loc: storage)
    disp_net_enc.load_state_dict(weights)
    disp_net_enc.eval()

    disp_net_dec = DepthDecoder.DepthDecoder().double().to(device)
    weights = torch.load(args.pretrained_dispnet_dec, map_location=lambda storage, loc: storage)
    # print("weights:", weights)
    disp_net_dec.load_state_dict(weights)
    disp_net_dec.eval()

    if args.pretrained_posenet_dec is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, '
              'which is kiiinda cheating (but consistent with original paper)')
        seq_length = 1
    else:
        # pose_net_dec = PoseCopy.PoseExpNet().double().to(device)
        pose_net_dec = PoseNetwork.PoseDecoder().double().to(device)
        weights = torch.load(args.pretrained_posenet_dec, map_location=lambda storage, loc: storage)
        seq_length = int(weights['conv1.0.weight'].size(1) / 3)
        # print("weights:", weights)
        pose_net_dec.load_state_dict(weights)
        pose_net_dec.eval()
        print("seq:", seq_length)
        seq_length = 3

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in
                      sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length,
                               args.min_depth, args.max_depth, use_gps=args.gps)

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 9, len(test_files)), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']
        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32)
                        for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.5).to(device)

        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            ref_imgs[i] = img

        econv = disp_net_enc(tgt_img.double())
        pred_disp = disp_net_dec(tgt_img.double(), econv).cpu().numpy()[0, 0]

        if args.output_dir is not None:
            if j == 0:
                predictions = np.zeros((len(test_files), *pred_disp.shape))
            predictions[j] = 1 / pred_disp

        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0] / pred_depth.shape[0],
                                  gt_depth.shape[1] / pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)

        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 1:
            middle_index = seq_length // 2
            tgt = ref_imgs[middle_index]
            reorganized_refs = ref_imgs[:middle_index] + ref_imgs[middle_index + 1:]
            econv = disp_net_enc(torch.cat(ref_imgs, dim=0).double())
            poses, a1, a2 = pose_net_dec(econv[4][0:1], econv[4][1:2], econv[4][2:3])
            displacement_magnitudes = poses[0, :, :3].norm(2, 1).cpu().numpy()

            scale_factor = np.mean(sample['displacements'] / displacement_magnitudes)
            errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

        scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
        errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'rms', 'log_rms', 'abs_log', 'a1', 'a2', 'a3']
    if args.pretrained_posenet_dec:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions)
def train(upscaling_factor, residual_blocks, feature_size, path_prediction, checkpoint_dir, img_width, img_height, img_depth, subpixel_NN, nn, restore, batch_size=1, div_patches=4, epochs=10): traindataset = Train_dataset(batch_size) iterations_train = math.ceil((len(traindataset.subject_list) * 0.8) / batch_size) num_patches = traindataset.num_patches # ##========================== DEFINE MODEL ============================## t_input_gen = tf.compat.v1.placeholder('float32', [int((batch_size * num_patches) / div_patches), None, None, None, 1], name='t_image_input_to_SRGAN_generator') t_target_image = tf.compat.v1.placeholder('float32', [int((batch_size * num_patches) / div_patches), img_height, img_width, img_depth, 1], #switched height,width name='t_target_image') t_input_mask = tf.compat.v1.placeholder('float32', [int((batch_size * num_patches) / div_patches), img_width, img_height, img_depth, 1], name='t_image_input_mask') net_gen = generator(input_gen=t_input_gen, kernel=3, nb=residual_blocks, upscaling_factor=upscaling_factor, img_height=img_height, img_width=img_width, img_depth=img_depth, subpixel_NN=subpixel_NN, nn=nn, feature_size=feature_size, is_train=True, reuse=False) net_d, disc_out_real = discriminator(input_disc=t_target_image, kernel=3, is_train=True, reuse=False) _, disc_out_fake = discriminator(input_disc=net_gen.outputs, kernel=3, is_train=True, reuse=True) # test gen_test = generator(t_input_gen, kernel=3, nb=residual_blocks, upscaling_factor=upscaling_factor, img_height=img_height, img_width=img_width, img_depth=img_depth, subpixel_NN=subpixel_NN, nn=nn, feature_size=feature_size, is_train=True, reuse=True) # ###========================== DEFINE TRAIN OPS ==========================### if np.random.uniform() > 0.1: # give correct classifications y_gan_real = tf.ones_like(disc_out_real) y_gan_fake = tf.zeros_like(disc_out_real) else: # give wrong classifications (noisy labels) y_gan_real = tf.zeros_like(disc_out_real) y_gan_fake = tf.ones_like(disc_out_real) d_loss_real = tf.reduce_mean(tf.square(disc_out_real - smooth_gan_labels(y_gan_real)), name='d_loss_real') d_loss_fake = tf.reduce_mean(tf.square(disc_out_fake - smooth_gan_labels(y_gan_fake)), name='d_loss_fake') d_loss = d_loss_real + d_loss_fake mse_loss = tf.reduce_sum( tf.square(net_gen.outputs - t_target_image), axis=[0, 1, 2, 3, 4], name='g_loss_mse') dx_real = t_target_image[:, 1:, :, :, :] - t_target_image[:, :-1, :, :, :] dy_real = t_target_image[:, :, 1:, :, :] - t_target_image[:, :, :-1, :, :] dz_real = t_target_image[:, :, :, 1:, :] - t_target_image[:, :, :, :-1, :] dx_fake = net_gen.outputs[:, 1:, :, :, :] - net_gen.outputs[:, :-1, :, :, :] dy_fake = net_gen.outputs[:, :, 1:, :, :] - net_gen.outputs[:, :, :-1, :, :] dz_fake = net_gen.outputs[:, :, :, 1:, :] - net_gen.outputs[:, :, :, :-1, :] gd_loss = tf.reduce_sum(tf.square(tf.abs(dx_real) - tf.abs(dx_fake))) + \ tf.reduce_sum(tf.square(tf.abs(dy_real) - tf.abs(dy_fake))) + \ tf.reduce_sum(tf.square(tf.abs(dz_real) - tf.abs(dz_fake))) g_gan_loss = 10e-2 * tf.reduce_mean(tf.square(disc_out_fake - smooth_gan_labels(tf.ones_like(disc_out_real))), name='g_loss_gan') g_loss = mse_loss + g_gan_loss + gd_loss g_vars = tl.layers.get_variables_with_name('SRGAN_g', True, True) d_vars = tl.layers.get_variables_with_name('SRGAN_d', True, True) with tf.compat.v1.variable_scope('learning_rate'): lr_v = tf.Variable(1e-4, trainable=False) global_step = tf.Variable(0, trainable=False) decay_rate = 0.5 decay_steps = 4920 # every 2 epochs (more or less) learning_rate = 
tf.train.inverse_time_decay(lr_v, global_step=global_step, decay_rate=decay_rate, decay_steps=decay_steps) # Optimizers g_optim = tf.train.AdamOptimizer(learning_rate).minimize(g_loss, var_list=g_vars) d_optim = tf.train.AdamOptimizer(learning_rate).minimize(d_loss, var_list=d_vars) session = tf.Session() tl.layers.initialize_global_variables(session) step = 0 saver = tf.train.Saver() if restore is not None: saver.restore(session, tf.train.latest_checkpoint(restore)) val_restore = 0 * epochs else: val_restore = 0 array_psnr = [] array_ssim = [] for j in range(val_restore, epochs + val_restore): for i in range(0, iterations_train): # ====================== LOAD DATA =========================== # xt_total = traindataset.patches_true(i) xm_total = traindataset.mask(i) for k in range(0, div_patches): print('{}'.format(k)) xt = xt_total[k * int((batch_size * num_patches) / div_patches):(int( (batch_size * num_patches) / div_patches) * k) + int( (batch_size * num_patches) / div_patches)] xm = xm_total[k * int((batch_size * num_patches) / div_patches):(int( (batch_size * num_patches) / div_patches) * k) + int( (batch_size * num_patches) / div_patches)] # NORMALIZING for t in range(0, xt.shape[0]): normfactor = (np.amax(xt[t])) / 2 if normfactor != 0: xt[t] = ((xt[t] - normfactor) / normfactor) x_generator = gaussian_filter(xt, sigma=1) x_generator = zoom(x_generator, [1, (1 / upscaling_factor), (1 / upscaling_factor), (1 / upscaling_factor), 1], prefilter=False, order=0) xgenin = x_generator # ========================= train SRGAN ========================= # # update D errd, _ = session.run([d_loss, d_optim], {t_target_image: xt, t_input_gen: xgenin}) # update G errg, errmse, errgan, errgd, _ = session.run([g_loss, mse_loss, g_gan_loss, gd_loss, g_optim], {t_input_gen: xgenin, t_target_image: xt, t_input_mask: xm}) print( "Epoch [%2d/%2d] [%4d/%4d] [%4d/%4d]: d_loss: %.8f g_loss: %.8f (mse: %.6f gdl: %.6f adv: %.6f)" % ( j, epochs + val_restore, i, iterations_train, k, div_patches - 1, errd, errg, errmse, errgd, errgan)) # ========================= evaluate & save model ========================= # if k == 1 and i % 20 == 0: if j - val_restore == 0: x_true_img = xt[0] if normfactor != 0: x_true_img = ((x_true_img + 1) * normfactor) # denormalize img_true = nib.Nifti1Image(x_true_img, np.eye(4)) img_true.to_filename( os.path.join(path_prediction, str(j) + str(i) + 'true.nii.gz')) x_gen_img = xgenin[0] if normfactor != 0: x_gen_img = ((x_gen_img + 1) * normfactor) # denormalize img_gen = nib.Nifti1Image(x_gen_img, np.eye(4)) img_gen.to_filename( os.path.join(path_prediction, str(j) + str(i) + 'gen.nii.gz')) x_pred = session.run(gen_test.outputs, {t_input_gen: xgenin}) x_pred_img = x_pred[0] if normfactor != 0: x_pred_img = ((x_pred_img + 1) * normfactor) # denormalize img_pred = nib.Nifti1Image(x_pred_img, np.eye(4)) img_pred.to_filename( os.path.join(path_prediction, str(j) + str(i) + '.nii.gz')) max_gen = np.amax(x_pred_img) max_real = np.amax(x_true_img) if max_gen > max_real: val_max = max_gen else: val_max = max_real min_gen = np.amin(x_pred_img) min_real = np.amin(x_true_img) if min_gen < min_real: val_min = min_gen else: val_min = min_real val_psnr = psnr(np.multiply(x_true_img, xm[0]), np.multiply(x_pred_img, xm[0]), dynamic_range=val_max - val_min) val_ssim = ssim(np.multiply(x_true_img, xm[0]), np.multiply(x_pred_img, xm[0]), dynamic_range=val_max - val_min, multichannel=True) saver.save(sess=session, save_path=checkpoint_dir, global_step=step) print("Saved step: [%2d]" % step) step = step 
+ 1
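For reference, the generator input above is simulated from the high-resolution patch by Gaussian blurring followed by nearest-neighbour downsampling with scipy's zoom. A minimal standalone sketch of that degradation step (patch shape and upscaling factor are illustrative, not taken from the training configuration):

import numpy as np
from scipy.ndimage import gaussian_filter, zoom

upscaling_factor = 2                   # assumed value, for illustration only
xt = np.random.rand(4, 32, 32, 32, 1)  # batch of 3-D patches: (N, D, H, W, C)

# blur, then shrink every spatial axis by 1/upscaling_factor (batch and channel axes untouched)
x_lowres = gaussian_filter(xt, sigma=1)
x_lowres = zoom(x_lowres,
                [1, 1 / upscaling_factor, 1 / upscaling_factor, 1 / upscaling_factor, 1],
                prefilter=False, order=0)
print(x_lowres.shape)                  # -> (4, 16, 16, 16, 1)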
def generate_quantization_matrix(n=8): return zoom(asarray(MATRIX), n / 8, order=1, mode='nearest')
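A quick usage sketch; MATRIX is assumed here to be an 8x8 base quantization table, and the standard JPEG luminance table is used purely as a stand-in. The zoom call below mirrors what generate_quantization_matrix(16) would compute:

import numpy as np
from scipy.ndimage import zoom

# stand-in 8x8 table; the real MATRIX in the source module may differ
MATRIX = [[16, 11, 10, 16, 24, 40, 51, 61],
          [12, 12, 14, 19, 26, 58, 60, 55],
          [14, 13, 16, 24, 40, 57, 69, 56],
          [14, 17, 22, 29, 51, 87, 80, 62],
          [18, 22, 37, 56, 68, 109, 103, 77],
          [24, 35, 55, 64, 81, 104, 113, 92],
          [49, 64, 78, 87, 103, 121, 120, 101],
          [72, 92, 95, 98, 112, 100, 103, 99]]

q16 = zoom(np.asarray(MATRIX), 16 / 8, order=1, mode='nearest')  # bilinear resize to 16x16
print(q16.shape)  # -> (16, 16)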
def resize_image_zoom(img, zoom_factor=1., order=3):
    # spline-resample the two spatial axes of an (H, W, C) image; the channel axis is left untouched
    if zoom_factor == 1:
        return img
    return zoom(img, [zoom_factor, zoom_factor, 1], order=order)
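For example, on an (H, W, 3) image array, assuming zoom has been imported from scipy.ndimage (shapes are illustrative):

import numpy as np
from scipy.ndimage import zoom

img = np.random.rand(100, 200, 3)
small = resize_image_zoom(img, zoom_factor=0.5)  # -> (50, 100, 3), channels preserved
same = resize_image_zoom(img)                    # zoom_factor == 1 returns the input unchanged
print(small.shape, same is img)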
def get_t(J, type, gammaI=1):
    '''
    Tone rendering: tone drawing focuses more on shapes, shadow, and shading
    than on the use of lines.
    The histogram of a pencil drawing follows a characteristic pattern, because
    it is just the combination of pencil and white paper; it can be split into
    three regions: 1. bright, 2. dark, 3. mid-tones, with one model simulating each.
    Tone and colour in a pencil drawing are built up by stroking repeatedly with the pencil.
    1. Histogram matching: compute the image histogram from the three
       distributions, then match it to the histogram of a natural image.
    2. Texture rendering: estimate beta, the number of times the pencil
       stroke pattern has to be repeated.
    :param J: matrix of the image converted to grayscale
    :param type: image type
    :param gammaI: control parameter; the larger the value, the darker the result
    :return: tone-rendered image matrix T
    '''
    Jadjusted = natural_histogram_matching(J, type=type)**gammaI
    # Jadjusted = natural_histogram_matching(J, type=type)
    texture = Image.open(texture_file_name)
    texture = np.array(texture.convert("L"))
    # texture = np.array(texture)
    texture = texture[99:texture.shape[0] - 100, 99:texture.shape[1] - 100]
    ratio = texture_resize_ratio * min(J.shape[0], J.shape[1]) / float(1024)
    texture_resize = interpolation.zoom(texture, (ratio, ratio))
    texture = im2double(texture_resize)
    htexture = hstitch(texture, J.shape[1])
    Jtexture = vstitch(htexture, J.shape[0])
    size = J.shape[0] * J.shape[1]
    nzmax = 2 * (size - 1)
    i = np.zeros((nzmax, 1))
    j = np.zeros((nzmax, 1))
    s = np.zeros((nzmax, 1))
    for m in range(1, nzmax + 1):
        i[m - 1] = int(math.ceil((m + 0.1) / 2)) - 1
        j[m - 1] = int(math.ceil((m - 0.1) / 2)) - 1
        s[m - 1] = -2 * (m % 2) + 1
    dx = csr_matrix((s.T[0], (i.T[0], j.T[0])), shape=(size, size))
    nzmax = 2 * (size - J.shape[1])
    i = np.zeros((nzmax, 1))
    j = np.zeros((nzmax, 1))
    s = np.zeros((nzmax, 1))
    for m in range(1, nzmax + 1):
        i[m - 1, :] = int(math.ceil((m - 1 + 0.1) / 2) + J.shape[1] * (m % 2)) - 1
        j[m - 1, :] = math.ceil((m - 0.1) / 2) - 1
        s[m - 1, :] = -2 * (m % 2) + 1
    dy = csr_matrix((s.T[0], (i.T[0], j.T[0])), shape=(size, size))
    # the +0.01 avoids taking the log of zero; its effect on normal values is negligible
    Jtexture1d = np.log(
        np.reshape(Jtexture.T, (1, Jtexture.size), order="f") + 0.01)
    Jtsparse = spdiags(Jtexture1d, 0, size, size)
    Jadjusted1d = np.log(
        np.reshape(Jadjusted.T, (1, Jadjusted.size), order="f").T + 0.01)
    nat = Jtsparse.T.dot(Jadjusted1d)  # lnJ(x)
    a = np.dot(Jtsparse.T, Jtsparse)
    b = dx.T.dot(dx)
    c = dy.T.dot(dy)
    mat = a + Lambda * (b + c)  # lnH(x)
    # x = spsolve(a, b) <--> a*x = b
    # lnH(x) * beta(x) = lnJ(x) --> beta(x) = spsolve(lnH(x), lnJ(x))
    # use spsolve for sparse matrices rather than linalg.solve()
    beta1d = spsolve(mat, nat)  # eq.8
    beta = np.reshape(beta1d, (J.shape[0], J.shape[1]), order="c")
    # simulate how a sketch darkens shadows by repeated strokes:
    # draw the pattern Jtexture beta times
    T = Jtexture**beta  # eq.9
    T = (T - T.min()) / (T.max() - T.min())
    img = Image.fromarray(T * 255)
    # img.show()
    return T
def transform_img(img, scale=1.0, angle=0.0, tvec=(0, 0), mode="constant", bgval=None, order=1):
    """
    Return the transformed image.

    Args:
        img (2D or 3D numpy array): What will be transformed.
            If a 3D array is passed, it is treated in a manner in which RGB
            images are supposed to be handled - i.e. assume that coordinates
            are (Y, X, channels). Complex images are handled in a way that
            treats separately the real and imaginary parts.
        scale (float): The scale factor (scale > 1.0 means zooming in)
        angle (float): Degrees of rotation (clock-wise)
        tvec (2-tuple): Pixel translation vector, Y and X component.
        mode (string): The transformation mode (refer to e.g.
            :func:`scipy.ndimage.shift` and its kwarg ``mode``).
        bgval (float): Shade of the background (filling during transformations)
            If None is passed, :func:`imreg_dft.utils.get_borderval` with
            radius of 5 is used to get it.
        order (int): Order of approximation (when doing transformations).
            1 = linear, 3 = cubic etc. Linear works surprisingly well.

    Returns:
        np.ndarray: The transformed img, may have another
        i.e. (bigger) shape than the source.
    """
    if img.ndim == 3:
        # A bloody painful special case of RGB images
        ret = np.empty_like(img)
        for idx in range(img.shape[2]):
            sli = (slice(None), slice(None), idx)
            ret[sli] = transform_img(img[sli], scale, angle, tvec, mode, bgval, order)
        return ret
    elif np.iscomplexobj(img):
        decomposed = np.empty(img.shape + (2, ), float)
        decomposed[:, :, 0] = img.real
        decomposed[:, :, 1] = img.imag
        # The bgval makes little sense now, as we decompose the image
        res = transform_img(decomposed, scale, angle, tvec, mode, None, order)
        ret = res[:, :, 0] + 1j * res[:, :, 1]
        return ret
    if bgval is None:
        bgval = utils.get_borderval(img)
    bigshape = np.round(np.array(img.shape) * 1.2).astype(int)
    bg = np.zeros(bigshape, img.dtype) + bgval
    dest0 = utils.embed_to(bg, img.copy())
    # TODO: We have problems with complex numbers
    # that are not supported by zoom(), rotate() or shift()
    if scale != 1.0:
        dest0 = ndii.zoom(dest0, scale, order=order, mode=mode, cval=bgval)
    if angle != 0.0:
        dest0 = ndii.rotate(dest0, angle, order=order, mode=mode, cval=bgval)
    if tvec[0] != 0 or tvec[1] != 0:
        dest0 = ndii.shift(dest0, tvec, order=order, mode=mode, cval=bgval)
    bg = np.zeros_like(img) + bgval
    dest = utils.embed_to(bg, dest0)
    return dest
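A small usage sketch, assuming the function sits inside imreg_dft so that utils.get_borderval, utils.embed_to and ndii (scipy.ndimage.interpolation) are available:

import numpy as np

img = np.random.rand(128, 128)
# zoom in 10%, rotate 15 degrees clockwise, shift 3 px down and 5 px right
warped = transform_img(img, scale=1.1, angle=15.0, tvec=(3, 5), order=1)
print(warped.shape)  # same shape as img; out-of-frame areas filled with the border value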
def color_gray_scale_image(color_model, quantized_ab, x_batch_black, batch_size, height, width, nb_q, t_parameter, img_l, size_original=None, size_out=None, root_dir=None): """ Predict colors for a gray-scale image :param root_dir: :param size_out: :param size_original: :param img_l: :param color_model: The model do you want to plot :param quantized_ab: :param x_batch_black: :param batch_size: :param height: :param width: :param nb_q: :param t_parameter: :return: """ # Format X_colorized ab_prediction = color_model.predict(x_batch_black / 100.)[:, :, :, :-1] ab_prediction = ab_prediction.reshape((batch_size * height * width, nb_q)) # Reweight probabilities ab_prediction = np.exp(np.log(ab_prediction) / t_parameter) ab_prediction = ab_prediction / np.sum(ab_prediction, 1)[:, np.newaxis] # Reweighted q_a = quantized_ab[:, 0].reshape((1, 313)) q_b = quantized_ab[:, 1].reshape((1, 313)) x_a = np.sum(ab_prediction * q_a, 1).reshape( (batch_size, 1, height, width)) x_b = np.sum(ab_prediction * q_b, 1).reshape( (batch_size, 1, height, width)) img = np.concatenate((x_a, x_b), axis=1).transpose((0, 2, 3, 1)) img = img[0, :, :, :] # this is our result img = sni.zoom( img, (1. * size_original[0] / size_out[0], 1. * size_original[1] / size_out[1], 1)) # upsample to match size of original image L img_lab_out = np.concatenate((img_l[:, :, np.newaxis], img), axis=2) # concatenate with original image L img_lab_out = color.lab2rgb(img_lab_out) # convert back to rgb img_rgb_out = (255 * np.clip(img_lab_out, 0, 1)).astype('uint8') file_name = uuid.uuid4() final_result = '/media/colorized/image_%s.png' % file_name file_name = os.path.join(root_dir, 'media/colorized/image_%s.png' % file_name) plt.imsave(file_name, img_rgb_out) return final_result
def similarity(bn0, bn1): """Register bn1 to bn0 , M. Canty 2012 bn0, bn1 and returned result are image bands Modified from Imreg.py, see http://www.lfd.uci.edu/~gohlke/: Copyright (c) 2011-2012, Christoph Gohlke Copyright (c) 2011-2012, The Regents of the University of California Produced at the Laboratory for Fluorescence Dynamics All rights reserved. """ def highpass(shape): """Return highpass filter to be multiplied with fourier transform.""" x = np.outer( np.cos(np.linspace(-math.pi / 2., math.pi / 2., shape[0])), np.cos(np.linspace(-math.pi / 2., math.pi / 2., shape[1]))) return (1.0 - x) * (2.0 - x) def logpolar(image, angles=None, radii=None): """Return log-polar transformed image and log base.""" shape = image.shape center = shape[0] / 2, shape[1] / 2 if angles is None: angles = shape[0] if radii is None: radii = shape[1] theta = np.empty((angles, radii), dtype=np.float64) theta.T[:] = -np.linspace(0, np.pi, angles, endpoint=False) # d = radii d = np.hypot(shape[0] - center[0], shape[1] - center[1]) log_base = 10.0**(math.log10(d) / (radii)) radius = np.empty_like(theta) radius[:] = np.power(log_base, np.arange(radii, dtype=np.float64)) - 1.0 x = radius * np.sin(theta) + center[0] y = radius * np.cos(theta) + center[1] output = np.empty_like(x) ndii.map_coordinates(image, [x, y], output=output) return output, log_base lines0, samples0 = bn0.shape # make reference and warp bands same shape bn1 = bn1[0:lines0, 0:samples0] # get scale, angle f0 = fftshift(abs(fft2(bn0))) f1 = fftshift(abs(fft2(bn1))) h = highpass(f0.shape) f0 *= h f1 *= h del h f0, log_base = logpolar(f0) f1, log_base = logpolar(f1) f0 = fft2(f0) f1 = fft2(f1) r0 = abs(f0) * abs(f1) ir = abs(ifft2((f0 * f1.conjugate()) / r0)) i0, i1 = np.unravel_index(np.argmax(ir), ir.shape) angle = 180.0 * i0 / ir.shape[0] scale = log_base**i1 if scale > 1.8: ir = abs(ifft2((f1 * f0.conjugate()) / r0)) i0, i1 = np.unravel_index(np.argmax(ir), ir.shape) angle = -180.0 * i0 / ir.shape[0] scale = 1.0 / (log_base**i1) if scale > 1.8: raise ValueError("Images are not compatible. Scale change > 1.8") if angle < -90.0: angle += 180.0 elif angle > 90.0: angle -= 180.0 # re-scale and rotate and then get shift bn2 = ndii.zoom(bn1, 1.0 / scale) bn2 = ndii.rotate(bn2, angle) if bn2.shape < bn0.shape: t = np.zeros_like(bn0) t[:bn2.shape[0], :bn2.shape[1]] = bn2 bn2 = t elif bn2.shape > bn0.shape: bn2 = bn2[:bn0.shape[0], :bn0.shape[1]] f0 = fft2(bn0) f1 = fft2(bn2) ir = abs(ifft2((f0 * f1.conjugate()) / (abs(f0) * abs(f1)))) t0, t1 = np.unravel_index(np.argmax(ir), ir.shape) if t0 > f0.shape[0] // 2: t0 -= f0.shape[0] if t1 > f0.shape[1] // 2: t1 -= f0.shape[1] # return result return (scale, angle, [t0, t1])
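The registration above can be sanity-checked with a synthetically translated copy of a band, assuming fft2/ifft2/fftshift, ndii and math are imported as in the surrounding module (band size and shift below are made up):

import numpy as np

rng = np.random.default_rng(0)
bn0 = rng.random((256, 256))
bn1 = np.roll(bn0, shift=(7, -4), axis=(0, 1))  # same band, translated only

scale, angle, (t0, t1) = similarity(bn0, bn1)
print(scale, angle, t0, t1)  # expect scale ~ 1, angle ~ 0, and the shift recovered up to sign convention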
def main(): args = parser.parse_args() if args.gt_type == 'KITTI': from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework disp_net = DispNetS().to(device) weights = torch.load(args.pretrained_dispnet, map_location='cpu') disp_net.load_state_dict(weights['disp_net_state_dict']) disp_net.eval() seq_length = 1 dataset_dir = Path(args.dataset_dir) if args.dataset_list is not None: with open(args.dataset_list, 'r') as f: test_files = list(f.read().splitlines()) else: test_files = [file.relpathto(dataset_dir) for file in sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])] framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth, use_gps=args.gps) print('{} files to test'.format(len(test_files))) errors = np.zeros((2, 9, len(test_files)), np.float32) if args.output_dir is not None: output_dir = Path(args.output_dir) output_dir.makedirs_p() #predictions = np.load('/ceph/raunaks/old/t2net_signet/checkpoints/pure_geonet/predicted_depth.npy') for j, sample in enumerate(tqdm(framework)): tgt_img = sample['tgt'] ref_imgs = sample['ref'] h,w,_ = tgt_img.shape if (not args.no_resize) and (h != args.img_height or w != args.img_width): tgt_img = cv2.resize(tgt_img, (args.img_width, args.img_height)).astype(np.float32) ref_imgs = [cv2.resize(img, (args.img_width, args.img_height)).astype(np.float32) for img in ref_imgs] #tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32) #ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in ref_imgs] tgt_img = np.transpose(tgt_img, (2, 0, 1)) ref_imgs = [np.transpose(img, (2,0,1)) for img in ref_imgs] tgt_img = torch.from_numpy(tgt_img).unsqueeze(0) tgt_img = ((tgt_img/255 - 0.5)/0.5).to(device) for i, img in enumerate(ref_imgs): img = torch.from_numpy(img).unsqueeze(0) img = ((img/255 - 0.5)/0.5).to(device) ref_imgs[i] = img pred_disp = disp_net(tgt_img).cpu().numpy()[0,0] if args.output_dir is not None: if j == 0: predictions = np.zeros((len(test_files), *pred_disp.shape)) predictions[j] = 1/pred_disp gt_depth = sample['gt_depth'] pred_depth = 1/pred_disp #pred_depth = predictions[j] pred_depth_zoomed = zoom(pred_depth, (gt_depth.shape[0]/pred_depth.shape[0], gt_depth.shape[1]/pred_depth.shape[1]) ).clip(args.min_depth, args.max_depth) if sample['mask'] is not None: pred_depth_zoomed = pred_depth_zoomed[sample['mask']] gt_depth = gt_depth[sample['mask']] scale_factor = np.median(gt_depth)/np.median(pred_depth_zoomed) errors[1,:,j] = compute_errors(gt_depth, pred_depth_zoomed*scale_factor) mean_errors = errors.mean(2) error_names = ['abs_diff', 'abs_rel','sq_rel','rms','log_rms', 'abs_log', 'a1','a2','a3'] print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ") print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names)) print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1])) if args.output_dir is not None: np.save(output_dir/'predictions.npy', predictions)
def zoom_iracpsf(psfname, oversample=10., spline_order=3, radius=5.0, iracpix=0.6, outname=None, filter_width=5.0, inner_rad=2.0):
    """
    Use scipy.ndimage.interpolation.zoom to oversample the PSF. Use this to
    make a PSF on the high-res pixel grid from the low-res pixel grid.
    iracpix: input IRAC pixel scale in arcsec.
    radius: radius of the desired PSF image in arcsec
    """
    psf0 = pyfits.getdata(psfname)
    hdr0 = pyfits.getheader(psfname)
    print(psf0.shape)
    pixrad = radius / iracpix * oversample
    pixrad = np.round(pixrad)
    psf1 = zoom(psf0, oversample, order=spline_order)
    print("shape(psf1)", psf1.shape)
    xc, yc = np.array(psf1.shape) / 2.
    xc = int(np.floor(xc))
    yc = int(np.floor(yc))
    print("xc, yc ", xc, yc)
    # Center the PSF again
    # In case the low-level noise skews the image moment, we filter the PSF
    # image first
    # Make a circular mask around the center, and only calculate the image
    # moments for the central part; again this is to guard against PSF wings
    # skewing the center of mass
    inner_pixrad = inner_rad / iracpix * oversample
    inner_pixrad = np.round(inner_pixrad)
    cmask1 = circular_mask(xc, yc, psf1.shape[0], psf1.shape[1], inner_pixrad)
    psf1m = np.where(cmask1 == True, psf1, 0.)
    psf1_filtered = ndimage.filters.gaussian_filter(psf1m, filter_width)
    # Now calculate the center of mass of the filtered PSF
    cm1 = center_of_mass(psf1_filtered)
    print("CM of the filtered PSF: (%.2f, %.2f)" % tuple(cm1))
    xshift = xc - cm1[0]
    yshift = yc - cm1[1]
    print("xshift, yshift:", xshift, yshift)
    psf1 = shift(psf1, [xshift, yshift], order=1, mode='wrap')
    cmask = circular_mask(xc, yc, psf1.shape[0], psf1.shape[1], pixrad)
    psf1 = np.where(cmask == True, psf1, 0.)
    print("Shifted PSF center: ", center_of_mass(psf1))
    # assume that CDELT1 is in arcsec/pix
    hdr0['cdelt1'] = hdr0['cdelt1'] / oversample
    hdr0['cdelt2'] = hdr0['cdelt2'] / oversample
    # mas_str = '%2d' % abs(int(round(hdr0['cdelt1']*1000.)))
    mas_str = '%2d' % abs(int(round(iracpix / oversample * 1000.)))
    if outname is None:
        outname = os.path.splitext(psfname)[0] + '_%2smas.fits' % (mas_str)
    if os.path.exists(outname):
        os.remove(outname)
    # Trim the borders if there are any (slice bounds must be ints)
    yc2, xc2 = np.array(psf1.shape) / 2.
    xc2 = int(np.floor(xc2))
    yc2 = int(np.floor(yc2))
    xmin = int(np.maximum(0, xc2 - pixrad * 1.2))
    xmax = int(np.minimum(psf1.shape[1], xc2 + pixrad * 1.2))
    ymin = int(np.maximum(0, yc2 - pixrad * 1.2))
    ymax = int(np.minimum(psf1.shape[0], yc2 + pixrad * 1.2))
    print("xmin, xmax, ymin, ymax", xmin, xmax, ymin, ymax)
    psf2 = psf1[ymin:ymax, xmin:xmax]
    print("shape(psf2)", np.shape(psf2))
    psf2 = psf2 / psf2.sum()
    pyfits.append(outname, psf2, hdr0)
    return psf1
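The core oversampling step can be illustrated without FITS I/O; the Gaussian "PSF" below is only a stand-in for real IRAC data:

import numpy as np
from scipy.ndimage import zoom

# synthetic stand-in PSF on the coarse (0.6"/pix) grid
y, x = np.mgrid[-16:17, -16:17]
psf_lowres = np.exp(-(x**2 + y**2) / (2 * 2.0**2))
psf_lowres /= psf_lowres.sum()

oversample = 10.0
psf_highres = zoom(psf_lowres, oversample, order=3)  # 33x33 -> 330x330
psf_highres /= psf_highres.sum()                     # re-normalize to unit flux
print(psf_highres.shape)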
def colorize_gray_scale_image(color_model, quantized_ab, x_batch_black, batch_size, height, width, nb_q, t_parameter, size_original=None): """ Plot the image from a batch in evaluation state :param size_original: :param color_model: The model do you want to plot :param quantized_ab: :param x_batch_black: :param batch_size: :param height: :param width: :param nb_q: :param t_parameter: :return: """ # Format X_colorized ab_prediction = color_model.predict(x_batch_black / 100.)[:, :, :, :-1] ab_prediction = ab_prediction.reshape((batch_size * height * width, nb_q)) # Reweight probabilities ab_prediction = np.exp(np.log(ab_prediction) / t_parameter) ab_prediction = ab_prediction / np.sum(ab_prediction, 1)[:, np.newaxis] # Reweighted q_a = quantized_ab[:, 0].reshape((1, 313)) q_b = quantized_ab[:, 1].reshape((1, 313)) x_a = np.sum(ab_prediction * q_a, 1).reshape( (batch_size, 1, height, width)) x_b = np.sum(ab_prediction * q_b, 1).reshape( (batch_size, 1, height, width)) ab_prediction = np.concatenate((x_batch_black, x_a, x_b), axis=1).transpose((0, 2, 3, 1)) ab_prediction = [ np.expand_dims(color.lab2rgb(im), axis=0) for im in ab_prediction ] ab_prediction = np.concatenate(ab_prediction, 0).transpose((0, 3, 1, 2)) list_img = [] for i, img in enumerate(ab_prediction[:min(1, batch_size)]): # 32 # noinspection PyTypeChecker arr = np.concatenate( [np.repeat(x_batch_black[i] / 100., 3, axis=0), img], axis=2) list_img.append(arr) arr = np.concatenate(list_img, axis=1) file_name = uuid.uuid4() img = arr.transpose((1, 2, 0)) img = sni.zoom(img, (2. * size_original[0] / img.shape[0], 1. * size_original[1] / img.shape[1], 1)) img = cv2.resize(img, (img.shape[0], img.shape[1]), interpolation=cv2.INTER_AREA) scipy.misc.imsave("../../evaluation/fig_%s.png" % file_name, img)
def run(self, fname, i): print_info("# %s" % (fname)) print_info("=== %s %-3d" % (fname, i)) raw = ocrolib.read_image_gray(fname) self.dshow(raw, "input") # perform image normalization image = raw - amin(raw) if amax(image) == amin(image): print_info("# image is empty: %s" % (fname)) return image /= amax(image) if not self.param['nocheck']: check = self.check_page(amax(image) - image) if check is not None: print_error(fname + " SKIPPED. " + check + " (use -n to disable this check)") return # check whether the image is already effectively binarized if self.param['gray']: extreme = 0 else: extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape) if extreme > 0.95: comment = "no-normalization" flat = image else: comment = "" # if not, we need to flatten it by estimating the local whitelevel print_info("flattening") m = interpolation.zoom(image, self.param['zoom']) m = filters.percentile_filter(m, self.param['perc'], size=(self.param['range'], 2)) m = filters.percentile_filter(m, self.param['perc'], size=(2, self.param['range'])) m = interpolation.zoom(m, 1.0 / self.param['zoom']) if self.param['debug'] > 0: clf() imshow(m, vmin=0, vmax=1) ginput(1, self.param['debug']) w, h = minimum(array(image.shape), array(m.shape)) flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1) if self.param['debug'] > 0: clf() imshow(flat, vmin=0, vmax=1) ginput(1, self.param['debug']) # estimate low and high thresholds print_info("estimating thresholds") d0, d1 = flat.shape o0, o1 = int(self.param['bignore'] * d0), int(self.param['bignore'] * d1) est = flat[o0:d0 - o0, o1:d1 - o1] if self.param['escale'] > 0: # by default, we use only regions that contain # significant variance; this makes the percentile # based low and high estimates more reliable e = self.param['escale'] v = est - filters.gaussian_filter(est, e * 20.0) v = filters.gaussian_filter(v**2, e * 20.0)**0.5 v = (v > 0.3 * amax(v)) v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1))) v = morphology.binary_dilation(v, structure=ones((1, int(e * 50)))) if self.param['debug'] > 0: imshow(v) ginput(1, self.param['debug']) est = est[v] lo = stats.scoreatpercentile(est.ravel(), self.param['lo']) hi = stats.scoreatpercentile(est.ravel(), self.param['hi']) # rescale the image to get the gray scale image print_info("rescaling") flat -= lo flat /= (hi - lo) flat = clip(flat, 0, 1) if self.param['debug'] > 0: imshow(flat, vmin=0, vmax=1) ginput(1, self.param['debug']) binarized = 1 * (flat > self.param['threshold']) # output the normalized grayscale and the thresholded images #print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment)) print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment)) print_info("writing") if self.param['debug'] > 0 or self.param['show']: clf() gray() imshow(binarized) ginput(1, max(0.1, self.param['debug'])) base, _ = ocrolib.allsplitext(fname) ocrolib.write_image_binary(base + ".bin.png", binarized) ocrolib.write_image_gray(base + ".nrm.png", flat) # print("########### File path : ", base+".nrm.png") # write_to_xml(base+".bin.png") return base + ".bin.png"
def pattern_match(template, image, upsampling=16, func=cv2.TM_CCOEFF_NORMED, error_check=False): """ Call an arbitrary pattern matcher Parameters ---------- template : ndarray The input search template used to 'query' the destination image image : ndarray The image or sub-image to be searched upsampling : int The multiplier to upsample the template and image. func : object The function to be used to perform the template based matching Options: {cv2.TM_CCORR_NORMED, cv2.TM_CCOEFF_NORMED, cv2.TM_SQDIFF_NORMED} In testing the first two options perform significantly better with Apollo data. error_check : bool If True, also apply a different matcher and test that the values are not too divergent. Default, False. Returns ------- x : float The x offset y : float The y offset strength : float The strength of the correlation in the range [-1, 1]. """ different = { cv2.TM_SQDIFF_NORMED: cv2.TM_CCOEFF_NORMED, cv2.TM_CCORR_NORMED: cv2.TM_SQDIFF_NORMED, cv2.TM_CCOEFF_NORMED: cv2.TM_SQDIFF_NORMED } if upsampling < 1: raise ValueError u_template = zoom(template, upsampling, order=3) u_image = zoom(image, upsampling, order=3) result = cv2.matchTemplate(u_image, u_template, method=func) min_corr, max_corr, min_loc, max_loc = cv2.minMaxLoc(result) if func == cv2.TM_SQDIFF or func == cv2.TM_SQDIFF_NORMED: x, y = (min_loc[0], min_loc[1]) else: x, y = (max_loc[0], max_loc[1]) # Compute the idealized shift (image center) ideal_y = u_image.shape[0] / 2 ideal_x = u_image.shape[1] / 2 # Compute the shift from template upper left to template center y += (u_template.shape[0] / 2) x += (u_template.shape[1] / 2) x = (ideal_x - x) / upsampling y = (ideal_y - y) / upsampling return x, y, max_corr
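A quick self-check, assuming the function lives in a module where cv2 and scipy's zoom are imported; the template is cut from the image centre, so the expected offsets are close to zero:

import numpy as np

rng = np.random.default_rng(1)
image = rng.random((64, 64)).astype(np.float32)
template = image[24:40, 24:40]  # 16x16 patch centred in the image

x_off, y_off, strength = pattern_match(template, image, upsampling=4)
print(x_off, y_off, strength)   # offsets near 0, correlation near 1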
def get_scale_frags_to_same_size(frags, loci_ids, out_size=-1, no_cache=False): """Scale fragments to same size [description] Arguments: frags {list} -- List of numpy arrays representing the fragments Returns: np.array -- Numpy array of scaled fragments """ # Use the smallest dim dim_x = np.inf dim_y = np.inf is_image = False largest_frag_idx = -1 largest_frag_size = 0 smallest_frag_idx = -1 smallest_frag_size = np.inf for i, frag in enumerate(frags): is_image = is_image or frag.ndim == 3 if is_image: f_dim_y, f_dim_x, _ = frag.shape # from PIL.Image else: f_dim_x, f_dim_y = frag.shape size = f_dim_x * f_dim_y if size > largest_frag_size: largest_frag_idx = i largest_frag_size = size if size < smallest_frag_size: smallest_frag_idx = i smallest_frag_size = size dim_x = min(dim_x, f_dim_x) dim_y = min(dim_y, f_dim_y) if out_size != -1 and not no_cache: dim_x = out_size dim_y = out_size if is_image: out = np.zeros([len(frags), dim_y, dim_x, 3]) else: out = np.zeros([len(frags), dim_x, dim_y]) for i, frag in enumerate(frags): id = loci_ids[i] + '.' + '.'.join(map(str, out.shape[1:])) if not no_cache: frag_ds = None try: frag_ds = np.load(BytesIO(rdb.get('im_snip_ds_%s' % id))) if frag_ds is not None: out[i] = frag_ds continue except: pass if is_image: f_dim_y, f_dim_x, _ = frag.shape # from PIL.Image scaledFrag = np.zeros((dim_y, dim_x, 3), float) else: f_dim_x, f_dim_y = frag.shape scaledFrag = np.zeros((dim_x, dim_y), float) # Downsample # if f_dim_x > dim_x or f_dim_y > dim_y: # stupid zoom doesn't accept the final shape. Carefully crafting # the multipliers to make sure that it will work. zoomMultipliers = np.array(scaledFrag.shape) / np.array(frag.shape) frag = zoom(frag, zoomMultipliers, order=1) # frag = scaledFrag + zoomArray(frag, # frag, scaledFrag.shape, order=1 # ) if not no_cache: with BytesIO() as b: np.save(b, frag) rdb.set('im_snip_ds_%s' % id, b.getvalue(), 60 * 30) out[i] = frag return out, largest_frag_idx, smallest_frag_idx
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n): raw = ocrolib.pil2array(page_image) if len(raw.shape) > 2: raw = np.mean(raw, 2) raw = raw.astype("float64") # perform image normalization image = raw - amin(raw) if amax(image) == amin(image): LOG.info("# image is empty: %s" % (page_id)) return image /= amax(image) # check whether the image is already effectively binarized if self.parameter['gray']: extreme = 0 else: extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape) if extreme > 0.95: comment = "no-normalization" flat = image else: comment = "" # if not, we need to flatten it by estimating the local whitelevel LOG.info("Flattening") m = interpolation.zoom(image, self.parameter['zoom']) m = filters.percentile_filter(m, self.parameter['perc'], size=(self.parameter['range'], 2)) m = filters.percentile_filter(m, self.parameter['perc'], size=(2, self.parameter['range'])) m = interpolation.zoom(m, 1.0 / self.parameter['zoom']) if self.parameter['debug'] > 0: clf() imshow(m, vmin=0, vmax=1) ginput(1, self.parameter['debug']) w, h = minimum(array(image.shape), array(m.shape)) flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1) if self.parameter['debug'] > 0: clf() imshow(flat, vmin=0, vmax=1) ginput(1, self.parameter['debug']) # estimate low and high thresholds LOG.info("Estimating Thresholds") d0, d1 = flat.shape o0, o1 = int(self.parameter['bignore'] * d0), int( self.parameter['bignore'] * d1) est = flat[o0:d0 - o0, o1:d1 - o1] if self.parameter['escale'] > 0: # by default, we use only regions that contain # significant variance; this makes the percentile # based low and high estimates more reliable e = self.parameter['escale'] v = est - filters.gaussian_filter(est, e * 20.0) v = filters.gaussian_filter(v**2, e * 20.0)**0.5 v = (v > 0.3 * amax(v)) v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1))) v = morphology.binary_dilation(v, structure=ones((1, int(e * 50)))) if self.parameter['debug'] > 0: imshow(v) ginput(1, self.parameter['debug']) est = est[v] lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo']) hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi']) # rescale the image to get the gray scale image LOG.info("Rescaling") flat -= lo flat /= (hi - lo) flat = clip(flat, 0, 1) if self.parameter['debug'] > 0: imshow(flat, vmin=0, vmax=1) ginput(1, self.parameter['debug']) binarized = 1 * (flat > self.parameter['threshold']) # output the normalized grayscale and the thresholded images # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment)) LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment)) LOG.info("writing") if self.parameter['debug'] > 0 or self.parameter['show']: clf() gray() imshow(binarized) ginput(1, max(0.1, self.parameter['debug'])) page_xywh['features'] += ',binarized' bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B') bin_image = ocrolib.array2pil(bin_array) file_id = input_file.ID.replace(self.input_file_grp, self.image_grp) if file_id == input_file.ID: file_id = concat_padded(self.image_grp, n) file_path = self.workspace.save_image_file( bin_image, file_id, page_id=page_id, file_grp=self.image_grp, force=self.parameter['force']) page.add_AlternativeImage( AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def train(self): step_pl = tf.placeholder(tf.float32, shape=None) alpha_tra_assign = self.alpha_tra.assign(step_pl / self.max_iters) opti_D = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.0, beta2=0.99).minimize( self.D_loss, var_list=self.d_vars) opti_G = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.0, beta2=0.99).minimize( self.G_loss, var_list=self.g_vars) init = tf.global_variables_initializer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init) summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) if self.pg != 1 and self.pg != 7: if self.trans: self.r_saver.restore(sess, self.read_model_path) self.rgb_saver.restore(sess, self.read_model_path) else: self.saver.restore(sess, self.read_model_path) step = 0 batch_num = 0 while step <= self.max_iters: # optimization D n_critic = 1 if self.pg >= 5: n_critic = 1 for i in range(n_critic): sample_z = np.random.normal(size=[self.batch_size, self.sample_size]) if self.is_celeba: train_list = self.data_In.getNextBatch(batch_num, self.batch_size) realbatch_array = self.data_In.getShapeForData(train_list, resize_w=self.output_size) else: realbatch_array = self.data_In.getNextBatch(self.batch_size, resize_w=self.output_size) realbatch_array = np.transpose(realbatch_array, axes=[0, 3, 2, 1]).transpose([0, 2, 1, 3]) if self.trans and self.pg != 0: alpha = np.float(step) / self.max_iters low_realbatch_array = zoom(realbatch_array, zoom=[1, 0.5, 0.5, 1], mode='nearest') low_realbatch_array = zoom(low_realbatch_array, zoom=[1, 2, 2, 1], mode='nearest') realbatch_array = alpha * realbatch_array + (1 - alpha) * low_realbatch_array sess.run(opti_D, feed_dict={self.images: realbatch_array, self.z: sample_z}) batch_num += 1 # optimization G sess.run(opti_G, feed_dict={self.z: sample_z}) summary_str = sess.run(summary_op, feed_dict={self.images: realbatch_array, self.z: sample_z}) summary_writer.add_summary(summary_str, step) summary_writer.add_summary(summary_str, step) # the alpha of fake_in process sess.run(alpha_tra_assign, feed_dict={step_pl: step}) if step % 400 == 0: D_loss, G_loss, D_origin_loss, alpha_tra = sess.run([self.D_loss, self.G_loss, self.D_origin_loss,self.alpha_tra], feed_dict={self.images: realbatch_array, self.z: sample_z}) print("PG %d, step %d: D loss=%.7f G loss=%.7f, D_or loss=%.7f, opt_alpha_tra=%.7f" % (self.pg, step, D_loss, G_loss, D_origin_loss, alpha_tra)) realbatch_array = np.clip(realbatch_array, -1, 1) save_images(realbatch_array[0:self.batch_size], [2, self.batch_size/2], '{}/{:02d}_real.jpg'.format(self.sample_path, step)) if self.trans and self.pg != 0: low_realbatch_array = np.clip(low_realbatch_array, -1, 1) save_images(low_realbatch_array[0:self.batch_size], [2, self.batch_size / 2], '{}/{:02d}_real_lower.jpg'.format(self.sample_path, step)) fake_image = sess.run(self.fake_images, feed_dict={self.images: realbatch_array, self.z: sample_z}) fake_image = np.clip(fake_image, -1, 1) save_images(fake_image[0:self.batch_size], [2, self.batch_size/2], '{}/{:02d}_train.jpg'.format(self.sample_path, step)) if np.mod(step, 4000) == 0 and step != 0: self.saver.save(sess, self.gan_model_path) step += 1 save_path = self.saver.save(sess, self.gan_model_path) print ("Model saved in file: %s" % save_path) tf.reset_default_graph()
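The fade-in trick used above (blend each real batch with a 2x-downsampled-then-upsampled copy of itself) can be sketched in isolation; batch shape and alpha are illustrative:

import numpy as np
from scipy.ndimage import zoom

realbatch = np.random.uniform(-1, 1, size=(8, 64, 64, 3))
alpha = 0.3  # fade-in progress in [0, 1]

# halve the spatial resolution and blow it back up, as in the loop above
low = zoom(realbatch, zoom=[1, 0.5, 0.5, 1], mode='nearest')
low = zoom(low, zoom=[1, 2, 2, 1], mode='nearest')

blended = alpha * realbatch + (1 - alpha) * low
print(blended.shape)  # (8, 64, 64, 3)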
def main(): args = parser.parse_args() if args.gt_type == 'KITTI': from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework elif args.gt_type == 'stillbox': from stillbox_eval.depth_evaluation_utils import test_framework_stillbox as test_framework # disp_net = DispNetS().to(device) disp_net = DispResNet(3).to(device) weights = torch.load(args.pretrained_dispnet) disp_net.load_state_dict(weights['state_dict']) disp_net.eval() if args.pretrained_posenet is None: print( 'no PoseNet specified, scale_factor will be determined by median ratio, which is kiiinda cheating\ (but consistent with original paper)') seq_length = 0 else: weights = torch.load(args.pretrained_posenet) seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3) pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device) pose_net.load_state_dict(weights['state_dict'], strict=False) dataset_dir = Path(args.dataset_dir) if args.dataset_list is not None: with open(args.dataset_list, 'r') as f: test_files = list(f.read().splitlines()) else: test_files = [ file.relpathto(dataset_dir) for file in sum([ dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts ], []) ] framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth) print('{} files to test'.format(len(test_files))) errors = np.zeros((2, 7, len(test_files)), np.float32) if args.output_dir is not None: output_dir = Path(args.output_dir) output_dir.makedirs_p() for j, sample in enumerate(tqdm(framework)): tgt_img = sample['tgt'] ref_imgs = sample['ref'] h, w, _ = tgt_img.shape if (not args.no_resize) and (h != args.img_height or w != args.img_width): tgt_img = imresize( tgt_img, (args.img_height, args.img_width)).astype(np.float32) ref_imgs = [ imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in ref_imgs ] tgt_img = np.transpose(tgt_img, (2, 0, 1)) ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs] tgt_img = torch.from_numpy(tgt_img).unsqueeze(0) tgt_img = ((tgt_img / 255 - 0.5) / 0.5).to(device) for i, img in enumerate(ref_imgs): img = torch.from_numpy(img).unsqueeze(0) img = ((img / 255 - 0.5) / 0.5).to(device) ref_imgs[i] = img pred_disp = disp_net(tgt_img).cpu().numpy()[0, 0] if args.output_dir is not None: if j == 0: predictions = np.zeros((len(test_files), *pred_disp.shape)) predictions[j] = 1 / pred_disp gt_depth = sample['gt_depth'] pred_depth = 1 / pred_disp pred_depth_zoomed = zoom( pred_depth, (gt_depth.shape[0] / pred_depth.shape[0], gt_depth.shape[1] / pred_depth.shape[1])).clip(args.min_depth, args.max_depth) if sample['mask'] is not None: pred_depth_zoomed = pred_depth_zoomed[sample['mask']] gt_depth = gt_depth[sample['mask']] if seq_length > 0: # Reorganize ref_imgs : tgt is middle frame but not necessarily the one used in DispNetS # (in case sample to test was in end or beginning of the image sequence) middle_index = seq_length // 2 tgt = ref_imgs[middle_index] reorganized_refs = ref_imgs[:middle_index] + ref_imgs[ middle_index + 1:] _, poses = pose_net(tgt, reorganized_refs) mean_displacement_magnitude = poses[0, :, :3].norm( 2, 1).mean().item() scale_factor = sample['displacement'] / mean_displacement_magnitude errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor) scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed) errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor) mean_errors = errors.mean(2) error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 
'a3'] if args.pretrained_posenet: print("Results with scale factor determined by PoseNet : ") print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( *error_names)) print( "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}" .format(*mean_errors[0])) print( "Results with scale factor determined by GT/prediction ratio (like the original paper) : " ) print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( *error_names)) print( "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}". format(*mean_errors[1])) if args.output_dir is not None: np.save(output_dir / 'predictions.npy', predictions)
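The resize-and-median-scale step shared by both evaluation scripts above can be sketched in isolation; shapes and depth bounds are illustrative, not actual KITTI values:

import numpy as np
from scipy.ndimage import zoom

pred_depth = np.random.rand(128, 416) + 0.5  # network-resolution prediction
gt_depth = np.random.rand(375, 1242) * 80    # full-resolution ground truth

pred_depth_zoomed = zoom(pred_depth,
                         (gt_depth.shape[0] / pred_depth.shape[0],
                          gt_depth.shape[1] / pred_depth.shape[1])
                         ).clip(0.1, 80)     # min/max depth clamp, values illustrative

scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
print(pred_depth_zoomed.shape, scale_factor)  # (375, 1242), median-ratio scale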
def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None): """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor` with respect to weighted `losses`. For details on grad-CAM, see the paper: [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization] (https://arxiv.org/pdf/1610.02391v1.pdf). Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to network architecture in some instances, grad-CAM has a more general applicability. Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights cat regions and not the 'dog' region and vice-versa. Args: input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format= channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`. losses: List of ([Loss](vis.losses.md#Loss), weight) tuples. seed_input: The model input for which activation map needs to be visualized. penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients with respect to filter output. grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't specify anything, gradients are unchanged (Default value = None) Returns: The normalized gradients of `seed_input` with respect to weighted `losses`. """ penultimate_output = penultimate_layer.output opt = Optimizer(input_tensor, losses, wrt_tensor=penultimate_output, norm_grads=False) _, grads, penultimate_output_value = opt.minimize( seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False) # For numerical stability. Very small grad values along with small penultimate_output_value can cause # w * penultimate_output_value to zero out, even for reasonable fp precision of float32. #grads = grads / (np.max(grads) + K.epsilon()) # Average pooling across all feature maps. # This captures the importance of feature map (channel) idx to the output. channel_idx = 1 if K.image_data_format() == 'channels_first' else -1 other_axis = np.delete(np.arange(len(grads.shape)), channel_idx) weights = np.mean(grads, axis=tuple(other_axis)) # Generate heatmap by computing weight * output over feature maps output_dims = utils.get_img_shape(penultimate_output_value)[2:] heatmap = np.zeros(shape=output_dims, dtype=K.floatx()) for i, w in enumerate(weights): if channel_idx == -1: heatmap += w * penultimate_output_value[0, ..., i] else: heatmap += w * penultimate_output_value[0, i, ...] # ReLU thresholding to exclude pattern mismatch information (negative gradients). heatmap = np.maximum(heatmap, 0) # The penultimate feature map size is definitely smaller than input image. input_dims = utils.get_img_shape(input_tensor)[2:] # Figure out the zoom factor. zoom_factor = [ i / (j * 1.0) for i, j in iter(zip(input_dims, output_dims)) ] heatmap = zoom(heatmap, zoom_factor) return utils.normalize(heatmap)
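The final upsampling step, zooming the coarse heatmap back to the model input resolution, can be sketched on its own; the map size and input dims below are illustrative:

import numpy as np
from scipy.ndimage import zoom

heatmap = np.random.rand(14, 14)  # penultimate-layer spatial map (illustrative)
input_dims = (224, 224)           # model input spatial dims (illustrative)

zoom_factor = [i / (j * 1.0) for i, j in zip(input_dims, heatmap.shape)]
upsampled = zoom(heatmap, zoom_factor)  # 14x14 -> 224x224, ready to overlay on the input
print(upsampled.shape)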