def __init__(self, channels, thresholds, pixelsize=(1., 1., 1.)):
    self.f = mlab.figure()
    self.isos = []
    self.projs = []

    for im, th, i in zip(channels, thresholds, range(len(channels))):
        c = mlab.contour3d(im, contours=[th],
                           color=pylab.cm.gist_rainbow(float(i) / len(channels))[:3])
        c.mlab_source.dataset.spacing = pixelsize
        self.isos.append(c)

        ps = []
        thf = th * 1.5

        pr = im.mean(2)
        #f = im.max()/pr.max()
        #pr *= im.max()/pr.max()
        ps.append(self.drawProjection((255 * pylab.minimum(pr / (1. * thf), 1)).astype('uint8'), 'z', c))

        pr = im.mean(0)
        #pr *= im.max()/pr.max()
        ps.append(self.drawProjection((255 * pylab.minimum(pr / (.6 * thf), 1)).astype('uint8'), 'x', c))

        pr = im.mean(1)
        #pr *= im.max()/pr.max()
        ps.append(self.drawProjection((255 * pylab.minimum(pr / (.8 * thf), 1)).astype('uint8'), 'y', c))

        self.projs.append(ps)
def update(self, data):
    blue = pl.less(data, 0.)  # Fill in True where less than 0.0
    red = ~blue               # Reverse of the above

    # Blue
    self.image[..., 2][blue] = pl.minimum(pl.absolute(pl.divide(data[blue], 255.)), 1.)

    # Red -- Max 40C, so we increase the intensity of the red color 6 times
    self.image[..., 0][red] = pl.minimum(1., pl.divide(pl.multiply(data[red], 6.), 255.))

    pl.imshow(self.image)
    pl.draw()
def OC(nelx, nely, x, volfrac, dc):
    l1 = 0
    l2 = 100000
    move = 0.2
    while (l2 - l1 > 1e-4):
        lmid = 0.5 * (l2 + l1)
        xnew = py.maximum(1e-3,
                          py.maximum(x - move,
                                     py.minimum(1.0,
                                                py.minimum(x + move, x * py.sqrt(-dc / lmid)))))
        if sum(xnew) - volfrac * nelx * nely > 0:
            l1 = lmid
        else:
            l2 = lmid
    return xnew
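# A minimal usage sketch for OC (assumptions: `py` is the pylab namespace and `dc`
# holds non-positive compliance sensitivities; the values below are placeholders).
# The bisection above searches for the Lagrange multiplier that makes the updated
# design satisfy the volume constraint sum(xnew) == volfrac * nelx * nely.
import pylab as py

nelx, nely, volfrac = 60, 20, 0.5
x = volfrac * py.ones(nelx * nely)   # flattened design vector, uniform start
dc = -py.ones(nelx * nely)           # placeholder sensitivities (all -1)
xnew = OC(nelx, nely, x, volfrac, dc)
print(xnew.sum() / (nelx * nely))    # close to volfrac after the update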
def check(nelx, nely, rmin, x, dc):
    dcn = py.zeros((nely, nelx))
    for i in range(1, nelx + 1):
        for j in range(1, nely + 1):
            sumx = 0.0
            for k in range(py.maximum(i - py.floor(rmin), 1),
                           py.minimum(i + py.floor(rmin), nelx) + 1):
                for l in range(py.maximum(j - py.floor(rmin), 1),
                               py.minimum(j + py.floor(rmin), nely) + 1):
                    fac = rmin - py.sqrt((i - k)**2 + (j - l)**2)
                    sumx = sumx + py.maximum(0, fac)
                    dcn[j - 1, i - 1] = dcn[j - 1, i - 1] + py.maximum(0, fac) * x[l - 1, k - 1] * dc[l - 1, k - 1]
            dcn[j - 1, i - 1] = dcn[j - 1, i - 1] / (x[j - 1, i - 1] * sumx)
    return dcn
def printClusterResult(self, imglist, k, code):
    for c in range(k):
        ind = np.where(code == c)[0]
        pylab.figure()
        pylab.gray()
        for i in range(pylab.minimum(len(ind), 39)):
            im = Image.open(imglist[ind[i]])
            pylab.subplot(4, 10, i + 1)
            pylab.imshow(pylab.array(im))
            pylab.axis('equal')
            pylab.axis('off')
    pylab.show()
def process_ts(self, ts):
    ''' The meat of the class -- convert an input time series into beds '''

    # Define a function to stop time points from going off the end of the array
    tlim = lambda t: pl.minimum(self.npts, t)  # Short for "time limit"

    # Housekeeping
    hsp = self.hspars  # Shorten since used a lot
    beds = sc.objdict()  # To make in one step: make(keys=self.reskeys, vals=pl.zeros(self.npts))
    for reskey in self.reskeys:
        beds[reskey] = pl.zeros(self.npts)

    # If cumulative, take the difference to get the change at each timepoint
    if self.datatype == 'cumulative':
        ts = pl.diff(ts)

    # Actually process the time series -- where all the logic is, loop over each time point and update beds required
    for t, val in enumerate(ts):

        # Precompute results
        sympt = val * hsp.symptomatic          # Find how many symptomatic people there are
        hosp = sympt * hsp.hospitalized        # How many require hospitalization
        icu = sympt * hsp.icu                  # How many will require ICU beds
        mild = hosp - icu                      # Non-ICU patients are mild
        tstart_aac = t + hsp.delay             # When adult acute beds start being used
        tstop_aac = tstart_aac + hsp.mild_dur  # When adult acute beds are no longer needed
        icu_in_aac = round(hsp.severe_dur * hsp.aac_frac)  # Days an ICU patient spends in AAC
        icu_in_icu = hsp.severe_dur - icu_in_aac           # ...and in ICU
        tstop_pre_icu = tstart_aac + icu_in_aac            # When they move from AAC to ICU
        tstop_icu = tstop_pre_icu + icu_in_icu             # When they leave ICU

        # Compute actual results
        beds.aac[tlim(tstart_aac):tlim(tstop_aac)] += mild      # Add mild patients to AAC
        beds.aac[tlim(tstart_aac):tlim(tstop_pre_icu)] += icu   # Add pre-ICU ICU patients
        beds.icu[tlim(tstop_pre_icu):tlim(tstop_icu)] += icu    # Add ICU patients

    beds.total = beds.aac + beds.icu  # Compute total results

    return beds
def simulated_age_intervals(data_type, n, a, pi_age_true, sigma_true):
    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start + 1, pl.minimum(age_start + 10, 100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i + 1)]) / (a_1i - a_0i)
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n, 3))
    beta_true = [-.1, .1, .2]
    beta_true = [0, 0, 0]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true * pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = pl.maximum(0., mc.rnormal(pi_true, 1. / sigma_true**2.))

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:, 0], x_1=X[:, 1], x_2=X[:, 2]))
    data['effective_sample_size'] = pl.maximum(p * (1 - p) / sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'
    data['data_type'] = data_type

    return data
### @export 'data'
n = pl.array(pl.exp(mc.rnormal(11, 1**-2, size=32)), dtype=int)
k = pl.array(mc.rnegative_binomial(n * pi_true, delta_true), dtype=float)
k[:4] = 0.  # zero-inflated model
r = k / n
s = pl.sqrt(r * (1 - r) / n)
n_min = min(n)
n_max = max(n)

### @export 'zibb-model'
alpha = mc.Uninformative('alpha', value=4.)
beta = mc.Uninformative('beta', value=1000.)
pi_mean = mc.Lambda('pi_mean', lambda alpha=alpha, beta=beta: alpha / (alpha + beta))
pi = mc.Beta('pi', alpha, beta, value=pl.maximum(1.e-12, pl.minimum(1 - 1.e-12, r)))
phi = mc.Uniform('phi', lower=0., upper=1., value=.01)

nonzeros = r != 0.
num_nonzeros = nonzeros.sum()

@mc.potential
def obs(pi=pi, phi=phi):
    logp = pl.log(1 - phi) * num_nonzeros + mc.binomial_like(r[nonzeros] * n[nonzeros], n[nonzeros], pi[nonzeros])
    for n_i in n[~nonzeros]:
        logp += pl.log(phi + (1 - phi) * pl.exp(pl.log(1 - pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp

@mc.deterministic
def pred(alpha=alpha, beta=beta, phi=phi):
    if pl.rand() < phi:
        return 0
def wide_angle_function(angles):
    return pylab.maximum(pylab.minimum(1.05 - 2 * pylab.absolute(angles), 1), 0)
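# A minimal usage sketch (hypothetical inputs): the function ramps down from 1 as
# |angle| grows and is clamped to [0, 1], with a small flat top around 0.
import pylab

angles = pylab.linspace(-1.0, 1.0, 5)
print(wide_angle_function(angles))   # -> [0.   0.05 1.   0.05 0.  ]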
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    LOG = getLogger('OcrdAnybaseocrBinarizer')
    raw = ocrolib.pil2array(page_image)
    if len(raw.shape) > 2:
        raw = np.mean(raw, 2)
    raw = raw.astype("float64")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'], size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'], size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    # output the normalized grayscale and the thresholded images
    # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))
    page_xywh['features'] += ',binarized'

    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id + '-IMG',
                                               page_id=page_id,
                                               file_grp=self.output_file_grp)
    page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def trim(x, a, b):
    return pl.maximum(a, pl.minimum(b, x))
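# A minimal usage sketch (assumption: `pl` is pylab/numpy): trim clamps x
# elementwise to the closed interval [a, b].
import pylab as pl

x = pl.array([-0.5, 0.2, 0.9, 1.7])
print(trim(x, 0., 1.))   # -> [0.  0.2 0.9 1. ]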
def new_bounds_func(f, age, val=val, prev_bounds_func=rate_vars['bounds_func']):
    return pl.minimum(prev_bounds_func(f, age), val)
def g(x):
    return pylab.minimum(abs(x * pylab.sin(x)), abs(x * pylab.cos(x)))
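# A minimal usage sketch: g(x) is the lower envelope of |x*sin(x)| and |x*cos(x)|,
# so plotting it over a range shows cusps wherever the two curves cross.
import pylab

x = pylab.linspace(0, 10, 1000)
pylab.plot(x, g(x))
pylab.show()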
k[:4] = 0.  # zero-inflated model
r = k / n
s = pl.sqrt(r * (1 - r) / n)
n_min = min(n)
n_max = max(n)

### @export 'zibb-model'
alpha = mc.Uninformative('alpha', value=4.)
beta = mc.Uninformative('beta', value=1000.)
pi_mean = mc.Lambda('pi_mean', lambda alpha=alpha, beta=beta: alpha / (alpha + beta))
pi = mc.Beta('pi', alpha, beta, value=pl.maximum(1.e-12, pl.minimum(1 - 1.e-12, r)))
phi = mc.Uniform('phi', lower=0., upper=1., value=.01)

nonzeros = r != 0.
num_nonzeros = nonzeros.sum()

@mc.potential
def obs(pi=pi, phi=phi):
    logp = pl.log(1 - phi) * num_nonzeros + mc.binomial_like(r[nonzeros] * n[nonzeros], n[nonzeros], pi[nonzeros])
    for n_i in n[~nonzeros]:
        logp += pl.log(phi + (1 - phi) * pl.exp(pl.log(1 - pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp
def g(x):
    # print("a")
    return pylab.minimum(abs(x * sin(x)), abs(x * cos(x)))
def test_data_model_sim():
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start + 1, pl.minimum(age_start + 10, 100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i + 1)]) / (a_1i - a_0i)
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n, 3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true * pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:, 0], x_1=X[:, 1], x_2=X[:, 2]))
    data['effective_sample_size'] = pl.maximum(p * (1 - p) / sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)

    output_template = pandas.DataFrame(dict(year=[1990, 1990, 2005, 2005, 2010, 2010] * 2,
                                            sex=['male', 'female'] * 3 * 2,
                                            x_0=[.5] * 6 * 2,
                                            x_1=[0.] * 6 * 2,
                                            x_2=[.5] * 6 * 2,
                                            pop=[50.] * 6 * 2,
                                            area=['CAN'] * 6 + ['USA'] * 6))

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')

    # fit model
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)

    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA', 'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace() * pl.exp(.05)).mean(), rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(2), m.gamma.stats()['mc error'].round(2)

    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i, p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101), m.mu_age.stats()['mean'], 'k-', drawstyle='steps-post', linewidth=3)
    pl.plot(pl.arange(101), m.mu_age.stats()['95% HPD interval'], 'k', linestyle='steps-post:')
    pl.plot(pl.arange(101), pi_usa.mean(0), 'r-', linewidth=2, drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
def forest_plot(r, n, pi_true=None, results=None, model_keys=None, data_labels=None, fname=None,
                xmax=.05, fig_params=half_page_params,
                subplot_params=dict(bottom=.1, right=.99, top=.95, left=.33), **params):
    sorted_indices = (-r).argsort().argsort()

    se = 1.96 * pl.sqrt(r * (1 - r) / n)
    ms = pl.minimum(25, pl.sqrt(n) / 10.)

    pl.figure(**fig_params)

    for i in range(len(r)):
        pl.errorbar(r[i], sorted_indices[i] * .5 - .25,
                    xerr=[[r[i] - max(0, r[i] - se[i])], [se[i]]],
                    fmt='ks', mew=1, mec='white', ms=5)  #ms[i])
        if data_labels:
            pl.text(-2 * xmax / 50, sorted_indices[i] * .5 - .25, data_labels[i],
                    ha='right', va='center', fontsize='x-large')
    pl.text(-2 * xmax / 50, len(sorted_indices) * .5 - .25, 'Input data:',
            va='center', ha='right', fontsize='x-large')
    pl.yticks([])
    pl.xticks(size='large')

    if not data_labels:
        pl.text(-2 * xmax / 50, (len(sorted_indices) - 1) * .25, 'Simulated Study Data',
                rotation=90, ha='right', va='center', fontsize='x-large')

    if not model_keys:
        if results:
            model_keys = results.keys()
        else:
            model_keys = []

    for i, k in enumerate(model_keys):
        if k == 'Beta binomial':
            k1 = 'Beta-binomial'
            pl.text(-2 * xmax / 50, -(i * .5 + 1.75), k1, ha='right', va='center', fontsize='x-large')
        elif k == 'Negative binomial':
            k1 = 'Negative-binomial'
            pl.text(-2 * xmax / 50, -(i * .5 + 1.75), k1, ha='right', va='center', fontsize='x-large')
        else:
            pl.text(-2 * xmax / 50, -(i * .5 + 1.75), k, ha='right', va='center', fontsize='x-large')

        # plot prediction posterior
        if '50' in results[k]['pred']['quantiles']:  # number becomes string when read back from disk
            pi_med = results[k]['pred']['quantiles']['50']
        else:
            pi_med = results[k]['pred']['quantiles'][50]
        pi_lb = results[k]['pred']['95% HPD interval'][0]
        pi_ub = results[k]['pred']['95% HPD interval'][1]
        n = pi_med * (1 - pi_med) / ((pi_ub - pi_lb) / (2 * 1.96))**2
        xerr = [[pi_med - pi_lb], [pi_ub - pi_med]]

        #if i == 0:
        #    label = 'Predicted Study Value'
        #else:
        #    label = '_nolabel_'
        #pl.errorbar(pi_med, -(i+2), xerr=xerr,
        #            fmt='ko', mew=1, mec='white', ms=5, label=label)

        # plot parameter posterior
        if '50' in results[k]['pi']['quantiles']:  # number becomes string when read back from disk
            pi_med = results[k]['pi']['quantiles']['50']
        else:
            pi_med = results[k]['pi']['quantiles'][50]
        pi_lb = results[k]['pi']['95% HPD interval'][0]
        pi_ub = results[k]['pi']['95% HPD interval'][1]
        n = pi_med * (1 - pi_med) / ((pi_ub - pi_lb) / (2 * 1.96))**2
        xerr = [[pi_med - pi_lb], [pi_ub - pi_med]]

        if i == 0:
            label = 'Parameter value'
        else:
            label = '_nolabel_'
        pl.errorbar(pi_med, -(i * .5 + 2) + .25, xerr=xerr,
                    fmt='k^', mew=1, mec='white', ms=8, label=label)

    pl.hlines([-.75], -1, 1, linewidth=1, linestyle='dotted', color='k', label='_nolegend_')
    pl.text(-2 * xmax / 50, -1.25, 'Model estimate of pop. rate:',
            va='center', ha='right', fontsize='x-large')

    #pl.legend(loc='lower right', shadow=True, fancybox=True, numpoints=1)
    l, r, b, t = pl.axis()
    b -= .5
    t += .75

    if pi_true:
        pl.vlines([pi_true], b, t, linewidth=1, linestyle='dashed', color='k')
        pl.text(pi_true, t, '\n $\\pi_{true}$', ha='left', va='top', fontsize='xx-large')

    pl.axis([-xmax / 50., xmax, b, t])
    pl.subplots_adjust(**subplot_params)
    pl.xlabel('Rate (per PY)', fontsize='x-large')

    if fname:
        pl.savefig(fname)
def create_fig4_5():
    def create_data1():
        def f(x):
            return np.random.normal(0, 0.4) + x + 4.
        x1 = 4. * np.random.random_sample(DATA_SIZE,) - 4.
        x2 = np.array(map(f, x1))
        t = np.array([[1, 0, 0] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    def create_data2():
        def f(x):
            return np.random.normal(0, 0.4) + x
        x1 = 4. * np.random.random_sample(DATA_SIZE,) - 2.
        x2 = np.array(map(f, x1))
        t = np.array([[0, 1, 0] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    def create_data3():
        def f(x):
            return np.random.normal(0, 0.4) + x - 4.
        x1 = 4. * np.random.random_sample(DATA_SIZE,)
        x2 = np.array(map(f, x1))
        t = np.array([[0, 0, 1] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    X1, T1 = create_data1()
    X2, T2 = create_data2()
    X3, T3 = create_data3()
    W1 = calc_weight(np.r_[X1, X2, X3], np.r_[T1, T2, T3])

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    ax1.grid(True)
    ax2.grid(True)
    plt.subplots_adjust(wspace=0.4)
    ax1.set_xlim(-6, 6)
    ax1.set_ylim(-6, 6)
    ax2.set_xlim(-6, 6)
    ax2.set_ylim(-6, 6)

    x = np.arange(-10, 10, 0.1)
    x_lower = np.arange(-10, 0, 0.1)
    x_higher = np.arange(0, 10, 0.1)

    border_func1 = get_border(W1[:, :2])
    border1 = np.array(map(border_func1, x))
    ax1.plot(x_lower, map(border_func1, x_lower), 'k')

    border_func2 = get_border(W1[:, 1:])
    border2 = np.array(map(border_func2, x))
    ax1.plot(x_lower, map(border_func2, x_lower), 'k')

    border_func3 = get_border(W1[:, 0::2])
    border3 = np.array(map(border_func3, x))
    ax1.plot(x_higher, map(border_func3, x_higher), 'k')

    ax1.fill_between(x, border1, border2, where=border2 > border1, facecolor='g', alpha=0.2)
    ax1.fill_between(x, maximum(border2, border3), 10, facecolor='r', alpha=0.2)
    ax1.fill_between(x, minimum(border1, border3), -10, facecolor='b', alpha=0.2)

    #border_func2 = get_border(W2)
    #ax2.plot(x, map(border_func2, x), 'm')

    ax1.scatter(X1[:, 0], X1[:, 1], s=50, c='r', marker="x")
    ax1.scatter(X2[:, 0], X2[:, 1], s=50, c='g', marker="x")
    ax1.scatter(X3[:, 0], X3[:, 1], s=50, edgecolors='b', marker="o", facecolors='none')
    ax2.scatter(X1[:, 0], X1[:, 1], s=50, c='r', marker="x")
    ax2.scatter(X2[:, 0], X2[:, 1], s=50, c='g', marker="x")
    ax2.scatter(X3[:, 0], X3[:, 1], s=50, edgecolors='b', marker="o", facecolors='none')

    plt.show()
def _process_segment(self, page, filename, page_id, file_id):
    raw = ocrolib.read_image_gray(filename)
    self.dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    if not self.parameter['nocheck']:
        check = self.check_page(amax(image) - image)
        if check is not None:
            LOG.error(input_file.pageId or input_file.ID + " SKIPPED. " + check + " (use -n to disable this check)")
            return

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'], size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'], size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    # output the normalized grayscale and the thresholded images
    # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))
    #base, _ = ocrolib.allsplitext(filename)
    #ocrolib.write_image_binary(base + ".bin.png", binarized)
    # ocrolib.write_image_gray(base +".nrm.png", flat)
    # print("########### File path : ", base+".nrm.png")
    # write_to_xml(base+".bin.png")
    # return base+".bin.png"
    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(AlternativeImageType(filename=file_path, comment="binarized"))
def binarize_image(job):
    image_object, i = job
    raw = read_image_gray(image_object)
    image = raw - amin(raw)
    if amax(image) == amin(image):
        return  # Image is empty
    image /= amax(image)
    check = check_page(amax(image) - image)
    if check is not None:
        return
    if args.gray:
        extreme = 0
    else:
        extreme = (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        m = interpolation.zoom(image, args.zoom)
        m = filters.percentile_filter(m, args.perc, size=(args.range, 2))
        m = filters.percentile_filter(m, args.perc, size=(2, args.range))
        m = interpolation.zoom(m, 1.0 / args.zoom)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
    if args.maxskew > 0:
        d0, d1 = flat.shape
        o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args.maxskew
        ms = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0
    d0, d1 = flat.shape
    o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args.escale > 0:
        e = args.escale
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binary = 1 * (flat > args.threshold)
    return (binary, flat)
def process(self):
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        print_info("# %s" % (fname))
        raw = ocrolib.read_image_gray(img.filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            print_info("# image is empty: %s" % (fname))
            return
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                print_error(fname + " SKIPPED. " + check + " (use -n to disable this check)")
                return

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            print_info("flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m, self.parameter['perc'], size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m, self.parameter['perc'], size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
        print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
        print_info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))
        base, _ = ocrolib.allsplitext(img.filename)
        ocrolib.write_image_binary(base + ".bin.png", binarized)
        # ocrolib.write_image_gray(base +".nrm.png", flat)
        # print("########### File path : ", base+".nrm.png")
        # write_to_xml(base+".bin.png")
        # return base+".bin.png"
        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=ID,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=base + ".bin.png",
                                local_filename='%s/%s' % (self.output_file_grp, ID),
                                content=to_xml(pcgts).encode('utf-8'))
def g(x):
    return pylab.minimum(pylab.absolute(x * pylab.sin(x)), pylab.absolute(x * pylab.cos(x)))
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "pgf.preamble": [
        r"\usepackage[utf8x]{inputenc}",
        r"\usepackage[T1]{fontenc}",
        r"\usepackage{cmbright}",
    ],
    "font.family": "serif",
    "font.size": 10,
})

import pylab

data = pylab.loadtxt("timings1", skiprows=1)
for i in range(2, 5):
    data = pylab.minimum(data, pylab.loadtxt("timings" + str(i), skiprows=1))

pylab.figure(figsize=(5, 2.8))
pylab.plot(data[:, 0], data[:, 1], 'o-k', label='operator()')
pylab.plot(data[:, 0], data[:, 2], 'o-b', label='evaluate()')
pylab.plot(data[:, 0], data[:, 3], 'o-r', label='std::function')
pylab.plot(data[:, 0], data[:, 4], 'o-g', label='virtual evaluate()')
pylab.ylim(ymax=350, ymin=0)
pylab.xlim(xmax=16, xmin=0)
pylab.legend()
# pylab.title("Title of Plot")
pylab.xlabel("range vector size $N$")
pylab.ylabel("ms")
pylab.tight_layout()
def superBee(r):
    "superBee limiter for a TVD scheme"
    # phi(r) = max(0, min(2r, 1), min(r, 2)); pl.maximum is a binary ufunc,
    # so the two minima are combined first before taking the max with 0
    return pl.maximum(0, pl.maximum(pl.minimum(2 * r, 1), pl.minimum(r, 2)))
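# A minimal usage sketch (assumption: `pl` is pylab/numpy): the limiter is
# evaluated elementwise on an array of successive-gradient ratios r; it is 0
# for r <= 0 and saturates at 2 for large r.
import pylab as pl

r = pl.array([-1., 0., 0.25, 0.5, 1., 2., 4.])
print(superBee(r))   # -> [0.  0.  0.5 1.  1.  2.  2. ]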