def mu_age_derivative_potential(mu_age=mu_age,
                                increasing_a0=pl.clip(parameters['increasing']['age_start']-ages[0], 0, len(ages)),
                                increasing_a1=pl.clip(parameters['increasing']['age_end']-ages[0], 0, len(ages)),
                                decreasing_a0=pl.clip(parameters['decreasing']['age_start']-ages[0], 0, len(ages)),
                                decreasing_a1=pl.clip(parameters['decreasing']['age_end']-ages[0], 0, len(ages))):
    mu_prime = pl.diff(mu_age)
    inc_violation = mu_prime[increasing_a0:increasing_a1].clip(-pl.inf, 0.).sum()
    dec_violation = mu_prime[decreasing_a0:decreasing_a1].clip(0., pl.inf).sum()
    return -1.e12 * (inc_violation**2 + dec_violation**2)
def mu_age(unconstrained_mu_age=unconstrained_mu_age,
           value=parameters['level_value']['value'],
           age_before=pl.clip(parameters['level_value']['age_before']-ages[0], 0, len(ages)),
           age_after=pl.clip(parameters['level_value']['age_after']-ages[0], 0, len(ages)),
           lower=parameters['level_bounds']['lower'],
           upper=parameters['level_bounds']['upper']):
    mu_age = unconstrained_mu_age.copy()
    mu_age[:age_before] = value
    if age_after < len(mu_age)-1:
        mu_age[(age_after+1):] = value
    return mu_age.clip(lower, upper)
def scale_mtx(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    ::
        Perform mutually-orthogonal scaling operations, otherwise return identity:
        normalize [False]
        dbscale [False]
        norm [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy()  # don't alter the original
        if norm:
            nz_idx = (X * X).sum(1) > 0
            X[nz_idx] = (X[nz_idx].T / np.sqrt((X[nz_idx] * X[nz_idx]).sum(1))).T
        if normalize:
            X = X - np.min(X)
            X = X / np.max(X)
        if dbscale or bels:
            X = P.log10(P.clip(X, 0.0001, X.max()))
            if dbscale:
                X = 20 * X
    return X
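A minimal usage sketch for scale_mtx, assuming numpy and pylab are in scope as np and P (as the function body implies) and with random data standing in for a real feature matrix:

import numpy as np

mag = np.abs(np.random.randn(128, 200))   # made-up stand-in for a magnitude spectrogram
db = scale_mtx(mag, dbscale=True)         # 20 * log10, floored at 1e-4 before the log
unit = scale_mtx(mag, normalize=True)     # shifted and scaled into [0, 1]
print(unit.min(), unit.max())             # -> 0.0 1.0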
def plot_mtx(mtx=None, title=None, newfig=False, cbar=True, **kwargs):
    """
    ::
        static method for plotting a matrix as a time-frequency distribution
        (audio features)
    """
    if mtx is None or type(mtx) != np.ndarray:
        raise ValueError('First argument, mtx, must be an array')
    if newfig:
        P.figure()
    dbscale = kwargs.pop('dbscale', False)
    bels = kwargs.pop('bels', False)
    norm = kwargs.pop('norm', False)
    normalize = kwargs.pop('normalize', False)
    origin = kwargs.pop('origin', 'lower')
    aspect = kwargs.pop('aspect', 'auto')
    interpolation = kwargs.pop('interpolation', 'nearest')
    cmap = kwargs.pop('cmap', P.cm.gray_r)
    clip = -100.
    X = scale_mtx(mtx, normalize=normalize, dbscale=dbscale, norm=norm, bels=bels)
    i_min, i_max = np.where(X.mean(1))[0][[0, -1]]
    X = X[i_min:i_max + 1].copy()
    if dbscale or bels:
        if bels:
            clip /= 10.
        P.imshow(P.clip(X, clip, 0), origin=origin, aspect=aspect,
                 interpolation=interpolation, cmap=cmap, **kwargs)
    else:
        P.imshow(X, origin=origin, aspect=aspect,
                 interpolation=interpolation, cmap=cmap, **kwargs)
    if title:
        P.title(title, fontsize=16)
    if cbar:
        P.colorbar()
    P.yticks(np.arange(0, i_max + 1 - i_min, 3), pc_labels[i_min:i_max + 1:3], fontsize=14)
    P.xlabel('Tactus', fontsize=14)
    P.ylabel('MIDI Pitch', fontsize=14)
    P.grid()
def mu_interval(mu_age=mu_age,
                theta=theta,
                age_mid=pl.array(age_mid, dtype=int),
                age_width=pl.array(age_width, dtype=float)):
    return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) - ages[0]) + theta * age_width
def feature_plot(M, normalize=False, dbscale=False, norm=False,
                 title_string=None, interp='nearest', bels=False):
    """
    ::
        static method for plotting a matrix as a time-frequency distribution
        (audio features)
    """
    X = adb.feature_scale(M, normalize, dbscale, norm, bels)
    pylab.figure()
    clip = -100.
    if dbscale or bels:
        if bels:
            clip /= 10.
        pylab.imshow(pylab.clip(X, clip, 0), origin='lower', aspect='auto',
                     interpolation=interp)
    else:
        pylab.imshow(X, origin='lower', aspect='auto', interpolation=interp)
    if title_string:
        pylab.title(title_string)
    pylab.colorbar()
def threshold(self, ax, data):
    data = N.asarray(data)
    mu, sd = data.mean(), data.std()
    data = clip(data, mu - 3*sd, mu + 3*sd)
    data = blur_image(data, 5)
    orig = data.copy()
    f = fft.fft2(data)
    f[0:5, :] = 0
    f[:, 0:5] = 0
    data = N.abs(fft.ifft2(f))
    print "mu, std =", orig.mean(), orig.std()
    """
    mu, sigma = data.mean(), data.std()
    mask = data > mu + 2*sigma
    """
    #im = ax.imshow(data, origin="lower", interpolation='nearest')
    im = ax.imshow(orig, origin="lower", interpolation='nearest')
    self.set_callbacks([("button_release_event", self.imclick, ())])
    self.f.colorbar(im, fraction=0.08)
    return data
def density_plot(x, D):
    """Plot the density D along with a confidence region"""
    # TODO: pass parameters through (e.g. color, axes, ...)
    fx = D(x)
    x_ = pl.concatenate((x, x[::-1]))
    fx_ = pl.clip(pl.concatenate((fx + D.c, fx[::-1] - D.c)), 0, pl.inf)
    pl.fill(x_, fx_, edgecolor=[.5]*3, facecolor=[.8]*3)
    pl.plot(x, fx, color=[0]*3)
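A minimal way to exercise density_plot; the callable below, with its constant half-width attribute `c`, is a made-up stand-in for whatever density object the surrounding code actually passes in:

import pylab as pl

class NormalDensity(object):
    """standard normal pdf with an invented constant confidence half-width"""
    c = 0.05
    def __call__(self, x):
        return pl.exp(-x**2 / 2) / pl.sqrt(2 * pl.pi)

x = pl.mgrid[-3:3:201j]
density_plot(x, NormalDensity())
pl.show()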
def interpolateLin(y, x, xNew):
    """
    linear interpolation of y[x] onto y[xNew]
    Linearly extrapolates if outside range
    """
    xInd = M.clip(M.searchsorted(x, xNew) - 1, 0, len(x) - 2)
    xFract = (xNew - x[xInd]) / (x[xInd+1] - x[xInd])
    return y[xInd] + xFract * (y[xInd+1] - y[xInd])
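A small usage sketch, assuming `M` is numpy (the alias the snippet itself relies on); the first and last query points fall outside the sample range and are linearly extrapolated:

import numpy as M

x = M.array([0., 1., 2., 3.])
y = x ** 2
xNew = M.array([-0.5, 1.5, 4.0])    # below, inside, and above the range
print(interpolateLin(y, x, xNew))   # -> [-0.5   2.5  14. ]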
def my_hor_to_eq(az, el, lat, lsts):
    dec = arcsin(sin(el) * sin(lat) + cos(el) * cos(lat) * cos(az))
    argument = (sin(el) - sin(lat) * sin(dec)) / (cos(lat) * cos(dec))
    argument = clip(argument, -1.0, 1.0)
    H = arccos(argument)
    flag = sin(az) > 0
    H[flag] = 2.0*pi - H[flag]
    ra = lsts - H
    ra %= 2*pi
    return ra, dec
def sub_mean(x, N):
    N = int(N)
    L = len(x)
    y = pl.zeros_like(x)
    ii = pl.arange(-N, N + 1)
    k = 1.0 / len(ii)  # 1 / (2 * N + 1)
    for n in range(L):
        iii = pl.clip(ii + n, 0, L - 1)
        s = k * sum(x[iii])
        y[n] = x[n] - s
        print n, x[n], iii[0], iii[-1], s
    return y
def hor_to_eq(az, el, lat, lst):
    dec = arcsin(sin(el) * sin(lat) + cos(el) * cos(lat) * cos(az))
    argument = (sin(el) - sin(lat) * sin(dec)) / (cos(lat) * cos(dec))
    argument = pylab.clip(argument, -1.0, 1.0)
    H = arccos(argument)
    flag = sin(az) > 0
    if type(flag) is ndarray:
        H[flag] = 2.0 * pi - H[flag]
    elif flag:
        H = 2.0 * pi - H
    ra = lst - H
    ra %= 2 * pi
    return ra, dec
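A quick sanity check, assuming the numpy names (sin, arcsin, arccos, ndarray) are in scope as the snippet's bare references imply: pointing at the zenith (el = pi/2) should return a declination equal to the latitude and a right ascension equal to the sidereal time.

from numpy import pi

ra, dec = hor_to_eq(0.0, pi / 2, 0.7, 1.0)   # az, el, latitude, LST (radians)
print(ra, dec)                               # ~ (1.0, 0.7)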
def interpolateLinLog(y, x, xNew):
    """
    linear interpolation in LOG space of y[x] onto y[xNew]
    Linearly extrapolates if outside range
    """
    logx = M.log(x)
    logy = M.log(y)
    logxNew = M.log(xNew)
    logxInd = M.clip(M.searchsorted(logx, logxNew) - 1, 0, len(logx) - 2)
    logxFract = (logxNew - logx[logxInd]) / (logx[logxInd+1] - logx[logxInd])
    return M.exp(logy[logxInd] + logxFract * (logy[logxInd+1] - logy[logxInd]))
def imshow(self, ax, data):
    mu, sd = data.mean(), data.std()  # was data.stddev(); ndarrays have .std()
    data = clip(data, mu - 3*sd, mu + 3*sd)
    #data = signal.detrend(signal.detrend(data, axis=0), axis=1)
    #data -= data.min()
    #data = signal.spline_filter(data)
    cmap = self.get_cmap()
    im = ax.imshow(data, origin="lower", cmap=cmap, interpolation='nearest')
    self.set_callbacks([("button_release_event", self.imclick, ())])
    self.f.colorbar(im, fraction=0.08)
    return data
def fillcontinents(self, color=0.8):
    """
    Fill continents.

    color - color to fill continents (default gray).
    """
    # get current axes instance.
    ax = pylab.gca()
    # define corners of map domain.
    p1 = (self.llcrnrx, self.llcrnry); p2 = (self.urcrnrx, self.urcrnry)
    p3 = (self.llcrnrx, self.urcrnry); p4 = (self.urcrnrx, self.llcrnry)
    for x, y in self.coastpolygons:
        xa = pylab.array(x, 'f')
        ya = pylab.array(y, 'f')
        # clip to map domain.
        xa = pylab.clip(xa, self.xmin, self.xmax)
        ya = pylab.clip(ya, self.ymin, self.ymax)
        # check to see if all four corners of domain in polygon (if so,
        # don't draw since it will just fill in the whole map).
        delx = 10; dely = 10
        if self.projection in ['cyl']:
            delx = 0.1
            dely = 0.1
        test1 = pylab.fabs(xa - self.xmax) < delx
        test2 = pylab.fabs(xa - self.xmin) < delx
        test3 = pylab.fabs(ya - self.ymax) < dely
        test4 = pylab.fabs(ya - self.ymin) < dely
        hasp1 = sum(test1 * test3)
        hasp2 = sum(test2 * test3)
        hasp4 = sum(test2 * test4)
        hasp3 = sum(test1 * test4)
        if not hasp1 or not hasp2 or not hasp3 or not hasp4:
            xy = zip(xa.tolist(), ya.tolist())
            poly = Polygon(xy, facecolor=color, edgecolor=color, linewidth=0)
            ax.add_patch(poly)
    # set axes limits to fit map region.
    self.set_axes_limits()
def make_range_frame(self):
    rx = self.axes.get_xlim()
    ry = self.axes.get_ylim()
    px = pl.prctile(self.x)
    py = pl.prctile(self.y)
    if self.trim:
        if px[2] - px[0] > 1.5 * (px[3] - px[1]):
            px[0] = self.x[self.x > px[2] - 1.5 * (px[3] - px[1])].min()
        if px[4] - px[2] > 1.5 * (px[3] - px[1]):
            # was .min(), which discarded the upper tail entirely; the upper
            # trim should take the largest point below the cutoff
            px[4] = self.x[self.x < px[2] + 1.5 * (px[3] - px[1])].max()
    x = px - rx[0]
    x /= rx[1] - rx[0]
    y = py - ry[0]
    y /= ry[1] - ry[0]
    ex = .003
    ey = .003
    xline = [[(x[0], 0), (x[1], 0)],
             [(x[1], ey), (x[2] - ex, ey)],
             [(x[2] + ex, ey), (x[3], ey)],
             [(x[3], 0), (x[4], 0)]]
    yline = [[(0, y[0]), (0, y[1])],
             [(ex, y[1]), (ex, y[2] - ey)],
             [(ex, y[2] + ey), (ex, y[3])],
             [(0, y[3]), (0, y[4])]]
    widths = [1, 1, 1, 1]
    range_lines = LineCollection(segments=pl.clip(xline + yline, 0, 1),
                                 linewidths=widths + widths,
                                 colors=[[0]*3] * 2 * len(widths))
    range_lines.set_transform(self.axes.transAxes)
    range_lines.set_zorder(10)
    self.axes.get_xaxis().tick_bottom()
    self.axes.get_yaxis().tick_left()
    self.axes.set_xticks(px)
    self.axes.set_yticks(py)
    self.axes.tick_params(width=0)
    return range_lines
def feature_plot(M, normalize=False, dbscale=False, norm=False,
                 title_string=None, interp='nearest', bels=False,
                 nofig=False, **kwargs):
    """
    ::
        static method for plotting a matrix as a time-frequency distribution
        (audio features)
    """
    X = feature_scale(M, normalize, dbscale, norm, bels)
    if not nofig:
        P.figure()
    clip = -100.
    if dbscale or bels:
        if bels:
            clip /= 10.
        P.imshow(P.clip(X, clip, 0), origin='lower', aspect='auto',
                 interpolation=interp, **kwargs)
    else:
        P.imshow(X, origin='lower', aspect='auto', interpolation=interp,
                 **kwargs)
    if title_string:
        P.title(title_string)
    P.colorbar()
def feature_plot(M, normalize=False, dbscale=False, norm=False, ttl=None,
                 interp='nearest', bels=False, nofig=False, x_lbl='',
                 y_lbl='', cbar=False, save_image_as=None, **kwargs):
    """
    Static method for plotting a matrix as a time-frequency distribution
    (audio features)
    """
    X = feature_scale(M, normalize, dbscale, norm, bels)
    if not nofig:
        plt.figure()
    clip = -100.
    if dbscale or bels:
        if bels:
            clip /= 10.
        plt.imshow(P.clip(X, clip, 0), origin='lower', aspect='auto',
                   interpolation=interp, **kwargs)
    else:
        plt.imshow(X, origin='lower', aspect='auto', interpolation=interp,
                   **kwargs)
    if ttl:
        plt.title(ttl)
    if x_lbl:
        plt.xlabel(x_lbl)
    if y_lbl:
        plt.ylabel(y_lbl)
    if cbar:
        plt.colorbar()
    if save_image_as is not None and os.path.exists(save_image_as) is not True:
        # full path!
        plt.savefig(save_image_as)
def _mfcc(self):
    """
    ::
        DCT of the Log magnitude CQFT
    """
    fp = self._check_feature_params()
    if not self._cqft():
        return False
    self._make_dct()
    AA = P.log10(P.clip(self.CQFT, 0.0001, self.CQFT.max()))
    self.MFCC = P.dot(self.DCT, AA)
    self._have_mfcc = True
    if self.verbosity:
        print "Extracted MFCC: lcoef=%d, ncoef=%d, intensified=%d" % (self.lcoef, self.ncoef, self.intensify)
    n = self.ncoef
    l = self.lcoef
    self.X = self.MFCC[l:l+n, :]
    return True
def feature_scale(M, normalize=False, dbscale=False, norm=False, bels=False):
    """
    Perform mutually-orthogonal scaling operations, otherwise return identity:
        normalize [False]
        dbscale [False]
        norm [False]
    """
    if not (normalize or dbscale or norm or bels):
        return M
    else:
        X = M.copy()  # don't alter the original
        if norm:
            X = X / P.tile(P.sqrt((X * X).sum(0)), (X.shape[0], 1))
        if normalize:
            X = _normalize(X)
        if dbscale or bels:
            X = P.log10(P.clip(X, 0.0001, X.max()))
            if dbscale:
                X = 20 * X
    return X
def luminancecode(x, basecolor, **kwargs):
    """Create a code for the values in x

    :Parameters:
        *x*
            values to be coded
        *basecolor*
            basic color that should be mixed with white for lower values

    :Optional Keyword Arguments:
        *vmin*
            minimum of color scale (default: min(x))
        *vmax*
            maximum of color scale (default: max(x))
        *mincol*
            minimum color concentration (default: 0.1)
    """
    vmin = float(kwargs.setdefault('vmin', min(x)))
    vmax = float(kwargs.setdefault('vmax', max(x)))
    mincol = float(kwargs.setdefault('mincol', 0.1))
    ratios = pl.clip(((vmax - x) / (vmax - vmin)), 0, 1e8) / mincol
    return [cmix('w', basecolor, r) for r in ratios]
def forward(self, ys):
    """Forward propagate activations. This updates the internal
    state for a subsequent call to `backward` and returns the output
    activations."""
    n = len(ys)
    # inputs, zs = [None]*n, [None]*n
    zs = [None] * n
    for i in range(n):
        # inputs[i] = concatenate([ones(1), ys[i]])
        # equivalent to dot(self.W2, inputs[i]) with the bias in column 0
        temp = dot(self.W2[:, 1:], ys[i]) + self.W2[:, 0]
        # softmax with clipping for numerical stability
        temp = exp(clip(temp, -100, 100))
        temp /= sum(temp)
        zs[i] = temp
    # self.state = (inputs, zs)
    return zs
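A quick numeric check of the forward pass above, with a throwaway object standing in for `self` (only the `W2` attribute is used, and the weight values are invented); each output is a softmax and should sum to 1:

from numpy import array, dot, exp, clip

class _Stub(object):
    pass

net = _Stub()
net.W2 = array([[0.1, 1.0, -1.0, 0.5],
                [-0.2, 0.5, 0.5, -0.5]])   # 2 outputs; column 0 is the bias
zs = forward(net, [array([1.0, 2.0, 3.0])])
print(zs[0], zs[0].sum())                  # length-2 distribution summing to 1.0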
def _process_segment(self, page, filename, page_id, file_id):
    if self.parameter['parallel'] < 2:
        LOG.info("INPUT FILE %s ", filename)
    raw = ocrolib.read_image_gray(filename)
    flat = raw
    #flat = np.array(binImg)
    # estimate skew angle and rotate
    if self.parameter['maxskew'] > 0:
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Skew Angle")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = self.parameter['maxskew']
        ms = int(2 * self.parameter['maxskew'] * self.parameter['skewsteps'])
        angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0
    # self.write_angles_to_pageXML(base, angle)
    # estimate low and high thresholds
    if self.parameter['parallel'] < 2:
        LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
    # rescale the image to get the gray scale image
    if self.parameter['parallel'] < 2:
        LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    deskewed = 1 * (flat > self.parameter['threshold'])
    # output the normalized grayscale and the thresholded images
    LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" % (filename, lo, hi, angle))
    if self.parameter['parallel'] < 2:
        LOG.info("Writing")
    #ocrolib.write_image_binary(base+".ds.png", deskewed)
    #TODO: Need some clarification as the results affect the following pre-processing steps.
    #orientation = -angle
    #orientation = 180 - ((180 - orientation) % 360)
    page.set_orientation(angle)
    # NOTE: the original referenced an undefined `bin_image` below; build it
    # from the thresholded array, mirroring the companion binarization snippet
    bin_image = ocrolib.array2pil(array(255 * deskewed, 'B'))
    file_path = self.workspace.save_image_file(bin_image, file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(AlternativeImageType(filename=file_path,
                                                   comment="deskewed"))
def _input_data_from_gbd_json(dm, covs):
    """ translate input data"""
    import dismod3

    # remove any rows with 'ignore' columns set to 1
    dm['data'] = [d for d in dm['data'] if not (d.get('Ignore') or d.get('ignore'))]

    # remove any data with type-specific heterogeneity set to Unusable
    if 'global_priors' in dm['params']:
        for t in dm['params']['global_priors']['heterogeneity']:
            if dm['params']['global_priors']['heterogeneity'][t] == 'Unusable':
                print '%s has heterogeneity unusable, dropping %d rows' % (t, len([d for d in dm['data'] if d['data_type'] == t + ' data']))
                dm['data'] = [d for d in dm['data'] if d['data_type'] != t + ' data']

    input_data = {}
    for field in 'effective_sample_size age_start age_end year_start year_end'.split():
        input_data[field] = []
        for row in dm['data']:
            val = row.get(field, '')
            if val == '':
                val = pl.nan
            input_data[field].append(float(val))

    input_data['sex'] = []
    for row in dm['data']:
        input_data['sex'].append(row['sex'])
        # replace sex 'all' with sex 'total'
        if input_data['sex'][-1] == 'all':
            input_data['sex'][-1] = 'total'
        assert input_data['sex'][-1] != ''

    new_type_name = {'incidence data': 'i', 'prevalence data': 'p',
                     'remission data': 'r', 'excess-mortality data': 'f',
                     'prevalence x excess-mortality data': 'pf',
                     'all-cause mortality data': 'm_all',
                     'relative-risk data': 'rr', 'duration data': 'X',
                     'smr data': 'smr',
                     'cause-specific mortality data': 'csmr',
                     'mortality data': 'm_with'}
    input_data['data_type'] = [new_type_name[row['data_type']] for row in dm['data']]

    for field in 'value standard_error lower_ci upper_ci'.split():
        input_data[field] = []
        for row in dm['data']:
            val = row.get(field, '')
            if val == '':
                val = pl.nan
            else:
                val = float(val) / float(row.get('units', '1').replace(',', ''))
            input_data[field].append(val)

    input_data['area'] = []
    for row in dm['data']:
        val = row.get('country_iso3_code', '')
        if val == '' or val == 'all':
            val = dismod3.utils.clean(row['gbd_region'])
        input_data['area'].append(val)
        assert input_data['area'][-1] != ''

    # store age_weights as semi-colon delimited text, since Pandas doesn't like
    # arrays in arrays and doesn't save comma-separated fields correctly
    input_data['age_weights'] = [';'.join(['%.4f' % w for w in row.get('age_weights', [])]) for row in dm['data']]

    # add selected covariates
    if 'covariates' in dm['params']:
        for level in ['Country_level', 'Study_level']:
            for cv in dm['params']['covariates'].get(level, []):
                if dm['params']['covariates'][level][cv]['rate']['value']:
                    input_data['x_%s' % cv] = []
                    for row in dm['data']:
                        if level == 'Country_level':
                            if row['data_type'] == 'all-cause mortality data':
                                # don't bother to merge covariates into all-cause mortality data
                                input_data['x_%s' % cv].append(0.)
                            elif row['region'] == 'all':
                                # don't bother to merge covariates into regional data
                                input_data['x_%s' % cv].append(0.)
                            elif row.get('country_iso3_code'):
                                iso3 = row['country_iso3_code']
                                # special case for countries that CODEm does not report on
                                if 'ASDR' in cv:
                                    if iso3 in ['HKG', 'MAC']:
                                        iso3 = 'TWN'  # TODO: average over CHN, PRK, TWN
                                    if iso3 in ['PRI', 'BMU']:
                                        iso3 = 'CUB'  # TODO: average over caribbean countries
                                input_data['x_%s' % cv].append(
                                    covs[cv][iso3, row['sex'],
                                             pl.clip((row['year_start'] + row['year_end'])/2, 1980., 2012.)])
                            else:
                                # handle regional data
                                df = covs[(covs['region'] == dismod3.utils.clean(row['gbd_region'])) &
                                          (covs.index.get_level_values(1) == row['sex']) &
                                          (covs.index.get_level_values(2) == pl.clip((row['year_start'] + row['year_end'])/2, 1980., 2012.))]
                                #input_data['x_%s'%cv].append(
                                #    (df[cv]*df['pop']).sum() / df['pop'].sum()
                                #    )
                                input_data['x_%s' % cv].append(0.)  # TODO: remove regional data
                        elif level == 'Study_level':
                            input_data['x_%s' % cv].append(float(row.get(dismod3.utils.clean(cv), '') or 0.))
                # also include column of input data for 'z_%s'%cv if it is requested
                if dm['params']['covariates'][level][cv]['error']['value']:
                    input_data['z_%s' % cv] = [float(row.get(dismod3.utils.clean(cv), '') or 0.) for row in dm['data']]

    input_data = pandas.DataFrame(input_data)

    # replace age_end 1 with age_end 0, correcting a common mistake in data entry
    i = (input_data['age_start'] == 0) & (input_data['age_end'] == 1)
    if i.sum() > 0:
        print 'WARNING: correcting age_end in %d rows that have age_start == 0, age_end == 1 (old format uses "demographic" notation)' % i.sum()
        input_data['age_end'][i] = 0

    # replace triple underscores with single underscore, a problem with
    # consistency in the spacing in "North Africa / Middle East"
    input_data['area'] = [a.replace('___', '_') for a in input_data['area']]

    # print checks of data
    for i, row in input_data.T.iteritems():
        if pl.isnan(row['value']):
            print 'WARNING: value in row %d is missing' % i
    input_data = input_data[~pl.isnan(input_data['value'])]

    return input_data
def figure3():
    w, h = 25, 8.5
    fig = pl.figure(figsize=(fullwidth, h * fullwidth / w))
    # a,b,c,d = place_axes ( fig, 1.5,2, [9,9,5,5],[6]*4,
    #         [True]*2+[False]*2, [1.8,1.8,.5,.5], (w,h) )
    a, b, c = place_axes(fig, 1.5, 2, [9, 9, 5], [6]*3,
                         [True]*2 + [False], [1.8, 1.8, .5], (w, h))
    d = fig.add_axes([10, 10, 1, 1])
    a.text(.05, laby, r"\textbf{a}", transform=a.transAxes)
    b.text(.05, laby, r"\textbf{b}", transform=b.transAxes)
    c.text(.05, laby, r"\textbf{c}", transform=c.transAxes)
    d.text(.05, laby, r"\textbf{d}", transform=d.transAxes)

    M = results['model_w_hist']

    # Figures 3 A,B
    for condition in plotinfo['conditions']:
        condition = int(condition)
        print "c", condition
        d_ = data.getsummary(condition)
        # x = pl.mgrid[0:plotinfo['xmax']:100j]
        x = pl.mgrid[0:30:100j]
        # if len(data.th_features)>0:
        #     x = threshold.u_v ( x, results['model_w_hist'].nu )
        wfit = results['model_w_hist'].w[plotinfo['indices'][condition]]
        w0fit = results['model_nohist'].w[plotinfo['indices'][condition]]
        pfit = results['model_w_hist'].pi
        p0fit = results['model_nohist'].pi
        x_ = threshold.u_v(x, results['model_w_hist'].nu)
        x0 = threshold.u_v(x, results['model_nohist'].nu)
        col = plotinfo['colors'][condition]
        pmf = 0.5*(pfit[1]+pfit[2]*model.logistic(wfit[0]+wfit[1]*x_)) + \
            0.5*(1-(pfit[1]+pfit[2]*model.logistic(wfit[0]-wfit[1]*x_)))
        p0f = 0.5*(p0fit[1]+p0fit[2]*model.logistic(w0fit[0]+w0fit[1]*x0)) + \
            0.5*(1-(p0fit[1]+p0fit[2]*model.logistic(w0fit[0]-w0fit[1]*x0)))
        print p0fit
        perror = (1-p0f-(1-pmf))/(1-p0f)
        a.plot(x, pmf, color=col)
        a.plot(x, p0f, color=col, linestyle='--')
        b.plot(x, pl.clip(perror, 0, 1e5), color=col)
    a.yaxis.set_major_formatter(prcformatter)
    a.xaxis.set_major_formatter(myformatter)
    a.set_xticks((0, 10, 20, 30))
    pl.setp((a, b), xlabel='Stimulus intensity')
    a.set_ylabel('Probability correct [\%]')
    b.set_ylabel('Error rate exp. [\%]')
    b.set_xticks((0, 10, 20, 30))
    b.yaxis.set_major_locator(tckr(density=2, figure=fig, which=1))
    b.yaxis.set_major_formatter(prcformatter)
    b.xaxis.set_major_formatter(myformatter)
    if observer in ['KP', 'sim_KP', 'sim_KP_nh']:
        b.set_ylim(0, .35)
    if observer in ['pk']:
        pl.setp((a, b), xlim=(-.1, 30.1))

    # figure 3 C
    textfile.write("Figure 3C:\n")
    z0 = 0
    C = statistics.EvaluationCollector(M)
    ewh = C(results['model_w_hist'])
    enh = C(results['model_nohist'])
    hf0 = M.hf0
    # perm = results['permutation_wh']
    # # TODO: These indices have to be adapted to the revised collector
    # thresholds_wh = pl.array([C.get_thres ( perm[i,13+hf0:13+2*hf0], perm[i,12+hf0], perm[i,9:12], p=0.75 ) \
    #         for i in xrange ( 2000 )])
    # perm = results['permutation_nh']
    # thresholds_nh = pl.array([C.get_thres ( perm[i,13+hf0:13+2*hf0], perm[i,12+hf0], perm[i,9:12], p=0.75 ) \
    #         for i in xrange ( 2000 )])
    if thlev == .75:
        thind = 11
    elif thlev == .85:
        thind = 10 + hf0
    else:
        raise ValueError

    for condition in xrange(1, M.hf0):
        s_wh = results['permutation_wh'][:, thind + condition]
        s_nh = results['permutation_nh'][:, thind + condition]
        # s_wh = thresholds_wh[:,condition]
        # s_nh = thresholds_nh[:,condition]
        s_ratio = s_wh / s_nh
        s_ratio_obs = ewh[thind + condition] / enh[thind + condition]
        # s_ratio_obs = results['model_w_hist'].w[condition]/results['model_nohist'].w[condition]
        z = (s_ratio_obs - pl.mean(s_ratio)) / pl.std(s_ratio)
        cpe = pl.mean(s_ratio < s_ratio_obs)
        ci = pl.prctile(s_ratio, (2.5, 97.5))
        if z < z0 and ci[1] - ci[0] > 0:
            c0 = condition
            s_ratio_ = s_ratio
            s_ratio_obs_ = s_ratio_obs
            ci_ = ci
        textfile.write("Condition %d\n th75_ratio = %g\n cpe = %g\n percentiles of Null-Distribution: %g, %g\n" %
                       (condition, s_ratio_obs, cpe, ci[0], ci[1]))
    try:
        print "Using condition %d for figure 3C" % (c0,)
    except:
        c0 = 1
        s_ratio_ = s_ratio
        s_ratio_obs_ = s_ratio_obs
        ci_ = ci

    hist, bins = pl.histogram(s_ratio_)
    c.bar(bins[:-1], hist, pl.diff(bins), edgecolor=graphics.histogram_color,
          facecolor=graphics.histogram_color)
    yrange = c.get_ylim()
    # c.plot ( [1]*2, yrange, 'k:' )
    if s_ratio_obs < ci_[0]:
        c.plot([s_ratio_obs_]*2,
               (yrange[0], yrange[0]+0.85*(yrange[1]-yrange[0])),
               linewidth=2, color=graphics.observed_color)
        c.plot([s_ratio_obs_], [yrange[0]+0.95*(yrange[1]-yrange[0])], '*',
               color=graphics.observed_color)
    else:
        c.plot([s_ratio_obs_]*2, yrange, linewidth=2,
               color=graphics.observed_color)
    c.plot([ci_[0]]*2, yrange, color=graphics.C95_color)
    c.plot([ci_[1]]*2, yrange, color=graphics.C95_color)
    c.set_ylim(*yrange)
    c.set_xlabel(r'Threshold ratio')
    c.xaxis.set_major_formatter(myformatter)
    # c.text ( .7, 0.7, r"$\frac{\theta_\mathrm{h}}{\theta_0}$",
    #         transform=c.transAxes )
    # c.set_xlim ( trimmed_hlim ( s_ratio_, s_ratio_obs_ ) )
    # c.xaxis.set_major_locator ( tckr ( density=0.4, figure=fig, which=0 ) )
    c.set_xlim(.99, 1.01)
    # c.xaxis.set_ticks ( (.95,1) )
    # c.set_xlim ( .85, 1.05 )
    c.xaxis.set_ticks((.99, 1., 1.01))

    # figure 3 D
    l_wh = 0.5 * results['permutation_wh'][:, [9, 10]].sum(1)
    l_nh = 0.5 * results['permutation_nh'][:, [9, 10]].sum(1)
    l_ratio = l_wh - l_nh
    l_ratio_obs = results['model_w_hist'].pi[[0, 1]].sum() - results['model_nohist'].pi[[0, 1]].sum()
    cpe = pl.mean(l_ratio < l_ratio_obs)
    ci = pl.prctile(l_ratio, (2.5, 97.5))
    textfile.write("Figure 3D:\n lapse_ratio = %g\n cpe = %g\n percentiles of Null-distribution: %g, %g\n lapse_rate (w hist) = %g\n lapse_rate (no hist) = %g\n" %
                   (l_ratio_obs, cpe, ci[0], ci[1],
                    results['model_w_hist'].pi[[0, 1]].sum(),
                    results['model_nohist'].pi[[0, 1]].sum()))

    d = graphics.prepare_axes(d, haveon=('bottom',))
    # hist,bins = pl.histogram ( l_ratio )
    hist, bins = pl.histogram(l_ratio, bins=good_lapse_bins(l_ratio))
    # hist,bins = pl.histogram ( l_ratio, bins=pl.mgrid[-.0001:.0001:20j] )
    d.bar(bins[:-1], hist, pl.diff(bins), edgecolor=graphics.histogram_color,
          facecolor=graphics.histogram_color, zorder=0)
    yrange = d.get_ylim()
    # d.plot ( [1]*2, yrange, 'k:' )
    if l_ratio_obs < ci[0] or l_ratio_obs > ci[1]:
        d.plot([l_ratio_obs]*2,
               [yrange[0], yrange[0]+0.85*(yrange[1]-yrange[0])],
               linewidth=2, color=graphics.observed_color)
        d.plot([l_ratio_obs], [yrange[0]+0.95*(yrange[1]-yrange[0])], '*',
               color=graphics.observed_color)
    else:
        print "lrobs", l_ratio_obs
        d.plot([l_ratio_obs]*2, yrange, color=graphics.observed_color, zorder=2)
    d.plot([ci[0]]*2, yrange, color=graphics.C95_color, zorder=1)
    d.plot([ci[1]]*2, yrange, color=graphics.C95_color, zorder=1)
    d.set_ylim(yrange)
    d.set_xlabel(r'Asymptote difference')
    # d.text ( .7, 0.7, r"$\frac{\lambda_\mathrm{h}}{\lambda_0}$",
    #         transform=d.transAxes )
    # d.set_xlim ( trimmed_hlim ( l_ratio, l_ratio_obs, (0,5) ) )
    d.set_xlim(-.003, .001)
    d.xaxis.set_major_locator(tckr(density=0.4, figure=fig, which=0))
    d.xaxis.set_ticks((-.002, 0))
    # d.set_xlim ( (.75, 1.25) )
    d.xaxis.set_major_formatter(myformatter)

    a.set_ylim(.49, 1.01)

    pl.savefig("figures/%s3.pdf" % (figname,))
    pl.savefig("figures/%s3.eps" % (figname,))
def do_fontsize(k):
    return float(clip(max_text_size * sqrt(data[k]),
                      min_text_size, max_text_size))
def log_add(x, y):
    return where(abs(x - y) > 10, maximum(x, y),
                 log(exp(clip(x - y, -20, 20)) + 1) + y)
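log_add computes log(exp(x) + exp(y)) in a numerically stable way. A quick check, assuming numpy's where/maximum/clip/log/exp are in scope as the snippet's bare names imply:

from numpy import log, exp

print(log_add(0.0, 1.0), log(exp(0.0) + exp(1.0)))  # agree where the naive form is safe
print(log_add(-1000.0, -1000.0))                    # ~ -1000 + log(2); naive form underflows to -inf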
def consistent(model, reference_area='all', reference_sex='total',
               reference_year='all', priors={}, zero_re=True,
               rate_type='neg_binom'):
    """ Generate PyMC objects for consistent model of epidemiological data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `root_area, root_sex, root_year` : the node of the model to fit consistently
      - `priors` : dictionary, with keys for data types for lists of priors
        on age patterns
      - `zero_re` : boolean, change one stoch from each set of siblings in
        area hierarchy to a 'sum to zero' deterministic
      - `rate_type` : str or dict, optional. One of 'beta_binom', 'binom',
        'log_normal_model', 'neg_binom', 'neg_binom_lower_bound_model',
        'neg_binom_model', 'normal_model', 'offset_log_normal', or 'poisson',
        optionally as a dict, with keys i, r, f, p, m_with

    :Results:
      - Returns dict of dicts of PyMC objects, including 'i', 'p', 'r', 'f',
        the covariate adjusted predicted values for each row of data

    .. note::
      - dict priors can contain keys (t, 'mu') and (t, 'sigma') to tell the
        consistent model about the priors on levels for the age-specific rate
        of type t (these are arrays for mean and standard deviation a priori
        for mu_age[t])
      - it can also contain dicts keyed by t alone to insert empirical priors
        on the fixed effects and random effects
    """
    # TODO: refactor the way priors are handled
    # current approach is much more complicated than necessary
    for t in 'i r pf p rr f'.split():
        if t in priors:
            model.parameters[t]['random_effects'].update(priors[t]['random_effects'])
            model.parameters[t]['fixed_effects'].update(priors[t]['fixed_effects'])

    # if rate_type is a string, make it into a dict
    if type(rate_type) == str:
        rate_type = dict(i=rate_type, r=rate_type, f=rate_type, p=rate_type,
                         m_with=rate_type)
    if 'm_with' not in rate_type.keys():
        rate_type['m_with'] = 'neg_binom'
    if 'i' not in rate_type.keys():
        rate_type['i'] = 'neg_binom'
    if 'r' not in rate_type.keys():
        rate_type['r'] = 'neg_binom'
    if 'f' not in rate_type.keys():
        rate_type['f'] = 'neg_binom'

    rate = {}
    ages = model.parameters['ages']
    for t in 'irf':
        rate[t] = age_specific_rate(model, t, reference_area, reference_sex,
                                    reference_year, mu_age=None,
                                    mu_age_parent=priors.get((t, 'mu')),
                                    sigma_age_parent=priors.get((t, 'sigma')),
                                    zero_re=zero_re,
                                    rate_type=rate_type[t])[t]
        # age_specific_rate()[t] is to create proper nesting of dict

        # set initial values from data
        if t in priors:
            if isinstance(priors[t], mc.Node):
                initial = priors[t].value
            else:
                initial = pl.array(priors[t])
        else:
            initial = rate[t]['mu_age'].value.copy()
            df = model.get_data(t)
            if len(df.index) > 0:
                mean_data = df.groupby(['age_start', 'age_end']).mean().delevel()
                for i, row in mean_data.T.iteritems():
                    start = row['age_start'] - rate[t]['ages'][0]
                    end = row['age_end'] - rate[t]['ages'][0]
                    initial[start:end] = row['value']

        for i, k in enumerate(rate[t]['knots']):
            rate[t]['gamma'][i].value = pl.log(initial[k - rate[t]['ages'][0]] + 1.e-9)

    m_all = .01 * pl.ones(101)
    df = model.get_data('m_all')
    if len(df.index) == 0:
        print 'WARNING: all-cause mortality data not found, using m_all = .01'
    else:
        mean_mortality = df.groupby(['age_start', 'age_end']).mean().delevel()

        knots = []
        for i, row in mean_mortality.T.iteritems():
            knots.append(pl.clip((row['age_start'] + row['age_end'] + 1.) / 2., 0, 100))
            m_all[knots[-1]] = row['value']

        # extend knots as constant beyond endpoints
        knots = sorted(knots)
        m_all[0] = m_all[knots[0]]
        m_all[100] = m_all[knots[-1]]
        knots.insert(0, 0)
        knots.append(100)

        m_all = scipy.interpolate.interp1d(knots, m_all[knots], kind='linear')(pl.arange(101))
    m_all = m_all[ages]

    logit_C0 = mc.Uniform('logit_C0', -15, 15, value=-10.)

    # use Runge-Kutta 4 ODE solver
    import dismod_ode
    N = len(m_all)
    num_step = 10  # double until it works
    ages = pl.array(ages, dtype=float)
    fun = dismod_ode.ode_function(num_step, ages, m_all)

    @mc.deterministic
    def mu_age_p(logit_C0=logit_C0,
                 i=rate['i']['mu_age'],
                 r=rate['r']['mu_age'],
                 f=rate['f']['mu_age']):
        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)
        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.
        return p

    p = age_specific_rate(model, 'p', reference_area, reference_sex,
                          reference_year, mu_age_p,
                          mu_age_parent=priors.get(('p', 'mu')),
                          sigma_age_parent=priors.get(('p', 'sigma')),
                          zero_re=zero_re, rate_type=rate_type['p'])['p']

    @mc.deterministic
    def mu_age_pf(p=p['mu_age'], f=rate['f']['mu_age']):
        return p * f
    pf = age_specific_rate(model, 'pf', reference_area, reference_sex,
                           reference_year, mu_age_pf,
                           mu_age_parent=priors.get(('pf', 'mu')),
                           sigma_age_parent=priors.get(('pf', 'sigma')),
                           lower_bound='csmr', include_covariates=False,
                           zero_re=zero_re)['pf']

    @mc.deterministic
    def mu_age_m(pf=pf['mu_age'], m_all=m_all):
        return (m_all - pf).clip(1.e-6, 1.e6)
    rate['m'] = age_specific_rate(model, 'm_wo', reference_area, reference_sex,
                                  reference_year, mu_age_m, None, None,
                                  include_covariates=False,
                                  zero_re=zero_re)['m_wo']

    @mc.deterministic
    def mu_age_rr(m=rate['m']['mu_age'], f=rate['f']['mu_age']):
        return (m + f) / m
    rr = age_specific_rate(model, 'rr', reference_area, reference_sex,
                           reference_year, mu_age_rr,
                           mu_age_parent=priors.get(('rr', 'mu')),
                           sigma_age_parent=priors.get(('rr', 'sigma')),
                           rate_type='log_normal', include_covariates=False,
                           zero_re=zero_re)['rr']

    @mc.deterministic
    def mu_age_smr(m=rate['m']['mu_age'], f=rate['f']['mu_age'], m_all=m_all):
        return (m + f) / m_all
    smr = age_specific_rate(model, 'smr', reference_area, reference_sex,
                            reference_year, mu_age_smr,
                            mu_age_parent=priors.get(('smr', 'mu')),
                            sigma_age_parent=priors.get(('smr', 'sigma')),
                            rate_type='log_normal', include_covariates=False,
                            zero_re=zero_re)['smr']

    @mc.deterministic
    def mu_age_m_with(m=rate['m']['mu_age'], f=rate['f']['mu_age']):
        return m + f
    m_with = age_specific_rate(model, 'm_with', reference_area, reference_sex,
                               reference_year, mu_age_m_with,
                               mu_age_parent=priors.get(('m_with', 'mu')),
                               sigma_age_parent=priors.get(('m_with', 'sigma')),
                               include_covariates=False, zero_re=zero_re,
                               rate_type=rate_type['m_with'])['m_with']

    # duration = E[time in bin C]
    @mc.deterministic
    def mu_age_X(r=rate['r']['mu_age'], m=rate['m']['mu_age'],
                 f=rate['f']['mu_age']):
        hazard = r + m + f
        pr_not_exit = pl.exp(-hazard)
        X = pl.empty(len(hazard))
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X) - 1)):
            X[i] = pr_not_exit[i] * (X[i+1] + 1) + 1 / hazard[i] * (1 - pr_not_exit[i]) - pr_not_exit[i]
        return X
    X = age_specific_rate(model, 'X', reference_area, reference_sex,
                          reference_year, mu_age_X,
                          mu_age_parent=priors.get(('X', 'mu')),
                          sigma_age_parent=priors.get(('X', 'sigma')),
                          rate_type='normal', include_covariates=True,
                          zero_re=zero_re)['X']

    vars = rate
    vars.update(logit_C0=logit_C0, p=p, pf=pf, rr=rr, smr=smr,
                m_with=m_with, X=X)
    return vars
def sumouter(us, vs, lo=-1.0, hi=1.0, out=None):
    # `out or zeros(...)` raises on a multi-element array, so test for None explicitly
    result = zeros((len(us[0]), len(vs[0]))) if out is None else out
    for u, v in zip(us, vs):
        result += outer(clip(u, lo, hi), v)
    return result
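A usage sketch for sumouter under the same bare-numpy-names assumption as the snippet; it accumulates the outer product of each clipped u with its paired v, so two 2-vectors against two 3-vectors yield a 2x3 matrix:

from numpy import array, zeros, outer, clip

us = [array([0.5, -2.0]), array([1.5, 0.25])]
vs = [array([1.0, 2.0, 3.0]), array([-1.0, 0.0, 1.0])]
G = sumouter(us, vs)   # u values clipped into [-1, 1] before each outer product
print(G.shape)         # -> (2, 3)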
def sumprod(us, vs, lo=-1.0, hi=1.0, out=None):
    assert len(us[0]) == len(vs[0])
    # same None test as in sumouter; `out or ...` fails on a non-trivial array
    result = zeros(len(us[0])) if out is None else out
    for u, v in zip(us, vs):
        result += clip(u, lo, hi) * v
    return result
def process(self):
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        print_info("# %s" % (fname))
        raw = ocrolib.read_image_gray(img.filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            print_info("# image is empty: %s" % (fname))
            return
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                print_error(fname + " SKIPPED. " + check +
                            " (use -n to disable this check)")
                return

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            print_info("flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m, self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m, self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
        print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
        print_info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))
        base, _ = ocrolib.allsplitext(img.filename)
        ocrolib.write_image_binary(base + ".bin.png", binarized)
        # ocrolib.write_image_gray(base +".nrm.png", flat)
        # print("########### File path : ", base+".nrm.png")
        # write_to_xml(base+".bin.png")
        # return base+".bin.png"
        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=ID,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=base + ".bin.png",
                                local_filename='%s/%s' % (self.output_file_grp, ID),
                                content=to_xml(pcgts).encode('utf-8'))
from matplotlib.toolkits.basemap import Basemap, cm
# NetCDFFile is used below but was not imported in the original snippet;
# basemap releases of this era shipped a reader, e.g.
# `from matplotlib.toolkits.basemap import NetCDFFile`
import pylab, copy
from matplotlib import rcParams

# make tick labels smaller
rcParams['xtick.labelsize'] = 9
rcParams['ytick.labelsize'] = 9

# plot rainfall from NWS using special precipitation
# colormap used by the NWS, and included in basemap.
# data from http://www.srh.noaa.gov/rfcshare/precip_analysis_new.php
nc = NetCDFFile('nws_precip_conus_20061222.nc')
prcpvar = nc.variables['amountofprecip']
data = 0.01 * prcpvar[:]
data = pylab.clip(data, 0, 10000)
latcorners = nc.variables['lat'][:]
loncorners = -nc.variables['lon'][:]
plottitle = prcpvar.long_name + ' for period ending ' + prcpvar.dateofdata
print data.min(), data.max()
print latcorners
print loncorners
print plottitle
print data.shape
lon_0 = -nc.variables['true_lon'].getValue()
lat_0 = nc.variables['true_lat'].getValue()
# create polar stereographic Basemap instance.
m = Basemap(projection='stere', lon_0=lon_0, lat_0=90., lat_ts=lat_0,
            llcrnrlat=latcorners[0], urcrnrlat=latcorners[2],
            llcrnrlon=loncorners[0], urcrnrlon=loncorners[2],
            rsphere=6371200., resolution='l', area_thresh=10000)
def one_compartment_ode(S, t, h_b, h_m):
    # piecewise-constant functions of time implemented as arrays
    t = int(pl.clip(t, 0, len(h_b) - 1))
    return (h_b[t] - h_m[t]) * S
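A sketch of driving this ODE with scipy's odeint; the hazard arrays here are made-up constants, so the solution is simply exponential growth at the net rate h_b - h_m:

import pylab as pl
from scipy.integrate import odeint

h_b = pl.array([0.02] * 50)   # invented entry ("birth") hazard per time step
h_m = pl.array([0.01] * 50)   # invented mortality hazard per time step
t = pl.arange(50.)
S = odeint(one_compartment_ode, 1.0, t, args=(h_b, h_m))
print(S[-1])                  # ~ exp(0.01 * 49): net 1% growth per step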
def process(self):
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        param = self.parameter
        base, _ = ocrolib.allsplitext(fname)
        #basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]
        if param['parallel'] < 2:
            print_info("=== %s " % (fname))
        raw = ocrolib.read_image_gray(img.filename)

        flat = raw
        #flat = np.array(binImg)
        # estimate skew angle and rotate
        if param['maxskew'] > 0:
            if param['parallel'] < 2:
                print_info("estimating skew angle")
            d0, d1 = flat.shape
            o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = param['maxskew']
            ms = int(2 * param['maxskew'] * param['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
            flat = amax(flat) - flat
        else:
            angle = 0
        # self.write_angles_to_pageXML(base, angle)

        # estimate low and high thresholds
        if param['parallel'] < 2:
            print_info("estimating thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(param['bignore'] * d0), int(param['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if param['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = param['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if param['debug'] > 0:
                imshow(v)
                ginput(1, param['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), param['lo'])
        hi = stats.scoreatpercentile(est.ravel(), param['hi'])

        # rescale the image to get the gray scale image
        if param['parallel'] < 2:
            print_info("rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if param['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, param['debug'])
        deskewed = 1 * (flat > param['threshold'])

        # output the normalized grayscale and the thresholded images
        print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                   (pcgts.get_Page().imageFilename, lo, hi, angle))
        if param['parallel'] < 2:
            print_info("writing")
        ocrolib.write_image_binary(base + ".ds.png", deskewed)
        orientation = -angle
        orientation = 180 - (180 - orientation) % 360
        pcgts.get_Page().set_orientation(orientation)
        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=ID,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype="image/png",
                                url=base + ".ds.png",
                                local_filename='%s/%s' % (self.output_file_grp, ID),
                                content=to_xml(pcgts).encode('utf-8'))
def mu_interval(mu_age=mu_age, age_mid=pl.array(age_mid, dtype=int)):
    return mu_age.take(pl.clip(age_mid, ages[0], ages[-1]) - ages[0])
def run(self, fpath, job):
    param = self.param
    base, _ = ocrolib.allsplitext(fpath)
    basefile = ocrolib.allsplitext(os.path.basename(fpath))[0]

    if param['parallel'] < 2:
        print_info("=== %s %-3d" % (fpath, job))
    raw = ocrolib.read_image_gray(fpath)

    flat = raw
    # estimate skew angle and rotate
    if param['maxskew'] > 0:
        if param['parallel'] < 2:
            print_info("estimating skew angle")
        d0, d1 = flat.shape
        o0, o1 = int(param['bignore']*d0), int(param['bignore']*d1)
        flat = amax(flat)-flat
        flat -= amin(flat)
        est = flat[o0:d0-o0, o1:d1-o1]
        ma = param['maxskew']
        ms = int(2*param['maxskew']*param['skewsteps'])
        angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms+1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat)-flat
    else:
        angle = 0

    # estimate low and high thresholds
    if param['parallel'] < 2:
        print_info("estimating thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(param['bignore']*d0), int(param['bignore']*d1)
    est = flat[o0:d0-o0, o1:d1-o1]
    if param['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = param['escale']
        v = est-filters.gaussian_filter(est, e*20.0)
        v = filters.gaussian_filter(v**2, e*20.0)**0.5
        v = (v > 0.3*amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e*50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e*50))))
        if param['debug'] > 0:
            imshow(v)
            ginput(1, param['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), param['lo'])
    hi = stats.scoreatpercentile(est.ravel(), param['hi'])

    # rescale the image to get the gray scale image
    if param['parallel'] < 2:
        print_info("rescaling")
    flat -= lo
    flat /= (hi-lo)
    flat = clip(flat, 0, 1)
    if param['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, param['debug'])
    deskewed = 1*(flat > param['threshold'])

    # output the normalized grayscale and the thresholded images
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (basefile, lo, hi, angle))
    if param['parallel'] < 2:
        print_info("writing")
    ocrolib.write_image_binary(base+".ds.png", deskewed)
    return base+".ds.png"
def _process_segment(self, page, filename, page_id, file_id):
    raw = ocrolib.read_image_gray(filename)
    self.dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    if not self.parameter['nocheck']:
        check = self.check_page(amax(image) - image)
        if check is not None:
            # the original referenced an undefined `input_file` here; use page_id
            LOG.error(page_id + " SKIPPED. " + check +
                      " (use -n to disable this check)")
            return

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    # output the normalized grayscale and the thresholded images
    # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))
    #base, _ = ocrolib.allsplitext(filename)
    #ocrolib.write_image_binary(base + ".bin.png", binarized)
    # ocrolib.write_image_gray(base +".nrm.png", flat)
    # print("########### File path : ", base+".nrm.png")
    # write_to_xml(base+".bin.png")
    # return base+".bin.png"
    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)
    file_path = self.workspace.save_image_file(bin_image, file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(AlternativeImageType(filename=file_path,
                                                   comment="binarized"))
def process(self):
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
        page = pcgts.get_Page()
        page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id)
        if self.parameter['parallel'] < 2:
            LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
        raw = ocrolib.read_image_gray(page_image.filename)
        flat = raw

        # estimate skew angle and rotate
        if self.parameter['maxskew'] > 0:
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Skew Angle")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = self.parameter['maxskew']
            ms = int(2 * self.parameter['maxskew'] * self.parameter['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
            flat = amax(flat) - flat
        else:
            angle = 0

        # estimate low and high thresholds
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        if self.parameter['parallel'] < 2:
            LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        deskewed = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                 (pcgts.get_Page().imageFilename, lo, hi, angle))
        if self.parameter['parallel'] < 2:
            LOG.info("Writing")
        # ocrolib.write_image_binary(base+".ds.png", deskewed)
        # TODO: needs clarification, as the orientation affects the following pre-processing steps
        # orientation = -angle
        # orientation = 180 - ((180 - orientation) % 360)
        pcgts.get_Page().set_orientation(angle)
        file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
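concat_padded comes from ocrd_utils; for reference, a minimal hedged stand-in matching its use above:

def concat_padded(base, *args):
    # concatenate a base string and zero-padded 4-digit numbers:
    # concat_padded('OCR-D-BIN', 7) -> 'OCR-D-BIN_0007'
    return '_'.join([base] + ['%04d' % int(a) for a in args])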
from matplotlib.toolkits.basemap import Basemap, cm
import pylab, copy
from matplotlib import rcParams
# NetCDFFile is assumed to come from netCDF4 here; older basemap releases
# shipped their own NetCDFFile reader, so adjust this import to your install.
from netCDF4 import Dataset as NetCDFFile

# make tick labels smaller
rcParams['xtick.labelsize'] = 9
rcParams['ytick.labelsize'] = 9

# plot rainfall from NWS using special precipitation
# colormap used by the NWS, and included in basemap.
nc = NetCDFFile('nws_precip_conus_20061222.nc')
# data from http://www.srh.noaa.gov/rfcshare/precip_analysis_new.php
prcpvar = nc.variables['amountofprecip']
data = 0.01*prcpvar[:]
data = pylab.clip(data, 0, 10000)
latcorners = nc.variables['lat'][:]
loncorners = -nc.variables['lon'][:]
plottitle = prcpvar.long_name + ' for period ending ' + prcpvar.dateofdata
print data.min(), data.max()
print latcorners
print loncorners
print plottitle
print data.shape
lon_0 = -nc.variables['true_lon'].getValue()
lat_0 = nc.variables['true_lat'].getValue()
# create polar stereographic Basemap instance.
m = Basemap(projection='stere', lon_0=lon_0, lat_0=90., lat_ts=lat_0,
            llcrnrlat=latcorners[0], urcrnrlat=latcorners[2],
            llcrnrlon=loncorners[0], urcrnrlon=loncorners[2],
            rsphere=6371200., resolution='l', area_thresh=10000)
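The script stops at the Basemap constructor. A plausible continuation, modeled on basemap's shipped precipitation example (the contour levels and the NWS colormap cm.s3pcpn below are assumptions, not part of the original):

ny, nx = data.shape
lons, lats = m.makegrid(nx, ny)   # lon/lat grid matching the projection region
x, y = m(lons, lats)              # map projection coordinates
m.drawcoastlines()
m.drawstates()
m.drawcountries()
clevs = [0, 1, 2.5, 5, 7.5, 10, 15, 20, 30, 40, 50, 70, 100, 150, 200, 250, 300, 400, 500, 600, 750]
cs = m.contourf(x, y, data, clevs, cmap=cm.s3pcpn)
cbar = pylab.colorbar(cs)
pylab.title(plottitle)
pylab.show()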
def consistent(model,
               reference_area="all",
               reference_sex="total",
               reference_year="all",
               priors={},
               zero_re=True,
               rate_type="neg_binom"):
    """ Generate PyMC objects for consistent model of epidemiological data

    :Parameters:
      - `model` : data.ModelData
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `priors` : dictionary, with keys for data types for lists of priors on age patterns
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic
      - `rate_type` : str or dict, optional. One of 'beta_binom', 'binom', 'log_normal_model', 'neg_binom', 'neg_binom_lower_bound_model', 'neg_binom_model', 'normal_model', 'offset_log_normal', or 'poisson', optionally as a dict with keys i, r, f, p, m_with

    :Results:
      - Returns dict of dicts of PyMC objects, including 'i', 'p', 'r', 'f', the covariate adjusted predicted values for each row of data

    .. note::
      - dict priors can contain keys (t, 'mu') and (t, 'sigma') to tell the consistent model about the priors on levels for the age-specific rate of type t (these are arrays of the mean and standard deviation a priori for mu_age[t])
      - it can also contain dicts keyed by t alone to insert empirical priors on the fixed effects and random effects
    """
    # TODO: refactor the way priors are handled;
    # the current approach is much more complicated than necessary
    for t in "i r pf p rr f".split():
        if t in priors:
            model.parameters[t]["random_effects"].update(priors[t]["random_effects"])
            model.parameters[t]["fixed_effects"].update(priors[t]["fixed_effects"])

    # if rate_type is a string, make it into a dict
    if type(rate_type) == str:
        rate_type = dict(i=rate_type, r=rate_type, f=rate_type, p=rate_type, m_with=rate_type)
    for t in ["i", "r", "f", "m_with"]:
        rate_type.setdefault(t, "neg_binom")

    rate = {}
    ages = model.parameters["ages"]
    for t in "irf":
        rate[t] = age_specific_rate(
            model, t,
            reference_area, reference_sex, reference_year,
            mu_age=None,
            mu_age_parent=priors.get((t, "mu")),
            sigma_age_parent=priors.get((t, "sigma")),
            zero_re=zero_re,
            rate_type=rate_type[t],
        )[t]  # age_specific_rate()[t] creates proper nesting of the dict

        # set initial values from data
        if t in priors:
            if isinstance(priors[t], mc.Node):
                initial = priors[t].value
            else:
                initial = pl.array(priors[t])
        else:
            initial = rate[t]["mu_age"].value.copy()
            df = model.get_data(t)
            if len(df.index) > 0:
                mean_data = df.groupby(["age_start", "age_end"]).mean().delevel()
                for i, row in mean_data.T.iteritems():
                    start = row["age_start"] - rate[t]["ages"][0]
                    end = row["age_end"] - rate[t]["ages"][0]
                    initial[start:end] = row["value"]

        for i, k in enumerate(rate[t]["knots"]):
            rate[t]["gamma"][i].value = pl.log(initial[k - rate[t]["ages"][0]] + 1.0e-9)

    m_all = 0.01 * pl.ones(101)
    df = model.get_data("m_all")
    if len(df.index) == 0:
        print "WARNING: all-cause mortality data not found, using m_all = .01"
    else:
        mean_mortality = df.groupby(["age_start", "age_end"]).mean().delevel()

        knots = []
        for i, row in mean_mortality.T.iteritems():
            # integer age index for the midpoint of the age interval
            knots.append(int(pl.clip((row["age_start"] + row["age_end"] + 1.0) / 2.0, 0, 100)))
            m_all[knots[-1]] = row["value"]

        # extend knots as constant beyond endpoints
        knots = sorted(knots)
        m_all[0] = m_all[knots[0]]
        m_all[100] = m_all[knots[-1]]
        knots.insert(0, 0)
        knots.append(100)

        m_all = scipy.interpolate.interp1d(knots, m_all[knots], kind="linear")(pl.arange(101))
    m_all = m_all[ages]

    logit_C0 = mc.Uniform("logit_C0", -15, 15, value=-10.0)

    # use Runge-Kutta 4 ODE solver
    import dismod_ode

    N = len(m_all)
    num_step = 10  # double until it works
    ages = pl.array(ages, dtype=float)
    fun = dismod_ode.ode_function(num_step, ages, m_all)

    @mc.deterministic
    def mu_age_p(logit_C0=logit_C0,
                 i=rate["i"]["mu_age"],
                 r=rate["r"]["mu_age"],
                 f=rate["f"]["mu_age"]):
        # for acute conditions, it is silly to use the ODE solver to
        # derive prevalence, which can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)
        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.0
        return p

    p = age_specific_rate(
        model, "p",
        reference_area, reference_sex, reference_year,
        mu_age_p,
        mu_age_parent=priors.get(("p", "mu")),
        sigma_age_parent=priors.get(("p", "sigma")),
        zero_re=zero_re,
        rate_type=rate_type["p"],
    )["p"]

    @mc.deterministic
    def mu_age_pf(p=p["mu_age"], f=rate["f"]["mu_age"]):
        return p * f

    pf = age_specific_rate(
        model, "pf",
        reference_area, reference_sex, reference_year,
        mu_age_pf,
        mu_age_parent=priors.get(("pf", "mu")),
        sigma_age_parent=priors.get(("pf", "sigma")),
        lower_bound="csmr",
        include_covariates=False,
        zero_re=zero_re,
    )["pf"]

    @mc.deterministic
    def mu_age_m(pf=pf["mu_age"], m_all=m_all):
        return (m_all - pf).clip(1.0e-6, 1.0e6)

    rate["m"] = age_specific_rate(
        model, "m_wo",
        reference_area, reference_sex, reference_year,
        mu_age_m, None, None,
        include_covariates=False,
        zero_re=zero_re,
    )["m_wo"]

    @mc.deterministic
    def mu_age_rr(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        return (m + f) / m

    rr = age_specific_rate(
        model, "rr",
        reference_area, reference_sex, reference_year,
        mu_age_rr,
        mu_age_parent=priors.get(("rr", "mu")),
        sigma_age_parent=priors.get(("rr", "sigma")),
        rate_type="log_normal",
        include_covariates=False,
        zero_re=zero_re,
    )["rr"]

    @mc.deterministic
    def mu_age_smr(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"], m_all=m_all):
        return (m + f) / m_all

    smr = age_specific_rate(
        model, "smr",
        reference_area, reference_sex, reference_year,
        mu_age_smr,
        mu_age_parent=priors.get(("smr", "mu")),
        sigma_age_parent=priors.get(("smr", "sigma")),
        rate_type="log_normal",
        include_covariates=False,
        zero_re=zero_re,
    )["smr"]

    @mc.deterministic
    def mu_age_m_with(m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        return m + f

    m_with = age_specific_rate(
        model, "m_with",
        reference_area, reference_sex, reference_year,
        mu_age_m_with,
        mu_age_parent=priors.get(("m_with", "mu")),
        sigma_age_parent=priors.get(("m_with", "sigma")),
        include_covariates=False,
        zero_re=zero_re,
        rate_type=rate_type["m_with"],
    )["m_with"]

    # duration = E[time in bin C]
    @mc.deterministic
    def mu_age_X(r=rate["r"]["mu_age"], m=rate["m"]["mu_age"], f=rate["f"]["mu_age"]):
        hazard = r + m + f
        pr_not_exit = pl.exp(-hazard)
        X = pl.empty(len(hazard))
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X) - 1)):
            X[i] = pr_not_exit[i] * (X[i + 1] + 1) + 1 / hazard[i] * (1 - pr_not_exit[i]) - pr_not_exit[i]
        return X

    X = age_specific_rate(
        model, "X",
        reference_area, reference_sex, reference_year,
        mu_age_X,
        mu_age_parent=priors.get(("X", "mu")),
        sigma_age_parent=priors.get(("X", "sigma")),
        rate_type="normal",
        include_covariates=True,
        zero_re=zero_re,
    )["X"]

    vars = rate
    vars.update(logit_C0=logit_C0, p=p, pf=pf, rr=rr, smr=smr, m_with=m_with, X=X)
    return vars
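A hedged usage sketch for consistent(); the fitting and extraction calls below are illustrative assumptions, not taken from the source:

import pymc as mc

vars = consistent(model)                 # model: a data.ModelData instance
mc.MAP(vars).fit(method='fmin_powell')   # crude joint fit of all rate types
p_hat = vars['p']['mu_age'].value        # age pattern of prevalence at the MAP estimate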
def one_compartment_ode(S, t, h_b, h_m):
    # piecewise-constant rate functions of time, implemented as arrays
    t = int(pl.clip(t, 0, len(h_b) - 1))
    return (h_b[t] - h_m[t]) * S
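Since one_compartment_ode has the (y, t, *args) signature that scipy expects, it can be integrated directly with odeint; a minimal usage sketch with invented hazard arrays:

import pylab as pl
from scipy.integrate import odeint

h_b = 0.02 * pl.ones(100)   # entry hazard per unit time (invented values)
h_m = 0.01 * pl.ones(100)   # exit hazard per unit time (invented values)
times = pl.arange(100.)
S = odeint(one_compartment_ode, 1.0, times, args=(h_b, h_m))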
def binarize_image(job):
    image_object, i = job
    raw = read_image_gray(image_object)
    image = raw - amin(raw)
    if amax(image) == amin(image):
        return  # image is empty
    image /= amax(image)
    check = check_page(amax(image) - image)
    if check is not None:
        return
    if args.gray:
        extreme = 0
    else:
        extreme = (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        m = interpolation.zoom(image, args.zoom)
        m = filters.percentile_filter(m, args.perc, size=(args.range, 2))
        m = filters.percentile_filter(m, args.perc, size=(2, args.range))
        m = interpolation.zoom(m, 1.0 / args.zoom)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
    if args.maxskew > 0:
        d0, d1 = flat.shape
        o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args.maxskew
        ms = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0
    d0, d1 = flat.shape
    o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args.escale > 0:
        e = args.escale
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binary = 1 * (flat > args.threshold)
    return (binary, flat)
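A function taking an (image, index) job tuple like binarize_image is typically driven by a worker pool; a hedged sketch of such a driver (filenames and args.parallel are assumptions):

from multiprocessing import Pool

jobs = [(fname, i) for i, fname in enumerate(filenames)]
if args.parallel > 1:
    pool = Pool(processes=args.parallel)
    results = pool.map(binarize_image, jobs)
else:
    results = [binarize_image(job) for job in jobs]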
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    raw = ocrolib.pil2array(page_image)
    flat = raw.astype("float64")

    # estimate skew angle and rotate
    if self.parameter['maxskew'] > 0:
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Skew Angle")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = self.parameter['maxskew']
        ms = int(2 * self.parameter['maxskew'] * self.parameter['skewsteps'])
        angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if self.parameter['parallel'] < 2:
        LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    if self.parameter['parallel'] < 2:
        LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    deskewed = 1 * (flat > self.parameter['threshold'])

    # output the normalized grayscale and the thresholded images
    # TODO: needs clarification, as the results affect the following pre-processing steps
    # orientation = -angle
    # orientation = 180 - ((180 - orientation) % 360)
    if angle is None:  # FIXME: quick fix to prevent an angle of None
        angle = 0
    page.set_orientation(angle)
    page_xywh['features'] += ',deskewed'
    bin_array = array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B')
    page_image = ocrolib.array2pil(bin_array)
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
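ocrolib.midrange, used for the final thresholding above, is essentially the midpoint of the image's dynamic range; a hedged one-line equivalent:

import numpy as np

def midrange(image, frac=0.5):
    # midpoint of the image's dynamic range, used as the binarization pivot
    return frac * (np.amin(image) + np.amax(image))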
def _input_data_from_gbd_json(dm, covs):
    """ Translate input data """
    import dismod3

    # remove any rows with 'ignore' columns set to 1
    dm['data'] = [d for d in dm['data'] if not (d.get('Ignore') or d.get('ignore'))]

    # remove any data with type-specific heterogeneity set to Unusable
    if 'global_priors' in dm['params']:
        for t in dm['params']['global_priors']['heterogeneity']:
            if dm['params']['global_priors']['heterogeneity'][t] == 'Unusable':
                print '%s has heterogeneity unusable, dropping %d rows' % (
                    t, len([d for d in dm['data'] if d['data_type'] == t + ' data']))
                dm['data'] = [d for d in dm['data'] if d['data_type'] != t + ' data']

    input_data = {}
    for field in 'effective_sample_size age_start age_end year_start year_end'.split():
        input_data[field] = []
        for row in dm['data']:
            val = row.get(field, '')
            if val == '':
                val = pl.nan
            input_data[field].append(float(val))

    input_data['sex'] = []
    for row in dm['data']:
        input_data['sex'].append(row['sex'])

        # replace sex 'all' with sex 'total'
        if input_data['sex'][-1] == 'all':
            input_data['sex'][-1] = 'total'

        assert input_data['sex'][-1] != ''

    new_type_name = {
        'incidence data': 'i',
        'prevalence data': 'p',
        'remission data': 'r',
        'excess-mortality data': 'f',
        'prevalence x excess-mortality data': 'pf',
        'all-cause mortality data': 'm_all',
        'relative-risk data': 'rr',
        'duration data': 'X',
        'smr data': 'smr',
        'cause-specific mortality data': 'csmr',
        'mortality data': 'm_with',
    }
    input_data['data_type'] = [new_type_name[row['data_type']] for row in dm['data']]

    for field in 'value standard_error lower_ci upper_ci'.split():
        input_data[field] = []
        for row in dm['data']:
            val = row.get(field, '')
            if val == '':
                val = pl.nan
            else:
                val = float(val) / float(row.get('units', '1').replace(',', ''))
            input_data[field].append(val)

    input_data['area'] = []
    for row in dm['data']:
        val = row.get('country_iso3_code', '')
        if val == '' or val == 'all':
            val = dismod3.utils.clean(row['gbd_region'])
        input_data['area'].append(val)

        assert input_data['area'][-1] != ''

    # store age_weights as semi-colon delimited text, since pandas doesn't
    # like arrays in arrays and doesn't save comma-separated fields correctly
    input_data['age_weights'] = [
        ';'.join(['%.4f' % w for w in row.get('age_weights', [])])
        for row in dm['data']
    ]

    # add selected covariates
    if 'covariates' in dm['params']:
        for level in ['Country_level', 'Study_level']:
            for cv in dm['params']['covariates'].get(level, []):
                if dm['params']['covariates'][level][cv]['rate']['value']:
                    input_data['x_%s' % cv] = []
                    for row in dm['data']:
                        if level == 'Country_level':
                            if row['data_type'] == 'all-cause mortality data':
                                # don't bother to merge covariates into all-cause mortality data
                                input_data['x_%s' % cv].append(0.)
                            elif row['region'] == 'all':
                                # don't bother to merge covariates into regional data
                                input_data['x_%s' % cv].append(0.)
                            elif row.get('country_iso3_code'):
                                iso3 = row['country_iso3_code']
                                # special case for countries that CODEm does not report on
                                if 'ASDR' in cv:
                                    if iso3 in ['HKG', 'MAC']:
                                        iso3 = 'TWN'  # TODO: average over CHN, PRK, TWN
                                    if iso3 in ['PRI', 'BMU']:
                                        iso3 = 'CUB'  # TODO: average over caribbean countries
                                input_data['x_%s' % cv].append(
                                    covs[cv][iso3, row['sex'],
                                             pl.clip((row['year_start'] + row['year_end']) / 2,
                                                     1980., 2012.)])
                            else:
                                # handle regional data
                                df = covs[(covs['region'] == dismod3.utils.clean(row['gbd_region']))
                                          & (covs.index.get_level_values(1) == row['sex'])
                                          & (covs.index.get_level_values(2) == pl.clip(
                                              (row['year_start'] + row['year_end']) / 2, 1980., 2012.))]
                                #input_data['x_%s'%cv].append(
                                #    (df[cv]*df['pop']).sum() / df['pop'].sum()
                                #    )
                                input_data['x_%s' % cv].append(0.)  # TODO: remove regional data
                        elif level == 'Study_level':
                            input_data['x_%s' % cv].append(
                                float(row.get(dismod3.utils.clean(cv), '') or 0.))

                # also include a column of input data for 'z_%s' % cv if it is requested
                if dm['params']['covariates'][level][cv]['error']['value']:
                    input_data['z_%s' % cv] = [
                        float(row.get(dismod3.utils.clean(cv), '') or 0.)
                        for row in dm['data']
                    ]

    input_data = pandas.DataFrame(input_data)

    # replace age_end 1 with age_end 0, correcting a common mistake in data entry
    i = (input_data['age_start'] == 0) & (input_data['age_end'] == 1)
    if i.sum() > 0:
        print 'WARNING: correcting age_end in %d rows that have age_start == 0, age_end == 1 (old format uses "demographic" notation)' % i.sum()
        input_data['age_end'][i] = 0

    # replace triple underscores with a single underscore, a problem with
    # consistency of the spacing in "North Africa / Middle East"
    input_data['area'] = [a.replace('___', '_') for a in input_data['area']]

    # print checks of data
    for i, row in input_data.T.iteritems():
        if pl.isnan(row['value']):
            print 'WARNING: value in row %d is missing' % i
    input_data = input_data[~pl.isnan(input_data['value'])]

    return input_data
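The covariate lookups above clamp each study's midpoint year into the 1980-2012 range covered by the covariate table; a quick illustration of that clipping (values invented):

import pylab as pl

print pl.clip((1975 + 1979) / 2, 1980., 2012.)   # midpoint 1977 -> 1980.0
print pl.clip((2000 + 2004) / 2, 1980., 2012.)   # midpoint 2002 -> 2002.0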