def get_ci(self, alpha=0.05, div=5):
    """Get upper- and lower-bounds (confidence interval) of normal
    distribution from xs stage_heights at 'div' evenly spaced intervals.

    'alpha' - statistical alpha value
    'div' - number of intervals"""
    ubounds = []  # upper-bounds
    lbounds = []  # lower-bounds

    def get_bounds(mean, stdev, alpha):
        z = norm.ppf((1 - alpha / 2), scale=stdev)  # z, mean = 0, 2-sided
        ub = mean + z  # upper bound
        lb = mean - z  # lower bound
        return ub, lb

    interval = self.max_q_query / div
    ci_qvals = scipy.arange(0, (self.max_q_query + interval), interval)  # [1:]
    for q in ci_qvals:
        other_stages = self.get_stage(q)  # xs stages
        mean = scipy.average(other_stages)  # mean of xs stages
        stdev = scipy.std(other_stages)  # stdev of xs stages
        ub, lb = get_bounds(mean=mean, stdev=stdev, alpha=alpha)
        ubounds.append(ub)
        lbounds.append(lb)
    self.ci_vals = ci_qvals
    self.ubounds = scipy.nan_to_num(ubounds)
    self.lbounds = scipy.nan_to_num(lbounds)
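# A minimal standalone sketch (not part of the class above) of the bound
# computation used in get_ci: norm.ppf((1 - alpha/2), scale=stdev) is the
# two-sided quantile of a zero-mean normal, so mean +/- z spans the
# confidence interval. The toy `stages` array is hypothetical.
from scipy.stats import norm
import numpy as np

stages = np.array([2.1, 2.4, 2.2, 2.6, 2.3])  # hypothetical cross-section stages
mean, stdev = np.mean(stages), np.std(stages)
z = norm.ppf(1 - 0.05 / 2, scale=stdev)       # 95% two-sided quantile, mean = 0
lower, upper = mean - z, mean + z
print(lower, upper)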
def _compute_weights(source_series, parcel_series, identities, inverse):
    cplv_array = plv(source_series, parcel_series, identities)
    """Get weights and flip. This could be the output."""
    weights = scipy.sign(scipy.real(cplv_array)) * scipy.real(cplv_array)**2

    """Create weighted inverse operator and normalize the norm of weighted inv
    op to match original inv op's norm."""
    """Multiply sensor dimension in inverseOperator by weight. This one would
    be the un-normalized operator."""
    weighted_inv = scipy.einsum('ij,i->ij', inverse, weights)

    n_parcels = max(identities) + 1
    """Initialize norm normalized weights. Maybe not necessary."""
    weights_normalized = scipy.zeros(len(weights))
    for parcel in range(n_parcels):  # Normalize parcel level norms.
        # Index sources belonging to parcel
        ii = [i for i, source in enumerate(identities) if source == parcel]
        # Normalize per parcel.
        weights_normalized[ii] = weights[ii] * (norm(inverse[ii]) /
                                                norm(weighted_inv[ii]))

    """Parcel level normalized operator."""
    weighted_inv = scipy.einsum('ij,i->ij', inverse, weights_normalized)

    """Operator level normalized operator. If there are sources not in any
    parcel weightedInvOp gets Nan values due to normalizations."""
    weighted_inv *= norm(inverse) / norm(scipy.nan_to_num(weighted_inv))
    weighted_inv = scipy.nan_to_num(weighted_inv)

    return weighted_inv
def recommendation(x, y):
    z = scipy.dot(
        scipy.nan_to_num(x / sla.norm(x, axis=0)),
        scipy.nan_to_num(y / sla.norm(y)),
    )
    X = scipy.nan_to_num(x / sla.norm(x, axis=0))
    Z = scipy.nan_to_num(z / sla.norm(z))
    return scipy.dot(X.T, Z)
def get_ci(self, alpha=0.05, div=5, axis='x'):
    """Get upper- and lower-bounds (confidence interval) of normal
    distribution from xs stage_heights at 'div' evenly spaced intervals.

    'axis' - designates which axis to create bounds over: 'x' corresponds to
        discharge and 'y' corresponds to stage-height
    'alpha' - statistical alpha value
    'div' - number of intervals"""
    ubounds = []  # upper-bounds
    lbounds = []  # lower-bounds

    def get_bounds(mean, stdev, alpha):
        z = norm.ppf((1 - alpha / 2), scale=stdev)  # z, mean = 0, 2-sided
        ub = mean + z  # upper bound
        lb = mean - z  # lower bound
        return ub, lb

    if axis == 'x':
        interval = self.max_q_query / div
        ci_qvals = scipy.arange(0, (self.max_q_query + interval), interval)[1:]
        for q in ci_qvals:
            hand_stage = self.get_stage(q)[0]  # hand_stage
            other_stages = self.get_stage(q)[1]  # other_stages
            mean = scipy.average(other_stages)
            stdev = scipy.std(other_stages)
            ub, lb = get_bounds(mean=mean, stdev=stdev, alpha=alpha)
            ubounds.append(ub)
            lbounds.append(lb)
        self.ci_vals = ci_qvals
    elif axis == 'y':
        interval = self.max_h_query / div
        ci_hvals = scipy.arange(0, (self.max_h_query + interval), interval)[1:]
        for h in ci_hvals:
            hand_disch = self.get_disch(h)[0]  # hand_disch
            other_dischs = self.get_disch(h)[1]  # other_dischs
            mean = scipy.average(other_dischs)
            stdev = scipy.std(other_dischs)
            ub, lb = get_bounds(mean=mean, stdev=stdev, alpha=alpha)
            ubounds.append(ub)
            lbounds.append(lb)
        self.ci_vals = ci_hvals
    self.ubounds = scipy.nan_to_num(ubounds)
    self.lbounds = scipy.nan_to_num(lbounds)
def forces(self):
    """ get the forces between cells, as array, both from links and from
        the native force_func
    """
    pos = self.get_pos_arr(force=True)

    force_arr = sp.zeros_like(pos)

    for link in self.links:
        force = link.force
        force_arr[link.one.index] += force
        force_arr[link.two.index] -= force

    kdtree = self._get_kdtree(force=True)

    for i, j in kdtree.query_pairs(self.xi * 1.0):
        force = self.force_func(self.cells[i].pos, self.cells[j].pos)
        # disp = self.cells[i].pos - self.cells[j].pos
        # L = norm(disp)
        # force = 2 * self.a**4 * ( 2 * self.xi**2 - 3 * self.xi * L + L**2 )/( self.xi**2 * L**6 ) * disp
        force_arr[i] += force
        force_arr[j] -= force

    return sp.nan_to_num(force_arr)
def classTypes_km(self, xyz, datapath2save, k=6, runtimes=1000):
    xyzz = gt.normlistlist(listlista=xyz[1:], metacolcount=0, sumormax='sum')  # xyz[1:]
    xy = zip(*xyzz)  # [1:]
    z = zip(*xyz[:1])
    labels, res = [], []
    # print len(xy),xy
    xy = sp.nan_to_num(xy)
    labels, kmcenter, kmfit = netky.kmeans(xy, k, show=False, runtimes=runtimes)
    # labels.sort()
    print modefans, modemen, kmcenter, labels
    centername = datapath2save + '.center'  # worksfolder_fig+'shapedis_'+str(personcnt)+'_'+str(experimenTimes)+'_'+str(modefans)+'_'+str(modemen)+'.center'
    gt.savefigdata(centername, kmcenter, labels)
    res = itemcntDis(labels)
    fig = plt.figure()
    for item in zip(*res):
        xx = range(1, k * len(item), k)
        plt.plot(xx, item)
    figname = datapath2save + '.png'  # worksfolder_fig+'shapedis_'+str(personcnt)+'_'+str(experimenTimes)+'_'+str(modefans)+'_'+str(modemen)+'.png'
    fig.dpi = 300
    fig.savefig(figname, dpi=fig.dpi)
    # pylab.savefig(figname, dpi=fig.dpi)
    gt.savefigdata(datafilepath=figname + '.data', x=xx, y=zip(*res),
                   errorbarlist=None, title='title', xlabel='', ylabel='',
                   leglend='')
    plt.close()
def forces(self):
    """ get the forces between cells, as array, both from links and from
        the native force_func
    """
    pos = self.get_pos_arr(force=True)

    force_arr = sp.zeros_like(pos)

    for link in self.links:
        force = link.force
        force_arr[link.one.index] += force
        force_arr[link.two.index] -= force

    kdtree = self._get_kdtree(force=True)

    for i, j in kdtree.query_pairs(self.xi * 1.0):
        force = self.force_func2(self.cells[i], self.cells[j])
        # disp = self.cells[i].pos - self.cells[j].pos
        # L = norm(disp)
        # force = 2 * self.a**4 * ( 2 * self.xi**2 - 3 * self.xi * L + L**2 )/( self.xi**2 * L**6 ) * disp
        force_arr[i] += force
        force_arr[j] -= force

    return sp.nan_to_num(force_arr)
def MarkovMutualInfo(transitionMatrix):
    p0 = MarkovSteadyState(transitionMatrix)
    # M = scipy.transpose(transitionMatrix)
    M = transitionMatrix  # ***8testing
    sum, dot, log2 = scipy.sum, scipy.dot, lambda x: scipy.nan_to_num(scipy.log2(x))
    return scipy.real_if_close(sum(dot(p0, M * log2(M))) - sum(p0 * log2(p0)))
def fire(self, fmax=0.1, Nmin=5., finc=1.1, fdec=0.5, alphastart=0.1, fa=0.99,
         deltatmax=10., maxsteps=10**5):
    """ Do a fire relaxation """
    alpha = alphastart
    deltat = 0.1

    pos = self.get_pos_arr(force=True)
    v = sp.zeros_like(pos)
    self._update_vel(v)
    v = self._get_vel_arr()

    steps_since_negative = 0

    def norm_arr(vec):
        return sp.sqrt(sp.sum(vec**2, 1))

    def unitize_arr(vec):
        return ((vec.T) / norm(vec)).T

    forces = sp.nan_to_num(sp.array([[sp.inf, sp.inf]]))

    step_num = 0

    print "Beginning FIRE Relaxation -- fmax={}".format(fmax)

    while max(norm_arr(forces)) > fmax and step_num < maxsteps:
        forces = self.forces
        power = sp.vdot(forces, v)

        print "Step: {}, max_force: {}, power: {}".format(step_num,
                                                          max(norm_arr(forces)),
                                                          power)

        v = (1.0 - alpha) * v + alpha * (norm_arr(v) * unitize_arr(forces).T).T

        if power > 0.:
            if steps_since_negative > Nmin:
                deltat = min(deltat * finc, deltatmax)
                alpha = alpha * fa
            steps_since_negative += 1
        else:
            steps_since_negative = 0
            deltat = deltat * fdec
            v *= 0.
            alpha = alphastart

        v += forces * deltat
        pos += v * deltat
        self._update_pos(pos)
        step_num += 1

    self._update_pos(pos)
    self._update_vel(v)
    print "Relaxation finished..."
def solve(SA, SD, SAcap, update_function, rtol=0.05, maxits=100):
    """
    #FIXME DSG-EQRM what is the dimensions of these, and the return value?
    SA = demand curve (g)
    SAcap = capacity curve (g)
    SD = x axis (for both capacity and demand) (mm)

    update_function(intersection_point.x) makes a new SA, SD, SAcap.
    It also returns an exit flag.
    It is usually (always?)
    eqrm_code.capacity_spectrum_model.Capacity_spectrum_model.updated_responce

    rtol of 0.05 means the process will halt if intersection_x moved by less
    than 5% in the last iteration; all points are then deemed to have
    converged.

    maxits is the maximum number of iterations. If maxits is exceeded, then
    any points that are not yet deemed to have converged are set to
    (intersection_x + old_intersection_x)/2
    """
    # old terminology, SDcr was intersection_x
    iters = 0
    intersection_x = find_intersection(SD, SA, SAcap)
    exit_flag = False
    while ((iters <= maxits) & (not exit_flag)):
        # if 1:
        # if iters>0:
        #     print 'iter'
        #     print iters
        iters += 1  # update number of iterations
        old_intersection_x = intersection_x.copy()  # copy old intersection
        # update curves
        SA, SD, SAcap, exit_flag = update_function(intersection_x)
        # get new intersection
        intersection_x = find_intersection(SD, SA, SAcap)
        oldsettings = seterr(invalid='ignore')
        diff = abs(intersection_x - old_intersection_x) / \
            old_intersection_x  # diff
        seterr(**oldsettings)
        # This is needed in windows to stop nan's setting the diff to -1.#IND
        diff = nan_to_num(diff)
        max_diff = diff.max()  # find the relative change in intersection_x
        if max_diff < rtol:
            exit_flag = True  # check for convergence
    if (iters >= maxits):
        # if iteration doesn't converge, take the average value
        # use average values
        non_convergent = where(diff >= rtol)  # find non_convergent cases
        # x = (x+x_old)/2
        intersection_x[non_convergent] += old_intersection_x[non_convergent]
        intersection_x[non_convergent] *= 0.5
    else:
        non_convergent = array([])
    return intersection_x, non_convergent
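# Minimal sketch (not from eqrm_code) of the convergence test above: the
# relative change is NaN wherever old_intersection_x is zero, so nan_to_num()
# zeros those entries before the maximum is taken. The toy arrays are
# hypothetical.
from numpy import array, nan_to_num, seterr

old_x = array([2.0, 0.0, 5.0])
new_x = array([2.1, 0.0, 5.0])
oldsettings = seterr(invalid='ignore')
diff = abs(new_x - old_x) / old_x   # [0.05, nan, 0.0]
seterr(**oldsettings)
max_diff = nan_to_num(diff).max()   # 0.05, so converged for rtol >= 0.05
print(max_diff)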
def get_percent_lesion_overlap(lesion_template, track_templates):
    """
    Implements the 'OVERTRACK' tract/lesion overlap method of
    Thiebaut de Schotten et al. (2008)

    Generates a probabilistic lesion map, then iteratively thresholds at 5%,
    10%, 15%...100% of patients, each time overlaying the thresholded lesion
    map on the binary tract template and calculating the percent overlap

    Inputs:

    lesion_template = lesion probability map, scaled between 0 and 1
        (use 'make_tract_template_nibabel' function above to generate this
        from a set of lesion images in standard space)

    track_templates = list of BINARY template images to calculate overlap for
        (NOTE: these are different to 'track templates' used in Hua et al.
        (2008) type analyses and functions above, in that the track templates
        here are binary, and not probabilistic)

    overlap_image_names = list of names for the overlap images to be generated

    Outputs:

    - overlap_table = list of average % overlap for each track template image.
      Final column = lesion probability thresholds (0.1-1), first few columns
      = % overlap at that threshold for each track
    - the corresponding overlap images are written to the filenames specified
      in the 'overlap_images' input variable
    """
    overlap_vals = [round(o * 0.05, 2) for o in range(0, 21)]
    # = n threshold levels (5%, 10%, 15%, etc.) x n template images
    overlap_table = np.zeros((len(overlap_vals), len(track_templates) + 1))
    Li = scipy.nan_to_num(nib.load(lesion_template).get_data())
    for t in range(0, len(track_templates)):
        Ti = scipy.nan_to_num(nib.load(track_templates[t]).get_data())
        for o in range(0, len(overlap_vals)):
            Li_thresh = np.float32(np.greater(Li, overlap_vals[o]))
            Li_thresh_mul_Ti = Li_thresh * Ti
            overlap_table[o, t] = (1 / Ti.sum()) * Li_thresh_mul_Ti.sum()
            # nib.Nifti1Image(Li_thresh_mul_Ti,
    print overlap_table.shape
    print len(overlap_vals)
    overlap_table[:, overlap_table.shape[1] - 1] = overlap_vals
    return overlap_table
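# Minimal illustration (made-up 1-D arrays rather than NIfTI volumes) of the
# overlap measure used above: threshold the lesion probability map, multiply
# by the binary tract mask, and divide by the tract volume.
import numpy as np

lesion_prob = np.array([0.0, 0.2, 0.6, 0.9, 0.1])  # hypothetical lesion map
tract_mask = np.array([0.0, 1.0, 1.0, 1.0, 0.0])   # hypothetical binary tract
thresh = 0.5
lesion_thresh = np.float32(np.greater(lesion_prob, thresh))
overlap = (lesion_thresh * tract_mask).sum() / tract_mask.sum()
print(overlap)  # 2 of 3 tract voxels overlap -> 0.666...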
def csv2data(path):
    '''
    Given a path to a .csv file, output the information in a data frame of
    the form [Time, R1, R2, ... , Rn], where n is the number of response
    columns in the file.

    If a file is already pre-labeled, maintain those labels.
    If the time variable is in milliseconds, convert to seconds.
    '''
    ncols = sp.shape(pd.read_csv(path, nrows=2, header=None))[1]

    ## Determine if file has headers
    headtest = isHeader(path)

    ## Add column names if they are missing
    # Ensure 'Time' is the first column name
    # Allows user to label response columns as they want
    if not headtest:
        headers = ['Time']
        for i in range(ncols - 1):
            headers.append(('R%i' % (i + 1)))
        data = pd.read_csv(path, header=None, dtype='float16')
        data.columns = [headers]
    else:
        data = pd.read_csv(path, dtype='float16')
        data.rename(columns={data.columns[0]: 'Time'}, inplace=True)

    ## Delete empty columns
    # * A problem I found importing data from csv files
    #   generated by a Shocklog 298.
    for i in range(ncols - 1):
        if pd.isnull(data.iloc[0, i + 1]):
            data.drop(data.columns[i + 1], axis=1, inplace=True)

    ## Standardize the time column
    # Time may be input in either seconds or milliseconds.
    # We need to allow for this and convert ms to s
    # !!! Assumes the sampling frequency is greater than 10
    #     and less than 10,000 !!!
    dt = data.Time[2] - data.Time[1]
    if dt > 0.1:
        data.Time = data.Time / 1000

    ## Remove gravity bias
    for i in range(data.shape[1] - 1):
        data.iloc[:, i + 1] = data.iloc[:, i + 1] - sp.mean(
            sp.nan_to_num(data.iloc[:, i + 1]))

    return (data)
def get_xs_q(self, low, upto):
    """Returns an array containing q-values for each cross-section at each
    1ft stage-height interval, calculated from average n-values.

    'upto' - describes how many stage-heights to iterate through when
        collecting n-values for all cross-sections"""
    area = self.handarea[low:upto]
    hydrad = self.handrad[low:upto]
    slope = self.handslope
    n = self.nstats(low=low, upto=upto)[:, 1]  # xs mean n-values
    xs_qvals = self.mannings_q(area=area, hydrad=hydrad, slope=slope, n=n)
    self.xs_qvals = scipy.nan_to_num(xs_qvals)
    return (self.xs_qvals, self.handstage[low:upto])
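# Hypothetical standalone version of the Manning's equation step used by
# get_xs_q above (the class's own mannings_q is not shown here). Assumes the
# SI form of the equation; nan_to_num guards against NaN or inf values that
# can arise from zero-valued array inputs.
import numpy as np

def mannings_q(area, hydrad, slope, n):
    # Q = (1/n) * A * R^(2/3) * sqrt(S)
    return np.nan_to_num((1.0 / n) * area * hydrad**(2.0 / 3.0) * np.sqrt(slope))

print(mannings_q(area=12.0, hydrad=1.5, slope=0.002, n=0.035))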
def get_template_probabilities(scalar_images, template_images):
    """
    Implements the tract template analysis method of Hua et al. (2008):

    For a list of scalar images, and probabilistic template images,
    calculates a weighted mean of the scalar image voxels, weighted by the
    corresponding template voxels

    Returns an N scalar images x N template images array
    """
    # Note the extra brackets - needed for np.zeros to work (see help)...
    Tp = np.zeros((len(scalar_images), len(template_images)))
    for s in range(0, len(scalar_images)):
        STp = []
        for t in range(0, len(template_images)):
            # load in the scalar image data, setting nans to zeros
            Si = scipy.nan_to_num(nib.load(scalar_images[s]).get_data())
            Ti = scipy.nan_to_num(nib.load(template_images[t]).get_data())
            # scale the scalar image values by template values
            SiTi = Si * Ti
            # add to output variable
            Tp[s, t] = str(SiTi.sum() / Ti.sum())
    # return the N scalar images x N template images array
    return Tp
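# Minimal illustration (made-up arrays rather than NIfTI images) of the
# template-weighted mean computed above: sum(scalar * template) / sum(template).
import numpy as np

scalar = np.array([0.4, 0.5, 0.6, 0.7])    # hypothetical scalar (e.g. FA) values
template = np.array([0.0, 0.2, 0.8, 1.0])  # hypothetical tract probabilities
weighted_mean = (scalar * template).sum() / template.sum()
print(weighted_mean)  # equivalent to np.average(scalar, weights=template)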
def get_xs_n(self, low, upto):
    """Returns an array containing n-values for each cross-section at each
    1ft stage-height interval.

    'upto' - describes how many stage-heights to iterate through when
        collecting n-values for all cross-sections"""
    area = self.handarea[low:upto]
    hydrad = self.handrad[low:upto]
    slope = self.handslope
    disch = self.get_disch(h=self.handstage[low:upto], kind='power')  # xs values only
    xs_nvals = self.mannings_n(area=area, hydrad=hydrad, slope=slope, disch=disch)
    self.xs_nvals = scipy.nan_to_num(xs_nvals)
    return self.xs_nvals
def plot(self, func, interp=True, plotter='imshow'):
    import matplotlib as mpl
    from matplotlib import pylab as pl
    if interp:
        lpi = self.interpolator(func)
        z = lpi[self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
                self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
    else:
        y, x = sp.mgrid[
            self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
            self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
        z = func(x, y)

    z = sp.where(sp.isinf(z), 0.0, z)

    extent = (self.xrange[0], self.xrange[1],
              self.yrange[0], self.yrange[1])
    pl.ioff()
    pl.clf()
    pl.hot()  # Some like it hot
    if plotter == 'imshow':
        pl.imshow(sp.nan_to_num(z), interpolation='nearest',
                  extent=extent, origin='lower')
    elif plotter == 'contour':
        Y, X = sp.ogrid[
            self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
            self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
        pl.contour(sp.ravel(X), sp.ravel(Y), z, 20)
    x = self.x
    y = self.y
    lc = mpl.collections.LineCollection(
        sp.array([((x[i], y[i]), (x[j], y[j])) for i, j in self.tri.edge_db]),
        colors=[(0, 0, 0, 0.2)])
    ax = pl.gca()
    ax.add_collection(lc)

    if interp:
        title = '%s Interpolant' % self.name
    else:
        title = 'Reference'
    if hasattr(func, 'title'):
        pl.title('%s: %s' % (func.title, title))
    else:
        pl.title(title)

    pl.show()
    pl.ion()
def test_array_ab_diff(self):
    a = array([7.35287023, 3.98947559, 0.])
    b = array([7.38625883, 3.98947559, 0.])
    diff = abs(a - b) / b
    # diff [ 0.00452037  0.  NaN]
    # Windows can't handle the NaN,
    # so it has to be set to zero
    diff = nan_to_num(diff)
    # print "diff", diff
    # this would return max_diff -1.#IND if NaN's aren't removed,
    # in windows.
    max_diff = diff.max()
    # print "max_diff", max_diff
    assert max_diff == diff[0]
def get_ci(self, alpha=0.05, div=5):
    """Get upper- and lower-bounds (confidence interval) of normal
    distribution from xs stage_heights at 'div' evenly spaced intervals.

    'alpha' - statistical alpha value
    'div' - number of intervals"""
    interval = self.max_q_query / div
    ci_qvals = scipy.arange(0, (self.max_q_query + interval), interval)
    ubounds = []  # upper-bounds
    lbounds = []  # lower-bounds
    for q in ci_qvals:
        hand_stage = self.get_stage(q)[0]  # hand_stage
        other_stages = self.get_stage(q)[1]  # other_stages
        mean = scipy.average(other_stages)
        stdev = scipy.std(other_stages)
        z = norm.ppf((1 - alpha / 2), scale=stdev)  # z, mean = 0, 2-sided
        lb = mean - z  # lower bound
        ub = mean + z  # upper bound
        ubounds.append(ub)
        lbounds.append(lb)
    self.ci_qvals = ci_qvals
    self.ubounds = scipy.nan_to_num(ubounds)
    self.lbounds = scipy.nan_to_num(lbounds)
def test_array_ab_diff(self):
    a = array([7.35287023, 3.98947559, 0.])
    b = array([7.38625883, 3.98947559, 0.])
    oldsettings = seterr(invalid='ignore')
    diff = abs(a - b) / b
    seterr(**oldsettings)
    # diff [ 0.00452037  0.  NaN]
    # Windows can't handle the NaN,
    # so it has to be set to zero
    diff = nan_to_num(diff)
    # print "diff", diff
    # this would return max_diff -1.#IND if NaN's aren't removed,
    # in windows.
    max_diff = diff.max()
    # print "max_diff", max_diff
    assert max_diff == diff[0]
def plot(self, func, interp=True, plotter='imshow'):
    import matplotlib as mpl
    from matplotlib import pylab as pl
    if interp:
        lpi = self.interpolator(func)
        z = lpi[self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
                self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
    else:
        y, x = sp.mgrid[self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
                        self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
        z = func(x, y)

    z = sp.where(sp.isinf(z), 0.0, z)

    extent = (self.xrange[0], self.xrange[1],
              self.yrange[0], self.yrange[1])
    pl.ioff()
    pl.clf()
    pl.hot()  # Some like it hot
    if plotter == 'imshow':
        pl.imshow(sp.nan_to_num(z), interpolation='nearest',
                  extent=extent, origin='lower')
    elif plotter == 'contour':
        Y, X = sp.ogrid[self.yrange[0]:self.yrange[1]:complex(0, self.nrange),
                        self.xrange[0]:self.xrange[1]:complex(0, self.nrange)]
        pl.contour(sp.ravel(X), sp.ravel(Y), z, 20)
    x = self.x
    y = self.y
    lc = mpl.collections.LineCollection(
        sp.array([((x[i], y[i]), (x[j], y[j])) for i, j in self.tri.edge_db]),
        colors=[(0, 0, 0, 0.2)])
    ax = pl.gca()
    ax.add_collection(lc)

    if interp:
        title = '%s Interpolant' % self.name
    else:
        title = 'Reference'
    if hasattr(func, 'title'):
        pl.title('%s: %s' % (func.title, title))
    else:
        pl.title(title)

    pl.show()
    pl.ion()
def compute_fairness(self):
    """Computes fairness across the range in `self.date_range`.

    Returns:
        a pandas DataFrame with three columns corresponding to each kind of
        prediction method (PredPol, perfect prediction (god), and the
        baseline (naive_count)). The entries of each column are an array
        where the ith entry is the average fairness over `self.date_range`
        when visiting i number of grid cells
    """
    fairness = {
        method: sp.zeros((len(self.results), len(self.lambda_columns)))
        for method in ['predpol', 'god', 'naive_count', 'random']
    }
    naive_count = count_seen(self.pred_obj, self.pred_obj.train)['num_observed']
    black = self.pred_obj.grid_cells.black.fillna(0)
    white = self.pred_obj.grid_cells.white.fillna(0)
    for i, (lambda_col, actual_col) in self._iterator():
        pct_black_caught = (self.results[actual_col] * black).values
        pct_black_caught /= sp.sum(pct_black_caught)
        pct_white_caught = (self.results[actual_col] * white).values
        pct_white_caught /= sp.sum(pct_white_caught)
        # On some days, no crime occurs. The following line treats those
        # results as zeros.
        fair_diff = sp.nan_to_num(pct_black_caught - pct_white_caught)

        sorted_idx = sp.argsort(self.results[actual_col])[::-1]
        fairness['god'][:, i] += fair_diff[sorted_idx]

        sorted_idx = sp.argsort(self.results[lambda_col])[::-1]
        fairness['predpol'][:, i] += fair_diff[sorted_idx]

        sorted_idx = sp.argsort(naive_count)[::-1]
        fairness['naive_count'][:, i] += fair_diff[sorted_idx]
        naive_count += self.results[actual_col]

        fairness['random'][:, i] += fair_diff[sorted_idx.sample(frac=1)]

    for k, v in fairness.items():
        fairness[k] = sp.sum(v, axis=1)
        fairness[k] = sp.cumsum(fairness[k]) / len(self.lambda_columns)
    return pd.DataFrame(fairness)
def MarkovEntropy(transitionMatrix):
    p0 = MarkovSteadyState(transitionMatrix)
    sum, log2 = scipy.sum, lambda x: scipy.nan_to_num(scipy.log2(x))
    return -scipy.real_if_close(sum(p0 * log2(p0)))
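# A small worked example (not part of the original module): the Shannon
# entropy of a steady-state distribution, with nan_to_num() preventing the
# 0 * log2(0) terms from producing NaN. Uses numpy in place of the scipy
# aliases above; the steady-state vector is hypothetical.
import numpy as np

p0 = np.array([0.5, 0.5, 0.0])  # hypothetical steady-state distribution
with np.errstate(divide='ignore'):
    log2p = np.nan_to_num(np.log2(p0))  # -inf for p=0 becomes a large finite number
entropy = -np.sum(p0 * log2p)           # the p=0 term contributes 0 * finite = 0
print(entropy)  # 1.0 bit; without nan_to_num this would be NaN (0 * -inf)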
j = ls[1]
fit = ls[2]
Temperature[i, j] = fit[0]
Denscol[i, j] = fit[2]
Turbvel[i, j] = fit[1]
tau0[i, j] = ls[-1]
model[i, j] = ls[-2]

# Parameter error (confidence intervals)
errmodel = sp.array(errmodel)
model = sp.array(model)
model = sp.swapaxes(model, 0, 1)
model = sp.swapaxes(model, 0, 2)
print('Calculated Fits')
model = sp.nan_to_num(model)
dust = sp.nan_to_num(dust)
Denscol = sp.nan_to_num(Denscol)
Turbvel = sp.nan_to_num(Turbvel)
Temperature = sp.nan_to_num(Temperature)
tau0 = sp.nan_to_num(tau0)
if Convolve:
    Denscol = Convolv(Denscol, head)
    Temperature = Convolv(Temperature, head)
    Turbvel = Convolv(Turbvel, head)
    mom2 = Convolv(mom2, head)
r1 = pf.PrimaryHDU(model)
r2 = pf.PrimaryHDU(Temperature)
r3 = pf.PrimaryHDU(Turbvel)
q1q2 = scipy.outer(q, q)
images = 2000

for i in range(n):
    for x in range(-images, images + 1, 1):
        for y in range(-images, images + 1, 1):
            if x**2 + y**2 > images**2:
                continue
            pos_diff = pos[i] - (pos + scipy.array((x, y, 0)) * box_l)
            r = scipy.sqrt(scipy.sum(pos_diff**2, 1))
            r3 = r**3
            qq = q1q2[i, :]

            tmp = qq / r
            tmp[abs(tmp) == scipy.inf] = 0
            energy += scipy.sum(tmp)

            pref = qq / r**3
            pref = pref.reshape((n, 1))
            tmp = pos_diff * scipy.hstack((pref, pref, pref))
            forces += scipy.nan_to_num(tmp)

ids = scipy.arange(n)
forces *= -1
scipy.savetxt(
    "coulomb_mixed_periodicity_system.data",
    scipy.hstack((ids.reshape((n, 1)), pos, q.reshape((n, 1)), forces)))
def unitize_arr(vec):
    norms = norm(vec)[:, newaxis]
    return nan_to_num(ne.evaluate('vec/norms'))
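# A quick sketch (assuming `norm` returns per-row norms, as the helper above
# implies) of why nan_to_num is needed: a zero-length row divides to NaN, and
# nan_to_num maps it back to a zero vector. Plain numpy is used here instead
# of numexpr.
import numpy as np

vec = np.array([[3.0, 4.0], [0.0, 0.0]])
norms = np.sqrt((vec**2).sum(axis=1))[:, np.newaxis]  # [[5.], [0.]]
with np.errstate(invalid='ignore'):
    unit = np.nan_to_num(vec / norms)                 # [[0.6, 0.8], [0., 0.]]
print(unit)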
# Initialize norm normalized weights. Maybe not necessary.
weightsNormalized = scipy.zeros(len(weights))
for parcel in range(numberParcels):  # Normalize parcel level norms.
    # Index sources belonging to parcel
    ii = [i for i, source in enumerate(sourceIdentities) if source == parcel]
    # Normalize per parcel.
    weightsNormalized[ii] = weights[ii] * (
        scipy.linalg.norm(inverseOperator[ii]) /
        scipy.linalg.norm(weightedInvOp[ii]))

# Parcel level normalized operator.
weightedInvOp = np.dot(scipy.eye(weightsNormalized.shape[0]) * weightsNormalized,
                       inverseOperator)

# Operator level normalized operator. If there are sources not in any parcel
# weightedInvOp gets Nan values due to normalizations.
weightedInvOp *= scipy.linalg.norm(inverseOperator) / scipy.linalg.norm(
    scipy.nan_to_num(weightedInvOp))
weightedInvOp = scipy.nan_to_num(weightedInvOp)

########## Check if weighting worked.
## Do correlations between the original time series and the weighted inverse
## and normal inverse models.
# Make parcel and sensor time series. Separate series to avoid overfitted
# estimation.
samplesSubset = 10000 + 2 * timeCut

checkParcelTimeSeries = scipy.random.randn(
    numberParcels, samplesSubset)  # Generate random signal

for i in range(numberParcels):
    checkParcelTimeSeries[i] = signal.cwt(
        checkParcelTimeSeries[i], signal.ricker,
        widths)  # Mexican hat continuous wavelet transform random series.
def unitize_arr(vec):
    norms = norm(vec)[:, newaxis]
    return nan_to_num(ne.evaluate('vec/norms'))
def compute_fp_knapsack(assess_obj, num_cells, total_cells_factor=5,
                        max_gap=0.05, precision=4, verbose=False):
    """Computes both a fairer selection of grid cells for each day tested in
    `assess_obj` and the accuracy/fairness curves for those results.

    Args:
        assess_obj: an AssessPol object
        num_cells: an array indicating the number of grid cells to compute
            the fairness modification on (calculating on every number in
            range(len(grid_cells)) is prohibitively expensive)
        total_cells_factor: an integer indicating the total number of cells
            to consider at every iteration. Rather than considering all grid
            cells at each step, we speed up computation by only comparing the
            top (num_cell * total_cells_factor) grid cells at each iteration.
        max_gap: an integer indicating the maximum tolerable fairness gap
            (the parameter D in chapter 4 of the final report).
        precision: an integer indicating the number of decimal points to
            preserve when passing weights to the MKP solver (which expects
            integer weights while the original fairness costs are in the
            unit range)

    Returns:
        a tuple consisting of:
        - a dictionary containing the information of which grid cells were
          chosen for each day and each number of grid cells in `num_cells`;
          specifically: maps from datestrings to another dictionary, where
          that dictionary maps from numbers N in `num_cells` to a list of
          indexes
        - array, average accuracy results for each number of grid cells
          considered
        - array, average fairness results for each number of grid cells
          considered
    """
    knapsack_items = {}
    knapsack_accuracy = sp.zeros(len(num_cells))
    knapsack_fairness = sp.zeros(len(num_cells))
    black = assess_obj.pred_obj.grid_cells.black.fillna(0)
    white = assess_obj.pred_obj.grid_cells.white.fillna(0)

    # scale max_gap by the same factor as we scale the weights
    max_gap *= 10**precision

    for i, (lambda_col, actual_col) in assess_obj._iterator():
        chosen_items = {}
        # lambda_col[:-7] is the date-string
        knapsack_items[lambda_col[:-7]] = chosen_items

        # Compute values needed for knapsack
        profits = assess_obj.results[lambda_col].values.astype(int)
        idx_profits_sorted = sp.argsort(profits)[::-1]

        pct_black_pred = (sp.log(assess_obj.results[lambda_col]) * black).values
        pct_black_pred /= sp.sum(pct_black_pred)
        pct_white_pred = (sp.log(assess_obj.results[lambda_col]) * white).values
        pct_white_pred /= sp.sum(pct_white_pred)
        pct_policed_gap = pct_black_pred - pct_white_pred
        pct_policed_gap = (pct_policed_gap * (10**precision)).astype(int)
        # print(pct_policed_gap.describe())

        # First part of equality constraint
        pct_overpoliced_black = pct_policed_gap.copy()
        pct_overpoliced_black[pct_policed_gap < 0] = 0
        # Second part of equality constraint
        pct_overpoliced_white = -pct_policed_gap.copy()
        pct_overpoliced_white[pct_policed_gap > 0] = 0

        # Compute values needed for assessment
        num_actual = assess_obj.results[actual_col].values
        pct_black_caught = (assess_obj.results[actual_col] * black).values
        pct_black_caught /= sp.sum(pct_black_caught)
        pct_white_caught = (assess_obj.results[actual_col] * white).values
        pct_white_caught /= sp.sum(pct_white_caught)
        fair_diff = sp.nan_to_num(pct_black_caught - pct_white_caught)

        for j, N in enumerate(num_cells):
            # Take only the top N * total_cells_factor cells in terms of
            # predicted intensities for knapsack to improve prediction speed
            idx_taken = idx_profits_sorted[:N * total_cells_factor]
            idx_chosen = solve_multi_knapsack(
                profits[idx_taken].tolist(),
                [[1] * len(idx_taken),
                 pct_overpoliced_black[idx_taken].tolist(),
                 pct_overpoliced_white[idx_taken].tolist()],
                [N, max_gap, max_gap],
                verbose)
            # idx_taken[idx_chosen] are the indices of the original grid cells
            # selected by the knapsack procedure
            chosen_items[N] = idx_taken[idx_chosen]
            knapsack_accuracy[j] += sp.sum(num_actual[chosen_items[N]])
            knapsack_fairness[j] += sp.sum(fair_diff[chosen_items[N]])

    # Compute accuracy as a percentage of total crime
    knapsack_accuracy /= assess_obj.get_actual_counts().values.sum()
    # Compute fairness as an average over all days
    knapsack_fairness /= (i + 1)
    return knapsack_items, knapsack_accuracy, knapsack_fairness
j = ls[1]
Temperature[i, j, :] = ls[2]
Denscol[i, j, :] = ls[3]
Turbvel[i, j, :] = ls[4]
vel_cen[i, j, :] = ls[5]

# Parameter error (confidence intervals)
# errmodel = sp.array(errmodel)
# model = sp.array(model)
# model = sp.swapaxes(model, 0, 1)
# model = sp.swapaxes(model, 0, 2)
print('Calculated Fits')
# model = sp.nan_to_num(model)
# dust = sp.nan_to_num(dust)
Denscol = sp.nan_to_num(Denscol)
Turbvel = sp.nan_to_num(Turbvel)
Temperature = sp.nan_to_num(Temperature)
vel_cen = sp.nan_to_num(vel_cen)
# tau0 = sp.nan_to_num(tau0)
if Convolve:
    Denscol = Convolv(Denscol, head)
    Temperature = Convolv(Temperature, head)
    Turbvel = Convolv(Turbvel, head)
    mom2 = Convolv(mom2, head)
# r1 = pf.PrimaryHDU(model)
r2 = pf.PrimaryHDU(Temperature)
r3 = pf.PrimaryHDU(Turbvel)
r4 = pf.PrimaryHDU(Denscol)
xyz = zip(*result)
gt.savelist(xyz, worksfolder_mat + str(personcnt) + '_' +
            str(experimenTimes) + '_' + str(modefans) + '_' +
            str(modemen) + '.netattri', mode='a+')
xyzz = gt.normlistlist(listlista=xyz[1:], metacolcount=0,
                       sumormax='sum')  # xyz[1:]
xy = zip(*xyzz)  # [1:]
z = zip(*xyz[:1])
labels, res = [], []
# print len(xy),xy
xy = sp.nan_to_num(xy)
labels, kmcenter, kmfit = netky.kmeans(xy, k=6, runtimes=1000)
# labels.sort()
print modefans, modemen, kmcenter, labels
centername = worksfolder_fig + 'shapedis_' + str(
    personcnt) + '_' + str(experimenTimes) + '_' + str(
    modefans) + '_' + str(modemen) + '.center'
endtimer = time.clock()
duration = endtimer - startimer
timestrline = str(time.asctime()) + ' in ' + str(
    duration / 60) + 'mins of modefans ' + str(
    modefans) + ' modemen ' + str(modemen)
print timestrline
gt.savefigdata(centername, kmcenter, labels, timestrline)
res = itemcntDis(labels)