def filter_phase(log, pipeline, f_min, f_max, fd, pd, n_stop=0):
    transitions = aer_pipeline_transitions1_all(log, pipeline)
    stream = aer_filtered_cutoff(transitions, f_min, f_max)

    P = np.zeros((fd, pd))
    frequencies = np.linspace(f_min, f_max, fd)
    phases = np.linspace(0, 1, pd)

    count = 0
    for ae in stream:
        f = ae['frequency']
        t = ae['timestamp']
        delta = 1 / f
        s = t / delta
        phase = s - np.floor(s)
        i = np.digitize([f], frequencies) - 1
        j = np.digitize([phase], phases) - 1
        P[i, j] += 1
        count += 1
        if n_stop != 0 and count >= n_stop:
            break

    stats = {}
    stats['P'] = P
    return stats
def bin2d(x, y, xbins=10, ybins=10):
    """
    2-dimensional binning of x, y

    Works as a 2-D extension of numpy.digitize but also automatically
    sets up the bin edges.

    Parameters
    ----------
    x, y : array-like
        x, y values to bin according to
    xbins, ybins : int OR list/array like
        Either the number of bins or the bin edges to use

    Returns
    -------
    ind : list of arrays
        The x, y bin indices each entry belongs to.
        ind[0][i] gives the x-bin of the ith entry.
        ind[1][i] gives the y-bin of the ith entry.
    xedges, yedges : arrays
        Bin edges used
    """
    xedges = setupbins(x, xbins)
    yedges = setupbins(y, ybins)

    xind = np.digitize(x, xedges) - 1
    yind = np.digitize(y, yedges) - 1

    ind = [xind, yind]
    return ind, xedges, yedges
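# Usage sketch for bin2d (assumes numpy imported as np; `setupbins` is a
# helper from the same module that turns an int or an edge array into bin
# edges). Values below are illustrative only.
#
# x = np.random.uniform(0, 10, 100)
# y = np.random.uniform(0, 5, 100)
# ind, xedges, yedges = bin2d(x, y, xbins=5, ybins=4)
# ind[0][i], ind[1][i] give the (x, y) bin of the i-th point.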
def hist(x, y, nbins, xlims, ylims):
    # Make the bins
    xbins = np.linspace(xlims[0], xlims[1], nbins+1)
    ybins = np.linspace(ylims[0], ylims[1], nbins+1)

    # Determine what cells go in what bins
    xdig = np.digitize(x, xbins)
    ydig = np.digitize(y, ybins)

    # Fix the edge effects
    maxBinNum = len(xbins)
    for i in range(len(xdig)):
        if xdig[i] == maxBinNum:
            xdig[i] -= 1
        if ydig[i] == maxBinNum:
            ydig[i] -= 1

    # Create empty array
    h = np.zeros((nbins, nbins))

    # Loop through array
    for i in range(nbins):
        for j in range(nbins):
            # Find the indices where x and y belong to this bin
            bits = np.bitwise_and(xdig == i+1, ydig == j+1)
            if True in bits:
                h[i, j] = np.log10(np.sum(bits))

    h = np.rot90(h)
    h = np.flipud(h)
    return h, xbins, ybins
def group(angle, wind, bsp, speedbins, anglebins, fct=np.median):
    '''Group data in bins according to wind angle and wind speed.

    Parameters
    ----------
    angle : np.ndarray
        Wind angles in degrees
    wind : np.ndarray
        Wind speed in kn
    bsp : np.ndarray
        Boat speed in kn
    speedbins : ndarray
        Bin boundaries for speed binning
    anglebins : ndarray
        Bin boundaries for angle binning. Make sure that 180. is included
        in the last bin and not on the boundary.
    fct : function
        Given all bsp values in one (speedbin, anglebin), select one value
        to be used. Common examples are np.median or np.mean.

    Returns
    -------
    polar : ndarray([len(speedbins)+1, len(anglebins)])
        This contains the data array with one speed for each
        (speedbin, anglebin)
    '''
    if (angle.shape != wind.shape) or (angle.shape != bsp.shape):
        raise ValueError('angle, wind and bsp must have same number of elements')

    digspeed = np.digitize(wind, speedbins)
    digangle = np.digitize(np.abs(angle), anglebins)
    polar = np.zeros([len(speedbins)+1, len(anglebins)])

    for i in np.arange(1, len(speedbins)+1):
        for j in np.arange(1, len(anglebins)):
            polar[i, j] = fct(bsp[(digspeed == i) & (digangle == j)])

    return polar
def means2idxarrays(g1, g2, i_bins, c_bins, difference):
    '''take two arrays of values and return the initial values
    and differences as numpy digitised arrays'''

    if difference == "relative":
        # calculate difference between mean values for group1 and group2
        # g1 and g2 always the same length
        change = [g2[x] - g1[x] for x in range(0, len(g1))]
        initial = g1

    elif difference == "logfold":
        change = [np.log2((g2[x] + 1.0) / (g1[x] + 1.0))
                  for x in range(0, len(g1))]
        initial = [np.log2(g1[x] + 1.0) for x in range(0, len(g1))]

    elif difference == "abs_logfold":
        change = [abs(np.log2((g2[x] + 1.0) / (g1[x] + 1.0)))
                  for x in range(0, len(g1))]
        initial = [max(np.log2(g1[x] + 1.0), np.log2(g2[x] + 1.0))
                   for x in range(0, len(g1))]

    # return arrays of len(change) with the index position in c_bins
    # corresponding to the bin in which the value of change falls
    change_idx = np.digitize(change, c_bins, right=True)
    initial_idx = np.digitize(initial, i_bins, right=True)

    return (change_idx, initial_idx)
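# A minimal usage sketch for means2idxarrays (assumes numpy as np; the bin
# edges below are arbitrary illustrative values, not from the original code).
g1 = [1.0, 4.0, 10.0]
g2 = [2.0, 3.0, 30.0]
i_bins = np.linspace(0, 6, 13)    # bins for the initial (log2) values
c_bins = np.linspace(-3, 3, 13)   # bins for the change values
change_idx, initial_idx = means2idxarrays(g1, g2, i_bins, c_bins, "logfold")
# change_idx[k] is the c_bins bin index of the log2 fold change of sample k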
def better2D_desisty_plot(xdat, ydat, thresh=3, bins=(100, 100)):
    xyrange = [[min(xdat), max(xdat)], [min(ydat), max(ydat)]]
    distortion = (xyrange[1][1] - xyrange[1][0]) / \
        (xyrange[0][1] - xyrange[0][0])
    xdat = xdat * distortion

    xyrange = [[min(xdat), max(xdat)], [min(ydat), max(ydat)]]
    hh, locx, locy = histogram2d(xdat, ydat, range=xyrange, bins=bins)
    posx = np.digitize(xdat, locx)
    posy = np.digitize(ydat, locy)

    ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
    # values of the histogram where the points are
    hhsub = hh[posx[ind] - 1, posy[ind] - 1]
    xdat1 = xdat[ind][hhsub < thresh]  # low density points
    ydat1 = ydat[ind][hhsub < thresh]
    hh[hh < thresh] = np.nan  # fill the areas with low density by NaNs

    plt.imshow(
        np.flipud(hh.T),
        cmap='jet',
        extent=np.array(xyrange).flatten(),
        interpolation='none')
    plt.plot(xdat1, ydat1, '.')
def pos2Grid(x, y, data, xbins=None, ybins=None):
    '''Make a pixellated grid image from a 1d array of positions x, y, d.
    No smoothing, just binning.'''
    if xbins is None:
        xbins = np.arange(x.min(), x.max()+1)
    if ybins is None:
        ybins = np.arange(y.min(), y.max()+1)

    xd = np.digitize(x, xbins)
    xd -= 1
    yd = np.digitize(y, ybins)
    yd -= 1

    (w,) = np.where((xd != 0) & (yd != 0))
    xd = xd[w]
    yd = yd[w]
    data = data[w]

    xi, yi = np.array(np.meshgrid(xbins, ybins, indexing='ij'))
    zi = xi*0
    zi[xd, yd] = data

    return zi, xd, yd
def do(lon, lat, station):
    """ Process this station and geography """
    idx = np.digitize([lon, ], lons)[0]
    jdx = np.digitize([lat, ], lats)[0]
    print("--> Processing %s i:%s j:%s" % (station, idx, jdx))
    pdata = pr_nc.variables['pr'][:, jdx, idx]
    xdata = tasmax_nc.variables['tmax'][:, jdx, idx]
    ndata = tasmin_nc.variables['tmin'][:, jdx, idx]
    highs = temperature(xdata, 'C').value('F')
    lows = temperature(ndata, 'C').value('F')
    precips = distance(pdata, 'MM').value('IN')

    now = basets
    high = low = precip = None
    for k, _ in enumerate(tmdata):
        now += datetime.timedelta(days=1)
        if now.month == 2 and now.day == 29:
            # Insert missing data
            insert(station, now, high, low, precip)
            now += datetime.timedelta(days=1)
        high = fix(highs[k])
        low = fix(lows[k])
        if low is not None and high is not None and low > high:
            # Swap, sigh
            print(('%s %s high: %.1f low: %.1f was swapped'
                   ) % (now.strftime("%m-%d-%Y"), station, high, low))
            high2 = high
            high = low
            low = high2
        precip = fix(precips[k])
        insert(station, now, high, low, precip)
def digitize(x, bins, right=False):
    if np.isscalar(x):
        return np.digitize(np.atleast_1d(x), bins, right)[0]
    elif x.ndim != 1:
        raise NotImplementedError("digitize for pre 1.10 numpy with ndim > "
                                  "1 array")
    return np.digitize(x, bins, right)
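# Usage sketch for the compatibility wrapper above (assumes numpy as np).
# The wrapper promotes scalars to 1-D arrays, presumably because older
# NumPy releases only accepted 1-D input to np.digitize.
bins = np.array([0.0, 1.0, 2.0, 3.0])
digitize(1.5, bins)                    # -> 2 (scalar in, scalar out)
digitize(np.array([0.5, 2.5]), bins)   # -> array([1, 3])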
def get_sky_positions(self, dmag=0.2, dz=0.2):

    LRGfile = os.path.expandvars("$OM10_DIR/data/CFHTLS_LRGs.txt")
    try:
        d = np.loadtxt(LRGfile)
    except IOError:
        raise IOError("ERROR: cannot find LRG catalog for sky positions!")
    if vb:
        print("om10.DB: read in LRG sky position data from ", LRGfile)

    # Put LRG parameters in LRG structure:
    self.LRGs = {}
    self.LRGs['RA'] = np.array(d[:, 0])
    self.LRGs['DEC'] = np.array(d[:, 1])
    self.LRGs['redshift'] = np.array(d[:, 2])
    self.LRGs['mag_i'] = np.array(d[:, 6])

    print("Mean LRG RA,DEC,z,i = ",
          np.average(self.LRGs['RA']),
          np.average(self.LRGs['DEC']),
          np.average(self.LRGs['redshift']),
          np.average(self.LRGs['mag_i']))

    # Bin LRGs in mag_i and redshift, and record bin numbers for each one:
    imin, imax = np.min(self.LRGs['mag_i']), np.max(self.LRGs['mag_i'])
    nibins = int((imax - imin)/dmag) + 1
    ibins = np.linspace(imin, imax, nibins)
    self.LRGs['ivals'] = np.digitize(self.LRGs['mag_i'], ibins)
    self.LRGs['ibins'] = ibins

    zmin, zmax = np.min(self.LRGs['redshift']), np.max(self.LRGs['redshift'])
    nzbins = int((zmax - zmin)/dz) + 1
    zbins = np.linspace(zmin, zmax, nzbins)
    self.LRGs['zvals'] = np.digitize(self.LRGs['redshift'], zbins)
    self.LRGs['zbins'] = zbins

    if vb:
        print("om10.DB: number of LRGs stored = ", len(self.LRGs['redshift']))

    return
def scatter_density(x, y, xlabel=None, ylabel=None, title=None, xlims=None,
                    ylims=None, filename=None):
    plt.figure()
    plt.grid()

    hist, xedges, yedges = np.histogram2d(x, y)
    xidx = np.clip(np.digitize(x, xedges), 0, hist.shape[0] - 1)
    yidx = np.clip(np.digitize(y, yedges), 0, hist.shape[1] - 1)
    c = hist[xidx, yidx]
    print("starting to plot the scatter plot")
    plt.scatter(x, y, c=c)

    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if title:
        plt.title(title)
    if xlims:
        plt.xlim(xlims)
    if ylims:
        plt.ylim(ylims)
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
def make_image(stream, bins=(100,100), range=[[-5,5],[-5,5]], nevents=100 ): """ Generates an image every `nevents` events """ image = np.zeros( shape=bins ) binX = np.linspace( range[0][0], range[0][1], bins[0] ) binY = np.linspace( range[1][0], range[1][1], bins[1] ) count = 0 xpoints = list() ypoints = list() for data in stream: detx = data.DETX[0] dety = data.DETY[0] # accumulate points for efficiency: if (detx > range[0][0] and detx < range[0][1] and dety > range[1][0] and dety < range[1][1] ): xpoints.append( detx ) ypoints.append( dety ) count += 1 # generate a binned image from the accumulated points: if count >= nevents: if len(xpoints) > 0: ii = np.digitize( xpoints, binX ) jj = np.digitize( ypoints, binY ) image[ii,jj] += 1 yield image.copy() # output the image # clear image and data points count =0 image[:] = 0 xpoints = list() ypoints = list()
def _bin_descriptors(self, siftgeo, pca, grid, dimensions, duration): """ Groups the points in different bins using the gridding specified by grid. The returned results is a dictionary that has a key the bin number on each of the three dimensions x, y and t. """ W, H = dimensions t_init, t_final = duration # Create equally spaced bins. bins_x = linspace(0, W + 1, grid[0] + 1) bins_y = linspace(0, H + 1, grid[1] + 1) bins_t = linspace(t_init, t_final + 1, grid[2] + 1) bag_xx = defaultdict(list) bag_ll = defaultdict(list) N = 0 for ss in siftgeo: xx = pca.transform(ss[1]) N += 1 id_x = digitize([ss[0]['x']], bins_x) id_y = digitize([ss[0]['y']], bins_y) id_t = digitize([ss[0]['t']], bins_t) bag_xx[(id_x[0], id_y[0], id_t[0])].append(xx) bag_ll[(id_x[0], id_y[0], id_t[0])].append([ss[0]['x'] / W, ss[0]['y'] / H, (ss[0]['t'] - t_init) / (t_final + 1 - t_init)]) assert (1 <= id_x <= grid[0] and 1 <= id_y <= grid[1] and 1 <= id_t <= grid[2]) return bag_xx, bag_ll
def hist2d(ax, xdat, ydat, xyrange, bins, thresh=2, cmap=plt.cm.Greys, log=False, scatterother=False): import scipy tt = ax.get_aspect() # histogram the data hh, locx, locy = scipy.histogram2d(xdat, ydat, range=xyrange, bins=bins) mhh = np.mean(hh) shh = np.std(hh) if log: lhh = np.log10(hh) else: lhh = hh posx = np.digitize(xdat, locx) posy = np.digitize(ydat, locy) #select points within the histogram ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1]) hhsub = hh[posx[ind] - 1, posy[ind] - 1] # values of the histogram where the points are xdat1 = xdat[ind][hhsub < thresh] # low density points ydat1 = ydat[ind][hhsub < thresh] lhh[hh < thresh] = np.nan # fill the areas with low density by NaNs ar = (0.6/0.65)*(np.diff(xyrange[0])/np.diff(xyrange[1]))[0] c = ax.imshow(np.flipud(lhh.T),extent=np.array(xyrange).flatten(), interpolation='none', cmap=cmap, aspect=ar) ax.set_aspect(tt) if scatterother: ax.plot(xdat1, ydat1, 'k,') return c
def vertical_length_distribution(src_alt, simplex_alt, simplex_lengths, alt_bins, norm=True): """ given input altitudes and lengths in km, create vertical profiles of source counts and total length. Returns alt_bins, bin_total_src, bin_total_length If norm==True, divide the counts by the bin width, returning km, counts/km and km/km. Otherwise just return km, counts and km. """ # Not sure why we're not using histogram here, so that's a TODO # d_alt = 0.5 d_alt = alt_bins[1:]-alt_bins[:-1] # alt_bins = np.arange(0.0,max_alt+d_alt, d_alt) bin_total_length = np.zeros(alt_bins.shape[0]-1, dtype=float) bin_total_src = np.zeros(alt_bins.shape[0]-1, dtype=float) # bin_total_length_sq = np.zeros(alt_bins.shape[0]-1, dtype=float) tri_bin_idx = np.digitize(simplex_alt, alt_bins) src_bin_idx = np.digitize(src_alt,alt_bins) tri_bin_idx[tri_bin_idx>(bin_total_length.shape[0]-1)]=bin_total_length.shape[0]-1 src_bin_idx[src_bin_idx>(bin_total_src.shape[0]-1)]=bin_total_src.shape[0]-1 for idx in src_bin_idx: bin_total_src[idx] += 1 for lw,idx in zip(simplex_lengths,tri_bin_idx): bin_total_length[idx]+=lw # bin_total_length_sq[idx] += lw*lw # bin_total_length[tri_bin_idx] += length_weighted if norm==True: return alt_bins, bin_total_src/d_alt, bin_total_length/d_alt else: return alt_bins, bin_total_src, bin_total_length
def runningStatistic(x, y, statistic='mean', binNumber=10, **kwargs): """ Calculates the value given by statistic in bins of x. Useful for plotting a running mean value for a scatter plot, for example. This function allows the computation of the sum, mean, median, std, or other statistic of the values within each bin. NOTE: if the statistic is a callable function and there are empty data bins those bins will be skipped to keep the function from falling over. @type x: numpy array @param x: data over which the bins are calculated @type y: numpy array @param y: values for corresponding x values @type statistic: string or function @param statistic: The statistic to compute (default is 'mean'). Acceptable values are 'mean', 'median', 'sum', 'std', and callable function. Extra arguements are passed as kwargs. @type binNumber: int @param binNumber: The desired number of bins for the x data. @rtype: tuple @return: A tuple of two lists containing the left bin edges and the value of the statistic in each of the bins. """ if type(statistic) == str: if statistic not in ['mean', 'median', 'sum', 'std']: raise ValueError('unrecognized statistic "%s"' % statistic) elif isinstance(statistic, collections.Callable): pass else: raise ValueError("statistic not understood") if not isinstance(x, numpy.ndarray): x = numpy.asarray(x) if not isinstance(y, numpy.ndarray): y = numpy.asarray(y) try: bins = numpy.linspace(x.min(), x.max(), binNumber) centers = (bins[:-1] + bins[1:]) / 2. index = numpy.digitize(x, bins) except TypeError: bins = binNumber centers = (bins[:-1] + bins[1:]) / 2. index = numpy.digitize(x, binNumber) binNumber = len(binNumber) if statistic == 'mean': running = [numpy.mean(y[index == k]) for k in range(1, binNumber)] elif statistic == 'median': running = [numpy.median(y[index == k]) for k in range(1, binNumber)] elif statistic == 'sum': running = [numpy.sum(y[index == k]) for k in range(1, binNumber)] elif statistic == 'std': running = [numpy.std(y[index == k]) for k in range(1, binNumber)] elif isinstance(statistic, collections.Callable): running = [statistic(y[index == k], **kwargs) for k in range(1, binNumber) if not len(y[index == k]) == 0] return centers, running
def plot_2Dhist_medians(x, y, z, xlabel=None, ylabel=None, cblabel=None, ranges=[[-0.007, 0.002],[-0.014, 0.005]], vmin=0.0, vmax=10.0, filename=None): xedges = np.linspace(ranges[0][0], ranges[0][1], 51) # these numbers chosen to get 50 bins in final plot yedges = np.linspace(ranges[1][0], ranges[1][1], 51) xbins = np.digitize(x, xedges) # values falling below min(xedges) assigned 0; values above max(xedges) assigned 51 ybins = np.digitize(y, yedges) medians = np.zeros((50,50)) for i in range(50): for j in range(50): medians[i,j] = np.nanmedian(z[(xbins == i+1) * (ybins == j+1)]) fig, ax = plt.subplots(figsize=(6.5, 5)) plt.gcf().subplots_adjust(bottom=0.15) plt.imshow(medians.T, origin='lower', aspect='auto', interpolation='nearest', cmap=plt.cm.viridis, vmin=vmin, vmax=vmax, extent=(ranges[0][0], ranges[0][1], ranges[1][0], ranges[1][1])) if xlabel: plt.xlabel(xlabel) if ylabel: plt.ylabel(ylabel) cb = plt.colorbar() if cblabel: cb.set_label(cblabel) plt.draw() plt.tight_layout() if filename: plt.savefig(filename)
def shuffle_centrals(mock, centrals, bins, shuffle_props, mock_prop='mvir', use_log=True):
    """
    Shuffle central galaxies amongst haloes. Only one central per halo is allowed.
    """

    shuffled_mock = np.copy(mock)
    central_inds = np.where(centrals == True)[0]

    if use_log == True:
        inds = np.digitize(np.log10(mock[mock_prop][centrals]), bins=bins)
    else:
        inds = np.digitize(mock[mock_prop][centrals], bins=bins)

    for i in range(0, len(bins)-1):
        inds_in_bin = (inds == i+1)
        inds_in_bin = central_inds[inds_in_bin]
        shufled_inds_in_bin = np.random.permutation(inds_in_bin)
        for prop in shuffle_props:
            shuffled_mock[prop][shufled_inds_in_bin] = mock[prop][inds_in_bin]

    return shuffled_mock
def binder(positions, orientations, bl, m=4, method='ball', margin=0):
    """ Calculate the binder cumulant for a frame, given positions and
        orientations.

        bl: the binder length scale, such that
            B(bl) = 1 - .333 * S4 / S2^2
            where SN are <phibl^N> averaged over each block/cluster of size
            bl in frame.
    """
    if margin:
        if margin < ss:
            margin *= ss
        center = 0.5*(positions.max(0) + positions.min(0))
        dmask = d < d.max() - margin
        positions = positions[dmask]
        orientations = orientations[dmask]
    if 'neigh' in method or 'ball' in method:
        tree = cKDTree(positions)
        balls = tree.query_ball_tree(tree, bl)
        balls, ball_mask = helpy.pad_uneven(balls, 0, True, int)
        ball_orient = orientations[balls]
        ball_orient[~ball_mask] = np.nan
        phis = np.nanmean(np.exp(m*ball_orient*1j), 1)
        phi2 = np.dot(phis, phis) / len(phis)
        phiphi = phis*phis
        phi4 = np.dot(phiphi, phiphi) / len(phiphi)
        return 1 - phi4 / (3*phi2*phi2)
    else:
        raise ValueError("method {} not implemented".format(method))
    #elif method=='block':
    left, right, bottom, top = (positions[:, 0].min(), positions[:, 0].max(),
                                positions[:, 1].min(), positions[:, 1].max())
    xbins, ybins = (np.arange(left, right + bl, bl),
                    np.arange(bottom, top + bl, bl))
    blocks = np.rollaxis(np.indices((xbins.size, ybins.size)), 0, 3)
    block_ind = np.column_stack([
        np.digitize(positions[:, 0], xbins),
        np.digitize(positions[:, 1], ybins)])
def relPolarCoordAverageMap(relPolMeanPlt, distEdges, angleEdges, valuesToMap, objDistance, gamma, colorMap, useMean, maxValue, xlab, ylab): # bin valuesToMap by objectDistance value digitizedDist = np.digitize(objDistance, distEdges) # bin valuesToMap by objectDistance value digitizedAngle = np.digitize(gamma, angleEdges) meanVals = 1.0*np.zeros((len(angleEdges), len(distEdges))) for distBin in range(1, 1+len(distEdges)): for angleBin in range(1, 1+len(angleEdges)): sltPts = np.logical_and(digitizedDist == distBin, digitizedAngle == angleBin) if sum(sltPts) > 0: if useMean: meanVals[angleBin-1, distBin-1, ] = np.mean(valuesToMap[sltPts]) else: # use median meanVals[angleBin-1, distBin-1, ] = np.median(valuesToMap[sltPts]) pc = relPolMeanPlt.pcolormesh(distEdges, angleEdges, meanVals, cmap=colorMap, vmin=-maxValue, vmax=maxValue) relPolMeanPlt.set_xlim(min(distEdges), max(distEdges)) relPolMeanPlt.set_ylim(min(angleEdges), max(angleEdges)) relPolMeanPlt.set_xlabel(xlab) relPolMeanPlt.set_ylabel(ylab) return relPolMeanPlt, meanVals, pc
def indices(self, xyz):
    """
    Returns the grid coordinates for a set of Cartesian coordinates
    """
    xidx = np.digitize(xyz[:, 0], self.edgesx)
    yidx = np.digitize(xyz[:, 1], self.edgesy)
    return np.array([xidx, yidx])
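# Usage sketch for indices() above (hypothetical; assumes a grid object that
# stores its bin edges as self.edgesx / self.edgesy, e.g. from np.linspace).
# Each row of xyz is an (x, y) point; the result holds the x and y bin
# numbers returned by np.digitize for every point.
#
# grid.edgesx = np.linspace(0.0, 10.0, 11)
# grid.edgesy = np.linspace(0.0, 5.0, 6)
# xyz = np.array([[2.3, 1.1], [9.9, 4.7]])
# grid.indices(xyz)   # -> array([[ 3, 10], [ 2,  5]])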
def __getitem__(self, key):
    """
    Implements slicing or indexing of the Histogram
    """
    if key == ():
        return self  # May no longer be necessary
    if isinstance(key, tuple) and len(key) > self.ndims:
        raise Exception("Slice must match number of key dimensions.")

    centers = [(float(l)+r)/2 for (l, r) in zip(self.edges, self.edges[1:])]
    if isinstance(key, slice):
        start, stop = key.start, key.stop
        if [start, stop] == [None, None]:
            return self
        start_idx, stop_idx = None, None
        if start is not None:
            start_idx = np.digitize([start], centers, right=True)[0]
        if stop is not None:
            stop_idx = np.digitize([stop], centers, right=True)[0]

        slice_end = stop_idx+1 if stop_idx is not None else None
        slice_values = self.values[start_idx:stop_idx]
        slice_edges = self.edges[start_idx: slice_end]

        extents = (min(slice_edges), self.extents[1],
                   max(slice_edges), self.extents[3])
        return self.clone((slice_values, slice_edges), extents=extents)
    else:
        if not (self.edges.min() <= key < self.edges.max()):
            raise Exception("Key value %s is out of the histogram bounds" % key)
        idx = np.digitize([key], self.edges)[0]
        return self.values[idx-1 if idx > 0 else idx]
def take2D(histogram, x, y, bins_x, bins_y): """ Take the value from a two-dimensional histogram from the bin corresponding to (x, y). Parameters: ----------- histogram : The values in the histogram (n,m) (ADW: is this ordering right?) x : the x-value to take from the hist y : the y-value to take from the hist bins_x : the xbin edges, including upper edge (n-dim) bins_y : the ybin edges, including upper edge (m-dim) """ histogram = np.array(histogram) if np.isscalar(x): x = [x] if np.isscalar(y): y = [y] bins_x[-1] += 1.e-10 * (bins_x[-1] - bins_x[-2]) # Numerical stability bins_y[-1] += 1.e-10 * (bins_y[-1] - bins_y[-2]) #return np.take(histogram, (histogram.shape[1] * (np.digitize(y, bins_y) - 1)) + (np.digitize(x, bins_x) - 1)) # Return np.nan for entries which are outside the binning range on either axis index = (histogram.shape[1] * (np.digitize(y, bins_y) - 1)) + (np.digitize(x, bins_x) - 1) index_clipped = np.clip(index, 0, (histogram.shape[0] * histogram.shape[1]) - 1) val = np.take(histogram, index_clipped) outlier_x = np.logical_or(x < bins_x[0], x > bins_x[-1]) outlier_y = np.logical_or(y < bins_y[0], y > bins_y[-1]) outlier = np.logical_or(outlier_x, outlier_y) val[outlier] = np.nan return val
def _get_rejrej_array(flat_eff, flat_x, flat_y, x_range=None, y_range=None): indices = np.nonzero((flat_eff > 0.005) & np.isfinite(flat_x) & np.isfinite(flat_y)) used_x = np.log10(flat_x[indices]) used_y = np.log10(flat_y[indices]) used_eff = flat_eff[indices] if not x_range: # allow 1% safety margin on max value max_x = _max_noninf(used_x) * 1.0001 min_x = np.min(used_x) else: min_x, max_x = x_range if not y_range: max_y = _max_noninf(used_y) * 1.0001 min_y = np.min(used_y) else: min_y, max_y = y_range n_out_bins = 100 x_bin_values = np.linspace(min_x, max_x, n_out_bins) x_bins = np.digitize(used_x, bins=x_bin_values) - 1 # no underflow y_bin_values = np.linspace(min_y, max_y, n_out_bins) y_bins = np.digitize(used_y, bins=y_bin_values) - 1 # no underflow make_eff_array = _loop_over_entries # the other method seems slower eff_array = make_eff_array(x_bins, y_bins, used_eff, n_out_bins) return eff_array, (min_x, max_x), (min_y, max_y)
def place(self, sig, bg_x, bg_y, cut_1_range, cut_2_range): """ calculates x,y,z coordinates (rej x, rej y, eff) NOTE: make sure the eff, rej_x, rej_y arrays are integrated """ assert bg_x.shape == bg_y.shape npts_1, npts_2 = bg_x.shape c1_bin_bounds = np.linspace(*cut_1_range, num=(npts_1 + 1)) c1_bin = np.digitize([self._cut_1], c1_bin_bounds) - 1 c2_bin_bounds = np.linspace(*cut_2_range, num=(npts_2 + 1)) c2_bin = np.digitize([self._cut_2], c2_bin_bounds) - 1 if any(b < 0 for b in [c1_bin, c2_bin]): raise ValueError("can't put a cut in the underflow bin") eff = float(sig[c1_bin, c2_bin] / sig.max()) def get_rej(bkg_array): array_val = bkg_array.max() / bkg_array[c1_bin, c2_bin] return float(array_val) rej_x, rej_y = [get_rej(ar) for ar in [bg_x, bg_y]] self._xyz = rej_x, rej_y, eff self._cut_ranges = (cut_1_range, cut_2_range)
def sim_make_residual_images(rmcal,binX=32,binY=32): xBins = np.arange(0,nX+1,binX) yBins = np.arange(0,nY+1,binY) median_a_offset = 0 dmag = [] for i,obj in enumerate(rmcal): mag,err = rmcal.get_object_phot(obj) dmag.append(obj.refMag - (mag - median_a_offset)) dmag = np.concatenate(dmag) xy = np.hstack( [ [rmcal.objs[i].xpos,rmcal.objs[i].ypos] for i in range(rmcal.num_objects()) ] ) # XXX hack that last index in a_indices is ccdNum ccds = np.concatenate( [ rmcal.objs[i].a_indices[-1] for i in range(rmcal.num_objects()) ] ) ffmaps = [] for ccdNum in range(4): ffmap = [[[] for xi in xBins] for yi in yBins] ii = np.where(ccds==ccdNum)[0] for xi,yi,dm in zip(np.digitize(xy[0,ii],xBins), np.digitize(xy[1,ii],yBins), dmag[ii]): ffmap[yi][xi].append(dm) for xi in range(len(xBins)): for yi in range(len(yBins)): if len(ffmap[yi][xi])==0: ffmap[yi][xi] = np.nan else: ffmap[yi][xi] = np.median(ffmap[yi][xi]) ffmaps.append(np.array(ffmap)) return np.array(ffmaps)
def updateViz(self): if self.gridRadiusViz == 0: vals=[] for name in self.vizObjectNames: r = moose.element(name+self.moosepath) d = float(r.getField(self.variable)) vals.append(d) inds = digitize(vals,self.stepVals) for i in range(0,len(self.vizObjects)): self.vizObjects[i].r,self.vizObjects[i].g,self.vizObjects[i].b=self.colorMap[inds[i]-1] else: vals=[] vals_2=[] for name in self.vizObjectNames: r=mc.pathToId(name+self.moosepath) d=float(mc.getField(r,self.variable)) r2=mc.pathToId(name+self.moosepath_2) d2=float(mc.getField(r2,self.variable_2)) vals.append(d) vals_2.append(d2) inds = digitize(vals,self.stepVals) inds_2 = digitize(vals_2,self.stepVals_2) for i in range(0,len(self.vizObjects)): self.vizObjects[i].r,self.vizObjects[i].g,self.vizObjects[i].b=self.colorMap[inds[i]-1] self.vizObjects[i].radius=self.indRadius[inds_2[i]-1] self.updateGL()
def get_line_histos(results, temp, image, axis=0, bins=None): """This function creates an ADU histogram per each pixel in the direction defined by the axis parameter. """ if image is None: temp["current_entry"] += 1 return results, temp if bins is None: bins = np.arange(-100, 1000, 5) for i in range(image.shape[axis]): if axis == 0: t_histo = np.bincount(np.digitize(image[i, :].flatten(), bins[1:-1]), minlength=len(bins) - 1) elif axis == 1: t_histo = np.bincount(np.digitize(image[:, i].flatten(), bins[1:-1]), minlength=len(bins) - 1) if temp["current_entry"] == 0 and i == 0: results["histo_counts_line"] = np.empty([image.shape[axis], t_histo.shape[0]], dtype=image.dtype) if temp["current_entry"] == 0: results["histo_counts_line"][i] = t_histo else: results["histo_counts_line"][i] += t_histo temp["current_entry"] += 1 return results, temp
def Mars_Year_np(j2k_np, jday_vals, year_vals, year_length, return_length=False):
    jday_vals = np.array(jday_vals)
    year_vals = np.array(year_vals)
    year_length = np.array(year_length)

    if j2k_np < jday_vals[0]:
        return np.floor(1+(j2k_np-jday_vals[0])/year_length[0])
    elif j2k_np >= jday_vals[-1]:
        return np.floor(1+(j2k_np-jday_vals[-1])/year_length[-1])
    else:
        try:
            v = np.clip(np.digitize(j2k_np, jday_vals), 1, jday_vals.size)-1
            y = year_vals[v]
            l = year_length[v]
        except:
            v = np.clip(np.digitize([j2k_np], jday_vals), 1, jday_vals.size)-1
            y = year_vals[v][0]
            l = year_length[v][0]

        if return_length:
            return (y*1.0, l)
        else:
            return y*1.0
def generate_biomes(data_path): if os.path.isfile(data_path + "biomes.pkl"): return moisture = pickle.load(open(data_path+"moisture.pkl", 'rb')) moisture = imresize(moisture, (IMAGE_HEIGHT, IMAGE_WIDTH)) plt.imshow(moisture) plt.show() moisture = np.digitize(moisture, [0, 100, 170, 230, 255])-1 moisture[moisture > 4] = 4 plt.imshow(moisture) plt.show() temp = pickle.load(open(data_path+"temperature.pkl", 'rb')) temp = imresize(temp, (IMAGE_HEIGHT, IMAGE_WIDTH)) plt.imshow(temp) plt.show() temp = np.digitize(temp, [0, 90, 130, 255])-1 temp[temp > 2] = 2 plt.imshow(temp) plt.show() biomes = [ [BARE, TUNDRA, TAIGA, SNOW, OCEAN], [GRASSLAND, WOODLAND, TEMPERATE_FOREST, TEMPERATE_RAINFOREST, OCEAN], [DESERT, SAVANNAH, TROPICAL_SEASONAL_FOREST, TROPICAL_RAINFOREST, OCEAN] ] img = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH)) for i in range(IMAGE_HEIGHT): for j in range(IMAGE_WIDTH): img[i,j] = biomes[temp[i,j]][moisture[i,j]] elevation = pickle.load(open(data_path+"elevation.pkl", 'rb')) img[elevation == 0] = OCEAN plt.imshow(img) plt.show() pickle.dump(img, open(data_path+"biomes.pkl", 'wb'))
#
# Plot the distribution
#
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
for cl in classes:
    print("%s: %s" % (cl, np.mean(neighbourhoods_content[cl])))

    ## Bin the data
    cs = np.array(neighbourhoods_content[cl])
    N_bins = 11
    bins = np.linspace(min(cs), max(cs), N_bins)
    l_bin = (max(cs)-min(cs)) / N_bins
    m = np.mean(cs)
    s = np.std(cs)
    digitized = np.digitize(cs, bins)
    cs_mean = [cs[digitized == i].mean() for i in range(1, len(bins))]
    cs_counts = [0 for i in range(len(bins))]
    for d in digitized:
        cs_counts[d-1] += 1/(len(cs)*l_bin)

    # Clean the NaN values out
    cs_clean, counts_clean = zip(*filter(lambda x: not np.isnan(x[0]),
                                         zip(cs_mean, cs_counts[1:])))

    ax.plot(cs_clean, counts_clean, 'k-', color=colours[cl], lw=3,
            label=r"$%s$" % cl)

ax.set_xlabel(r'$\frac{H_\alpha^n}{H_\alpha}$', fontsize=30)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_position(('outward', 10))    # outward by 10 points
ax.spines['bottom'].set_position(('outward', 10))  # outward by 10 points
def _digitize_1d(X, bins, n_samples, n_timestamps):
    X_digit = np.empty((n_samples, n_timestamps))
    for i in prange(n_samples):
        X_digit[i] = np.digitize(X[i], bins, right=True)
    return X_digit
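# The `prange` loop suggests this helper is compiled with numba; a sketch of
# how it would presumably be wired up and called. The decorator line and the
# shapes are assumptions, not part of the original snippet.
from numba import njit, prange  # assumed dependency

_digitize_1d = njit(parallel=True)(_digitize_1d)

# X is a (n_samples, n_timestamps) float array and bins a sorted edge array:
# X_binned = _digitize_1d(X, bins, X.shape[0], X.shape[1])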
def main(): args = get_parser().parse_args() # -- debugging option if args.debug: logging.basicConfig(level=logging.DEBUG) file_time = check_neon_time() # -- specify site from which to extract data site_name = args.site_name # -- Look for surface data surf_dir = args.surf_dir surf_file = find_surffile(surf_dir, site_name) # -- directory structure current_dir = os.getcwd() parent_dir = os.path.dirname(current_dir) clone_dir = os.path.abspath(os.path.join(__file__, "../../..")) neon_dir = os.path.join(clone_dir, "neon_surffiles") print("Present Directory", current_dir) # -- download neon data if needed neon_file = get_neon(neon_dir, site_name) # -- Read neon data df = pd.read_csv(neon_file) # -- Read surface dataset files print("surf_file:", surf_file) f1 = xr.open_dataset(surf_file) # -- Find surface dataset soil depth information soil_bot, soil_top = find_soil_structure(surf_file) # -- Find surface dataset soil levels # TODO: how? NS uses metadata on file to find # soil strucure # better suggestion by WW to write dzsoi to neon surface dataset # This todo needs to go to the subset_data # TODO Will: if I sum them up , are they 3.5? (m) YES print("soil_top:", soil_top) print("soil_bot:", soil_bot) print("Sum of soil top depths :", sum(soil_top)) print("Sum of soil bottom depths :", sum(soil_bot)) soil_top = np.cumsum(soil_top) soil_bot = np.cumsum(soil_bot) soil_mid = 0.5 * (soil_bot - soil_top) + soil_top # print ("Cumulative sum of soil bottom depths :", sum(soil_bot)) obs_top = df["biogeoTopDepth"] / 100 obs_bot = df["biogeoBottomDepth"] / 100 # -- Mapping surface dataset and neon soil levels bins = df["biogeoTopDepth"] / 100 bin_index = np.digitize(soil_mid, bins) - 1 """ print ("================================") print (" Neon data soil structure: ") print ("================================") print ("------------","ground","------------") for i in range(len(obs_bot)): print ("layer",i) print ("-------------", "{0:.2f}".format(obs_bot[i]), "-------------") print ("================================") print ("Surface data soil structure: ") print ("================================") print ("------------","ground","------------") for b in range(len(bin_index)): print ("layer",b) print ("-------------", "{0:.2f}".format(soil_bot[b]), "-------------") """ # -- update fields with neon f2 = f1 soil_levels = f2["PCT_CLAY"].size for soil_lev in range(soil_levels): print("--------------------------") print("soil_lev:", soil_lev) print(df["clayTotal"][bin_index[soil_lev]]) f2["PCT_CLAY"][soil_lev] = df["clayTotal"][bin_index[soil_lev]] f2["PCT_SAND"][soil_lev] = df["sandTotal"][bin_index[soil_lev]] bulk_den = df["bulkDensExclCoarseFrag"][bin_index[soil_lev]] carbon_tot = df["carbonTot"][bin_index[soil_lev]] estimated_oc = df["estimatedOC"][bin_index[soil_lev]] # -- estimated_oc in neon data is rounded to the nearest integer. # -- Check to make sure the rounded oc is not higher than carbon_tot. # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. 
if estimated_oc > carbon_tot: estimated_oc = carbon_tot layer_depth = (df["biogeoBottomDepth"][bin_index[soil_lev]] - df["biogeoTopDepth"][bin_index[soil_lev]]) # f2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 # -- after adding caco3 by NEON: # -- if caco3 exists: # -- inorganic = caco3/100.0869*12.0107 # -- organic = carbon_tot - inorganic # -- else: # -- oranigc = estimated_oc * bulk_den /0.58 caco3 = df["caco3Conc"][bin_index[soil_lev]] inorganic = caco3 / 100.0869 * 12.0107 print("inorganic:", inorganic) if not np.isnan(inorganic): actual_oc = carbon_tot - inorganic else: actual_oc = estimated_oc f2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 print("~~~~~~~~~~~~~~~~~~~~~~~~") print("inorganic:") print("~~~~~~~~~~~~~~~~~~~~~~~~") print(inorganic) print("~~~~~~~~~~~~~~~~~~~~~~~~") print("bin_index : ", bin_index[soil_lev]) print("layer_depth : ", layer_depth) print("carbon_tot : ", carbon_tot) print("estimated_oc : ", estimated_oc) print("bulk_den : ", bulk_den) print("organic :", f2["ORGANIC"][soil_lev].values) print("--------------------------") # -- Interpolate missing values method = "linear" fill_interpolate(f2, "PCT_CLAY", method) fill_interpolate(f2, "PCT_SAND", method) fill_interpolate(f2, "ORGANIC", method) # -- Update zbedrock if neon observation does not make it down to 2m depth rock_thresh = 2 zb_flag = False if obs_bot.iloc[-1] < rock_thresh: print("zbedrock is updated.") f2["zbedrock"].values[:, :] = obs_bot.iloc[-1] zb_flag = True sort_print_soil_layers(obs_bot, soil_bot) # -- updates for ag sites : KONA and STER ag_sites = ["KONA", "STER"] if site_name in ag_sites: print("Updating PCT_NATVEG") print("Original : ", f2.PCT_NATVEG.values) f2.PCT_NATVEG.values = [[0.0]] print("Updated : ", f2.PCT_NATVEG.values) print("Updating PCT_CROP") print("Original : ", f2.PCT_CROP.values) f2.PCT_CROP.values = [[100.0]] print("Updated : ", f2.PCT_CROP.values) print("Updating PCT_NAT_PFT") #print (f2.PCT_NAT_PFT) print(f2.PCT_NAT_PFT.values[0]) f2.PCT_NAT_PFT.values[0] = [[100.0]] print(f2.PCT_NAT_PFT[0].values) out_dir = args.out_dir # -- make out_dir if it does not exist if not os.path.exists(out_dir): os.makedirs(out_dir) # -- update time tag for the output file wfile = out_dir + update_time_tag(surf_file) # -- update netcdf metadata f2 = update_metadata(f2, surf_file, neon_file, zb_flag) print(f2.attrs) f2.to_netcdf(path=wfile, mode="w", format="NETCDF3_64BIT") print("Successfully updated surface data file for neon site(" + site_name + "):\n - " + wfile)
def to_bin(value, bins):
    return np.digitize(x=[value], bins=bins)[0]
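# Quick usage sketch for to_bin (assumes numpy as np; the edges are
# illustrative). np.digitize returns 0 for values below the first edge and
# len(bins) for values at or above the last edge.
edges = np.array([0.0, 10.0, 20.0, 30.0])
to_bin(-5.0, edges)   # -> 0
to_bin(15.0, edges)   # -> 2
to_bin(99.0, edges)   # -> 4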
model = Model(inputs=inputs, outputs=proba) model.compile(loss='binary_crossentropy', optimizer=optimizer) return model with open('/home/alex/diplom/constants/buckets_info.pkl', 'rb') as f: mapping_seq_len_to_padded_len = pickle.load(f) with open('/home/alex/diplom/constants/dense_features_buckets.pkl', 'rb') as f: dense_features_buckets = pickle.load(f) df = pd.read_csv(sys.argv[1]) for dense_col in ['amnt', 'days_before', 'hour_diff']: df[dense_col] = np.digitize(df[dense_col], bins=dense_features_buckets[dense_col]) seq = transform_transactions_to_sequences(df) seq['sequence_length'] = seq.sequences.apply(lambda x: len(x[1])) seq['product'] = 1 x = create_padded_buckets(seq, mapping_seq_len_to_padded_len, save_to_file_path=None, has_target=False) embedding_projections = { 'currency': (11, 6), 'operation_kind': (7, 5), 'card_type': (175, 29), 'operation_type': (22, 9),
def get_state(observation):
    pos, vel = observation
    pos_bin = int(np.digitize(pos, pos_space))
    vel_bin = int(np.digitize(vel, vel_space))

    return (pos_bin, vel_bin)
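# get_state relies on module-level bin edges pos_space / vel_space; a
# plausible setup for a MountainCar-style discretisation (the ranges below
# are assumptions used only for illustration):
pos_space = np.linspace(-1.2, 0.6, 12)    # assumed position bin edges
vel_space = np.linspace(-0.07, 0.07, 20)  # assumed velocity bin edges

state = get_state((-0.5, 0.01))  # -> (discrete position bin, velocity bin)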
def init_2D(cls, mesh1: Mesh2D, mesh2: Mesh2D, boundary1: ndarray, boundary2: ndarray, tangent: ndarray): """Create mortar mappings for two 2D meshes via projection. Parameters ---------- mesh1 An object of the type :class:`~skfem.mesh.mesh_2d.Mesh2D`. mesh2 An object of the type :class:`~skfem.mesh.mesh_2d.Mesh2D`. boundary1 A subset of facets to use from mesh1. boundary2 A subset of facets to use from mesh2. tangent A tangent vector defining the direction of the projection. """ from ..mesh import MeshLine tangent /= np.linalg.norm(tangent) # find unique nodes on the two boundaries p1_ix = np.unique(mesh1.facets[:, boundary1].flatten()) p2_ix = np.unique(mesh2.facets[:, boundary2].flatten()) p1 = mesh1.p[:, p1_ix] p2 = mesh2.p[:, p2_ix] def proj(p): """Project onto the line defined by 'tangent'.""" return np.outer(tangent, tangent) @ p def param(p): """Calculate signed distances of projected points from origin.""" y = proj(p) return np.linalg.norm(y, axis=0) * np.sign(np.dot(tangent, y)) # find unique supermesh facets by combining nodes from both sides param_p1 = param(p1) param_p2 = param(p2) _, ix = np.unique(np.concatenate((param_p1, param_p2)), return_index=True) ixorig = np.concatenate((p1_ix, p2_ix + mesh1.p.shape[1]))[ix] p = np.array([np.hstack((param(mesh1.p), param(mesh2.p)))]) t = np.array([ixorig[:-1], ixorig[1:]]) # create 1-dimensional supermesh from the intersections of the # projected facet elements p = p[:, np.concatenate((t[0], np.array([t[1, -1]])))] range_max = np.min([np.max(param_p1), np.max(param_p2)]) range_min = np.max([np.min(param_p1), np.min(param_p2)]) p = np.array([p[0, (p[0] <= range_max) * (p[0] >= range_min)]]) t = np.array([np.arange(p.shape[1] - 1), np.arange(1, p.shape[1])]) m_super = MeshLine(p, t) # helper meshes for creating the mappings m1 = MeshLine(np.sort(param_p1), np.array([np.arange(p1.shape[1] - 1), np.arange(1, p1.shape[1])])) m2 = MeshLine(np.sort(param_p2), np.array([np.arange(p2.shape[1] - 1), np.arange(1, p2.shape[1])])) # construct normals by rotating 'tangent' normal = np.array([tangent[1], -tangent[0]]) normals = normal[:, None].repeat(t.shape[1], axis=1) # initialize mappings (for orienting) map_super = m_super._mapping() map_m1 = m1._mapping() map_m2 = m2._mapping() map_mesh1 = mesh1._mapping() map_mesh2 = mesh2._mapping() # matching of elements in the supermesh and the helper meshes mps = map_super.F(np.array([[.5]])) ix1 = np.digitize(mps[0, :, 0], m1.p[0]) - 1 ix2 = np.digitize(mps[0, :, 0], m2.p[0]) - 1 # for each element, map two points to global coordinates, reparametrize # the points, and flip corresponding helper mesh element indices if # sorting is wrong f1mps = .5 * (mesh1.p[:, mesh1.facets[0, boundary1]] + mesh1.p[:, mesh1.facets[1, boundary1]]) sort_boundary1 = np.argsort(param(f1mps)) z1 = map_mesh1.G(map_m1.invF(map_super.F(np.array([[.25, .75]])), tind=ix1), find=boundary1[sort_boundary1][ix1]) ix1_flip = np.unique(ix1[param(z1[:, :, 1]) < param(z1[:, :, 0])]) m1t = m1.t.copy() m1t[:, ix1_flip] = np.flipud(m1t[:, ix1_flip]) m1 = replace(m1, t=m1t) f2mps = .5 * (mesh2.p[:, mesh2.facets[0, boundary2]] + mesh2.p[:, mesh2.facets[1, boundary2]]) sort_boundary2 = np.argsort(param(f2mps)) z2 = map_mesh2.G(map_m2.invF(map_super.F(np.array([[.25, .75]])), tind=ix2), find=boundary2[sort_boundary2][ix2]) ix2_flip = np.unique(ix2[param(z2[:, :, 1]) < param(z2[:, :, 0])]) m2t = m2.t.copy() m2t[:, ix2_flip] = np.flipud(m2t[:, ix2_flip]) m2 = replace(m2, t=m2t) # construct normals by rotating 'tangent' normal = np.array([tangent[1], 
-tangent[0]]) normals = normal[:, None].repeat(t.shape[1], axis=1) # initialize mappings (for orienting) map_super = m_super._mapping() map_m1 = m1._mapping() map_m2 = m2._mapping() map_mesh1 = mesh1._mapping() map_mesh2 = mesh2._mapping() # matching of elements in the supermesh and the helper meshes mps = map_super.F(np.array([[.5]])) ix1 = np.digitize(mps[0, :, 0], m1.p[0]) - 1 ix2 = np.digitize(mps[0, :, 0], m2.p[0]) - 1 return cls((map_mesh1, map_mesh2), (boundary1[sort_boundary1][ix1], boundary2[sort_boundary2][ix2]), (map_m1, map_m2), (ix1, ix2), map_super, normals)
def get_energy_dispersion(self, position, e_reco, migra_step=5e-3): """Get energy dispersion at a given position. Parameters ---------- position : `~astropy.coordinates.SkyCoord` the target position. Should be a single coordinates e_reco : `~astropy.units.Quantity` Reconstructed energy axis binning migra_step : float Integration step in migration Returns ------- edisp : `~gammapy.irf.EnergyDispersion` the energy dispersion (i.e. rmf object) """ # TODO: reduce code duplication with EnergyDispersion2D.get_response if position.size != 1: raise ValueError( "EnergyDispersion can be extracted at one single position only." ) # axes ordering fixed. Could be changed. pix_ener = np.arange(self.edisp_map.geom.axes[1].nbin) # Define a vector of migration with mig_step step mrec_min = self.edisp_map.geom.axes[0].edges[0] mrec_max = self.edisp_map.geom.axes[0].edges[-1] mig_array = np.arange(mrec_min, mrec_max, migra_step) pix_migra = (mig_array - mrec_min) / mrec_max * self.edisp_map.geom.axes[0].nbin # Convert position to pixels pix_lon, pix_lat = self.edisp_map.geom.to_image().coord_to_pix(position) # Build the pixels tuple pix = np.meshgrid(pix_lon, pix_lat, pix_migra, pix_ener) # Interpolate in the EDisp map. Squeeze to remove dimensions of length 1 edisp_values = np.squeeze( self.edisp_map.interp_by_pix(pix) * u.Unit(self.edisp_map.unit) # * migra_step ) e_trues = self.edisp_map.geom.axes[1].center data = [] for i, e_true in enumerate(e_trues): # We now perform integration over migra # The code is adapted from `~gammapy.EnergyDispersion2D.get_response` # migration value of e_reco bounds migra_e_reco = e_reco / e_true # Compute normalized cumulative sum to prepare integration tmp = np.nan_to_num( np.cumsum(edisp_values[:, i]) / np.sum(edisp_values[:, i]) ) # Determine positions (bin indices) of e_reco bounds in migration array pos_mig = np.digitize(migra_e_reco, mig_array) - 1 # We ensure that no negative values are found pos_mig = np.maximum(pos_mig, 0) # We compute the difference between 2 successive bounds in e_reco # to get integral over reco energy bin integral = np.diff(tmp[pos_mig]) data.append(integral) data = np.asarray(data) # EnergyDispersion uses edges of true energy bins e_true_edges = self.edisp_map.geom.axes[1].edges e_lo, e_hi = e_true_edges[:-1], e_true_edges[1:] ereco_lo, ereco_hi = (e_reco[:-1], e_reco[1:]) return EnergyDispersion( e_true_lo=e_lo, e_true_hi=e_hi, e_reco_lo=ereco_lo, e_reco_hi=ereco_hi, data=data, )
def select(cls, dataset, selection_mask=None, **selection): if selection_mask is not None: raise ValueError( "Masked selections currently not supported for {0}.".format( cls.__name__)) dimensions = dataset.kdims val_dims = [vdim for vdim in dataset.vdims if vdim in selection] if val_dims: raise IndexError( 'Cannot slice value dimensions in compressed format, ' 'convert to expanded format before slicing.') indexed = cls.indexed(dataset, selection) full_selection = [(d, selection.get(d.name, selection.get(d.label))) for d in dimensions] data = {} value_select = [] for i, (dim, ind) in enumerate(full_selection): irregular = cls.irregular(dataset, dim) values = cls.coords(dataset, dim, irregular) mask = cls.key_select_mask(dataset, values, ind) if irregular: if np.isscalar(ind) or isinstance(ind, (set, list)): raise IndexError( "Indexing not supported for irregularly " "sampled data. %s value along %s dimension." "must be a slice or 2D boolean mask." % (ind, dim)) mask = mask.max(axis=i) elif dataset._binned: edges = cls.coords(dataset, dim, False, edges=True) inds = np.argwhere(mask) if np.isscalar(ind): emin, emax = edges.min(), edges.max() if ind < emin: raise IndexError("Index %s less than lower bound " "of %s for %s dimension." % (ind, emin, dim)) elif ind >= emax: raise IndexError( "Index %s more than or equal to upper bound " "of %s for %s dimension." % (ind, emax, dim)) idx = max([np.digitize([ind], edges)[0] - 1, 0]) mask = np.zeros(len(values), dtype=np.bool) mask[idx] = True values = edges[idx:idx + 2] elif len(inds): values = edges[inds.min():inds.max() + 2] else: values = edges[0:0] else: values = values[mask] values, mask = np.asarray(values), np.asarray(mask) value_select.append(mask) data[dim.name] = np.array([values ]) if np.isscalar(values) else values int_inds = [np.argwhere(v) for v in value_select][::-1] index = np.ix_(*[ np.atleast_1d(np.squeeze(ind)) if ind.ndim > 1 else np. atleast_1d(ind) for ind in int_inds ]) for kdim in dataset.kdims: if cls.irregular(dataset, dim): da = dask_array_module() if da and isinstance(dataset.data[kdim.name], da.Array): data[kdim.name] = dataset.data[kdim.name].vindex[index] else: data[kdim.name] = np.asarray(data[kdim.name])[index] for vdim in dataset.vdims: da = dask_array_module() if da and isinstance(dataset.data[vdim.name], da.Array): data[vdim.name] = dataset.data[vdim.name].vindex[index] else: data[vdim.name] = np.asarray(dataset.data[vdim.name])[index] if indexed: if len(dataset.vdims) == 1: da = dask_array_module() arr = np.squeeze(data[dataset.vdims[0].name]) if da and isinstance(arr, da.Array): arr = arr.compute() return arr if np.isscalar(arr) else arr[()] else: return np.array( [np.squeeze(data[vd.name]) for vd in dataset.vdims]) return data
def _transform(self, c):
    return np.where(np.isnan(c), np.NaN, np.digitize(c, self.points))
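# Sketch of what _transform does (hypothetical stand-alone version, assuming
# numpy as np): values are mapped to bin indices via np.digitize, but NaNs
# are kept as NaN instead of being pushed into the overflow bin.
points = np.array([0.0, 1.0, 2.0])
c = np.array([0.5, 1.5, np.nan])
np.where(np.isnan(c), np.nan, np.digitize(c, points))
# -> array([ 1.,  2., nan])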
def mapToCSR(mat: dict) -> Tuple[ndarray, ndarray, ndarray]: """ Given a pyoptsparse matrix definition, return a tuple containing a map of the matrix to the CSR format. Parameters ---------- mat : dict A sparse matrix representation. Returns ------- tup : tuple of numpy arrays tup[0] : numpy array (size=num_rows+1) An array that holds the indices in col_idx and data at which each row begins. The last index of contains the number of nonzero elements in the sparse array. tup[1] : numpy array (size=nnz) An array of the column indices of each element in data. tup[2] : numpy array (size=nnz) An indexing array which maps the elements in the data array to elements in the CSR data array. """ if "csr" in mat: # First handle the trivial case CSR->CSR row_p = mat["csr"][IROW] col_idx = mat["csr"][ICOL] idx_data = np.s_[:] return row_p, col_idx, idx_data num_rows = mat["shape"][0] num_cols = mat["shape"][1] if "csc" in mat: # If given a CSC matrix, expand the column pointers so we # effectively have a COO representation. csc_colp = mat["csr"][ICOL] rows = mat["csc"][IROW] nnz = csc_colp[-1] # Allocate the COO maps cols = np.zeros(nnz, dtype="intc") # We already have a full representation of the columns. # We need to decompress the representation of the rows. for j in range(num_cols): cols[csc_colp[j] : csc_colp[j + 1]] = j elif "coo" in mat: rows = mat["coo"][IROW] cols = mat["coo"][ICOL] nnz = len(rows) # Allocate the row pointer array row_p = np.zeros(num_rows + 1, dtype="intc") # Get the sort order that puts data in row-major form idx_data = np.lexsort((cols, rows)) # Apply the row-major indexing to the COO column and row indices col_idx = np.asarray(cols, dtype="intc")[idx_data] rows_rowmaj = np.asarray(rows, dtype="intc")[idx_data] # Now for i = 0 to num_rows-1, row_p[i] is the first occurrence # of i in rows_rowmaj row_p[:-1] = np.digitize(np.arange(num_rows), rows_rowmaj, right=True) # By convention store nnz in the last element of row_p row_p[-1] = nnz return row_p, col_idx, idx_data
def digitizeAmplitudesMono(y, bitdepth):
    bins = np.linspace(-1, 1, 2**bitdepth+1)
    y_digitized = bins[np.digitize(y, bins) - 1]
    return y_digitized, np.linspace(-1, 1, 2**bitdepth+1)
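# Usage sketch (assumes numpy as np): quantise a signal in [-1, 1] onto a
# 3-bit level grid. Note that np.digitize maps each sample to the bin edge
# just below it, so this rounds down rather than to the nearest level.
t = np.linspace(0, 1, 8, endpoint=False)
y = np.sin(2 * np.pi * t)
y_q, levels = digitizeAmplitudesMono(y, 3)   # 2**3 + 1 = 9 level edges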
def feature_engineering_step1(_df): title_mapping = { 'Capt': 'Mr', 'Col': 'Mr', 'Don': 'Mr', 'Dr': 'Mr', 'Jonkheer': 'Mr', 'Lady': 'Mrs', 'Major': 'Mr', 'Master': 'Master', 'Miss': 'Miss', 'Mlle': 'Miss', 'Mme': 'Mrs', 'Mr': 'Mr', 'Mrs': 'Mrs', 'Ms': 'Miss', 'Rev': 'Mr', 'Sir': 'Mr', 'the Countess': 'Mrs' } title_age_mapping = { 'Capt': 'elder', 'Col': 'elder', 'Don': 'adult', 'Dr': 'adult', 'Jonkheer': 'adult', 'Lady': 'elder', 'Major': 'elder', 'Master': 'young', 'Miss': 'young', 'Mlle': 'young', 'Mme': 'adult', 'Mr': 'adult', 'Mrs': 'adult', 'Ms': 'adult', 'Rev': 'adult', 'Sir': 'elder', 'the Countess': 'adult' } cabin_mapping = { 'A': 'M', 'B': 'G', 'C': 'M', 'D': 'G', 'E': 'G', 'F': 'G', 'G': 'M', 'T': 'X', 'X': 'X' } _df['Sex_'] = _df['Sex'].apply(lambda x: 1 if x=='female' else 0) _df['Title_'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[1].strip()) _df['FamilyName'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[0].strip()) #_df['Fare_'] = _df['Fare'].fillna(20) #_df['Fare_'] = _df['Fare_'].apply(lambda x: 40 if x > 40 else x) #### _df['Fare_'] = _df['Fare'] _df.loc[ (_df.Fare.isnull())&(_df.Pclass==1),'Fare_'] =np.median(_df[_df['Pclass'] == 1]['Fare'].dropna()) _df.loc[ (_df.Fare.isnull())&(_df.Pclass==2),'Fare_'] =np.median( _df[_df['Pclass'] == 2]['Fare'].dropna()) _df.loc[ (_df.Fare.isnull())&(_df.Pclass==3),'Fare_'] = np.median(_df[_df['Pclass'] == 3]['Fare'].dropna()) #### _df['Fare_'] = _df['Fare_'] / (1+_df['SibSp']+_df['Parch']) _df['HasFare'] = _df['Fare'].apply(lambda x: 0 if np.isnan(x) else 1) _df['Fare_b'] = np.digitize(_df['Fare_'], [0,5,10,20,30,40]) # Family Size _df['FamilySize'] = (_df['SibSp'] + _df['Parch']) _df['HasFamily'] = (_df['SibSp'] + _df['Parch']).map(lambda x: 0 if x == 0 else 1) # Age _df['HasAge'] = _df['Age'].apply(lambda x: 0 if np.isnan(x) else 1) _df['Age_s'] = _df['Age'].apply(age_to_s) # or #_df['Age_'] = _df["Age"].fillna(_df["Age"].mean()) # http://stackoverflow.com/questions/21050426/pandas-impute-nans # Title _df['Title_'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[1].strip()) _df.loc[(_df['Title_'].isnull()) & (_df['Sex']=='female'),('Title_')] = 'Miss' _df.loc[(_df['Title_'].isnull()) & (_df['Sex']=='male' ), ('Title_')] = 'Master' _df['Title_s'] = _df['Title_'].map(title_mapping) _df['Title_Age_s'] = _df['Title_'].map(title_age_mapping) _df['Title_Age_s'] = _df['Title_Age_s'].fillna('adult') ## fill age NAN: _df.loc[_df['HasAge']==0, ('Age_s')]= _df[_df['HasAge']==0]['Title_Age_s'] # Cabin: _df['Cabin_'] = _df['Cabin'].apply(lambda x: 'X' if isinstance(x, float) else x[0]) _df['Cabin_s'] = _df['Cabin_'].map(cabin_mapping) # NaN is no problem for get_dummies # However let's try to keep it as a feature called X # Embarked: _df['Embarked_'] = _df['Embarked'].apply(lambda x: 'S' if isinstance(x, float) else x) df_return = _df.loc[:,('Age','Age_s','HasAge', 'Sex','Pclass','Fare_', 'Fare_b','Title_s', 'Title_Age_s','Embarked_','Cabin_s', 'HasFamily', 'SibSp','Parch','FamilySize','FamilyName')] return df_return
def get_contextual_similarity(candidate_dataset_id, kb_entry, mention_contexts, scispacy_parser, glove): """Computes contextual similarity scores between the candidate dataset description and the mention contexts using glove embeddings and cosine similarity. @param candidate_dataset_id: the id of the candidate dataset @param kb_entry: the knowledge base entry for the candidate dataset @param mention_contexts: a list of mention contexts to compute similarity over @param scispacy_parser: a scispacy parser @param glove: a dictionary of glove word embeddings """ glove_dim = 50 bins = np.linspace(0, 1, 11) num_bins = bins.shape[0] description = kb_entry["description"] if description == "": return [0] * num_bins, [0] * num_bins description = scispacy_parser.scispacy_create_doc(description) # try both max pooling and average pooling of word embeddings to get sentence representation embedded_description_max = [] embedded_description_avg = [] for sentence in description.sents: tokens = [t.text.lower() for t in sentence] glove_tokens = [t for t in tokens if t in glove] embedded_sentence = [ np.linalg.norm(glove[t], ord=2) for t in glove_tokens if t not in NLTK_STOPWORDS ] # embedded_sentence = [embedding*idf_dict[t] if t in idf_dict else embedding*idf_dict["<MAX_VALUE>"] for embedding, t in zip(embedded_sentence, glove_token)] last_embedding_layer = embedded_sentence if last_embedding_layer == []: continue embedded_description_max.append(np.max(last_embedding_layer, axis=0)) embedded_description_avg.append(np.mean(last_embedding_layer, axis=0)) # try both max pooling and average pooling of word embeddings to get sentence representation embedded_contexts_max = [] embedded_contexts_avg = [] for context in mention_contexts: embedded_context_max = [] embedded_context_avg = [] for sentence in context[0]: tokens = [t.text.lower() for t in sentence] glove_tokens = [t for t in tokens if t in glove] embedded_sentence = [ np.linalg.norm(glove[t], ord=2) for t in glove_tokens if t not in NLTK_STOPWORDS ] # embedded_sentence = [embedding*idf_dict[t] if t in idf_dict else embedding*idf_dict["<MAX_VALUE>"] for embedding, t in zip(embedded_sentence, glove_token)] last_embedding_layer = embedded_sentence if last_embedding_layer == []: continue embedded_context_max.append(np.max(last_embedding_layer, axis=0)) embedded_context_avg.append(np.mean(last_embedding_layer, axis=0)) embedded_contexts_max.append(embedded_context_max) embedded_contexts_avg.append(embedded_context_avg) cosine_distances_max = [] cosine_distances_avg = [] for context_max, context_avg in zip(embedded_contexts_max, embedded_contexts_avg): for sentence_max, sentence_avg in zip(context_max, context_avg): for description_max, description_avg in zip( embedded_description_max, embedded_description_avg): max_cosine = scipy.spatial.distance.cosine( sentence_max, description_max) avg_cosine = scipy.spatial.distance.cosine( sentence_avg, description_avg) if not math.isnan(max_cosine): cosine_distances_max.append(max_cosine) if not math.isnan(avg_cosine): cosine_distances_avg.append(avg_cosine) # bin the similarity scores of description sentence and context sentence pairs digitized_max = np.digitize(cosine_distances_max, bins) digitized_avg = np.digitize(cosine_distances_avg, bins) binned_max = [0] * num_bins binned_avg = [0] * num_bins # use a one hot representation with a one for the largest similarity bin that has a pair in it binned_max[max(digitized_max) - 1] = 1 binned_avg[max(digitized_avg) - 1] = 1 return binned_max, binned_avg
def read_gliderdata(self, lat, lon): path = os.path.join(self.gliders_directory, self.glider_name, 'from-glider', '%s*.[st]bd' % (self.glider_name)) dbd = dbdreader.MultiDBD(pattern=path) if self.glider_name == 'sim': print("Warning: assuming simulator. I am making up CTD data!") t, P = dbd.get("m_depth") P /= 10 C = np.ones_like(P) * 4 T = np.ones_like(P) * 15 else: tmp = dbd.get_sync("sci_water_cond", "sci_water_temp sci_water_pressure".split()) t_last = tmp[0][-1] age = t_last - tmp[0] t, C, T, P = tmp.compress(np.logical_and(tmp[1] > 0, age < self.AGE * 3600), axis=1) try: _, u, v = dbd.get_sync("m_water_vx", ["m_water_vy"]) except dbdreader.DbdError: try: _, u, v = dbd.get_sync("m_final_water_vx", ["m_final_water_vy"]) except dbdreader.DbdError: u = np.array([0]) v = np.array([0]) u, v = np.compress(np.logical_and( np.abs(u) < 1.5, np.abs(v) < 1.5), [u, v], axis=1) rho = fast_gsw.rho(C * 10, T, P * 10, lon, lat) SA = fast_gsw.SA(C * 10, T, P * 10, lon, lat) # compute the age of each measurement, and the resulting weight. dt = t.max() - t weights = np.exp(-dt / (self.AGE * 3600)) # make binned averages max_depth = P.max() * 10 dz = 5 zi = np.arange(dz / 2, max_depth + dz / 2, dz) bins = np.arange(0, max_depth + dz, dz) bins[0] = -10 idx = np.digitize(P * 10, bins) - 1 rho_avg = np.zeros_like(zi, float) SA_avg = np.zeros_like(zi, float) T_avg = np.zeros_like(zi, float) weights_sum = np.zeros_like(zi, float) for _idx, _w, _rho, _SA, _T in zip(idx, weights, rho, SA, T): try: rho_avg[_idx] += _rho * _w SA_avg[_idx] += _SA * _w T_avg[_idx] += _T * _w weights_sum[_idx] += _w except IndexError: continue # if data are sparse, it can be that ther are gaps j = np.unique(idx) zj = zi[j] rho_avg = rho_avg[j] / weights_sum[j] SA_avg = SA_avg[j] / weights_sum[j] T_avg = T_avg[j] / weights_sum[j] self.rho_fun = interp1d(zj, rho_avg, bounds_error=False, fill_value=(rho_avg[0], rho_avg[-1])) self.SA_fun = interp1d(zj, SA_avg, bounds_error=False, fill_value=(SA_avg[0], SA_avg[-1])) self.T_fun = interp1d(zj, T_avg, bounds_error=False, fill_value=(T_avg[0], T_avg[-1])) if self.u_fun is None: # not intialised yet, use last water current estimate available. self.u_fun = lambda x: u[-1] self.v_fun = lambda x: v[-1]
def gen_motormap ( pid, savedir ): ''' Generate a controlled step motor map via GP to use to seed the convergence runs. Parameters ========== pid : int PID of the the cobra for which to generate motor maps savedir : str The directory path in which to save output. ''' if not type(pid) is int: pid, savedir = pid mmap = {} for key in dirdict.keys (): fname = dirname + dirdict[key] + '/Log/PhiSpecMove_mId_1_pId_%i.txt' % pid if not os.path.exists ( fname ): print('Not found: %s' % fname) continue movesize, nmoves, niter = paramdict[key] ctrlstep = movement.read_ctrlstep ( fname, movesize, movesize=0, verbose=True, motor_id=middict[key]) ctrlstep = ctrlstep.convert_objects () #// filter ctrlstep based on Johannes' suggestions lowthresh = 0.01 ctrlstep.loc[ctrlstep['stepsize'] < 0, 'stepsize'] = np.nan ctrlstep.loc[ctrlstep['stepsize'] > 1., 'stepsize'] = np.nan bins = np.arange ( 0., 400., 10. ) assns = np.digitize ( ctrlstep['startangle'], bins ) grps = ctrlstep.groupby ( assns ) ssmean = grps.mean()['stepsize'] sscount = grps.count()['stepsize'] #// cut on mean change or overpopulation deltam = abs(ssmean-ssmean.mean()) > 3.*ssmean.std() deltact = abs(sscount-sscount.mean()) > 3.*sscount.std() to_cut = ssmean.index[deltam|deltact] ctrlstep.loc[np.in1d(assns, to_cut),'stepsize'] = np.NaN slow_mask = ctrlstep['stepsize'] < lowthresh ctrlstep.loc[slow_mask, 'stepsize' ] += .03 #mmap[key] = ctrlstep #continue mm = np.isfinite(ctrlstep).all(axis=1) try: gpmod, axarr = analyze.viz_gproc ( [ctrlstep.loc[mm]], angle_grid=angbins ) except ValueError: return gpmod = gpmod[0] #// set no-data to mmap=0.1 if 'stage2' in key: max_angle = 180. else: max_angle = 365. gap_thresh = 20. gaps = ctrlstep['startangle'].sort_values().diff().dropna() > gap_thresh gap_ends = gaps.loc[gaps].index gap_stts = set () for eval in gap_ends: gap_stts.add(gaps.index[gaps.index.get_loc(eval) - 1]) gap_ends = ctrlstep.loc[gap_ends,'startangle'].values.tolist() gap_stts = ctrlstep['startangle'].loc[gap_stts].values.tolist() if ctrlstep.loc[gaps.index[0],'startangle'] > gap_thresh: gap_stts.append(0) gap_ends.append(ctrlstep.loc[gaps.index[0],'startangle']) elif ctrlstep.loc[gaps.index[-1], 'startangle'] < (max_angle - gap_thresh): gap_stts.append(ctrlstep.loc[gaps.index[-1],'startangle']) gap_ends.append(max_angle) for start, end in zip ( gap_stts, gap_ends ): out_of_bounds = (gpmod.angle_grid>start)&(gpmod.angle_grid<end) gpmod.shape_mu[out_of_bounds] = 0.1/gpmod.mmean gpmod.shape_mu[(gpmod.shape_mu*gpmod.mmean)<.02] = .02/gpmod.mmean axarr[0].plot ( gpmod.angle_grid, gpmod.shape_mu*gpmod.mmean, '--', color='dodgerblue') axarr[1].plot ( gpmod.angle_grid, gpmod.shape_mu, '--', color='dodgerblue') axarr[0].set_ylim(0., 0.25) mmap[key] = gpmod np.savetxt ( savedir + '/pid%i_%s.dat' % (pid, key ), gpmod.shape_mu * gpmod.mmean) plt.savefig(savedir + '/figures/pid%i_%s.png' % (pid, key) ) plt.close('all') return mmap
def p(new_df: pd.DataFrame) -> pd.DataFrame:
    # `bins`, `right` and `columns_to_bin` come from the enclosing scope.
    col_biner = lambda col: np.where(new_df[col].isnull(),
                                     np.nan,
                                     np.digitize(new_df[col], bins[col], right=right))
    bined_columns = {col: col_biner(col) for col in columns_to_bin}
    return new_df.assign(**bined_columns)
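# A minimal usage sketch for the closure above. The names `bins`,
# `columns_to_bin` and `right` are normally supplied by the enclosing scope;
# the values below are made up for illustration only.
import numpy as np
import pandas as pd

columns_to_bin = ['age']
bins = {'age': np.array([18, 30, 45, 65])}
right = False

df = pd.DataFrame({'age': [22, 37, np.nan, 70]})
# Each value is replaced by the index of the bin it falls into; NaNs stay NaN.
print(p(df))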
def histogram(self, outputs, options, units, setting): """ Produces histograms for the given outputs, tailored to the classes of data selected. First determines the bins, then populates these, then plots the result using Matplotlib. 'options' determines the types of data available. 'setting' determines which data types are required 'units' are the units of measurement strings corresponding to the respective data classes. these are inserted into the histogram legends. """ kCs, kNs, keCs, krs = 'actual Cs', 'actual Ns', 'est. Cs', 'recognition' xmode, ymode, data_cfg = self.unwrap_cfg(options, setting) xmode_unit, ymode_unit = units[xmode], units[ymode] self.logger.info('creating %s against %s histogram' % (ymode, xmode)) match = (data_cfg == 'matched') nr_bins = 100 plt_data = {xmode: np.zeros((nr_bins, )), ymode: np.zeros((nr_bins, ))} if data_cfg == 'recognition': get = kCs, keCs, kNs, krs if data_cfg == 'matched': get = kCs, keCs, kNs data = self.get_data(outputs, get=get, match=match) Y = np.zeros((data[kCs].shape[0], )) if ymode == 'mean error': for i, (C, eC) in enumerate(zip(data[kCs], data[keCs])): dif = C - eC Y[i] = np.linalg.norm(dif) elif ymode == 'recognition rate': Y = data[krs].flatten() X = np.zeros(data[kCs].shape[0]) if xmode == 'angle': for i, (C, N) in enumerate(zip(data[kCs], data[kNs])): X[i] = self.get_angle(-C, N) elif xmode == 'distance to cam': for i, C in enumerate(data[kCs]): X[i] = np.linalg.norm(C) Xmin, Xmax = np.min(X), np.max(X) bin_bounds, step = np.linspace(Xmin, Xmax, nr_bins + 1, retstep=True) plt_data[xmode] = bin_bounds[:-1] # bin boundary values X_bin_indices = np.digitize(X, plt_data[xmode]) bincount = np.zeros((nr_bins, )) bins = [[] for x in xrange(nr_bins)] for i, y in enumerate(Y): plt_data[ymode][X_bin_indices[i] - 1] += y bincount[X_bin_indices[i] - 1] += 1 bins[X_bin_indices[i] - 1].append(y) std = np.zeros((nr_bins, )) for i in xrange(nr_bins): if bins[i]: std[i] = np.std(bins[i]) for i, total in enumerate(plt_data[ymode]): if (1. * total * bincount[i]) == 0: continue plt_data[ymode][i] = total / bincount[i] fig = plt.figure() ax = plt.subplot(111) rwidth = step if ymode == 'recognition rate': rects = ax.bar(plt_data[xmode], plt_data[ymode], rwidth, color='b') else: rects = ax.bar(plt_data[xmode], plt_data[ymode], rwidth, color='b', yerr=std, ecolor='r') ax.set_title('%s against %s' % (ymode, xmode)) ax.set_xlabel(xmode + xmode_unit) ax.set_ylabel(ymode + ymode_unit) ax.grid(True) if data_cfg == 'arecognition': for rect in rects: height = rect.get_height() ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, '%d' % int(height), ha='center', va='bottom') plt.show()
def find_events_fast(self):
    if self._triggers is None:
        return None
    # self.filterBadTriggers()
    if self._triggers.size < 5:
        return None
    self.filterBadTriggers()
    if self._toa is None:
        return None
    if self._toa.size == 0:
        # Clear out the triggers since they have nothing
        return None
    # Get our start/end triggers to get events
    start = self._triggers[0:-1:]
    if start.size == 0:
        return None
    min_window, max_window = self._eventWindow

    trigger_counter = np.arange(self._trigger_counter,
                                self._trigger_counter + start.size,
                                dtype=np.int64)
    self._trigger_counter = trigger_counter[-1] + 1

    # end = self._triggers[1:-1:]
    # Get the first and last triggers in pile
    first_trigger = start[0]
    last_trigger = start[-1]
    # print('First Trigger last trigger', first_trigger, last_trigger)
    # print('TOA before', self._toa)

    # Delete useless pixels behind the first trigger
    self.updateBuffers(self._toa >= first_trigger)
    # grab only pixels we care about
    x, y, toa, tot = self.getBuffers(self._toa < last_trigger)
    # print('triggers', start)
    # print('TOA', toa)
    self.updateBuffers(self._toa >= last_trigger)
    try:
        event_mapping = np.digitize(toa, start) - 1
    except Exception as e:
        self.error('Exception has occurred: {}'.format(e))
        self.error('Writing output TOA {}'.format(toa))
        self.error('Writing triggers {}'.format(start))
        self.error('Flushing triggers!!!')
        self._triggers = self._triggers[-1:]
        return None
    event_triggers = self._triggers[:-1:]
    self._triggers = self._triggers[-1:]
    # print('Trigger delta', triggers, np.ediff1d(triggers))

    tof = toa - event_triggers[event_mapping]
    event_number = trigger_counter[event_mapping]

    exp_filter = (tof >= min_window) & (tof <= max_window)

    result = (event_number[exp_filter], x[exp_filter], y[exp_filter],
              tof[exp_filter], tot[exp_filter])

    if result[0].size > 0:
        return result
    else:
        return None
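# Minimal sketch (made-up numbers) of the trigger-mapping trick used above:
# np.digitize(toa, triggers) - 1 gives, for each hit, the index of the last
# trigger that fired before it, so tof = toa - triggers[mapping].
import numpy as np

triggers = np.array([10.0, 20.0, 30.0])
toa = np.array([11.5, 12.0, 21.0, 29.5, 31.0])

mapping = np.digitize(toa, triggers) - 1   # -> [0, 0, 1, 1, 2]
tof = toa - triggers[mapping]              # arrival time relative to each hit's trigger
print(mapping, tof)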
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from skimage import data
from skimage.filters import threshold_multiotsu

# Setting the font size for all plots.
matplotlib.rcParams['font.size'] = 9

# The input image.
image = data.camera()

# Applying multi-Otsu threshold for the default value, generating
# three classes.
thresholds = threshold_multiotsu(image)

# Using the threshold values, we generate the three regions.
regions = np.digitize(image, bins=thresholds)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 3.5))

# Plotting the original image.
ax[0].imshow(image, cmap='gray')
ax[0].set_title('Original')
ax[0].axis('off')

# Plotting the histogram and the two thresholds obtained from
# multi-Otsu.
ax[1].hist(image.ravel(), bins=255)
ax[1].set_title('Histogram')
for thresh in thresholds:
    ax[1].axvline(thresh, color='r')
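# A small illustration (toy array, separate from the example above) of how
# np.digitize turns the two Otsu thresholds into class labels 0, 1, 2.
import numpy as np

pixels = np.array([5, 60, 120, 200, 250])
thresholds = np.array([80, 160])            # pretend multi-Otsu returned these
labels = np.digitize(pixels, bins=thresholds)
print(labels)                               # -> [0 0 1 2 2]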
        'output/enrichment/GTEx_maf_tss_binned/bins.{suffix}'
    run:
        maf_bins = np.linspace(0, 1, 51)
        tss_bins = np.linspace(-500000, 500000, 51)
        bins = {}
        for m in range(len(maf_bins)):
            bins[m] = {}
            for t in range(len(tss_bins)):
                bins[m][t] = defaultdict(list)
        with open(input[0], 'r') as f:
            for line in f:
                chromosome, variant, gene, maf, dtss = line.split('\t')
                if float(maf) > 0.01:
                    maf_bin = np.digitize(float(maf), maf_bins)
                    tss_bin = np.digitize(float(dtss), tss_bins)
                    bins[maf_bin][tss_bin][variant].append(gene)
        json.dump(bins, open(output[0], 'w'))

rule make_gene_variant_lookup:
    input:
        'output/enrichment/GTEx_maf_tss/GTEx_maf_tss.{suffix}'
    output:
        expand('output/enrichment/GTEx_maf_tss_lookup/chr{chr}/chr{chr}.lookup.{suffix}',
               chr=list(range(1, 23)), suffix='{suffix}')
    run:
        lookup = {'chr{}'.format(x): defaultdict(dict) for x in range(1, 23)}
        with open(input[0], 'r') as f:
            for line in f:
                chromosome, variant, gene, maf, dtss = line.strip().split('\t')
def histplot_raw(datas, bins, labels, weights=None, removenorm=False, scale=1., doerrorbar=True, **kwargs): settings = { "xlabel": r"$m_{Vh}[GeV]$", "ylabel": 'Number of Events', "title1": r"ATLAS", # \newline Ptl next-leading, full cuts, 2 b-tags $", "title1_1": r"Internal", "title2": r"$\mathit{\sqrt{s}=13\:TeV,139\:fb^{-1}}$", # Ptl next-leading, full cuts, 2 b-tags $", #"title3": r"$\mathbf{2\;lep.,2\;b-tag}$", "title3": "2 lep., 2 b-tag", "filename": "deltatest2", "log_y": False, "norm": False, "upper_y": 1.5, } for each_key in kwargs.items(): settings[each_key[0]] = kwargs[each_key[0]] if weights is None: weights = [] for each in datas: weights.append(np.ones(len(each))) if removenorm: for i in range(len(weights)): weights[i] = np.array(weights[i]) / np.sum(weights[i]) sigmas = [] weight_in_binses = [] for i in range(len(datas)): event_location = np.digitize(datas[i] / scale, bins) sigma2 = [] weight_in_bins = [] for j in range(np.size(bins) - 1): bin_weight = weights[i][np.where(event_location == j + 1)[0]] sigma2.append(np.sum(bin_weight**2.)) weight_in_bins.append(np.sum(bin_weight)) sigmas.append(np.array(sigma2)**0.5) weight_in_binses.append(np.array(weight_in_bins)) colors = ['b', 'g', 'r', 'c', 'm', 'y'] fig, ax = plt.subplots(figsize=(10, 8)) ax.hist((np.array(datas) / scale).tolist(), bins, histtype='step', fill=False, color=colors[0:len(datas)], weights=np.array(weights).tolist()) bins = np.array(bins) for i in range(len(datas)): bin_centre = (bins[0:-1] + bins[1:]) / 2 if doerrorbar: ax.errorbar(bin_centre, weight_in_binses[i], xerr=0.0001, yerr=sigmas[i], fmt='.', color=colors[i], label=str(labels[i])) else: ax.errorbar(bin_centre, weight_in_binses[i], xerr=0.0001, yerr=0, fmt='_', color=colors[i], label=str(labels[i])) ax.legend(loc='upper right', prop={'size': 20}, frameon=False) ymin, ymax = ax.get_ylim() ax.set_ylim([0, ymax * settings["upper_y"]]) ax.text(0.05, 1.55 / 1.7, settings['title1'], fontsize=25, transform=ax.transAxes, style='italic', fontweight='bold') ax.text(0.227, 1.55 / 1.7, settings['title1_1'], fontsize=25, transform=ax.transAxes) ax.text(0.05, 1.40 / 1.7, settings['title2'], fontsize=23, transform=ax.transAxes, style='italic', fontweight='bold') ax.text(0.05, 1.26 / 1.7, settings['title3'], fontsize=18, weight='bold', style='italic', transform=ax.transAxes) ax.set_ylabel(settings['ylabel'], fontsize=20) ax.set_xlabel(settings['xlabel'], fontsize=20) if settings['log_y']: ax.set_yscale('log') ax.set_ylim([0.1, 10**(math.log10(ymax) * settings["upper_y"])]) ax.yaxis.set_major_locator( matplotlib.ticker.LogLocator(base=10, numticks=100)) ax.minorticks_on() fig.savefig(settings['filename'] + '.pdf', bbox_inches='tight', pad_inches=0.25)
def consolidate_sweep_results(file_path, plot_hist=True): results = [] #for i in xrange(num_exps): # results.append(py_scripts_yann.load_pickle(file_path+'_'+str(i)+'.pkl')) import json json_data = open(file_path + '/config.json').read() sim_pars = json.loads(json_data)['sim_pars'] sweep_results = { 'pc_rval_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'pc_rval_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_connprob_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_connprob_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_connprob_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_connprob_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_pc_connprob': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_pc_connprob': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_pc_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_pc_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_pc_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_pc_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_selectivity_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_selectivity_mean': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_selectivity_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_selectivity_std': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_selectivity_upper': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_selectivity_upper': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_selectivity_max': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_selectivity_max': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'uniform_pc_input': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), 'diverse_pc_input': np.zeros((len(sim_pars['par_sweep_vals']), len(sim_pars['par_sweep_vals_2']))), } for par_val_idx in xrange(len(sim_pars['par_sweep_vals'])): par_val = sim_pars['par_sweep_vals'][par_val_idx] if not sim_pars['par_sweep_key_2'] == None: iter_pars = sim_pars.copy() iter_pars[sim_pars['par_sweep_key']] = par_val for par_val_2_idx in xrange(len(sim_pars['par_sweep_vals_2'])): par_val_2 = sim_pars['par_sweep_vals_2'][par_val_2_idx] iter_pars[sim_pars['par_sweep_key_2']] = par_val_2 str_i = sim_pars['par_sweep_key'] + str( iter_pars[sim_pars['par_sweep_key']] ) + sim_pars['par_sweep_key_2'] + str( iter_pars[sim_pars['par_sweep_key_2']]) res_file = os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '.pkl') try: temp_results = py_scripts_yann.load_pickle(res_file) sweep_results['pc_rval_mean'][ par_val_idx, par_val_2_idx] = np.mean([ temp_results['selectivities_results_corr'][i] ['pop_coupling_partial_rval'] for i in xrange(sim_pars['N_sims']) ]) sweep_results['pc_rval_std'][ par_val_idx, par_val_2_idx] = np.std([ 
temp_results['selectivities_results_corr'][i] ['pop_coupling_partial_rval'] for i in xrange(sim_pars['N_sims']) ]) unif_connprob_temp = [] diverse_connprob_temp = [] #unif_connprob_std_temp = [] #diverse_connprob_std_temp = [] unif_corr_pc_connprob = [] diverse_corr_pc_connprob = [] unif_pc_temp = [] diverse_pc_temp = [] unif_input_temp = [] diverse_input_temp = [] uniform_selectivity_temp = [] diverse_selectivity_temp = [] unif_corr_pc_input = [] diverse_corr_pc_input = [] for i in xrange(sim_pars['N_sims']): print 'sim idx ', i unif_connprob_temp.append( np.mean(temp_results['simresults_uniform'][i] ['W_conn'], axis=0)) diverse_connprob_temp.append( np.mean( temp_results['simresults_corr'][i]['W_conn'], axis=0)) unif_input_temp.append( np.sum(temp_results['simresults_uniform'][i] ['W_plastic'], axis=0)) diverse_input_temp.append( np.sum(temp_results['simresults_corr'][i] ['W_plastic'], axis=0)) #unif_connprob_std_temp.append(np.std(temp_results['simresults_uniform'][i]['W_conn'],axis=0)) #diverse_connprob_std_temp.append(np.std(temp_results['simresults_corr'][i]['W_conn'],axis=0)) unif_pc_temp.append( temp_results['selectivities_results_uniform'][i] ['empirical_pop_coupling']) diverse_pc_temp.append( temp_results['selectivities_results_corr'][i] ['empirical_pop_coupling']) unif_corr_pc_connprob.append( stats.pearsonr( temp_results['selectivities_results_uniform'] [i]['empirical_pop_coupling'], unif_connprob_temp[-1])) diverse_corr_pc_connprob.append( stats.pearsonr( temp_results['selectivities_results_corr'][i] ['empirical_pop_coupling'], diverse_connprob_temp[-1])) unif_corr_pc_input.append( stats.pearsonr( temp_results['selectivities_results_uniform'] [i]['empirical_pop_coupling'], unif_input_temp[-1])) diverse_corr_pc_input.append( stats.pearsonr( temp_results['selectivities_results_corr'][i] ['empirical_pop_coupling'], diverse_input_temp[-1])) uniform_selectivity_temp.append( temp_results['selectivities_t_uniform'][-1]) diverse_selectivity_temp.append( temp_results['selectivities_t_corr'][-1]) sweep_results['uniform_connprob_mean'][ par_val_idx, par_val_2_idx] = np.mean(np.array(unif_connprob_temp)) sweep_results['uniform_connprob_std'][ par_val_idx, par_val_2_idx] = np.std(np.array(unif_connprob_temp)) sweep_results['diverse_connprob_mean'][ par_val_idx, par_val_2_idx] = np.mean( np.array(diverse_connprob_temp)) sweep_results['diverse_connprob_std'][ par_val_idx, par_val_2_idx] = np.std( np.array(diverse_connprob_temp)) sweep_results['diverse_pc_connprob'][ par_val_idx, par_val_2_idx] = np.mean( np.array(diverse_corr_pc_connprob)) sweep_results['uniform_pc_connprob'][ par_val_idx, par_val_2_idx] = np.mean( np.array(unif_corr_pc_connprob)) sweep_results['diverse_pc_input'][ par_val_idx, par_val_2_idx] = np.mean( np.array(diverse_corr_pc_input)) sweep_results['uniform_pc_input'][ par_val_idx, par_val_2_idx] = np.mean(np.array(unif_corr_pc_input)) sweep_results['diverse_pc_mean'][ par_val_idx, par_val_2_idx] = np.mean(np.array(diverse_pc_temp)) sweep_results['uniform_pc_mean'][ par_val_idx, par_val_2_idx] = np.mean(np.array(unif_pc_temp)) sweep_results['diverse_pc_std'][ par_val_idx, par_val_2_idx] = np.std(np.array(diverse_pc_temp)) sweep_results['uniform_pc_std'][par_val_idx, par_val_2_idx] = np.std( np.array(unif_pc_temp)) sweep_results['diverse_selectivity_mean'][ par_val_idx, par_val_2_idx] = np.mean( np.array(diverse_selectivity_temp)) sweep_results['diverse_selectivity_std'][ par_val_idx, par_val_2_idx] = np.std( np.array(diverse_selectivity_temp)) 
sweep_results['diverse_selectivity_upper'][ par_val_idx, par_val_2_idx] = np.percentile( np.array(diverse_selectivity_temp), 90) sweep_results['diverse_selectivity_max'][ par_val_idx, par_val_2_idx] = np.max( np.array(diverse_selectivity_temp)) sweep_results['uniform_selectivity_mean'][ par_val_idx, par_val_2_idx] = np.mean( np.array(uniform_selectivity_temp)) sweep_results['diverse_selectivity_std'][ par_val_idx, par_val_2_idx] = np.std( np.array(diverse_selectivity_temp)) sweep_results['uniform_selectivity_upper'][ par_val_idx, par_val_2_idx] = np.percentile( np.array(uniform_selectivity_temp), 90) sweep_results['uniform_selectivity_max'][ par_val_idx, par_val_2_idx] = np.max( np.array(uniform_selectivity_temp)) plt.figure() plt.hist([ np.array(unif_connprob_temp), np.array(diverse_connprob_temp) ], 20) plt.legend(['uniform', 'diverse']) plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_connprob_hist.pdf')) plt.cla() sns.jointplot(np.array(diverse_pc_temp), np.array(diverse_connprob_temp), kind='hexbin', ylim=(0.0, 1.0)) plt.title('Plasticity-connectivity link, diverse') plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + 'pc_connprob_diverse.pdf')) plt.cla() sns.jointplot(np.array(unif_pc_temp), np.array(unif_connprob_temp), kind='hexbin', ylim=(0.0, 1.0)) plt.title('Plasticity-connectivity link, uniform') plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_pc_connprob_uniform.pdf')) plt.cla() plt.figure() digit = np.digitize( np.array(diverse_pc_temp).flatten(), np.arange(-1.0, 1.0, 0.2)) plt.plot([ np.mean( np.array(diverse_connprob_temp).flatten()[ digit == i]) for i in xrange(10) ]) digit = np.digitize( np.array(unif_pc_temp).flatten(), np.arange(-1.0, 1.0, 0.1)) plt.plot([ np.mean( np.array(unif_connprob_temp).flatten()[digit == i]) for i in xrange(10) ]) plt.title('PC-connectivity link') plt.legend(['diverse', 'uniform']) plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_pc_connprob_link.pdf')) plt.cla() except: pass else: iter_pars = sim_pars.copy() iter_pars[sim_pars['par_sweep_key']] = par_val str_i = sim_pars['par_sweep_key'] + str( iter_pars[sim_pars['par_sweep_key']]) # Passing the list res_file = os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '.pkl') temp_results = py_scripts_yann.load_pickle(res_file) sweep_results['pc_rval_mean'][ par_val_idx, par_val_2_idx] = np.mean([ temp_results['selectivities_results_corr'][i] ['pop_coupling_partial_rval'] for i in xrange(sim_pars['N_sims']) ]) sweep_results['pc_rval_std'][par_val_idx, par_val_2_idx] = np.std([ temp_results['selectivities_results_corr'][i] ['pop_coupling_partial_rval'] for i in xrange(sim_pars['N_sims']) ]) if plot_hist: plt.pcolor(sweep_results['pc_rval_mean']) plt.title('Plasticity-coupling link') plt.colorbar() plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_plasticity_coupling_link_mean.pdf')) plt.show() plt.pcolor(sweep_results['pc_rval_std']) plt.title('Plasticity-coupling link variability') plt.colorbar() plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_plasticity_coupling_link_std.pdf')) plt.show() plt.pcolor(sweep_results['uniform_connprob_std']) plt.title('input connectivity width, uniform') plt.colorbar() plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_connectivity_width_uniform.pdf')) plt.show() 
plt.pcolor(sweep_results['diverse_connprob_std']) plt.title('input connectivity width, diverse') plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_connectivity_width_diverse.pdf')) plt.colorbar() plt.show() plt.pcolor(sweep_results['diverse_pc_connprob']) plt.title('pc-connprob link, diverse') plt.colorbar() plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_pc_connprob_diverse.pdf')) plt.show() plt.pcolor(sweep_results['uniform_pc_connprob']) plt.title('pc-connprob link, uniform') plt.colorbar() plt.savefig( os.path.join( file_path, str(sim_pars['sim_title']) + '_' + str(str_i) + '_pc_connprob_uniform.pdf')) plt.show() return sweep_results
# Fragment: `img_gray`, `desired_scale`, `quantize_num` and `inverse` are
# defined earlier in the script.
print(f"desired scale:{desired_scale}")
desired_size = (
    int(img_gray.shape[1] * desired_scale),
    int(img_gray.shape[0] * desired_scale),
)
img_resized = cv2.resize(img_gray, desired_size)
print(f"resized size:{img_resized.shape}")

bins = np.arange(start=0.0, stop=255, step=255 / quantize_num)
print(f"bins:{bins}")
img_mozaic = np.digitize(img_resized, bins)

with xlsxwriter.Workbook('result.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Write pixel intensity.
    for y in range(0, img_mozaic.shape[0]):
        for x in range(0, img_mozaic.shape[1]):
            e_num = img_mozaic.item(y, x)
            worksheet.write_number(
                row=y,
                col=x,
                number=e_num if not inverse else (quantize_num - e_num)
            )

    # Set width and height.
    for y in range(0, img_mozaic.shape[0]):
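# Toy illustration (separate from the script above) of the quantisation step:
# np.arange(0, 255, 255/quantize_num) gives quantize_num edges, and digitize
# maps every pixel onto a level in 1..quantize_num.
import numpy as np

quantize_num = 4
bins = np.arange(0.0, 255, 255 / quantize_num)   # [0, 63.75, 127.5, 191.25]
pixels = np.array([0, 50, 100, 180, 254])
print(np.digitize(pixels, bins))                 # -> [1 1 2 3 4]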
def estimateFromForces(self, pathCart): def movingAverage(values, window): weights = np.repeat(1.0, window) / window sma = np.convolve(values, weights, 'valid') return sma numBins = 51 mostForceThresh = 0.5 #only look at data of (mostForceThresh*100) percentile window_size = 15 #for moving average filter forces = self.getCartForces(pathCart) t = np.arange(np.shape(forces)[0]) xForces = forces[:, 0] # print(xForces) yForces = forces[:, 2] zForces = forces[:, 1] xForcesMA = movingAverage(xForces, window_size) yForcesMA = movingAverage(yForces, window_size) zForcesMA = movingAverage(zForces, window_size) #estimate x position xzvt = np.array([pathCart[window_size - 1:, 0], xForcesMA / zForcesMA]) # print(xzvt) bins = np.linspace(-0.5, 0.5, numBins) xzvt[0, :] = np.digitize(xzvt[0, :], bins) binSum = np.zeros(len(bins)) i = 0 while i < len(bins): currentBin = np.argwhere([ (xzvt[0, :] == i), (xzvt[1, :] > np.quantile(xzvt[:, 1], mostForceThresh)) ]) #get upper mostForceThresh% values from each bin binSum[i] = np.sum(xzvt[1, currentBin]) / (np.count_nonzero([ xzvt[0, :] == i, (xzvt[1, :] > np.quantile(xzvt[:, 1], mostForceThresh)) ])) #total number of times the bin is used i += 1 print(binSum) polyOrder = 2 bestFitxzvt = np.polyfit(bins[np.logical_not(np.isnan(binSum))], binSum[np.logical_not(np.isnan(binSum))], polyOrder) pbins = np.poly1d(bestFitxzvt) xpbins = np.linspace(-0.5, 0.5, 100) critX = pbins.deriv().r r_critX = critX[critX.imag == 0].real testX = pbins.deriv(2)(r_critX) x_maxX = r_critX[testX > 0] y_min = pbins(x_maxX) print("shoulder x is = ", x_maxX) #estimate z position zxvt = np.array([pathCart[window_size - 1:, 2], zForcesMA / xForcesMA]) # print(zvt) bins = np.linspace(-0.5, 0.5, numBins) zxvt[0, :] = np.digitize(zxvt[0, :], bins) binSum = np.zeros(len(bins)) i = 0 while i < len(bins): currentBin = np.argwhere([ (zxvt[0, :] == i), (zxvt[1, :] > np.quantile(zxvt[:, 1], mostForceThresh)) ]) #get upper mostForceThresh% values from each bin binSum[i] = np.sum(zxvt[1, currentBin]) / (np.count_nonzero([ zxvt[0, :] == i, (zxvt[1, :] > np.quantile(zxvt[:, 1], mostForceThresh)) ])) #total number of times the bin is used i += 1 print(binSum) polyOrder = 2 bestFitzxvt = np.polyfit(bins[np.logical_not(np.isnan(binSum))], binSum[np.logical_not(np.isnan(binSum))], polyOrder) pbins = np.poly1d(bestFitzxvt) xpbins = np.linspace(-0.5, 0.5, 100) critZ = pbins.deriv().r r_critZ = critZ[critZ.imag == 0].real testZ = pbins.deriv(2)(r_critZ) x_maxZ = r_critZ[testZ < 0] y_min = pbins(x_maxZ) print("shoulder z is = ", x_maxZ) #OLD VERSION WITHOUT MOVING AVERAGE AND BINNING # forcesCart = self.getCartForces(pathCart) # xForces = forcesCart[forcesCart[:,0].argsort()] # polyOrder = 4 #start with 2nd order, try again and again until there is a negative coeffieienct on largest term # bestFitX = np.polyfit(pathCart[:,0],forcesCart[:,0],polyOrder) # # print(bestFitX) # pX = np.poly1d(bestFitX) # xpX= np.linspace(-1,1,100) # critX = pX.deriv().r # r_critX = critX[critX.imag==0].real # testX = pX.deriv(2)(r_critX) # x_maxX = r_critX[testX<0] # y_min = pX(x_maxX) # # print("shoulder x is = ", max(x_maxX, key=abs)) # yForces = forcesCart[forcesCart[:,2].argsort()] # polyOrder = 2 # bestFitY = np.polyfit(pathCart[:,2],forcesCart[:,2],polyOrder) # # print(bestFitY) # pY = np.poly1d(bestFitY) # xpY = np.linspace(-1,1,100) # critY = pY.deriv().r # r_critY = critY[critY.imag==0].real # testY = pY.deriv(2)(r_critY) # x_maxY = r_critY[testY<0] # y_min = pY(x_maxY) # # print("shoulder y is = ", max(x_maxY, key=abs)) # 
zForces = forcesCart[forcesCart[:,1].argsort()] # polyOrder = 2 # bestFitZ = np.polyfit(pathCart[:,1],forcesCart[:,1],polyOrder) # # print(bestFitZ) # pZ = np.poly1d(bestFitZ) # xpZ = np.linspace(-1,1,100) # critZ = pZ.deriv().r # r_critZ = critZ[critZ.imag==0].real # testZ = pZ.deriv(2)(r_critZ) # x_maxZ = r_critZ[testZ<0] # y_min = pY(x_maxZ) # # print("shoulder Z is = ", max(x_maxZ, key=abs)) #assume constant shoulder heighy x_maxY x_maxY = 0.2 bestEst = np.array([[x_maxX, x_maxY, x_maxZ]]) print("best estimate from forces = ", bestEst) return (bestEst)
def histogram_pair(value_vec, binary_vec, bins, smoothing_const=.01,
                   prior_prob=.5, rel_risk=False, error_bar_alpha=.05,
                   figsize=(12, 6), **kwargs):
    """Plot the relationship between a numerical feature and a binary outcome.

    This will create two plots stacked vertically. The upper plot is a
    stacked histogram showing the counts of 0 and 1 in each respective bin.

    The lower plot shows the marginal empirical probability of being a 1
    given that the numerical feature is in a particular value range. This
    gives a simple way to assess the relationship between the two variables,
    especially if it is non-linear. Error bars are also shown to demonstrate
    the confidence of the empirical probability (based on the Beta
    distribution).

    Parameters
    ----------
    value_vec : array-like (containing numerical values)
        The array of numerical values that we are exploring.
    binary_vec : array-like (containing 0/1 values)
        The array of binary values that we are exploring.
    bins : list or numpy array
        The bin endpoints to use, as if constructing a histogram.
    smoothing_const : float, default=.01
        To avoid issues when a bin contains few or no data points, we add in
        a small number of both positive and negative observations to each
        bin. This controls the weight of the added data.
    prior_prob : float, default=.5
        The prior probability reflected by the added data.
    rel_risk : bool, default=False
        If True, plot log(emp_prob / prior_prob) on the y-axis rather than
        emp_prob.
    error_bar_alpha : float, default=.05
        The alpha value to use for the error bars (based on the Beta
        distribution). Default is 0.05, corresponding to a 95% confidence
        interval.
    figsize : tuple of 2 floats, default=(12, 6)
        The size of the "canvas" to use for plotting.
    **kwargs : other
        Other parameters to be passed to the plt.hist command.
    """
    nan_mask = np.isnan(value_vec)
    num_nans = np.sum(nan_mask)
    if num_nans > 0:
        nan_binary_vec = binary_vec[nan_mask]
        binary_vec = binary_vec[~nan_mask]
        value_vec = value_vec[~nan_mask]
        nan_avg_value = np.mean(nan_binary_vec)
        reg_avg_value = np.mean(binary_vec)
    out0 = plt.hist(value_vec[binary_vec == 0], bins=bins, **kwargs)
    out1 = plt.hist(value_vec[binary_vec == 1], bins=bins, **kwargs)
    plt.close()
    plt.figure(figsize=figsize)
    plt.subplot(2, 1, 1)
    plt.hist((value_vec[binary_vec == 0], value_vec[binary_vec == 1]),
             stacked=True, bins=bins, **kwargs)
    bin_leftpts = (out1[1])[:-1]
    bin_rightpts = (out1[1])[1:]
    default_bin_centers = (bin_leftpts + bin_rightpts) / 2
    digitized_value_vec = np.digitize(value_vec, bins)
    bin_centers = np.array([np.mean(value_vec[digitized_value_vec == i])
                            if i in np.unique(digitized_value_vec)
                            else default_bin_centers[i - 1]
                            for i in np.arange(len(bins) - 1) + 1])
    prob_numer = out1[0]
    prob_denom = out1[0] + out0[0]
    probs = ((prob_numer + prior_prob * smoothing_const) /
             (prob_denom + smoothing_const))
    plt.subplot(2, 1, 2)
    if rel_risk:
        plt.plot(bin_centers, np.log(probs / prior_prob), '-o')
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
    else:
        plt.plot(bin_centers[:len(probs)], probs, '-o')
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
        yerr_mat_temp = beta.interval(1 - error_bar_alpha, out1[0] + 1, out0[0] + 1)
        yerr_mat = np.vstack((yerr_mat_temp[0], yerr_mat_temp[1])) - probs
        yerr_mat[0, :] = -yerr_mat[0, :]
        plt.errorbar(bin_centers[:len(probs)], probs, yerr=yerr_mat, capsize=5)
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
    if num_nans > 0:
        plt.hlines(y=nan_avg_value, xmin=bin_leftpts[0], xmax=bin_leftpts[1],
                   linestyle='dotted')
        plt.hlines(y=reg_avg_value, xmin=bin_leftpts[0], xmax=bin_leftpts[1],
                   linestyle='dashed')
    return {'bin_centers': bin_centers, 'probs': probs,
            'prob_numer': prob_numer, 'prob_denom': prob_denom}
def segmented_rings(edges, segments, center, shape, offset_angle=0):
    """
    Parameters
    ----------
    edges : array
        inner and outer radius for each ring
    segments : int or list
        number of pie slices or list of angles in radians.
        That is, 8 produces eight equal-sized angular segments,
        whereas a list can be used to produce segments of unequal size.
    center : tuple
        point in image where r=0; may be a float giving subpixel precision.
        Order is (rr, cc).
    shape : tuple
        Image shape which is used to determine the maximum extent of output
        pixel coordinates. Order is (rr, cc).
    offset_angle : float or array, optional
        offset in radians from offset_angle=0 along the positive X axis

    Returns
    -------
    label_array : array
        Elements not inside any ROI are zero; elements inside each ROI are
        1, 2, 3, corresponding to the order they are specified in edges
        and segments

    See Also
    --------
    ring_edges : Calculate the inner and outer radius of a set of rings.
    """
    edges = np.asarray(edges).ravel()
    if not 0 == len(edges) % 2:
        raise ValueError("edges should have an even number of elements, "
                         "giving inner, outer radii for each ring")
    if not np.all(np.diff(edges) >= 0):
        raise ValueError("edges are expected to be monotonically increasing, "
                         "giving inner and outer radii of each ring from "
                         "r=0 outward")

    agrid = utils.angle_grid(center, shape)
    agrid[agrid < 0] = 2 * np.pi + agrid[agrid < 0]

    segments_is_list = isinstance(segments, collections.abc.Iterable)
    if segments_is_list:
        segments = np.asarray(segments) + offset_angle
    else:
        # N equal segments requires N+1 bin edges spanning 0 to 2pi.
        segments = np.linspace(0, 2 * np.pi, num=1 + segments, endpoint=True)
        segments += offset_angle

    # the indices of the bins (angles) to which each value in the input
    # array (angle_grid) belongs.
    ind_grid = (np.digitize(np.ravel(agrid), segments,
                            right=False)).reshape(shape)
    label_array = np.zeros(shape, dtype=np.int64)
    # radius grid for the image_shape
    rgrid = utils.radial_grid(center, shape)

    # assign indices value according to angles then rings
    len_segments = len(segments)
    for i in range(len(edges) // 2):
        indices = (edges[2 * i] <= rgrid) & (rgrid < edges[2 * i + 1])
        # Combine "segment #" and "ring #" to get unique label for each.
        label_array[indices] = ind_grid[indices] + (len_segments - 1) * i

    return label_array
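# A minimal usage sketch, assuming the surrounding module's `utils.angle_grid`
# and `utils.radial_grid` helpers are available (as in scikit-beam's roi code):
# label a 64x64 image with two rings, each cut into 4 angular segments.
import numpy as np

edges = [(5, 10), (15, 20)]          # (inner, outer) radius of each ring
center = (32.0, 32.0)
shape = (64, 64)

labels = segmented_rings(edges, segments=4, center=center, shape=shape)
print(np.unique(labels))             # 0 = background, 1..8 = ring/segment ROIs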
# Fragment: the loop that builds one feature row per record is truncated
# above; `val`, `x`, `y`, `X`, `ignore`, `get_fib` and `random_forest` come
# from that missing context.
            # val = int(np.log10(val))
            y.append(val)
        else:
            if val is None:
                x.append(0)
            else:
                x.append(val)
    # add 1 training example to the mix
    if ignore == False:
        X.append(x)

print(len(X), len(y))
# print(len(set(y)))
# print(min(y), max(y))
# print(get_fib(max(y)))

# Bin the continuous targets into classes using Fibonacci-spaced bin edges.
y = np.digitize(y, get_fib(max(y)))

# for i in range(len(inds)):
#     print(inds[i])
# from sklearn.preprocessing import scale
# y = scale(y)
# for i in range(len(y)):
#     print(y[i])

random_forest(X, y)

# cov = np.cov(np.transpose(X))
# from decimal import Decimal
# temp = []
# for i in range(len(cov)):
#     a = []
#     for j in range(len(cov[0])):
#         a.append(round(Decimal(cov[i][j]), 2))
n_bins = 9
binned_data_array = np.zeros([n_lwt, n_months, n_sites, n_days, n_bins])
# 8 edges from 0 to 70 give digitize indices 0..8, i.e. n_bins = 9 slots.
bins = np.linspace(0, 70, 8)
print('bins', bins)
time.sleep(5)

for i_lwt in range(n_lwt):
    for i_month in range(n_months):
        for i_site in range(n_sites):
            for i_day in range(n_days):
                data_to_bin = lwt_array_monthly[i_lwt, i_month, i_site, i_day]
                # print(' D2B', data_to_bin)
                # print('non-nan', data_to_bin[~np.isnan(data_to_bin)])
                inds = np.digitize(data_to_bin, bins)
                # print('inds', inds)
                # print(i_lwt, i_month, i_site, i_day, inds)
                add_count = 1
                binned_data_array[i_lwt, i_month, i_site, i_day, inds] = add_count

for i_lwt in range(n_lwt):
    print('lwt = ', i_lwt)
    for i_bin in range(n_bins):
        print('bin', i_bin)
        print(np.count_nonzero(binned_data_array[i_lwt, :, :, :, i_bin]))
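# Sketch of the index bookkeeping above: with len(bins) edges, np.digitize
# can return any value in 0..len(bins), so the count axis needs
# len(bins) + 1 slots (here 8 edges -> 9 possible indices).
import numpy as np

bins = np.linspace(0, 70, 8)
values = np.array([-5.0, 0.0, 35.0, 69.0, 75.0])
print(np.digitize(values, bins))   # -> [0 1 4 7 8]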
def getWeightqt_postVFP_Wplus(y, pt):
    biny = np.digitize(np.array([y]), yBins)[0] - 1
    binpt = np.digitize(np.array([pt]), qtBins)[0] - 1
    return h[biny, binpt]
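# Minimal usage sketch: `yBins`, `qtBins` and `h` are module-level globals in
# the original code; the values below are made up just to exercise the lookup.
import numpy as np

yBins = np.array([0.0, 1.0, 2.0, 3.0])          # assumed rapidity bin edges
qtBins = np.array([0.0, 10.0, 20.0, 40.0])      # assumed boson-qT bin edges
h = np.ones((len(yBins) - 1, len(qtBins) - 1))  # assumed per-bin weight table

print(getWeightqt_postVFP_Wplus(1.5, 12.0))     # weight for the (y, qT) bin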