def bin_extent(lat, lon, bins=100):
    """Bin random lon+lat points over the extent of the points themselves.

    lat   Nx1 array of latitude values
    lon   Nx1 array of longitude values
    bins  an integer or a (bin_y, bin_x) pair describing how the extent
          of the data in (lat, lon) is to be binned

    If one integer is supplied, the extent determined from the point data
    (lat, lon) is binned that number of times in both the Y and X
    directions.  If a pair (M, N) is supplied, the number of Y bins is M
    and the number of X bins is N.

    Returns the result of bin_data() for the computed grid (documented as
    a 2D array of lists containing the indices of the points binned into
    each grid cell).
    """

    # get the number of cells in each direction; a value that cannot be
    # unpacked (TypeError) is taken as one count used for both directions
    try:
        (gnumy, gnumx) = bins
    except TypeError:
        gnumx = gnumy = bins

    # convert two Nx1 arrays lat and lon to [[lon, lat], ...]
    # zip() is a one-shot iterator on Python 3, so materialise it before
    # handing it to get_extent()
    data = list(zip(lon, lat))
    (ll_lat, ll_lon, ur_lat, ur_lon) = uge.get_extent(data, margin=0)

    # width of one grid cell in each direction
    gwidx = (ur_lon - ll_lon) / gnumx
    gwidy = (ur_lat - ll_lat) / gnumy

    return bin_data(lat, lon, ll_lon, ll_lat, gwidx, gwidy, gnumx, gnumy)
def calc_sum_xyz(datafile, scale=None, bins=100, filter=None, invert=False,
                 clip=None):
    """Read a file of point data and bin it into summed XYZ values.

    Takes values at random x,y points and sums them into a grid of bins.

    datafile  path of a text file holding one point per line; blank lines
              and lines starting with '%' or '#' are ignored
    scale     amount to scale the data (ie, divide); it is converted with
              int() before use
    bins      number of bins in X and Y direction
                  if an int, # bins in both X and Y direction
                  if [int, int] the X and Y bin counts may be different
    filter    a function to extract (lon, lat, val) from the split fields
              of one datafile line
              if not supplied an internal function is used
    invert    only used if 'filter' is not supplied.  The internal filter
              reads the first three fields as (lon, lat, val); if 'invert'
              is True the first two fields are swapped.
    clip      if defined is a dictionary defining clip limits.
              Recognised keys in the dictionary are:
                  'high'  sets high limit above which values are clipped
                  'low'   sets low limit below which values are clipped
              A missing key leaves that side unclipped.
              The clipping is done before any scaling.

    Returns a numpy array of XYZ data [[lon, lat, val], ...] with one row
    per bin: (lon, lat) is the centre of the bin and val is the sum of the
    values that fell into it.  Bins that received no points have value 0
    (no NaN substitution is performed).

    Raises RuntimeError if 'bins' is a sequence that cannot be unpacked
    into exactly two items.
    """

    # NOTE(review): calc_sum_xyz is defined twice in this file; the later
    # definition is the one in effect.

    # The numpy aliases in the scipy namespace (scipy.array, scipy.clip,
    # scipy.histogram2d, ...) are deprecated, so use numpy directly.
    import numpy as np

    # handle optional parameters: 'bins' is a single count or a pair
    try:
        len(bins)
    except TypeError:
        bins_x = bins_y = bins
    else:
        try:
            (bins_x, bins_y) = bins
        except ValueError:
            raise RuntimeError("Bad 'bins' value, expected int or [int, int]")

    # if user didn't supply a data filter, use our own
    if filter is None:
        if invert:
            def default_filter(line):
                return (float(line[1]), float(line[0]), float(line[2]))
        else:
            def default_filter(line):
                return (float(line[0]), float(line[1]), float(line[2]))
        filter = default_filter

    # get data into memory; the 'with' block closes the file even if a
    # line fails to parse
    data = []
    with open(datafile) as fd:
        for line in fd:
            # ignore blank or comment lines
            line = line.strip()
            if line == '' or line[0] in '%#':
                continue

            # get lon+lat+value from line
            data.append(filter(SplitPattern.split(line)))
    data = np.array(data)

    # do clipping, if required; an absent limit defaults to the data's own
    # extreme so that side is left untouched
    if clip:
        low_clip = clip.get('low', None)
        high_clip = clip.get('high', None)
        if low_clip is None:
            low_clip = np.min(data[:, 2])
        if high_clip is None:
            high_clip = np.max(data[:, 2])
        data[:, 2] = np.clip(data[:, 2], low_clip, high_clip)

    # handle any scaling
    # NOTE(review): int() truncates a fractional scale (2.5 -> 2, 0.5 -> 0);
    # kept as-is for compatibility -- confirm whether that is intended.
    if scale:
        scale = int(scale)
        data[:, 2] = data[:, 2] / scale

    # now generate a binned dataset: sum of values per (x, y) bin
    (binned_data, xedges, yedges) = np.histogram2d(data[:, 0], data[:, 1],
                                                   bins=(bins_x, bins_y),
                                                   weights=data[:, 2])

    # make sure X+Y is the *centre* of each bin: midpoint of adjacent edges
    xcentres = (xedges[:-1] + xedges[1:]) / 2.0
    ycentres = (yedges[:-1] + yedges[1:]) / 2.0

    # create XYZ object, one row per bin, X varying slowest; this matches
    # the row-major order of binned_data.ravel()
    return np.column_stack((np.repeat(xcentres, len(ycentres)),
                            np.tile(ycentres, len(xcentres)),
                            binned_data.ravel()))
def calc_sum_xyz(datafile, scale=None, bins=100, filter=None, invert=False,
                 clip=None):
    """Read a file of point data and bin it into summed XYZ values.

    Takes values at random x,y points and sums them into a grid of bins.

    datafile  path of a text file holding one point per line; blank lines
              and lines starting with '%' or '#' are ignored
    scale     amount to scale the data (ie, divide); it is converted with
              int() before use
    bins      number of bins in X and Y direction
                  if an int, # bins in both X and Y direction
                  if [int, int] the X and Y bin counts may be different
    filter    a function to extract (lon, lat, val) from the split fields
              of one datafile line
              if not supplied an internal function is used
    invert    only used if 'filter' is not supplied.  The internal filter
              reads the first three fields as (lon, lat, val); if 'invert'
              is True the first two fields are swapped.
    clip      if defined is a dictionary defining clip limits.
              Recognised keys in the dictionary are:
                  'high'  sets high limit above which values are clipped
                  'low'   sets low limit below which values are clipped
              A missing key leaves that side unclipped.
              The clipping is done before any scaling.

    Returns a numpy array of XYZ data [[lon, lat, val], ...] with one row
    per bin: (lon, lat) is the centre of the bin and val is the sum of the
    values that fell into it.  Bins that received no points have value 0
    (no NaN substitution is performed).

    Raises RuntimeError if 'bins' is a sequence that cannot be unpacked
    into exactly two items.
    """

    # NOTE(review): an earlier definition of calc_sum_xyz exists in this
    # file and is shadowed by this one.

    # The numpy aliases in the scipy namespace (scipy.array, scipy.clip,
    # scipy.histogram2d, ...) are deprecated, so use numpy directly.
    import numpy as np

    # handle optional parameters: 'bins' is a single count or a pair
    try:
        len(bins)
    except TypeError:
        bins_x = bins_y = bins
    else:
        try:
            (bins_x, bins_y) = bins
        except ValueError:
            raise RuntimeError("Bad 'bins' value, expected int or [int, int]")

    # if user didn't supply a data filter, use our own
    if filter is None:
        if invert:
            def default_filter(line):
                return (float(line[1]), float(line[0]), float(line[2]))
        else:
            def default_filter(line):
                return (float(line[0]), float(line[1]), float(line[2]))
        filter = default_filter

    # get data into memory; the 'with' block closes the file even if a
    # line fails to parse
    data = []
    with open(datafile) as fd:
        for line in fd:
            # ignore blank or comment lines
            line = line.strip()
            if line == '' or line[0] in '%#':
                continue

            # get lon+lat+value from line
            data.append(filter(SplitPattern.split(line)))
    data = np.array(data)

    # do clipping, if required; an absent limit defaults to the data's own
    # extreme so that side is left untouched
    if clip:
        low_clip = clip.get('low', None)
        high_clip = clip.get('high', None)
        if low_clip is None:
            low_clip = np.min(data[:, 2])
        if high_clip is None:
            high_clip = np.max(data[:, 2])
        data[:, 2] = np.clip(data[:, 2], low_clip, high_clip)

    # handle any scaling
    # NOTE(review): int() truncates a fractional scale (2.5 -> 2, 0.5 -> 0);
    # kept as-is for compatibility -- confirm whether that is intended.
    if scale:
        scale = int(scale)
        data[:, 2] = data[:, 2] / scale

    # now generate a binned dataset: sum of values per (x, y) bin
    (binned_data, xedges, yedges) = np.histogram2d(data[:, 0], data[:, 1],
                                                   bins=(bins_x, bins_y),
                                                   weights=data[:, 2])

    # make sure X+Y is the *centre* of each bin: midpoint of adjacent edges
    xcentres = (xedges[:-1] + xedges[1:]) / 2.0
    ycentres = (yedges[:-1] + yedges[1:]) / 2.0

    # create XYZ object, one row per bin, X varying slowest; this matches
    # the row-major order of binned_data.ravel()
    return np.column_stack((np.repeat(xcentres, len(ycentres)),
                            np.tile(ycentres, len(xcentres)),
                            binned_data.ravel()))