def unique2d(arr, axis=0):
    """Sort and eliminate replicate rows/columns of an array.
    Extension to numpy's unique().

    USAGE:
    arr  : 2d array at this stage
    axis : make unique along axis = [0],1 ([rows],cols)
    """
    import numpy as n
    I = n.array([], dtype=int)
    # to work on cols, transpose and do the same as you would for rows
    if axis == 1:
        arr = arr.T
    # check shape of the (possibly transposed) arr
    rows, cols = arr.shape
    for k in range(cols):
        # NOTE: in numpy < 1.3, unique1d(..., return_index=True) returns
        # (indices, values), hence the order of i and d here
        i, d = n.unique1d(arr[:, k], return_index=True)
        I = n.hstack((I, i))
    # keep each first-occurrence index once, as integers
    I = n.unique1d(I).astype(int).tolist()
    arr_out = arr[I, :]
    if axis == 1:
        arr_out = arr_out.T
    return arr_out

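# A minimal usage sketch for unique2d (hypothetical data; runs only on
# numpy < 1.4, where unique1d still exists):
#
#     a = n.array([[1, 2], [1, 2], [3, 4]])
#     unique2d(a)            # -> [[1, 2], [3, 4]], duplicate row dropped
#     unique2d(a.T, axis=1)  # same thing, operating on columns
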
def get_od_pair_index_not_in_dataset(self, O, D):
    """Return the indices of elements of O (D) whose od pair is not
    included in the travel data; see unittest for an example.
    """
    from numpy import unique1d, setdiff1d, zeros_like, logical_and, logical_or, where
    assert O.shape == D.shape
    id_attributes = self.get_id_attribute()
    max_id = max(O.max(), D.max(), id_attributes.max())
    digits = len(str(max_id)) + 1
    multiplier = 10 ** digits
    ODpair = O * multiplier + D
    idpair = id_attributes[:, 0] * multiplier + id_attributes[:, 1]
    missing_pairs = setdiff1d(unique1d(ODpair), unique1d(idpair))
    results = zeros_like(D)
    for pair in missing_pairs:
        results += logical_and(O == pair // multiplier,
                               D == pair % multiplier)
    results += logical_or(O < id_attributes[:, 0].min(),
                          O > id_attributes[:, 0].max())
    results += logical_or(D < id_attributes[:, 1].min(),
                          D > id_attributes[:, 1].max())
    return where(results)

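# Worked example of the pair encoding above (values are hypothetical):
# with max_id = 123, digits = len("123") + 1 = 4 and multiplier = 10**4,
# the pair (O, D) = (12, 34) encodes to 12 * 10000 + 34 = 120034, which
# decodes back via 120034 // 10000 = 12 and 120034 % 10000 = 34.
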
def uniquerows(X, decimalplaces, return_frequencies=False):
    """
    Returns array consisting of unique rows and a list of the number of
    times each row appeared.  Uniqueness is subject to rounding to the
    specified number of decimal places.
    """
    # TODO there could be a serious bug with hash collisions in this implementation
    # TODO update this function to use the integer packing as a hash function?
    if len(X.shape) == 1:
        return np.unique1d(X)
    hashvec = np.random.random(X.shape[1])
    rowhashes = np.dot(np.around(X, decimalplaces), hashvec)
    # only appears to be consistent up to 12 decimal places
    rowhashes = np.around(rowhashes, 12)
    #rowhashes.sort()
    uniqs, inds = np.unique1d(rowhashes, return_index=True)
    # return_index of unique1d does not always return the first one
    # TODO faster way of doing this??
    # maybe something like diff(where(diff(sort(rowhashes))))
    # slow but simple way iterating through all rows
    if return_frequencies:
        ol = dict()
        for i in rowhashes:
            if ol.has_key(i):
                ol[i] += 1
            else:
                ol[i] = 1
        # report the counts in the same (sorted) order as the returned
        # rows, not in arbitrary dict order
        return X[inds, :], [ol[u] for u in uniqs]
    else:
        return X[inds, :]

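# A minimal exact alternative on modern NumPy (>= 1.13, where unique
# grew an axis argument), avoiding the random-projection hash and its
# collision risk.  Note it returns the rounded rows, whereas uniquerows
# returns the original rows at the first unique index.  Assumes numpy
# is imported as np, as the surrounding code does:
def uniquerows_exact(X, decimalplaces, return_frequencies=False):
    rounded = np.around(X, decimalplaces)
    rows, counts = np.unique(rounded, axis=0, return_counts=True)
    if return_frequencies:
        return rows, counts.tolist()
    return rows
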
def initExisting(self, allocation, values, copy=True, stringToCategory=True,
                 stringToBuffer=True, loadFields=None):
    for name, arr in values.items():
        if len(arr) != allocation:
            raise ValueError("pre-existing array \"%s\" must have allocation %d, not %d" % (name, allocation, len(arr)))

    self.allocation = self.length = allocation

    if not hasattr(self, "data") or loadFields is None:
        if loadFields is None:
            loadFields = self.fields
        self.data = [None] * len(self.fields)
        self.categories = [None] * len(self.fields)
        self.buffers = [None] * len(self.fields)

    for i, field in enumerate(self.fields):
        if field in loadFields:
            arr = values[field]
            if len(arr) != allocation:
                raise ValueError("length of field \"%s\" is %d, but allocating %d" % (field, len(arr), allocation))

            if stringToCategory and self.types[i] == "category":
                try:
                    uniqueValues, indices = numpy.unique(arr, return_inverse=True)
                except TypeError:
                    # unique() in numpy < 1.3 lacks return_inverse; fall back
                    # to unique1d, whose return order flipped in 1.3
                    if list(map(int, numpy.__version__.split("."))) < [1, 3, 0]:
                        indices, uniqueValues = numpy.unique1d(arr, return_inverse=True)
                    else:
                        uniqueValues, indices = numpy.unique1d(arr, return_inverse=True)
                v_to_n = dict((v, n) for n, v in enumerate(uniqueValues))
                n_to_v = dict((n, v) for n, v in enumerate(uniqueValues))
                self.data[i] = indices
                self.categories[i] = (v_to_n, n_to_v)

            elif self.types[i] == "string":
                if stringToBuffer:
                    buf = BytesIO()
                    arr2 = numpy.empty(allocation, dtype=typeToDtype.get(self.types[i], self.types[i]))
                    for j, v in enumerate(arr):
                        buf.write(str(v).encode("utf-8"))
                        arr2[j] = buf.tell()
                else:
                    arr2 = arr
                    buf = None
                self.data[i] = arr2
                self.buffers[i] = buf

            elif self.types[i] == "object":
                if copy or isinstance(arr, numpy.ndarray):
                    arr = list(arr)
                self.data[i] = arr

            else:
                if copy or not isinstance(arr, numpy.ndarray):
                    arr = numpy.array(arr, dtype=typeToDtype.get(self.types[i], self.types[i]))
                self.data[i] = arr

def get_ld_grid(photband, **kwargs):
    """
    Retrieve an interpolating grid for the LD coefficients

    Check outcome:

    #>>> bands = ['GENEVA.U', 'GENEVA.B', 'GENEVA.G', 'GENEVA.V']
    #>>> f_ld_grid = get_ld_grid(bands)
    #>>> ff = pyfits.open(_atmos['file'])
    #>>> all(ff['GENEVA.U'].data[257][2:]==f_ld_grid(ff['GENEVA.U'].data[257][0],ff['GENEVA.U'].data[257][1])[0:5])
    #True
    #>>> all(ff['GENEVA.G'].data[257][2:]==f_ld_grid(ff['GENEVA.G'].data[257][0],ff['GENEVA.G'].data[257][1])[10:15])
    #True
    #>>> ff.close()

    #Make some plots:

    #>>> photband = ['GENEVA.V']
    #>>> f_ld = get_ld_grid(photband)
    #>>> logg = 4.0
    #>>> mu = linspace(0,1,100)
    #>>> p = figure()
    #>>> p = gcf().canvas.set_window_title('test of function <get_ld_grid>')
    #>>> for teff in linspace(9000,12000,19):
    #...    out = f_ld(teff,logg)
    #...    a1x,a2x,a3x,a4x, I_x1 = out.reshape((len(photband),5)).T
    #...    p = subplot(221);p = title('Interpolation of absolute intensities')
    #...    p = plot(teff,I_x1,'ko')
    #...    p = subplot(222);p = title('Interpolation of LD coefficients')
    #...    p = scatter(4*[teff],[a1x,a2x,a3x,a4x],c=range(4),vmin=0,vmax=3,cmap=cm.spectral,edgecolors='none')
    #...    p = subplot(223);p = title('Without absolute intensity')
    #...    p = plot(mu,ld_eval(mu,[a1x,a2x,a3x,a4x]),'-')
    #...    p = subplot(224);p = title('With absolute intensity')
    #...    p = plot(mu,I_x1*ld_eval(mu,[a1x,a2x,a3x,a4x]),'-')
    """
    # -- retrieve the grid points (unique values)
    teffs, loggs = get_ld_grid_dimensions(**kwargs)
    teffs_grid = np.sort(np.unique1d(teffs))
    loggs_grid = np.sort(np.unique1d(loggs))
    coeff_grid = np.zeros((len(teffs_grid), len(loggs_grid), 5 * len(photband)))

    # -- get the FITS-file containing the tables
    gridfile = get_file(**kwargs)

    # -- fill the grid
    ff = pyfits.open(gridfile)
    for pp, iband in enumerate(photband):
        teffs = ff[iband].data.field("Teff")
        loggs = ff[iband].data.field("logg")
        for ii, (iteff, ilogg) in enumerate(zip(teffs, loggs)):
            indext = np.searchsorted(teffs_grid, iteff)
            indexg = np.searchsorted(loggs_grid, ilogg)
            # -- array and list are added for backwards compatibility with
            #    some pyfits versions
            coeff_grid[indext, indexg, 5 * pp:5 * (pp + 1)] = np.array(list(ff[iband].data[ii]))[2:]
    ff.close()

    # -- make an interpolating function
    f_ld_grid = InterpolatingFunction([teffs_grid, loggs_grid], coeff_grid)
    return f_ld_grid

def unique1d(a, return_indices=False):
    """Replacement for numpy's unique1d"""
    import numpy
    if return_indices:
        # numpy < 1.3 returns (indices, values); swap to (values, indices)
        indices, uniq = numpy.unique1d(a, True)
        return uniq, indices
    else:
        return numpy.unique1d(a)

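# unique1d was deprecated in NumPy 1.4 and later removed; a minimal
# version-agnostic sketch of the same wrapper built on numpy.unique,
# which always returns the unique values first:
def unique1d_compat(a, return_indices=False):
    import numpy
    if return_indices:
        uniq, indices = numpy.unique(a, return_index=True)
        return uniq, indices
    return numpy.unique(a)
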
def plot(times, rms_difference, legend_loc, y_label, y_lim, colors, styles, title, file_name):
    # exp_names = { "1km-control-mod-05XP":"MM + MWR05XP", "1km-control-no-mm":"No MM", "1km-control-mm":"MM" }
    exp_names = {
        "3km-control": r"5 dBZ, 3 m s$^{-1}$",
        "3km-control-adapt=0.80": r"RTPS $\alpha$ = 0.80",
        "3km-control-adapt=1.00": r"RTPS $\alpha$ = 1.00",
        "3km-control-r0h=12km": r"$r_{0h}$ = 12 km",
        "3km-control-7dBZ,5ms": r'$\sigma_Z$ = 7 dBZ, $\sigma_{v_r}$ = 5 m s$^{-1}$',
    }

    pylab.figure()
    pylab.axes((0.1, 0.125, 0.85, 0.8))

    all_good = []
    for exp_name in sorted(rms_difference.keys()):
        good_idxs = np.where(~np.isnan(rms_difference[exp_name]))[0]
        name, radar = exp_name.split(':')
        if len(good_idxs) > 0:
            pylab.plot(times[good_idxs], rms_difference[exp_name][good_idxs],
                       color=colors[exp_name], linestyle=styles[exp_name],
                       label="%s (%s)" % (exp_names[name], radar))
            all_good.append(good_idxs)

    all_good_idxs = np.unique1d(np.concatenate(tuple(all_good)))

    pylab.plot([times.min(), times.max()], [0, 0], color='k', linestyle=':')
    pylab.xlabel(r"Time (UTC)", size='large')
    pylab.ylabel(y_label, size='large')
    pylab.xlim((times.min(), times.max()))
    pylab.ylim(y_lim)
    pylab.xticks(times[all_good_idxs],
                 [(datetime(2009, 6, 5, 18, 0, 0) + timedelta(seconds=int(t))).strftime("%H%M")
                  for t in times[all_good_idxs]],
                 size='large', rotation=30)
    pylab.yticks(size='large')
    pylab.legend(loc=legend_loc, prop={'size': 'small'})
    pylab.suptitle(title)
    pylab.savefig(file_name)
    pylab.close()
    return

def __init__(self, dataset, predictand, maxSubsetSize=None):
    """Constructor for a tree from a dataset of regressors (that which we split)
    and a predictand (that which we try to purify in the leaves).

    :type dataset: titus.producer.cart.Dataset
    :param dataset: dataset of regressors only
    :type predictand: 1-d Numpy array
    :param predictand: predictands in a separate array with the same number of rows as the ``dataset``
    :type maxSubsetSize: positive integer or ``None``
    :param maxSubsetSize: maximum size of subset splits of categorical regressors (approximation for optimization in ``categoricalEntropyGainTerm`` and ``categoricalNVarianceGainTerm``)
    """
    self.dataset = dataset
    self.predictand = predictand
    self.maxSubsetSize = maxSubsetSize
    self.datasetSize = len(self.predictand.data)
    if self.predictand.tpe == numbers.Real:
        try:
            self.predictandUnique = numpy.unique(self.predictand.data)
        except TypeError:
            self.predictandUnique = numpy.unique1d(self.predictand.data)
    elif self.predictand.tpe == basestring:
        if self.datasetSize > 0:
            self.predictandDistribution = []
            for category in xrange(len(self.predictand.intToStr)):
                frac = 1.0 * numpy.sum(self.predictand.data == category) / len(self.predictand.data)
                self.predictandDistribution.append(frac)
        else:
            self.predictandDistribution = [0.0] * len(self.predictand.intToStr)
    else:
        raise RuntimeError

def __init__(self, dataset, predictand, maxSubsetSize=None):
    """Constructor for a tree from a dataset of regressors (that which we split)
    and a predictand (that which we try to purify in the leaves).

    :type dataset: titus.producer.cart.Dataset
    :param dataset: dataset of regressors only
    :type predictand: 1-d Numpy array
    :param predictand: predictands in a separate array with the same number of rows as the ``dataset``
    :type maxSubsetSize: positive integer or ``None``
    :param maxSubsetSize: maximum size of subset splits of categorical regressors (approximation for optimization in ``categoricalEntropyGainTerm`` and ``categoricalNVarianceGainTerm``)
    """
    self.dataset = dataset
    self.predictand = predictand
    self.maxSubsetSize = maxSubsetSize
    self.datasetSize = len(self.predictand.data)
    if self.predictand.tpe == numbers.Real:
        try:
            self.predictandUnique = numpy.unique(self.predictand.data)
        except TypeError:
            self.predictandUnique = numpy.unique1d(self.predictand.data)
    elif self.predictand.tpe == str:
        if self.datasetSize > 0:
            self.predictandDistribution = []
            for category in range(len(self.predictand.intToStr)):
                frac = 1.0 * numpy.sum(self.predictand.data == category) / len(self.predictand.data)
                self.predictandDistribution.append(frac)
        else:
            self.predictandDistribution = [0.0] * len(self.predictand.intToStr)
    else:
        raise RuntimeError

def labelmeanfilter_str(ys, x):
    # works also for string labels in ys, but requires 1D
    # from mailing list scipy-user 2009-02-11
    unil, unilinv = np.unique1d(ys, return_index=False, return_inverse=True)
    # index over the integer codes 0..len(unil)-1, not over the label
    # values themselves (np.max(unil) + 1 breaks for string labels)
    labelmeans = np.array(ndimage.mean(x, labels=unilinv, index=np.arange(len(unil))))
    arr3 = labelmeans[unilinv]
    return arr3

def main():
    experiments = ['1kmf-sndr0h=50km', '1kmf-zs-no-05XP', '1kmf-zs-no-mm-05XP',
                   '1kmf-zs-no-mm', '1kmf-z-no-snd', '1kmf-z-no-v2']
    grid = goshen_1km_grid(bounds=(slice(100, 180), slice(90, 170)))
    temp = goshen_1km_temporal(start=14400)

    obs_file_names = ['psu_straka_mesonet.pkl', 'ttu_sticknet.pkl', 'asos.pkl']
    all_obs = loadObs(obs_file_names, temp.getDatetimes(aslist=True), grid, grid.getWidthHeight())

    ens_obs = {}
    for exp in experiments:
        ens_obs[exp] = cPickle.load(open("cold_pool_obs_%s.pkl" % exp, 'r'))

    mm_ids = np.unique1d(all_obs['id'])
    for id in mm_ids:
        id_idxs = np.where(all_obs['id'] == id)

        for ob_var, ens_var in [('temp', 't'), ('dewp', 'td')]:
            pylab.figure()
            pylab.plot(all_obs['time'][id_idxs], 5 / 9. * (all_obs[ob_var][id_idxs] - 32), 'k-', label='Observed')
            for exp_name, exp in ens_obs.iteritems():
                pylab.plot(all_obs['time'][id_idxs], exp[ens_var][id_idxs] - 273.15, label=exp_name)
            pylab.xticks(temp.getEpochs(aslist=True), temp.getStrings("%H%M", aslist=True), rotation=30)
            pylab.xlim(temp.getEpochs(aslist=True)[0], temp.getEpochs(aslist=True)[-1])
            pylab.legend(loc=1)
            pylab.savefig("mm_timeseries_%s_%s.png" % (ens_var, id))
            pylab.close()
    return

def agroupby(*args, **kwds):
    """A groupby function which accepts and returns arrays.

    All passed arrays are expected to be one dimensional and of the same
    shape.  All of the arrays are grouped by `key(arg[0])` and then
    returned.  The returned arrays will be two dimensional with each row
    corresponding to a group.  The size of the first dimension is equal
    to the number of groups, and the size of the second dimension is
    equal to the size of the largest group.  All smaller groups are
    padded with the value of the keyword argument `fill_value`."""
    keyfunc = kwds.get('key', lambda a: a)
    fill_val = kwds.get('fill_value', 0.0)
    args = [a.copy() for a in args]
    argsort = sorted(enumerate(args[0]), key=compose(keyfunc, itemgetter(1)))
    indexsort = [index for index, item in argsort]
    args = [a.take(indexsort) for a in args]
    # calculate groups
    g_mask = keyfunc(args[0])
    g_set = unique1d(g_mask)
    g_max = max([g_mask[g_mask == g].shape[0] for g in g_set])
    g_args = [fill_val * ones((len(g_set), g_max), dtype=a.dtype) for a in args]
    for gix, gval in enumerate(g_set):
        for ga, a in izip(g_args, args):
            b = a[g_mask == gval]
            ga[gix, :len(b)] = b
    return tuple(g_args)

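# Hypothetical usage sketch (assumes numpy's ones/unique1d, itertools'
# izip and the compose helper are in scope, as agroupby itself does):
#
#     labels = np.array([1, 2, 1, 1, 2])
#     vals   = np.array([10., 20., 30., 40., 50.])
#     g_labels, g_vals = agroupby(labels, vals)
#     # g_vals -> [[10., 30., 40.], [20., 50., 0.]]  (group 2 padded)
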
def plotObservationsComposite(obs, map, title, file_name):
    pylab.clf()
    colors = ['r', 'g', 'b', 'c', 'm', '#660099', '#ff9900', '#006666']
    ob_ids = np.unique1d(obs['id'])
    ttu_label = False

    for ob_id in ob_ids:
        ob_idxs = np.where(obs['id'] == ob_id)[0]
        these_obs = obs[ob_idxs]
        ob_xs, ob_ys = map(these_obs['longitude'], these_obs['latitude'])

        if ob_id[0] == "P":
            ob_num = int(ob_id[1]) - 1
            pylab.plot(ob_xs, ob_ys, 'o', mfc=colors[ob_num], mec=colors[ob_num], ms=3,
                       label="NSSL MM (%s)" % ob_id)
        else:
            if not ttu_label:
                label = "TTU Sticknet"
                ttu_label = True
            else:
                label = None
            pylab.plot(ob_xs[0], ob_ys[0], 'ko', ms=3, label=label)

    drawPolitical(map, scale_len=5)

    pylab.legend(loc=3, numpoints=1, prop={'size': 'medium'})
    pylab.title(title)
    pylab.savefig(file_name)
    return

def rangeChoose2(highLinesRange=None, lowLinesRange=None, m=None, octet=None, fit_params=None):
    # machine-translated from MATLAB (findbowties.m); indexing converted
    # from MATLAB's 1-based to Python's 0-based, octet being a 1-by-8 array
    fgrid = 0
    # findbowties.m:168
    for j3 in range(m):
        if highLinesRange[j3] == octet[0, 0] or highLinesRange[j3] == octet[0, 2]:
            continue
        octet[0, 4:6] = [highLinesRange[j3], lowLinesRange[j3]]
        # findbowties.m:173
        for j4 in range(j3 + 1, m):
            if highLinesRange[j4] == octet[0, 0] or highLinesRange[j4] == octet[0, 2]:
                continue
            octet[0, 6:8] = [highLinesRange[j4], lowLinesRange[j4]]
            # findbowties.m:179
            if np.size(np.unique1d(octet[0, :8])) == np.size(octet[0, :8]):
                octet128 = perm128(octet[0, :8], fit_params['containsOblate'])
                # findbowties.m:182
                lenoct128 = np.size(octet128) // 8
                # findbowties.m:183
                for j in range(lenoct128):
                    if isGoodOctet(octet128[j, :], fit_params):
                        fgrid = np.vstack((np.zeros((2, 4)),
                                           octet128[j, 4:8],
                                           octet128[j, 0:4],
                                           np.zeros((4, 4))))
                        # findbowties.m:186
                        return fgrid
    return fgrid

def labelstats_str(factors, values, stat='mvnx'):
    # works also for string labels in ys, but requires 1D
    # from mailing list scipy-user 2009-02-11
    unil, unilinv = np.unique1d(factors, return_index=False, return_inverse=True)
    res = []
    if 'm' in stat:
        labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil))))
        res.append(labelmeans)
    if 'v' in stat:
        labelvars = np.array(ndimage.variance(values, labels=unilinv, index=np.arange(len(unil))))
        res.append(labelvars)
    if 'n' in stat:
        labelmin = np.array(ndimage.minimum(values, labels=unilinv, index=np.arange(len(unil))))
        res.append(labelmin)
    if 'x' in stat:
        labelmax = np.array(ndimage.maximum(values, labels=unilinv, index=np.arange(len(unil))))
        res.append(labelmax)
    return res

def _stats(input, labels=None, index=None, do_sum2=False):
    '''returns count, sum, and optionally sum^2 by label'''
    def single_group(vals):
        if do_sum2:
            return vals.size, vals.sum(), (vals * vals.conjugate()).sum()
        else:
            return vals.size, vals.sum()

    if labels is None:
        return single_group(input)

    # ensure input and labels match sizes
    input, labels = numpy.broadcast_arrays(input, labels)

    if index is None:
        return single_group(input[labels > 0])

    if numpy.isscalar(index):
        return single_group(input[labels == index])

    # remap labels to unique integers if necessary, or if the largest
    # label is larger than the number of values.
    if ((not numpy.issubdtype(labels.dtype, numpy.int)) or
            (labels.min() < 0) or (labels.max() > labels.size)):
        unique_labels, new_labels = numpy.unique1d(labels, return_inverse=True)

        counts = numpy.bincount(new_labels)
        sums = numpy.bincount(new_labels, weights=input.ravel())
        if do_sum2:
            sums2 = numpy.bincount(new_labels, weights=(input * input.conjugate()).ravel())

        idxs = numpy.searchsorted(unique_labels, index)
        # make all of idxs valid
        idxs[idxs >= unique_labels.size] = 0
        found = (unique_labels[idxs] == index)
    else:
        # labels are an integer type, and there aren't too many, so
        # call bincount directly.
        counts = numpy.bincount(labels.ravel())
        sums = numpy.bincount(labels.ravel(), weights=input.ravel())
        if do_sum2:
            sums2 = numpy.bincount(labels.ravel(), weights=(input * input.conjugate()).ravel())

        # make sure all index values are valid
        idxs = numpy.asanyarray(index, numpy.int).copy()
        found = (idxs >= 0) & (idxs < counts.size)
        idxs[~found] = 0

    counts = counts[idxs]
    counts[~found] = 0
    sums = sums[idxs]
    sums[~found] = 0

    if not do_sum2:
        return (counts, sums)
    sums2 = sums2[idxs]
    sums2[~found] = 0
    return (counts, sums, sums2)

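# Usage sketch for _stats with integer labels (the direct bincount path;
# data are hypothetical):
#
#     counts, sums = _stats(numpy.arange(6.),
#                           labels=numpy.array([1, 1, 2, 2, 3, 3]),
#                           index=[1, 2, 3])
#     # counts -> [2, 2, 2], sums -> [1., 5., 9.]
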
def z_slab(self, bottom, top, allTimes=True):
    """remove all trajectories that are not in the slab defined by [bottom, top]"""
    if allTimes:
        selection = np.unique1d(np.where(np.bitwise_and(
            self.positions[:, :, -1] > bottom,
            self.positions[:, :, -1] < top))[1])
    else:
        selection = np.unique1d(np.where(np.bitwise_and(
            self.positions[:, :, -1].max(axis=0) > bottom,
            self.positions[:, :, -1].min(axis=0) < top)))
    self.trajs = self.trajs[selection]
    self.positions = self.positions[:, selection]

def get_near(self, point, d=1):
    coords = np.array(point * self.__divisions / self.__sizes, int)
    return np.unique1d([
        i
        for co in itertools.product(*[
            range(max(0, c - d), min(div, c + d + 1))
            for c, div in zip(coords, self.__divisions)])
        for i in self.__get_cell(co)])

def groupmeanbin(factors, values):
    '''uses np.bincount, assumes factors/labels are integers
    '''
    #n = len(factors)
    ix, rind = np.unique1d(factors, return_inverse=1)
    gcount = np.bincount(rind)
    gmean = np.bincount(rind, weights=values) / (1.0 * gcount)
    return gmean

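# Usage sketch: per-group means of values, keyed by integer label
# (hypothetical data):
#
#     factors = np.array([2, 2, 5, 5, 5])
#     values  = np.array([1., 3., 2., 4., 6.])
#     groupmeanbin(factors, values)   # -> array([2., 4.]) for labels 2, 5
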
def plotSubplots(times, data, legend_loc, y_label, y_lim, colors, styles, exp_names, title, file_name):
    pylab.figure(figsize=(10, 8))
    pylab.subplots_adjust(left=0.075, right=0.95, top=0.925, bottom=0.1, wspace=0.175, hspace=0.275)

    radars = sorted(list(set([name.split(":")[-1] for name in data.keys()])))
    n_rows = 1
    n_cols = (len(radars) + 1) // 2

    lines = {}
    for idx, radar in enumerate(radars):
        all_good = []
        pylab.subplot(n_rows, n_cols, idx + 1)
        for exp in sorted([e for e in data.keys() if e.split(":")[-1] == radar]):
            good_idxs = np.where(~np.isnan(data[exp]))[0]
            if len(good_idxs) > 0:
                name = exp.split(":")[0]
                line = pylab.plot(times[good_idxs], data[exp][good_idxs],
                                  color=colors[exp], label=exp_names[name])
                lines[exp_names[name]] = line
                all_good.append(good_idxs)

        all_good_idxs = np.unique1d(np.concatenate(tuple(all_good)))

        pylab.plot([times.min(), times.max()], [0, 0], color='k', linestyle=':')
        pylab.axvline(14400, color='k', linestyle=':')
        pylab.xlabel(r"Time (UTC)", size='xx-large')
        pylab.ylabel(y_label, size='xx-large')
        pylab.xlim((times.min(), times.max()))
        pylab.ylim(y_lim)

        unique_times = np.sort(np.unique1d(times))
        pylab.xticks(unique_times[::2],
                     [(datetime(2009, 6, 5, 18, 0, 0) + timedelta(seconds=int(t))).strftime("%H%M")
                      for t in unique_times][::2],
                     rotation=30, size='xx-large')
        pylab.yticks(size='xx-large')
        # pylab.title(radar)

    labels, line_objs = zip(*lines.items())
    # pylab.gcf().legend(line_objs, labels, 'lower right', prop={'size':'medium'})
    pylab.legend(line_objs, labels, loc=legend_loc, prop={'size': 'medium'})
    pylab.suptitle(title)
    pylab.savefig(file_name)
    pylab.close()
    return

def volumetricsample(P, res):
    inds = occupiedlist.pointstovoxels(P, res)
    #np.random.shuffle(inds)
    # TODO accessing a private method; should move this to occupiedlist, or
    # make it part of the public api of occupiedlist?
    ids = occupiedlist._pack(inds)
    # This introduces a systematic error as inds_unique is just the first
    # index encountered.
    ids_unique, inds_unique = np.unique1d(ids, return_index=True)
    return P[inds_unique]

def ismember_newer(totest, members):
    """
    A setmember1d, which works for totest arrays with duplicate values
    """
    uniques_in_test, rev_idx = np.unique1d(totest, return_inverse=True)
    uniques_in_members_mask = np.setmember1d(uniques_in_test, members)
    # Use this instead if members is not unique
    # uniques_in_members_mask = setmember1d(uniques_in_test, unique1d(members))
    return uniques_in_members_mask[rev_idx]

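# On NumPy >= 1.4 this duplicate-safe membership test is built in; a
# one-line sketch of the equivalent call:
#
#     mask = np.in1d(totest, members)   # np.isin on newer releases
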
def decimate(vals, decimation):
    B = numpy.unique1d(vals)
    N = WhereIs(vals, B)
    # n contains the count of the number of elements for each bin
    # B contains the index location for each element in the array vals
    # NOTE: This histogram function will become obsolete soon by the numpy
    # people.  The behavior will change.
    (n, B) = numpy.histogram(vals, B, new=False)
    return DataTruncation(n, decimation, len(B), N)

def labelmean_str(factors, values):
    # works also for string labels in ys, but requires 1D
    # from mailing list scipy-user 2009-02-11
    # check mistake: returns one element too much
    unil, unilinv = np.unique1d(factors, return_index=False, return_inverse=True)
    #labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil)+1)))
    labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil))))
    return labelmeans

def labelstats_str(factors, values):
    # works also for string labels in ys, but requires 1D
    # from mailing list scipy-user 2009-02-11
    unil, unilinv = np.unique1d(factors, return_index=False, return_inverse=True)
    labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil))))
    labelvars = np.array(ndimage.variance(values, labels=unilinv, index=np.arange(len(unil))))
    labelmin = np.array(ndimage.minimum(values, labels=unilinv, index=np.arange(len(unil))))
    labelmax = np.array(ndimage.maximum(values, labels=unilinv, index=np.arange(len(unil))))
    return labelmeans, labelvars, labelmin, labelmax

def renumber_array(array):
    uniq, inds = np.unique1d(array, return_inverse=True)
    #res = np.zeros_like(res2.shape)
    newuniq = range(len(uniq))
    res = np.array([newuniq[i] for i in inds]).astype(np.int16)
    res.shape = array.shape
    #for i in range(len(uniq)):
    #    print "Segment %s has %d pixels" % (i, np.where( res == i)[0].size)
    return res

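# The inverse indices from unique() are already the 0..n-1 renumbering
# (newuniq[i] above is just i), so the Python-level loop can be dropped.
# A minimal sketch on modern NumPy (assumes numpy imported as np):
def renumber_array_fast(array):
    uniq, inds = np.unique(array, return_inverse=True)
    return inds.astype(np.int16).reshape(array.shape)
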
def get_data(self, *args, **kwargs):
    """
    Return the requested range of data for each event by using the
    proper data retrieval mechanism for each event.

    The result will be a TimeSeries instance with dimensions
    (events,time) for the data and also some information about the data
    returned.
    """
    # get ready to load dat
    eventdata = None
    # events = self.data

    # speed up by getting unique event sources first
    usources = np.unique1d(self['tssrc'])

    # loop over unique sources
    for src in usources:
        # get the eventOffsets from that source
        ind = np.atleast_1d(self['tssrc'] == src)

        if len(ind) == 1:
            src_events = self
        else:
            src_events = self[ind]

        #print "Loading %d events from %s" % (ind.sum(),src)

        # get the timeseries for those events
        # newdat = src.get_event_data(channel,
        #                             src_events,
        #                             dur,
        #                             offset,
        #                             buf,
        #                             resampled_rate,
        #                             filt_freq,
        #                             filt_type,
        #                             filt_order,
        #                             keep_buffer)
        newdat = src.get_event_data(*args, **kwargs)

        # see if concatenate
        if eventdata is None:
            # start the new eventdata
            eventdata = newdat
        else:
            # append it to the existing (concatenate takes a sequence and
            # returns the result; it does not modify in place)
            eventdata = np.concatenate((eventdata, newdat), eventdata.tdim)

    if eventdata is None:
        dims = [Dim(np.array(None), 'event'), Dim(np.array(None), 'time')]
        eventdata = TimeSeries(np.atleast_2d(np.array(None)),
                               samplerate=None, tdim='time', dims=dims)

    return eventdata

def _add_source_to_range(self, data_source, columns, range):
    sources = range.get('sources')
    added = False
    for source in sources:
        if source['ref'] == data_source:
            # merge the new columns into the existing ones (unique1d's
            # second argument is return_index, not a second array)
            newcolumns = np.union1d(columns, source['columns']).tolist()
            source['columns'] = newcolumns
            added = True
    if not added:
        sources.append({'ref': data_source.ref(), 'columns': columns})

def groupstatsbin(factors, values):
    '''uses np.bincount, assumes factors/labels are integers
    '''
    n = len(factors)
    ix, rind = np.unique1d(factors, return_inverse=1)
    gcount = np.bincount(rind)
    gmean = np.bincount(rind, weights=values) / (1.0 * gcount)
    meanarr = gmean[rind]
    withinvar = np.bincount(rind, weights=(values - meanarr) ** 2) / (1.0 * gcount)
    withinvararr = withinvar[rind]
    return gcount, gmean, meanarr, withinvar, withinvararr

def unique1d(ar1, return_index=False, return_inverse=False):
    """
    This function is deprecated. Use ma.unique() instead.
    """
    output = np.unique1d(ar1,
                         return_index=return_index,
                         return_inverse=return_inverse)
    if isinstance(output, tuple):
        output = list(output)
        output[0] = output[0].view(MaskedArray)
        output = tuple(output)
    else:
        output = output.view(MaskedArray)
    return output

def get_partition_groupings(partition):
    # Returns dictionary of nodelist for each grouping
    groupings = {}  # dictionary of groupings for each partition
    cluster_labels = unique1d(partition)  # the unique cluster labels in the partition

    # Create a dictionary of nodelists for each group in partition p
    for n in cluster_labels:
        group = []
        for i in range(0, len(partition)):
            if partition[i] == n:   # If node i is in group n
                group.append(i)     # Add i to n's nodelist
        groupings[n] = group
    return groupings

def _compute(self):
    tv = self._children[0].getResult()
    nElements = tv.getNumElements()
    starts, ends, vals = tv.startsAsNumpyArray(), tv.endsAsNumpyArray(), tv.valsAsNumpyArray()
    sortedToOriginalEndIndices = np.argsort(ends)  # necessary to find the correct values for end events
    uniqueSortedPositions, uniquePosIndices = np.unique1d(np.concatenate((starts, ends)), return_inverse=True)

    if uniqueSortedPositions.size > 1:
        # the number of segments starting and ending at each unique position
        posEventArray = np.zeros((uniqueSortedPositions.size, 2), dtype='int32')
        # starts
        indices, counts = self._countDuplicates(uniquePosIndices[:nElements])
        posEventArray[indices, 0] = counts
        # ends
        indices, counts = self._countDuplicates(uniquePosIndices[nElements:])
        posEventArray[indices, 1] = counts

        del starts, ends, indices, counts, uniquePosIndices  # delete unnecessary arrays to free memory

        newVals = np.zeros(uniqueSortedPositions.size - 1, dtype=vals.dtype)
        uniqueVals = np.unique(vals)
        uniqueValCounts = np.zeros(uniqueVals.size, dtype='int32')  # array with the current count for each unique value
        # NumPy record array for converting from category values to corr. indices in the uniqueValCounts array
        recDType = np.dtype({'names': [str(x) for x in uniqueVals],
                             'formats': ['int32'] * uniqueVals.size})
        uniqueValCountsIndices = np.array([tuple(range(len(uniqueVals)))], dtype=recDType)

        accStart = 0
        accEnd = 0
        for posEventIndex in xrange(len(posEventArray)):
            startEvents, endEvents = posEventArray[posEventIndex]

            numVals = uniqueValCounts.sum()
            if numVals > 0:
                maxCount = uniqueValCounts.max()
                newVals[posEventIndex - 1] = \
                    ';'.join([str(x) for x in uniqueVals[np.where(uniqueValCounts == maxCount)]]) \
                    + '(%i/%i)' % (maxCount, numVals)

            uniqueEventVals, counts = self._countDuplicates(vals[accStart:accStart + startEvents])
            if uniqueEventVals.size > 0:
                uniqueValCounts[uniqueValCountsIndices[uniqueEventVals].view('int32')] += counts

            uniqueEventVals, counts = self._countDuplicates(vals[sortedToOriginalEndIndices[accEnd:accEnd + endEvents]])
            if uniqueEventVals.size > 0:
                uniqueValCounts[uniqueValCountsIndices[uniqueEventVals].view('int32')] -= counts

            accStart += startEvents
            accEnd += endEvents
    else:
        newVals = np.array([], dtype=vals.dtype)

    segBorders = uniqueSortedPositions + tv.genomeAnchor.start
    return TrackView(genomeAnchor=tv.genomeAnchor, startList=segBorders[:-1], endList=segBorders[1:],
                     valList=newVals, strandList=None, idList=None, edgesList=None, weightsList=None,
                     borderHandling=tv.borderHandling, allowOverlaps=tv.allowOverlaps)

def labelmean_dig(factors, values):
    # works also for string labels in ys, but requires 1D
    # from mailing list scipy-user 2009-02-11
    # check mistake: returns one element too much
    #unil = np.unique1d(factors, return_index=False, return_inverse=False)
    unil = np.unique1d(factors)
    # digitize returns 1-based bin numbers (k+1 for a value equal to
    # unil[k]), so shift to 0-based codes to line up with unil
    unilinv = (np.digitize(factors, unil) - 1).astype('int64')
    #print unilinv.shape
    #print unilinv.dtype
    #labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil)+1)))
    labelmeans = np.array(ndimage.mean(values, labels=unilinv, index=np.arange(len(unil))))
    return labelmeans

def plotObservationsComposite(obs, map, scale_len, title, file_name):
    pylab.figure(figsize=(10, 8))
    pylab.axes((0, 0, 1, 0.95))
    colors = ['r', 'g', 'b', 'c', 'm', '#660099', '#ff9900', '#006666']
    ob_ids = np.unique1d(obs['id'])

    ttu_label = False
    asos_label = False
    sndg_label = False

    for ob_id in ob_ids:
        ob_idxs = np.where(obs['id'] == ob_id)[0]
        these_obs = obs[ob_idxs]
        ob_xs, ob_ys = map(these_obs['longitude'], these_obs['latitude'])

        if ob_id[0] == "P":
            ob_num = int(ob_id[1]) - 1
            pylab.plot(ob_xs, ob_ys, 'o', mfc=colors[ob_num], mec=colors[ob_num], ms=3,
                       label="NSSL MM (%s)" % ob_id)
        elif ob_id[0] == "K":
            if not asos_label:
                label = "ASOS"
                asos_label = True
            else:
                label = None
            pylab.plot(ob_xs[0], ob_ys[0], 'k*', ms=5, label=label)
        elif ob_id[0] == "1" or ob_id[0] == "2":
            if not ttu_label:
                label = "TTU Sticknet"
                ttu_label = True
            else:
                label = None
            pylab.plot(ob_xs[0], ob_ys[0], 'o', mfc="#999999", mec="#999999", ms=3, label=label)
        elif these_obs[0]['obtype'] == "SNDG":
            if not sndg_label:
                label = "Sounding"
                sndg_label = True
            else:
                label = None
            pylab.plot(ob_xs[0], ob_ys[0], 'k^', ms=4, label=label)

    drawPolitical(map, scale_len=scale_len)

    line_objects = [l for l in sorted(pylab.gca().lines, key=lambda x: x.get_label())
                    if l.get_label()[0] != "_"]
    pylab.legend(line_objects, [l.get_label() for l in line_objects],
                 loc=2, numpoints=1, prop={'size': 'medium'})
    pylab.suptitle(title)
    pylab.savefig(file_name)
    pylab.close()
    return

def get_data(self, channel, dur, offset, buf, resampled_rate=None,
             filt_freq=None, filt_type='stop', filt_order=4, keep_buffer=False):
    """
    Return the requested range of data for each event by using the
    proper data retrieval mechanism for each event.

    The result will be a TimeSeries instance with dimensions
    (events,time).
    """
    # get ready to load dat
    eventdata = []
    events = []

    # speed up by getting unique event sources first
    usources = np.unique1d(self['esrc'])

    # loop over unique sources
    for src in usources:
        # get the eventOffsets from that source
        ind = np.atleast_1d(self['esrc'] == src)

        if len(ind) == 1:
            event_offsets = self['eoffset']
            events.append(self)
        else:
            event_offsets = self[ind]['eoffset']
            events.append(self[ind])

        #print "Loading %d events from %s" % (ind.sum(),src)

        # get the timeseries for those events
        eventdata.append(src.get_event_data(channel,
                                            event_offsets,
                                            dur,
                                            offset,
                                            buf,
                                            resampled_rate,
                                            filt_freq,
                                            filt_type,
                                            filt_order,
                                            keep_buffer))

    # concatenate (must eventually check that dims match)
    tdim = eventdata[0]['time']
    srate = eventdata[0].samplerate
    events = np.concatenate(events).view(self.__class__)
    eventdata = TimeSeries(np.concatenate(eventdata),
                           'time', srate,
                           dims=[Dim(events, 'events'), tdim])

    return eventdata

def runlist(self, minscore=None, rescore=False):
    """
    Class:
        Window
    Method Name:
        runlist
    Purpose:
        Return the runs,reruns corresponding to the request.  The runs
        can be trimmed by their score.
    Calling Sequence:
        import sdsspy
        w = sdsspy.window.Window()
        runs, reruns = w.runlist(minscore=None, rescore=False)
    Optional Inputs:
        minscore:
            Only runs,reruns with score > minscore will be returned.
            Note the version in IDL PHOTOOP uses >=, so be careful when
            converting your code.
        rescore:
            If True, re-score the runs.
    """
    if rescore:
        type = 'flist_rescore'
    else:
        type = 'flist'
    data = self.read(type)
    flist = data[type]

    runs, reruns = flist['run'], flist['rerun']
    if minscore is not None:
        w, = numpy.where(flist['score'] > minscore)
        if w.size == 0:
            return None, None
        runs = runs[w]
        reruns = reruns[w]

    # using a bigid is not necessary since the same run never ends up in
    # there with two different reruns
    uid, uindex = numpy.unique1d(runs, return_index=True)
    runs = runs[uindex]
    reruns = reruns[uindex]

    return runs, reruns

def bincount2d(factors):
    # array check copied from np.histogramdd
    try:
        # Sample is an ND-array.
        N, D = factors.shape
        sample = factors
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = np.atleast_2d(factors).T
        N, D = sample.shape

    tmp = np.ascontiguousarray(sample)  # b/c view works on base not another view
    factarr = tmp.view([('', tmp.dtype)] * tmp.shape[-1])
    uni, rind = np.unique1d(factarr, return_inverse=1)
    return np.bincount(rind), uni.view(tmp.dtype).reshape(-1, D)

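# Usage sketch: counting duplicate rows of an integer factor array
# (hypothetical data):
#
#     f = np.array([[0, 1], [0, 1], [2, 3]])
#     counts, levels = bincount2d(f)
#     # counts -> [2, 1]; levels -> [[0, 1], [2, 3]]
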
def grouped_limits(self):
    """
    Returns a dictionary with the unique values of ``self`` as keys, and
    a list of tuples (starting index, ending index) for the
    corresponding values.

    See Also
    --------
    Cluster.grouped_slices
    """
    output = dict([(k, []) for k in np.unique1d(self.uniques)])
    for (k, v) in zip(self.uniques, self.slices):
        output[k].append((v.start, v.stop))
    for k in output:
        output[k] = np.array(output[k])
    return output

def unique(A):
    """same as matlab's unique() function: returns B, i, j with A = B[j].
    The index array i (with B = A[i]) is not implemented and is returned
    as None.
    """
    A = np.asarray(A)
    assert len(A.shape) == 1
    B = list(np.unique1d(A))
    j = [B.index(Ai) for Ai in A]
    i = None
    B = np.array(B)
    j = np.array(j)
    return B, i, j

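# A sketch that also fills in the missing i on modern NumPy: unique can
# return first-occurrence indices (B = A[i]) and the inverse mapping
# (A = B[j]) in one call.  Assumes numpy imported as np:
def unique_full(A):
    A = np.asarray(A)
    B, i, j = np.unique(A, return_index=True, return_inverse=True)
    return B, i, j
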
def load_vorobonds(fname):
    """load the bond network from a custom output of Voro++ '%i %v %s %n'"""
    # load all bonds
    bonds = np.vstack([
        np.column_stack((
            int(line.split()[0]) * np.ones(int(line.split()[2]), int),
            list(map(int, line.split()[3:]))))
        for line in open(fname)])
    walls = np.signbit(bonds.min(axis=-1))
    outside = np.unique1d(bonds[walls].max(axis=-1))
    # remove the walls and the duplicates
    bonds = bonds[np.bitwise_and(np.diff(bonds, axis=-1)[:, 0] > 0,
                                 np.bitwise_not(walls))]
    # sort by second then first column
    return bonds[np.lexsort(bonds.T[::-1].tolist())], outside

def partitionObs(obs, base_epoch):
    times = sorted(inflow_stations.keys())
    regions = inflow_stations[times[0]].keys()
    partitions = dict([(r, np.empty((0,), dtype=obs.dtype)) for r in regions])

    for t_ens in times:
        t_obs = obs[np.where(obs['time'] == base_epoch + t_ens)]
        ids = np.unique1d(t_obs['id'])

        for id in ids:
            for region in regions:
                if id in inflow_stations[t_ens][region]:
                    ob = t_obs[np.where(t_obs['id'] == id)]
                    partitions[region] = np.append(partitions[region], ob)
    return partitions

def grouped_slices(self):
    """
    Returns a dictionary with the unique values of ``self`` as keys, and
    a list of slices for the corresponding values.

    See Also
    --------
    Cluster.grouped_limits that does the same thing
    """
    # work on a plain ndarray view of the unique values
    uniques = self.uniques.view(np.ndarray)
    output = dict([(k, []) for k in np.unique1d(uniques)])
    for (k, v) in zip(self.uniques, self.slices):
        output[k].append(v)
    return output