def find_coord_bin(self, coord):
    """Find the bins that contain the specified coord (X, Y) pairs.

    Parameters
    ----------
    coord : `~astropy.coordinates.Angle`
        Array of coord (X, Y) pairs to search for.

    Returns
    -------
    bin_index : `~numpy.ndarray`
        Array of integers with the indices (x, y) of the coord
        bin containing the specified coord (X, Y) pair.
    """
    # check that the specified coord is within the boundaries of the cube
    coord_extent = self.image_extent
    check_x_lo = (coord_extent[0] <= coord[0]).all()
    check_x_hi = (coord[0] < coord_extent[1]).all()
    check_y_lo = (coord_extent[2] <= coord[1]).all()
    check_y_hi = (coord[1] < coord_extent[3]).all()
    if not (check_x_lo and check_x_hi) or not (check_y_lo and check_y_hi):
        raise ValueError("Specified coord {0} is outside the boundaries {1}."
                         .format(coord, coord_extent))

    bin_index_x = np.searchsorted(self.coordx_edges[1:], coord[0])
    bin_index_y = np.searchsorted(self.coordy_edges[1:], coord[1])

    return np.array([bin_index_x, bin_index_y])
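# Usage sketch (hypothetical edge values, independent of the class above):
# searching against edges[1:] maps a value lying between two edges to the
# index of the lower edge, which is the bin-lookup convention used above.
import numpy as np

edges = np.array([0.0, 1.0, 2.0, 4.0])          # three bins
coords = np.array([0.5, 1.5, 3.9])
bin_index = np.searchsorted(edges[1:], coords)
print(bin_index)                                # -> [0 1 2]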
def _get_single_element(self, row, col):
    M, N = self.shape
    if (row < 0):
        row += M
    if (col < 0):
        col += N
    if not (0 <= row < M) or not (0 <= col < N):
        raise IndexError("index out of bounds: 0<=%d<%d, 0<=%d<%d" %
                         (row, M, col, N))

    major_index, minor_index = self._swap((row, col))

    start = self.indptr[major_index]
    end = self.indptr[major_index + 1]

    if self.has_sorted_indices:
        # Copies may be made, if dtypes of indices are not identical
        minor_index = self.indices.dtype.type(minor_index)
        minor_indices = self.indices[start:end]
        insert_pos_left = np.searchsorted(
            minor_indices, minor_index, side='left')
        insert_pos_right = insert_pos_left + np.searchsorted(
            minor_indices[insert_pos_left:], minor_index, side='right')
        return self.data[start + insert_pos_left:
                         start + insert_pos_right].sum(dtype=self.dtype)
    else:
        return np.compress(minor_index == self.indices[start:end],
                           self.data[start:end]).sum(dtype=self.dtype)
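# Usage sketch (hypothetical data, CSR layout only; the method above also
# handles CSC via self._swap): two searchsorted calls on the sorted column
# indices of one row locate an entry, summing duplicates and yielding 0 if
# the entry is absent.
import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0, 3, 0, 7],
                         [1, 0, 0, 0]]))
row, col = 0, 3
start, end = A.indptr[row], A.indptr[row + 1]
cols = A.indices[start:end]                    # sorted column indices of the row
lo = np.searchsorted(cols, col, side='left')
hi = np.searchsorted(cols, col, side='right')
print(A.data[start + lo:start + hi].sum())     # -> 7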
def _get_radius_neighbors(self, query, max_depth, bin_queries, radius):
    """Finds radius neighbors from the candidates obtained.

    Their distances from query are smaller than radius.
    Returns radius neighbors and distances.
    """
    ratio_within_radius = 1
    threshold = 1 - self.radius_cutoff_ratio
    total_candidates = np.array([], dtype=int)
    total_neighbors = np.array([], dtype=int)
    total_distances = np.array([], dtype=float)

    while (max_depth > self.min_hash_match and
           ratio_within_radius > threshold):
        left_mask = self._left_mask[max_depth]
        right_mask = self._right_mask[max_depth]
        candidates = []
        for i in range(self.n_estimators):
            start, stop = _find_matching_indices(self.trees_[i],
                                                 bin_queries[i],
                                                 left_mask, right_mask)
            candidates.extend(
                self.original_indices_[i][start:stop].tolist())
        candidates = np.setdiff1d(candidates, total_candidates)
        total_candidates = np.append(total_candidates, candidates)
        ranks, distances = self._compute_distances(query, candidates)
        m = np.searchsorted(distances, radius, side="right")
        positions = np.searchsorted(total_distances, distances[:m])
        total_neighbors = np.insert(total_neighbors, positions,
                                    candidates[ranks[:m]])
        total_distances = np.insert(total_distances, positions,
                                    distances[:m])
        ratio_within_radius = (total_neighbors.shape[0] /
                               float(total_candidates.shape[0]))
        max_depth = max_depth - 1
    return total_neighbors, total_distances
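# Usage sketch (toy numbers): the sorted-merge step above relies on np.insert
# placing each new distance at the position returned by searchsorted, which
# keeps the running distance array sorted.
import numpy as np

total_distances = np.array([0.1, 0.4, 0.9])
new_distances = np.array([0.2, 0.5])
positions = np.searchsorted(total_distances, new_distances)
total_distances = np.insert(total_distances, positions, new_distances)
print(total_distances)   # -> [0.1 0.2 0.4 0.5 0.9]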
def map_to_external_reference(self, roi, refname='HXB2', in_patient=True):
    '''
    return a map of positions in the patient to a reference genomewide

    Args:
        roi        -- region of interest given as a string or a tuple (start, end)
        refname    -- reference to compare to
        in_patient -- specifies whether the (start, end) refers to reference
                      or patient coordinates

    returns:
        a (len(roi), 3) array with reference coordinates in the first column,
        patient coordinates in the second column, and roi coordinates in the
        third column
    '''
    from .filenames import get_coordinate_map_filename
    coo_fn = get_coordinate_map_filename(self.name, 'genomewide', refname=refname)
    genomewide_map = np.loadtxt(coo_fn, dtype=int)

    if roi in self.annotation:
        roi_pos = np.array([x for x in self.annotation[roi]], dtype=int)
        ind = np.in1d(genomewide_map[:, 1], roi_pos)
        roi_indices = np.in1d(roi_pos, genomewide_map[:, 1]).nonzero()[0]
        return np.vstack((genomewide_map[ind].T, [roi_indices])).T

    elif roi == "genomewide":
        return np.vstack((genomewide_map.T, [genomewide_map[:, 1]])).T

    else:
        try:
            start, stop = map(int, roi)
            start_ind = np.searchsorted(genomewide_map[:, in_patient], start)
            stop_ind = np.searchsorted(genomewide_map[:, in_patient], stop)
            return np.vstack((genomewide_map[start_ind:stop_ind].T,
                              [genomewide_map[start_ind:stop_ind, in_patient] - start])).T
        except (TypeError, ValueError):
            raise ValueError("ROI not understood")
def __init__(self, A, fraction=0.80):
    assert 0 <= fraction <= 1
    # A = U . diag(d) . Vt, O(m n^2), lapack_lite --
    self.U, self.d, self.Vt = np.linalg.svd(A, full_matrices=False)
    # Different versions of numpy can return U and Vt with flipped signs
    # (U @ Vt is unchanged); force a consistent sign convention here.
    if self.Vt[0, 0] < 0:
        self.Vt *= -1.0
        self.U *= -1.0
    assert np.all(self.d[:-1] >= self.d[1:])  # sorted in descending order
    self.eigen = self.d ** 2
    self.sumvariance = np.cumsum(self.eigen)
    try:
        self.sumvariance /= self.sumvariance[-1]
    except Exception:
        print(len(A), len(self.sumvariance), len(self.eigen))
        raise
    self.npc = np.searchsorted(self.sumvariance, fraction) + 1
    while self.npc == 1:  # prevents fewer than 2 PCs being found
        fraction *= 1.1
        self.npc = np.searchsorted(self.sumvariance, fraction) + 1
    self.dinv = np.array([1 / d if d > self.d[0] * 1e-6 else 0
                          for d in self.d])
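# Usage sketch (random data, independent of the class above): the
# component-count rule is a searchsorted on the cumulative fraction of
# explained variance.
import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(50, 8))
d = np.linalg.svd(A, compute_uv=False)      # singular values, descending
sumvariance = np.cumsum(d ** 2)
sumvariance /= sumvariance[-1]
fraction = 0.80
npc = np.searchsorted(sumvariance, fraction) + 1
print(npc)                                  # number of components kept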
def spatio_temporal_src_connectivity(src, n_times):
    """Compute connectivity for a source space activation over time

    Parameters
    ----------
    src : source space
        The source space.
    n_times : int
        Number of time instants

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatio-temporal
        graph structure. If N is the number of vertices in the
        source space, the first N nodes in the graph are the
        vertices at time 1, the nodes from N+1 to 2N are the
        vertices at time 2, etc.
    """
    if src[0]['use_tris'] is None:
        raise Exception("The source space does not appear to be an ico "
                        "surface. Connectivity cannot be extracted from "
                        "non-ico source spaces.")
    lh_tris = np.searchsorted(np.unique(src[0]['use_tris']),
                              src[0]['use_tris'])
    rh_tris = np.searchsorted(np.unique(src[1]['use_tris']),
                              src[1]['use_tris'])
    tris = np.concatenate((lh_tris, rh_tris + np.max(lh_tris) + 1))
    return spatio_temporal_tris_connectivity(tris, n_times)
def test_constant_interpolation_basic(self): """Interpolation library works for piecewise constant function """ # Define pixel centers along each direction x = numpy.array([1.0, 2.0, 4.0]) y = numpy.array([5.0, 9.0]) # Define ny by nx array with corresponding values A = numpy.zeros((len(x), len(y))) # Define values for each x, y pair as a linear function for i in range(len(x)): for j in range(len(y)): A[i, j] = linear_function(x[i], y[j]) # Then test that interpolated points are always assigned value of # closest neighbour xis = numpy.linspace(x[0], x[-1], 10) etas = numpy.linspace(y[0], y[-1], 10) points = combine_coordinates(xis, etas) vals = interpolate2d(x, y, A, points, mode='constant') # Find upper neighbours for each interpolation point xi = points[:, 0] eta = points[:, 1] idx = numpy.searchsorted(x, xi, side='left') idy = numpy.searchsorted(y, eta, side='left') # Get the four neighbours for each interpolation point x0 = x[idx - 1] x1 = x[idx] y0 = y[idy - 1] y1 = y[idy] z00 = A[idx - 1, idy - 1] z01 = A[idx - 1, idy] z10 = A[idx, idy - 1] z11 = A[idx, idy] # Location coefficients alpha = (xi - x0) / (x1 - x0) beta = (eta - y0) / (y1 - y0) refs = numpy.zeros(len(vals)) for i in range(len(refs)): if alpha[i] < 0.5 and beta[i] < 0.5: refs[i] = z00[i] if alpha[i] >= 0.5 and beta[i] < 0.5: refs[i] = z10[i] if alpha[i] < 0.5 and beta[i] >= 0.5: refs[i] = z01[i] if alpha[i] >= 0.5 and beta[i] >= 0.5: refs[i] = z11[i] assert numpy.allclose(vals, refs, rtol=1e-12, atol=1e-12)
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def randomChoice(V, n=1):
    """
    Make a random choice from a vector V of values which are unnormalised
    probabilities. Return the corresponding index. For example if
    v = [1, 2, 4] then the probabilities of the indices are respectively
    [1/7, 2/7, 4/7]. The parameter n is the number of random choices to make.
    If V is a matrix, then the rows are taken as probabilities, and a choice
    is made for each row.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.shape[0] == 0:
        return -1

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        p = numpy.random.rand(n) * cumV[-1]
        return numpy.searchsorted(cumV, p)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
        inds = numpy.zeros(P.shape, numpy.int64)
        for i in range(P.shape[0]):
            inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])
        return inds
    else:
        raise ValueError("Invalid number of dimensions")
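# Usage sketch (toy weights): the 1-D branch above is inverse-CDF sampling --
# cumulative sums of the unnormalised weights, uniform draws scaled to the
# total, and searchsorted to locate each draw.
import numpy as np

V = np.array([1.0, 2.0, 4.0])               # probabilities 1/7, 2/7, 4/7
cumV = np.cumsum(V)
rng = np.random.default_rng(0)
draws = rng.random(10_000) * cumV[-1]
idx = np.searchsorted(cumV, draws)
print(np.bincount(idx, minlength=3) / len(idx))   # roughly [0.14, 0.29, 0.57]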
def window_index_time(t, windowsize, overlap):
    """
    Determines the indices for window start and end points of a time vector

    The window does not need to be evenly spaced

    Inputs:
        t - list or array of datetime objects
        windowsize - length of the window [seconds]
        overlap - number of overlap points [seconds]

    Returns: pt1, pt2 the start and end indices of each window
    """
    tsec = othertime.SecondsSince(t)

    t1 = tsec[0]
    t2 = t1 + windowsize
    pt1 = [0]
    pt2 = [np.searchsorted(tsec, t2)]
    while t2 < tsec[-1]:
        t1 = t2 - overlap
        t2 = t1 + windowsize
        pt1.append(np.searchsorted(tsec, t1))
        pt2.append(np.searchsorted(tsec, t2))

    return pt1, pt2
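# Usage sketch (hypothetical 1 Hz time base standing in for
# othertime.SecondsSince(t)): searchsorted turns arbitrary window boundaries
# in seconds into slice indices of the sorted time vector.
import numpy as np

tsec = np.arange(0.0, 100.0, 1.0)
t1, t2 = 12.5, 42.5
i1, i2 = np.searchsorted(tsec, [t1, t2])
print(i1, i2, tsec[i1], tsec[i2 - 1])       # 13 43 13.0 42.0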
def get_peaks(sub_gene_df, top_s, max_dist, feature_name): """ For each gene in gene_info get the peaks within max_dist in top_s. This is basically reverse engineering to get the peak info for each gene that was found to be associated with a peak. The reason for reverse engeneering rather than storing this information when searching for the genes for each peak is that we want to use precisely the same function to search the genes for the real data and for the permutations. Input: gene_info ... data frame with index ('chrom','start') and columns 'gene_id' and 'end' top_s ... series of peak positions with index (chrom, pos) and values peak height max_dist ... maximum distance between gene and peak """ gene_info = sub_gene_df def get_dist(df, gene_pos): """ calculate distance """ s = pd.Series(df.index.droplevel(0).values - gene_pos.ix[df.index[0][0]], index=df.index.droplevel(0).values) return s tot_gene_peaks_df = pd.DataFrame() if not top_s.index.is_monotonic: top_s = top_s.sortlevel([0, 1]) if not gene_info.index.is_monotonic: gene_info = gene_info.sort_index() for chrom in gene_info.index.droplevel(1).unique(): loc_top_s = top_s.ix[chrom] start = np.searchsorted(loc_top_s.index.values + max_dist, gene_info.ix[chrom].index.values) end = np.searchsorted(loc_top_s.index.values - max_dist, gene_info.ix[chrom]["end"].values) x = pd.concat( [loc_top_s.iloc[st:ed] for st, ed in zip(start, end)], keys=gene_info.ix[chrom][feature_name].values ) x.name = "peak_height" dist_start = x.groupby(lambda i: i[0]).apply( lambda df: get_dist(df, gene_info.ix[chrom].reset_index().set_index(feature_name)["start"]) ) dist_start.name = "dist_start" dist_end = x.groupby(lambda i: i[0]).apply( lambda df: get_dist(df, gene_info.ix[chrom].set_index(feature_name)["end"]) ) dist_end.name = "dist_end" gene_peaks_df = pd.concat([x, dist_start, dist_end], axis=1) gene_peaks_df.index = pd.MultiIndex.from_arrays( [gene_peaks_df.index.droplevel(1), [chrom] * len(x), gene_peaks_df.index.droplevel(0)] ) tot_gene_peaks_df = pd.concat([tot_gene_peaks_df, gene_peaks_df], axis=0) tot_gene_peaks_df.index.names = [feature_name, "chrom", "peak_pos"] return tot_gene_peaks_df
def derivatives(self, x, der):
    """Evaluate a derivative of the piecewise polynomial

    Parameters
    ----------
    x : scalar or array-like of length N
    der : integer
        how many derivatives (including the function value as the
        0th derivative) to extract

    Returns
    -------
    y : array-like of shape der by R or der by N or der by N by R
    """
    if _isscalar(x):
        pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n - 2)
        y = self.polynomials[pos].derivatives(x, der=der)
    else:
        x = np.asarray(x)
        m = len(x)
        pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n - 2)
        if self.vector_valued:
            y = np.zeros((der, m, self.r))
        else:
            y = np.zeros((der, m))

        for i in range(self.n - 1):
            c = pos == i
            y[:, c] = self.polynomials[i].derivatives(x[c], der=der)
    return y
def _regrid_indices(cells, depth, points):
    # Calculate the minimum difference in cell extent.
    extent = np.min(np.diff(cells))
    if extent == 0:
        # Detected a dimension coordinate with an invalid
        # zero length cell extent.
        msg = 'The target grid cube {} ({!r}) coordinate contains ' \
              'a zero length cell extent.'
        axis, name = 'x', tx.name()
        if points is sy_points:
            axis, name = 'y', ty.name()
        raise ValueError(msg.format(axis, name))
    elif extent > 0:
        # The cells of the dimension coordinate are in ascending order.
        indices = np.searchsorted(cells, points, side='right') - 1
    else:
        # The cells of the dimension coordinate are in descending order.
        # np.searchsorted() requires ascending order, so we need to
        # account for this restriction.
        cells = cells[::-1]
        right = np.searchsorted(cells, points, side='right')
        left = np.searchsorted(cells, points, side='left')
        indices = depth - right
        # Only those points that exactly match the left-hand cell bound
        # will differ between 'left' and 'right'. Thus their appropriate
        # target cell location needs to be recalculated to give the
        # correct descending [upper, lower) interval cell, source to
        # target regrid behaviour.
        delta = np.where(left != right)[0]
        if delta.size:
            indices[delta] = depth - left[delta]
    return indices
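# Usage sketch (made-up cell bounds, ascending branch only): with
# side='right' minus one, a point in [cells[i], cells[i+1]) maps to cell i.
import numpy as np

cells = np.array([10.0, 20.0, 30.0, 40.0])
points = np.array([10.0, 19.9, 25.0, 39.9])
print(np.searchsorted(cells, points, side='right') - 1)   # -> [0 0 1 2]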
def get_tmax(self, p, cutoff=None):
    if cutoff is None:
        cutoff = self.cutoff

    if self.quad:
        x = np.arange(1, 10000, 1)
        y = np.zeros_like(x)
        func = self.function(x, p)
        func_half = self.function(x[:-1] + 1 / 2, p)
        y[1:] = y[0] + np.cumsum(1 / 6 *
                                 (func[:-1] + 4 * func_half + func[1:]))
        y = y / quad(self.function, 0, np.inf, args=p)[0]
        return np.searchsorted(y, cutoff)
    else:
        t1 = -np.sqrt(3 / 5)
        t2 = 0
        t3 = np.sqrt(3 / 5)
        w1 = 5 / 9
        w2 = 8 / 9
        w3 = 5 / 9

        x = np.arange(1, 10000, 1)
        y = np.zeros_like(x)
        func = self.function(x, p)
        func_half = self.function(x[:-1] + 1 / 2, p)
        y[0] = 0.5 * (w1 * self.function(0.5 * t1 + 0.5, p) +
                      w2 * self.function(0.5 * t2 + 0.5, p) +
                      w3 * self.function(0.5 * t3 + 0.5, p))
        y[1:] = y[0] + np.cumsum(1 / 6 *
                                 (func[:-1] + 4 * func_half + func[1:]))
        y = y / quad(self.function, 0, np.inf, args=p)[0]
        return np.searchsorted(y, cutoff)
def __call__(self, x):
    """Evaluate the piecewise polynomial

    Parameters
    ----------
    x : scalar or array-like of length N

    Returns
    -------
    y : scalar or array-like of length R or length N or N by R
    """
    if _isscalar(x):
        pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n - 2)
        y = self.polynomials[pos](x)
    else:
        x = np.asarray(x)
        m = len(x)
        pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n - 2)
        if self.vector_valued:
            y = np.zeros((m, self.r))
        else:
            y = np.zeros(m)

        for i in range(self.n - 1):
            c = pos == i
            y[c] = self.polynomials[i](x[c])
    return y
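# Usage sketch (toy breakpoints): both `derivatives` and `__call__` locate
# the polynomial piece for each x with the same clip/searchsorted pattern;
# clipping keeps out-of-range x inside the first or last piece.
import numpy as np

xi = np.array([0.0, 1.0, 2.0, 3.0])         # breakpoints, n = 4 -> 3 pieces
x = np.array([-0.5, 0.3, 1.5, 2.7, 5.0])
pos = np.clip(np.searchsorted(xi, x) - 1, 0, len(xi) - 2)
print(pos)                                  # -> [0 0 1 2 2]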
def downsample(image, image_x_axis, image_y_axis,
               x_bounds, y_bounds, x_resolution, y_resolution):
    x_resolution, y_resolution = int(round(x_resolution)), int(round(y_resolution))
    x_bounds = [x_bounds.start, x_bounds.end]
    y_bounds = [y_bounds.start, y_bounds.end]
    x_bounds = np.searchsorted(image_x_axis, x_bounds)
    y_bounds = np.searchsorted(image_y_axis, y_bounds)
    #y_bounds = image.shape[0] + 1 - y_bounds[::-1]

    if x_resolution == 0 or y_resolution == 0:
        subset = np.zeros((1, 1), dtype=image.dtype)
    else:
        subset = image[y_bounds[0]:y_bounds[1],
                       x_bounds[0]:x_bounds[1]]
        x_downsample_factor = max(round(subset.shape[1] / x_resolution / 3.), 1)
        y_downsample_factor = max(round(subset.shape[0] / y_resolution / 3.), 1)
        subset = subset[::x_downsample_factor, ::y_downsample_factor]
        image = scipy.misc.imresize(subset, (x_resolution, y_resolution),
                                    interp='nearest')

    bounds = image_x_axis[x_bounds[0]:x_bounds[1]]
    dw = np.max(bounds) - np.min(bounds)
    bounds = image_y_axis[y_bounds[0]:y_bounds[1]]
    dh = np.max(bounds) - np.min(bounds)

    return {'data': subset,
            'offset_x': image_x_axis[x_bounds[0]],
            'offset_y': image_y_axis[y_bounds[0]],
            'dw': dw,
            'dh': dh,
            }
def get_indices(ival):
    """Returns the indices surrounding the given interval"""
    start_ind = np.searchsorted(self.x, ival[0], side='right')
    end_ind = np.searchsorted(self.x, ival[1], side='left')
    assert start_ind > 0 and end_ind < len(self.x), \
        "Invalid averaging interval"
    return start_ind, end_ind
def __interpFunction_BoundaryConstant(self, x, y):
    xNextIndex = np.searchsorted(self.xData, x)
    if (xNextIndex == 0):
        xNextIndex = 0
        xPrevIndex = xNextIndex
        xCoeff = 0
    elif (xNextIndex >= len(self.xData)):
        xNextIndex = xNextIndex - 1
        xPrevIndex = xNextIndex
        xCoeff = 0
    else:
        xPrevIndex = xNextIndex - 1
        xCoeff = (x - self.xData[xPrevIndex]) / \
                 (self.xData[xNextIndex] - self.xData[xPrevIndex])

    yNextIndex = np.searchsorted(self.yData, y)
    if (yNextIndex == 0):
        yNextIndex = 0
        yPrevIndex = yNextIndex
        yCoeff = 0
    elif (yNextIndex >= len(self.yData)):
        yNextIndex = yNextIndex - 1
        yPrevIndex = yNextIndex
        yCoeff = 0
    else:
        yPrevIndex = yNextIndex - 1
        yCoeff = (y - self.yData[yPrevIndex]) / \
                 (self.yData[yNextIndex] - self.yData[yPrevIndex])

    value1 = self.zData[yPrevIndex][xPrevIndex] + \
        xCoeff * (self.zData[yPrevIndex][xNextIndex] - self.zData[yPrevIndex][xPrevIndex])
    value2 = self.zData[yNextIndex][xPrevIndex] + \
        xCoeff * (self.zData[yNextIndex][xNextIndex] - self.zData[yNextIndex][xPrevIndex])
    interpValue = value1 + yCoeff * (value2 - value1)
    return interpValue
def evaluation_pairs(det_chords, ann_chords):
    """
    Match detected with annotated chords and create paired label segments
    for evaluation.

    Parameters
    ----------
    det_chords : numpy structured array
        Chord detections with 'start' and 'end' fields.
    ann_chords : numpy structured array
        Chord annotations with 'start' and 'end' fields.

    Returns
    -------
    annotations : numpy structured array
        Annotated chords of evaluation segments.
    detections : numpy structured array
        Detected chords of evaluation segments.
    durations : numpy array
        Durations of evaluation segments.
    """
    times = np.unique(np.hstack([ann_chords['start'], ann_chords['end'],
                                 det_chords['start'], det_chords['end']]))

    durations = times[1:] - times[:-1]
    annotations = ann_chords['chord'][
        np.searchsorted(ann_chords['start'], times[:-1], side='right') - 1]
    detections = det_chords['chord'][
        np.searchsorted(det_chords['start'], times[:-1], side='right') - 1]

    return annotations, detections, durations
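# Usage sketch (invented boundary times): merging all start/end times gives
# the evaluation segments, and searchsorted(..., side='right') - 1 picks the
# annotation/detection interval covering each segment start.
import numpy as np

ann_start = np.array([0.0, 2.0, 5.0])
det_start = np.array([0.0, 2.5, 5.0])
times = np.unique(np.hstack([ann_start, det_start, [7.0]]))   # 7.0 = end
seg_starts = times[:-1]
ann_idx = np.searchsorted(ann_start, seg_starts, side='right') - 1
det_idx = np.searchsorted(det_start, seg_starts, side='right') - 1
print(seg_starts)   # [0.  2.  2.5 5. ]
print(ann_idx)      # [0 1 1 2]
print(det_idx)      # [0 0 1 2]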
def BayesianWords(unigram_counts, bigram_counts, n_words):
    unigrams, ucounts = zip(*sorted(
        (k, v) for k, v in unigram_counts.items() if k in bigram_counts))
    prior = np.array(ucounts) / sum(ucounts)
    prior_pdf = np.array([np.sum(prior[:n]) for n in range(len(unigrams))])

    bigram_pdfs = {}
    for w1, w1_bgcnts in bigram_counts.items():
        w2strs, w2counts = zip(*sorted(w1_bgcnts.items()))
        w2pdf = np.array(w2counts) / sum(w2counts)
        bigram_pdfs[w1] = (
            w2strs,
            np.array([np.sum(w2pdf[:n]) for n in range(len(w2strs))]))
        #print('%d bigrams for %s' % (len(w2strs), w1))

    first_word_index = np.searchsorted(prior_pdf, np.random.random_sample())
    words = [unigrams[min(len(unigrams) - 1, first_word_index)]]
    for n in range(1, n_words):
        if words[-1] in bigram_pdfs:
            bigram_strs, bigram_pdf = bigram_pdfs[words[-1]]
            idx = np.searchsorted(bigram_pdf, np.random.random_sample())
            words.append(bigram_strs[min(len(bigram_strs) - 1, idx)])
        else:
            # Pick from the prior.
            idx = np.searchsorted(prior_pdf, np.random.random_sample())
            words.append(unigrams[min(len(unigrams) - 1, idx)])
    return words
def interp(self, rho, temp, log=False): dens = self.dens temps = self.temps if log == True: rho = np.log10(rho) temp = np.log10(temp) dens = np.log10(dens) temps = np.log10(temps) # First, find the temperature/density cell we are in. # The opacity will be computed using densities: # dens[jd-1], dens[jd] # and temperatures: # temp[jt-1], temp[jt] jd = np.searchsorted(dens, rho) if jd == 0: rho = dens[0] jd += 1 if jd == len(dens): jd = jd - 1 rho = dens[-1] jt = np.searchsorted(temps, temp) if jt == 0: temp = temps[0] jt += 1 if jt == len(temps): jt = jt - 1 temp = temps[-1] # Now that the surrounding temperatures/densities have been # identified, the interpolation coefficients can be computed. # c1 -> weight for dens[jd-1] and temp[jt-1] # c2 -> weight for dens[jd] and temp[jt-1] # c3 -> weight for dens[jd-1] and temp[jt] # c4 -> weight for dens[jd] and temp[jt] d1 = dens[jd-1] d2 = dens[jd] t1 = temps[jt-1] t2 = temps[jt] delta = (rho-d1)/(d2-d1) tau = (temp-t1)/(t2-t1) c1 = (delta-1.0)*(tau-1.0) c2 = delta*(1-tau) c3 = tau*(1-delta) c4 = delta * tau # Compute the interpolated opacity: return \ c1 * self.go(jd-1,jt-1) + \ c2 * self.go(jd ,jt-1) + \ c3 * self.go(jd-1,jt ) + \ c4 * self.go(jd ,jt )
def get_features(peak_s, feature_df, feature_name='feature', max_dist=0):
    """
    Take the input series and get the names of features nearby.

    Input:
    peak_s ... pandas series with (chrom, pos) index and value of
               the statistic ('peak height').
               Series should be named.
    feature_df ... data frame with feature info.
    """
    all_features = []
    if not feature_df.index.is_monotonic:
        feature_df = feature_df.sort_index()
    tot_hit_df = pd.DataFrame()
    for chrom in peak_s.index.droplevel(1).unique():
        loc_feature_df = feature_df.ix[chrom]
        #loc_feature_df = loc_feature_df.append(pd.DataFrame(np.nan, index=[np.inf], columns=loc_feature_df.columns))
        #print(loc_feature_df.index - max_dist, peak_s.ix[chrom].index.values)
        #try:
        pos_rel_to_start = np.searchsorted(loc_feature_df.index.values - max_dist,
                                           peak_s.ix[chrom].index.values)
        #except:
        #    print(chrom, peak_s.ix[chrom])
        pos_rel_to_end = np.searchsorted(loc_feature_df["end"].values + max_dist,
                                         peak_s.ix[chrom].index.values)
        features = list(set(loc_feature_df[feature_name].iloc[
            np.hstack([range(a, b) for a, b in zip(pos_rel_to_end, pos_rel_to_start)])]))
        all_features += features
    return all_features
def _linearslice(linearbpf, x0, x1):
    """
    Slice the given bpf, returning a new Linear bpf with endpoints x0 and x1.
    """
    assert isinstance(linearbpf, core.Linear)
    X, Y = linearbpf.points()
    insert_head = x0 > X[0]
    if insert_head:
        i = np.searchsorted(X, x0)
        X = X[i - 1:]
        Y = Y[i - 1:]
    insert_tail = x1 < X[-1]
    if insert_tail:
        i = np.searchsorted(X, x1)
        X = X[:i + 1]
        Y = Y[:i + 1]
    if insert_head or insert_tail:
        # we copy when we know exactly how much to copy
        X = X.copy()
        Y = Y.copy()
    if insert_head:
        X[0] = x0
        Y[0] = linearbpf(x0)
    if insert_tail:
        X[i] = x1
        Y[i] = linearbpf(x1)
    return core.Linear(X, Y)
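# Usage sketch (plain arrays, with np.interp standing in for linearbpf(x0)):
# the head-insertion step keeps the segment that contains x0 and overwrites
# its first point with the exact endpoint.
import numpy as np

X = np.array([0.0, 1.0, 2.0, 3.0])
Y = np.array([0.0, 10.0, 20.0, 30.0])
x0 = 1.4
i = np.searchsorted(X, x0)
Xs, Ys = X[i - 1:].copy(), Y[i - 1:].copy()
Xs[0] = x0
Ys[0] = np.interp(x0, X, Y)
print(Xs, Ys)        # [1.4 2.  3. ] [14. 20. 30.]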
def get_exclude_coords(self, ex_starts, ex_ends):
    mx = self.starts.shape[0] - 1
    n_exclude = len(ex_ends)
    ex_wnd_starts = np.searchsorted(self.starts, ex_starts)
    ex_wnd_ends = np.searchsorted(self.ends, ex_ends)

    ex_wnd_starts = np.amax(np.c_[ex_wnd_starts - 1, np.zeros(n_exclude)], 1).astype(int)
    ex_wnd_ends = np.amin(np.c_[ex_wnd_ends + 1, np.ones(n_exclude) * mx], 1).astype(int)

    ex_starts = self.starts[ex_wnd_starts]
    ex_ends = self.ends[ex_wnd_ends]

    ex_coords = []
    curr_s = ex_starts[0]
    curr_e = ex_ends[0]
    #print(ex_wnd_starts)
    #print(ex_wnd_ends)
    for i in range(1, n_exclude):
        if ex_starts[i] < curr_e:
            curr_e = ex_ends[i]
        else:
            ex_coords.append(tuple([curr_s, curr_e]))
            curr_s = ex_starts[i]
            curr_e = ex_ends[i]

    ex_coords.append(tuple([curr_s, curr_e]))
    return ex_coords
def build3DHistogramArray(inputA=None, xBinVector=None, yBinVector=None):
    """
    This builds and bins up the DCPD exposure surface.
    The input is a time parameterized array
    [beamlist, beampitchlist, beamyawlist]
    """
    threeDData = list()
    for i, aCol0 in enumerate(inputA[0]):
        threeDData.append((inputA[0][i],
                           inputA[1][i],
                           inputA[2][i]))
    # Sort the input structure by col0
    threeDData.sort()
    zMapLists = empty([len(xBinVector), len(yBinVector)], dtype=object)
    for ii in range(0, len(xBinVector)):
        for jj in range(0, len(yBinVector)):
            zMapLists[ii][jj] = list()
    for xIndex in range(0, len(xBinVector) - 1):
        xLow = xBinVector[xIndex]
        xHigh = xBinVector[xIndex + 1]
        xData = [a for a, b, c in threeDData]
        # Find all points that fit this X column
        dataSubset = threeDData[searchsorted(xData, xLow, side='left'):
                                searchsorted(xData, xHigh, side='right')]
        yDataSubset = [(b, c) for a, b, c in dataSubset]
        yDataSubset.sort()
        yData = [b for b, c in yDataSubset]
        for yIndex in range(0, len(yBinVector) - 1):
            yLow = yBinVector[yIndex]
            yHigh = yBinVector[yIndex + 1]
            dataBinMatch = yDataSubset[searchsorted(yData, yLow, side='left'):
                                       searchsorted(yData, yHigh, side='right')]
            zDataMatch = [c for b, c in dataBinMatch]
            zMapLists[xIndex][yIndex].extend(zDataMatch)
    return zMapLists
def get_scx_scz_in_timerange(timerange, file):
    """
    Read a downloaded FERMI weekly pointing file and extract scx, scz
    for a timerange.

    Parameters
    ----------
    timerange : time range
        A time range object with ``start`` and ``end`` attributes
        covering the period of interest.
    file : str
        A filepath to a Fermi/LAT weekly pointing file (e.g. as obtained
        by the download_weekly_pointing_file function).
    """
    hdulist = fits.open(file)
    timesinutc = []
    for tim in hdulist[1].data['START']:
        timesinutc.append(met_to_utc(tim))

    startind = np.searchsorted(timesinutc, timerange.start)
    endind = np.searchsorted(timesinutc, timerange.end)

    scx_radec = []
    scz_radec = []
    for i in range(startind, endind):
        scx_radec.append((Longitude(hdulist[1].data['RA_SCX'][i] * u.deg),
                          Latitude(hdulist[1].data['DEC_SCX'][i] * u.deg)))
        scz_radec.append((Longitude(hdulist[1].data['RA_SCZ'][i] * u.deg),
                          Latitude(hdulist[1].data['DEC_SCZ'][i] * u.deg)))
    return scx_radec, scz_radec, timesinutc[startind:endind]
def kuiper_two(data1, data2):
    """Compute the Kuiper statistic to compare two samples.

    Parameters
    ----------
    data1 : array-like
        The first set of data values.
    data2 : array-like
        The second set of data values.

    Returns
    -------
    D : float
        The raw test statistic.
    fpp : float
        The probability of obtaining two samples this different from
        the same distribution.

    Notes
    -----
    Warning: the fpp is quite approximate, especially for small samples.
    """
    data1, data2 = sort(data1), sort(data2)

    if len(data2) < len(data1):
        data1, data2 = data2, data1

    cdfv1 = searchsorted(data2, data1) / float(len(data2))  # this could be more efficient
    cdfv2 = searchsorted(data1, data2) / float(len(data1))  # this could be more efficient
    D = (amax(cdfv1 - arange(len(data1)) / float(len(data1))) +
         amax(cdfv2 - arange(len(data2)) / float(len(data2))))

    Ne = len(data1) * len(data2) / float(len(data1) + len(data2))
    return D, kuiper_FPP(D, Ne)
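# Usage sketch (random samples): evaluating one sample's empirical CDF at the
# other sample's points is a single searchsorted on the sorted data, which is
# what the two cdfv arrays above compute.
import numpy as np

rng = np.random.default_rng(0)
data1 = np.sort(rng.normal(size=100))
data2 = np.sort(rng.normal(size=200))
cdf2_at_1 = np.searchsorted(data2, data1) / len(data2)
ecdf1 = np.arange(len(data1)) / len(data1)
print(np.max(cdf2_at_1 - ecdf1))    # one of the two terms entering D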
def split(self, cols_or_rows, columns=1):
    # Similar to take but returns two arrays, the extracted columns plus
    # the resulting array. Assumes cols_or_rows is sorted
    base = dok_matrix()
    ext = dok_matrix()
    indx = int((columns == 1))
    if indx:
        for key in self.keys():
            num = np.searchsorted(cols_or_rows, key[1])
            if cols_or_rows[num] == key[1]:
                newkey = (key[0], num)
                ext[newkey] = self[key]
            else:
                newkey = (key[0], key[1] - num)
                base[newkey] = self[key]
    else:
        for key in self.keys():
            num = np.searchsorted(cols_or_rows, key[0])
            if cols_or_rows[num] == key[0]:
                newkey = (num, key[1])
                ext[newkey] = self[key]
            else:
                newkey = (key[0] - num, key[1])
                base[newkey] = self[key]
    return base, ext
def skim_imgs(Mimg, Mimg_tabs, Msnp_tabs, t_adjust=0,
              tb0=SKIMSPK_TB, te0=SKIMSPK_TE,
              n_blk=20000, onlyonce=True):
    if onlyonce:
        idx_eachimg = [np.nonzero(Mimg == i_img)[0][0]
                       for i_img in np.unique(Mimg)]
        t_eachimg = Mimg_tabs[idx_eachimg]
        i_eachimg = Mimg[idx_eachimg]
    else:
        t_eachimg = Mimg_tabs
        i_eachimg = Mimg

    ibie = []
    ib = 0
    ie = 0

    for t0 in t_eachimg:
        tb = t0 + tb0 - t_adjust
        te = t0 + te0 - t_adjust

        xb = np.searchsorted(Msnp_tabs[ib: ib + n_blk], tb)
        if xb >= n_blk:
            xb = np.searchsorted(Msnp_tabs[ib:], tb)
        ib += xb

        xe = np.searchsorted(Msnp_tabs[ie: ie + n_blk], te)
        if xe >= n_blk:
            xe = np.searchsorted(Msnp_tabs[ie:], te)
        ie += xe

        ibie.append((ib, ie))

    return ibie, i_eachimg
def _substitute_iers_b(cls, table):
    """Substitute IERS B values with those from a real IERS B table.

    IERS-A has IERS-B values included, but for reasons unknown these
    do not match the latest IERS-B values (see comments in #4436).
    Here, we use the bundled astropy IERS-B table to overwrite the values
    in the downloaded IERS-A table.
    """
    iers_b = IERS_B.open()
    # Substitute IERS-B values for existing B values in IERS-A table
    mjd_b = table['MJD'][~table['UT1_UTC_B'].mask]
    i0 = np.searchsorted(iers_b['MJD'].value, mjd_b[0], side='left')
    i1 = np.searchsorted(iers_b['MJD'].value, mjd_b[-1], side='right')
    iers_b = iers_b[i0:i1]
    n_iers_b = len(iers_b)
    # If there is overlap then replace IERS-A values from available IERS-B
    if n_iers_b > 0:
        # Sanity check that we are overwriting the correct values
        if not np.allclose(table['MJD'][:n_iers_b], iers_b['MJD'].value):
            raise ValueError('unexpected mismatch when copying '
                             'IERS-B values into IERS-A table.')
        # Finally do the overwrite
        table['UT1_UTC_B'][:n_iers_b] = iers_b['UT1_UTC'].value
        table['PM_X_B'][:n_iers_b] = iers_b['PM_x'].value
        table['PM_Y_B'][:n_iers_b] = iers_b['PM_y'].value

    return table
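# Usage sketch (fabricated MJD values): side='left' at the low end and
# side='right' at the high end give a half-open slice of the sorted MJD
# column that spans the closed interval [lo, hi], as used above.
import numpy as np

mjd = np.arange(50000.0, 50010.0)
lo, hi = 50002.0, 50006.0
i0 = np.searchsorted(mjd, lo, side='left')
i1 = np.searchsorted(mjd, hi, side='right')
print(mjd[i0:i1])    # 50002.0 ... 50006.0 inclusive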
vals_col='cumulative') temp['per_capita_deaths'] = rona_db.find_per_capita(temp, pop_col='popData2018', vals_col='cum_deaths') temp.reset_index(drop=True, inplace=True) countries[country] = temp full_data = [] for j, date in enumerate(dates): date_countries = [] for i, country in enumerate(countries): country_list = [ str(i)[:10] for i in countries[country]['dateRep'].values ] if str(date)[:10] in country_list: index = np.searchsorted(countries[country]['dateRep'], date) y = countries[country]['cumulative'].iloc[:index + 1] x = countries[country]['dateRep'].iloc[:index + 1] x = [str(i)[:10] for i in x] temp = {'country': country, 'x': x, 'y': y} date_countries.append(temp) full_data.append(date_countries) # for i, country in enumerate(countries): # full_data.append({'country': country, 'x': [], 'y' : []}) # country_list = [str(i)[:10] for i in countries[country]['dateRep'].values] # for j, date in enumerate(dates): # if str(date)[:10] in country_list: # index = np.searchsorted(countries[country]['dateRep'], date) # y = countries[country]['cumulative'].iloc[index]
def _process_chunk(db_lock, log_lock, sema, sed_fit_name, cosmoDC2_data, first_gal, self_dict, bad_gals): """ Do all chunk-specific work: compute table contents for a collection of galaxies and write to db Parameters ---------- db_lock Used to avoid conflicts writing to sqlite output log_lock Used to avoid conflicts writing to per-healpixel log sema A semaphore. Release when done sed_fit_name File where sed fits for this healpixel are cosmoDC2_data Values from cosmoDC2 for this healpixel, keyed by column name first_gal index of first galaxy in our chunk (in sed fit list) self_dict Random useful values stored in GalaxyTruthWriter bad_gals List of galaxy ids, monotone increasing, to be skipped """ dry = self_dict['dry'] chunk_size = self_dict['chunk_size'] dbfile = self_dict['dbfile'] logfile = self_dict['logfile'] if dry: _logit( log_lock, logfile, '_process_chunk invoke for first_gal {}, chunk size {}'.format( first_gal, chunk_size)) if sema is None: return sema.release() #exit(0) return lsst_bp_dict = self_dict['lsst_bp_dict'] galaxy_ids = [] ra = [] dec = [] redshift = [] ebv_vals = None ebv_vals_init = False # does this belong somewhere else? ccm_w = None total_gals = self_dict['total_gals'] chunk_start = first_gal chunk_end = min(first_gal + chunk_size, total_gals) with h5py.File(sed_fit_name, 'r') as sed_fit_file: sed_names = sed_fit_file['sed_names'][()] sed_names = [s.decode() for s in sed_names] # becse stored as bytes gals_this_chunk = chunk_end - chunk_start subset = slice(chunk_start, chunk_end) galaxy_ids = sed_fit_file['galaxy_id'][()][subset] to_log = 'Start with galaxy #{}, id={}\n# galaxies for _process_chunk: {}\n'.format( first_gal, galaxy_ids[0], len(galaxy_ids)) _logit(log_lock, logfile, to_log) # get the cross-match between the sed fit and cosmoDC2 cosmo_len = len(cosmoDC2_data['galaxy_id']) crossmatch_dex = np.searchsorted(cosmoDC2_data['galaxy_id'], galaxy_ids) np.testing.assert_array_equal( galaxy_ids, cosmoDC2_data['galaxy_id'][crossmatch_dex]) ra = sed_fit_file['ra'][()][subset] dec = sed_fit_file['dec'][()][subset] np.testing.assert_array_equal(ra, cosmoDC2_data['ra'][crossmatch_dex]) np.testing.assert_array_equal(dec, cosmoDC2_data['dec'][crossmatch_dex]) good_ixes = _good_indices(galaxy_ids.tolist(), bad_gals[0]) if (len(good_ixes) == 0): if sema is not None: sema.release() return else: _logit( log_lock, logfile, 'Found {} good indices for chunk starting with {}\n'.format( len(good_ixes), chunk_start)) flux_by_band_MW = {} flux_by_band_noMW = {} # Calculate E(B-V) for dust extinction in Milky Way along relevant # lines of sight band_print = "Processing band {}, first gal {}, time {}\n" if not ebv_vals_init: equatorial_coords = np.array([np.radians(ra), np.radians(dec)]) ebv_model = EBVbase() ebv_vals = ebv_model.calculateEbv( equatorialCoordinates=equatorial_coords, interp=True) ebv_vals_init = True for i_bp, bp in enumerate('ugrizy'): if (i_bp == 0 or i_bp == 5): _logit(log_lock, logfile, band_print.format(bp, first_gal, dt.now())) fluxes_noMW = {} fluxes = {} for component in ['disk', 'bulge']: fluxes_noMW[component] = np.zeros(gals_this_chunk, dtype=float) fluxes[component] = np.zeros(gals_this_chunk, dtype=float) for component in ['disk', 'bulge']: #print(" Processing component ", component) sed_arr = sed_fit_file['%s_sed' % component][()][subset] av_arr = sed_fit_file['%s_av' % component][()][subset] rv_arr = sed_fit_file['%s_rv' % component][()][subset] mn_arr = sed_fit_file['%s_magnorm' % component][()][i_bp, :][subset] z_arr = 
cosmoDC2_data['redshift'][crossmatch_dex] gii = 0 done = False for i_gal, (s_dex, mn, av, rv, zz, ebv) in enumerate( zip(sed_arr, mn_arr, av_arr, rv_arr, z_arr, ebv_vals)): if done: break while good_ixes[gii] < i_gal: gii += 1 if gii == len(good_ixes): # ran out of good ones done = True break if done: break if good_ixes[gii] > i_gal: # skipped over it; it's bad continue # Leave space for it in the arrays, but values # for all the fluxes will be left at 0 # read in the SED file from the library sed_file_name = os.path.join(self_dict['sed_lib_dir'], sed_names[s_dex]) sed = sims_photUtils.Sed() sed.readSED_flambda(sed_file_name) # find and apply normalizing flux fnorm = sims_photUtils.getImsimFluxNorm(sed, mn) sed.multiplyFluxNorm(fnorm) # add internal dust if ccm_w is None or not np.array_equal(sed.wavelen, ccm_w): ccm_w = np.copy(sed.wavelen) a_x, b_x = sed.setupCCM_ab() sed.addDust(a_x, b_x, A_v=av, R_v=rv) # apply redshift sed.redshiftSED(zz, dimming=True) # flux, in Janskys, without Milky Way dust extinction f_noMW = sed.calcFlux(lsst_bp_dict[bp]) # apply Milky Way dust # (cannot reuse a_x, b_x because wavelength grid changed # when we called redshiftSED) a_x_mw, b_x_mw = sed.setupCCM_ab() sed.addDust(a_x_mw, b_x_mw, R_v=3.1, ebv=ebv) f_MW = sed.calcFlux(lsst_bp_dict[bp]) fluxes_noMW[component][i_gal] = f_noMW fluxes[component][i_gal] = f_MW if (component == 'disk') and (bp == 'r'): redshift = z_arr # Sum components and convert to nanojansky total_fluxes = (fluxes_noMW['disk'] + fluxes_noMW['bulge']) * 10**9 total_fluxes_MW = (fluxes['disk'] + fluxes['bulge']) * 10**9 dummy_sed = sims_photUtils.Sed() # add magnification due to weak lensing kappa = cosmoDC2_data['convergence'][crossmatch_dex] gamma_sq = (cosmoDC2_data['shear_1'][crossmatch_dex]**2 + cosmoDC2_data['shear_2'][crossmatch_dex]**2) magnification = 1.0 / ((1.0 - kappa)**2 - gamma_sq) magnified_fluxes = magnification * total_fluxes magnified_fluxes_MW = magnification * total_fluxes_MW flux_by_band_noMW[bp] = magnified_fluxes flux_by_band_MW[bp] = magnified_fluxes_MW # Open connection to sqlite db and write #print('Time before db write is {}, first gal={}'.format(dt.now(), first_gal)) #sys.stdout.flush() if not db_lock.acquire(timeout=120.0): _logit(log_lock, logfile, "Failed to acquire db lock, first gal=", first_gal) if sema is None: return sema.release() exit(1) try: _write_sqlite(dbfile, galaxy_ids, ra, dec, redshift, flux_by_band_MW, flux_by_band_noMW, good_ixes) db_lock.release() if sema is not None: sema.release() _logit( log_lock, logfile, 'Time after db write: {}, first_gal={}\n'.format( dt.now(), first_gal)) exit(0) except Exception as ex: db_lock.release() if sema is not None: sema.release() raise (ex)
def accumulate(self, p=None): ''' Accumulate per image evaluation results and store the result in self.eval :param p: input params for evaluation :return: None ''' print('Accumulating evaluation results...') tic = time.time() if not self.evalImgs: print('Please run evaluate() first') # allows input customized parameters if p is None: p = self.params p.catIds = p.catIds if p.useCats == 1 else [-1] T = len(p.iouThrs) R = len(p.recThrs) K = len(p.catIds) if p.useCats else 1 A = len(p.areaRng) M = len(p.maxDets) precision = -np.ones( (T, R, K, A, M)) # -1 for the precision of absent categories recall = -np.ones((T, K, A, M)) scores = -np.ones((T, R, K, A, M)) # create dictionary for future indexing _pe = self._paramsEval catIds = _pe.catIds if _pe.useCats else [-1] setK = set(catIds) setA = set(map(tuple, _pe.areaRng)) setM = set(_pe.maxDets) setI = set(_pe.imgIds) # get inds to evaluate k_list = [n for n, k in enumerate(p.catIds) if k in setK] m_list = [m for n, m in enumerate(p.maxDets) if m in setM] a_list = [ n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA ] i_list = [n for n, i in enumerate(p.imgIds) if i in setI] I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) # retrieve E at each category, area range, and max number of detections for k, k0 in enumerate(k_list): Nk = k0 * A0 * I0 for a, a0 in enumerate(a_list): Na = a0 * I0 for m, maxDet in enumerate(m_list): E = [self.evalImgs[Nk + Na + i] for i in i_list] E = [e for e in E if not e is None] if len(E) == 0: continue dtScores = np.concatenate( [e['dtScores'][0:maxDet] for e in E]) # different sorting method generates slightly different results. # mergesort is used to be consistent as Matlab implementation. inds = np.argsort(-dtScores, kind='mergesort') dtScoresSorted = dtScores[inds] dtm = np.concatenate( [e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds] dtIg = np.concatenate( [e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds] gtIg = np.concatenate([e['gtIgnore'] for e in E]) npig = np.count_nonzero(gtIg == 0) if npig == 0: continue tps = np.logical_and(dtm, np.logical_not(dtIg)) fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): tp = np.array(tp) fp = np.array(fp) nd = len(tp) rc = tp / npig pr = tp / (fp + tp + np.spacing(1)) q = np.zeros((R, )) ss = np.zeros((R, )) if nd: recall[t, k, a, m] = rc[-1] else: recall[t, k, a, m] = 0 # numpy is slow without cython optimization for accessing elements # use python array gets significant speed improvement pr = pr.tolist() q = q.tolist() for i in range(nd - 1, 0, -1): if pr[i] > pr[i - 1]: pr[i - 1] = pr[i] inds = np.searchsorted(rc, p.recThrs, side='left') try: for ri, pi in enumerate(inds): q[ri] = pr[pi] ss[ri] = dtScoresSorted[pi] except: pass precision[t, :, k, a, m] = np.array(q) scores[t, :, k, a, m] = np.array(ss) self.eval = { 'params': p, 'counts': [T, R, K, A, M], 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'precision': precision, 'recall': recall, 'scores': scores, } toc = time.time() print('DONE (t={:0.2f}s).'.format(toc - tic))
def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False): """Binarize labels in a one-vs-all fashion Several regression and binary classification algorithms are available in the scikit. A simple way to extend these algorithms to the multi-class classification case is to use the so-called one-vs-all scheme. This function makes it possible to compute this transformation for a fixed set of class labels known ahead of time. Parameters ---------- y : array-like Sequence of integer labels or multilabel data to encode. classes : array-like of shape [n_classes] Uniquely holds the label for each class. neg_label : int (default: 0) Value with which negative labels must be encoded. pos_label : int (default: 1) Value with which positive labels must be encoded. sparse_output : boolean (default: False), Set to true if output binary array is desired in CSR sparse format Returns ------- Y : numpy array or CSR matrix of shape [n_samples, n_classes] Shape will be [n_samples, 1] for binary problems. Examples -------- >>> from sklearn.preprocessing import label_binarize >>> label_binarize([1, 6], classes=[1, 2, 4, 6]) array([[1, 0, 0, 0], [0, 0, 0, 1]]) The class ordering is preserved: >>> label_binarize([1, 6], classes=[1, 6, 4, 2]) array([[1, 0, 0, 0], [0, 1, 0, 0]]) Binary targets transform to a column vector >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes']) array([[1], [0], [0], [1]]) See also -------- LabelBinarizer : class used to wrap the functionality of label_binarize and allow for fitting to classes independently of the transform operation """ if not isinstance(y, list): # XXX Workaround that will be removed when list of list format is # dropped y = check_array(y, accept_sparse='csr', ensure_2d=False, dtype=None) else: if _num_samples(y) == 0: raise ValueError('y has 0 samples: %r' % y) if neg_label >= pos_label: raise ValueError("neg_label={0} must be strictly less than " "pos_label={1}.".format(neg_label, pos_label)) if (sparse_output and (pos_label == 0 or neg_label != 0)): raise ValueError("Sparse binarization is only supported with non " "zero pos_label and zero neg_label, got " "pos_label={0} and neg_label={1}" "".format(pos_label, neg_label)) # To account for pos_label == 0 in the dense case pos_switch = pos_label == 0 if pos_switch: pos_label = -neg_label y_type = type_of_target(y) if 'multioutput' in y_type: raise ValueError("Multioutput target data is not supported with label " "binarization") if y_type == 'unknown': raise ValueError("The type of target data is not known") n_samples = y.shape[0] if sp.issparse(y) else len(y) n_classes = len(classes) classes = np.asarray(classes) if y_type == "binary": if len(classes) == 1: Y = np.zeros((len(y), 1), dtype=np.int) Y += neg_label return Y elif len(classes) >= 3: y_type = "multiclass" sorted_class = np.sort(classes) if (y_type == "multilabel-indicator" and classes.size != y.shape[1]): raise ValueError("classes {0} missmatch with the labels {1}" "found in the data".format(classes, unique_labels(y))) if y_type in ("binary", "multiclass"): y = column_or_1d(y) # pick out the known labels from y y_in_classes = in1d(y, classes) y_seen = y[y_in_classes] indices = np.searchsorted(sorted_class, y_seen) indptr = np.hstack((0, np.cumsum(y_in_classes))) data = np.empty_like(indices) data.fill(pos_label) Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes)) elif y_type == "multilabel-indicator": Y = sp.csr_matrix(y) if pos_label != 1: data = np.empty_like(Y.data) data.fill(pos_label) Y.data = data 
else: raise ValueError("%s target data is not supported with label " "binarization" % y_type) if not sparse_output: Y = Y.toarray() Y = astype(Y, int, copy=False) if neg_label != 0: Y[Y == 0] = neg_label if pos_switch: Y[Y == pos_label] = 0 else: Y.data = astype(Y.data, int, copy=False) # preserve label ordering if np.any(classes != sorted_class): indices = np.searchsorted(sorted_class, classes) Y = Y[:, indices] if y_type == "binary": if sparse_output: Y = Y.getcol(-1) else: Y = Y[:, -1].reshape((-1, 1)) return Y
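# Usage sketch (tiny label set mirroring the docstring example above): in the
# multiclass branch, searchsorted on the sorted classes turns each label into
# a column index for a CSR indicator matrix, and a second searchsorted
# restores the caller's class ordering.
import numpy as np
import scipy.sparse as sp

classes = np.array([1, 6, 4, 2])
sorted_class = np.sort(classes)                    # [1 2 4 6]
y = np.array([1, 6, 4])
indices = np.searchsorted(sorted_class, y)         # [0 3 2]
indptr = np.arange(len(y) + 1)                     # one positive per row
data = np.ones_like(indices)
Y = sp.csr_matrix((data, indices, indptr), shape=(len(y), len(classes)))
Y = Y[:, np.searchsorted(sorted_class, classes)]   # back to caller's order
print(Y.toarray())
# [[1 0 0 0]
#  [0 1 0 0]
#  [0 0 1 0]]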
def process(self, **kwargs): """Process module.""" kwargs = self.prepare_input(self.key('luminosities'), **kwargs) self._luminosities = kwargs[self.key('luminosities')] self._bands = kwargs['all_bands'] self._band_indices = kwargs['all_band_indices'] self._frequencies = kwargs['all_frequencies'] self._radius_phot = np.array(kwargs[self.key('radiusphot')]) self._temperature_phot = np.array(kwargs[self.key('temperaturephot')]) self._cutoff_wavelength = kwargs[self.key('cutoff_wavelength')] self._times = np.array(kwargs['rest_times']) xc = self.X_CONST # noqa: F841 fc = self.FLUX_CONST cc = self.C_CONST ac = ANG_CGS cwave_ac = self._cutoff_wavelength * ac cwave_ac2 = cwave_ac * cwave_ac cwave_ac3 = cwave_ac2 * cwave_ac # noqa: F841 zp1 = 1.0 + kwargs[self.key('redshift')] lt = len(self._times) seds = np.empty(lt, dtype=object) rp2 = self._radius_phot**2 tp = self._temperature_phot evaled = False for li, lum in enumerate(self._luminosities): bi = self._band_indices[li] # tpi = tp[li] # rp2i = rp2[li] if lum == 0.0: seds[li] = np.zeros( len(self._sample_wavelengths[bi]) if bi >= 0 else 1) continue if bi >= 0: rest_wavs = self._sample_wavelengths[bi] * ac / zp1 else: rest_wavs = np.array([cc / (self._frequencies[li] * zp1)]) # Apply absorption to SED only bluewards of cutoff wavelength ab = rest_wavs < cwave_ac # noqa: F841 tpi = tp[li] # noqa: F841 rp2i = rp2[li] # noqa: F841 if not evaled: # Absorbed blackbody: 0% transmission at 0 Angstroms 100% at # >3000 Angstroms. sed = ne.evaluate( "where(ab, fc * (rp2i / cwave_ac / " "rest_wavs ** 4) / expm1(xc / rest_wavs / tpi), " "fc * (rp2i / rest_wavs ** 5) / " "expm1(xc / rest_wavs / tpi))") evaled = True else: sed = ne.re_evaluate() sed[np.isnan(sed)] = 0.0 seds[li] = sed uniq_times = np.unique(self._times) tsort = np.argsort(self._times) uniq_is = np.searchsorted(self._times, uniq_times, sorter=tsort) lu = len(uniq_times) norms = self._luminosities[uniq_is] / (fc / ac * rp2[uniq_is] * tp[uniq_is]) rp2 = rp2[uniq_is].reshape(lu, 1) tp = tp[uniq_is].reshape(lu, 1) tp2 = tp * tp tp3 = tp2 * tp # noqa: F841 nxcs = self._nxcs # noqa: F841 f_blue_reds = ne.evaluate( "sum((exp(-nxcs / (cwave_ac * tp)) * (" "nxcs ** 2 + 2 * (" "nxcs * cwave_ac * tp + cwave_ac2 * tp2)) / (" "nxcs ** 3 * cwave_ac3)) + " "(6 * tp3 - exp(-nxcs / (cwave_ac * tp)) * (" "nxcs ** 3 + 3 * nxcs ** 2 * cwave_ac * tp + 6 * (" "nxcs * cwave_ac2 * tp2 + cwave_ac3 *" "tp3)) / cwave_ac3) / (nxcs ** 4), 1)") norms /= f_blue_reds # Apply renormalisation seds *= norms[np.searchsorted(uniq_times, self._times)] seds = self.add_to_existing_seds(seds, **kwargs) return { 'sample_wavelengths': self._sample_wavelengths, self.key('seds'): seds }
def demodata_infr(**kwargs): """ kwargs = {} CommandLine: python -m wbia.algo.graph.demo demodata_infr --show python -m wbia.algo.graph.demo demodata_infr --num_pccs=25 python -m wbia.algo.graph.demo demodata_infr --profile --num_pccs=100 Ignore: >>> from wbia.algo.graph.demo import * # NOQA >>> from wbia.algo.graph import demo >>> import networkx as nx >>> kwargs = dict(num_pccs=6, p_incon=.5, size_std=2) >>> kwargs = ut.argparse_dict(kwargs) >>> infr = demo.demodata_infr(**kwargs) >>> pccs = list(infr.positive_components()) >>> assert len(pccs) == kwargs['num_pccs'] >>> nonfull_pccs = [cc for cc in pccs if len(cc) > 1 and nx.is_empty(nx.complement(infr.pos_graph.subgraph(cc)))] >>> expected_n_incon = len(nonfull_pccs) * kwargs['p_incon'] >>> n_incon = len(list(infr.inconsistent_components())) >>> # TODO can test that we our sample num incon agrees with pop mean >>> #sample_mean = n_incon / len(nonfull_pccs) >>> #pop_mean = kwargs['p_incon'] >>> print('status = ' + ut.repr4(infr.status(extended=True))) >>> ut.quit_if_noshow() >>> infr.show(pickable=True, groupby='name_label') >>> ut.show_if_requested() Ignore: kwargs = { 'ccs': [[1, 2, 3], [4, 5]] } """ import networkx as nx import vtool as vt from wbia.algo.graph import nx_utils def kwalias(*args): params = args[0:-1] default = args[-1] for key in params: if key in kwargs: return kwargs[key] return default num_pccs = kwalias('num_pccs', 16) size_mean = kwalias('pcc_size_mean', 'pcc_size', 'size', 5) size_std = kwalias('pcc_size_std', 'size_std', 0) # p_pcc_incon = kwargs.get('p_incon', .1) p_pcc_incon = kwargs.get('p_incon', 0) p_pcc_incomp = kwargs.get('p_incomp', 0) pcc_sizes = kwalias('pcc_sizes', None) pos_redun = kwalias('pos_redun', [1, 2, 3]) pos_redun = ut.ensure_iterable(pos_redun) # number of maximum inconsistent edges per pcc max_n_incon = kwargs.get('n_incon', 3) rng = np.random.RandomState(0) counter = 1 if pcc_sizes is None: pcc_sizes = [ int(randn(size_mean, size_std, rng=rng, a_min=1)) for _ in range(num_pccs) ] else: num_pccs = len(pcc_sizes) if 'ccs' in kwargs: # Overwrites other options pcc_sizes = list(map(len, kwargs['ccs'])) num_pccs = len(pcc_sizes) size_mean = None size_std = 0 new_ccs = [] pcc_iter = list(enumerate(pcc_sizes)) pcc_iter = ut.ProgIter(pcc_iter, enabled=num_pccs > 20, label='make pos-demo') for i, size in pcc_iter: p = 0.1 want_connectivity = rng.choice(pos_redun) want_connectivity = min(size - 1, want_connectivity) # Create basic graph of positive edges with desired connectivity g = nx_utils.random_k_edge_connected_graph(size, k=want_connectivity, p=p, rng=rng) nx.set_edge_attributes(g, name='evidence_decision', values=POSTV) nx.set_edge_attributes(g, name='truth', values=POSTV) # nx.set_node_attributes(g, name='orig_name_label', values=i) assert nx.is_connected(g) # Relabel graph with non-conflicting names if 'ccs' in kwargs: g = nx.relabel_nodes(g, dict(enumerate(kwargs['ccs'][i]))) else: # Make sure nodes do not conflict with others g = nx.relabel_nodes( g, dict(enumerate(range(counter, len(g) + counter + 1)))) counter += len(g) # The probability any edge is inconsistent is `p_incon` # This is 1 - P(all edges consistent) # which means p(edge is consistent) = (1 - p_incon) / N complement_edges = ut.estarmap(nx_utils.e_, nx_utils.complement_edges(g)) if len(complement_edges) > 0: # compute probability that any particular edge is inconsistent # to achieve probability the PCC is inconsistent p_edge_inconn = 1 - (1 - p_pcc_incon)**(1 / len(complement_edges)) p_edge_unrev = 0.1 p_edge_notcomp = 1 - (1 
- p_pcc_incomp)**(1 / len(complement_edges)) probs = np.array([p_edge_inconn, p_edge_unrev, p_edge_notcomp]) # if the total probability is greater than 1 the parameters # are invalid, so we renormalize to "fix" it. # if probs.sum() > 1: # warnings.warn('probabilities sum to more than 1') # probs = probs / probs.sum() pcumsum = probs.cumsum() # Determine which mutually exclusive state each complement edge is in # logger.info('pcumsum = %r' % (pcumsum,)) states = np.searchsorted(pcumsum, rng.rand(len(complement_edges))) incon_idxs = np.where(states == 0)[0] if len(incon_idxs) > max_n_incon: logger.info('max_n_incon = %r' % (max_n_incon, )) chosen = rng.choice(incon_idxs, max_n_incon, replace=False) states[np.setdiff1d(incon_idxs, chosen)] = len(probs) grouped_edges = ut.group_items(complement_edges, states) for state, edges in grouped_edges.items(): truth = POSTV if state == 0: # Add in inconsistent edges evidence_decision = NEGTV # TODO: truth could be INCMP or POSTV # new_edges.append((u, v, {'evidence_decision': NEGTV})) elif state == 1: evidence_decision = UNREV # TODO: truth could be INCMP or POSTV # new_edges.append((u, v, {'evidence_decision': UNREV})) elif state == 2: evidence_decision = INCMP truth = INCMP else: continue # Add in candidate edges attrs = { 'evidence_decision': evidence_decision, 'truth': truth } for (u, v) in edges: g.add_edge(u, v, **attrs) new_ccs.append(g) # (list(g.nodes()), new_edges)) pos_g = nx.union_all(new_ccs) assert len(new_ccs) == len(list(nx.connected_components(pos_g))) assert num_pccs == len(new_ccs) # Add edges between the PCCS neg_edges = [] if not kwalias('ignore_pair', False): logger.info('making pairs') pair_attrs_lookup = { 0: { 'evidence_decision': NEGTV, 'truth': NEGTV }, 1: { 'evidence_decision': INCMP, 'truth': INCMP }, 2: { 'evidence_decision': UNREV, 'truth': NEGTV }, # could be incomp or neg } # These are the probabilities that one edge has this state p_pair_neg = kwalias('p_pair_neg', 0.4) p_pair_incmp = kwalias('p_pair_incmp', 0.2) p_pair_unrev = kwalias('p_pair_unrev', 0) # p_pair_neg = 1 cc_combos = ((list(g1.nodes()), list(g2.nodes())) for (g1, g2) in it.combinations(new_ccs, 2)) valid_cc_combos = [(cc1, cc2) for cc1, cc2 in cc_combos if len(cc1) and len(cc2)] for cc1, cc2 in ut.ProgIter(valid_cc_combos, label='make neg-demo'): possible_edges = ut.estarmap(nx_utils.e_, it.product(cc1, cc2)) # probability that any edge between these PCCs is negative n_edges = len(possible_edges) p_edge_neg = 1 - (1 - p_pair_neg)**(1 / n_edges) p_edge_incmp = 1 - (1 - p_pair_incmp)**(1 / n_edges) p_edge_unrev = 1 - (1 - p_pair_unrev)**(1 / n_edges) # Create event space with sizes proportional to probabilities pcumsum = np.cumsum([p_edge_neg, p_edge_incmp, p_edge_unrev]) # Roll dice for each of the edge to see which state it lands on possible_pstate = rng.rand(len(possible_edges)) states = np.searchsorted(pcumsum, possible_pstate) flags = states < len(pcumsum) stateful_states = states.compress(flags) stateful_edges = ut.compress(possible_edges, flags) unique_states, groupxs_list = vt.group_indices(stateful_states) for state, groupxs in zip(unique_states, groupxs_list): # logger.info('state = %r' % (state,)) # Add in candidate edges edges = ut.take(stateful_edges, groupxs) attrs = pair_attrs_lookup[state] for (u, v) in edges: neg_edges.append((u, v, attrs)) logger.info('Made {} neg_edges between PCCS'.format(len(neg_edges))) else: logger.info('ignoring pairs') import wbia G = wbia.AnnotInference._graph_cls() G.add_nodes_from(pos_g.nodes(data=True)) 
G.add_edges_from(pos_g.edges(data=True)) G.add_edges_from(neg_edges) infr = wbia.AnnotInference.from_netx(G, infer=kwargs.get('infer', True)) infr.verbose = 3 infr.relabel_using_reviews(rectify=False) # fontname = 'Ubuntu' fontsize = 12 fontname = 'sans' splines = 'spline' # splines = 'ortho' # splines = 'line' infr.set_node_attrs('shape', 'circle') infr.graph.graph['ignore_labels'] = True infr.graph.graph['dark_background'] = False infr.graph.graph['fontname'] = fontname infr.graph.graph['fontsize'] = fontsize infr.graph.graph['splines'] = splines infr.set_node_attrs('width', 29) infr.set_node_attrs('height', 29) infr.set_node_attrs('fontsize', fontsize) infr.set_node_attrs('fontname', fontname) infr.set_node_attrs('fixed_size', True) # Set synthetic ground-truth attributes for testing # infr.apply_edge_truth() infr.edge_truth = infr.get_edge_attrs('truth') # Make synthetic verif infr.dummy_verif = DummyVerif(infr) infr.verifiers = {} infr.verifiers['match_state'] = infr.dummy_verif infr.demokw = kwargs return infr
def compute_sample_weight(class_weight, y, indices=None): """Estimate sample weights by class for unbalanced datasets. Parameters ---------- class_weight : dict, list of dicts, "balanced", or None, optional Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data: ``n_samples / (n_classes * np.bincount(y))``. For multi-output, the weights of each column of y will be multiplied. y : array-like, shape = [n_samples] or [n_samples, n_outputs] Array of original class labels per sample. indices : array-like, shape (n_subsample,), or None Array of indices to be used in a subsample. Can be of length less than n_samples in the case of a subsample, or equal to n_samples in the case of a bootstrap subsample with repeated indices. If None, the sample weight will be calculated over the full sample. Only "auto" is supported for class_weight if this is provided. Returns ------- sample_weight_vect : ndarray, shape (n_samples,) Array with sample weights as applied to the original y """ y = np.atleast_1d(y) if y.ndim == 1: y = np.reshape(y, (-1, 1)) n_outputs = y.shape[1] if isinstance(class_weight, six.string_types): if class_weight not in ['balanced', 'auto']: raise ValueError('The only valid preset for class_weight is ' '"balanced". Given "%s".' % class_weight) elif (indices is not None and not isinstance(class_weight, six.string_types)): raise ValueError('The only valid class_weight for subsampling is ' '"balanced". Given "%s".' % class_weight) elif n_outputs > 1: if (not hasattr(class_weight, "__iter__") or isinstance(class_weight, dict)): raise ValueError("For multi-output, class_weight should be a " "list of dicts, or a valid string.") if len(class_weight) != n_outputs: raise ValueError("For multi-output, number of elements in " "class_weight should match number of outputs.") expanded_class_weight = [] for k in range(n_outputs): y_full = y[:, k] classes_full = np.unique(y_full) classes_missing = None if class_weight in ['balanced', 'auto'] or n_outputs == 1: class_weight_k = class_weight else: class_weight_k = class_weight[k] if indices is not None: # Get class weights for the subsample, covering all classes in # case some labels that were present in the original data are # missing from the sample. y_subsample = y[indices, k] classes_subsample = np.unique(y_subsample) weight_k = np.choose(np.searchsorted(classes_subsample, classes_full), compute_class_weight(class_weight_k, classes_subsample, y_subsample), mode='clip') classes_missing = set(classes_full) - set(classes_subsample) else: weight_k = compute_class_weight(class_weight_k, classes_full, y_full) weight_k = weight_k[np.searchsorted(classes_full, y_full)] if classes_missing: # Make missing classes' weight zero weight_k[in1d(y_full, list(classes_missing))] = 0. expanded_class_weight.append(weight_k) expanded_class_weight = np.prod(expanded_class_weight, axis=0, dtype=np.float64) return expanded_class_weight
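# Usage sketch (toy labels): the weight lookup above maps each sample to its
# class weight with searchsorted on the sorted class array; the "balanced"
# weights here follow n_samples / (n_classes * bincount).
import numpy as np

classes = np.array([0, 1, 2])
y = np.array([0, 0, 1, 2, 2, 2])
class_weight = len(y) / (len(classes) * np.bincount(y))
sample_weight = class_weight[np.searchsorted(classes, y)]
print(sample_weight)    # [1. 1. 2. 0.667 0.667 0.667] (approximately)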
def __init__(self, ax, *args, **kwargs): global data, x, dark, incident global IntTime, Averages global xmin, xmax, ymin, ymax global AbMode, LEDdutycycle, LEDfrequency, LEDpin, LEDstate global monitorwave, monitorindex, monitor x = spec.wavelengths() # Integration time set above Averages = 1 #set default averages to a reasonable value dark = np.zeros(len(x)) incident = np.ones( len(x) ) #dummy values to prevent error in Absorbance when no dark recorded LEDpin = 18 #BCM pin 18 is Physical pin 12 on the RPi LEDfrequency = 2000 # 2000 Hz is suitable for an integration time of 2 ms or longer LEDdutycycle = 50000 #50000 is 5%, scale is zero to 1 million, 0 is off LEDstate = 0 # LED off initially AbMode = 0 # initial mode is raw intensity self.ax = ax self.x = x self.xmin = xmin self.xmax = xmax self.ymin = ymin self.ymax = ymax self.data = data self.line = Line2D(self.x, self.data, color='red') self.ax.add_line(self.line) self.ax.set_ylim(ymin * 0.8, ymax * 1.1) self.ax.set_xlim(self.xmin, self.xmax) monitorwave = np.median( x) #set monitor wavelength to middle of hardware range tk.Tk.__init__(self, *args, **kwargs) # tk.Tk.iconbitmap(self, default="clienticon.ico") set window icon tk.Tk.wm_title(self, "Ocean Optics Spectrometer Control") container = tk.Frame(self) container.pack(side="top", fill="both", expand=True) container.grid_rowconfigure(0, weight=1) container.grid_columnconfigure(0, weight=1) label = tk.Label(self, text="Spectrometer on a Pi", font=LARGE_FONT) label.pack(pady=10, padx=10) self.frame1 = tk.Frame(self) self.frame1.pack(side='left', anchor=tk.N) labelint = tk.Label(self.frame1, text='Integration Time (ms)', relief='ridge') labelint.pack(side='top', pady=2) labelavg = tk.Label(self.frame1, text='# of spectra to average', relief='ridge', width='17', wraplength='100') labelavg.pack(side='top', pady=1) labelxmin = tk.Label(self.frame1, text='Minimum wavelength', relief='ridge') labelxmin.pack(side='top', pady=2) labelxmax = tk.Label(self.frame1, text='Maximum wavelength', relief='ridge') labelxmax.pack(side='top', pady=2) self.button_dark = tk.Button(self.frame1, text='Measure Dark', background='light grey') self.button_dark.pack(side='top', pady=2) self.button_dark.bind('<ButtonRelease-1>', self.getdark) self.buttonAbMode = tk.Button(self.frame1, text='Absorbance Mode (off)', background='light grey') self.buttonAbMode.pack(side='top', pady=1) self.buttonAbMode.bind('<ButtonRelease-1>', self.AbMode) monitorindex = np.searchsorted(x, monitorwave, side='left') monitor = np.round(self.data[monitorindex], decimals=3) self.text = self.ax.text(0.9, 0.9, monitor, transform=ax.transAxes, fontsize=14) self.ax.axvline(x=monitorwave, lw=2, color='blue', alpha=0.5) self.labelmonitor = tk.Label(self.frame1, text='Wavelength to monitor (nm)', font=LARGE_FONT) self.labelmonitor.pack(side='top') self.entrymonitor = tk.Entry(self.frame1, width='7') self.entrymonitor.pack(side='top', pady=1, anchor=tk.N) self.entrymonitor.insert(0, np.round(x[monitorindex], decimals=2)) self.entrymonitor.bind('<Return>', self.entrymonitor_return) self.labelmonitor2 = tk.Label( self.frame1, text="press <Enter> to set new wavelength") self.labelmonitor2.pack(side='top') self.button_reset_y = tk.Button(self.frame1, text='Reset Y axis scale', background='light blue') self.button_reset_y.pack(side='top', pady=10) self.button_reset_y.bind('<ButtonRelease-1>', self.reset_y) self.buttonLED = tk.Button(self.frame1, text='LED on / off', background='light grey') self.buttonLED.pack(side='top', pady=1) 
self.buttonLED.bind('<ButtonRelease-1>', self.LEDstate) self.labelLED = tk.Label(self.frame1, text="LED power (1 to 100%)") self.labelLED.pack(side='top', pady=1, anchor=tk.N) self.entryLED = tk.Entry(self.frame1, width='5') self.entryLED.pack(side='top', pady=1, anchor=tk.N) self.entryLED.insert(0, LEDdutycycle / 10000) self.entryLED.bind('<Return>', self.entryLED_return) self.frame2 = tk.Frame(self) self.frame2.pack(side='left', anchor=tk.N) self.entryint = tk.Entry(self.frame2, width='6') self.entryint.pack(side='top', pady=1, anchor=tk.N) self.entryint.insert(0, IntTime / 1000) self.entryint.bind('<Return>', self.EntryInt_return) self.entryavg = tk.Entry(self.frame2, width='4') self.entryavg.pack(side='top', pady=5) self.entryavg.insert(0, Averages) self.entryavg.bind('<Return>', self.EntryAvg_return) self.entryxmin = tk.Entry(self.frame2, width='7') self.entryxmin.pack(side='top', pady=2) self.entryxmin.insert(0, xmin) self.entryxmin.bind('<Return>', self.Entryxmin_return) self.entryxmax = tk.Entry(self.frame2, width='7') self.entryxmax.pack(side='top', pady=2) self.entryxmax.insert(0, xmax) self.entryxmax.bind('<Return>', self.Entryxmax_return) self.button_incident = tk.Button(self.frame2, text='Measure 100% T', background='light grey') self.button_incident.pack(side='top', pady=2) self.button_incident.bind('<ButtonRelease-1>', self.getincident) button_quit = ttk.Button(self, text='Quit') button_quit.pack(side='right', anchor=tk.N) button_quit.bind('<ButtonRelease-1>', self.ButtonQuit) ax.set_xlabel('Wavelength (nm)') ax.set_ylabel('Counts') canvas = FigureCanvasTkAgg(fig, self) canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)
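# The __init__ above takes the raw np.searchsorted insertion point as the
# monitor index. A nearest-pixel variant would compare the two neighbouring
# pixels; this is a stand-alone sketch and `wavelengths` / `target_nm` are
# invented values, not the spectrometer's actual axis.
import numpy as np

wavelengths = np.linspace(340.0, 1020.0, 2048)   # hypothetical sorted wavelength axis
target_nm = 589.3

i = np.searchsorted(wavelengths, target_nm, side='left')
i = np.clip(i, 1, len(wavelengths) - 1)
# pick whichever neighbour is actually closer to the requested wavelength
nearest = i if abs(wavelengths[i] - target_nm) < abs(wavelengths[i - 1] - target_nm) else i - 1
print(nearest, wavelengths[nearest])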
def eid_to_element_node_index(self, eids):
    # Position of each of this table's element ids within the sorted `eids` array.
    ind = np.searchsorted(eids, self.element)
    return ind
def _interpolate_height_and_temperature_from_pressure(imager_obj, level,
                                                      list_of_levels=None):
    """Find the height at a given pressure level from the NWP (or segment_nwp) pressure and height vectors.

    Here "hi" is the level closest to the ground (higher pressure) and "lo"
    is the level further up in the atmosphere (lower pressure).
    """
    if hasattr(imager_obj, "nwp_height") and imager_obj.nwp_height is not None:
        values_h = imager_obj.nwp_height
        pressure_v = imager_obj.nwp_pressure
        surface_h = imager_obj.nwp_surface_h
        psur = imager_obj.nwp_psur
    elif hasattr(imager_obj, "segment_nwp_geoheight") and imager_obj.segment_nwp_geoheight is not None:
        values_h = imager_obj.segment_nwp_geoheight
        pressure_v = imager_obj.segment_nwp_pressure
        surface_h = imager_obj.segment_nwp_surfaceGeoHeight
        psur = imager_obj.segment_nwp_surfacePressure
    else:
        return None
    nlev = pressure_v.shape[1]
    npix = pressure_v.shape[0]
    k = np.arange(npix)
    if list_of_levels is None:
        higher_index = np.array([
            nlev - 1 - np.searchsorted(pressure_v[ind, :], level,
                                       side='right',
                                       sorter=range(nlev - 1, -1, -1))
            for ind in range(npix)
        ])
    else:
        higher_index = np.array([
            nlev - 1 - np.searchsorted(pressure_v[ind, :], list_of_levels[ind],
                                       side='right',
                                       sorter=range(nlev - 1, -1, -1))
            for ind in range(npix)
        ])
        level = list_of_levels
    higher_index[higher_index >= (nlev - 1)] = nlev - 2
    lower_index = higher_index + 1
    # update "lo" where level is between surface and first level in array
    below_level_1 = level > pressure_v[:, 0]
    lower_index[below_level_1] = 0
    # get pressure and height for layer below and above level
    hi = pressure_v[k, higher_index]
    lo = pressure_v[k, lower_index]
    height_hi_ = values_h[k, higher_index] * 1.0
    height_lo_ = values_h[k, lower_index] * 1.0
    # update "hi" where level is between surface and first level in array
    hi[below_level_1] = psur[below_level_1]
    height_hi_[below_level_1] = surface_h[below_level_1]
    # log pressures
    hi = np.log(hi)
    lo = np.log(lo)
    level = np.log(level)
    # interpolate
    out_h = height_hi_ - (hi - level) * (height_hi_ - height_lo_) / (hi - lo)
    return out_h
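# Minimal stand-alone sketch of the reversed-order lookup above: NWP pressure
# profiles are stored surface-first (decreasing pressure), so searchsorted gets
# sorter=range(nlev - 1, -1, -1) and the result is mirrored with `nlev - 1 - ...`
# to index the original descending array. The profile values are invented.
import numpy as np

pressure_profile = np.array([1000.0, 850.0, 700.0, 500.0, 300.0])  # hPa, surface first
level = 600.0
nlev = len(pressure_profile)

higher_index = nlev - 1 - np.searchsorted(pressure_profile, level, side='right',
                                          sorter=range(nlev - 1, -1, -1))
lower_index = higher_index + 1
print(pressure_profile[higher_index], pressure_profile[lower_index])  # 700.0 500.0 bracket 600.0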
def _jprimes(x, i, x_bounds=None): """ Helper function to return the j' indices for the master curve fit This function is a helper function for :py:func:`quality`. It is not supposed to be called directly. Parameters ---------- x : mapping to ndarrays The x values. i : int The row index (finite size index) x_bounds : 2-tuple, optional bounds on x values Returns ------- ret : mapping to ndarrays Has the same keys and shape as `x`. Its element ``ret[i'][j]`` is the j' such that :math:`x_{i'j'} \leq x_{ij} < x_{i'(j'+1)}`. If no such j' exists, the element is np.nan. Convert the element to int to use as an index. """ j_primes = - np.ones_like(x) try: x_masked = ma.masked_outside(x, x_bounds[0], x_bounds[1]) except (TypeError, IndexError): x_masked = ma.asanyarray(x) k, n = x.shape # indices of lower and upper bounds edges = ma.notmasked_edges(x_masked, axis=1) x_lower = np.zeros(k, dtype=int) x_upper = np.zeros(k, dtype=int) x_lower[edges[0][0]] = edges[0][-1] x_upper[edges[-1][0]] = edges[-1][-1] for i_prime in range(k): if i_prime == i: j_primes[i_prime][:] = np.nan continue jprimes = np.searchsorted( x[i_prime], x[i], side='right' ).astype(float) - 1 jprimes[ np.logical_or( jprimes < x_lower[i_prime], jprimes >= x_upper[i_prime] ) ] = np.nan j_primes[i_prime][:] = jprimes return j_primes
def streamplot(axes, x, y, u, v, density=1, linewidth=None, color=None, cmap=None, norm=None, arrowsize=1, arrowstyle='-|>', minlength=0.1, transform=None, zorder=None, start_points=None, maxlength=4.0, integration_direction='both'): """Draws streamlines of a vector flow. *x*, *y* : 1d arrays an *evenly spaced* grid. *u*, *v* : 2d arrays x and y-velocities. Number of rows should match length of y, and the number of columns should match x. *density* : float or 2-tuple Controls the closeness of streamlines. When `density = 1`, the domain is divided into a 30x30 grid---*density* linearly scales this grid. Each cell in the grid can have, at most, one traversing streamline. For different densities in each direction, use [density_x, density_y]. *linewidth* : numeric or 2d array vary linewidth when given a 2d array with the same shape as velocities. *color* : matplotlib color code, or 2d array Streamline color. When given an array with the same shape as velocities, *color* values are converted to colors using *cmap*. *cmap* : :class:`~matplotlib.colors.Colormap` Colormap used to plot streamlines and arrows. Only necessary when using an array input for *color*. *norm* : :class:`~matplotlib.colors.Normalize` Normalize object used to scale luminance data to 0, 1. If None, stretch (min, max) to (0, 1). Only necessary when *color* is an array. *arrowsize* : float Factor scale arrow size. *arrowstyle* : str Arrow style specification. See :class:`~matplotlib.patches.FancyArrowPatch`. *minlength* : float Minimum length of streamline in axes coordinates. *start_points*: Nx2 array Coordinates of starting points for the streamlines. In data coordinates, the same as the ``x`` and ``y`` arrays. *zorder* : int any number *maxlength* : float Maximum length of streamline in axes coordinates. *integration_direction* : ['forward', 'backward', 'both'] Integrate the streamline in forward, backward or both directions. Returns: *stream_container* : StreamplotSet Container object with attributes - lines: `matplotlib.collections.LineCollection` of streamlines - arrows: collection of `matplotlib.patches.FancyArrowPatch` objects representing arrows half-way along stream lines. This container will probably change in the future to allow changes to the colormap, alpha, etc. for both lines and arrows, but these changes should be backward compatible. """ grid = Grid(x, y) mask = StreamMask(density) dmap = DomainMap(grid, mask) if zorder is None: zorder = mlines.Line2D.zorder # default to data coordinates if transform is None: transform = axes.transData if color is None: color = axes._get_lines.get_next_color() if linewidth is None: linewidth = matplotlib.rcParams['lines.linewidth'] line_kw = {} arrow_kw = dict(arrowstyle=arrowstyle, mutation_scale=10 * arrowsize) if integration_direction not in ['both', 'forward', 'backward']: errstr = ("Integration direction '%s' not recognised. " "Expected 'both', 'forward' or 'backward'." % integration_direction) raise ValueError(errstr) if integration_direction == 'both': maxlength /= 2. 
use_multicolor_lines = isinstance(color, np.ndarray) if use_multicolor_lines: if color.shape != grid.shape: raise ValueError( "If 'color' is given, must have the shape of 'Grid(x,y)'") line_colors = [] color = np.ma.masked_invalid(color) else: line_kw['color'] = color arrow_kw['color'] = color if isinstance(linewidth, np.ndarray): if linewidth.shape != grid.shape: raise ValueError( "If 'linewidth' is given, must have the shape of 'Grid(x,y)'") line_kw['linewidth'] = [] else: line_kw['linewidth'] = linewidth arrow_kw['linewidth'] = linewidth line_kw['zorder'] = zorder arrow_kw['zorder'] = zorder ## Sanity checks. if u.shape != grid.shape or v.shape != grid.shape: raise ValueError("'u' and 'v' must be of shape 'Grid(x,y)'") u = np.ma.masked_invalid(u) v = np.ma.masked_invalid(v) integrate = get_integrator(u, v, dmap, minlength, maxlength, integration_direction) trajectories = [] if start_points is None: for xm, ym in _gen_starting_points(mask.shape): if mask[ym, xm] == 0: xg, yg = dmap.mask2grid(xm, ym) t = integrate(xg, yg) if t is not None: trajectories.append(t) else: sp2 = np.asanyarray(start_points, dtype=float).copy() # Check if start_points are outside the data boundaries for xs, ys in sp2: if not (grid.x_origin <= xs <= grid.x_origin + grid.width and grid.y_origin <= ys <= grid.y_origin + grid.height): raise ValueError("Starting point ({}, {}) outside of data " "boundaries".format(xs, ys)) # Convert start_points from data to array coords # Shift the seed points from the bottom left of the data so that # data2grid works properly. sp2[:, 0] -= grid.x_origin sp2[:, 1] -= grid.y_origin for xs, ys in sp2: xg, yg = dmap.data2grid(xs, ys) t = integrate(xg, yg) if t is not None: trajectories.append(t) if use_multicolor_lines: if norm is None: norm = mcolors.Normalize(color.min(), color.max()) if cmap is None: cmap = cm.get_cmap(matplotlib.rcParams['image.cmap']) else: cmap = cm.get_cmap(cmap) streamlines = [] arrows = [] for t in trajectories: tgx = np.array(t[0]) tgy = np.array(t[1]) # Rescale from grid-coordinates to data-coordinates. tx, ty = dmap.grid2data(*np.array(t)) tx += grid.x_origin ty += grid.y_origin points = np.transpose([tx, ty]).reshape(-1, 1, 2) streamlines.extend(np.hstack([points[:-1], points[1:]])) # Add arrows half way along each trajectory. s = np.cumsum(np.sqrt(np.diff(tx) ** 2 + np.diff(ty) ** 2)) n = np.searchsorted(s, s[-1] / 2.) arrow_tail = (tx[n], ty[n]) arrow_head = (np.mean(tx[n:n + 2]), np.mean(ty[n:n + 2])) if isinstance(linewidth, np.ndarray): line_widths = interpgrid(linewidth, tgx, tgy)[:-1] line_kw['linewidth'].extend(line_widths) arrow_kw['linewidth'] = line_widths[n] if use_multicolor_lines: color_values = interpgrid(color, tgx, tgy)[:-1] line_colors.append(color_values) arrow_kw['color'] = cmap(norm(color_values[n])) p = patches.FancyArrowPatch( arrow_tail, arrow_head, transform=transform, **arrow_kw) axes.add_patch(p) arrows.append(p) lc = mcollections.LineCollection( streamlines, transform=transform, **line_kw) lc.sticky_edges.x[:] = [grid.x_origin, grid.x_origin + grid.width] lc.sticky_edges.y[:] = [grid.y_origin, grid.y_origin + grid.height] if use_multicolor_lines: lc.set_array(np.ma.hstack(line_colors)) lc.set_cmap(cmap) lc.set_norm(norm) axes.add_collection(lc) axes.autoscale_view() ac = matplotlib.collections.PatchCollection(arrows) stream_container = StreamplotSet(lc, ac) return stream_container
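# Small stand-alone illustration of the arrow-placement idiom used above: the
# cumulative arc length of a trajectory is searched for half its total length
# to find the segment halfway along the line. Coordinates are made up.
import numpy as np

tx = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
ty = np.array([0.0, 0.5, 0.0, 0.5, 0.0])

s = np.cumsum(np.sqrt(np.diff(tx) ** 2 + np.diff(ty) ** 2))
n = np.searchsorted(s, s[-1] / 2.0)      # first segment whose end passes the midpoint
arrow_tail = (tx[n], ty[n])
arrow_head = (np.mean(tx[n:n + 2]), np.mean(ty[n:n + 2]))
print(n, arrow_tail, arrow_head)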
def run(self, dataSlice, slicePoint=None): """" Calculate the detectability of a transient with the specified lightcurve. Parameters ---------- dataSlice : numpy.array Numpy structured array containing the data related to the visits provided by the slicer. slicePoint : dict, optional Dictionary containing information about the slicepoint currently active in the slicer. Returns ------- float The total number of transients that could be detected. """ # Total number of transients that could go off back-to-back if self.countMethod == 'partialLC': _nTransMax = np.ceil(self.surveyDuration / (self.transDuration / 365.25)) else: _nTransMax = np.floor(self.surveyDuration / (self.transDuration / 365.25)) tshifts = np.arange(self.nPhaseCheck) * self.transDuration / float( self.nPhaseCheck) nDetected = 0 nTransMax = 0 for tshift in tshifts: # Compute the total number of back-to-back transients are possible to detect # given the survey duration and the transient duration. nTransMax += _nTransMax if tshift != 0: nTransMax -= 1 if self.surveyStart is None: surveyStart = dataSlice[self.mjdCol].min() time = (dataSlice[self.mjdCol] - surveyStart + tshift) % self.transDuration # Which lightcurve does each point belong to lcNumber = np.floor( (dataSlice[self.mjdCol] - surveyStart) / self.transDuration) lcMags = self.lightCurve(time, dataSlice[self.filterCol]) # How many criteria needs to be passed detectThresh = 0 # Flag points that are above the SNR limit detected = np.zeros(dataSlice.size, dtype=int) detected[np.where( lcMags < dataSlice[self.m5Col] + self.detectM5Plus)] += 1 detectThresh += 1 # If we demand points on the rise if self.nPrePeak > 0: detectThresh += 1 ord = np.argsort(dataSlice[self.mjdCol]) dataSlice = dataSlice[ord] detected = detected[ord] lcNumber = lcNumber[ord] time = time[ord] ulcNumber = np.unique(lcNumber) left = np.searchsorted(lcNumber, ulcNumber) right = np.searchsorted(lcNumber, ulcNumber, side='right') # Note here I'm using np.searchsorted to basically do a 'group by' # might be clearer to use scipy.ndimage.measurements.find_objects or pandas, but # this numpy function is known for being efficient. for le, ri in zip(left, right): # Number of points where there are a detection good = np.where(time[le:ri] < self.peakTime) nd = np.sum(detected[le:ri][good]) if nd >= self.nPrePeak: detected[le:ri] += 1 # Check if we need multiple points per light curve or multiple filters if (self.nPerLC > 1) | (self.nFilters > 1): # make sure things are sorted by time ord = np.argsort(dataSlice[self.mjdCol]) dataSlice = dataSlice[ord] detected = detected[ord] lcNumber = lcNumber[ord] time = time[ord] ulcNumber = np.unique(lcNumber) left = np.searchsorted(lcNumber, ulcNumber) right = np.searchsorted(lcNumber, ulcNumber, side='right') detectThresh += self.nFilters for le, ri in zip(left, right): points = np.where(detected[le:ri] > 0) ufilters = np.unique( dataSlice[self.filterCol][le:ri][points]) phaseSections = np.floor(time[le:ri][points] / self.transDuration * self.nPerLC) for filtName in ufilters: good = np.where(dataSlice[self.filterCol][le:ri] [points] == filtName) if np.size(np.unique( phaseSections[good])) >= self.nPerLC: detected[le:ri] += 1 # Find the unique number of light curves that passed the required number of conditions nDetected += np.size( np.unique(lcNumber[np.where(detected >= detectThresh)])) # Rather than keeping a single "detected" variable, maybe make a mask for each criteria, then # reduce functions like: reduce_singleDetect, reduce_NDetect, reduce_PerLC, reduce_perFilter. 
# The way I'm running it now, that would speed things up.
return float(nDetected) / nTransMax
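# Stand-alone sketch of the searchsorted "group by" noted in the comments
# above: for a sorted array of group labels, side='left'/'right' give the
# start and end offsets of each group, so each slice [le:ri] is one light
# curve. The label array is invented.
import numpy as np

lc_number = np.array([0, 0, 0, 1, 1, 2, 2, 2, 2])   # must be sorted
groups = np.unique(lc_number)
left = np.searchsorted(lc_number, groups, side='left')     # [0, 3, 5]
right = np.searchsorted(lc_number, groups, side='right')   # [3, 5, 9]
for g, le, ri in zip(groups, left, right):
    print(g, lc_number[le:ri])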
def get_element_index(self, eids):
    itot = np.searchsorted(eids, self.element)
    return itot
def weighted_pick(weights):
    # Draw one index with probability proportional to `weights`: sample a
    # uniform value on [0, sum(weights)) and locate it in the cumulative sum.
    t = np.cumsum(weights)
    s = np.sum(weights)
    return int(np.searchsorted(t, np.random.rand(1) * s))
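# Quick sanity check for weighted_pick (assumes the function above is in
# scope): the empirical pick frequencies should roughly reproduce the
# normalized weights.
import numpy as np

np.random.seed(0)
weights = np.array([0.1, 0.3, 0.6])
picks = np.array([weighted_pick(weights) for _ in range(10000)])
print(np.bincount(picks, minlength=len(weights)) / len(picks))  # ~[0.1, 0.3, 0.6]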
def disaggregate_poe(self, sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons): """ Disaggregate (separate) PoE of ``iml`` in different contributions each coming from ``n_epsilons`` distribution bins. If ``truncation_level = 3``, ``n_epsilons = 3``, bin edges are ``-3 .. -1``, ``-1 .. +1`` and ``+1 .. +3``. :param n_epsilons: Integer number of bins to split truncated Gaussian distribution to. Other parameters are the same as for :meth:`get_poes`, with differences that ``iml`` is only one single intensity level and ``truncation_level`` is required to be positive. :returns: Contribution to probability of exceedance of ``iml`` coming from different sigma bands in a form of 1d numpy array with ``n_epsilons`` floats between 0 and 1. """ if not truncation_level > 0: raise ValueError('truncation level must be positive') self._check_imt(imt) # compute mean and standard deviations mean, [stddev] = self.get_mean_and_stddevs(sctx, rctx, dctx, imt, [const.StdDev.TOTAL]) # compute iml value with respect to standard (mean=0, std=1) # normal distributions iml = self.to_distribution_values(iml) standard_imls = (iml - mean) / stddev distribution = scipy.stats.truncnorm(-truncation_level, truncation_level) epsilons = numpy.linspace(-truncation_level, truncation_level, n_epsilons + 1) # compute epsilon bins contributions contribution_by_bands = (distribution.cdf(epsilons[1:]) - distribution.cdf(epsilons[:-1])) # take the minimum epsilon larger than standard_iml iml_bin_indices = numpy.searchsorted(epsilons, standard_imls) return numpy.array([ # take full disaggregated distribution for the case of # ``iml <= mean - truncation_level * stddev`` contribution_by_bands if idx == 0 else # take zeros if ``iml >= mean + truncation_level * stddev`` numpy.zeros(n_epsilons) if idx >= n_epsilons + 1 else # for other cases (when ``iml`` falls somewhere in the # histogram): numpy.concatenate(( # take zeros for bins that are on the left hand side # from the bin ``iml`` falls into, numpy.zeros(idx - 1), # ... area of the portion of the bin containing ``iml`` # (the portion is limited on the left hand side by # ``iml`` and on the right hand side by the bin edge), [ distribution.sf(standard_imls[i]) - contribution_by_bands[idx:].sum() ], # ... and all bins on the right go unchanged. contribution_by_bands[idx:])) for i, idx in enumerate(iml_bin_indices) ])
def _first_brillouin_zone(eigenvalues, eigenvectors, n_values, edge):
    """
    Return the `n_values` eigenvalues (and corresponding eigenvectors) which
    fall within the first "Brillouin zone" whose edge is `edge`.  This function
    takes care to select values from only one edge of the zone, and raises a
    `RuntimeError` if it cannot safely do so.

    The inputs `eigenvalues` and `edge` must be rounded to the desired
    precision for this function.

    Arguments:
    eigenvalues: 1D np.array of float --
        The eigenvalues to choose from.  This should be rounded to the desired
        precision (because the floating-point values will be compared directly
        to the edge value).
    eigenvectors: 2D np.array of complex --
        The eigenvectors corresponding to the eigenvalues.  The first index
        runs over the number of vectors, so `eigenvalues[i]` is the eigenvalue
        corresponding to the eigenvector `eigenvectors[i]`.  Note: this is the
        transpose of the return value of `np.linalg.eig` and family.
    n_values: int -- The number of eigenvalues to find.
    edge: float --
        The edge of the first Brillouin zone.  This should be rounded to the
        desired precision.

    Returns:
    eigenvalues: 1D np.array of float -- The selected eigenvalues (sorted).
    eigenvectors: 2D np.array of complex --
        The eigenvectors corresponding to the selected eigenvalues.  The first
        index corresponds to the index of the `eigenvalues` output.
    """
    order = eigenvalues.argsort()
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[order]
    lower = np.searchsorted(eigenvalues, -edge, side='left')
    upper = np.searchsorted(eigenvalues, edge, side='right')
    n_lower_edge = n_upper_edge = 0
    while eigenvalues[lower + n_lower_edge] == -edge:
        n_lower_edge += 1
    # Additional `-1` because `searchsorted(side='right')` gives us the index
    # after the found element.
    while eigenvalues[upper - n_upper_edge - 1] == edge:
        n_upper_edge += 1
    n_not_on_edge = (upper - n_upper_edge) - (lower + n_lower_edge)
    log_message = " ".join([
        f"Needed {n_values} eigenvalues in the first zone.",
        f"Found {n_lower_edge}, {n_not_on_edge}, {n_upper_edge} on the",
        "lower edge, centre zone, upper edge respectively.",
    ])
    _log.debug(log_message)
    if n_not_on_edge == n_values:
        lower, upper = lower + n_lower_edge, upper - n_upper_edge
    elif n_not_on_edge + n_lower_edge == n_values:
        lower, upper = lower, upper - n_upper_edge
    elif n_not_on_edge + n_upper_edge == n_values:
        lower, upper = lower + n_lower_edge, upper
    else:
        exception_message = " ".join([
            "Could not resolve the first Brillouin zone safely.",
            "You could try increasing the tolerance (decreasing the 'decimals'",
            "field), or adding a small constant term to your Hamiltonian.",
        ])
        raise RuntimeError(exception_message)
    return eigenvalues[lower:upper], eigenvectors[lower:upper]
def adaptive_parzen_normal(mus, prior_weight, prior_mu, prior_sigma, LF=DEFAULT_LF): """ mus - matrix (N, M) of M, N-dimensional component centers """ mus = np.array(mus) assert str(mus.dtype) != 'object' if mus.ndim != 1: raise TypeError('mus must be vector', mus) if len(mus) == 0: srtd_mus = np.asarray([prior_mu]) sigma = np.asarray([prior_sigma]) prior_pos = 0 elif len(mus) == 1: if prior_mu < mus[0]: prior_pos = 0 srtd_mus = np.asarray([prior_mu, mus[0]]) sigma = np.asarray([prior_sigma, prior_sigma * .5]) else: prior_pos = 1 srtd_mus = np.asarray([mus[0], prior_mu]) sigma = np.asarray([prior_sigma * .5, prior_sigma]) elif len(mus) >= 2: # create new_mus, which is sorted, and in which # the prior has been inserted order = np.argsort(mus) prior_pos = np.searchsorted(mus[order], prior_mu) srtd_mus = np.zeros(len(mus) + 1) srtd_mus[:prior_pos] = mus[order[:prior_pos]] srtd_mus[prior_pos] = prior_mu srtd_mus[prior_pos + 1:] = mus[order[prior_pos:]] sigma = np.zeros_like(srtd_mus) sigma[1:-1] = np.maximum(srtd_mus[1:-1] - srtd_mus[0:-2], srtd_mus[2:] - srtd_mus[1:-1]) lsigma = srtd_mus[1] - srtd_mus[0] usigma = srtd_mus[-1] - srtd_mus[-2] sigma[0] = lsigma sigma[-1] = usigma if LF and LF < len(mus): unsrtd_weights = linear_forgetting_weights(len(mus), LF) srtd_weights = np.zeros_like(srtd_mus) assert len(unsrtd_weights) + 1 == len(srtd_mus) srtd_weights[:prior_pos] = unsrtd_weights[order[:prior_pos]] srtd_weights[prior_pos] = prior_weight srtd_weights[prior_pos + 1:] = unsrtd_weights[order[prior_pos:]] else: srtd_weights = np.ones(len(srtd_mus)) srtd_weights[prior_pos] = prior_weight # -- magic formula: maxsigma = old_div(prior_sigma, 1.0) minsigma = old_div(prior_sigma, min(100.0, (1.0 + len(srtd_mus)))) sigma = np.clip(sigma, minsigma, maxsigma) sigma[prior_pos] = prior_sigma assert prior_sigma > 0 assert maxsigma > 0 assert minsigma > 0 assert np.all(sigma > 0), (sigma.min(), minsigma, maxsigma) srtd_weights /= srtd_weights.sum() if 0: print('WEIGHTS', srtd_weights) print('MUS', srtd_mus) print('SIGMA', sigma) return srtd_weights, srtd_mus, sigma
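# Toy illustration of the prior-insertion step above: searchsorted finds where
# the prior mean belongs among the sorted component centres. np.insert is used
# here as a compact stand-in for the manual construction of srtd_mus; the
# numbers are arbitrary.
import numpy as np

mus = np.array([2.0, -1.0, 0.5])
prior_mu = 0.0

order = np.argsort(mus)                            # sorted order: [-1.0, 0.5, 2.0]
prior_pos = np.searchsorted(mus[order], prior_mu)  # 1: prior sits between -1.0 and 0.5
srtd_mus = np.insert(mus[order], prior_pos, prior_mu)
print(prior_pos, srtd_mus)                         # 1 [-1.   0.   0.5  2. ]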
def result(x): return np.searchsorted(selection, x, side='right') / selection.size
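# The closure above is an empirical CDF: with `selection` sorted,
# searchsorted(..., side='right') counts how many selected values are <= x.
# A self-contained version with invented data:
import numpy as np

selection = np.sort(np.array([3.0, 1.0, 4.0, 1.0, 5.0]))

def empirical_cdf(x):
    return np.searchsorted(selection, x, side='right') / selection.size

print(empirical_cdf(1.0), empirical_cdf(3.5), empirical_cdf(10.0))  # 0.4 0.6 1.0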
def test_searchsortedOp_on_right_side(self):
    f = aesara.function(
        [self.x, self.v], searchsorted(self.x, self.v, side="right")
    )
    assert np.allclose(
        np.searchsorted(self.a, self.b, side="right"), f(self.a, self.b)
    )
def process_week(config, source, week_file): """Process a single week file * Retrieve the file, extracting the photon and spacecraft info * Select photons near the source, * Determine exposure for the direction * Use the weight table to add weights to photon data, selecting photons with weight info -- in progress -- * Use the exposure to assign an exposure to each photon. """ with open(week_file, 'rb') as inp: week = pickle.load(inp) pdf = _get_photons_near_source(config, source, week) edf = _calculate_exposure_for_source(config, source, week) if config.verbose > 2: print(f'\n\t-->Selected {len(pdf)} photons') # add weights if pdf is None or len(pdf) < 3 or len(edf) == 0: return None, edf add_weights(config, pdf, source) if 'run_id' in pdf: # expint = np.empty(2*len(edf)) estart = edf.start.values estop = edf.stop.values exptime = np.append(estart, estop[-1]) expval = edf.exp.values expcth = edf.cos_theta.values # corresponding cumulative exposure -- in m^2 cumexp = np.insert(np.cumsum(edf.exp.values / 1e4), 0, 0) # i = np.searchsorted(expint[0::2], MJD(pdf.iloc[0].run_id) ) runs = pdf.groupby('run_id') last_run = 0 tau = [] time = [] run_id = [] for run, g in runs: assert run > last_run run_id += [run] * len(g) last_run = run # assemble MJD time from run_id and trun runstart = MJD(float(run)) rtime = MJD(float(run) + g.trun * config.offset_size) time += list(rtime) # cumexp at run start run_cumexp = cumexp[np.searchsorted(estart, runstart)] # use event times in this run to interpolate table of exposure times, cumexp event_cumexp = np.interp(rtime, exptime, cumexp) # diffs, from first --> tau event_exp = np.diff(np.insert(event_cumexp, 0, run_cumexp)) tau += list(event_exp) # # extract cos_theta at event_time? should interplate maybe # cth += expcth[np.searchsorted(rtime, estart )] # update pdf pdf.loc[:, 'tau'] = np.array(tau, np.float32) pdf.loc[:, 'time'] = time pdf.drop(columns='trun', inplace=True) pdf.loc[:, 'run_id'] = pd.Categorical(run_id) else: # zap legacy for now for check in 'etime event run_diff rtime run'.split(): if check in pdf: if config.verbose > 2: print(f'remove {check}') pdf.drop(columns=check, inplace=True) # final attempt to do this pdf.loc[:, 'weight'] = pdf['weight'].astype(np.float32) return pdf, edf
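# Simplified sketch (invented numbers, not the real photon/exposure layout) of
# the exposure bookkeeping above: cumulative exposure is tabulated at interval
# boundaries, searchsorted picks the table entry at a run start, event times
# are interpolated into the table with np.interp, and successive differences
# give the exposure accumulated between events (the per-photon tau).
import numpy as np

estart = np.array([0.0, 10.0, 20.0])       # interval start times
estop = np.array([10.0, 20.0, 30.0])       # interval stop times
exp_per_interval = np.array([5.0, 3.0, 2.0])

exptime = np.append(estart, estop[-1])                     # [0, 10, 20, 30]
cumexp = np.insert(np.cumsum(exp_per_interval), 0, 0.0)    # [0, 5, 8, 10]

run_start = 10.0
run_cumexp = cumexp[np.searchsorted(estart, run_start)]    # cumulative exposure at run start

event_times = np.array([12.0, 15.0, 25.0])
event_cumexp = np.interp(event_times, exptime, cumexp)
tau = np.diff(np.insert(event_cumexp, 0, run_cumexp))      # exposure between successive events
print(run_cumexp, event_cumexp, tau)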
def R_cb(self, x, bound='two-sided', interp='step', alpha_ci=0.05,
         bound_type='exp', dist='z'):
    if bound_type not in ['exp', 'normal']:
        raise ValueError("'bound_type' must be in ['exp', 'normal']")
    if dist not in ['t', 'z']:
        raise ValueError("'dist' must be in ['t', 'z']")
    confidence = 1. - alpha_ci
    old_err_state = np.seterr(all='ignore')
    x = np.atleast_1d(x)
    if bound in ['upper', 'lower']:
        if dist == 't':
            stat = t.ppf(1 - confidence, self.r - 1)
        else:
            stat = norm.ppf(1 - confidence, 0, 1)
        if bound == 'upper':
            stat = -stat
    elif bound == 'two-sided':
        if dist == 't':
            stat = t.ppf((1 - confidence) / 2, self.r - 1)
        else:
            stat = norm.ppf((1 - confidence) / 2, 0, 1)
        stat = np.array([-1, 1]).reshape(2, 1) * stat

    if bound_type == 'exp':
        # Exponential Greenwood confidence
        R_out = self.greenwood * 1. / (np.log(self.R)**2)
        R_out = np.log(-np.log(self.R)) - stat * np.sqrt(R_out)
        R_out = np.exp(-np.exp(R_out))
    else:
        # Normal Greenwood confidence
        R_out = self.R + np.sqrt(self.greenwood * self.R**2) * stat

    if interp == 'step':
        idx = np.searchsorted(self.x, x, side='right') - 1
        if bound == 'two-sided':
            R_out = R_out[:, idx]
            R_out = np.where(idx < 0, 1, R_out)
        else:
            R_out = R_out[idx]
            R_out = np.where(idx < 0, 1, R_out)
    else:
        if bound == 'two-sided':
            R1 = interp1d(self.x, R_out[0, :], kind=interp)(x)
            R2 = interp1d(self.x, R_out[1, :], kind=interp)(x)
            R_out = np.vstack([R1, R2])
        else:
            R_out = interp1d(self.x, R_out, kind=interp)(x)

    if R_out.ndim == 2:
        min_idx = (x < self.x.min())
        max_idx = (x > self.x.max())
        R_out[0, :][min_idx] = 1
        R_out[0, :][max_idx] = 1
        R_out[1, :][min_idx] = 0
        R_out[1, :][max_idx] = 0
        R_out = R_out.T
    else:
        min_idx = (x < self.x.min())
        max_idx = (x > self.x.max())
        R_out[min_idx] = 0
        R_out[max_idx] = 1

    np.seterr(**old_err_state)
    return R_out
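# The step-interpolation branch above is the usual right-continuous step
# lookup: searchsorted(..., side='right') - 1 gives the index of the last
# event time at or before each query point. Stand-alone illustration with
# made-up numbers (np.clip is used here instead of the fancy-index-then-
# overwrite pattern of the method above):
import numpy as np

knots = np.array([1.0, 2.0, 4.0])        # event times (self.x in the method above)
R_step = np.array([0.9, 0.7, 0.4])       # survival estimate after each event

x = np.array([0.5, 1.0, 3.0, 5.0])
idx = np.searchsorted(knots, x, side='right') - 1
R_at_x = np.where(idx < 0, 1.0, R_step[np.clip(idx, 0, None)])
print(R_at_x)   # [1.  0.9 0.7 0.4]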
def split_tracklet(tracklet, inds):
    idx = sorted(set(np.searchsorted(tracklet.inds, inds)))
    inds_new = np.split(tracklet.inds, idx)
    data_new = np.split(tracklet.data, idx)
    return [Tracklet(data, inds) for data, inds in zip(data_new, inds_new)]
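# Stand-alone view of the split logic above: searchsorted locates the cut
# offsets within the sorted frame indices, and np.split slices at those
# offsets (here only the index array is shown; the arrays are invented).
import numpy as np

tracklet_inds = np.array([0, 1, 2, 5, 6, 9])
cut_at = [2, 6]                                   # frame indices to split before
idx = sorted(set(np.searchsorted(tracklet_inds, cut_at)))
print(np.split(tracklet_inds, idx))               # [array([0, 1]), array([2, 5]), array([6, 9])]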
def get_reflectance(self, sun_zenith, sat_zenith, azidiff, bandname, redband=None): """Get the reflectance from the three sun-sat angles.""" # Get wavelength in nm for band: if isinstance(bandname, float): LOG.warning('A wavelength is provided instead of band name - ' + 'disregard the relative spectral responses and assume ' + 'it is the effective wavelength: %f (micro meter)', bandname) wvl = bandname * 1000.0 else: wvl = self.get_effective_wavelength(bandname) wvl = wvl * 1000.0 rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = self.get_reflectance_lut() # force dask arrays compute = False if HAVE_DASK and not isinstance(sun_zenith, Array): compute = True sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape) sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape) azidiff = from_array(azidiff, chunks=azidiff.shape) if redband is not None: redband = from_array(redband, chunks=redband.shape) clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max())) sun_zenith = clip(sun_zenith, 0, clip_angle) sunzsec = 1. / cos(deg2rad(sun_zenith)) clip_angle = rad2deg(arccos(1. / satz_sec_coord.max())) sat_zenith = clip(sat_zenith, 0, clip_angle) satzsec = 1. / cos(deg2rad(sat_zenith)) shape = sun_zenith.shape if not(wvl_coord.min() < wvl < wvl_coord.max()): LOG.warning( "Effective wavelength for band %s outside 400-800 nm range!", str(bandname)) LOG.info( "Set the rayleigh/aerosol reflectance contribution to zero!") if HAVE_DASK: chunks = sun_zenith.chunks if redband is None else redband.chunks res = zeros(shape, chunks=chunks) return res.compute() if compute else res return zeros(shape) idx = np.searchsorted(wvl_coord, wvl) wvl1 = wvl_coord[idx - 1] wvl2 = wvl_coord[idx] fac = (wvl2 - wvl) / (wvl2 - wvl1) raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :] tic = time.time() smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]] smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]] orders = [ len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)] f_3d_grid = atleast_2d(raylwvl.ravel()) if HAVE_DASK and isinstance(smin[0], Array): # compute all of these at the same time before passing to the interpolator # otherwise they are computed separately smin, smax, orders, f_3d_grid = da.compute(smin, smax, orders, f_3d_grid) minterp = MultilinearInterpolator(smin, smax, orders) minterp.set_values(f_3d_grid) if HAVE_DASK: ipn = map_blocks(self._do_interp, minterp, sunzsec, azidiff, satzsec, dtype=raylwvl.dtype, chunks=azidiff.chunks) else: ipn = self._do_interp(minterp, sunzsec, azidiff, satzsec) LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic)) ipn *= 100 res = ipn if redband is not None: res = where(redband < 20., res, (1 - (redband - 20) / 80) * res) res = clip(res, 0, 100) if compute: res = res.compute() return res
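# Minimal sketch of the wavelength bracketing above: searchsorted returns the
# first LUT wavelength >= wvl, the previous entry brackets it from below, and
# fac is the weight given to the lower bracket. Applying the same weights to
# the wavelengths themselves reproduces wvl, which makes a handy sanity check.
# The LUT values are invented.
import numpy as np

wvl_coord = np.array([440.0, 470.0, 500.0, 550.0, 640.0])   # hypothetical LUT wavelengths (nm)
wvl = 520.0

idx = np.searchsorted(wvl_coord, wvl)          # 3 -> brackets are wvl_coord[2] and wvl_coord[3]
wvl1, wvl2 = wvl_coord[idx - 1], wvl_coord[idx]
fac = (wvl2 - wvl) / (wvl2 - wvl1)
interpolated = fac * wvl1 + (1 - fac) * wvl2   # stand-in for blending the rayl tables
print(wvl1, wvl2, fac, interpolated)           # 500.0 550.0 0.6 520.0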
def set_data_at(self, ind, data):
    self.data[np.searchsorted(self.inds, ind)] = data
def onlineAnalysis(config, getImgNormDistribution=False): ################################### ##### get background images ##### ################################### bkgImages = [] reCenterImgs = [] centerRsum = 0. centerCsum = 0. centerSumCount = 0. ########################################### ##### initialize analysis variables ##### ########################################### ### initialize file lists ### loadedFiles = [] loadFiles = [] queryFiles = [] if config.doQueryFolder: queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) for fld in config.loadFolders: folderName = fld["folder"] + "/*" + config.fileExtention diffractionFiles = glob.glob(folderName) bkgImgFiles = [fld["background"]]*len(diffractionFiles) centerRs = [fld["centerR"]]*len(diffractionFiles) centerCs = [fld["centerC"]]*len(diffractionFiles) loadFiles = loadFiles + zip(diffractionFiles, bkgImgFiles, centerRs, centerCs) while (len(loadFiles) == 0) and (len(queryFiles) == 0): if not config.doQueryFolder: raise RuntimeError("ERROR: There are no files included in the load folders!") else: queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) while not len(queryFiles): print("There are no diffraction patterns under %s, will keep looking..." % config.queryFolder) time.sleep(10) queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) ### initialize plots ### if not getImgNormDistribution: plt.ion() Qrange = np.arange(config.NradialBins+1)*config.Qmax/(config.NradialBins) LOmin = np.searchsorted(Qrange, [config.LineOutMinQ])[0] LOmax = np.searchsorted(Qrange, [config.LineOutMaxQ])[0] fig = plt.figure(figsize=config.plotFigSize, dpi=config.dpi) plotGrid = (2,6) axCurDP = plt.subplot2grid(plotGrid, (0,0)) #axCurDP.set_title("Current Diffraction") axCurDP.get_xaxis().set_visible(False) axCurDP.get_yaxis().set_visible(False) axCurDP.set_position([0.95,0.05, 0.05, 0.95]) axSumDP = plt.subplot2grid(plotGrid, (1,0)) #axSumDP.set_title("Aggregate Diffraction") axSumDP.get_xaxis().set_visible(False) axSumDP.get_yaxis().set_visible(False) axSumDP.set_position([0.95,0.05, 0.05, 0.95]) axCurL0 = plt.subplot2grid(plotGrid, (0,1)) axCurL0.set(xlabel=r'Q $[\AA^{-1}]$', ylabel="Legendre 0") axTotCN = plt.subplot2grid(plotGrid, (1,1)) axTotCN.set(xlabel="Time", ylabel="Total Counts") axAllL0 = plt.subplot2grid(plotGrid, (0,2), colspan=2) axAllL0.set(xlabel="Time [ps]", ylabel=r'Q $[\AA^{-1}]$') axLinL0 = plt.subplot2grid(plotGrid, (1,2), colspan=2) axLinL0.set(xlabel="Time [ps]", ylabel="Legendre 0") axAllL2 = plt.subplot2grid(plotGrid, (0,4), colspan=2) axAllL2.set(xlabel="Time [ps]", ylabel=r'Q $[\AA^{-1}]$') axLinL2= plt.subplot2grid(plotGrid, (1,4), colspan=2) axLinL2.set(xlabel="Time [ps]", ylabel="Legendre 2") plotCurLeg, = axCurL0.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-") plotL0LO, = axLinL0.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-") plotL2LO, = axLinL2.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-") ### image variables ### aggregateImage = np.zeros((1024,1024), np.float64) imageSums = [] NsumRejected = 0 ### initialize legendre variables ### legCoeffDict = {} loadImgDict = {} averageLegCoeffArray = np.zeros((config.Nlegendres,1,config.NradialBins)) initializeFiles = True if config.loadSavedResults: legCoeffDict, loadedFiles, averageLegCoeffArray =\ load_results(config.loadSavedResultsFolder, config.loadSavedResultsFileName) delays = np.sort(np.array(legCoeffDict.keys())) initializeFiles = False # initialize loading variables 
with first new entry while len(loadFiles): fileName,_,_,_ = loadFiles[0] if fileName in loadedFiles: del loadFiles[0] else: info = get_image_info(fileName) delays = np.array([info.stageDelay]) loadImgDict[info.stageDelay] = (0, 0) break while initializeFiles and\ (initializeFiles or (len(loadFiles) is 0) or (len(queryFiles) is 0)): if len(loadFiles): fileName,_,_,_ = loadFiles[0] info = get_image_info(fileName) delays = np.array([info.stageDelay]) loadImgDict[info.stageDelay] = (0, 0) initializeFiles = False elif len(queryFiles): info = get_image_info(queryFiles[0]) delays = np.array([info.stageDelay]) legCoeffDict[info.stageDelay] = (0, 0) initializeFiles = False elif config.doQueryFolder: queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) else: print("ERROR: Cannot run without loading files or querying folder!!!") sys.exit() for i in np.arange(len(loadFiles)): fileName,_,_,_ = loadFiles[-1] imgSum = np.sum(get_image(fileName, config.hotPixel)) if (imgSum < config.sumMin) or (imgSum > config.sumMax): del loadFiles[-1] else: break ### retrieving atomic diffraction ### if config.normByAtomic: atomicDiffraction = np.fromfile(config.atomicDiffractionFile, dtype=config.atomicDiffractionDataType)*1e20 qGrid = (np.array(config.NradialBins, np.float) + 0.5)\ *config.Qmax/config.NradialBins atomicNorm = 1./(atomicDiffraction*qGrid) ### retrieve gMatrix for legendre fitting ### assert ((config.roi + (1 - config.roi%2))%config.Nrebin == 0),\ "ERROR: Cannot rebin an image with size [{}, {}] by {}, change roi!".format( config.roi+1, config.roi+1, config.Nrebin) imgRebinSize = (config.roi+1)/config.Nrebin gMatrixName = "gMatrix_pixels-" + str((config.roi+1)/config.Nrebin)\ + "Nradii-" + str(config.NradialBins)\ + "Nlegendre-" + str(config.Nlegendres) + ".dat" if not os.path.isfile(config.gMatrixFolder + "/" + gMatrixName): make_legendre_gMatrix(config.NradialBins, config.Nlegendres, imgRebinSize, config.gMatrixFolder + "/" + gMatrixName) gMatrix = np.fromfile(config.gMatrixFolder + "/" + gMatrixName, dtype=np.float) gMatrix = np.reshape(gMatrix, (imgRebinSize**2, config.NradialBins*config.Nlegendres)) # invert g matrix using SVD decomposition gInv = invert_matrix_SVD(gMatrix) ################################################## ##### looping through images and analysing ##### ################################################## loadingImage = False curBkgAddr = "" loadConfig = copy.deepcopy(config) while (len(loadFiles) != 0) or config.doQueryFolder: if len(loadFiles): name, bkgAddr, loadConfig.centerR, loadConfig.centerC = loadFiles.pop(0) while name in loadedFiles: name, bkgAddr, loadConfig.centerR, loadConfig.centerC = loadFiles.pop(0) loadingImage = True centerConfig = loadConfig loadedFiles.append(name) # load background if curBkgAddr is not fld["background"]: curBkgAddr = fld["background"] if curBkgAddr is not None: bkgImg = get_image(fld["background"], config.hotPixel) elif len(queryFiles): name = queryFiles.pop(0) imgAddr = None loadingImage = False centerConfig = config loadedFiles.append(name) # load background if curBkgAddr is not config.queryBkgAddr: curBkgAddr = config.queryBkgAddr if curBkgAddr is not None: bkgImg = get_image(config.queryBkgAddr, config.hotPixel) else: ### save current results ### if config.saveQueryResults: save_results(legCoeffDict, loadedFiles, averageLegCoeffArray, config.saveFolder, config.saveFileName) ### search query folder for new files ### queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) if 
len(queryFiles) is 0: print "INFO: Query folder is empty, waiting to check again", while len(queryFiles) == 0: print "...", time.sleep(1) queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles) continue print("Now looking at %s" % name) ### get image information ### info = get_image_info(name) ### get image and remove hot pixels ### imgOrig = get_image(name, config.hotPixel) aggregateImage += imgOrig img = copy.deepcopy(imgOrig) ### check total scattering intensity ### imgSum = np.sum(img) imageSums.append(imgSum) if (imgSum < config.sumMin) or (imgSum > config.sumMax): NsumRejected += 1 print("INFO: Rejected image %s with a total sum of %f!" % (name, imgSum)) print("INFO: Total sum cut has rejected %i images!" % (NsumRejected)) continue if (getImgNormDistribution): if ((len(loadFiles) is 0) and (len(queryFiles) is 0)): return imageSums continue ### subtract background images ### if curBkgAddr is not None: img -= bkgImg #background_subtraction(img, bkgImg) ### center image ### img, centerR, centerC = centering(img, centerConfig) #plt.imshow(img) #plt.show() ### readout noise subtraction ### img = readoutNoise_subtraction(img, True, rLow=config.ROradLow, rHigh=config.ROradHigh) #plt.imshow(img) #plt.show() ### image norm ### imgNorm = 1#get_image_norm(img, config.normRadLow, config.normRadHigh) ##### update loaded images ##### if loadingImage: ind = np.searchsorted(delays, [info.stageDelay])[0] if np.any(np.abs(delays-info.stageDelay) < 0.005): delayInd = delays[ind] loadImg,norm = loadImgDict[delayInd] loadImgDict[delayInd] = (loadImg + img, norm + imgNorm) else: delays = np.insert(delays, ind, info.stageDelay) loadImgDict[info.stageDelay] = (img, imgNorm) """ legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres, config.NradialBins, gInv=gInv) X,Y = np.meshgrid(np.array([0,1]), Qrange) fitTest = plt.figure() axTest = figTest.add_subplot(111) img = axTest.pcolor(X, Y, np.reshape(legendreCoeffs[0,:],(1,-1)).T, cmap=cm.jet) axLoad.set_ylim([0,config.Qmax]) axLoad.set_xlim([timeDelay[0],timeDelay[-1]]) figLoad.colorbar(img, ax=axLoad) img.set_clim(-1*maxRange, maxRange) figLoad.canvas.draw() figLoad.savefig("legednre" + str(i) + "_loadedFiles.png") """ if len(loadFiles) == 0: averageLegCoeffArray = np.zeros((config.Nlegendres, delays.shape[0], config.NradialBins), np.float) for i,d in enumerate(delays): # fit legendres img,norm = loadImgDict[d] legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres, config.NradialBins, gInv=gInv) if config.normByAtomic: legendreCoeffs *= atomicNorm # record results legCoeffDict[d] = (legendreCoeffs, norm) averageLegCoeffArray[:,i,:] = legendreCoeffs/norm ### save results ### if config.saveLoadedResults: save_results(legCoeffDict, loadedFiles, averageLegCoeffArray, config.saveFolder, config.saveFileName) ### plot results of loaded files ### timeDelay = (delays - delays[0])*1e-9/(3e8*1e-12) if timeDelay.shape[0] > 1: timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2]) else: timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05) timeDelay = timeDelay[1:] X,Y = np.meshgrid(timeDelay, Qrange) for i in [0,2]: figLoad = plt.figure() axLoad = figLoad.add_subplot(111) subTZleg = averageLegCoeffArray[i] - np.mean(averageLegCoeffArray[i,:4,:], axis=0) shp = subTZleg.shape mn = np.mean(subTZleg[:,0.2*shp[1]:0.7*shp[1]], axis=(0,1)) std = np.std(subTZleg[:,0.2*shp[1]:0.7*shp[1]], axis=(0,1)) if mn > 0: maxRange = np.abs(mn - 3*std) else: maxRange = mn + 3*std #maxRange = 0.14 #axLoad.pcolor(X, Y, 
averageLegCoeffArray[i,:,:].T, cmap=cm.RdBu) img = axLoad.pcolor(X, Y, subTZleg[1:,:].T, cmap=cm.jet) axLoad.set_ylim([0,config.Qmax]) axLoad.set_xlim([timeDelay[0],timeDelay[-1]]) figLoad.colorbar(img, ax=axLoad) img.set_clim(-1*maxRange, maxRange) figLoad.canvas.draw() figLoad.savefig(config.plotDirectory + "/legendre" + str(i) + "_loadedFiles.png") continue ##### fit legendres ##### legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres, config.NradialBins, gInv=gInv) if config.normByAtomic: legendreCoeffs *= atomicNorm ##### update time domain legendres ##### ind = np.searchsorted(delays, [info.stageDelay])[0] if np.any((delays-info.stageDelay) == 0): delayInd = delays[ind] coeffs,norm = legCoeffDict[delayInd] updatedCoeffs = coeffs + legendreCoeffs legCoeffDict[delayInd] = (updatedCoeffs, norm + imgNorm) averageLegCoeffArray[:,ind,:] = updatedCoeffs[:,:]/(norm + imgNorm) else: delays = np.insert(delays, ind, info.stageDelay) averageLegCoeffArray = np.insert(averageLegCoeffArray, ind, legendreCoeffs[:,:], axis=1) legCoeffDict[info.stageDelay] = (legendreCoeffs, imgNorm) ##### plot time domain legendre fits ##### ### diffraction patterns ### axCurDP.imshow(imgOrig) axSumDP.imshow(aggregateImage) plotCurLeg.set_ydata(legendreCoeffs[0,:]) axCurL0.set_ylim([0.9*min(legendreCoeffs[0,:]), 1.1*max(legendreCoeffs[0,:])]) axTotCN.plot(np.arange(len(imageSums)), imageSums, color="k") ### time dependent plots ### plotInds = np.reshape(np.argwhere(delays > config.plotMinDelay*1e6), (-1)) timeDelay = (delays[plotInds] - delays[0])/(3e8*1e-3) if timeDelay.shape[0] > 1: timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2]) else: timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05) X,Y = np.meshgrid(timeDelay, Qrange) #axLegAll.pcolor(Qrange, timeDelay, averageLegCoeffArray[0,:,:], cmap=cm.RdBu) # aggregate legendre 0 plot meanSubL0 = averageLegCoeffArray[0,plotInds,:]\ - np.mean(averageLegCoeffArray[0,plotInds,:], axis=0) axAllL0.pcolor(X, Y, meanSubL0.T, cmap=cm.RdBu) axAllL0.set_ylim([0,config.Qmax]) axAllL0.set_xlim([timeDelay[0],timeDelay[-1]]) lineOut = np.sum(meanSubL0[:,LOmin:LOmax], axis=1) plotL0LO.set_data(timeDelay[:-1], lineOut) axLinL0.set_ylim([min(lineOut),max(lineOut)]) axLinL0.set_xlim([timeDelay[0],timeDelay[-1]]) #plotL0LO.set_xdata(timeDelay[:-1]) #plotL0LO.set_ydata(lineOut) # aggregate legendre 2 plot meanSubL2 = averageLegCoeffArray[2,plotInds,:]\ - np.mean(averageLegCoeffArray[2,plotInds,:], axis=0) axAllL2.pcolor(X, Y, meanSubL2.T, cmap=cm.RdBu) axAllL2.set_ylim([0,config.Qmax]) axAllL2.set_xlim([timeDelay[0],timeDelay[-1]]) lineOut = np.sum(meanSubL2[:,LOmin:LOmax], axis=1) plotL2LO.set_data(timeDelay[:-1], lineOut) axLinL2.set_ylim([min(lineOut),max(lineOut)]) axLinL2.set_xlim([timeDelay[0],timeDelay[-1]]) #plotL2LO.set_xdata(timeDelay[:-1]) #plotL2LO.set_ydata(lineOut) #plt.autoscale(tight=True) plt.tight_layout() fig.canvas.draw() ################################ ##### plot final results ##### ################################ finalFig = plt.figure() ax = finalFig.add_subplot(111) timeDelay = (delays - delays[0])*1e-2/(3e8*1e-12) if timeDelay.shape[0] > 1: timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2]) else: timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05) X,Y = np.meshgrid(timeDelay, Qrange) for i in range(config.Nlegendres): ax.pcolor(X, Y, averageLegCoeffArray[i,:,:].T, cmap=cm.RdBu) finalFig.savefig(config.plotDirectory + "/" + config.plotPrefix + "Legendre" + str(i) + ".png")
def write_vid_results_multiprocess(self, detection, gpu_id): """ write results files in pascal devkit path :param all_boxes: boxes to be processed [bbox, confidence] :return: None """ print 'Writing {} ImageNetVID results file'.format('all') filename = self.get_result_file_template(gpu_id).format('all') frame_seg_len = self.frame_seg_len nms = py_nms_wrapper(0.3) data_time = 0 all_boxes = detection[0] frame_ids = detection[1] start_idx = 0 sum_frame_ids = np.cumsum(frame_seg_len) first_true_id = frame_ids[0] start_video = np.searchsorted(sum_frame_ids, first_true_id) for im_ind in range(1, len(frame_ids)): t = time.time() true_id = frame_ids[im_ind] video_index = np.searchsorted(sum_frame_ids, true_id) if (video_index != start_video): # reprensents a new video t1 = time.time() video = [ all_boxes[j][start_idx:im_ind] for j in range(1, self.num_classes) ] dets_all = seq_nms(video) for j in xrange(1, self.num_classes): for frame_ind, dets in enumerate(dets_all[j - 1]): keep = nms(dets) all_boxes[j][frame_ind + start_idx] = dets[keep, :] start_idx = im_ind start_video = video_index t2 = time.time() print 'video_index=', video_index, ' time=', t2 - t1 data_time += time.time() - t if (im_ind % 100 == 0): print '{} seq_nms testing {} data {:.4f}s'.format( frame_ids[im_ind - 1], im_ind, data_time / im_ind) # the last video video = [ all_boxes[j][start_idx:im_ind] for j in range(1, self.num_classes) ] dets_all = seq_nms(video) for j in xrange(1, self.num_classes): for frame_ind, dets in enumerate(dets_all[j - 1]): keep = nms(dets) all_boxes[j][frame_ind + start_idx] = dets[keep, :] with open(filename, 'wt') as f: for im_ind in range(len(frame_ids)): for cls_ind, cls in enumerate(self.classes): if cls == '__background__': continue dets = all_boxes[cls_ind][im_ind] if len(dets) == 0: continue # the imagenet expects 0-based indices for k in range(dets.shape[0]): f.write( '{:d} {:d} {:.4f} {:.2f} {:.2f} {:.2f} {:.2f}\n'. format(frame_ids[im_ind], cls_ind, dets[k, -1], dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3])) return
def del_data_at(self, ind):
    idx = np.searchsorted(self.inds, ind)
    self.inds = np.delete(self.inds, idx)
    self.data = np.delete(self.data, idx, axis=0)
    self._update_centroid()
def find_slice( data: np.ndarray, positions: np.ndarray, s: slice ) -> Tuple[np.ndarray, np.ndarray]: """ Get slice of RLE data. Parameters ---------- data: Data at start of reach run. positions: End positions of runs. Returns ------- data: Data at start of reach run. positions: End positions of runs. """ length = get_len(positions) start = 0 stop = length step = None invert = False if (s.step is not None) and (s.step != 1): if s.step < 0: invert = True step = -s.step else: step = s.step if s.start is not None: if s.start < 0: start = max(start, length + s.start) else: start = max(start, s.start) if s.stop is not None: if s.stop < 0: stop = max(0, min(stop, length + s.stop)) else: stop = min(stop, s.stop) if start == 0: idx_start = 0 elif start >= length: idx_start = len(positions) else: idx_start = np.searchsorted(positions, start, side="right") if stop == 0: idx_stop = 0 elif stop >= length: idx_stop = len(positions) else: idx_stop = np.searchsorted(positions, stop, side="left") + 1 data = data[idx_start:idx_stop] positions = positions[idx_start:idx_stop] - start if len(positions) > 0: positions[-1] = stop - start if invert: lenghts = calc_lengths(positions) lenghts = lenghts[::-1] positions = np.cumsum(lenghts) data = data[::-1] if step: positions = ((positions - 1) // step) + 1 mask = np.empty(len(positions), dtype=bool) if len(positions) > 0: mask[0] = True mask[1:] = positions[1:] != positions[:-1] data = data[mask] positions = positions[mask] data, positions = recompress(data, positions) return (data, positions)
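# Small illustration of the run-lookup idiom behind the slicing above: with
# `positions` holding the cumulative end offset of each run, searchsorted
# with side='right' tells which run contains a given element index. The RLE
# data below is invented.
import numpy as np

data = np.array(['a', 'b', 'c'])          # value of each run
positions = np.array([3, 5, 9])           # runs cover [0:3), [3:5), [5:9)

def value_at(i):
    run = np.searchsorted(positions, i, side='right')
    return data[run]

print(value_at(0), value_at(3), value_at(8))   # a b c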
def get_data_at(self, ind):
    return self.data[np.searchsorted(self.inds, ind)]