def stretch(array: np.ndarray, min: int = 0, max: int = 1, fill_dtype=None) -> np.ndarray:
    """'Stretch' the profile to fit a new min and max value and interpolate in between.
    From: http://www.labri.fr/perso/nrougier/teaching/numpy.100/ exercise #17

    Parameters
    ----------
    array: numpy.ndarray
        The numpy array to stretch.
    min : number
        The new minimum of the values.
    max : number
        The new maximum value.
    fill_dtype : numpy data type
        If None (default), the array will be stretched to the passed min and max.
        If a numpy data type (e.g. np.int16), the array will be stretched to fit the full
        range of values of that data type. If a value is given for this parameter, it
        overrides ``min`` and ``max``.
    """
    new_max = max
    new_min = min
    if fill_dtype is not None:
        try:
            di = np.iinfo(fill_dtype)
        except ValueError:
            di = np.finfo(fill_dtype)
        new_max = di.max
        new_min = di.min
    # perfectly normalize the array (0..1)
    stretched_array = (array - array.min()) / (array.max() - array.min())
    # stretch normalized array to new max/min: scale by the span, then shift
    # (the original multiplied by new_max only, which is wrong for new_min != 0)
    stretched_array *= (new_max - new_min)
    stretched_array += new_min
    # cast to the fill dtype if one was requested; casting back to the input dtype
    # (as the original did) would truncate a float stretch of an integer array
    if fill_dtype is not None:
        stretched_array = stretched_array.astype(fill_dtype)
    return stretched_array
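# Hedged usage sketch for ``stretch`` above, not part of the original module:
# rescale a float profile into the full uint8 range. Assumes numpy only.
import numpy as np

_profile = np.linspace(-1.0, 1.0, 5)
print(stretch(_profile, fill_dtype=np.uint8))  # [  0  63 127 191 255]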
def norm_image(self, arr: np.ndarray):
    """Normalize a numpy array to the range 0~255 and convert it to np.uint8.

    :param arr: the numpy array to process
    :return: a uint8 array with values in the range 0~255
    """
    if not arr.min() == arr.max():
        arr = (arr - arr.min()) / (arr.max() - arr.min()) * 255
    return np.array(arr, dtype=np.uint8)
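# Hedged sketch exercising ``norm_image`` above; the method ignores ``self``,
# so the sketch calls it directly with a dummy instance. Assumes numpy only.
import numpy as np

_img = np.array([[0.2, 0.4], [0.6, 0.8]])
_out = norm_image(None, _img)
print(_out.dtype, _out.min(), _out.max())  # uint8 0 255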
def __call__(self, data: np.ndarray, learning_rate: float = 1.0,
             steps: int = 1000, db: bool = True) -> List[float]:
    """`Learn` the parameters of best fit for the given data and model."""
    _min = data.min()
    _max = data.max()
    # scale amplitude to [0, 1]
    self.data = (data - _min) / (_max - _min)
    self.cubeX, self.cubeY = data.shape
    self.learning_rate = learning_rate
    self.steps = steps
    # perform the fit
    result = self.simplefit()
    # unscale amplitude of resultant
    result[0] = result[0] * (_max - _min) + _min
    result_as_list = result.tolist()
    self._counter += 1
    return result_as_list
def _create_cmap_scale(values_arr: np.ndarray, vega: dict, kwargs: dict):
    cmap = kwargs.pop("cmap", DEFAULT_PALETTE)
    cmap_min = float(kwargs.pop("cmap_min", values_arr.min()))
    cmap_max = float(kwargs.pop("cmap_max", values_arr.max()))
    # TODO: Apply cmap_normalize parameter
    vega["scales"].append(
        {
            "name": "color",
            "type": "sequential",
            "domain": [cmap_min, cmap_max],
            "range": {"scheme": cmap},
            "zero": False,
            "nice": False,
        }
    )
def sndwrite(samples: np.ndarray, sr: int, outfile: str, encoding: str = 'auto') -> None:
    """
    samples  --> Array-like. the actual samples, shape=(nframes, channels)
    sr       --> Sampling-rate
    outfile  --> The name of the outfile. the extension will determine
                 the file-format. The formats supported depend on the
                 available backends. Without additional backends, only
                 uncompressed formats are supported (wav, aif)
    encoding --> one of:
                 - 'auto' or None: the encoding is determined from the format
                   given by the extension of outfile, and from the data
                 - 'pcm16'
                 - 'pcm24'
                 - 'pcm32'
                 - 'flt32'

                 NB: not all file formats support all encodings.
                 Throws a SndfileError if the format does not support
                 the given encoding.

                 If set to 'auto', an encoding will be selected based on the
                 file-format and on the data. The bitdepth of the data is
                 measured, and if the file-format supports it, it will be
                 used. For bitdepths of 8, 16 and 24 bits, a PCM encoding
                 will be used. For a bitdepth of 32 bits, a FLOAT encoding
                 will be used, or the next lower supported encoding.
    """
    if encoding in ('auto', None):
        encoding = _guessEncoding(samples, outfile)
    # normalize in the case where there would be clipping
    clipping = (samples > 1).any() or (samples < -1).any()
    if encoding.startswith('pcm') and clipping:
        maxvalue = max(samples.max(), abs(samples.min()))
        samples = samples / maxvalue
    backend = _getWriteBackend(outfile, encoding)
    if not backend:
        raise SndfileError("No backend found to support the given format")
    logger.debug(f"sndwrite: using backend {backend.name}")
    return backend.write(samples, sr, outfile, encoding)
def interpolate(self, lon: np.ndarray, lat: np.ndarray,
                dates: np.ndarray) -> np.ndarray:
    """Interpolate the SSH to the required coordinates."""
    ds = self._select_ds(
        dates.min(),  # type: ignore
        dates.max())  # type: ignore
    assert np.all(np.diff(ds.ocean_time.values) == self._dt)
    assert np.all(np.diff(ds.lon_rho.values, axis=0) < 1e-10)
    assert np.all(np.diff(ds.lat_rho.values, axis=1) < 1e-10)
    t_axis = pyinterp.TemporalAxis(ds.ocean_time.values)
    grid3d = pyinterp.Grid3D(
        pyinterp.Axis(ds.lon_rho.values[0, :], is_circle=True),
        pyinterp.Axis(ds.lat_rho.values[:, 0]),
        t_axis,
        ds[self.ssh].values.T)
    ssh = pyinterp.trivariate(grid3d,
                              lon.ravel(),
                              lat.ravel(),
                              t_axis.safe_cast(dates.ravel()),
                              num_threads=1).reshape(lon.shape)
    return ssh
def _get_y_c(cls, x_k: np.ndarray, y_k: np.ndarray, x_s: np.ndarray,
             y_s: np.ndarray, x_in_edge: np.ndarray, y_in_edge: np.ndarray,
             x_out_edge: np.ndarray, y_out_edge: np.ndarray):
    """Returns the y coordinate of the section centroid."""
    # split the inlet-edge coordinate arrays into two parts, by whether they
    # belong to the pressure side or the suction side
    x_s_in_edge, x_k_in_edge = np.split(
        x_in_edge, [list(x_in_edge).index(x_in_edge.min())])
    y_s_in_edge, y_k_in_edge = np.split(
        y_in_edge, [list(x_in_edge).index(x_in_edge.min())])
    # split the outlet-edge coordinate arrays into two parts, by whether they
    # belong to the pressure side or the suction side
    x_k_out_edge, x_s_out_edge = np.split(
        x_out_edge, [list(x_out_edge).index(x_out_edge.max())])
    y_k_out_edge, y_s_out_edge = np.split(
        y_out_edge, [list(x_out_edge).index(x_out_edge.max())])
    # join the pressure-side and suction-side coordinate arrays
    x_k = np.array(list(x_k_in_edge) + list(x_k) + list(x_k_out_edge))
    y_k = np.array(list(y_k_in_edge) + list(y_k) + list(y_k_out_edge))
    x_s = np.array(list(x_s_in_edge) + list(x_s) + list(x_s_out_edge))
    y_s = np.array(list(y_s_in_edge) + list(y_s) + list(y_s_out_edge))
    # interpolation
    y_k_int = interp1d(x_k, y_k)
    y_s_int = interp1d(x_s, y_s)
    # area
    square = quad(y_s_int, x_s.min(), x_s.max())[0] - quad(
        y_k_int, x_k.min(), x_k.max())[0]
    # first moment of area about the x axis
    s_x = 0.5 * (quad(lambda x: y_s_int(x)**2, x_s.min(), x_s.max())[0] -
                 quad(lambda x: y_k_int(x)**2, x_k.min(), x_k.max())[0])
    # y coordinate of the section centroid
    y_c = s_x / square
    return y_c, square
def setImage(self, x: np.ndarray, y: np.ndarray, z: np.ndarray) -> None:
    """Set data to be plotted as image. Clears the plot before creating a new
    image item that gets placed in the plot and linked to the colorscale.

    :param x: x coordinates (as 2D meshgrid)
    :param y: y coordinates (as 2D meshgrid)
    :param z: data values (as 2D meshgrid)
    :return: None
    """
    self.clearPlot()
    self.img = pg.ImageItem()
    self.plot.addItem(self.img)
    self.img.setImage(z)
    self.img.setRect(
        QtCore.QRectF(x.min(), y.min(),
                      x.max() - x.min(), y.max() - y.min()))
    self.colorbar.setImageItem(self.img)
    self.colorbar.rounding = (z.max() - z.min()) * 1e-2
    self.colorbar.setLevels((z.min(), z.max()))
def print_ary_props(ary: np.ndarray) -> None:
    print('shape: ', ary.shape)
    print('data type: ', ary.dtype)
    # np.ndarray has no .asscalar() (that is MXNet API); .item() extracts the Python scalar
    print('minimum value: ', ary.min().item())
    print('maximum value: ', ary.max().item())
def to_mask(clipped: np.ndarray) -> np.ndarray:
    # work on a float copy: the original in-place -=/ /= would fail on integer
    # input and would silently mutate the caller's array
    clipped = clipped.astype(float) - clipped.min()
    clipped /= clipped.max()
    return clipped
def _check_slate_ope_inputs(
    slate_id: np.ndarray,
    reward: np.ndarray,
    position: np.ndarray,
    pscore: np.ndarray,
    evaluation_policy_pscore: np.ndarray,
    pscore_type: str,
) -> Optional[ValueError]:
    """Check inputs of Slate OPE estimators.

    Parameters
    ----------
    slate_id: array-like, shape (<= n_rounds * len_list,)
        Slate id observed in each round of the logged bandit feedback.

    reward: array-like, shape (<= n_rounds * len_list,)
        Reward observed in each round and slot of the logged bandit feedback, i.e., :math:`r_{t}(k)`.

    position: array-like, shape (<= n_rounds * len_list,)
        Positions of each round and slot in the given logged bandit feedback.

    pscore: array-like, shape (<= n_rounds * len_list,)
        Action choice probabilities by a behavior policy (propensity scores).

    evaluation_policy_pscore: array-like, shape (<= n_rounds * len_list,)
        Action choice probabilities by the evaluation policy (propensity scores).

    pscore_type: str
        Either "pscore", "pscore_item_position", or "pscore_cascade".

    """
    # position
    if not isinstance(position, np.ndarray):
        raise ValueError("position must be ndarray")
    if position.ndim != 1:
        raise ValueError("position must be 1-dimensional")
    if not (position.dtype == int and position.min() >= 0):
        raise ValueError("position elements must be non-negative integers")
    # reward
    if not isinstance(reward, np.ndarray):
        raise ValueError("reward must be ndarray")
    if reward.ndim != 1:
        raise ValueError("reward must be 1-dimensional")
    # pscore
    if not isinstance(pscore, np.ndarray):
        raise ValueError(f"{pscore_type} must be ndarray")
    if pscore.ndim != 1:
        raise ValueError(f"{pscore_type} must be 1-dimensional")
    if np.any(pscore <= 0) or np.any(pscore > 1):
        raise ValueError(f"{pscore_type} must be in the range of (0, 1]")
    # evaluation_policy_pscore
    if not isinstance(evaluation_policy_pscore, np.ndarray):
        raise ValueError(f"evaluation_policy_{pscore_type} must be ndarray")
    if evaluation_policy_pscore.ndim != 1:
        raise ValueError(f"evaluation_policy_{pscore_type} must be 1-dimensional")
    if np.any(evaluation_policy_pscore < 0) or np.any(evaluation_policy_pscore > 1):
        raise ValueError(
            f"evaluation_policy_{pscore_type} must be in the range of [0, 1]")
    # slate id
    if not isinstance(slate_id, np.ndarray):
        raise ValueError("slate_id must be ndarray")
    if slate_id.ndim != 1:
        raise ValueError("slate_id must be 1-dimensional")
    if not (slate_id.dtype == int and slate_id.min() >= 0):
        raise ValueError("slate_id elements must be non-negative integers")
    if not (slate_id.shape[0] == position.shape[0] == reward.shape[0]
            == pscore.shape[0] == evaluation_policy_pscore.shape[0]):
        raise ValueError(
            f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} must be the same size."
        )
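# Hedged sketch exercising ``_check_slate_ope_inputs`` above: a propensity
# score of 0 is outside (0, 1] and should raise. Standalone, assumes numpy only.
import numpy as np

try:
    _check_slate_ope_inputs(
        slate_id=np.zeros(2, dtype=int),
        reward=np.ones(2),
        position=np.arange(2),
        pscore=np.array([0.0, 0.5]),
        evaluation_policy_pscore=np.array([0.5, 0.5]),
        pscore_type="pscore",
    )
except ValueError as err:
    print(err)  # pscore must be in the range of (0, 1]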
def get_y_hetero_mesh(y: np.ndarray, num):
    size = y.max() - y.min()
    mesh = get_mesh(np.array(np.linspace(0, size, num)), y,
                    np.array([0, size / (num - 1)]))
    return mesh
def _update_lims(self, X: numpy.ndarray):
    """Update the x axis boundaries to match the values of the plotted distribution."""
    self.xlim = (X.min(), X.max())
def Workflow_cardio_npm1_100x(
    struct_img: np.ndarray,
    rescale_ratio: float = -1,
    output_type: str = "default",
    output_path: Union[str, Path] = None,
    fn: Union[str, Path] = None,
    output_func=None,
):
    """
    classic segmentation workflow wrapper for structure Cardio NPM1 100x

    Parameter:
    -----------
    struct_img: np.ndarray
        the 3D image to be segmented
    rescale_ratio: float
        an optional parameter to allow rescaling the image before running the
        segmentation functions, default is no rescaling
    output_type: str
        select how to handle output. Currently, four types are supported:
        1. default: the result will be saved at output_path whose filename is
            original name without extension + "_struct_segmentation.tiff"
        2. array: the segmentation result will be simply returned as a numpy array
        3. array_with_contour: segmentation result will be returned together
            with the contour of the segmentation
        4. customize: pass in an extra output_func to do a special save. All the
            intermediate results, names of these results, the output_path, and the
            original filename (without extension) will be passed in to output_func.
    """
    ##########################################################################
    # PARAMETERS:
    #   note that these parameters are supposed to be fixed for the structure
    #   and work well across different datasets
    intensity_norm_param = [0.5, 2.5]
    gaussian_smoothing_sigma = 1
    gaussian_smoothing_truncate_range = 3.0
    dot_2d_sigma = 2
    # dot_2d_sigma_extra = 1
    # dot_2d_cutoff = 0.025
    minArea = 1
    low_level_min_size = 1000
    ##########################################################################

    out_img_list = []
    out_name_list = []

    ###################
    # PRE_PROCESSING
    ###################
    # intensity normalization (min/max)
    struct_img = intensity_normalization(struct_img, scaling_param=intensity_norm_param)

    out_img_list.append(struct_img.copy())
    out_name_list.append("im_norm")

    # rescale if needed
    if rescale_ratio > 0:
        struct_img = zoom(struct_img, (1, rescale_ratio, rescale_ratio), order=2)
        struct_img = (struct_img - struct_img.min() + 1e-8) / (
            struct_img.max() - struct_img.min() + 1e-8)
        gaussian_smoothing_truncate_range = (
            gaussian_smoothing_truncate_range * rescale_ratio)

    # smoothing with gaussian filter
    structure_img_smooth = image_smoothing_gaussian_3d(
        struct_img,
        sigma=gaussian_smoothing_sigma,
        truncate_range=gaussian_smoothing_truncate_range,
    )

    out_img_list.append(structure_img_smooth.copy())
    out_name_list.append("im_smooth")

    ###################
    # core algorithm
    ###################
    # step 1: low level thresholding
    # global_otsu = threshold_otsu(structure_img_smooth)
    global_tri = threshold_triangle(structure_img_smooth)
    global_median = np.percentile(structure_img_smooth, 50)

    th_low_level = (global_tri + global_median) / 2
    # print(global_median)
    # print(global_tri)
    # print(th_low_level)
    # imsave('img_smooth.tiff', structure_img_smooth)

    bw_low_level = structure_img_smooth > th_low_level
    bw_low_level = remove_small_objects(bw_low_level,
                                        min_size=low_level_min_size,
                                        connectivity=1,
                                        in_place=True)
    bw_low_level = dilation(bw_low_level, selem=ball(2))

    # step 2: high level thresholding
    local_cutoff = 0.333 * threshold_otsu(structure_img_smooth)
    bw_high_level = np.zeros_like(bw_low_level)
    lab_low, num_obj = label(bw_low_level, return_num=True, connectivity=1)
    for idx in range(num_obj):
        single_obj = lab_low == (idx + 1)
        local_otsu = threshold_otsu(structure_img_smooth[single_obj])
        if local_otsu > local_cutoff:
            bw_high_level[np.logical_and(
                structure_img_smooth > 1.2 * local_otsu, single_obj)] = 1

    # imsave('seg_coarse.tiff', bw_high_level.astype(np.uint8))

    out_img_list.append(bw_high_level.copy())
    out_name_list.append("bw_coarse")

    response_bright = dot_slice_by_slice(structure_img_smooth,
                                         log_sigma=dot_2d_sigma)

    bw_extra = response_bright > 0.03  # dot_2d_cutoff
    bw_extra[~bw_low_level] = 0

    bw_final = np.logical_or(bw_extra, bw_high_level)
    # bw_final[holes]=0

    ###################
    # POST-PROCESSING
    ###################
    seg = remove_small_objects(bw_final,
                               min_size=minArea,
                               connectivity=1,
                               in_place=True)

    # output
    seg = seg > 0
    seg = seg.astype(np.uint8)
    seg[seg > 0] = 255

    out_img_list.append(seg.copy())
    out_name_list.append("bw_final")

    if output_type == "default":
        # the default final output, simply save it to the output path
        save_segmentation(seg, False, Path(output_path), fn)
    elif output_type == "customize":
        # the hook for passing in a customized output function
        # use "out_img_list" and "out_name_list" in your hook to
        # customize your output functions
        output_func(out_img_list, out_name_list, Path(output_path), fn)
    elif output_type == "array":
        return seg
    elif output_type == "array_with_contour":
        return (seg, generate_segmentation_contour(seg))
    else:
        # the original message was missing the f-prefix, so the placeholder
        # was never interpolated
        raise NotImplementedError(f"invalid output type: {output_type}")
def min_max_norm(y: np.ndarray) -> np.ndarray:
    return (y - y.min()) / (y.max() - y.min())
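# Hedged sketch for ``min_max_norm`` above. Note the edge case: a constant
# array gives max == min and divides by zero. Standalone, assumes numpy only.
import numpy as np

print(min_max_norm(np.array([2.0, 4.0, 6.0])))  # [0.  0.5 1. ]
# a constant input would divide by zero; guard before calling:
_y = np.full(3, 5.0)
print(min_max_norm(_y) if _y.min() != _y.max() else np.zeros_like(_y))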
def get_switch_correction(
    ant_s11: np.ndarray,
    internal_switch: io.SwitchingState,
    f_in: np.ndarray = np.zeros([0, 1]),
    resistance_m: float = 50.166,
    n_terms: [int, Tuple] = 7,
    model_type: str = "polynomial",
) -> Tuple[np.ndarray, dict]:
    """
    Compute the switch correction.

    Parameters
    ----------
    ant_s11 : array_like
        Array of S11 measurements as a function of frequency
    internal_switch : :class:`io.SwitchingState` instance
        An internal switching state object.
    f_in : np.ndarray
        The input frequencies
    resistance_m : float
        The resistance of the switch.
    n_terms : int or tuple
        Specifies the order of the fitted polynomial for the S11 measurements.
        If a tuple, must be length 3, specifying the order for the s11, s12,
        s22 respectively.
    model_type : str
        The type of model to fit to the S11.

    Returns
    -------
    corr_ant_s11 : np.ndarray
        The corrected antenna S11.
    fits : dict
        Dictionary of fits to the reflection coefficients s11, s12 and s22.
    """
    corrections, sw = _read_data_and_corrections(internal_switch)

    flow = f_in.min()
    fhigh = f_in.max()
    f_center = (fhigh + flow) / 2

    # Computation of S-parameters to the receiver input
    oa, sa, la = rc.agilent_85033E(internal_switch.freq * 1e6, resistance_m, 1)

    xx, s11, s12s21, s22 = rc.de_embed(
        oa,
        sa,
        la,
        corrections["open"],
        corrections["short"],
        corrections["match"],
        corrections["open"],
    )

    # Frequency normalization
    fn = internal_switch.freq / f_center
    fn_in = f_in / f_center if len(f_in) > 10 else fn

    n_terms = (n_terms,) * 3 if not hasattr(n_terms, "__len__") else n_terms
    assert len(n_terms) == 3

    # Polynomial fits
    fits = {}
    model = Model._models[model_type.lower()](n_terms=n_terms[0], default_x=fn)
    for ikind, (kind, val, n) in enumerate(
        zip(["s11", "s12s21", "s22"], [s11, s12s21, s22], n_terms)
    ):
        model.update_nterms(n)
        fits[kind] = ModelFit(model, ydata=np.real(val)).evaluate(
            fn_in
        ) + 1j * ModelFit(model, ydata=np.imag(val)).evaluate(fn_in)

    # Corrected antenna S11
    return rc.gamma_de_embed(fits["s11"], fits["s12s21"], fits["s22"], ant_s11), fits
def p2z_hydrostatic(p: numpy.ndarray, T: numpy.ndarray, h2o,
                    p0: (numpy.number, numbers.Number, numpy.ndarray),
                    z0: (numpy.number, numbers.Number, numpy.ndarray),
                    lat: (numpy.number, numbers.Number, numpy.ndarray) = 45,
                    z_acc: (numpy.number, numbers.Number, numpy.ndarray) = -1,
                    ellps="WGS84",
                    extend=False):
    """Calculate hydrostatic elevation

    Translated from
    https://www.sat.ltu.se/trac/rt/browser/atmlab/trunk/geophysics/pt2z.m

    WARNING: seems to get significant errors. Testing with an ACE profile
    between 8.5 and 150 km, I get errors from 10 up to +100 metre between
    10 and 50 km, increasing to +300 metre at 100 km, after which the bias
    changes sign, crosses 0 at 113 km and finally reaches -4000 metre at
    150 km. This is not due to humidity. Atmlabs pt2z version differs only
    30 metre from mine. In %, this error is below 0.3% up to 100 km, then
    changes sign and reaches -3% at 150 km. For many purposes this is good
    enough, though, and certainly better than p2z_oversimplified.

    :param array p: Pressure [Pa]
    :param array T: Temperature [K]. Must match the size of p.
    :param h2o: Water vapour [vmr]. If negligible, set to 0. Must be either
        scalar, or match the size of p and T.
    :param p0:
    :param z0:
    :param lat: Latitude [degrees]. This has some effect on the vertical
        distribution of gravitational acceleration, leading to a difference
        of some 500 metre at 150 km. Defaults to 45°.
    :param z_acc: Up to what precision to iteratively calculate the
        z-profile. If -1, run two iterations, which should be accurate,
        according to the comment below.
    :param str ellps: Ellipsoid to use. The function relies on pyproj.Geod,
        which is an interface to the proj library. For a full table of
        ellipsoids, run 'proj -le'.
    :param bool extend: If p0, z0 outside of p, z range, extend
        artificially. WARNING: This will assume CONSTANT T, h2o!
    :returns array z: Array of altitudes [m]. Same size as p and T.
    """
    # Original description:
    # % PT2Z   Hydrostatic altitudes
    # %
    # %    Calculates altitudes fulfilling hydrostatic equilibrium, based on
    # %    vertical profiles of pressure, temperature and water vapour. Pressure
    # %    and altitude of a reference point must be specified.
    # %
    # %    Molecular weights and gravitational constants are hard coded and
    # %    function is only valid for the Earth.
    # %
    # %    As the gravitation changes with altitude, an iterative process is
    # %    needed. The accuracy can be controlled by *z_acc*. The calculations
    # %    are repeated until the max change of the altitudes is below *z_acc*. If
    # %    z_acc<0, the calculations are run twice, which should give an accuracy
    # %    better than 1 m.
    # %
    # % FORMAT   z = pt2z( p, t, h2o, p0, z0 [,lat,z_acc,refell] )
    # %
    # % OUT   z         Altitudes [m].
    # % IN    p         Column vector of pressures [Pa].
    # %       t         Column vector of temperatures [K].
    # %       h2o       Water vapour [VMR]. Vector or a scalar, e.g. 0.
    # %       p0        Pressure of reference point [Pa].
    # %       z0        Altitude of reference point [m].
    # %       lat       Latitude. Default is 45.
    # %       z_acc     Accuracy for z. Default is -1.
    # %       ellipsoid Reference ellipsoid data, see *ellipsoidmodels*.
    # %                 Default is data matching WGS84.
    #
    # % 2005-05-11   Created by Patrick Eriksson.

    #32 function z = pt2z(p,t,h2o,p0,z0,varargin)
    #33 %
    #34 [lat,z_acc,ellipsoid] = optargs( varargin, { 45, -1, NaN } );
    #35 %
    ellipsoid = pyproj.Geod(ellps=ellps)
    #36 if isnan(ellipsoid)
    #37   ellipsoid = ellipsoidmodels('wgs84');
    #38 end
    #39 %&%
    #40 rqre_nargin( 5, nargin ); %&%
    #41 rqre_datatype( p, @istensor1 ); %&%
    #42 rqre_datatype( t, @istensor1 ); %&%
    #43 rqre_datatype( h2o, @istensor1 ); %&%
    #44 rqre_datatype( p0, @istensor0 ); %&%
    #45 rqre_datatype( z0, @istensor0 ); %&%
    #46 rqre_datatype( lat, @istensor0 ); %&%
    if not p.size == T.size:
        raise ValueError("p and T must have same length")
    if p.min() < 0:
        raise ValueError("Found negative pressures")
    if T.min() < 0:
        raise ValueError("Found negative temperatures")
    #47 np = length( p );
    #48 if length(t) ~= np %&%
    #49  error('The length of *p* and *t* must be identical.'); %&%
    #50 end %&%
    if not (isinstance(h2o, numbers.Real) or h2o.size in (p.size, 1)):
        raise ValueError("h2o must have length of p or be scalar")
    #51 if ~( length(h2o) == np | length(h2o) == 1 ) %&%
    #52  error('The length of *h2o* must be 1 or match *p*.'); %&%
    #53 end %&%

    # FIXME IS THIS NEEDED? Yes — See e-mail Patrick 2014-08-11
    if p0 > p[0] or p0 < p[-1]:
        if extend:
            if p0 > p[0]:  # p[0] is largest pressure, p0 even larger
                extend = "below"
                p = numpy.hstack([p0, p])
                T = numpy.hstack([T[0], T])
                h2o = numpy.hstack([h2o[0], h2o])
            elif p0 < p[-1]:
                extend = "above"  # p[-1] is smallest pressure, p0 even smaller
                p = numpy.hstack([p, p0])
                T = numpy.hstack([T, T[-1]])
                h2o = numpy.hstack([h2o, h2o[-1]])
        else:
            raise ValueError(
                ("reference pressure ({:.2f}) must be "
                 "in total pressure range ({:.2f} -- {:.2f})").format(
                     p0, p[0], p[-1]))
    # END FIXME

    #54 if p0 > p(1) | p0 < p(np) %&%
    #55  error('Reference point (p0) can not be outside range of *p*.'); %&%
    #56 end %&%
    #57
    #58
    #59 %= Expand *h2o* if necessary
    #60 %
    #61 if length(h2o) == 1
    #62  h2o = repmat( h2o, np, 1 );
    #63 end
    if isinstance(h2o, numbers.Real) or h2o.size == 1:
        h2o = h2o * numpy.ones_like(p)
    if h2o.max() > 1:
        raise ValueError(
            "Found h2o vmr values up to {:.2f}. Expected < 1.".format(
                h2o.max()))
    #64
    #65
    #66 %= Make rough estimate of *z*
    #67 %
    #68 z = p2z_simple( p );
    z = p2z_oversimplified(p)
    #69 z = shift2refpoint( p, z, p0, z0 );
    z = _shift2refpoint(p, z, p0, z0)
    #70
    #71
    #72 %= Set Earth radius and g at z=0
    #73 %
    #74 re = ellipsoidradii( ellipsoid, lat );
    # APPROXIMATION! Approximate radius at latitude by linear
    # interpolation in cos(lat) between semi-major-axis and
    # semi-minor-axis
    #
    # Get radius at latitude
    re = (ellipsoid.a * numpy.cos(numpy.deg2rad(lat)) +
          ellipsoid.b * (1 - numpy.cos(numpy.deg2rad(lat))))
    #75 g0 = lat2g0( lat );
    g0 = lat2g0(lat)
    #76
    #77
    #78 %= Gas constant and molecular weight of dry air and water vapour
    #79 %
    #80 r  = constants( 'GAS_CONST' );
    #81 md = 28.966;
    #82 mw = 18.016;
    #83 %
    #84 k  = 1-mw/md;        % 1 - eps
    k = 1 - M_w / M_d
    #85 rd = 1e3 * r / md;   % Gas constant for 1 kg dry air
    rd = 1e3 * R / M_d  # gas constant for 1 kg dry air
    #86
    #87
    #88 %= How to end iterations
    #89 %
    #90 if z_acc < 0
    #91  niter = 2;
    #92 else
    #93  niter = 99;
    #94 end
    niter = 2 if z_acc < 0 else 99
    #95
    #96 for iter = 1:niter
    for _iteration in range(niter):  # renamed from `i`, which the inner loop shadowed
        #97
        #98  zold = z;
        zold = z
        #99
        #100  g = z2g( re, g0, z );
        g = z2g(re, g0, z)
        #101
        #102  for i = 1 : (np-1)
        for i in range(p.size - 1):
            #103
            #104    gp = ( g(i) + g(i+1) ) / 2;
            gp = (g[i] + g[i + 1]) / 2
            #105
            #106    %-- Calculate average water VMR (= average e/p)
            #107    hm = (h2o(i)+h2o(i+1)) / 2;
            hm = (h2o[i] + h2o[i + 1]) / 2
            #108
            #109    %-- The virtual temperature (no liquid water)
            #110    tv = (t(i)+t(i+1)) / ( 2 * (1-hm*k) );   % E.g. 3.16 in Wallace&Hobbs
            tv = (T[i] + T[i + 1]) / (2 * (1 - hm * k))
            #111
            #112    %-- The change in vertical altitude from i to i+1
            #113    dz = rd * (tv/gp) * log( p(i)/p(i+1) );
            dz = rd * (tv / gp) * numpy.log(p[i] / p[i + 1])
            #114    z(i+1) = z(i) + dz;
            z[i + 1] = z[i] + dz
            #115
            #116  end
        #117
        #118  %-- Match the altitude of the reference point
        #119  z = shift2refpoint( p, z, p0, z0 );
        z = _shift2refpoint(p, z, p0, z0)
        #120
        #121  if z_acc >= 0 & max(abs(z-zold)) < z_acc
        #122    break;
        #123  end
        if z_acc >= 0 and max(abs(z - zold)) < z_acc:
            break
        #124
        #125 end
    #126
    #127 return

    # correct for extending
    if extend == "below":  # lowest pressure extra
        return z[1:]
    elif extend == "above":  # highest pressure extra
        return z[:-1]
    else:
        return z
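# Hedged numeric check of the per-layer hypsometric step used above,
# dz = rd * (tv / gp) * ln(p_i / p_{i+1}): for dry air (rd ~ 287 J/(kg K))
# at tv = 250 K and gp = 9.81 m/s^2, one decade of pressure spans roughly
# one scale height times ln(10), about 16.8 km. Standalone, assumes numpy only.
import numpy as np

rd, tv, gp = 287.0, 250.0, 9.81
dz = rd * (tv / gp) * np.log(1e4 / 1e3)  # layer from 100 hPa up to 10 hPa
print(f"{dz / 1e3:.1f} km")  # ~16.8 km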
def get_kde_img(vals: numpy.ndarray) -> str:
    fig, ax = pyplot.subplots(figsize=(6, 4), tight_layout=True)
    seaborn.kdeplot(vals, ax=ax, clip=(vals.min(), vals.max()))
    return get_img(fig)
def peak_detect(values: np.ndarray, threshold: Union[float, int] = None,
                min_distance: Union[float, int] = 10, max_number: int = None,
                search_region: Tuple[float, float] = (0.0, 1.0),
                find_min_instead: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Find the peaks or valleys of a 1D signal.

    Uses the difference (np.diff) in signal to find peaks. Current limitations
    include:
        1) Only for use in 1-D data; 2D may be possible with the gradient function.
        2) Will not detect peaks at the very edge of array (i.e. 0 or -1 index)

    Parameters
    ----------
    values : array-like
        Signal values to search for peaks within.
    threshold : int, float
        The value the peak must be above to be considered a peak. This removes "peaks"
        that are in a low-value region.
        If passed an int, the actual value is the threshold.
        E.g. when passed 15, any peak with a value <15 is removed.
        If passed a float, it will threshold as a percent. Must be between 0 and 1.
        E.g. when passed 0.4, any peak <40% of the maximum value will be removed.
    min_distance : int, float
        If passed an int, parameter is the number of elements apart a peak must be
        from neighboring peaks.
        If passed a float, must be between 0 and 1 and represents the ratio of the
        profile to exclude. E.g. if passed 0.05 with a 1000-element profile, the
        minimum peak width will be 0.05*1000 = 50 elements.
    max_number : int
        Specify up to how many peaks will be returned. E.g. if 3 is passed in and
        5 peaks are found, only the 3 largest peaks will be returned.
    search_region : tuple
        The (left, right) edges of the profile to search within. Floats are
        interpreted as fractions of the profile length, ints as element indices.
    find_min_instead : bool
        If False (default), peaks will be returned. If True, valleys will be returned.

    Returns
    -------
    max_vals : numpy.array
        The values of the peaks found.
    max_idxs : numpy.array
        The x-indices (locations) of the peaks.

    Raises
    ------
    ValueError
        If float not between 0 and 1 passed to threshold.
    """
    peak_vals = []  # a list to hold the y-values of the peaks. Will be converted to a numpy array
    peak_idxs = []  # ditto for x-values (index) of y data.

    if find_min_instead:
        values = -values

    """Limit search to search region"""
    left_end = search_region[0]
    if is_float_like(left_end):
        left_index = int(left_end * len(values))
    elif is_int_like(left_end):
        left_index = left_end
    else:
        raise ValueError(f"{left_end} must be a float or int")

    right_end = search_region[1]
    if is_float_like(right_end):
        right_index = int(right_end * len(values))
    elif is_int_like(right_end):
        right_index = right_end
    else:
        raise ValueError(f"{right_end} must be a float or int")

    # minimum peak spacing calc
    if isinstance(min_distance, float):
        # the original test (0 > min_distance >= 1) could never be True
        if not 0 <= min_distance < 1:
            raise ValueError("When min_peak_width is passed a float, value must be between 0 and 1")
        else:
            min_distance = int(min_distance * len(values))

    values = values[left_index:right_index]

    """Determine threshold value"""
    if isinstance(threshold, float) and threshold < 1:
        data_range = values.max() - values.min()
        threshold = threshold * data_range + values.min()
    elif isinstance(threshold, float) and threshold >= 1:
        raise ValueError("When threshold is passed a float, value must be less than 1")
    elif threshold is None:
        threshold = values.min()

    """Take difference"""
    values_diff = np.diff(values.astype(float))  # y and y_diff must be converted to signed type.

    """Find all potential peaks"""
    for idx in range(len(values_diff) - 1):
        # For each item of the diff array, check if:
        # 1) The y-value is above the threshold.
        # 2) The value of y_diff is positive (negative for valley search), it means the y-value changed upward.
        # 3) The next y_diff value is zero or negative (or positive for valley search); a positive-then-negative
        #    diff value means the value is a peak of some kind. If the diff is zero it could be a flat peak,
        #    which still counts.

        # 1)
        if values[idx + 1] < threshold:
            continue

        y1_gradient = values_diff[idx] > 0
        y2_gradient = values_diff[idx + 1] <= 0

        # 2) & 3)
        if y1_gradient and y2_gradient:
            # If the next value isn't zero it's a single-pixel peak. Easy enough.
            if values_diff[idx + 1] != 0:
                peak_vals.append(values[idx + 1])
                peak_idxs.append(idx + 1 + left_index)
            # Else if the diff value is zero, it could be a flat peak, or it could keep going up;
            # we don't know yet.
            else:
                # Continue on until we find the next nonzero diff value.
                try:
                    shift = 0
                    while values_diff[(idx + 1) + shift] == 0:
                        shift += 1
                        if (idx + 1 + shift) >= (len(values_diff) - 1):
                            break
                    # If the next diff is negative (or positive for min), we've found a peak.
                    # Also put the peak at the center of the flat region.
                    is_a_peak = values_diff[(idx + 1) + shift] < 0
                    if is_a_peak:
                        peak_vals.append(values[int((idx + 1) + np.round(shift / 2))])
                        peak_idxs.append((idx + 1 + left_index) + np.round(shift / 2))
                except IndexError:
                    pass

    # convert to numpy arrays
    peak_vals = np.array(peak_vals)
    peak_idxs = np.array(peak_idxs)

    """Enforce the min_peak_distance by removing smaller peaks."""
    # For each peak, determine if the next peak is within the min peak width range.
    index = 0
    while index < len(peak_idxs) - 1:
        # If the second peak is closer than min_peak_distance to the first peak,
        # find the larger peak and remove the other one.
        if peak_idxs[index] > peak_idxs[index + 1] - min_distance:
            if peak_vals[index] > peak_vals[index + 1]:
                idx2del = index + 1
            else:
                idx2del = index
            peak_vals = np.delete(peak_vals, idx2del)
            peak_idxs = np.delete(peak_idxs, idx2del)
        else:
            index += 1

    """If Maximum Number passed, return only up to number given based on a sort of peak values."""
    if max_number is not None and len(peak_idxs) > max_number:
        sorted_peak_vals = peak_vals.argsort()  # sorts low to high
        peak_vals = peak_vals[sorted_peak_vals[-max_number:]]
        peak_idxs = peak_idxs[sorted_peak_vals[-max_number:]]

    # If we were looking for minimums, convert the values back to the original sign
    if find_min_instead:
        peak_vals = -peak_vals

    return peak_vals, peak_idxs
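# Hedged sketch exercising ``peak_detect`` above on a synthetic two-peak
# signal. ``is_float_like``/``is_int_like`` are helpers from the original
# module; minimal stand-ins are defined here so the sketch is self-contained.
import numpy as np

def is_float_like(v) -> bool:
    return isinstance(v, float)

def is_int_like(v) -> bool:
    return isinstance(v, (int, np.integer))

_x = np.linspace(0, 4 * np.pi, 400)
_sig = np.sin(_x)
vals, idxs = peak_detect(_sig, threshold=0.5, min_distance=0.1)
print(vals, idxs)  # two peaks near x = pi/2 and x = 5*pi/2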
def _check_slate_ope_inputs(
    slate_id: np.ndarray,
    reward: np.ndarray,
    position: np.ndarray,
    pscore: np.ndarray,
    evaluation_policy_pscore: np.ndarray,
    pscore_type: str,
) -> Optional[ValueError]:
    """Check inputs of Slate OPE estimators.

    Parameters
    -----------
    slate_id: array-like, shape (<= n_rounds * len_list,)
        Slate id observed for each data in logged bandit data.

    reward: array-like, shape (<= n_rounds * len_list,)
        Slot-level rewards, i.e., :math:`r_{i}(l)`.

    position: array-like, shape (<= n_rounds * len_list,)
        Indices to differentiate positions in a recommendation interface where the actions are presented.

    pscore: array-like, shape (<= n_rounds * len_list,)
        Action choice probabilities of the logging/behavior policy (propensity scores).

    evaluation_policy_pscore: array-like, shape (<= n_rounds * len_list,)
        Action choice probabilities of the evaluation policy.

    pscore_type: str
        Either "pscore", "pscore_item_position", or "pscore_cascade".

    """
    # position
    check_array(array=position, name="position", expected_dim=1)
    if not (position.dtype == int and position.min() >= 0):
        raise ValueError("`position` elements must be non-negative integers")
    # reward
    check_array(array=reward, name="reward", expected_dim=1)
    # pscore
    check_array(array=pscore, name=f"{pscore_type}", expected_dim=1)
    if np.any(pscore <= 0) or np.any(pscore > 1):
        raise ValueError(f"`{pscore_type}` must be in the range of (0, 1]")
    # evaluation_policy_pscore
    check_array(
        array=evaluation_policy_pscore,
        name=f"evaluation_policy_{pscore_type}",
        expected_dim=1,
    )
    if np.any(evaluation_policy_pscore < 0) or np.any(evaluation_policy_pscore > 1):
        raise ValueError(
            f"`evaluation_policy_{pscore_type}` must be in the range of [0, 1]"
        )
    # slate id
    check_array(array=slate_id, name="slate_id", expected_dim=1)
    if not (slate_id.dtype == int and slate_id.min() >= 0):
        raise ValueError("slate_id elements must be non-negative integers")
    if not (slate_id.shape[0] == position.shape[0] == reward.shape[0]
            == pscore.shape[0] == evaluation_policy_pscore.shape[0]):
        raise ValueError(
            f"`slate_id`, `position`, `reward`, `{pscore_type}`, and `evaluation_policy_{pscore_type}` "
            "must have the same number of samples."
        )
def normalize(values: np.ndarray) -> np.ndarray:
    return (values - values.min()) / (values.max() - values.min())
def check_cascade_dr_inputs(
    n_unique_action: int,
    slate_id: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    position: np.ndarray,
    pscore_cascade: np.ndarray,
    evaluation_policy_pscore_cascade: np.ndarray,
    q_hat: np.ndarray,
    evaluation_policy_action_dist: np.ndarray,
) -> Optional[ValueError]:
    """Check inputs of SlateCascadeDoublyRobust.

    Parameters
    -----------
    n_unique_action: int
        Number of unique actions.

    slate_id: array-like, shape (<= n_rounds * len_list,)
        Indices to differentiate slates (i.e., ranking or list of actions)

    action: array-like, (<= n_rounds * len_list,)
        Actions observed at each slot in a ranking/slate in logged bandit data,
        i.e., :math:`a_{i}(l)`, which is chosen by the behavior policy :math:`\\pi_b`.

    reward: array-like, shape (<= n_rounds * len_list,)
        Slot-level rewards observed for each data in logged bandit data, i.e., :math:`r_{i}(l)`.

    position: array-like, shape (<= n_rounds * len_list,)
        Indices to differentiate slots/positions in a slate/ranking.

    pscore_cascade: array-like, shape (<= n_rounds * len_list,)
        Probabilities of behavior policy selecting action :math:`a` at position (slot) `k`
        conditional on the previous actions (presented at position `1` to `k-1`),
        i.e., :math:`\\pi_b(a_i(l) | x_i, a_i(1), \\ldots, a_i(l-1))`.

    evaluation_policy_pscore_cascade: array-like, shape (<= n_rounds * len_list,)
        Probabilities of evaluation policy selecting action :math:`a` at position (slot) `k`
        conditional on the previous actions (presented at position `1` to `k-1`),
        i.e., :math:`\\pi_e(a_i(l) | x_i, a_i(1), \\ldots, a_i(l-1))`.

    q_hat: array-like (<= n_rounds * len_list * n_unique_actions, )
        :math:`\\hat{Q}_l` used in Cascade-DR,
        i.e., :math:`\\hat{Q}_{i,l}(x_i, a_i(1), \\ldots, a_i(l-1), a_i(l)) \\forall a_i(l) \\in \\mathcal{A}`.

    evaluation_policy_action_dist: array-like (<= n_rounds * len_list * n_unique_actions, )
        Action choice probabilities of the evaluation policy for all possible actions,
        i.e., :math:`\\pi_e(a_i(l) | x_i, a_i(1), \\ldots, a_i(l-1)) \\forall a_i(l) \\in \\mathcal{A}`.

    """
    check_rips_inputs(
        slate_id=slate_id,
        reward=reward,
        position=position,
        pscore_cascade=pscore_cascade,
        evaluation_policy_pscore_cascade=evaluation_policy_pscore_cascade,
    )
    check_array(array=action, name="action", expected_dim=1)
    check_array(array=q_hat, name="q_hat", expected_dim=1)
    check_array(
        array=evaluation_policy_action_dist,
        name="evaluation_policy_action_dist",
        expected_dim=1,
    )
    if not (np.issubdtype(action.dtype, np.integer) and action.min() >= 0
            and action.max() < n_unique_action):
        raise ValueError(
            "`action` elements must be integers in the range of [0, n_unique_action)"
        )
    if not (slate_id.shape[0] == action.shape[0]
            == q_hat.shape[0] // n_unique_action
            == evaluation_policy_action_dist.shape[0] // n_unique_action):
        raise ValueError(
            "Expected `slate_id.shape[0] == action.shape[0] == "
            "q_hat.shape[0] // n_unique_action == evaluation_policy_action_dist.shape[0] // n_unique_action`, "
            "but found it False"
        )
    evaluation_policy_action_dist_ = evaluation_policy_action_dist.reshape(
        (-1, n_unique_action))
    if not np.allclose(
        np.ones(evaluation_policy_action_dist_.shape[0]),
        evaluation_policy_action_dist_.sum(axis=1),
    ):
        raise ValueError(
            "`evaluation_policy_action_dist[i * n_unique_action : (i+1) * n_unique_action]` "
            "must sum up to one for all i."
        )
def _raw_eig_dist(
    eigs: ndarray,
    bins: int = 50,
    kde: bool = True,
    title: str = "Raw Eigenvalue Distribution",
    mode: PlotMode = "block",
    outfile: Path = None,
    fig: Figure = None,
    axes: Axes = None,
) -> PlotResult:
    """Plot a histogram of the raw eigenvalues.

    Parameters
    ----------
    eigs: ndarray
        The eigenvalues to plot.

    bins: int
        the number of (equal-sized) bins to display and use for the histogram

    kde: boolean
        If True (default), use
        [statsmodels.nonparametric.kde.KDEUnivariate](https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.kde.KDEUnivariate.html#statsmodels.nonparametric.kde.KDEUnivariate)
        with arguments {kernel="gau", bw="scott", cut=0} to compute and display
        the kde. If False, do not display a kernel density estimate.

    title: string
        The plot title string

    mode: "block" (default) | "noblock" | "save" | "return"
        If "block", call plot.plot() and display plot in a blocking fashion.
        If "noblock", attempt to generate plot in nonblocking fashion.
        If "save", save plot to pathlib Path specified in `outfile` argument
        If "return", return (fig, axes), the matplotlib figure and axes
        object for modification.

    outfile: Path
        If mode="save", save generated plot to Path specified in `outfile` argument.
        Intermediate directories will be created if needed.

    fig: Figure
        If provided with `axes`, configure plotting with the provided `fig`
        object instead of creating a new figure. Useful for creating subplots.

    axes: Axes
        If provided with `fig`, plot to the provided `axes` object. Useful for
        creating subplots.

    Returns
    -------
    (fig, axes): (Figure, Axes)
        The handles to the matplotlib objects, only if `mode` is "return".
    """
    _configure_sbn_style()
    fig, axes = _setup_plotting(fig, axes)
    sbn.distplot(
        eigs,
        norm_hist=True,
        bins=bins,  # doane
        kde=False,
        label="Raw Eigenvalue Distribution",
        axlabel="Eigenvalue",
        color="black",
        ax=axes,
    )
    if kde:
        grid = np.linspace(eigs.min(), eigs.max(), 10000)
        _kde_plot(eigs, grid, axes)

    axes.set(title=title, ylabel="Density")
    axes.legend().set_visible(True)
    return _handle_plot_mode(mode, fig, axes, outfile)
def scale_with_max(image: np.ndarray, max_val: float) -> np.ndarray:
    min_ = image.min()
    max_ = image.max()
    K = max_val / (max_ - min_)
    return K * (image - min_)
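# Hedged sketch for ``scale_with_max`` above: the image minimum maps to 0 and
# the maximum to ``max_val``. Standalone, assumes numpy only.
import numpy as np

print(scale_with_max(np.array([10.0, 15.0, 20.0]), max_val=255.0))  # [  0.  127.5 255. ]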
def normalize_gain(samples: np.ndarray) -> np.ndarray:
    min_ = samples.min()
    max_ = samples.max()
    return (samples - min_) / (max_ - min_)
def correlation(
    xs: "pd.Series",  # must support .reset_index/.values; the original np.ndarray annotation did not
    ys: "pd.Series",
    ax: Optional[axes.Axes] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    labelsize: int = 14,
    label_length_limit: int = 50,
    pointsize: int = 50,
    linewidth: int = 2,
    font_src: Optional[str] = None,
) -> None:
    if font_src is not None:
        prop = fm.FontProperties(fname=font_src, size=labelsize)
    else:
        prop = fm.FontProperties(size=labelsize)
    maxy = ys.max()
    miny = ys.min()
    ry = maxy - miny
    ryratio = 0.2
    mask = ~np.isnan(xs.reset_index(drop=True)) & ~np.isnan(ys.reset_index(drop=True))
    tau, p = stats.kendalltau(
        xs[mask].astype(float),  # np.float is deprecated; use the builtin
        ys[mask].astype(float),
    )
    s, i, _, _, _ = stats.linregress(xs[mask], ys[mask])
    xl = np.linspace(xs.min(), xs.max())
    if ax is None:
        ax = plt.gca()
    ax.scatter(xs.values, ys.values, s=pointsize, color="k")
    ax.plot(xl, xl * s + i, color="k", linewidth=linewidth)
    if maxy > miny:  # the original double condition (maxy != miny and maxy > miny) was redundant
        ax.set_ylim(
            top=maxy + (ry * ryratio),
            bottom=min(miny, (xs.min() * s) + i) - (ry * (ryratio / 2.0)),
        )
    ylim_min, ylim_max = ax.get_ylim()
    ylim_mean = (ylim_max + ylim_min) / 2.0
    ylim_range = ylim_max - ylim_min
    minx, maxx = ax.get_xlim()
    rx = maxx - minx
    rxratio = 0.05
    tx = maxx - (rx * rxratio)
    ha = "right"
    if (ys[xs < ((xs.max() + xs.min()) / 2.0)].max()
            < ys[xs > ((xs.max() + xs.min()) / 2.0)].max()):
        tx = minx + (rx * rxratio)
        ha = "left"
    ax.text(
        tx,
        max(maxy + (ry * (ryratio / 2.0)), ylim_mean + (ylim_range * 0.4)),
        s=f"P: {p:.3f}\nTAU: {tau:.3f}",
        color="k",
        ha=ha,
        va="bottom",
        fontproperties=prop,
    )
    if xlabel is not None:
        ax.set_xlabel(label_simplify(xlabel, label_length_limit), fontproperties=prop)
    if ylabel is not None:
        ax.set_ylabel(label_simplify(ylabel, label_length_limit), fontproperties=prop)
def check_bandit_feedback_inputs(
    context: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    expected_reward: Optional[np.ndarray] = None,
    position: Optional[np.ndarray] = None,
    pscore: Optional[np.ndarray] = None,
    action_context: Optional[np.ndarray] = None,
) -> Optional[ValueError]:
    """Check inputs for bandit learning or simulation.

    Parameters
    -----------
    context: array-like, shape (n_rounds, dim_context)
        Context vectors in each round, i.e., :math:`x_t`.

    action: array-like, shape (n_rounds,)
        Action sampled by a behavior policy in each round of the logged bandit feedback, i.e., :math:`a_t`.

    reward: array-like, shape (n_rounds,)
        Observed rewards (or outcome) in each round, i.e., :math:`r_t`.

    expected_reward: array-like, shape (n_rounds, n_actions), default=None
        Expected rewards (or outcome) in each round, i.e., :math:`\\mathbb{E}[r_t]`.

    position: array-like, shape (n_rounds,), default=None
        Positions of each round in the given logged bandit feedback.

    pscore: array-like, shape (n_rounds,), default=None
        Propensity scores, the probability of selecting each action by behavior policy,
        in the given logged bandit feedback.

    action_context: array-like, shape (n_actions, dim_action_context)
        Context vectors characterizing each action.

    """
    if not isinstance(context, np.ndarray):
        raise ValueError("context must be ndarray")
    if context.ndim != 2:
        raise ValueError("context must be 2-dimensional")
    if not isinstance(action, np.ndarray):
        raise ValueError("action must be ndarray")
    if action.ndim != 1:
        raise ValueError("action must be 1-dimensional")
    if not isinstance(reward, np.ndarray):
        raise ValueError("reward must be ndarray")
    if reward.ndim != 1:
        raise ValueError("reward must be 1-dimensional")
    if not (action.dtype == int and action.min() >= 0):
        raise ValueError("action elements must be non-negative integers")

    if expected_reward is not None:
        if not isinstance(expected_reward, np.ndarray):
            raise ValueError("expected_reward must be ndarray")
        if expected_reward.ndim != 2:
            raise ValueError("expected_reward must be 2-dimensional")
        if not (context.shape[0] == action.shape[0] == reward.shape[0]
                == expected_reward.shape[0]):
            raise ValueError(
                "context, action, reward, and expected_reward must be the same size."
            )
        if action.max() >= expected_reward.shape[1]:
            raise ValueError(
                "action elements must be smaller than the size of the second dimension of expected_reward"
            )
    if pscore is not None:
        if not isinstance(pscore, np.ndarray):
            raise ValueError("pscore must be ndarray")
        if pscore.ndim != 1:
            raise ValueError("pscore must be 1-dimensional")
        if not (context.shape[0] == action.shape[0] == reward.shape[0]
                == pscore.shape[0]):
            raise ValueError(
                "context, action, reward, and pscore must be the same size.")
        if np.any(pscore <= 0):
            raise ValueError("pscore must be positive")
    if position is not None:
        if not isinstance(position, np.ndarray):
            raise ValueError("position must be ndarray")
        if position.ndim != 1:
            raise ValueError("position must be 1-dimensional")
        if not (context.shape[0] == action.shape[0] == reward.shape[0]
                == position.shape[0]):
            raise ValueError(
                "context, action, reward, and position must be the same size.")
        if not (position.dtype == int and position.min() >= 0):
            raise ValueError("position elements must be non-negative integers")
    else:
        if not (context.shape[0] == action.shape[0] == reward.shape[0]):
            raise ValueError(
                "context, action, and reward must be the same size.")
    if action_context is not None:
        if not isinstance(action_context, np.ndarray):
            raise ValueError("action_context must be ndarray")
        if action_context.ndim != 2:
            raise ValueError("action_context must be 2-dimensional")
        if action.max() >= action_context.shape[0]:
            raise ValueError(
                "action elements must be smaller than the size of the first dimension of action_context"
            )
def get_node_connections_3d(conns: np.ndarray, nextnode: np.ndarray,
                            num_planes: int = 8) -> dict:
    """For a given node, calculates the nodes it is directly connected to via
    triangles. Output is a dictionary with the reference node as the key and
    a list of directly connected vertices as the value.

    Input:
    ======
    conns: Array of vertices in a triangle. shape = (num_triangles, 3),
    nextnode: Array that lists the vertex at the next plane.

    Output:
    =======
    conn_dict: Dictionary. Key: vertex index. Value: List of vertex indices
               that a node connects to.

    Here is some mockup-code that is automated by this routine.

        # Total number of planes
        n_pl = 4
        # Vertices per plane
        n_per_pl = 8

    Create an array of vertices a reference vertex is connected to.
    The last two refer to entries from nextnode.

        i_0 = np.array([1, 2, 3, 2 + n_per_pl, 1 - n_per_pl])                 # Plane 0
        i_1 = np.array([1, 2, 3, 2 + n_per_pl, 1 - n_per_pl]) + 1 * n_per_pl  # Plane 1
        i_2 = np.array([1, 2, 3, 2 + n_per_pl, 1 - n_per_pl]) + 2 * n_per_pl  # Plane 2
        i_3 = np.array([1, 2, 3, 2 + n_per_pl, 1 - n_per_pl]) + 3 * n_per_pl  # Plane 3

        print(f"Plane 0: {i_0}")
        print(f"Plane 1: {i_1}")
        print(f"Plane 2: {i_2}")
        print(f"Plane 3: {i_3}")

        Plane 0: [ 1  2  3 10 -7]
        Plane 1: [ 9 10 11 18  1]
        Plane 2: [17 18 19 26  9]
        Plane 3: [25 26 27 34 17]

    We see that the array, describing connections in plane 0, refers to -7:
    a node in plane 3 (due to periodicity). The array translated into plane 3
    refers to node 34. We can map these indices onto the range of vertices,
    [0:31], by using mod:

        print(f"Plane 0, with mod: {np.mod(i_0, n_per_pl * n_pl)}")
        print(f"Plane 3, with mod: {np.mod(i_3, n_per_pl * n_pl)}")

        Plane 0, with mod: [ 1  2  3 10 25]
        Plane 3, with mod: [25 26 27  2 17]
    """
    conn_dict = {}
    # This + 1 is important!
    vtx_per_plane = conns.max() + 1
    assert conns.min() == 0
    # Iterate over all vertices.
    for vertex in range(vtx_per_plane):
        # 1) Find all triangles which include the current vertex.
        # Since each vertex occurs either 0 or 1 time in a triangle, each row
        # returned from np.argwhere is unique.
        idx_tri = np.argwhere(conns == vertex)[:, 0]
        # Now we have the triangles in which vertex occurs, identified as rows
        # in conns. Get the unique vertices for these triangles.
        connected_vertices = np.unique(conns[idx_tri, :])
        # Now we need to find the cross-plane connections.
        # Find the first cross-plane connection via a direct look-up from nextnode.
        connected_next_plane = nextnode[vertex] + vtx_per_plane
        # Find the previous one via an inverse look-up.
        try:
            connected_prev_plane = np.argwhere(
                nextnode == vertex).item() - vtx_per_plane
        except ValueError:
            # .item() fails when the inverse look-up is empty or not unique.
            # In this case we just assume a self-connection.
            connected_prev_plane = vertex - vtx_per_plane
        conn_vtx_list = np.concatenate(
            (connected_vertices,
             np.array([connected_next_plane, connected_prev_plane])))
        # Now we insert conn_vtx_list into conn_dict as the value for the
        # current vertex. Do this for all planes and apply modulo for the
        # first and last plane.
        for idx_pl in range(num_planes):
            # We are inserting a vertex shifted to the plane at the current iteration.
            vtx_shift = vertex + idx_pl * vtx_per_plane
            if (idx_pl == 0) or (idx_pl == num_planes - 1):
                conn_dict.update({
                    vtx_shift:
                    np.mod(conn_vtx_list + idx_pl * vtx_per_plane,
                           num_planes * vtx_per_plane)
                })
            else:
                conn_dict.update(
                    {vtx_shift: conn_vtx_list + idx_pl * vtx_per_plane})
    return conn_dict
def plot_confusion_matrix(
    cm: np.ndarray,
    class_names,
    figsize=(16, 16),
    fontsize=12,
    normalize=False,
    title="Confusion matrix",
    cmap=None,
    fname=None,
    noshow=False,
    backend="Agg",
):
    """Render the confusion matrix and return matplotlib's figure with it.
    Normalization can be applied by setting `normalize=True`.
    """
    import matplotlib

    matplotlib.use(backend)
    import matplotlib.pyplot as plt

    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.cm.Oranges

    if normalize:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cm = cm.astype(np.float32) / cm.sum(axis=1)[:, np.newaxis]

    f = plt.figure(figsize=figsize)
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45, ha="right")
    plt.yticks(tick_marks, class_names)

    fmt = ".3f" if normalize else "d"
    thresh = (cm.max() + cm.min()) / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if np.isfinite(cm[i, j]):
            plt.text(
                j,
                i,
                format(cm[i, j], fmt),
                horizontalalignment="center",
                fontsize=fontsize,
                color="white" if cm[i, j] > thresh else "black",
            )

    plt.ylabel("True label")
    plt.xlabel("Predicted label\nAccuracy={:0.4f}; Misclass={:0.4f}".format(
        accuracy, misclass))
    plt.tight_layout()

    if fname is not None:
        plt.savefig(fname=fname, dpi=200)

    if not noshow:
        plt.show()

    return f
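# Hedged sketch exercising ``plot_confusion_matrix`` above with the
# non-interactive Agg backend (no window is opened). Assumes numpy,
# warnings and itertools are imported at module level as the function expects.
import numpy as np

_cm = np.array([[50, 2], [5, 43]])
_fig = plot_confusion_matrix(_cm, class_names=["neg", "pos"],
                             figsize=(4, 4), noshow=True, backend="Agg")
print(type(_fig).__name__)  # Figure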
def plot(
    self,
    x: np.ndarray,
    y: np.ndarray,
    scatter_sample_ratio: float = 0.1,
    feature_names: List[str] = None,
    fig_height: float = 5.0,
    fig_width: float = 10.0,
) -> None:
    """Plots one-dimensional ridge functions and scatter plots."""
    if feature_names is None:
        feature_names = [f"x_{{{i}}}" for i in range(x.shape[1])]
    elif len(feature_names) != x.shape[1]:
        raise ValueError(
            f"Length of `feature_names` {len(feature_names)} differs from width of `x` {x.shape[1]}."
        )

    def _coef_parser(idx, beta, name):
        tokens = [f"{np.abs(beta):.3f}", "\\cdot", name]
        # the original prefixed "-" to a leading positive coefficient;
        # sign the term by the coefficient, omitting "+" for the first term
        if beta < 0:
            tokens = ["-"] + tokens
        elif idx != 0:
            tokens = ["+"] + tokens
        return " ".join(tokens)

    fig, axs = plt.subplots(self.n_stages, 1,
                            figsize=(fig_width, fig_height * self.n_stages))
    n_samples = x.shape[0]
    y_min, y_max = y.min(), y.max()
    y_span = y_max - y_min
    for stage in range(self.n_stages):
        xb = x @ self.projections[stage].beta
        xb_min = xb.min()
        xb_max = xb.max()
        xb_span = xb_max - xb_min
        # plot the ridge function
        xb_grid = np.linspace(
            start=xb_min - 0.05 * xb_span,
            stop=xb_max + 0.05 * xb_span,
            num=1000,
        )
        yhat_grid = self.ridge_functions[stage].predict(xb_grid.reshape(-1, 1))
        axs[stage].plot(
            xb_grid,
            yhat_grid,
            linewidth=2,
            color="r",
            label=f"ridge function {type(self.ridge_functions[stage]).__name__}",
        )
        # plot a subset of the data
        sample_indices = np.random.choice(
            n_samples, size=int(n_samples * scatter_sample_ratio), replace=False)
        axs[stage].plot(
            xb.ravel()[sample_indices],
            y.ravel()[sample_indices],
            marker=".",
            markersize=2,
            linestyle="",
            color="k",
            alpha=0.5,
            label="projected data",
        )
        # labeling
        projection_equation = " ".join([
            _coef_parser(idx=i, beta=b, name=feature_names[i])
            for i, b in enumerate(self.projections[stage].beta.ravel())
            if b != 0
        ])
        axs[stage].set_title(
            f"Stage {stage}: $\\langle x, \\beta\\rangle = {projection_equation}$"
        )
        axs[stage].legend()
        axs[stage].set_ylim(y_min - 0.5 * y_span, y_max + 0.5 * y_span)
    fig.tight_layout()
def scale(x: np.ndarray):
    """Min-max scale `x` to the interval [0, 1]."""
    max_x = x.max()
    min_x = x.min()
    return (x - min_x) / (max_x - min_x)
def _legacy_filter_spectrum(
    catalog: Catalog,
    frequency: np.ndarray,
    intensity: np.ndarray,
    vlsr: float = 5.8,
    delta_v: float = 0.3,
    block_interlopers: bool = False,
    interloper_threshold: float = 6.0,
    sim_cutoff: float = 0.1,
    line_wash_threshold: float = 3.5,
):
    restfreqs = catalog.frequency
    int_sim = 10 ** catalog.logint
    max_int_sim = int_sim.max()
    logger.info("Thresholding catalog entries based on overlap and intensity.")
    logger.info(f"Intensity cutoff: {sim_cutoff * max_int_sim}")
    # get indices of catalog entries that actually fall in the range of the data
    cat_mask = np.where(
        (restfreqs < frequency.max())
        & (restfreqs > frequency.min())
        & (int_sim > sim_cutoff * max_int_sim)
    )[0]
    restfreqs = restfreqs[cat_mask]
    logger.info(f"Min/Max catalog frequencies: {restfreqs.min():.4f},{restfreqs.max():.4f}")
    # subset the simulated intensities so they stay aligned with restfreqs
    int_sim = int_sim[cat_mask]
    catalog_indices = list()
    relevant_freqs = np.zeros_like(frequency)
    relevant_intensity = np.zeros_like(intensity)
    relevant_yerrs = np.zeros_like(intensity)
    ignore_counter = 0
    for catalog_index, restfreq in zip(cat_mask, restfreqs):
        # Doppler velocity of each channel relative to the rest frequency,
        # in km/s (300000 approximates the speed of light)
        velocity = (restfreq - frequency) / restfreq * 300000
        mask = np.where((velocity < (delta_v + vlsr)) & (velocity > (-delta_v + vlsr)))
        if mask[0].size != 0:
            noise_mean, noise_std = compute.calc_noise_std(
                intensity[mask], line_wash_threshold
            )
            if np.isnan(noise_mean) or np.isnan(noise_std):
                logger.info(f"NaNs found at {restfreq}")
                continue
            if (
                block_interlopers
                and intensity[mask].max() > interloper_threshold * noise_std
            ):
                logger.info(f"Found interloper at {restfreq}; ignoring.")
                ignore_counter += 1
                continue
            else:
                catalog_indices.append(catalog_index)
                relevant_freqs[mask] = frequency[mask]
                relevant_intensity[mask] = intensity[mask]
                # noise and a 10% calibration term, added in quadrature
                relevant_yerrs[mask] = np.sqrt(
                    noise_std ** 2.0 + (intensity[mask] * 0.1) ** 2.0
                )
    logger.info(
        f"Ignored a total of {ignore_counter} catalog entries due to interlopers."
    )
    mask = relevant_freqs > 0
    relevant_freqs = relevant_freqs[mask]
    relevant_intensity = relevant_intensity[mask]
    relevant_yerrs = relevant_yerrs[mask]
    mask = np.zeros_like(catalog.frequency)
    mask[catalog_indices] = 1
    chunk = DataChunk(
        frequency=relevant_freqs,
        intensity=relevant_intensity,
        noise=relevant_yerrs,
        catalog_index=catalog_indices,
        mask=mask.astype(bool),
    )
    return chunk
def check_bandit_feedback_inputs(
    context: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    expected_reward: Optional[np.ndarray] = None,
    position: Optional[np.ndarray] = None,
    pscore: Optional[np.ndarray] = None,
    action_context: Optional[np.ndarray] = None,
) -> Optional[ValueError]:
    """Check inputs for bandit learning or simulation.

    Parameters
    -----------
    context: array-like, shape (n_rounds, dim_context)
        Context vectors observed for each data, i.e., :math:`x_i`.

    action: array-like, shape (n_rounds,)
        Actions sampled by the logging/behavior policy for each data in logged bandit data, i.e., :math:`a_i`.

    reward: array-like, shape (n_rounds,)
        Rewards observed for each data in logged bandit data, i.e., :math:`r_i`.

    expected_reward: array-like, shape (n_rounds, n_actions), default=None
        Expected reward of each data, i.e., :math:`\\mathbb{E}[r_i|x_i,a_i]`.

    position: array-like, shape (n_rounds,), default=None
        Indices to differentiate positions in a recommendation interface where the actions are presented.

    pscore: array-like, shape (n_rounds,), default=None
        Action choice probabilities of the logging/behavior policy (propensity scores), i.e., :math:`\\pi_b(a_i|x_i)`.

    action_context: array-like, shape (n_actions, dim_action_context), default=None
        Context vectors characterizing each action.

    """
    check_array(array=context, name="context", expected_dim=2)
    check_array(array=action, name="action", expected_dim=1)
    check_array(array=reward, name="reward", expected_dim=1)

    if expected_reward is not None:
        check_array(array=expected_reward, name="expected_reward", expected_dim=2)
        if not (context.shape[0] == action.shape[0] == reward.shape[0] == expected_reward.shape[0]):
            raise ValueError(
                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == expected_reward.shape[0]`"
                ", but found it False")
        if not (np.issubdtype(action.dtype, np.integer) and action.min() >= 0
                and action.max() < expected_reward.shape[1]):
            raise ValueError(
                "`action` elements must be integers in the range of [0, `expected_reward.shape[1]`)"
            )
    else:
        if not (np.issubdtype(action.dtype, np.integer) and action.min() >= 0):
            raise ValueError("`action` elements must be non-negative integers")
    if pscore is not None:
        check_array(array=pscore, name="pscore", expected_dim=1)
        if not (context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]):
            raise ValueError(
                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]`"
                ", but found it False")
        if np.any(pscore <= 0):
            raise ValueError("`pscore` must be positive")
    if position is not None:
        check_array(array=position, name="position", expected_dim=1)
        if not (context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]):
            raise ValueError(
                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]`"
                ", but found it False")
        if not (np.issubdtype(position.dtype, np.integer) and position.min() >= 0):
            raise ValueError(
                "`position` elements must be non-negative integers")
    else:
        if not (context.shape[0] == action.shape[0] == reward.shape[0]):
            raise ValueError(
                "Expected `context.shape[0] == action.shape[0] == reward.shape[0]`"
                ", but found it False")
    if action_context is not None:
        check_array(array=action_context, name="action_context", expected_dim=2)
        if not (np.issubdtype(action.dtype, np.integer) and action.min() >= 0
                and action.max() < action_context.shape[0]):
            raise ValueError(
                "`action` elements must be integers in the range of [0, `action_context.shape[0]`)"
            )
    else:
        if not (np.issubdtype(action.dtype, np.integer) and action.min() >= 0):
raise ValueError("`action` elements must be non-negative integers")
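# Sanity-check sketch for check_bandit_feedback_inputs: synthetic logged
# bandit feedback that should pass every check. This assumes the surrounding
# module supplies `check_array` (as in the Open Bandit Pipeline, where this
# helper appears); the sizes below are made up.
import numpy as np

rng = np.random.default_rng(0)
n_rounds, n_actions, dim_context = 100, 4, 3
context = rng.normal(size=(n_rounds, dim_context))
action = rng.integers(0, n_actions, size=n_rounds)   # non-negative integer actions
reward = rng.integers(0, 2, size=n_rounds)           # binary rewards
pscore = np.full(n_rounds, 1.0 / n_actions)          # uniform behavior policy
check_bandit_feedback_inputs(context=context, action=action,
                             reward=reward, pscore=pscore)  # returns None when valid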
def apply(self, data: np.ndarray) -> np.ndarray:
    # Clip the signal to a fraction of its original extremes (controlled by
    # self.fac), then rescale so the clipped extremes land at exactly half
    # the original extremes.
    ifac = 1 - .9 * self.fac
    return data.clip(data.min() * ifac, data.max() * ifac) * (.5 / ifac)
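# Numeric illustration of the transform in `apply`, reproduced outside the
# class; fac = 0.5 is an assumed value for whatever `self.fac` normally holds.
import numpy as np

fac = 0.5
ifac = 1 - .9 * fac  # 0.55
data_demo = np.linspace(-2.0, 2.0, 5)
out_demo = data_demo.clip(data_demo.min() * ifac, data_demo.max() * ifac) * (.5 / ifac)
print(out_demo)  # [-1. -0.909... 0. 0.909... 1.] -- peaks limited to +/- half the original extremes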
def scale(x: np.ndarray) -> np.ndarray: max_x = x.max() min_x = x.min() return (x - min_x) / (max_x - min_x)
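# Quick check of the min-max scalers above (both variants behave the same):
# the extremes map to 0 and 1. Note that neither guards against a constant
# input, where max == min triggers a division by zero.
import numpy as np

print(scale(np.array([2.0, 4.0, 6.0])))  # [0.  0.5 1. ]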
def Workflow_cardio_myl7(
    struct_img: np.ndarray,
    rescale_ratio: float = -1,
    output_type: str = "default",
    output_path: Union[str, Path] = None,
    fn: Union[str, Path] = None,
    output_func=None,
):
    """
    classic segmentation workflow wrapper for structure Cardio MYL7

    Parameters:
    -----------
    struct_img: np.ndarray
        the 3D image to be segmented
    rescale_ratio: float
        an optional parameter to allow rescaling the image before running the
        segmentation functions, default is no rescaling
    output_type: str
        select how to handle output. Currently, four types are supported:
        1. default: the result will be saved at output_path whose filename is
           the original name without extension + "_struct_segmentation.tiff"
        2. array: the segmentation result will simply be returned as a numpy array
        3. array_with_contour: segmentation result will be returned together
           with the contour of the segmentation
        4. customize: pass in an extra output_func to do a special save. All the
           intermediate results, names of these results, the output_path, and
           the original filename (without extension) will be passed in to
           output_func.
    """
    ##########################################################################
    # PARAMETERS:
    #   note that these parameters are supposed to be fixed for the structure
    #   and work well across different datasets
    intensity_norm_param = [8, 15.5]
    vesselness_sigma = [1]
    vesselness_cutoff = 0.01
    minArea = 15
    ##########################################################################
    out_img_list = []
    out_name_list = []
    ###################
    # PRE_PROCESSING
    ###################
    # intensity normalization (min/max)
    struct_img = intensity_normalization(struct_img, scaling_param=intensity_norm_param)
    out_img_list.append(struct_img.copy())
    out_name_list.append("im_norm")
    # rescale if needed
    if rescale_ratio > 0:
        struct_img = zoom(struct_img, (1, rescale_ratio, rescale_ratio), order=2)
        struct_img = (struct_img - struct_img.min() + 1e-8) / (struct_img.max() - struct_img.min() + 1e-8)
    # smoothing with edge-preserving smoothing
    structure_img_smooth = edge_preserving_smoothing_3d(struct_img)
    out_img_list.append(structure_img_smooth.copy())
    out_name_list.append("im_smooth")
    ###################
    # core algorithm
    ###################
    # vesselness 3d
    response = vesselness3D(structure_img_smooth, sigmas=vesselness_sigma, tau=1, whiteonblack=True)
    bw = response > vesselness_cutoff
    ###################
    # POST-PROCESSING
    ###################
    seg = remove_small_objects(bw > 0, min_size=minArea, connectivity=1, in_place=False)
    # output
    seg = seg > 0
    seg = seg.astype(np.uint8)
    seg[seg > 0] = 255
    out_img_list.append(seg.copy())
    out_name_list.append("bw_final")
    if output_type == "default":
        # the default final output, simply save it to the output path
        save_segmentation(seg, False, Path(output_path), fn)
    elif output_type == "customize":
        # the hook for passing in a customized output function
        # use "out_img_list" and "out_name_list" in your hook to
        # customize your output functions
        output_func(out_img_list, out_name_list, Path(output_path), fn)
    elif output_type == "array":
        return seg
    elif output_type == "array_with_contour":
        return (seg, generate_segmentation_contour(seg))
    else:
        raise NotImplementedError(f"invalid output type: {output_type}")
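# Hypothetical invocation of the wrapper above. The random volume is a
# stand-in for a real 3D MYL7 stack, and running this requires the
# aicssegmentation stack that provides intensity_normalization, vesselness3D,
# edge_preserving_smoothing_3d, and the other helpers it calls.
import numpy as np

demo_img = np.random.rand(16, 128, 128).astype(np.float32)
seg_demo = Workflow_cardio_myl7(demo_img, output_type="array")
print(seg_demo.shape, seg_demo.dtype)  # same spatial shape; uint8 with values in {0, 255}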
def cover_space(samples: np.ndarray, tolerance=0.03) -> bool:
    """Check whether `samples` approximately spans the hypercube [-1, 1]
    along every dimension (column-wise extremes within `tolerance`)."""
    return (np.allclose(samples.min(axis=0), -1, atol=tolerance)
            and np.allclose(samples.max(axis=0), 1, atol=tolerance))
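# Example: a dense uniform sample over [-1, 1]^3 covers the space, while a
# sample confined to [-0.5, 0.5]^3 does not.
import numpy as np

rng = np.random.default_rng(0)
print(cover_space(rng.uniform(-1, 1, size=(10_000, 3))))      # True: per-axis extremes within 0.03 of +/-1
print(cover_space(rng.uniform(-0.5, 0.5, size=(10_000, 3))))  # False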
def apply_image(self, img: np.ndarray):
    # Min-max normalize to [0, 1], then scale by self.range[1] and offset by
    # self.range[0]; as written, the output spans
    # [self.range[0], self.range[0] + self.range[1]].
    return (img - img.min()) / (img.max() - img.min()) * self.range[1] + self.range[0]
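# Stand-alone illustration of apply_image with assumed range values: note
# that with range = (10, 50) the output spans [10, 60], not [10, 50], which
# may or may not be what the surrounding class intends.
import numpy as np

lo, span = 10.0, 50.0
img_demo = np.array([[0.0, 2.0], [4.0, 8.0]])
out_img = (img_demo - img_demo.min()) / (img_demo.max() - img_demo.min()) * span + lo
print(out_img.min(), out_img.max())  # 10.0 60.0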
def _compute_num_spots_per_threshold(
        self, img: np.ndarray) -> Tuple[np.ndarray, List[int]]:
    """Computes the number of detected spots for each threshold

    Parameters
    ----------
    img : np.ndarray
        The image in which to count spots

    Returns
    -------
    np.ndarray :
        thresholds
    List[int] :
        spot counts
    """
    # thresholds to search over
    thresholds = np.linspace(img.min(), img.max(), num=100)
    # number of spots detected at each threshold
    spot_counts = []
    # where we stop our threshold search
    stop_threshold = None
    if self.verbose and StarfishConfig().verbose:
        threshold_iter = tqdm(thresholds)
        print('Determining optimal threshold ...')
    else:
        threshold_iter = thresholds
    for stop_index, threshold in enumerate(threshold_iter):
        # the `indices=True` and `num_peaks` keywords match the older
        # scikit-image API this snippet was written against
        spots = peak_local_max(img,
                               min_distance=self.min_distance,
                               threshold_abs=threshold,
                               exclude_border=False,
                               indices=True,
                               num_peaks=np.inf,
                               footprint=None,
                               labels=None)
        # stop spot finding when the number of detected spots falls below min_num_spots_detected
        if len(spots) <= self.min_num_spots_detected:
            stop_threshold = threshold
            if self.verbose:
                print(
                    f'Stopping early at threshold={threshold}. Number of spots fell below: '
                    f'{self.min_num_spots_detected}')
            break
        else:
            spot_counts.append(len(spots))
    if stop_threshold is None:
        stop_threshold = thresholds.max()
    # the original check read `len(thresholds > 1)`, an element-wise
    # comparison that is always truthy; the array's length is what matters
    if len(thresholds) > 1:
        thresholds = thresholds[:stop_index]
        spot_counts = spot_counts[:stop_index]
    return thresholds, spot_counts
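# Stand-alone sketch of the threshold sweep above, using scikit-image's
# peak_local_max directly (the deprecated `indices` keyword is dropped so it
# runs on current releases). The toy image with two point sources is made up
# for the example.
import numpy as np
from skimage.feature import peak_local_max

toy = np.zeros((50, 50))
toy[10, 10] = 1.0
toy[30, 40] = 0.4
thresholds_demo = np.linspace(toy.min(), toy.max(), num=10, endpoint=False)
counts_demo = [len(peak_local_max(toy, min_distance=2, threshold_abs=t))
               for t in thresholds_demo]
print(counts_demo)  # two spots until the threshold passes 0.4, then one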