def generate_reverb(signal, reverb, fname, iter_range):
    """ Adds reverb from the path reverb to the data in the path signal and
    saves it as fname. Applies reverb iteratively over iter_range

    :param signal: the filename for the stereo input signal
    :param reverb: the filename for the stereo impulse response
    :param iter_range: the max number of iterations to convolve with the signal
    :param fname: the output filename to save as
    :return:
    """
    sr, data = wav.read(signal)
    # normalize 16-bit PCM to float in [-1, 1]
    if data.dtype == np.dtype("int16"):
        data = data / float(np.iinfo(data.dtype).max)
    sr_ir, data_ir = wav.read(reverb)
    if data_ir.dtype == np.dtype("int16"):
        data_ir = data_ir / float(np.iinfo(data_ir.dtype).max)
    if sr_ir != sr:
        raise ValueError("Impulse Response must have same sample rate as signal")
    prev_data = data
    base = os.path.splitext(fname)[0]
    # range() instead of xrange(): xrange does not exist on Python 3
    for i in range(0, iter_range + 1):
        if i > 0:
            # each pass convolves the previous result with the IR again
            mix = add_reverb(prev_data.T, data_ir.T)
            prev_data = np.copy(mix).T
        else:
            # iteration 0 is the dry (unprocessed) signal
            mix = data.T
        out_name = base + '-' + str(i) + '.wav'
        # do not overwrite results from a previous run
        if not os.path.exists(out_name):
            scipy.io.wavfile.write(out_name, sr, mix.T)
def _finalize(self, dtype=np.uint8): """Finalize the image, that is put it in RGB mode, and set the channels in unsigned 8bit format ([0,255] range) (if the *dtype* doesn't say otherwise). """ channels = [] if self.mode == "P": self.convert("RGB") if self.mode == "PA": self.convert("RGBA") for chn in self.channels: if isinstance(chn, np.ma.core.MaskedArray): final_data = chn.data.clip(0, 1) * np.iinfo(dtype).max else: final_data = chn.clip(0, 1) * np.iinfo(dtype).max channels.append(np.ma.array(final_data, dtype, mask = np.ma.getmaskarray(chn))) if self.fill_value is not None: fill_value = [int(col * np.iinfo(dtype).max) for col in self.fill_value] else: fill_value = None return channels, fill_value
def test_ldexp_overflow(self): # silence warning emitted on overflow with np.errstate(over="ignore"): imax = np.iinfo(np.dtype('l')).max imin = np.iinfo(np.dtype('l')).min assert_equal(ncu.ldexp(2., imax), np.inf) assert_equal(ncu.ldexp(2., imin), 0)
def add_noise(sim):
    """Add per-detector bias, read-out noise and dark current to
    ``sim.outarr``, apply the inverse gain and convert back to uint16.
    """
    nx = sim.nxpix
    # the three detector slices are views, so "+=" mutates sim.outarr in place
    dets = (sim.outarr[:, :nx],
            sim.outarr[:, nx:2 * nx],
            sim.outarr[:, 2 * nx:3 * nx])
    names = ("left", "middle", "right")
    shape = dets[0].shape

    # keep the original stage order (all biases, then all read-out noise,
    # then all dark currents) so any RNG draw sequence is unchanged
    for det, name, bias in zip(dets, names,
                               (sim.dl_bias, sim.dm_bias, sim.dr_bias)):
        det += det_bias(bias, det=name)
    for det, name, ron in zip(dets, names,
                              (sim.dl_ron, sim.dm_ron, sim.dr_ron)):
        det += readout_noise(ron, shape, det=name)
    for det, name, dc in zip(dets, names,
                             (sim.dl_dc, sim.dm_dc, sim.dr_dc)):
        det += dark_current(dc, sim.tobs, shape, det=name)

    sim.outarr = gain(sim.outarr, sim.inv_gain)

    u16_max = np.iinfo(np.uint16).max
    if sim.outarr.max() > u16_max:
        log.info("Clipping array values larger than %s.", u16_max)
        sim.outarr[sim.outarr > u16_max] = u16_max
    sim.outarr = np.asarray(sim.outarr, dtype=np.uint16)
    log.info("Converting image array back to %s.", sim.outarr.dtype)
def able_int_type(values):
    """ Find the smallest integer numpy type to contain sequence `values`

    Prefers uint to int if minimum is >= 0

    Parameters
    ----------
    values : sequence
        sequence of integer values

    Returns
    -------
    itype : None or numpy type
        numpy integer type or None if no integer type holds all `values`

    Examples
    --------
    >>> able_int_type([0, 1]) == np.uint8
    True
    >>> able_int_type([-1, 1]) == np.int8
    True
    """
    # non-integral values cannot be represented by any integer type
    if any([v % 1 for v in values]):
        return None
    mn = min(values)
    mx = max(values)
    # Explicit type tuples replace np.sctypes['uint'] / np.sctypes['int'],
    # which were removed in NumPy 2.0; ordering (smallest first) is preserved.
    if mn >= 0:
        for ityp in (np.uint8, np.uint16, np.uint32, np.uint64):
            if mx <= np.iinfo(ityp).max:
                return ityp
    # Signed types are also tried when mn >= 0 but no uint fits (matches
    # the original fall-through); they will fail too and we return None.
    for ityp in (np.int8, np.int16, np.int32, np.int64):
        info = np.iinfo(ityp)
        if mn >= info.min and mx <= info.max:
            return ityp
    return None
def test_int64_overflow(self):
    """IDs wider than int64 parse as object dtype; a forced int64
    converter must raise OverflowError."""
    data = """ID
00013007854817840016671868
00013007854817840016749251
00013007854817840016754630
00013007854817840016781876
00013007854817840017028824
00013007854817840017963235
00013007854817840018860166"""
    result = self.read_csv(StringIO(data))
    self.assertTrue(result['ID'].dtype == object)

    self.assertRaises(OverflowError, self.read_csv,
                      StringIO(data), converters={'ID': np.int64})

    i_max = np.iinfo(np.int64).max
    i_min = np.iinfo(np.int64).min

    # Just inside int64 range: parse as integer
    for val in (i_max, i_min):
        result = self.read_csv(StringIO(str(val)), header=None)
        tm.assert_frame_equal(result, DataFrame([val]))

    # Just outside int64 range: parse as string
    for val in (i_max + 1, i_min - 1):
        result = self.read_csv(StringIO(str(val)), header=None)
        tm.assert_frame_equal(result, DataFrame([str(val)]))
def test_implementation_limits(self):
    """Timedelta bounds line up with int64 limits (GH 12727)."""
    min_td = Timedelta(Timedelta.min)
    max_td = Timedelta(Timedelta.max)
    int64_info = np.iinfo(np.int64)

    # the minimum is offset by one because the lowest int64 value is
    # reserved for NaT
    assert min_td.value == int64_info.min + 1
    assert max_td.value == int64_info.max

    # One tick below the lower limit yields NaT before overflowing
    assert (min_td - Timedelta(1, 'ns')) is NaT
    with pytest.raises(OverflowError):
        min_td - Timedelta(2, 'ns')
    with pytest.raises(OverflowError):
        max_td + Timedelta(1, 'ns')

    # Same checks via the raw nanosecond constructor
    assert Timedelta(min_td.value - 1, 'ns') is NaT
    with pytest.raises(OverflowError):
        Timedelta(min_td.value - 2, 'ns')
    with pytest.raises(OverflowError):
        Timedelta(max_td.value + 1, 'ns')
def test_int_out_of_range(parallel):
    """
    Integer numbers outside int range shall be returned as string columns
    consistent with the standard (Python) parser (no 'upcasting' to float).
    """
    # +1 / -1 keep the values strictly representable in the C parser
    imin = np.iinfo(int).min+1
    imax = np.iinfo(int).max-1
    # a value guaranteed to be out of range, kept as a string
    huge = '{:d}'.format(imax+2)

    text = 'P M S\n {:d} {:d} {:s}'.format(imax, imin, huge)
    expected = Table([[imax], [imin], [huge]], names=('P', 'M', 'S'))
    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)

    # check with leading zeroes to make sure strtol does not read them as octal
    text = 'P M S\n000{:d} -0{:d} 00{:s}'.format(imax, -imin, huge)
    expected = Table([[imax], [imin], ['00'+huge]], names=('P', 'M', 'S'))
    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)

    # mixed columns should be returned as float, but if the out-of-range integer
    # shows up first, it will produce a string column - with both readers
    # NOTE(review): imperative pytest.xfail() raises immediately, so every
    # statement below this line is unreachable dead code; consider removing
    # it or converting to @pytest.mark.xfail if the checks should still run.
    pytest.xfail("Integer fallback depends on order of rows")
    text = 'A B\n 12.3 {0:d}9\n {0:d}9 45.6e7'.format(imax)
    expected = Table([[12.3, 10.*imax], [10.*imax, 4.56e8]], names=('A', 'B'))
    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)
    table = ascii.read(text, format='basic', guess=False, fast_reader=False)
    assert_table_equal(table, expected)
def initBuffers(self,puzzle):
    """Allocate and populate the OpenCL buffers used by the solver kernel.

    NOTE(review): assumes self.simulations, self.workGroups, self.height,
    self.width, self.context and self.queue were set up beforehand -- confirm
    against the class constructor.
    """
    #define lengths buffer and copy to the GPU
    #as we will not read from this buffer later, mapping is not required
    # int16 max serves as the "no solution found yet" sentinel value
    self.lengths = np.full(self.simulations,np.iinfo(np.int16).max,dtype=np.int16)
    self.lengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.lengths)
    #define buffer for aggregated lengths for each workgroup
    self.groupLengths = np.full(self.workGroups,np.iinfo(np.int16).max,dtype=np.int16)
    self.groupLengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.groupLengths)
    #map group lengths buffer
    cl.enqueue_map_buffer(self.queue,self.groupLengthsBuffer,cl.map_flags.READ,0,self.groupLengths.shape,self.groupLengths.dtype)
    #get the input puzzle ready for the kernel; convert to 8 bit int (char)
    p = np.array(puzzle['puzzle']).astype(np.int8)
    #subtract 1 so that -1 denotes a gap and 0 denotes a square to be filled
    p = p - np.ones_like(p,dtype=p.dtype)
    #copy the puzzle, one for each simulation
    self.puzzles = np.zeros((self.simulations,self.height,self.width),dtype=p.dtype)
    self.puzzles[:,0:self.height,0:self.width] = p
    #define puzzles buffer and copy data (we do not need to worry about getting data out of this buffer, so mapping isn't required)
    #this buffer contains the input puzzles, one for each invocation (the puzzle is too large to hold in local or shared memory)
    self.puzzlesFlattened = self.puzzles.ravel()
    self.puzzlesBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.puzzlesFlattened)
    #define output buffer for best solutions aggregated across workgroups
    # NOTE(review): the slice keeps a view of self.puzzles and ravel() may or
    # may not copy depending on contiguity -- confirm the intended aliasing
    self.solutions = self.puzzles[0:self.workGroups]
    self.solutionsFlattened = self.solutions.ravel()
    self.solutionsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.solutionsFlattened)
    #map solutions buffer
    cl.enqueue_map_buffer(self.queue,self.solutionsBuffer,cl.map_flags.READ,0,self.solutionsFlattened.shape,self.solutions.dtype)
def randimg_in2out(rng, in_dtype, out_dtype, name):
    """Save a random image of ``in_dtype`` with ``dtype_from=out_dtype`` and
    return the round-tripped data together with the original array.
    """
    in_dtype = np.dtype(in_dtype)
    out_dtype = np.dtype(out_dtype)
    shape = (2, 3, 4)
    if in_dtype.kind in 'iu':
        lo, hi = np.iinfo(in_dtype).min, np.iinfo(in_dtype).max
        # Numpy bug for np < 1.6.0 allows overflow for range that does not fit
        # into C long int (int32 on 32-bit, int64 on 64-bit)
        try:
            data = rng.randint(lo, hi, size=shape)
        except ValueError:
            from random import randint
            vals = [randint(lo, hi) for v in range(np.prod(shape))]
            data = np.array(vals).astype(in_dtype).reshape(shape)
    elif in_dtype.kind == 'f':
        lo, hi = np.finfo(in_dtype).min, np.finfo(in_dtype).max
        # set some value for scaling our data
        scale = np.iinfo(np.uint16).max * 2.0
        data = rng.normal(size=shape, scale=scale)
        # plant the extremes so the full representable range is exercised
        data[0, 0, 0] = lo
        data[1, 0, 0] = hi
        data = data.astype(in_dtype)
    img = Image(data, vox2mni(np.eye(4)))
    # The dtype_from dtype won't be visible until the image is loaded
    newimg = save_image(img, name, dtype_from=out_dtype)
    return newimg.get_data(), data
def testInfNan(self):
    """Casting inf/nan into float keeps the value; casting into int
    saturates to a platform-dependent int bound."""
    int32_info = np.iinfo(np.int32)
    int64_info = np.iinfo(np.int64)
    self._compare(np.inf, np.float32, np.inf, False)
    self._compare(np.inf, np.float64, np.inf, False)
    if sys.byteorder == "big":
        self._compare(np.inf, np.int32, int32_info.max, False)
        self._compare(np.inf, np.int64, int64_info.max, False)
    else:
        # np.float64("np.inf").astype(np.int32) is negative on x86 but positive on ppc64le
        # Numpy link to relevant discussion - https://github.com/numpy/numpy/issues/9040
        # Tensorflow link to relevant discussion - https://github.com/tensorflow/tensorflow/issues/9360
        if platform.machine() == "ppc64le":
            self._compare(-np.inf, np.int32, int32_info.min, False)
            self._compare(-np.inf, np.int64, int64_info.min, False)
        else:
            self._compare(np.inf, np.int32, int32_info.min, False)
            self._compare(np.inf, np.int64, int64_info.min, False)
    self._compare(-np.inf, np.float32, -np.inf, False)
    self._compare(-np.inf, np.float64, -np.inf, False)
    self._compare(-np.inf, np.int32, int32_info.min, False)
    self._compare(-np.inf, np.int64, int64_info.min, False)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, False)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, False)), True)
    self._compare(np.nan, np.int32, int32_info.min, False)
    self._compare(np.nan, np.int64, int64_info.min, False)

    # same checks with the quantize path enabled
    self._compare(np.inf, np.float32, np.inf, True)
    self._compare(np.inf, np.float64, np.inf, True)
    self._compare(-np.inf, np.float32, -np.inf, True)
    self._compare(-np.inf, np.float64, -np.inf, True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, True)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True)
def __init__(self, vocabulary, fixed_length, custom_wordgen=None,
             ignore_sentences_with_only_custom=False, masking_value=0,
             unknown_value=1):
    """ Needs a dictionary as input for the vocabulary. """
    # token indices are stored as uint16, so the vocabulary must fit
    if len(vocabulary) > np.iinfo('uint16').max:
        raise ValueError('Dictionary is too big ({} tokens) for the numpy '
                         'datatypes used (max limit={}). Reduce vocabulary'
                         ' or adjust code accordingly!'
                         .format(len(vocabulary), np.iinfo('uint16').max))

    # Shouldn't be able to modify the given vocabulary
    self.vocabulary = deepcopy(vocabulary)
    self.fixed_length = fixed_length
    self.ignore_sentences_with_only_custom = ignore_sentences_with_only_custom
    self.masking_value = masking_value
    self.unknown_value = unknown_value

    # Initialized with an empty stream of sentences that must then be fed
    # to the generator at a later point for reusability.
    # A custom word generator can be used for domain-specific filtering etc
    if custom_wordgen is None:
        self.wordgen = WordGenerator(None, allow_unicode_text=True,
                                     ignore_emojis=False,
                                     remove_variation_selectors=True,
                                     break_replacement=True)
        self.uses_custom_wordgen = False
    else:
        assert custom_wordgen.stream is None
        self.wordgen = custom_wordgen
        self.uses_custom_wordgen = True
def test_implementation_limits(self):
    """Timedelta bounds line up with int64 limits (GH 12727)."""
    min_td = Timedelta(Timedelta.min)
    max_td = Timedelta(Timedelta.max)
    int64_info = np.iinfo(np.int64)

    # the minimum is offset by one because the lowest int64 value is
    # reserved for NaT
    self.assertTrue(min_td.value == int64_info.min + 1)
    self.assertTrue(max_td.value == int64_info.max)

    # One tick below the lower limit yields NaT before overflowing
    self.assertIsInstance(min_td - Timedelta(1, 'ns'), pd.tslib.NaTType)
    with tm.assertRaises(OverflowError):
        min_td - Timedelta(2, 'ns')
    with tm.assertRaises(OverflowError):
        max_td + Timedelta(1, 'ns')

    # Same checks via the raw nanosecond constructor
    self.assertIsInstance(Timedelta(min_td.value - 1, 'ns'), pd.tslib.NaTType)
    with tm.assertRaises(OverflowError):
        Timedelta(min_td.value - 2, 'ns')
    with tm.assertRaises(OverflowError):
        Timedelta(max_td.value + 1, 'ns')
def test_absolute_ufunc(self, flags=enable_pyobj_flags):
    """Exercise np.absolute, including extreme unsigned and float inputs."""
    extremes = [
        (np.iinfo(np.uint32).max, types.uint32),
        (np.iinfo(np.uint64).max, types.uint64),
        (np.finfo(np.float32).min, types.float32),
        (np.finfo(np.float64).min, types.float64),
    ]
    self.unary_ufunc_test('absolute', flags=flags,
                          additional_inputs=extremes)
def _random_integers(size, dtype): # We do not generate integers outside the int64 range platform_int_info = np.iinfo('int_') iinfo = np.iinfo(dtype) return np.random.randint(max(iinfo.min, platform_int_info.min), min(iinfo.max, platform_int_info.max), size=size).astype(dtype)
def tbl_2_nparray(in_tbl, flds):
    """Form the TableToNumPyArray to account for nulls for various dtypes.

    This is essentially a shortcut to `arcpy.da.TableToNumPyArray`

    Requires
    --------
    `in_tbl` : table, or featureclass table name
    `flds` : list of field names
    `skip_nulls` = False : set within function
    `null_value` : determined from the dtype of the array... otherwise you may
    as well do it manually

    Source
    ------
    arraytools, apt.py module
    """
    int_null = np.iinfo(np.int32).min
    # per-field-type replacement values for nulls
    nulls = {'Double': np.nan,
             'Single': np.nan,
             'Integer': int_null,
             'OID': int_null,
             'String': "None"}
    # fld_dict must be live code: null_dict below depends on it (it was
    # commented out, which made null_dict raise NameError)
    fld_dict = {i.name: i.type for i in arcpy.ListFields(in_tbl)}
    null_dict = {f: nulls[fld_dict[f]] for f in flds}
    a = arcpy.da.TableToNumPyArray(in_table=in_tbl, field_names=flds,
                                   skip_nulls=False, null_value=null_dict)
    return a
def _iu2iu(self):
    """Try to fit an integer data range into the integer output type using
    an offset (``self.inter``) only; fall back to the parent implementation
    (sign flip / range scaling) when offset alone is not enough.
    """
    # (u)int to (u)int
    mn, mx = [as_int(v) for v in self.finite_range()]
    # range may be greater than the largest integer for this type.
    # as_int needed to work round numpy 1.4.1 int casting bug
    out_dtype = self._out_dtype
    t_min, t_max = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
    type_range = as_int(t_max) - as_int(t_min)
    mn2mx = mx - mn
    if mn2mx <= type_range: # might offset be enough?
        if t_min == 0: # uint output - take min to 0
            # decrease offset with floor_exact, meaning mn >= t_min after
            # subtraction. But we may have pushed the data over t_max,
            # which we check below
            inter = floor_exact(mn - t_min, self.scaler_dtype)
        else: # int output - take midpoint to 0
            # ceil below increases inter, pushing scale up to 0.5 towards
            # -inf, because ints have abs min == abs max + 1
            midpoint = mn + as_int(np.ceil(mn2mx / 2.0))
            # Floor exact decreases inter, so pulling scaled values more
            # positive. This may make mx - inter > t_max
            inter = floor_exact(midpoint, self.scaler_dtype)
        # Need to check still in range after floor_exact-ing
        int_inter = as_int(inter)
        assert mn - int_inter >= t_min
        if mx - int_inter <= t_max:
            # offset alone suffices; record it and stop here
            self.inter = inter
            return
    # Try slope options (sign flip) and then range scaling
    super(SlopeInterArrayWriter, self)._iu2iu()
def set_signal_dtype(self, data_type, signal=None, clip=False):
    """Change the dtype of a signal, optionally rescaling the data so the
    old range fits into the new type instead of clipping.

    :param data_type: target numpy dtype (or the string 'custom')
    :param signal: signal to convert; defaults to the current UI selection
    :param clip: when True, skip rescaling and let the conversion clip
    """
    if signal is None:
        signal = self.get_selected_signal()
        self.record_code("signal = ui.get_selected_signal()")
    if isinstance(data_type, str) and data_type.lower() == 'custom':
        return  # TODO: Show dialog and prompt

    def _type_info(dt):
        """Return iinfo/finfo for dt, or None for non-numeric dtypes."""
        # np.floating replaces np.float, which was removed from numpy
        if np.issubdtype(dt, np.integer):
            return np.iinfo(dt)
        if np.issubdtype(dt, np.floating):
            return np.finfo(dt)
        return None

    if not clip:
        old_type = signal.data.dtype
        info = _type_info(data_type)
        old_info = _type_info(old_type)
        # Guard against non-numeric dtypes (previously an UnboundLocalError)
        if info is not None and old_info is not None and old_info.max > info.max:
            # rescale so the current maximum maps onto the new type's maximum
            signal.data *= float(info.max) / np.nanmax(signal.data)
            self.record_code("signal.data *= %f / np.nanmax(signal.data)"
                             % float(info.max))

    signal.change_dtype(data_type)
    dts = data_type.__name__
    if data_type.__module__ == 'numpy':
        dts = 'np.' + dts
    self.record_code("signal.change_dtype(%s)" % dts)
def test_big_game_functions():
    """Test that everything works when game_size > int max"""
    base = rsgame.basegame([100, 100], [30, 30])
    game = gamegen.add_profiles(base, 1000)

    int_max = np.iinfo(int).max
    assert game.num_all_profiles > int_max
    assert game.num_all_dpr_profiles > int_max
    assert np.all(game.profile_id(game.profiles) >= 0)
def checkTypeConversionNecessary(self, inputType = None, outputType = None):
    """Return True (and switch the normalization combo box to index 1) when
    ``outputType`` cannot represent the value range of ``inputType``, so a
    renormalization is required before conversion.
    """
    if inputType is None:
        if hasattr(self, "inputType"):
            inputType = self.inputType
        else:
            return False
    if outputType is None:
        outputType = self.getOutputDType()
    t = inputType
    # narrow exceptions: the bare "except:" here previously swallowed
    # everything, including KeyboardInterrupt/SystemExit
    try:
        info = numpy.iinfo(t)
    except (ValueError, TypeError):
        # not an integer type; fall back to float limits
        info = numpy.finfo(t)
    limits = [info.min, info.max]
    try:
        if not numpy.all(numpy.array(limits, dtype = outputType) == limits):
            self.normalizationComboBox.setCurrentIndex(1)
            return True
            #outputtype is too small to hold the limits,
            #renormalization has to be done beforehand
    except (OverflowError, ValueError, TypeError):
        self.normalizationComboBox.setCurrentIndex(1)
        return True
        #outputtype is too small to hold the limits,
        #renormalization has to be done beforehand
    return False
def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
    """Check array_ops.dequantize against a NumPy reference computation."""
    with self.cached_session():
        input_op = constant_op.constant(inputs, shape=[len(inputs)],
                                        dtype=dtype)
        dequantized = array_ops.dequantize(input_op, min_range, max_range)
        tf_ans = dequantized.eval()

        # TODO(vrv): Add support for DT_QINT32 quantization if needed.
        type_dict = {
            dtypes.quint8: np.uint8,
            dtypes.qint8: np.int8,
            dtypes.quint16: np.uint16,
            dtypes.qint16: np.int16
        }
        self.assertTrue(dtype in type_dict.keys())
        info = np.iinfo(type_dict[dtype])
        v_min, v_max = info.min, info.max
        self.assertTrue(min_range >= v_min)
        self.assertTrue(max_range <= v_max)

        type_range = v_max - v_min
        # signed types are shifted by half the representable range
        half_range = (type_range + 1) / 2 if v_min < 0 else 0.0

        np_ans = ((inputs.astype(np.float32) + half_range) *
                  (max_range - min_range) / type_range) + min_range
        self.assertAllClose(tf_ans, np_ans, rtol=1e-5, atol=1e-5)
def construct_lookup_variables(self):
    """Build the per-user negative-item table used for negative sampling.

    Fills ``self._negative_table`` so that row ``i`` starts with every item
    user ``i`` has NOT interacted with, and ``self._per_user_neg_count[i]``
    records how many such negatives there are.
    """
    # Materialize negatives for fast lookup sampling.
    start_time = timeit.default_timer()

    # Boundaries between users are the positions where consecutive entries of
    # _train_pos_users differ; presumably that array is sorted by user id --
    # TODO confirm with the caller.
    inner_bounds = np.argwhere(self._train_pos_users[1:] -
                               self._train_pos_users[:-1])[:, 0] + 1
    (upper_bound,) = self._train_pos_users.shape
    index_bounds = [0] + inner_bounds.tolist() + [upper_bound]

    self._negative_table = np.zeros(shape=(self._num_users, self._num_items),
                                    dtype=rconst.ITEM_DTYPE)

    # Set the table to the max value to make sure the embedding lookup will fail
    # if we go out of bounds, rather than just overloading item zero.
    self._negative_table += np.iinfo(rconst.ITEM_DTYPE).max
    assert self._num_items < np.iinfo(rconst.ITEM_DTYPE).max

    # Reuse arange during generation. np.delete will make a copy.
    full_set = np.arange(self._num_items, dtype=rconst.ITEM_DTYPE)

    self._per_user_neg_count = np.zeros(
        shape=(self._num_users,), dtype=np.int32)

    # Threading does not improve this loop. For some reason, the np.delete
    # call does not parallelize well. Multiprocessing incurs too much
    # serialization overhead to be worthwhile.
    for i in range(self._num_users):
        # items this user interacted with; everything else is a negative
        positives = self._train_pos_items[index_bounds[i]:index_bounds[i+1]]
        negatives = np.delete(full_set, positives)
        self._per_user_neg_count[i] = self._num_items - positives.shape[0]
        self._negative_table[i, :self._per_user_neg_count[i]] = negatives

    logging.info("Negative sample table built. Time: {:.1f} seconds".format(
        timeit.default_timer() - start_time))
def _range_scale(self):
    """ Calculate scaling, intercept based on data range and output type

    Sets ``self.slope`` and ``self.inter`` so that the finite data range
    maps linearly onto the representable range of ``self._out_dtype``.
    Raises ``ScalingError`` if the resulting slope/intercept are not finite.
    """
    mn, mx = self.finite_range() # Values of self.array.dtype type
    out_dtype = self._out_dtype
    if mx == mn: # Only one number in array
        # degenerate range: offset alone suffices, slope stays unchanged
        self.inter = mn
        return
    # Straight mx-mn can overflow.
    if mn.dtype.kind == 'f': # Already floats
        # float64 and below cast correctly to longdouble. Longdouble needs
        # no casting
        mn2mx = np.diff(np.array([mn, mx], dtype=np.longdouble))
    else: # max possible (u)int range is 2**64-1 (int64, uint64)
        # int_to_float covers this range. On windows longdouble is the same
        # as double so mn2mx will be 2**64 - thus overestimating slope
        # slightly. Casting to int needed to allow mx-mn to be larger than
        # the largest (u)int value
        mn2mx = int_to_float(as_int(mx) - as_int(mn), np.longdouble)
    if out_dtype.kind == 'f':
        # Type range, these are also floats
        info = type_info(out_dtype)
        t_mn_mx = info['min'], info['max']
    else:
        t_mn_mx = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
        t_mn_mx= [int_to_float(v, np.longdouble) for v in t_mn_mx]
    # We want maximum precision for the calculations. Casting will
    # not lose precision because min/max are of fp type.
    assert [v.dtype.kind for v in t_mn_mx] == ['f', 'f']
    scaled_mn2mx = np.diff(np.array(t_mn_mx, dtype = np.longdouble))
    # slope maps the data range onto the output type range; inter anchors
    # the data minimum at the output type minimum
    slope = mn2mx / scaled_mn2mx
    self.inter = mn - t_mn_mx[0] * slope
    self.slope = slope
    if not np.all(np.isfinite([self.slope, self.inter])):
        raise ScalingError("Slope / inter not both finite")
def __init__(self, name, unit='s', nullable=True):
    """Duration interval type backed by a 64-bit signed integer; value
    bounds are the full int64 range."""
    bounds = np.iinfo('int64')
    super(DurationIntervalType, self).__init__(
        name, True, 64, nullable=nullable,
        min_value=bounds.min, max_value=bounds.max)
    self.unit = unit
def iter_raw_buffers(self):
    """Return an iterator over raw buffers.

    Returns
    -------
    raw_buffer : generator
        Generator for iteration over raw buffers.
    """
    # Build (start, stop) sample windows; self.tmax_samp must be included.
    window = self.buffer_size
    starts = list(range(self.tmin_samp, self.tmax_samp, window))
    stops = list(range(self.tmin_samp + window, self.tmax_samp + 1, window))
    iter_times = list(zip(starts, stops))

    last_stop = iter_times[-1][1] if iter_times else self.tmin_samp
    if last_stop < self.tmax_samp + 1:
        # add a tail window covering the remaining samples
        iter_times.append((last_stop, self.tmax_samp + 1))

    for start, stop in iter_times:
        # wait for correct number of samples to be available
        self.ft_client.wait(stop, np.iinfo(np.uint32).max,
                            np.iinfo(np.uint32).max)
        # get the samples (stop index is inclusive)
        raw_buffer = self.ft_client.getData([start, stop - 1]).transpose()
        yield raw_buffer
def munchetal_filter(im, wlevel, sigma, wname='db15'):
    """Suppress vertical stripe artefacts in ``im`` by damping the
    corresponding frequencies in the wavelet domain (Munch et al. approach).
    """
    # Wavelet decomposition:
    coeffs = pywt.wavedec2(im.astype(np.float32), wname, level=wlevel)
    filtered = [coeffs[0]]
    two_sigma_sq = 2 * (sigma ** 2)
    # FFT transform of horizontal frequency bands:
    for level in range(1, wlevel + 1):
        band_h, band_v, band_d = coeffs[level]
        # FFT along the vertical axis:
        spectrum = np.fft.fftshift(np.fft.fft(band_v, axis=0))
        rows, cols = spectrum.shape
        # Gaussian damping of vertical stripes:
        freq = np.arange(-np.floor(rows / 2.), -np.floor(rows / 2.) + rows)
        damp = 1 - np.exp(-(freq ** 2) / two_sigma_sq)
        damp_2d = np.kron(np.ones((1, cols)),
                          damp.reshape((damp.shape[0], 1)))
        spectrum = spectrum * damp_2d
        # Inverse FFT:
        band_v_flt = np.real(np.fft.ifft(np.fft.ifftshift(spectrum), axis=0))
        filtered.append((band_h, band_v_flt, band_d))
    # Get wavelet reconstruction:
    im_f = np.real(pywt.waverec2(filtered, wname))
    # Return image according to input type:
    if (im.dtype == 'uint16'):
        # Check extrema for uint16 images:
        lo = np.iinfo(np.uint16).min
        hi = np.iinfo(np.uint16).max
        im_f = np.clip(im_f, lo, hi)
        # Return filtered image (an additional row and/or column might be present):
        return im_f[0:im.shape[0], 0:im.shape[1]].astype(np.uint16)
    else:
        return im_f[0:im.shape[0], 0:im.shape[1]]
def getGDALRasterType(self):
    ''' Gets the output raster type '''
    # combo-box index -> (GDAL data type, numpy dtype used for the range)
    int_types = {
        0: (osgeo.gdal.GDT_Byte, numpy.uint8),
        1: (osgeo.gdal.GDT_UInt16, numpy.uint16),
        2: (osgeo.gdal.GDT_Int16, numpy.int16),
        3: (osgeo.gdal.GDT_UInt32, numpy.uint32),
        4: (osgeo.gdal.GDT_Int32, numpy.int32),
    }
    float_types = {
        5: (osgeo.gdal.GDT_Float32, numpy.float32),
        6: (osgeo.gdal.GDT_Float64, numpy.float64),
    }
    index = self.numberComboBox.currentIndex()
    if index in int_types:
        gdal_type, np_type = int_types[index]
        info = numpy.iinfo(np_type)
        return (gdal_type, info.min, info.max)
    if index in float_types:
        gdal_type, np_type = float_types[index]
        info = numpy.finfo(np_type)
        return (gdal_type, info.min, info.max)
    # any other index falls through and returns None, as before
def testInfNan(self):
    """Casting inf/nan into float keeps the value; casting into int
    saturates to a byte-order-dependent int bound."""
    int32_info = np.iinfo(np.int32)
    int64_info = np.iinfo(np.int64)
    self._compare(np.inf, np.float32, np.inf, False)
    self._compare(np.inf, np.float64, np.inf, False)
    if sys.byteorder == "big":
        self._compare(np.inf, np.int32, int32_info.max, False)
        self._compare(np.inf, np.int64, int64_info.max, False)
    else:
        self._compare(np.inf, np.int32, int32_info.min, False)
        self._compare(np.inf, np.int64, int64_info.min, False)
    self._compare(-np.inf, np.float32, -np.inf, False)
    self._compare(-np.inf, np.float64, -np.inf, False)
    self._compare(-np.inf, np.int32, int32_info.min, False)
    self._compare(-np.inf, np.int64, int64_info.min, False)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, False)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, False)), True)
    self._compare(np.nan, np.int32, int32_info.min, False)
    self._compare(np.nan, np.int64, int64_info.min, False)

    # same checks with the quantize path enabled
    self._compare(np.inf, np.float32, np.inf, True)
    self._compare(np.inf, np.float64, np.inf, True)
    self._compare(-np.inf, np.float32, -np.inf, True)
    self._compare(-np.inf, np.float64, -np.inf, True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, True)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True)
def clean(data):
    """Remove per-spectrum baseline offsets from a (nspec, 2592) block,
    re-centre the data at 128 and zero the known-bad columns.

    :param data: integer array of shape (n_spectra, 2592)
    :return: cleaned array with the same dtype as ``data``
    """
    # np.int64 instead of np.int: the np.int alias was removed from numpy
    data_wso = data.astype(np.int64)
    masked = np.ma.array(data_wso)
    # every 16th channel is a known-bad column; exclude it from statistics
    masked[:, np.arange(0, 2592, 16)] = np.ma.masked
    # Set the mean of each spectra to zero, estimating the baseline from the
    # outer fifths of the band.  Integer division (//) is required: under
    # Python 3, 2592/5 is a float and cannot be used as a slice index.
    fifth = 2592 // 5
    med_col = np.min((
        np.mean(masked[:, :fifth], axis=1),
        np.mean(masked[:, -fifth:], axis=1)),
        axis=0)
    data_wso = data_wso - med_col[:, np.newaxis]
    # Shift the mean to 128
    data_wso = data_wso + 128
    # Set the right proprieties to the data
    data_wso[:, np.arange(0, 2592, 16)] = 0
    dtype_min = np.iinfo(data.dtype).min
    dtype_max = np.iinfo(data.dtype).max
    np.clip(data_wso, dtype_min, dtype_max, out=data_wso)
    data_wso = np.around(data_wso)
    data = data_wso.astype(data.dtype)
    return data
def test1DDataRandom(self):
    """Test pixmap generation for 1D data of different size and types."""
    self._log("TestLog10Colormap.test1DDataRandom")
    for cmapName, colormap in self.COLORMAPS.items():
        for size in self.SIZES:
            for dtype in self.DTYPES:
                for start, end in self.RANGES:
                    # integer dtypes have iinfo; float dtypes fall back
                    # to finfo
                    try:
                        info = np.iinfo(dtype)
                    except ValueError:
                        info = np.finfo(dtype)
                    dtypeMin, dtypeMax = info.min, info.max
                    if dtypeMin < 0:
                        # signed: centre random values around zero
                        data = np.asarray(
                            -dtypeMax/2. + np.random.rand(size) * dtypeMax,
                            dtype=dtype)
                    else:
                        data = np.asarray(np.random.rand(size) * dtypeMax,
                                          dtype=dtype)
                    duration = self._testColormap(data, colormap,
                                                  start, end, isLog10=True)
                    self._log('1D Random', cmapName, dtype, size,
                              (start, end), duration)
def _daal_fit_classifier(self, X, y, sample_weight=None):
    """Fit the random-forest classifier with the daal4py backend.

    Trains a ``decision_forest_classification_training`` model from the
    estimator's scikit-learn-style hyperparameters, stores it on
    ``self.daal_model_`` and optionally computes the OOB score.
    """
    y = check_array(y, ensure_2d=False, dtype=None)
    y, expanded_class_weight = self._validate_y_class_weight(y)
    n_classes_ = self.n_classes_[0]
    classes_ = self.classes_[0]
    self.n_features_ = X.shape[1]

    # fold class weights into the per-sample weights
    if expanded_class_weight is not None:
        if sample_weight is not None:
            sample_weight = sample_weight * expanded_class_weight
        else:
            sample_weight = expanded_class_weight
    if sample_weight is not None:
        sample_weight = [sample_weight]

    # derive a deterministic seed from the sklearn random_state
    rs_ = check_random_state(self.random_state)
    seed_ = rs_.randint(0, np.iinfo('i').max)

    if n_classes_ < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    # create algorithm
    X_fptype = getFPType(X)
    daal_engine_ = daal4py.engines_mt2203(seed=seed_, fptype=X_fptype)
    features_per_node_ = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)

    n_samples_bootstrap_ = _get_n_samples_bootstrap(
        n_samples=X.shape[0],
        max_samples=self.max_samples
    )

    if not self.bootstrap and self.oob_score:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    # fractional min_samples_leaf / min_samples_split are converted to
    # absolute counts below, mirroring sklearn's semantics
    dfc_algorithm = daal4py.decision_forest_classification_training(
        nClasses = int(n_classes_),
        fptype = X_fptype,
        method = 'defaultDense',
        nTrees = int(self.n_estimators),
        observationsPerTreeFraction = n_samples_bootstrap_ if self.bootstrap is True else 1.,
        featuresPerNode = int(features_per_node_),
        maxTreeDepth = int(0 if self.max_depth is None else self.max_depth),
        minObservationsInLeafNode = (self.min_samples_leaf
                                     if isinstance(self.min_samples_leaf, numbers.Integral)
                                     else int(ceil(self.min_samples_leaf * X.shape[0]))),
        engine = daal_engine_,
        impurityThreshold = float(0.0 if self.min_impurity_split is None else self.min_impurity_split),
        varImportance = "MDI",
        resultsToCompute = "",
        memorySavingMode = False,
        bootstrap = bool(self.bootstrap),
        minObservationsInSplitNode = (self.min_samples_split
                                      if isinstance(self.min_samples_split, numbers.Integral)
                                      else int(ceil(self.min_samples_split * X.shape[0]))),
        minWeightFractionInLeafNode = self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode = self.min_impurity_decrease,
        maxLeafNodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    )
    self._cached_estimators_ = None
    # compute
    dfc_trainingResult = dfc_algorithm.compute(X, y, sample_weight)

    # get resulting model
    model = dfc_trainingResult.model
    self.daal_model_ = model

    # compute oob_score_
    if self.oob_score:
        self.estimators_ = self._estimators_
        self._set_oob_score(X, y)

    return self
def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                   penalty, dual, verbose, max_iter, tol,
                   random_state=None, multi_class='ovr',
                   loss='logistic_regression', epsilon=0.1,
                   sample_weight=None):
    """Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.

    Preprocessing is done in this function before supplying it to liblinear.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X

    C : float
        Inverse of cross-validation parameter. Lower the C, the more
        the penalization.

    fit_intercept : bool
        Whether or not to fit the intercept, that is to add a intercept
        term to the decision function.

    intercept_scaling : float
        LibLinear internally penalizes the intercept and this term is subject
        to regularization just like the other terms of the feature vector.
        In order to avoid this, one should increase the intercept_scaling.
        such that the feature vector becomes [x, intercept_scaling].

    class_weight : dict or 'balanced', default=None
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    penalty : {'l1', 'l2'}
        The norm of the penalty used in regularization.

    dual : bool
        Dual or primal formulation,

    verbose : int
        Set verbose to any positive number for verbosity.

    max_iter : int
        Number of iterations.

    tol : float
        Stopping condition.

    random_state : int or RandomState instance, default=None
        Controls the pseudo random number generation for shuffling the data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    multi_class : {'ovr', 'crammer_singer'}, default='ovr'
        `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`
        optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        as it is consistent it is seldom used in practice and rarely leads to
        better accuracy and is more expensive to compute.
        If `crammer_singer` is chosen, the options loss, penalty and dual will
        be ignored.

    loss : {'logistic_regression', 'hinge', 'squared_hinge', \
            'epsilon_insensitive', 'squared_epsilon_insensitive'}, \
            default='logistic_regression'
        The loss function used to fit the model.

    epsilon : float, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set epsilon=0.

    sample_weight : array-like of shape (n_samples,), default=None
        Weights assigned to each sample.

    Returns
    -------
    coef_ : ndarray of shape (n_features, n_features + 1)
        The coefficient vector got by minimizing the objective function.

    intercept_ : float
        The intercept term added to the vector.

    n_iter_ : int
        Maximum number of iterations run across all classes.
    """
    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
        # Classification losses: encode labels and derive class weights.
        enc = LabelEncoder()
        y_ind = enc.fit_transform(y)
        classes_ = enc.classes_
        if len(classes_) < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % classes_[0])

        class_weight_ = compute_class_weight(class_weight,
                                             classes=classes_, y=y)
    else:
        # Regression losses: no classes, hence no class weights.
        class_weight_ = np.empty(0, dtype=np.float64)
        y_ind = y
    liblinear.set_verbosity_wrap(verbose)
    rnd = check_random_state(random_state)
    if verbose:
        print('[LibLinear]', end='')

    # LinearSVC breaks when intercept_scaling is <= 0
    bias = -1.0
    if fit_intercept:
        if intercept_scaling <= 0:
            raise ValueError("Intercept scaling is %r but needs to be greater "
                             "than 0. To disable fitting an intercept,"
                             " set fit_intercept=False." % intercept_scaling)
        else:
            bias = intercept_scaling

    libsvm.set_verbosity_wrap(verbose)
    libsvm_sparse.set_verbosity_wrap(verbose)
    liblinear.set_verbosity_wrap(verbose)

    # Liblinear doesn't support 64bit sparse matrix indices yet
    if sp.issparse(X):
        _check_large_sparse(X)

    # LibLinear wants targets as doubles, even for classification
    y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
    y_ind = np.require(y_ind, requirements="W")

    sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)

    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
    raw_coef_, n_iter_ = liblinear.train_wrap(
        X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
        class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
        epsilon, sample_weight)
    # Regarding rnd.randint(..) in the above signature:
    # seed for srand in range [0..INT_MAX); due to limitations in Numpy
    # on 32-bit platforms, we can't get to the UINT_MAX limit that
    # srand supports
    n_iter_ = max(n_iter_)
    if n_iter_ >= max_iter:
        warnings.warn("Liblinear failed to converge, increase "
                      "the number of iterations.", ConvergenceWarning)

    if fit_intercept:
        # liblinear appends the intercept as the last column of raw_coef_.
        coef_ = raw_coef_[:, :-1]
        intercept_ = intercept_scaling * raw_coef_[:, -1]
    else:
        coef_ = raw_coef_
        intercept_ = 0.

    return coef_, intercept_, n_iter_
def fit(self, X, y, sample_weight=None):
    """Fit the SVM model according to the given training data.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features) \
            or (n_samples, n_samples)
        Training vectors, where n_samples is the number of samples
        and n_features is the number of features.
        For kernel="precomputed", the expected shape of X is
        (n_samples, n_samples).

    y : array-like of shape (n_samples,)
        Target values (class labels in classification, real numbers in
        regression)

    sample_weight : array-like of shape (n_samples,), default=None
        Per-sample weights. Rescale C per sample. Higher weights
        force the classifier to put more emphasis on these points.

    Returns
    -------
    self : object

    Notes
    -----
    If X and y are not C-ordered and contiguous arrays of np.float64 and
    X is not a scipy.sparse.csr_matrix, X and/or y may be copied.

    If X is a dense array, then the other methods will not support sparse
    matrices as input.
    """
    rnd = check_random_state(self.random_state)

    sparse = sp.isspmatrix(X)
    if sparse and self.kernel == "precomputed":
        raise TypeError("Sparse precomputed kernels are not supported.")
    # A callable kernel forces the dense code path even for sparse X.
    self._sparse = sparse and not callable(self.kernel)

    if hasattr(self, 'decision_function_shape'):
        if self.decision_function_shape not in ('ovr', 'ovo'):
            raise ValueError(
                f"decision_function_shape must be either 'ovr' or 'ovo', "
                f"got {self.decision_function_shape}."
            )

    if callable(self.kernel):
        # Validation is deferred to the kernel itself; only check lengths.
        check_consistent_length(X, y)
    else:
        X, y = self._validate_data(X, y, dtype=np.float64,
                                   order='C', accept_sparse='csr',
                                   accept_large_sparse=False)

    y = self._validate_targets(y)

    sample_weight = np.asarray([]
                               if sample_weight is None
                               else sample_weight, dtype=np.float64)
    solver_type = LIBSVM_IMPL.index(self._impl)

    # input validation
    n_samples = _num_samples(X)
    # NOTE(review): solver_type 2 is exempt from the y-length check --
    # presumably the one-class SVM, which takes no targets; confirm
    # against LIBSVM_IMPL.
    if solver_type != 2 and n_samples != y.shape[0]:
        raise ValueError("X and y have incompatible shapes.\n" +
                         "X has %s samples, but y has %s." %
                         (n_samples, y.shape[0]))

    if self.kernel == "precomputed" and n_samples != X.shape[1]:
        raise ValueError("Precomputed matrix must be a square matrix."
                         " Input is a {}x{} matrix."
                         .format(X.shape[0], X.shape[1]))

    if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:
        raise ValueError("sample_weight and X have incompatible shapes: "
                         "%r vs %r\n"
                         "Note: Sparse matrices cannot be indexed w/"
                         "boolean masks (use `indices=True` in CV)."
                         % (sample_weight.shape, X.shape))

    kernel = 'precomputed' if callable(self.kernel) else self.kernel
    if kernel == 'precomputed':
        # unused but needs to be a float for cython code that ignores
        # it anyway
        self._gamma = 0.
    elif isinstance(self.gamma, str):
        if self.gamma == 'scale':
            # var = E[X^2] - E[X]^2 if sparse
            X_var = ((X.multiply(X)).mean() - (X.mean()) ** 2
                     if sparse else X.var())
            self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0
        elif self.gamma == 'auto':
            self._gamma = 1.0 / X.shape[1]
        else:
            raise ValueError(
                "When 'gamma' is a string, it should be either 'scale' or "
                "'auto'. Got '{}' instead.".format(self.gamma)
            )
    else:
        self._gamma = self.gamma

    fit = self._sparse_fit if self._sparse else self._dense_fit
    if self.verbose:
        print('[LibSVM]', end='')

    seed = rnd.randint(np.iinfo('i').max)
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
    # see comment on the other call to np.iinfo in this file

    self.shape_fit_ = X.shape if hasattr(X, "shape") else (n_samples, )

    # In binary case, we need to flip the sign of coef, intercept and
    # decision function. Use self._intercept_ and self._dual_coef_
    # internally.
    self._intercept_ = self.intercept_.copy()
    self._dual_coef_ = self.dual_coef_
    if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:
        self.intercept_ *= -1
        self.dual_coef_ = -self.dual_coef_

    return self
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
from ..utils import check_random_state, check_array, column_or_1d
from ..utils import indices_to_mask
from ..utils.metaestimators import if_delegate_has_method
from ..utils.multiclass import check_classification_targets
from ..utils.random import sample_without_replacement
from ..utils.validation import has_fit_parameter, check_is_fitted, \
    _check_sample_weight, _deprecate_positional_args


__all__ = ["BaggingClassifier", "BaggingRegressor"]

# Largest usable 32-bit value; used as the exclusive upper bound when
# drawing per-estimator random seeds.
MAX_INT = np.iinfo(np.int32).max


def _generate_indices(random_state, bootstrap, n_population, n_samples):
    """Draw randomly sampled indices.

    Returns ``n_samples`` indices from ``range(n_population)``: drawn
    with replacement when ``bootstrap`` is true, without replacement
    otherwise.
    """
    # Draw sample indices
    if bootstrap:
        indices = random_state.randint(0, n_population, n_samples)
    else:
        indices = sample_without_replacement(n_population, n_samples,
                                             random_state=random_state)

    return indices


def _generate_bagging_indices(random_state, bootstrap_features,
def fit(
    self,
    train_X,
    train_y,
    epochs,
    batch_size,
    input_time_length=None,
    validation_data=None,
    model_constraint=None,
    remember_best_column=None,
    scheduler=None,
    log_0_epoch=True,
):
    """
    Fit the model using the given training data.

    Will set `epochs_df` variable with a pandas dataframe to the history
    of the training process.

    Parameters
    ----------
    train_X: ndarray
        Training input data
    train_y: 1darray
        Training labels
    epochs: int
        Number of epochs to train
    batch_size: int
    input_time_length: int, optional
        Super crop size, what temporal size is pushed forward through
        the network, see cropped decoding tuturial.
    validation_data: (ndarray, 1darray), optional
        X and y for validation set if wanted
    model_constraint: object, optional
        You can supply :class:`.MaxNormDefaultConstraint` if wanted.
    remember_best_column: string, optional
        In case you want to do an early stopping/reset parameters to some
        "best" epoch, define here the monitored value whose minimum
        determines the best epoch.
    scheduler: 'cosine' or None, optional
        Whether to use cosine annealing (:class:`.CosineAnnealing`).
    log_0_epoch: bool
        Whether to compute the metrics once before training as well.

    Returns
    -------
    exp:
        Underlying braindecode :class:`.Experiment`
    """
    if (not hasattr(self, "compiled")) or (not self.compiled):
        raise ValueError(
            "Compile the model first by calling model.compile(loss, optimizer, metrics)"
        )

    if self.cropped and input_time_length is None:
        raise ValueError(
            "In cropped mode, need to specify input_time_length,"
            "which is the number of timesteps that will be pushed through"
            "the network in a single pass.")

    train_X = _ensure_float32(train_X)
    if self.cropped:
        # Run a dummy forward pass to discover how many predictions the
        # network produces per super crop (needed by the crop iterator).
        self.network.eval()
        test_input = np_to_var(
            np.ones(
                (1, train_X[0].shape[0], input_time_length)
                + train_X[0].shape[2:],
                dtype=np.float32,
            ))
        # Pad trailing singleton dims until the input is 4-dimensional.
        while len(test_input.size()) < 4:
            test_input = test_input.unsqueeze(-1)
        if self.cuda:
            test_input = test_input.cuda()
        out = self.network(test_input)
        n_preds_per_input = out.cpu().data.numpy().shape[2]
        self.iterator = CropsFromTrialsIterator(
            batch_size=batch_size,
            input_time_length=input_time_length,
            n_preds_per_input=n_preds_per_input,
            seed=self.seed_rng.randint(0, np.iinfo(np.int32).max - 1),
        )
    else:
        self.iterator = BalancedBatchSizeIterator(
            batch_size=batch_size,
            seed=self.seed_rng.randint(0, np.iinfo(np.int32).max - 1),
        )
    # Without the 0-epoch evaluation, one fewer training epoch keeps the
    # total number of reported rows consistent.
    if log_0_epoch:
        stop_criterion = MaxEpochs(epochs)
    else:
        stop_criterion = MaxEpochs(epochs - 1)
    train_set = SignalAndTarget(train_X, train_y)
    optimizer = self.optimizer
    if scheduler is not None:
        assert (scheduler == "cosine"
                ), "Supply either 'cosine' or None as scheduler."
        # Count batches per epoch by exhausting the iterator once.
        n_updates_per_epoch = sum([
            1 for _ in self.iterator.get_batches(train_set, shuffle=True)
        ])
        n_updates_per_period = n_updates_per_epoch * epochs
        if scheduler == "cosine":
            scheduler = CosineAnnealing(n_updates_per_period)
        schedule_weight_decay = False
        if optimizer.__class__.__name__ == "AdamW":
            schedule_weight_decay = True
        optimizer = ScheduledOptimizer(
            scheduler,
            self.optimizer,
            schedule_weight_decay=schedule_weight_decay,
        )
    loss_function = self.loss
    if self.cropped:
        # In cropped mode, average predictions over the time axis before
        # computing the loss.
        loss_function = lambda outputs, targets: self.loss(
            th.mean(outputs, dim=2), targets)
    if validation_data is not None:
        valid_X = _ensure_float32(validation_data[0])
        valid_y = validation_data[1]
        valid_set = SignalAndTarget(valid_X, valid_y)
    else:
        valid_set = None

    test_set = None
    self.monitors = [LossMonitor()]
    if self.cropped:
        self.monitors.append(
            CroppedTrialMisclassMonitor(input_time_length))
    else:
        self.monitors.append(MisclassMonitor())
    if self.extra_monitors is not None:
        self.monitors.extend(self.extra_monitors)
    self.monitors.append(RuntimeMonitor())
    exp = Experiment(
        self.network,
        train_set,
        valid_set,
        test_set,
        iterator=self.iterator,
        loss_function=loss_function,
        optimizer=optimizer,
        model_constraint=model_constraint,
        monitors=self.monitors,
        stop_criterion=stop_criterion,
        remember_best_column=remember_best_column,
        run_after_early_stop=False,
        cuda=self.cuda,
        log_0_epoch=log_0_epoch,
        do_early_stop=(remember_best_column is not None),
    )
    exp.run()
    self.epochs_df = exp.epochs_df
    return exp
def __init__(self, vFunc=None, dtype=numpy.uint8):
    """Build the value lookup table for this filter.

    The table has one entry for every representable value of ``dtype``
    (``iinfo(dtype).max + 1`` entries), each mapped through ``vFunc``.
    """
    table_size = numpy.iinfo(dtype).max + 1
    self._vLookupArray = utils.createLookupArray(vFunc, table_size)
# # DATA TYPE CONVERSION - float64 -> uint16 # # VOLUME # remove NaN rawVolume = np.nan_to_num(rawVolume) # zero out negative values rawVolume[rawVolume < 0] = 0.0 # normalize to range 0.0 ... 1.0 rawVolume = rawVolume / np.max(rawVolume) # scale up to 65535 (uin16 max value) rawVolume = rawVolume * np.iinfo(np.uint16).max # actually switch to uint16 rawVolume = rawVolume.astype(np.uint16) # SEGMENTATION - is already uint8 # # ROTATION - rotate cw and ccw # #test data if useTestData: #ccw rotation if 0 <= j <= 1 or 7 <= j <= 23 or 34 <= j <= 34 or 37 <= j <= 44 or 46 <= j <= 54 or 56 <= j <= 65: rawVolume = np.rot90(rawVolume, 1)
import unittest import binascii import pickle import numpy from threatexchange.hashing.pdq_faiss_matcher import PDQFlatHashIndex, PDQMultiHashIndex test_hashes = [ "0000000000000000000000000000000000000000000000000000000000000000", "000000000000000000000000000000000000000000000000000000000000ffff", "0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f", "f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0", "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ] MAX_UNSIGNED_INT64 = numpy.iinfo(numpy.uint64).max class MixinTests: class PDQHashIndexCommonTests(unittest.TestCase): index = None def assertEqualPDQHashSearchResults(self, result, expected): self.assertEqual(len(result), len(expected), "search results not of expected length") for (r, e) in zip(result, expected): self.assertCountEqual(r, e) def test_search_index_for_exact_matches(self): query = test_hashes[:1] result = self.index.search(query, 0)
def __init__(self, vertices, groups, skip_tests=False,
        node_vertex_consistency_tolerance=None,
        skip_element_orientation_test=False,
        nodal_adjacency=None,
        facial_adjacency_groups=None,
        boundary_tags=None,
        vertex_id_dtype=np.int32,
        element_id_dtype=np.int32,
        is_conforming=None):
    """
    The following are keyword-only:

    :arg skip_tests: Skip mesh tests, in case you want to load a broken
        mesh anyhow and then fix it inside of this data structure.
    :arg node_vertex_consistency_tolerance: If *False*, do not check
        for consistency between vertex and nodal data. If *None*, use
        the (small, near FP-epsilon) default tolerance.
    :arg skip_element_orientation_test: If *False*, check that
        element orientation is positive in volume meshes
        (i.e. ones where ambient and topological dimension match).
    :arg nodal_adjacency: One of three options:
        *None*, in which case this information
        will be deduced from vertex adjacency. *False*, in which case
        this information will be marked unavailable (such as if there are
        hanging nodes in the geometry, so that vertex adjacency does not
        convey the full picture), and references to
        :attr:`element_neighbors_starts` and :attr:`element_neighbors`
        will result in exceptions. Lastly, a tuple
        :class:`NodalAdjacency` object.
    :arg facial_adjacency_groups: One of three options:
        *None*, in which case this information
        will be deduced from vertex adjacency. *False*, in which case
        this information will be marked unavailable (such as if there are
        hanging nodes in the geometry, so that vertex adjacency does not
        convey the full picture), and references to
        :attr:`element_neighbors_starts` and :attr:`element_neighbors`
        will result in exceptions. Lastly, a data structure as described in
        :attr:`facial_adjacency_groups` may be passed.
    """
    # Stitch the element groups together into one global
    # element/node numbering.
    el_nr = 0
    node_nr = 0

    new_groups = []
    for g in groups:
        ng = g.join_mesh(el_nr, node_nr)
        new_groups.append(ng)
        el_nr += ng.nelements
        node_nr += ng.nnodes

    # {{{ boundary tags

    if boundary_tags is None:
        boundary_tags = []
    else:
        # copy so the caller's list is not mutated by the appends below
        boundary_tags = boundary_tags[:]

    if BTAG_NONE in boundary_tags:
        raise ValueError("BTAG_NONE is not allowed to be part of "
                "boundary_tags")
    if BTAG_ALL not in boundary_tags:
        boundary_tags.append(BTAG_ALL)
    if BTAG_REALLY_ALL not in boundary_tags:
        boundary_tags.append(BTAG_REALLY_ALL)

    # Boundary tags are encoded as a bit field inside (negated) neighbor
    # indices, so the tag count is bounded by the dtype's bit width.
    max_boundary_tag_count = int(
            np.log(np.iinfo(element_id_dtype).max)/np.log(2))
    if len(boundary_tags) > max_boundary_tag_count:
        raise ValueError("too few bits in element_id_dtype to represent all "
                "boundary tags")

    btag_to_index = dict(
            (btag, i) for i, btag in enumerate(boundary_tags))

    # }}}

    # Adjacency cannot be deduced automatically for non-conforming meshes.
    if not is_conforming:
        if nodal_adjacency is None:
            nodal_adjacency = False
        if facial_adjacency_groups is None:
            facial_adjacency_groups = False

    if nodal_adjacency is not False and nodal_adjacency is not None:
        if not isinstance(nodal_adjacency, NodalAdjacency):
            # Accept a raw (neighbors_starts, neighbors) tuple, too.
            nb_starts, nbs = nodal_adjacency
            nodal_adjacency = NodalAdjacency(
                    neighbors_starts=nb_starts,
                    neighbors=nbs)

            del nb_starts
            del nbs

    Record.__init__(
            self, vertices=vertices, groups=new_groups,
            _nodal_adjacency=nodal_adjacency,
            _facial_adjacency_groups=facial_adjacency_groups,
            boundary_tags=boundary_tags,
            btag_to_index=btag_to_index,
            vertex_id_dtype=np.dtype(vertex_id_dtype),
            element_id_dtype=np.dtype(element_id_dtype),
            is_conforming=is_conforming,
            )

    if not skip_tests:
        if node_vertex_consistency_tolerance is not False:
            assert _test_node_vertex_consistency(
                    self, node_vertex_consistency_tolerance)

        for g in self.groups:
            assert g.vertex_indices.dtype == self.vertex_id_dtype

        if nodal_adjacency:
            # Sanity-check shapes/dtypes of caller-supplied adjacency data.
            assert nodal_adjacency.neighbors_starts.shape == (self.nelements+1,)
            assert len(nodal_adjacency.neighbors.shape) == 1

            assert (nodal_adjacency.neighbors_starts.dtype
                    == self.element_id_dtype)
            assert nodal_adjacency.neighbors.dtype == self.element_id_dtype

        if facial_adjacency_groups:
            assert len(facial_adjacency_groups) == len(self.groups)
            for fagrp_map in facial_adjacency_groups:
                for fagrp in six.itervalues(fagrp_map):
                    nfagrp_elements, = fagrp.elements.shape

                    assert fagrp.element_faces.dtype == self.face_id_dtype
                    assert fagrp.element_faces.shape == (nfagrp_elements,)

                    assert fagrp.neighbors.dtype == self.element_id_dtype
                    assert fagrp.neighbors.shape == (nfagrp_elements,)

                    assert fagrp.neighbor_faces.dtype == self.face_id_dtype
                    assert fagrp.neighbor_faces.shape == (nfagrp_elements,)

                    if fagrp.ineighbor_group is None:
                        # Boundary faces are stored as negative neighbor
                        # indices carrying the boundary-tag bit field;
                        # every boundary face must carry BTAG_REALLY_ALL.
                        is_bdry = fagrp.neighbors < 0
                        assert ((1 << btag_to_index[BTAG_REALLY_ALL])
                                & -fagrp.neighbors[is_bdry]).all(), \
                                "boundary faces without BTAG_REALLY_ALL found"

        from meshmode.mesh.processing import \
                test_volume_mesh_element_orientations

        if self.dim == self.ambient_dim and not skip_element_orientation_test:
            # only for volume meshes, for now
            assert test_volume_mesh_element_orientations(self), \
                    "negatively oriented elements found"
def unstructured_from_composite_arrays(points, arrays, controller=None):
    """Given a set of VTKCompositeDataArrays, creates a vtkUnstructuredGrid.

    The main goal of this function is to transform the output of
    XXX_per_block() methods to a single dataset that can be visualized
    and further processed.

    Here arrays is an iterable (e.g. list) of (array, name) pairs. Here is
    an example:

    centroid = mean_per_block(composite_data.Points)
    T = mean_per_block(composite_data.PointData['Temperature'])
    ug = unstructured_from_composite_arrays(centroid, (T, 'Temperature'))

    When called in parallel, this function makes sure that each array in
    the input dataset is represented only on 1 process. This is important
    because methods like mean_per_block() return the same value for blocks
    that are partitioned on all of the participating processes. If the
    same point were to be created across multiple processes in the output,
    filters like histogram would report duplicate values erroneously.
    """
    try:
        dataset = points.DataSet
    except AttributeError:
        dataset = None

    if dataset is None and points is not dsa.NoneArray:
        raise ValueError(
            "Expecting a points arrays with an associated dataset.")

    if points is dsa.NoneArray:
        cpts = []
    else:
        cpts = points.Arrays

    # Default (serial) ownership: this single process owns everything.
    ownership = numpy.zeros(len(cpts), dtype=numpy.int32)
    rank = 0

    # Let's first create a map of array index to composite ids.
    if dataset is None:
        ids = []
    else:
        it = dataset.NewIterator()
        it.UnRegister(None)
        itr = cpts.__iter__()
        ids = numpy.empty(len(cpts), dtype=numpy.int32)
        counter = 0
        while not it.IsDoneWithTraversal():
            _id = it.GetCurrentFlatIndex()
            ids[counter] = _id
            counter += 1
            it.GoToNextItem()

    if controller is None and vtkMultiProcessController is not None:
        controller = vtkMultiProcessController.GetGlobalController()
    if controller and controller.IsA("vtkMPIController"):
        from mpi4py import MPI
        comm = vtkMPI4PyCommunicator.ConvertToPython(
            controller.GetCommunicator())
        rank = comm.Get_rank()

        # Determine the max id to use for reduction
        # operations

        # Get all ids from dataset, including empty ones.
        lmax_id = numpy.int32(0)
        if dataset is not None:
            it = dataset.NewIterator()
            it.UnRegister(None)
            it.SetSkipEmptyNodes(False)
            while not it.IsDoneWithTraversal():
                _id = it.GetCurrentFlatIndex()
                lmax_id = numpy.max((lmax_id, _id)).astype(numpy.int32)
                it.GoToNextItem()
        max_id = numpy.array(0, dtype=numpy.int32)
        mpitype = _lookup_mpi_type(numpy.int32)
        comm.Allreduce([lmax_id, mpitype], [max_id, mpitype], MPI.MAX)

        # Now we figure out which processes have which ids
        # int32-max acts as "no owner" so MIN-reduction picks real owners.
        lownership = numpy.empty(max_id, dtype=numpy.int32)
        lownership.fill(numpy.iinfo(numpy.int32).max)

        ownership = numpy.empty(max_id, dtype=numpy.int32)

        if dataset is not None:
            it = dataset.NewIterator()
            it.UnRegister(None)
            it.InitTraversal()
            itr = cpts.__iter__()
            while not it.IsDoneWithTraversal():
                _id = it.GetCurrentFlatIndex()
                if next(itr) is not dsa.NoneArray:
                    lownership[_id] = rank
                it.GoToNextItem()
        mpitype = _lookup_mpi_type(numpy.int32)
        # The process with the lowest id containing a block will
        # produce the output for that block.
        comm.Allreduce([lownership, mpitype], [ownership, mpitype], MPI.MIN)

    # Iterate over blocks to produce points and arrays
    from vtk.vtkCommonDataModel import vtkUnstructuredGrid
    from vtk.vtkCommonCore import vtkDoubleArray, vtkPoints
    ugrid = vtkUnstructuredGrid()
    da = vtkDoubleArray()
    da.SetNumberOfComponents(3)
    pts = vtkPoints()
    pts.SetData(da)
    counter = 0
    for pt in cpts:
        # Emit only the points this rank owns.
        if ownership[ids[counter]] == rank:
            pts.InsertNextPoint(tuple(pt))
        counter += 1
    ugrid.SetPoints(pts)

    for ca, name in arrays:
        if ca is not dsa.NoneArray:
            da = vtkDoubleArray()
            ncomps = ca.Arrays[0].flatten().shape[0]
            da.SetNumberOfComponents(ncomps)
            counter = 0
            for a in ca.Arrays:
                if ownership[ids[counter]] == rank:
                    a = a.flatten()
                    for i in range(ncomps):
                        da.InsertNextValue(a[i])
                counter += 1
            # NOTE(review): this checks the *last* iterated array `a`, not
            # whether any values were inserted -- confirm intended.
            if len(a) > 0:
                da.SetName(name)
                ugrid.GetPointData().AddArray(da)
    return ugrid
match_value = False # Note: type check above ensures that we have the _same_ NA value # for missing values, None == None (which is checked # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT match_missing = isna(result_fill_value) and isna(expected_fill_value) assert match_value or match_missing @pytest.mark.parametrize( "dtype, fill_value, expected_dtype", [ # size 8 ("int8", 1, "int8"), ("int8", np.iinfo("int8").max + 1, "int16"), ("int8", np.iinfo("int16").max + 1, "int32"), ("int8", np.iinfo("int32").max + 1, "int64"), ("int8", np.iinfo("int64").max + 1, "object"), ("int8", -1, "int8"), ("int8", np.iinfo("int8").min - 1, "int16"), ("int8", np.iinfo("int16").min - 1, "int32"), ("int8", np.iinfo("int32").min - 1, "int64"), ("int8", np.iinfo("int64").min - 1, "object"), # keep signed-ness as long as possible ("uint8", 1, "uint8"), ("uint8", np.iinfo("int8").max + 1, "uint8"), ("uint8", np.iinfo("uint8").max + 1, "uint16"), ("uint8", np.iinfo("int16").max + 1, "uint16"), ("uint8", np.iinfo("uint16").max + 1, "uint32"), ("uint8", np.iinfo("int32").max + 1, "uint32"),
parser.add_argument('--n_epochs', type=int, default=200)
# NOTE(review): the default is the string '66'; argparse applies type=int
# to string defaults, so this resolves to the integer 66 -- confirm intended.
parser.add_argument('--lr_milestones', type=int, default='66')
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('-gpu', '--device_no', type=int, default=1)
parser.add_argument('--n_jobs_dataloader', type=int, default=0)
p = parser.parse_args()

# ===========================================
# 0.1. Parameters
# ===========================================
# Extract from parser
print('Loading parameters...')
random_state_test = p.random_state_test
# Fresh random seed for the evaluation split on every run.
random_state_eval = random.randint(0, np.iinfo(np.int32).max)
loader_name, n, n_eval, n_test, mix = p.loader_name, p.n, p.n_eval, p.n_test, bool(p.mix)
ratio_abnormal_train, ratio_abnormal_eval = p.ratio_abnormal_train, p.ratio_abnormal_eval
ratio_abnormal_test = p.ratio_abnormal_test
n_features, net_name, load_model = p.n_features, p.net_name, p.load_model
optimizer_, eta_str = p.optimizer_, p.eta_str
lr, n_epochs, batch_size = p.lr, p.n_epochs, p.batch_size
device_no, n_jobs_dataloader = p.device_no, p.n_jobs_dataloader
lr_milestones = p.lr_milestones

# Define addional parameters
# Repeat the milestone at regular intervals up to n_epochs.
lr_milestones = tuple(i for i in range(lr_milestones, n_epochs, lr_milestones))
torch.manual_seed(random_state_test)
device = 'cuda:{}'.format(device_no)
# NOTE(review): eta_str * 0.01 only works if eta_str is numeric despite
# its name -- confirm the argparse type of --eta_str.
eta = float(eta_str * 0.01)
label_normal = (0,)
def _scan_file(infile: Union[str, Path],
               categorical: bool = True,
               chunksize: int = 100000,
               cat_threshold: float = 0.1,
               unsigned: bool = False) -> Dict[str, Any]:
    """Scan dta file to find minimal dtypes to hold data in

    The file is read in chunks. For each chunk:
    - integer columns: look for missings and the lowest/highest value
    - string columns: collect unique values (to become categoricals)
      while the unique-value fraction stays below ``cat_threshold``
    - float and date columns: left as-is

    Args:
        infile: dta file to scan
        categorical: whether to change strings to categorical
        chunksize: number of rows of infile to read at a time
        cat_threshold: maximum fraction of unique values in order
            to convert to categorical
        unsigned: use unsigned integer dtypes for columns whose
            observed minimum is non-negative

    Returns:
        dictionary mapping variable names to the chosen dtypes
    """
    itr = pd.read_stata(infile, iterator=True)
    varlist_df = pd.DataFrame({
        'format': itr.fmtlist,
        'name': itr.varlist,
        'col_size': itr.col_sizes,
        'dtype': itr.dtyplist,
        'label': list(itr.variable_labels().values())
    })

    start_cols = {}

    # Date columns are recognized by their Stata display format.
    date_fmts = ('%tc', '%tC', '%td', '%d', '%tw', '%tm', '%tq', '%th', '%ty')
    date_cols = varlist_df['format'].apply(lambda x: x.startswith(date_fmts))
    date_cols = varlist_df[date_cols]['name'].values.tolist()
    start_cols['date_cols'] = date_cols

    # Integer columns, excluding dates (which Stata stores as ints too).
    int_cols = varlist_df['dtype'].apply(lambda x: np.issubdtype(
        x, np.integer) if inspect.isclass(x) else False)
    int_cols = varlist_df[int_cols]['name'].values.tolist()
    int_cols = sorted(list(set(int_cols) - set(date_cols)))
    start_cols['int_cols'] = int_cols

    # String columns are recognized by a %...s display format.
    regex = r'%.+s'
    str_cols = varlist_df['format'].apply(lambda x: bool(re.search(regex, x)))
    str_cols = varlist_df[str_cols]['name'].values.tolist()
    start_cols['str_cols'] = str_cols

    float_cols = varlist_df['dtype'].apply(lambda x: np.issubdtype(
        x, np.floating) if inspect.isclass(x) else False)
    float_cols = varlist_df[float_cols]['name'].values.tolist()
    start_cols['float_cols'] = float_cols

    end_cols = {
        'date_cols': start_cols['date_cols'],
        'int_cols': {
            'names': start_cols['int_cols'],
            'min': {key: None for key in start_cols['int_cols']},
            'max': {key: None for key in start_cols['int_cols']}
        },
        'float_cols': start_cols['float_cols']
    }
    if categorical:
        end_cols['cat_cols'] = {
            'names': start_cols['str_cols'],
            'cats': {key: set() for key in start_cols['str_cols']}
        }
        end_cols['str_cols'] = []
    else:
        end_cols['cat_cols'] = {}
        end_cols['str_cols'] = start_cols['str_cols']

    # Only integer (and, if requested, string) columns need scanning.
    tokeep = []
    tokeep.extend(start_cols['int_cols'])
    if categorical:
        tokeep.extend(start_cols['str_cols'])

    itr = pd.read_stata(infile, columns=tokeep, chunksize=chunksize)

    i = 0
    for df in itr:
        i += 1
        print(f'Scanning group {i} of data')

        # Integer vars:
        # iterate over a copy, since columns may be removed from the
        # live 'names' list inside the loop
        int_cols = end_cols['int_cols']['names'].copy()
        for col in int_cols:
            # Check missings
            if df.loc[:, col].isnull().values.any():
                # If missings, convert to float
                end_cols['float_cols'].append(col)
                end_cols['int_cols']['names'].remove(col)
                end_cols['int_cols']['max'].pop(col)
                end_cols['int_cols']['min'].pop(col)
            else:
                # Check minimum
                minval = min(df.loc[:, col])
                if end_cols['int_cols']['min'][col] is None:
                    end_cols['int_cols']['min'][col] = minval
                elif minval < end_cols['int_cols']['min'][col]:
                    end_cols['int_cols']['min'][col] = minval

                # Check maximum
                maxval = max(df.loc[:, col])
                if end_cols['int_cols']['max'][col] is None:
                    end_cols['int_cols']['max'][col] = maxval
                elif maxval > end_cols['int_cols']['max'][col]:
                    end_cols['int_cols']['max'][col] = maxval

        if categorical:
            # Scan str vars for categories
            cat_cols = end_cols['cat_cols']['names'].copy()
            for col in cat_cols:
                num_unique_values = len(df[col].unique())
                num_total_values = len(df[col])
                if num_unique_values / num_total_values < cat_threshold:
                    # Then stays as category
                    # Add category values
                    unique_vals = df[col].unique().tolist()
                    end_cols['cat_cols']['cats'][col].update(unique_vals)
                else:
                    print(f'{col} is now a string')
                    # Becomes regular string column
                    end_cols['str_cols'].append(col)
                    end_cols['cat_cols']['cats'].pop(col)
                    end_cols['cat_cols']['names'].remove(col)

    # Not currently scanning date or float vars

    # Pick the smallest dtype that covers each column's observed range.
    dtypes_dict = {}
    for col in end_cols['int_cols']['names']:
        if unsigned and (end_cols['int_cols']['min'][col] >= 0):
            if end_cols['int_cols']['max'][col] <= np.iinfo(np.uint8).max:
                dtypes_dict[col] = np.uint8
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint16).max:
                dtypes_dict[col] = np.uint16
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint32).max:
                dtypes_dict[col] = np.uint32
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint64).max:
                dtypes_dict[col] = np.uint64
        else:
            # BUGFIX: removed a dead `if False: pass` branch that used to
            # precede this chain; behavior is unchanged.
            if ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int8).max)
                    & (end_cols['int_cols']['min'][col]
                       >= np.iinfo(np.int8).min)):
                dtypes_dict[col] = np.int8
            elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int16).max)
                    & (end_cols['int_cols']['min'][col]
                       >= np.iinfo(np.int16).min)):
                dtypes_dict[col] = np.int16
            elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int32).max)
                    & (end_cols['int_cols']['min'][col]
                       >= np.iinfo(np.int32).min)):
                dtypes_dict[col] = np.int32
            elif ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int64).max)
                    & (end_cols['int_cols']['min'][col]
                       >= np.iinfo(np.int64).min)):
                dtypes_dict[col] = np.int64

    for col in end_cols['float_cols']:
        dtypes_dict[col] = np.float64

    if categorical:
        for col in end_cols['cat_cols']['names']:
            dtypes_dict[col] = CategoricalDtype(
                end_cols['cat_cols']['cats'][col])

    return dtypes_dict
# Shared compression/encoding settings for the data variables;
# float32-max serves as the fill value for missing data.
comp = dict(zlib=True,
            complevel=4,
            fletcher32=True,
            _FillValue=np.finfo("float32").max)

# Apply the shared settings to every data variable except the id/bounds
# variables, which keep their own representation.
encoding = {
    var: comp
    for var in to_save_ds.data_vars
    if var not in ["platform_id", "sonde_id", "alt_bnds"]
}
# Times are stored as int32 seconds since a fixed epoch.
encoding["launch_time"] = {
    "units": "seconds since 2020-01-01",
    "dtype": "int32"
}
encoding["interpolated_time"] = {
    "units": "seconds since 2020-01-01",
    "dtype": "int32",
    "_FillValue": np.iinfo("int32").max,
}
# Attach the project-wide global attributes.
for key in dicts.nc_global_attrs.keys():
    to_save_ds.attrs[key] = dicts.nc_global_attrs[key]

to_save_ds.to_netcdf(save_directory + file_name,
                     mode="w",
                     format="NETCDF4",
                     encoding=encoding)

# %%

# %%
def infer_exact(tester,
                pf,
                tensor_shape,
                batch_size,
                input_dtype,
                output0_dtype,
                output1_dtype,
                output0_raw=True,
                output1_raw=True,
                model_version=None,
                swap=False,
                outputs=("OUTPUT0", "OUTPUT1"),
                use_http=True,
                use_grpc=True,
                use_http_json_tensors=True,
                skip_request_id_check=False,
                use_streaming=True,
                correlation_id=0,
                shm_region_names=None,
                precreated_shm_regions=None,
                use_system_shared_memory=False,
                use_cuda_shared_memory=False,
                priority=0,
                timeout_us=0):
    """Run exact-match inference against a Triton add/sub test model.

    Generates two random integer input tensors, runs them through the model
    named by (pf, input_dtype, output0_dtype, output1_dtype) over every
    enabled protocol (HTTP binary, HTTP JSON, gRPC, gRPC streaming), and
    asserts via `tester` that OUTPUT0/OUTPUT1 equal the locally computed
    sum/difference (swapped when `swap=True`).  When output*_raw is False the
    server returns classification <value>:<index>[:<label>] strings instead
    of raw tensors, which are checked against a local argsort.

    Shared-memory variants (system or CUDA) route inputs/outputs through
    regions managed by the `su` helper module.  Returns the last `results`
    object obtained.

    NOTE(review): `np.object` and the bare `unicode(...)` builtin are
    Python-2 era; on Python 3 / modern NumPy these need a shim — presumably
    provided elsewhere in this module, verify before running standalone.
    `correlation_id`, `priority` and `timeout_us` are accepted but unused here.
    """
    tester.assertTrue(use_http or use_http_json_tensors or use_grpc or
                      use_streaming)
    # Each config is (url, protocol, streaming?, http_binary_data?).
    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False, True))
        if output0_raw == output1_raw:
            # Float16 not supported for Input and Output via JSON
            if use_http_json_tensors and (input_dtype != np.float16) and \
                    (output0_dtype != np.float16) and (output1_dtype != np.float16):
                configs.append(("localhost:8000", "http", False, False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False, False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True, False))

    # outputs are sum and difference of inputs so set max input
    # values so that they will not overflow the output. This
    # allows us to do an exact match. For float types use 8, 16,
    # 32 int range for fp 16, 32, 64 respectively. When getting
    # class outputs the result value/probability is returned as a
    # float so must use fp32 range in that case.
    rinput_dtype = _range_repr_dtype(input_dtype)
    routput0_dtype = _range_repr_dtype(
        output0_dtype if output0_raw else np.float32)
    routput1_dtype = _range_repr_dtype(
        output1_dtype if output1_raw else np.float32)
    # Halve the common range so a+b / a-b cannot overflow any involved dtype.
    val_min = max(
        np.iinfo(rinput_dtype).min,
        np.iinfo(routput0_dtype).min,
        np.iinfo(routput1_dtype).min) / 2
    val_max = min(
        np.iinfo(rinput_dtype).max,
        np.iinfo(routput0_dtype).max,
        np.iinfo(routput1_dtype).max) / 2

    num_classes = 3

    input0_array = np.random.randint(low=val_min,
                                     high=val_max,
                                     size=tensor_shape,
                                     dtype=rinput_dtype)
    input1_array = np.random.randint(low=val_min,
                                     high=val_max,
                                     size=tensor_shape,
                                     dtype=rinput_dtype)
    if input_dtype != np.object:
        input0_array = input0_array.astype(input_dtype)
        input1_array = input1_array.astype(input_dtype)

    # Locally compute the expected outputs (sum/difference, optionally swapped).
    if not swap:
        output0_array = input0_array + input1_array
        output1_array = input0_array - input1_array
    else:
        output0_array = input0_array - input1_array
        output1_array = input0_array + input1_array

    # String-typed outputs are compared as unicode strings of the numbers.
    if output0_dtype == np.object:
        output0_array = np.array([
            unicode(str(x), encoding='utf-8')
            for x in (output0_array.flatten())
        ], dtype=object).reshape(output0_array.shape)
    else:
        output0_array = output0_array.astype(output0_dtype)
    if output1_dtype == np.object:
        output1_array = np.array([
            unicode(str(x), encoding='utf-8')
            for x in (output1_array.flatten())
        ], dtype=object).reshape(output1_array.shape)
    else:
        output1_array = output1_array.astype(output1_dtype)

    # String-typed inputs are sent as the string form of the random ints.
    if input_dtype == np.object:
        in0n = np.array(
            [str(x) for x in input0_array.reshape(input0_array.size)],
            dtype=object)
        input0_array = in0n.reshape(input0_array.shape)
        in1n = np.array(
            [str(x) for x in input1_array.reshape(input1_array.size)],
            dtype=object)
        input1_array = in1n.reshape(input1_array.shape)

    # prepend size of string to output string data
    if output0_dtype == np.object:
        if batch_size == 1:
            output0_array_tmp = serialize_byte_tensor_list([output0_array])
        else:
            output0_array_tmp = serialize_byte_tensor_list(output0_array)
    else:
        output0_array_tmp = output0_array

    if output1_dtype == np.object:
        if batch_size == 1:
            output1_array_tmp = serialize_byte_tensor_list([output1_array])
        else:
            output1_array_tmp = serialize_byte_tensor_list(output1_array)
    else:
        output1_array_tmp = output1_array

    # libtorch models use double-underscore IO naming.
    OUTPUT0 = "OUTPUT0"
    OUTPUT1 = "OUTPUT1"
    INPUT0 = "INPUT0"
    INPUT1 = "INPUT1"
    if pf == "libtorch" or pf == "libtorch_nobatch":
        OUTPUT0 = "OUTPUT__0"
        OUTPUT1 = "OUTPUT__1"
        INPUT0 = "INPUT__0"
        INPUT1 = "INPUT__1"

    output0_byte_size = sum([o0.nbytes for o0 in output0_array_tmp])
    output1_byte_size = sum([o1.nbytes for o1 in output1_array_tmp])

    # Shared memory wants one array per batch element.
    if batch_size == 1:
        input0_list = [input0_array]
        input1_list = [input1_array]
    else:
        input0_list = [x for x in input0_array]
        input1_list = [x for x in input1_array]

    # Serialization of string tensors in the case of shared memory must be done manually
    if input_dtype == np.object:
        input0_list_tmp = serialize_byte_tensor_list(input0_list)
        input1_list_tmp = serialize_byte_tensor_list(input1_list)
    else:
        input0_list_tmp = input0_list
        input1_list_tmp = input1_list

    input0_byte_size = sum([i0.nbytes for i0 in input0_list_tmp])
    input1_byte_size = sum([i1.nbytes for i1 in input1_list_tmp])

    # Create system/cuda shared memory regions if needed
    shm_regions, shm_handles = su.create_set_shm_regions(
        input0_list_tmp, input1_list_tmp, output0_byte_size, output1_byte_size,
        outputs, shm_region_names, precreated_shm_regions,
        use_system_shared_memory, use_cuda_shared_memory)

    if model_version is not None:
        model_version = str(model_version)
    else:
        model_version = ""

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_model_name(pf, input_dtype, output0_dtype,
                                       output1_dtype)

        if config[1] == "http":
            triton_client = httpclient.InferenceServerClient(config[0],
                                                             verbose=True)
        else:
            triton_client = grpcclient.InferenceServerClient(config[0],
                                                             verbose=True)

        inputs = []
        if config[1] == "http":
            inputs.append(
                httpclient.InferInput(INPUT0, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
            inputs.append(
                httpclient.InferInput(INPUT1, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
        else:
            inputs.append(
                grpcclient.InferInput(INPUT0, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
            inputs.append(
                grpcclient.InferInput(INPUT1, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))

        if not (use_cuda_shared_memory or use_system_shared_memory):
            if config[1] == "http":
                inputs[0].set_data_from_numpy(input0_array,
                                              binary_data=config[3])
                inputs[1].set_data_from_numpy(input1_array,
                                              binary_data=config[3])
            else:
                inputs[0].set_data_from_numpy(input0_array)
                inputs[1].set_data_from_numpy(input1_array)
        else:
            # Register necessary shared memory regions/handles
            su.register_add_shm_regions(inputs, outputs, shm_regions,
                                        precreated_shm_regions, shm_handles,
                                        input0_byte_size, input1_byte_size,
                                        output0_byte_size, output1_byte_size,
                                        use_system_shared_memory,
                                        use_cuda_shared_memory, triton_client)

        # Per-batch-element descending argsort of the expected outputs, used
        # to validate classification responses below.
        if batch_size == 1:
            expected0_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output0_array.reshape((1,) + tensor_shape)
            ]
            expected1_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output1_array.reshape((1,) + tensor_shape)
            ]
        else:
            expected0_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output0_array.reshape(tensor_shape)
            ]
            expected1_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output1_array.reshape(tensor_shape)
            ]

        # Force binary_data = False for shared memory and class
        output_req = []
        i = 0
        if "OUTPUT0" in outputs:
            if len(shm_regions) != 0:
                if config[1] == "http":
                    output_req.append(
                        httpclient.InferRequestedOutput(OUTPUT0,
                                                        binary_data=False))
                else:
                    output_req.append(grpcclient.InferRequestedOutput(OUTPUT0))
                # shm_regions[0:2] are the inputs; outputs start at index 2.
                output_req[-1].set_shared_memory(shm_regions[2] + '_data',
                                                 output0_byte_size)
            else:
                if output0_raw:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT0, binary_data=config[3]))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(OUTPUT0))
                else:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT0,
                                binary_data=False,
                                class_count=num_classes))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(
                                OUTPUT0, class_count=num_classes))
            i += 1
        if "OUTPUT1" in outputs:
            if len(shm_regions) != 0:
                if config[1] == "http":
                    output_req.append(
                        httpclient.InferRequestedOutput(OUTPUT1,
                                                        binary_data=False))
                else:
                    output_req.append(grpcclient.InferRequestedOutput(OUTPUT1))
                output_req[-1].set_shared_memory(shm_regions[2 + i] + '_data',
                                                 output1_byte_size)
            else:
                if output1_raw:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT1, binary_data=config[3]))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(OUTPUT1))
                else:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT1,
                                binary_data=False,
                                class_count=num_classes))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(
                                OUTPUT1, class_count=num_classes))

        if config[2]:
            # Streaming gRPC: results arrive via the completion callback queue.
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    model_version=model_version,
                    outputs=output_req,
                    request_id=str(_unique_request_id()))
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(
                model_name,
                inputs,
                model_version=model_version,
                outputs=output_req,
                request_id=str(_unique_request_id()))

        last_response = results.get_response()

        # Each request id must be unique across the whole test process.
        if not skip_request_id_check:
            global _seen_request_ids
            if config[1] == "http":
                request_id = int(last_response["id"])
            else:
                request_id = int(last_response.id)
            tester.assertFalse(request_id in _seen_request_ids,
                               "request_id: {}".format(request_id))
            _seen_request_ids.add(request_id)

        # HTTP responses are dicts, gRPC responses are protobuf objects.
        if config[1] == "http":
            response_model_name = last_response["model_name"]
            if model_version != "":
                response_model_version = last_response["model_version"]
            response_outputs = last_response["outputs"]
        else:
            response_model_name = last_response.model_name
            if model_version != "":
                response_model_version = last_response.model_version
            response_outputs = last_response.outputs

        tester.assertEqual(response_model_name, model_name)
        if model_version != "":
            tester.assertEqual(str(response_model_version), model_version)
        tester.assertEqual(len(response_outputs), len(outputs))

        for result in response_outputs:
            if config[1] == "http":
                result_name = result["name"]
            else:
                result_name = result.name

            if ((result_name == OUTPUT0 and output0_raw) or
                    (result_name == OUTPUT1 and output1_raw)):
                # Raw tensor output: compare byte-for-byte with the expected
                # array (read back from shared memory when enabled).
                if use_system_shared_memory or use_cuda_shared_memory:
                    if result_name == OUTPUT0:
                        shm_handle = shm_handles[2]
                    else:
                        shm_handle = shm_handles[3]
                    output = results.get_output(result_name)
                    if config[1] == "http":
                        output_datatype = output['datatype']
                        output_shape = output['shape']
                    else:
                        output_datatype = output.datatype
                        output_shape = output.shape
                    output_dtype = triton_to_np_dtype(output_datatype)
                if use_system_shared_memory:
                    output_data = shm.get_contents_as_numpy(
                        shm_handle, output_dtype, output_shape)
                elif use_cuda_shared_memory:
                    output_data = cudashm.get_contents_as_numpy(
                        shm_handle, output_dtype, output_shape)
                else:
                    output_data = results.as_numpy(result_name)

                # HTTP JSON returns strings as unicode; normalize to bytes so
                # comparison with the locally built expected array works.
                if (output_data.dtype == np.object) and (config[3] == False):
                    output_data = output_data.astype(np.bytes_)

                if result_name == OUTPUT0:
                    tester.assertTrue(
                        np.array_equal(output_data, output0_array),
                        "{}, {} expected: {}, got {}".format(
                            model_name, OUTPUT0, output0_array, output_data))
                elif result_name == OUTPUT1:
                    tester.assertTrue(
                        np.array_equal(output_data, output1_array),
                        "{}, {} expected: {}, got {}".format(
                            model_name, OUTPUT1, output1_array, output_data))
                else:
                    tester.assertTrue(
                        False, "unexpected raw result {}".format(result_name))
            else:
                for b in range(batch_size):
                    # num_classes values must be returned and must
                    # match expected top values
                    if "nobatch" in pf:
                        class_list = results.as_numpy(result_name)
                    else:
                        class_list = results.as_numpy(result_name)[b]
                    tester.assertEqual(len(class_list), num_classes)
                    if batch_size == 1:
                        expected0_flatten = output0_array.flatten()
                        expected1_flatten = output1_array.flatten()
                    else:
                        expected0_flatten = output0_array[b].flatten()
                        expected1_flatten = output1_array[b].flatten()

                    for idx, class_label in enumerate(class_list):
                        # can't compare indices since could have different
                        # indices with the same value/prob, so check that
                        # the value of each index equals the expected value.
                        # Only compare labels when the indices are equal.
                        if type(class_label) == str:
                            ctuple = class_label.split(':')
                        else:
                            ctuple = "".join(
                                chr(x) for x in class_label).split(':')
                        cval = float(ctuple[0])
                        cidx = int(ctuple[1])
                        if result_name == OUTPUT0:
                            tester.assertEqual(cval, expected0_flatten[cidx])
                            tester.assertEqual(
                                cval,
                                expected0_flatten[expected0_sort_idx[b][idx]])
                            if cidx == expected0_sort_idx[b][idx]:
                                tester.assertEqual(
                                    ctuple[2], 'label{}'.format(
                                        expected0_sort_idx[b][idx]))
                        elif result_name == OUTPUT1:
                            tester.assertEqual(cval, expected1_flatten[cidx])
                            tester.assertEqual(
                                cval,
                                expected1_flatten[expected1_sort_idx[b][idx]])
                        else:
                            tester.assertTrue(
                                False, "unexpected class result {}".format(
                                    result_name))

    # Unregister system/cuda shared memory regions if they exist
    su.unregister_cleanup_shm_regions(shm_regions, shm_handles,
                                      precreated_shm_regions, outputs,
                                      use_system_shared_memory,
                                      use_cuda_shared_memory)

    return results
from warnings import warn
import locale

import numpy as np
import numba
import scipy.sparse

from pynndescent.sparse import sparse_mul, sparse_diff, sparse_sum
from pynndescent.utils import tau_rand_int, norm
import joblib

# Force "C" numeric locale so number formatting/parsing is deterministic.
locale.setlocale(locale.LC_NUMERIC, "C")

# Used for a floating point "nearly zero" comparison
EPS = 1e-8

# int32 bounds pulled in by one on each side — presumably to leave headroom
# and avoid hitting the exact extremes (e.g. in RNG state arithmetic);
# TODO(review): confirm intent.
INT32_MIN = np.iinfo(np.int32).min + 1
INT32_MAX = np.iinfo(np.int32).max - 1

# Node of a random-projection tree: member indices, leaf flag, the splitting
# hyperplane/offset, and the two child nodes.
# NOTE(review): `namedtuple` is not imported in this visible chunk — it must
# come from an import elsewhere in the file; verify.
RandomProjectionTreeNode = namedtuple(
    "RandomProjectionTreeNode",
    [
        "graph_indices", "is_leaf", "hyperplane", "offset", "left_child",
        "right_child"
    ],
)

# Array-of-arrays flattened representation of a whole RP tree.
FlatTree = namedtuple(
    "FlatTree", ["hyperplanes", "offsets", "children", "indices", "leaf_size"])

# Numba array types for hyperplanes: dense trees store a contiguous float32
# vector, sparse trees a 2-row float64 array (index row + value row).
dense_hyperplane_type = numba.float32[::1]
sparse_hyperplane_type = numba.float64[:, ::1]
def infer_shape_tensor(tester,
                       pf,
                       tensor_dtype,
                       input_shape_values,
                       dummy_input_shapes,
                       use_http=True,
                       use_grpc=True,
                       use_streaming=True,
                       shm_suffix="",
                       use_system_shared_memory=False,
                       use_cuda_shared_memory=False,
                       priority=0,
                       timeout_us=0,
                       batch_size=1):
    """Test a TensorRT ("plan") model that echoes shape tensors.

    For each of the io_cnt IO pairs the model receives a random
    DUMMY_INPUT<i> tensor plus an INT32 shape tensor INPUT<i>, and must
    return OUTPUT<i> == INPUT<i> while DUMMY_OUTPUT<i>'s shape equals the
    values carried in the shape tensor.  The check runs once per enabled
    protocol config (HTTP, gRPC, gRPC streaming).  Shape tensors may be
    routed through system or CUDA shared memory (mutually exclusive).

    NOTE(review): `np.bool`/`np.object` comparisons are legacy NumPy
    aliases — a shim or old NumPy is presumably required; verify.
    """
    tester.assertTrue(use_http or use_grpc or use_streaming)
    # Only TensorRT plan models support shape tensors.
    tester.assertTrue(pf == "plan" or pf == "plan_nobatch")
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))
    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    # Each config is (url, protocol, streaming?).
    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True))

    io_cnt = len(input_shape_values)

    # FIXME wrap up shm handle cleanup
    # For (cuda) shared memory, it's only set for shape tensor for simplicity.
    # Regular tensor with (cuda) shared memory should be well-tested in other
    # tests.
    # item is (handle, byte_size, is_cuda)
    input_shm_handle_list = []
    output_shm_handle_list = []

    dummy_input_list = []
    input_list = []
    expected_dict = dict()
    # Prepare IO in advance
    for io_num in range(io_cnt):
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        input_name = "INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)

        # Prepare the dummy tensor
        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if (rtensor_dtype != np.bool):
            dummy_in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                          high=np.iinfo(rtensor_dtype).max,
                                          size=dummy_input_shapes[io_num],
                                          dtype=rtensor_dtype)
        else:
            dummy_in0 = np.random.choice(a=[False, True],
                                         size=dummy_input_shapes[io_num])
        if tensor_dtype != np.object:
            dummy_in0 = dummy_in0.astype(tensor_dtype)
        else:
            dummy_in0 = np.array([str(x) for x in dummy_in0.flatten()],
                                 dtype=object).reshape(dummy_in0.shape)
        dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor (always INT32 per the Triton API).
        in0 = np.asarray(input_shape_values[io_num], dtype=np.int32)
        input_list.append(in0)

        # Prepare the expected value for the output. Skip dummy output as we
        # only care about its shape (== value of OUTPUT*)
        expected_dict[output_name] = np.ndarray.copy(in0)

        # Only need to create region once
        input_byte_size = in0.size * np.dtype(np.int32).itemsize
        output_byte_size = input_byte_size * batch_size
        if use_system_shared_memory:
            input_shm_handle_list.append(
                (shm.create_shared_memory_region(input_name + shm_suffix,
                                                 '/' + input_name + shm_suffix,
                                                 input_byte_size),
                 input_byte_size, False))
            output_shm_handle_list.append(
                (shm.create_shared_memory_region(
                    output_name + shm_suffix, '/' + output_name + shm_suffix,
                    output_byte_size), output_byte_size, False))
            shm.set_shared_memory_region(input_shm_handle_list[-1][0], [
                in0,
            ])
        elif use_cuda_shared_memory:
            input_shm_handle_list.append(
                (cudashm.create_shared_memory_region(input_name + shm_suffix,
                                                     input_byte_size, 0),
                 input_byte_size, True))
            output_shm_handle_list.append(
                (cudashm.create_shared_memory_region(output_name + shm_suffix,
                                                     output_byte_size, 0),
                 output_byte_size, True))
            cudashm.set_shared_memory_region(input_shm_handle_list[-1][0], [
                in0,
            ])

    model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
    # Run inference and check results for each config
    for config in configs:
        client_utils = grpcclient if config[1] == "grpc" else httpclient
        triton_client = client_utils.InferenceServerClient(config[0],
                                                           verbose=True)

        inputs = []
        outputs = []
        # Set IOs
        for io_num in range(io_cnt):
            dummy_input_name = "DUMMY_INPUT{}".format(io_num)
            input_name = "INPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

            inputs.append(
                client_utils.InferInput(dummy_input_name,
                                        dummy_input_shapes[io_num],
                                        np_to_triton_dtype(tensor_dtype)))
            inputs.append(
                client_utils.InferInput(input_name, input_list[io_num].shape,
                                        "INT32"))
            outputs.append(client_utils.InferRequestedOutput(dummy_output_name))
            outputs.append(client_utils.InferRequestedOutput(output_name))

            # -2: dummy; -1: input
            inputs[-2].set_data_from_numpy(dummy_input_list[io_num])
            if (not use_system_shared_memory) and (not use_cuda_shared_memory):
                inputs[-1].set_data_from_numpy(input_list[io_num])
            else:
                # Register the pre-created regions with this client's server
                # connection and point the shape tensor IO at them.
                input_byte_size = input_shm_handle_list[io_num][1]
                output_byte_size = output_shm_handle_list[io_num][1]
                if use_system_shared_memory:
                    triton_client.register_system_shared_memory(
                        input_name + shm_suffix,
                        "/" + input_name + shm_suffix, input_byte_size)
                    triton_client.register_system_shared_memory(
                        output_name + shm_suffix,
                        "/" + output_name + shm_suffix, output_byte_size)
                else:
                    triton_client.register_cuda_shared_memory(
                        input_name + shm_suffix,
                        cudashm.get_raw_handle(
                            input_shm_handle_list[io_num][0]), 0,
                        input_byte_size)
                    triton_client.register_cuda_shared_memory(
                        output_name + shm_suffix,
                        cudashm.get_raw_handle(
                            output_shm_handle_list[io_num][0]), 0,
                        output_byte_size)
                inputs[-1].set_shared_memory(input_name + shm_suffix,
                                             input_byte_size)
                outputs[-1].set_shared_memory(output_name + shm_suffix,
                                              output_byte_size)

        if config[2]:
            # Streaming gRPC: collect the result via the callback queue.
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(model_name,
                                                           inputs,
                                                           outputs=outputs,
                                                           priority=priority,
                                                           timeout=timeout_us)
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(model_name,
                                          inputs,
                                          outputs=outputs,
                                          priority=priority,
                                          timeout=timeout_us)

        for io_num in range(io_cnt):
            output_name = "OUTPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            expected = expected_dict[output_name]

            # get outputs as numpy array
            dummy_out = results.as_numpy(dummy_output_name)
            if (not use_system_shared_memory) and (not use_cuda_shared_memory):
                out = results.as_numpy(output_name)
            else:
                output = results.get_output(output_name)
                if config[1] == "grpc":
                    output_shape = output.shape
                else:
                    output_shape = output["shape"]
                if use_system_shared_memory:
                    out = shm.get_contents_as_numpy(
                        output_shm_handle_list[io_num][0], np.int32,
                        output_shape)
                else:
                    out = cudashm.get_contents_as_numpy(
                        output_shm_handle_list[io_num][0], np.int32,
                        output_shape)

            # if out shape is 2D, it is batched
            if (len(out.shape) == 2):
                # The shape of the dummy output should be equal to the shape values
                # specified in the shape tensor
                tester.assertTrue(
                    np.array_equal(dummy_out.shape[1:], out[0]),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out[0],
                        dummy_out.shape[1:]))
                # Every batch element must carry the same shape tensor value.
                for b in range(1, out.shape[0]):
                    tester.assertTrue(
                        np.array_equal(out[b - 1], out[b]),
                        "expect shape tensor has consistent value, "
                        "expected: {}, got {}".format(out[b - 1], out[b]))
                out = out[0]
            else:
                tester.assertTrue(
                    np.array_equal(dummy_out.shape, out),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out, dummy_out.shape))
            tester.assertTrue(
                np.array_equal(out, expected),
                "{}, {}, expected: {}, got {}".format(model_name, output_name,
                                                      expected, out))

            # unregister shared memory region for next config
            if use_system_shared_memory:
                triton_client.unregister_system_shared_memory(input_name +
                                                              shm_suffix)
                triton_client.unregister_system_shared_memory(output_name +
                                                              shm_suffix)
            elif use_cuda_shared_memory:
                triton_client.unregister_cuda_shared_memory(input_name +
                                                            shm_suffix)
                triton_client.unregister_cuda_shared_memory(output_name +
                                                            shm_suffix)

    # Destroy the local region handles created in the preparation loop.
    for handle in input_shm_handle_list:
        if (handle[2]):
            cudashm.destroy_shared_memory_region(handle[0])
        else:
            shm.destroy_shared_memory_region(handle[0])
    for handle in output_shm_handle_list:
        if (handle[2]):
            cudashm.destroy_shared_memory_region(handle[0])
        else:
            shm.destroy_shared_memory_region(handle[0])
def software_he_veto(records,
                     to_pe,
                     chunk_end,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3,
                     max_veto_value=None):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of samples with amplitude above pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param chunk_end: Endtime of chunk to set as maximum ceiling for the veto period
    :param area_threshold: Minimum peak area to trigger the veto.
    Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
    Do not make too large without increasing integer type in some strax
    dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude to
    trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right) the pass veto
    regions.
    :param max_veto_value: if not None, pass peaks that exceed this area
    no matter what.
    """
    veto_res = int(veto_res)
    # The region waveform sample interval is stored in an int16 strax field.
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")
    # Round the veto length up to a whole number of resolution steps.
    veto_length = np.ceil(veto_length / veto_res).astype(np.int64) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(records,
                             to_pe,
                             gap_threshold=1,
                             left_extension=0,
                             right_extension=0,
                             min_channels=100,
                             min_area=area_threshold,
                             result_dtype=strax.peak_dtype(
                                 n_channels=len(to_pe),
                                 n_sum_wv_samples=veto_n))

    # 2a. Set 'candidate regions' at these peaks. These should:
    #  - Have a fixed maximum length (else we can't use the strax hitfinder on them)
    #  - Never extend beyond the current chunk
    #  - Do not overlap
    veto_start = peaks['time']
    veto_end = np.clip(peaks['time'] + veto_length, None, chunk_end)
    # Truncate each region at the start of the next one so regions never overlap.
    veto_end[:-1] = np.clip(veto_end[:-1], None, veto_start[1:])

    # 2b. Convert these into strax record-like objects
    # Note the waveform is float32 though (it's a summed waveform)
    regions = np.zeros(
        len(veto_start),
        dtype=strax.interval_dtype + [
            ("data", (np.float32, veto_n)),
            ("baseline", np.float32),
            ("baseline_rms", np.float32),
            ("reduction_level", np.int64),
            ("record_i", np.int64),
            ("pulse_length", np.int64),
        ])
    regions['time'] = veto_start
    # NOTE(review): dividing the duration by veto_n (sample count) rather
    # than veto_res (ns per sample) looks dimensionally suspicious, but it
    # matches the upstream implementation — confirm before changing.
    regions['length'] = (veto_end - veto_start) // veto_n
    regions['pulse_length'] = veto_n
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regios with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    if max_veto_value is not None:
        pass_veto = strax.find_hits(regions, min_amplitude=max_veto_value)
    else:
        # Normalize each region's waveform so pass_veto_fraction is relative
        # to the region's own maximum.
        regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]
        pass_veto = strax.find_hits(regions, min_amplitude=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(regions,
                                     pass_veto,
                                     left_extension=pass_veto_extend,
                                     right_extension=pass_veto_extend)
    # Invert: samples NOT covered by a pass-veto hit become the actual veto.
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, min_amplitude=1)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(mask_and_not(records, veto_mask)) + [veto])
def convert(image, dtype, force_copy=False, uniform=False):
    """
    Convert an image to the requested data-type.

    Warnings are issued in case of precision loss, or when negative values
    are clipped during conversion to unsigned integer types (sign loss).

    Floating point values are expected to be normalized and will be clipped
    to the range [0.0, 1.0] or [-1.0, 1.0] when converting to unsigned or
    signed integers respectively.

    Numbers are not shifted to the negative side when converting from
    unsigned to signed integer types. Negative values will be clipped when
    converting to unsigned integers.

    Parameters
    ----------
    image : ndarray
        Input image.
    dtype : dtype
        Target data-type.
    force_copy : bool, optional
        Force a copy of the data, irrespective of its current dtype.
    uniform : bool, optional
        Uniformly quantize the floating point range to the integer range.
        By default (uniform=False) floating point values are scaled and
        rounded to the nearest integers, which minimizes back and forth
        conversion errors.

    References
    ----------
    .. [1] DirectX data conversion rules.
           http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx
    .. [2] Data Conversions.
           In "OpenGL ES 2.0 Specification v2.0.25", pp 7-8. Khronos Group, 2010.
    .. [3] Proper treatment of pixels as integers. A.W. Paeth.
           In "Graphics Gems I", pp 249-256. Morgan Kaufmann, 1990.
    .. [4] Dirty Pixels. J. Blinn.
           In "Jim Blinn's corner: Dirty Pixels", pp 47-57. Morgan Kaufmann, 1998.
    """
    image = np.asarray(image)
    dtypeobj = np.dtype(dtype)
    dtypeobj_in = image.dtype
    dtype = dtypeobj.type
    dtype_in = dtypeobj_in.type

    # Trivial case: already the requested type.
    if dtype_in == dtype:
        if force_copy:
            image = image.copy()
        return image

    # _supported_types is a module-level registry defined elsewhere in this file.
    if not (dtype_in in _supported_types and dtype in _supported_types):
        raise ValueError("can not convert %s to %s." % (dtypeobj_in, dtypeobj))

    def sign_loss():
        # Warn that negative values will be clipped.
        warn("Possible sign loss when converting negative image of type "
             "%s to positive image of type %s." % (dtypeobj_in, dtypeobj))

    def prec_loss():
        # Warn that the target type cannot represent all input values exactly.
        warn("Possible precision loss when converting from "
             "%s to %s" % (dtypeobj_in, dtypeobj))

    def _dtype(itemsize, *dtypes):
        # Return first of `dtypes` with itemsize greater than `itemsize`
        return next(dt for dt in dtypes if itemsize < np.dtype(dt).itemsize)

    def _dtype2(kind, bits, itemsize=1):
        # Return dtype of `kind` that can store a `bits` wide unsigned int
        def compare(x, y, kind='u'):
            # Unsigned needs `bits` exactly; signed needs one extra sign bit.
            if kind == 'u':
                return x <= y
            else:
                return x < y
        s = next(i for i in (itemsize, ) + (2, 4, 8)
                 if compare(bits, i * 8, kind=kind))
        return np.dtype(kind + str(s))

    def _scale(a, n, m, copy=True):
        # Scale unsigned/positive integers from n to m bits
        # Numbers can be represented exactly only if m is a multiple of n
        # Output array is of same kind as input.
        kind = a.dtype.kind
        if n > m and a.max() < 2**m:
            # Data already fits in the smaller width: downcast without scaling.
            mnew = int(np.ceil(m / 2) * 2)
            if mnew > m:
                dtype = "int%s" % mnew
            else:
                dtype = "uint%s" % mnew
            n = int(np.ceil(n / 2) * 2)
            msg = ("Downcasting %s to %s without scaling because max "
                   "value %s fits in %s" % (a.dtype, dtype, a.max(), dtype))
            warn(msg)
            return a.astype(_dtype2(kind, m))
        elif n == m:
            return a.copy() if copy else a
        elif n > m:
            # downscale with precision loss
            prec_loss()
            if copy:
                b = np.empty(a.shape, _dtype2(kind, m))
                np.floor_divide(a, 2**(n - m), out=b, dtype=a.dtype,
                                casting='unsafe')
                return b
            else:
                a //= 2**(n - m)
                return a
        elif m % n == 0:
            # exact upscale to a multiple of n bits
            if copy:
                b = np.empty(a.shape, _dtype2(kind, m))
                np.multiply(a, (2**m - 1) // (2**n - 1), out=b, dtype=b.dtype)
                return b
            else:
                a = np.array(a, _dtype2(kind, m, a.dtype.itemsize), copy=False)
                a *= (2**m - 1) // (2**n - 1)
                return a
        else:
            # upscale to a multiple of n bits,
            # then downscale with precision loss
            prec_loss()
            o = (m // n + 1) * n
            if copy:
                b = np.empty(a.shape, _dtype2(kind, o))
                np.multiply(a, (2**o - 1) // (2**n - 1), out=b, dtype=b.dtype)
                b //= 2**(o - m)
                return b
            else:
                a = np.array(a, _dtype2(kind, o, a.dtype.itemsize), copy=False)
                a *= (2**o - 1) // (2**n - 1)
                a //= 2**(o - m)
                return a

    kind = dtypeobj.kind
    kind_in = dtypeobj_in.kind
    itemsize = dtypeobj.itemsize
    itemsize_in = dtypeobj_in.itemsize

    if kind == 'b':
        # to binary image
        if kind_in in "fi":
            sign_loss()
        prec_loss()
        # Threshold at half the input type's positive range.
        return image > dtype_in(dtype_range[dtype_in][1] / 2)

    if kind_in == 'b':
        # from binary image, to float and to integer
        result = image.astype(dtype)
        if kind != 'f':
            result *= dtype(dtype_range[dtype][1])
        return result

    if kind in 'ui':
        imin = np.iinfo(dtype).min
        imax = np.iinfo(dtype).max
    if kind_in in 'ui':
        imin_in = np.iinfo(dtype_in).min
        imax_in = np.iinfo(dtype_in).max

    if kind_in == 'f':
        if np.min(image) < -1.0 or np.max(image) > 1.0:
            raise ValueError("Images of type float must be between -1 and 1.")
        if kind == 'f':
            # floating point -> floating point
            if itemsize_in > itemsize:
                prec_loss()
            return image.astype(dtype)

        # floating point -> integer
        prec_loss()
        # use float type that can represent output integer type
        image = np.array(image, _dtype(itemsize, dtype_in,
                                       np.float32, np.float64))
        if not uniform:
            if kind == 'u':
                image *= imax
            else:
                # Map [-1, 1] onto the signed range symmetrically.
                image *= imax - imin
                image -= 1.0
                image /= 2.0
            np.rint(image, out=image)
            np.clip(image, imin, imax, out=image)
        elif kind == 'u':
            image *= imax + 1
            np.clip(image, 0, imax, out=image)
        else:
            image *= (imax - imin + 1.0) / 2.0
            np.floor(image, out=image)
            np.clip(image, imin, imax, out=image)
        return image.astype(dtype)

    if kind == 'f':
        # integer -> floating point
        if itemsize_in >= itemsize:
            prec_loss()
        # use float type that can exactly represent input integers
        image = np.array(image, _dtype(itemsize_in, dtype,
                                       np.float32, np.float64))
        if kind_in == 'u':
            image /= imax_in
            # DirectX uses this conversion also for signed ints
            #if imin_in:
            #    np.maximum(image, -1.0, out=image)
        else:
            # Map the signed range onto (roughly) [-1, 1].
            image *= 2.0
            image += 1.0
            image /= imax_in - imin_in
        return image.astype(dtype)

    if kind_in == 'u':
        if kind == 'i':
            # unsigned integer -> signed integer
            # Scale into the positive half, then reinterpret the bits.
            image = _scale(image, 8 * itemsize_in, 8 * itemsize - 1)
            return image.view(dtype)
        else:
            # unsigned integer -> unsigned integer
            return _scale(image, 8 * itemsize_in, 8 * itemsize)

    if kind == 'u':
        # signed integer -> unsigned integer
        sign_loss()
        image = _scale(image, 8 * itemsize_in - 1, 8 * itemsize)
        result = np.empty(image.shape, dtype)
        # Clip negatives to zero while casting to the target type.
        np.maximum(image, 0, out=result, dtype=image.dtype, casting='unsafe')
        return result

    # signed integer -> signed integer
    if itemsize_in > itemsize:
        return _scale(image, 8 * itemsize_in - 1, 8 * itemsize - 1)
    # Widen first, shift to non-negative, scale, then shift back.
    image = image.astype(_dtype2('i', itemsize * 8))
    image -= imin_in
    image = _scale(image, 8 * itemsize_in, 8 * itemsize, copy=False)
    image += imin
    return image.astype(dtype)
def infer_zero(tester, pf, batch_size, tensor_dtype, input_shapes, output_shapes,
               model_version=None, use_http=True, use_grpc=True,
               use_http_json_tensors=True, use_streaming=True,
               shm_region_name_prefix=None, use_system_shared_memory=False,
               use_cuda_shared_memory=False, priority=0, timeout_us=0):
    """Run an inference round-trip against a Triton "zero" identity model and
    assert the outputs echo the inputs.

    Random input tensors are generated for ``io_cnt`` input/output pairs,
    optionally placed in system or CUDA shared memory, sent over every enabled
    protocol (HTTP binary, HTTP JSON, gRPC, gRPC streaming), and the returned
    tensors are compared element-wise against the inputs.

    :param tester: unittest.TestCase used for assertions.
    :param pf: platform/backend name; "libtorch"/"libtorch_nobatch" switch the
        tensor naming scheme to INPUT__n/OUTPUT__n.
    :param batch_size: batch size; 1 sends the array whole, otherwise it is
        split along the first axis into per-batch-element tensors.
    :param tensor_dtype: numpy dtype of the tensors (np.object means strings).
    :param input_shapes: list of input shapes, one per IO pair.
    :param output_shapes: list of output shapes, one per IO pair (same length).
    :param model_version: optional model version to request; "" means latest.
    :param use_http/use_grpc/use_http_json_tensors/use_streaming: protocol
        selection; at least one must be enabled.
    :param shm_region_name_prefix: [input_prefix, output_prefix] for shared
        memory region names; defaults to ["input", "output"].
    :param use_system_shared_memory/use_cuda_shared_memory: IO transport mode.
    :param priority: request priority forwarded to the client.
    :param timeout_us: request timeout (microseconds) forwarded to the client.
    :return: the result object of the last inference performed.
    """
    tester.assertTrue(
        use_http or use_grpc or use_http_json_tensors or use_streaming)

    # Each config is (url, protocol, streaming?, http_binary_data?).
    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False, True))
        # FP16 is not representable in HTTP JSON tensors.
        if use_http_json_tensors and (tensor_dtype != np.float16):
            configs.append(("localhost:8000", "http", False, False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False, False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True, False))

    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    if shm_region_name_prefix is None:
        shm_region_name_prefix = ["input", "output"]

    input_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()

    # Build random inputs (and the expected echoed outputs) for each IO pair.
    for io_num in range(io_cnt):
        if pf == "libtorch" or pf == "libtorch_nobatch":
            input_name = "INPUT__{}".format(io_num)
            output_name = "OUTPUT__{}".format(io_num)
        else:
            input_name = "INPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

        input_shape = input_shapes[io_num]
        output_shape = output_shapes[io_num]

        # Draw values from the full representable range of the dtype.
        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if (rtensor_dtype != np.bool):
            input_array = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                            high=np.iinfo(rtensor_dtype).max,
                                            size=input_shape,
                                            dtype=rtensor_dtype)
        else:
            input_array = np.random.choice(a=[False, True], size=input_shape)

        if tensor_dtype != np.object:
            input_array = input_array.astype(tensor_dtype)
            expected_array = np.ndarray.copy(input_array)
        else:
            # String tensors: server echoes back utf-8 strings (py2 `unicode`).
            expected_array = np.array([
                unicode(str(x), encoding='utf-8')
                for x in input_array.flatten()
            ], dtype=object)
            input_array = np.array([str(x) for x in input_array.flatten()],
                                   dtype=object).reshape(input_array.shape)

        expected_array = expected_array.reshape(output_shape)
        expected_dict[output_name] = expected_array

        output_byte_size = expected_array.nbytes

        if batch_size == 1:
            input_list = [input_array]
        else:
            input_list = [x for x in input_array]

        # Serialization of string tensors in the case of shared memory must be
        # done manually.
        if tensor_dtype == np.object:
            input_list_tmp = serialize_byte_tensor_list(input_list)
        else:
            input_list_tmp = input_list

        input_byte_size = sum([ip.nbytes for ip in input_list_tmp])

        # Create and register shared memory regions for inputs and outputs.
        shm_io_handles = su.create_set_either_shm_region(
            [
                shm_region_name_prefix[0] + str(io_num),
                shm_region_name_prefix[1] + str(io_num)
            ], input_list_tmp, input_byte_size, output_byte_size,
            use_system_shared_memory, use_cuda_shared_memory)

        if len(shm_io_handles) != 0:
            shm_ip_handles.append(shm_io_handles[0])
            shm_op_handles.append(shm_io_handles[1])
        input_dict[input_name] = input_array

    if model_version is not None:
        model_version = str(model_version)
    else:
        model_version = ""

    # Run inference and check results for each config.
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

        if config[1] == "http":
            triton_client = httpclient.InferenceServerClient(config[0],
                                                             verbose=True)
        else:
            triton_client = grpcclient.InferenceServerClient(config[0],
                                                             verbose=True)

        inputs = []
        output_req = []
        for io_num, (input_name, output_name) in enumerate(
                zip(input_dict.keys(), expected_dict.keys())):
            input_data = input_dict[input_name]
            input_byte_size = input_data.nbytes
            output_byte_size = expected_dict[output_name].nbytes
            if config[1] == "http":
                inputs.append(
                    httpclient.InferInput(input_name, input_data.shape,
                                          np_to_triton_dtype(tensor_dtype)))
                output_req.append(
                    httpclient.InferRequestedOutput(output_name,
                                                    binary_data=config[3]))
            else:
                inputs.append(
                    grpcclient.InferInput(input_name, input_data.shape,
                                          np_to_triton_dtype(tensor_dtype)))
                output_req.append(grpcclient.InferRequestedOutput(output_name))

            if not (use_cuda_shared_memory or use_system_shared_memory):
                if config[1] == "http":
                    inputs[-1].set_data_from_numpy(input_data,
                                                   binary_data=config[3])
                else:
                    inputs[-1].set_data_from_numpy(input_data)
            else:
                # Register necessary shared memory regions/handles.
                su.register_add_either_shm_regions(
                    inputs, output_req, shm_region_name_prefix,
                    (shm_ip_handles, shm_op_handles), io_num, input_byte_size,
                    output_byte_size, use_system_shared_memory,
                    use_cuda_shared_memory, triton_client)

        if config[2]:
            # Streaming gRPC: results arrive via the completion callback.
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    model_version=model_version,
                    outputs=output_req,
                    request_id=str(_unique_request_id()),
                    priority=priority,
                    timeout=timeout_us)
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(model_name,
                                          inputs,
                                          model_version=model_version,
                                          outputs=output_req,
                                          request_id=str(_unique_request_id()),
                                          priority=priority,
                                          timeout=timeout_us)

        last_response = results.get_response()

        if config[1] == "http":
            response_model_name = last_response["model_name"]
            if model_version != "":
                response_model_version = last_response["model_version"]
            response_outputs = last_response["outputs"]
        else:
            response_model_name = last_response.model_name
            if model_version != "":
                response_model_version = last_response.model_version
            response_outputs = last_response.outputs

        tester.assertEqual(response_model_name, model_name)
        if model_version != "":
            tester.assertEqual(response_model_version, model_version)
        tester.assertEqual(len(response_outputs), io_cnt)

        for result in response_outputs:
            if config[1] == "http":
                result_name = result["name"]
            else:
                result_name = result.name

            tester.assertTrue(result_name in expected_dict)
            if use_system_shared_memory or use_cuda_shared_memory:
                # Recover the IO index from the output name to find its handle.
                if pf == "libtorch" or pf == "libtorch_nobatch":
                    io_num = int(result_name.split("OUTPUT__")[1])
                else:
                    io_num = int(result_name.split("OUTPUT")[1])
                shm_handle = shm_op_handles[io_num]

                output = results.get_output(result_name)
                if config[1] == "http":
                    output_datatype = output['datatype']
                    output_shape = output['shape']
                else:
                    output_datatype = output.datatype
                    output_shape = output.shape
                output_dtype = triton_to_np_dtype(output_datatype)
            if use_system_shared_memory:
                output_data = shm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape)
            elif use_cuda_shared_memory:
                output_data = cudashm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape)
            else:
                output_data = results.as_numpy(result_name)

            if (output_data.dtype == np.object) and (config[3] == False):
                output_data = output_data.astype(np.bytes_)

            expected = expected_dict[result_name]
            tester.assertEqual(output_data.shape, expected.shape)
            tester.assertTrue(
                np.array_equal(output_data, expected),
                "{}, {}, expected: {}, got {}".format(model_name, result_name,
                                                      expected, output_data))

    if len(shm_ip_handles) != 0:
        for io_num in range(io_cnt):
            if use_cuda_shared_memory:
                # Fix: unregister BOTH the input ([0]) and output ([1]) regions.
                # Previously the input region was unregistered twice and the
                # output region leaked.
                triton_client.unregister_cuda_shared_memory(
                    shm_region_name_prefix[0] + str(io_num) + '_data')
                triton_client.unregister_cuda_shared_memory(
                    shm_region_name_prefix[1] + str(io_num) + '_data')
                cudashm.destroy_shared_memory_region(shm_ip_handles[io_num])
                cudashm.destroy_shared_memory_region(shm_op_handles[io_num])
            else:
                # Fix: same copy-paste defect in the system-shared-memory
                # branch — the output region was unregistered twice and the
                # input region leaked.
                triton_client.unregister_system_shared_memory(
                    shm_region_name_prefix[0] + str(io_num) + '_data')
                triton_client.unregister_system_shared_memory(
                    shm_region_name_prefix[1] + str(io_num) + '_data')
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results
def setData(self, index, value, role=Qt.DisplayRole): """Set the value to the index position depending on Qt::ItemDataRole and data type of the column Args: index (QtCore.QModelIndex): Index to define column and row. value (object): new value. role (Qt::ItemDataRole): Use this role to specify what you want to do. Raises: TypeError: If the value could not be converted to a known datatype. Returns: True if value is changed. Calls layoutChanged after update. False if value is not different from original value. """ if not index.isValid() or not self.editable: return False if value != index.data(role): self.layoutAboutToBeChanged.emit() row = self._dataFrame.index[index.row()] col = self._dataFrame.columns[index.column()] #print 'before change: ', index.data().toUTC(), self._dataFrame.iloc[row][col] columnDtype = self._dataFrame[col].dtype if columnDtype == object: pass elif columnDtype in self._intDtypes: dtypeInfo = numpy.iinfo(columnDtype) if value < dtypeInfo.min: value = dtypeInfo.min elif value > dtypeInfo.max: value = dtypeInfo.max elif columnDtype in self._floatDtypes: value = numpy.float64(value).astype(columnDtype) elif columnDtype in self._boolDtypes: value = numpy.bool_(value) elif columnDtype in self._dateDtypes: # convert the given value to a compatible datetime object. # if the conversation could not be done, keep the original # value. if isinstance(value, QtCore.QDateTime): value = value.toString(self.timestampFormat) try: value = pandas.Timestamp(value) except Exception: raise Exception( "Can't convert '{0}' into a datetime".format(value)) # return False else: raise TypeError("try to set unhandled data type") self._dataFrame.set_value(row, col, value) #print 'after change: ', value, self._dataFrame.iloc[row][col] self.layoutChanged.emit() return True else: return False
def test_cummin_cummax():
    """Regression tests for groupby cummin/cummax (GH 15048, 15561, 15635).

    Checks, per numeric dtype: plain cummin/cummax, behavior at the dtype's
    extreme values, NaN propagation, all-NaN columns, datetime columns, and
    dtype preservation. Note the test mutates df/base_df in place between
    assertions, so statement order matters.
    """
    # GH 15048
    num_types = [np.int32, np.int64, np.float32, np.float64]
    # Per-dtype extreme values used to probe boundary behavior.
    num_mins = [
        np.iinfo(np.int32).min,
        np.iinfo(np.int64).min,
        np.finfo(np.float32).min,
        np.finfo(np.float64).min
    ]
    num_max = [
        np.iinfo(np.int32).max,
        np.iinfo(np.int64).max,
        np.finfo(np.float32).max,
        np.finfo(np.float64).max
    ]
    base_df = pd.DataFrame({
        'A': [1, 1, 1, 1, 2, 2, 2, 2],
        'B': [3, 4, 3, 2, 2, 3, 2, 1]
    })
    # Expected running min/max of B within each group of A.
    expected_mins = [3, 3, 3, 2, 2, 2, 2, 1]
    expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]

    for dtype, min_val, max_val in zip(num_types, num_mins, num_max):
        df = base_df.astype(dtype)

        # cummin: the Cython groupby path must agree with the apply() path.
        expected = pd.DataFrame({'B': expected_mins}).astype(dtype)
        result = df.groupby('A').cummin()
        tm.assert_frame_equal(result, expected)
        result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
        tm.assert_frame_equal(result, expected)

        # Test cummin w/ min value for dtype (mutates df in place).
        df.loc[[2, 6], 'B'] = min_val
        expected.loc[[2, 3, 6, 7], 'B'] = min_val
        result = df.groupby('A').cummin()
        tm.assert_frame_equal(result, expected)
        expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
        tm.assert_frame_equal(result, expected)

        # cummax
        expected = pd.DataFrame({'B': expected_maxs}).astype(dtype)
        result = df.groupby('A').cummax()
        tm.assert_frame_equal(result, expected)
        result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
        tm.assert_frame_equal(result, expected)

        # Test cummax w/ max value for dtype.
        df.loc[[2, 6], 'B'] = max_val
        expected.loc[[2, 3, 6, 7], 'B'] = max_val
        result = df.groupby('A').cummax()
        tm.assert_frame_equal(result, expected)
        expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
        tm.assert_frame_equal(result, expected)

    # Test nan in some values (upcasts base_df['B'] to float).
    base_df.loc[[0, 2, 4, 6], 'B'] = np.nan
    expected = pd.DataFrame(
        {'B': [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]})
    result = base_df.groupby('A').cummin()
    tm.assert_frame_equal(result, expected)
    expected = (base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame())
    tm.assert_frame_equal(result, expected)

    expected = pd.DataFrame(
        {'B': [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]})
    result = base_df.groupby('A').cummax()
    tm.assert_frame_equal(result, expected)
    expected = (base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame())
    tm.assert_frame_equal(result, expected)

    # Test nan in entire column.
    base_df['B'] = np.nan
    expected = pd.DataFrame({'B': [np.nan] * 8})
    result = base_df.groupby('A').cummin()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').cummax()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(expected, result)

    # GH 15561: datetime column must round-trip through cummin/cummax.
    df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001'])))
    expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b')
    for method in ['cummax', 'cummin']:
        result = getattr(df.groupby('a')['b'], method)()
        tm.assert_series_equal(expected, result)

    # GH 15635: result dtype must stay integer (no unwanted float upcast).
    df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
    result = df.groupby('a').b.cummax()
    expected = pd.Series([2, 1, 2], name='b')
    tm.assert_series_equal(result, expected)

    df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
    result = df.groupby('a').b.cummin()
    expected = pd.Series([1, 2, 1], name='b')
    tm.assert_series_equal(result, expected)
def generate_test_data(dtype, size=SIZE, order="C"): return np.array( np.random.uniform(np.iinfo(dtype).min, np.iinfo(dtype).max, size).astype(dtype), order=order, )
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike: """ Cumulative function with skipna support. Parameters ---------- values : np.ndarray or ExtensionArray accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minumum.accumulate} skipna : bool Returns ------- np.ndarray or ExtensionArray """ mask_a, mask_b = { np.cumprod: (1.0, np.nan), np.maximum.accumulate: (-np.inf, np.nan), np.cumsum: (0.0, np.nan), np.minimum.accumulate: (np.inf, np.nan), }[accum_func] # We will be applying this function to block values if values.dtype.kind in ["m", "M"]: # GH#30460, GH#29058 # numpy 1.18 started sorting NaTs at the end instead of beginning, # so we need to work around to maintain backwards-consistency. orig_dtype = values.dtype # We need to define mask before masking NaTs mask = isna(values) if accum_func == np.minimum.accumulate: # Note: the accum_func comparison fails as an "is" comparison y = values.view("i8") y[mask] = np.iinfo(np.int64).max changed = True else: y = values changed = False result = accum_func(y.view("i8"), axis=0) if skipna: result[mask] = iNaT elif accum_func == np.minimum.accumulate: # Restore NaTs that we masked previously nz = (~np.asarray(mask)).nonzero()[0] if len(nz): # everything up to the first non-na entry stays NaT result[: nz[0]] = iNaT if changed: # restore NaT elements y[mask] = iNaT # TODO: could try/finally for this? if isinstance(values, np.ndarray): result = result.view(orig_dtype) else: # DatetimeArray result = type(values)._from_sequence(result, dtype=orig_dtype) elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): vals = values.copy() mask = isna(vals) vals[mask] = mask_a result = accum_func(vals, axis=0) result[mask] = mask_b else: result = accum_func(values, axis=0) return result
def env(): np.set_printoptions(linewidth=400, threshold=np.iinfo('int64').max, suppress=True) with buzz.Env(allow_complex_footprint=1): yield
def test_min_int(self): a = np.array([np.iinfo(np.int_).min], dtype=np.int_) # Should not raise: assert_allclose(a, a)
from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python import ipu from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest from tensorflow.python.training import gradient_descent # Error threshold for forward pass test. THRESHOLD = 0.03 # Dimensions of the random data tensor. DIMS = (1024, 1024, 4) # Initialise with a random seed. SEED = np.random.randint(np.iinfo(np.int32).max, size=[2], dtype=np.int32) # Number of times to verify output for a given seed. SEED_TEST_REPETITIONS = 6 def build_test_cases(exhaustive=False): # Dropout rate(s) to test. rate = [0.1, 0.5, 0.9] if exhaustive else [0.5] # User specified and non-specified cases. seed = [SEED, None] # Shape of the dropout. # Note that shaping the dropout such that a very large portion of # the input weights are dropped will fail the test criteria, as expected.
def setup_module():
    """
    A function with a 'magic name' executed automatically before each pytest module
    (file of tests) that helps reproduce a test segfault by setting and outputting
    the rng seeds.

    The segfault-debug procedure on a module called test_module.py is:

    1. run "pytest --verbose test_module.py".  A seg-faulting output might be:

       [INFO] np, mx and python random seeds = 4018804151
       test_module.test1 ... ok
       test_module.test2 ... Illegal instruction (core dumped)

    2. Copy the module-starting seed into the next command, then run:

       MXNET_MODULE_SEED=4018804151 pytest --logging-level=DEBUG --verbose test_module.py

       Output might be:

       [WARNING] **** module-level seed is set: all tests running deterministically ****
       [INFO] np, mx and python random seeds = 4018804151
       test_module.test1 ... [DEBUG] np and mx random seeds = 3935862516
       ok
       test_module.test2 ... [DEBUG] np and mx random seeds = 1435005594
       Illegal instruction (core dumped)

    3. Copy the segfaulting-test seed into the command:

       MXNET_TEST_SEED=1435005594 pytest --logging-level=DEBUG --verbose test_module.py:test2
       Output might be:

       [INFO] np, mx and python random seeds = 2481884723
       test_module.test2 ... [DEBUG] np and mx random seeds = 1435005594
       Illegal instruction (core dumped)

    3. Finally reproduce the segfault directly under gdb (might need additional os packages)
       by editing the bottom of test_module.py to be

       if __name__ == '__main__':
           logging.getLogger().setLevel(logging.DEBUG)
           test2()

       MXNET_TEST_SEED=1435005594 gdb -ex r --args python test_module.py

    4. When finished debugging the segfault, remember to unset any exported MXNET_ seed
       variables in the environment to return to non-deterministic testing (a good thing).
    """
    module_seed_str = os.getenv('MXNET_MODULE_SEED')
    logger = default_logger()
    if module_seed_str is None:
        # No env override: draw a fresh module seed each run.
        seed = np.random.randint(0, np.iinfo(np.int32).max)
    else:
        seed = int(module_seed_str)
        # Fix: Logger.warn is a deprecated alias (removed in Python 3.13);
        # use Logger.warning instead.
        logger.warning(
            '*** module-level seed is set: all tests running deterministically ***'
        )
    logger.info(
        'Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=%s to reproduce.',
        seed)
    # Seed all three RNG sources so the module is reproducible end to end.
    np.random.seed(seed)
    mx.random.seed(seed)
    random.seed(seed)
    # The MXNET_TEST_SEED environment variable will override MXNET_MODULE_SEED for tests with
    # the 'with_seed()' decoration.  Inform the user of this once here at the module level.
    if os.getenv('MXNET_TEST_SEED') is not None:
        logger.warning(
            '*** test-level seed set: all "@with_seed()" tests run deterministically ***'
        )
def locate(raw_image, diameter, minmass=100., maxsize=None, separation=None,
           noise_size=1, smoothing_size=None, threshold=None, invert=False,
           percentile=64, topn=None, preprocess=True, max_iterations=10,
           filter_before=True, filter_after=True,
           characterize=True, engine='auto'):
    """Locate Gaussian-like blobs of some approximate size in an image.

    Preprocess the image by performing a band pass and a threshold.
    Locate all peaks of brightness, characterize the neighborhoods of the peaks
    and take only those with given total brightnesss ("mass"). Finally,
    refine the positions of each peak.

    Parameters
    ----------
    image : image array (any dimensions)
    diameter : feature size in px
        This may be a single number or a tuple giving the feature's
        extent in each dimension, useful when the dimensions do not have
        equal resolution (e.g. confocal microscopy). The tuple order is the
        same as the image shape, conventionally (z, y, x) or (y, x). The
        number(s) must be odd integers. When in doubt, round up.
    minmass : minimum integrated brightness
        Default is 100, but a good value is often much higher. This is a
        crucial parameter for elminating spurious features.
    maxsize : maximum radius-of-gyration of brightness, default None
    separation : feature separation, in pixels
        Default is diameter + 1. May be a tuple, see diameter for details.
    noise_size : width of Gaussian blurring kernel, in pixels
        Default is 1. May be a tuple, see diameter for details.
    smoothing_size : size of boxcar smoothing, in pixels
        Default is diameter. May be a tuple, see diameter for details.
    threshold : Clip bandpass result below this value.
        Default None, passed through to bandpass.
    invert : Set to True if features are darker than background. False by
        default.
    percentile : Features must have a peak brighter than pixels in this
        percentile. This helps eliminate spurious peaks.
    topn : Return only the N brightest features above minmass.
        If None (default), return all features above minmass.

    Returns
    -------
    DataFrame([x, y, mass, size, ecc, signal])
        where mass means total integrated brightness of the blob,
        size means the radius of gyration of its Gaussian-like profile,
        and ecc is its eccentricity (1 is circular).

    Other Parameters
    ----------------
    preprocess : Set to False to turn out bandpass preprocessing.
    max_iterations : integer
        max number of loops to refine the center of mass, default 10
    filter_before : boolean
        Use minmass (and maxsize, if set) to eliminate spurious features
        based on their estimated mass and size before refining position.
        True by default for performance.
    filter_after : boolean
        Use final characterizations of mass and size to eliminate spurious
        features. True by default.
    characterize : boolean
        Compute "extras": eccentricity, signal, ep. True by default.
    engine : {'auto', 'python', 'numba'}

    See Also
    --------
    batch : performs location on many images in batch

    Notes
    -----
    Locate works with a coordinate system that has its origin at the center of
    pixel (0, 0). In almost all cases this will be the topleft pixel: the
    y-axis is pointing downwards.

    This is an implementation of the Crocker-Grier centroid-finding algorithm.
    [1]_

    References
    ----------
    .. [1] Crocker, J.C., Grier, D.G. http://dx.doi.org/10.1006/jcis.1996.0217
    """
    # Validate parameters and set defaults.
    raw_image = np.squeeze(raw_image)
    shape = raw_image.shape
    ndim = len(shape)
    diameter = validate_tuple(diameter, ndim)
    diameter = tuple([int(x) for x in diameter])
    if not np.all([x & 1 for x in diameter]):
        raise ValueError("Feature diameter must be an odd integer. Round up.")
    radius = tuple([x//2 for x in diameter])
    if separation is None:
        separation = tuple([x + 1 for x in diameter])
    else:
        separation = validate_tuple(separation, ndim)
    if smoothing_size is None:
        smoothing_size = diameter
    else:
        smoothing_size = validate_tuple(smoothing_size, ndim)
    noise_size = validate_tuple(noise_size, ndim)

    # Don't do characterization for rectangular pixels/voxels.
    if diameter[1:] != diameter[:-1]:
        characterize = False

    # Check whether the image looks suspiciously like a color image.
    if 3 in shape or 4 in shape:
        dim = raw_image.ndim
        warnings.warn("I am interpreting the image as {0}-dimensional. "
                      "If it is actually a {1}-dimensional color image, "
                      "convert it to grayscale first.".format(dim, dim-1))
    if preprocess:
        if invert:
            # It is tempting to do this in place, but if it is called multiple
            # times on the same image, chaos reigns.
            if np.issubdtype(raw_image.dtype, np.integer):
                # Integer images invert exactly via XOR with the dtype max.
                max_value = np.iinfo(raw_image.dtype).max
                raw_image = raw_image ^ max_value
            else:
                # To avoid degrading performance, assume gamut is zero to one.
                # Have you ever encountered an image of unnormalized floats?
                raw_image = 1 - raw_image
        image = bandpass(raw_image, noise_size, smoothing_size, threshold)
    else:
        image = raw_image.copy()

    # Coerce the image into integer type. Rescale to fill dynamic range.
    if np.issubdtype(raw_image.dtype, np.integer):
        dtype = raw_image.dtype
    else:
        dtype = np.uint8
    image = scale_to_gamut(image, dtype)

    # Set up a DataFrame for the final results.
    if image.ndim < 4:
        coord_columns = ['x', 'y', 'z'][:image.ndim]
    else:
        # Fix: use a list comprehension instead of map(); under Python 3,
        # map() returns an iterator and `coord_columns + char_columns`
        # below raised TypeError for images with ndim >= 4.
        coord_columns = ['x' + str(i) for i in range(image.ndim)]
    char_columns = ['mass']
    if characterize:
        char_columns += ['size', 'ecc', 'signal']
    columns = coord_columns + char_columns
    # The 'ep' column is joined on at the end, so we need this...
    if characterize:
        all_columns = columns + ['ep']
    else:
        all_columns = columns

    # Find local maxima.
    # Define zone of exclusion at edges of image, avoiding
    #   - Features with incomplete image data ("radius")
    #   - Extended particles that cannot be explored during subpixel
    #       refinement ("separation")
    #   - Invalid output of the bandpass step ("smoothing_size")
    margin = tuple([max(rad, sep // 2 - 1, sm // 2) for (rad, sep, sm) in
                    zip(radius, separation, smoothing_size)])
    coords = local_maxima(image, radius, percentile, margin)
    count_maxima = coords.shape[0]

    if count_maxima == 0:
        return DataFrame(columns=all_columns)

    # Proactively filter based on estimated mass/size before
    # refining positions.
    if filter_before:
        approx_mass = np.empty(count_maxima)  # initialize to avoid appending
        for i in range(count_maxima):
            approx_mass[i] = estimate_mass(image, radius, coords[i])
        condition = approx_mass > minmass
        if maxsize is not None:
            approx_size = np.empty(count_maxima)
            for i in range(count_maxima):
                approx_size[i] = estimate_size(image, radius, coords[i],
                                               approx_mass[i])
            condition &= approx_size < maxsize
        coords = coords[condition]
        count_qualified = coords.shape[0]

        if count_qualified == 0:
            warnings.warn("No maxima survived mass- and size-based "
                          "prefiltering.")
            return DataFrame(columns=all_columns)

    # Refine their locations and characterize mass, size, etc.
    refined_coords = refine(raw_image, image, radius, coords, separation,
                            max_iterations, engine, characterize)

    # Filter again, using final ("exact") mass -- and size, if set.
    MASS_COLUMN_INDEX = image.ndim
    SIZE_COLUMN_INDEX = image.ndim + 1
    exact_mass = refined_coords[:, MASS_COLUMN_INDEX]
    if filter_after:
        condition = exact_mass > minmass
        if maxsize is not None:
            exact_size = refined_coords[:, SIZE_COLUMN_INDEX]
            condition &= exact_size < maxsize
        refined_coords = refined_coords[condition]
        exact_mass = exact_mass[condition]  # used below by topn
    count_qualified = refined_coords.shape[0]

    if count_qualified == 0:
        warnings.warn("No maxima survived mass- and size-based filtering.")
        return DataFrame(columns=all_columns)

    if topn is not None and count_qualified > topn:
        if topn == 1:
            # special case for high performance and correct shape
            refined_coords = refined_coords[np.argmax(exact_mass)]
            refined_coords = refined_coords.reshape(1, -1)
        else:
            refined_coords = refined_coords[np.argsort(exact_mass)][-topn:]

    f = DataFrame(refined_coords, columns=columns)

    # Estimate the uncertainty in position using signal (measured in refine)
    # and noise (measured here below).
    if characterize:
        black_level, noise = uncertainty.measure_noise(
            raw_image, diameter, threshold)
        f['signal'] -= black_level
        ep = uncertainty.static_error(f, noise, diameter[0], noise_size[0])
        f = f.join(ep)

    # If this is a pims Frame object, it has a frame number.
    # Tag it on; this is helpful for parallelization.
    if hasattr(raw_image, 'frame_no') and raw_image.frame_no is not None:
        f['frame'] = raw_image.frame_no
    return f
def clip_add(image1: np.ndarray, image2: np.ndarray, dtype: np.dtype = np.uint16): """Clip the image to the dtype extrema. Otherwise the bits will flip.""" return np.clip(image1 + image2, np.iinfo(dtype).min, np.iinfo(dtype).max).astype(dtype)