def generate_reverb(signal, reverb, fname, iter_range):
    """
    Adds reverb from the path reverb to the data in the path signal and saves it as fname. Applies reverb iteratively over
    iter_range
    :param signal: the filename for the stereo input signal
    :param reverb: the filename for the stereo impulse response
    :param fname: the output filename to save as
    :param iter_range: the max number of iterations to convolve with the signal
    :return:
    """
    sr, data = wav.read(signal)
    if data.dtype == np.dtype("int16"):
        data = data / float(np.iinfo(data.dtype).max)

    sr_ir, data_ir = wav.read(reverb)
    if data_ir.dtype == np.dtype("int16"):
        data_ir = data_ir / float(np.iinfo(data_ir.dtype).max)

    if sr_ir != sr:
        raise ValueError("Impulse Response must have same sample rate as signal")

    prev_data = data
    for i in range(0, iter_range + 1):
        if i > 0:
            mix = add_reverb(prev_data.T, data_ir.T)
            prev_data = np.copy(mix).T
        else:
            mix = data.T
        # write each iteration to its own file, skipping files that already exist
        out_name = os.path.splitext(fname)[0] + '-' + str(i) + '.wav'
        if not os.path.exists(out_name):
            wav.write(out_name, sr, mix.T)
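
The int16 branch above is where np.iinfo matters: integer PCM samples are rescaled by the dtype's maximum before convolution. A minimal, self-contained sketch of that normalization, with made-up sample values standing in for the data returned by wav.read:

import numpy as np

# Hypothetical int16 samples standing in for wav.read() output.
samples = np.array([-32768, 0, 16384, 32767], dtype=np.int16)

# Rescale by the dtype's maximum, as generate_reverb does for int16 input.
if samples.dtype == np.dtype("int16"):
    samples = samples / float(np.iinfo(samples.dtype).max)

print(samples)   # approximately [-1.00003  0.  0.50002  1.]
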
Example #2
    def _finalize(self, dtype=np.uint8):
        """Finalize the image, that is put it in RGB mode, and set the channels
        in unsigned 8bit format ([0,255] range) (if the *dtype* doesn't say
        otherwise).
        """
        channels = []
        if self.mode == "P":
            self.convert("RGB")
        if self.mode == "PA":
            self.convert("RGBA")

        for chn in self.channels:
            if isinstance(chn, np.ma.core.MaskedArray):
                final_data = chn.data.clip(0, 1) * np.iinfo(dtype).max
            else:
                final_data = chn.clip(0, 1) * np.iinfo(dtype).max

            channels.append(np.ma.array(final_data,
                                        dtype,
                                        mask = np.ma.getmaskarray(chn)))
        if self.fill_value is not None:
            fill_value = [int(col * np.iinfo(dtype).max)
                          for col in self.fill_value]
        else:
            fill_value = None
        return channels, fill_value
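
The clip-and-scale step above maps a [0, 1] float channel onto the full range of the target integer dtype. A small stand-alone sketch of the same idea, using invented channel values:

import numpy as np

dtype = np.uint8
chn = np.array([-0.2, 0.0, 0.5, 1.3])            # hypothetical float channel

# Clip to [0, 1] and stretch onto [0, iinfo(dtype).max], as _finalize does.
final_data = chn.clip(0, 1) * np.iinfo(dtype).max
print(final_data.astype(dtype))                  # [  0   0 127 255]
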
Example #3
 def test_ldexp_overflow(self):
     # silence warning emitted on overflow
     with np.errstate(over="ignore"):
         imax = np.iinfo(np.dtype('l')).max
         imin = np.iinfo(np.dtype('l')).min
         assert_equal(ncu.ldexp(2., imax), np.inf)
         assert_equal(ncu.ldexp(2., imin), 0)
Example #4
def add_noise(sim):
    det_left = sim.outarr[:, :sim.nxpix]
    det_mid = sim.outarr[:, sim.nxpix:2*sim.nxpix]
    det_right = sim.outarr[:, 2*sim.nxpix:3*sim.nxpix]
    shape = det_left.shape

    det_left += det_bias(sim.dl_bias, det="left")
    det_mid += det_bias(sim.dm_bias, det="middle")
    det_right += det_bias(sim.dr_bias, det="right")

    det_left += readout_noise(sim.dl_ron, shape, det="left")
    det_mid += readout_noise(sim.dm_ron, shape, det="middle")
    det_right += readout_noise(sim.dr_ron, shape, det="right")

    det_left += dark_current(sim.dl_dc, sim.tobs, shape, det="left")
    det_mid += dark_current(sim.dm_dc, sim.tobs, shape, det="middle")
    det_right += dark_current(sim.dr_dc, sim.tobs, shape, det="right")

    sim.outarr = gain(sim.outarr, sim.inv_gain)

    if sim.outarr.max() > np.iinfo(np.uint16).max:
        log.info("Clipping array values larger than %s.", np.iinfo(np.uint16).max)
        sim.outarr[sim.outarr > np.iinfo(np.uint16).max] = np.iinfo(np.uint16).max
    sim.outarr = np.asarray(sim.outarr, dtype=np.uint16)    
    log.info("Converting image array back to %s.", sim.outarr.dtype)
    
Example #5
def able_int_type(values):
    """ Find the smallest integer numpy type to contain sequence `values`

    Prefers uint to int if minimum is >= 0

    Parameters
    ----------
    values : sequence
        sequence of integer values

    Returns
    -------
    itype : None or numpy type
        numpy integer type or None if no integer type holds all `values`

    Examples
    --------
    >>> able_int_type([0, 1]) == np.uint8
    True
    >>> able_int_type([-1, 1]) == np.int8
    True
    """
    if any([v % 1 for v in values]):
        return None
    mn = min(values)
    mx = max(values)
    if mn >= 0:
        for ityp in np.sctypes['uint']:
            if mx <= np.iinfo(ityp).max:
                return ityp
    for ityp in np.sctypes['int']:
        info = np.iinfo(ityp)
        if mn >= info.min and mx <= info.max:
            return ityp
    return None
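
The selection loop reduces to a range check against np.iinfo for each candidate type. A short sketch of that check, written without the np.sctypes lookup used above (which newer NumPy releases no longer provide):

import numpy as np

mn, mx = -1, 200
for ityp in (np.int8, np.int16, np.int32):
    info = np.iinfo(ityp)
    print(ityp.__name__, info.min <= mn and mx <= info.max)
# int8 False (max is 127), int16 True, int32 True -- the loop returns the first fit
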
Example #6
    def test_int64_overflow(self):
        data = """ID
00013007854817840016671868
00013007854817840016749251
00013007854817840016754630
00013007854817840016781876
00013007854817840017028824
00013007854817840017963235
00013007854817840018860166"""

        result = self.read_csv(StringIO(data))
        self.assertTrue(result['ID'].dtype == object)

        self.assertRaises(OverflowError, self.read_csv,
                          StringIO(data), converters={'ID': np.int64})

        # Just inside int64 range: parse as integer
        i_max = np.iinfo(np.int64).max
        i_min = np.iinfo(np.int64).min
        for x in [i_max, i_min]:
            result = self.read_csv(StringIO(str(x)), header=None)
            expected = DataFrame([x])
            tm.assert_frame_equal(result, expected)

        # Just outside int64 range: parse as string
        too_big = i_max + 1
        too_small = i_min - 1
        for x in [too_big, too_small]:
            result = self.read_csv(StringIO(str(x)), header=None)
            expected = DataFrame([str(x)])
            tm.assert_frame_equal(result, expected)
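
The boundary values in the test come straight from np.iinfo; one step beyond them no longer fits in int64, which is why the parser falls back to strings. For reference:

import numpy as np

i_max = np.iinfo(np.int64).max
i_min = np.iinfo(np.int64).min
print(i_max)        # 9223372036854775807
print(i_min)        # -9223372036854775808
print(i_max + 1)    # a plain Python int, one past what int64 can hold
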
Example #7
    def test_implementation_limits(self):
        min_td = Timedelta(Timedelta.min)
        max_td = Timedelta(Timedelta.max)

        # GH 12727
        # timedelta limits correspond to int64 boundaries
        assert min_td.value == np.iinfo(np.int64).min + 1
        assert max_td.value == np.iinfo(np.int64).max

        # Beyond lower limit, a NAT before the Overflow
        assert (min_td - Timedelta(1, 'ns')) is NaT

        with pytest.raises(OverflowError):
            min_td - Timedelta(2, 'ns')

        with pytest.raises(OverflowError):
            max_td + Timedelta(1, 'ns')

        # Same tests using the internal nanosecond values
        td = Timedelta(min_td.value - 1, 'ns')
        assert td is NaT

        with pytest.raises(OverflowError):
            Timedelta(min_td.value - 2, 'ns')

        with pytest.raises(OverflowError):
            Timedelta(max_td.value + 1, 'ns')
Example #8
def test_int_out_of_range(parallel):
    """
    Integer numbers outside int range shall be returned as string columns
    consistent with the standard (Python) parser (no 'upcasting' to float).
    """
    imin = np.iinfo(int).min+1
    imax = np.iinfo(int).max-1
    huge = '{:d}'.format(imax+2)

    text = 'P M S\n {:d} {:d} {:s}'.format(imax, imin, huge)
    expected = Table([[imax], [imin], [huge]], names=('P', 'M', 'S'))
    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)

    # check with leading zeroes to make sure strtol does not read them as octal
    text = 'P M S\n000{:d} -0{:d} 00{:s}'.format(imax, -imin, huge)
    expected = Table([[imax], [imin], ['00'+huge]], names=('P', 'M', 'S'))
    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)

    # mixed columns should be returned as float, but if the out-of-range integer
    # shows up first, it will produce a string column - with both readers
    pytest.xfail("Integer fallback depends on order of rows")
    text = 'A B\n 12.3 {0:d}9\n {0:d}9 45.6e7'.format(imax)
    expected = Table([[12.3, 10.*imax], [10.*imax, 4.56e8]],
                     names=('A', 'B'))

    table = ascii.read(text, format='basic', guess=False,
                       fast_reader={'parallel': parallel})
    assert_table_equal(table, expected)
    table = ascii.read(text, format='basic', guess=False, fast_reader=False)
    assert_table_equal(table, expected)
Example #9
    def initBuffers(self,puzzle):
        #define lengths buffer and copy to the GPU
        #as we will not read from this buffer later, mapping is not required
        self.lengths = np.full(self.simulations,np.iinfo(np.int16).max,dtype=np.int16)
        self.lengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.lengths)
         
        #define buffer for aggregated lengths for each workgroup
        self.groupLengths = np.full(self.workGroups,np.iinfo(np.int16).max,dtype=np.int16)
        self.groupLengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.groupLengths)
        
        #map group lengths buffer
        cl.enqueue_map_buffer(self.queue,self.groupLengthsBuffer,cl.map_flags.READ,0,self.groupLengths.shape,self.groupLengths.dtype)
        
        #get the input puzzle ready for the kernel; convert to 8 bit int (char)
        p = np.array(puzzle['puzzle']).astype(np.int8)
        #subtract 1 so that -1 denotes a gap and 0 denotes a square to be filled
        p = p - np.ones_like(p,dtype=p.dtype)
        
        #copy the puzzle, one for each simulation
        self.puzzles = np.zeros((self.simulations,self.height,self.width),dtype=p.dtype)
        self.puzzles[:,0:self.height,0:self.width] = p
    
        #define puzzles buffer and copy data (we do not need to worry about getting data out of this buffer, so mapping isn't required)
        #this buffer contains the input puzzles, one for each invocation (the puzzle is too large to hold in local or shared memory)
        self.puzzlesFlattened = self.puzzles.ravel()
        self.puzzlesBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.puzzlesFlattened)
        
        #define output buffer for best solutions aggregated across workgroups
        self.solutions = self.puzzles[0:self.workGroups]
        self.solutionsFlattened = self.solutions.ravel()
        self.solutionsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.solutionsFlattened)

        #map solutions buffer
        cl.enqueue_map_buffer(self.queue,self.solutionsBuffer,cl.map_flags.READ,0,self.solutionsFlattened.shape,self.solutions.dtype)
Example #10
def randimg_in2out(rng, in_dtype, out_dtype, name):
    in_dtype = np.dtype(in_dtype)
    out_dtype = np.dtype(out_dtype)
    shape = (2,3,4)
    if in_dtype.kind in 'iu':
        info = np.iinfo(in_dtype)
        dmin, dmax = info.min, info.max
        # Numpy bug for np < 1.6.0 allows overflow for range that does not fit
        # into C long int (int32 on 32-bit, int64 on 64-bit)
        try:
            data = rng.randint(dmin, dmax, size=shape)
        except ValueError:
            from random import randint
            vals = [randint(dmin, dmax) for v in range(np.prod(shape))]
            data = np.array(vals).astype(in_dtype).reshape(shape)
    elif in_dtype.kind == 'f':
        info = np.finfo(in_dtype)
        dmin, dmax = info.min, info.max
        # set some value for scaling our data
        scale = np.iinfo(np.uint16).max * 2.0
        data = rng.normal(size=shape, scale=scale)
    data[0,0,0] = dmin
    data[1,0,0] = dmax
    data = data.astype(in_dtype)
    img = Image(data, vox2mni(np.eye(4)))
    # The dtype_from dtype won't be visible until the image is loaded
    newimg = save_image(img, name, dtype_from=out_dtype)
    return newimg.get_data(), data
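
The integer branch above draws random values spanning the dtype's full np.iinfo range. A minimal sketch of that pattern for a small dtype (seed, shape and dtype are arbitrary choices here):

import numpy as np

rng = np.random.RandomState(0)
info = np.iinfo(np.int16)

# randint's upper bound is exclusive, so values land in [info.min, info.max).
data = rng.randint(info.min, info.max, size=(2, 3)).astype(np.int16)
print(data.dtype, data.min() >= info.min, data.max() <= info.max)
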
Example #11
  def testInfNan(self):
    i4 = np.iinfo(np.int32)
    i8 = np.iinfo(np.int64)

    self._compare(np.inf, np.float32, np.inf, False)
    self._compare(np.inf, np.float64, np.inf, False)
    if sys.byteorder == "big":
      self._compare(np.inf, np.int32, i4.max, False)
      self._compare(np.inf, np.int64, i8.max, False)
    else:
      # np.float64(np.inf).astype(np.int32) is negative on x86 but positive on ppc64le
      # Numpy link to relevant discussion - https://github.com/numpy/numpy/issues/9040
      # Tensorflow link to relevant discussion - https://github.com/tensorflow/tensorflow/issues/9360
      if platform.machine() == "ppc64le":
        self._compare(-np.inf, np.int32, i4.min, False)
        self._compare(-np.inf, np.int64, i8.min, False)
      else:
        self._compare(np.inf, np.int32, i4.min, False)
        self._compare(np.inf, np.int64, i8.min, False)
    self._compare(-np.inf, np.float32, -np.inf, False)
    self._compare(-np.inf, np.float64, -np.inf, False)
    self._compare(-np.inf, np.int32, i4.min, False)
    self._compare(-np.inf, np.int64, i8.min, False)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, False)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, False)), True)
    self._compare(np.nan, np.int32, i4.min, False)
    self._compare(np.nan, np.int64, i8.min, False)

    self._compare(np.inf, np.float32, np.inf, True)
    self._compare(np.inf, np.float64, np.inf, True)
    self._compare(-np.inf, np.float32, -np.inf, True)
    self._compare(-np.inf, np.float64, -np.inf, True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, True)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True)
Example #12
    def __init__(self, vocabulary, fixed_length, custom_wordgen=None,
                 ignore_sentences_with_only_custom=False, masking_value=0,
                 unknown_value=1):
        """ Needs a dictionary as input for the vocabulary.
        """

        if len(vocabulary) > np.iinfo('uint16').max:
            raise ValueError('Dictionary is too big ({} tokens) for the numpy '
                             'datatypes used (max limit={}). Reduce vocabulary'
                             ' or adjust code accordingly!'
                             .format(len(vocabulary), np.iinfo('uint16').max))

        # Shouldn't be able to modify the given vocabulary
        self.vocabulary = deepcopy(vocabulary)
        self.fixed_length = fixed_length
        self.ignore_sentences_with_only_custom = ignore_sentences_with_only_custom
        self.masking_value = masking_value
        self.unknown_value = unknown_value

        # Initialized with an empty stream of sentences that must then be fed
        # to the generator at a later point for reusability.
        # A custom word generator can be used for domain-specific filtering etc
        if custom_wordgen is not None:
            assert custom_wordgen.stream is None
            self.wordgen = custom_wordgen
            self.uses_custom_wordgen = True
        else:
            self.wordgen = WordGenerator(None, allow_unicode_text=True,
                                         ignore_emojis=False,
                                         remove_variation_selectors=True,
                                         break_replacement=True)
            self.uses_custom_wordgen = False
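
The guard above simply compares the vocabulary size against the largest index representable in the uint16 arrays used downstream. A compact illustration with an invented vocabulary size:

import numpy as np

vocab_size = 70000                       # hypothetical token count
limit = np.iinfo('uint16').max           # 65535
if vocab_size > limit:
    print('Dictionary is too big ({} tokens) for uint16 (max limit={})'
          .format(vocab_size, limit))
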
Example #13
    def test_implementation_limits(self):
        min_td = Timedelta(Timedelta.min)
        max_td = Timedelta(Timedelta.max)

        # GH 12727
        # timedelta limits correspond to int64 boundaries
        self.assertTrue(min_td.value == np.iinfo(np.int64).min + 1)
        self.assertTrue(max_td.value == np.iinfo(np.int64).max)

        # Beyond lower limit, a NAT before the Overflow
        self.assertIsInstance(min_td - Timedelta(1, 'ns'),
                              pd.tslib.NaTType)

        with tm.assertRaises(OverflowError):
            min_td - Timedelta(2, 'ns')

        with tm.assertRaises(OverflowError):
            max_td + Timedelta(1, 'ns')

        # Same tests using the internal nanosecond values
        td = Timedelta(min_td.value - 1, 'ns')
        self.assertIsInstance(td, pd.tslib.NaTType)

        with tm.assertRaises(OverflowError):
            Timedelta(min_td.value - 2, 'ns')

        with tm.assertRaises(OverflowError):
            Timedelta(max_td.value + 1, 'ns')
Example #14
 def test_absolute_ufunc(self, flags=enable_pyobj_flags):
     self.unary_ufunc_test('absolute', flags=flags,
         additional_inputs = [(np.iinfo(np.uint32).max, types.uint32),
                              (np.iinfo(np.uint64).max, types.uint64),
                              (np.finfo(np.float32).min, types.float32),
                              (np.finfo(np.float64).min, types.float64)
                              ])
Example #15
def _random_integers(size, dtype):
    # We do not generate integers outside the int64 range
    platform_int_info = np.iinfo('int_')
    iinfo = np.iinfo(dtype)
    return np.random.randint(max(iinfo.min, platform_int_info.min),
                             min(iinfo.max, platform_int_info.max),
                             size=size).astype(dtype)
Example #16
def tbl_2_nparray(in_tbl, flds):
    """Form the TableToNumPyArray to account for nulls for various dtypes.
    This is essentially a shortcut to `arcpy.da.TableToNumPyArray`

    Requires
    --------
    `in_tbl` :
        table, or featureclass table name
    `flds` :
        list of field names
    `skip_nulls` = False :
        set within function
    `null_value` :
        determined from the dtype of the array...
        otherwise you may as well do it manually

    Source
    ------
    arraytools, apt.py module
    """
    nulls = {'Double':np.nan,
             'Single':np.nan,
             'Integer':np.iinfo(np.int32).min,
             'OID':np.iinfo(np.int32).min,
             'String':"None"}
    #
    fld_dict = {i.name: i.type for i in arcpy.ListFields(in_tbl)}
    null_dict = {f:nulls[fld_dict[f]] for f in flds}
    a = arcpy.da.TableToNumPyArray(in_table=in_tbl,
                                   field_names=flds,
                                   skip_nulls=False,
                                   null_value=null_dict)
    return a
Example #17
 def _iu2iu(self):
     # (u)int to (u)int
     mn, mx = [as_int(v) for v in self.finite_range()]
     # range may be greater than the largest integer for this type.
     # as_int needed to work round numpy 1.4.1 int casting bug
     out_dtype = self._out_dtype
     t_min, t_max = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
     type_range = as_int(t_max) - as_int(t_min)
     mn2mx = mx - mn
     if mn2mx <= type_range: # might offset be enough?
         if t_min == 0: # uint output - take min to 0
             # decrease offset with floor_exact, meaning mn >= t_min after
             # subtraction.  But we may have pushed the data over t_max,
             # which we check below
             inter = floor_exact(mn - t_min, self.scaler_dtype)
         else: # int output - take midpoint to 0
             # ceil below increases inter, pushing scale up to 0.5 towards
             # -inf, because ints have abs min == abs max + 1
             midpoint = mn + as_int(np.ceil(mn2mx / 2.0))
             # Floor exact decreases inter, so pulling scaled values more
             # positive. This may make mx - inter > t_max
             inter = floor_exact(midpoint, self.scaler_dtype)
         # Need to check still in range after floor_exact-ing
         int_inter = as_int(inter)
         assert mn - int_inter >= t_min
         if mx - int_inter <= t_max:
             self.inter = inter
             return
     # Try slope options (sign flip) and then range scaling
     super(SlopeInterArrayWriter, self)._iu2iu()
Example #18
 def set_signal_dtype(self, data_type, signal=None, clip=False):
     if signal is None:
         signal = self.get_selected_signal()
     self.record_code("signal = ui.get_selected_signal()")
     if isinstance(data_type, str) and data_type.lower() == 'custom':
         return    # TODO: Show dialog and prompt
     if not clip:
         old_type = signal.data.dtype
         if np.issubdtype(data_type, np.integer):
             info = np.iinfo(data_type)
          elif np.issubdtype(data_type, np.floating):
             info = np.finfo(data_type)
         if np.issubdtype(old_type, np.integer):
             old_info = np.iinfo(old_type)
          elif np.issubdtype(old_type, np.floating):
             old_info = np.finfo(old_type)
         if old_info.max > info.max:
             signal.data *= float(info.max) / np.nanmax(signal.data)
             self.record_code("signal.data *= %f / np.nanmax(signal.data)" %
                              float(info.max))
     signal.change_dtype(data_type)
     dts = data_type.__name__
     if data_type.__module__ == 'numpy':
         dts = 'np.' + dts
     self.record_code("signal.change_dtype(%s)" % dts)
Example #19
def test_big_game_functions():
    """Test that everything works when game_size > int max"""
    base = rsgame.basegame([100, 100], [30, 30])
    game = gamegen.add_profiles(base, 1000)
    assert game.num_all_profiles > np.iinfo(int).max
    assert game.num_all_dpr_profiles > np.iinfo(int).max
    assert np.all(game.profile_id(game.profiles) >= 0)
Example #20
    def checkTypeConversionNecessary(self, inputType = None, outputType = None):
        if inputType is None:
            if hasattr(self, "inputType"):
                inputType = self.inputType
            else:
                return False
        if outputType is None:
            outputType = self.getOutputDType()

        t = inputType
        limits = []
        try:
            limits.append(numpy.iinfo(t).min)
            limits.append(numpy.iinfo(t).max)
        except:
            limits.append(numpy.finfo(t).min)
            limits.append(numpy.finfo(t).max)

        try:
            if not numpy.all(numpy.array(limits, dtype = outputType) == limits):
                self.normalizationComboBox.setCurrentIndex(1)
                # output type is too small to hold the limits;
                # renormalization has to be done beforehand
                return True
        except:
            self.normalizationComboBox.setCurrentIndex(1)
            # output type is too small to hold the limits;
            # renormalization has to be done beforehand
            return True
        return False
Example #21
  def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
    with self.cached_session():
      input_op = constant_op.constant(inputs, shape=[len(inputs)], dtype=dtype)
      dequantized = array_ops.dequantize(input_op, min_range, max_range)
      tf_ans = dequantized.eval()

    # TODO(vrv): Add support for DT_QINT32 quantization if needed.
    type_dict = {
        dtypes.quint8: np.uint8,
        dtypes.qint8: np.int8,
        dtypes.quint16: np.uint16,
        dtypes.qint16: np.int16
    }
    self.assertTrue(dtype in type_dict.keys())
    v_max = np.iinfo(type_dict[dtype]).max
    v_min = np.iinfo(type_dict[dtype]).min
    self.assertTrue(min_range >= v_min)
    self.assertTrue(max_range <= v_max)
    type_range = v_max - v_min
    if v_min < 0:
      half_range = (type_range + 1) / 2
    else:
      half_range = 0.0

    np_ans = ((inputs.astype(np.float32) + half_range) *
              (max_range - min_range) / type_range) + min_range
    self.assertAllClose(tf_ans, np_ans, rtol=1e-5, atol=1e-5)
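
The reference value np_ans above is the usual affine dequantization formula, with half_range shifting signed types toward zero. A NumPy-only version of the same arithmetic for quint8-style data (input values and range are made up):

import numpy as np

inputs = np.array([0, 128, 255], dtype=np.uint8)
min_range, max_range = -1.0, 1.0

v_min, v_max = np.iinfo(np.uint8).min, np.iinfo(np.uint8).max
type_range = v_max - v_min
half_range = (type_range + 1) / 2 if v_min < 0 else 0.0   # 0.0 for unsigned types

dequantized = ((inputs.astype(np.float32) + half_range) *
               (max_range - min_range) / type_range) + min_range
print(dequantized)   # approximately [-1.  0.0039  1.]
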
Example #22
  def construct_lookup_variables(self):
    # Materialize negatives for fast lookup sampling.
    start_time = timeit.default_timer()
    inner_bounds = np.argwhere(self._train_pos_users[1:] -
                               self._train_pos_users[:-1])[:, 0] + 1
    (upper_bound,) = self._train_pos_users.shape
    index_bounds = [0] + inner_bounds.tolist() + [upper_bound]
    self._negative_table = np.zeros(shape=(self._num_users, self._num_items),
                                    dtype=rconst.ITEM_DTYPE)

    # Set the table to the max value to make sure the embedding lookup will fail
    # if we go out of bounds, rather than just overloading item zero.
    self._negative_table += np.iinfo(rconst.ITEM_DTYPE).max
    assert self._num_items < np.iinfo(rconst.ITEM_DTYPE).max

    # Reuse arange during generation. np.delete will make a copy.
    full_set = np.arange(self._num_items, dtype=rconst.ITEM_DTYPE)

    self._per_user_neg_count = np.zeros(
        shape=(self._num_users,), dtype=np.int32)

    # Threading does not improve this loop. For some reason, the np.delete
    # call does not parallelize well. Multiprocessing incurs too much
    # serialization overhead to be worthwhile.
    for i in range(self._num_users):
      positives = self._train_pos_items[index_bounds[i]:index_bounds[i+1]]
      negatives = np.delete(full_set, positives)
      self._per_user_neg_count[i] = self._num_items - positives.shape[0]
      self._negative_table[i, :self._per_user_neg_count[i]] = negatives

    logging.info("Negative sample table built. Time: {:.1f} seconds".format(
        timeit.default_timer() - start_time))
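
Pre-filling the table with the dtype's maximum acts as a sentinel: an unset slot indexes far out of range instead of silently pointing at item zero. A tiny sketch with invented sizes and a stand-in for rconst.ITEM_DTYPE:

import numpy as np

ITEM_DTYPE = np.uint16                 # hypothetical stand-in for rconst.ITEM_DTYPE
num_users, num_items = 3, 5

negative_table = np.zeros((num_users, num_items), dtype=ITEM_DTYPE)
negative_table += np.iinfo(ITEM_DTYPE).max   # unset entries are now 65535
assert num_items < np.iinfo(ITEM_DTYPE).max
print(negative_table[0])
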
Example #23
 def _range_scale(self):
     """ Calculate scaling, intercept based on data range and output type """
     mn, mx = self.finite_range() # Values of self.array.dtype type
     out_dtype = self._out_dtype
     if mx == mn: # Only one number in array
         self.inter = mn
         return
     # Straight mx-mn can overflow.
     if mn.dtype.kind == 'f': # Already floats
         # float64 and below cast correctly to longdouble.  Longdouble needs
         # no casting
         mn2mx = np.diff(np.array([mn, mx], dtype=np.longdouble))
     else: # max possible (u)int range is 2**64-1 (int64, uint64)
         # int_to_float covers this range.  On windows longdouble is the same
         # as double so mn2mx will be 2**64 - thus overestimating slope
         # slightly.  Casting to int needed to allow mx-mn to be larger than
         # the largest (u)int value
         mn2mx = int_to_float(as_int(mx) - as_int(mn), np.longdouble)
     if out_dtype.kind == 'f':
         # Type range, these are also floats
         info = type_info(out_dtype)
         t_mn_mx = info['min'], info['max']
     else:
         t_mn_mx = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
          t_mn_mx = [int_to_float(v, np.longdouble) for v in t_mn_mx]
     # We want maximum precision for the calculations. Casting will
     # not lose precision because min/max are of fp type.
     assert [v.dtype.kind for v in t_mn_mx] == ['f', 'f']
     scaled_mn2mx = np.diff(np.array(t_mn_mx, dtype = np.longdouble))
     slope = mn2mx / scaled_mn2mx
     self.inter = mn - t_mn_mx[0] * slope
     self.slope = slope
     if not np.all(np.isfinite([self.slope, self.inter])):
         raise ScalingError("Slope / inter not both finite")
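
Stripped of the overflow and precision safeguards, the routine above maps the finite data range [mn, mx] linearly onto the output type's [t_min, t_max] taken from np.iinfo. A simplified sketch of that slope/intercept calculation, with an invented data range:

import numpy as np

mn, mx = -3.5, 7.25                      # hypothetical finite data range
out_dtype = np.int16
t_min, t_max = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max

slope = (mx - mn) / float(t_max - t_min)
inter = mn - t_min * slope

# Scaled end points land (approximately) on the integer type's limits.
print((mn - inter) / slope, (mx - inter) / slope)   # ~ -32768.0, ~ 32767.0
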
Example #24
 def __init__(self, name, unit='s', nullable=True):
      min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
     super(DurationIntervalType, self).__init__(
             name, True, 64, nullable=nullable,
             min_value=min_val,
             max_value=max_val)
     self.unit = unit
Example #25
    def iter_raw_buffers(self):
        """Return an iterator over raw buffers.

        Returns
        -------
        raw_buffer : generator
            Generator for iteration over raw buffers.
        """
        # self.tmax_samp should be included
        iter_times = list(zip(
            list(range(self.tmin_samp, self.tmax_samp, self.buffer_size)),
            list(range(self.tmin_samp + self.buffer_size,
                       self.tmax_samp + 1, self.buffer_size))))
        last_iter_sample = iter_times[-1][1] if iter_times else self.tmin_samp
        if last_iter_sample < self.tmax_samp + 1:
            iter_times.append((last_iter_sample, self.tmax_samp + 1))

        for ii, (start, stop) in enumerate(iter_times):

            # wait for correct number of samples to be available
            self.ft_client.wait(stop, np.iinfo(np.uint32).max,
                                np.iinfo(np.uint32).max)

            # get the samples (stop index is inclusive)
            raw_buffer = self.ft_client.getData([start, stop - 1]).transpose()

            yield raw_buffer
Example #26
    def munchetal_filter(im, wlevel, sigma, wname='db15'):
        # Wavelet decomposition:
        coeffs = pywt.wavedec2(im.astype(np.float32), wname, level=wlevel)
        coeffsFlt = [coeffs[0]]
        # FFT transform of horizontal frequency bands:
        for i in range(1, wlevel + 1):
            # FFT:
            fcV = np.fft.fftshift(np.fft.fft(coeffs[i][1], axis=0))
            my, mx = fcV.shape
            # Damping of vertical stripes:
            damp = 1 - np.exp(-(np.arange(-np.floor(my / 2.), -np.floor(my / 2.) + my) ** 2) / (2 * (sigma ** 2)))
            dampprime = np.kron(np.ones((1, mx)), damp.reshape((damp.shape[0], 1)))
            fcV = fcV * dampprime
            # Inverse FFT:
            fcVflt = np.real(np.fft.ifft(np.fft.ifftshift(fcV), axis=0))
            cVHDtup = (coeffs[i][0], fcVflt, coeffs[i][2])
            coeffsFlt.append(cVHDtup)

        # Get wavelet reconstruction:
        im_f = np.real(pywt.waverec2(coeffsFlt, wname))
        # Return image according to input type:
        if (im.dtype == 'uint16'):
            # Check extrema for uint16 images:
            im_f[im_f < np.iinfo(np.uint16).min] = np.iinfo(np.uint16).min
            im_f[im_f > np.iinfo(np.uint16).max] = np.iinfo(np.uint16).max
            # Return filtered image (an additional row and/or column might be present):
            return im_f[0:im.shape[0], 0:im.shape[1]].astype(np.uint16)
        else:
            return im_f[0:im.shape[0], 0:im.shape[1]]
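
The uint16 branch at the end clamps the reconstructed image to the representable range before casting, so out-of-range float values do not wrap around. The same clamp in isolation, on a few invented pixel values:

import numpy as np

im_f = np.array([-10.0, 123.4, 70000.0])          # hypothetical filtered values
lo, hi = np.iinfo(np.uint16).min, np.iinfo(np.uint16).max
im_f[im_f < lo] = lo
im_f[im_f > hi] = hi
print(im_f.astype(np.uint16))                     # [    0   123 65535]
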
Example #27
 def getGDALRasterType(self):
     '''
     Gets the output raster type
     '''
     index = self.numberComboBox.currentIndex()
     if index == 0:
         min = numpy.iinfo(numpy.uint8).min
         max = numpy.iinfo(numpy.uint8).max
         return (osgeo.gdal.GDT_Byte, min, max)
     elif index == 1:
         min = numpy.iinfo(numpy.uint16).min
         max = numpy.iinfo(numpy.uint16).max
         return (osgeo.gdal.GDT_UInt16, min, max)
     elif index == 2:
         min = numpy.iinfo(numpy.int16).min
         max = numpy.iinfo(numpy.int16).max
         return (osgeo.gdal.GDT_Int16, min, max)
     elif index == 3:
         min = numpy.iinfo(numpy.uint32).min
         max = numpy.iinfo(numpy.uint32).max
         return (osgeo.gdal.GDT_UInt32, min, max)
     elif index == 4:
         min = numpy.iinfo(numpy.int32).min
         max = numpy.iinfo(numpy.int32).max
         return (osgeo.gdal.GDT_Int32, min, max)
     elif index == 5:
         min = numpy.finfo(numpy.float32).min
         max = numpy.finfo(numpy.float32).max
         return (osgeo.gdal.GDT_Float32, min, max)
     elif index == 6:
         min = numpy.finfo(numpy.float64).min
         max = numpy.finfo(numpy.float64).max
         return (osgeo.gdal.GDT_Float64, min, max)
Example #28
  def testInfNan(self):
    i4 = np.iinfo(np.int32)
    i8 = np.iinfo(np.int64)

    self._compare(np.inf, np.float32, np.inf, False)
    self._compare(np.inf, np.float64, np.inf, False)
    if sys.byteorder == "big":  
      self._compare(np.inf, np.int32, i4.max, False)  
      self._compare(np.inf, np.int64, i8.max, False)  
    else:  
      self._compare(np.inf, np.int32, i4.min, False)  
      self._compare(np.inf, np.int64, i8.min, False)  
    self._compare(-np.inf, np.float32, -np.inf, False)
    self._compare(-np.inf, np.float64, -np.inf, False)
    self._compare(-np.inf, np.int32, i4.min, False)
    self._compare(-np.inf, np.int64, i8.min, False)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, False)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, False)), True)
    self._compare(np.nan, np.int32, i4.min, False)
    self._compare(np.nan, np.int64, i8.min, False)

    self._compare(np.inf, np.float32, np.inf, True)
    self._compare(np.inf, np.float64, np.inf, True)
    self._compare(-np.inf, np.float32, -np.inf, True)
    self._compare(-np.inf, np.float64, -np.inf, True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float32, True)), True)
    self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True)
Example #29
def clean(data):

  data_wso = data.astype(int)

  masked = np.ma.array(data_wso)
  masked[:,np.arange(0,2592,16)] = np.ma.masked


  #Set the mean of each spectra to zero
  #med_col = np.mean(masked,axis=1)
  med_col = np.min((
      np.mean(masked[:, :2592 // 5], axis=1),
      np.mean(masked[:, -2592 // 5:], axis=1)),
      axis=0)

  data_wso = data_wso - med_col[:,np.newaxis]

  #Shift the mean to 128
  data_wso = data_wso + 128

  #Set the right proprieties to the data
  data_wso[:,np.arange(0,2592,16)] = 0
  dtype_min = np.iinfo(data.dtype).min
  dtype_max = np.iinfo(data.dtype).max
  np.clip(data_wso, dtype_min, dtype_max, out=data_wso)
  data_wso = np.around(data_wso)
  data = data_wso.astype(data.dtype)

  return data
Example #30
    def test1DDataRandom(self):
        """Test pixmap generation for 1D data of different size and types."""
        self._log("TestLog10Colormap.test1DDataRandom")
        for cmapName, colormap in self.COLORMAPS.items():
            for size in self.SIZES:
                for dtype in self.DTYPES:
                    for start, end in self.RANGES:
                        try:
                            dtypeMax = np.iinfo(dtype).max
                            dtypeMin = np.iinfo(dtype).min
                        except ValueError:
                            dtypeMax = np.finfo(dtype).max
                            dtypeMin = np.finfo(dtype).min
                        if dtypeMin < 0:
                            data = np.asarray(-dtypeMax/2. +
                                              np.random.rand(size) * dtypeMax,
                                              dtype=dtype)
                        else:
                            data = np.asarray(np.random.rand(size) * dtypeMax,
                                              dtype=dtype)

                        duration = self._testColormap(data, colormap,
                                                      start, end,
                                                      isLog10=True)

                        self._log('1D Random', cmapName, dtype, size,
                                  (start, end), duration)
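
The try/except above is a common way to obtain the limits of an arbitrary dtype: np.iinfo raises ValueError for non-integer types, at which point np.finfo takes over. A small helper sketch of that pattern:

import numpy as np

def dtype_limits(dtype):
    """Return (min, max) for either integer or floating dtypes."""
    try:
        info = np.iinfo(dtype)
    except ValueError:        # np.iinfo only accepts integer types
        info = np.finfo(dtype)
    return info.min, info.max

print(dtype_limits(np.uint8))     # (0, 255)
print(dtype_limits(np.float32))   # (-3.4028235e+38, 3.4028235e+38)
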
Example #31
def _daal_fit_classifier(self, X, y, sample_weight=None):
    y = check_array(y, ensure_2d=False, dtype=None)
    y, expanded_class_weight = self._validate_y_class_weight(y)
    n_classes_ = self.n_classes_[0]
    classes_ = self.classes_[0]
    self.n_features_ = X.shape[1]

    if expanded_class_weight is not None:
        if sample_weight is not None:
            sample_weight = sample_weight * expanded_class_weight
        else:
            sample_weight = expanded_class_weight
    if sample_weight is not None:
        sample_weight = [sample_weight]

    rs_ = check_random_state(self.random_state)
    seed_ = rs_.randint(0, np.iinfo('i').max)

    if n_classes_ < 2:
        raise ValueError("Training data only contain information about one class.")

    # create algorithm
    X_fptype = getFPType(X)
    daal_engine_ = daal4py.engines_mt2203(seed=seed_, fptype=X_fptype)
    features_per_node_ = _to_absolute_max_features(self.max_features, X.shape[1], is_classification=True)

    n_samples_bootstrap_ = _get_n_samples_bootstrap(
        n_samples=X.shape[0],
        max_samples=self.max_samples
    )

    if not self.bootstrap and self.oob_score:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    dfc_algorithm = daal4py.decision_forest_classification_training(
        nClasses = int(n_classes_),
        fptype = X_fptype,
        method = 'defaultDense',
        nTrees = int(self.n_estimators),
        observationsPerTreeFraction = n_samples_bootstrap_ if self.bootstrap is True else 1.,
        featuresPerNode = int(features_per_node_),
        maxTreeDepth = int(0 if self.max_depth is None else self.max_depth),
        minObservationsInLeafNode = (self.min_samples_leaf if isinstance(self.min_samples_leaf, numbers.Integral)
                                     else int(ceil(self.min_samples_leaf * X.shape[0]))),
        engine = daal_engine_,
        impurityThreshold = float(0.0 if self.min_impurity_split is None else self.min_impurity_split),
        varImportance = "MDI",
        resultsToCompute = "",
        memorySavingMode = False,
        bootstrap = bool(self.bootstrap),
        minObservationsInSplitNode = (self.min_samples_split if isinstance(self.min_samples_split, numbers.Integral)
                                      else int(ceil(self.min_samples_split * X.shape[0]))),
        minWeightFractionInLeafNode = self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode = self.min_impurity_decrease,
        maxLeafNodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    )
    self._cached_estimators_ = None
    # compute
    dfc_trainingResult = dfc_algorithm.compute(X, y, sample_weight)

    # get resulting model
    model = dfc_trainingResult.model
    self.daal_model_ = model

    # compute oob_score_
    if self.oob_score:
        self.estimators_ = self._estimators_
        self._set_oob_score(X, y)

    return self
Example #32
def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
                   penalty, dual, verbose, max_iter, tol,
                   random_state=None, multi_class='ovr',
                   loss='logistic_regression', epsilon=0.1,
                   sample_weight=None):
    """Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.

    Preprocessing is done in this function before supplying it to liblinear.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X

    C : float
        Inverse of regularization strength; the lower the C, the stronger
        the penalization.

    fit_intercept : bool
        Whether or not to fit the intercept, that is, to add an intercept
        term to the decision function.

    intercept_scaling : float
        LibLinear internally penalizes the intercept, and this term is subject
        to regularization just like the other terms of the feature vector.
        In order to avoid this, one should increase intercept_scaling such
        that the feature vector becomes [x, intercept_scaling].

    class_weight : dict or 'balanced', default=None
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    penalty : {'l1', 'l2'}
        The norm of the penalty used in regularization.

    dual : bool
        Dual or primal formulation.

    verbose : int
        Set verbose to any positive number for verbosity.

    max_iter : int
        Number of iterations.

    tol : float
        Stopping condition.

    random_state : int or RandomState instance, default=None
        Controls the pseudo random number generation for shuffling the data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    multi_class : {'ovr', 'crammer_singer'}, default='ovr'
        `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`
        optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        because it is consistent, it is seldom used in practice, rarely leads
        to better accuracy and is more expensive to compute.
        If `crammer_singer` is chosen, the options loss, penalty and dual will
        be ignored.

    loss : {'logistic_regression', 'hinge', 'squared_hinge', \
            'epsilon_insensitive', 'squared_epsilon_insensitive'}, \
            default='logistic_regression'
        The loss function used to fit the model.

    epsilon : float, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set epsilon=0.

    sample_weight : array-like of shape (n_samples,), default=None
        Weights assigned to each sample.

    Returns
    -------
    coef_ : ndarray of shape (n_features, n_features + 1)
        The coefficient vector obtained by minimizing the objective function.

    intercept_ : float
        The intercept term added to the vector.

    n_iter_ : int
        Maximum number of iterations run across all classes.
    """
    if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
        enc = LabelEncoder()
        y_ind = enc.fit_transform(y)
        classes_ = enc.classes_
        if len(classes_) < 2:
            raise ValueError("This solver needs samples of at least 2 classes"
                             " in the data, but the data contains only one"
                             " class: %r" % classes_[0])

        class_weight_ = compute_class_weight(class_weight, classes=classes_,
                                             y=y)
    else:
        class_weight_ = np.empty(0, dtype=np.float64)
        y_ind = y
    liblinear.set_verbosity_wrap(verbose)
    rnd = check_random_state(random_state)
    if verbose:
        print('[LibLinear]', end='')

    # LinearSVC breaks when intercept_scaling is <= 0
    bias = -1.0
    if fit_intercept:
        if intercept_scaling <= 0:
            raise ValueError("Intercept scaling is %r but needs to be greater "
                             "than 0. To disable fitting an intercept,"
                             " set fit_intercept=False." % intercept_scaling)
        else:
            bias = intercept_scaling

    libsvm.set_verbosity_wrap(verbose)
    libsvm_sparse.set_verbosity_wrap(verbose)
    liblinear.set_verbosity_wrap(verbose)

    # Liblinear doesn't support 64bit sparse matrix indices yet
    if sp.issparse(X):
        _check_large_sparse(X)

    # LibLinear wants targets as doubles, even for classification
    y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
    y_ind = np.require(y_ind, requirements="W")

    sample_weight = _check_sample_weight(sample_weight, X,
                                         dtype=np.float64)

    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
    raw_coef_, n_iter_ = liblinear.train_wrap(
        X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,
        class_weight_, max_iter, rnd.randint(np.iinfo('i').max),
        epsilon, sample_weight)
    # Regarding rnd.randint(..) in the above signature:
    # seed for srand in range [0..INT_MAX); due to limitations in Numpy
    # on 32-bit platforms, we can't get to the UINT_MAX limit that
    # srand supports
    n_iter_ = max(n_iter_)
    if n_iter_ >= max_iter:
        warnings.warn("Liblinear failed to converge, increase "
                      "the number of iterations.", ConvergenceWarning)

    if fit_intercept:
        coef_ = raw_coef_[:, :-1]
        intercept_ = intercept_scaling * raw_coef_[:, -1]
    else:
        coef_ = raw_coef_
        intercept_ = 0.

    return coef_, intercept_, n_iter_
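
The rnd.randint(np.iinfo('i').max) call produces a C-int-sized seed for liblinear's srand, as the comment following the call explains. The pattern in isolation:

import numpy as np

rnd = np.random.RandomState(42)
seed = rnd.randint(np.iinfo('i').max)   # a non-negative seed below INT_MAX
print(0 <= seed < np.iinfo('i').max)    # True
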
Example #33
    def fit(self, X, y, sample_weight=None):
        """Fit the SVM model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) \
                or (n_samples, n_samples)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
            For kernel="precomputed", the expected shape of X is
            (n_samples, n_samples).

        y : array-like of shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression)

        sample_weight : array-like of shape (n_samples,), default=None
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : object

        Notes
        -----
        If X and y are not C-ordered and contiguous arrays of np.float64 and
        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.

        If X is a dense array, then the other methods will not support sparse
        matrices as input.
        """

        rnd = check_random_state(self.random_state)

        sparse = sp.isspmatrix(X)
        if sparse and self.kernel == "precomputed":
            raise TypeError("Sparse precomputed kernels are not supported.")
        self._sparse = sparse and not callable(self.kernel)

        if hasattr(self, 'decision_function_shape'):
            if self.decision_function_shape not in ('ovr', 'ovo'):
                raise ValueError(
                    f"decision_function_shape must be either 'ovr' or 'ovo', "
                    f"got {self.decision_function_shape}."
                )

        if callable(self.kernel):
            check_consistent_length(X, y)
        else:
            X, y = self._validate_data(X, y, dtype=np.float64,
                                       order='C', accept_sparse='csr',
                                       accept_large_sparse=False)

        y = self._validate_targets(y)

        sample_weight = np.asarray([]
                                   if sample_weight is None
                                   else sample_weight, dtype=np.float64)
        solver_type = LIBSVM_IMPL.index(self._impl)

        # input validation
        n_samples = _num_samples(X)
        if solver_type != 2 and n_samples != y.shape[0]:
            raise ValueError("X and y have incompatible shapes.\n" +
                             "X has %s samples, but y has %s." %
                             (n_samples, y.shape[0]))

        if self.kernel == "precomputed" and n_samples != X.shape[1]:
            raise ValueError("Precomputed matrix must be a square matrix."
                             " Input is a {}x{} matrix."
                             .format(X.shape[0], X.shape[1]))

        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:
            raise ValueError("sample_weight and X have incompatible shapes: "
                             "%r vs %r\n"
                             "Note: Sparse matrices cannot be indexed w/"
                             "boolean masks (use `indices=True` in CV)."
                             % (sample_weight.shape, X.shape))

        kernel = 'precomputed' if callable(self.kernel) else self.kernel

        if kernel == 'precomputed':
            # unused but needs to be a float for cython code that ignores
            # it anyway
            self._gamma = 0.
        elif isinstance(self.gamma, str):
            if self.gamma == 'scale':
                # var = E[X^2] - E[X]^2 if sparse
                X_var = ((X.multiply(X)).mean() - (X.mean()) ** 2
                         if sparse else X.var())
                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0
            elif self.gamma == 'auto':
                self._gamma = 1.0 / X.shape[1]
            else:
                raise ValueError(
                    "When 'gamma' is a string, it should be either 'scale' or "
                    "'auto'. Got '{}' instead.".format(self.gamma)
                )
        else:
            self._gamma = self.gamma

        fit = self._sparse_fit if self._sparse else self._dense_fit
        if self.verbose:
            print('[LibSVM]', end='')

        seed = rnd.randint(np.iinfo('i').max)
        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
        # see comment on the other call to np.iinfo in this file

        self.shape_fit_ = X.shape if hasattr(X, "shape") else (n_samples, )

        # In binary case, we need to flip the sign of coef, intercept and
        # decision function. Use self._intercept_ and self._dual_coef_
        # internally.
        self._intercept_ = self.intercept_.copy()
        self._dual_coef_ = self.dual_coef_
        if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:
            self.intercept_ *= -1
            self.dual_coef_ = -self.dual_coef_

        return self
Example #34
from ..base import ClassifierMixin, RegressorMixin
from ..metrics import r2_score, accuracy_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
from ..utils import check_random_state, check_array, column_or_1d
from ..utils import indices_to_mask
from ..utils.metaestimators import if_delegate_has_method
from ..utils.multiclass import check_classification_targets
from ..utils.random import sample_without_replacement
from ..utils.validation import has_fit_parameter, check_is_fitted, \
    _check_sample_weight, _deprecate_positional_args


__all__ = ["BaggingClassifier",
           "BaggingRegressor"]

MAX_INT = np.iinfo(np.int32).max


def _generate_indices(random_state, bootstrap, n_population, n_samples):
    """Draw randomly sampled indices."""
    # Draw sample indices
    if bootstrap:
        indices = random_state.randint(0, n_population, n_samples)
    else:
        indices = sample_without_replacement(n_population, n_samples,
                                             random_state=random_state)

    return indices


def _generate_bagging_indices(random_state, bootstrap_features,
Example #35
    def fit(
        self,
        train_X,
        train_y,
        epochs,
        batch_size,
        input_time_length=None,
        validation_data=None,
        model_constraint=None,
        remember_best_column=None,
        scheduler=None,
        log_0_epoch=True,
    ):
        """
        Fit the model using the given training data.
        
        Will set `epochs_df` variable with a pandas dataframe to the history
        of the training process.
        
        Parameters
        ----------
        train_X: ndarray
            Training input data
        train_y: 1darray
            Training labels
        epochs: int
            Number of epochs to train
        batch_size: int
        input_time_length: int, optional
            Super crop size, i.e. the temporal size that is pushed forward
            through the network; see the cropped decoding tutorial.
        validation_data: (ndarray, 1darray), optional
            X and y for validation set if wanted
        model_constraint: object, optional
            You can supply :class:`.MaxNormDefaultConstraint` if wanted.
        remember_best_column: string, optional
            In case you want to do an early stopping/reset parameters to some
            "best" epoch, define here the monitored value whose minimum
            determines the best epoch.
        scheduler: 'cosine' or None, optional
            Whether to use cosine annealing (:class:`.CosineAnnealing`).
        log_0_epoch: bool
            Whether to compute the metrics once before training as well.

        Returns
        -------
        exp: 
            Underlying braindecode :class:`.Experiment`
        """
        if (not hasattr(self, "compiled")) or (not self.compiled):
            raise ValueError(
                "Compile the model first by calling model.compile(loss, optimizer, metrics)"
            )

        if self.cropped and input_time_length is None:
            raise ValueError(
                "In cropped mode, need to specify input_time_length,"
                "which is the number of timesteps that will be pushed through"
                "the network in a single pass.")

        train_X = _ensure_float32(train_X)
        if self.cropped:
            self.network.eval()
            test_input = np_to_var(
                np.ones(
                    (1, train_X[0].shape[0], input_time_length) +
                    train_X[0].shape[2:],
                    dtype=np.float32,
                ))
            while len(test_input.size()) < 4:
                test_input = test_input.unsqueeze(-1)
            if self.cuda:
                test_input = test_input.cuda()
            out = self.network(test_input)
            n_preds_per_input = out.cpu().data.numpy().shape[2]
            self.iterator = CropsFromTrialsIterator(
                batch_size=batch_size,
                input_time_length=input_time_length,
                n_preds_per_input=n_preds_per_input,
                seed=self.seed_rng.randint(0,
                                           np.iinfo(np.int32).max - 1),
            )
        else:
            self.iterator = BalancedBatchSizeIterator(
                batch_size=batch_size,
                seed=self.seed_rng.randint(0,
                                           np.iinfo(np.int32).max - 1),
            )
        if log_0_epoch:
            stop_criterion = MaxEpochs(epochs)
        else:
            stop_criterion = MaxEpochs(epochs - 1)
        train_set = SignalAndTarget(train_X, train_y)
        optimizer = self.optimizer
        if scheduler is not None:
            assert (scheduler == "cosine"
                    ), "Supply either 'cosine' or None as scheduler."
            n_updates_per_epoch = sum([
                1 for _ in self.iterator.get_batches(train_set, shuffle=True)
            ])
            n_updates_per_period = n_updates_per_epoch * epochs
            if scheduler == "cosine":
                scheduler = CosineAnnealing(n_updates_per_period)
            schedule_weight_decay = False
            if optimizer.__class__.__name__ == "AdamW":
                schedule_weight_decay = True
            optimizer = ScheduledOptimizer(
                scheduler,
                self.optimizer,
                schedule_weight_decay=schedule_weight_decay,
            )
        loss_function = self.loss
        if self.cropped:
            loss_function = lambda outputs, targets: self.loss(
                th.mean(outputs, dim=2), targets)
        if validation_data is not None:
            valid_X = _ensure_float32(validation_data[0])
            valid_y = validation_data[1]
            valid_set = SignalAndTarget(valid_X, valid_y)
        else:
            valid_set = None
        test_set = None
        self.monitors = [LossMonitor()]
        if self.cropped:
            self.monitors.append(
                CroppedTrialMisclassMonitor(input_time_length))
        else:
            self.monitors.append(MisclassMonitor())
        if self.extra_monitors is not None:
            self.monitors.extend(self.extra_monitors)
        self.monitors.append(RuntimeMonitor())
        exp = Experiment(
            self.network,
            train_set,
            valid_set,
            test_set,
            iterator=self.iterator,
            loss_function=loss_function,
            optimizer=optimizer,
            model_constraint=model_constraint,
            monitors=self.monitors,
            stop_criterion=stop_criterion,
            remember_best_column=remember_best_column,
            run_after_early_stop=False,
            cuda=self.cuda,
            log_0_epoch=log_0_epoch,
            do_early_stop=(remember_best_column is not None),
        )
        exp.run()
        self.epochs_df = exp.epochs_df
        return exp
Example #36
 def __init__(self, vFunc=None, dtype=numpy.uint8):
     length = numpy.iinfo(dtype).max + 1
     self._vLookupArray = utils.createLookupArray(vFunc, length)
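
np.iinfo(dtype).max + 1 is the number of distinct values the dtype can take, which is exactly the length needed for a per-value lookup table. A tiny sketch with an identity table standing in for utils.createLookupArray:

import numpy as np

dtype = np.uint8
length = np.iinfo(dtype).max + 1          # 256 entries, one per possible uint8 value
lookup = np.arange(length, dtype=dtype)   # identity table as a placeholder
print(length, lookup[0], lookup[255])     # 256 0 255
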
Example #37
    #
    # DATA TYPE CONVERSION - float64 -> uint16
    #

    # VOLUME
    # remove NaN
    rawVolume = np.nan_to_num(rawVolume)

    # zero out negative values
    rawVolume[rawVolume < 0] = 0.0

    # normalize to range 0.0 ... 1.0
    rawVolume = rawVolume / np.max(rawVolume)

    # scale up to 65535 (uint16 max value)
    rawVolume = rawVolume * np.iinfo(np.uint16).max

    # actually switch to uint16
    rawVolume = rawVolume.astype(np.uint16)

    # SEGMENTATION - is already uint8

    #
    # ROTATION - rotate cw and ccw
    #

    #test data
    if useTestData:
        #ccw rotation
        if 0 <= j <= 1 or 7 <= j <= 23 or 34 <= j <= 34 or 37 <= j <= 44 or 46 <= j <= 54 or 56 <= j <= 65:
            rawVolume = np.rot90(rawVolume, 1)
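
The conversion above is the usual normalize-then-stretch recipe: push the data into [0, 1], multiply by np.iinfo(np.uint16).max, and cast. The same steps applied to a tiny invented volume:

import numpy as np

rawVolume = np.array([[0.0, 2.5], [5.0, np.nan]])   # hypothetical float64 data
rawVolume = np.nan_to_num(rawVolume)                # NaN -> 0
rawVolume[rawVolume < 0] = 0.0
rawVolume = rawVolume / np.max(rawVolume)           # 0.0 ... 1.0
rawVolume = (rawVolume * np.iinfo(np.uint16).max).astype(np.uint16)
print(rawVolume)    # [[    0 32767]
                    #  [65535     0]]
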
Example #38
import unittest
import binascii
import pickle
import numpy

from threatexchange.hashing.pdq_faiss_matcher import PDQFlatHashIndex, PDQMultiHashIndex

test_hashes = [
    "0000000000000000000000000000000000000000000000000000000000000000",
    "000000000000000000000000000000000000000000000000000000000000ffff",
    "0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f",
    "f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0",
    "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
]

MAX_UNSIGNED_INT64 = numpy.iinfo(numpy.uint64).max


class MixinTests:
    class PDQHashIndexCommonTests(unittest.TestCase):
        index = None

        def assertEqualPDQHashSearchResults(self, result, expected):
            self.assertEqual(len(result), len(expected),
                             "search results not of expected length")
            for (r, e) in zip(result, expected):
                self.assertCountEqual(r, e)

        def test_search_index_for_exact_matches(self):
            query = test_hashes[:1]
            result = self.index.search(query, 0)
Example #39
0
    def __init__(self, vertices, groups, skip_tests=False,
            node_vertex_consistency_tolerance=None,
            skip_element_orientation_test=False,
            nodal_adjacency=None,
            facial_adjacency_groups=None,
            boundary_tags=None,
            vertex_id_dtype=np.int32,
            element_id_dtype=np.int32,
            is_conforming=None):
        """
        The following are keyword-only:

        :arg skip_tests: Skip mesh tests, in case you want to load a broken
            mesh anyhow and then fix it inside of this data structure.
        :arg node_vertex_consistency_tolerance: If *False*, do not check
            for consistency between vertex and nodal data. If *None*, use
            the (small, near FP-epsilon) default tolerance.
        :arg skip_element_orientation_test: If *False*, check that
            element orientation is positive in volume meshes
            (i.e. ones where ambient and topological dimension match).
        :arg nodal_adjacency: One of three options:
            *None*, in which case this information
            will be deduced from vertex adjacency. *False*, in which case
            this information will be marked unavailable (such as if there are
            hanging nodes in the geometry, so that vertex adjacency does not convey
            the full picture), and references to
            :attr:`element_neighbors_starts` and :attr:`element_neighbors`
            will result in exceptions. Lastly, a
            :class:`NodalAdjacency` object (or a tuple convertible to one)
            may be passed.
        :arg facial_adjacency_groups: One of three options:
            *None*, in which case this information
            will be deduced from vertex adjacency. *False*, in which case
            this information will be marked unavailable (such as if there are
            hanging nodes in the geometry, so that vertex adjacency does not convey
            the full picture), and references to
            :attr:`element_neighbors_starts` and :attr:`element_neighbors`
            will result in exceptions. Lastly, a data structure as described in
            :attr:`facial_adjacency_groups` may be passed.
        """

        el_nr = 0
        node_nr = 0

        new_groups = []
        for g in groups:
            ng = g.join_mesh(el_nr, node_nr)
            new_groups.append(ng)
            el_nr += ng.nelements
            node_nr += ng.nnodes

        # {{{ boundary tags

        if boundary_tags is None:
            boundary_tags = []
        else:
            boundary_tags = boundary_tags[:]

        if BTAG_NONE in boundary_tags:
            raise ValueError("BTAG_NONE is not allowed to be part of "
                    "boundary_tags")
        if BTAG_ALL not in boundary_tags:
            boundary_tags.append(BTAG_ALL)
        if BTAG_REALLY_ALL not in boundary_tags:
            boundary_tags.append(BTAG_REALLY_ALL)

        max_boundary_tag_count = int(
                np.log(np.iinfo(element_id_dtype).max)/np.log(2))
        if len(boundary_tags) > max_boundary_tag_count:
            raise ValueError("too few bits in element_id_dtype to represent all "
                    "boundary tags")

        btag_to_index = dict(
                (btag, i) for i, btag in enumerate(boundary_tags))

        # }}}

        if not is_conforming:
            if nodal_adjacency is None:
                nodal_adjacency = False
            if facial_adjacency_groups is None:
                facial_adjacency_groups = False

        if nodal_adjacency is not False and nodal_adjacency is not None:
            if not isinstance(nodal_adjacency, NodalAdjacency):
                nb_starts, nbs = nodal_adjacency
                nodal_adjacency = NodalAdjacency(
                        neighbors_starts=nb_starts,
                        neighbors=nbs)

                del nb_starts
                del nbs

        Record.__init__(
                self, vertices=vertices, groups=new_groups,
                _nodal_adjacency=nodal_adjacency,
                _facial_adjacency_groups=facial_adjacency_groups,
                boundary_tags=boundary_tags,
                btag_to_index=btag_to_index,
                vertex_id_dtype=np.dtype(vertex_id_dtype),
                element_id_dtype=np.dtype(element_id_dtype),
                is_conforming=is_conforming,
                )

        if not skip_tests:
            if node_vertex_consistency_tolerance is not False:
                assert _test_node_vertex_consistency(
                        self, node_vertex_consistency_tolerance)

            for g in self.groups:
                assert g.vertex_indices.dtype == self.vertex_id_dtype

            if nodal_adjacency:
                assert nodal_adjacency.neighbors_starts.shape == (self.nelements+1,)
                assert len(nodal_adjacency.neighbors.shape) == 1

                assert (nodal_adjacency.neighbors_starts.dtype
                        == self.element_id_dtype)
                assert nodal_adjacency.neighbors.dtype == self.element_id_dtype

            if facial_adjacency_groups:
                assert len(facial_adjacency_groups) == len(self.groups)
                for fagrp_map in facial_adjacency_groups:
                    for fagrp in six.itervalues(fagrp_map):
                        nfagrp_elements, = fagrp.elements.shape

                        assert fagrp.element_faces.dtype == self.face_id_dtype
                        assert fagrp.element_faces.shape == (nfagrp_elements,)

                        assert fagrp.neighbors.dtype == self.element_id_dtype
                        assert fagrp.neighbors.shape == (nfagrp_elements,)

                        assert fagrp.neighbor_faces.dtype == self.face_id_dtype
                        assert fagrp.neighbor_faces.shape == (nfagrp_elements,)

                        if fagrp.ineighbor_group is None:
                            is_bdry = fagrp.neighbors < 0
                            assert ((1 << btag_to_index[BTAG_REALLY_ALL])
                                    & -fagrp.neighbors[is_bdry]).all(), \
                                    "boundary faces without BTAG_REALLY_ALL found"

            from meshmode.mesh.processing import \
                    test_volume_mesh_element_orientations

            if self.dim == self.ambient_dim and not skip_element_orientation_test:
                # only for volume meshes, for now
                assert test_volume_mesh_element_orientations(self), \
                        "negatively oriented elements found"
Example #40
0
def unstructured_from_composite_arrays(points, arrays, controller=None):
    """Given a set of VTKCompositeDataArrays, creates a vtkUnstructuredGrid.
    The main goal of this function is to transform the output of XXX_per_block()
    methods to a single dataset that can be visualized and further processed.
    Here arrays is an iterable (e.g. a list) of (array, name) pairs. For
    example:

    centroid = mean_per_block(composite_data.Points)
    T = mean_per_block(composite_data.PointData['Temperature'])
    ug = unstructured_from_composite_arrays(centroid, [(T, 'Temperature')])

    When called in parallel, this function makes sure that each array in
    the input dataset is represented only on 1 process. This is important
    because methods like mean_per_block() return the same value for blocks
    that are partitioned on all of the participating processes. If the
    same point were to be created across multiple processes in the output,
    filters like histogram would report duplicate values erroneously.
    """

    try:
        dataset = points.DataSet
    except AttributeError:
        dataset = None

    if dataset is None and points is not dsa.NoneArray:
        raise ValueError(
            "Expecting a points arrays with an associated dataset.")

    if points is dsa.NoneArray:
        cpts = []
    else:
        cpts = points.Arrays
    ownership = numpy.zeros(len(cpts), dtype=numpy.int32)
    rank = 0

    # Let's first create a map of array index to composite ids.
    if dataset is None:
        ids = []
    else:
        it = dataset.NewIterator()
        it.UnRegister(None)
        itr = cpts.__iter__()
        ids = numpy.empty(len(cpts), dtype=numpy.int32)
        counter = 0
        while not it.IsDoneWithTraversal():
            _id = it.GetCurrentFlatIndex()
            ids[counter] = _id
            counter += 1
            it.GoToNextItem()

    if controller is None and vtkMultiProcessController is not None:
        controller = vtkMultiProcessController.GetGlobalController()
    if controller and controller.IsA("vtkMPIController"):
        from mpi4py import MPI
        comm = vtkMPI4PyCommunicator.ConvertToPython(
            controller.GetCommunicator())
        rank = comm.Get_rank()

        # Determine the max id to use for reduction
        # operations

        # Get all ids from dataset, including empty ones.
        lmax_id = numpy.int32(0)
        if dataset is not None:
            it = dataset.NewIterator()
            it.UnRegister(None)
            it.SetSkipEmptyNodes(False)
            while not it.IsDoneWithTraversal():
                _id = it.GetCurrentFlatIndex()
                lmax_id = numpy.max((lmax_id, _id)).astype(numpy.int32)
                it.GoToNextItem()
        max_id = numpy.array(0, dtype=numpy.int32)
        mpitype = _lookup_mpi_type(numpy.int32)
        comm.Allreduce([lmax_id, mpitype], [max_id, mpitype], MPI.MAX)

        # Now we figure out which processes have which ids
        lownership = numpy.empty(max_id, dtype=numpy.int32)
        lownership.fill(numpy.iinfo(numpy.int32).max)

        ownership = numpy.empty(max_id, dtype=numpy.int32)

        if dataset is not None:
            it = dataset.NewIterator()
            it.UnRegister(None)
            it.InitTraversal()
            itr = cpts.__iter__()
            while not it.IsDoneWithTraversal():
                _id = it.GetCurrentFlatIndex()
                if next(itr) is not dsa.NoneArray:
                    lownership[_id] = rank
                it.GoToNextItem()
        mpitype = _lookup_mpi_type(numpy.int32)
        # The process with the lowest id containing a block will
        # produce the output for that block.
        comm.Allreduce([lownership, mpitype], [ownership, mpitype], MPI.MIN)

    # Iterate over blocks to produce points and arrays
    from vtk.vtkCommonDataModel import vtkUnstructuredGrid
    from vtk.vtkCommonCore import vtkDoubleArray, vtkPoints
    ugrid = vtkUnstructuredGrid()
    da = vtkDoubleArray()
    da.SetNumberOfComponents(3)
    pts = vtkPoints()
    pts.SetData(da)
    counter = 0
    for pt in cpts:
        if ownership[ids[counter]] == rank:
            pts.InsertNextPoint(tuple(pt))
        counter += 1
    ugrid.SetPoints(pts)

    for ca, name in arrays:
        if ca is not dsa.NoneArray:
            da = vtkDoubleArray()
            ncomps = ca.Arrays[0].flatten().shape[0]
            da.SetNumberOfComponents(ncomps)
            counter = 0
            for a in ca.Arrays:
                if ownership[ids[counter]] == rank:
                    a = a.flatten()
                    for i in range(ncomps):
                        da.InsertNextValue(a[i])
                counter += 1
            if len(a) > 0:
                da.SetName(name)
                ugrid.GetPointData().AddArray(da)
    return ugrid
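# A hedged sketch of the "lowest rank owns the block" reduction used above,
# boiled down to plain mpi4py + numpy; the block layout here is illustrative
# only, and the snippet must be run under mpiexec.
import numpy
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
n_blocks = 4

# Each rank marks the blocks it holds; everything else stays at int32 max so
# it loses the MIN reduction.
lownership = numpy.full(n_blocks, numpy.iinfo(numpy.int32).max, dtype=numpy.int32)
for block_id in range(rank, n_blocks, comm.Get_size()):
    lownership[block_id] = rank

ownership = numpy.empty(n_blocks, dtype=numpy.int32)
comm.Allreduce(lownership, ownership, op=MPI.MIN)
# ownership[i] is now the lowest rank that holds block i.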
Example #41
0
        match_value = False

    # Note: type check above ensures that we have the _same_ NA value
    # for missing values, None == None (which is checked
    # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT
    match_missing = isna(result_fill_value) and isna(expected_fill_value)

    assert match_value or match_missing


@pytest.mark.parametrize(
    "dtype, fill_value, expected_dtype",
    [
        # size 8
        ("int8", 1, "int8"),
        ("int8", np.iinfo("int8").max + 1, "int16"),
        ("int8", np.iinfo("int16").max + 1, "int32"),
        ("int8", np.iinfo("int32").max + 1, "int64"),
        ("int8", np.iinfo("int64").max + 1, "object"),
        ("int8", -1, "int8"),
        ("int8", np.iinfo("int8").min - 1, "int16"),
        ("int8", np.iinfo("int16").min - 1, "int32"),
        ("int8", np.iinfo("int32").min - 1, "int64"),
        ("int8", np.iinfo("int64").min - 1, "object"),
        # keep signed-ness as long as possible
        ("uint8", 1, "uint8"),
        ("uint8", np.iinfo("int8").max + 1, "uint8"),
        ("uint8", np.iinfo("uint8").max + 1, "uint16"),
        ("uint8", np.iinfo("int16").max + 1, "uint16"),
        ("uint8", np.iinfo("uint16").max + 1, "uint32"),
        ("uint8", np.iinfo("int32").max + 1, "uint32"),
Example #42
0
parser.add_argument('--n_epochs', type=int, default=200)
parser.add_argument('--lr_milestones', type=int, default='66')
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('-gpu', '--device_no', type=int, default=1)
parser.add_argument('--n_jobs_dataloader', type=int, default=0)


p = parser.parse_args()

# ===========================================
# 0.1. Parameters
# ===========================================
# Extract from parser
print('Loading parameters...')
random_state_test = p.random_state_test
random_state_eval = random.randint(0, np.iinfo(np.int32).max)
loader_name, n, n_eval, n_test, mix = p.loader_name, p.n, p.n_eval, p.n_test, bool(p.mix)
ratio_abnormal_train, ratio_abnormal_eval = p.ratio_abnormal_train, p.ratio_abnormal_eval
ratio_abnormal_test = p.ratio_abnormal_test
n_features, net_name, load_model = p.n_features, p.net_name, p.load_model
optimizer_, eta_str = p.optimizer_, p.eta_str
lr, n_epochs, batch_size = p.lr, p.n_epochs, p.batch_size
device_no, n_jobs_dataloader = p.device_no, p.n_jobs_dataloader
lr_milestones = p.lr_milestones

# Define additional parameters
lr_milestones = tuple(i for i in range(lr_milestones, n_epochs, lr_milestones))
torch.manual_seed(random_state_test)
device = 'cuda:{}'.format(device_no)
eta = float(eta_str) * 0.01
label_normal = (0,)
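# A quick worked example of the milestone arithmetic above, using the assumed
# defaults lr_milestones=66 and n_epochs=200:
assert tuple(i for i in range(66, 200, 66)) == (66, 132, 198)
# i.e. the learning rate is decayed three times over the 200 training epochs.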
Example #43
0
def _scan_file(infile: Union[str, Path],
               categorical: bool = True,
               chunksize: int = 100000,
               cat_threshold: float = 0.1,
               unsigned: bool = False) -> Dict[str, Any]:
    """Scan dta file to find minimal dtypes to hold data in

    For each of the chunks of df:
        for string columns: hold all unique values if I want them categorical
        for float columns: do nothing
        for integer columns: search for missings, highest and lowest value
        for date columns: nothing

    Args:
        infile: dta file to scan
        categorical: whether to change strings to categorical
        chunksize: number of rows of infile to read at a time
        cat_threshold: maximum fraction of unique values in order
            to convert to categorical
        unsigned: whether to use unsigned integer dtypes for columns
            whose minimum value is non-negative

    Returns:
        dictionary mapping variable names to their minimal dtypes
    """
    itr = pd.read_stata(infile, iterator=True)
    varlist_df = pd.DataFrame({
        'format': itr.fmtlist,
        'name': itr.varlist,
        'col_size': itr.col_sizes,
        'dtype': itr.dtyplist,
        'label': list(itr.variable_labels().values())
    })

    start_cols = {}

    date_fmts = ('%tc', '%tC', '%td', '%d', '%tw', '%tm', '%tq', '%th', '%ty')
    date_cols = varlist_df['format'].apply(lambda x: x.startswith(date_fmts))
    date_cols = varlist_df[date_cols]['name'].values.tolist()
    start_cols['date_cols'] = date_cols

    int_cols = varlist_df['dtype'].apply(lambda x: np.issubdtype(
        x, np.integer) if inspect.isclass(x) else False)
    int_cols = varlist_df[int_cols]['name'].values.tolist()
    int_cols = sorted(list(set(int_cols) - set(date_cols)))
    start_cols['int_cols'] = int_cols

    regex = r'%.+s'
    str_cols = varlist_df['format'].apply(lambda x: bool(re.search(regex, x)))
    str_cols = varlist_df[str_cols]['name'].values.tolist()
    start_cols['str_cols'] = str_cols

    float_cols = varlist_df['dtype'].apply(lambda x: np.issubdtype(
        x, np.floating) if inspect.isclass(x) else False)
    float_cols = varlist_df[float_cols]['name'].values.tolist()
    start_cols['float_cols'] = float_cols

    end_cols = {
        'date_cols': start_cols['date_cols'],
        'int_cols': {
            'names': start_cols['int_cols'],
            'min': {key: None
                    for key in start_cols['int_cols']},
            'max': {key: None
                    for key in start_cols['int_cols']}
        },
        'float_cols': start_cols['float_cols']
    }
    if categorical:
        end_cols['cat_cols'] = {
            'names': start_cols['str_cols'],
            'cats': {key: set()
                     for key in start_cols['str_cols']}
        }
        end_cols['str_cols'] = []
    else:
        end_cols['cat_cols'] = {}
        end_cols['str_cols'] = start_cols['str_cols']

    tokeep = []
    tokeep.extend(start_cols['int_cols'])
    if categorical:
        tokeep.extend(start_cols['str_cols'])
    itr = pd.read_stata(infile, columns=tokeep, chunksize=chunksize)

    i = 0
    for df in itr:
        i += 1
        print(f'Scanning group {i} of data')
        # Integer vars:
        int_cols = end_cols['int_cols']['names'].copy()
        for col in int_cols:
            # Check missings
            if df.loc[:, col].isnull().values.any():
                # If missings, convert to float
                end_cols['float_cols'].append(col)
                end_cols['int_cols']['names'].remove(col)
                end_cols['int_cols']['max'].pop(col)
                end_cols['int_cols']['min'].pop(col)
            else:
                # Check minimum
                minval = min(df.loc[:, col])
                if end_cols['int_cols']['min'][col] is None:
                    end_cols['int_cols']['min'][col] = minval
                elif minval < end_cols['int_cols']['min'][col]:
                    end_cols['int_cols']['min'][col] = minval

                # Check maximum
                maxval = max(df.loc[:, col])
                if end_cols['int_cols']['max'][col] is None:
                    end_cols['int_cols']['max'][col] = maxval
                elif maxval > end_cols['int_cols']['max'][col]:
                    end_cols['int_cols']['max'][col] = maxval

        if categorical:
            # Scan str vars for categories
            cat_cols = end_cols['cat_cols']['names'].copy()
            for col in cat_cols:
                num_unique_values = len(df[col].unique())
                num_total_values = len(df[col])

                if num_unique_values / num_total_values < cat_threshold:
                    # Then stays as category
                    # Add category values
                    unique_vals = df[col].unique().tolist()
                    end_cols['cat_cols']['cats'][col].update(unique_vals)
                else:
                    print(f'{col} is now a string')
                    # Becomes regular string column
                    end_cols['str_cols'].append(col)
                    end_cols['cat_cols']['cats'].pop(col)
                    end_cols['cat_cols']['names'].remove(col)

        # Not currently scanning date or float vars

    dtypes_dict = {}

    # Int dtypes:
    for col in end_cols['int_cols']['names']:
        if unsigned and (end_cols['int_cols']['min'][col] >= 0):
            if end_cols['int_cols']['max'][col] <= np.iinfo(np.uint8).max:
                dtypes_dict[col] = np.uint8
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint16).max:
                dtypes_dict[col] = np.uint16
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint32).max:
                dtypes_dict[col] = np.uint32
            elif end_cols['int_cols']['max'][col] <= np.iinfo(np.uint64).max:
                dtypes_dict[col] = np.uint64
        else:
            if ((end_cols['int_cols']['max'][col] <= np.iinfo(np.int8).max) &
                    (end_cols['int_cols']['min'][col] >= np.iinfo(np.int8).min)):
                dtypes_dict[col] = np.int8
            elif (
                (end_cols['int_cols']['max'][col] <= np.iinfo(np.int16).max) &
                (end_cols['int_cols']['min'][col] >= np.iinfo(np.int16).min)):
                dtypes_dict[col] = np.int16
            elif (
                (end_cols['int_cols']['max'][col] <= np.iinfo(np.int32).max) &
                (end_cols['int_cols']['min'][col] >= np.iinfo(np.int32).min)):
                dtypes_dict[col] = np.int32
            elif (
                (end_cols['int_cols']['max'][col] <= np.iinfo(np.int64).max) &
                (end_cols['int_cols']['min'][col] >= np.iinfo(np.int64).min)):
                dtypes_dict[col] = np.int64

    for col in end_cols['float_cols']:
        dtypes_dict[col] = np.float64

    if categorical:
        for col in end_cols['cat_cols']['names']:
            dtypes_dict[col] = CategoricalDtype(
                end_cols['cat_cols']['cats'][col])

    return dtypes_dict
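# A hedged usage sketch of _scan_file: scan once for minimal dtypes, then
# downcast after reading. 'data.dta' is a placeholder path, and pandas is
# assumed to be imported as pd as in the function above.
dtypes = _scan_file('data.dta', categorical=True, unsigned=False)
df = pd.read_stata('data.dta')
df = df.astype({col: dt for col, dt in dtypes.items() if col in df.columns})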
Example #44
0
comp = dict(zlib=True,
            complevel=4,
            fletcher32=True,
            _FillValue=np.finfo("float32").max)

encoding = {
    var: comp
    for var in to_save_ds.data_vars
    if var not in ["platform_id", "sonde_id", "alt_bnds"]
}
encoding["launch_time"] = {
    "units": "seconds since 2020-01-01",
    "dtype": "int32"
}
encoding["interpolated_time"] = {
    "units": "seconds since 2020-01-01",
    "dtype": "int32",
    "_FillValue": np.iinfo("int32").max,
}

for key in dicts.nc_global_attrs.keys():
    to_save_ds.attrs[key] = dicts.nc_global_attrs[key]

to_save_ds.to_netcdf(save_directory + file_name,
                     mode="w",
                     format="NETCDF4",
                     encoding=encoding)
# %%

# %%
Example #45
0
def infer_exact(tester,
                pf,
                tensor_shape,
                batch_size,
                input_dtype,
                output0_dtype,
                output1_dtype,
                output0_raw=True,
                output1_raw=True,
                model_version=None,
                swap=False,
                outputs=("OUTPUT0", "OUTPUT1"),
                use_http=True,
                use_grpc=True,
                use_http_json_tensors=True,
                skip_request_id_check=False,
                use_streaming=True,
                correlation_id=0,
                shm_region_names=None,
                precreated_shm_regions=None,
                use_system_shared_memory=False,
                use_cuda_shared_memory=False,
                priority=0,
                timeout_us=0):
    tester.assertTrue(use_http or use_http_json_tensors or use_grpc
                      or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False, True))
    if output0_raw == output1_raw:
        # Float16 not supported for Input and Output via JSON
        if use_http_json_tensors and (input_dtype != np.float16) and \
            (output0_dtype != np.float16) and (output1_dtype != np.float16):
            configs.append(("localhost:8000", "http", False, False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False, False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True, False))

    # The outputs are the sum and difference of the inputs, so cap the input
    # values so that the outputs cannot overflow; this allows an exact match.
    # For float types, use the int8/int16/int32 ranges for fp16/fp32/fp64
    # respectively. When requesting class outputs the result value/probability
    # is returned as a float, so the fp32 range must be used in that case.
    rinput_dtype = _range_repr_dtype(input_dtype)
    routput0_dtype = _range_repr_dtype(
        output0_dtype if output0_raw else np.float32)
    routput1_dtype = _range_repr_dtype(
        output1_dtype if output1_raw else np.float32)
    val_min = max(
        np.iinfo(rinput_dtype).min,
        np.iinfo(routput0_dtype).min,
        np.iinfo(routput1_dtype).min) / 2
    val_max = min(
        np.iinfo(rinput_dtype).max,
        np.iinfo(routput0_dtype).max,
        np.iinfo(routput1_dtype).max) / 2

    num_classes = 3

    input0_array = np.random.randint(low=val_min,
                                     high=val_max,
                                     size=tensor_shape,
                                     dtype=rinput_dtype)
    input1_array = np.random.randint(low=val_min,
                                     high=val_max,
                                     size=tensor_shape,
                                     dtype=rinput_dtype)
    if input_dtype != np.object:
        input0_array = input0_array.astype(input_dtype)
        input1_array = input1_array.astype(input_dtype)

    if not swap:
        output0_array = input0_array + input1_array
        output1_array = input0_array - input1_array
    else:
        output0_array = input0_array - input1_array
        output1_array = input0_array + input1_array

    if output0_dtype == np.object:
        output0_array = np.array([
            unicode(str(x), encoding='utf-8')
            for x in (output0_array.flatten())
        ],
                                 dtype=object).reshape(output0_array.shape)
    else:
        output0_array = output0_array.astype(output0_dtype)
    if output1_dtype == np.object:
        output1_array = np.array([
            unicode(str(x), encoding='utf-8')
            for x in (output1_array.flatten())
        ],
                                 dtype=object).reshape(output1_array.shape)
    else:
        output1_array = output1_array.astype(output1_dtype)

    if input_dtype == np.object:
        in0n = np.array(
            [str(x) for x in input0_array.reshape(input0_array.size)],
            dtype=object)
        input0_array = in0n.reshape(input0_array.shape)
        in1n = np.array(
            [str(x) for x in input1_array.reshape(input1_array.size)],
            dtype=object)
        input1_array = in1n.reshape(input1_array.shape)

    # prepend size of string to output string data
    if output0_dtype == np.object:
        if batch_size == 1:
            output0_array_tmp = serialize_byte_tensor_list([output0_array])
        else:
            output0_array_tmp = serialize_byte_tensor_list(output0_array)
    else:
        output0_array_tmp = output0_array

    if output1_dtype == np.object:
        if batch_size == 1:
            output1_array_tmp = serialize_byte_tensor_list([output1_array])
        else:
            output1_array_tmp = serialize_byte_tensor_list(output1_array)
    else:
        output1_array_tmp = output1_array

    OUTPUT0 = "OUTPUT0"
    OUTPUT1 = "OUTPUT1"
    INPUT0 = "INPUT0"
    INPUT1 = "INPUT1"
    if pf == "libtorch" or pf == "libtorch_nobatch":
        OUTPUT0 = "OUTPUT__0"
        OUTPUT1 = "OUTPUT__1"
        INPUT0 = "INPUT__0"
        INPUT1 = "INPUT__1"

    output0_byte_size = sum([o0.nbytes for o0 in output0_array_tmp])
    output1_byte_size = sum([o1.nbytes for o1 in output1_array_tmp])

    if batch_size == 1:
        input0_list = [input0_array]
        input1_list = [input1_array]
    else:
        input0_list = [x for x in input0_array]
        input1_list = [x for x in input1_array]

    # Serialization of string tensors in the case of shared memory must be done manually
    if input_dtype == np.object:
        input0_list_tmp = serialize_byte_tensor_list(input0_list)
        input1_list_tmp = serialize_byte_tensor_list(input1_list)
    else:
        input0_list_tmp = input0_list
        input1_list_tmp = input1_list

    input0_byte_size = sum([i0.nbytes for i0 in input0_list_tmp])
    input1_byte_size = sum([i1.nbytes for i1 in input1_list_tmp])

    # Create system/cuda shared memory regions if needed
    shm_regions, shm_handles = su.create_set_shm_regions(
        input0_list_tmp, input1_list_tmp, output0_byte_size, output1_byte_size,
        outputs, shm_region_names, precreated_shm_regions,
        use_system_shared_memory, use_cuda_shared_memory)

    if model_version is not None:
        model_version = str(model_version)
    else:
        model_version = ""

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_model_name(pf, input_dtype, output0_dtype,
                                       output1_dtype)

        if config[1] == "http":
            triton_client = httpclient.InferenceServerClient(config[0],
                                                             verbose=True)
        else:
            triton_client = grpcclient.InferenceServerClient(config[0],
                                                             verbose=True)

        inputs = []
        if config[1] == "http":
            inputs.append(
                httpclient.InferInput(INPUT0, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
            inputs.append(
                httpclient.InferInput(INPUT1, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
        else:
            inputs.append(
                grpcclient.InferInput(INPUT0, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))
            inputs.append(
                grpcclient.InferInput(INPUT1, tensor_shape,
                                      np_to_triton_dtype(input_dtype)))

        if not (use_cuda_shared_memory or use_system_shared_memory):
            if config[1] == "http":
                inputs[0].set_data_from_numpy(input0_array,
                                              binary_data=config[3])
                inputs[1].set_data_from_numpy(input1_array,
                                              binary_data=config[3])
            else:
                inputs[0].set_data_from_numpy(input0_array)
                inputs[1].set_data_from_numpy(input1_array)
        else:
            # Register necessary shared memory regions/handles
            su.register_add_shm_regions(inputs, outputs, shm_regions,
                                        precreated_shm_regions, shm_handles,
                                        input0_byte_size, input1_byte_size,
                                        output0_byte_size, output1_byte_size,
                                        use_system_shared_memory,
                                        use_cuda_shared_memory, triton_client)

        if batch_size == 1:
            expected0_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output0_array.reshape((1, ) + tensor_shape)
            ]
            expected1_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output1_array.reshape((1, ) + tensor_shape)
            ]
        else:
            expected0_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output0_array.reshape(tensor_shape)
            ]
            expected1_sort_idx = [
                np.flip(np.argsort(x.flatten()), 0)
                for x in output1_array.reshape(tensor_shape)
            ]

        # Force binary_data = False for shared memory and class
        output_req = []
        i = 0
        if "OUTPUT0" in outputs:
            if len(shm_regions) != 0:
                if config[1] == "http":
                    output_req.append(
                        httpclient.InferRequestedOutput(OUTPUT0,
                                                        binary_data=False))
                else:
                    output_req.append(grpcclient.InferRequestedOutput(OUTPUT0))

                output_req[-1].set_shared_memory(shm_regions[2] + '_data',
                                                 output0_byte_size)
            else:
                if output0_raw:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT0, binary_data=config[3]))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(OUTPUT0))
                else:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT0,
                                binary_data=False,
                                class_count=num_classes))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(
                                OUTPUT0, class_count=num_classes))
            i += 1
        if "OUTPUT1" in outputs:
            if len(shm_regions) != 0:
                if config[1] == "http":
                    output_req.append(
                        httpclient.InferRequestedOutput(OUTPUT1,
                                                        binary_data=False))
                else:
                    output_req.append(grpcclient.InferRequestedOutput(OUTPUT1))

                output_req[-1].set_shared_memory(shm_regions[2 + i] + '_data',
                                                 output1_byte_size)
            else:
                if output1_raw:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT1, binary_data=config[3]))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(OUTPUT1))
                else:
                    if config[1] == "http":
                        output_req.append(
                            httpclient.InferRequestedOutput(
                                OUTPUT1,
                                binary_data=False,
                                class_count=num_classes))
                    else:
                        output_req.append(
                            grpcclient.InferRequestedOutput(
                                OUTPUT1, class_count=num_classes))

        if config[2]:
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    model_version=model_version,
                    outputs=output_req,
                    request_id=str(_unique_request_id()))
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(model_name,
                                          inputs,
                                          model_version=model_version,
                                          outputs=output_req,
                                          request_id=str(_unique_request_id()))

        last_response = results.get_response()

        if not skip_request_id_check:
            global _seen_request_ids
            if config[1] == "http":
                request_id = int(last_response["id"])
            else:
                request_id = int(last_response.id)
            tester.assertFalse(request_id in _seen_request_ids,
                               "request_id: {}".format(request_id))
            _seen_request_ids.add(request_id)

        if config[1] == "http":
            response_model_name = last_response["model_name"]
            if model_version != "":
                response_model_version = last_response["model_version"]
            response_outputs = last_response["outputs"]
        else:
            response_model_name = last_response.model_name
            if model_version != "":
                response_model_version = last_response.model_version
            response_outputs = last_response.outputs

        tester.assertEqual(response_model_name, model_name)

        if model_version != "":
            tester.assertEqual(str(response_model_version), model_version)

        tester.assertEqual(len(response_outputs), len(outputs))

        for result in response_outputs:
            if config[1] == "http":
                result_name = result["name"]
            else:
                result_name = result.name

            if ((result_name == OUTPUT0 and output0_raw)
                    or (result_name == OUTPUT1 and output1_raw)):
                if use_system_shared_memory or use_cuda_shared_memory:
                    if result_name == OUTPUT0:
                        shm_handle = shm_handles[2]
                    else:
                        shm_handle = shm_handles[3]

                    output = results.get_output(result_name)
                    if config[1] == "http":
                        output_datatype = output['datatype']
                        output_shape = output['shape']
                    else:
                        output_datatype = output.datatype
                        output_shape = output.shape
                    output_dtype = triton_to_np_dtype(output_datatype)
                if use_system_shared_memory:
                    output_data = shm.get_contents_as_numpy(
                        shm_handle, output_dtype, output_shape)
                elif use_cuda_shared_memory:
                    output_data = cudashm.get_contents_as_numpy(
                        shm_handle, output_dtype, output_shape)
                else:
                    output_data = results.as_numpy(result_name)

                if (output_data.dtype == np.object) and (config[3] == False):
                    output_data = output_data.astype(np.bytes_)

                if result_name == OUTPUT0:
                    tester.assertTrue(
                        np.array_equal(output_data, output0_array),
                        "{}, {} expected: {}, got {}".format(
                            model_name, OUTPUT0, output0_array, output_data))
                elif result_name == OUTPUT1:
                    tester.assertTrue(
                        np.array_equal(output_data, output1_array),
                        "{}, {} expected: {}, got {}".format(
                            model_name, OUTPUT1, output1_array, output_data))
                else:
                    tester.assertTrue(
                        False, "unexpected raw result {}".format(result_name))
            else:
                for b in range(batch_size):
                    # num_classes values must be returned and must
                    # match expected top values
                    if "nobatch" in pf:
                        class_list = results.as_numpy(result_name)
                    else:
                        class_list = results.as_numpy(result_name)[b]

                    tester.assertEqual(len(class_list), num_classes)
                    if batch_size == 1:
                        expected0_flatten = output0_array.flatten()
                        expected1_flatten = output1_array.flatten()
                    else:
                        expected0_flatten = output0_array[b].flatten()
                        expected1_flatten = output1_array[b].flatten()

                    for idx, class_label in enumerate(class_list):
                        # can't compare indices since could have different
                        # indices with the same value/prob, so check that
                        # the value of each index equals the expected value.
                        # Only compare labels when the indices are equal.
                        if type(class_label) == str:
                            ctuple = class_label.split(':')
                        else:
                            ctuple = "".join(chr(x)
                                             for x in class_label).split(':')
                        cval = float(ctuple[0])
                        cidx = int(ctuple[1])
                        if result_name == OUTPUT0:
                            tester.assertEqual(cval, expected0_flatten[cidx])
                            tester.assertEqual(
                                cval,
                                expected0_flatten[expected0_sort_idx[b][idx]])
                            if cidx == expected0_sort_idx[b][idx]:
                                tester.assertEqual(
                                    ctuple[2], 'label{}'.format(
                                        expected0_sort_idx[b][idx]))
                        elif result_name == OUTPUT1:
                            tester.assertEqual(cval, expected1_flatten[cidx])
                            tester.assertEqual(
                                cval,
                                expected1_flatten[expected1_sort_idx[b][idx]])
                        else:
                            tester.assertTrue(
                                False, "unexpected class result {}".format(
                                    result_name))

    # Unregister system/cuda shared memory regions if they exist
    su.unregister_cleanup_shm_regions(shm_regions, shm_handles,
                                      precreated_shm_regions, outputs,
                                      use_system_shared_memory,
                                      use_cuda_shared_memory)

    return results
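# A small standalone illustration of the overflow guard above: take the
# tightest integer range shared by the input and output dtypes and halve it,
# so sums and differences of two inputs still fit (dtypes here are arbitrary).
import numpy as np

rinput_dtype, routput0_dtype, routput1_dtype = np.int8, np.int16, np.int32
val_min = max(np.iinfo(rinput_dtype).min,
              np.iinfo(routput0_dtype).min,
              np.iinfo(routput1_dtype).min) / 2   # -64.0
val_max = min(np.iinfo(rinput_dtype).max,
              np.iinfo(routput0_dtype).max,
              np.iinfo(routput1_dtype).max) / 2   # 63.5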
Example #46
0
from collections import namedtuple
from warnings import warn

import locale
import numpy as np
import numba
import scipy.sparse

from pynndescent.sparse import sparse_mul, sparse_diff, sparse_sum
from pynndescent.utils import tau_rand_int, norm
import joblib

locale.setlocale(locale.LC_NUMERIC, "C")

# Used for a floating point "nearly zero" comparison
EPS = 1e-8
INT32_MIN = np.iinfo(np.int32).min + 1
INT32_MAX = np.iinfo(np.int32).max - 1

RandomProjectionTreeNode = namedtuple(
    "RandomProjectionTreeNode",
    [
        "graph_indices", "is_leaf", "hyperplane", "offset", "left_child",
        "right_child"
    ],
)

FlatTree = namedtuple(
    "FlatTree", ["hyperplanes", "offsets", "children", "indices", "leaf_size"])

dense_hyperplane_type = numba.float32[::1]
sparse_hyperplane_type = numba.float64[:, ::1]
Example #47
0
def infer_shape_tensor(tester,
                       pf,
                       tensor_dtype,
                       input_shape_values,
                       dummy_input_shapes,
                       use_http=True,
                       use_grpc=True,
                       use_streaming=True,
                       shm_suffix="",
                       use_system_shared_memory=False,
                       use_cuda_shared_memory=False,
                       priority=0,
                       timeout_us=0,
                       batch_size=1):
    tester.assertTrue(use_http or use_grpc or use_streaming)
    tester.assertTrue(pf == "plan" or pf == "plan_nobatch")
    tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))
    if use_system_shared_memory and use_cuda_shared_memory:
        raise ValueError(
            "Cannot set both System and CUDA shared memory flags to 1")

    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True))

    io_cnt = len(input_shape_values)

    # FIXME wrap up shm handle cleanup
    # For (cuda) shared memory, it's only set for shape tensor for simplicity.
    # Regular tensor with (cuda) shared memory should be well-tested in other
    # tests.
    # item is (handle, byte_size, is_cuda)
    input_shm_handle_list = []
    output_shm_handle_list = []
    dummy_input_list = []
    input_list = []
    expected_dict = dict()
    # Prepare IO in advance
    for io_num in range(io_cnt):
        dummy_input_name = "DUMMY_INPUT{}".format(io_num)
        input_name = "INPUT{}".format(io_num)
        dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
        output_name = "OUTPUT{}".format(io_num)

        # Prepare the dummy tensor
        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if (rtensor_dtype != np.bool):
            dummy_in0 = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                          high=np.iinfo(rtensor_dtype).max,
                                          size=dummy_input_shapes[io_num],
                                          dtype=rtensor_dtype)
        else:
            dummy_in0 = np.random.choice(a=[False, True],
                                         size=dummy_input_shapes[io_num])
        if tensor_dtype != np.object:
            dummy_in0 = dummy_in0.astype(tensor_dtype)
        else:
            dummy_in0 = np.array([str(x) for x in dummy_in0.flatten()],
                                 dtype=object).reshape(dummy_in0.shape)
        dummy_input_list.append(dummy_in0)

        # Prepare shape input tensor
        in0 = np.asarray(input_shape_values[io_num], dtype=np.int32)
        input_list.append(in0)

        # Prepare the expected value for the output. Skip dummy output as we
        # only care about its shape (== value of OUTPUT*)
        expected_dict[output_name] = np.ndarray.copy(in0)

        # Only need to create region once
        input_byte_size = in0.size * np.dtype(np.int32).itemsize
        output_byte_size = input_byte_size * batch_size
        if use_system_shared_memory:
            input_shm_handle_list.append(
                (shm.create_shared_memory_region(input_name + shm_suffix,
                                                 '/' + input_name + shm_suffix,
                                                 input_byte_size),
                 input_byte_size, False))
            output_shm_handle_list.append((shm.create_shared_memory_region(
                output_name + shm_suffix, '/' + output_name + shm_suffix,
                output_byte_size), output_byte_size, False))
            shm.set_shared_memory_region(input_shm_handle_list[-1][0], [
                in0,
            ])
        elif use_cuda_shared_memory:
            input_shm_handle_list.append(
                (cudashm.create_shared_memory_region(input_name + shm_suffix,
                                                     input_byte_size, 0),
                 input_byte_size, True))
            output_shm_handle_list.append(
                (cudashm.create_shared_memory_region(output_name + shm_suffix,
                                                     output_byte_size, 0),
                 output_byte_size, True))
            cudashm.set_shared_memory_region(input_shm_handle_list[-1][0], [
                in0,
            ])

    model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)
    # Run inference and check results for each config
    for config in configs:
        client_utils = grpcclient if config[1] == "grpc" else httpclient
        triton_client = client_utils.InferenceServerClient(config[0],
                                                           verbose=True)

        inputs = []
        outputs = []

        # Set IOs
        for io_num in range(io_cnt):
            dummy_input_name = "DUMMY_INPUT{}".format(io_num)
            input_name = "INPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

            inputs.append(
                client_utils.InferInput(dummy_input_name,
                                        dummy_input_shapes[io_num],
                                        np_to_triton_dtype(tensor_dtype)))
            inputs.append(
                client_utils.InferInput(input_name, input_list[io_num].shape,
                                        "INT32"))
            outputs.append(
                client_utils.InferRequestedOutput(dummy_output_name))
            outputs.append(client_utils.InferRequestedOutput(output_name))

            # -2: dummy; -1: input
            inputs[-2].set_data_from_numpy(dummy_input_list[io_num])
            if (not use_system_shared_memory) and (not use_cuda_shared_memory):
                inputs[-1].set_data_from_numpy(input_list[io_num])
            else:
                input_byte_size = input_shm_handle_list[io_num][1]
                output_byte_size = output_shm_handle_list[io_num][1]
                if use_system_shared_memory:
                    triton_client.register_system_shared_memory(
                        input_name + shm_suffix, "/" + input_name + shm_suffix,
                        input_byte_size)
                    triton_client.register_system_shared_memory(
                        output_name + shm_suffix,
                        "/" + output_name + shm_suffix, output_byte_size)
                else:
                    triton_client.register_cuda_shared_memory(
                        input_name + shm_suffix,
                        cudashm.get_raw_handle(
                            input_shm_handle_list[io_num][0]), 0,
                        input_byte_size)
                    triton_client.register_cuda_shared_memory(
                        output_name + shm_suffix,
                        cudashm.get_raw_handle(
                            output_shm_handle_list[io_num][0]), 0,
                        output_byte_size)
                inputs[-1].set_shared_memory(input_name + shm_suffix,
                                             input_byte_size)
                outputs[-1].set_shared_memory(output_name + shm_suffix,
                                              output_byte_size)

        if config[2]:
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(model_name,
                                                           inputs,
                                                           outputs=outputs,
                                                           priority=priority,
                                                           timeout=timeout_us)
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(model_name,
                                          inputs,
                                          outputs=outputs,
                                          priority=priority,
                                          timeout=timeout_us)

        for io_num in range(io_cnt):
            output_name = "OUTPUT{}".format(io_num)
            dummy_output_name = "DUMMY_OUTPUT{}".format(io_num)
            expected = expected_dict[output_name]

            # get outputs as numpy array
            dummy_out = results.as_numpy(dummy_output_name)
            if (not use_system_shared_memory) and (not use_cuda_shared_memory):
                out = results.as_numpy(output_name)
            else:
                output = results.get_output(output_name)
                if config[1] == "grpc":
                    output_shape = output.shape
                else:
                    output_shape = output["shape"]
                if use_system_shared_memory:
                    out = shm.get_contents_as_numpy(
                        output_shm_handle_list[io_num][0], np.int32,
                        output_shape)
                else:
                    out = cudashm.get_contents_as_numpy(
                        output_shm_handle_list[io_num][0], np.int32,
                        output_shape)

            # if out shape is 2D, it is batched
            if (len(out.shape) == 2):
                # The shape of the dummy output should be equal to the shape values
                # specified in the shape tensor
                tester.assertTrue(
                    np.array_equal(dummy_out.shape[1:], out[0]),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out[0],
                        dummy_out.shape[1:]))
                for b in range(1, out.shape[0]):
                    tester.assertTrue(
                        np.array_equal(out[b - 1], out[b]),
                        "expect shape tensor has consistent value, "
                        "expected: {}, got {}".format(out[b - 1], out[b]))
                out = out[0]
            else:
                tester.assertTrue(
                    np.array_equal(dummy_out.shape, out),
                    "{}, {} shape, expected: {}, got {}".format(
                        model_name, dummy_output_name, out, dummy_out.shape))
            tester.assertTrue(
                np.array_equal(out, expected),
                "{}, {}, expected: {}, got {}".format(model_name, output_name,
                                                      expected, out))

            # unregister shared memory region for next config
            if use_system_shared_memory:
                triton_client.unregister_system_shared_memory(input_name +
                                                              shm_suffix)
                triton_client.unregister_system_shared_memory(output_name +
                                                              shm_suffix)
            elif use_cuda_shared_memory:
                triton_client.unregister_cuda_shared_memory(input_name +
                                                            shm_suffix)
                triton_client.unregister_cuda_shared_memory(output_name +
                                                            shm_suffix)

    for handle in input_shm_handle_list:
        if (handle[2]):
            cudashm.destroy_shared_memory_region(handle[0])
        else:
            shm.destroy_shared_memory_region(handle[0])
    for handle in output_shm_handle_list:
        if (handle[2]):
            cudashm.destroy_shared_memory_region(handle[0])
        else:
            shm.destroy_shared_memory_region(handle[0])
Example #48
0
def software_he_veto(records,
                     to_pe,
                     chunk_end,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3,
                     max_veto_value=None):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of any sample whose amplitude exceeds pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param chunk_end: Endtime of chunk to set as maximum ceiling for the veto period
    :param area_threshold: Minimum peak area to trigger the veto.
    Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
    Do not make too large without increasing integer type in some strax
    dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude to
    trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right) the pass veto
    regions.
    :param max_veto_value: if not None, pass peaks that exceed this area
    no matter what.
    """
    veto_res = int(veto_res)
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")
    veto_length = np.ceil(veto_length / veto_res).astype(np.int64) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(records,
                             to_pe,
                             gap_threshold=1,
                             left_extension=0,
                             right_extension=0,
                             min_channels=100,
                             min_area=area_threshold,
                             result_dtype=strax.peak_dtype(
                                 n_channels=len(to_pe),
                                 n_sum_wv_samples=veto_n))

    # 2a. Set 'candidate regions' at these peaks. These should:
    #  - Have a fixed maximum length (else we can't use the strax hitfinder on them)
    #  - Never extend beyond the current chunk
    #  - Do not overlap
    veto_start = peaks['time']
    veto_end = np.clip(peaks['time'] + veto_length, None, chunk_end)
    veto_end[:-1] = np.clip(veto_end[:-1], None, veto_start[1:])

    # 2b. Convert these into strax record-like objects
    # Note the waveform is float32 though (it's a summed waveform)
    regions = np.zeros(len(veto_start),
                       dtype=strax.interval_dtype + [
                           ("data", (np.float32, veto_n)),
                           ("baseline", np.float32),
                           ("baseline_rms", np.float32),
                           ("reduction_level", np.int64),
                           ("record_i", np.int64),
                           ("pulse_length", np.int64),
                       ])
    regions['time'] = veto_start
    regions['length'] = (veto_end - veto_start) // veto_n
    regions['pulse_length'] = veto_n
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regions with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    if max_veto_value is not None:
        pass_veto = strax.find_hits(regions, min_amplitude=max_veto_value)
    else:
        regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]
        pass_veto = strax.find_hits(regions, min_amplitude=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(regions,
                                     pass_veto,
                                     left_extension=pass_veto_extend,
                                     right_extension=pass_veto_extend)
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, min_amplitude=1)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(mask_and_not(records, veto_mask)) + [veto])
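
A minimal standalone sketch (with hypothetical argument values) of the veto-window arithmetic described in the docstring above: the resolution must fit a 16-bit integer, and the veto length is rounded up to a whole number of resolution steps before sizing the low-resolution sum waveform.

import numpy as np

# Hypothetical values; in software_he_veto these come from the arguments.
veto_length, veto_res = int(3e6), int(1e3)

if veto_res > np.iinfo(np.int16).max:
    raise ValueError("Veto resolution does not fit 16-bit int")

# Round up to a whole number of resolution steps, then count the samples
# of the low-resolution sum waveform.
veto_length = int(np.ceil(veto_length / veto_res)) * veto_res
veto_n = veto_length // veto_res + 1
print(veto_length, veto_n)  # 3000000 3001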
Example #49
def convert(image, dtype, force_copy=False, uniform=False):
    """
    Convert an image to the requested data-type.
 
    Warnings are issued in case of precision loss, or when negative values
    are clipped during conversion to unsigned integer types (sign loss).
 
    Floating point values are expected to be normalized and will be clipped
    to the range [0.0, 1.0] or [-1.0, 1.0] when converting to unsigned or
    signed integers respectively.
 
    Numbers are not shifted to the negative side when converting from
    unsigned to signed integer types. Negative values will be clipped when
    converting to unsigned integers.
 
    Parameters
    ----------
    image : ndarray
        Input image.
    dtype : dtype
        Target data-type.
    force_copy : bool, optional
        Force a copy of the data, irrespective of its current dtype.
    uniform : bool, optional
        Uniformly quantize the floating point range to the integer range.
        By default (uniform=False) floating point values are scaled and
        rounded to the nearest integers, which minimizes back and forth
        conversion errors.
 
    References
    ----------
    .. [1] DirectX data conversion rules.
           http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx
    .. [2] Data Conversions. In "OpenGL ES 2.0 Specification v2.0.25",
           pp 7-8. Khronos Group, 2010.
    .. [3] Proper treatment of pixels as integers. A.W. Paeth.
           In "Graphics Gems I", pp 249-256. Morgan Kaufmann, 1990.
    .. [4] Dirty Pixels. J. Blinn. In "Jim Blinn's corner: Dirty Pixels",
           pp 47-57. Morgan Kaufmann, 1998.
 
    """
    image = np.asarray(image)
    dtypeobj = np.dtype(dtype)
    dtypeobj_in = image.dtype
    dtype = dtypeobj.type
    dtype_in = dtypeobj_in.type

    if dtype_in == dtype:
        if force_copy:
            image = image.copy()
        return image

    if not (dtype_in in _supported_types and dtype in _supported_types):
        raise ValueError("can not convert %s to %s." % (dtypeobj_in, dtypeobj))

    def sign_loss():
        warn("Possible sign loss when converting negative image of type "
             "%s to positive image of type %s." % (dtypeobj_in, dtypeobj))

    def prec_loss():
        warn("Possible precision loss when converting from "
             "%s to %s" % (dtypeobj_in, dtypeobj))

    def _dtype(itemsize, *dtypes):
        # Return first of `dtypes` with itemsize greater than `itemsize`
        return next(dt for dt in dtypes if itemsize < np.dtype(dt).itemsize)

    def _dtype2(kind, bits, itemsize=1):
        # Return dtype of `kind` that can store a `bits` wide unsigned int
        def compare(x, y, kind='u'):
            if kind == 'u':
                return x <= y
            else:
                return x < y

        s = next(i for i in (itemsize, ) + (2, 4, 8)
                 if compare(bits, i * 8, kind=kind))
        return np.dtype(kind + str(s))

    def _scale(a, n, m, copy=True):
        # Scale unsigned/positive integers from n to m bits
        # Numbers can be represented exactly only if m is a multiple of n
        # Output array is of same kind as input.
        kind = a.dtype.kind
        if n > m and a.max() < 2**m:
            mnew = int(np.ceil(m / 2) * 2)
            if mnew > m:
                dtype = "int%s" % mnew
            else:
                dtype = "uint%s" % mnew
            n = int(np.ceil(n / 2) * 2)
            msg = ("Downcasting %s to %s without scaling because max "
                   "value %s fits in %s" % (a.dtype, dtype, a.max(), dtype))
            warn(msg)
            return a.astype(_dtype2(kind, m))
        elif n == m:
            return a.copy() if copy else a
        elif n > m:
            # downscale with precision loss
            prec_loss()
            if copy:
                b = np.empty(a.shape, _dtype2(kind, m))
                np.floor_divide(a,
                                2**(n - m),
                                out=b,
                                dtype=a.dtype,
                                casting='unsafe')
                return b
            else:
                a //= 2**(n - m)
                return a
        elif m % n == 0:
            # exact upscale to a multiple of n bits
            if copy:
                b = np.empty(a.shape, _dtype2(kind, m))
                np.multiply(a, (2**m - 1) // (2**n - 1), out=b, dtype=b.dtype)
                return b
            else:
                a = np.array(a, _dtype2(kind, m, a.dtype.itemsize), copy=False)
                a *= (2**m - 1) // (2**n - 1)
                return a
        else:
            # upscale to a multiple of n bits,
            # then downscale with precision loss
            prec_loss()
            o = (m // n + 1) * n
            if copy:
                b = np.empty(a.shape, _dtype2(kind, o))
                np.multiply(a, (2**o - 1) // (2**n - 1), out=b, dtype=b.dtype)
                b //= 2**(o - m)
                return b
            else:
                a = np.array(a, _dtype2(kind, o, a.dtype.itemsize), copy=False)
                a *= (2**o - 1) // (2**n - 1)
                a //= 2**(o - m)
                return a

    kind = dtypeobj.kind
    kind_in = dtypeobj_in.kind
    itemsize = dtypeobj.itemsize
    itemsize_in = dtypeobj_in.itemsize

    if kind == 'b':
        # to binary image
        if kind_in in "fi":
            sign_loss()
        prec_loss()
        return image > dtype_in(dtype_range[dtype_in][1] / 2)

    if kind_in == 'b':
        # from binary image, to float and to integer
        result = image.astype(dtype)
        if kind != 'f':
            result *= dtype(dtype_range[dtype][1])
        return result

    if kind in 'ui':
        imin = np.iinfo(dtype).min
        imax = np.iinfo(dtype).max
    if kind_in in 'ui':
        imin_in = np.iinfo(dtype_in).min
        imax_in = np.iinfo(dtype_in).max

    if kind_in == 'f':
        if np.min(image) < -1.0 or np.max(image) > 1.0:
            raise ValueError("Images of type float must be between -1 and 1.")
        if kind == 'f':
            # floating point -> floating point
            if itemsize_in > itemsize:
                prec_loss()
            return image.astype(dtype)

        # floating point -> integer
        prec_loss()
        # use float type that can represent output integer type
        image = np.array(image,
                         _dtype(itemsize, dtype_in, np.float32, np.float64))
        if not uniform:
            if kind == 'u':
                image *= imax
            else:
                image *= imax - imin
                image -= 1.0
                image /= 2.0
            np.rint(image, out=image)
            np.clip(image, imin, imax, out=image)
        elif kind == 'u':
            image *= imax + 1
            np.clip(image, 0, imax, out=image)
        else:
            image *= (imax - imin + 1.0) / 2.0
            np.floor(image, out=image)
            np.clip(image, imin, imax, out=image)
        return image.astype(dtype)

    if kind == 'f':
        # integer -> floating point
        if itemsize_in >= itemsize:
            prec_loss()
        # use float type that can exactly represent input integers
        image = np.array(image,
                         _dtype(itemsize_in, dtype, np.float32, np.float64))
        if kind_in == 'u':
            image /= imax_in
            # DirectX uses this conversion also for signed ints
            #if imin_in:
            #    np.maximum(image, -1.0, out=image)
        else:
            image *= 2.0
            image += 1.0
            image /= imax_in - imin_in
        return image.astype(dtype)

    if kind_in == 'u':
        if kind == 'i':
            # unsigned integer -> signed integer
            image = _scale(image, 8 * itemsize_in, 8 * itemsize - 1)
            return image.view(dtype)
        else:
            # unsigned integer -> unsigned integer
            return _scale(image, 8 * itemsize_in, 8 * itemsize)

    if kind == 'u':
        # signed integer -> unsigned integer
        sign_loss()
        image = _scale(image, 8 * itemsize_in - 1, 8 * itemsize)
        result = np.empty(image.shape, dtype)
        np.maximum(image, 0, out=result, dtype=image.dtype, casting='unsafe')
        return result

    # signed integer -> signed integer
    if itemsize_in > itemsize:
        return _scale(image, 8 * itemsize_in - 1, 8 * itemsize - 1)
    image = image.astype(_dtype2('i', itemsize * 8))
    image -= imin_in
    image = _scale(image, 8 * itemsize_in, 8 * itemsize, copy=False)
    image += imin
    return image.astype(dtype)
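
A short, self-contained sketch (not part of the function above) of the float-to-unsigned-integer rule its docstring describes: normalized floats are scaled by the target type's np.iinfo maximum, rounded, and clipped into range.

import numpy as np

def float_to_uint_sketch(image, dtype=np.uint8):
    # Assumes a normalized float image; negative values end up clipped to 0,
    # mirroring the non-uniform unsigned branch of convert() above.
    imax = np.iinfo(dtype).max
    scaled = np.rint(np.asarray(image, dtype=np.float64) * imax)
    return np.clip(scaled, 0, imax).astype(dtype)

print(float_to_uint_sketch([-0.2, 0.0, 0.5, 1.0]))  # [  0   0 128 255]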
Example #50
def infer_zero(tester,
               pf,
               batch_size,
               tensor_dtype,
               input_shapes,
               output_shapes,
               model_version=None,
               use_http=True,
               use_grpc=True,
               use_http_json_tensors=True,
               use_streaming=True,
               shm_region_name_prefix=None,
               use_system_shared_memory=False,
               use_cuda_shared_memory=False,
               priority=0,
               timeout_us=0):
    tester.assertTrue(use_http or use_grpc or use_http_json_tensors
                      or use_streaming)
    configs = []
    if use_http:
        configs.append(("localhost:8000", "http", False, True))
    if use_http_json_tensors and (tensor_dtype != np.float16):
        configs.append(("localhost:8000", "http", False, False))
    if use_grpc:
        configs.append(("localhost:8001", "grpc", False, False))
    if use_streaming:
        configs.append(("localhost:8001", "grpc", True, False))
    tester.assertEqual(len(input_shapes), len(output_shapes))
    io_cnt = len(input_shapes)

    if shm_region_name_prefix is None:
        shm_region_name_prefix = ["input", "output"]

    input_dict = {}
    expected_dict = {}
    shm_ip_handles = list()
    shm_op_handles = list()

    for io_num in range(io_cnt):
        if pf == "libtorch" or pf == "libtorch_nobatch":
            input_name = "INPUT__{}".format(io_num)
            output_name = "OUTPUT__{}".format(io_num)
        else:
            input_name = "INPUT{}".format(io_num)
            output_name = "OUTPUT{}".format(io_num)

        input_shape = input_shapes[io_num]
        output_shape = output_shapes[io_num]

        rtensor_dtype = _range_repr_dtype(tensor_dtype)
        if (rtensor_dtype != np.bool):
            input_array = np.random.randint(low=np.iinfo(rtensor_dtype).min,
                                            high=np.iinfo(rtensor_dtype).max,
                                            size=input_shape,
                                            dtype=rtensor_dtype)
        else:
            input_array = np.random.choice(a=[False, True], size=input_shape)
        if tensor_dtype != np.object:
            input_array = input_array.astype(tensor_dtype)
            expected_array = np.ndarray.copy(input_array)
        else:
            expected_array = np.array([
                unicode(str(x), encoding='utf-8')
                for x in input_array.flatten()
            ],
                                      dtype=object)
            input_array = np.array([str(x) for x in input_array.flatten()],
                                   dtype=object).reshape(input_array.shape)

        expected_array = expected_array.reshape(output_shape)
        expected_dict[output_name] = expected_array

        output_byte_size = expected_array.nbytes

        if batch_size == 1:
            input_list = [input_array]
        else:
            input_list = [x for x in input_array]

        # Serialization of string tensors in the case of shared memory must be done manually
        if tensor_dtype == np.object:
            input_list_tmp = serialize_byte_tensor_list(input_list)
        else:
            input_list_tmp = input_list

        input_byte_size = sum([ip.nbytes for ip in input_list_tmp])

        # create and register shared memory region for inputs and outputs
        shm_io_handles = su.create_set_either_shm_region(
            [
                shm_region_name_prefix[0] + str(io_num),
                shm_region_name_prefix[1] + str(io_num)
            ], input_list_tmp, input_byte_size, output_byte_size,
            use_system_shared_memory, use_cuda_shared_memory)

        if len(shm_io_handles) != 0:
            shm_ip_handles.append(shm_io_handles[0])
            shm_op_handles.append(shm_io_handles[1])
        input_dict[input_name] = input_array

    if model_version is not None:
        model_version = str(model_version)
    else:
        model_version = ""

    # Run inference and check results for each config
    for config in configs:
        model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype)

        if config[1] == "http":
            triton_client = httpclient.InferenceServerClient(config[0],
                                                             verbose=True)
        else:
            triton_client = grpcclient.InferenceServerClient(config[0],
                                                             verbose=True)

        inputs = []
        output_req = []
        for io_num, (input_name, output_name) in enumerate(
                zip(input_dict.keys(), expected_dict.keys())):
            input_data = input_dict[input_name]
            input_byte_size = input_data.nbytes
            output_byte_size = expected_dict[output_name].nbytes
            if config[1] == "http":
                inputs.append(
                    httpclient.InferInput(input_name, input_data.shape,
                                          np_to_triton_dtype(tensor_dtype)))
                output_req.append(
                    httpclient.InferRequestedOutput(output_name,
                                                    binary_data=config[3]))
            else:
                inputs.append(
                    grpcclient.InferInput(input_name, input_data.shape,
                                          np_to_triton_dtype(tensor_dtype)))
                output_req.append(grpcclient.InferRequestedOutput(output_name))

            if not (use_cuda_shared_memory or use_system_shared_memory):
                if config[1] == "http":
                    inputs[-1].set_data_from_numpy(input_data,
                                                   binary_data=config[3])
                else:
                    inputs[-1].set_data_from_numpy(input_data)
            else:
                # Register necessary shared memory regions/handles
                su.register_add_either_shm_regions(
                    inputs, output_req, shm_region_name_prefix,
                    (shm_ip_handles, shm_op_handles), io_num, input_byte_size,
                    output_byte_size, use_system_shared_memory,
                    use_cuda_shared_memory, triton_client)

        if config[2]:
            user_data = UserData()
            triton_client.start_stream(partial(completion_callback, user_data))
            try:
                results = triton_client.async_stream_infer(
                    model_name,
                    inputs,
                    model_version=model_version,
                    outputs=output_req,
                    request_id=str(_unique_request_id()),
                    priority=priority,
                    timeout=timeout_us)
            except Exception as e:
                triton_client.stop_stream()
                raise e
            triton_client.stop_stream()
            (results, error) = user_data._completed_requests.get()
            if error is not None:
                raise error
        else:
            results = triton_client.infer(model_name,
                                          inputs,
                                          model_version=model_version,
                                          outputs=output_req,
                                          request_id=str(_unique_request_id()),
                                          priority=priority,
                                          timeout=timeout_us)

        last_response = results.get_response()

        if config[1] == "http":
            response_model_name = last_response["model_name"]
            if model_version != "":
                response_model_version = last_response["model_version"]
            response_outputs = last_response["outputs"]
        else:
            response_model_name = last_response.model_name
            if model_version != "":
                response_model_version = last_response.model_version
            response_outputs = last_response.outputs

        tester.assertEqual(response_model_name, model_name)

        if model_version != "":
            tester.assertEqual(response_model_version, model_version)

        tester.assertEqual(len(response_outputs), io_cnt)

        for result in response_outputs:
            if config[1] == "http":
                result_name = result["name"]
            else:
                result_name = result.name

            tester.assertTrue(result_name in expected_dict)
            if use_system_shared_memory or use_cuda_shared_memory:
                if pf == "libtorch" or pf == "libtorch_nobatch":
                    io_num = int(result_name.split("OUTPUT__")[1])
                else:
                    io_num = int(result_name.split("OUTPUT")[1])
                shm_handle = shm_op_handles[io_num]

                output = results.get_output(result_name)
                if config[1] == "http":
                    output_datatype = output['datatype']
                    output_shape = output['shape']
                else:
                    output_datatype = output.datatype
                    output_shape = output.shape
                output_dtype = triton_to_np_dtype(output_datatype)
            if use_system_shared_memory:
                output_data = shm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape)
            elif use_cuda_shared_memory:
                output_data = cudashm.get_contents_as_numpy(
                    shm_handle, output_dtype, output_shape)
            else:
                output_data = results.as_numpy(result_name)

            if (output_data.dtype == np.object) and (config[3] == False):
                output_data = output_data.astype(np.bytes_)

            expected = expected_dict[result_name]
            tester.assertEqual(output_data.shape, expected.shape)
            tester.assertTrue(
                np.array_equal(output_data, expected),
                "{}, {}, expected: {}, got {}".format(model_name, result_name,
                                                      expected, output_data))

    if len(shm_ip_handles) != 0:
        for io_num in range(io_cnt):
            if use_cuda_shared_memory:
                triton_client.unregister_cuda_shared_memory(
                    shm_region_name_prefix[0] + str(io_num) + '_data')
                triton_client.unregister_cuda_shared_memory(
                    shm_region_name_prefix[1] + str(io_num) + '_data')
                cudashm.destroy_shared_memory_region(shm_ip_handles[io_num])
                cudashm.destroy_shared_memory_region(shm_op_handles[io_num])
            else:
                triton_client.unregister_system_shared_memory(
                    shm_region_name_prefix[0] + str(io_num) + '_data')
                triton_client.unregister_system_shared_memory(
                    shm_region_name_prefix[1] + str(io_num) + '_data')
                shm.destroy_shared_memory_region(shm_ip_handles[io_num])
                shm.destroy_shared_memory_region(shm_op_handles[io_num])

    return results
Example #51
    def setData(self, index, value, role=Qt.DisplayRole):
        """Set the value to the index position depending on Qt::ItemDataRole and data type of the column

        Args:
            index (QtCore.QModelIndex): Index to define column and row.
            value (object): new value.
            role (Qt::ItemDataRole): Use this role to specify what you want to do.

        Raises:
            TypeError: If the value could not be converted to a known datatype.

        Returns:
            True if value is changed. Calls layoutChanged after update.
            False if value is not different from original value.

        """
        if not index.isValid() or not self.editable:
            return False

        if value != index.data(role):

            self.layoutAboutToBeChanged.emit()

            row = self._dataFrame.index[index.row()]
            col = self._dataFrame.columns[index.column()]
            #print 'before change: ', index.data().toUTC(), self._dataFrame.iloc[row][col]
            columnDtype = self._dataFrame[col].dtype

            if columnDtype == object:
                pass

            elif columnDtype in self._intDtypes:
                dtypeInfo = numpy.iinfo(columnDtype)
                if value < dtypeInfo.min:
                    value = dtypeInfo.min
                elif value > dtypeInfo.max:
                    value = dtypeInfo.max

            elif columnDtype in self._floatDtypes:
                value = numpy.float64(value).astype(columnDtype)

            elif columnDtype in self._boolDtypes:
                value = numpy.bool_(value)

            elif columnDtype in self._dateDtypes:
                # convert the given value to a compatible datetime object.
                # if the conversion could not be done, keep the original
                # value.
                if isinstance(value, QtCore.QDateTime):
                    value = value.toString(self.timestampFormat)
                try:
                    value = pandas.Timestamp(value)
                except Exception:
                    raise Exception(
                        "Can't convert '{0}' into a datetime".format(value))
                    # return False
            else:
                raise TypeError("try to set unhandled data type")

            self._dataFrame.set_value(row, col, value)

            #print 'after change: ', value, self._dataFrame.iloc[row][col]
            self.layoutChanged.emit()
            return True
        else:
            return False
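
As a standalone illustration of the integer branch above (a hypothetical helper, not part of the model class): incoming values are clamped to the column dtype's representable range via numpy.iinfo instead of being allowed to overflow.

import numpy

def clamp_to_dtype(value, column_dtype):
    # Mirror the int-column handling in setData: out-of-range values are
    # clipped to the dtype's min/max rather than wrapping around.
    info = numpy.iinfo(column_dtype)
    return max(info.min, min(int(value), info.max))

print(clamp_to_dtype(300, numpy.int8))   # 127
print(clamp_to_dtype(-300, numpy.int8))  # -128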
Example #52
def test_cummin_cummax():
    # GH 15048
    num_types = [np.int32, np.int64, np.float32, np.float64]
    num_mins = [
        np.iinfo(np.int32).min,
        np.iinfo(np.int64).min,
        np.finfo(np.float32).min,
        np.finfo(np.float64).min
    ]
    num_max = [
        np.iinfo(np.int32).max,
        np.iinfo(np.int64).max,
        np.finfo(np.float32).max,
        np.finfo(np.float64).max
    ]
    base_df = pd.DataFrame({
        'A': [1, 1, 1, 1, 2, 2, 2, 2],
        'B': [3, 4, 3, 2, 2, 3, 2, 1]
    })
    expected_mins = [3, 3, 3, 2, 2, 2, 2, 1]
    expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]

    for dtype, min_val, max_val in zip(num_types, num_mins, num_max):
        df = base_df.astype(dtype)

        # cummin
        expected = pd.DataFrame({'B': expected_mins}).astype(dtype)
        result = df.groupby('A').cummin()
        tm.assert_frame_equal(result, expected)
        result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
        tm.assert_frame_equal(result, expected)

        # Test cummin w/ min value for dtype
        df.loc[[2, 6], 'B'] = min_val
        expected.loc[[2, 3, 6, 7], 'B'] = min_val
        result = df.groupby('A').cummin()
        tm.assert_frame_equal(result, expected)
        expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
        tm.assert_frame_equal(result, expected)

        # cummax
        expected = pd.DataFrame({'B': expected_maxs}).astype(dtype)
        result = df.groupby('A').cummax()
        tm.assert_frame_equal(result, expected)
        result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
        tm.assert_frame_equal(result, expected)

        # Test cummax w/ max value for dtype
        df.loc[[2, 6], 'B'] = max_val
        expected.loc[[2, 3, 6, 7], 'B'] = max_val
        result = df.groupby('A').cummax()
        tm.assert_frame_equal(result, expected)
        expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
        tm.assert_frame_equal(result, expected)

    # Test nan in some values
    base_df.loc[[0, 2, 4, 6], 'B'] = np.nan
    expected = pd.DataFrame(
        {'B': [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]})
    result = base_df.groupby('A').cummin()
    tm.assert_frame_equal(result, expected)
    expected = (base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame())
    tm.assert_frame_equal(result, expected)

    expected = pd.DataFrame(
        {'B': [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]})
    result = base_df.groupby('A').cummax()
    tm.assert_frame_equal(result, expected)
    expected = (base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame())
    tm.assert_frame_equal(result, expected)

    # Test nan in entire column
    base_df['B'] = np.nan
    expected = pd.DataFrame({'B': [np.nan] * 8})
    result = base_df.groupby('A').cummin()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').cummax()
    tm.assert_frame_equal(expected, result)
    result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(expected, result)

    # GH 15561
    df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001'])))
    expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b')
    for method in ['cummax', 'cummin']:
        result = getattr(df.groupby('a')['b'], method)()
        tm.assert_series_equal(expected, result)

    # GH 15635
    df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
    result = df.groupby('a').b.cummax()
    expected = pd.Series([2, 1, 2], name='b')
    tm.assert_series_equal(result, expected)

    df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
    result = df.groupby('a').b.cummin()
    expected = pd.Series([1, 2, 1], name='b')
    tm.assert_series_equal(result, expected)
Example #53
def generate_test_data(dtype, size=SIZE, order="C"):
    return np.array(
        np.random.uniform(np.iinfo(dtype).min,
                          np.iinfo(dtype).max, size).astype(dtype),
        order=order,
    )
Example #54
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        #  so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        if accum_func == np.minimum.accumulate:
            # Note: the accum_func comparison fails as an "is" comparison
            y = values.view("i8")
            y[mask] = np.iinfo(np.int64).max
            changed = True
        else:
            y = values
            changed = False

        result = accum_func(y.view("i8"), axis=0)
        if skipna:
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if changed:
            # restore NaT elements
            y[mask] = iNaT  # TODO: could try/finally for this?

        if isinstance(values, np.ndarray):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray
            result = type(values)._from_sequence(result, dtype=orig_dtype)

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        result = accum_func(values, axis=0)

    return result
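
A minimal standalone sketch (plain numpy datetime64 arrays, not pandas blocks) of the NaT trick used above for np.minimum.accumulate: NaT is stored as the most negative int64, so it is masked to np.iinfo(np.int64).max before accumulating and restored afterwards.

import numpy as np

values = np.array(['2001-01-01', 'NaT', '2000-01-01'], dtype='datetime64[ns]')
mask = np.isnat(values)

y = values.view('i8').copy()
y[mask] = np.iinfo(np.int64).max          # NaT would otherwise win every minimum
result = np.minimum.accumulate(y).view(values.dtype)
result[mask] = np.datetime64('NaT')       # skipna-style: put NaT back in place
print(result)  # cummin of [2001-01-01, NaT, 2000-01-01] with NaT skipped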
Example #55
def env():
    np.set_printoptions(linewidth=400,
                        threshold=np.iinfo('int64').max,
                        suppress=True)
    with buzz.Env(allow_complex_footprint=1):
        yield
Example #56
    def test_min_int(self):
        a = np.array([np.iinfo(np.int_).min], dtype=np.int_)
        # Should not raise:
        assert_allclose(a, a)
Example #57
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python import ipu
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import googletest
from tensorflow.python.training import gradient_descent

# Error threshold for forward pass test.
THRESHOLD = 0.03

# Dimensions of the random data tensor.
DIMS = (1024, 1024, 4)

# Initialise with a random seed.
SEED = np.random.randint(np.iinfo(np.int32).max, size=[2], dtype=np.int32)

# Number of times to verify output for a given seed.
SEED_TEST_REPETITIONS = 6


def build_test_cases(exhaustive=False):
    # Dropout rate(s) to test.
    rate = [0.1, 0.5, 0.9] if exhaustive else [0.5]

    # User specified and non-specified cases.
    seed = [SEED, None]

    # Shape of the dropout.
    # Note that shaping the dropout such that a very large portion of
    # the input weights are dropped will fail the test criteria, as expected.
Example #58
def setup_module():
    """
    A function with a 'magic name' executed automatically before each pytest module
    (file of tests) that helps reproduce a test segfault by setting and outputting the rng seeds.

    The segfault-debug procedure on a module called test_module.py is:

    1. run "pytest --verbose test_module.py".  A seg-faulting output might be:

       [INFO] np, mx and python random seeds = 4018804151
       test_module.test1 ... ok
       test_module.test2 ... Illegal instruction (core dumped)

    2. Copy the module-starting seed into the next command, then run:

       MXNET_MODULE_SEED=4018804151 pytest --logging-level=DEBUG --verbose test_module.py

       Output might be:

       [WARNING] **** module-level seed is set: all tests running deterministically ****
       [INFO] np, mx and python random seeds = 4018804151
       test_module.test1 ... [DEBUG] np and mx random seeds = 3935862516
       ok
       test_module.test2 ... [DEBUG] np and mx random seeds = 1435005594
       Illegal instruction (core dumped)

    3. Copy the segfaulting-test seed into the command:
       MXNET_TEST_SEED=1435005594 pytest --logging-level=DEBUG --verbose test_module.py::test2
       Output might be:

       [INFO] np, mx and python random seeds = 2481884723
       test_module.test2 ... [DEBUG] np and mx random seeds = 1435005594
       Illegal instruction (core dumped)

    4. Finally reproduce the segfault directly under gdb (might need additional os packages)
       by editing the bottom of test_module.py to be

       if __name__ == '__main__':
           logging.getLogger().setLevel(logging.DEBUG)
           test2()

       MXNET_TEST_SEED=1435005594 gdb -ex r --args python test_module.py

    5. When finished debugging the segfault, remember to unset any exported MXNET_ seed
       variables in the environment to return to non-deterministic testing (a good thing).
    """

    module_seed_str = os.getenv('MXNET_MODULE_SEED')
    logger = default_logger()
    if module_seed_str is None:
        seed = np.random.randint(0, np.iinfo(np.int32).max)
    else:
        seed = int(module_seed_str)
        logger.warn(
            '*** module-level seed is set: all tests running deterministically ***'
        )
    logger.info(
        'Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=%s to reproduce.',
        seed)
    np.random.seed(seed)
    mx.random.seed(seed)
    random.seed(seed)
    # The MXNET_TEST_SEED environment variable will override MXNET_MODULE_SEED for tests with
    #  the 'with_seed()' decoration.  Inform the user of this once here at the module level.
    if os.getenv('MXNET_TEST_SEED') is not None:
        logger.warn(
            '*** test-level seed set: all "@with_seed()" tests run deterministically ***'
        )
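
The per-test 'with_seed()' decoration mentioned above belongs to the MXNet test utilities; a rough, hypothetical sketch of the idea (not the actual implementation) is:

import functools
import os
import random

import numpy as np


def with_seed_sketch(test_fn):
    # Hypothetical illustration: honour MXNET_TEST_SEED if set, otherwise draw a
    # fresh seed, and report it so a failing test can be reproduced later.
    # (The real decorator also seeds mx.random, omitted here to stay self-contained.)
    @functools.wraps(test_fn)
    def wrapper(*args, **kwargs):
        seed_str = os.getenv('MXNET_TEST_SEED')
        seed = int(seed_str) if seed_str else np.random.randint(0, np.iinfo(np.int32).max)
        np.random.seed(seed)
        random.seed(seed)
        print('np and python random seeds = %d' % seed)
        return test_fn(*args, **kwargs)
    return wrapper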
Example #59
def locate(raw_image, diameter, minmass=100., maxsize=None, separation=None,
           noise_size=1, smoothing_size=None, threshold=None, invert=False,
           percentile=64, topn=None, preprocess=True, max_iterations=10,
           filter_before=True, filter_after=True,
           characterize=True, engine='auto'):
    """Locate Gaussian-like blobs of some approximate size in an image.

    Preprocess the image by performing a band pass and a threshold.
    Locate all peaks of brightness, characterize the neighborhoods of the peaks
    and take only those with given total brightness ("mass"). Finally,
    refine the positions of each peak.

    Parameters
    ----------
    image : image array (any dimensions)
    diameter : feature size in px
        This may be a single number or a tuple giving the feature's
        extent in each dimension, useful when the dimensions do not have
        equal resolution (e.g. confocal microscopy). The tuple order is the
        same as the image shape, conventionally (z, y, x) or (y, x). The
        number(s) must be odd integers. When in doubt, round up.
    minmass : minimum integrated brightness
        Default is 100, but a good value is often much higher. This is a
        crucial parameter for eliminating spurious features.
    maxsize : maximum radius-of-gyration of brightness, default None
    separation : feature separation, in pixels
        Default is diameter + 1. May be a tuple, see diameter for details.
    noise_size : width of Gaussian blurring kernel, in pixels
        Default is 1. May be a tuple, see diameter for details.
    smoothing_size : size of boxcar smoothing, in pixels
        Default is diameter. May be a tuple, see diameter for details.
    threshold : Clip bandpass result below this value.
        Default None, passed through to bandpass.
    invert : Set to True if features are darker than background. False by
        default.
    percentile : Features must have a peak brighter than pixels in this
        percentile. This helps eliminate spurious peaks.
    topn : Return only the N brightest features above minmass.
        If None (default), return all features above minmass.

    Returns
    -------
    DataFrame([x, y, mass, size, ecc, signal])
        where mass means total integrated brightness of the blob,
        size means the radius of gyration of its Gaussian-like profile,
        and ecc is its eccentricity (0 is circular).

    Other Parameters
    ----------------
    preprocess : Set to False to turn off bandpass preprocessing.
    max_iterations : integer
        max number of loops to refine the center of mass, default 10
    filter_before : boolean
        Use minmass (and maxsize, if set) to eliminate spurious features
        based on their estimated mass and size before refining position.
        True by default for performance.
    filter_after : boolean
        Use final characterizations of mass and size to eliminate spurious
        features. True by default.
    characterize : boolean
        Compute "extras": eccentricity, signal, ep. True by default.
    engine : {'auto', 'python', 'numba'}

    See Also
    --------
    batch : performs location on many images in batch

    Notes
    -----
    Locate works with a coordinate system that has its origin at the center of
    pixel (0, 0). In almost all cases this will be the topleft pixel: the
    y-axis is pointing downwards.

    This is an implementation of the Crocker-Grier centroid-finding algorithm.
    [1]_

    References
    ----------
    .. [1] Crocker, J.C., Grier, D.G. http://dx.doi.org/10.1006/jcis.1996.0217

    """

    # Validate parameters and set defaults.
    raw_image = np.squeeze(raw_image)
    shape = raw_image.shape
    ndim = len(shape)

    diameter = validate_tuple(diameter, ndim)
    diameter = tuple([int(x) for x in diameter])
    if not np.all([x & 1 for x in diameter]):
        raise ValueError("Feature diameter must be an odd integer. Round up.")
    radius = tuple([x//2 for x in diameter])

    if separation is None:
        separation = tuple([x + 1 for x in diameter])
    else:
        separation = validate_tuple(separation, ndim)

    if smoothing_size is None:
        smoothing_size = diameter
    else:
        smoothing_size = validate_tuple(smoothing_size, ndim)

    noise_size = validate_tuple(noise_size, ndim)

    # Don't do characterization for rectangular pixels/voxels
    if diameter[1:] != diameter[:-1]:
        characterize = False

    # Check whether the image looks suspiciously like a color image.
    if 3 in shape or 4 in shape:
        dim = raw_image.ndim
        warnings.warn("I am interpreting the image as {0}-dimensional. "
                      "If it is actually a {1}-dimensional color image, "
                      "convert it to grayscale first.".format(dim, dim-1))
    if preprocess:
        if invert:
            # It is tempting to do this in place, but if it is called multiple
            # times on the same image, chaos reigns.
            if np.issubdtype(raw_image.dtype, np.integer):
                max_value = np.iinfo(raw_image.dtype).max
                raw_image = raw_image ^ max_value
            else:
                # To avoid degrading performance, assume gamut is zero to one.
                # Have you ever encountered an image of unnormalized floats?
                raw_image = 1 - raw_image
        image = bandpass(raw_image, noise_size, smoothing_size, threshold)
    else:
        image = raw_image.copy()
    # Coerce the image into integer type. Rescale to fill dynamic range.
    if np.issubdtype(raw_image.dtype, np.integer):
        dtype = raw_image.dtype
    else:
        dtype = np.uint8
    image = scale_to_gamut(image, dtype)

    # Set up a DataFrame for the final results.
    if image.ndim < 4:
        coord_columns = ['x', 'y', 'z'][:image.ndim]
    else:
        coord_columns = map(lambda i: 'x' + str(i), range(image.ndim))
    char_columns = ['mass']
    if characterize:
        char_columns += ['size', 'ecc', 'signal']
    columns = coord_columns + char_columns
    # The 'ep' column is joined on at the end, so we need this...
    if characterize:
        all_columns = columns + ['ep']
    else:
        all_columns = columns

    # Find local maxima.
    # Define zone of exclusion at edges of image, avoiding
    #   - Features with incomplete image data ("radius")
    #   - Extended particles that cannot be explored during subpixel
    #       refinement ("separation")
    #   - Invalid output of the bandpass step ("smoothing_size")
    margin = tuple([max(rad, sep // 2 - 1, sm // 2) for (rad, sep, sm) in
                    zip(radius, separation, smoothing_size)])
    coords = local_maxima(image, radius, percentile, margin)
    count_maxima = coords.shape[0]

    if count_maxima == 0:
        return DataFrame(columns=all_columns)

    # Proactively filter based on estimated mass/size before
    # refining positions.
    if filter_before:
        approx_mass = np.empty(count_maxima)  # initialize to avoid appending
        for i in range(count_maxima):
            approx_mass[i] = estimate_mass(image, radius, coords[i])
        condition = approx_mass > minmass
        if maxsize is not None:
            approx_size = np.empty(count_maxima)
            for i in range(count_maxima):
                approx_size[i] = estimate_size(image, radius, coords[i],
                                               approx_mass[i])
            condition &= approx_size < maxsize
        coords = coords[condition]
    count_qualified = coords.shape[0]

    if count_qualified == 0:
        warnings.warn("No maxima survived mass- and size-based prefiltering.")
        return DataFrame(columns=all_columns)

    # Refine their locations and characterize mass, size, etc.
    refined_coords = refine(raw_image, image, radius, coords, separation,
                            max_iterations, engine, characterize)

    # Filter again, using final ("exact") mass -- and size, if set.
    MASS_COLUMN_INDEX = image.ndim
    SIZE_COLUMN_INDEX = image.ndim + 1
    exact_mass = refined_coords[:, MASS_COLUMN_INDEX]
    if filter_after:
        condition = exact_mass > minmass
        if maxsize is not None:
            exact_size = refined_coords[:, SIZE_COLUMN_INDEX]
            condition &= exact_size < maxsize
        refined_coords = refined_coords[condition]
        exact_mass = exact_mass[condition]  # used below by topn
    count_qualified = refined_coords.shape[0]

    if count_qualified == 0:
        warnings.warn("No maxima survived mass- and size-based filtering.")
        return DataFrame(columns=all_columns)

    if topn is not None and count_qualified > topn:
        if topn == 1:
            # special case for high performance and correct shape
            refined_coords = refined_coords[np.argmax(exact_mass)]
            refined_coords = refined_coords.reshape(1, -1)
        else:
            refined_coords = refined_coords[np.argsort(exact_mass)][-topn:]

    f = DataFrame(refined_coords, columns=columns)

    # Estimate the uncertainty in position using signal (measured in refine)
    # and noise (measured here below).
    if characterize:
        black_level, noise = uncertainty.measure_noise(
            raw_image, diameter, threshold)
        f['signal'] -= black_level
        ep = uncertainty.static_error(f, noise, diameter[0], noise_size[0])
        f = f.join(ep)

    # If this is a pims Frame object, it has a frame number.
    # Tag it on; this is helpful for parallelization.
    if hasattr(raw_image, 'frame_no') and raw_image.frame_no is not None:
        f['frame'] = raw_image.frame_no
    return f
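
A small standalone sketch of the invert step above: for integer images the full dynamic range is flipped by XOR-ing with the dtype's np.iinfo maximum, while float images (assumed normalized to [0, 1]) are simply subtracted from 1.

import numpy as np

def invert_sketch(img):
    # Dark features become bright: integers flip over the full dtype range,
    # floats are assumed to lie in [0, 1].
    if np.issubdtype(img.dtype, np.integer):
        return img ^ np.iinfo(img.dtype).max
    return 1 - img

print(invert_sketch(np.array([0, 128, 255], dtype=np.uint8)))  # [255 127   0]
print(invert_sketch(np.array([0.0, 0.25, 1.0])))               # [1.   0.75 0.  ]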
Example #60
def clip_add(image1: np.ndarray, image2: np.ndarray, dtype: np.dtype = np.uint16):
    """Clip the summed image to the dtype extrema. Otherwise the bits will flip."""
    # Sum in a wider type first so integer inputs cannot wrap around before clipping.
    summed = image1.astype(np.float64) + image2
    return np.clip(summed, np.iinfo(dtype).min, np.iinfo(dtype).max).astype(dtype)
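
For example (a hedged usage sketch), adding two bright uint16 frames directly wraps around, while clip_add saturates at the maximum reported by np.iinfo(np.uint16):

import numpy as np

a = np.array([60000, 1000], dtype=np.uint16)
b = np.array([10000, 2000], dtype=np.uint16)

print(a + b)           # [ 4464  3000] -- 70000 wraps past the uint16 maximum
print(clip_add(a, b))  # [65535  3000] -- clipped to np.iinfo(np.uint16).max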