Example #1
def nextafter(x, direction, dtype, itemsize):
    """Return the next representable neighbor of x in the appropriate
    direction."""

    assert direction in [-1, 0, +1]
    assert dtype.kind == "S" or type(x) in (bool, int, float)

    if direction == 0:
        return x

    if dtype.kind == "S":
        return string_next_after(x, direction, itemsize)

    if dtype.kind in ['b']:
        return bool_type_next_after(x, direction, itemsize)
    elif dtype.kind in ['i', 'u']:
        return int_type_next_after(x, direction, itemsize)
    elif dtype.kind == "f":
        if direction < 0:
            return numpy.nextafter(x, x - 1)
        else:
            return numpy.nextafter(x, x + 1)

    # elif dtype.name == "float32":
    #    if direction < 0:
    #        return PyNextAfterF(x,x-1)
    #    else:
    #        return PyNextAfterF(x,x + 1)
    # elif dtype.name == "float64":
    #    if direction < 0:
    #        return PyNextAfter(x,x-1)
    #    else:
    #        return PyNextAfter(x,x + 1)

    raise TypeError("data type ``%s`` is not supported" % dtype)
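For reference, a minimal standalone sketch (separate from the function above) of the float branch: numpy.nextafter(x, y) returns the closest representable float to x in the direction of y, one ulp away.

import numpy as np

x = np.float64(1.0)
up = np.nextafter(x, x + 1)     # next representable float above 1.0
down = np.nextafter(x, x - 1)   # next representable float below 1.0
assert up - x == np.finfo(np.float64).eps
assert x - down == np.finfo(np.float64).eps / 2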
Example #2
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
    """Shift a point to the interior of a feasible region.
    
    Each element of the returned vector is at least at a relative distance
    `rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used.
    """
    x_new = x.copy()

    active = find_active_constraints(x, lb, ub, rstep)
    lower_mask = np.equal(active, -1)
    upper_mask = np.equal(active, 1)

    if rstep == 0:
        x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
        x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
    else:
        x_new[lower_mask] = (lb[lower_mask] +
                             rstep * np.maximum(1, np.abs(lb[lower_mask])))
        x_new[upper_mask] = (ub[upper_mask] -
                             rstep * np.maximum(1, np.abs(ub[upper_mask])))

    tight_bounds = (x_new < lb) | (x_new > ub)
    x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])

    return x_new
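A minimal standalone sketch of what the ``rstep == 0`` branch accomplishes (not the library routine itself), assuming ``lb < ub`` elementwise: points sitting exactly on a bound move to the nearest representable float strictly inside the box.

import numpy as np

lb = np.array([0.0, -1.0])
ub = np.array([1.0, 1.0])
x = np.array([0.0, 1.0])                 # both elements sit exactly on a bound

x_interior = np.where(x <= lb, np.nextafter(lb, ub),
                      np.where(x >= ub, np.nextafter(ub, lb), x))
assert np.all((x_interior > lb) & (x_interior < ub))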
Example #3
    def __init__(self, value_1, value_2=None):
        # use Decimal as exact value holder (because of arbitrary precision)
        from decimal import Decimal
        # nextafter(x, y) returns next machine number after x in direction of y
        from numpy import nextafter

        # creating interval from middle value
        if value_2 is None:  # "not value_2" would wrongly treat a right edge of 0 as missing
            exact = Decimal(value_1)
            float_repr = Decimal("{0:0.70f}".format(float(exact)))
            if exact == float_repr:
                self.lv = float(float_repr)
                self.rv = float(float_repr)
            elif exact > float_repr:
                self.lv = float(float_repr)
                self.rv = nextafter(self.lv, float('Inf'))
            elif exact < float_repr:
                self.rv = float(float_repr)
                self.lv = nextafter(self.rv, -float('Inf'))
        # creating interval from left and right edge
        else:
            exact_left = Decimal(value_1)
            exact_right = Decimal(value_2)
            if exact_left > exact_right:
                exact_left, exact_right = exact_right, exact_left
            float_repr_left = Decimal(float(exact_left))
            float_repr_right = Decimal(float(exact_right))
            if exact_left < float_repr_left:
                self.lv = nextafter(float(float_repr_left), -float('Inf'))
            else:
                self.lv = float(float_repr_left)
            if exact_right > float_repr_right:
                self.rv = nextafter(float(float_repr_right), float('Inf'))
            else:
                self.rv = float(float_repr_right)
Example #4
    def _logpmf(self, x, mu, alpha, p):
        mu_p = mu ** (p - 1.)
        a1 = np.maximum(np.nextafter(0, 1), 1 + alpha * mu_p)
        a2 = np.maximum(np.nextafter(0, 1), mu + (a1 - 1.) * x)
        logpmf_ = np.log(mu) + (x - 1.) * np.log(a2)
        logpmf_ -= x * np.log(a1) + gammaln(x + 1.) + a2 / a1
        return logpmf_
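A separate illustration of the ``np.maximum(np.nextafter(0, 1), ...)`` guard used above: it clamps its argument to the smallest positive float so ``np.log`` never receives an exact zero.

import numpy as np

tiny = np.nextafter(0, 1)          # smallest positive (subnormal) float64
a = np.array([0.0, 1e-300, 2.0])
safe = np.maximum(tiny, a)         # the exact zero is replaced by `tiny`
with np.errstate(divide='raise'):
    np.log(safe)                   # finite everywhere, no divide-by-zero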
Example #5
def make_strictly_feasible(x, lb, ub, rstep=0):
    """Shift the point in the slightest possible way to the interior.

    If ``rstep=0`` the function uses np.nextafter, otherwise the shift is
    `rstep` times (1 + the absolute value of the bound).

    The utility of this function is questionable to me. Maybe bigger shifts
    should be used, or maybe this function is not necessary at all despite
    the theoretical requirement of our interior point algorithm.
    """
    x_new = x.copy()

    m = x <= lb
    if rstep == 0:
        x_new[m] = np.nextafter(lb[m], ub[m])
    else:
        x_new[m] = lb[m] + rstep * (1 + np.abs(lb[m]))

    m = x >= ub
    if rstep == 0:
        x_new[m] = np.nextafter(ub[m], lb[m])
    else:
        x_new[m] = ub[m] - rstep * (1 + np.abs(ub[m]))

    return x_new
Example #6
    def test_half_fpe(self):
        oldsettings = np.seterr(all="raise")
        try:
            sx16 = np.array((1e-4,), dtype=float16)
            bx16 = np.array((1e4,), dtype=float16)
            sy16 = float16(1e-4)
            by16 = float16(1e4)

            # Underflow errors
            assert_raises_fpe("underflow", lambda a, b: a * b, sx16, sx16)
            assert_raises_fpe("underflow", lambda a, b: a * b, sx16, sy16)
            assert_raises_fpe("underflow", lambda a, b: a * b, sy16, sx16)
            assert_raises_fpe("underflow", lambda a, b: a * b, sy16, sy16)
            assert_raises_fpe("underflow", lambda a, b: a / b, sx16, bx16)
            assert_raises_fpe("underflow", lambda a, b: a / b, sx16, by16)
            assert_raises_fpe("underflow", lambda a, b: a / b, sy16, bx16)
            assert_raises_fpe("underflow", lambda a, b: a / b, sy16, by16)
            assert_raises_fpe("underflow", lambda a, b: a / b, float16(2.0 ** -14), float16(2 ** 11))
            assert_raises_fpe("underflow", lambda a, b: a / b, float16(-2.0 ** -14), float16(2 ** 11))
            assert_raises_fpe("underflow", lambda a, b: a / b, float16(2.0 ** -14 + 2 ** -24), float16(2))
            assert_raises_fpe("underflow", lambda a, b: a / b, float16(-2.0 ** -14 - 2 ** -24), float16(2))
            assert_raises_fpe("underflow", lambda a, b: a / b, float16(2.0 ** -14 + 2 ** -23), float16(4))

            # Overflow errors
            assert_raises_fpe("overflow", lambda a, b: a * b, bx16, bx16)
            assert_raises_fpe("overflow", lambda a, b: a * b, bx16, by16)
            assert_raises_fpe("overflow", lambda a, b: a * b, by16, bx16)
            assert_raises_fpe("overflow", lambda a, b: a * b, by16, by16)
            assert_raises_fpe("overflow", lambda a, b: a / b, bx16, sx16)
            assert_raises_fpe("overflow", lambda a, b: a / b, bx16, sy16)
            assert_raises_fpe("overflow", lambda a, b: a / b, by16, sx16)
            assert_raises_fpe("overflow", lambda a, b: a / b, by16, sy16)
            assert_raises_fpe("overflow", lambda a, b: a + b, float16(65504), float16(17))
            assert_raises_fpe("overflow", lambda a, b: a - b, float16(-65504), float16(17))
            assert_raises_fpe("overflow", np.nextafter, float16(65504), float16(np.inf))
            assert_raises_fpe("overflow", np.nextafter, float16(-65504), float16(-np.inf))
            assert_raises_fpe("overflow", np.spacing, float16(65504))

            # Invalid value errors
            assert_raises_fpe("invalid", np.divide, float16(np.inf), float16(np.inf))
            assert_raises_fpe("invalid", np.spacing, float16(np.inf))
            assert_raises_fpe("invalid", np.spacing, float16(np.nan))
            assert_raises_fpe("invalid", np.nextafter, float16(np.inf), float16(0))
            assert_raises_fpe("invalid", np.nextafter, float16(-np.inf), float16(0))
            assert_raises_fpe("invalid", np.nextafter, float16(0), float16(np.nan))

            # These should not raise
            float16(65472) + float16(32)
            float16(2 ** -13) / float16(2)
            float16(2 ** -14) / float16(2 ** 10)
            np.spacing(float16(-65504))
            np.nextafter(float16(65504), float16(-np.inf))
            np.nextafter(float16(-65504), float16(np.inf))
            float16(2 ** -14) / float16(2 ** 10)
            float16(-2 ** -14) / float16(2 ** 10)
            float16(2 ** -14 + 2 ** -23) / float16(2)
            float16(-2 ** -14 - 2 ** -23) / float16(2)
        finally:
            np.seterr(**oldsettings)
Example #7
    def forward_cpu(self, inputs):
        U, points = inputs
        batch_size, height, width = U.shape

        # Points just on the boundary are slightly (i.e. nextafter in float32)
        # moved inward to simplify the implementation
        points = points.copy()
        on_boundary = (points == 0)
        points[on_boundary] = np.nextafter(points[on_boundary], np.float32(1))
        x = points[:, 0]
        y = points[:, 1]
        on_boundary = (x == (width - 1))
        x[on_boundary] = np.nextafter(x[on_boundary], np.float32(0))
        on_boundary = (y == (height - 1))
        y[on_boundary] = np.nextafter(y[on_boundary], np.float32(0))

        batch_axis = np.expand_dims(np.arange(batch_size), 1)
        points_floor = np.floor(points)
        x_l = points_floor[:, 0].astype(np.int32)
        y_l = points_floor[:, 1].astype(np.int32)
        x_l = np.clip(x_l, 0, width - 1)
        y_l = np.clip(y_l, 0, height - 1)
        x_h = np.clip(x_l + 1, 0, width - 1)
        y_h = np.clip(y_l + 1, 0, height - 1)

        weight = 1.0 - (points - points_floor)
        weight_x_l = weight[:, 0]
        weight_y_l = weight[:, 1]
        weight_x_h = 1 - weight_x_l
        weight_y_h = 1 - weight_y_l

        # remove points outside of the (source) image region
        # by setting their weights to 0
        x_invalid = np.logical_or(x < 0, (width - 1) < x)
        y_invalid = np.logical_or(y < 0, (height - 1) < y)
        invalid = np.logical_or(x_invalid, y_invalid)
        weight_x_l[invalid] = 0
        weight_y_l[invalid] = 0
        weight_x_h[invalid] = 0
        weight_y_h[invalid] = 0

        U_y_l = (weight_x_l * U[batch_axis, y_l, x_l] +
                 weight_x_h * U[batch_axis, y_l, x_h])
        U_y_h = (weight_x_l * U[batch_axis, y_h, x_l] +
                 weight_x_h * U[batch_axis, y_h, x_h])
        V = weight_y_l * U_y_l + weight_y_h * U_y_h

        self.x_l = x_l
        self.y_l = y_l
        self.x_h = x_h
        self.y_h = y_h
        self.weight_x_l = weight_x_l
        self.weight_y_l = weight_y_l
        self.weight_x_h = weight_x_h
        self.weight_y_h = weight_y_h
        return (V,)
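A small standalone sketch (made-up shapes, independent of the framework code above) of the boundary nudge: a coordinate lying exactly on the far image edge is moved one representable float inward so that ``floor`` maps it into the last valid cell.

import numpy as np

width = 4
x = np.array([0.0, 1.5, 3.0], dtype=np.float32)   # 3.0 == width - 1, the far edge
on_boundary = (x == width - 1)
x[on_boundary] = np.nextafter(x[on_boundary], np.float32(0))
x_l = np.floor(x).astype(np.int32)
assert x_l.tolist() == [0, 1, 2]                  # the edge point lands in the last cell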
Example #8
def _test_nextafter(t):
    one = t(1)
    two = t(2)
    zero = t(0)
    eps = np.finfo(t).eps
    assert_(np.nextafter(one, two) - one == eps)
    assert_(np.nextafter(one, zero) - one < 0)
    assert_(np.isnan(np.nextafter(np.nan, one)))
    assert_(np.isnan(np.nextafter(one, np.nan)))
    assert_(np.nextafter(one, one) == one)
Example #9
def test_nextafter():
    for t in [np.float32, np.float64, np.longdouble]:
        one = t(1)
        two = t(2)
        zero = t(0)
        eps = np.finfo(t).eps
        assert np.nextafter(one, two) - one == eps
        assert np.nextafter(one, zero) - one < 0
        assert np.isnan(np.nextafter(np.nan, one))
        assert np.isnan(np.nextafter(one, np.nan))
        assert np.nextafter(one, one) == one
Example #10
def ranges_to_weight_table(ranges):
    """
    Create a table of weights from ranges. Only the edge points of each range
    are included. Each edge point appears twice: once with the weight value
    just inside the range and once with zero just outside it, so that the
    weights can easily be interpolated.

    Weights of overlapping intervals are summed.

    Assumes 64-bit floats.

    :param ranges: list of triples (edge1, edge2, weight)
    :return: an Orange.data.Table
    """

    values = {}

    inf = float("inf")
    minf = float("-inf")

    def dict_to_numpy(d):
        x = []
        y = []
        for a, b in d.items():
            x.append(a)
            y.append(b)
        return np.array(x), np.array([y])

    for l, r, w in ranges:
        l, r = min(l, r), max(l, r)
        positions = [nextafter(l, minf), l, r, nextafter(r, inf)]
        weights = [0., float(w), float(w), 0.]

        all_positions = list(set(positions) | set(values))  # new and old positions

        # current values on all position
        x, y = dict_to_numpy(values)
        current = interp1d_with_unknowns_numpy(x, y, all_positions)[0]
        current[np.isnan(current)] = 0

        # new values on all positions
        new = interp1d_with_unknowns_numpy(np.array(positions), np.array([weights]),
                                           all_positions)[0]
        new[np.isnan(new)] = 0

        # update values
        for p, f in zip(all_positions, current + new):
            values[p] = f

    x, y = dict_to_numpy(values)
    dom = Orange.data.Domain([Orange.data.ContinuousVariable(name=str(float(a))) for a in x])
    data = Orange.data.Table.from_numpy(dom, y)
    return data
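A NumPy-only illustration of the docstring's trick (Orange is not needed for this part): placing a zero weight one float outside each edge lets plain linear interpolation reproduce a rectangular weight function.

import numpy as np

l, r, w = 1.0, 2.0, 5.0
xp = [np.nextafter(l, -np.inf), l, r, np.nextafter(r, np.inf)]
fp = [0.0, w, w, 0.0]
assert np.interp(0.5, xp, fp) == 0.0   # outside the range
assert np.interp(1.5, xp, fp) == w     # strictly inside the range
# the edges l and r themselves still carry the full weight w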
Example #11
    def test_half_conversion_rounding(self, float_t, shift, offset):
        # Assumes that round to even is used during casting.
        max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16)

        # Test all (positive) finite numbers, denormals are most interesting
        # however:
        f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16)
        f16s_float = f16s_patterns.view(np.float16).astype(float_t)

        # Shift the values by half a bit up or down (or do not shift):
        if shift == "up":
            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:]
        elif shift == "down":
            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1]
        else:
            f16s_float = f16s_float[1:-1]

        # Increase the float by a minimal value:
        if offset == "up":
            f16s_float = np.nextafter(f16s_float, float_t(1e50))
        elif offset == "down":
            f16s_float = np.nextafter(f16s_float, float_t(-1e50))

        # Convert back to float16 and its bit pattern:
        res_patterns = f16s_float.astype(np.float16).view(np.uint16)

        # The above calculation tries the original values, or the exact
        # mid points between the float16 values. It then further offsets them
        # by as little as possible. If no offset occurs, "round to even"
        # logic is necessary; an arbitrarily small offset should always cause
        # normal up/down rounding.

        # Calculate the expected pattern:
        cmp_patterns = f16s_patterns[1:-1].copy()

        if shift == "down" and offset != "up":
            shift_pattern = -1
        elif shift == "up" and offset != "down":
            shift_pattern = 1
        else:
            # There cannot be a shift, either shift is None, so all rounding
            # will go back to original, or shift is reduced by offset too much.
            shift_pattern = 0

        # If rounding occurs, is it normal rounding or round to even?
        if offset is None:
            # Round to even occurs, modify only non-even, cast to allow + (-1)
            cmp_patterns[0::2].view(np.int16)[...] += shift_pattern
        else:
            cmp_patterns.view(np.int16)[...] += shift_pattern

        assert_equal(res_patterns, cmp_patterns)
Example #12
    def compute_likelihoods(self, PLCs, FLCs):
        K = self.K()
        N = self.N()
        future_given_state_probs = np.nextafter(self.f_hat_conditional_densities(FLCs, label="PDF_FLCs"), 1.)
        state_given_past_probs = np.nextafter(np.vstack([self.PLC_densities(j, PLCs) for j in range(K)]), 1.).T
        ''' Weight by state likelihood '''
        n_hats = self.W.sum(axis=0) / N
        state_given_past_probs *= n_hats
        state_given_past_probs = np.nextafter(state_given_past_probs, 1.)
        ''' Normalize '''
        state_given_past_probs /= np.expand_dims(np.sum(state_given_past_probs, axis=1), axis=1)
        ''' Return mixed likelihoods '''
        return np.nextafter(np.sum(np.multiply(state_given_past_probs, future_given_state_probs), axis=1), 1.)
Example #13
    def testBearingToValueOnEquator(self):
        """Test if bearingTo() returns the expected value from a point on the equator
        """
        lon0 = 90.0
        lat0 = 0.0   # These tests only work from the equator.
        arcLen = 10.0

        trials = [
            # Along celestial equator
            dict(lon=lon0, lat=lat0, bearing=0.0,
                 lonEnd=lon0+arcLen, latEnd=lat0),
            # Along a meridian
            dict(lon=lon0, lat=lat0, bearing=90.0,
                 lonEnd=lon0, latEnd=lat0+arcLen),
            # 180 degree arc (should go to antipodal point)
            dict(lon=lon0, lat=lat0, bearing=45.0,
                 lonEnd=lon0+180.0, latEnd=-lat0),
            #
            dict(lon=lon0, lat=lat0, bearing=45.0,
                 lonEnd=lon0+90.0, latEnd=lat0 + 45.0),
            dict(lon=lon0, lat=lat0, bearing=225.0,
                 lonEnd=lon0-90.0, latEnd=lat0 - 45.0),
            dict(lon=lon0, lat=np.nextafter(-90.0, inf),
                 bearing=90.0, lonEnd=lon0, latEnd=0.0),
            dict(lon=lon0, lat=np.nextafter(-90.0, inf),
                 bearing=0.0, lonEnd=lon0 + 90.0, latEnd=0.0),
            # Argument at a pole should work
            dict(lon=lon0, lat=lat0, bearing=270.0, lonEnd=lon0, latEnd=-90.0),
            # Support for non-finite values
            dict(lon=lon0, lat=nan, bearing=nan, lonEnd=lon0, latEnd=45.0),
            dict(lon=lon0, lat=lat0, bearing=nan, lonEnd=nan, latEnd=90.0),
            dict(lon=inf, lat=lat0, bearing=nan, lonEnd=lon0, latEnd=42.0),
            dict(lon=lon0, lat=lat0, bearing=nan, lonEnd=-inf, latEnd=42.0),
        ]

        for trial in trials:
            origin = SpherePoint(trial['lon']*degrees, trial['lat']*degrees)
            end = SpherePoint(trial['lonEnd']*degrees, trial['latEnd']*degrees)
            bearing = origin.bearingTo(end)

            self.assertIsInstance(bearing, geom.Angle)
            if origin.isFinite() and end.isFinite():
                self.assertGreaterEqual(bearing.asDegrees(), 0.0)
                self.assertLess(bearing.asDegrees(), 360.0)
            if origin.separation(end).asDegrees() != 180.0:
                if not math.isnan(trial['bearing']):
                    self.assertAlmostEqual(
                        trial['bearing'], bearing.asDegrees(), 12)
                else:
                    self.assertTrue(math.isnan(bearing.asRadians()))
Example #14
File: idmp.py Project: tbekolay/phd
def similarity(v1, v2):
    # v1 and v2 are vectors
    eps = np.nextafter(0, 1)  # smallest float above zero
    dot = np.dot(v1, v2)
    dot /= max(npext.norm(v1), eps)
    dot /= max(npext.norm(v2), eps)
    return dot
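A quick usage sketch with the ``npext.norm`` helper replaced by ``np.linalg.norm`` (an assumption about its behaviour): the ``eps`` guard keeps the cosine similarity finite when one of the vectors is all zeros.

import numpy as np

def cosine_similarity(v1, v2, norm=np.linalg.norm):
    eps = np.nextafter(0, 1)           # smallest float above zero
    dot = np.dot(v1, v2)
    dot /= max(norm(v1), eps)
    dot /= max(norm(v2), eps)
    return dot

assert cosine_similarity(np.zeros(3), np.ones(3)) == 0.0   # no division by zero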
Example #15
    def test_to_corr(self):
        # Check some corner cases in to_corr

        # ajj == 1
        m = np.array([[0.1, 0], [0, 1]], dtype=float)
        m = random_correlation._to_corr(m)
        assert_allclose(m, np.array([[1, 0], [0, 0.1]]))

        # Floating point overflow; fails to compute the correct
        # rotation, but should still produce some valid rotation
        # rather than infs/nans
        with np.errstate(over='ignore'):
            g = np.array([[0, 1], [-1, 0]])

            m0 = np.array([[1e300, 0], [0, np.nextafter(1, 0)]], dtype=float)
            m = random_correlation._to_corr(m0.copy())
            assert_allclose(m, g.T.dot(m0).dot(g))

            m0 = np.array([[0.9, 1e300], [1e300, 1.1]], dtype=float)
            m = random_correlation._to_corr(m0.copy())
            assert_allclose(m, g.T.dot(m0).dot(g))

        # Zero discriminant; should set the first diag entry to 1
        m0 = np.array([[2, 1], [1, 2]], dtype=float)
        m = random_correlation._to_corr(m0.copy())
        assert_allclose(m[0,0], 1)

        # Slightly negative discriminant; should be approx correct still
        m0 = np.array([[2 + 1e-7, 1], [1, 2]], dtype=float)
        m = random_correlation._to_corr(m0.copy())
        assert_allclose(m[0,0], 1)
Example #16
  def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._loc, self._scale, n]):
        n = ops.convert_to_tensor(n)
        n_val = tensor_util.constant_value(n)
        shape = array_ops.concat(0, ([n], self.batch_shape()))
        # Sample uniformly-at-random from the open-interval (-1, 1).
        uniform_samples = random_ops.random_uniform(
            shape=shape,
            minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                self.dtype.as_numpy_dtype(0.)),
            maxval=self.dtype.as_numpy_dtype(1.),
            dtype=self.dtype,
            seed=seed)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        uniform_samples.set_shape(inferred_shape)

        return (self._loc - self._scale * math_ops.sign(uniform_samples) *
                math_ops.log(1. - math_ops.abs(uniform_samples)))
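A NumPy-only sketch of the same open-interval trick (TensorFlow not required; loc=0 and scale=1 are assumed here): starting the uniform range at ``np.nextafter(-1., 0.)`` keeps every draw strictly above -1, so ``log(1 - |u|)`` stays finite.

import numpy as np

rng = np.random.default_rng(0)
lo = np.nextafter(np.float64(-1.0), np.float64(0.0))   # smallest double > -1
u = rng.uniform(lo, 1.0, size=1000)                    # samples lie in (-1, 1)
laplace = -np.sign(u) * np.log1p(-np.abs(u))           # loc=0, scale=1
assert np.all(np.isfinite(laplace))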
Example #17
def _compute_lwork(routine, *args, **kwargs):
    """
    Round floating-point lwork returned by lapack to integer.

    Several LAPACK routines compute optimal values for LWORK, which
    they return in a floating-point variable. However, for large
    values of LWORK, single-precision floating point is not sufficient
    to hold the exact value --- some LAPACK versions (<= 3.5.0 at
    least) truncate the returned integer to single precision and in
    some cases this can be smaller than the required value.
    """
    lwork, info = routine(*args, **kwargs)
    if info != 0:
        raise ValueError("Internal work array size computation failed: %d" % (info,))

    lwork = lwork.real

    if getattr(routine, "dtype", None) == _np.float32:
        # Single-precision routine -- take next fp value to work
        # around possible truncation in LAPACK code
        lwork = _np.nextafter(_np.float32(lwork), _np.float32(_np.inf))

    lwork = int(lwork)
    if lwork < 0 or lwork > _np.iinfo(_np.int32).max:
        raise ValueError(
            "Too large work array required -- computation cannot " "be performed with standard 32-bit LAPACK."
        )

    return lwork
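A standalone check (outside SciPy) of the problem the comment describes: a large work-array size does not survive a round trip through float32, and one ``np.nextafter`` step toward +inf is enough to push it back up.

import numpy as np

lwork_exact = 2**24 + 1                          # not exactly representable in float32
as_f32 = np.float32(lwork_exact)                 # rounds down to 2**24
assert int(as_f32) < lwork_exact
bumped = np.nextafter(as_f32, np.float32(np.inf))
assert int(bumped) >= lwork_exact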
Example #18
def _addMinMaxToStyle(theStyle):
    """Add a min and max to each style class in a style dictionary.

    When InaSAFE provides style classes they are specific values, not ranges.
    However QGIS wants to work in ranges, so this helper will address that by
    updating the dictionary to include a min max value for each class.

    It is assumed that the min for the first class is 0 and that the quantity
    of each class constitutes its max. For all other classes, min is the
    smallest increment to a float that can meaningfully be made by Python
    (as determined by numpy.nextafter()).

    Args:
        theStyle: list - A list of dictionaries of the form shown in the
            example below.

    Returns:
        list: A new list of dictionaries with min and max attributes added
            to each entry.

    Example input:

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0),
                         dict(colour='#38A800', quantity=5, transparency=50),
                         dict(colour='#79C900', quantity=10, transparency=50),
                         dict(colour='#CEED00', quantity=20, transparency=50),
                         dict(colour='#FFCC00', quantity=50, transparency=34),
                         dict(colour='#FF6600', quantity=100, transparency=77),
                         dict(colour='#FF0000', quantity=200, transparency=24),
                         dict(colour='#7A0000', quantity=300, transparency=22)]

    Example output:

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0,
                              min=0, max=2),
                         dict(colour='#38A800', quantity=5, transparency=50,
                              min=2.0000000000002, max=5),
                         ),
                         dict(colour='#79C900', quantity=10, transparency=50,
                              min=5.0000000000002, max=10),),
                         dict(colour='#CEED00', quantity=20, transparency=50,
                              min=5.0000000000002, max=20),),
                         dict(colour='#FFCC00', quantity=50, transparency=34,
                              min=20.0000000000002, max=50),),
                         dict(colour='#FF6600', quantity=100, transparency=77,
                              min=50.0000000000002, max=100),),
                         dict(colour='#FF0000', quantity=200, transparency=24,
                              min=100.0000000000002, max=200),),
                         dict(colour='#7A0000', quantity=300, transparency=22,
                              min=200.0000000000002, max=300),)]
    """
    myNewStyles = []
    myLastMax = 0.0
    for myClass in theStyle:
        myQuantity = float(myClass['quantity'])
        myClass['min'] = myLastMax
        myClass['max'] = myQuantity
        myLastMax = numpy.nextafter(myQuantity, sys.float_info.max)
        myNewStyles.append(myClass)
    return myNewStyles
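A short standalone sketch (hypothetical quantities, same numpy/sys calls as above): each class min is the previous class's max nudged up by one representable float, so adjacent ranges never overlap.

import sys
import numpy

quantities = [2.0, 5.0, 10.0]
last_max = 0.0
for quantity in quantities:
    class_min, class_max = last_max, quantity
    assert class_min < class_max
    last_max = numpy.nextafter(quantity, sys.float_info.max)   # just above quantity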
Example #19
	def subscribe(self, t=None):
		'''subscribe to events after t'''
		if t is None:
			i = len(deltas)
			cur = dict((key, util.state.history(key)[-1]) for key in util.state)
			j = len(deltas)
			prefix = []
			#hide race conditions
			for t, key, val in deltas[i:j]:
				if t > cur[key][0]:
					cur[key] = t, val
			for key in cur:
				t, val = cur[key]
				prefix.append((key, val, t))
			t = 0
			if j:
				t = deltas[j-1][0]
			return {'t':t, 'deltas':prefix}
		t = float(t)
		while True:#completely different logic
			if deltas and deltas[-1][0] > t:
				i = bisect.bisect_left(deltas, (numpy.nextafter(t, t+1),))
				j = len(deltas)
				return {'t':deltas[j-1][0], 'deltas':[(key, val, t) for (t, key, val) in deltas[i:j]]}
			try:
				deltas_change.acquire()
				deltas_change.wait(25)
			finally:
				deltas_change.release()
			return {'t':t, 'deltas':[]}
Example #20
    def _sample_n(self, n, seed=None):
        shape = array_ops.concat(0, ([n], array_ops.shape(self.mean())))
        np_dtype = self.dtype.as_numpy_dtype()
        minval = np.nextafter(np_dtype(0), np_dtype(1))
        uniform = random_ops.random_uniform(shape=shape, minval=minval, maxval=1, dtype=self.dtype, seed=seed)
        sampled = -math_ops.log(-math_ops.log(uniform))
        return sampled * self.scale + self.loc
Example #21
def test_nextafter_vs_spacing():
    # XXX: spacing does not handle long double yet
    for t in [np.float32, np.float64]:
        for _f in [1, 1e-5, 1000]:
            f = t(_f)
            f1 = t(_f + 1)
            assert_(np.nextafter(f, f1) - f == np.spacing(f))
Example #22
def _compute_lwork(routine, *args, **kwargs):
    """
    Round floating-point lwork returned by lapack to integer.

    Several LAPACK routines compute optimal values for LWORK, which
    they return in a floating-point variable. However, for large
    values of LWORK, single-precision floating point is not sufficient
    to hold the exact value --- some LAPACK versions (<= 3.5.0 at
    least) truncate the returned integer to single precision and in
    some cases this can be smaller than the required value.
    """
    wi = routine(*args, **kwargs)
    if len(wi) < 2:
        raise ValueError("")
    info = wi[-1]
    if info != 0:
        raise ValueError("Internal work array size computation failed: " "%d" % (info,))

    lwork = [w.real for w in wi[:-1]]

    dtype = getattr(routine, "dtype", None)
    if dtype == _np.float32 or dtype == _np.complex64:
        # Single-precision routine -- take next fp value to work
        # around possible truncation in LAPACK code
        lwork = _np.nextafter(lwork, _np.inf, dtype=_np.float32)

    lwork = _np.array(lwork, _np.int64)
    if _np.any(_np.logical_or(lwork < 0, lwork > _np.iinfo(_np.int32).max)):
        raise ValueError(
            "Too large work array required -- computation cannot " "be performed with standard 32-bit LAPACK."
        )
    lwork = lwork.astype(_np.int32)
    if lwork.size == 1:
        return lwork[0]
    return lwork
Example #23
    def test_float_modulus_corner_cases(self):
        # Check remainder magnitude.
        for dt in np.typecodes['Float']:
            b = np.array(1.0, dtype=dt)
            a = np.nextafter(np.array(0.0, dtype=dt), -b)
            rem = self.mod(a, b)
            assert_(rem <= b, 'dt: %s' % dt)
            rem = self.mod(-a, -b)
            assert_(rem >= -b, 'dt: %s' % dt)

        # Check nans, inf
        with warnings.catch_warnings():
            warnings.simplefilter('always')
            warnings.simplefilter('ignore', RuntimeWarning)
            for dt in np.typecodes['Float']:
                fone = np.array(1.0, dtype=dt)
                fzer = np.array(0.0, dtype=dt)
                finf = np.array(np.inf, dtype=dt)
                fnan = np.array(np.nan, dtype=dt)
                rem = self.mod(fone, fzer)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                # MSVC 2008 returns NaN here, so disable the check.
                #rem = self.mod(fone, finf)
                #assert_(rem == fone, 'dt: %s' % dt)
                rem = self.mod(fone, fnan)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                rem = self.mod(finf, fone)
                assert_(np.isnan(rem), 'dt: %s' % dt)
Example #24
  def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Exponential Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by the hyperparameters.
    """
    broadcast_shape = self._lam.get_shape()
    with ops.op_scope([self.lam, n], name, "ExponentialSample"):
      n = ops.convert_to_tensor(n, name="n")
      shape = array_ops.concat(
          0, [array_ops.pack([n]), array_ops.shape(self._lam)])
      # Sample uniformly-at-random from the open-interval (0, 1).
      sampled = random_ops.random_uniform(
          shape, minval=np.nextafter(
              self.dtype.as_numpy_dtype(0.), self.dtype.as_numpy_dtype(1.)),
          maxval=constant_op.constant(1.0, dtype=self.dtype),
          seed=seed,
          dtype=self.dtype)

      n_val = tensor_util.constant_value(n)
      final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
      sampled.set_shape(final_shape)

      return -math_ops.log(sampled) / self._lam
Example #25
    def test_float_modulus_corner_cases(self):
        # Check remainder magnitude.
        for dt in np.typecodes['Float']:
            b = np.array(1.0, dtype=dt)
            a = np.nextafter(np.array(0.0, dtype=dt), -b)
            rem = self.mod(a, b)
            assert_(rem <= b, 'dt: %s' % dt)
            rem = self.mod(-a, -b)
            assert_(rem >= -b, 'dt: %s' % dt)

        # Check nans, inf
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in remainder")
            for dt in np.typecodes['Float']:
                fone = np.array(1.0, dtype=dt)
                fzer = np.array(0.0, dtype=dt)
                finf = np.array(np.inf, dtype=dt)
                fnan = np.array(np.nan, dtype=dt)
                rem = self.mod(fone, fzer)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                # MSVC 2008 returns NaN here, so disable the check.
                #rem = self.mod(fone, finf)
                #assert_(rem == fone, 'dt: %s' % dt)
                rem = self.mod(fone, fnan)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                rem = self.mod(finf, fone)
                assert_(np.isnan(rem), 'dt: %s' % dt)
Example #26
def _compute_mi_cc(x, y, n_neighbors):
    """Compute mutual information between two continuous variables.

    Parameters
    ----------
    x, y : ndarray, shape (n_samples,)
        Samples of two continuous random variables, must have an identical
        shape.

    n_neighbors : int
        Number of nearest neighbors to search for each point, see [1]_.

    Returns
    -------
    mi : float
        Estimated mutual information. If the estimate turns out to be
        negative, it is replaced by 0.

    Notes
    -----
    True mutual information can't be negative. If its estimate by a numerical
    method is negative, it means (provided the method is adequate) that the
    mutual information is close to 0, so replacing it by 0 is a reasonable
    strategy.

    References
    ----------
    .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
           information". Phys. Rev. E 69, 2004.
    """
    n_samples = x.size

    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))
    xy = np.hstack((x, y))

    # Here we rely on NearestNeighbors to select the fastest algorithm.
    nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)

    nn.fit(xy)
    radius = nn.kneighbors()[0]
    radius = np.nextafter(radius[:, -1], 0)

    # Algorithm is selected explicitly to allow passing an array as radius
    # later (not all algorithms support this).
    nn.set_params(algorithm='kd_tree')

    nn.fit(x)
    ind = nn.radius_neighbors(radius=radius, return_distance=False)
    nx = np.array([i.size for i in ind])

    nn.fit(y)
    ind = nn.radius_neighbors(radius=radius, return_distance=False)
    ny = np.array([i.size for i in ind])

    mi = (digamma(n_samples) + digamma(n_neighbors) -
          np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))

    return max(0, mi)
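A tiny, scikit-learn-independent illustration of why the k-th neighbor distance is shrunk with ``np.nextafter(..., 0)``: a subsequent ``<=`` radius query then behaves like a strict ``<`` comparison and does not count the k-th neighbor itself.

import numpy as np

dist = np.array([0.5, 1.0, 1.5])
k_dist = 1.0                                 # distance to the k-th neighbor
radius = np.nextafter(k_dist, 0)             # one float below k_dist
assert (dist <= radius).tolist() == [True, False, False]   # effectively dist < k_dist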
Example #27
File: rk.py Project: BranYang/scipy
    def _step_impl(self):
        t = self.t
        y = self.y

        max_step = self.max_step
        rtol = self.rtol
        atol = self.atol

        min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)

        if self.h_abs > max_step:
            h_abs = max_step
        elif self.h_abs < min_step:
            h_abs = min_step
        else:
            h_abs = self.h_abs

        order = self.order
        step_accepted = False

        while not step_accepted:
            if h_abs < min_step:
                return False, self.TOO_SMALL_STEP

            h = h_abs * self.direction
            t_new = t + h

            if self.direction * (t_new - self.t_bound) > 0:
                t_new = self.t_bound

            h = t_new - t
            h_abs = np.abs(h)

            y_new, f_new, error = rk_step(self.fun, t, y, self.f, h, self.A,
                                          self.B, self.C, self.E, self.K)
            scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
            error_norm = norm(error / scale)

            if error_norm == 0.0:
                h_abs *= MAX_FACTOR
                step_accepted = True
            elif error_norm < 1:
                h_abs *= min(MAX_FACTOR,
                             max(1, SAFETY * error_norm ** (-1 / (order + 1))))
                step_accepted = True
            else:
                h_abs *= max(MIN_FACTOR,
                             SAFETY * error_norm ** (-1 / (order + 1)))

        self.y_old = y

        self.t = t_new
        self.y = y_new

        self.h_abs = h_abs
        self.f = f_new

        return True, None
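A standalone check (not SciPy code) that the ``min_step`` expression equals ten ulps of ``t`` when integrating in the positive direction: ``np.spacing`` reports the same one-ulp gap that ``np.nextafter`` toward +inf produces.

import numpy as np

t, direction = 1.0, 1.0
min_step = 10 * np.abs(np.nextafter(t, direction * np.inf) - t)
assert min_step == 10 * np.spacing(t)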
Example #28
File: wrfout.py Project: qingu/WEM
    def compute_comp_ref(self,tidx,lvidx,lonidx,latidx,other):
        """Amend this so variables obtain at start fetch only correct date, lats, lons
        All levels need to be fetched as this is composite reflectivity
        """
        T2 = self.get('T2',tidx,False,lonidx,latidx)
        # QR = self.nc.variables['QRAIN'][PS['t'],:,PS['la'],PS['lo']]
        QR = self.get('QRAIN',tidx,False,lonidx,latidx) # This should get all levels
        PSFC = self.get('PSFC',tidx,False,lonidx,latidx)

        try:
            QS = self.get('QSNOW',tidx,False,lonidx,latidx)
        except:
            QS = N.zeros(N.shape(QR))
        rhor = 1000.0
        rhos = 100.0
        rhog = 400.0
        rhoi = 917.0

        no_rain = 8.0E6
        # How do I access this time?
        no_snow = 2.0E6 * N.exp(-0.12*(T2-273.15))
        no_grau = 4.0E6

        density = N.divide(PSFC,(287.0 * T2))
        Qra_all = QR[0,...]
        Qsn_all = QS[0,...]

        for j in range(len(Qra_all[1,:,1])):
            curcol_r = []
            curcol_s = []
            for i in range(len(Qra_all[1,1,:])):
                maxrval = N.max(Qra_all[:,j,i])
                maxsval = N.max(Qsn_all[:,j,i])
                curcol_r.append(maxrval)
                curcol_s.append(maxsval)
            N_curcol_r = N.array(curcol_r)
            N_curcol_s = N.array(curcol_s)
            if j == 0:
                Qra = N_curcol_r
                Qsn = N_curcol_s
            else:
                Qra = N.row_stack((Qra, N_curcol_r))
                Qsn = N.row_stack((Qsn, N_curcol_s))

        # Calculate slope factor lambda
        lambr = (N.divide((3.14159 * no_rain * rhor), N.multiply(density, Qra)+N.nextafter(0,1))) ** 0.25
        lambs = N.exp(-0.0536 * (T2 - 273.15))

        # Calculate equivalent reflectivity factor
        Zer = (720.0 * no_rain * (lambr ** -7.0)) * 1E18
        Zes = (0.224 * 720.0 * no_snow * (lambr ** -7.0) * (rhos/rhoi) ** 2) * 1E18
        Zes_int = N.divide((lambs * Qsn * density), no_snow)
        Zes = ((0.224 * 720 * 1E18) / (3.14159 * rhor) ** 2) * Zes_int ** 2

        Ze = N.add(Zer, Zes)
        dBZ = N.nan_to_num(10*N.log10(Ze))
        return dBZ
Example #29
def gen(x, y, name):
    """Generate test data and write to file."""

    z = np.nextafter(x, y)

    out = {"x": x.tolist(), "y": y.tolist(), "expected": z.tolist()}

    with open(name, "w") as f:
        json.dump(out, f)
Example #30
def int_type_next_after(x, direction, itemsize):
    """Return the next representable neighbor of x in the appropriate
    direction."""

    assert direction in [-1, +1]

    # x is guaranteed to be either an int or a float
    if direction < 0:
        if isinstance(x, int):
            return x - 1
        else:
            # return int(PyNextAfter(x, x - 1))
            return int(numpy.nextafter(x, x - 1))
    else:
        if isinstance(x, int):
            return x + 1
        else:
            # return int(PyNextAfter(x,x + 1)) + 1
            return int(numpy.nextafter(x, x + 1)) + 1
Example #31
    def compute_new_range(self, request=None, use_auto=True):
        def en_int(range0):
            import numpy as np
            a = range0[0]
            b = range0[1]
            if a != 0:
                si = a/abs(a)
                ex = int(np.log10(abs(a)))
#               if (a/(10.**ex) % 1)== 0.:
                ai = (np.floor(a/(10.**ex)))*10.**ex
#               else:
#                  ai = (np.floor(a/(10.**ex))-si)*10.**ex
            else:
                ai = 0.
            if b != 0:
                si = b/abs(b)
                ex = int(np.log10(abs(b)))
                if (b/(10.**ex) % 1) == 0.:
                    bi = (np.floor(b/(10.**ex)))*10.**ex
                else:
                    bi = (np.floor(b/(10.**ex))+1)*10.**ex
            else:
                bi = 0.
            return ai, bi

        def en_sym(range0):
            a = range0[0]
            b = range0[1]
            if abs(a) > abs(b):
                return (-abs(a), abs(a))
            else:
                return (-abs(b), abs(b))

        def en_int_sym(range, mode):
            if range[0] is not None:
                if np.iscomplex(range[0]):
                    range = sorted([float(np.real(range[0])),
                                    float(np.real(range[1]))])
                if mode[2]:
                    d = abs(float(range[0])-float(range[1]))
                    if range[1] > range[0]:
                        range[1] = range[1]+d/10.
                        range[0] = range[0]-d/10.
                    else:
                        range[1] = range[1]-d/10.
                        range[0] = range[0]+d/10.
            if (mode[0] and range[0] is not None and
                    range[1] is not None):
                range = en_int(range)
            if (mode[1] and range[0] is not None and
                    range[1] is not None):
                range = en_sym(range)
            return range

        def _value2param(value):
            return (value[0], value[1], value[2], value[3], value[4], value[5],
                    (value[6], value[7], value[8]))

        def _a2param(ax):
            return (ax.base, ax.auto, ax.range, ax.scale, ax.symloglin,
                    ax.symloglinscale, ax.mode)

        data = []
        newrange = {}
        # 0) first do xrange
        for ax in self._xaxis:
            base, auto, range, scale, symloglin, symscale, mode = _a2param(ax)
            if request is not None:
                for name, value in request:
                    if name == ax.name:
                        base, auto, range, scale, symloglin, symscale, mode = _value2param(
                            value)
                        break
            if (auto and use_auto) or range is None:
                range = [None]*2
                for m in ax.walk_member():
                    if m.is_suppress():
                        continue
                    range = m.get_xrange(range, scale=scale)
                range = en_int_sym(range, mode)
                if (range[0] is None or
                        range[1] is None):
                    range = (0, 1)
                if (range[0] == range[1]):
                    range = (range[0]-0.5, range[0]+0.5)
            p = [base, auto, range, scale, symloglin, symscale, ] + list(mode)
            newrange[ax] = range
            data.append((ax.name, p))
        # 1) second do yrange
        for ay in self._yaxis:
            base, auto, range, scale, symloglin, symscale, mode = _a2param(ay)
            if request is not None:
                for name, value in request:
                    if name == ay.name:
                        base, auto, range, scale, symloglin, symscale, mode = _value2param(
                            value)
                        break
            if (auto and use_auto) or range[0] is None:
                range = [None]*2
                for m in ay.walk_member():
                    if m.is_suppress():
                        continue
                    ax = m.get_xaxisparam()
                    if ax in newrange:
                        xrange = newrange[ax]
                    else:
                        xrange = ax.range
                    range = m.get_yrange(range,
                                         xrange=xrange, scale=scale)
                range = en_int_sym(range, mode)
                if (range[0] is None or
                        range[1] is None):
                    range = (0, 1)
                if (range[0] == range[1]):
                    range = (range[0]-0.5, range[0]+0.5)
            p = [base, auto, range, scale, symloglin, symscale, ] + list(mode)
            newrange[ay] = range
            data.append((ay.name, p))
        # 2-1) third do zrange
        for az in self._zaxis:
            base, auto, range, scale, symloglin, symscale, mode = _a2param(az)
            if request is not None:
                for name, value in request:
                    if name == az.name:
                        base, auto, range, scale, symloglin, symscale, mode = _value2param(
                            value)
                        break
            if (auto and use_auto) or range[0] is None:
                range = [None]*2
                for m in az.walk_member():
                    if m.is_suppress():
                        continue
                    ax = m.get_xaxisparam()
                    ay = m.get_yaxisparam()
                    if ax in newrange:
                        xrange = newrange[ax]
                    else:
                        xrange = ax.range
                    if ay in newrange:
                        yrange = newrange[ay]
                    else:
                        yrange = ay.range
                    range = m.get_zrange(range, xrange=xrange,
                                         yrange=yrange, scale=scale)
                range = en_int_sym(range, mode)
                if (range[0] is None or
                        range[1] is None):
                    range = (0, 1)
                if (range[0] == range[1]):
                    range = (range[0]-0.5, range[0]+0.5)
            p = [base, auto, range, scale, symloglin, symscale] + list(mode)
            data.append((az.name, p))
        # 2-2) third do crange
        for ac in self._caxis:
            base, auto, range, scale, symloglin, symscale, mode = _a2param(ac)
            if request is not None:
                for name, value in request:
                    if name == ac.name:
                        base, auto, range, scale, symloglin, symscale, mode = _value2param(
                            value)
                        break

            if (auto and use_auto) or range[0] is None:
                range = [None]*2
                for m in ac.walk_member():
                    if m.is_suppress():
                        continue
                    ax = m.get_xaxisparam()
                    ay = m.get_yaxisparam()
                    if ax in newrange:
                        xrange = newrange[ax]
                    else:
                        xrange = ax.range
                    if ay in newrange:
                        yrange = newrange[ay]
                    else:
                        yrange = ay.range
                    range = m.get_crange(range, xrange=xrange,
                                         yrange=yrange,
                                         scale=scale)
                range = en_int_sym(range, mode)
                if (range[0] is None or
                        range[1] is None):
                    range = (0, 1)
                if (range[0] == range[1]):
                    if range[0] == 0:
                        delta = 1 - np.nextafter(1.0, 0.0)
                        range = (-abs(delta), abs(delta))
                    else:
                        range = (range[0]-abs(range[0])/10, range[0]+abs(range[0])/10)
            p = [base, auto, range, scale, symloglin, symscale, ] + list(mode)
            data.append((ac.name, p))
        return data
Example #32
        bigram_prob[charid[name[i]], charid[name[i+1]]] += 1
        trigram_prob[charid[name[i]], charid[name[i+1]], charid[name[i+2]]] += 1

    if len(name) == 1:
        bigram_prob[charid[name[0]], charid['end']] += 1
    elif len(name) == 2:
        bigram_prob[charid[name[0]], charid[name[1]]] += 1
        bigram_prob[charid[name[1]], charid['end']] += 1
        trigram_prob[charid[name[0]], charid[name[1]], charid['end']] += 1
    else:
        bigram_prob[charid[name[-1]], charid['end']] += 1
        trigram_prob[charid[name[-2]], charid[name[-1]], charid['end']] += 1


bigram_prob = bigram_prob / bigram_prob.sum(axis=1, keepdims=True)
trigram_prob = trigram_prob / (trigram_prob.sum(axis=2, keepdims=True) + np.nextafter(0, 1))


def create_sample(seed=None):
    if seed:
        np.random.seed(seed)

    variable_name = ''

    stop_chance = 0
    curr = np.random.choice(list(start_char_prob.keys()), p=list(start_char_prob.values()))
    variable_name += curr
    curr = np.random.choice(list(charid.keys()), p=bigram_prob[charid[curr], :])
    while curr != 'end':
        variable_name += curr
        if np.random.uniform(0, 1) < stop_chance:
Example #33
def _normal(key, shape, dtype):
    _check_shape("normal", shape)
    lo = onp.nextafter(onp.array(-1., dtype), 0., dtype=dtype)
    hi = onp.array(1., dtype)
    u = uniform(key, shape, dtype, lo, hi)
    return onp.array(onp.sqrt(2), dtype) * lax.erf_inv(u)
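A NumPy/SciPy sketch of the same inverse-CDF construction (JAX not required; ``scipy.special.erfinv`` stands in for ``lax.erf_inv``): ``np.nextafter`` keeps the uniform draw strictly above -1 so the inverse error function stays finite.

import numpy as np
from scipy.special import erfinv

rng = np.random.default_rng(0)
lo = np.nextafter(np.float64(-1.0), np.float64(0.0))   # avoid u == -1 exactly
u = rng.uniform(lo, 1.0, size=1000)                    # u lies in (-1, 1)
z = np.sqrt(2.0) * erfinv(u)                           # standard normal samples
assert np.all(np.isfinite(z))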
Example #34
from __future__ import (absolute_import, division, print_function)

import numpy as np
from functools import wraps
from lmfit.model import Model

MIN_POS_DBL = np.nextafter(0, 1)  # minimum positive float


def prefix_params(param_expr):
    r"""Prepend parameter names with prefix in parameter expressions

    Parameters
    ----------
    param_expr: function
        bound method of a model returning an expression for a parameter in
        string format

    Returns
    -------
        function
    """
    @wraps(param_expr)
    def wrapper(model_instance):
        if not isinstance(model_instance, Model):
            raise TypeError('Function argument is not a Model instance')
        prefix = model_instance.prefix
        p_e = param_expr(model_instance)  # the parameter expression in str
        for prefixed_name in model_instance.param_names:
            name = prefixed_name.replace(prefix, '')  # drop the prefix
            p_e = p_e.replace(name, prefixed_name)
Example #35
    def __init__(self, coord, value=0.0):
        self._coord = coord
        self._value = value
        # it will behave like a very small interval
        self._end = np.nextafter(self._coord, self._coord + 1)
Example #36
def _compute_mi_cd(c, d, n_neighbors):
    """Compute mutual information between continuous and discrete variables.

    Parameters
    ----------
    c : ndarray, shape (n_samples,)
        Samples of a continuous random variable.

    d : ndarray, shape (n_samples,)
        Samples of a discrete random variable.

    n_neighbors : int
        Number of nearest neighbors to search for each point, see [1]_.

    Returns
    -------
    mi : float
        Estimated mutual information. If the estimate turns out to be
        negative, it is replaced by 0.

    Notes
    -----
    True mutual information can't be negative. If its estimate by a numerical
    method is negative, it means (provided the method is adequate) that the
    mutual information is close to 0, so replacing it by 0 is a reasonable
    strategy.

    References
    ----------
    .. [1] B. C. Ross "Mutual Information between Discrete and Continuous
       Data Sets". PLoS ONE 9(2), 2014.
    """
    n_samples = c.shape[0]
    c = c.reshape((-1, 1))

    radius = np.empty(n_samples)
    label_counts = np.empty(n_samples)
    k_all = np.empty(n_samples)
    nn = NearestNeighbors()
    for label in np.unique(d):
        mask = d == label
        count = np.sum(mask)
        if count > 1:
            k = min(n_neighbors, count - 1)
            nn.set_params(n_neighbors=k)
            nn.fit(c[mask])
            r = nn.kneighbors()[0]
            radius[mask] = np.nextafter(r[:, -1], 0)
            k_all[mask] = k
        label_counts[mask] = count

    # Ignore points with unique labels.
    mask = label_counts > 1
    n_samples = np.sum(mask)
    label_counts = label_counts[mask]
    k_all = k_all[mask]
    c = c[mask]
    radius = radius[mask]

    nn.set_params(algorithm='kd_tree')
    nn.fit(c)
    ind = nn.radius_neighbors(radius=radius, return_distance=False)
    m_all = np.array([i.size for i in ind])

    mi = (digamma(n_samples) + np.mean(digamma(k_all)) -
          np.mean(digamma(label_counts)) - np.mean(digamma(m_all + 1)))

    return max(0, mi)
Example #37
def run(walk: Callable, sample: Callable, delta: Callable,
        log_handler: Callable, theta_0, observed: Sequence, simulation_n: int,
        boundaries: Sequence, r: float, bin_n: int) -> None:
    """ Our approach: a weighted regression-based likelihood approximator using MCMC to walk around our posterior
    distribution. My interpretation of this approach is given below:

    1) We start with some initial guess theta_0. Right off the bat, we move to another theta from theta_0.
    2) For 'boundaries[1] - boundaries[0]' iterations...
        a) For 'simulation_n' iterations...
            i) We simulate a population using the given theta.
            ii) For each observed frequency ... 'D'
                1) We compute the difference between the two distributions.
                2) ** Apply our weighted regression likelihood approximator here. ** Obtain a probability.
        c) If this probability is greater than the probability of the previous, we accept.
        d) Otherwise, we accept our proposed with probability p(proposed) / p(prev).

    :param walk: Function that accepts some parameter set and returns another parameter set.
    :param sample: Function that produces a collection of repeat lengths (i.e. the model function).
    :param delta: Frequency distribution distance function. 0 = exact match, 1 = maximally dissimilar.
    :param log_handler: Function that handles what occurs with the current Markov chain and results.
    :param theta_0: Initial starting point.
    :param observed: 2D list of (int, float) tuples representing the (repeat length, frequency) tuples.
    :param simulation_n: Number of simulations to use to obtain a distance.
    :param boundaries: Starting and ending iteration for this specific MCMC run.
    :param r: Exponential decay rate for weight vector used in regression (a=1).
    :param bin_n: Number of bins used to construct histogram.
    :return: None.
    """
    from numpy import zeros, mean, nextafter, RankWarning
    from types import SimpleNamespace
    from warnings import simplefilter
    from numpy.random import uniform
    from datetime import datetime

    # We need to filter out all of the rank warnings.
    simplefilter('ignore', RankWarning)

    # Save our results according to the namespace below.
    a_record = lambda a_1, b_1, c_1, d_1, e_1, f_1: SimpleNamespace(
        theta=a_1,
        time_r=b_1,
        waiting_time=c_1,
        p_proposed=d_1,
        expected_delta=e_1,
        proposed_time=f_1)

    # Seed our Markov chain with our initial guess.
    x = [a_record(theta_0, 0, 1, 0, 0, 0)]

    for i in range(boundaries[0] + 1, boundaries[1]):
        theta_proposed = walk(x[-1].theta)  # Walk from our previous state.

        # Generate our D matrix.
        d = zeros((simulation_n, len(observed)), dtype='float64')
        populate_d(d, observed, sample, delta, theta_proposed,
                   [theta_proposed.kappa, theta_proposed.omega])

        # Compute our likelihood vector.
        v = _generate_v(d, r, bin_n)

        # Accept our proposal according to our alpha value.
        p_proposed, p_k = _likelihood_from_v(v), x[-1].p_proposed
        if abs(0 - p_k) < nextafter(0, 1) or p_proposed / p_k > uniform(0, 1):
            x = x + [
                a_record(theta_proposed, datetime.now(), 1, p_proposed,
                         mean(d), i)
            ]

        # Reject our proposal. We keep our current state and increment our waiting times.
        else:
            x[-1].waiting_time += 1

        # We record to our chain. This is dependent on the current iteration of MCMC.
        log_handler(x, i)
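A short standalone note on the acceptance guard ``abs(0 - p_k) < nextafter(0, 1)`` above: since ``nextafter(0, 1)`` is the smallest positive double, the test fires only when the previous likelihood is exactly zero.

from numpy import nextafter

tiny = nextafter(0, 1)                   # smallest positive float64
assert abs(0 - 0.0) < tiny               # an exact zero triggers the guard
assert not (abs(0 - 1e-320) < tiny)      # even a subnormal likelihood does not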
Example #38
        ),
        "logaddexp2": pandas_udf(
            lambda s1, s2: np.logaddexp2(s1, s2), DoubleType(), PandasUDFType.SCALAR
        ),
        "logical_and": lambda c1, c2: c1.cast(BooleanType()) & c2.cast(BooleanType()),
        "logical_or": lambda c1, c2: c1.cast(BooleanType()) | c2.cast(BooleanType()),
        "logical_xor": lambda c1, c2: (
            # mimics xor by logical operators.
            (c1.cast(BooleanType()) | c2.cast(BooleanType()))
            & (~(c1.cast(BooleanType())) | ~(c2.cast(BooleanType())))
        ),
        "maximum": F.greatest,
        "minimum": F.least,
        "modf": pandas_udf(lambda s1, s2: np.modf(s1, s2), DoubleType(), PandasUDFType.SCALAR),
        "nextafter": pandas_udf(
            lambda s1, s2: np.nextafter(s1, s2), DoubleType(), PandasUDFType.SCALAR
        ),
        "right_shift": pandas_udf(
            lambda s1, s2: np.right_shift(s1, s2), LongType(), PandasUDFType.SCALAR
        ),
    }
)


# Copied from pandas.
# See also https://docs.scipy.org/doc/numpy/reference/arrays.classes.html#standard-array-subclasses
def maybe_dispatch_ufunc_to_dunder_op(
    ser_or_index: "IndexOpsMixin", ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
) -> "IndexOpsMixin":
    special = {
        "add",
Example #39
0
    def __call__(self, X, alpha=None, bytes=False):
        """
        *X* is either a scalar or an array (of any dimension).
        If scalar, a tuple of rgba values is returned, otherwise
        an array with the new shape = oldshape+(4,). If the X-values
        are integers, then they are used as indices into the array.
        If they are floating point, then they must be in the
        interval (0.0, 1.0).
        Alpha must be a scalar between 0 and 1, or None.
        If bytes is False, the rgba values will be floats on a
        0-1 scale; if True, they will be uint8, 0-255.
        """

        if not self._isinit:
            self._init()
        mask_bad = None
        if not cbook.iterable(X):
            vtype = 'scalar'
            xa = np.array([X])
        else:
            vtype = 'array'
            xma = ma.array(X, copy=True)  # Copy here to avoid side effects.
            mask_bad = xma.mask  # Mask will be used below.
            xa = xma.filled()  # Fill to avoid infs, etc.
            del xma

        # Calculations with native byteorder are faster, and avoid a
        # bug that otherwise can occur with putmask when the last
        # argument is a numpy scalar.
        if not xa.dtype.isnative:
            xa = xa.byteswap().newbyteorder()

        if xa.dtype.kind == "f":
            # Treat 1.0 as slightly less than 1.
            vals = np.array([1, 0], dtype=xa.dtype)
            almost_one = np.nextafter(*vals)
            cbook._putmask(xa, xa == 1.0, almost_one)
            # The following clip is fast, and prevents possible
            # conversion of large positive values to negative integers.

            xa *= self.N
            if NP_CLIP_OUT:
                np.clip(xa, -1, self.N, out=xa)
            else:
                xa = np.clip(xa, -1, self.N)

            # ensure that all 'under' values will still have negative
            # value after casting to int
            cbook._putmask(xa, xa < 0.0, -1)
            xa = xa.astype(int)
        # Set the over-range indices before the under-range;
        # otherwise the under-range values get converted to over-range.
        cbook._putmask(xa, xa > self.N - 1, self._i_over)
        cbook._putmask(xa, xa < 0, self._i_under)
        if mask_bad is not None:
            if mask_bad.shape == xa.shape:
                cbook._putmask(xa, mask_bad, self._i_bad)
            elif mask_bad:
                xa.fill(self._i_bad)
        if bytes:
            lut = (self._lut * 255).astype(np.uint8)
        else:
            lut = self._lut.copy()  # Don't let alpha modify original _lut.

        if alpha is not None:
            alpha = min(alpha, 1.0)  # alpha must be between 0 and 1
            alpha = max(alpha, 0.0)
            if bytes:
                alpha = int(alpha * 255)
            if (lut[-1] == 0).all():
                lut[:-1, -1] = alpha
                # All zeros is taken as a flag for the default bad
                # color, which is no color--fully transparent.  We
                # don't want to override this.
            else:
                lut[:, -1] = alpha
                # If the bad value is set to have a color, then we
                # override its alpha just as for any other value.

        rgba = np.empty(shape=xa.shape + (4, ), dtype=lut.dtype)
        lut.take(xa, axis=0, mode='clip', out=rgba)
        #  twice as fast as lut[xa];
        #  using the clip or wrap mode and providing an
        #  output array speeds it up a little more.
        if vtype == 'scalar':
            rgba = tuple(rgba[0, :])
        return rgba
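The almost_one adjustment above relies on np.nextafter(1, 0) being the largest double strictly below 1.0, so inputs equal to exactly 1.0 still scale into the last entry of the lookup table rather than the over-range slot. A small self-contained check (the table size of 256 is only an assumption for illustration):

import numpy as np

vals = np.array([1, 0], dtype=np.float64)
almost_one = np.nextafter(*vals)         # largest double strictly below 1.0
assert almost_one < 1.0
assert int(almost_one * 256) == 255      # stays inside a 256-entry lookup table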
Example #40
0
def compute_mi_cc(x, y, n_neighbors=3):

    leaf_size = 30

    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))
    xy = np.hstack((x, y))

    n_samples = xy.shape[0]
    n_features = xy.shape[1]

    radius = np.empty(n_samples)

    # create the objects that are going to be needed for NN
    n_levels = 1 + np.log2(max(1, ((n_samples - 1) // leaf_size)))
    n_nodes = int(2**n_levels) - 1
    # allocate arrays for storage
    idx_array = np.arange(n_samples)
    node_radius = np.zeros(n_nodes, dtype=np.float64)
    node_idx_start = np.zeros(n_nodes, dtype=np.int64)
    node_idx_end = np.zeros(n_nodes, dtype=np.int64)
    node_is_leaf = np.zeros(n_nodes, dtype=np.int64)
    node_centroids = np.zeros((n_nodes, n_features), dtype=np.float64)
    # set metric==1 for chebyshev distance
    ball_tree.recursive_build(0,
                              0,
                              n_samples,
                              xy,
                              node_centroids,
                              node_radius,
                              idx_array,
                              node_idx_start,
                              node_idx_end,
                              node_is_leaf,
                              n_nodes,
                              leaf_size,
                              metric=1)
    # This algorithm returns the point itself as a neighbor, so
    # if n_neighbors need to be returned then '1' needs to be
    # added in order to get the correct value from 'nth'
    # neighbor when the heap is created
    heap_distances, heap_indices = ball_tree.heap_create(
        n_samples, n_neighbors + 1)
    ball_tree.query(0,
                    xy,
                    heap_distances,
                    heap_indices,
                    xy,
                    idx_array,
                    node_centroids,
                    node_radius,
                    node_is_leaf,
                    node_idx_start,
                    node_idx_end,
                    metric=1)
    ball_tree.heap_sort(heap_distances, heap_indices)
    radius = np.nextafter(heap_distances[:, -1], 0)

    # A whole new set of Tree elements need to be created for the KDTree
    # algorithms that are going to be run on both the x and y arrays that
    # were initially passed in.
    #
    # Perform KD-tree NN on x array
    n_samples_kd = x.shape[0]

    # determine number of levels in the tree, and from this
    # the number of nodes in the tree.  This results in leaf nodes
    # with numbers of points between leaf_size and 2 * leaf_size
    n_levels_kd = 1 + np.log2(max(1, ((n_samples_kd - 1) // leaf_size)))
    # having to round first and then apply int in order to calculate
    # correct number of nodes
    n_nodes_kd = int(round((2**n_levels_kd))) - 1

    # allocate arrays for storage
    idx_array_kd = np.arange(n_samples_kd)
    node_radius_kd = np.zeros(n_nodes_kd, dtype=np.float64)
    node_idx_start_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_idx_end_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_is_leaf_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_lower_bounds_kd = np.zeros((n_nodes_kd, n_features), dtype=np.float64)
    node_upper_bounds_kd = np.zeros((n_nodes_kd, n_features), dtype=np.float64)

    # use 'chebyshev' distance as metric (metric==1)
    kd_tree.recursive_build(0,
                            0,
                            n_samples_kd,
                            x,
                            node_lower_bounds_kd,
                            node_upper_bounds_kd,
                            node_radius_kd,
                            idx_array_kd,
                            node_idx_start_kd,
                            node_idx_end_kd,
                            node_is_leaf_kd,
                            n_nodes_kd,
                            leaf_size,
                            metric=1)

    count_only = True
    return_distance = False
    counts_x = \
        kd_tree.radius_neighbors_count(x, radius,
                                       idx_array_kd, node_lower_bounds_kd,
                                       node_upper_bounds_kd, node_radius_kd,
                                       node_is_leaf_kd, node_idx_start_kd,
                                       node_idx_end_kd, count_only, return_distance,
                                       metric=1)

    # Perform KD-tree NN on y array
    # Note: The data structures to perform the KD-tree build and search should
    # be the same for x and y. In order to preserve memory, re-using the same
    # objects

    # use 'chebyshev' distance as metric (metric==1)
    kd_tree.recursive_build(0,
                            0,
                            n_samples_kd,
                            y,
                            node_lower_bounds_kd,
                            node_upper_bounds_kd,
                            node_radius_kd,
                            idx_array_kd,
                            node_idx_start_kd,
                            node_idx_end_kd,
                            node_is_leaf_kd,
                            n_nodes_kd,
                            leaf_size,
                            metric=1)

    count_only = True
    return_distance = False
    counts_y = \
        kd_tree.radius_neighbors_count(y, radius,
                                       idx_array_kd, node_lower_bounds_kd,
                                       node_upper_bounds_kd, node_radius_kd,
                                       node_is_leaf_kd, node_idx_start_kd,
                                       node_idx_end_kd, count_only, return_distance,
                                       metric=1)

    mi = (digamma_cpu(n_samples) + digamma_cpu(n_neighbors) -
          np.mean(digamma_cpu(counts_x)) - np.mean(digamma_cpu(counts_y)))

    mi = max(0, mi)

    return mi
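In compute_mi_cc, radius = np.nextafter(heap_distances[:, -1], 0) shrinks each k-th nearest-neighbour distance by one ulp toward zero, so the radius counts on the marginal spaces use a strictly smaller radius and exclude points lying exactly at that distance, as Kraskov-style mutual information estimators require. In isolation:

import numpy as np

kth_dist = np.array([0.5, 1.0, 2.0])
radius = np.nextafter(kth_dist, 0)    # one ulp below each k-th neighbour distance
assert np.all(radius < kth_dist)
assert np.allclose(radius, kth_dist)  # but only infinitesimally smaller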
Example #41
0
    def choropleth(self,
                   geo_data,
                   data=None,
                   columns=None,
                   key_on=None,
                   bins=6,
                   fill_color='blue',
                   nan_fill_color='black',
                   fill_opacity=0.6,
                   nan_fill_opacity=None,
                   line_color='black',
                   line_weight=1,
                   line_opacity=1,
                   name=None,
                   legend_name='',
                   topojson=None,
                   reset=False,
                   smooth_factor=None,
                   highlight=None,
                   **kwargs):
        """
        Apply a GeoJSON overlay to the map.

        Plot a GeoJSON overlay on the base map. There is no requirement
        to bind data (passing just a GeoJSON plots a single-color overlay),
        but there is a data binding option to map your columnar data to
        different feature objects with a color scale.

        If data is passed as a Pandas DataFrame, the "columns" and "key_on"
        keywords must be included, the first to indicate which DataFrame
        columns to use, the second to indicate the layer in the GeoJSON
        on which to key the data. The 'columns' keyword does not need to be
        passed for a Pandas series.

        Colors are generated from color brewer (http://colorbrewer2.org/)
        sequential palettes. By default, linear binning is used between
        the min and the max of the values. Custom binning can be achieved
        with the `bins` parameter.

        TopoJSONs can be passed as "geo_data", but the "topojson" keyword must
        also be passed with the reference to the topojson objects to convert.
        See the topojson.feature method in the TopoJSON API reference:
        https://github.com/topojson/topojson/wiki/API-Reference


        Parameters
        ----------
        geo_data: string/object
            URL, file path, or data (json, dict, geopandas, etc) to your GeoJSON
            geometries
        data: Pandas DataFrame or Series, default None
            Data to bind to the GeoJSON.
        columns: dict or tuple, default None
            If the data is a Pandas DataFrame, the columns of data to be bound.
            Must pass column 1 as the key, and column 2 the values.
        key_on: string, default None
            Variable in the `geo_data` GeoJSON file to bind the data to. Must
            start with 'feature' and be in JavaScript object notation.
            Ex: 'feature.id' or 'feature.properties.statename'.
        bins: int or sequence of scalars or str, default 6
            If `bins` is an int, it defines the number of equal-width
            bins between the min and the max of the values.
            If `bins` is a sequence, it directly defines the bin edges.
            For more information on this parameter, have a look at
            numpy.histogram function.
        fill_color: string, default 'blue'
            Area fill color. Can pass a hex code, color name, or if you are
            binding data, one of the following color brewer palettes:
            'BuGn', 'BuPu', 'GnBu', 'OrRd', 'PuBu', 'PuBuGn', 'PuRd', 'RdPu',
            'YlGn', 'YlGnBu', 'YlOrBr', and 'YlOrRd'.
        nan_fill_color: string, default 'black'
            Area fill color for nan or missing values.
            Can pass a hex code, color name.
        fill_opacity: float, default 0.6
            Area fill opacity, range 0-1.
        nan_fill_opacity: float, default fill_opacity
            Area fill opacity for nan or missing values, range 0-1.
        line_color: string, default 'black'
            GeoJSON geopath line color.
        line_weight: int, default 1
            GeoJSON geopath line weight.
        line_opacity: float, default 1
            GeoJSON geopath line opacity, range 0-1.
        legend_name: string, default empty string
            Title for data legend.
        topojson: string, default None
            If using a TopoJSON, passing "objects.yourfeature" to the topojson
            keyword argument will enable conversion to GeoJSON.
        reset: boolean, default False
            Remove all current geoJSON layers, start with new layer
        smooth_factor: float, default None
            How much to simplify the polyline on each zoom level. More means
            better performance and smoother look, and less means more accurate
            representation. Leaflet defaults to 1.0.
        highlight: boolean, default False
            Enable highlight functionality when hovering over a GeoJSON area.

        Returns
        -------
        GeoJSON data layer in obj.template_vars

        Examples
        --------
        >>> m.choropleth(geo_data='us-states.json', line_color='blue',
        ...              line_weight=3)
        >>> m.choropleth(geo_data='geo.json', data=df,
        ...              columns=['Data 1', 'Data 2'],
        ...              key_on='feature.properties.myvalue',
        ...              fill_color='PuBu',
        ...              bins=[0, 20, 30, 40, 50, 60])
        >>> m.choropleth(geo_data='countries.json',
        ...              topojson='objects.countries')
        >>> m.choropleth(geo_data='geo.json', data=df,
        ...              columns=['Data 1', 'Data 2'],
        ...              key_on='feature.properties.myvalue',
        ...              fill_color='PuBu',
        ...              bins=[0, 20, 30, 40, 50, 60],
        ...              highlight=True)

        """
        if data is not None and not color_brewer(fill_color):
            raise ValueError('Please pass a valid color brewer code to '
                             'fill_color. See docstring for valid codes.')

        if nan_fill_opacity is None:
            nan_fill_opacity = fill_opacity

        if 'threshold_scale' in kwargs:
            if kwargs['threshold_scale'] is not None:
                bins = kwargs['threshold_scale']
            warnings.warn(
                'choropleth `threshold_scale` parameter is now deprecated '
                'in favor of the `bins` parameter.', DeprecationWarning)

        # Create color_data dict
        if hasattr(data, 'set_index'):
            # This is a pd.DataFrame
            color_data = data.set_index(columns[0])[columns[1]].to_dict()
        elif hasattr(data, 'to_dict'):
            # This is a pd.Series
            color_data = data.to_dict()
        elif data:
            color_data = dict(data)
        else:
            color_data = None

        if color_data is not None and key_on is not None:
            real_values = np.array(list(color_data.values()))
            real_values = real_values[~np.isnan(real_values)]
            _, bin_edges = np.histogram(real_values, bins=bins)

            bins_min, bins_max = min(bin_edges), max(bin_edges)
            if np.any((real_values < bins_min) | (real_values > bins_max)):
                raise ValueError(
                    'All values are expected to fall into one of the provided '
                    'bins (or to be Nan). Please check the `bins` parameter '
                    'and/or your data.')

            # We add the colorscale
            nb_bins = len(bin_edges) - 1
            color_range = color_brewer(fill_color, n=nb_bins)
            color_scale = StepColormap(color_range,
                                       index=bin_edges,
                                       vmin=bins_min,
                                       vmax=bins_max,
                                       caption=legend_name)
            self.add_child(color_scale)

            # then we 'correct' the last edge for numpy digitize
            # (we add a very small amount to fake an inclusive right interval)
            increasing = bin_edges[0] <= bin_edges[-1]
            bin_edges[-1] = np.nextafter(bin_edges[-1],
                                         (1 if increasing else -1) * np.inf)

            key_on = key_on[8:] if key_on.startswith('feature.') else key_on

            def get_by_key(obj, key):
                return (obj.get(key, None) if len(key.split('.')) <= 1 else
                        get_by_key(obj.get(key.split('.')[0], None), '.'.join(
                            key.split('.')[1:])))

            def color_scale_fun(x):
                key_of_x = get_by_key(x, key_on)

                if key_of_x not in color_data.keys():
                    return nan_fill_color, nan_fill_opacity

                value_of_x = color_data[key_of_x]
                if np.isnan(value_of_x):
                    return nan_fill_color, nan_fill_opacity

                color_idx = np.digitize(value_of_x, bin_edges, right=False) - 1
                return color_range[color_idx], fill_opacity

        else:

            def color_scale_fun(x):
                return fill_color, fill_opacity

        def style_function(x):
            color, opacity = color_scale_fun(x)
            return {
                'weight': line_weight,
                'opacity': line_opacity,
                'color': line_color,
                'fillOpacity': opacity,
                'fillColor': color
            }

        def highlight_function(x):
            return {
                'weight': line_weight + 2,
                'fillOpacity': fill_opacity + .2
            }

        if topojson:
            geo_json = TopoJson(geo_data,
                                topojson,
                                name=name,
                                style_function=style_function,
                                smooth_factor=smooth_factor)
        else:
            geo_json = GeoJson(
                geo_data,
                name=name,
                style_function=style_function,
                smooth_factor=smooth_factor,
                highlight_function=highlight_function if highlight else None)

        self.add_child(geo_json)
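The bin_edges[-1] correction above pushes the final edge one ulp outward (toward +inf for increasing edges) so that np.digitize(..., right=False) places values equal to the maximum inside the last bin rather than past it. A self-contained illustration of why the correction is needed:

import numpy as np

bin_edges = np.array([0.0, 20.0, 30.0, 40.0])
value = 40.0                                            # equal to the last edge
assert np.digitize(value, bin_edges, right=False) == 4  # falls past the last bin
bin_edges[-1] = np.nextafter(bin_edges[-1], np.inf)
assert np.digitize(value, bin_edges, right=False) - 1 == 2  # now indexes the last bin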
Example #42
0
def add_extrema_to_style(style):
    """Add a min and max to each style class in a style dictionary.

    When InaSAFE provides style classes they are specific values, not ranges.
    However QGIS wants to work in ranges, so this helper will address that by
    updating the dictionary to include min and max values for each class.

    It is assumed that we will start from 0 as the min for the first class,
    and the quantity of each class shall constitute the max. For all other
    classes, min shall be the previous class's max plus the smallest increment
    that can meaningfully be made to a float in Python (as determined by
    numpy.nextafter()).

    :param style: A list of dictionaries of the form as per the example below.
    :type style: list(dict)

    :returns: A new dictionary list with min max attributes added to each
        entry.
    :rtype: list(dict)

    Example input::

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0),
                         dict(colour='#38A800', quantity=5, transparency=50),
                         dict(colour='#79C900', quantity=10, transparency=50),
                         dict(colour='#CEED00', quantity=20, transparency=50),
                         dict(colour='#FFCC00', quantity=50, transparency=34),
                         dict(colour='#FF6600', quantity=100, transparency=77),
                         dict(colour='#FF0000', quantity=200, transparency=24),
                         dict(colour='#7A0000', quantity=300, transparency=22)]

    Example output::

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0,
                              min=0, max=2),
                         dict(colour='#38A800', quantity=5, transparency=50,
                              min=2.0000000000002, max=5),
                         dict(colour='#79C900', quantity=10, transparency=50,
                              min=5.0000000000002, max=10),
                         dict(colour='#CEED00', quantity=20, transparency=50,
                              min=10.0000000000002, max=20),
                         dict(colour='#FFCC00', quantity=50, transparency=34,
                              min=20.0000000000002, max=50),
                         dict(colour='#FF6600', quantity=100, transparency=77,
                              min=50.0000000000002, max=100),
                         dict(colour='#FF0000', quantity=200, transparency=24,
                              min=100.0000000000002, max=200),
                         dict(colour='#7A0000', quantity=300, transparency=22,
                              min=200.0000000000002, max=300)]
    """
    new_styles = []
    last_max = 0.0
    for style_class in style:
        quantity = float(style_class['quantity'])
        style_class['min'] = last_max
        style_class['max'] = quantity
        if quantity == last_max and quantity != 0:
            # skip it as it does not represent a class increment
            continue
        last_max = numpy.nextafter(quantity, sys.float_info.max)
        new_styles.append(style_class)
    return new_styles
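Here numpy.nextafter(quantity, sys.float_info.max) yields the smallest float strictly greater than the current class maximum, so the next class's min starts just above it and adjacent ranges neither overlap nor leave a representable gap. For example:

import sys
import numpy

last_max = 2.0
next_min = numpy.nextafter(last_max, sys.float_info.max)
assert next_min > last_max                        # strictly above the previous max
assert numpy.nextafter(next_min, 0) == last_max   # no representable float in between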
Example #43
0
    def _step_impl(self):
        t = self.t
        y = self.y
        f = self.f

        max_step = self.max_step
        atol = self.atol
        rtol = self.rtol

        min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
        if self.h_abs > max_step:
            h_abs = max_step
            h_abs_old = None
            error_norm_old = None
        elif self.h_abs < min_step:
            h_abs = min_step
            h_abs_old = None
            error_norm_old = None
        else:
            h_abs = self.h_abs
            h_abs_old = self.h_abs_old
            error_norm_old = self.error_norm_old

        J = self.J
        LU_real = self.LU_real
        LU_complex = self.LU_complex

        current_jac = self.current_jac
        jac = self.jac

        rejected = False
        step_accepted = False
        message = None
        while not step_accepted:
            if h_abs < min_step:
                return False, self.TOO_SMALL_STEP

            h = h_abs * self.direction
            t_new = t + h

            if self.direction * (t_new - self.t_bound) > 0:
                t_new = self.t_bound

            h = t_new - t
            h_abs = np.abs(h)

            if self.sol is None:
                Z0 = np.zeros((3, y.shape[0]))
            else:
                Z0 = self.sol(t + h * C).T - y

            scale = atol + np.abs(y) * rtol

            converged = False
            while not converged:
                if LU_real is None or LU_complex is None:
                    LU_real = self.lu(MU_REAL / h * self.I - J)
                    LU_complex = self.lu(MU_COMPLEX / h * self.I - J)

                converged, n_iter, Z, rate = solve_collocation_system(
                    self.fun, t, y, h, Z0, scale, self.newton_tol, LU_real,
                    LU_complex, self.solve_lu)

                if not converged:
                    if current_jac:
                        break

                    J = self.jac(t, y, f)
                    current_jac = True
                    LU_real = None
                    LU_complex = None

            if not converged:
                h_abs *= 0.5
                LU_real = None
                LU_complex = None
                continue

            y_new = y + Z[-1]
            ZE = Z.T.dot(E) / h
            error = self.solve_lu(LU_real, f + ZE)
            scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
            error_norm = norm(error / scale)
            safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER +
                                                       n_iter)

            if rejected and error_norm > 1:
                error = self.solve_lu(LU_real, self.fun(t, y + error) + ZE)
                error_norm = norm(error / scale)

            if error_norm > 1:
                factor = predict_factor(h_abs, h_abs_old, error_norm,
                                        error_norm_old)
                h_abs *= max(MIN_FACTOR, safety * factor)

                LU_real = None
                LU_complex = None
                rejected = True
            else:
                step_accepted = True

        recompute_jac = jac is not None and n_iter > 2 and rate > 1e-3

        factor = predict_factor(h_abs, h_abs_old, error_norm, error_norm_old)
        factor = min(MAX_FACTOR, safety * factor)

        if not recompute_jac and factor < 1.2:
            factor = 1
        else:
            LU_real = None
            LU_complex = None

        f_new = self.fun(t_new, y_new)
        if recompute_jac:
            J = jac(t_new, y_new, f_new)
            current_jac = True
        elif jac is not None:
            current_jac = False

        self.h_abs_old = self.h_abs
        self.error_norm_old = error_norm

        self.h_abs = h_abs * factor

        self.y_old = y

        self.t = t_new
        self.y = y_new
        self.f = f_new

        self.Z = Z

        self.LU_real = LU_real
        self.LU_complex = LU_complex
        self.current_jac = current_jac
        self.J = J

        self.t_old = t
        self.sol = self._compute_dense_output()

        return step_accepted, message
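The min_step expression at the top of _step_impl measures ten ulps of the current time t in the direction of integration: np.nextafter(t, self.direction * np.inf) - t is the smallest representable increment away from t, so any step smaller than this could fail to advance t at all. For example, at t = 1.0:

import numpy as np

t, direction = 1.0, 1
min_step = 10 * np.abs(np.nextafter(t, direction * np.inf) - t)
assert min_step == 10 * np.spacing(t)   # ten ulps of t
assert t + min_step > t                 # a step this large still advances t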
Example #44
0
def sinc_pattern(x):
    x[x == 0] = np.nextafter(0, 1)
    return np.sin(x * np.pi) / (np.pi * x)
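Replacing exact zeros with np.nextafter(0, 1), the smallest positive double, before evaluating sin(pi*x)/(pi*x) avoids the 0/0 indeterminate form while returning a value numerically indistinguishable from the limit sinc(0) = 1. A quick usage check (note the function modifies its argument in place, hence the copy):

import numpy as np

x = np.array([0.0, 0.5, 1.0])
y = sinc_pattern(x.copy())
assert np.isclose(y[0], 1.0)           # limit value recovered at x == 0
assert np.isclose(y[1], 2.0 / np.pi)   # sin(pi/2) / (pi/2)
assert abs(y[2]) < 1e-12               # sin(pi) / pi is numerically ~0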
Example #45
0
def wavelet_transform(X, rate, filters='rat', hg_only=True, X_fft_h=None, npad=1000):
    """Apply a wavelet transform using a prespecified set of filters.

    Calculates the center frequencies and bandwidths for the wavelets and applies them along with
    a Heaviside function to the FFT of the signal before performing an inverse FFT.

    Parameters
    ----------
    X : ndarray (n_time, n_channels)
        Input data, dimensions
    rate : float
        Number of samples per second.
    filters : str (optional)
        Which type of filters to use. Options are
        'rat': center frequencies spanning 2-1200 Hz, constant Q, 54 bands
        'human': center frequencies spanning 4-200 Hz, constant Q, 40 bands
        'changlab': center frequencies spanning 4-200 Hz, variable Q, 40 bands
        Note - calculating center frequencies above rate/2 raises a ValueError
    hg_only : bool
        If True, only the amplitudes in the high gamma range [70-150 Hz] are computed.
    X_fft_h : ndarray (n_time, n_channels)
        Precomputed product of X_fft and the Heaviside function. Useful for when bands are computed
        independently.
    npad : int
        Length of padding, in samples, added to the beginning and end of the timeseries. Default 1000.

    Returns
    -------
    Xh : ndarray, complex
        Bandpassed analytic signal
    X_fft_h : ndarray, complex
        Product of X_fft and the Heaviside function.
    cfs : ndarray
        Center frequencies used.
    sds : ndarray
        Bandwidths used.
    """
    if X_fft_h is None:
        npads, to_removes, _ = _npads(X, npad)
        X = _smart_pad(X, npads)
        n_time = X.shape[0]
    else:
        n_time = X_fft_h.shape[0]
    freq = fftfreq(n_time, 1. / rate)

    # Calculate center frequencies
    if filters in ['human', 'changlab']:
        cfs = log_spaced_cfs(4.0749286538265, 200, 40)
    elif filters == 'rat':
        cfs = log_spaced_cfs(2.6308, 1200., 54)
    else:
        raise NotImplementedError

    # Subselect high gamma bands
    if hg_only:
        idxs = np.logical_and(cfs >= 70., cfs <= 150.)
        cfs = cfs[idxs]

    # Raise exception if sample rate too small
    if cfs.max() * 2. > np.nextafter(rate, np.inf):  # Allow floating point tolerance
        string = ('Unable to compute wavelet transform above Nyquist rate ({} Hz).' +
                  ' Increase your rate ({} Hz) to at least twice your desired maximum' +
                  ' frequency of interest.')
        raise ValueError(string.format(cfs.max() * 2., np.nextafter(rate, np.inf)))

    # Calculate bandwidths
    if filters in ['rat', 'human']:
        sds = const_Q_sds(cfs)
    elif filters == 'changlab':
        sds = chang_sds(cfs)
    else:
        raise NotImplementedError

    filters = []
    for cf, sd in zip(cfs, sds):
        filters.append(gaussian(n_time, rate, cf, sd))

    Xh = np.zeros(X.shape + (len(filters),), dtype=complex)
    if X_fft_h is None:
        # Heaviside filter with 0 DC
        h = np.zeros(len(freq))
        h[freq > 0] = 2.
        h = h[:, np.newaxis]
        X_fft_h = fft(X, axis=0) * h

    for ii, f in enumerate(filters):
        if f is None:
            Xh[..., ii] = ifft(X_fft_h, axis=0)
        else:
            f = f / np.linalg.norm(f)
            Xh[..., ii] = ifft(X_fft_h * f[:, np.newaxis], axis=0)

    Xh = _trim(Xh, to_removes)

    return Xh, X_fft_h, cfs, sds
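The Nyquist guard above compares against np.nextafter(rate, np.inf) rather than rate itself, so a maximum centre frequency of exactly rate / 2 is allowed instead of failing on the strict inequality, while anything clearly above Nyquist still raises. For instance:

import numpy as np

rate, cf_max = 400.0, 200.0
assert not (cf_max * 2. > np.nextafter(rate, np.inf))   # exactly at Nyquist: allowed
assert 201.0 * 2. > np.nextafter(rate, np.inf)          # above Nyquist: rejected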
Example #46
0
import bayesmark.constants as cc
import bayesmark.expected_max as em
import bayesmark.quantiles as qt
from bayesmark.cmd_parse import CmdArgs, general_parser, parse_args
from bayesmark.constants import ARG_DELIM, ITER, METHOD, PERF_BEST, PERF_CLIP, PERF_MEAN, PERF_MED, SUGGEST, TEST_CASE
from bayesmark.experiment_aggregate import validate_agg_perf
from bayesmark.serialize import XRSerializer
from bayesmark.util import str_join_safe
from bayesmark.xr_util import ds_concat, ds_like_mixed

# Mathematical settings
# We could move these to constants to eliminate repetition but we will probably phase out anyway
EVAL_Q = 0.5  # Evaluate based on median loss across n_trials
ALPHA = 0.05  # ==> 95% CIs
MIN_POS = np.nextafter(0, 1)
PAD_FACTOR = 10000

logger = logging.getLogger(__name__)


def validate(baseline_ds):
    """Perform same tracks as will happen in analysis."""
    for func_name in baseline_ds.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
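In the bayesmark snippet above, MIN_POS = np.nextafter(0, 1) is the smallest positive (subnormal) double; a constant like this is typically used as a clip floor so that later logarithms or divisions stay finite. A quick check, assuming only NumPy:

import numpy as np

MIN_POS = np.nextafter(0, 1)          # smallest positive (subnormal) double
assert MIN_POS > 0
assert np.isfinite(np.log(MIN_POS))   # safe to take logs after clipping to MIN_POS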
Example #47
0
    def _define_probability_bins(self, n_probability_bins,
                                 single_value_lower_limit,
                                 single_value_upper_limit):
        """
        Define equally sized probability bins for use in a reliability table.
        The range 0 to 1 is divided into ranges to give n_probability bins.
        If single_value_lower_limit and / or single_value_upper_limit are True,
        additional bins corresponding to values of 0 and / or 1 will be created,
        each with a width defined by self.single_value_tolerance.

        Args:
            n_probability_bins (int):
                The total number of probability bins desired in the
                reliability tables. This number includes the extrema bins
                (equals 0 and equals 1) if single value limits are turned on,
                in which case the minimum number of bins is 3.
            single_value_lower_limit (bool):
                Mandates that the lowest bin should be single valued,
                with a small precision tolerance, defined as 1.0E-6.
                The bin is thus 0 to 1.0E-6.
            single_value_upper_limit (bool):
                Mandates that the highest bin should be single valued,
                with a small precision tolerance, defined as 1.0E-6.
                The bin is thus (1 - 1.0E-6) to 1.
        Returns:
            numpy.ndarray:
                An array of 2-element arrays that contain the bounds of the
                probability bins. These bounds are non-overlapping, with
                adjacent bin boundaries spaced at the smallest representable
                interval.
        Raises:
            ValueError: If trying to use both single_value_lower_limit and
                        single_value_upper_limit with 2 or fewer probability bins.
        """
        if single_value_lower_limit and single_value_upper_limit:
            if n_probability_bins <= 2:
                msg = ("Cannot use both single_value_lower_limit and "
                       "single_value_upper_limit with 2 or fewer "
                       "probability bins.")
                raise ValueError(msg)
            n_probability_bins = n_probability_bins - 2
        elif single_value_lower_limit or single_value_upper_limit:
            n_probability_bins = n_probability_bins - 1

        bin_lower = np.linspace(0, 1, n_probability_bins + 1, dtype=np.float32)
        bin_upper = np.nextafter(bin_lower, 0, dtype=np.float32)
        bin_upper[-1] = 1.0
        bins = np.stack([bin_lower[:-1], bin_upper[1:]], 1).astype(np.float32)

        if single_value_lower_limit:
            bins[0, 0] = np.nextafter(self.single_value_tolerance,
                                      1,
                                      dtype=np.float32)
            lowest_bin = np.array([0, self.single_value_tolerance],
                                  dtype=np.float32)
            bins = np.vstack([lowest_bin, bins]).astype(np.float32)

        if single_value_upper_limit:
            bins[-1, 1] = np.nextafter(1.0 - self.single_value_tolerance,
                                       0,
                                       dtype=np.float32)
            highest_bin = np.array([1.0 - self.single_value_tolerance, 1],
                                   dtype=np.float32)
            bins = np.vstack([bins, highest_bin]).astype(np.float32)

        return bins
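The pattern above uses np.nextafter(..., 0) so that each bin's upper bound sits one float32 ulp below the next bin's lower bound, giving the non-overlapping, adjacent bins the docstring describes. A minimal sketch of the core idea for three equal-width bins, using only NumPy:

import numpy as np

bin_lower = np.linspace(0, 1, 4, dtype=np.float32)   # edges for 3 equal-width bins
bin_upper = np.nextafter(bin_lower, 0, dtype=np.float32)
bin_upper[-1] = 1.0
bins = np.stack([bin_lower[:-1], bin_upper[1:]], 1).astype(np.float32)
# Each upper bound is one float32 ulp below the following lower bound.
assert np.all(bins[:-1, 1] < bins[1:, 0])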
Example #48
0
from aesara.compile.builders import OpFromGraph
from aesara.graph.basic import Apply
from aesara.graph.op import Op
from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex
from aesara.scan import until
from aesara.tensor.elemwise import Elemwise
from aesara.tensor.slinalg import Cholesky, Solve

from pymc3.aesaraf import floatX
from pymc3.distributions.shape_utils import to_tuple
from pymc3.distributions.special import gammaln

f = floatX
c = -0.5 * np.log(2.0 * np.pi)
_beta_clip_values = {
    dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype))
    for dtype in ["float16", "float32", "float64"]
}


def bound(logp, *conditions, **kwargs):
    """
    Bounds a log probability density with several conditions.
    When conditions are not met, the logp values are replaced by -inf.

    Note that bound should not be used to enforce the logic of the logp under the normal
    support as it can be disabled by the user via check_bounds = False in pm.Model()

    Parameters
    ----------
    logp: float
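For context on the _beta_clip_values table defined at the top of this example: np.nextafter(0, 1, dtype=dtype) and np.nextafter(1, 0, dtype=dtype) are the tightest representable bounds inside the open interval (0, 1) for each float width, presumably used to clip values so that logarithms of x and 1 - x stay finite. A quick check of the float32 entry, assuming only NumPy:

import numpy as np

lo, hi = np.nextafter(0, 1, dtype="float32"), np.nextafter(1, 0, dtype="float32")
assert 0 < lo < hi < 1
assert np.isfinite(np.log(lo)) and np.isfinite(np.log1p(-hi))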
Example #49
0
    def _step_impl(self):
        t = self.t
        D = self.D

        max_step = self.max_step
        min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
        if self.h_abs > max_step:
            h_abs = max_step
            change_D(D, self.order, max_step / self.h_abs)
            self.n_equal_steps = 0
        elif self.h_abs < min_step:
            h_abs = min_step
            change_D(D, self.order, min_step / self.h_abs)
            self.n_equal_steps = 0
        else:
            h_abs = self.h_abs

        atol = self.atol
        rtol = self.rtol
        order = self.order

        alpha = self.alpha
        gamma = self.gamma
        error_const = self.error_const

        J = self.J
        LU = self.LU
        current_jac = self.jac is None

        step_accepted = False
        while not step_accepted:
            if h_abs < min_step:
                return False, self.TOO_SMALL_STEP

            h = h_abs * self.direction
            t_new = t + h

            if self.direction * (t_new - self.t_bound) > 0:
                t_new = self.t_bound
                change_D(D, order, np.abs(t_new - t) / h_abs)
                self.n_equal_steps = 0
                LU = None

            h = t_new - t
            h_abs = np.abs(h)

            y_predict = np.sum(D[:order + 1], axis=0)

            scale = atol + rtol * np.abs(y_predict)
            psi = np.dot(D[1:order + 1].T, gamma[1:order + 1]) / alpha[order]

            converged = False
            c = h / alpha[order]
            while not converged:
                if LU is None:
                    LU = self.lu(self.I - c * J)

                converged, n_iter, y_new, d = solve_bdf_system(
                    self.fun, t_new, y_predict, c, psi, LU, self.solve_lu,
                    scale, self.newton_tol)

                if not converged:
                    if current_jac:
                        break
                    J = self.jac(t_new, y_predict)
                    LU = None
                    current_jac = True

            if not converged:
                factor = 0.5
                h_abs *= factor
                change_D(D, order, factor)
                self.n_equal_steps = 0
                LU = None
                continue

            safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER +
                                                       n_iter)

            scale = atol + rtol * np.abs(y_new)
            error = error_const[order] * d
            error_norm = norm(error / scale)

            if error_norm > 1:
                factor = max(MIN_FACTOR,
                             safety * error_norm**(-1 / (order + 1)))
                h_abs *= factor
                change_D(D, order, factor)
                self.n_equal_steps = 0
                # As we didn't have problems with convergence, we don't
                # reset LU here.
            else:
                step_accepted = True

        self.n_equal_steps += 1

        self.t = t_new
        self.y = y_new

        self.h_abs = h_abs
        self.J = J
        self.LU = LU

        # Update differences. The principal relation here is
        # D^{j + 1} y_n = D^{j} y_n - D^{j} y_{n - 1}. Keep in mind that D
        # contained difference for previous interpolating polynomial and
        # d = D^{k + 1} y_n. Thus this elegant code follows.
        D[order + 2] = d - D[order + 1]
        D[order + 1] = d
        for i in reversed(range(order + 1)):
            D[i] += D[i + 1]

        if self.n_equal_steps < order + 1:
            return True, None

        if order > 1:
            error_m = error_const[order - 1] * D[order]
            error_m_norm = norm(error_m / scale)
        else:
            error_m_norm = np.inf

        if order < MAX_ORDER:
            error_p = error_const[order + 1] * D[order + 2]
            error_p_norm = norm(error_p / scale)
        else:
            error_p_norm = np.inf

        error_norms = np.array([error_m_norm, error_norm, error_p_norm])
        factors = error_norms**(-1 / np.arange(order, order + 3))

        delta_order = np.argmax(factors) - 1
        order += delta_order
        self.order = order

        factor = min(MAX_FACTOR, safety * np.max(factors))
        self.h_abs *= factor
        change_D(D, order, factor)
        self.n_equal_steps = 0
        self.LU = None

        return True, None
Example #50
0
    def test_half_fpe(self):
        with np.errstate(all='raise'):
            sx16 = np.array((1e-4,), dtype=float16)
            bx16 = np.array((1e4,), dtype=float16)
            sy16 = float16(1e-4)
            by16 = float16(1e4)

            # Underflow errors
            assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sx16)
            assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sy16)
            assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sx16)
            assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sy16)
            assert_raises_fpe('underflow', lambda a, b:a/b, sx16, bx16)
            assert_raises_fpe('underflow', lambda a, b:a/b, sx16, by16)
            assert_raises_fpe('underflow', lambda a, b:a/b, sy16, bx16)
            assert_raises_fpe('underflow', lambda a, b:a/b, sy16, by16)
            assert_raises_fpe('underflow', lambda a, b:a/b,
                                             float16(2.**-14), float16(2**11))
            assert_raises_fpe('underflow', lambda a, b:a/b,
                                             float16(-2.**-14), float16(2**11))
            assert_raises_fpe('underflow', lambda a, b:a/b,
                                             float16(2.**-14+2**-24), float16(2))
            assert_raises_fpe('underflow', lambda a, b:a/b,
                                             float16(-2.**-14-2**-24), float16(2))
            assert_raises_fpe('underflow', lambda a, b:a/b,
                                             float16(2.**-14+2**-23), float16(4))

            # Overflow errors
            assert_raises_fpe('overflow', lambda a, b:a*b, bx16, bx16)
            assert_raises_fpe('overflow', lambda a, b:a*b, bx16, by16)
            assert_raises_fpe('overflow', lambda a, b:a*b, by16, bx16)
            assert_raises_fpe('overflow', lambda a, b:a*b, by16, by16)
            assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sx16)
            assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sy16)
            assert_raises_fpe('overflow', lambda a, b:a/b, by16, sx16)
            assert_raises_fpe('overflow', lambda a, b:a/b, by16, sy16)
            assert_raises_fpe('overflow', lambda a, b:a+b,
                                             float16(65504), float16(17))
            assert_raises_fpe('overflow', lambda a, b:a-b,
                                             float16(-65504), float16(17))
            assert_raises_fpe('overflow', np.nextafter, float16(65504), float16(np.inf))
            assert_raises_fpe('overflow', np.nextafter, float16(-65504), float16(-np.inf))
            assert_raises_fpe('overflow', np.spacing, float16(65504))

            # Invalid value errors
            assert_raises_fpe('invalid', np.divide, float16(np.inf), float16(np.inf))
            assert_raises_fpe('invalid', np.spacing, float16(np.inf))
            assert_raises_fpe('invalid', np.spacing, float16(np.nan))
            assert_raises_fpe('invalid', np.nextafter, float16(np.inf), float16(0))
            assert_raises_fpe('invalid', np.nextafter, float16(-np.inf), float16(0))
            assert_raises_fpe('invalid', np.nextafter, float16(0), float16(np.nan))

            # These should not raise
            float16(65472)+float16(32)
            float16(2**-13)/float16(2)
            float16(2**-14)/float16(2**10)
            np.spacing(float16(-65504))
            np.nextafter(float16(65504), float16(-np.inf))
            np.nextafter(float16(-65504), float16(np.inf))
            float16(2**-14)/float16(2**10)
            float16(-2**-14)/float16(2**10)
            float16(2**-14+2**-23)/float16(2)
            float16(-2**-14-2**-23)/float16(2)
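The overflow and invalid cases exercised above follow from IEEE-754 semantics at half precision: stepping nextafter past the largest finite float16 (±65504) yields an infinity, and stepping from an infinity or toward a NaN has no finite neighbour. With the floating-point errors silenced instead of raised, the same calls simply return inf or NaN:

import numpy as np

with np.errstate(over='ignore', invalid='ignore'):
    assert np.isinf(np.nextafter(np.float16(65504), np.float16(np.inf)))
    assert np.isnan(np.nextafter(np.float16(0), np.float16(np.nan)))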
Example #51
0
def main(dirpath, skip_old=False, num_specs=1):
    uc = ursgal.UController()
    uc.params.update({
        'bigger_scores_better': False,
        'num_compared_psms': 10,
        'accept_conflicting_psms': False,
        'threshold_is_log10': True,
        'score_diff_threshold': 1,
        'psm_defining_colnames': [
            'Spectrum Title',
            'Sequence',
        ],
    })
    pkl_name = os.path.join(dirpath, 'datasets_result.pkl')
    fdr_pkl_name = os.path.join(dirpath, 'fdr_result.pkl')
    old_exists = False
    if os.path.exists(pkl_name) and skip_old is True:
        #load results from previous analysis
        #will only add datasets that are not part of it already
        print('>>>>>>>> loading pkl <<<<<<<<<<<')
        results_dict = pickle.load(open(pkl_name, 'rb'))
        fdr_dict = pickle.load(open(fdr_pkl_name, 'rb'))
        old_exists = True
    else:
        #collect proteins and peptides from result csv,
        #store in dict with all important data
        results_dict = {
            'all': {
                'num_spectra': 0,
                'instrument': set(),
                'lab': set(),
                #protein_groups, proteins and peptides are dicts that contain sets for each level of confidence
                'protein_groups': {
                    'all': set(),
                    'safe_psm': set(),
                    'safe_seq': set(),
                    'safe_seq_num_spec': set(),
                    'safe_seq_num_spec_0005': set()
                },
                'proteins': {
                    'all': set(),
                    'safe_psm': set(),
                    'safe_seq': set(),
                    'safe_seq_num_spec': set(),
                    'safe_seq_num_spec_0005': set()
                },
                'peptides': {
                    'all': set(),
                    'safe': set(),
                    'safe_num_specs': set()
                },
                'spectra': {
                    'all': set()
                },
                #protein_dict in contrast is a nested dict with protein/protein_group --> peptide sequence --> spectral information
                #(containing lists of 'spec_title', 'bayes_pep', modifications', 'charge', 'psm_q_value', 'start_stop')
                'protein_dict': {},
                'original_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                },
                '3engines_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                },
                'combined_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                }
            }
        }
        fdr_dict = {
            'peptides_seq_level': {},
            'peptides_psm_level': {},
            'peptides_seq_level_2specs': {},
            'proteins_seq_level': {},
            'proteins_psm_level': {},
            'proteins_seq_level_2specs': {},
        }

    result_file_list = []
    org_peptide_dict = {}
    for PRIDE_ID in datasets.keys():
        if skip_old is True and old_exists is True and PRIDE_ID in results_dict:
            continue
        print('reading:', PRIDE_ID)
        instrument = datasets[PRIDE_ID]['instrument']
        results_dict['all']['instrument'].add(instrument)
        lab = datasets[PRIDE_ID]['lab']
        results_dict['all']['lab'].add(lab)
        results_dict['all']['num_spectra'] += datasets[PRIDE_ID]['num_spectra']
        if PRIDE_ID not in results_dict.keys():
            results_dict[PRIDE_ID] = {
                'num_spectra': datasets[PRIDE_ID]['num_spectra'],
                'instrument': instrument,
                'lab': lab,
                'protein_groups': {
                    'all': set(),
                    'safe_psm': set(),
                    'safe_seq': set(),
                    'safe_seq_num_spec': set(),
                    'safe_seq_num_spec_0005': set()
                },
                'proteins': {
                    'all': set(),
                    'safe_psm': set(),
                    'safe_seq': set(),
                    'safe_seq_num_spec': set(),
                    'safe_seq_num_spec_0005': set()
                },
                'peptides': {
                    'all': set(),
                    'safe': set(),
                    'safe_num_specs': set()
                },
                'spectra': {
                    'all': set()
                },
                'protein_dict': {},
                'original_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                },
                '3engines_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                },
                'combined_results': {
                    'peptides': {
                        'all': set(),
                        'safe': set()
                    },
                    'spectra': {
                        'all': set()
                    },
                },
            }

        results2be_merged = []
        if type(datasets[PRIDE_ID]['result_file']) == list:
            print('list', PRIDE_ID)
            for result_file in datasets[PRIDE_ID]['result_file']:
                results2be_merged.append(os.path.join(PRIDE_ID, result_file))
        elif datasets[PRIDE_ID]['result_file'] is not None:
            print('not_list', PRIDE_ID)
            results2be_merged.append(
                os.path.join(PRIDE_ID, datasets[PRIDE_ID]['result_file']))
        else:
            print('Could not find result file(s) for dataset:', PRIDE_ID)
            sys.exit(1)

        # merge if multiple files
        merged_file = uc.execute_misc_engine(
            input_file=results2be_merged,
            engine='merge_csvs',
            merge_duplicates=False,
        )

        #collect proteins, peptides and corresponding spectrum_titles
        result_file_list.append(merged_file)
        protein_ids = set()
        protein_groups = set()
        with open(merged_file, 'r') as in_file:
            result_csv = csv.DictReader(in_file)
            for line_dict in result_csv:
                seq = line_dict['Sequence']  #+ line_dict['Modifications']
                mod = line_dict['Modifications']
                charge = line_dict['Charge']
                seq_mod = '{0}#{1}'.format(seq, mod)
                seq_length = len(seq)
                spec_title = line_dict['Spectrum Title']
                sample = spec_title.split('.')[0]
                is_decoy = line_dict['Is decoy']
                prot = line_dict['Protein ID']
                start = line_dict['Sequence Start']
                stop = line_dict['Sequence Stop']
                psm_q_value = float(line_dict['combined PEP'])
                bayes_pep = float(line_dict['Bayes PEP'])
                if psm_q_value <= 0.01:
                    if seq_length not in fdr_dict['peptides_psm_level'].keys():
                        fdr_dict['peptides_psm_level'][seq_length] = {}
                    if seq not in fdr_dict['peptides_psm_level'][
                            seq_length].keys():
                        fdr_dict['peptides_psm_level'][seq_length][seq] = (
                            psm_q_value, is_decoy)
                    elif psm_q_value < fdr_dict['peptides_psm_level'][
                            seq_length][seq][0]:
                        fdr_dict['peptides_psm_level'][seq_length][seq] = (
                            psm_q_value, is_decoy)
                else:
                    print(
                        'Results should be filtered by combined PEP <= 1% (but should contain targets and decoys)'
                    )
                    sys.exit(1)

                # differentiate between protein groups and proteins
                # and remove contaminants
                if len(prot.split('<|>')) > 1:
                    contaminants = True
                    for p in prot.split('<|>'):
                        prot_id = p.split(' ')[0]
                        if 'HVO' not in prot_id:
                            continue
                        else:
                            contaminants = False
                    if contaminants is False and is_decoy == 'false':
                        results_dict[PRIDE_ID]['protein_groups']['all'].add(
                            line_dict['Protein ID'])
                        results_dict[PRIDE_ID]['peptides']['all'].add(seq)
                        results_dict[PRIDE_ID]['spectra']['all'].add(
                            spec_title)
                else:
                    contaminants = False
                    prot_id = prot.split(' ')[0]
                    if 'HVO' not in prot_id:
                        contaminants = True
                    if contaminants is False and is_decoy == 'false':
                        results_dict[PRIDE_ID]['proteins']['all'].add(
                            line_dict['Protein ID'])
                        results_dict[PRIDE_ID]['peptides']['all'].add(seq)
                        results_dict[PRIDE_ID]['spectra']['all'].add(
                            spec_title)

                #add info to protein_dict
                if prot not in results_dict[PRIDE_ID]['protein_dict'].keys():
                    results_dict[PRIDE_ID]['protein_dict'][prot] = {}
                if seq not in results_dict[PRIDE_ID]['protein_dict'][
                        prot].keys():
                    results_dict[PRIDE_ID]['protein_dict'][prot][seq] = {
                        'spec_title': [],
                        'bayes_pep': [],
                        'modifications': [],
                        'charge': [],
                        'psm_q_value': [],
                        'start_stop': (start, stop),
                    }
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'spec_title'].append(spec_title)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'bayes_pep'].append(bayes_pep)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'psm_q_value'].append(psm_q_value)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'modifications'].append(mod)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'charge'].append(charge)

        #read results from original (and intermediate) result files
        for file_type in ['original_file', '3engines_file', 'combined_file']:
            results_type = '{0}_results'.format(file_type.split('_')[0])
            if datasets[PRIDE_ID][file_type] is not None:
                filepath_org = os.path.join(
                    dirpath,
                    PRIDE_ID,
                    datasets[PRIDE_ID][file_type],
                )
                with open(filepath_org, 'r') as in_file:
                    result_csv = csv.DictReader(in_file)
                    for line_dict in result_csv:
                        seq = line_dict['Sequence']
                        spec_title = line_dict['Spectrum Title']
                        results_dict[PRIDE_ID][results_type]['peptides'][
                            'all'].add(seq)
                        results_dict[PRIDE_ID][results_type]['spectra'][
                            'all'].add(spec_title)
                        if seq not in org_peptide_dict.keys():
                            org_peptide_dict[seq] = set()
                        org_peptide_dict[seq].add(spec_title)

        # merge identifications from each dataset into "all"
        for level in ['protein_groups', 'proteins', 'peptides', 'spectra']:
            results_dict['all'][level]['all'] |= results_dict[PRIDE_ID][level][
                'all']
        for results_type in [
                'original_results', '3engines_results', 'combined_results'
        ]:
            for level in ['peptides', 'spectra']:
                results_dict['all'][results_type][level][
                    'all'] |= results_dict[PRIDE_ID][results_type][level][
                        'all']
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            if prot not in results_dict['all']['protein_dict'].keys():
                results_dict['all']['protein_dict'][prot] = {'datasets': set()}
            results_dict['all']['protein_dict'][prot]['datasets'].add(PRIDE_ID)
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                start_stop = results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'start_stop']
                if seq not in results_dict['all']['protein_dict'][prot].keys():
                    results_dict['all']['protein_dict'][prot][seq] = {
                        'spec_title': [],
                        'bayes_pep': [],
                        'modifications': [],
                        'charge': [],
                        'psm_q_value': [],
                        'start_stop': start_stop,
                    }
                for k, v in results_dict[PRIDE_ID]['protein_dict'][prot][
                        seq].items():
                    if k == 'start_stop':
                        continue
                    results_dict['all']['protein_dict'][prot][seq][k].extend(v)

    # Calculate q-values
    # peptides first, then proteins
    for PRIDE_ID in results_dict.keys():
        # generate input dict for q_value calculation function
        seq_q_value_dict = {}
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                if seq == 'datasets':
                    continue
                seq_length = len(seq)
                if seq_length not in seq_q_value_dict.keys():
                    seq_q_value_dict[seq_length] = {}
                min_bayes_pep = min(results_dict[PRIDE_ID]['protein_dict']
                                    [prot][seq]['bayes_pep'])
                if 'decoy_' in prot:
                    is_decoy = True
                else:
                    is_decoy = False
                seq_q_value_dict[seq_length][seq] = {
                    'Bayes PEP': min_bayes_pep,
                    'Is decoy': is_decoy,
                }

        print('calculating q-values on peptide level')
        seq_calc_q_value_dict = calculate_q_value_by_group(seq_q_value_dict,
                                                           sliding=False)

        # read results from peptide q_value calc, at the same time
        # generate input dict for proteins for q_value calculation function
        prot_q_value_dict = {'seq_level': {}, 'psm_level': {}}
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            contaminants = False
            prot_id = prot.split(' ')[0]
            if 'HVO' not in prot_id:
                contaminants = True
            if 'decoy_' in prot:
                is_decoy = True
            else:
                is_decoy = False
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                if seq == 'datasets':
                    continue
                seq_length = len(seq)
                seq_q_value = seq_calc_q_value_dict[seq_length][seq][
                    'combined PEP']
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'seq_q_value'] = seq_q_value

                if seq_q_value <= SEQ_Q_VALUE_THRESHOLD:
                    if PRIDE_ID == 'all':
                        if seq_length not in fdr_dict[
                                'peptides_seq_level'].keys():
                            fdr_dict['peptides_seq_level'][seq_length] = {}
                        fdr_dict['peptides_seq_level'][seq_length][seq] = (
                            seq_q_value, is_decoy)
                    counts = len(
                        set(results_dict[PRIDE_ID]['protein_dict'][prot][seq]
                            ['spec_title']))
                    if is_decoy is False and contaminants is False:
                        results_dict[PRIDE_ID]['peptides']['safe'].add(seq)
                        if counts >= num_specs:
                            results_dict[PRIDE_ID]['peptides'][
                                'safe_num_specs'].add(seq)
                            if PRIDE_ID == 'all':
                                if seq_length not in fdr_dict[
                                        'peptides_seq_level_2specs'].keys():
                                    fdr_dict['peptides_seq_level_2specs'][
                                        seq_length] = {}
                                fdr_dict['peptides_seq_level_2specs'][
                                    seq_length][seq] = (seq_q_value, is_decoy)
                    min_bayes_pep = min(results_dict[PRIDE_ID]['protein_dict']
                                        [prot][seq]['bayes_pep'])
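                    # avoid log10(0): np.nextafter(0, 1) is the smallest positive float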
                    if min_bayes_pep == 0.0:
                        min_bayes_pep = np.nextafter(0, 1)
                    log_seq_bayes = math.log10(min_bayes_pep)
                    if prot not in prot_q_value_dict['seq_level'].keys():
                        prot_q_value_dict['seq_level'][prot] = {
                            'Bayes PEP': log_seq_bayes,
                            'Is decoy': is_decoy,
                        }
                    else:
                        prot_q_value_dict['seq_level'][prot][
                            'Bayes PEP'] += log_seq_bayes

                for bayes_pep in results_dict[PRIDE_ID]['protein_dict'][prot][
                        seq]['bayes_pep']:
                    if bayes_pep == 0.0:
                        bayes_pep = np.nextafter(0, 1)
                    log_psm_bayes = math.log10(bayes_pep)
                    if prot not in prot_q_value_dict['psm_level'].keys():
                        prot_q_value_dict['psm_level'][prot] = {
                            'Bayes PEP': log_psm_bayes,
                            'Is decoy': is_decoy,
                        }
                    else:
                        prot_q_value_dict['psm_level'][prot][
                            'Bayes PEP'] += log_psm_bayes

        print('calculating q-values on protein level')
        prot_calc_q_value_dict = calculate_q_value_by_group(prot_q_value_dict,
                                                            sliding=False,
                                                            picked_fdr=True)

        # read results from protein q_value calc
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            contaminants = False
            prot_id = prot.split(' ')[0]
            if 'HVO' not in prot_id:
                contaminants = True
            if 'decoy_' in prot:
                is_decoy = True
            else:
                is_decoy = False
            for level in ['psm_level', 'seq_level']:
                if prot in prot_calc_q_value_dict[level].keys():
                    prot_q_value = prot_calc_q_value_dict[level][prot][
                        'combined PEP']
                    prot_bayes_pep = prot_calc_q_value_dict[level][prot][
                        'Bayes PEP']
                else:
                    prot_q_value = 1
                    prot_bayes_pep = 1
                # count number of spectra for each prot (for seq FDR > 1%)
                # collect samples for simple protein inference model
                counts = 0
                samples = set()
                for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                    if seq in [
                            'datasets', 'prot_q_value_seq', 'prot_q_value_psm',
                            'samples'
                    ]:
                        continue
                    if results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                            'seq_q_value'] > 0.01:
                        continue
                    psm_set = set(results_dict[PRIDE_ID]['protein_dict'][prot]
                                  [seq]['spec_title'])
                    counts += len(psm_set)
                    for psm in psm_set:
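                        # spectrum titles are assumed to be <ms file name>.<start scan>.<end scan>.<charge>; drop the last three fields to recover the file name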
                        ms_filename = '.'.join(psm.split('.')[:-3])
                        samples.add(
                            ms_filename2sample.get(ms_filename, ms_filename))

                if PRIDE_ID == 'all':
                    if level == 'seq_level':
                        fdr_dict['proteins_seq_level'][prot] = (prot_bayes_pep,
                                                                is_decoy)
                        if counts >= num_specs:
                            fdr_dict['proteins_seq_level_2specs'][prot] = (
                                prot_bayes_pep, is_decoy)
                    else:
                        fdr_dict['proteins_psm_level'][prot] = (prot_bayes_pep,
                                                                is_decoy)
                if prot_q_value <= 0.01 and is_decoy is False and contaminants is False:
                    if level == 'seq_level':
                        if len(prot.split('<|>')) > 1:
                            results_dict[PRIDE_ID]['protein_groups'][
                                'safe_seq'].add(prot)
                            if counts >= num_specs:
                                results_dict[PRIDE_ID]['protein_groups'][
                                    'safe_seq_num_spec'].add(prot)
                                if prot_q_value <= PROT_Q_VALUE_THRESHOLD:
                                    results_dict[PRIDE_ID]['protein_groups'][
                                        'safe_seq_num_spec_0005'].add(prot)
                        else:
                            results_dict[PRIDE_ID]['proteins']['safe_seq'].add(
                                prot)
                            if counts >= num_specs:
                                results_dict[PRIDE_ID]['proteins'][
                                    'safe_seq_num_spec'].add(prot)
                                if prot_q_value <= PROT_Q_VALUE_THRESHOLD:
                                    results_dict[PRIDE_ID]['proteins'][
                                        'safe_seq_num_spec_0005'].add(prot)
                    elif counts >= num_specs:
                        if len(prot.split('<|>')) > 1:
                            results_dict[PRIDE_ID]['protein_groups'][
                                'safe_psm'].add(prot)
                        else:
                            results_dict[PRIDE_ID]['proteins']['safe_psm'].add(
                                prot)
                if level == 'seq_level':
                    results_dict[PRIDE_ID]['protein_dict'][prot][
                        'prot_q_value_seq'] = prot_q_value
                else:
                    results_dict[PRIDE_ID]['protein_dict'][prot][
                        'prot_q_value_psm'] = prot_q_value
                results_dict[PRIDE_ID]['protein_dict'][prot][
                    'samples'] = samples
        print(
            'Number of confident protein identifications for {0}: {1}'.format(
                PRIDE_ID,
                len(results_dict[PRIDE_ID]['proteins']
                    ['safe_seq_num_spec_0005'])))

    # save results in a pickle file
    with open(pkl_name, 'wb') as pkl_file:
        pickle.dump(results_dict, pkl_file)
    print('pickled results: ', pkl_name)

    with open(fdr_pkl_name, 'wb') as fdr_pkl_file:
        pickle.dump(fdr_dict, fdr_pkl_file)
    print('pickled fdr_dict: ', fdr_pkl_name)
Example #52
0
 def sample(self, n, d=None, rng=np.random):
     shape = self._sample_shape(n, d)
     x = rng.exponential(self.scale, shape) + self.shift
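     # np.nextafter(self.high, -inf) is the largest float strictly below high, so the clipped samples stay < high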
     high = np.nextafter(self.high, np.asarray(-np.inf, dtype=x.dtype))
     return npext.clip(x, self.shift, high)
Example #53
0
    def __init__(self,
                 dtype,
                 default_round,
                 warp="linear",
                 values=None,
                 range_=None):
        """Generic constructor of `Space` class.

        Not intended to be called directly but instead by child classes. However, `Space` is not an abstract class and
        will not give an error when instantiated.
        """
        self.dtype = dtype
        assert warp in WARP_DICT, "invalid space %s, allowed spaces are: %s" % (
            str(warp), str(WARP_DICT.keys()))
        self.warp_f = WARP_DICT[warp]
        self.unwarp_f = UNWARP_DICT[warp]

        # Set up range and rounding if values is supplied
        assert (values is None) != (range_ is None)
        round_to_values = default_round
        if range_ is None:  # => values is not None
            # Debatable if unique should be done before or after cast. But I
            # think after is better, esp. when changing precisions.
            values = np.asarray(values, dtype=dtype)
            values = np.unique(values)  # values now 1D ndarray no matter what
            check_array(
                values,
                "unique values",
                pre=True,
                ndim=1,
                dtype=dtype,
                min_size=2,
                allow_infinity=False,
                allow_nan=False,
            )

            # Extrapolation might happen due to numerics in type conversions.
            # Bounds checking is still done in validate routines.
            round_to_values = interp1d(values,
                                       values,
                                       kind="nearest",
                                       fill_value="extrapolate")
            range_ = (values[0], values[-1])
        # Save values and rounding
        # Values is either None or was validated inside if statement
        self.values = values
        self.round_to_values = round_to_values

        # Note that if dtype=None that is the default for asarray.
        range_ = np.asarray(range_, dtype=dtype)
        check_array(range_,
                    "range",
                    pre=True,
                    shape=(2, ),
                    dtype=dtype,
                    unsorted=False)
        # Save range info, with input validation and post validation
        self.lower, self.upper = range_

        # Convert to warped bounds too with lots of post validation
        self.lower_warped, self.upper_warped = self.warp_f(
            range_[..., None]).astype(WARPED_DTYPE, copy=False)
        check_array(
            self.lower_warped,
            "warped lower bound %s(%.1f)" % (warp, self.lower),
            ndim=1,
            pre=True,
            dtype=WARPED_DTYPE,
            allow_infinity=False,
            allow_nan=False,
        )
        # Should never happen if warpers are strictly monotonic:
        assert np.all(self.lower_warped <= self.upper_warped)

        # Make sure a bit bigger to keep away from lower due to numerics
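        # np.nextafter(lower_warped, np.inf) is one ulp above the lower bound, so the warped range cannot collapse to a point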
        self.upper_warped = np.maximum(self.upper_warped,
                                       np.nextafter(self.lower_warped, np.inf))
        check_array(
            self.upper_warped,
            "warped upper bound %s(%.1f)" % (warp, self.upper),
            pre=True,
            shape=self.lower_warped.shape,
            dtype=WARPED_DTYPE,
            allow_infinity=False,
            allow_nan=False,
        )
        # Should never happen if warpers are strictly monotonic:
        assert np.all(self.lower_warped < self.upper_warped)
Example #54
0
    def _step_impl(self):
        t = self.t
        y = self.y

        max_step = self.max_step
        rtol = self.rtol
        atol = self.atol

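        # smallest sensible step size: 10 ulps of t in the integration direction (nextafter gives the adjacent float)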
        min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)

        if self.h_abs > max_step:
            h_abs = max_step
        elif self.h_abs < min_step:
            h_abs = min_step
        else:
            h_abs = self.h_abs

        step_accepted = False
        step_rejected = False

        while not step_accepted:
            if h_abs < min_step:
                return False, self.TOO_SMALL_STEP

            h = h_abs * self.direction
            t_new = t + h

            if self.direction * (t_new - self.t_bound) > 0:
                t_new = self.t_bound

            h = t_new - t
            h_abs = np.abs(h)

            y_new, f_new = rk_step(self.fun, t, y, self.f, h, self.A, self.B,
                                   self.C, self.K)
            scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
            error_norm = self._estimate_error_norm(self.K, h, scale)

            if error_norm < 1:
                if error_norm == 0:
                    factor = MAX_FACTOR
                else:
                    factor = min(MAX_FACTOR,
                                 SAFETY * error_norm**self.error_exponent)

                if step_rejected:
                    factor = min(1, factor)

                h_abs *= factor

                step_accepted = True
            else:
                h_abs *= max(MIN_FACTOR,
                             SAFETY * error_norm**self.error_exponent)
                step_rejected = True

        self.h_previous = h
        self.y_old = y

        self.t = t_new
        self.y = y_new

        self.h_abs = h_abs
        self.f = f_new

        return True, None
Example #55
0
 def fit(self, X):
     self.mean = np.mean(X, axis=0)
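     # adding np.nextafter(0, 1) (the smallest positive float) keeps the variance strictly positive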
     self.var = np.var(X, axis=0) + np.nextafter(0, 1)
Example #56
0
def fitgalaxy(img, psfs, sigmainverse, band, modelspecs, mask=None, modellib=None, modellibopts=None,
              plot=False, name=None, models=None, fitsbyengine=None, redoall=True,
              ):
    """

    :param img: ndarray; 2D Image
    :param psfs: Collection of proutil.PSF object
    :param sigmainverse: ndarray; 2D Inverse sigma image ndarr
    :param band: string; Filter/passband name
    :param mask: ndarray; 2D Inverse mask image (1=include, 0=omit)
    :param modelspecs: Model specifications as returned by getmodelspecs
    :param modellib: string; Model fitting library
    :param modellibopts: dict; Model fitting library options
    :param plot: bool; Make plots?
    :param name: string; Name of the model for plot labelling

    :return: fitsbyengine, models: tuple of complicated structures:
        modelinfos: dict; key=model name: value=dict; TBD
        models: dict; key=engine name: value=dict(key=model type: value=proobj.Model of that type)
        psfmodels: dict: TBD
    """
    initfrommoments = {name: value for name, value in zip(["axrat", "ang", "re"],
                                                          getellipseestimate(img.array))}
    engines = {
        "galsim": {"gsparams": gs.GSParams(kvalue_accuracy=1e-2, integration_relerr=1e-2,
                                           integration_abserr=1e-3, maximum_fft_size=16384)}
    }
    title = name if plot else None
    npiximg = np.flip(img.array.shape, axis=0)
    flux = np.sum(img.array[mask] if mask is not None else img.array)

    valuesmax = {
        "re": np.sqrt(np.sum((npiximg/2.)**2)),
        "flux": 10*np.sum(img.array),
    }
    # TODO: validate specs
    specs = {name: idx for idx, name in enumerate(modelspecs[1])}
    models = {} if (models is None) or redoall else models
    paramsfixeddefault = {}
    fitsbyengine = {} if ((models is None) or (fitsbyengine is None) or redoall) else fitsbyengine
    usemodellibdefault = modellibopts is None
    for engine, engineopts in engines.items():
        if (engine not in fitsbyengine) or redoall:
            fitsbyengine[engine] = {}
        fitsengine = fitsbyengine[engine]
        if plot:
            nrows = len(modelspecs[0])
            # Change to landscape
            figure, axes = plt.subplots(nrows=min([5, nrows]), ncols=max([5, nrows]))
            if nrows > 5:
                axes = np.transpose(axes)
            # This keeps things consistent with the nrows>1 case
            if nrows == 1:
                axes = np.array([axes])
            plt.suptitle(title + " {} model".format(engine))
            flipplot = nrows > 5
        else:
            figure = None
            axes = None
            flipplot = None
        for modelidx, modelinfo in enumerate(modelspecs[0]):
            modelname = modelinfo[specs["name"]]
            modeltype = modelinfo[specs["model"]]
            modeldefault = proutil.getmodel(
                {band: flux}, modeltype, npiximg, engine=engine, engineopts=engineopts
            )
            paramsfixeddefault[modeltype] = [param.fixed for param in
                                             modeldefault.getparameters(fixed=True)]
            model = modeldefault if (redoall or modeltype not in models) else models[modeltype]
            psfname = modelinfo[specs["psfmodel"]] + ("_pixelated" if proutil.str2bool(
                modelinfo[specs["psfpixel"]]) else "")
            proutil.setexposure(model, band, image=img.array, sigmainverse=sigmainverse,
                                psf=psfs[psfname]["object"], mask=mask)
            if not redoall and (modelname in fitsbyengine[engine]):
                if plot:
                    valuesbest = fitsengine[modelname]['fits'][-1]['paramsbestalltransformed']
                    # TODO: consider how to avoid code repetition here and below
                    modeldescs = {x: [] for x in ['f', 'n', 'r']}
                    formats = {x: '{:.1f}' if x == 'r' else '{:.2f}' for x in ['f', 'n', 'r']}
                    for param, value in zip(model.getparameters(fixed=True), valuesbest):
                        param.setvalue(value, transformed=True)
                        if param.name == "nser":
                            modeldescs['n'].append(param)
                        elif param.name == "re":
                            modeldescs['r'].append(param)
                        elif isfluxratio(param) and param.getvalue(transformed=False) < 1:
                            modeldescs['f'].append(param)
                    modeldescs = [paramname + '=' + ','.join(
                        [formats[paramname].format(param.getvalue(transformed=False)) for param in params])
                        for paramname, params in modeldescs.items() if params]
                    modeldescs = ';'.join(modeldescs)
                    if title is not None:
                        plt.suptitle(title)
                    model.evaluate(plot=plot, modelname=modelname,
                                   modeldesc=modeldescs if modeldescs else None, figure=figure, axes=axes,
                                   figurerow=modelidx, flipplot=flipplot)
                    plt.show(block=False)
            else:
                inittype = modelinfo[specs["inittype"]]
                if inittype == "moments":
                    for param in model.getparameters(fixed=False):
                        if param.name in initfrommoments:
                            param.setvalue(initfrommoments[param.name], transformed=False)
                else:
                    # TODO: Refactor into function
                    if inittype.startswith("best"):
                        if inittype == "best":
                            modelnamecomps = []
                            for modelidxcomp in range(modelidx):
                                modelinfocomp = modelspecs[0][modelidxcomp]
                                if modelinfocomp[specs["model"]] == modeltype:
                                    modelnamecomps.append(modelinfocomp[specs['name']])
                        else:
                            # TODO: Check this more thoroughly
                            modelnamecomps = inittype.split(":")[1].split(";")
                            print(modelnamecomps)
                        chisqredbest = np.inf
                        for modelnamecomp in modelnamecomps:
                            chisqred = fitsbyengine[engine][modelnamecomp]["fits"][-1]["chisqred"]
                            if chisqred < chisqredbest:
                                chisqredbest = chisqred
                                inittype = modelnamecomp
                    else:
                        inittype = inittype.split(';')
                        if len(inittype) > 1:
                            modelfits = [{
                                'paramvals': fitsengine[initname]['fits'][-1]['paramsbestall'],
                                'paramtree': models[fitsengine[initname]['modeltype']].getparameters(
                                    fixed=True, flatten=False),
                                'params': models[fitsengine[initname]['modeltype']].getparameters(fixed=True),
                                'chisqred': fitsengine[initname]['fits'][-1]['chisqred'],
                                'modeltype': fitsengine[initname]['modeltype']}
                                for initname in inittype
                            ]
                            initmodelfrommodelfits(model, modelfits)
                            inittype = None
                        else:
                            inittype = inittype[0]
                            if inittype not in fitsbyengine[engine]:
                                # TODO: Fail or fall back here?
                                raise RuntimeError("Model {} can't find reference {} "
                                    "to initialize from".format(modelname, inittype))
                    if inittype:
                        paramvalsinit = fitsbyengine[engine][inittype]["fits"][-1]["paramsbestall"]
                        for param, value in zip(model.getparameters(fixed=True), paramvalsinit):
                            param.setvalue(value, transformed=False)

                # Reset parameter fixed status
                for param, fixed in zip(model.getparameters(fixed=True), paramsfixeddefault[modeltype]):
                    param.fixed = fixed
                # Parse default overrides from model spec
                paramflags = {}
                for flag in ["fixedparams", "initparams"]:
                    paramflags[flag] = {}
                    values = modelinfo[specs[flag]]
                    if values:
                        for flagvalue in values.split(";"):
                            if flag == "fixedparams":
                                paramflags[flag][flagvalue] = None
                            elif flag == "initparams":
                                value = flagvalue.split("=")
                                # TODO: sort this out
                                valuesplit = [float(x) for x in value[1].split(',')]
                                paramflags[flag][value[0]] = valuesplit
                # For printing parameter values when plotting
                modelnameappendparams = []
                # Now actually apply the overrides and the hardcoded maxima
                timesmatched = {}
                for param in model.getparameters(fixed=True):
                    if param.name in paramflags["fixedparams"]:
                        param.fixed = True
                    if param.name in paramflags["initparams"]:
                        if param.name not in timesmatched:
                            timesmatched[param.name] = 0
                        param.setvalue(paramflags["initparams"][param.name][timesmatched[param.name]],
                                       transformed=False)
                        timesmatched[param.name] += 1
                    isfluxrat = isfluxratio(param)
                    if plot and not param.fixed:
                        if param.name == "nser":
                            modelnameappendparams += [("n={:.2f}", param)]
                        elif isfluxrat:
                            modelnameappendparams += [("f={:.2f}", param)]
                    if param.name in valuesmax and not isfluxrat:
                        transform = param.transform.transform
                        param.limits = proobj.Limits(lower=transform(0), upper=transform(valuesmax[param.name]),
                                                     transformed=True)
                    # Reset non-finite free param values
                    # This occurs e.g. at the limits of a logit transformed param
                    if not param.fixed:
                        paramval = param.getvalue(transformed=True)
                        if not np.isfinite(paramval):
                            param.setvalue(
                                np.nextafter(param.getvalue(transformed=False),(-1) ** (paramval < 0)),
                                transformed=False)

                print("Fitting model {:s} of type {:s} using engine {:s}".format(modelname, modeltype, engine))
                sys.stdout.flush()
                try:
                    fits = []
                    dosecond = (len(model.sources[0].modelphotometric.components) > 1) or not usemodellibdefault
                    if usemodellibdefault:
                        modellibopts = {
                            "algo": ("cobyla" if modellib == "pygmo" else "COBYLA") if dosecond else
                            ("neldermead" if modellib == "pygmo" else "Nelder-Mead")
                        }
                        if modellib == "scipy":
                            modellibopts['options'] = {'maxfun': 1e4}
                    fit1, modeller = proutil.fitmodel(model, modellib=modellib, modellibopts=modellibopts,
                                                      printfinal=True, printsteps=100,
                                                      plot=plot and not dosecond,
                                                      figure=figure, axes=axes, figurerow=modelidx,
                                                      flipplot=flipplot, modelname=modelname,
                                                      modelnameappendparams=modelnameappendparams
                                                      )
                    fits.append(fit1)
                    if dosecond:
                        if usemodellibdefault:
                            modeller.modellibopts["algo"] = "neldermead" if modellib == "pygmo" else \
                                "Nelder-Mead"
                        fit2, _ = proutil.fitmodel(model, modeller, printfinal=True, printsteps=100,
                                                   plot=plot, figure=figure, axes=axes, figurerow=modelidx,
                                                   flipplot=flipplot, modelname=modelname,
                                                   modelnameappendparams=modelnameappendparams)
                        fits.append(fit2)
                    fitsbyengine[engine][modelname] = {"fits": fits, "modeltype": modeltype}
                except Exception as e:
                    print("Error fitting id={}:".format(idnum))
                    print(e)
                    trace = traceback.format_exc()
                    print(trace)
                    fitsbyengine[engine][modelname] = e, trace
    if plot:
        plt.show(block=False)
        plt.tight_layout()
        plt.subplots_adjust(wspace=0.05, hspace=0.05)
        plt.show(block=False)

    return fitsbyengine, models
Example #57
0
def check_adv(args, device,
              gap, mat_model_path, model_class, inp, adv_inp, label, eps, *,
              allow_retry=2):
    err = np.abs(inp - adv_inp).max()
    assert 0 <= adv_inp.min() <= adv_inp.max() <= 1
    assert err <= eps, (err, eps, err - eps)

    with tempfile.NamedTemporaryFile() as modified_model:
        if args.no_gap:
            modified_model_name = mat_model_path
        else:
            modified_model_name = modified_model.name
            params = read_mat_file(mat_model_path)
            params['softmax/bias'][0, label] += gap
            sio.savemat(modified_model_name, params)

        mip_verify = MIPVerify(
            modified_model_name,
            (model_class.input_size, model_class.input_chl),
            args.time_limit,
        )
        try:
            model = (model_class.
                     from_mat(modified_model_name).
                     to(device).
                     use_unstable_conv(not args.stable))
            adv_inp_dev = torch.from_numpy(adv_inp).to(device)
            out0 = torch_as_npy(model(adv_inp_dev)).flatten()
            out1 = torch_as_npy(model.features_chk(adv_inp_dev)).flatten()
            test_acc = eval_acc(model, device)

            lprint(f'conv {out0} l={np.argmax(out0)} '
                  f'cw={cw_loss_vec(out0, label):.2e}')
            lprint(f'mm   {out1} l={np.argmax(out1)} '
                  f'cw={cw_loss_vec(out1, label):.2e}')
            lprint(f'test acc: {test_acc*100:.2f}%')

            if args.mm:
                out_adv = out1
            else:
                out_adv = out0

            assert np.argmax(out_adv) != label

            v = mip_verify(inp, label, eps)
            if not (v['status_known'] and v['robust']):
                v.pop('PerturbationValue', None)
                v.pop('PerturbedInputValue', None)
                lprint('verification of original model failed:',
                      pprint.pformat(v))
                if allow_retry > 0 and v['status_known']:
                    try:
                        # try to increase robustness a little bit
                        safe = -cw_loss_vec(out_adv, label)
                        gap += min(-v['ObjectiveValue'],
                                   max(safe - 1e-7, safe * 0.99))
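                        # round the gap up to the next representable float32 so the retry uses a strictly larger value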
                        gap = float(np.nextafter(gap, float('inf'),
                                                 dtype=np.float32))
                        lprint(f'retrying with new gap {gap} ...')
                        return check_adv(
                            args, device,
                            gap, mat_model_path, model_class, inp, adv_inp,
                            label, eps, allow_retry=allow_retry-1)
                    except Exception:
                        traceback.print_exc()
                return
        finally:
            mip_verify.stop()

    inp_dev = torch.from_numpy(inp).to(device)
    save_state = {
        'inp': inp,
        'adv_inp': adv_inp,
        'label': label,
        'gap': gap,
        'eps': eps,
        'verify': v,
        'test_acc': test_acc,
        'adv_out_score': out0,
        'adv_out_score_mm': out1,
        'inp_out_score': torch_as_npy(model(inp_dev)).flatten(),
        'mat_model_path': mat_model_path,
        'device': device,
        'argv_options': [i for i in sys.argv if i.startswith('-')],
    }
    return save_state
Example #58
0
 def compute_scales_fun(variance, mean):
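     # np.sqrt(np.nextafter(0, 1)) is a tiny positive floor that keeps the denominator strictly positive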
     denominator = np.fmax(variance - mean, np.sqrt(np.nextafter(0, 1, dtype=variance.dtype)))
     groupwise_scales = np.square(mean) / denominator
     return groupwise_scales
Example #59
0
def compute_mi_cd(c, d, n_neighbors=3):

    leaf_size = 30

    c = c.reshape((-1, 1))

    n_samples = c.shape[0]
    n_features = c.shape[1]

    radius = np.empty(n_samples)
    label_counts = np.empty(n_samples, dtype=np.int64)
    k_all = np.empty(n_samples, dtype=np.int8)

    labels = np_unique(d)
    n_labels = len(labels)

    for idx in range(n_labels):
        label = labels[idx]
        mask = np.where(d.ravel() == label)[0]
        count = mask.shape[0]
        if count > 1:
            # create the objects that are going to be needed for NN
            n_levels = 1 + np.log2(max(1, ((count - 1) // leaf_size)))
            n_nodes = int(2**n_levels) - 1
            # allocate arrays for storage
            idx_array = np.arange(count)
            node_radius = np.zeros(n_nodes, dtype=np.float64)
            node_idx_start = np.zeros(n_nodes, dtype=np.int64)
            node_idx_end = np.zeros(n_nodes, dtype=np.int64)
            node_is_leaf = np.zeros(n_nodes, dtype=np.int64)
            node_centroids = np.zeros((n_nodes, n_features), dtype=np.float64)
            ball_tree.recursive_build(0,
                                      0,
                                      count,
                                      c[mask],
                                      node_centroids,
                                      node_radius,
                                      idx_array,
                                      node_idx_start,
                                      node_idx_end,
                                      node_is_leaf,
                                      n_nodes,
                                      leaf_size,
                                      metric=0)
            # This algorithm returns the point itself as a neighbor, so
            # if n_neighbors need to be returned then '1' needs to be
            # added to 'k' in order to get the correct value from 'nth'
            # neighbor when the heap is created
            k = min(n_neighbors, count - 1)
            heap_distances, heap_indices = ball_tree.heap_create(count, k + 1)
            ball_tree.query(0,
                            c[mask],
                            heap_distances,
                            heap_indices,
                            c[mask],
                            idx_array,
                            node_centroids,
                            node_radius,
                            node_is_leaf,
                            node_idx_start,
                            node_idx_end,
                            metric=0)
            ball_tree.heap_sort(heap_distances, heap_indices)
            heap_distances = np.sqrt(heap_distances)
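            # take the k-th neighbor distance one float toward 0 so points exactly at that distance are excluded from the later radius count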
            radius[mask] = np.nextafter(heap_distances[:, -1], 0)
            k_all[mask] = k
        label_counts[mask] = count

    # Ignore points with unique labels
    mask_unique = np.array(
        [n if label_counts[n] > 1 else 0 for n in range(n_samples)])

    # A whole new set of tree elements needs to be created since the entire
    # data set is now going to be run through the algorithm
    n_samples_kd = c[mask_unique].shape[0]

    # determine number of levels in the tree, and from this
    # the number of nodes in the tree.  This results in leaf nodes
    # with numbers of points between leaf_size and 2 * leaf_size
    n_levels_kd = 1 + np.log2(max(1, ((n_samples_kd - 1) // leaf_size)))
    # having to round first and then apply int in order to calculate
    # correct number of nodes
    n_nodes_kd = int(round((2**n_levels_kd))) - 1

    # allocate arrays for storage
    idx_array_kd = np.arange(n_samples_kd)
    node_radius_kd = np.zeros(n_nodes_kd, dtype=np.float64)
    node_idx_start_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_idx_end_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_is_leaf_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_lower_bounds_kd = np.zeros((n_nodes_kd, n_features), dtype=np.float64)
    node_upper_bounds_kd = np.zeros((n_nodes_kd, n_features), dtype=np.float64)

    kd_tree.recursive_build(0, 0, n_samples_kd, c[mask_unique],
                            node_lower_bounds_kd, node_upper_bounds_kd,
                            node_radius_kd, idx_array_kd, node_idx_start_kd,
                            node_idx_end_kd, node_is_leaf_kd, n_nodes_kd,
                            leaf_size)

    count_only = True
    return_distance = False
    counts = \
        kd_tree.radius_neighbors_count(c[mask_unique], radius[mask_unique],
                                       idx_array_kd, node_lower_bounds_kd,
                                       node_upper_bounds_kd, node_radius_kd,
                                       node_is_leaf_kd, node_idx_start_kd,
                                       node_idx_end_kd, count_only, return_distance)

    mi = (digamma_cpu(n_samples_kd) +
          np.mean(digamma_cpu(k_all[mask_unique])) -
          np.mean(digamma_cpu(label_counts[mask_unique])) -
          np.mean(digamma_cpu(counts)))

    mi = max(0, mi)

    return mi
Example #60
0
def test_ConsequenceFunction_sample_unit_DV():
    """
    Test if the function samples the DV distribution properly. Note that we
    have already tested the sampling algorithm in the uq module, so we will not
    do a thorough verification of the samples here, but rather check for errors
    in the inputs that would typically lead to significant mistakes in the
    results.
    """
    test_quants = [0.5, 1.0, 1.5, 2.0, 2.5]

    # create a Random Variable with 3 correlated decision variables
    dims = 3
    ref_mean = [1., 1., 0.]
    ref_std = [0.4, 0.3, 0.2]
    ref_rho = np.ones((dims, dims)) * 0.8
    np.fill_diagonal(ref_rho, 1.0)

    ref_mean[2] = np.exp(ref_mean[2])

    # prepare lower truncation limits at 0 for all...
    tr_lower = np.zeros(dims).tolist()
    # and an upper limit at 2 sigma for the second
    tr_upper = [np.inf, 1.6, np.inf]

    RV_reg = RandomVariableRegistry()

    for i, (name, dist, theta, beta) in enumerate(
            zip(['A', 'B', 'C'], ['normal', 'normal', 'lognormal'], ref_mean,
                ref_std)):
        RV_reg.add_RV(
            RandomVariable(name=name,
                           distribution=dist,
                           theta=[theta, beta],
                           truncation_limits=[tr_lower[i], tr_upper[i]]))

    RV_reg.add_RV_set(
        RandomVariableSet('set_A', [RV_reg.RV[rv] for rv in ['A', 'B', 'C']],
                          ref_rho))
    RV_reg.generate_samples(sample_size=1000)

    # first test sampling for each decision variable
    for r_i, tag in enumerate(['A', 'B', 'C']):

        # use fixed value for 'B' and bounded linear for the other two
        if tag == 'B':
            f_median = prep_constant_median_DV(10.)
        else:
            f_median = prep_bounded_linear_median_DV(median_max=20.0,
                                                     median_min=2.0,
                                                     quantity_lower=1.0,
                                                     quantity_upper=2.0)

        # create the consequence function
        conseq_function = ConsequenceFunction(DV_median=f_median,
                                              DV_distribution=RV_reg.RV[tag])

        for qnt in test_quants:
            samples = conseq_function.sample_unit_DV(quantity=qnt,
                                                     sample_size=1000)

            # transform the results to log space for 'C' to facilitate testing
            if tag == 'C':
                samples = np.log(samples)
                ref_mu = np.log(f_median(qnt))
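                # clamp the truncation limits to the smallest positive float before taking logs (log(0) is undefined)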
                ref_min = np.log(max(np.nextafter(0, 1), tr_lower[r_i]))
                ref_max = np.log(max(np.nextafter(0, 1), tr_upper[r_i]))
                a = (ref_min - np.log(ref_mean[r_i])) / ref_std[r_i]
                b = (ref_max - np.log(ref_mean[r_i])) / ref_std[r_i]
                ref_max = ref_mu * b
            else:
                ref_mu = f_median(qnt)
                ref_min = tr_lower[r_i]
                a = (ref_min - ref_mean[r_i]) / ref_std[r_i]
                b = (tr_upper[r_i] - ref_mean[r_i]) / ref_std[r_i]
                ref_max = ref_mu * b

            trNorm = truncnorm(
                a=a,
                b=b,
                loc=ref_mu,
                scale=ref_std[r_i] if tag == 'C' else ref_std[r_i] * ref_mu)
            ref_samples = trNorm.rvs(size=1000)

            # test the means and coefficients of variation
            assert np.mean(samples) == pytest.approx(np.mean(ref_samples),
                                                     rel=0.1)
            assert np.std(samples) == pytest.approx(np.std(ref_samples),
                                                    rel=0.15)

            # test the limits
            assert np.min(samples) > ref_min
            assert np.max(samples) < ref_max