Example #1
    def _set_spinbox_limits(self, bottom_val, top_val):
        # turn off signals on the spin boxes
        reset_state = [(sb, sb.blockSignals(True)) for sb in
                       (self._spin_max,
                        self._spin_min)]
        try:
            # set the top and bottom limits on the spinboxes to be in bounds
            self._spin_max.setMinimum(bottom_val)
            self._spin_min.setMinimum(bottom_val)

            self._spin_max.setMaximum(top_val)
            self._spin_min.setMaximum(top_val)
            # don't let the step be bigger than the total allowed range
            self._spin_step.setMaximum(top_val - bottom_val)

            if not np.isinf(bottom_val) or not np.isinf(top_val):
                # set the current values
                self._spin_min.setValue(bottom_val)
                self._spin_max.setValue(top_val)

                # this will trigger via the call-back updating everything else
                self._spin_step.setValue(
                    (top_val - bottom_val) / 100)
        finally:
            # un-wrap the signal blocking
            for sb, state in reset_state:
                sb.blockSignals(state)
Example #2
    def transform(self, data):

        assert np.isfinite(data).all()

        ntest = len(data)

        data = data.copy()

        data.shape = ntest, -1

        assert np.isfinite(data).all()

        print ">>> Computing traintest linear kernel"
        start = time.time()
        kernel_traintest = np.dot(data,
                                  self._train_data.T)

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        kernel_traintest /= self._ktrace

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        end = time.time()
        print "Time: %s" % (end-start)

        return self._clf.decision_function(kernel_traintest).ravel()
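The training-side counterpart is not shown above. As a rough, hypothetical sketch of the precomputed-kernel pattern this method relies on (assuming a scikit-learn style classifier; the actual objects behind self._clf and self._ktrace are not part of this example):

import numpy as np
from sklearn.svm import SVC

# Hypothetical sketch: build a train/train linear kernel, normalise by its trace,
# fit a precomputed-kernel classifier, then score test data the same way as above.
train_data = np.random.randn(20, 5)
kernel_train = np.dot(train_data, train_data.T)
ktrace = kernel_train.trace() or 1.0
kernel_train /= ktrace

clf = SVC(kernel='precomputed')
clf.fit(kernel_train, np.repeat([0, 1], 10))

test_data = np.random.randn(4, 5)
kernel_traintest = np.dot(test_data, train_data.T) / ktrace
print(clf.decision_function(kernel_traintest).ravel())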
Example #3
def calcForces_and_potentialE(F_x, F_y, old_or_new, x_positions, y_positions, V_atoms):
    """calculates x and y forces and potential energy per atom as summed over
    all contributions due to all neighbors, as functions of position and the
    parameters of the LJ potential"""

    for atom in xrange(Natoms):
        for i in xrange(Natoms):
            if i != atom:
                delx = x_positions[atom, old_or_new] - x_positions[i, old_or_new]
                dely = y_positions[atom, old_or_new] - y_positions[i, old_or_new]
                r_ij = np.sqrt(delx**2 + dely**2)
                F_x[atom, old_or_new] -= 24.0 * epsilon * sigma**6 \
                    * delx * (1 - 2.0 * (sigma / r_ij)**6) / r_ij**8
                F_y[atom, old_or_new] -= 24.0 * epsilon * sigma**6 \
                    * dely * (1 - 2.0 * (sigma / r_ij)**6) / r_ij**8
                V_atoms[atom] += 4.0 * epsilon \
                    * ((sigma / r_ij)**12 - (sigma / r_ij)**6)
                # Guard against overflow/NaN when two atoms coincide (r_ij ~ 0)
                if np.isnan(F_x[atom, old_or_new]) or np.isinf(F_x[atom, old_or_new]):
                    F_x[atom, old_or_new] = 0
                if np.isnan(F_y[atom, old_or_new]) or np.isinf(F_y[atom, old_or_new]):
                    F_y[atom, old_or_new] = 0  # was F_y[atom, 0]: update the column being computed
                if np.isnan(V_atoms[atom]) or np.isinf(V_atoms[atom]):
                    V_atoms[atom] = 0
    return F_x, F_y, V_atoms
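A minimal driver sketch for the function above (assumptions: the function is in scope, it is Python 2 code given the use of xrange, and Natoms, epsilon and sigma are the module-level globals it reads; column 0 of each array holds the "old" state and column 1 the "new" one):

import numpy as np

Natoms = 3                 # hypothetical system size
epsilon, sigma = 1.0, 1.0  # hypothetical LJ parameters

x_positions = np.zeros((Natoms, 2))
y_positions = np.zeros((Natoms, 2))
x_positions[:, 0] = [0.0, 1.5, 3.0]   # three atoms on a line in the "old" state

F_x = np.zeros((Natoms, 2))
F_y = np.zeros((Natoms, 2))
V_atoms = np.zeros(Natoms)

F_x, F_y, V_atoms = calcForces_and_potentialE(
    F_x, F_y, 0, x_positions, y_positions, V_atoms)
print(F_x[:, 0], V_atoms)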
Example #4
 def __init__(self, pt1, pt2, imageSize=None):
   if pt1[0] <= pt2[0]:  # ensure pt1 is to the left of pt2 for easier computations later on
     self.pt1 = pt1
     self.pt2 = pt2
   else:
     self.pt1 = pt2
     self.pt2 = pt1
   self.delta = np.subtract(self.pt2, self.pt1)
   self.length = sqrt(self.delta[0]**2 + self.delta[1]**2)
   self.m = float(self.delta[1]) / float(self.delta[0]) if self.delta[0] != 0.0 else (np.inf if self.delta[1] >=0 else -np.inf)
   self.c = self.pt1[1] - self.m * self.pt1[0]
   #print "delta = {0}, m = {1}, c = {2}".format(self.delta, self.m, self.c)
   
   # Check for validity/stability
   if np.isinf(self.m) or np.isinf(self.c):
     self.angle = 0.0
     self.valid = False
     return
   
   # Compute angle in degrees
   self.angle = degrees(atan2(self.delta[1], self.delta[0]))
   self.valid = True
   
   # Compute points on left and right edges, if an imageSize is given
   if imageSize is None:
     self.ptLeft = self.pt1
     self.ptRight = self.pt2
   else:
     self.ptLeft = (0, int(self.c))
     self.ptRight = (imageSize[0] - 1, int(self.m * (imageSize[0] - 1) + self.c))
Example #5
 def _get_sum(self):
     """Compute sum of non NaN / Inf values in the array."""
     try:
         return self._sum
     except AttributeError:
         self._sum = self.no_nan.sum()
          # The following 2 lines are needed as in Python 3.3 with NumPy
          # 1.7.1, numpy.ndarray and numpy.memmap aren't hashable.
         if type(self._sum) is numpy.memmap:
             self._sum = numpy.asarray(self._sum).item()
         if self.has_nan and self.no_nan.mask.all():
             # In this case the sum is not properly computed by numpy.
             self._sum = 0
         if numpy.isinf(self._sum) or numpy.isnan(self._sum):
             # NaN may happen when there are both -inf and +inf values.
             if self.has_nan:
                 # Filter both NaN and Inf values.
                 mask = self.no_nan.mask + numpy.isinf(self[1])
             else:
                 # Filter only Inf values.
                 mask = numpy.isinf(self[1])
             if mask.all():
                 self._sum = 0
             else:
                 self._sum = numpy.ma.masked_array(self[1], mask).sum()
             # At this point there should be no more NaN.
             assert not numpy.isnan(self._sum)
     return self._sum
Example #6
def init_bounds(v):
    """
    Returns a bounds object of the appropriate type given the arguments.

    This is a helper factory to simplify the user interface to parameter
    objects.
    """
    # if it is None, then it is unbounded
    if v is None:
        return Unbounded()

    # if it isn't a tuple, assume it is a bounds type.
    try:
        lo,hi = v
    except TypeError:
        return v

    # if it is a tuple, then determine what kind of bounds we have
    if lo is None: lo = -inf
    if hi is None: hi = inf
    # TODO: consider issuing a warning instead of correcting reversed bounds
    if lo >= hi: lo, hi = hi, lo
    if isinf(lo) and isinf(hi):
        return Unbounded()
    elif isinf(lo):
        return BoundedAbove(hi)
    elif isinf(hi):
        return BoundedBelow(lo)
    else:
        return Bounded(lo,hi)
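A hypothetical usage sketch, assuming Unbounded, Bounded, BoundedAbove, BoundedBelow and inf come from the same module as init_bounds (they are referenced above but not shown here):

print(init_bounds(None))        # unbounded parameter        -> Unbounded()
print(init_bounds((0, 1)))      # two finite limits          -> Bounded(0, 1)
print(init_bounds((None, 10)))  # no lower limit             -> BoundedAbove(10)
print(init_bounds((10, None)))  # no upper limit             -> BoundedBelow(10)
print(init_bounds((5, 2)))      # reversed limits are swapped -> Bounded(2, 5)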
Example #7
    def test_nan_inf(self):
        # Not-a-number
        q = u.Quantity('nan', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('NaN', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('-nan', unit='cm')  # float() allows this
        assert np.isnan(q.value)

        q = u.Quantity('nan cm')
        assert np.isnan(q.value)
        assert q.unit == u.cm

        # Infinity
        q = u.Quantity('inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('-inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('inf cm')
        assert np.isinf(q.value)
        assert q.unit == u.cm

        q = u.Quantity('Infinity', unit='cm')  # float() allows this
        assert np.isinf(q.value)

        # make sure these strings don't parse...
        with pytest.raises(TypeError):
            q = u.Quantity('', unit='cm')

        with pytest.raises(TypeError):
            q = u.Quantity('spam', unit='cm')
Example #8
    def _crop_out_special_values(self, ws):

        if ws.getNumberHistograms() != 1:
            # Cropping out special values is only possible on 1D workspaces
            return

        y_vals = ws.readY(0)
        length = len(y_vals)
        # Find the first finite (non-NaN, non-inf) value
        start = 0
        for i in range(0, length):
            if not np.isnan(y_vals[i]) and not np.isinf(y_vals[i]):
                start = i
                break
        # Now find the last finite value
        stop = 0
        length -= 1
        for j in range(length, 0, -1):
            if not np.isnan(y_vals[j]) and not np.isinf(y_vals[j]):
                stop = j
                break
        # Find the appropriate X values and call CropWorkspace
        x_vals = ws.readX(0)
        start_x = x_vals[start]
        # Make sure we're inside the bin that we want to crop
        end_x = x_vals[stop + 1]
        return self._crop_to_x_range(ws=ws,x_min=start_x, x_max=end_x)
Example #9
def get_region_boxes(sp, reg2sp):
  x = np.arange(0, sp.shape[1])
  y = np.arange(0, sp.shape[0])
  xv, yv = np.meshgrid(x, y)
  maxsp = np.max(sp)
  sp1=sp.reshape(-1)-1
  xv = xv.reshape(-1)
  yv = yv.reshape(-1)
  spxmin = accum.my_accumarray(sp1,xv, maxsp, 'min')
  spymin = accum.my_accumarray(sp1,yv, maxsp, 'min')
  spxmax = accum.my_accumarray(sp1,xv, maxsp, 'max')
  spymax = accum.my_accumarray(sp1,yv, maxsp, 'max')
  
  Z = reg2sp.astype(float, copy=True)
  Z[reg2sp==0] = np.inf
  xmin = np.nanmin(np.multiply(spxmin.reshape(-1,1), Z),0)
  ymin = np.nanmin(np.multiply(spymin.reshape(-1,1), Z),0)
  xmax = np.amax(np.multiply(spxmax.reshape(-1,1), reg2sp),0)
  ymax = np.amax(np.multiply(spymax.reshape(-1,1), reg2sp), 0)
  xmin[np.isinf(xmin)]=0
  ymin[np.isinf(ymin)]=0
  

  boxes = np.hstack((xmin.reshape(-1,1), ymin.reshape(-1,1), xmax.reshape(-1,1), ymax.reshape(-1,1)))
  return boxes 
Example #10
def contains_inf(arr):
    """
    Test whether a numpy.ndarray contains any `np.inf` values.

    Parameters
    ----------
    arr : np.ndarray

    Returns
    -------
    contains_inf : bool
        `True` if the array contains any `np.inf` values, `False` otherwise.

    Notes
    -----
    Tests for the presence of `np.inf`'s by determining whether the
    values returned by `np.nanmin(arr)` and `np.nanmax(arr)` are finite.
    This approach is more memory efficient than the obvious alternative,
    calling `np.any(np.isinf(ndarray))`, which requires the construction of a
    boolean array with the same shape as the input array.
    """
    if isinstance(arr, theano.gof.type.CDataType._cdata_type):
        return False
    elif isinstance(arr, np.random.mtrand.RandomState):
        return False
    return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
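The memory argument in the docstring is easy to check with plain numpy; a small self-contained sketch of the same trick:

import numpy as np

def _has_inf_cheap(a):
    # Detect +/-inf via the extrema (NaNs ignored) without allocating the
    # full boolean mask that np.any(np.isinf(a)) would build.
    return bool(np.isinf(np.nanmax(a)) or np.isinf(np.nanmin(a)))

a = np.array([0.0, np.nan, 3.0])
b = np.array([0.0, -np.inf, 3.0])
assert _has_inf_cheap(a) == bool(np.any(np.isinf(a)))  # False
assert _has_inf_cheap(b) == bool(np.any(np.isinf(b)))  # True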
Example #11
    def common_limits(datasets, default_min=0, default_max=0):
        """Find the global maxima and minima of a list of datasets.

        Parameters
        ----------
        datasets : `iterable`
            list (or any other iterable) of data arrays to analyse.

        default_min : `float`, optional
            fall-back minimum value if datasets are all empty.

        default_max : `float`, optional
            fall-back maximum value if datasets are all empty.

        Returns
        -------
        (min, max) : `float`
            2-tuple of common minimum and maximum over all datasets.
        """
        from glue import iterutils
        if isinstance(datasets, numpy.ndarray) or not iterable(datasets[0]):
            datasets = [datasets]
        max_stat = max(list(iterutils.flatten(datasets)) + [-numpy.inf])
        min_stat = min(list(iterutils.flatten(datasets)) + [numpy.inf])
        if numpy.isinf(-max_stat):
            max_stat = default_max
        if numpy.isinf(min_stat):
            min_stat = default_min
        return min_stat, max_stat
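A standalone sketch of the same sentinel pattern using plain numpy (no glue dependency), to make the fallback behaviour explicit:

import numpy as np

def common_limits_sketch(datasets, default_min=0, default_max=0):
    # Seed min/max with +/-inf sentinels, then fall back to the defaults
    # when no data were seen (all datasets empty).
    flat = [x for d in datasets for x in np.ravel(d)]
    max_stat = max(flat + [-np.inf])
    min_stat = min(flat + [np.inf])
    if np.isinf(-max_stat):
        max_stat = default_max
    if np.isinf(min_stat):
        min_stat = default_min
    return min_stat, max_stat

print(common_limits_sketch([[1, 2, 3], [0.5, 5]]))                    # (0.5, 5)
print(common_limits_sketch([[], []], default_min=-1, default_max=1))  # (-1, 1)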
Example #12
 def _check_for_infinities(self, tif):
     try:
         if np.any(np.isinf(tif)):
             tif[np.isinf(tif)] = 0
             g.alert('Some array values were inf. Setting those values to 0')
     except MemoryError:
         pass
Example #13
def circumcircle(P1,P2,P3):
    ''' 
    Adapted from:
    http://local.wasp.uwa.edu.au/~pbourke/geometry/circlefrom3/Circle.cpp
    '''
    delta_a = P2 - P1
    delta_b = P3 - P2
    if np.abs(delta_a[0]) <= 0.000000001 and np.abs(delta_b[1]) <= 0.000000001:
        center_x = 0.5*(P2[0] + P3[0])
        center_y = 0.5*(P1[1] + P2[1])
    else:
        aSlope = delta_a[1]/delta_a[0]
        bSlope = delta_b[1]/delta_b[0]

        if aSlope == 0.0:
            aSlope = 1E-6

        if bSlope == 0.0:
            bSlope = 1E-6

        if np.isinf(aSlope):
            aSlope = 1E6

        if np.isinf(bSlope):
            bSlope = 1E6

        if np.abs(aSlope-bSlope) <= 0.000000001:
            return None
        center_x = (aSlope*bSlope*(P1[1] - P3[1]) + bSlope*(P1[0] + P2[0]) \
                        - aSlope*(P2[0] + P3[0])) / (2*(bSlope - aSlope))
        center_y = -1*(center_x - (P1[0] + P2[0])/2)/aSlope + (P1[1] + P2[1])/2
    return center_x, center_y
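A quick verification sketch: the returned centre should be equidistant from the three input points (which must be array-like, since the function takes vector differences):

import numpy as np

P1, P2, P3 = np.array([0.0, 0.0]), np.array([4.0, 2.0]), np.array([1.0, 5.0])
cx, cy = circumcircle(P1, P2, P3)
radii = [np.hypot(cx - p[0], cy - p[1]) for p in (P1, P2, P3)]
print((cx, cy), radii)   # the three radii should agree closely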
Example #14
def weighted_mean(_line):
    max_weight = 50
    
    # print _line.shape
    
    median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
    std = bottleneck.nanstd(_line, axis=1)
    std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
    
    weight_2d = numpy.fabs(std_2d / (_line - median_2d))
#    weight_2d[weight_2d > max_weight] = max_weight
    weight_2d[numpy.isinf(weight_2d)] = max_weight
    
    for i in range(3):
        avg = bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1)
        avg_2d = avg.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
        
        std = numpy.sqrt(bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1)/bottleneck.nansum(weight_2d, axis=1))
        std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
        
        weight_2d = numpy.fabs(std_2d / (_line - avg_2d))
        #weight_2d[weight_2d > max_weight] = max_weight
        weight_2d[numpy.isinf(weight_2d)] = max_weight
    
    return bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1)
Example #15
def __compare( verify_obj, obj, nsig, ndec ):
  if isinstance(verify_obj,tuple):
    if len(verify_obj) == len(obj):
      return all([ __compare( vo, o, nsig, ndec )
        for vo, o in zip( verify_obj, obj ) ])
    log.error( 'non matching lengths: %d != %d' % ( len(verify_obj), len(obj) ) )
  elif not isinstance(verify_obj,float):
    if verify_obj == obj:
      return True
    log.error( 'non equal: %s != %s' % ( obj, verify_obj ) )
  elif numpy.isnan(verify_obj):
    if numpy.isnan(obj):
      return True
    log.error( 'expected nan: %s' % obj )
  elif numpy.isinf(verify_obj):
    if numpy.isinf(obj):
      return True
    log.error( 'expected inf: %s' % obj )
  else:
    if verify_obj:
      n = numeric.floor( numpy.log10( abs(verify_obj) ) )
      N = max( n-(nsig-1), -ndec )
    else:
      N = -ndec
    maxerr = .5 * 10.**N
    if abs(verify_obj-obj) <= maxerr:
      return True
    log.error( 'non equal to %s digits: %e != %e' % ( nsig, obj, verify_obj ) )
  return False
Example #16
    def test_nans_infs(self):
        oldsettings = np.seterr(all='ignore')
        try:
            # Check some of the ufuncs
            assert_equal(np.isnan(self.all_f16), np.isnan(self.all_f32))
            assert_equal(np.isinf(self.all_f16), np.isinf(self.all_f32))
            assert_equal(np.isfinite(self.all_f16), np.isfinite(self.all_f32))
            assert_equal(np.signbit(self.all_f16), np.signbit(self.all_f32))
            assert_equal(np.spacing(float16(65504)), np.inf)

            # Check comparisons of all values with NaN
            nan = float16(np.nan)

            assert_(not (self.all_f16 == nan).any())
            assert_(not (nan == self.all_f16).any())

            assert_((self.all_f16 != nan).all())
            assert_((nan != self.all_f16).all())

            assert_(not (self.all_f16 < nan).any())
            assert_(not (nan < self.all_f16).any())

            assert_(not (self.all_f16 <= nan).any())
            assert_(not (nan <= self.all_f16).any())

            assert_(not (self.all_f16 > nan).any())
            assert_(not (nan > self.all_f16).any())

            assert_(not (self.all_f16 >= nan).any())
            assert_(not (nan >= self.all_f16).any())
        finally:
            np.seterr(**oldsettings)
Example #17
 def float_test(self, dtype, significant=None):
     colname = 'col_%s' % dtype.__name__
     self.writeread(dtype)
     before, after = self.table_orig.data[colname], self.table_new.data[colname]
     self.assertEqual(before.shape, after.shape)
     self.assertEqual(before.dtype.type, after.dtype.type)
     if before.ndim == 1:
         for i in range(before.shape[0]):
             if(np.isnan(before[i])):
                 self.failUnless(np.isnan(after[i]))
             elif(np.isinf(before[i])):
                 self.failUnless(np.isinf(after[i]))
             else:
                 if significant:
                     self.assertAlmostEqualSig(before[i], after[i], significant=significant)
                 else:
                     self.assertEqual(before[i], after[i])
     else:
         for i in range(before.shape[0]):
             for j in range(before.shape[1]):
                 if(np.isnan(before[i, j])):
                     self.failUnless(np.isnan(after[i, j]))
                 elif(np.isinf(before[i, j])):
                     self.failUnless(np.isinf(after[i, j]))
                 else:
                     if significant:
                         self.assertAlmostEqualSig(before[i, j], after[i, j], significant=significant)
                     else:
                         self.assertEqual(before[i, j], after[i, j])
Example #18
def likelihood_check(obs_distns,trans_matrix,init_distn,data,target_val):
    for cls in [m.HMMPython, m.HMM]:
        hmm = cls(alpha=6.,init_state_concentration=1, # placeholders
                obs_distns=obs_distns)
        hmm.trans_distn.trans_matrix = trans_matrix
        hmm.init_state_distn.weights = init_distn
        hmm.add_data(data)

        # test default log_likelihood method

        assert np.isclose(target_val, hmm.log_likelihood())

        # manual tests of the several message passing methods

        states = hmm.states_list[-1]

        states.clear_caches()
        states.messages_forwards_normalized()
        assert np.isclose(target_val,states._normalizer)

        states.clear_caches()
        states.messages_forwards_log()
        assert np.isinf(target_val) or np.isclose(target_val,states._normalizer)

        states.clear_caches()
        states.messages_backwards_log()
        assert np.isinf(target_val) or np.isclose(target_val,states._normalizer)

        # test held-out vs in-model

        assert np.isclose(target_val, hmm.log_likelihood(data))
Example #19
def clean_invalid(x,y,min_x=-numpy.inf,min_y=-numpy.inf,max_x=numpy.inf,max_y=numpy.inf):
    """Remove corresponding values from x and y when one or both of those is `nan` or `inf`,
    and optionally truncate values to minima and maxima

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Pair arrays or lists of corresponding numbers

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger
        than `max_x` to `max_x`, and similarly for `min_y` and `max_y`

    Returns
    -------
    :class:`numpy.ndarray`
        A shortened version of `x`, excluding invalid values

    :class:`numpy.ndarray`
        A shortened version of `y`, excluding invalid values
    """
    x = numpy.array(x).astype(float)
    y = numpy.array(y).astype(float)

    x[x < min_x] = min_x
    x[x > max_x] = max_x
    y[y < min_y] = min_y
    y[y > max_y] = max_y
    
    newmask = numpy.isinf(x) | numpy.isnan(x) | numpy.isinf(y) | numpy.isnan(y) 
    x = x[~newmask]
    y = y[~newmask]


    return x,y 
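A hypothetical usage sketch (clean_invalid assumed in scope): nan/inf pairs are dropped, and out-of-range values are clipped to the supplied limits before the mask is applied:

import numpy

x = [1.0, numpy.nan, 3.0, numpy.inf, 5.0]
y = [2.0, 2.0, numpy.inf, 4.0, 5.0]
print(clean_invalid(x, y))                                        # -> [1. 5.], [2. 5.]
print(clean_invalid([0.0, 20.0], [1.0, 2.0], min_x=1, max_x=10))  # -> [1. 10.], [1. 2.]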
Example #20
    def __call__(self, value, clip=None):
        if clip is None:
            clip = self.clip

        if cbook.iterable(value):
            vtype = 'array'
            val = np.ma.asarray(value).astype(np.float)
        else:
            vtype = 'scalar'
            val = np.ma.array([value]).astype(np.float)

        val = np.ma.masked_where(np.isinf(val.data),val)

        self.autoscale_None(val)
        vmin, vmax = float(self.vmin), float(self.vmax)
        if vmin > vmax:
            raise ValueError("minvalue must be less than or equal to maxvalue")
        elif vmin<=0:
            raise ValueError("values must all be positive")
        elif vmin==vmax:
            return type(value)(0.0 * np.asarray(value))
        else:
            if clip:
                mask = np.ma.getmask(val)
                val = np.ma.array(np.clip(val.filled(vmax), vmin, vmax),
                                   mask=mask)
            result = (np.ma.log(val)-np.log(vmin))/(np.log(vmax)-np.log(vmin))
            result.data[result.data<0]=0.0
            result.data[result.data>1]=1.0
            result[np.isinf(val.data)] = -np.inf
            if result.mask is not np.ma.nomask:
                result.mask[np.isinf(val.data)] = False
        if vtype == 'scalar':
            result = result[0]
        return result
Example #21
def equal(a, b, exact):
    if array_equal(a, b):
        return True

    if hasattr(a, 'dtype') and a.dtype in ['f4','f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays.  This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Inf's
            return ninfs == isinf(b).sum()
    if exact:
        return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0)
    else:
        if hasattr(a, 'dtype') and a.dtype == 'f4':
            atol = 1e-5   # Relax precision for special opcodes, like fmod
        else:
            atol = 1e-8
        return (shape(a) == shape(b) and
                allclose(ravel(a), ravel(b), atol=atol))
Example #22
    def sample_representer_points(self):
        # Sample representer points only in the
        # configuration space by setting all environmental
        # variables to 1
        D = np.where(self.is_env == 0)[0].shape[0]

        lower = self.lower[np.where(self.is_env == 0)]
        upper = self.upper[np.where(self.is_env == 0)]

        self.sampling_acquisition.update(self.model)

        for i in range(5):
            restarts = np.random.uniform(low=lower,
                                         high=upper,
                                         size=(self.Nb, D))
            sampler = emcee.EnsembleSampler(self.Nb, D,
                                        self.sampling_acquisition_wrapper)

            self.zb, self.lmb, _ = sampler.run_mcmc(restarts, 50)
            if not np.any(np.isinf(self.lmb)):
                break
            else:
                print("Infinity")
        if np.any(np.isinf(self.lmb)):
            raise ValueError("Could not sample valid representer points! LogEI is -infinity")
        if len(self.zb.shape) == 1:
            self.zb = self.zb[:, None]
        if len(self.lmb.shape) == 1:
            self.lmb = self.lmb[:, None]

        # Project representer points to subspace
        proj = np.ones([self.zb.shape[0],
                    self.upper[self.is_env == 1].shape[0]])
        proj *= self.upper[self.is_env == 1].shape[0]
        self.zb = np.concatenate((self.zb, proj), axis=1)
Example #23
    def checkPattern( self, Ntries=100, debug=False ):
        """
        checks if sparse patterns cover all nonzero entries in user defined gradients by
        evaluating objg() and consg() at random points drawn uniformly from within the box bounds.

        Arguments:
        Ntries: number of random tries. (default: 100).
        debug:  boolean to enable extra debug information. (default: False).

        Returns:
        isCorrect: boolean, True if pattern covers all nonzero entries in the gradients.

        """

        if( self.objg is None or self.objgpattern is None ):
            raise StandardError( "objective gradient and pattern must be set before check" )
        if( self.Ncons > 0 and
            ( self.consg is None or self.consgpattern is None ) ):
            raise StandardError( "constraint gradient and pattern must be set before check" )

        if( self.Ncons > 0 ):
            pattern = np.vstack( (self.objgpattern, self.consgpattern ) )
        else:
            pattern = self.objgpattern

        if( self.ub is not None ):
            ub = self.ub
            ub[ np.isinf( ub ) ] = 1
        else:
            ub = np.ones( (self.N,) )
        if( self.lb is not None ):
            lb = self.lb
            lb[ np.isinf( lb ) ] = -1
        else:
            lb = -np.ones( (self.N,) )

        for k in range( Ntries ):
            usrgrad = np.zeros( (self.Ncons + 1, self.N) )
            point = np.random.rand( self.N ) * ( ub - lb ) + lb

            self.objg( usrgrad[0,:], point )
            if( self.Ncons > 0 ):
                self.consg( usrgrad[1:,:], point )

            usrgrad[ np.nonzero( pattern ) ] = 0
            if( np.any( usrgrad ) ):
                if( debug ):
                    idx = np.unravel_index( np.argmax( np.abs(usrgrad) ), usrgrad.shape )
                    if( idx[0] == 0 ):
                        print( ">>> Pattern check failed. Found wrong nonzero value in " +
                               "objg() at element {0}".format( idx[1] ) )
                    else:
                        print( ">>> Pattern check failed. Found wrong nonzero value in " +
                               "consg() at element ({0},{1})".format( idx[0]-1, idx[1] ) )
                return False

        if( debug ):
            print( ">>> Pattern check passed" )

        return True
Example #24
def knn(x_train, y_train, x_valid):
    x_train=np.log(x_train+1)
    x_valid=np.log(x_valid+1)

    where_are_nan = np.isnan(x_train)
    where_are_inf = np.isinf(x_train)
    x_train[where_are_nan] = 0
    x_train[where_are_inf] = 0
    where_are_nan = np.isnan(x_valid)
    where_are_inf = np.isinf(x_valid)
    x_valid[where_are_nan] = 0
    x_valid[where_are_inf] = 0

    scale=StandardScaler()
    scale.fit(x_train)
    x_train=scale.transform(x_train)
    x_valid=scale.transform(x_valid)

    #pca = PCA(n_components=10)
    #pca.fit(x_train)
    #x_train = pca.transform(x_train)
    #x_valid = pca.transform(x_valid)

    kneighbors=KNeighborsClassifier(n_neighbors=200,n_jobs=-1)
    knn_train, knn_test = stacking(kneighbors, x_train, y_train, x_valid, "knn")
    return knn_train, knn_test, "knn"
Example #25
def lscsum0(lx):
  """
  Accepts log-values as input, exponentiates them, sums down the rows
  (first dimension), then converts the sum back to log-space and returns the result.
  Handles underflow by rescaling so that the largest values is exactly 1.0.
  """
  # rows = lx.shape[0]
  # columns = numpy.prod(lx.shape[1:])
  # lx = lx.reshape(rows, columns)
  # bases = lx.max(1).reshape(rows, 1)
  # bases = lx.max(0).reshape((1,) + lx.shape[1:])
  lx = numpy.asarray(lx)
  bases = lx.max(0) # Don't need to reshape in the case of 0.
  x = numpy.exp(lx - bases)
  ssum = x.sum(0)

  result = numpy.log(ssum) + bases
  try:
    conventional = numpy.log(numpy.exp(lx).sum(0))

    if not similar(result, conventional):
      if numpy.isinf(conventional).any() and not numpy.isinf(result).any():
        # print "Scaled log sum down axis 0 avoided underflow or overflow."
        pass
      else:
        import sys
        print >>sys.stderr, "Warning: scaled log sum down axis 0 did not match."
        print >>sys.stderr, "Scaled log result:"
        print >>sys.stderr, result
        print >>sys.stderr, "Conventional result:"
        print >>sys.stderr, conventional
  except FloatingPointError, e:
    # print "Scaled log sum down axis 0 avoided underflow or overflow."
    pass

  return result
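Why the rescaling matters can be seen on values that underflow a direct exponentiation; a small self-contained sketch of the same trick (plain numpy, not calling the function above):

import numpy

lx = numpy.array([[-1000.0, -1001.0],
                  [-1000.5, -1002.0]])
naive = numpy.log(numpy.exp(lx).sum(0))              # underflows to [-inf, -inf]
bases = lx.max(0)                                    # rescale so the largest value is 1.0
stable = numpy.log(numpy.exp(lx - bases).sum(0)) + bases
print(naive, stable)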
Example #26
def find_reasonable_epsilon(theta0, grad0, logp0, f):
    """ Heuristic for choosing an initial value of epsilon """
    epsilon = 1.
    r0 = np.random.normal(0., 1., len(theta0))

    # Figure out what direction we should be moving epsilon.
    _, rprime, gradprime, logpprime = leapfrog(theta0, r0, grad0, epsilon, f)
    # brutal! This trick make sure the step is not huge leading to infinite
    # values of the likelihood. This could also help to make sure theta stays
    # within the prior domain (if any)
    k = 1.
    while np.isinf(logpprime) or np.isinf(gradprime).any():
        k *= 0.5
        # also refresh gradprime, otherwise the loop condition can never change
        _, rprime, gradprime, logpprime = leapfrog(theta0, r0, grad0, epsilon * k, f)

    epsilon = 0.5 * k * epsilon

    # acceptprob = np.exp(logpprime - logp0 - 0.5 * (np.dot(rprime, rprime.T) - np.dot(r0, r0.T)))
    # a = 2. * float((acceptprob > 0.5)) - 1.
    logacceptprob = logpprime-logp0-0.5*(np.dot(rprime, rprime)-np.dot(r0,r0))
    a = 1. if logacceptprob > np.log(0.5) else -1.
    # Keep moving epsilon in that direction until acceptprob crosses 0.5.
    # while ( (acceptprob ** a) > (2. ** (-a))):
    while a * logacceptprob > -a * np.log(2):
        epsilon = epsilon * (2. ** a)
        _, rprime, _, logpprime = leapfrog(theta0, r0, grad0, epsilon, f)
        # acceptprob = np.exp(logpprime - logp0 - 0.5 * ( np.dot(rprime, rprime.T) - np.dot(r0, r0.T)))
        logacceptprob = logpprime-logp0-0.5*(np.dot(rprime, rprime)-np.dot(r0,r0))

    print("find_reasonable_epsilon=", epsilon)

    return epsilon
Example #27
def contains_inf(arr, node=None, var=None):
    """
    Test whether a numpy.ndarray contains any `np.inf` values.

    Parameters
    ----------
    arr : np.ndarray or output of any Theano op
    node : None or an Apply instance.
        If the output of a Theano op, the node associated to it.
    var : The Theano symbolic variable.

    Returns
    -------
    contains_inf : bool
        `True` if the array contains any `np.inf` values, `False` otherwise.

    Notes
    -----
    Tests for the presence of `np.inf`'s by determining whether the
    values returned by `np.nanmin(arr)` and `np.nanmax(arr)` are finite.
    This approach is more memory efficient than the obvious alternative,
    calling `np.any(np.isinf(ndarray))`, which requires the construction of a
    boolean array with the same shape as the input array.

    """
    if not _is_numeric_value(arr, var):
        return False
    elif getattr(arr, 'dtype', '') in T.discrete_dtypes:
        return False
    elif pygpu_available and isinstance(arr, GpuArray):
        return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or
                np.isinf(f_gpua_max(arr.reshape(arr.size))))

    return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
Example #28
 def test_rescale_by_zero(self):
     # post.y should contain some nans and infs
     self.test_curve.rescale(factor=0)
     self.assertTrue(np.array_equal(self.compare_curve.x, self.test_curve.x))
     self.assertTrue(np.isnan(self.test_curve.y[0]))
     self.assertTrue(np.isinf(self.test_curve.y[1]))
     self.assertTrue(np.isinf(self.test_curve.y[2]))
Example #29
def calculate_bounds_of_probability_distribution(
    probability_distribution, distribution_integral_limit=DISTRIBUTION_INTEGRAL_LIMIT
):
    a, b = probability_distribution.interval(1)
    if isinf(a) or isinf(b):
        a, b = probability_distribution.interval(distribution_integral_limit)
    return a, b
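A usage sketch with a scipy.stats frozen distribution (assumptions: the function above is in scope and isinf comes from numpy or math in its module; the DISTRIBUTION_INTEGRAL_LIMIT default is a constant not shown here, so a value is passed explicitly):

from scipy import stats

dist = stats.norm(loc=0.0, scale=1.0)
print(dist.interval(1))   # (-inf, inf): the full support of a normal is unbounded
print(calculate_bounds_of_probability_distribution(
    dist, distribution_integral_limit=0.999))   # finite bounds covering 99.9% of the mass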
Example #30
def initwb_lin(layer):
    """
    Initialize weights and biases with values evenly spaced (linspace)
    over the active input range, rather than with random values.

    This function is needed for tests.

    :Parameters:
        layer: core.Layer object
            Initialization layer
    """
    active = layer.transf.inp_active[:]

    if np.isinf(active[0]):
        active[0] = -100.0

    if np.isinf(active[1]):
        active[1] = 100.0

    min = active[0] / (2 * layer.cn)
    max = active[1] / (2 * layer.cn)

    for k in layer.np:
        inits = np.linspace(min, max, layer.np[k].size)
        inits.shape = layer.np[k].shape
        layer.np[k] = inits
Example #31
def train(args,
          model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 =  'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2  + '/log.jsonl.gz', formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True)
            spatial_width=32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False)
            spatial_width=64

        n_colors = 3


    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets = ['train'], which_format="64", sources=('features',), load_in_memory=False)
        dataset_test = CelebA(which_sets = ['test'], which_format="64", sources=('features',), load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train, iteration_scheme = tr_scheme)
        test_stream = DataStream.default_stream(dataset_test, iteration_scheme = ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01,
                           sources=('features',))
        dataset_train = DataStream.default_stream(train_set,
                            iteration_scheme=ShuffledScheme(
                            train_set.num_examples, args.batch_size))

    else:
        raise ValueError("Unknown dataset %s."%args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(DataStream.default_stream(dataset_train,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_train.num_examples - (dataset_train.num_examples%args.batch_size),
                                  batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch*scl)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature, step_chain = build_model(tparams, model_options)
    inps = [x.astype('float32'), start_temperature, step_chain]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature  = T.scalar('temperature', dtype='float32')
    step_chain_part  = T.scalar('step_chain_part', dtype='int32')

    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature, step_chain_part)

    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j])


    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    #for param in tparams:
    #    print param
    #    print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print  'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,3*32*32),)
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                data_run = data_run.astype('float32')
                meta_cost.append(f_grad_shared(data_run, temperature_forward, meta_step))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(data_run, temperature_forward, meta_step)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({'epoch': eidx,
                        'batch_index': batch_index,
                        'uidx': uidx,
                        'training_error': cost})

            if batch_index%20==0:
                print batch_index, "cost", cost

            if batch_index%1000==0:
                print 'saving params'
                params = unzip(tparams)
                save_params(params, model_dir + '/' + 'params_' + str(batch_index) + '.npz')

            if batch_index%200==0:
                count_sample += 1
                '''
                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps -1 ))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(data[0].astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(x_data.astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor;
                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                plot_images(x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' +  str(batch_index))

                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1 ))
                for i in range(args.num_steps*args.meta_steps + args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation  = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1)
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/'+ "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
                '''

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(0.5, 2.0, size=(args.batch_size,INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    # assumed fix: keep the sampled noise (the original assigned it to an unused name `s`)
                    x_sampled = np.random.binomial(1, 0.5, size=(args.batch_size, INPUT_SIZE))

                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps*args.meta_steps + args.extra_steps):
                    x_data,  sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1)
                    print 'On step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
Example #32
def _compare_segregation(seg_class_1,
                         seg_class_2,
                         iterations_under_null=500,
                         null_approach="random_label",
                         **kwargs):
    '''
    Perform inference comparison for two segregation measures

    Parameters
    ----------

    seg_class_1           : a PySAL segregation object to be compared to seg_class_2
    
    seg_class_2           : a PySAL segregation object to be compared to seg_class_1
    
    iterations_under_null : number of iterations under the null hypothesis
    
    null_approach : argument that specifies which type of null hypothesis the inference will iterate over.
    
        "random_label"               : randomly relabel the data in each iteration
        
        "counterfactual_composition" : randomizes the minority population according to the cumulative distribution functions of both contexts for a variable that represents the composition of the minority group. The composition is the minority population of unit i divided by the total population of unit i.

        "counterfactual_share" : randomizes the minority population and the total population according to the cumulative distribution functions of both contexts for a variable that represents the share of the minority group. The share is the minority population of unit i divided by the total minority population.

    **kwargs : customizable parameters to pass to the segregation measures. Usually they need to be the same ones used to build both seg_class_1 and seg_class_2.
    
    Attributes
    ----------

    p_value        : float
                     Two-Tailed p-value
    
    est_sim        : numpy array
                     Estimates of the segregation measure differences under the null hypothesis
                  
    est_point_diff : float
                     Point estimation of the difference between the segregation measures
                
    Notes
    -----
    This function performs inference to compare two segregation measures. These can be two measures of the same locations at two different points in time, or measures of two different locations at the same point in time.
    
    The null hypothesis is H0: Segregation_1 is not different from Segregation_2.
    
    Based on Rey, Sergio J., and Myrna L. Sastré-Gutiérrez. "Interregional inequality dynamics in Mexico." Spatial Economic Analysis 5.3 (2010): 277-298.

    '''

    if not null_approach in [
            'random_label', 'counterfactual_composition',
            'counterfactual_share'
    ]:
        raise ValueError(
            'null_approach must be one of \'random_label\', \'counterfactual_composition\', \'counterfactual_share\''
        )

    if (type(seg_class_1) != type(seg_class_2)):
        raise TypeError(
            'seg_class_1 and seg_class_2 must be the same type/class.')

    point_estimation = seg_class_1.statistic - seg_class_2.statistic

    aux = str(type(seg_class_1))
    _class_name = aux[1 + aux.rfind(
        '.'):-2]  # 'rfind' finds the last occurrence of a pattern in a string

    data_1 = seg_class_1.core_data
    data_2 = seg_class_2.core_data

    # This step just makes sure each frequency column is an integer and of the same type across approaches, so they can be stacked for the random data approach
    data_1['group_pop_var'] = round(data_1['group_pop_var']).astype(int)
    data_1['total_pop_var'] = round(data_1['total_pop_var']).astype(int)

    data_2['group_pop_var'] = round(data_2['group_pop_var']).astype(int)
    data_2['total_pop_var'] = round(data_2['total_pop_var']).astype(int)

    est_sim = np.empty(iterations_under_null)

    ################
    # RANDOM LABEL #
    ################
    if (null_approach == "random_label"):

        data_1['grouping_variable'] = 'Group_1'
        data_2['grouping_variable'] = 'Group_2'

        stacked_data = pd.concat([data_1, data_2], ignore_index=True)

        for i in np.array(range(iterations_under_null)):

            aux_rand = list(
                np.random.choice(stacked_data.shape[0],
                                 stacked_data.shape[0],
                                 replace=False))

            stacked_data['rand_group_pop'] = stacked_data.group_pop_var[
                aux_rand].reset_index()['group_pop_var']
            stacked_data['rand_total_pop'] = stacked_data.total_pop_var[
                aux_rand].reset_index()['total_pop_var']

            # Dropping variable to avoid confusion in the calculate_segregation function
            # Building auxiliary data to avoid affecting the next iteration
            stacked_data_aux = stacked_data.drop(
                ['group_pop_var', 'total_pop_var'], axis=1)

            stacked_data_1 = stacked_data_aux.loc[
                stacked_data_aux['grouping_variable'] == 'Group_1']
            stacked_data_2 = stacked_data_aux.loc[
                stacked_data_aux['grouping_variable'] == 'Group_2']

            simulations_1 = seg_class_1._function(stacked_data_1,
                                                  'rand_group_pop',
                                                  'rand_total_pop',
                                                  **kwargs)[0]
            simulations_2 = seg_class_2._function(stacked_data_2,
                                                  'rand_group_pop',
                                                  'rand_total_pop',
                                                  **kwargs)[0]

            est_sim[i] = simulations_1 - simulations_2

            print('Processed {} iterations out of {}.'.format(
                i + 1, iterations_under_null),
                  end="\r")

    ##############################
    # COUNTERFACTUAL COMPOSITION #
    ##############################
    if (null_approach == "counterfactual_composition"):

        data_1['rel'] = np.where(
            data_1['total_pop_var'] == 0, 0,
            data_1['group_pop_var'] / data_1['total_pop_var'])
        data_2['rel'] = np.where(
            data_2['total_pop_var'] == 0, 0,
            data_2['group_pop_var'] / data_2['total_pop_var'])

        # Both appends force both distributions to have values over the whole interval [0, 1]
        x_1_pre = np.sort(data_1['rel'])
        y_1_pre = np.arange(0, len(x_1_pre)) / (len(x_1_pre))

        x_2_pre = np.sort(data_2['rel'])
        y_2_pre = np.arange(0, len(x_2_pre)) / (len(x_2_pre))

        x_1 = np.append(np.append(0, x_1_pre), 1)
        y_1 = np.append(np.append(0, y_1_pre), 1)

        x_2 = np.append(np.append(0, x_2_pre), 1)
        y_2 = np.append(np.append(0, y_2_pre), 1)

        def inverse_cdf_1(pct):
            return x_1[np.where(y_1 > pct)[0][0] - 1]

        def inverse_cdf_2(pct):
            return x_2[np.where(y_2 > pct)[0][0] - 1]

        # Adding the pseudo columns for FIRST spatial context
        data_1['cumulative_percentage'] = (data_1['rel'].rank() - 1) / len(
            data_1
        )  # Subtract 1 from the rank so the maximum never hits the 100th percentile
        data_1['pseudo_rel'] = data_1['cumulative_percentage'].apply(
            inverse_cdf_2)
        data_1['pseudo_group_pop_var'] = round(
            data_1['pseudo_rel'] * data_1['total_pop_var']).astype(int)

        # Adding the pseudo columns for SECOND spatial context
        data_2['cumulative_percentage'] = (data_2['rel'].rank() - 1) / len(
            data_2
        )  # Subtract 1 from the rank so the maximum never hits the 100th percentile
        data_2['pseudo_rel'] = data_2['cumulative_percentage'].apply(
            inverse_cdf_1)
        data_2['pseudo_group_pop_var'] = round(
            data_2['pseudo_rel'] * data_2['total_pop_var']).astype(int)

        for i in np.array(range(iterations_under_null)):

            data_1['fair_coin'] = np.random.uniform(size=len(data_1))
            data_1['test_group_pop_var'] = np.where(
                data_1['fair_coin'] > 0.5, data_1['group_pop_var'],
                data_1['pseudo_group_pop_var'])

            # Dropping to avoid confusion in the internal function
            data_1_test = data_1.drop(['group_pop_var'], axis=1)

            simulations_1 = seg_class_1._function(data_1_test,
                                                  'test_group_pop_var',
                                                  'total_pop_var', **kwargs)[0]

            # Dropping to avoid confusion in the next iteration
            data_1 = data_1.drop(['fair_coin', 'test_group_pop_var'], axis=1)

            data_2['fair_coin'] = np.random.uniform(size=len(data_2))
            data_2['test_group_pop_var'] = np.where(
                data_2['fair_coin'] > 0.5, data_2['group_pop_var'],
                data_2['pseudo_group_pop_var'])

            # Dropping to avoid confusion in the internal function
            data_2_test = data_2.drop(['group_pop_var'], axis=1)

            simulations_2 = seg_class_2._function(data_2_test,
                                                  'test_group_pop_var',
                                                  'total_pop_var', **kwargs)[0]

            # Dropping to avoid confusion in the next iteration
            data_2 = data_2.drop(['fair_coin', 'test_group_pop_var'], axis=1)

            est_sim[i] = simulations_1 - simulations_2

            print('Processed {} iterations out of {}.'.format(
                i + 1, iterations_under_null),
                  end="\r")

    ########################
    # COUNTERFACTUAL SHARE #
    ########################
    if (null_approach == "counterfactual_share"):

        data_1['compl_pop_var'] = data_1['total_pop_var'] - data_1[
            'group_pop_var']
        data_2['compl_pop_var'] = data_2['total_pop_var'] - data_2[
            'group_pop_var']

        # Build the share for each group individually
        data_1['share'] = np.where(
            data_1['total_pop_var'] == 0, 0,
            data_1['group_pop_var'] / data_1['group_pop_var'].sum())
        data_2['share'] = np.where(
            data_2['total_pop_var'] == 0, 0,
            data_2['group_pop_var'] / data_2['group_pop_var'].sum())

        data_1['compl_share'] = np.where(
            data_1['compl_pop_var'] == 0, 0,
            data_1['compl_pop_var'] / data_1['compl_pop_var'].sum())
        data_2['compl_share'] = np.where(
            data_2['compl_pop_var'] == 0, 0,
            data_2['compl_pop_var'] / data_2['compl_pop_var'].sum())

        # Both appends force both distributions to have values over the whole interval [0, 1]
        x_1_pre = np.sort(data_1['share'])
        y_1_pre = np.arange(0, len(x_1_pre)) / (len(x_1_pre))

        x_2_pre = np.sort(data_2['share'])
        y_2_pre = np.arange(0, len(x_2_pre)) / (len(x_2_pre))

        x_1 = np.append(np.append(0, x_1_pre), 1)
        y_1 = np.append(np.append(0, y_1_pre), 1)

        x_2 = np.append(np.append(0, x_2_pre), 1)
        y_2 = np.append(np.append(0, y_2_pre), 1)

        def inverse_cdf_1(pct):
            return x_1[np.where(y_1 > pct)[0][0] - 1]

        def inverse_cdf_2(pct):
            return x_2[np.where(y_2 > pct)[0][0] - 1]

        # Both appends force both distributions to have values over the whole interval [0, 1]
        compl_x_1_pre = np.sort(data_1['compl_share'])
        compl_y_1_pre = np.arange(0, len(compl_x_1_pre)) / (len(compl_x_1_pre))

        compl_x_2_pre = np.sort(data_2['compl_share'])
        compl_y_2_pre = np.arange(0, len(compl_x_2_pre)) / (len(compl_x_2_pre))

        compl_x_1 = np.append(np.append(0, compl_x_1_pre), 1)
        compl_y_1 = np.append(np.append(0, compl_y_1_pre), 1)

        compl_x_2 = np.append(np.append(0, compl_x_2_pre), 1)
        compl_y_2 = np.append(np.append(0, compl_y_2_pre), 1)

        def compl_inverse_cdf_1(pct):
            return compl_x_1[np.where(compl_y_1 > pct)[0][0] - 1]

        def compl_inverse_cdf_2(pct):
            return compl_x_2[np.where(compl_y_2 > pct)[0][0] - 1]

        # Adding the pseudo columns for FIRST spatial context
        data_1['cumulative_percentage'] = (data_1['share'].rank() - 1) / len(
            data_1
        )  # Subtract 1 from the rank so the maximum never hits the 100th percentile
        data_1['pseudo_share_pre'] = data_1['cumulative_percentage'].apply(
            inverse_cdf_2)
        data_1['pseudo_share'] = data_1['pseudo_share_pre'] / data_1[
            'pseudo_share_pre'].sum(
            )  # Rescale because the sum of the values may be greater or less than 1
        data_1['pseudo_group_pop_var'] = round(
            data_1['pseudo_share'] * data_1['group_pop_var'].sum()).astype(int)

        # Subtract 1 from the rank so that the maximum does not map to the 100th percentile
        data_1['compl_cumulative_percentage'] = (
            data_1['compl_share'].rank() - 1) / len(data_1)
        data_1['compl_pseudo_share_pre'] = data_1[
            'compl_cumulative_percentage'].apply(compl_inverse_cdf_2)
        # Rescale because the values may sum to slightly more or less than 1
        data_1['compl_pseudo_share'] = data_1[
            'compl_pseudo_share_pre'] / data_1['compl_pseudo_share_pre'].sum()
        data_1['pseudo_compl_pop_var'] = round(
            data_1['compl_pseudo_share'] *
            data_1['compl_pop_var'].sum()).astype(int)

        data_1['pseudo_total_pop'] = data_1['pseudo_group_pop_var'] + data_1[
            'pseudo_compl_pop_var']

        # Adding the pseudo columns for SECOND spatial context
        # Subtract 1 from the rank so that the maximum does not map to the 100th percentile
        data_2['cumulative_percentage'] = (data_2['share'].rank() - 1) / len(data_2)
        data_2['pseudo_share_pre'] = data_2['cumulative_percentage'].apply(
            inverse_cdf_1)
        # Rescale because the values may sum to slightly more or less than 1
        data_2['pseudo_share'] = data_2['pseudo_share_pre'] / data_2[
            'pseudo_share_pre'].sum()
        data_2['pseudo_group_pop_var'] = round(
            data_2['pseudo_share'] * data_2['group_pop_var'].sum()).astype(int)

        # Subtract 1 from the rank so that the maximum does not map to the 100th percentile
        data_2['compl_cumulative_percentage'] = (
            data_2['compl_share'].rank() - 1) / len(data_2)
        data_2['compl_pseudo_share_pre'] = data_2[
            'compl_cumulative_percentage'].apply(compl_inverse_cdf_1)
        # Rescale because the values may sum to slightly more or less than 1
        data_2['compl_pseudo_share'] = data_2[
            'compl_pseudo_share_pre'] / data_2['compl_pseudo_share_pre'].sum()
        data_2['pseudo_compl_pop_var'] = round(
            data_2['compl_pseudo_share'] *
            data_2['compl_pop_var'].sum()).astype(int)

        data_2['pseudo_total_pop'] = data_2['pseudo_group_pop_var'] + data_2[
            'pseudo_compl_pop_var']

        for i in np.array(range(iterations_under_null)):

            # In the 'counterfactual_share' approach, the group and total populations can also be swapped during the iterations
            data_1['fair_coin'] = np.random.uniform(size=len(data_1))
            data_1['test_group_pop_var'] = np.where(
                data_1['fair_coin'] > 0.5, data_1['group_pop_var'],
                data_1['pseudo_group_pop_var'])
            data_1['test_total_pop_var'] = np.where(data_1['fair_coin'] > 0.5,
                                                    data_1['total_pop_var'],
                                                    data_1['pseudo_total_pop'])

            # Dropping to avoid confusion in the internal function
            data_1_test = data_1.drop(['group_pop_var', 'total_pop_var'],
                                      axis=1)

            simulations_1 = seg_class_1._function(data_1_test,
                                                  'test_group_pop_var',
                                                  'test_total_pop_var',
                                                  **kwargs)[0]

            # Dropping to avoid confusion in the next iteration
            data_1 = data_1.drop(
                ['fair_coin', 'test_group_pop_var', 'test_total_pop_var'],
                axis=1)

            # In the 'counterfactual_share' approach, the group and total populations can also be swapped during the iterations
            data_2['fair_coin'] = np.random.uniform(size=len(data_2))
            data_2['test_group_pop_var'] = np.where(
                data_2['fair_coin'] > 0.5, data_2['group_pop_var'],
                data_2['pseudo_group_pop_var'])
            data_2['test_total_pop_var'] = np.where(data_2['fair_coin'] > 0.5,
                                                    data_2['total_pop_var'],
                                                    data_2['pseudo_total_pop'])

            # Dropping to avoid confusion in the internal function
            data_2_test = data_2.drop(['group_pop_var', 'total_pop_var'],
                                      axis=1)

            simulations_2 = seg_class_2._function(data_2_test,
                                                  'test_group_pop_var',
                                                  'test_total_pop_var',
                                                  **kwargs)[0]

            # Dropping to avoid confusion in the next iteration
            data_2 = data_2.drop(
                ['fair_coin', 'test_group_pop_var', 'test_total_pop_var'],
                axis=1)

            est_sim[i] = simulations_1 - simulations_2

            print('Processed {} iterations out of {}.'.format(
                i + 1, iterations_under_null),
                  end="\r")

    # Check for, and if necessary remove, iterations under the null that produced NaN or infinite values
    if any((np.isinf(est_sim) | np.isnan(est_sim))):
        warnings.warn(
            'Some estimates resulted in NaN or infinite values for estimations under null hypothesis. These values will be removed for the final results.'
        )
        est_sim = est_sim[~(np.isinf(est_sim) | np.isnan(est_sim))]

    # Two-tailed p-value
    # Note: the null distribution can be located far from zero, so this is the appropriate way to compute the two-tailed p-value.
    aux1 = (point_estimation < est_sim).sum()
    aux2 = (point_estimation > est_sim).sum()
    p_value = 2 * np.array([aux1, aux2]).min() / len(est_sim)

    return p_value, est_sim, point_estimation, _class_name
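
The two-tailed p-value above counts how many simulated differences fall on either side of the point estimate and doubles the smaller tail. A minimal sketch with made-up numbers (not from any real run) shows the arithmetic:

import numpy as np

est_sim = np.array([0.8, 0.9, 1.0, 1.1, 1.2])   # hypothetical null distribution
point_estimation = 1.15                         # hypothetical observed difference

aux1 = (point_estimation < est_sim).sum()       # 1 simulated value above the estimate
aux2 = (point_estimation > est_sim).sum()       # 4 simulated values below it
p_value = 2 * np.array([aux1, aux2]).min() / len(est_sim)   # 2 * 1 / 5 = 0.4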
Beispiel #33
0
# Get the anomalous points (outliers) of the training set
outliers = np.where(pTest < epsilon, True, False).ravel()
plt.plot(X[outliers, 0],
         X[outliers, 1],
         'ro',
         lw=2,
         markersize=10,
         fillstyle='none',
         markeredgewidth=1)
n = np.linspace(0, 35, 100)
X1 = np.meshgrid(n, n)
XFit = np.mat(np.column_stack((X1[0].T.flatten(), X1[1].T.flatten())))
pFit = np.mat([p(x.T) for x in XFit]).reshape(-1, 1)
pFit = pFit.reshape(X1[0].shape)
if not np.isinf(np.sum(pFit)):
    plt.contour(X1[0], X1[1], pFit, 10.0**np.arange(-20, 0, 3).T)
plt.show()

# High-dimensional test ...
data = loadmat('ex8data2.mat')
X = np.mat(data['X'])
XVal = np.mat(data['Xval'])
yVal = np.mat(data['yval'])

# p = anomaly.train(X)
p = train(X, model=multivariateGaussianModel)
pTest = np.mat([p(x.T) for x in X]).reshape(-1, 1)

epsilon, f1 = selectEpsilon(XVal, yVal, p)
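
The call to selectEpsilon above chooses the probability threshold from the validation set, but its implementation is not shown in this excerpt. A common way to pick such a threshold is to sweep candidate epsilons and keep the one with the best F1 score; the helper below is a hypothetical stand-in for illustration only, assuming p_val and y_val are flat NumPy arrays:

import numpy as np

def select_epsilon_by_f1(p_val, y_val):
    # p_val: densities of the validation examples; y_val: 1 for anomaly, 0 for normal
    best_eps, best_f1 = 0.0, 0.0
    for eps in np.linspace(p_val.min(), p_val.max(), 1000):
        preds = (p_val < eps).astype(int)
        tp = np.sum((preds == 1) & (y_val == 1))
        fp = np.sum((preds == 1) & (y_val == 0))
        fn = np.sum((preds == 0) & (y_val == 1))
        if tp == 0:
            continue
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        if f1 > best_f1:
            best_eps, best_f1 = eps, f1
    return best_eps, best_f1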
Beispiel #34
0
                x: batchInputs,
                SeqLens: batchSeqLengths,
                indices: batchTargetIxs,
                values: batchTargetVals,
                shape: batchTargetShape
            }
            del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths

            _, summary, Losses, Loss, Error = session.run(
                [train_step, LocalTrainSummary, losses, loss, error_rate],
                feed_dict=feed)
            del feed

            SummaryWriter.add_summary(summary, epoch * totalIter + batch)

            numberOfInfElements = np.count_nonzero(np.isinf(Losses))
            if numberOfInfElements > 0:
                LogFile.write("WARNING: INF VALUE(S) FOUND!\n")
                LogFile.write("%s\n" % (batchTargetList[np.where(
                    np.isinf(Losses) == True)[0][0]]))
                LogFile.write("Losses\n")
                Losses = filter(lambda v: ~np.isinf(v), Losses)
                Loss = np.mean(Losses)

            TrainingLoss.append(Loss)
            TrainingError.append(Error)

            LogFile.write("Epoch %d, Batch: %d, Loss: %.6f, Error: %.6f, " %
                          (epoch, batch, Loss, Error))

            if currTrainLoss < Loss: LogFile.write("Bad\n")
def safe_log(x, nan_substitute=-1e+4):
    l = np.log(x)
    l[np.logical_or(np.isnan(l), np.isinf(l))] = nan_substitute
    return l
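
A quick usage note (illustrative values): np.log maps 0 to -inf and negative inputs to nan (emitting RuntimeWarnings along the way), and safe_log replaces both with the substitute value:

safe_log(np.array([1.0, 0.0, -1.0]))   # -> array([     0., -10000., -10000.])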
    def plot(self,
             filename="triplot.png",
             doshow=True,
             figsize=(8, 6),
             save=True,
             minimum=None,
             points=None,
             colorbararrow=None):
        '''
        Create the triangle plots as in the optimal frequencies paper.
        '''
        fig = figure(figsize=figsize)
        ax = fig.add_subplot(111)
        if self.frac_bw == False:
            data = np.transpose(np.log10(self.sigmas))
            if self.log == False:
                im = uimshow(
                    data,
                    extent=[self.Cs[0], self.Cs[-1], self.Bs[0], self.Bs[-1]],
                    cmap=cm.inferno_r,
                    ax=ax)

                ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$")
                ax.set_ylabel(r"$\mathrm{Bandwidth}~B~\mathrm{(GHz)}$")
            else:

                im = uimshow(data,
                             extent=np.log10(
                                 np.array([
                                     self.Cs[0], self.Cs[-1], self.Bs[0],
                                     self.Bs[-1]
                                 ])),
                             cmap=cm.inferno_r,
                             ax=ax)
                cax = ax.contour(data,
                                 extent=np.log10(
                                     np.array([
                                         self.Cs[0], self.Cs[-1], self.Bs[0],
                                         self.Bs[-1]
                                     ])),
                                 colors=self.colors,
                                 levels=self.levels,
                                 linewidths=self.lws,
                                 origin='lower')

                #https://stackoverflow.com/questions/18390068/hatch-a-nan-region-in-a-contourplot-in-matplotlib
                # get data you will need to create a "background patch" to your plot
                xmin, xmax = ax.get_xlim()
                ymin, ymax = ax.get_ylim()
                xy = (xmin, ymin)
                width = xmax - xmin
                height = ymax - ymin
                # create the patch and place it in the back of countourf (zorder!)
                p = patches.Rectangle(xy,
                                      width,
                                      height,
                                      hatch='X',
                                      color='0.5',
                                      fill=None,
                                      zorder=-10)
                ax.add_patch(p)

                ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$")
                ax.set_ylabel(r"$\mathrm{Bandwidth}~B~\mathrm{(GHz)}$")
                ax.xaxis.set_major_locator(MultipleLocator(0.5))
                ax.yaxis.set_major_locator(MultipleLocator(0.5))
                ax.xaxis.set_major_formatter(noformatter)
                ax.yaxis.set_major_formatter(noformatter)

                ax.text(0.05,
                        0.9,
                        "PSR~%s" % self.psrnoise.name.replace("-", "$-$"),
                        fontsize=18,
                        transform=ax.transAxes,
                        bbox=dict(boxstyle="square", fc="white"))

            if minimum is not None:
                checkdata = np.log10(self.sigmas)
                flatdata = checkdata.flatten()
                #inds = np.where(np.logical_not(np.isnan(flatdata)))[0]
                inds = np.where((~np.isnan(flatdata))
                                & ~(np.isinf(flatdata)))[0]
                MIN = np.min(flatdata[inds])
                INDC, INDB = np.where(checkdata == MIN)
                INDC, INDB = INDC[0], INDB[0]
                MINB = self.Bs[INDB]
                MINC = self.Cs[INDC]
                cax = ax.contour(data,
                                 extent=np.log10(
                                     np.array([
                                         self.Cs[0], self.Cs[-1], self.Bs[0],
                                         self.Bs[-1]
                                     ])),
                                 colors=['b', 'b'],
                                 levels=[
                                     np.log10(1.1 * (10**MIN)),
                                     np.log10(1.5 * (10**MIN))
                                 ],
                                 linewidths=[1, 1],
                                 linestyles=['--', '--'],
                                 origin='lower')
                print("Minimum", MINC, MINB, MIN)
                with open("minima.txt", 'a') as FILE:
                    FILE.write("%s minima %f %f %f\n" %
                               (self.psrnoise.name, MINC, MINB, MIN))
                if self.log:
                    ax.plot(np.log10(MINC),
                            np.log10(MINB),
                            minimum,
                            zorder=50,
                            ms=10)
                else:
                    ax.plot(MINC, MINB, minimum, zorder=50, ms=10)

            if points is not None:
                if type(points) == tuple:
                    points = [points]
                for point in points:
                    x, y, fmt = point
                    nulow = x - y / 2.0
                    nuhigh = x + y / 2.0

                    if self.log:
                        ax.plot(np.log10(x), np.log10(y), fmt, zorder=50, ms=8)
                        nus = np.logspace(np.log10(nulow), np.log10(nuhigh),
                                          self.nchan + 1)[:-1]
                        sigma = np.log10(self.calc_single(nus))
                    else:
                        ax.plot(x, y, fmt, zorder=50, ms=8)
                        nus = np.linspace(nulow, nuhigh, self.nchan +
                                          1)[:-1]  #more uniform sampling?
                        sigma = np.log10(self.calc_single(nus))
                    with open("minima.txt", 'a') as FILE:
                        FILE.write("%s point %f %f %f\n" %
                                   (self.psrnoise.name, x, y, sigma))

            if colorbararrow is not None:
                data = np.log10(self.sigmas)
                flatdata = data.flatten()
                #inds = np.where(np.logical_not(np.isnan(flatdata)))[0]
                inds = np.where((~np.isnan(flatdata))
                                & ~(np.isinf(flatdata)))[0]
                MIN = np.min(flatdata[inds])
                MAX = np.max(flatdata[inds])
                if self.log == True:
                    x = np.log10(self.Cs[-1] * 1.05)  #self.Bs[-1])
                    dx = np.log10(1.2)  #np.log10(self.Cs[-1])#self.Bs[-1]*2)
                    frac = (np.log10(colorbararrow) - MIN) / (MAX - MIN)
                    y = frac * (np.log10(self.Bs[-1]) -
                                np.log10(self.Bs[0])) + np.log10(self.Bs[0])
                    arrow(x,
                          y,
                          dx,
                          0.0,
                          fc='k',
                          ec='k',
                          zorder=50,
                          clip_on=False)

        else:
            if self.log == False:
                pass
            else:
                goodinds = []
                for indf, F in enumerate(self.Fs):
                    if np.any(np.isnan(self.sigmas[:, indf])):
                        continue
                    goodinds.append(indf)
                goodinds = np.array(goodinds)
                data = np.transpose(np.log10(self.sigmas[:, goodinds]))

                im = uimshow(data,
                             extent=np.log10(
                                 np.array([
                                     self.Cs[0], self.Cs[-1],
                                     self.Fs[goodinds][0],
                                     self.Fs[goodinds][-1]
                                 ])),
                             cmap=cm.inferno_r,
                             ax=ax)
                cax = ax.contour(data,
                                 extent=np.log10(
                                     np.array([
                                         self.Cs[0], self.Cs[-1],
                                         self.Fs[goodinds][0],
                                         self.Fs[goodinds][-1]
                                     ])),
                                 colors=COLORS,
                                 levels=LEVELS,
                                 linewidths=LWS,
                                 origin='lower')

                #im = uimshow(data,extent=np.array([np.log10(self.Cs[0]),np.log10(self.Cs[-1]),self.Fs[goodinds][0],self.Fs[goodinds][-1]]),cmap=cm.inferno_r,ax=ax)
                #cax = ax.contour(data,extent=np.array([np.log10(self.Cs[0]),np.log10(self.Cs[-1]),self.Fs[goodinds][0],self.Fs[goodinds][-1]]),colors=COLORS,levels=LEVELS,linewidths=LWS,origin='lower')

                print(self.Fs)
                ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$")
                #ax.set_ylabel(r"$r~\mathrm{(\nu_{max}/\nu_{min})}$")
                ax.set_ylabel(r"$\mathrm{Fractional~Bandwidth~(B/\nu_0)}$")
                # no log
                #ax.yaxis.set_major_locator(FixedLocator(np.log10(np.arange(0.25,1.75,0.25))))

                ax.xaxis.set_major_formatter(noformatter)
                #ax.yaxis.set_major_formatter(noformatter)

        cbar = fig.colorbar(im)  #,format=formatter)
        cbar.set_label("$\mathrm{TOA~Uncertainty~\sigma_{TOA}~(\mu s)}$")

        # https://stackoverflow.com/questions/6485000/python-matplotlib-colorbar-setting-tick-formator-locator-changes-tick-labels
        cbar.locator = MultipleLocator(1)
        cbar.formatter = formatter
        '''
        MAX = np.max(data[np.where(np.logical_not(np.isnan(data)))])
        if MAX <= np.log10(700):
            cbar.formatter = formatter100
        else:
            cbar.formatter = formatter
        '''
        cbar.update_ticks()
        #if self.log:
        #    cb = colorbar(cax)

        if save:
            savefig(filename)
        if doshow:
            show()
        else:
            close()
def getinf(x):
    return num.nonzero(num.isinf(num.atleast_1d(x)))
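
Assuming num is the usual NumPy alias in this module, getinf returns the positions of the infinite entries and, thanks to atleast_1d, also accepts plain scalars:

getinf(num.array([1.0, num.inf, 2.0, -num.inf]))   # -> (array([1, 3]),)
getinf(num.inf)                                    # -> (array([0]),)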
def run_crossmatch_lc(field,
                      CCD,
                      FILTER,
                      kind='final',
                      startTime=datetime.now()):

    warnings.filterwarnings("ignore")

    ##########################################################################

    if not os.path.exists("%s/lightcurves/" % (jorgepath)):
        print "Creating lightcurve folder"
        os.makedirs("%s/lightcurves/" % (jorgepath))
    if not os.path.exists("%s/lightcurves/%s" % (jorgepath, field)):
        print "Creating field folder"
        os.makedirs("%s/lightcurves/%s" % (jorgepath, field))
    if not os.path.exists("%s/lightcurves/%s/%s" % (jorgepath, field, CCD)):
        print "Creating CCD folder"
        os.makedirs("%s/lightcurves/%s/%s" % (jorgepath, field, CCD))

    ##########################################################################

    epochs_file = '%s/info/%s/%s_epochs_%s.txt' % (jorgepath, field, field,
                                                   FILTER)
    if not os.path.exists(epochs_file):
        print 'No epochs file: %s' % (epochs_file)
        sys.exit()
    epochs = np.loadtxt(epochs_file, comments='#', dtype=str)

    if epochs.shape == (2, ):
        epochs = epochs.reshape(1, 2)

    INFO = []
    epoch_c = []
    tree = []
    X_Y = []

    print 'Loading catalogues (%s) files, creating tree structure' % (kind)
    no_epoch = 0
    for epoch in epochs:
        print 'Epoch %s' % epoch[0]

        # catalogues

        cata_file = "%s/catalogues/%s/%s/%s_%s_%s_image_crblaster_thresh%s_minarea%s_backsize64_final-scamp.dat" % \
            (jorgepath, field, CCD, field, CCD,
             epoch[0], str(thresh), str(minarea))
        if not os.path.exists(cata_file):
            print 'No catalog file: %s' % (cata_file)
            no_epoch += 1
            continue
        # cata = np.loadtxt(cata_file, comments='#')
        cata = Table.read(cata_file, format='ascii')

        # epoch_c has all the catalogues, each element of epoch_c contain the
        # catalogue of a given epoch
        epoch_c.append(cata)
        cata_XY = np.transpose(
            np.array((cata['X_IMAGE_REF'], cata['Y_IMAGE_REF'])))
        # X_Y has the pix coordinates of each catalogue
        X_Y.append(cata_XY)
        # X_Y has the pix coordinates of each catalogue in tree structure
        tree.append(cKDTree(cata_XY))

        # INFO of epochs

        INFO.append(epoch)

    if len(epoch_c) == 0:
        print 'No catalogues for this CCD'
        sys.exit()

    INFO = np.asarray(INFO)
    print '____________________________________________________________________'

    # compare each all catalogues to find same

    # master has the final index matrix, rows are objects and columns epochs
    # if master_cat[i][j] = -1 then no match for this object i in epoch j
    master_cat = np.ones((1, len(epoch_c)), dtype=np.int) * (-1)

    # compare all epochs with each other looking for matches
    for TIME in range(len(epoch_c)):

        print 'Length of catalog %s = %i' % (INFO[TIME, 0], len(X_Y[TIME]))

        aux_cat = np.ones((len(X_Y[TIME]), len(epoch_c)), dtype=np.int) * (-1)
        aux_cat[:, TIME] = np.arange(len(X_Y[TIME]))

        if TIME < len(epoch_c):
            for time in range(TIME + 1, len(epoch_c)):

                print 'comparing epoch %s with epoch %s' % (INFO[TIME, 0],
                                                            INFO[time, 0])
                # find for nn
                aux_dist = tree[time].query(X_Y[TIME],
                                            k=1,
                                            distance_upper_bound=5)
                aux_cat[:, time] = aux_dist[1]
                # find the ones without a match and replace them with -1
                mask_no = np.where(aux_cat[:, time] == len(X_Y[time]))
                aux_cat[mask_no, time] = -1
                print 'max: ', np.max(aux_dist[0][~np.isinf(aux_dist[0])])
                # mask_yes holds the indices of the objects matched in
                # epoch[time]; its length equals len(epoch[TIME])
                mask_yes = aux_cat[np.where(aux_cat[:, time] > 0), time]

                print 'objects with match = %i' % len(mask_yes[0])

        # remove from aux_cat the objects already matched in previous iterations
        if TIME > 0:
            to_remove = []
            for q in range(len(aux_cat[:, TIME])):

                repited = np.where(aux_cat[q, TIME] == master_cat[:, TIME])[0]
                if len(repited) > 0:
                    to_remove.append(q)

            aux_cat = np.delete(aux_cat, to_remove, 0)

        # concatenate the aux_catalog to the master catalog
        master_cat = np.vstack((master_cat, aux_cat))
        print 'objects added = %i' % len(aux_cat)
        aux_cat = 0
        print '_______________________________________________________________'

    master_cat = np.delete(master_cat, 0, 0)
    print 'Total of objects = %i' % len(master_cat)

    ##########################################################################

    if kind == 'final':
        np.savetxt("%s/lightcurves/%s/%s/%s_%s_%s_master_index.txt" %
                   (jorgepath, field, CCD, field, CCD, FILTER),
                   master_cat,
                   fmt='%04i',
                   delimiter='\t')
    elif kind == 'temp':
        np.savetxt("%s/catalogues/%s/%s/temp_%s_%s_%s_master_index.txt" %
                   (jorgepath, field, CCD, field, CCD, FILTER),
                   master_cat,
                   fmt='%04i',
                   delimiter='\t')

    # Create lightcurves

    print 'Total of epochs %i' % len(epochs)
    print 'Effective epochs %i' % (len(epochs) - no_epoch)

    print 'It took', (datetime.now() - startTime), 'seconds'
    print '___________________________________________________________________'
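
The matching step above exploits a property of scipy's cKDTree.query: when no neighbour is found within distance_upper_bound, the returned distance is inf and the returned index equals the number of points in the tree, which the loop then rewrites as -1. A minimal standalone sketch of that pattern, using toy coordinates rather than survey data:

import numpy as np
from scipy.spatial import cKDTree

ref = np.array([[0.0, 0.0], [10.0, 10.0]])   # catalogue of one epoch
new = np.array([[0.5, 0.2], [50.0, 50.0]])   # detections from another epoch

dist, idx = cKDTree(ref).query(new, k=1, distance_upper_bound=5)
# dist is approximately [0.539, inf]; idx is [0, 2], where 2 == len(ref) means "no match"
idx = np.where(idx == len(ref), -1, idx)     # -1 marks objects without a match within 5 pixels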
Beispiel #39
0
def _parse_yahoo_historical(fh, adjusted=True, asobject=False, ochl=True):
    """Parse the historical data in file handle fh from yahoo finance.


    Parameters
    ----------

    adjusted : bool
      If True (default) replace open, high, low, close prices with
      their adjusted values. The adjustment is by a scale factor, S =
      adjusted_close/close. Adjusted prices are actual prices
      multiplied by S.

      Volume is not adjusted as it is already backward split adjusted
      by Yahoo. If you want to compute dollars traded, multiply volume
      by the adjusted close, regardless of whether you choose adjusted
      = True|False.


    asobject : bool or None
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, high, low, close, volume

       or

        d, open, close, high, low, volume

      depending on `ochl`

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, high, low, close,
        volume, adjusted_close

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch
      holding 1-D ndarrays.  The behavior of a numpy recarray is
      very similar to the Bunch.

    ochl : bool
        Selects between ochl and ohlc ordering.
        Defaults to True to preserve original functionality.

    """
    if ochl:
        stock_dt = stock_dt_ochl
    else:
        stock_dt = stock_dt_ohlc

    results = []

    #    datefmt = '%Y-%m-%d'
    fh.readline()  # discard heading
    for line in fh:

        vals = line.split(',')
        if len(vals) != 7:
            continue  # add warning?
        datestr = vals[0]
        #dt = datetime.date(*time.strptime(datestr, datefmt)[:3])
        # Using strptime doubles the runtime. With the present
        # format, we don't need it.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        open, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])
        if ochl:
            results.append((dt, dt.year, dt.month, dt.day, dnum, open, close,
                            high, low, volume, aclose))

        else:
            results.append((dt, dt.year, dt.month, dt.day, dnum, open, high,
                            low, close, volume, aclose))
    results.reverse()
    d = np.array(results, dtype=stock_dt)
    if adjusted:
        scale = d['aclose'] / d['close']
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['high'] *= scale
        d['low'] *= scale
        d['close'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray
        ret = np.zeros((len(d), 6), dtype=np.float)
        ret[:, 0] = d['d']
        if ochl:
            ret[:, 1] = d['open']
            ret[:, 2] = d['close']
            ret[:, 3] = d['high']
            ret[:, 4] = d['low']
        else:
            ret[:, 1] = d['open']
            ret[:, 2] = d['high']
            ret[:, 3] = d['low']
            ret[:, 4] = d['close']
        ret[:, 5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return
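
As the docstring notes, the adjustment simply rescales the OHLC columns by S = adjusted_close / close. A numeric illustration with made-up prices: if a bar closed at 100.0 and its adjusted close is 50.0 (e.g. after a 2:1 split), then S = 0.5 and an open of 98.0 becomes 49.0, while the reported volume is left untouched.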
Beispiel #40
0
def polyinterp(points, x_min_bound=None, x_max_bound=None, plot=False):
    """
    Gives the minimizer and minimum of the interpolating polynomial over given points
    based on function and derivative information. Defaults to bisection if no critical
    points are valid.

    Based on polyinterp.m Matlab function in minFunc by Mark Schmidt with some slight
    modifications.

    Implemented by: Hao-Jun Michael Shi and Dheevatsa Mudigere
    Last edited 12/6/18.

    Inputs:
        points (nparray): two-dimensional array with each point of form [x f g]
        x_min_bound (float): minimum value that brackets minimum (default: minimum of points)
        x_max_bound (float): maximum value that brackets minimum (default: maximum of points)
        plot (bool): plot interpolating polynomial

    Outputs:
        x_sol (float): minimizer of interpolating polynomial
        F_min (float): minimum of interpolating polynomial

    Note:
        Set f or g to np.nan if they are unknown.

    """
    no_points = points.shape[0]
    order = np.sum(1 - np.isnan(points[:,1:3]).astype('int')) - 1

    x_min = np.min(points[:, 0])
    x_max = np.max(points[:, 0])

    # compute bounds of interpolation area
    if(x_min_bound is None):
        x_min_bound = x_min
    if(x_max_bound is None):
        x_max_bound = x_max

    # explicit formula for quadratic interpolation
    if no_points == 2 and order == 2 and plot is False:
        # Solution to quadratic interpolation is given by:
        # a = -(f1 - f2 - g1(x1 - x2))/(x1 - x2)^2
        # x_min = x1 - g1/(2a)
        # if x1 = 0, then is given by:
        # x_min = - (g1*x2^2)/(2(f2 - f1 - g1*x2))

        if(points[0, 0] == 0):
            x_sol = -points[0, 2]*points[1, 0]**2/(2*(points[1, 1] - points[0, 1] - points[0, 2]*points[1, 0]))
        else:
            a = -(points[0, 1] - points[1, 1] - points[0, 2]*(points[0, 0] - points[1, 0]))/(points[0, 0] - points[1, 0])**2
            x_sol = points[0, 0] - points[0, 2]/(2*a)

        x_sol = np.minimum(np.maximum(x_min_bound, x_sol), x_max_bound)

    # explicit formula for cubic interpolation
    elif no_points == 2 and order == 3 and plot is False:
        # Solution to cubic interpolation is given by:
        # d1 = g1 + g2 - 3((f1 - f2)/(x1 - x2))
        # d2 = sqrt(d1^2 - g1*g2)
        # x_min = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2))
        d1 = points[0, 2] + points[1, 2] - 3*((points[0, 1] - points[1, 1])/(points[0, 0] - points[1, 0]))
        d2 = np.sqrt(d1**2 - points[0, 2]*points[1, 2])
        if np.isreal(d2):
            x_sol = points[1, 0] - (points[1, 0] - points[0, 0])*((points[1, 2] + d2 - d1)/(points[1, 2] - points[0, 2] + 2*d2))
            x_sol = np.minimum(np.maximum(x_min_bound, x_sol), x_max_bound)
        else:
            x_sol = (x_max_bound + x_min_bound)/2

    # solve linear system
    else:
        # define linear constraints
        A = np.zeros((0, order+1))
        b = np.zeros((0, 1))

        # add linear constraints on function values
        for i in range(no_points):
            if not np.isnan(points[i, 1]):
                constraint = np.zeros((1, order+1))
                for j in range(order, -1, -1):
                    constraint[0, order - j] = points[i, 0]**j
                A = np.append(A, constraint, 0)
                b = np.append(b, points[i, 1])

        # add linear constraints on gradient values
        for i in range(no_points):
            if not np.isnan(points[i, 2]):
                constraint = np.zeros((1, order+1))
                for j in range(order):
                    constraint[0, j] = (order-j)*points[i,0]**(order-j-1)
                A = np.append(A, constraint, 0)
                b = np.append(b, points[i, 2])

        # check if system is solvable
        if(A.shape[0] != A.shape[1] or np.linalg.matrix_rank(A) != A.shape[0]):
            x_sol = (x_min_bound + x_max_bound)/2
            f_min = np.Inf
        else:
            # solve linear system for interpolating polynomial
            coeff = np.linalg.solve(A, b)

            # compute critical points
            dcoeff = np.zeros(order)
            for i in range(len(coeff) - 1):
                dcoeff[i] = coeff[i]*(order-i)

            crit_pts = np.array([x_min_bound, x_max_bound])
            crit_pts = np.append(crit_pts, points[:, 0])

            if not np.isinf(dcoeff).any():
                roots = np.roots(dcoeff)
                crit_pts = np.append(crit_pts, roots)

            # test critical points
            f_min = np.Inf
            x_sol = (x_min_bound + x_max_bound)/2 # defaults to bisection
            for crit_pt in crit_pts:
                if np.isreal(crit_pt) and crit_pt >= x_min_bound and crit_pt <= x_max_bound:
                    F_cp = np.polyval(coeff, crit_pt)
                    if np.isreal(F_cp) and F_cp < f_min:
                        x_sol = np.real(crit_pt)
                        f_min = np.real(F_cp)

            if(plot):
                plt.figure()
                x = np.arange(x_min_bound, x_max_bound, (x_max_bound - x_min_bound)/10000)
                f = np.polyval(coeff, x)
                plt.plot(x, f)
                plt.plot(x_sol, f_min, 'x')

    return x_sol
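
A small usage sketch (hypothetical inputs) for the explicit quadratic branch: sampling f(x) = (x - 0.5)**2 at x = 0 (value 0.25, derivative -1.0) and at x = 1 (value 0.25, derivative unknown, hence np.nan) lets polyinterp recover the true minimizer:

points = np.array([[0.0, 0.25, -1.0],
                   [1.0, 0.25, np.nan]])
x_sol = polyinterp(points)   # -> 0.5, clipped to the [0, 1] bracket if necessary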