def _set_spinbox_limits(self, bottom_val, top_val):
    # turn off signals on the spin boxes
    reset_state = [(sb, sb.blockSignals(True))
                   for sb in (self._spin_max, self._spin_min)]
    try:
        # set the top and bottom limits on the spinboxes to be in bounds
        self._spin_max.setMinimum(bottom_val)
        self._spin_min.setMinimum(bottom_val)
        self._spin_max.setMaximum(top_val)
        self._spin_min.setMaximum(top_val)
        # don't let the step be bigger than the total allowed range
        self._spin_step.setMaximum(top_val - bottom_val)
        if not np.isinf(bottom_val) and not np.isinf(top_val):
            # set the current values only when both limits are finite
            self._spin_min.setValue(bottom_val)
            self._spin_max.setValue(top_val)
            # this will trigger, via the callback, updating everything else
            self._spin_step.setValue((top_val - bottom_val) / 100)
    finally:
        # undo the signal blocking
        for sb, state in reset_state:
            sb.blockSignals(state)
def transform(self, data): assert np.isfinite(data).all() ntest = len(data) data = data.copy() data.shape = ntest, -1 assert np.isfinite(data).all() print ">>> Computing traintest linear kernel" start = time.time() kernel_traintest = np.dot(data, self._train_data.T) assert not np.isnan(kernel_traintest).any() assert not np.isinf(kernel_traintest).any() kernel_traintest /= self._ktrace assert not np.isnan(kernel_traintest).any() assert not np.isinf(kernel_traintest).any() end = time.time() print "Time: %s" % (end-start) return self._clf.decision_function(kernel_traintest).ravel()
def calcForces_and_potentialE(F_x, F_y, old_or_new, x_positions, y_positions, V_atoms):
    """Calculates x and y forces and potential energy per atom, summed over all
    contributions from all neighbors, as functions of position and the
    parameters of the LJ potential."""
    for atom in xrange(Natoms):
        for i in xrange(Natoms):
            if i != atom:
                delx = x_positions[atom, old_or_new] - x_positions[i, old_or_new]
                dely = y_positions[atom, old_or_new] - y_positions[i, old_or_new]
                r_ij = np.sqrt(delx**2 + dely**2)
                F_x[atom, old_or_new] = F_x[atom, old_or_new] - 24.0 * epsilon * sigma**6 \
                    * delx * (1 - 2.0 * (sigma / r_ij)**6) / r_ij**8
                F_y[atom, old_or_new] = F_y[atom, old_or_new] - 24.0 * epsilon * sigma**6 \
                    * dely * (1 - 2.0 * (sigma / r_ij)**6) / r_ij**8
                V_atoms[atom] = V_atoms[atom] + 4.0 * epsilon \
                    * ((sigma / r_ij)**12 - (sigma / r_ij)**6)
                # guard against division by zero (r_ij == 0) producing NaN/inf
                if np.isnan(F_x[atom, old_or_new]) or np.isinf(F_x[atom, old_or_new]):
                    F_x[atom, old_or_new] = 0
                if np.isnan(F_y[atom, old_or_new]) or np.isinf(F_y[atom, old_or_new]):
                    F_y[atom, old_or_new] = 0
                if np.isnan(V_atoms[atom]) or np.isinf(V_atoms[atom]):
                    V_atoms[atom] = 0
    return F_x, F_y, V_atoms
def __init__(self, pt1, pt2, imageSize=None): if pt1[0] <= pt2[0]: # ensure pt1 is to the left of pt2 for easier computations later on self.pt1 = pt1 self.pt2 = pt2 else: self.pt1 = pt2 self.pt2 = pt1 self.delta = np.subtract(self.pt2, self.pt1) self.length = sqrt(self.delta[0]**2 + self.delta[1]**2) self.m = float(self.delta[1]) / float(self.delta[0]) if self.delta[0] != 0.0 else (np.inf if self.delta[1] >=0 else -np.inf) self.c = self.pt1[1] - self.m * self.pt1[0] #print "delta = {0}, m = {1}, c = {2}".format(self.delta, self.m, self.c) # Check for validity/stability if np.isinf(self.m) or np.isinf(self.c): self.angle = 0.0 self.valid = False return # Compute angle in degrees self.angle = degrees(atan2(self.delta[1], self.delta[0])) self.valid = True # Compute points on left and right edges, if an imageSize is given if imageSize is None: self.ptLeft = self.pt1 self.ptRight = self.pt2 else: self.ptLeft = (0, int(self.c)) self.ptRight = (imageSize[0] - 1, int(self.m * (imageSize[0] - 1) + self.c))
def _get_sum(self): """Compute sum of non NaN / Inf values in the array.""" try: return self._sum except AttributeError: self._sum = self.no_nan.sum() # The following 2 lines are needede as in Python 3.3 with NumPy # 1.7.1, numpy.ndarray and numpy.memmap aren't hashable. if type(self._sum) is numpy.memmap: self._sum = numpy.asarray(self._sum).item() if self.has_nan and self.no_nan.mask.all(): # In this case the sum is not properly computed by numpy. self._sum = 0 if numpy.isinf(self._sum) or numpy.isnan(self._sum): # NaN may happen when there are both -inf and +inf values. if self.has_nan: # Filter both NaN and Inf values. mask = self.no_nan.mask + numpy.isinf(self[1]) else: # Filter only Inf values. mask = numpy.isinf(self[1]) if mask.all(): self._sum = 0 else: self._sum = numpy.ma.masked_array(self[1], mask).sum() # At this point there should be no more NaN. assert not numpy.isnan(self._sum) return self._sum
def init_bounds(v):
    """
    Returns a bounds object of the appropriate type given the arguments.

    This is a helper factory to simplify the user interface to parameter
    objects.
    """
    # if it is None, then it is unbounded
    if v is None:
        return Unbounded()

    # if it isn't a tuple, assume it is a bounds type.
    try:
        lo, hi = v
    except TypeError:
        return v

    # if it is a tuple, then determine what kind of bounds we have
    if lo is None:
        lo = -inf
    if hi is None:
        hi = inf
    # TODO: consider issuing a warning instead of correcting reversed bounds
    if lo >= hi:
        lo, hi = hi, lo
    if isinf(lo) and isinf(hi):
        return Unbounded()
    elif isinf(lo):
        return BoundedAbove(hi)
    elif isinf(hi):
        return BoundedBelow(lo)
    else:
        return Bounded(lo, hi)
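
# A minimal usage sketch for init_bounds, assuming the Unbounded / Bounded /
# BoundedAbove / BoundedBelow classes and `inf` referenced above are available
# in this module; the numeric bounds shown are hypothetical.
example_bounds = [
    init_bounds(None),          # -> Unbounded()
    init_bounds((0.0, 1.0)),    # -> Bounded(0.0, 1.0)
    init_bounds((None, 5.0)),   # -> BoundedAbove(5.0)
    init_bounds((3.0, None)),   # -> BoundedBelow(3.0)
    init_bounds((7.0, 2.0)),    # reversed bounds are swapped -> Bounded(2.0, 7.0)
]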
def test_nan_inf(self): # Not-a-number q = u.Quantity('nan', unit='cm') assert np.isnan(q.value) q = u.Quantity('NaN', unit='cm') assert np.isnan(q.value) q = u.Quantity('-nan', unit='cm') # float() allows this assert np.isnan(q.value) q = u.Quantity('nan cm') assert np.isnan(q.value) assert q.unit == u.cm # Infinity q = u.Quantity('inf', unit='cm') assert np.isinf(q.value) q = u.Quantity('-inf', unit='cm') assert np.isinf(q.value) q = u.Quantity('inf cm') assert np.isinf(q.value) assert q.unit == u.cm q = u.Quantity('Infinity', unit='cm') # float() allows this assert np.isinf(q.value) # make sure these strings don't parse... with pytest.raises(TypeError): q = u.Quantity('', unit='cm') with pytest.raises(TypeError): q = u.Quantity('spam', unit='cm')
def _crop_out_special_values(self, ws):
    if ws.getNumberHistograms() != 1:
        # Stripping special values is only possible on 1D workspaces
        return
    y_vals = ws.readY(0)
    length = len(y_vals)
    # Find the first value that is not NaN or infinite
    start = 0
    for i in range(0, length):
        if not np.isnan(y_vals[i]) and not np.isinf(y_vals[i]):
            start = i
            break
    # Now find the last value that is not NaN or infinite
    stop = 0
    length -= 1
    for j in range(length, 0, -1):
        if not np.isnan(y_vals[j]) and not np.isinf(y_vals[j]):
            stop = j
            break
    # Find the appropriate X values and call CropWorkspace
    x_vals = ws.readX(0)
    start_x = x_vals[start]
    # Make sure we're inside the bin that we want to crop
    end_x = x_vals[stop + 1]
    return self._crop_to_x_range(ws=ws, x_min=start_x, x_max=end_x)
def get_region_boxes(sp, reg2sp): x = np.arange(0, sp.shape[1]) y = np.arange(0, sp.shape[0]) xv, yv = np.meshgrid(x, y) maxsp = np.max(sp) sp1=sp.reshape(-1)-1 xv = xv.reshape(-1) yv = yv.reshape(-1) spxmin = accum.my_accumarray(sp1,xv, maxsp, 'min') spymin = accum.my_accumarray(sp1,yv, maxsp, 'min') spxmax = accum.my_accumarray(sp1,xv, maxsp, 'max') spymax = accum.my_accumarray(sp1,yv, maxsp, 'max') Z = reg2sp.astype(float, copy=True) Z[reg2sp==0] = np.inf xmin = np.nanmin(np.multiply(spxmin.reshape(-1,1), Z),0) ymin = np.nanmin(np.multiply(spymin.reshape(-1,1), Z),0) xmax = np.amax(np.multiply(spxmax.reshape(-1,1), reg2sp),0) ymax = np.amax(np.multiply(spymax.reshape(-1,1), reg2sp), 0) xmin[np.isinf(xmin)]=0 ymin[np.isinf(ymin)]=0 boxes = np.hstack((xmin.reshape(-1,1), ymin.reshape(-1,1), xmax.reshape(-1,1), ymax.reshape(-1,1))) return boxes
def contains_inf(arr): """ Test whether a numpy.ndarray contains any `np.inf` values. Parameters ---------- arr : np.ndarray Returns ------- contains_inf : bool `True` if the array contains any `np.inf` values, `False` otherwise. Notes ----- Tests for the presence of `np.inf`'s by determining whether the values returned by `np.nanmin(arr)` and `np.nanmax(arr)` are finite. This approach is more memory efficient than the obvious alternative, calling `np.any(np.isinf(ndarray))`, which requires the construction of a boolean array with the same shape as the input array. """ if isinstance(arr, theano.gof.type.CDataType._cdata_type): return False elif isinstance(arr, np.random.mtrand.RandomState): return False return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
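
# A small self-contained check of the nanmin/nanmax idea described in the
# docstring above; the arrays are hypothetical and the theano-specific
# branches are not exercised here.
import numpy as np

_a = np.array([1.0, np.nan, 3.0])          # NaN but no inf
_b = np.array([1.0, np.nan, np.inf])       # contains an inf
assert not (np.isinf(np.nanmax(_a)) or np.isinf(np.nanmin(_a)))
assert np.isinf(np.nanmax(_b)) or np.isinf(np.nanmin(_b))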
def common_limits(datasets, default_min=0, default_max=0): """Find the global maxima and minima of a list of datasets. Parameters ---------- datasets : `iterable` list (or any other iterable) of data arrays to analyse. default_min : `float`, optional fall-back minimum value if datasets are all empty. default_max : `float`, optional fall-back maximum value if datasets are all empty. Returns ------- (min, max) : `float` 2-tuple of common minimum and maximum over all datasets. """ from glue import iterutils if isinstance(datasets, numpy.ndarray) or not iterable(datasets[0]): datasets = [datasets] max_stat = max(list(iterutils.flatten(datasets)) + [-numpy.inf]) min_stat = min(list(iterutils.flatten(datasets)) + [numpy.inf]) if numpy.isinf(-max_stat): max_stat = default_max if numpy.isinf(min_stat): min_stat = default_min return min_stat, max_stat
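
# A short usage sketch for common_limits, assuming the `glue` package imported
# inside the function (and the `iterable` helper it relies on) are available;
# the datasets are hypothetical.
lo, hi = common_limits([[1.0, 4.0], [-2.0, 3.0]])
# lo == -2.0 and hi == 4.0; with only empty datasets the fall-back
# (default_min, default_max) values are returned instead.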
def _check_for_infinities(self, tif):
    try:
        if np.any(np.isinf(tif)):
            tif[np.isinf(tif)] = 0
            g.alert('Some array values were inf. Setting those values to 0')
    except MemoryError:
        pass
def circumcircle(P1,P2,P3): ''' Adapted from: http://local.wasp.uwa.edu.au/~pbourke/geometry/circlefrom3/Circle.cpp ''' delta_a = P2 - P1 delta_b = P3 - P2 if np.abs(delta_a[0]) <= 0.000000001 and np.abs(delta_b[1]) <= 0.000000001: center_x = 0.5*(P2[0] + P3[0]) center_y = 0.5*(P1[1] + P2[1]) else: aSlope = delta_a[1]/delta_a[0] bSlope = delta_b[1]/delta_b[0] if aSlope == 0.0: aSlope = 1E-6 if bSlope == 0.0: bSlope = 1E-6 if np.isinf(aSlope): aSlope = 1E6 if np.isinf(bSlope): bSlope = 1E6 if np.abs(aSlope-bSlope) <= 0.000000001: return None center_x= (aSlope*bSlope*(P1[1] - P3[1]) + bSlope*(P1[0] + P2 [0]) \ - aSlope*(P2[0]+P3[0]) )/(2* (bSlope-aSlope) ) center_y = -1*(center_x - (P1[0]+P2[0])/2)/aSlope + (P1[1]+P2[1])/2; return center_x, center_y
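
# A quick sanity check (hypothetical points): three points on the unit circle
# should give a circumcircle centre at (approximately) the origin.
import numpy as np
_cx, _cy = circumcircle(np.array([1.0, 0.0]),
                        np.array([0.0, 1.0]),
                        np.array([-1.0, 0.0]))
# _cx, _cy -> (0.0, 0.0)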
def weighted_mean(_line): max_weight = 50 # print _line.shape median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1) std = bottleneck.nanstd(_line, axis=1) std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1) weight_2d = numpy.fabs(std_2d / (_line - median_2d)) # weight_2d[weight_2d > max_weight] = max_weight weight_2d[numpy.isinf(weight_2d)] = max_weight for i in range(3): avg = bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1) avg_2d = avg.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1) std = numpy.sqrt(bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1)/bottleneck.nansum(weight_2d, axis=1)) std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1) weight_2d = numpy.fabs(std_2d / (_line - avg_2d)) #weight_2d[weight_2d > max_weight] = max_weight weight_2d[numpy.isinf(weight_2d)] = max_weight return bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1)
def __compare( verify_obj, obj, nsig, ndec, title='' ):
    if isinstance(verify_obj,tuple):
        if len(verify_obj) == len(obj):
            return all([ __compare( vo, o, nsig, ndec, title='#%d' % i )
                         for i, (vo,o) in enumerate( zip( verify_obj, obj ) ) ])
        log.error( 'non matching lengths: %d != %d' % ( len(verify_obj), len(obj) ) )
    elif not isinstance(verify_obj,float):
        if verify_obj == obj:
            return True
        log.error( 'non equal: %s != %s' % ( obj, verify_obj ) )
    elif numpy.isnan(verify_obj):
        if numpy.isnan(obj):
            return True
        log.error( 'expected nan: %s' % obj )
    elif numpy.isinf(verify_obj):
        if numpy.isinf(obj):
            return True
        log.error( 'expected inf: %s' % obj )
    else:
        if verify_obj:
            n = numeric.floor( numpy.log10( abs(verify_obj) ) )
            N = max( n-(nsig-1), -ndec )
        else:
            N = -ndec
        maxerr = .5 * 10.**N
        if abs(verify_obj-obj) <= maxerr:
            return True
        log.error( 'non equal to %s digits: %e != %e' % ( nsig, obj, verify_obj ) )
    return False
def test_nans_infs(self): oldsettings = np.seterr(all='ignore') try: # Check some of the ufuncs assert_equal(np.isnan(self.all_f16), np.isnan(self.all_f32)) assert_equal(np.isinf(self.all_f16), np.isinf(self.all_f32)) assert_equal(np.isfinite(self.all_f16), np.isfinite(self.all_f32)) assert_equal(np.signbit(self.all_f16), np.signbit(self.all_f32)) assert_equal(np.spacing(float16(65504)), np.inf) # Check comparisons of all values with NaN nan = float16(np.nan) assert_(not (self.all_f16 == nan).any()) assert_(not (nan == self.all_f16).any()) assert_((self.all_f16 != nan).all()) assert_((nan != self.all_f16).all()) assert_(not (self.all_f16 < nan).any()) assert_(not (nan < self.all_f16).any()) assert_(not (self.all_f16 <= nan).any()) assert_(not (nan <= self.all_f16).any()) assert_(not (self.all_f16 > nan).any()) assert_(not (nan > self.all_f16).any()) assert_(not (self.all_f16 >= nan).any()) assert_(not (nan >= self.all_f16).any()) finally: np.seterr(**oldsettings)
def float_test(self, dtype, significant=None): colname = 'col_%s' % dtype.__name__ self.writeread(dtype) before, after = self.table_orig.data[colname], self.table_new.data[colname] self.assertEqual(before.shape, after.shape) self.assertEqual(before.dtype.type, after.dtype.type) if before.ndim == 1: for i in range(before.shape[0]): if(np.isnan(before[i])): self.failUnless(np.isnan(after[i])) elif(np.isinf(before[i])): self.failUnless(np.isinf(after[i])) else: if significant: self.assertAlmostEqualSig(before[i], after[i], significant=significant) else: self.assertEqual(before[i], after[i]) else: for i in range(before.shape[0]): for j in range(before.shape[1]): if(np.isnan(before[i, j])): self.failUnless(np.isnan(after[i, j])) elif(np.isinf(before[i, j])): self.failUnless(np.isinf(after[i, j])) else: if significant: self.assertAlmostEqualSig(before[i, j], after[i, j], significant=significant) else: self.assertEqual(before[i, j], after[i, j])
def likelihood_check(obs_distns,trans_matrix,init_distn,data,target_val): for cls in [m.HMMPython, m.HMM]: hmm = cls(alpha=6.,init_state_concentration=1, # placeholders obs_distns=obs_distns) hmm.trans_distn.trans_matrix = trans_matrix hmm.init_state_distn.weights = init_distn hmm.add_data(data) # test default log_likelihood method assert np.isclose(target_val, hmm.log_likelihood()) # manual tests of the several message passing methods states = hmm.states_list[-1] states.clear_caches() states.messages_forwards_normalized() assert np.isclose(target_val,states._normalizer) states.clear_caches() states.messages_forwards_log() assert np.isinf(target_val) or np.isclose(target_val,states._normalizer) states.clear_caches() states.messages_backwards_log() assert np.isinf(target_val) or np.isclose(target_val,states._normalizer) # test held-out vs in-model assert np.isclose(target_val, hmm.log_likelihood(data))
def clean_invalid(x, y, min_x=-numpy.inf, min_y=-numpy.inf, max_x=numpy.inf, max_y=numpy.inf):
    """Remove corresponding values from `x` and `y` when one or both of those
    is `nan` or `inf`, and optionally truncate values to minima and maxima

    Parameters
    ----------
    x, y : :class:`numpy.ndarray` or list
        Paired arrays or lists of corresponding numbers

    min_x, min_y, max_x, max_y : number, optional
        If supplied, set values below `min_x` to `min_x`, values larger than
        `max_x` to `max_x`, and likewise for `min_y` and `max_y`

    Returns
    -------
    :class:`numpy.ndarray`
        A shortened version of `x`, excluding invalid values

    :class:`numpy.ndarray`
        A shortened version of `y`, excluding invalid values
    """
    x = numpy.array(x).astype(float)
    y = numpy.array(y).astype(float)

    x[x < min_x] = min_x
    x[x > max_x] = max_x
    y[y < min_y] = min_y
    y[y > max_y] = max_y

    newmask = numpy.isinf(x) | numpy.isnan(x) | numpy.isinf(y) | numpy.isnan(y)
    x = x[~newmask]
    y = y[~newmask]
    return x, y
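
# A short usage sketch of clean_invalid with hypothetical data: the pairs
# containing NaN or inf are dropped, and the surviving x values are clipped
# to the supplied maximum.
import numpy
_x = [1.0, numpy.nan, 5.0, 100.0]
_y = [2.0, 3.0, numpy.inf, 4.0]
_xc, _yc = clean_invalid(_x, _y, max_x=10.0)
# _xc -> array([ 1., 10.]), _yc -> array([ 2., 4.])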
def __call__(self, value, clip=None): if clip is None: clip = self.clip if cbook.iterable(value): vtype = 'array' val = np.ma.asarray(value).astype(np.float) else: vtype = 'scalar' val = np.ma.array([value]).astype(np.float) val = np.ma.masked_where(np.isinf(val.data),val) self.autoscale_None(val) vmin, vmax = float(self.vmin), float(self.vmax) if vmin > vmax: raise ValueError("minvalue must be less than or equal to maxvalue") elif vmin<=0: raise ValueError("values must all be positive") elif vmin==vmax: return type(value)(0.0 * np.asarray(value)) else: if clip: mask = np.ma.getmask(val) val = np.ma.array(np.clip(val.filled(vmax), vmin, vmax), mask=mask) result = (np.ma.log(val)-np.log(vmin))/(np.log(vmax)-np.log(vmin)) result.data[result.data<0]=0.0 result.data[result.data>1]=1.0 result[np.isinf(val.data)] = -np.inf if result.mask is not np.ma.nomask: result.mask[np.isinf(val.data)] = False if vtype == 'scalar': result = result[0] return result
def equal(a, b, exact):
    if array_equal(a, b):
        return True

    if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays.  This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Inf's
            return ninfs == isinf(b).sum()
    if exact:
        return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0)
    else:
        if hasattr(a, 'dtype') and a.dtype == 'f4':
            atol = 1e-5   # Relax precision for special opcodes, like fmod
        else:
            atol = 1e-8
        return (shape(a) == shape(b) and
                allclose(ravel(a), ravel(b), atol=atol))
def sample_representer_points(self): # Sample representer points only in the # configuration space by setting all environmental # variables to 1 D = np.where(self.is_env == 0)[0].shape[0] lower = self.lower[np.where(self.is_env == 0)] upper = self.upper[np.where(self.is_env == 0)] self.sampling_acquisition.update(self.model) for i in range(5): restarts = np.random.uniform(low=lower, high=upper, size=(self.Nb, D)) sampler = emcee.EnsembleSampler(self.Nb, D, self.sampling_acquisition_wrapper) self.zb, self.lmb, _ = sampler.run_mcmc(restarts, 50) if not np.any(np.isinf(self.lmb)): break else: print("Infinity") if np.any(np.isinf(self.lmb)): raise ValueError("Could not sample valid representer points! LogEI is -infinity") if len(self.zb.shape) == 1: self.zb = self.zb[:, None] if len(self.lmb.shape) == 1: self.lmb = self.lmb[:, None] # Project representer points to subspace proj = np.ones([self.zb.shape[0], self.upper[self.is_env == 1].shape[0]]) proj *= self.upper[self.is_env == 1].shape[0] self.zb = np.concatenate((self.zb, proj), axis=1)
def checkPattern( self, Ntries=100, debug=False ): """ checks if sparse patterns cover all nonzero entries in user defined gradients by evaluating objg() and consg() at random points drawn from a Gaussian distribution. Arguments: Ntries: number of random tries. (default: 100). debug: boolean to enable extra debug information. (default: False). Returns: isCorrect: boolean, True if pattern covers all nonzero entries in the gradients. """ if( self.objg is None or self.objgpattern is None ): raise StandardError( "objective gradient and pattern must be set before check" ) if( self.Ncons > 0 and ( self.consg is None or self.consgpattern is None ) ): raise StandardError( "constraint gradient and pattern must be set before check" ) if( self.Ncons > 0 ): pattern = np.vstack( (self.objgpattern, self.consgpattern ) ) else: pattern = self.objgpattern if( self.ub is not None ): ub = self.ub ub[ np.isinf( ub ) ] = 1 else: ub = np.ones( (self.N,) ) if( self.lb is not None ): lb = self.lb lb[ np.isinf( lb ) ] = -1 else: lb = -np.ones( (self.N,) ) for k in range( Ntries ): usrgrad = np.zeros( (self.Ncons + 1, self.N) ) point = np.random.rand( self.N ) * ( ub - lb ) + lb self.objg( usrgrad[0,:], point ) if( self.Ncons > 0 ): self.consg( usrgrad[1:,:], point ) usrgrad[ np.nonzero( pattern ) ] = 0 if( np.any( usrgrad ) ): if( debug ): idx = np.unravel_index( np.argmax( np.abs(usrgrad) ), usrgrad.shape ) if( idx[0] == 0 ): print( ">>> Pattern check failed. Found wrong nonzero value in " + "objg() at element {0}".format( idx[1] ) ) else: print( ">>> Pattern check failed. Found wrong nonzero value in " + "consg() at element ({0},{1})".format( idx[0]-1, idx[1] ) ) return False if( debug ): print( ">>> Pattern check passed" ) return True
def knn(x_train, y_train, x_valid): x_train=np.log(x_train+1) x_valid=np.log(x_valid+1) where_are_nan = np.isnan(x_train) where_are_inf = np.isinf(x_train) x_train[where_are_nan] = 0 x_train[where_are_inf] = 0 where_are_nan = np.isnan(x_valid) where_are_inf = np.isinf(x_valid) x_valid[where_are_nan] = 0 x_valid[where_are_inf] = 0 scale=StandardScaler() scale.fit(x_train) x_train=scale.transform(x_train) x_valid=scale.transform(x_valid) #pca = PCA(n_components=10) #pca.fit(x_train) #x_train = pca.transform(x_train) #x_valid = pca.transform(x_valid) kneighbors=KNeighborsClassifier(n_neighbors=200,n_jobs=-1) knn_train, knn_test = stacking(kneighbors, x_train, y_train, x_valid, "knn") return knn_train, knn_test, "knn"
def lscsum0(lx):
    """
    Accepts log-values as input, exponentiates them, sums down the rows
    (first dimension), then converts the sum back to log-space and returns
    the result.  Handles underflow by rescaling so that the largest value
    is exactly 1.0.
    """
    # rows = lx.shape[0]
    # columns = numpy.prod(lx.shape[1:])
    # lx = lx.reshape(rows, columns)
    # bases = lx.max(1).reshape(rows, 1)
    # bases = lx.max(0).reshape((1,) + lx.shape[1:])
    lx = numpy.asarray(lx)
    bases = lx.max(0)  # Don't need to reshape in the case of 0.
    x = numpy.exp(lx - bases)
    ssum = x.sum(0)

    result = numpy.log(ssum) + bases
    try:
        conventional = numpy.log(numpy.exp(lx).sum(0))
        if not similar(result, conventional):
            if numpy.isinf(conventional).any() and not numpy.isinf(result).any():
                # print "Scaled log sum down axis 0 avoided underflow or overflow."
                pass
            else:
                import sys
                print >>sys.stderr, "Warning: scaled log sum down axis 0 did not match."
                print >>sys.stderr, "Scaled log result:"
                print >>sys.stderr, result
                print >>sys.stderr, "Conventional result:"
                print >>sys.stderr, conventional
    except FloatingPointError, e:
        # print "Scaled log sum down axis 0 avoided underflow or overflow."
        pass
    return result
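
# A tiny numerical illustration of the rescaling performed in lscsum0
# (hypothetical log-values): the naive sum underflows to -inf, while the
# rescaled form recovers log(2 * exp(-1000)) = -1000 + log(2).
import numpy
_lx = numpy.array([[-1000.0], [-1000.0]])
_naive = numpy.log(numpy.exp(_lx).sum(0))                              # -> [-inf]
_scaled = numpy.log(numpy.exp(_lx - _lx.max(0)).sum(0)) + _lx.max(0)   # -> [-999.3068...]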
def find_reasonable_epsilon(theta0, grad0, logp0, f): """ Heuristic for choosing an initial value of epsilon """ epsilon = 1. r0 = np.random.normal(0., 1., len(theta0)) # Figure out what direction we should be moving epsilon. _, rprime, gradprime, logpprime = leapfrog(theta0, r0, grad0, epsilon, f) # brutal! This trick make sure the step is not huge leading to infinite # values of the likelihood. This could also help to make sure theta stays # within the prior domain (if any) k = 1. while np.isinf(logpprime) or np.isinf(gradprime).any(): k *= 0.5 _, rprime, _, logpprime = leapfrog(theta0, r0, grad0, epsilon * k, f) epsilon = 0.5 * k * epsilon # acceptprob = np.exp(logpprime - logp0 - 0.5 * (np.dot(rprime, rprime.T) - np.dot(r0, r0.T))) # a = 2. * float((acceptprob > 0.5)) - 1. logacceptprob = logpprime-logp0-0.5*(np.dot(rprime, rprime)-np.dot(r0,r0)) a = 1. if logacceptprob > np.log(0.5) else -1. # Keep moving epsilon in that direction until acceptprob crosses 0.5. # while ( (acceptprob ** a) > (2. ** (-a))): while a * logacceptprob > -a * np.log(2): epsilon = epsilon * (2. ** a) _, rprime, _, logpprime = leapfrog(theta0, r0, grad0, epsilon, f) # acceptprob = np.exp(logpprime - logp0 - 0.5 * ( np.dot(rprime, rprime.T) - np.dot(r0, r0.T))) logacceptprob = logpprime-logp0-0.5*(np.dot(rprime, rprime)-np.dot(r0,r0)) print("find_reasonable_epsilon=", epsilon) return epsilon
def contains_inf(arr, node=None, var=None): """ Test whether a numpy.ndarray contains any `np.inf` values. Parameters ---------- arr : np.ndarray or output of any Theano op node : None or an Apply instance. If the output of a Theano op, the node associated to it. var : The Theano symbolic variable. Returns ------- contains_inf : bool `True` if the array contains any `np.inf` values, `False` otherwise. Notes ----- Tests for the presence of `np.inf`'s by determining whether the values returned by `np.nanmin(arr)` and `np.nanmax(arr)` are finite. This approach is more memory efficient than the obvious alternative, calling `np.any(np.isinf(ndarray))`, which requires the construction of a boolean array with the same shape as the input array. """ if not _is_numeric_value(arr, var): return False elif getattr(arr, 'dtype', '') in T.discrete_dtypes: return False elif pygpu_available and isinstance(arr, GpuArray): return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or np.isinf(f_gpua_max(arr.reshape(arr.size)))) return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
def test_rescale_by_zero(self):
    # post.y should contain some nans and infs
    self.test_curve.rescale(factor=0)
    self.assertTrue(np.array_equal(self.compare_curve.x, self.test_curve.x))
    self.assertTrue(np.isnan(self.test_curve.y[0]))
    self.assertTrue(np.isinf(self.test_curve.y[1]))
    self.assertTrue(np.isinf(self.test_curve.y[2]))
def calculate_bounds_of_probability_distribution(
        probability_distribution,
        distribution_integral_limit=DISTRIBUTION_INTEGRAL_LIMIT):
    a, b = probability_distribution.interval(1)
    if isinf(a) or isinf(b):
        a, b = probability_distribution.interval(distribution_integral_limit)
    return a, b
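
# A usage sketch assuming scipy is available and DISTRIBUTION_INTEGRAL_LIMIT is
# defined as above (e.g. 0.9999): a normal distribution has infinite support,
# so the truncated central interval is returned instead.
from scipy import stats
_a, _b = calculate_bounds_of_probability_distribution(stats.norm(0, 1))
# _a, _b are finite, roughly (-3.89, 3.89) for a 0.9999 integral limit.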
def initwb_lin(layer):
    """
    Initialize weights and biases with values spaced linearly (linspace)
    across the active input range, rather than with random values.
    This function is needed for tests.

    :Parameters:
        layer: core.Layer object
            Initialization layer

    """
    active = layer.transf.inp_active[:]

    if np.isinf(active[0]):
        active[0] = -100.0

    if np.isinf(active[1]):
        active[1] = 100.0

    min = active[0] / (2 * layer.cn)
    max = active[1] / (2 * layer.cn)

    for k in layer.np:
        inits = np.linspace(min, max, layer.np[k].size)
        inits.shape = layer.np[k].shape
        layer.np[k] = inits
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features',)) dataset_test = MNIST(['test'], sources=('features',)) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features',)) dataset_test = CIFAR10(['test'], sources=('features',)) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width=32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width=64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets = ['train'], which_format="64", sources=('features',), load_in_memory=False) dataset_test = CelebA(which_sets = ['test'], which_format="64", sources=('features',), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme = tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme = ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features',)) dataset_train = DataStream.default_stream(train_set, iteration_scheme=ShuffledScheme( train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s."%args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten(DataStream.default_stream(dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples%args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2)) shft = -np.mean(Xbatch*scl) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename 
params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams x, cost, start_temperature, step_chain = build_model(tparams, model_options) inps = [x.astype('float32'), start_temperature, step_chain] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') step_chain_part = T.scalar('step_chain_part', dtype='int32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature, step_chain_part) print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' #for param in tparams: # print param # print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): if args.dataset == 'CIFAR10': if data[0].shape[0] == args.batch_size: data_use = (data[0].reshape(args.batch_size,3*32*32),) else: continue t0 = time.time() batch_index += 1 n_samples += len(data_use[0]) uidx += 1 if data_use[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() t1 = time.time() data_run = data_use[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): data_run = data_run.astype('float32') meta_cost.append(f_grad_shared(data_run, temperature_forward, meta_step)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion(data_run, temperature_forward, meta_step) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) ud = time.time() - ud_start #gradient_updates_ = get_grads(data_use[0],args.temperature) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
logger.log({'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost}) if batch_index%20==0: print batch_index, "cost", cost if batch_index%1000==0: print 'saving params' params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(batch_index) + '.npz') if batch_index%200==0: count_sample += 1 ''' temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps -1 )) temperature_forward = args.temperature for num_step in range(args.num_steps * args.meta_steps): print "Forward temperature", temperature_forward if num_step == 0: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(data[0].astype('float32'), temperature_forward, num_step) x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step)) else: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(x_data.astype('float32'), temperature_forward, num_step) x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step)) temperature_forward = temperature_forward * args.temperature_factor; x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' + str(batch_index)) temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1 )) for i in range(args.num_steps*args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1) print 'On backward step number, using temperature', i, temperature reverse_time(scl, shft, x_data, model_dir + '/'+ "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ''' if args.noise == "gaussian": x_sampled = np.random.normal(0.5, 2.0, size=(args.batch_size,INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps*args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1) print 'On step number, using temperature', i, temperature reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ipdb.set_trace()
def _compare_segregation(seg_class_1, seg_class_2, iterations_under_null=500, null_approach="random_label", **kwargs): ''' Perform inference comparison for a two segregation measures Parameters ---------- seg_class_1 : a PySAL segregation object to be compared to seg_class_2 seg_class_2 : a PySAL segregation object to be compared to seg_class_1 iterations_under_null : number of iterations under null hyphothesis null_approach: argument that specifies which type of null hypothesis the inference will iterate. "random_label" : random label the data in each iteration "counterfactual_composition" : randomizes the number of minority population according to both cumulative distribution function of a variable that represents the composition of the minority group. The composition is the division of the minority population of unit i divided by total population of tract i. "counterfactual_share" : randomizes the number of minority population and total population according to both cumulative distribution function of a variable that represents the share of the minority group. The share is the division of the minority population of unit i divided by total population of minority population. **kwargs : customizable parameters to pass to the segregation measures. Usually they need to be the same as both seg_class_1 and seg_class_2 was built. Attributes ---------- p_value : float Two-Tailed p-value est_sim : numpy array Estimates of the segregation measure differences under the null hypothesis est_point_diff : float Point estimation of the difference between the segregation measures Notes ----- This function performs inference to compare two segregation measures. This can be either two measures of the same locations in two different points in time or it can be two different locations at the same point in time. The null hypothesis is H0: Segregation_1 is not different than Segregation_2. Based on Rey, Sergio J., and Myrna L. Sastré-Gutiérrez. "Interregional inequality dynamics in Mexico." Spatial Economic Analysis 5.3 (2010): 277-298. 
''' if not null_approach in [ 'random_label', 'counterfactual_composition', 'counterfactual_share' ]: raise ValueError( 'null_approach must one of \'random_label\', \'counterfactual_composition\', \'counterfactual_share\'' ) if (type(seg_class_1) != type(seg_class_2)): raise TypeError( 'seg_class_1 and seg_class_2 must be the same type/class.') point_estimation = seg_class_1.statistic - seg_class_2.statistic aux = str(type(seg_class_1)) _class_name = aux[1 + aux.rfind( '.'):-2] # 'rfind' finds the last occurence of a pattern in a string data_1 = seg_class_1.core_data data_2 = seg_class_2.core_data # This step is just to make sure the each frequecy column is integer for the approaches and from the same type in order to stack them for the random data approach data_1['group_pop_var'] = round(data_1['group_pop_var']).astype(int) data_1['total_pop_var'] = round(data_1['total_pop_var']).astype(int) data_2['group_pop_var'] = round(data_2['group_pop_var']).astype(int) data_2['total_pop_var'] = round(data_2['total_pop_var']).astype(int) est_sim = np.empty(iterations_under_null) ################ # RANDOM LABEL # ################ if (null_approach == "random_label"): data_1['grouping_variable'] = 'Group_1' data_2['grouping_variable'] = 'Group_2' stacked_data = pd.concat([data_1, data_2], ignore_index=True) for i in np.array(range(iterations_under_null)): aux_rand = list( np.random.choice(stacked_data.shape[0], stacked_data.shape[0], replace=False)) stacked_data['rand_group_pop'] = stacked_data.group_pop_var[ aux_rand].reset_index()['group_pop_var'] stacked_data['rand_total_pop'] = stacked_data.total_pop_var[ aux_rand].reset_index()['total_pop_var'] # Dropping variable to avoid confusion in the calculate_segregation function # Building auxiliar data to avoid affecting the next iteration stacked_data_aux = stacked_data.drop( ['group_pop_var', 'total_pop_var'], axis=1) stacked_data_1 = stacked_data_aux.loc[ stacked_data_aux['grouping_variable'] == 'Group_1'] stacked_data_2 = stacked_data_aux.loc[ stacked_data_aux['grouping_variable'] == 'Group_2'] simulations_1 = seg_class_1._function(stacked_data_1, 'rand_group_pop', 'rand_total_pop', **kwargs)[0] simulations_2 = seg_class_2._function(stacked_data_2, 'rand_group_pop', 'rand_total_pop', **kwargs)[0] est_sim[i] = simulations_1 - simulations_2 print('Processed {} iterations out of {}.'.format( i + 1, iterations_under_null), end="\r") ############################## # COUNTERFACTUAL COMPOSITION # ############################## if (null_approach == "counterfactual_composition"): data_1['rel'] = np.where( data_1['total_pop_var'] == 0, 0, data_1['group_pop_var'] / data_1['total_pop_var']) data_2['rel'] = np.where( data_2['total_pop_var'] == 0, 0, data_2['group_pop_var'] / data_2['total_pop_var']) # Both appends are to force both distribution to have values in all space between 0 and 1 x_1_pre = np.sort(data_1['rel']) y_1_pre = np.arange(0, len(x_1_pre)) / (len(x_1_pre)) x_2_pre = np.sort(data_2['rel']) y_2_pre = np.arange(0, len(x_2_pre)) / (len(x_2_pre)) x_1 = np.append(np.append(0, x_1_pre), 1) y_1 = np.append(np.append(0, y_1_pre), 1) x_2 = np.append(np.append(0, x_2_pre), 1) y_2 = np.append(np.append(0, y_2_pre), 1) def inverse_cdf_1(pct): return x_1[np.where(y_1 > pct)[0][0] - 1] def inverse_cdf_2(pct): return x_2[np.where(y_2 > pct)[0][0] - 1] # Adding the pseudo columns for FIRST spatial context data_1['cumulative_percentage'] = (data_1['rel'].rank() - 1) / len( data_1 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max 
data_1['pseudo_rel'] = data_1['cumulative_percentage'].apply( inverse_cdf_2) data_1['pseudo_group_pop_var'] = round( data_1['pseudo_rel'] * data_1['total_pop_var']).astype(int) # Adding the pseudo columns for SECOND spatial context data_2['cumulative_percentage'] = (data_2['rel'].rank() - 1) / len( data_2 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max data_2['pseudo_rel'] = data_2['cumulative_percentage'].apply( inverse_cdf_1) data_2['pseudo_group_pop_var'] = round( data_2['pseudo_rel'] * data_2['total_pop_var']).astype(int) for i in np.array(range(iterations_under_null)): data_1['fair_coin'] = np.random.uniform(size=len(data_1)) data_1['test_group_pop_var'] = np.where( data_1['fair_coin'] > 0.5, data_1['group_pop_var'], data_1['pseudo_group_pop_var']) # Dropping to avoid confusion in the internal function data_1_test = data_1.drop(['group_pop_var'], axis=1) simulations_1 = seg_class_1._function(data_1_test, 'test_group_pop_var', 'total_pop_var', **kwargs)[0] # Dropping to avoid confusion in the next iteration data_1 = data_1.drop(['fair_coin', 'test_group_pop_var'], axis=1) data_2['fair_coin'] = np.random.uniform(size=len(data_2)) data_2['test_group_pop_var'] = np.where( data_2['fair_coin'] > 0.5, data_2['group_pop_var'], data_2['pseudo_group_pop_var']) # Dropping to avoid confusion in the internal function data_2_test = data_2.drop(['group_pop_var'], axis=1) simulations_2 = seg_class_2._function(data_2_test, 'test_group_pop_var', 'total_pop_var', **kwargs)[0] # Dropping to avoid confusion in the next iteration data_2 = data_2.drop(['fair_coin', 'test_group_pop_var'], axis=1) est_sim[i] = simulations_1 - simulations_2 print('Processed {} iterations out of {}.'.format( i + 1, iterations_under_null), end="\r") ######################## # COUNTERFACTUAL SHARE # ######################## if (null_approach == "counterfactual_share"): data_1['compl_pop_var'] = data_1['total_pop_var'] - data_1[ 'group_pop_var'] data_2['compl_pop_var'] = data_2['total_pop_var'] - data_2[ 'group_pop_var'] # Build the share for each group individually data_1['share'] = np.where( data_1['total_pop_var'] == 0, 0, data_1['group_pop_var'] / data_1['group_pop_var'].sum()) data_2['share'] = np.where( data_2['total_pop_var'] == 0, 0, data_2['group_pop_var'] / data_2['group_pop_var'].sum()) data_1['compl_share'] = np.where( data_1['compl_pop_var'] == 0, 0, data_1['compl_pop_var'] / data_1['compl_pop_var'].sum()) data_2['compl_share'] = np.where( data_2['compl_pop_var'] == 0, 0, data_2['compl_pop_var'] / data_2['compl_pop_var'].sum()) # Both appends are to force both distribution to have values in all space between 0 and 1 x_1_pre = np.sort(data_1['share']) y_1_pre = np.arange(0, len(x_1_pre)) / (len(x_1_pre)) x_2_pre = np.sort(data_2['share']) y_2_pre = np.arange(0, len(x_2_pre)) / (len(x_2_pre)) x_1 = np.append(np.append(0, x_1_pre), 1) y_1 = np.append(np.append(0, y_1_pre), 1) x_2 = np.append(np.append(0, x_2_pre), 1) y_2 = np.append(np.append(0, y_2_pre), 1) def inverse_cdf_1(pct): return x_1[np.where(y_1 > pct)[0][0] - 1] def inverse_cdf_2(pct): return x_2[np.where(y_2 > pct)[0][0] - 1] # Both appends are to force both distribution to have values in all space between 0 and 1 compl_x_1_pre = np.sort(data_1['compl_share']) compl_y_1_pre = np.arange(0, len(compl_x_1_pre)) / (len(compl_x_1_pre)) compl_x_2_pre = np.sort(data_2['compl_share']) compl_y_2_pre = np.arange(0, len(compl_x_2_pre)) / (len(compl_x_2_pre)) compl_x_1 = np.append(np.append(0, compl_x_1_pre), 1) compl_y_1 = 
np.append(np.append(0, compl_y_1_pre), 1) compl_x_2 = np.append(np.append(0, compl_x_2_pre), 1) compl_y_2 = np.append(np.append(0, compl_y_2_pre), 1) def compl_inverse_cdf_1(pct): return compl_x_1[np.where(compl_y_1 > pct)[0][0] - 1] def compl_inverse_cdf_2(pct): return compl_x_2[np.where(compl_y_2 > pct)[0][0] - 1] # Adding the pseudo columns for FIRST spatial context data_1['cumulative_percentage'] = (data_1['share'].rank() - 1) / len( data_1 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max data_1['pseudo_share_pre'] = data_1['cumulative_percentage'].apply( inverse_cdf_2) data_1['pseudo_share'] = data_1['pseudo_share_pre'] / data_1[ 'pseudo_share_pre'].sum( ) # Rescale due to possibility of the summation of the values being grater of lower than 1 data_1['pseudo_group_pop_var'] = round( data_1['pseudo_share'] * data_1['group_pop_var'].sum()).astype(int) data_1['compl_cumulative_percentage'] = (data_1['compl_share'].rank( ) - 1) / len( data_1 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max data_1['compl_pseudo_share_pre'] = data_1[ 'compl_cumulative_percentage'].apply(compl_inverse_cdf_2) data_1['compl_pseudo_share'] = data_1[ 'compl_pseudo_share_pre'] / data_1['compl_pseudo_share_pre'].sum( ) # Rescale due to possibility of the summation of the values being grater of lower than 1 data_1['pseudo_compl_pop_var'] = round( data_1['compl_pseudo_share'] * data_1['compl_pop_var'].sum()).astype(int) data_1['pseudo_total_pop'] = data_1['pseudo_group_pop_var'] + data_1[ 'pseudo_compl_pop_var'] # Adding the pseudo columns for SECOND spatial context data_2['cumulative_percentage'] = (data_2['share'].rank() - 1) / len( data_2 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max data_2['pseudo_share_pre'] = data_2['cumulative_percentage'].apply( inverse_cdf_1) data_2['pseudo_share'] = data_2['pseudo_share_pre'] / data_2[ 'pseudo_share_pre'].sum( ) # Rescale due to possibility of the summation of the values being grater of lower than 1 data_2['pseudo_group_pop_var'] = round( data_2['pseudo_share'] * data_2['group_pop_var'].sum()).astype(int) data_2['compl_cumulative_percentage'] = (data_2['compl_share'].rank( ) - 1) / len( data_2 ) # It has to be a minus 1 in the rank, in order to avoid 100% percentile in the max data_2['compl_pseudo_share_pre'] = data_2[ 'compl_cumulative_percentage'].apply(compl_inverse_cdf_1) data_2['compl_pseudo_share'] = data_2[ 'compl_pseudo_share_pre'] / data_2['compl_pseudo_share_pre'].sum( ) # Rescale due to possibility of the summation of the values being grater of lower than 1 data_2['pseudo_compl_pop_var'] = round( data_2['compl_pseudo_share'] * data_2['compl_pop_var'].sum()).astype(int) data_2['pseudo_total_pop'] = data_2['pseudo_group_pop_var'] + data_2[ 'pseudo_compl_pop_var'] for i in np.array(range(iterations_under_null)): # For this 'counterfactual_share' approach, also the group and total population can be swapped during the iterations data_1['fair_coin'] = np.random.uniform(size=len(data_1)) data_1['test_group_pop_var'] = np.where( data_1['fair_coin'] > 0.5, data_1['group_pop_var'], data_1['pseudo_group_pop_var']) data_1['test_total_pop_var'] = np.where(data_1['fair_coin'] > 0.5, data_1['total_pop_var'], data_1['pseudo_total_pop']) # Dropping to avoid confusion in the internal function data_1_test = data_1.drop(['group_pop_var', 'total_pop_var'], axis=1) simulations_1 = seg_class_1._function(data_1_test, 'test_group_pop_var', 'test_total_pop_var', **kwargs)[0] # 
Dropping to avoid confusion in the next iteration data_1 = data_1.drop( ['fair_coin', 'test_group_pop_var', 'test_total_pop_var'], axis=1) # For this 'counterfactual_share' approach, also the group and total population can be swapped during the iterations data_2['fair_coin'] = np.random.uniform(size=len(data_2)) data_2['test_group_pop_var'] = np.where( data_2['fair_coin'] > 0.5, data_2['group_pop_var'], data_2['pseudo_group_pop_var']) data_2['test_total_pop_var'] = np.where(data_2['fair_coin'] > 0.5, data_2['total_pop_var'], data_2['pseudo_total_pop']) # Dropping to avoid confusion in the internal function data_2_test = data_2.drop(['group_pop_var', 'total_pop_var'], axis=1) simulations_2 = seg_class_2._function(data_2_test, 'test_group_pop_var', 'test_total_pop_var', **kwargs)[0] # Dropping to avoid confusion in the next iteration data_2 = data_2.drop( ['fair_coin', 'test_group_pop_var', 'test_total_pop_var'], axis=1) est_sim[i] = simulations_1 - simulations_2 print('Processed {} iterations out of {}.'.format( i + 1, iterations_under_null), end="\r") # Check and, if the case, remove iterations_under_null that resulted in nan or infinite values if any((np.isinf(est_sim) | np.isnan(est_sim))): warnings.warn( 'Some estimates resulted in NaN or infinite values for estimations under null hypothesis. These values will be removed for the final results.' ) est_sim = est_sim[~(np.isinf(est_sim) | np.isnan(est_sim))] # Two-Tailed p-value # Obs.: the null distribution can be located far from zero. Therefore, this is the the appropriate way to calculate the two tailed p-value. aux1 = (point_estimation < est_sim).sum() aux2 = (point_estimation > est_sim).sum() p_value = 2 * np.array([aux1, aux2]).min() / len(est_sim) return p_value, est_sim, point_estimation, _class_name
# Flag the anomalous points in the training set
outliers = np.where(pTest < epsilon, True, False).ravel()
plt.plot(X[outliers, 0], X[outliers, 1], 'ro',
         lw=2, markersize=10, fillstyle='none', markeredgewidth=1)
n = np.linspace(0, 35, 100)
X1 = np.meshgrid(n, n)
XFit = np.mat(np.column_stack((X1[0].T.flatten(), X1[1].T.flatten())))
pFit = np.mat([p(x.T) for x in XFit]).reshape(-1, 1)
pFit = pFit.reshape(X1[0].shape)
if not np.isinf(np.sum(pFit)):
    plt.contour(X1[0], X1[1], pFit, 10.0**np.arange(-20, 0, 3).T)
plt.show()

# Test with higher-dimensional data...
data = loadmat('ex8data2.mat')
X = np.mat(data['X'])
XVal = np.mat(data['Xval'])
yVal = np.mat(data['yval'])
# p = anomaly.train(X)
p = train(X, model=multivariateGaussianModel)
pTest = np.mat([p(x.T) for x in X]).reshape(-1, 1)
epsilon, f1 = selectEpsilon(XVal, yVal, p)
x: batchInputs, SeqLens: batchSeqLengths, indices: batchTargetIxs, values: batchTargetVals, shape: batchTargetShape } del batchInputs, batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths _, summary, Losses, Loss, Error = session.run( [train_step, LocalTrainSummary, losses, loss, error_rate], feed_dict=feed) del feed SummaryWriter.add_summary(summary, epoch * totalIter + batch) numberOfInfElements = np.count_nonzero(np.isinf(Losses)) if numberOfInfElements > 0: LogFile.write("WARNING: INF VALUE(S) FOUND!\n") LogFile.write("%s\n" % (batchTargetList[np.where( np.isinf(Losses) == True)[0][0]])) LogFile.write("Losses\n") Losses = filter(lambda v: ~np.isinf(v), Losses) Loss = np.mean(Losses) TrainingLoss.append(Loss) TrainingError.append(Error) LogFile.write("Epoch %d, Batch: %d, Loss: %.6f, Error: %.6f, " % (epoch, batch, Loss, Error)) if currTrainLoss < Loss: LogFile.write("Bad\n")
def safe_log(x, nan_substitute=-1e+4):
    l = np.log(x)
    l[np.logical_or(np.isnan(l), np.isinf(l))] = nan_substitute
    return l
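
# Example with a hypothetical input array: np.log would yield -inf for 0 and
# NaN for negative entries; safe_log substitutes those with nan_substitute.
import numpy as np
print(safe_log(np.array([1.0, 0.0, -1.0])))   # prints an array equal to [0., -10000., -10000.]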
def plot(self, filename="triplot.png", doshow=True, figsize=(8, 6), save=True, minimum=None, points=None, colorbararrow=None): ''' Create the triangle plots as in the optimal frequencies paper. ''' fig = figure(figsize=figsize) ax = fig.add_subplot(111) if self.frac_bw == False: data = np.transpose(np.log10(self.sigmas)) if self.log == False: im = uimshow( data, extent=[self.Cs[0], self.Cs[-1], self.Bs[0], self.Bs[-1]], cmap=cm.inferno_r, ax=ax) ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$") ax.set_ylabel(r"$\mathrm{Bandwidth}~B~\mathrm{(GHz)}$") else: im = uimshow(data, extent=np.log10( np.array([ self.Cs[0], self.Cs[-1], self.Bs[0], self.Bs[-1] ])), cmap=cm.inferno_r, ax=ax) cax = ax.contour(data, extent=np.log10( np.array([ self.Cs[0], self.Cs[-1], self.Bs[0], self.Bs[-1] ])), colors=self.colors, levels=self.levels, linewidths=self.lws, origin='lower') #https://stackoverflow.com/questions/18390068/hatch-a-nan-region-in-a-contourplot-in-matplotlib # get data you will need to create a "background patch" to your plot xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() xy = (xmin, ymin) width = xmax - xmin height = ymax - ymin # create the patch and place it in the back of countourf (zorder!) p = patches.Rectangle(xy, width, height, hatch='X', color='0.5', fill=None, zorder=-10) ax.add_patch(p) ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$") ax.set_ylabel(r"$\mathrm{Bandwidth}~B~\mathrm{(GHz)}$") ax.xaxis.set_major_locator(MultipleLocator(0.5)) ax.yaxis.set_major_locator(MultipleLocator(0.5)) ax.xaxis.set_major_formatter(noformatter) ax.yaxis.set_major_formatter(noformatter) ax.text(0.05, 0.9, "PSR~%s" % self.psrnoise.name.replace("-", "$-$"), fontsize=18, transform=ax.transAxes, bbox=dict(boxstyle="square", fc="white")) if minimum is not None: checkdata = np.log10(self.sigmas) flatdata = checkdata.flatten() #inds = np.where(np.logical_not(np.isnan(flatdata)))[0] inds = np.where((~np.isnan(flatdata)) & ~(np.isinf(flatdata)))[0] MIN = np.min(flatdata[inds]) INDC, INDB = np.where(checkdata == MIN) INDC, INDB = INDC[0], INDB[0] MINB = self.Bs[INDB] MINC = self.Cs[INDC] cax = ax.contour(data, extent=np.log10( np.array([ self.Cs[0], self.Cs[-1], self.Bs[0], self.Bs[-1] ])), colors=['b', 'b'], levels=[ np.log10(1.1 * (10**MIN)), np.log10(1.5 * (10**MIN)) ], linewidths=[1, 1], linestyles=['--', '--'], origin='lower') print("Minimum", MINC, MINB, MIN) with open("minima.txt", 'a') as FILE: FILE.write("%s minima %f %f %f\n" % (self.psrnoise.name, MINC, MINB, MIN)) if self.log: ax.plot(np.log10(MINC), np.log10(MINB), minimum, zorder=50, ms=10) else: ax.plot(MINC, MINB, minimum, zorder=50, ms=10) if points is not None: if type(points) == tuple: points = [points] for point in points: x, y, fmt = point nulow = x - y / 2.0 nuhigh = x + y / 2.0 if self.log: ax.plot(np.log10(x), np.log10(y), fmt, zorder=50, ms=8) nus = np.logspace(np.log10(nulow), np.log10(nuhigh), self.nchan + 1)[:-1] sigma = np.log10(self.calc_single(nus)) else: ax.plot(x, y, fmt, zorder=50, ms=8) nus = np.linspace(nulow, nuhigh, self.nchan + 1)[:-1] #more uniform sampling? 
sigma = np.log10(self.calc_single(nus)) with open("minima.txt", 'a') as FILE: FILE.write("%s point %f %f %f\n" % (self.psrnoise.name, x, y, sigma)) if colorbararrow is not None: data = np.log10(self.sigmas) flatdata = data.flatten() #inds = np.where(np.logical_not(np.isnan(flatdata)))[0] inds = np.where((~np.isnan(flatdata)) & ~(np.isinf(flatdata)))[0] MIN = np.min(flatdata[inds]) MAX = np.max(flatdata[inds]) if self.log == True: x = np.log10(self.Cs[-1] * 1.05) #self.Bs[-1]) dx = np.log10(1.2) #np.log10(self.Cs[-1])#self.Bs[-1]*2) frac = (np.log10(colorbararrow) - MIN) / (MAX - MIN) y = frac * (np.log10(self.Bs[-1]) - np.log10(self.Bs[0])) + np.log10(self.Bs[0]) arrow(x, y, dx, 0.0, fc='k', ec='k', zorder=50, clip_on=False) else: if self.log == False: pass else: goodinds = [] for indf, F in enumerate(self.Fs): if np.any(np.isnan(self.sigmas[:, indf])): continue goodinds.append(indf) goodinds = np.array(goodinds) data = np.transpose(np.log10(self.sigmas[:, goodinds])) im = uimshow(data, extent=np.log10( np.array([ self.Cs[0], self.Cs[-1], self.Fs[goodinds][0], self.Fs[goodinds][-1] ])), cmap=cm.inferno_r, ax=ax) cax = ax.contour(data, extent=np.log10( np.array([ self.Cs[0], self.Cs[-1], self.Fs[goodinds][0], self.Fs[goodinds][-1] ])), colors=COLORS, levels=LEVELS, linewidths=LWS, origin='lower') #im = uimshow(data,extent=np.array([np.log10(self.Cs[0]),np.log10(self.Cs[-1]),self.Fs[goodinds][0],self.Fs[goodinds][-1]]),cmap=cm.inferno_r,ax=ax) #cax = ax.contour(data,extent=np.array([np.log10(self.Cs[0]),np.log10(self.Cs[-1]),self.Fs[goodinds][0],self.Fs[goodinds][-1]]),colors=COLORS,levels=LEVELS,linewidths=LWS,origin='lower') print(self.Fs) ax.set_xlabel(r"$\mathrm{Center~Frequency~\nu_0~(GHz)}$") #ax.set_ylabel(r"$r~\mathrm{(\nu_{max}/\nu_{min})}$") ax.set_ylabel(r"$\mathrm{Fractional~Bandwidth~(B/\nu_0)}$") # no log #ax.yaxis.set_major_locator(FixedLocator(np.log10(np.arange(0.25,1.75,0.25)))) ax.xaxis.set_major_formatter(noformatter) #ax.yaxis.set_major_formatter(noformatter) cbar = fig.colorbar(im) #,format=formatter) cbar.set_label("$\mathrm{TOA~Uncertainty~\sigma_{TOA}~(\mu s)}$") # https://stackoverflow.com/questions/6485000/python-matplotlib-colorbar-setting-tick-formator-locator-changes-tick-labels cbar.locator = MultipleLocator(1) cbar.formatter = formatter ''' MAX = np.max(data[np.where(np.logical_not(np.isnan(data)))]) if MAX <= np.log10(700): cbar.formatter = formatter100 else: cbar.formatter = formatter ''' cbar.update_ticks() #if self.log: # cb = colorbar(cax) if save: savefig(filename) if doshow: show() else: close()
def getinf(x):
    """Return the indices of infinite entries of x, as given by nonzero."""
    return num.nonzero(num.isinf(num.atleast_1d(x)))
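# Usage sketch (assuming `num` is an alias for numpy, e.g. `import numpy as
# num`): getinf reports the positions of +/-inf values, and atleast_1d makes
# it work on scalars as well.
import numpy as num

vals = num.array([0.0, num.inf, 3.0, -num.inf])
print(getinf(vals))     # (array([1, 3]),)
print(getinf(num.inf))  # (array([0]),)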
def run_crossmatch_lc(field, CCD, FILTER, kind='final', startTime=datetime.now()):
    warnings.filterwarnings("ignore")

    ##########################################################################
    if not os.path.exists("%s/lightcurves/" % (jorgepath)):
        print "Creating lightcurve folder"
        os.makedirs("%s/lightcurves/" % (jorgepath))
    if not os.path.exists("%s/lightcurves/%s" % (jorgepath, field)):
        print "Creating field folder"
        os.makedirs("%s/lightcurves/%s" % (jorgepath, field))
    if not os.path.exists("%s/lightcurves/%s/%s" % (jorgepath, field, CCD)):
        print "Creating CCD folder"
        os.makedirs("%s/lightcurves/%s/%s" % (jorgepath, field, CCD))

    ##########################################################################
    epochs_file = '%s/info/%s/%s_epochs_%s.txt' % (jorgepath, field, field, FILTER)
    if not os.path.exists(epochs_file):
        print 'No epochs file: %s' % (epochs_file)
        sys.exit()
    epochs = np.loadtxt(epochs_file, comments='#', dtype=str)
    if epochs.shape == (2, ):
        epochs = epochs.reshape(1, 2)

    INFO = []
    epoch_c = []
    tree = []
    X_Y = []
    print 'Loading catalogues (%s) files, creating tree structure' % (kind)
    no_epoch = 0
    for epoch in epochs:
        print 'Epoch %s' % epoch[0]
        # catalogues
        cata_file = "%s/catalogues/%s/%s/%s_%s_%s_image_crblaster_thresh%s_minarea%s_backsize64_final-scamp.dat" % \
            (jorgepath, field, CCD, field, CCD, epoch[0], str(thresh), str(minarea))
        if not os.path.exists(cata_file):
            print 'No catalog file: %s' % (cata_file)
            no_epoch += 1
            continue
        # cata = np.loadtxt(cata_file, comments='#')
        cata = Table.read(cata_file, format='ascii')
        # epoch_c has all the catalogues; each element of epoch_c contains the
        # catalogue of a given epoch
        epoch_c.append(cata)
        cata_XY = np.transpose(
            np.array((cata['X_IMAGE_REF'], cata['Y_IMAGE_REF'])))
        # X_Y has the pix coordinates of each catalogue
        X_Y.append(cata_XY)
        # tree has the pix coordinates of each catalogue in a kd-tree structure
        tree.append(cKDTree(cata_XY))
        # INFO of epochs
        INFO.append(epoch)

    if len(epoch_c) == 0:
        print 'No catalogues for this CCD'
        sys.exit()
    INFO = np.asarray(INFO)
    print '____________________________________________________________________'

    # compare all catalogues against each other to find the same objects.
    # master_cat has the final index matrix: rows are objects, columns epochs.
    # if master_cat[i][j] = -1 then there is no match for object i in epoch j
    master_cat = np.ones((1, len(epoch_c)), dtype=np.int) * (-1)

    # compare every epoch with every later epoch, looking for matches
    for TIME in range(len(epoch_c)):
        print 'Length of catalog %s = %i' % (INFO[TIME, 0], len(X_Y[TIME]))
        aux_cat = np.ones((len(X_Y[TIME]), len(epoch_c)), dtype=np.int) * (-1)
        aux_cat[:, TIME] = np.arange(len(X_Y[TIME]))
        if TIME < len(epoch_c):
            for time in range(TIME + 1, len(epoch_c)):
                print 'comparing epoch %s with epoch %s' % (INFO[TIME, 0],
                                                            INFO[time, 0])
                # find the nearest neighbour
                aux_dist = tree[time].query(X_Y[TIME], k=1,
                                            distance_upper_bound=5)
                aux_cat[:, time] = aux_dist[1]
                # find the sources with no match and replace them with -1
                mask_no = np.where(aux_cat[:, time] == len(X_Y[time]))
                aux_cat[mask_no, time] = -1
                print 'max: ', np.max(aux_dist[0][~np.isinf(aux_dist[0])])
                # mask_yes is an array with the indices of the matched objects:
                # it holds the indices of the objects found in epoch[time] and
                # has length len(epoch[TIME])
                mask_yes = aux_cat[np.where(aux_cat[:, time] > 0), time]
                print 'objects with match = %i' % len(mask_yes[0])

        # remove from aux_cat the objects already found in previous iterations
        if TIME > 0:
            to_remove = []
            for q in range(len(aux_cat[:, TIME])):
                repited = np.where(aux_cat[q, TIME] == master_cat[:, TIME])[0]
                if len(repited) > 0:
                    to_remove.append(q)
            aux_cat = np.delete(aux_cat, to_remove, 0)

        # concatenate the aux catalogue to the master catalogue
        master_cat = np.vstack((master_cat, aux_cat))
        print 'objects added = %i' % len(aux_cat)
        aux_cat = 0
        print '_______________________________________________________________'

    master_cat = np.delete(master_cat, 0, 0)
    print 'Total of objects = %i' % len(master_cat)

    ##########################################################################
    if kind == 'final':
        np.savetxt("%s/lightcurves/%s/%s/%s_%s_%s_master_index.txt" %
                   (jorgepath, field, CCD, field, CCD, FILTER),
                   master_cat, fmt='%04i', delimiter='\t')
    elif kind == 'temp':
        np.savetxt("%s/catalogues/%s/%s/temp_%s_%s_%s_master_index.txt" %
                   (jorgepath, field, CCD, field, CCD, FILTER),
                   master_cat, fmt='%04i', delimiter='\t')

    # Create lig
    print 'Total of epochs %i' % len(epochs)
    print 'Effective epochs %i' % (len(epochs) - no_epoch)
    print 'It took', (datetime.now() - startTime), 'seconds'
    print '___________________________________________________________________'
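# Standalone illustration with made-up coordinates: cKDTree.query with
# distance_upper_bound reports unmatched points with distance inf and index
# equal to the tree size, which is why the crossmatch above masks
# aux_cat == len(X_Y[time]) and strips inf before taking the maximum distance.
import numpy as np
from scipy.spatial import cKDTree

ref = cKDTree(np.array([[0.0, 0.0], [10.0, 10.0]]))
dist, idx = ref.query(np.array([[0.1, 0.1], [50.0, 50.0]]), k=1,
                      distance_upper_bound=5)
print(dist)                           # second entry is inf: no neighbour within 5
print(idx)                            # second entry equals ref.n, i.e. "no match"
print(np.max(dist[~np.isinf(dist)]))  # maximum over matched points only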
def _parse_yahoo_historical(fh, adjusted=True, asobject=False, ochl=True):
    """Parse the historical data in file handle fh from yahoo finance.

    Parameters
    ----------
    adjusted : bool
      If True (default) replace open, high, low, close prices with
      their adjusted values. The adjustment is by a scale factor,
      S = adjusted_close/close. Adjusted prices are actual prices
      multiplied by S.

      Volume is not adjusted as it is already backward split adjusted
      by Yahoo. If you want to compute dollars traded, multiply volume
      by the adjusted close, regardless of whether you choose adjusted
      = True|False.

    asobject : bool or None
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, high, low, close, volume

      or

        d, open, close, high, low, volume

      depending on `ochl`

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, high, low, close,
        volume, adjusted_close

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch holding 1-D ndarrays.  The behavior
      of a numpy recarray is very similar to the Bunch.

    ochl : bool
        Selects between ochl and ohlc ordering.
        Defaults to True to preserve original functionality.
    """
    if ochl:
        stock_dt = stock_dt_ochl
    else:
        stock_dt = stock_dt_ohlc

    results = []

    # datefmt = '%Y-%m-%d'
    fh.readline()  # discard heading
    for line in fh:
        vals = line.split(',')
        if len(vals) != 7:
            continue      # add warning?
        datestr = vals[0]
        # dt = datetime.date(*time.strptime(datestr, datefmt)[:3])
        # Using strptime doubles the runtime. With the present
        # format, we don't need it.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        open, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])
        if ochl:
            results.append((dt, dt.year, dt.month, dt.day,
                            dnum, open, close, high, low, volume, aclose))
        else:
            results.append((dt, dt.year, dt.month, dt.day,
                            dnum, open, high, low, close, volume, aclose))
    results.reverse()
    d = np.array(results, dtype=stock_dt)

    if adjusted:
        scale = d['aclose'] / d['close']
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['high'] *= scale
        d['low'] *= scale
        d['close'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray
        ret = np.zeros((len(d), 6), dtype=float)
        ret[:, 0] = d['d']
        if ochl:
            ret[:, 1] = d['open']
            ret[:, 2] = d['close']
            ret[:, 3] = d['high']
            ret[:, 4] = d['low']
        else:
            ret[:, 1] = d['open']
            ret[:, 2] = d['high']
            ret[:, 3] = d['low']
            ret[:, 4] = d['close']
        ret[:, 5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return
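# Numeric sketch with toy values (not real Yahoo data): the adjustment step
# above divides adjusted close by close, so a zero close would produce inf;
# replacing inf with NaN keeps the scaled prices from becoming infinite.
import numpy as np

close = np.array([10.0, 0.0, 20.0])
aclose = np.array([9.0, 5.0, 20.0])
with np.errstate(divide='ignore'):
    scale = aclose / close
scale[np.isinf(scale)] = np.nan
print(scale)  # [0.9, nan, 1.0]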
def polyinterp(points, x_min_bound=None, x_max_bound=None, plot=False):
    """
    Gives the minimizer and minimum of the interpolating polynomial over given
    points based on function and derivative information. Defaults to bisection
    if no critical points are valid.

    Based on polyinterp.m Matlab function in minFunc by Mark Schmidt with some
    slight modifications.

    Implemented by: Hao-Jun Michael Shi and Dheevatsa Mudigere
    Last edited 12/6/18.

    Inputs:
        points (nparray): two-dimensional array with each point of form [x f g]
        x_min_bound (float): minimum value that brackets minimum
            (default: minimum of points)
        x_max_bound (float): maximum value that brackets minimum
            (default: maximum of points)
        plot (bool): plot interpolating polynomial

    Outputs:
        x_sol (float): minimizer of interpolating polynomial

    Note:
        Set f or g to np.nan if they are unknown.
    """
    no_points = points.shape[0]
    order = np.sum(1 - np.isnan(points[:, 1:3]).astype('int')) - 1

    x_min = np.min(points[:, 0])
    x_max = np.max(points[:, 0])

    # compute bounds of interpolation area
    if x_min_bound is None:
        x_min_bound = x_min
    if x_max_bound is None:
        x_max_bound = x_max

    # explicit formula for quadratic interpolation
    if no_points == 2 and order == 2 and plot is False:
        # Solution to quadratic interpolation is given by:
        # a = -(f1 - f2 - g1(x1 - x2))/(x1 - x2)^2
        # x_min = x1 - g1/(2a)
        # if x1 = 0, it is given by:
        # x_min = -(g1*x2^2)/(2(f2 - f1 - g1*x2))
        if points[0, 0] == 0:
            x_sol = -points[0, 2]*points[1, 0]**2 / \
                (2*(points[1, 1] - points[0, 1] - points[0, 2]*points[1, 0]))
        else:
            a = -(points[0, 1] - points[1, 1] -
                  points[0, 2]*(points[0, 0] - points[1, 0])) / \
                (points[0, 0] - points[1, 0])**2
            x_sol = points[0, 0] - points[0, 2]/(2*a)

        x_sol = np.minimum(np.maximum(x_min_bound, x_sol), x_max_bound)

    # explicit formula for cubic interpolation
    elif no_points == 2 and order == 3 and plot is False:
        # Solution to cubic interpolation is given by:
        # d1 = g1 + g2 - 3((f1 - f2)/(x1 - x2))
        # d2 = sqrt(d1^2 - g1*g2)
        # x_min = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2))
        d1 = points[0, 2] + points[1, 2] - \
            3*((points[0, 1] - points[1, 1])/(points[0, 0] - points[1, 0]))
        # check the discriminant directly: np.sqrt of a negative float returns
        # NaN, which np.isreal still treats as real, so testing the sign here
        # is what actually guards against a complex root
        d2_square = d1**2 - points[0, 2]*points[1, 2]
        if d2_square >= 0:
            d2 = np.sqrt(d2_square)
            x_sol = points[1, 0] - (points[1, 0] - points[0, 0]) * \
                ((points[1, 2] + d2 - d1)/(points[1, 2] - points[0, 2] + 2*d2))
            x_sol = np.minimum(np.maximum(x_min_bound, x_sol), x_max_bound)
        else:
            x_sol = (x_max_bound + x_min_bound)/2

    # solve linear system
    else:
        # define linear constraints
        A = np.zeros((0, order + 1))
        b = np.zeros((0, 1))

        # add linear constraints on function values
        for i in range(no_points):
            if not np.isnan(points[i, 1]):
                constraint = np.zeros((1, order + 1))
                for j in range(order, -1, -1):
                    constraint[0, order - j] = points[i, 0]**j
                A = np.append(A, constraint, 0)
                b = np.append(b, points[i, 1])

        # add linear constraints on gradient values
        for i in range(no_points):
            if not np.isnan(points[i, 2]):
                constraint = np.zeros((1, order + 1))
                for j in range(order):
                    constraint[0, j] = (order - j)*points[i, 0]**(order - j - 1)
                A = np.append(A, constraint, 0)
                b = np.append(b, points[i, 2])

        # check if system is solvable
        if A.shape[0] != A.shape[1] or np.linalg.matrix_rank(A) != A.shape[0]:
            x_sol = (x_min_bound + x_max_bound)/2
            f_min = np.Inf
        else:
            # solve linear system for interpolating polynomial
            coeff = np.linalg.solve(A, b)

            # compute critical points
            dcoeff = np.zeros(order)
            for i in range(len(coeff) - 1):
                dcoeff[i] = coeff[i]*(order - i)

            crit_pts = np.array([x_min_bound, x_max_bound])
            crit_pts = np.append(crit_pts, points[:, 0])

            if not np.isinf(dcoeff).any():
                roots = np.roots(dcoeff)
                crit_pts = np.append(crit_pts, roots)

            # test critical points
            f_min = np.Inf
            x_sol = (x_min_bound + x_max_bound)/2  # defaults to bisection
            for crit_pt in crit_pts:
                if np.isreal(crit_pt) and \
                        crit_pt >= x_min_bound and crit_pt <= x_max_bound:
                    F_cp = np.polyval(coeff, crit_pt)
                    if np.isreal(F_cp) and F_cp < f_min:
                        x_sol = np.real(crit_pt)
                        f_min = np.real(F_cp)

            if plot:
                plt.figure()
                x = np.arange(x_min_bound, x_max_bound,
                              (x_max_bound - x_min_bound)/10000)
                f = np.polyval(coeff, x)
                plt.plot(x, f)
                plt.plot(x_sol, f_min, 'x')

    return x_sol
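# Usage sketch: minimizing f(x) = (x - 1)**2 from its value and slope at x = 0
# plus its value at x = 2, with the unknown gradient marked np.nan as the
# docstring above prescribes. The quadratic branch recovers the true
# minimizer x = 1.
import numpy as np

pts = np.array([[0.0, 1.0, -2.0],     # x, f(x), f'(x)
                [2.0, 1.0, np.nan]])  # gradient unknown here
print(polyinterp(pts))  # 1.0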