def mag2fluxcal( mag, magerr=0 ):
    """Convert magnitudes into SNANA-style FLUXCAL units.

    A fixed zero point of 27.5 is used for all bands, i.e.
    ``fluxcal = 10**(-0.4 * (mag - 27.5))``.

    Parameters
    ----------
    mag : scalar or sequence
        Magnitude(s) to convert.
    magerr : scalar or sequence, optional
        Magnitude uncertainty(ies).  A scalar ``magerr`` given together with
        a sequence ``mag`` is replaced by zeros.  A negative entry switches
        that point to ``fluxcal = 0`` with the converted flux stored as its
        error (presumably an upper-limit convention -- confirm with callers).

    Returns
    -------
    fluxcal, or (fluxcal, fluxcalerr) when any ``magerr`` entry is non-zero.
    Single-element inputs are returned as scalars rather than arrays.
    """
    import numpy as np

    if not np.iterable( mag ) :
        mag = np.array( [ mag ] )
        magerr = np.array( [ magerr ] )
    if not np.iterable( magerr ) :
        magerr = np.zeros( len(mag) )

    def _convert_one( m, me ):
        # Returns the (flux, flux error) pair for a single measurement.
        flux = 10**(-0.4*(m-27.5))
        if me < 0 :
            return 0, flux
        return flux, 0.92103 * me * flux

    converted = [ _convert_one( m, me ) for m, me in zip( mag, magerr ) ]
    fluxcal = np.array( [ pair[0] for pair in converted ] )
    fluxcalerr = np.array( [ pair[1] for pair in converted ] )

    if len(mag)==1 :
        fluxcal = fluxcal[0]
        fluxcalerr = fluxcalerr[0]

    if np.any( magerr ) :
        return( fluxcal, fluxcalerr )
    return( fluxcal )
def importance_sample_var(x, est, p_gen, p_tar, log_weight_lim=float('inf'), normalize=False):
    """Importance-weighted outer-product sum of sample deviations.

    The difference ``p_tar - p_gen`` is used as the log importance weight of
    each sample.  Samples whose log weight lies outside the open interval
    ``(-log_weight_lim, log_weight_lim)`` are dropped.

    Parameters
    ----------
    x : sequence of samples (scalars or vectors).
    est : value subtracted from every sample before forming outer products.
    p_gen, p_tar : scalar or per-sample sequence; scalars are repeated for
        every sample.
    log_weight_lim : bound on the absolute log weight of retained samples.
    normalize : currently unused.

    Returns
    -------
    (est_var, ess) : sum over samples of ``w_i**2 * outer(d_i, d_i)`` with
        ``w`` the weights normalized to sum to one, and
        ``sum(w**2)**2 / sum(w**4)``.  If no sample is retained the result
        is ``(inf, 0)``.
    """
    # TODO normalize not used?
    samples = np.asarray(x)
    n_samples = len(samples)
    limit = float(log_weight_lim)

    def _per_sample(p):
        # Scalars are repeated once per sample; sequences are used as given.
        return np.asarray(p) if np.iterable(p) else np.full(n_samples, p)

    gen = _per_sample(p_gen)
    tar = _per_sample(p_tar)
    log_w = tar - gen

    keep = np.logical_and(log_w > -limit, log_w < limit)
    if not np.any(keep):
        return float('inf'), 0

    w = np.exp(log_w[keep])
    residuals = samples[keep] - est
    outer_products = np.asarray([np.outer(r, r) for r in residuals])

    w_norm = w / np.sum(w)
    w_sq = w_norm * w_norm
    # Transposing puts the sample axis last so the weights broadcast over it.
    est_var = np.sum(w_sq * outer_products.T, axis=-1).T
    ess = np.sum(w_norm ** 2) ** 2 / np.sum(w_norm ** 4)
    return est_var, ess
def write_arrays(filename, args, fields=None, sep=" ", comment="#",
                 clobber=False, linebreak="\n", format="%g"):
    """Write equal-length arrays as columns of a text file.

    Parameters
    ----------
    filename : path of the file to create.
    args : non-empty sequence of equal-length sequences; each one becomes a
        column of the output.
    fields : optional sequence of column names, written as a first line
        prefixed by ``comment``.
    sep : separator placed between columns (and between field names).
    comment : prefix of the header line.
    clobber : if false, an existing ``filename`` is an error.
    linebreak : line terminator.
    format : ``%``-style format applied to every element.

    Raises
    ------
    IOErr
        "filefound" if the file exists and ``clobber`` is false;
        "noarrayswrite" if ``args`` is empty or contains a non-sequence;
        "arraysnoteq" if the arrays differ in length.
    """
    if os.path.isfile(filename) and not clobber:
        raise IOErr("filefound", filename)

    if not numpy.iterable(args) or len(args) == 0:
        raise IOErr("noarrayswrite")

    if not numpy.iterable(args[0]):
        raise IOErr("noarrayswrite")

    size = len(args[0])
    for arg in args:
        if not numpy.iterable(arg):
            raise IOErr("noarrayswrite")
        elif len(arg) != size:
            raise IOErr("arraysnoteq")

    # Format everything before touching the file, so a formatting error
    # cannot leave a truncated file behind.
    rows = numpy.column_stack(numpy.asarray(args))
    lines = [sep.join([format % elem for elem in row]) for row in rows]

    # open() works on Python 2 and 3 (file() is Python 2 only); the context
    # manager closes the handle even if a write fails.
    with open(filename, "w") as f:
        if fields is not None:
            f.write(comment + sep.join(fields) + linebreak)
        f.write(linebreak.join(lines))
        # add a newline at end
        f.write(linebreak)
def Min3(a,b,c):
    """Element-wise minimum of ``a``, ``b`` and ``c`` via inlined C code.

    ``a`` supplies the element count (``a.size``) and the result shape
    (``a.shape``).  ``b`` and ``c`` are read per element when they are
    iterable and as single values otherwise.  The generated loop is handed
    to ``W.inline`` and the freshly allocated ``GridArray`` is returned.
    """
    loop_head = """ for(int i=0;i<ntot;i++) { double av = *(a+i); """
    b_decl = """ double bv = *(b+i); """ if iterable(b) else """ double bv = b; """
    c_decl = """ double cv = *(c+i); """ if iterable(c) else """ double cv = c; """
    loop_tail = """ *(result+i) = (av<bv) ? ((cv<av) ? cv:av) : ((cv<bv) ? cv:bv); } """
    code = loop_head + b_decl + c_decl + loop_tail

    # ntot and result are picked up by name through the list below.
    ntot = a.size
    result = GridArray.GridArray.empty(a.shape)
    W.inline(code, ['a','b','c','result','ntot'], extra_compile_args=["-w"])
    return result
def dict_diff(dict1, dict2):
    """Return the difference between two dictionaries.

    The result maps each differing key to a ``[val1, val2]`` pair.  Keys
    unique to either dictionary are included as ``key: [val1, '-']`` or
    ``key: ['-', val2]``.

    Values are compared with ``np.array_equal`` when either one is iterable
    and with ``!=`` otherwise.  Keys are matched by ordinary dictionary
    membership, so they keep their original type and need not be sortable.
    """
    def _differs(val1, val2):
        # Iterables (arrays, lists, strings, ...) cannot be compared reliably
        # with !=, which may return an element-wise array.
        if np.iterable(val1) or np.iterable(val2):
            return not np.array_equal(val1, val2)
        return val1 != val2

    diff = {}
    for key, val1 in dict1.items():
        if key not in dict2:
            diff[key] = [val1, '-']
        elif _differs(val1, dict2[key]):
            diff[key] = [val1, dict2[key]]
    for key, val2 in dict2.items():
        if key not in dict1:
            diff[key] = ['-', val2]
    return diff
def __init__(self, mesh, material1=None, material2=None, surface=None, color=0x33ffffff):
    """Attach per-triangle materials, surfaces and colors to ``mesh``.

    Each of ``material1``, ``material2``, ``surface`` and ``color`` may be a
    single value, which is repeated for every triangle, or an iterable with
    exactly one entry per triangle of ``mesh.triangles``.

    Raises
    ------
    ValueError
        If an iterable argument's length differs from the triangle count.
    """
    self.mesh = mesh
    n_triangles = len(mesh.triangles)

    def _per_triangle(value, dtype):
        # Iterables are validated and converted; single values are tiled.
        if np.iterable(value):
            if len(value) != n_triangles:
                raise ValueError('shape mismatch')
            return np.array(value, dtype=dtype)
        return np.tile(value, n_triangles)

    # The builtin ``object`` is used as dtype: the ``np.object`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24.
    self.material1 = _per_triangle(material1, object)
    self.material2 = _per_triangle(material2, object)
    self.surface = _per_triangle(surface, object)
    # A tiled scalar color gets the default integer dtype, so cast explicitly.
    self.color = _per_triangle(color, np.uint32).astype(np.uint32)

    self.unique_materials = \
        np.unique(np.concatenate([self.material1, self.material2]))
    self.unique_surfaces = np.unique(self.surface)
def test_kernel_clone():
    """ Check that sklearn's clone yields equal but distinct kernels. """
    for kernel in kernels:
        duplicate = clone(kernel)

        # The clone compares equal but is a different object.
        assert_equal(kernel, duplicate)
        assert_not_equal(id(kernel), id(duplicate))

        for attr in kernel.__dict__:
            value = getattr(kernel, attr)
            value_dup = getattr(duplicate, attr)
            if attr.startswith("hyperparameter_"):
                # Hyperparameter descriptors are compared field by field.
                assert_equal(value.name, value_dup.name)
                assert_equal(value.value_type, value_dup.value_type)
                assert_array_equal(value.bounds, value_dup.bounds)
                assert_equal(value.n_elements, value_dup.n_elements)
            elif np.iterable(value):
                for idx in range(len(value)):
                    item = value[idx]
                    if np.iterable(item):
                        assert_array_equal(item, value_dup[idx])
                    else:
                        assert_equal(item, value_dup[idx])
            else:
                assert_equal(value, value_dup)
            if not isinstance(value, Hashable):
                # modifiable attributes must not be identical
                assert_not_equal(id(value), id(value_dup))
def set_arrays(filename, args, fields=None, ascii=True, clobber=False):
    """Write equal-length arrays as the columns of a crates table file.

    Parameters
    ----------
    filename : output path.  When ``ascii`` is true and the name contains
        neither ``[`` nor ``]``, ``"[opt kernel=text/simple]"`` is appended.
    args : non-empty sequence of equal-length sequences, one per column.
    fields : optional column names; defaults to ``col1``, ``col2``, ...
    ascii : see ``filename``.
    clobber : if false, an existing ``filename`` is an error.

    Raises
    ------
    IOErr
        "filefound", "noarrayswrite", "arraysnoteq" or "toomanycols",
        depending on which validation step fails.
    """
    file_exists = os.path.isfile(filename)
    if file_exists and not clobber:
        raise IOErr("filefound", filename)

    if not numpy.iterable(args) or len(args) == 0:
        raise IOErr('noarrayswrite')

    first = args[0]
    if not numpy.iterable(first):
        raise IOErr('noarrayswrite')

    nrows = len(first)
    for column in args:
        if not numpy.iterable(column):
            raise IOErr('noarrayswrite')
        if len(column) != nrows:
            raise IOErr('arraysnoteq')

    plain_name = '[' not in filename and ']' not in filename
    if ascii and plain_name:
        filename += "[opt kernel=text/simple]"

    tbl = pycrates.TABLECrate()

    if fields is None:
        fields = ['col%i' % number for number in range(1, len(args) + 1)]

    if len(args) != len(fields):
        raise IOErr('toomanycols', str(len(fields)), str(len(args)))

    # Lengths are equal here, so pairing columns with names loses nothing.
    for val, name in zip(args, fields):
        _set_column(tbl, name, val)

    pycrates.write_file(tbl, filename, clobber=True)
    close_crate_dataset(tbl.get_dataset())
def _set_values_to_brick(self, brick_guid, brick_slice, values, value_slice=None):
    """Write ``values`` into one brick, directly or via the work dispatcher.

    The brick lives in ``<self.brick_path>/<brick_guid>.hdf5`` as a dataset
    named ``brick_guid``.

    Parameters
    ----------
    brick_guid : identifier used for both the file name and the dataset name.
    brick_slice : index expression selecting the region of the brick to fill.
    values : source data; its ``ndim``/``shape`` attributes are read, so an
        array-like with those attributes is expected.
    value_slice : optional index expression applied to ``values`` first.

    Behavior depends on ``self.inline_data_writes``: when true the data is
    written immediately under an ``HDFLockingFile``; otherwise a work item is
    built and either submitted to ``self.brick_dispatcher`` (when
    ``self.auto_flush`` is true) or queued with ``self._queue_work``.
    """
    brick_file_path = os.path.join(self.brick_path, '{0}.hdf5'.format(brick_guid))
    log.trace('Brick slice to fill: %s', brick_slice)
    log.trace('Value slice to extract: %s', value_slice)

    # Create the HDF5 dataset that represents one brick
    # bD is used as the dataset shape below; cD is only inspected for 0/1
    # entries and forwarded in the work metrics.
    bD = tuple(self.brick_domains[1])
    cD = self.brick_domains[2]
    if value_slice is not None:
        vals = values[value_slice]
    else:
        vals = values

    if values.ndim == 0 and len(values.shape) == 0 and np.iterable(vals):  # Prevent single value strings from being iterated
        vals = [vals]

    # Check for object type
    data_type = self.dtype
    fv = self.fill_value

    # Check for object type
    # Object-typed values are passed through pack() (defined elsewhere)
    # before being stored.
    if data_type == '|O8':
        if np.iterable(vals):
            vals = [pack(x) for x in vals]
        else:
            vals = pack(vals)

    if self.inline_data_writes:
        # Object data is stored as variable-length strings in HDF5.
        if data_type == '|O8':
            data_type = h5py.special_dtype(vlen=str)
        if 0 in cD or 1 in cD:
            cD = True
        with HDFLockingFile(brick_file_path, 'a') as f:
            # TODO: Due to usage concerns, currently locking chunking to "auto"
            f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)
            f[brick_guid][brick_slice] = vals
    else:
        work_key = brick_guid
        work = (brick_slice, vals)
        # NOTE: the metrics capture data_type and cD before the adjustments
        # made in the 'touch' branch below.
        work_metrics = (brick_file_path, bD, cD, data_type, fv)
        log.trace('Work key: %s', work_key)
        log.trace('Work metrics: %s', work_metrics)
        log.trace('Work[0]: %s', work[0])

        # If the brick file doesn't exist, 'touch' it to make sure it's immediately available
        if not os.path.exists(brick_file_path):
            if data_type == '|O8':
                data_type = h5py.special_dtype(vlen=str)
            if 0 in cD or 1 in cD:
                cD = True
            with HDFLockingFile(brick_file_path, 'a') as f:
                # TODO: Due to usage concerns, currently locking chunking to "auto"
                f.require_dataset(brick_guid, shape=bD, dtype=data_type, chunks=None, fillvalue=fv)

        if self.auto_flush:
            # Immediately submit work to the dispatcher
            self.brick_dispatcher.put_work(work_key, work_metrics, work)
        else:
            # Queue the work for later flushing
            self._queue_work(work_key, work_metrics, work)
def broadcast_indices(indices):
    """Broadcast array and integer indices onto one common length.

    If any array index is present, every non-iterable, non-slice index is
    expanded to an array of the same size.  Boolean array indices are first
    converted to integer positions.  Slices and other iterables are passed
    through unchanged.

    Parameters
    ----------
    indices : sequence of indices (integers, slices, or array-likes).

    Returns
    -------
    list
        The processed indices, in the same order.

    Raises
    ------
    ValueError
        If two array indices have different sizes.
    """
    aindices = []

    # convert all booleans, and scan the indices to get the size
    size = None
    for ix in indices:
        # Compare dtypes by value; identity is an implementation detail.
        if np.iterable(ix) and np.asarray(ix).dtype == np.dtype(bool):
            ix = np.where(ix)[0]

        if np.iterable(ix):
            current = np.size(ix)
            if size is None:
                size = current
            # consistency check
            elif size != current:
                raise ValueError(
                    "array-indices could not be broadcast on the same shape (got {} and {}, try box[...] or take(..., broadcast_arrays=False) if you intend to sample values along several dimensions independently)".format(
                        size, current
                    )
                )

        aindices.append(ix)

    # Now convert all integers to the same size, if applicable
    if size is not None:
        for i, ix in enumerate(aindices):
            if not np.iterable(ix) and not isinstance(ix, slice):
                aindices[i] = np.zeros(size, dtype=type(ix)) + ix

    return aindices
def clone_with_param(self, new_length_scale):
    """Return a new ``RBFKernel`` with a different length scale.

    The new kernel keeps this kernel's ``length_scale_bounds``.

    Raises
    ------
    ValueError
        If this kernel's length scale is iterable and ``new_length_scale``
        is not iterable or has a different length.
    """
    if np.iterable(self.length_scale):
        if not np.iterable(new_length_scale):
            raise ValueError("new_length_scale is not iterable")
        if len(self.length_scale) != len(new_length_scale):
            raise ValueError("new_length_scale mismatched")
    return RBFKernel(new_length_scale, self.length_scale_bounds)
def integrate_prop_odeint(self, D, eps, x,t1,t2):
    '''
    Integrate the lineage propagator, accounting for non branching,
    using the scipy ODE integrator.

    parameters:
    D   --  dimensionless diffusion constant
    eps --  initial condition for the generating function, corresponding to the sampling probability
    x   --  fitness at the "closer to the present" end of the branch, a single value or an iterable
    t1  --  time closer to the present
    t2  --  times after which to evaluate the propagator, either a float or iterable of floats

    returns:
    array of shape (len(t2)+1, number of x values, self.L), clipped from
    below at non_negativity_cutoff
    '''
    # a list is needed so the late times can be concatenated with t1
    late_times = list(t2) if np.iterable(t2) else [t2]
    fitness_values = x if np.iterable(x) else [x]
    time_points = [t1]+late_times

    # dimensions: #time points, #late fitness values, #fitness grid points
    sol = np.zeros((len(late_times)+1, len(fitness_values), self.L))

    for ii, x_val in enumerate(fitness_values):
        # index in the fitness grid at which to place the initial condition
        xi = np.argmin(x_val >self.fitness_grid)
        # delta-function initial condition, normalized through dxinv
        prop0 = np.zeros(self.L)
        prop0[xi] = self.dxinv
        # propagate backwards and store the result for this fitness value
        sol[:,ii,:] = odeint(self.dprop_backward, prop0, time_points,args=((D,eps),),
                             rtol = 0.001,atol = 1e-5, h0=1e-2,hmin = 1e-4, printmessg=False)

    return np.maximum(non_negativity_cutoff,sol)
def _get_k_variables(self, k, m, c=None, coord="k"):
    """
    From a raw array in k and mass, return the concentration and kappa.

    Parameters
    ----------
    k : scalar or array
        Wavenumber (``coord="k"``) or the dimensionless scale parameter
        itself (``coord="kappa"``).
    m : scalar or array
        Mass.
    c : optional
        Concentration; computed from ``self.cm_relation(m)`` when omitted.
    coord : {"k", "kappa"}
        How ``k`` is to be interpreted.

    Returns
    -------
    c : same shape as m
        concentration
    K : scalar, 1d or 2d array
        Dimensionless scale parameter.  For ``coord="k"`` with both ``k``
        and the scale radius iterable this is their outer product; otherwise
        it is the plain product.

    Raises
    ------
    ValueError
        If ``coord`` is neither "k" nor "kappa".
    """
    if c is None:
        c = self.cm_relation(m)
    r_s = self._rs_from_m(m, c)

    if coord == "k":
        if np.iterable(k) and np.iterable(r_s):
            K = np.outer(k, r_s)
        else:
            K = k*r_s
    elif coord == "kappa":
        K = k
    else:
        # Previously this fell through to an UnboundLocalError on return.
        raise ValueError("coord must be 'k' or 'kappa', got %r" % (coord,))
    return c, K
def point_displ(pt1, pt2):
    """ Calculate the displacement vector between two n-D points.

    The result is ``pt2 - pt1``, returned as an (n, 1) column ``np.matrix``.

    Parameters
    ----------
    pt1, pt2 : scalar or array-like
        Point coordinates.  Scalars are treated as 1-D points; array-likes
        are squeezed, so e.g. a (1, n) row or (n, 1) column is accepted.

    Raises
    ------
    ValueError
        If the difference is not at most one-dimensional after squeezing.

    .. todo:: Complete point_disp docstring

    """
    #Imports
    import numpy as np

    def _as_vector(pt):
        # Scalars become length-1 vectors; sequences are squeezed to 1-D.
        if not np.iterable(pt):
            return np.array([pt], dtype=np.float64)
        return np.array(pt, dtype=np.float64).squeeze()

    diff = np.subtract(_as_vector(pt2), _as_vector(pt1))
    if np.ndim(diff) > 1:
        raise ValueError("points must be one-dimensional after squeezing")

    # Column vector whose length follows the input dimension (the original
    # hard-coded three rows).
    displ = np.matrix(diff).reshape(-1, 1)
    return displ
def _values_equal(a, b):
    """Test equality, taking into account array values.

    Identical objects are equal; objects of different type, different
    iterability or different length are not.  Arrays must match in shape and
    in every element, tuples/lists and dicts are compared recursively, and
    ``mne.io.BaseRaw`` objects are compared through their ``info``.
    Anything else falls back to ``==``.
    """
    if a is b:
        return True
    if type(a) is not type(b):
        return False

    a_iterable = np.iterable(a)
    if a_iterable != np.iterable(b):
        return False
    if not a_iterable:
        return a == b
    if len(a) != len(b):
        return False

    if isinstance(a, np.ndarray):
        return a.shape == b.shape and (a == b).all()
    if isinstance(a, (tuple, list)):
        return all(_values_equal(item_a, item_b) for item_a, item_b in zip(a, b))
    if isinstance(a, dict):
        return a.keys() == b.keys() and all(_values_equal(a[key], b[key]) for key in a)
    if isinstance(a, mne.io.BaseRaw):
        return isinstance(b, a.__class__) and _values_equal(a.info, b.info)
    return a == b
def __init__(self):
    """Configure the node from ROS private parameters and build the optimizer.

    Required parameters (no default is passed to ``rospy.get_param``):
    ``~input_dimension``, ``~input_lower_bound``, ``~input_upper_bound``,
    ``~output_path`` and ``~init_popsize``.  All others fall back to the
    defaults visible below.
    """
    # Seed RNG if specified
    seed = rospy.get_param('~random_seed', None)
    if seed is None:
        rospy.loginfo('No random seed specified. Using default behavior.')
    else:
        rospy.loginfo('Initializing with random seed: ' + str(seed))
        np.random.seed(seed)

    self.save_period = rospy.get_param('~save_period', 1)

    self.input_dim = rospy.get_param('~input_dimension')
    self.input_lower = rospy.get_param('~input_lower_bound')
    self.input_upper = rospy.get_param('~input_upper_bound')
    # Scalar bounds are repeated once per input dimension.
    if not np.iterable(self.input_lower):
        self.input_lower = [self.input_lower]*self.input_dim
    self.input_lower = np.asarray(self.input_lower)
    if not np.iterable(self.input_upper):
        self.input_upper = [self.input_upper]*self.input_dim
    self.input_upper = np.asarray(self.input_upper)

    checker_func = self.check_input

    self.prog_path = rospy.get_param('~progress_path', None)
    self.out_path = rospy.get_param('~output_path')

    # Genetic operators: each lambda closes over the parameter read above.
    crossover_rate = rospy.get_param('~crossover_rate', 0.5)
    crossover_func = lambda x, y: optgen.uniform_crossover(x, y, crossover_rate)

    mutate_cov = float(rospy.get_param('~mutate_cov', 0.1))
    mutate_func = lambda x: optgen.gaussian_mutate(x, mutate_cov)

    selection_k = rospy.get_param('~selection_k', None)
    selection_func = lambda N, w: optgen.tournament_selection(N, w, selection_k)

    crossover_prob = rospy.get_param('~crossover_prob', 0.6)
    init_popsize = rospy.get_param('~init_popsize')
    run_popsize = rospy.get_param('~run_popsize', init_popsize)
    # NOTE: elitist is read but not passed on; the keyword is commented out
    # in the constructor call below.
    elitist = rospy.get_param('~elitist', False)
    verbose = rospy.get_param('~verbose', False)

    self.max_iters = rospy.get_param('~convergence/max_iters', 100)
    self.iter_counter = 0

    self.optimizer = optgen.GeneticOptimizer(crossover_func=crossover_func,
                                             mutate_func=mutate_func,
                                             selection_func=selection_func,
                                             checker_func=checker_func,
                                             prob_cx=crossover_prob,
                                             popsize=run_popsize,
                                             # elitist=elitist,
                                             verbose=verbose)

    # Seed the optimizer with init_popsize samples from self.sample_input().
    initial_pop = [self.sample_input() for i in range(init_popsize)]
    self.optimizer.initialize(initial_pop)

    self.rounds = []
    # NOTE(review): these two repeat the assignments made earlier in this
    # method with the same parameter names.
    self.prog_path = rospy.get_param('~progress_path', None)
    self.out_path = rospy.get_param('~output_path')
def get_season(year, month, return_month=2):
    """
    Apply `_get_season` to a scalar or sequence.

    Unless both `year` and `month` are iterable, each is wrapped in a
    one-element array first.  The two columns of the stacked `_get_season`
    results are returned as separate arrays.  See `_get_season`.
    """
    both_sequences = np.iterable(year) and np.iterable(month)
    if not both_sequences:
        year = np.asarray([year])
        month = np.asarray([month])

    per_date = [_get_season(y, m, return_month) for y, m in zip(year, month)]
    seasons = np.asarray(per_date)
    return seasons[:,0], seasons[:,1]
def __setitem__(self, key, value):
    """Assign ``value`` to the rows chosen by a selector, creating rows if needed.

    ``key`` is either a selector or a tuple whose first element is the
    selector and whose remaining elements are column names.  ``value`` may be
    a scalar, a dict mapping column names to values, or an iterable with at
    most as many entries as there are target columns.

    Raises
    ------
    ValueError
        If no index can be built from the selector, or if ``value`` cannot
        be mapped onto the target columns.
    """
    if type(key) == tuple:
        selector = key[0]
    else:
        selector = key

    # Try using the selector to select data from the internal DataFrame:
    try:
        idx = self.sel.get_index(self.data, selector,
                                 names=self.data.index.names)

    # If the select fails, try to create new rows with the index specified
    # by the selector and load them with the specified data:
    except ValueError:
        try:
            idx = self.sel.make_index(selector, self.data.index.names)
        except:
            raise ValueError('cannot create index with '
                             'selector %s and column names %s' \
                             % (selector, str(self.data.index.names)))
        else:
            found = False
    else:
        found = True

    # If the data specified is not a dict, convert it to a dict:
    if type(key) == tuple and len(key) > 1:
        # Column names were given explicitly after the selector.
        if np.isscalar(value):
            data = {k:value for k in key[1:]}
        elif type(value) == dict:
            data = value
        elif np.iterable(value) and len(value) <= len(key[1:]):
            data={k:v for k, v in zip(key[1:], value)}
        else:
            raise ValueError('cannot assign specified value')
    else:
        # No columns given: a scalar goes to the first column, an iterable
        # is paired with the columns in order.
        if np.isscalar(value):
            data = {self.data.columns[0]: value}
        elif type(value) == dict:
            data = value
        elif np.iterable(value) and len(value) <= len(self.data.columns):
            data={k:v for k, v in zip(self.data.columns, value)}
        else:
            raise ValueError('cannot assign specified value')

    if found:
        # Existing rows: overwrite the selected cells column by column.
        for k, v in data.iteritems():
            self.data[k].ix[idx] = v
    else:
        # New rows: append them, then re-sort the frame.
        new_data = self.data.append(pd.DataFrame(data=data, index=idx,
                                                 dtype=object))

        # Validate updated DataFrame's index before updating the instance's
        # data attribute:
        self.__validate_index__(new_data.index)
        self.data = new_data
        self.data.sort(inplace=True)
def fit(self, outfile=None, clobber=False):
    """Run the fit and return a ``FitResults`` object.

    Parameters
    ----------
    outfile, clobber : forwarded to ``self._iterfit._get_callback``.

    Raises
    ------
    FitErr
        'nobins' if the data yields no dependent values; 'binhas0' if the
        statistical errors contain a zero and the statistic is a ``Chi2``
        subclass other than ``Chi2`` / ``Chi2ModVar`` themselves;
        'statnotforbackgsub' if the data is flagged ``subtracted`` and the
        statistic is a ``Likelihood``.
    """
    dep, staterror, syserror = self.data.to_fit(self.stat.calc_staterror)
    if not iterable(dep) or len(dep) == 0:
        #raise FitError('no noticed bins found in data set')
        raise FitErr( 'nobins' )

    if ((iterable(staterror) and 0.0 in staterror) and
        isinstance(self.stat, Chi2) and
        type(self.stat) != Chi2 and
        type(self.stat) != Chi2ModVar):
        #raise FitError('zeros found in uncertainties, consider using' +
        #               ' calculated uncertainties')
        raise FitErr( 'binhas0' )

    if (getattr(self.data, 'subtracted', False) and
        isinstance(self.stat, Likelihood) ):
        #raise FitError('%s statistics cannot be used with background'
        #               % self.stat.name + ' subtracted data')
        raise FitErr( 'statnotforbackgsub', self.stat.name )

    # Statistic value before fitting, reported alongside the results.
    init_stat = self.calc_stat()
    # output = self.method.fit ...
    output = self._iterfit.fit(self._iterfit._get_callback(outfile, clobber),
                               self.model.thawedpars,
                               self.model.thawedparmins,
                               self.model.thawedparmaxes)
    # LevMar always calculate chisquare, so call calc_stat
    # just in case statistics is something other then chisquare
    self.model.thawedpars = output[1]
    tmp = list(output)
    tmp[2] = self.calc_stat()
    output = tuple(tmp)
    # end of the gymnastics 'cause one cannot write to a tuple

    # check if any parameter values are at boundaries,
    # and warn user.
    tol = finfo(float32).eps
    param_warnings = ""
    for par in self.model.pars:
        if not par.frozen:
            if sao_fcmp(par.val, par.min, tol) == 0:
                param_warnings += ("WARNING: parameter value %s is at its minimum boundary %s\n" % (par.fullname, str(par.min)))
            if sao_fcmp(par.val, par.max, tol) == 0:
                param_warnings += ("WARNING: parameter value %s is at its maximum boundary %s\n" % (par.fullname, str(par.max)))

    # If the iterative fit left a file open, append a final line holding the
    # evaluation count, statistic and parameter values, then close it.
    if self._iterfit._file is not None:
        vals = ['%5e %5e' % (self._iterfit._nfev, tmp[2])]
        vals.extend(['%5e' % val for val in self.model.thawedpars])
        print >> self._iterfit._file, ' '.join(vals)
        self._iterfit._file.close()
        self._iterfit._file=None

    return FitResults(self, output, init_stat, param_warnings.strip("\n"))
def stokes(self, bead, gain, output='stokes_avg.dat', template=None, path='Stokes'):
    """
    stokes(bead, gain, output='stokes_avg.dat', template=None, path='Stokes')

    Loads data from stokes dat files and computes an average stiffness
    per gain.  You can override the default template.

    bead & gain: similar to file_list in usage, but specifically fill out
                 the first and second template positions for stokes drag
                 templates.  A non-iterable value is expanded with
                 fullrange(value)[1:].

    NOTE(review): the original docstring says the averaged data is saved to
    the file named by `output`, but nothing in the visible body writes
    `output` or returns the collected values -- confirm whether the saving
    code lives elsewhere.
    """
    template = template or self.templates['stokes']

    #if type(bead) != list:
    if not iterable(bead):
        bead = fullrange(bead)[1:]
    #if type(gain) != list:
    if not iterable(gain):
        gain = fullrange(gain)[1:]

    stokes_k = []
    stokes_gain = []
    for g in gain:
        # One file per bead for the current gain.
        files = [ os.path.join(self.path, path, template) % (b,g) for b in bead ]
        print "Loading files: " + str(files), "\n"
        #---------------------------------
        # Load data from each Stokes file
        # and calculate stiffness. Store
        # values in object variables
        #---------------------------------
        k = []
        for file in files:
            try:
                with open(file, 'r') as f:
                    # The first nine lines are kept as the header; the rest
                    # is parsed as three columns.
                    header = [ f.readline() for dummy in range(9) ]
                    vel, x, y = loadtxt(f, unpack=True)
            except IOError, (errno, errstr):
                # errno 2 (file not found) is reported and skipped; any
                # other I/O error propagates.
                if errno == 2:
                    print "File %s not found! Skipping..." % file
                else:
                    raise
            else:
                # Linear fit of y against vel*1000; the slope sets the stiffness.
                pfit = polyfit(vel*1000, y, 1)
                k.append(self.bead_size * 9.42e-6 / pfit[0])
                print file, "=", str(poly1d(pfit, variable='v')).strip(), "stiffness = %.3f" % k[-1]
        # No file could be loaded for this gain.
        if k == []: continue
        k = -average(k)
        stokes_k.append(k)
        # The gain value is parsed from the fifth header line of the most
        # recently loaded file.
        stokes_gain += [ int(header[4].split()[2]) ]
        print "\nAverage k for gain %.1f: %f" % (stokes_gain[-1], k), "\n--------\n"
def jd_from_date(y, m, d):
    """
    Given year, month, day, return the Julian Day.

    If d is an integer, the JD number for that day is returned as
    an integer; if d is floating point, it is assumed to be
    day + fraction, and the corresponding floating point JDN
    is returned.  Note that the fractional part of this is zero
    at noon, so

        int(jd_from_date(2000, 10, 10.5)) == jd_from_date(2000, 10, 10)

    Vectorized: y, m, d can be broadcast-compatible sequences.  The
    January/February adjustment is applied per element, so sequences may mix
    months freely.

    Note: for scalars, a python scalar version would be much faster;
    but this optimization would likely not matter in practice.

    From http://www.astro.uu.nl/~strous/AA/en/reken/juliaansedag.html
    """
    scalar = not (np.iterable(y) or np.iterable(m) or np.iterable(d))
    y, m, d = np.atleast_1d(y, m, d)
    y = y.astype(int)
    m = m.astype(int)
    day_offset = 1721119 if d.dtype.kind in 'iu' else 1721118.5

    # January and February count as months 13 and 14 of the previous year.
    # Only those elements are shifted; shifting every element whenever any
    # month is < 3 corrupts the other dates in the array.
    early = m < 3
    y = np.where(early, y - 1, y)
    m = np.where(early, m + 12, m)

    mm3 = m - 3
    c7 = y // 400
    x6 = y % 400
    c6 = x6 // 100
    x5 = x6 % 100
    c5 = x5 // 4
    c4 = x5 % 4
    c3 = mm3 // 5
    x2 = mm3 % 5
    c2 = x2 // 2
    c1 = x2 % 2
    jd = (146097 * c7 + 36524 * c6 + 1461 * c5 + 365 * c4
          + 153 * c3 + 61 * c2 + 31 * c1 + d + day_offset)
    if scalar:
        jd = jd[0]
    return jd
def photonPeaks(x,amps=None,sigma=1,gain=5):
    """Sum of peaks evaluated on ``x``, one per entry of ``amps``.

    Each peak is ``gauss(x, amp, n*gain, sig)``.

    Parameters
    ----------
    x : array on which the peaks are evaluated; the result has its shape
        and dtype (``np.zeros_like``).
    amps : sequence of amplitudes, in which case the peak numbers ``n`` are
        0, 1, 2, ..., or sequence of ``(n, amplitude)`` pairs.
    sigma : a single value shared by all peaks, or one value per peak.
    gain : multiplier applied to the peak number ``n``.

    Raises
    ------
    TypeError
        If ``amps`` is not given.
    """
    if amps is None:
        raise TypeError("amps must be a sequence of amplitudes or (n, amplitude) pairs")

    res = np.zeros_like(x)
    if len(amps) == 0:
        # No peaks to add.
        return res

    if np.iterable(amps[0]):
        ns,amps = zip(*amps)
    else:
        # range() is available on both Python 2 and 3 (xrange is not).
        ns = range(len(amps))

    if np.iterable(sigma):
        for n,amp,sig in zip(ns,amps,sigma):
            res += gauss(x,amp,n*gain,sig)
    else:
        for n,amp in zip(ns,amps):
            res += gauss(x,amp,n*gain,sigma)
    return res
def check_vector2_arg(arg, n):
    """Expand ``arg`` into ``n`` two-component entries.

    Accepted forms:

    * a scalar ``a``: every entry becomes ``(a, a)``;
    * ``n`` scalars: entry ``i`` becomes ``(a_i, a_i)``;
    * ``n`` iterables: returned unchanged;
    * a sequence of length 2 (when ``n != 2``): repeated ``n`` times.

    Any other sequence is returned unchanged.

    Raises
    ------
    ValueError
        If ``arg`` is None.
    """
    # This check has to come first: None is not iterable, so it would
    # otherwise be expanded like a scalar and the error could never fire.
    if arg is None:
        raise ValueError('Received none but need 1 or 2 or %d scalars' % n)
    if not np.iterable(arg):
        return [(arg, arg)] * n
    if len(arg) == n:
        if np.iterable(arg[0]):
            return arg
        return [(ai,ai) for ai in arg]
    if len(arg) == 2:
        return [arg] * n
    return arg
def set_xlim(ax, lim=None, labels=[], show_lim=True, size=tick_label_size, axis='x',):
    """Set the limits of an axis and place ticks only at those limits.

    Parameters
    ----------
    ax : a single axes object or an iterable of them; iterables are handled
        one axes at a time.
    lim : limits to apply; when None the axes' current limits are kept.
    labels : tick labels to show instead of the limit values.  The default
        list is never modified.
    show_lim : whether to label the ticks with the limit values when no
        ``labels`` are given.  May be an iterable (one flag per axes) when
        ``ax`` is iterable.
    size : tick label size.
    axis : 'x' or 'y'.

    Raises
    ------
    ValueError
        If ``axis`` is neither 'x' nor 'y'.
    """
    if np.iterable(ax):
        if np.iterable(show_lim):
            # Pair each axes with its own flag (stops at the shorter input).
            pairs = zip(ax, show_lim)
        else:
            pairs = ((single_ax, show_lim) for single_ax in ax)
        for single_ax, single_show in pairs:
            set_xlim(single_ax, lim, labels=labels, show_lim=single_show,
                     size=size, axis=axis)
        return

    # get axis-dependent functions
    if axis == 'x':
        ticklabel_fn = ax.set_xticklabels
        lim_fn = ax.set_xlim
        get_lim = ax.get_xlim
        tick_fn = ax.set_xticks
        get_ticks = ax.xaxis.get_major_ticks
        tick_alignment = ['left', 'right']
        align_dim = 'set_ha'
    elif axis == 'y':
        ticklabel_fn = ax.set_yticklabels
        lim_fn = ax.set_ylim
        get_lim = ax.get_ylim
        tick_fn = ax.set_yticks
        get_ticks = ax.yaxis.get_major_ticks
        tick_alignment = ['bottom', 'top']
        align_dim = 'set_va'
    else:
        raise ValueError("axis must be 'x' or 'y', got %r" % (axis,))

    # Identity test: ``lim == None`` is element-wise for array limits.
    if lim is None:
        lim = get_lim()

    lim_fn(lim)
    tick_fn(lim)

    if len(labels) > 0:
        ticklabel_fn(labels, size=size)
    elif show_lim:
        ticklabel_fn(lim, size=size)
    else:
        ticklabel_fn([], size=size)

    ticks = get_ticks()
    try:
        getattr(ticks[0].label1, align_dim)(tick_alignment[0])
        getattr(ticks[-1].label1, align_dim)(tick_alignment[1])
    except Exception:
        # Aligning the outer tick labels is best effort only.
        pass
def _get_k_variables(self, k, m, c=None, coord="k"):
    """Return the concentration and the dimensionless scale parameter.

    Parameters
    ----------
    k : scalar or array
        Wavenumber (``coord="k"``) or the scale parameter itself
        (``coord="kappa"``).
    m : scalar or array
        Mass.
    c : optional
        Concentration; computed from ``self.cm_relation(m)`` when omitted.
    coord : {"k", "kappa"}
        How ``k`` is to be interpreted.

    Returns
    -------
    c : concentration.
    K : array with at least one dimension.  For ``coord="k"`` with both
        ``k`` and the scale radius iterable this is their outer product;
        otherwise it is the plain product.

    Raises
    ------
    ValueError
        If ``coord`` is neither "k" nor "kappa".
    """
    if c is None:
        c = self.cm_relation(m)
    r_s = self._rs_from_m(m, c)

    if coord == "k":
        if np.iterable(k) and np.iterable(r_s):
            K = np.outer(k, r_s)
        else:
            K = k * r_s
    elif coord == "kappa":
        K = k
    else:
        # Previously this fell through to an UnboundLocalError on return.
        raise ValueError("coord must be 'k' or 'kappa', got %r" % (coord,))
    return c, np.atleast_1d(K)
def print_params(params):
    """ Print the name/value pairs of a parameter dict, one per line.

    A blank line is printed first.  The key 'ks' is shown as 'Ks'.  Iterable
    values are printed with ``str``; all others with eight significant
    digits.
    """
    print()
    for name, value in params.items():
        label = 'Ks' if name == 'ks' else name
        if np.iterable(value):
            line = '{0:5}: {1!s}'.format(label, value)
        else:
            line = '{:5}: {: .8g}'.format(label, value)
        print(line)
def randomVector(key, seed, k):
    """Return ``k`` uniform random numbers determined by ``key`` and ``seed``.

    ``key`` (a scalar or an iterable) and ``seed`` (an iterable) are
    concatenated and used to seed NumPy's global random generator, so this
    call resets the global random state.
    """
    key = key if np.iterable(key) else [key]

    # input checking
    assert(np.iterable(key))
    assert(np.iterable(seed))

    # create unique key
    seed_sequence = list(key) + list(seed)
    np.random.seed(seed_sequence)

    # generate random output
    return np.random.rand(k)
def load_features(cm, _cxs=None, force_recomp=False):
    """Load features for the chips flagged dirty (or all, if forced).

    Parameters
    ----------
    cm : manager object providing ``get_valid_cxs``, the ``cx2_*`` arrays
        and ``hs`` (project type).
    _cxs : chip indexes to consider: None (all valid ones), a list, a single
        integer, or an array.
    force_recomp : treat every requested chip as dirty and recompute its
        features.

    Returns
    -------
    None when nothing is dirty, True after loading.

    Side effects: fills ``cm.cx2_fpts`` / ``cm.cx2_fdsc`` and clears
    ``cm.cx2_dirty_bit`` for each loaded chip.
    """
    # Normalize the requested indexes to an array.
    if _cxs is None:
        cxs = cm.get_valid_cxs()
    elif type(_cxs) is types.ListType:
        cxs = np.array(_cxs)
    elif type(_cxs) in [types.IntType, types.LongType, np.uint32]:
        cxs = np.array([_cxs])
    else:
        cxs = _cxs
    count_feat = 0
    # With force_recomp set, every requested chip counts as dirty.
    is_dirty = np.bitwise_or(cm.cx2_dirty_bit[cxs], force_recomp)
    num_samp = cxs.size
    num_dirty = np.sum(is_dirty)
    # HACKS
    # Wrap scalar results so the indexing below works.
    if not np.iterable(is_dirty):
        is_dirty = np.array([is_dirty])
    if not np.iterable(cxs):
        cxs = np.array([cxs])
    load_cx = cxs[is_dirty]

    num_clean = num_samp - num_dirty
    #logdbg('Loading Features: Dirty=%d ; #Clean=%d' % (num_dirty, num_clean))
    if num_dirty == 0:
        return
    logio('Loading %d Feature Reps' % num_dirty)
    am = cm.hs.am
    for cx in iter(load_cx):
        cid = cm.cx2_cid[cx]
        # Non-positive ids are reported and skipped.
        if cid <= 0:
            logwarn('WARNING: IX='+str(cx)+' is invalid'); continue
        chiprep_fpath = cm.hs.iom.get_chiprep_fpath(cid)
        # Ensure that the features exists
        if force_recomp or not os.path.exists(chiprep_fpath):
            logio('Computing and saving features of cid='+str(cid))
            hotspotter.ChipFunctions.precompute_chipreps(cm.hs, [cx], num_procs=1, force_recompute=force_recomp)
        # Load the features
        logdbg('Loading features in '+chiprep_fpath)
        npz = np.load(chiprep_fpath)
        # The archive's first two unnamed arrays are stored as fpts / fdsc.
        fpts = npz['arr_0']
        fdsc = npz['arr_1']
        npz.close()
        cm.cx2_fpts[cx] = fpts
        cm.cx2_fdsc[cx] = fdsc
        cm.cx2_dirty_bit[cx] = False
        count_feat += len(fpts)

    logdbg('* Loaded '+str(count_feat)+' keypoints and fdscriptors' )
    return True
def ylabel(ax, label, **kwargs):
    """Label the y axis of one axes object or of every axes in an iterable.

    Extra keyword arguments are forwarded to ``axis_label``; an ``axis``
    keyword is discarded because this function always labels the y axis.

    Returns the result of ``axis_label`` for a single axes and None for an
    iterable.
    """
    # kwargs['offset'] = kwargs.pop('offset', -0.03)
    if np.iterable(ax):
        # An explicit loop: map() is lazy on Python 3, so using it purely
        # for side effects would label nothing.
        for single_ax in ax:
            ylabel(single_ax, label, **kwargs)
        return None
    kwargs.pop('axis', None)
    return axis_label(ax, label, axis='y', **kwargs)
def cutout(yx, half_size, shape=None):
    """Return an index that cuts a subarray out of a 2-D array.

    Parameters
    ----------
    yx : array of ints
        The center of the cutout.
    half_size : int or array of ints
        The half size of the cutout along each axis; a single int is used
        for both axes.  Away from the edges the cutout has shape
        `2 * half_size + 1`.
    shape : tuple, optional
        If provided, the slices will not extend beyond these axis lengths.

    Returns
    -------
    s : tuple of two slices
        One slice per axis.  The start of each slice is never below 0.
    """
    if shape is None:
        shape = (inf, inf)
    if not np.iterable(half_size):
        half_size = (half_size, half_size)

    def _axis_slice(center, half, length):
        # Clip the window to [0, length).
        start = max(center - half, 0)
        stop = min(center + half + 1, length)
        return slice(start, stop)

    return (_axis_slice(yx[0], half_size[0], shape[0]),
            _axis_slice(yx[1], half_size[1], shape[1]))
def _call(
    self,
    X: np.ndarray,
    Y: Optional[np.ndarray] = None,
    eval_gradient: bool = False,
    active: Optional[np.ndarray] = None,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Return the kernel k(X, Y) and optionally its gradient.

    The kernel value for a pair of rows is ``exp(-sum_d [x_d != y_d] /
    (2 * length_scale_d**2))``, i.e. it depends only on which features
    differ.

    Parameters
    ----------
    X : [array-like, shape=(n_samples_X, n_features)]
        Left argument of the returned kernel k(X, Y)
    Y : [array-like, shape=(n_samples_Y, n_features) or None(default)]
        Right argument of the returned kernel k(X, Y). If None, k(X, X)
        is evaluated instead.
    eval_gradient : [bool, False(default)]
        Determines whether the gradient with respect to the kernel
        hyperparameter is determined. Only supported when Y is None.
    active : np.ndarray (optional)
        If given, the kernel matrix is multiplied element-wise by it before
        the gradient is computed.

    Returns
    -------
    K : [array-like, shape=(n_samples_X, n_samples_Y)]
        Kernel k(X, Y)

    K_gradient : [array-like, shape=(n_samples_X, n_samples_X, n_dims)]
        The gradient of the kernel k(X, X) with respect to the
        hyperparameter of the kernel. Only returned when eval_gradient
        is True.

    Raises
    ------
    ValueError
        If ``eval_gradient`` is requested together with an explicit ``Y``.

    Note
    ----
    Code partially copied from skopt (https://github.com/scikit-optimize).
    Made small changes to only compute necessary values and use scikit-learn helper functions.
    """
    X = np.atleast_2d(X)
    length_scale = sklearn.gaussian_process.kernels._check_length_scale(X, self.length_scale)

    if Y is not None:
        if eval_gradient:
            raise ValueError("gradient can be evaluated only when Y != X")
        Y = np.atleast_2d(Y)
    else:
        Y = X

    # True wherever a feature of an X row differs from that of a Y row.
    differs = np.expand_dims(X, axis=1) != Y
    exponent = (-1 / (2 * length_scale**2) * differs).sum(axis=2)
    K = np.exp(exponent)

    if active is not None:
        K = K * active

    if not eval_gradient:
        return K

    # dK / d theta = (dK / dl) * (dl / d theta)
    # theta = log(l) => dl / d (theta) = e^theta = l
    # dK / d theta = l * dK / dl

    # dK / dL computation
    per_dimension = np.iterable(length_scale) and length_scale.shape[0] > 1
    if per_dimension:
        grad = (np.expand_dims(K, axis=-1) * np.array(differs, dtype=np.float32))
    else:
        grad = np.expand_dims(K * np.sum(differs, axis=2), axis=-1)

    grad *= (1 / length_scale**3)

    return K, grad
def sliding_window_view(x, window_shape, axis=None, *,
                        subok=False, writeable=False):
    """
    Create a sliding window view into the array with the given window shape.

    Also known as rolling or moving window, the window slides across all
    dimensions of the array and extracts subsets of the array at all window
    positions.

    .. versionadded:: 1.20.0

    Parameters
    ----------
    x : array_like
        Array to create the sliding window view from.
    window_shape : int or tuple of int
        Size of window over each axis that takes part in the sliding window.
        If `axis` is not present, must have same length as the number of input
        array dimensions. Single integers `i` are treated as if they were the
        tuple `(i,)`.
    axis : int or tuple of int, optional
        Axis or axes along which the sliding window is applied.
        By default, the sliding window is applied to all axes and
        `window_shape[i]` will refer to axis `i` of `x`.
        If `axis` is given as a `tuple of int`, `window_shape[i]` will refer to
        the axis `axis[i]` of `x`.
        Single integers `i` are treated as if they were the tuple `(i,)`.
    subok : bool, optional
        If True, sub-classes will be passed-through, otherwise the returned
        array will be forced to be a base-class array (default).
    writeable : bool, optional
        When true, allow writing to the returned view. The default is false,
        as this should be used with caution: the returned view contains the
        same memory location multiple times, so writing to one location will
        cause others to change.

    Returns
    -------
    view : ndarray
        Sliding window view of the array. The sliding window dimensions are
        inserted at the end, and the original dimensions are trimmed as
        required by the size of the sliding window.
        That is, ``view.shape = x_shape_trimmed + window_shape``, where
        ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less
        than the corresponding window size.

    Raises
    ------
    ValueError
        If `window_shape` contains negative values, if its length does not
        match the number of axes, or if a window is larger than the
        corresponding array dimension.
    """
    window_shape = (tuple(window_shape)
                    if np.iterable(window_shape)
                    else (window_shape,))
    # first convert input to array, possibly keeping subclass.
    # asarray/asanyarray copy only when needed on every NumPy version;
    # ``np.array(..., copy=False)`` raises on NumPy 2 if a copy is required
    # (e.g. for list input).
    x = np.asanyarray(x) if subok else np.asarray(x)

    window_shape_array = np.array(window_shape)
    if np.any(window_shape_array < 0):
        raise ValueError("`window_shape` cannot contain negative values")

    if axis is None:
        axis = tuple(range(x.ndim))
        if len(window_shape) != len(axis):
            raise ValueError(
                f"Since axis is `None`, must provide "
                f"window_shape for all dimensions of `x`; "
                f"got {len(window_shape)} window_shape elements "
                f"and `x.ndim` is {x.ndim}.")
    else:
        axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True)
        if len(window_shape) != len(axis):
            raise ValueError(
                f"Must provide matching length window_shape and "
                f"axis; got {len(window_shape)} window_shape "
                f"elements and {len(axis)} axes elements.")

    # Each window dimension reuses the stride of the axis it slides along.
    out_strides = x.strides + tuple(x.strides[ax] for ax in axis)

    # note: same axis can be windowed repeatedly
    x_shape_trimmed = list(x.shape)
    for ax, dim in zip(axis, window_shape):
        if x_shape_trimmed[ax] < dim:
            raise ValueError(
                "window shape cannot be larger than input array shape")
        x_shape_trimmed[ax] -= dim - 1
    out_shape = tuple(x_shape_trimmed) + window_shape
    return as_strided(x, strides=out_strides, shape=out_shape,
                      subok=subok, writeable=writeable)
def regularized_function(x, y, func, bins=100, brange=None):
    """Compute *func()* over data aggregated in bins.

    ``(x,y) --> (x', func(Y'))``  with ``Y' = {y: y(x) where x in x' bin}``

    First the data is collected in bins x' along x and then *func* is
    applied to all data points Y' that have been collected in the bin.

    .. function:: func(y) -> float

       *func* takes exactly one argument, a numpy 1D array *y* (the
       values in a single bin of the histogram), and reduces it to one
       scalar float.

    .. Note:: *x* and *y* must be 1D arrays.

    :Arguments:
       x
          abscissa values (for binning)
       y
          ordinate values (func is applied)
       func
          a callable that reduces a 1D array to a scalar, func(Y')
       bins
          number of bins or an array of monotonically increasing bin edges
       brange
          limits ``(min, max)``; only used when *bins* is a number

    :Returns:
       F,edges
          function and edges (``midpoints = 0.5*(edges[:-1]+edges[1:])``)

    :Raises:
       TypeError
          if *x* or *y* is not one-dimensional
       AttributeError
          if *brange* is given with ``min > max``
       ValueError
          if an explicit *bins* array is not monotonically increasing

    (This function originated as
    :func:`recsql.sqlfunctions.regularized_function`.)"""
    _x = numpy.asarray(x)
    _y = numpy.asarray(y)

    if len(_x.shape) != 1 or len(_y.shape) != 1:
        raise TypeError("Can only deal with 1D arrays.")

    # setup of bins (taken from numpy.histogram)
    if (brange is not None):
        mn, mx = brange
        if (mn > mx):
            raise AttributeError(
                'max must be larger than min in range parameter.')

    if not numpy.iterable(bins):
        if brange is None:
            brange = (_x.min(), _x.max())
        mn, mx = [float(mi) for mi in brange]
        if mn == mx:
            # degenerate range: widen it so that the bins have nonzero width
            mn -= 0.5
            mx += 0.5
        bins = numpy.linspace(mn, mx, bins + 1, endpoint=True)
    else:
        bins = numpy.asarray(bins)
        if (numpy.diff(bins) < 0).any():
            raise ValueError('bins must increase monotonically.')

    sorting_index = numpy.argsort(_x)
    sx = _x[sorting_index]
    sy = _y[sorting_index]

    # boundaries in SORTED data that demarcate bins; position in bin_index
    # is the bin number
    bin_index = numpy.r_[sx.searchsorted(bins[:-1], 'left'),
                         sx.searchsorted(bins[-1], 'right')]

    # naive implementation: apply operator to each chunk = sy[start:stop]
    # separately
    #
    # It's not clear to me how one could effectively block this procedure (cf
    # block = 65536 in numpy.histogram) because there does not seem to be a
    # general way to combine the chunks for different blocks, just think of
    # func=median
    F = numpy.zeros(len(bins) - 1)  # final function
    # The builtin ``zip`` is used instead of the Python-2-only
    # ``itertools.izip``; the pairs are consumed immediately either way.
    F[:] = [func(sy[start:stop])
            for start, stop in zip(bin_index[:-1], bin_index[1:])]
    return F, bins
def add(self, patchlabel='', flows=None, orientations=None, labels='',
        trunklength=1.0, pathlengths=0.25, prior=None, connect=(0, 0),
        rotation=0, **kwargs):
    """
    Add a simple Sankey diagram with flows at the same hierarchical level.

    Parameters
    ----------
    patchlabel : str
        Label to be placed at the center of the diagram.
        Note that *label* (not *patchlabel*) can be passed as keyword
        argument to create an entry in the legend.

    flows : list of float
        Array of flow values.  By convention, inputs are positive and
        outputs are negative.

        Flows are placed along the top of the diagram from the inside out
        in order of their index within *flows*.  They are placed along the
        sides of the diagram from the top down and along the bottom from
        the outside in.

        If the sum of the inputs and outputs is
        nonzero, the discrepancy will appear as a cubic Bezier curve along
        the top and bottom edges of the trunk.

        Flows whose magnitude is below the instance's tolerance are not
        drawn and cannot be used in a connection.

    orientations : list of {-1, 0, 1}
        List of orientations of the flows (or a single orientation to be
        used for all flows).  Valid values are 0 (inputs from
        the left, outputs to the right), 1 (from and to the top) or -1
        (from and to the bottom).

    labels : list of (str or None)
        List of labels for the flows (or a single label to be used for all
        flows).  Each label may be *None* (no label), or a labeling string.
        If an entry is a (possibly empty) string, then the quantity for the
        corresponding flow will be shown below the string.  However, if
        the *unit* of the main diagram is None, then quantities are never
        shown, regardless of the value of this argument.

    trunklength : float
        Length between the bases of the input and output groups (in
        data-space units).

    pathlengths : list of float
        List of lengths of the vertical arrows before break-in or after
        break-away.  If a single value is given, then it will be applied to
        the first (inside) paths on the top and bottom, and the length of
        all other arrows will be justified accordingly.  The *pathlengths*
        are not applied to the horizontal inputs and outputs.

    prior : int
        Index of the prior diagram to which this diagram should be
        connected.

    connect : (int, int)
        A (prior, this) tuple indexing the flow of the prior diagram and
        the flow of this diagram which should be connected.  If this is the
        first diagram or *prior* is *None*, *connect* will be ignored.

    rotation : float
        Angle of rotation of the diagram in degrees.  The interpretation of
        the *orientations* argument will be rotated accordingly (e.g., if
        *rotation* == 90, an *orientations* entry of 1 means to/from the
        left).  *rotation* is ignored if this diagram is connected to an
        existing one (using *prior* and *connect*).

    Returns
    -------
    Sankey
        The current `.Sankey` instance.

    Raises
    ------
    ValueError
        If the shapes of *orientations*, *labels* or *pathlengths* are
        incompatible with *flows*, if *trunklength* is negative, if an
        orientation is not -1, 0 or 1, or if the requested connection
        (*prior*, *connect*) is out of range, is not balanced within the
        tolerance, or goes through a flow that is below the tolerance.

    Other Parameters
    ----------------
    **kwargs
       Additional keyword arguments set `matplotlib.patches.PathPatch`
       properties, listed below.  For example, one may want to use
       ``fill=False`` or ``label="A legend entry"``.

    %(Patch)s

    See Also
    --------
    Sankey.finish
    """
    # Check and preprocess the arguments.
    if flows is None:
        flows = np.array([1.0, -1.0])
    else:
        flows = np.array(flows)
    n = flows.shape[0]  # Number of flows
    if rotation is None:
        rotation = 0
    else:
        # In the code below, angles are expressed in deg/90.
        rotation /= 90.0
    if orientations is None:
        orientations = 0
    try:
        orientations = np.broadcast_to(orientations, n)
    except ValueError:
        raise ValueError(
            f"The shapes of 'flows' {np.shape(flows)} and 'orientations' "
            f"{np.shape(orientations)} are incompatible") from None
    try:
        labels = np.broadcast_to(labels, n)
    except ValueError:
        raise ValueError(
            f"The shapes of 'flows' {np.shape(flows)} and 'labels' "
            f"{np.shape(labels)} are incompatible") from None
    if trunklength < 0:
        raise ValueError(
            "'trunklength' is negative, which is not allowed because it "
            "would cause poor layout")
    if np.abs(np.sum(flows)) > self.tolerance:
        _log.info(
            "The sum of the flows is nonzero (%f; patchlabel=%r); "
            "is the system not at steady state?", np.sum(flows), patchlabel)
    scaled_flows = self.scale * flows
    gain = sum(max(flow, 0) for flow in scaled_flows)
    loss = sum(min(flow, 0) for flow in scaled_flows)
    if prior is not None:
        if prior < 0:
            raise ValueError("The index of the prior diagram is negative")
        if min(connect) < 0:
            raise ValueError(
                "At least one of the connection indices is negative")
        if prior >= len(self.diagrams):
            raise ValueError(
                f"The index of the prior diagram is {prior}, but there "
                f"are only {len(self.diagrams)} other diagrams")
        if connect[0] >= len(self.diagrams[prior].flows):
            raise ValueError(
                "The connection index to the source diagram is {}, but "
                "that diagram has only {} flows".format(
                    connect[0], len(self.diagrams[prior].flows)))
        if connect[1] >= n:
            raise ValueError(
                f"The connection index to this diagram is {connect[1]}, "
                f"but this diagram has only {n} flows")
        if self.diagrams[prior].angles[connect[0]] is None:
            raise ValueError(
                f"The connection cannot be made, which may occur if the "
                f"magnitude of flow {connect[0]} of diagram {prior} is "
                f"less than the specified tolerance")
        flow_error = (self.diagrams[prior].flows[connect[0]] +
                      flows[connect[1]])
        if abs(flow_error) >= self.tolerance:
            raise ValueError(
                f"The scaled sum of the connected flows is {flow_error}, "
                f"which is not within the tolerance ({self.tolerance})")

    # Determine if the flows are inputs.  Entries stay None for flows whose
    # magnitude is below the tolerance.
    are_inputs = [None] * n
    for i, flow in enumerate(flows):
        if flow >= self.tolerance:
            are_inputs[i] = True
        elif flow <= -self.tolerance:
            are_inputs[i] = False
        else:
            _log.info(
                "The magnitude of flow %d (%f) is below the tolerance "
                "(%f).\nIt will not be shown, and it cannot be used in a "
                "connection.", i, flow, self.tolerance)

    # Determine the angles of the arrows (before rotation).  The angle stays
    # None for flows below the tolerance, whatever their orientation, so
    # that they are neither drawn nor labeled nor connectable.
    angles = [None] * n
    for i, (orient, is_input) in enumerate(zip(orientations, are_inputs)):
        if orient == 1:
            if is_input:
                angles[i] = DOWN
            elif is_input is False:
                # Be specific since is_input can be None.
                angles[i] = UP
        elif orient == 0:
            if is_input is not None:
                angles[i] = RIGHT
        else:
            if orient != -1:
                raise ValueError(
                    f"The value of orientations[{i}] is {orient}, "
                    f"but it must be -1, 0, or 1")
            if is_input:
                angles[i] = UP
            elif is_input is False:
                # Be specific since is_input can be None.
                angles[i] = DOWN

    # A connection needs the angle of this diagram's flow; fail with a clear
    # message (and before anything is drawn) if that flow is not shown.
    if prior is not None and angles[connect[1]] is None:
        raise ValueError(
            f"The connection cannot be made because the magnitude of "
            f"flow {connect[1]} of this diagram is less than the "
            f"specified tolerance")

    # Justify the lengths of the paths.
    if np.iterable(pathlengths):
        if len(pathlengths) != n:
            raise ValueError(
                f"The lengths of 'flows' ({n}) and 'pathlengths' "
                f"({len(pathlengths)}) are incompatible")
    else:  # Make pathlengths into a list.
        urlength = pathlengths
        ullength = pathlengths
        lrlength = pathlengths
        lllength = pathlengths
        # NOTE(review): the key here is the string 'RIGHT', not the RIGHT
        # constant, so the lookup below yields 0 for every angle.  Kept
        # as-is because changing it would alter existing layouts.
        d = dict(RIGHT=pathlengths)
        pathlengths = [d.get(angle, 0) for angle in angles]
        # Determine the lengths of the top-side arrows
        # from the middle outwards.
        for i, (angle, is_input, flow) in enumerate(zip(angles, are_inputs,
                                                        scaled_flows)):
            if angle == DOWN and is_input:
                pathlengths[i] = ullength
                ullength += flow
            elif angle == UP and not is_input:
                pathlengths[i] = urlength
                urlength -= flow  # Flow is negative for outputs.
        # Determine the lengths of the bottom-side arrows
        # from the middle outwards.
        for i, (angle, is_input, flow) in enumerate(reversed(list(zip(
                angles, are_inputs, scaled_flows)))):
            if angle == UP and is_input:
                pathlengths[n - i - 1] = lllength
                lllength += flow
            elif angle == DOWN and not is_input:
                pathlengths[n - i - 1] = lrlength
                lrlength -= flow
        # Determine the lengths of the left-side arrows
        # from the bottom upwards.
        has_left_input = False
        for i, (angle, is_input, spec) in enumerate(reversed(list(zip(
                angles, are_inputs, zip(scaled_flows, pathlengths))))):
            if angle == RIGHT:
                if is_input:
                    if has_left_input:
                        pathlengths[n - i - 1] = 0
                    else:
                        has_left_input = True
        # Determine the lengths of the right-side arrows
        # from the top downwards.
        has_right_output = False
        for i, (angle, is_input, spec) in enumerate(zip(
                angles, are_inputs, list(zip(scaled_flows, pathlengths)))):
            if angle == RIGHT:
                if not is_input:
                    if has_right_output:
                        pathlengths[i] = 0
                    else:
                        has_right_output = True

    # Begin the subpaths, and smooth the transition if the sum of the flows
    # is nonzero.
    urpath = [(Path.MOVETO, [(self.gap - trunklength / 2.0),  # Upper right
                             gain / 2.0]),
              (Path.LINETO, [(self.gap - trunklength / 2.0) / 2.0,
                             gain / 2.0]),
              (Path.CURVE4, [(self.gap - trunklength / 2.0) / 8.0,
                             gain / 2.0]),
              (Path.CURVE4, [(trunklength / 2.0 - self.gap) / 8.0,
                             -loss / 2.0]),
              (Path.LINETO, [(trunklength / 2.0 - self.gap) / 2.0,
                             -loss / 2.0]),
              (Path.LINETO, [(trunklength / 2.0 - self.gap),
                             -loss / 2.0])]
    llpath = [(Path.LINETO, [(trunklength / 2.0 - self.gap),  # Lower left
                             loss / 2.0]),
              (Path.LINETO, [(trunklength / 2.0 - self.gap) / 2.0,
                             loss / 2.0]),
              (Path.CURVE4, [(trunklength / 2.0 - self.gap) / 8.0,
                             loss / 2.0]),
              (Path.CURVE4, [(self.gap - trunklength / 2.0) / 8.0,
                             -gain / 2.0]),
              (Path.LINETO, [(self.gap - trunklength / 2.0) / 2.0,
                             -gain / 2.0]),
              (Path.LINETO, [(self.gap - trunklength / 2.0),
                             -gain / 2.0])]
    lrpath = [(Path.LINETO, [(trunklength / 2.0 - self.gap),  # Lower right
                             loss / 2.0])]
    ulpath = [(Path.LINETO, [self.gap - trunklength / 2.0,  # Upper left
                             gain / 2.0])]

    # Add the subpaths and assign the locations of the tips and labels.
    tips = np.zeros((n, 2))
    label_locations = np.zeros((n, 2))
    # Add the top-side inputs and outputs from the middle outwards.
    for i, (angle, is_input, spec) in enumerate(zip(
            angles, are_inputs, list(zip(scaled_flows, pathlengths)))):
        if angle == DOWN and is_input:
            tips[i, :], label_locations[i, :] = self._add_input(
                ulpath, angle, *spec)
        elif angle == UP and not is_input:
            tips[i, :], label_locations[i, :] = self._add_output(
                urpath, angle, *spec)
    # Add the bottom-side inputs and outputs from the middle outwards.
    for i, (angle, is_input, spec) in enumerate(reversed(list(zip(
            angles, are_inputs, list(zip(scaled_flows, pathlengths)))))):
        if angle == UP and is_input:
            tip, label_location = self._add_input(llpath, angle, *spec)
            tips[n - i - 1, :] = tip
            label_locations[n - i - 1, :] = label_location
        elif angle == DOWN and not is_input:
            tip, label_location = self._add_output(lrpath, angle, *spec)
            tips[n - i - 1, :] = tip
            label_locations[n - i - 1, :] = label_location
    # Add the left-side inputs from the bottom upwards.
    has_left_input = False
    for i, (angle, is_input, spec) in enumerate(reversed(list(zip(
            angles, are_inputs, list(zip(scaled_flows, pathlengths)))))):
        if angle == RIGHT and is_input:
            if not has_left_input:
                # Make sure the lower path extends
                # at least as far as the upper one.
                if llpath[-1][1][0] > ulpath[-1][1][0]:
                    llpath.append((Path.LINETO, [ulpath[-1][1][0],
                                                 llpath[-1][1][1]]))
                has_left_input = True
            tip, label_location = self._add_input(llpath, angle, *spec)
            tips[n - i - 1, :] = tip
            label_locations[n - i - 1, :] = label_location
    # Add the right-side outputs from the top downwards.
    has_right_output = False
    for i, (angle, is_input, spec) in enumerate(zip(
            angles, are_inputs, list(zip(scaled_flows, pathlengths)))):
        if angle == RIGHT and not is_input:
            if not has_right_output:
                # Make sure the upper path extends
                # at least as far as the lower one.
                if urpath[-1][1][0] < lrpath[-1][1][0]:
                    urpath.append((Path.LINETO, [lrpath[-1][1][0],
                                                 urpath[-1][1][1]]))
                has_right_output = True
            tips[i, :], label_locations[i, :] = self._add_output(
                urpath, angle, *spec)
    # Trim any hanging vertices.
    if not has_left_input:
        ulpath.pop()
        llpath.pop()
    if not has_right_output:
        lrpath.pop()
        urpath.pop()

    # Concatenate the subpaths in the correct order (clockwise from top).
    path = (urpath + self._revert(lrpath) + llpath + self._revert(ulpath) +
            [(Path.CLOSEPOLY, urpath[0][1])])

    # Create a patch with the Sankey outline.
    codes, vertices = zip(*path)
    vertices = np.array(vertices)

    def _get_angle(a, r):
        # Rotate an angle, leaving the None of hidden flows untouched.
        if a is None:
            return None
        else:
            return a + r

    if prior is None:
        if rotation != 0:  # By default, none of this is needed.
            angles = [_get_angle(angle, rotation) for angle in angles]
            rotate = Affine2D().rotate_deg(rotation * 90).transform_affine
            tips = rotate(tips)
            label_locations = rotate(label_locations)
            vertices = rotate(vertices)
        text = self.ax.text(0, 0, s=patchlabel, ha='center', va='center')
    else:
        # The rotation is dictated by the connection, so that the connected
        # arrows line up; the *rotation* argument is ignored here.
        rotation = (self.diagrams[prior].angles[connect[0]] -
                    angles[connect[1]])
        angles = [_get_angle(angle, rotation) for angle in angles]
        rotate = Affine2D().rotate_deg(rotation * 90).transform_affine
        tips = rotate(tips)
        offset = self.diagrams[prior].tips[connect[0]] - tips[connect[1]]
        translate = Affine2D().translate(*offset).transform_affine
        tips = translate(tips)
        label_locations = translate(rotate(label_locations))
        vertices = translate(rotate(vertices))
        kwds = dict(s=patchlabel, ha='center', va='center')
        text = self.ax.text(*offset, **kwds)
    if rcParams['_internal.classic_mode']:
        fc = kwargs.pop('fc', kwargs.pop('facecolor', '#bfd1d4'))
        lw = kwargs.pop('lw', kwargs.pop('linewidth', 0.5))
    else:
        fc = kwargs.pop('fc', kwargs.pop('facecolor', None))
        lw = kwargs.pop('lw', kwargs.pop('linewidth', None))
    if fc is None:
        fc = next(self.ax._get_patches_for_fill.prop_cycler)['color']
    patch = PathPatch(Path(vertices, codes), fc=fc, lw=lw, **kwargs)
    self.ax.add_patch(patch)

    # Add the path labels.
    texts = []
    for number, angle, label, location in zip(flows, angles, labels,
                                              label_locations):
        if label is None or angle is None:
            label = ''
        elif self.unit is not None:
            quantity = self.format % abs(number) + self.unit
            if label != '':
                label += "\n"
            label += quantity
        texts.append(self.ax.text(x=location[0], y=location[1],
                                  s=label,
                                  ha='center', va='center'))
    # Text objects are placed even if they are empty (as long as the
    # magnitude of the corresponding flow is larger than the tolerance) in
    # case the user wants to provide labels later.

    # Expand the size of the diagram if necessary.
    self.extent = (min(np.min(vertices[:, 0]),
                       np.min(label_locations[:, 0]),
                       self.extent[0]),
                   max(np.max(vertices[:, 0]),
                       np.max(label_locations[:, 0]),
                       self.extent[1]),
                   min(np.min(vertices[:, 1]),
                       np.min(label_locations[:, 1]),
                       self.extent[2]),
                   max(np.max(vertices[:, 1]),
                       np.max(label_locations[:, 1]),
                       self.extent[3]))
    # Include both vertices _and_ label locations in the extents; there are
    # cases where either could determine the margins (e.g., arrow
    # shoulders).

    # Add this diagram as a subdiagram.
    self.diagrams.append(
        SimpleNamespace(patch=patch, flows=flows, angles=angles, tips=tips,
                        text=text, texts=texts))

    # Allow a daisy-chained call structure (see docstring for the class).
    return self
def fit(self, X, y):
    """Fit the Gaussian process regression model to training data.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Training data

    y : array-like, shape = (n_samples, [n_output_dims])
        Target values

    Returns
    -------
    self : returns an instance of self.
    """
    if self.kernel is not None:
        self.kernel_ = clone(self.kernel)
    else:
        # No kernel given: fall back to a constant times an RBF kernel,
        # both with fixed hyperparameters.
        constant_part = C(1.0, constant_value_bounds="fixed")
        rbf_part = RBF(1.0, length_scale_bounds="fixed")
        self.kernel_ = constant_part * rbf_part

    self._rng = check_random_state(self.random_state)

    X, y = check_X_y(X, y, multi_output=True, y_numeric=True)

    # Optionally centre the targets; the mean is stored on the instance.
    if self.normalize_y:
        self._y_train_mean = np.mean(y, axis=0)
        y = y - self._y_train_mean
    else:
        self._y_train_mean = np.zeros(1)

    # An array-valued alpha must have one entry per sample; a length-one
    # array is unwrapped to its single element.
    if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:
        if self.alpha.shape[0] != 1:
            raise ValueError(
                "alpha must be a scalar or an array"
                " with same number of entries as y.(%d != %d)"
                % (self.alpha.shape[0], y.shape[0]))
        self.alpha = self.alpha[0]

    if self.copy_X_train:
        self.X_train_ = np.copy(X)
        self.y_train_ = np.copy(y)
    else:
        self.X_train_ = X
        self.y_train_ = y

    tune_hyperparameters = (self.optimizer is not None
                            and self.kernel_.n_dims > 0)
    if not tune_hyperparameters:
        self.log_marginal_likelihood_value_ = \
            self.log_marginal_likelihood(self.kernel_.theta)
    else:
        # Hyperparameters are chosen by minimising the negative
        # log-marginal likelihood, possibly from several starting points.
        def negative_lml(theta, eval_gradient=True):
            if not eval_gradient:
                return -self.log_marginal_likelihood(theta)
            lml, grad = self.log_marginal_likelihood(
                theta, eval_gradient=True)
            return -lml, -grad

        # First run: start from the theta specified in the kernel.
        first_run = self._constrained_optimization(
            negative_lml, self.kernel_.theta, self.kernel_.bounds)
        runs = [first_run]

        # Further runs: start from thetas drawn uniformly within the bounds
        # (the bounds are on the log scale, hence "log-uniform").
        if self.n_restarts_optimizer > 0:
            if not np.isfinite(self.kernel_.bounds).all():
                raise ValueError(
                    "Multiple optimizer restarts (n_restarts_optimizer>0) "
                    "requires that all bounds are finite.")
            bounds = self.kernel_.bounds
            for _ in range(self.n_restarts_optimizer):
                start = self._rng.uniform(bounds[:, 0], bounds[:, 1])
                runs.append(self._constrained_optimization(
                    negative_lml, start, bounds))

        # Keep the run with the minimal (negative) log-marginal likelihood.
        objective_values = [run[1] for run in runs]
        best_run = runs[np.argmin(objective_values)]
        self.kernel_.theta = best_run[0]
        self.log_marginal_likelihood_value_ = -np.min(objective_values)

    # Precompute quantities required for predictions which are independent
    # of actual query points.
    K = self.kernel_(self.X_train_)
    K[np.diag_indices_from(K)] += self.alpha
    try:
        self.L_ = cholesky(K, lower=True)  # Line 2
    except np.linalg.LinAlgError as exc:
        exc.args = ("The kernel, %s, is not returning a "
                    "positive definite matrix. Try gradually "
                    "increasing the 'alpha' parameter of your "
                    "GaussianProcessRegressor estimator."
                    % self.kernel_,) + exc.args
        raise
    else:
        # self.L_ changed, self._K_inv needs to be recomputed
        self._K_inv = None
    self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
    return self
def _is_list_like(obj):
    """Return True if *obj* is iterable but is neither a string nor a
    namedtuple-style tuple subclass."""
    if not np.iterable(obj):
        return False
    if isinstance(obj, basestring):
        return False
    # Consider namedtuples (any tuple subclass) to be not list like as they
    # are useful as indices; plain tuples still count as list like.
    is_tuple_subclass = isinstance(obj, tuple) and type(obj) is not tuple
    return not is_tuple_subclass
def __init__(self, artists, tolerance=5, formatter=None, point_labels=None,
             display='one-per-axes', draggable=False, hover=False,
             props_override=None, keybindings=True, date_format='%x %X',
             display_button=1, hide_button=3, keep_inside=True,
             **kwargs):
    """Create the data cursor and connect it to the relevant figure.

    Parameters
    -----------
    artists : a matplotlib artist or sequence of artists.
        The artists to make selectable and display information for.
    tolerance : number, optional
        The radius (in points) that the mouse click must be within to
        select the artist.
    formatter : function, optional
        A function that accepts arbitrary kwargs and returns a string that
        will be displayed with annotate. The `x`, `y`, `event`, `ind`, and
        `label` kwargs will always be present. See the
        ``mpldatacursor.datacursor`` function docstring for more
        information.
    point_labels : sequence or dict, optional
        Labels for "subitems" of an artist, passed to the formatter
        function as the `point_label` kwarg.  May be either a single
        sequence (used for all artists) or a dict of artist:sequence pairs.
    display : {'one-per-axes', 'single', 'multiple'}, optional
        Controls whether more than one annotation box will be shown.
    draggable : boolean, optional
        Controls whether or not the annotation box will be interactively
        draggable to a new location after being displayed. Default: False.
    hover : boolean, optional
        If True, the datacursor will "pop up" when the mouse hovers over an
        artist.  Defaults to False.  Enabling hover also sets
        `display="single"` and `draggable=False`.
    props_override : function, optional
        If specified, this function customizes the parameters passed into
        the formatter function and the x, y location that the datacursor
        "pop up" "points" to.  This is often useful to make the annotation
        "point" to a specific side or corner of an artist, regardless of
        the position clicked. The function is passed the same kwargs as the
        `formatter` function and is expected to return a dict with at least
        the keys "x" and "y" (and probably several others).
        Expected call signature: `props_dict = props_override(**kwargs)`
    keybindings : boolean or dict, optional
        By default, the keys "d" and "t" will be bound to hiding/showing
        all annotation boxes and toggling interactivity for datacursors,
        respectively.  "<shift> + <right>" and "<shift> + <left>" will be
        bound to moving the datacursor to the next and previous item in the
        sequence for artists that support it. If keybindings is False, the
        ability to hide/toggle datacursors interactively will be disabled.
        Alternatively, a dict mapping "hide", "toggle", "next", and
        "previous" to matplotlib key specifications may be specified to
        customize the keyboard shortcuts.  Note that hitting the "hide" key
        once will hide datacursors, and hitting it again will show all of
        the hidden datacursors.
    date_format : string, optional
        The strftime-style formatting string for dates. Used only if the x
        or y axes have been set to display dates. Defaults to "%x %X".
    display_button: int, optional
        The mouse button that triggers displaying an annotation box.
        Defaults to 1, for left-clicking. (Common options are
        1:left-click, 2:middle-click, 3:right-click)
    hide_button: int or None, optional
        The mouse button that triggers hiding the selected annotation box.
        Defaults to 3, for right-clicking. (Common options are
        1:left-click, 2:middle-click, 3:right-click, None:hiding disabled)
    keep_inside : boolean, optional
        Whether or not to adjust the x,y offset to keep the text box inside
        the figure. This option has no effect on draggable datacursors.
        Defaults to True. Note: Currently disabled on OSX and
        NbAgg/notebook backends.
    **kwargs : additional keyword arguments, optional
        Additional keyword arguments are passed on to annotate.

    Raises
    ------
    ValueError
        If `display` is not one of the supported options.
    """
    def filter_artists(artists):
        """Replace ContourSets, etc with their constituent artists."""
        output = []
        for item in artists:
            if isinstance(item, ContourSet):
                output += item.collections
            elif isinstance(item, Container):
                children = item.get_children()
                for child in children:
                    child._mpldatacursor_label = item.get_label()
                    child._mpldatacursor_parent = item
                output += children
            else:
                output.append(item)
        return output

    if not np.iterable(artists):
        artists = [artists]
    else:
        # ``artists`` is walked twice below, so materialize one-shot
        # iterables (e.g. generators) first.
        artists = list(artists)

    #-- Deal with contour sets... -------------------------------------
    # These are particularly difficult, as the original z-value array
    # is never associated with the ContourSet, and they're not "normal"
    # artists (they're not actually added to the axes). Not only that, but
    # the PatchCollections created by filled contours don't even fire a
    # pick event for points inside them, only on their edges. At any rate,
    # this is a somewhat hackish way of handling contours, but it works.
    self.artists = filter_artists(artists)
    self.contour_levels = {}
    for cs in [x for x in artists if isinstance(x, ContourSet)]:
        for z, artist in zip(cs.levels, cs.collections):
            self.contour_levels[artist] = z

    valid_display_options = ['single', 'one-per-axes', 'multiple']
    if display in valid_display_options:
        self.display = display
    else:
        # Explicit "+": adjacent string literals would be merged *before*
        # ``.join`` is applied and garble the message.
        raise ValueError('"display" must be one of the following: '
                         + ', '.join(valid_display_options))

    # Store ``draggable`` before applying the hover override, so that
    # hovering really forces non-draggable, single-annotation cursors.
    self.draggable = draggable
    self.hover = hover
    if self.hover:
        self.display = 'single'
        self.draggable = False

    self.keep_inside = keep_inside
    self.tolerance = tolerance
    self.point_labels = point_labels
    self.date_format = date_format
    self.props_override = props_override
    self.display_button = display_button
    self.hide_button = hide_button
    self.axes = tuple(set(art.axes for art in self.artists))
    self.figures = tuple(set(ax.figure for ax in self.axes))
    self._mplformatter = ScalarFormatter(useOffset=False, useMathText=True)
    self._hidden = False
    self._last_event = None
    self._last_annotation = None

    if self.draggable:
        # If we're dealing with draggable cursors, don't try to override
        # the x,y position.  Otherwise, dragging the cursor outside the
        # figure will have unexpected consequences.
        self.keep_inside = False

    if formatter is None:
        self.formatter = self._formatter
    else:
        self.formatter = formatter

    self._annotation_kwargs = kwargs
    self.annotations = {}
    if self.display != 'multiple':
        for ax in self.axes:
            self.annotations[ax] = self.annotate(ax, **kwargs)
            # Hide the annotation box until clicked...
            self.annotations[ax].set_visible(False)

    if keybindings:
        if keybindings is True:
            self.keybindings = self.default_keybindings
        else:
            # User-supplied dict: overlay it on a copy of the defaults.
            self.keybindings = self.default_keybindings.copy()
            self.keybindings.update(keybindings)
        for fig in self.figures:
            fig.canvas.mpl_connect('key_press_event', self._on_keypress)

    self.enable()

    # We need to make sure the DataCursor isn't garbage collected until the
    # figure is. Matplotlib's weak references won't keep this DataCursor
    # instance alive in all cases.
    for fig in self.figures:
        try:
            fig._mpldatacursors.append(self)
        except AttributeError:
            fig._mpldatacursors = [self]
def merge(sources, bounds=None, res=None, nodata=None, precision=7):
    """Copy valid pixels from input files to an output array.

    All files must have the same number of bands, data type, and
    coordinate reference system.

    Input files are merged in their listed order using the reverse
    painter's algorithm: a destination pixel is only written while it
    still holds the nodata value, so earlier sources take precedence.

    Geospatial bounds and resolution of the output, in the units of the
    input file coordinate reference system, may be provided.  Otherwise
    the bounds are the union of the bounds of all inputs and the
    resolution is taken from the first input file.

    Parameters
    ----------
    sources: list of source datasets
        Open rasterio RasterReader objects to be merged.
    bounds: tuple, optional
        Bounds of the output image (left, bottom, right, top).
        If not set, bounds are determined from bounds of input rasters.
    res: tuple, optional
        Output resolution in units of coordinate reference system. If not
        set, the resolution of the first raster is used. If a single value
        is passed, output pixels will be square.
    nodata: float, optional
        nodata value to use in output file. If not set, uses the nodata
        value in the first input raster.  A NaN nodata value is supported
        for floating point rasters.
    precision: int, optional
        Not used by this function.

    Returns
    -------
    tuple

        Two elements:

            dest: numpy ndarray
                Contents of all input rasters in single array.

            out_transform: affine.Affine()
                Information for mapping pixel coordinates in `dest` to
                another coordinate system
    """
    first = sources[0]
    first_res = first.res
    nodataval = first.nodatavals[0]
    dtype = first.dtypes[0]

    # Extent from option or extent of all inputs.
    if bounds:
        dst_w, dst_s, dst_e, dst_n = bounds
    else:
        # scan input files.
        xs = []
        ys = []
        for src in sources:
            left, bottom, right, top = src.bounds
            xs.extend([left, right])
            ys.extend([bottom, top])
        dst_w, dst_s, dst_e, dst_n = min(xs), min(ys), max(xs), max(ys)

    logger.debug("Output bounds: %r", (dst_w, dst_s, dst_e, dst_n))
    output_transform = Affine.translation(dst_w, dst_n)
    logger.debug("Output transform, before scaling: %r", output_transform)

    # Resolution/pixel size.
    if not res:
        res = first_res
    elif not np.iterable(res):
        res = (res, res)
    elif len(res) == 1:
        res = (res[0], res[0])
    output_transform *= Affine.scale(res[0], -res[1])
    logger.debug("Output transform, after scaling: %r", output_transform)

    # Compute output array shape. We guarantee it will cover the output
    # bounds completely.
    output_width = int(math.ceil((dst_e - dst_w) / res[0]))
    output_height = int(math.ceil((dst_n - dst_s) / res[1]))

    # Adjust bounds to fit.
    dst_e, dst_s = output_transform * (output_width, output_height)
    logger.debug("Output width: %d, height: %d", output_width, output_height)
    logger.debug("Adjusted bounds: %r", (dst_w, dst_s, dst_e, dst_n))

    # create destination array
    dest = np.zeros((first.count, output_height, output_width), dtype=dtype)

    if nodata is not None:
        nodataval = nodata
        logger.debug("Set nodataval: %r", nodataval)

    # NaN never compares equal to (or within a range of) anything, so it
    # needs dedicated handling both for the range check and for the
    # "is this destination pixel still empty" test further down.
    nodata_is_nan = False
    if nodataval is not None:
        # Only fill if the nodataval is within dtype's range.
        inrange = False
        kind = np.dtype(dtype).kind
        if kind in ('i', 'u'):
            info = np.iinfo(dtype)
            inrange = (info.min <= nodataval <= info.max)
        elif kind == 'f':
            if math.isnan(nodataval):
                nodata_is_nan = True
                inrange = True
            else:
                info = np.finfo(dtype)
                inrange = (info.min <= nodataval <= info.max)
        if inrange:
            dest.fill(nodataval)
        else:
            warnings.warn(
                "Input file's nodata value, %s, is beyond the valid "
                "range of its data type, %s. Consider overriding it "
                "using the --nodata option for better results." % (
                    nodataval, dtype))
    else:
        nodataval = 0

    for src in sources:
        # Real World (tm) use of boundless reads.
        # This approach uses the maximum amount of memory to solve the
        # problem. Making it more efficient is a TODO.

        # 1. Compute spatial intersection of destination and source.
        src_w, src_s, src_e, src_n = src.bounds

        int_w = src_w if src_w > dst_w else dst_w
        int_s = src_s if src_s > dst_s else dst_s
        int_e = src_e if src_e < dst_e else dst_e
        int_n = src_n if src_n < dst_n else dst_n

        # 2. Compute the source window.
        src_window = windows.from_bounds(
            int_w, int_s, int_e, int_n, src.transform)
        logger.debug("Src %s window: %r", src.name, src_window)

        src_window = src_window.round_shape()

        # 3. Compute the destination window.
        dst_window = windows.from_bounds(
            int_w, int_s, int_e, int_n, output_transform)

        # 4. Initialize temp array.
        tcount = first.count
        trows, tcols = (
            int(round(dst_window.height)), int(round(dst_window.width)))

        temp_shape = (tcount, trows, tcols)

        temp = src.read(out_shape=temp_shape, window=src_window,
                        boundless=False, masked=True)

        # 5. Copy elements of temp into dest, but only where dest is still
        # empty and the source pixel is valid.
        roff, coff = (
            int(round(dst_window.row_off)), int(round(dst_window.col_off)))

        region = dest[:, roff:roff + trows, coff:coff + tcols]
        if nodata_is_nan:
            region_nodata = np.isnan(region)
        else:
            region_nodata = region == nodataval
        np.copyto(
            region, temp,
            where=np.logical_and(region_nodata, np.logical_not(temp.mask)))

    return dest, output_transform
def polyint(c, m=1, k=[], lbnd=0, scl=1, axis=0):
    """
    Integrate a polynomial.

    Return the coefficients of the polynomial `c` integrated `m` times
    along `axis`, starting from the lower bound `lbnd`. After every single
    integration the series is **multiplied** by `scl` and the matching
    integration constant from `k` is added. The scaling factor is meant for
    linear changes of variable. ("Buyer beware": depending on what one is
    doing, one may want `scl` to be the reciprocal of what one might
    expect; see the Notes section below.)

    Coefficients are ordered from low to high degree along each axis, e.g.
    [1,2,3] represents ``1 + 2*x + 3*x**2`` while [[1,2],[1,2]] represents
    ``1 + 1*x + 2*y + 2*x*y`` if axis=0 is ``x`` and axis=1 is ``y``.

    Parameters
    ----------
    c : array_like
        Array of polynomial coefficients, ordered from low to high degree.
        Boolean and integer input is promoted to floating point. Arrays
        with more than one dimension are integrated along `axis` only.
    m : int, optional
        Order of integration, must be non-negative. (Default: 1)
        With ``m == 0`` a copy of the (possibly promoted) input is
        returned unchanged.
    k : {[], list, scalar}, optional
        Integration constant(s). The value of the first integral at
        ``lbnd`` is the first value in the list, the value of the second
        integral at ``lbnd`` is the second value, etc. Missing constants
        are taken to be zero, so ``k == []`` (the default) sets all of
        them to zero. A scalar is treated as a one-element list.
    lbnd : scalar, optional
        The lower bound of the integral. (Default: 0)
    scl : scalar, optional
        Following each integration the result is *multiplied* by `scl`
        before the integration constant is added. (Default: 1)
    axis : int, optional
        Axis over which the integral is taken. (Default: 0).

        .. versionadded:: 1.7.0

    Returns
    -------
    S : ndarray
        Coefficient array of the integral.

    Raises
    ------
    ValueError
        If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or
        ``np.ndim(scl) != 0``.

    See Also
    --------
    polyder

    Notes
    -----
    Note that the result of each integration is *multiplied* by `scl`. Why
    is this important to note? Say one is making a linear change of
    variable :math:`u = ax + b` in an integral relative to `x`. Then
    :math:`dx = du/a`, so one will need to set `scl` equal to
    :math:`1/a` - perhaps not what one would have first thought.

    Examples
    --------
    >>> from numpy.polynomial import polynomial as P
    >>> c = (1,2,3)
    >>> P.polyint(c) # should return array([0, 1, 1, 1])
    array([0.,  1.,  1.,  1.])
    >>> P.polyint(c,3) # should return array([0, 0, 0, 1/6, 1/12, 1/20])
     array([ 0.        ,  0.        ,  0.        ,  0.16666667,  0.08333333, # may vary
             0.05      ])
    >>> P.polyint(c,k=3) # should return array([3, 1, 1, 1])
    array([3.,  1.,  1.,  1.])
    >>> P.polyint(c,lbnd=-2) # should return array([6, 1, 1, 1])
    array([6.,  1.,  1.,  1.])
    >>> P.polyint(c,scl=-2) # should return array([0, -2, -2, -2])
    array([ 0., -2., -2., -2.])

    """
    # Work on a private copy so the in-place scaling below never touches
    # the caller's data.
    coef = np.array(c, ndmin=1, copy=True)
    if coef.dtype.char in '?bBhHiIlLqQpP':
        # Promote bool/integer coefficients by adding 0.0 instead of calling
        # astype, because astype doesn't preserve mask attribute.
        coef = coef + 0.0
    out_dtype = coef.dtype

    # The default list is only read, never mutated.
    constants = k if np.iterable(k) else [k]

    order = pu._deprecate_as_int(m, "the order of integration")
    iaxis = pu._deprecate_as_int(axis, "the axis")
    if order < 0:
        raise ValueError("The order of integration must be non-negative")
    if len(constants) > order:
        raise ValueError("Too many integration constants")
    if np.ndim(lbnd) != 0:
        raise ValueError("lbnd must be a scalar.")
    if np.ndim(scl) != 0:
        raise ValueError("scl must be a scalar.")
    iaxis = normalize_axis_index(iaxis, coef.ndim)

    if order == 0:
        return coef

    # Pad with zeros so there is exactly one constant per integration.
    constants = list(constants) + [0] * (order - len(constants))

    # Bring the integration axis to the front; index 0 is then the degree.
    coef = np.moveaxis(coef, iaxis, 0)
    for const in constants:
        n = len(coef)
        coef *= scl
        if n == 1 and np.all(coef[0] == 0):
            # Integrating the zero polynomial only contributes the constant.
            coef[0] += const
            continue
        raised = np.empty((n + 1,) + coef.shape[1:], dtype=out_dtype)
        raised[0] = coef[0] * 0
        raised[1] = coef[0]
        for j in range(1, n):
            raised[j + 1] = coef[j] / (j + 1)
        # Shift the constant term so the integral equals `const` at `lbnd`.
        raised[0] += const - polyval(lbnd, raised)
        coef = raised
    return np.moveaxis(coef, 0, iaxis)
def lagint(c, m=1, k=[], lbnd=0, scl=1, axis=0):
    """
    Integrate a Laguerre series.

    Return the Laguerre series coefficients `c` integrated `m` times along
    `axis`, starting from the lower bound `lbnd`. After every single
    integration the series is **multiplied** by `scl` and the matching
    integration constant from `k` is added. The scaling factor is meant for
    linear changes of variable. ("Buyer beware": depending on what one is
    doing, one may want `scl` to be the reciprocal of what one might
    expect; see the Notes section below.)

    Coefficients are ordered from low to high degree along each axis, e.g.
    [1,2,3] represents the series ``L_0 + 2*L_1 + 3*L_2`` while
    [[1,2],[1,2]] represents ``1*L_0(x)*L_0(y) + 1*L_1(x)*L_0(y) +
    2*L_0(x)*L_1(y) + 2*L_1(x)*L_1(y)`` if axis=0 is ``x`` and axis=1 is
    ``y``.

    Parameters
    ----------
    c : array_like
        Array of Laguerre series coefficients. If `c` is multidimensional
        the different axis correspond to different variables with the
        degree in each axis given by the corresponding index. Boolean and
        integer input is converted to double precision.
    m : int, optional
        Order of integration, must be non-negative. (Default: 1)
        With ``m == 0`` a copy of the (possibly converted) input is
        returned unchanged.
    k : {[], list, scalar}, optional
        Integration constant(s). The value of the first integral at
        ``lbnd`` is the first value in the list, the value of the second
        integral at ``lbnd`` is the second value, etc. Missing constants
        are taken to be zero, so ``k == []`` (the default) sets all of
        them to zero. A scalar is treated as a one-element list.
    lbnd : scalar, optional
        The lower bound of the integral. (Default: 0)
    scl : scalar, optional
        Following each integration the result is *multiplied* by `scl`
        before the integration constant is added. (Default: 1)
    axis : int, optional
        Axis over which the integral is taken. (Default: 0).

        .. versionadded:: 1.7.0

    Returns
    -------
    S : ndarray
        Laguerre series coefficients of the integral.

    Raises
    ------
    ValueError
        If ``m < 0``, ``len(k) > m``, ``np.ndim(lbnd) != 0``, or
        ``np.ndim(scl) != 0``.

    See Also
    --------
    lagder

    Notes
    -----
    Note that the result of each integration is *multiplied* by `scl`.
    Why is this important to note? Say one is making a linear change of
    variable :math:`u = ax + b` in an integral relative to `x`. Then
    :math:`dx = du/a`, so one will need to set `scl` equal to
    :math:`1/a` - perhaps not what one would have first thought.

    Also note that, in general, the result of integrating a Laguerre
    series needs to be "reprojected" onto the Laguerre basis set. Thus,
    typically, the result of this function is "unintuitive," albeit
    correct; see Examples section below.

    Examples
    --------
    >>> from numpy.polynomial.laguerre import lagint
    >>> lagint([1,2,3])
    array([ 1.,  1.,  1., -3.])
    >>> lagint([1,2,3], m=2)
    array([ 1.,  0.,  0., -4.,  3.])
    >>> lagint([1,2,3], k=1)
    array([ 2.,  1.,  1., -3.])
    >>> lagint([1,2,3], lbnd=-1)
    array([11.5,  1. ,  1. , -3. ])
    >>> lagint([1,2], m=2, k=[1,2], lbnd=-1)
    array([ 11.16666667,  -5.        ,  -3.        ,   2.        ]) # may vary

    """
    # Work on a private copy so the in-place scaling below never touches
    # the caller's data.
    coef = np.array(c, ndmin=1, copy=True)
    if coef.dtype.char in '?bBhHiIlLqQpP':
        coef = coef.astype(np.double)

    # The default list is only read, never mutated.
    constants = k if np.iterable(k) else [k]

    order = pu._deprecate_as_int(m, "the order of integration")
    iaxis = pu._deprecate_as_int(axis, "the axis")
    if order < 0:
        raise ValueError("The order of integration must be non-negative")
    if len(constants) > order:
        raise ValueError("Too many integration constants")
    if np.ndim(lbnd) != 0:
        raise ValueError("lbnd must be a scalar.")
    if np.ndim(scl) != 0:
        raise ValueError("scl must be a scalar.")
    iaxis = normalize_axis_index(iaxis, coef.ndim)

    if order == 0:
        return coef

    # Bring the integration axis to the front; index 0 is then the degree.
    coef = np.moveaxis(coef, iaxis, 0)
    # Pad with zeros so there is exactly one constant per integration.
    constants = list(constants) + [0] * (order - len(constants))
    for const in constants:
        n = len(coef)
        coef *= scl
        if n == 1 and np.all(coef[0] == 0):
            # Integrating the zero series only contributes the constant.
            coef[0] += const
            continue
        raised = np.empty((n + 1,) + coef.shape[1:], dtype=coef.dtype)
        # Each input coefficient c[j] contributes +c[j] to slot j and
        # -c[j] to slot j + 1 of the integrated series.
        raised[0] = coef[0]
        raised[1] = -coef[0]
        for j in range(1, n):
            raised[j] += coef[j]
            raised[j + 1] = -coef[j]
        # Shift the constant term so the integral equals `const` at `lbnd`.
        raised[0] += const - lagval(lbnd, raised)
        coef = raised
    return np.moveaxis(coef, 0, iaxis)
def draw_networkx_edges(
    G,
    pos,
    edgelist=None,
    width=1.0,
    edge_color="k",
    style="solid",
    alpha=None,
    arrowstyle="-|>",
    arrowsize=3,
    edge_cmap=None,
    edge_vmin=None,
    edge_vmax=None,
    ax=None,
    arrows=True,
    label=None,
    node_size=300,
    nodelist=None,
    node_shape="o",
    connectionstyle=None,
    min_source_margin=0,
    min_target_margin=0,
):
    """Draw the edges of the graph G.

    Adjusted from networkx.

    Undirected graphs (or any graph when ``arrows`` is false) are drawn as
    a single ``LineCollection``; directed graphs with ``arrows`` enabled
    are drawn as one ``FancyArrowPatch`` per edge.

    Parameters
    ----------
    G : graph
        Object providing ``edges()``, ``nodes()`` and ``is_directed()``.
    pos : mapping
        Maps each node to its ``(x, y)`` position.
    edgelist : sequence of edges, optional
        Edges to draw; the first two items of each edge are the end nodes.
        Defaults to ``list(G.edges())``.
    width : float or iterable of floats
        Line width(s). In arrow mode an iterable is cycled over the edges.
    edge_color : color, sequence of colors, or sequence of numbers
        If it is an iterable of numbers with one entry per edge, the
        numbers are mapped to colors through `edge_cmap` and
        `edge_vmin`/`edge_vmax`. ``None`` is treated as ``"k"``.
    style : str
        Line style forwarded to matplotlib.
    alpha : float or None
        Transparency forwarded to matplotlib.
    arrowstyle : str
        Arrow style forwarded to ``FancyArrowPatch``.
    arrowsize : number
        Used as ``mutation_scale`` of each ``FancyArrowPatch``.
    edge_cmap : matplotlib Colormap, optional
        Colormap for numeric `edge_color`; defaults to ``plt.get_cmap()``.
    edge_vmin, edge_vmax : float, optional
        Normalization limits; default to min/max of numeric `edge_color`.
    ax : matplotlib Axes, optional
        Target axes; defaults to ``plt.gca()``.
    arrows : bool
        Draw arrows for directed graphs.
    label : str, optional
        Label set on the ``LineCollection`` (line mode only).
    node_size : number or sequence of numbers
        Marker size(s), indexed in parallel with `nodelist`; used to
        shorten arrows so that they stop near the marker edge.
    nodelist : list, optional
        Nodes matching `node_size`; defaults to ``list(G.nodes())``.
    node_shape : str
        Marker code; the markers in ``"s^>v<d"`` get extra clearance.
    connectionstyle : str, optional
        Forwarded to ``FancyArrowPatch``.
    min_source_margin, min_target_margin : number
        Lower bounds for the gap at the tail/head end of each arrow.

    Returns
    -------
    matplotlib.collections.LineCollection, list of FancyArrowPatch, or None
        ``None`` when there are no edges to draw.

    Raises
    ------
    ImportError
        If matplotlib cannot be imported.
    """
    try:
        from numbers import Number

        import matplotlib.pyplot as plt
        from matplotlib.collections import LineCollection
        from matplotlib.colors import colorConverter, Colormap, Normalize
        from matplotlib.patches import FancyArrowPatch
    except ImportError as err:
        # Keep the original cause attached for easier debugging.
        raise ImportError("Matplotlib required for draw()") from err
    except RuntimeError:
        print("Matplotlib unable to open display")
        raise

    if ax is None:
        ax = plt.gca()

    if edgelist is None:
        edgelist = list(G.edges())

    # Length check (rather than truthiness) so that array edge lists do not
    # raise "truth value of an array is ambiguous".
    if len(edgelist) == 0:  # no edges!
        return None

    if nodelist is None:
        nodelist = list(G.nodes())

    # FancyArrowPatch handles color=None different from LineCollection
    if edge_color is None:
        edge_color = "k"

    # set edge positions
    edge_pos = np.asarray([(pos[e[0]], pos[e[1]]) for e in edgelist])

    # Check if edge_color is an array of floats and map to edge_cmap.
    # This is the only case handled differently from matplotlib
    if (
        np.iterable(edge_color)
        and (len(edge_color) == len(edge_pos))
        # builtin all(): np.alltrue no longer exists in NumPy 2.x
        and all(isinstance(c, Number) for c in edge_color)
    ):
        if edge_cmap is not None:
            assert isinstance(edge_cmap, Colormap)
        else:
            edge_cmap = plt.get_cmap()
        if edge_vmin is None:
            edge_vmin = min(edge_color)
        if edge_vmax is None:
            edge_vmax = max(edge_color)
        color_normal = Normalize(vmin=edge_vmin, vmax=edge_vmax)
        edge_color = [edge_cmap(color_normal(e)) for e in edge_color]

    if not G.is_directed() or not arrows:
        edge_collection = LineCollection(
            edge_pos,
            colors=edge_color,
            linewidths=width,
            antialiaseds=(1, ),
            linestyle=style,
            transOffset=ax.transData,
            alpha=alpha,
        )

        edge_collection.set_cmap(edge_cmap)
        edge_collection.set_clim(edge_vmin, edge_vmax)

        edge_collection.set_zorder(1)  # edges go behind nodes
        edge_collection.set_label(label)
        ax.add_collection(edge_collection)

        return edge_collection

    arrow_collection = None

    if G.is_directed() and arrows:
        # Note: Waiting for someone to implement arrow to intersection with
        # marker.  Meanwhile, this works well for polygons with more than 4
        # sides and circle.

        def to_marker_edge(marker_size, marker):
            """Return the arrow shrink distance for a marker of this size."""
            if marker in "s^>v<d":  # `large` markers need extra space
                return np.sqrt(2 * marker_size) / 2
            else:
                return np.sqrt(marker_size) / 2

        # Draw arrows with `matplotlib.patches.FancyarrowPatch`
        arrow_collection = []
        mutation_scale = arrowsize  # scale factor of arrow head

        # FancyArrowPatch doesn't handle color strings
        arrow_colors = colorConverter.to_rgba_array(edge_color, alpha)
        for i, (src, dst) in enumerate(edge_pos):
            x1, y1 = src
            x2, y2 = dst
            shrink_source = 0  # space from source to tail
            shrink_target = 0  # space from head to target
            if np.iterable(node_size):  # many node sizes
                source, target = edgelist[i][:2]
                source_node_size = node_size[nodelist.index(source)]
                target_node_size = node_size[nodelist.index(target)]
                shrink_source = to_marker_edge(source_node_size, node_shape)
                shrink_target = to_marker_edge(target_node_size, node_shape)
            else:
                shrink_source = shrink_target = to_marker_edge(
                    node_size, node_shape)

            # Enforce the caller-supplied minimum gaps.
            if shrink_source < min_source_margin:
                shrink_source = min_source_margin

            if shrink_target < min_target_margin:
                shrink_target = min_target_margin

            if len(arrow_colors) == len(edge_pos):
                arrow_color = arrow_colors[i]
            elif len(arrow_colors) == 1:
                arrow_color = arrow_colors[0]
            else:  # Cycle through colors
                arrow_color = arrow_colors[i % len(arrow_colors)]

            if np.iterable(width):
                if len(width) == len(edge_pos):
                    line_width = width[i]
                else:
                    line_width = width[i % len(width)]
            else:
                line_width = width

            arrow = FancyArrowPatch(
                (x1, y1),
                (x2, y2),
                arrowstyle=arrowstyle,
                shrinkA=shrink_source,
                shrinkB=shrink_target,
                mutation_scale=mutation_scale,
                color=arrow_color,
                linewidth=line_width,
                connectionstyle=connectionstyle,
                linestyle=style,
                zorder=1,
            )  # arrows go behind nodes

            # There seems to be a bug in matplotlib to make collections of
            # FancyArrowPatch instances. Until fixed, the patches are added
            # individually to the axes instance.
            arrow_collection.append(arrow)
            ax.add_patch(arrow)

    # update view: pad the data limits by 5% of the edge extent on each side
    minx = np.amin(np.ravel(edge_pos[:, :, 0]))
    maxx = np.amax(np.ravel(edge_pos[:, :, 0]))
    miny = np.amin(np.ravel(edge_pos[:, :, 1]))
    maxy = np.amax(np.ravel(edge_pos[:, :, 1]))

    w = maxx - minx
    h = maxy - miny
    padx, pady = 0.05 * w, 0.05 * h
    corners = (minx - padx, miny - pady), (maxx + padx, maxy + pady)
    ax.update_datalim(corners)
    ax.autoscale_view()

    ax.tick_params(
        axis="both",
        which="both",
        bottom=False,
        left=False,
        labelbottom=False,
        labelleft=False,
    )

    return arrow_collection
def rdmds(fnamearg,itrs=-1,machineformat='b',rec=None,fill_value=0,
          returnmeta=False,astype=float,region=None,lev=(),
          usememmap=False,mm=False,squeeze=True):
    """
    a     = rdmds(fname,...)
    a     = rdmds(fname,itrs,...)
    a,its,meta = rdmds(fname,...,returnmeta=True)

    Read meta-data files as written by MITgcm.

    Without itrs, will try to read

      fname.meta or fname.001.001.meta, ...

    If itrs is a list of integers or an integer, it will read the
    corresponding

      fname.000000iter.meta, ...

    If itrs is NaN, it will read all iterations for which files are found.
    If itrs is Inf, it will read the highest iteration found.
    NaN and Inf are recognised by value, so ``float('nan')``, ``numpy.nan``
    and NumPy floating scalars all work.

    fname may contain shell wildcards, which is useful for tile files
    organized into directories, e.g.,

      T = rdmds('prefix*/T', 2880)

    will read prefix0000/T.0000002880.*, prefix0001/T.0000002880.*, ...
    (and any others that match the wildcard, so be careful how you name
    things!)

    Returns:

        a    :: numpy array of the data read
        its  :: list of iteration numbers read (only if returnmeta=True)
        meta :: dictionary of metadata with lower-cased keys
                (only if returnmeta=True)

    Keyword arguments:

        machineformat :: endianness ('b' or 'l', default 'b')
        rec           :: list of records to read (default all)
                         useful for pickups and multi-field diagnostics files
        fill_value    :: fill value for missing (blank) tiles (default 0)
        astype        :: data type to return (default: double precision)
                         None: keep data type/precision of file
        region        :: (x0,x1,y0,y1) read only this region
                         (default (0,nx,0,ny)); negative values count from
                         the end of the respective dimension
        lev           :: list of levels to read, or, for multiple dimensions
                         (excluding x,y), tuple(!) of lists (see examples
                         below)
        usememmap     :: if True, use a memory map for reading data
                         (default False); recommended when using lev, or
                         region with global files to save memory and,
                         possibly, time
        mm            :: short alias for usememmap
        squeeze       :: if True (default), drop all singleton iteration,
                         record and level dimensions; if False, drop only
                         those that came from scalar arguments

    Raises:

        ValueError          :: unknown machineformat, or meta files of the
                               tiles are not compatible with each other
        IOError             :: no meta file found for an iteration
        NotImplementedError :: region given for files with map2glob set

    Examples:

        XC = rdmds('XC')
        XC = rdmds('res_*/XC')
        T = rdmds('T.0000002880')
        T = rdmds('T',2880)
        T2 = rdmds('T',[2880,5760])
        T,its,meta = rdmds('T',numpy.inf,returnmeta=True)
        VVEL = rdmds('pickup',2880,rec=range(50,100))
        a5 = rdmds('diags',2880,rec=0,lev=[5])
        a = rdmds('diags',2880,rec=0,lev=([0],[0,1,5,6,7]))
        from numpy import r_
        a = rdmds('diags',2880,rec=0,lev=([0],r_[:2,5:8]))  # same as previous
        a = rdmds('diags',2880,rec=0)[0, [0,1,5,6,7], ...]  # same, but less efficient
        a = rdmds('diags',2880)[0, 0, [0,1,5,6,7], ...]     # even less efficient
    """
    usememmap = usememmap or mm

    # add iteration number to file name unless itrs is -1
    additrs = itrs != -1

    # Detect NaN/Inf by value: identity tests against np.nan/np.inf miss
    # float('nan'), np.float64('inf') and similar equal-valued objects.
    floatscalar = isinstance(itrs, (float, np.floating))
    if floatscalar and np.isnan(itrs):
        # all iterations
        itrs = scanforfiles(fnamearg)
        warning('Reading {0} time levels: '.format(len(itrs)), *itrs)
        itrsislist = True
    elif floatscalar and np.isposinf(itrs):
        # last iteration
        itrs = scanforfiles(fnamearg)
        if len(itrs):
            warning('Found {0} time levels, reading'.format(len(itrs)), itrs[-1])
        else:
            warning('Found 0 time levels for {}'.format(fnamearg))
        itrs = itrs[-1:]
        itrsislist = False
    else:
        itrsislist = np.iterable(itrs)

    # always make itrs a list
    itrs = aslist(itrs)

    allrec = rec is None
    reclist = aslist(rec)
    if not isinstance(lev,tuple):
        lev = (lev,)
    levs = tuple( aslist(l) for l in lev )
    levdims = tuple(len(l) for l in levs)
    levinds = np.ix_(*levs)
    nlev = len(levdims)

    # With a memory map the whole tile is mapped and indexed at once;
    # otherwise all records are read at once only if all were requested.
    if usememmap:
        recsatonce = True
        readdata = np.memmap
    else:
        recsatonce = allrec
        readdata = fromfileshape

    try:
        typepre = _typeprefixes[machineformat]
    except KeyError:
        raise ValueError('Allowed machineformats: ' + ' '.join(_typeprefixes))

    arr = None
    metaref = {}
    timeStepNumbers = []
    timeIntervals = []
    for iit,it in enumerate(itrs):
        if additrs:
            fname = fnamearg + '.{0:010d}'.format(int(it))
        else:
            fname = fnamearg

        # tiled files (fname.NNN.NNN.meta) take precedence over fname.meta
        metafiles = glob.glob(fname + 2*('.'+3*'[0-9]') + '.meta') or glob.glob(fname+'.meta')
        if len(metafiles) == 0:
            raise IOError('No files found for ' + fname + '.meta')

        warning(metafiles[0])

        if debug: warning('Found',len(metafiles),'metafiles for iteration',it)

        for metafile in metafiles:
            gdims,i0s,ies,timestep,timeinterval,map2gl,meta = readmeta(metafile)
            if arr is None:
                # initialize, allocate
                try:
                    dataprec, = meta['dataprec']
                except KeyError:
                    dataprec, = meta['format']
                tp = typepre + _typesuffixes[dataprec]
                size = np.dtype(tp).itemsize
                if astype is None: astype = tp
                recshape = tuple( ie-i0 for i0,ie in zip(i0s,ies) )
                count = reduce(mul, recshape)
                nrecords, = meta['nrecords']
                tileshape = (nrecords,) + recshape
                if allrec:
                    reclist = range(nrecords)
                    recinds = np.s_[:,] + levinds
                else:
                    recinds = np.ix_(reclist, *levs)

                if region is None:
                    ri0,rie,rj0,rje = 0,gdims[-1],0,gdims[-2]
                else:
                    ri0,rie,rj0,rje = region
                    # negative region bounds count from the end
                    if ri0 < 0: ri0 += gdims[-1]
                    if rie < 0: rie += gdims[-1]
                    if rj0 < 0: rj0 += gdims[-2]
                    if rje < 0: rje += gdims[-2]

                assert nlev+2 <= len(gdims)
                rdims = levdims + gdims[len(levdims):-2] + (rje-rj0,rie-ri0)
                # always include itrs and rec dimensions and squeeze later
                arr = np.empty((len(itrs),len(reclist))+rdims, astype)
                arr[...] = fill_value
                metaref = meta
            else:
                if meta != metaref:
                    raise ValueError('Meta files not compatible')

            datafile = metafile[:-4] + 'data'

            if region is not None:
                if map2gl is None:
                    # overlap of tile with region:
                    i0 = min(rie, max(ri0, i0s[-1]))
                    ie = min(rie, max(ri0, ies[-1]))
                    j0 = min(rje, max(rj0, i0s[-2]))
                    je = min(rje, max(rj0, ies[-2]))
                    # source indices
                    I0 = i0 - i0s[-1]
                    Ie = ie - i0s[-1]
                    J0 = j0 - i0s[-2]
                    Je = je - i0s[-2]
                    # target indices
                    i0s[-1] = i0 - ri0
                    ies[-1] = ie - ri0
                    i0s[-2] = j0 - rj0
                    ies[-2] = je - rj0
                else:
                    raise NotImplementedError('Region selection is not implemented for map2glob != [0,1]')

            sl = tuple( slice(i0,ie) for i0,ie in zip(i0s,ies) )
            if map2gl is None:
                # part of arr that will receive tile (all records)
                arrtile = arr[(iit,slice(None))+sl]
            else:
                ny,nx = arr.shape[-2:]
                i0 = i0s[-1]
                j0 = i0s[-2]
                ie = ies[-1]
                je = ies[-2]
                # "flat" stride for j
                jstride = map2gl[1]*nx + map2gl[0]
                n = (je-j0)*jstride
                # start of a jstride by je-j0 block that contains this tile
                ii0 = min(i0+nx*j0, nx*ny-n)
                # tile starts at ioff+i0
                ioff = nx*j0 - ii0
                # flatten x,y dimensions
                arrflat = arr.reshape(arr.shape[:-2]+(nx*ny,))
                # extract tile
                arrmap = arrflat[...,ii0:ii0+n].reshape(arr.shape[:-2]+(je-j0,jstride))[...,:,ioff+i0:ioff+ie]
                # slice non-x,y dimensions (except records)
                arrtile = arrmap[(iit,slice(None))+sl[:-2]]
                del arrflat,arrmap

            if recsatonce:
                if region is None:
                    arrtile[...] = readdata(datafile, tp, shape=tileshape)[recinds]
                else:
                    # skip tiles that do not overlap the region
                    if Ie > I0 and Je > J0:
                        if debug: message(datafile, I0,Ie,J0,Je)
                        arrtile[...] = readdata(datafile, tp, shape=tileshape)[recinds + np.s_[...,J0:Je,I0:Ie]]
            else:
                # Binary mode for raw data; the context manager closes the
                # file even if a read or reshape raises.
                with open(datafile, 'rb') as f:
                    for irec,recnum in enumerate(reclist):
                        if recnum < 0: recnum += nrecords
                        # jump to the start of the requested record
                        f.seek(recnum*count*size)
                        if region is None:
                            arrtile[irec] = np.fromfile(f, tp, count=count).reshape(recshape)[levinds]
                        else:
                            if Ie > I0 and Je > J0:
                                if debug: message(datafile, I0,Ie,J0,Je)
                                tilerec = np.fromfile(f, tp, count=count).reshape(recshape)
                                arrtile[irec] = tilerec[levinds + np.s_[...,J0:Je,I0:Ie]]

        if timestep is not None:
            timeStepNumbers.extend(timestep)

        if timeinterval is not None:
            timeIntervals.append(timeinterval)

    # put list of iteration numbers back into metadata dictionary
    if len(timeStepNumbers):
        metaref['timeStepNumber'] = timeStepNumbers

    if len(timeIntervals):
        metaref['timeInterval'] = timeIntervals

    if arr is None:
        arr = np.array([])
    else:
        # squeeze singleton iteration, record and level dimensions like matlab version
        dims = (len(itrs),len(reclist)) + levdims
        if squeeze:
            # squeeze all singleton dimensions
            squeezed = tuple( d for d in dims if d > 1 )
        else:
            # squeeze all that came from scalar arguments
            keepers = [itrsislist, np.iterable(rec)] + [np.iterable(l) for l in lev]
            squeezed = tuple( d for d,keep in zip(dims, keepers) if keep )

        arr = arr.reshape(squeezed+arr.shape[2+nlev:])

    if returnmeta:
        meta = dict((k.lower(),v) for k,v in metaref.items())
        return arr,itrs,meta
    else:
        return arr
def legend_elements(self, prop="colors", num="auto",
                    fmt=None, func=lambda x: x, **kwargs):
    """
    Create legend handles and labels for a PathCollection.

    Each legend handle is a `.Line2D` representing the Path that was drawn,
    and each label is a string what each Path represents.

    This is useful for obtaining a legend for a `~.Axes.scatter` plot;
    e.g.::

        scatter = plt.scatter([1, 2, 3],  [4, 5, 6],  c=[7, 2, 3])
        plt.legend(*scatter.legend_elements())

    creates three legend elements, one for each color with the numerical
    values passed to *c* as the labels.

    Also see the :ref:`automatedlegendcreation` example.

    Parameters
    ----------
    prop : {"colors", "sizes"}, default: "colors"
        If "colors", the legend handles will show the different colors of
        the collection. If "sizes", the legend will show the different
        sizes. To set both, use *kwargs* to directly edit the `.Line2D`
        properties.
    num : int, None, "auto" (default), array-like, or `~.ticker.Locator`
        Target number of elements to create.
        If None, use all unique elements of the mappable array. If an
        integer, target to use *num* elements in the normed range.
        If *"auto"*, try to determine which option better suits the nature
        of the data.
        The number of created elements may slightly deviate from *num* due
        to a `~.ticker.Locator` being used to find useful locations.
        If a list or array, use exactly those elements for the legend.
        Finally, a `~.ticker.Locator` can be provided.
        For non-numeric labels only None, "auto" or an integer is accepted.
    fmt : str, `~matplotlib.ticker.Formatter`, or None (default)
        The format or formatter to use for the labels. If a string must be
        a valid input for a `~.StrMethodFormatter`. If None (the default),
        use a `~.ScalarFormatter` for numeric labels and ``"{x}"``
        otherwise.
    func : function, default: ``lambda x: x``
        Function to calculate the labels.  Often the size (or color)
        argument to `~.Axes.scatter` will have been pre-processed by the
        user using a function ``s = f(x)`` to make the markers visible;
        e.g. ``size = np.log10(x)``.  Providing the inverse of this
        function here allows that pre-processing to be inverted, so that
        the legend labels have the correct values; e.g. ``func = lambda
        x: 10**x``.
    **kwargs
        Allowed keyword arguments are *color* and *size*. E.g. it may be
        useful to set the color of the markers if *prop="sizes"* is used;
        similarly to set the size of the markers if *prop="colors"* is
        used. Any further parameters are passed onto the `.Line2D`
        instance. This may be useful to e.g. specify a different
        *markeredgecolor* or *alpha* for the legend handles.

    Returns
    -------
    handles : list of `.Line2D`
        Visual representation of each element of the legend.
    labels : list of str
        The string labels for elements of the legend.

    Raises
    ------
    ValueError
        If *prop* is neither "colors" nor "sizes", or if *num* is not an
        integer while the labels are non-numeric.
    """
    import warnings

    import matplotlib as mpl
    # Import the submodules explicitly: ``import matplotlib`` alone does
    # not guarantee that ``mpl.lines`` / ``mpl.ticker`` are loaded.
    import matplotlib.lines as mlines
    import matplotlib.ticker as mticker

    handles = []
    labels = []

    if prop == "colors":
        arr = self.get_array()
        if arr is None:
            warnings.warn("Collection without array used. Make sure to "
                          "specify the values to be colormapped via the "
                          "`c` argument.")
            return handles, labels
        _size = kwargs.pop("size", mpl.rcParams["lines.markersize"])

        def _get_color_and_size(value):
            return self.cmap(self.norm(value)), _size

    elif prop == "sizes":
        arr = self.get_sizes()
        _color = kwargs.pop("color", "k")

        def _get_color_and_size(value):
            # marker size is the square root of the stored size value
            return _color, np.sqrt(value)

    else:
        raise ValueError("Valid values for `prop` are 'colors' or "
                         f"'sizes'. You supplied '{prop}' instead.")

    # Get the unique values and their labels:
    values = np.unique(arr)
    label_values = np.asarray(func(values))
    label_values_are_numeric = np.issubdtype(label_values.dtype, np.number)

    # Handle the label format:
    if fmt is None and label_values_are_numeric:
        fmt = mticker.ScalarFormatter(useOffset=False, useMathText=True)
    elif fmt is None and not label_values_are_numeric:
        fmt = mticker.StrMethodFormatter("{x}")
    elif isinstance(fmt, str):
        fmt = mticker.StrMethodFormatter(fmt)
    fmt.create_dummy_axis()

    # Guard with isinstance: comparing an array-like ``num`` with a string
    # would otherwise yield an array whose truth value is ambiguous.
    if isinstance(num, str) and num == "auto":
        num = 9
        if len(values) <= num:
            num = None

    if label_values_are_numeric:
        label_values_min = label_values.min()
        label_values_max = label_values.max()
        # NOTE(review): `set_bounds` appears to be deprecated in newer
        # Matplotlib releases - confirm against the supported version.
        fmt.set_bounds(label_values_min, label_values_max)

        if num is not None:
            # Labels are numerical but larger than the target
            # number of elements, reduce to target using matplotlibs
            # ticker classes:
            if isinstance(num, mticker.Locator):
                loc = num
            elif np.iterable(num):
                loc = mticker.FixedLocator(num)
            else:
                num = int(num)
                loc = mticker.MaxNLocator(nbins=num, min_n_ticks=num - 1,
                                          steps=[1, 2, 2.5, 3, 5, 6, 8, 10])

            # Get nicely spaced label_values:
            label_values = loc.tick_values(label_values_min, label_values_max)

            # Remove extrapolated label_values:
            cond = ((label_values >= label_values_min)
                    & (label_values <= label_values_max))
            label_values = label_values[cond]

            # Get the corresponding values by creating a linear interpolant
            # with small step size:
            values_interp = np.linspace(values.min(), values.max(), 256)
            label_values_interp = func(values_interp)
            ix = np.argsort(label_values_interp)
            values = np.interp(label_values, label_values_interp[ix],
                               values_interp[ix])
    elif num is not None and not label_values_are_numeric:
        # Labels are not numerical so modifying label_values is not
        # possible, instead filter the array with nicely distributed
        # indexes:
        # Accept Python and NumPy integers; bool stays rejected as before.
        if isinstance(num, (int, np.integer)) and not isinstance(num, bool):
            loc = mticker.LinearLocator(num)
        else:
            raise ValueError(
                "`num` only supports integers for non-numeric labels.")

        ind = loc.tick_values(0, len(label_values) - 1).astype(int)
        label_values = label_values[ind]
        values = values[ind]

    # Some formatters requires set_locs:
    if hasattr(fmt, "set_locs"):
        fmt.set_locs(label_values)

    # Default settings for handles, add or override with kwargs:
    kw = dict(markeredgewidth=self.get_linewidths()[0], alpha=self.get_alpha())
    kw.update(kwargs)

    for val, lab in zip(values, label_values):
        color, size = _get_color_and_size(val)
        h = mlines.Line2D([0], [0], ls="", color=color, ms=size,
                          marker=self.get_paths()[0], **kw)
        handles.append(h)
        labels.append(fmt(lab))

    return handles, labels
def fit(self, X_l, y_l, X_h, y_h):
    """Fit Gaussian process regression model.

    Two data sets are fitted jointly. The covariance of the combined
    training set is assembled from the kernel ``kernel_l_`` evaluated on
    the ``_l`` data, scaled by ``rho`` towards the ``_h`` data, plus a
    second kernel ``kernel_d_`` on the ``_h`` data only.
    (Presumably ``_l``/``_h`` denote low-/high-fidelity data - confirm
    against the class documentation.)

    Parameters
    ----------
    X_l : array-like, shape = (n_l_samples, n_features)
        Training data

    y_l : array-like, shape = (n_l_samples, [n_output_dims])
        Target values

    X_h : array-like, shape = (n_h_samples, n_features)
        Training data

    y_h : array-like, shape = (n_h_samples, [n_output_dims])
        Target values

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    ValueError
        If ``alpha`` is an array whose length is neither 1 nor
        ``n_l_samples + n_h_samples``, or if optimizer restarts are
        requested while some hyperparameter bounds are not finite.
    numpy.linalg.LinAlgError
        If a covariance matrix is not positive definite.
    """
    if self.kernel is None:  # Use an RBF kernel as default
        self.kernel_l_ = C(1.0, constant_value_bounds="fixed") \
            * RBF(1.0, length_scale_bounds="fixed")
    else:
        self.kernel_l_ = clone(self.kernel)
    self.kernel_d_ = clone(self.kernel_l_)

    self.rng = check_random_state(self.random_state)

    X_l, y_l = check_X_y(X_l, y_l, multi_output=True, y_numeric=True)
    X_h, y_h = check_X_y(X_h, y_h, multi_output=True, y_numeric=True)
    self.n_l_ = len(X_l)

    # Normalize target value
    if self.normalize_y:
        self._y_l_train_mean = np.mean(y_l, axis=0)
        self._y_h_train_mean = np.mean(y_h, axis=0)
        # demean y
        y_l = y_l - self._y_l_train_mean
        y_h = y_h - self._y_h_train_mean
    else:
        self._y_l_train_mean = np.zeros(1)
        self._y_h_train_mean = np.zeros(1)

    # Validate array-valued alpha against the stacked training targets.
    # (This check used to reference an undefined name and therefore raised
    # NameError for every iterable alpha.)
    n_targets = y_l.shape[0] + y_h.shape[0]
    if np.iterable(self.alpha):
        alpha_arr = np.asarray(self.alpha)
        if alpha_arr.shape[0] != n_targets:
            if alpha_arr.shape[0] == 1:
                self.alpha = alpha_arr[0]
            else:
                raise ValueError("alpha must be a scalar or an array"
                                 " with same number of entries as y.(%d != %d)"
                                 % (alpha_arr.shape[0], n_targets))

    # The first n_l_ rows belong to the ``_l`` data, the rest to ``_h``.
    self.X_train_ = np.vstack((X_l, X_h))
    self.y_train_ = np.hstack((y_l, y_h))

    theta_initial = np.hstack((np.array([self.rho]),
                               self.kernel_l_.theta, self.kernel_d_.theta))
    if self.optimizer is not None and self.kernel_l_.n_dims > 0:
        # Choose hyperparameters based on maximizing the log-marginal
        # likelihood (potentially starting from several initial values)
        def obj_func(theta, eval_gradient=self.eval_gradient):
            """Return the negative log-marginal likelihood at theta."""
            if eval_gradient:
                raise Warning("eval_gradient = True mode is not implemented yet!")
            return -self.log_marginal_likelihood(theta)

        theta_bounds = np.r_[np.array(self.rho_bounds)[np.newaxis],
                             self.kernel_l_.bounds,
                             self.kernel_d_.bounds]
        # First optimize starting from theta specified in kernel
        optima = [(self._constrained_optimization(obj_func,
                                                  theta_initial,
                                                  theta_bounds,
                                                  self.eval_gradient))]

        # Additional runs are performed from log-uniform chosen initial
        # theta
        if self.n_restarts_optimizer > 0:
            if not (np.isfinite(self.kernel_l_.bounds).all()
                    and np.isfinite(self.kernel_d_.bounds).all()
                    and np.isfinite(self.rho_bounds).all()):
                raise ValueError(
                    "Multiple optimizer restarts (n_restarts_optimizer>0) "
                    "requires that all bounds are finite.")
            bounds = np.vstack((np.array(self.rho_bounds).reshape(1, -1),
                                self.kernel_l_.bounds,
                                self.kernel_d_.bounds))
            for iteration in range(self.n_restarts_optimizer):
                # NOTE(review): the first run above passes kernel thetas
                # as-is, whereas these restart samples are exponentiated
                # before being used with the same bounds - confirm which
                # parameterisation `_constrained_optimization` expects.
                theta_initial = np.hstack((
                    self.rng.uniform(bounds[0, 0], bounds[0, 1]),
                    np.exp(self.rng.uniform(bounds[1:, 0], bounds[1:, 1]))
                ))
                optima.append(
                    self._constrained_optimization(obj_func,
                                                   theta_initial,
                                                   bounds,
                                                   self.eval_gradient))
        # Select result from run with minimal (negative) log-marginal
        # likelihood
        lml_values = list(map(itemgetter(1), optima))
        best_hyperparams = optima[np.argmin(lml_values)][0]
        self.rho = best_hyperparams[0]
        self.kernel_l_.theta = best_hyperparams[1:1 + len(self.kernel_l_.theta)]
        self.kernel_d_.theta = best_hyperparams[1 + len(self.kernel_l_.theta):]
        self.log_marginal_likelihood_value_ = -np.min(lml_values)
    else:
        self.log_marginal_likelihood_value_ = \
            self.log_marginal_likelihood(theta_initial)

    # Precompute quantities required for predictions which are independent
    # of actual query points
    X_lo = self.X_train_[:self.n_l_]
    X_hi = self.X_train_[self.n_l_:]
    K_lf = self.kernel_l_(X_lo)
    # hstack/vstack copy their inputs, so K does not alias K_lf and the
    # diagonal updates below stay independent.
    K = np.vstack((
        np.hstack((K_lf,
                   self.rho * self.kernel_l_(X_lo, X_hi))),
        np.hstack((self.rho * self.kernel_l_(X_hi, X_lo),
                   self.rho**2 * self.kernel_l_(X_hi) + self.kernel_d_(X_hi)))
    ))

    # An array-valued alpha has one entry per stacked training target; its
    # leading n_l_ entries belong to the first block.
    if np.iterable(self.alpha):
        alpha_full = np.asarray(self.alpha)
        alpha_lf = alpha_full[:self.n_l_]
    else:
        alpha_full = alpha_lf = self.alpha
    K_lf[np.diag_indices_from(K_lf)] += alpha_lf
    K[np.diag_indices_from(K)] += alpha_full

    try:
        self.L_lf_ = cholesky(K_lf, lower=True)  # Line 2 (lf)
        self.L_ = cholesky(K, lower=True)  # Line 2
        # self.L_ changed, self._K_inv needs to be recomputed
        self._K_inv = None
        self._K_lf_inv = None
    except np.linalg.LinAlgError as exc:
        exc.args = ("The kernel is not returning a "
                    "positive definite matrix. Try gradually "
                    "increasing the 'alpha' parameter of your "
                    "GaussianProcessRegressor estimator.",) + exc.args
        raise
    self.alpha_lf_ = cho_solve((self.L_lf_, True),
                               self.y_train_[:self.n_l_])  # Line 3 (Lf)
    self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
    return self
def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
        include_lowest=False, duplicates='raise'):
    """
    Bin values into discrete intervals.

    Use `cut` when you need to segment and sort data values into bins. This
    function is also useful for going from a continuous variable to a
    categorical variable. For example, `cut` could convert ages to groups of
    age ranges. Supports binning into an equal number of bins, or a
    pre-specified array of bins.

    Parameters
    ----------
    x : array-like
        The input array to be binned. Must be 1-dimensional.
    bins : int, sequence of scalars, or pandas.IntervalIndex
        The criteria to bin by.

        * int : Defines the number of equal-width bins in the range of `x`.
          The range of `x` is extended by .1% on each side to include the
          minimum and maximum values of `x`.
        * sequence of scalars : Defines the bin edges allowing for non-uniform
          width. No extension of the range of `x` is done.
        * IntervalIndex : Defines the exact bins to be used. Note that
          IntervalIndex for `bins` must be non-overlapping.

    right : bool, default True
        Indicates whether `bins` includes the rightmost edge or not. If
        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
        indicate (1,2], (2,3], (3,4]. This argument is ignored when
        `bins` is an IntervalIndex.
    labels : array or bool, optional
        Specifies the labels for the returned bins. Must be the same length as
        the resulting bins. If False, returns only integer indicators of the
        bins. This affects the type of the output container (see below).
        This argument is ignored when `bins` is an IntervalIndex.
    retbins : bool, default False
        Whether to return the bins or not. Useful when bins is provided
        as a scalar.
    precision : int, default 3
        The precision at which to store and display the bins labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive or not.
    duplicates : {default 'raise', 'drop'}, optional
        If bin edges are not unique, raise ValueError or drop non-uniques.

        .. versionadded:: 0.23.0

    Returns
    -------
    out : pandas.Categorical, Series, or ndarray
        An array-like object representing the respective bin for each value
        of `x`. The type depends on the value of `labels`.

        * True (default) : returns a Series for Series `x` or a
          pandas.Categorical for all other inputs. The values stored within
          are Interval dtype.

        * sequence of scalars : returns a Series for Series `x` or a
          pandas.Categorical for all other inputs. The values stored within
          are whatever the type in the sequence is.

        * False : returns an ndarray of integers.

    bins : numpy.ndarray or IntervalIndex.
        The computed or specified bins. Only returned when `retbins=True`.
        For scalar or sequence `bins`, this is an ndarray with the computed
        bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
        an IntervalIndex `bins`, this is equal to `bins`.

    Raises
    ------
    ValueError
        If an integer `bins` is smaller than 1, if `x` is empty or contains
        infinity while `bins` is an integer, if an IntervalIndex `bins`
        overlaps, or if a sequence `bins` is not monotonically increasing.

    See Also
    --------
    qcut : Discretize variable into equal-sized buckets based on rank
        or based on sample quantiles.
    pandas.Categorical : Array type for storing data that come from a
        fixed set of values.
    Series : One-dimensional array with axis labels (including time series).
    pandas.IntervalIndex : Immutable Index implementing an ordered,
        sliceable set.

    Notes
    -----
    Any NA values will be NA in the result. Out of bounds values will be NA in
    the resulting Series or pandas.Categorical object.

    Examples
    --------
    Discretize into three equal-sized bins.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
    ... # doctest: +ELLIPSIS
    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
    ... # doctest: +ELLIPSIS
    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
    array([0.994, 3.   , 5.   , 7.   ]))

    Discovers the same bins, but assign them specific labels. Notice that
    the returned Categorical's categories are `labels` and is ordered.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
    ...        3, labels=["bad", "medium", "good"])
    [bad, good, medium, medium, good, bad]
    Categories (3, object): [bad < medium < good]

    ``labels=False`` implies you just want the bins back.

    >>> pd.cut([0, 1, 1, 2], bins=4, labels=False)
    array([0, 1, 1, 3])

    Passing a Series as an input returns a Series with categorical dtype:

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, 3)
    ... # doctest: +ELLIPSIS
    a    (1.992, 4.667]
    b    (1.992, 4.667]
    c    (4.667, 7.333]
    d     (7.333, 10.0]
    e     (7.333, 10.0]
    dtype: category
    Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...

    Passing a Series as an input returns a Series with mapping value.
    It is used to map numerically to intervals based on bins. With
    ``right=False`` the last bin is ``[8, 10)``, so the value 10 is out of
    bounds and becomes NaN.

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    4.0
     e    NaN
     dtype: float64, array([ 0,  2,  4,  6,  8, 10]))

    Use `drop` optional when bins is not unique

    >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
    ...        right=False, duplicates='drop')
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    3.0
     e    NaN
     dtype: float64, array([ 0,  2,  4,  6, 10]))

    Passing an IntervalIndex for `bins` results in those categories exactly.
    Notice that values not covered by the IntervalIndex are set to NaN. 0
    is to the left of the first bin (which is closed on the right), and 1.5
    falls between two bins.

    >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
    >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
    [NaN, (0, 1], NaN, (2, 3], (4, 5]]
    Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
    """
    # NOTE: this binning code is changed a bit from histogram for var(x) == 0

    # for handling the cut for datetime and timedelta objects
    x_is_series, series_index, name, x = _preprocess_for_cut(x)
    x, dtype = _coerce_to_type(x)

    if not np.iterable(bins):
        # A bin count was given: derive equal-width edges from the data.
        if is_scalar(bins) and bins < 1:
            raise ValueError("`bins` should be a positive integer.")

        try:  # for array-like
            n_values = x.size
        except AttributeError:
            x = np.asarray(x)
            n_values = x.size

        if n_values == 0:
            raise ValueError('Cannot cut empty array')

        lo, hi = nanops.nanmin(x), nanops.nanmax(x)
        # Adding 0.0 promotes the end points to floats.
        lo, hi = lo + 0.0, hi + 0.0

        if np.isinf(lo) or np.isinf(hi):
            # GH 24314
            raise ValueError('cannot specify integer `bins` when input data '
                             'contains infinity')

        if lo == hi:
            # Degenerate range: widen the end points before binning.
            lo -= .001 * abs(lo) if lo != 0 else .001
            hi += .001 * abs(hi) if hi != 0 else .001
            bins = np.linspace(lo, hi, bins + 1, endpoint=True)
        else:
            # Regular range: bin first, then nudge the open-side edge by
            # 0.1% of the range so the extreme value is included.
            bins = np.linspace(lo, hi, bins + 1, endpoint=True)
            margin = (hi - lo) * 0.001
            if right:
                bins[0] -= margin
            else:
                bins[-1] += margin

    elif isinstance(bins, IntervalIndex):
        if bins.is_overlapping:
            raise ValueError('Overlapping IntervalIndex is not accepted.')

    else:
        # Explicit edges: coerce to numeric form and require sorted order.
        bins = _convert_bin_to_numeric_type(np.asarray(bins), dtype)
        if (np.diff(bins) < 0).any():
            raise ValueError('bins must increase monotonically.')

    fac, bins = _bins_to_cuts(x, bins,
                              right=right,
                              labels=labels,
                              precision=precision,
                              include_lowest=include_lowest,
                              dtype=dtype,
                              duplicates=duplicates)

    return _postprocess_for_cut(fac, bins, retbins, x_is_series,
                                series_index, name, dtype)
def sliding_window_view(x, window_shape, axis=None, *,
                        subok=False, writeable=False):
    """
    Create a sliding window view into the array with the given window shape.

    Also known as rolling or moving window, the window slides across all
    dimensions of the array and extracts subsets of the array at all window
    positions.

    .. versionadded:: 1.20.0

    Parameters
    ----------
    x : array_like
        Array to create the sliding window view from.
    window_shape : int or tuple of int
        Size of window over each axis that takes part in the sliding window.
        If `axis` is not present, must have same length as the number of input
        array dimensions. Single integers `i` are treated as if they were the
        tuple `(i,)`.
    axis : int or tuple of int, optional
        Axis or axes along which the sliding window is applied.
        By default, the sliding window is applied to all axes and
        `window_shape[i]` will refer to axis `i` of `x`.
        If `axis` is given as a `tuple of int`, `window_shape[i]` will refer to
        the axis `axis[i]` of `x`.
        Single integers `i` are treated as if they were the tuple `(i,)`.
    subok : bool, optional
        If True, sub-classes will be passed-through, otherwise the returned
        array will be forced to be a base-class array (default).
    writeable : bool, optional
        When true, allow writing to the returned view. The default is false,
        as this should be used with caution: the returned view contains the
        same memory location multiple times, so writing to one location will
        cause others to change.

    Returns
    -------
    view : ndarray
        Sliding window view of the array. The sliding window dimensions are
        inserted at the end, and the original dimensions are trimmed as
        required by the size of the sliding window.
        That is, ``view.shape = x_shape_trimmed + window_shape``, where
        ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less
        than the corresponding window size.

    Raises
    ------
    ValueError
        If `window_shape` contains a negative value, if the lengths of
        `window_shape` and `axis` (or ``x.ndim`` when `axis` is None) differ,
        or if a window is larger than the axis it slides over.

    See Also
    --------
    lib.stride_tricks.as_strided: A lower-level and less safe routine for
        creating arbitrary views from custom shape and strides.
    broadcast_to: broadcast an array to a given shape.

    Notes
    -----
    For many applications using a sliding window view can be convenient, but
    potentially very slow. Often specialized solutions exist, for example:

    - `scipy.signal.fftconvolve`

    - filtering functions in `scipy.ndimage`

    - moving window functions provided by
      `bottleneck <https://github.com/pydata/bottleneck>`_.

    As a rough estimate, a sliding window approach with an input size of `N`
    and a window size of `W` will scale as `O(N*W)` where frequently a special
    algorithm can achieve `O(N)`. That means that the sliding window variant
    for a window size of 100 can be a 100 times slower than a more specialized
    version.

    Nevertheless, for small window sizes, when no custom algorithm exists, or
    as a prototyping and developing tool, this function can be a good solution.

    Examples
    --------
    >>> x = np.arange(6)
    >>> x.shape
    (6,)
    >>> v = sliding_window_view(x, 3)
    >>> v.shape
    (4, 3)
    >>> v
    array([[0, 1, 2],
           [1, 2, 3],
           [2, 3, 4],
           [3, 4, 5]])

    This also works in more dimensions, e.g.

    >>> i, j = np.ogrid[:3, :4]
    >>> x = 10*i + j
    >>> x.shape
    (3, 4)
    >>> x
    array([[ 0,  1,  2,  3],
           [10, 11, 12, 13],
           [20, 21, 22, 23]])
    >>> shape = (2,2)
    >>> v = sliding_window_view(x, shape)
    >>> v.shape
    (2, 3, 2, 2)
    >>> v
    array([[[[ 0,  1],
             [10, 11]],
    <BLANKLINE>
            [[ 1,  2],
             [11, 12]],
    <BLANKLINE>
            [[ 2,  3],
             [12, 13]]],
    <BLANKLINE>
    <BLANKLINE>
           [[[10, 11],
             [20, 21]],
    <BLANKLINE>
            [[11, 12],
             [21, 22]],
    <BLANKLINE>
            [[12, 13],
             [22, 23]]]])

    The axis can be specified explicitly:

    >>> v = sliding_window_view(x, 3, 0)
    >>> v.shape
    (1, 4, 3)
    >>> v
    array([[[ 0, 10, 20],
            [ 1, 11, 21],
            [ 2, 12, 22],
            [ 3, 13, 23]]])

    The same axis can be used several times. In that case, every use reduces
    the corresponding original dimension:

    >>> v = sliding_window_view(x, (2, 3), (1, 1))
    >>> v.shape
    (3, 1, 2, 3)
    >>> v
    array([[[[ 0,  1,  2],
             [ 1,  2,  3]]],
    <BLANKLINE>
    <BLANKLINE>
           [[[10, 11, 12],
             [11, 12, 13]]],
    <BLANKLINE>
    <BLANKLINE>
           [[[20, 21, 22],
             [21, 22, 23]]]])

    Combining with stepped slicing (`::step`), this can be used to take sliding
    views which skip elements:

    >>> x = np.arange(7)
    >>> sliding_window_view(x, 5)[:, ::2]
    array([[0, 2, 4],
           [1, 3, 5],
           [2, 4, 6]])

    or views which move by multiple elements

    >>> x = np.arange(7)
    >>> sliding_window_view(x, 3)[::2, :]
    array([[0, 1, 2],
           [2, 3, 4],
           [4, 5, 6]])

    A common application of `sliding_window_view` is the calculation of running
    statistics. The simplest example is the
    `moving average <https://en.wikipedia.org/wiki/Moving_average>`_:

    >>> x = np.arange(6)
    >>> x.shape
    (6,)
    >>> v = sliding_window_view(x, 3)
    >>> v.shape
    (4, 3)
    >>> v
    array([[0, 1, 2],
           [1, 2, 3],
           [2, 3, 4],
           [3, 4, 5]])
    >>> moving_average = v.mean(axis=-1)
    >>> moving_average
    array([1., 2., 3., 4.])

    Note that a sliding window approach is often **not** optimal (see Notes).
    """
    window_shape = (tuple(window_shape)
                    if np.iterable(window_shape)
                    else (window_shape,))

    # First convert input to array, possibly keeping subclass.
    # ``np.array(x, copy=False)`` is avoided on purpose: from NumPy 2.0 it
    # raises when a copy cannot be avoided (e.g. for list input), whereas
    # asarray/asanyarray copy only if needed on every NumPy version.
    x = np.asanyarray(x) if subok else np.asarray(x)

    window_shape_array = np.array(window_shape)
    if np.any(window_shape_array < 0):
        raise ValueError('`window_shape` cannot contain negative values')

    if axis is None:
        axis = tuple(range(x.ndim))
        if len(window_shape) != len(axis):
            raise ValueError(f'Since axis is `None`, must provide '
                             f'window_shape for all dimensions of `x`; '
                             f'got {len(window_shape)} window_shape elements '
                             f'and `x.ndim` is {x.ndim}.')
    else:
        axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True)
        if len(window_shape) != len(axis):
            raise ValueError(f'Must provide matching length window_shape and '
                             f'axis; got {len(window_shape)} window_shape '
                             f'elements and {len(axis)} axes elements.')

    # Each window dimension reuses the stride of the axis it slides along.
    out_strides = x.strides + tuple(x.strides[ax] for ax in axis)

    # note: same axis can be windowed repeatedly
    x_shape_trimmed = list(x.shape)
    for ax, dim in zip(axis, window_shape):
        if x_shape_trimmed[ax] < dim:
            raise ValueError(
                'window shape cannot be larger than input array shape')
        x_shape_trimmed[ax] -= dim - 1
    out_shape = tuple(x_shape_trimmed) + window_shape
    return as_strided(x, strides=out_strides, shape=out_shape,
                      subok=subok, writeable=writeable)
def main(argv=None):
    """Command-line entry point: open previously exported results in phy.

    Parses ``datafile`` and the optional ``-e/--extension`` argument, checks
    the installed phy-contrib version and the export patch level, then builds
    the keyword arguments for ``TemplateController`` and runs the GUI from
    inside the ``<file_out_suff><-extension>.GUI`` directory.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; defaults to ``sys.argv[1:]``.

    Notes
    -----
    Exits via ``sys.exit`` when no arguments are given, when phy-contrib is
    older than 1.0.12, or when the user declines to continue after the
    re-export warning. The existing log file is removed before logging starts.
    """
    argv = sys.argv[1:] if argv is None else argv

    parser = argparse.ArgumentParser(
        description=get_colored_header(),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('datafile', help='data file')
    parser.add_argument('-e', '--extension',
                        help='extension to consider for visualization',
                        default='')

    if len(argv) == 0:
        parser.print_help()
        sys.exit()

    args = parser.parse_args(argv)
    filename = os.path.abspath(args.datafile)
    extension = args.extension
    params = CircusParser(filename)

    # Start from a fresh log file on every launch.
    if os.path.exists(params.logfile):
        os.remove(params.logfile)
    logger = init_logging(params.logfile)
    logger = logging.getLogger(__name__)

    phycontrib_ok = (StrictVersion(phycontrib.__version__)
                     >= StrictVersion("1.0.12"))
    if not phycontrib_ok:
        print_and_log(
            ['You need to update phy-contrib to the latest git version'],
            'error', logger)
        sys.exit(1)

    if not test_patch_for_similarities(params, extension):
        print_and_log(
            ['You should re-export the data because of a fix in 0.6'],
            'error', logger)
        continue_anyway = query_yes_no(
            Fore.WHITE + "Continue anyway (results may not be fully correct)?",
            default=None)
        if not continue_anyway:
            sys.exit(1)

    data_file = params.get_data_file()
    data_dtype = data_file.data_dtype
    data_offset = getattr(data_file, 'data_offset', 0)
    file_format = data_file.description
    file_out_suff = params.get('data', 'file_out_suff')

    phy_can_read = file_format in supported_by_phy
    if not phy_can_read:
        print_and_log(
            ["File format %s is not supported by phy. TraceView disabled"
             % file_format],
            'info', logger)

    if numpy.iterable(data_file.gain):
        print_and_log(
            ['Multiple gains are not supported, using a default value of 1'],
            'info', logger)
        gain = 1
    else:
        if data_file.gain != 1:
            print_and_log(
                ["Gain of %g is not supported by phy. "
                 "Expecting a scaling mismatch" % data_file.gain],
                'info', logger)
        gain = data_file.gain

    probe = params.probe

    if extension != '':
        extension = '-' + extension
    output_path = params.get('data', 'file_out_suff') + extension + '.GUI'

    if not os.path.exists(output_path):
        print_and_log(
            ['Data should be first exported with the converting method!'],
            'error', logger)
        return

    print_and_log(["Launching the phy GUI..."], 'info', logger)

    # Work out which raw-data path the trace view should point at.
    if not phy_can_read:
        dat_path = 'giverandomname.dat'
    elif not params.getboolean('data', 'overwrite'):
        dat_path = params.get('data', 'data_file_no_overwrite')
    elif params.get('data', 'stream_mode') == 'multi-files':
        data_file = params.get_data_file(source=True, has_been_created=False)
        dat_path = ' '.join(data_file.get_file_names())
    else:
        dat_path = params.get('data', 'data_file')

    gui_params = {
        'dat_path': dat_path,
        'n_channels_dat': params.nb_channels,
        'n_features_per_channel': 5,
        'dtype': data_dtype,
        'offset': data_offset,
        'sample_rate': params.rate,
        'dir_path': output_path,
        'hp_filtered': True,
    }

    os.chdir(output_path)
    create_app()
    controller = TemplateController(**gui_params)
    gui = controller.create_gui()
    gui.show()
    run_app()
    gui.close()
    del gui
def neighborhoods(positions, voronoi=False, size=None, reach=None,
                  tess=None, tree=None):
    """Build a list of lists or padded array of neighborhoods around each point

    select neighbors by any combination of three basic choices:
        Voronoi/Delaunay, distance/ball, count/nearest/number

    parameters
    positions : array with shape (N, 2) or fields 'x' and 'y'
    voronoi :   whether to require pairs to be voronoi or delaunay neighbors
    size :      maximum size for each neighborhood excluding center/self.
                may also be a pair (fewest, most); rows with fewer than
                `fewest` real neighbors are masked out entirely.
                `most` may itself be an array (one limit per point).
    reach :     maximum distance to search (exclusive).
                scalar for distance/ball
                for other criteria, it may be an array of distances or a str
                such as '[min|max|mean|median]*{factor}' where the function
                is of neighbor distances
    tess, tree : optionally provide spatial.Delaunay or spatial.KDTree instance

    returns
    neighbors : list of lists (or padded array) with shape (npoints, size)
                neighbors[i] gives indices in positions to neighbors of
                positions[i], i.e., the coordinates for all neighbors of
                positions[i] are given by positions[neighbors[i]], with
                shape (size, 2)
    mask :      True if not a real neighbor
    distances : distance to the neighbor, only calculated if needed.

    raises
    ValueError : if none of voronoi, size, or reach restricts the selection
    """
    # `size` is either a (fewest, most) pair or a single upper limit.
    try:
        fewest, most = size
    except TypeError:
        fewest, most = None, size
    need_dist = True
    filter_reach = reach is not None
    # A scalar reach is used directly as the search radius `dub`, so no
    # separate filtering pass is needed; any other kind of reach (array or
    # str) is applied as a filter after the neighbors are found.
    try:
        dub = float(reach)
        filter_reach = False
    except (TypeError, ValueError):
        dub = np.inf
    if voronoi:
        tess = tess or Delaunay(positions)
        neighbors = get_neighbors(tess, 'all')
    elif most is not None:
        tree = tree or KDTree(positions)
        # Ask for one extra neighbor because each point finds itself first.
        distances, neighbors = tree.query(positions, np.max(most) + 1,
                                          distance_upper_bound=dub)
        distances, neighbors = distances[:, 1:], neighbors[:, 1:]  # remove self
        # Slots with infinite distance hold no real neighbor; point them at
        # the row's own index so they remain valid indices into positions.
        mask = np.isinf(distances)
        neighbors[mask] = np.where(mask)[0]
        need_dist = False
    elif reach is None:
        raise ValueError("No limits on neighborhood selection applied")
    else:
        tree = tree or KDTree(positions)
        neighbors = tree.query_ball_tree(tree, dub)
        for i in xrange(len(neighbors)):
            neighbors[i].remove(i)  # remove self
    if need_dist:
        # Ragged neighbor lists: pad to a rectangular array, compute the
        # distances, and sort each row by distance (padding sorts last
        # because its distance is set to inf).
        # NOTE(review): presumably helpy.pad_uneven pads row i with i itself
        # and returns the padding mask -- confirm against helpy.
        ix = np.arange(len(positions))[:, None]
        neighbors, mask = helpy.pad_uneven(neighbors, ix, True, int)
        distances = distance.cdist(positions, positions)[ix, neighbors]
        distances[mask] = np.inf
        sort = distances.argsort(1)
        distances, neighbors = distances[ix, sort], neighbors[ix, sort]
    if isinstance(reach, basestring):
        # reach given as 'func' or 'func*factor': evaluate func over each
        # row's real-neighbor distances and scale by the factor.
        fun, fact = reach.split('*') if '*' in reach else (reach, 1)
        ix = np.arange(len(positions))
        fun = {'mean': np.nanmean, 'min': np.nanmin,
               'max': np.nanmax, 'median': np.nanmedian}[fun]
        fact = float(fact)
        reach = fun(np.where(mask, np.nan, distances), 1, keepdims=True) * fact
    if filter_reach:
        # Exclusive upper bound: neighbors at or beyond reach are dropped.
        mask[distances >= reach] = True
        distances[mask] = np.inf
    if fewest is not None:
        # Mask whole rows that have fewer than `fewest` real neighbors.
        mask[(~mask).sum(1) < fewest] = True
    if np.iterable(most):
        # Per-point limits: mask the trailing `extra` columns of each row
        # whose own limit is below the array width.
        extra = np.clip(mask.shape[1] - most, 0, None)
        i = np.where(extra)
        extra = extra[i]
        i = np.repeat(i[0], extra)
        j = mask.shape[1] - np.concatenate(map(range, extra)) - 1
        mask[i, j] = True
        most = most.max()
    return neighbors[:, :most], mask[:, :most], distances[:, :most]
def histogram(a, bins="fd", range=None, max_num_bins=250, weights=None,
              **kwargs):
    """Enhanced histogram.

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.

    Parameters
    ----------
    a : array_like
        Input data. The histogram is computed over the flattened array.
    %s
    range : (float, float), optional
        The lower and upper range of the bins. If not provided, range
        is simply ``(a.min(), a.max())``. Values outside the range are
        ignored. The first element of the range must be less than or
        equal to the second. `range` affects the automatic bin
        computation as well. While bin width is computed to be optimal
        based on the actual data within `range`, the bin count will fill
        the entire range including portions containing no data.
    %s
    weights : array_like, optional
        An array of weights, of the same shape as `a`. Each value in
        `a` only contributes its associated weight towards the bin count
        (instead of 1). This is currently not used by any of the bin
        estimators, but may be in the future.
    **kwargs :
        Passed to :py:func:`numpy.histogram`

    Returns
    -------
    hist : array
        The values of the histogram. See `normed` and `weights` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    Notes
    -----
    Dask arrays are delegated to ``histogram_dask``; `range` and `weights`
    are not forwarded on that path.

    See Also
    --------
    * :py:func:`numpy.histogram`

    """
    if isinstance(a, da.Array):
        return histogram_dask(a, bins=bins, max_num_bins=max_num_bins,
                              **kwargs)

    if isinstance(bins, str):
        _deprecated_bins = {"scotts": "scott", "freedman": "fd"}
        new_bins = _deprecated_bins.get(bins, None)
        if new_bins:
            warnings.warn(
                f"`bins='{bins}'` has been deprecated and will be removed "
                f"in HyperSpy 2.0. Please use `bins='{new_bins}'` instead.",
                VisibleDeprecationWarning,
            )
            bins = new_bins

    # Remember what the caller asked for, for the warning message below.
    _old_bins = bins

    if isinstance(bins, str) and bins in ["knuth", "blocks"]:
        # if range is specified, we need to truncate
        # the data for these bin-finding routines
        if range is not None:
            in_range = (a >= range[0]) & (a <= range[1])
            a = a[in_range]
            if weights is not None:
                # Keep the weights aligned with the truncated data so they
                # can still be applied by np.histogram below.
                weights = np.asarray(weights)[in_range]

        if bins == "knuth":
            _, bins = knuth_bin_width(a, return_bins=True,
                                      max_num_bins=max_num_bins)
        elif bins == "blocks":
            bins = bayesian_blocks(a)
    else:
        bins = np.histogram_bin_edges(a, bins=bins, range=range,
                                      weights=weights)

    _bins_len = bins if not np.iterable(bins) else len(bins)

    if _bins_len > max_num_bins:
        # To avoid memory errors such as that detailed in
        # https://github.com/hyperspy/hyperspy/issues/784,
        # we log a warning and cap the number of bins at
        # a sensible value.
        _logger.warning(
            f"Estimated number of bins using `bins='{_old_bins}'` "
            f"is too large ({_bins_len}). Capping the number of bins "
            f"at `max_num_bins={max_num_bins}`. Consider using an "
            "alternative method for calculating the bins such as "
            "`bins='scott'`, or increasing the value of the "
            "`max_num_bins` keyword argument.")
        bins = max_num_bins

    # `range` and `weights` must reach np.histogram: the weights define the
    # counts, and the range is needed when `bins` was capped to an integer
    # (it is ignored by numpy when `bins` is an explicit array of edges).
    return np.histogram(a, bins=bins, range=range, weights=weights, **kwargs)
def inverse_transform(D):
    """Apply the inverse transform to ``D``.

    Iterable input (as judged by ``np.iterable``) is handed to
    ``Anscombe.inverse_transform_jit``; anything else is handed to
    ``approximate_inverse_transform``. The chosen function's result is
    returned unchanged.
    """
    invert = (Anscombe.inverse_transform_jit
              if np.iterable(D)
              else approximate_inverse_transform)
    return invert(D)
def fit(self, X, y):
    """Fit Gaussian process regression model.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) or list of object
        Feature vectors or other representations of training data.

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values.

    Returns
    -------
    self : object
        GaussianProcessRegressor class instance.

    Raises
    ------
    ValueError
        If ``alpha`` is an array whose length is neither 1 nor the number
        of samples, or if optimizer restarts are requested while some
        kernel bounds are not finite.
    """
    if self.kernel is not None:
        self.kernel_ = clone(self.kernel)
    else:
        # Use an RBF kernel as default
        self.kernel_ = (C(1.0, constant_value_bounds="fixed")
                        * RBF(1.0, length_scale_bounds="fixed"))

    self._rng = check_random_state(self.random_state)

    # Kernels working on vectors need numeric 2-D input; structured
    # kernels receive the data without dtype/shape enforcement.
    if self.kernel_.requires_vector_input:
        validation = {"dtype": "numeric", "ensure_2d": True}
    else:
        validation = {"dtype": None, "ensure_2d": False}
    X, y = self._validate_data(
        X, y, multi_output=True, y_numeric=True, **validation
    )

    # Normalize target value
    if self.normalize_y:
        self._y_train_mean = np.mean(y, axis=0)
        self._y_train_std = _handle_zeros_in_scale(
            np.std(y, axis=0), copy=False
        )
        # Remove mean and make unit variance
        y = (y - self._y_train_mean) / self._y_train_std
    else:
        # Identity statistics so later rescaling is a no-op.
        shape_y_stats = (y.shape[1],) if y.ndim == 2 else 1
        self._y_train_mean = np.zeros(shape=shape_y_stats)
        self._y_train_std = np.ones(shape=shape_y_stats)

    if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:
        if self.alpha.shape[0] != 1:
            raise ValueError(
                "alpha must be a scalar or an array with same number of "
                f"entries as y. ({self.alpha.shape[0]} != {y.shape[0]})"
            )
        # A length-1 array is treated as a scalar.
        self.alpha = self.alpha[0]

    self.X_train_ = np.copy(X) if self.copy_X_train else X
    self.y_train_ = np.copy(y) if self.copy_X_train else y

    tune_hyperparameters = (self.optimizer is not None
                            and self.kernel_.n_dims > 0)
    if not tune_hyperparameters:
        self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(
            self.kernel_.theta, clone_kernel=False
        )
    else:
        # Choose hyperparameters based on maximizing the log-marginal
        # likelihood (potentially starting from several initial values)
        def obj_func(theta, eval_gradient=True):
            if not eval_gradient:
                return -self.log_marginal_likelihood(theta,
                                                     clone_kernel=False)
            lml, grad = self.log_marginal_likelihood(
                theta, eval_gradient=True, clone_kernel=False
            )
            return -lml, -grad

        # First optimize starting from theta specified in kernel
        optima = [
            self._constrained_optimization(
                obj_func, self.kernel_.theta, self.kernel_.bounds
            )
        ]

        # Additional runs are performed from log-uniform chosen initial
        # theta
        if self.n_restarts_optimizer > 0:
            if not np.isfinite(self.kernel_.bounds).all():
                raise ValueError(
                    "Multiple optimizer restarts (n_restarts_optimizer>0) "
                    "requires that all bounds are finite."
                )
            bounds = self.kernel_.bounds
            for _ in range(self.n_restarts_optimizer):
                theta_initial = self._rng.uniform(bounds[:, 0], bounds[:, 1])
                optima.append(
                    self._constrained_optimization(
                        obj_func, theta_initial, bounds
                    )
                )

        # Select result from run with minimal (negative) log-marginal
        # likelihood
        lml_values = [run[1] for run in optima]
        self.kernel_.theta = optima[np.argmin(lml_values)][0]
        self.kernel_._check_bounds_params()

        self.log_marginal_likelihood_value_ = -np.min(lml_values)

    # Precompute quantities required for predictions which are independent
    # of actual query points
    # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)
    K = self.kernel_(self.X_train_)
    K[np.diag_indices_from(K)] += self.alpha
    try:
        self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)
    except np.linalg.LinAlgError as exc:
        hint = (
            f"The kernel, {self.kernel_}, is not returning a positive "
            "definite matrix. Try gradually increasing the 'alpha' "
            "parameter of your GaussianProcessRegressor estimator."
        )
        exc.args = (hint,) + exc.args
        raise
    # Alg 2.1, page 19, line 3 -> alpha = L^T \ (L \ y)
    self.alpha_ = cho_solve(
        (self.L_, GPR_CHOLESKY_LOWER),
        self.y_train_,
        check_finite=False,
    )
    return self
def polyint(cs, m=1, k=[], lbnd=0, scl=1):
    """
    Integrate a polynomial.

    Returns the polynomial `cs`, integrated `m` times from `lbnd` to `x`.
    At each iteration the resulting series is **multiplied** by `scl` and
    an integration constant, `k`, is added. The scaling factor is for use
    in a linear change of variable. ("Buyer beware": note that, depending
    on what one is doing, one may want `scl` to be the reciprocal of what
    one might expect; for more information, see the Notes section below.)
    The argument `cs` is a sequence of coefficients, from lowest order
    term to highest, e.g., [1,2,3] represents the polynomial
    ``1 + 2*x + 3*x**2``.

    Parameters
    ----------
    cs : array_like
        1-d array of polynomial coefficients, ordered from low to high.
    m : int, optional
        Order of integration, must be non-negative. (Default: 1) With
        ``m == 0`` the trimmed coefficients are returned unchanged.
    k : {[], list, scalar}, optional
        Integration constant(s). The value of the first integral at zero
        is the first value in the list, the value of the second integral
        at zero is the second value, etc. If ``k == []`` (the default),
        all constants are set to zero. If ``m == 1``, a single scalar can
        be given instead of a list.
    lbnd : scalar, optional
        The lower bound of the integral. (Default: 0)
    scl : scalar, optional
        Following each integration the result is *multiplied* by `scl`
        before the integration constant is added. (Default: 1)

    Returns
    -------
    S : ndarray
        Coefficients of the integral.

    Raises
    ------
    ValueError
        If `m` is not an integer, ``m < 0``, or ``len(k) > m``.

    See Also
    --------
    polyder

    Notes
    -----
    Note that the result of each integration is *multiplied* by `scl`.
    Why is this important to note? Say one is making a linear change of
    variable :math:`u = ax + b` in an integral relative to `x`. Then
    :math:`dx = du/a`, so one will need to set `scl` equal to :math:`1/a`
    - perhaps not what one would have first thought.

    Examples
    --------
    >>> from numpy import polynomial as P
    >>> cs = (1,2,3)
    >>> P.polyint(cs) # should return array([0, 1, 1, 1])
    array([ 0.,  1.,  1.,  1.])
    >>> P.polyint(cs,3) # should return array([0, 0, 0, 1/6, 1/12, 1/20])
    array([ 0.        ,  0.        ,  0.        ,  0.16666667,  0.08333333,
            0.05      ])
    >>> P.polyint(cs,k=3) # should return array([3, 1, 1, 1])
    array([ 3.,  1.,  1.,  1.])
    >>> P.polyint(cs,lbnd=-2) # should return array([6, 1, 1, 1])
    array([ 6.,  1.,  1.,  1.])
    >>> P.polyint(cs,scl=-2) # should return array([0, -2, -2, -2])
    array([ 0., -2., -2., -2.])

    """
    order = int(m)
    if not np.iterable(k):
        k = [k]

    if order != m:
        raise ValueError("The order of integration must be integer")
    if order < 0:
        raise ValueError("The order of integration must be non-negative")
    if len(k) > order:
        raise ValueError("Too many integration constants")

    # cs is a trimmed copy
    [cs] = pu.as_series([cs])
    if order == 0:
        return cs

    # Pad the constants with zeros: one constant per integration pass.
    constants = list(k) + [0] * (order - len(k))
    for const in constants:
        ncoef = len(cs)
        cs *= scl
        if ncoef == 1 and cs[0] == 0:
            # Integrating the zero polynomial leaves only the constant.
            cs[0] += const
            continue
        integral = np.empty(ncoef + 1, dtype=cs.dtype)
        integral[0] = cs[0] * 0
        integral[1:] = cs / np.arange(1, ncoef + 1)
        # Shift the constant term so the integral vanishes at lbnd before
        # the requested constant is added.
        integral[0] += const - polyval(lbnd, integral)
        cs = integral
    return cs
def histogram_dask(a, bins="fd", max_num_bins=250, **kwargs):
    """Enhanced histogram for dask arrays.

    Reads the data at most two times - once to determine best bins (if
    required), and second time to actually calculate the histogram.
    Whenever an integer number of bins is used (given directly, or after
    capping at `max_num_bins`), any ``range`` keyword is overwritten with
    the extent of the data.

    Parameters
    ----------
    a : array_like
        array of data to be histogrammed
    bins : int or list or str, default 'fd'
        If bins is a string, then it must be one of:

        'fd' (Freedman Diaconis Estimator)
            Robust (resilient to outliers) estimator that takes into
            account data variability and data size.

        'scott'
            Less robust estimator that that takes into account data
            variability and data size.
    %s
    **kwargs :
        Passed to :py:func:`dask.histogram`

    Returns
    -------
    hist : array
        The values of the histogram. See `normed` and `weights` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    Raises
    ------
    TypeError
        If `a` is not a dask array.
    ValueError
        If `bins` is a string other than 'fd' or 'scott' (or their
        deprecated aliases).

    See Also
    --------
    * :py:func:`dask.histogram`
    * :py:func:`numpy.histogram`

    """
    if not isinstance(a, da.Array):
        raise TypeError("Expected a dask array")

    if a.ndim != 1:
        a = a.flatten()

    if isinstance(bins, str):
        _deprecated_bins = {"scotts": "scott", "freedman": "fd"}
        new_bins = _deprecated_bins.get(bins, None)
        if new_bins:
            warnings.warn(
                f"`bins='{bins}'` has been deprecated and will be removed "
                f"in HyperSpy 2.0. Please use `bins='{new_bins}'` instead.",
                VisibleDeprecationWarning,
            )
            bins = new_bins

    # Remember what the caller asked for, for the warning message below.
    _old_bins = bins

    # True once kwargs["range"] holds the computed data extent, so the
    # (expensive) min/max pass over the dask array is never repeated.
    range_is_data_extent = False

    if isinstance(bins, str):
        if bins == "scott":
            _, bins = _scott_bw_dask(a, True)
        elif bins == "fd":
            _, bins = _freedman_bw_dask(a, True)
        else:
            raise ValueError(f"Unrecognized 'bins' argument: got {bins}")
    elif not np.iterable(bins):
        kwargs["range"] = da.compute(a.min(), a.max())
        range_is_data_extent = True

    _bins_len = bins if not np.iterable(bins) else len(bins)

    if _bins_len > max_num_bins:
        # To avoid memory errors such as that detailed in
        # https://github.com/hyperspy/hyperspy/issues/784,
        # we log a warning and cap the number of bins at
        # a sensible value.
        _logger.warning(
            f"Estimated number of bins using `bins='{_old_bins}'` "
            f"is too large ({_bins_len}). Capping the number of bins "
            f"at `max_num_bins={max_num_bins}`. Consider using an "
            "alternative method for calculating the bins such as "
            "`bins='scott'`, or increasing the value of the "
            "`max_num_bins` keyword argument.")
        bins = max_num_bins
        if not range_is_data_extent:
            kwargs["range"] = da.compute(a.min(), a.max())

    h, bins = da.histogram(a, bins=bins, **kwargs)

    return h.compute(), bins
def __init__(self, ax,
             cmap=None,
             norm=None,
             alpha=1.0,
             values=None,
             boundaries=None,
             orientation='vertical',
             extend='neither',
             spacing='uniform',  # uniform or proportional
             ticks=None,
             format=None,
             drawedges=False,
             filled=True,
             ):
    """Set up a colorbar drawn into the axes ``ax``.

    ``cmap`` and ``norm`` default to ``cm.get_cmap()`` and
    ``colors.Normalize()``. ``orientation`` selects which axis of ``ax``
    carries the ticks (``'vertical'`` uses the y-axis, anything else the
    x-axis).

    ``format`` may be None, a format string (wrapped in
    ``ticker.FormatStrFormatter``) or a ready-made formatter object. With
    ``format=None`` and a ``colors.LogNorm`` norm, both axes are switched
    to log scale and a ``ticker.LogFormatter`` is used; otherwise the
    axis' current major formatter is kept.

    ``ticks`` may be an iterable of tick positions, a locator object, or
    None, in which case a locator is selected automatically.
    """
    self.ax = ax

    cmap = cm.get_cmap() if cmap is None else cmap
    norm = colors.Normalize() if norm is None else norm

    self.alpha = alpha
    cm.ScalarMappable.__init__(self, cmap=cmap, norm=norm)
    self.values = values
    self.boundaries = boundaries
    self.extend = extend
    self.spacing = spacing
    self.orientation = orientation
    self.drawedges = drawedges
    self.filled = filled

    # artists
    self.solids = None
    self.lines = None
    self.dividers = None
    self.extension_patch1 = None
    self.extension_patch2 = None

    vertical = orientation == "vertical"
    self.cbar_axis = self.ax.yaxis if vertical else self.ax.xaxis

    # Resolve ``format`` into a formatter object, or None when the axis'
    # existing formatter should be kept.
    if format is not None:
        if isinstance(format, str):
            formatter = ticker.FormatStrFormatter(format)
        else:
            formatter = format  # Assume it is a Formatter
    elif isinstance(self.norm, colors.LogNorm):
        # change both axis for proper aspect
        self.ax.set_xscale("log")
        self.ax.set_yscale("log")
        self.cbar_axis.set_minor_locator(ticker.NullLocator())
        formatter = ticker.LogFormatter()
    else:
        formatter = None

    if formatter is not None:
        self.cbar_axis.set_major_formatter(formatter)
    else:
        formatter = self.cbar_axis.get_major_formatter()

    if np.iterable(ticks):
        self.cbar_axis.set_ticks(ticks)
    elif ticks is not None:
        self.cbar_axis.set_major_locator(ticks)
    else:
        self._select_locator(formatter)

    self._config_axes()

    self.update_artists()

    self.set_label_text('')
def dopythonphot(image, xc, yc, aparcsec=0.4, system='AB', ext=None,
                 psfimage=None, psfradpix=3, recenter=False, imfilename=None,
                 ntestpositions=100, snthresh=0.0, zeropoint=None,
                 filtername=None, exptime=None, pixscale=None,
                 skyannarcsec=(6.0, 12.0), skyval=None,
                 skyalgorithm='sigmaclipping', target=None, printstyle=None,
                 exact=True, fitsconvention=True, phpadu=None,
                 returnflux=False, showfit=False, verbose=False, debug=False):
    """ Measure the flux through aperture(s) and/or psf fitting using the
    PythonPhot package.

    Inputs:
      image :  string giving image file name OR a list or 2-tuple giving
               the header and data array as  [hdr,data]
      xc,yc  :  aperture center in pixel coordinates
      aparcsec : aperture radius in arcsec, a sequence of radii, or a string
               with a comma-separated list of aperture radii
      psfimage : filename for a fits file containing a psf model
      system :  report AB or Vega mags ('AB','Vega')
      snthresh :  If the measured flux is below <snthresh>*fluxerr then the
                resulting magnitude is reported as a lower limit.
      zeropoint : fix the zeropoint (if not provided, we look it up from
      hardcoded tables)
      skyannarcsec : inner and outer radius of the sky annulus (in arcsec)
      target : name of the target object (for printing in snanastyle)
      printstyle :  None or 'default' = report MJD, filter, and photometry
                    'verbose' or 'long' = include target name and position
                    'snana' = report mags in the format of a SNANA .dat file.
                    (matched case-insensitively)
      fitsconvention : xc,yc position follows the fits convention with (1,1)
             as the lower left pixel.  Otherwise, follow the python/pyfits
             convention with (0,0) as the lower left pixel.
      returnflux : instead of returning a list of strings containing all the
             flux and magnitude information, simply return a single flux val

    Returns:
      The array of (aperture-corrected) fluxes when returnflux is set,
      otherwise a list with one formatted photometry line per aperture
      (plus one line for the psf-fitting flux when psfimage is given).

    Raises:
      RuntimeError if the exposure time cannot be determined, or if no
      zeropoint was given and no aperture corrections are available for
      the camera.

    Note :  No recentering is done (i.e. this does forced photometry at the
       given pixel position).  With recenter=True and verbose set, the
       recentered position is only printed; the photometry is still done
       at xc,yc.
    """
    from PythonPhot import photfunctions

    if debug == 1:
        import pdb
        pdb.set_trace()

    imhdr, imdat = getheaderanddata(image, ext=ext)
    if imfilename is None:
        if isinstance(image, str):
            imfilename = image
        elif 'FILENAME' in imhdr:
            imfilename = imhdr['FILENAME']
        else:
            imfilename = 'unknown'

    if imdat.dtype != 'float64':
        imdat = imdat.astype('float64', copy=False)
    if not filtername:
        if 'FILTER1' in imhdr:
            if 'CLEAR' in imhdr['FILTER1']:
                filtername = imhdr['FILTER2']
            else:
                filtername = imhdr['FILTER1']
        else:
            filtername = imhdr['FILTER']

    if not exptime:
        if 'EXPTIME' in imhdr:
            exptime = imhdr['EXPTIME']
        else:
            # Builtin RuntimeError: the Python-2-only `exceptions` module
            # exposes the very same class, so callers are unaffected.
            raise RuntimeError(
                "Cannot determine exposure time for %s" % imfilename)

    if not pixscale:
        pixscale = getpixscale(imhdr, ext=ext)

    # Accept the documented comma-separated string form of aparcsec.
    if isinstance(aparcsec, str):
        aparcsec = [float(ap) for ap in aparcsec.split(',')]
    if not np.iterable(aparcsec):
        aparcsec = np.array([aparcsec])
    elif not isinstance(aparcsec, np.ndarray):
        aparcsec = np.array(aparcsec)

    appix = np.array([ap / pixscale for ap in aparcsec])
    skyannpix = np.array([skyrad / pixscale for skyrad in skyannarcsec])
    if len(appix) >= 1:
        assert skyannpix[0] >= np.max(
            appix), "Sky annulus must be >= largest aperture."
    camera = getcamera(imhdr)

    # Define the conversion factor from the values in this image
    # to photons : photons per ADU.
    if phpadu is None:
        if 'BUNIT' not in imhdr:
            if camera == 'WFC3-IR' and 'EXPTIME' in imhdr:
                phpadu = imhdr['EXPTIME']
            else:
                phpadu = 1
        elif imhdr['BUNIT'].lower() in ['cps', 'electrons/s']:
            phpadu = imhdr['EXPTIME']
        elif imhdr['BUNIT'].lower() in ['counts', 'electrons']:
            phpadu = 1
    assert (phpadu is not None), "Can't determine units from the image header."

    if fitsconvention:
        xpy, ypy = xc - 1, yc - 1
    else:
        xpy, ypy = xc, yc

    if recenter:
        xim, yim = getxycenter([imhdr, imdat], xc, yc,
                               fitsconvention=True, radec=False,
                               verbose=verbose)
        if verbose:
            print("Recentered position (x,y) : %.2f %.2f" % (xim, yim))
            ra, dec = xy2radec(imhdr, xim, yim)
            print("Recentered position (ra,dec) : %.6f %.6f" % (ra, dec))

    output_PythonPhot = photfunctions.get_flux_and_err(
        imdat, psfimage, [xpy, ypy],
        psfradpix=psfradpix, apradpix=appix, ntestpositions=ntestpositions,
        skyannpix=skyannpix, skyalgorithm=skyalgorithm, setskyval=skyval,
        recenter_target=False, recenter_fakes=True, exact=exact,
        exptime=exptime, ronoise=1, phpadu=phpadu,
        showfit=showfit, verbose=verbose, debug=debug)
    apflux, apfluxerr, psfflux, psffluxerr, sky, skyerr = output_PythonPhot

    if not np.iterable(apflux):
        apflux = np.array([apflux])
        apfluxerr = np.array([apfluxerr])

    # Define aperture corrections for each aperture
    if zeropoint is not None:
        zpt = zeropoint
        apcor = np.zeros(len(aparcsec))
        aperr = np.zeros(len(aparcsec))
    else:
        zpt = hstzpt_apcorr.getzpt(image, system=system)
        if camera == 'WFC3-IR':
            # TODO: allow user to choose an alternate EE table?
            apcor, aperr = hstzpt_apcorr.apcorrWFC3IR(filtername, aparcsec)
        elif camera == 'WFC3-UVIS':
            apcor, aperr = hstzpt_apcorr.apcorrWFC3UVIS(filtername, aparcsec)
        elif camera == 'ACS-WFC':
            apcor, aperr = hstzpt_apcorr.apcorrACSWFC(filtername, aparcsec)
        else:
            # Fail with a clear message instead of a NameError further down.
            raise RuntimeError(
                "No aperture corrections available for camera %s; "
                "provide a zeropoint explicitly." % camera)

    # record the psf flux as a final infinite aperture for printing purposes
    if psfimage is not None:
        aparcsec = np.append(aparcsec, np.inf)
        apflux = np.append(apflux, [psfflux])
        apfluxerr = np.append(apfluxerr, [psffluxerr])
        apcor = np.append(apcor, 0)

    # apply aperture corrections to flux and mags
    # and define upper limit mags for fluxes with significance <snthresh
    mag, magerr = np.zeros(len(apflux)), np.zeros(len(apflux))
    for i in range(len(apflux)):
        if np.isfinite(aparcsec[i]):
            # For actual aperture measurements (not the psf fitting flux),
            # apply aperture corrections to the measured fluxes
            # Flux rescaled to larger aperture:
            apflux[i] *= 10**(0.4 * apcor[i])
            # Flux error rescaled:
            df = apfluxerr[i] * 10**(0.4 * apcor[i])
            # Systematic err from aperture correction :
            dfap = 0.4 * np.log(10) * apflux[i] * aperr[i]
            apfluxerr[i] = np.sqrt(df**2 + dfap**2)  # total flux err
            if verbose > 1:
                print(" FERRTOT FERRSTAT FERRSYS")
                print(" %.5f %.5f %.5f" % (apfluxerr[i], df, dfap))

        if apflux[i] < abs(apfluxerr[i]) * snthresh:
            # no real detection. Report mag as an upper limit
            sigmafactor = snthresh or 3
            mag[i] = -2.5 * np.log10(sigmafactor * abs(apfluxerr[i])) \
                + zpt - apcor[i]
            magerr[i] = -9.0
        else:
            # Good detection. convert to a magnitude (ap correction already
            # applied)
            mag[i] = -2.5 * np.log10(apflux[i]) + zpt
            magerr[i] = 1.0857 * apfluxerr[i] / apflux[i]

    if debug:
        import pdb
        pdb.set_trace()

    if returnflux:
        return apflux

    if 'EXPSTART' in imhdr and 'EXPEND' in imhdr:
        mjdobs = (imhdr['EXPEND'] + imhdr['EXPSTART']) / 2.
    else:
        mjdobs = 0.0

    # Normalise the style once, up front, so that every comparison below
    # sees the same value and a None printstyle never has .lower() called.
    if printstyle is not None:
        printstyle = printstyle.lower()

    if printstyle == 'snana':
        # Convert to SNANA fluxcal units.  These are needed to build the
        # OBS lines below whether or not the header is printed.
        fluxcal = apflux * 10**(0.4 * (27.5 - zpt))
        fluxcalerr = apfluxerr * 10**(0.4 * (27.5 - zpt))

    if verbose:
        if printstyle == 'snana':
            # Construct a SNANA-style OBS line, e.g.
            # OBS: 56456.500 H wol 0.000 8.630 25.160 -9.000
            print('VARLIST: MJD FLT FIELD FLUXCAL FLUXCALERR MAG '
                  'MAGERR ZPT')
        elif printstyle in ['long', 'verbose']:
            print('# TARGET RA DEC MJD FILTER '
                  ' APER FLUX FLUXERR MAG MAGERR MAGSYS '
                  ' ZP SKY SKYERR IMAGE')
        else:
            print('# MJD FILTER APER FLUX FLUXERR MAG '
                  'MAGERR MAGSYS ZP SKY SKYERR')

    ra, dec = 0, 0
    if printstyle in ['snana', 'long', 'verbose']:
        if not target and 'FILENAME' in imhdr:
            target = imhdr['FILENAME'].split('_')[0]
        elif not target:
            target = 'target'
        ra, dec = xy2radec(imhdr, xc, yc, ext=ext)

    maglinelist = []
    for iap in range(len(aparcsec)):
        if printstyle == 'snana':
            magline = 'OBS: %8.2f %6s %s %8.3f %8.3f '\
                      '%8.3f %8.3f %.3f' % (
                          float(mjdobs), FilterAlpha[filtername], target,
                          fluxcal[iap], fluxcalerr[iap], mag[iap],
                          magerr[iap], zpt)
        elif printstyle in ['long', 'verbose']:
            magline = '%-15s %10.5f %10.5f %.3f %6s %4.2f %9.4f %8.4f '\
                      ' %9.4f %8.4f %5s %7.4f %7.4f %6.4f %s' % (
                          target, ra, dec, float(mjdobs), filtername,
                          aparcsec[iap], apflux[iap], apfluxerr[iap],
                          mag[iap], magerr[iap], system, zpt, sky, skyerr,
                          imfilename)
        else:
            magline = '%.3f %6s %4.2f %9.4f %8.4f %9.4f %8.4f %5s ' \
                      '%7.4f %7.4f %6.4f' % (
                          float(mjdobs), filtername, aparcsec[iap],
                          apflux[iap], apfluxerr[iap], mag[iap], magerr[iap],
                          system, zpt, sky, skyerr)
        maglinelist.append(magline)

    return maglinelist
def merge(datasets, bounds=None, res=None, nodata=None, precision=10,
          indexes=None, method='first'):
    """Copy valid pixels from input files to an output file.

    All files must have the same number of bands, data type, and
    coordinate reference system.

    Input files are merged in their listed order using the reverse
    painter's algorithm (default) or another method. If the output file
    exists, its values will be overwritten by input values.

    Geospatial bounds and resolution of a new output file in the
    units of the input file coordinate reference system may be provided
    and are otherwise taken from the first input file.

    Parameters
    ----------
    datasets: list of dataset objects opened in 'r' mode
        source datasets to be merged.
    bounds: tuple, optional
        Bounds of the output image (left, bottom, right, top).
        If not set, bounds are determined from bounds of input rasters.
        Sources lying entirely outside these bounds are skipped.
    res: tuple, optional
        Output resolution in units of coordinate reference system. If not
        set, the resolution of the first raster is used. If a single value
        is passed, output pixels will be square.
    nodata: float, optional
        nodata value to use in output file. If not set, uses the nodata
        value in the first input raster.
    precision: float, optional
        Number of decimal points of precision when computing inverse
        transform.
    indexes : list of ints or a single int, optional
        bands to read and merge
    method : str or callable
        pre-defined method:
            first: reverse painting
            last: paint valid new on top of existing
            min: pixel-wise min of existing and new
            max: pixel-wise max of existing and new
        or custom callable with signature:

        def function(old_data, new_data, old_nodata, new_nodata):

            Parameters
            ----------
            old_data : array_like
                array to update with new_data
            new_data : array_like
                data to merge
                same shape as old_data
            old_nodata, new_nodata : array_like
                boolean masks where old/new data is nodata
                same shape as old_data

    Returns
    -------
    tuple

        Two elements:

            dest: numpy ndarray
                Contents of all input rasters in single array

            out_transform: affine.Affine()
                Information for mapping pixel coordinates in `dest` to
                another coordinate system

    Raises
    ------
    ValueError
        If `method` is neither a known method name nor a callable.
    """
    first = datasets[0]
    first_res = first.res
    nodataval = first.nodatavals[0]
    dtype = first.dtypes[0]

    if method not in MERGE_METHODS and not callable(method):
        raise ValueError(
            'Unknown method {0}, must be one of {1} or callable'.format(
                method, MERGE_METHODS))

    # Determine output band count
    if indexes is None:
        output_count = first.count
    elif isinstance(indexes, int):
        output_count = 1
    else:
        output_count = len(indexes)

    # Extent from option or extent of all inputs
    if bounds:
        dst_w, dst_s, dst_e, dst_n = bounds
    else:
        # scan input files
        xs = []
        ys = []
        for src in datasets:
            left, bottom, right, top = src.bounds
            xs.extend([left, right])
            ys.extend([bottom, top])
        dst_w, dst_s, dst_e, dst_n = min(xs), min(ys), max(xs), max(ys)

    logger.debug("Output bounds: %r", (dst_w, dst_s, dst_e, dst_n))
    output_transform = Affine.translation(dst_w, dst_n)
    logger.debug("Output transform, before scaling: %r", output_transform)

    # Resolution/pixel size
    if not res:
        res = first_res
    elif not np.iterable(res):
        res = (res, res)
    elif len(res) == 1:
        res = (res[0], res[0])
    output_transform *= Affine.scale(res[0], -res[1])
    logger.debug("Output transform, after scaling: %r", output_transform)

    # Compute output array shape. We guarantee it will cover the output
    # bounds completely
    output_width = int(math.ceil((dst_e - dst_w) / res[0]))
    output_height = int(math.ceil((dst_n - dst_s) / res[1]))

    # Adjust bounds to fit
    dst_e, dst_s = output_transform * (output_width, output_height)
    logger.debug("Output width: %d, height: %d", output_width, output_height)
    logger.debug("Adjusted bounds: %r", (dst_w, dst_s, dst_e, dst_n))

    # create destination array
    dest = np.zeros((output_count, output_height, output_width), dtype=dtype)

    if nodata is not None:
        nodataval = nodata
        logger.debug("Set nodataval: %r", nodataval)

    if nodataval is not None:
        # Only fill if the nodataval is within dtype's range
        inrange = False
        if np.dtype(dtype).kind in ('i', 'u'):
            info = np.iinfo(dtype)
            inrange = (info.min <= nodataval <= info.max)
        elif np.dtype(dtype).kind == 'f':
            info = np.finfo(dtype)
            if np.isnan(nodataval):
                inrange = True
            else:
                inrange = (info.min <= nodataval <= info.max)
        if inrange:
            dest.fill(nodataval)
        else:
            warnings.warn("Input file's nodata value, %s, is beyond the valid "
                          "range of its data type, %s. Consider overriding it "
                          "using the --nodata option for better results."
                          % (nodataval, dtype))
    else:
        nodataval = 0

    if method == 'first':
        def copyto(old_data, new_data, old_nodata, new_nodata):
            mask = np.logical_and(old_nodata, ~new_nodata)
            old_data[mask] = new_data[mask]

    elif method == 'last':
        def copyto(old_data, new_data, old_nodata, new_nodata):
            mask = ~new_nodata
            old_data[mask] = new_data[mask]

    elif method == 'min':
        def copyto(old_data, new_data, old_nodata, new_nodata):
            mask = np.logical_and(~old_nodata, ~new_nodata)
            old_data[mask] = np.minimum(old_data[mask], new_data[mask])

            mask = np.logical_and(old_nodata, ~new_nodata)
            old_data[mask] = new_data[mask]

    elif method == 'max':
        def copyto(old_data, new_data, old_nodata, new_nodata):
            mask = np.logical_and(~old_nodata, ~new_nodata)
            old_data[mask] = np.maximum(old_data[mask], new_data[mask])

            mask = np.logical_and(old_nodata, ~new_nodata)
            old_data[mask] = new_data[mask]

    elif callable(method):
        copyto = method

    else:
        raise ValueError(method)

    # The nodata value does not change between sources, so decide once how
    # nodata pixels are detected.
    nodata_is_nan = np.isnan(nodataval)

    for src in datasets:
        # Real World (tm) use of boundless reads.
        # This approach uses the maximum amount of memory to solve the
        # problem. Making it more efficient is a TODO.

        # 1. Compute spatial intersection of destination and source
        src_w, src_s, src_e, src_n = src.bounds

        # A source entirely outside the destination has no intersection;
        # the "intersection" computed below would be inverted and yield an
        # invalid window. This can only happen with user-supplied bounds.
        if (src_w > dst_e or src_e < dst_w or
                src_s > dst_n or src_n < dst_s):
            logger.debug("Skipping source outside output bounds: %s",
                         src.name)
            continue

        int_w = src_w if src_w > dst_w else dst_w
        int_s = src_s if src_s > dst_s else dst_s
        int_e = src_e if src_e < dst_e else dst_e
        int_n = src_n if src_n < dst_n else dst_n

        # 2. Compute the source window
        src_window = windows.from_bounds(
            int_w, int_s, int_e, int_n, src.transform, precision=precision)
        logger.debug("Src %s window: %r", src.name, src_window)

        src_window = src_window.round_shape()

        # 3. Compute the destination window
        dst_window = windows.from_bounds(
            int_w, int_s, int_e, int_n, output_transform, precision=precision)
        dst_window = dst_window.round_shape()

        # 4. Read data in source window into temp
        trows, tcols = (int(round(dst_window.height)),
                        int(round(dst_window.width)))
        temp_shape = (output_count, trows, tcols)
        temp = src.read(out_shape=temp_shape, window=src_window,
                        boundless=False, masked=True, indexes=indexes)

        # 5. Copy elements of temp into dest
        roff, coff = (int(round(dst_window.row_off)),
                      int(round(dst_window.col_off)))

        # `region` is a view, so copyto updates `dest` in place.
        region = dest[:, roff:roff + trows, coff:coff + tcols]
        if nodata_is_nan:
            region_nodata = np.isnan(region)
            temp_nodata = np.isnan(temp)
        else:
            region_nodata = region == nodataval
            temp_nodata = temp.mask

        copyto(region, temp, region_nodata, temp_nodata)

    return dest, output_transform
def lowess(x, y, x0, deg=1, kernel=epanechnikov, l=1, robust=False,):
    """
    Locally smoothed regression with the LOWESS algorithm.

    Parameters
    ----------
    x: float n-d array
       Values of x for which f(x) is known (e.g. measured). The shape of this
       is (n, j), where n is the number of dimensions and j is the
       number of distinct coordinates sampled.

    y: float array
       The known values of f(x) at these points. This has shape (j,)

    x0: float or float array.
        Values of x for which we estimate the value of f(x). This is either a
        single scalar value (only possible for the 1d case, in which case f(x0)
        is estimated for just that one value of x), or an array of shape (n, k).

    deg: int
        The degree of smoothing functions. 0 is locally constant, 1 is locally
        linear, etc. Default: 1.

    kernel: callable
        A kernel function. {'epanechnikov', 'tri_cube', 'bi_square'}

    l: float or float array with shape = x.shape
       The metric window size for the kernel

    robust: bool
        Whether to apply the robustification procedure from [Cleveland79], page
        831

    Returns
    -------
    The function estimated at x0.

    Notes
    -----
    The solution to this problem is given by equation 6.8 in Hastie
    Tibshirani and Friedman (2008). The Elements of Statistical Learning
    (Chapter 6).

    The local design matrix contains the columns 1, x, x**2, ..., x**deg.

    Example
    -------
    >>> import lowess as lo
    >>> import numpy as np

    # For the 1D case:
    >>> x = np.random.randn(100)
    >>> f = np.cos(x) + 0.2 * np.random.randn(100)
    >>> x0 = np.linspace(-1, 1, 10)
    >>> f_hat = lo.lowess(x, f, x0)
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1)
    >>> ax.scatter(x, f)
    >>> ax.plot(x0, f_hat, 'ro')
    >>> plt.show()

    # 2D case (and more...)
    >>> x = np.random.randn(2, 100)
    >>> f = -1 * np.sin(x[0]) + 0.5 * np.cos(x[1]) + 0.2*np.random.randn(100)
    >>> x0 = np.mgrid[-1:1:.1, -1:1:.1]
    >>> x0 = np.vstack([x0[0].ravel(), x0[1].ravel()])
    >>> f_hat = lo.lowess(x, f, x0, kernel=lo.tri_cube)
    >>> from mpl_toolkits.mplot3d import Axes3D
    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111, projection='3d')
    >>> ax.scatter(x[0], x[1], f)
    >>> ax.scatter(x0[0], x0[1], f_hat, color='r')
    >>> plt.show()
    """
    # Allow plain sequences as well as arrays (no-op for ndarray input).
    x = np.asanyarray(x)
    y = np.asanyarray(y)

    if robust:
        # We use the procedure described in Cleveland1979
        # Start by calling this function with robust set to false and the x0
        # input being equal to the x input:
        # NOTE(review): this initial fit always uses the epanechnikov kernel
        # and the default degree, regardless of `kernel`/`deg` -- confirm
        # that this is intended.
        y_est = lowess(x, y, x, kernel=epanechnikov, l=l, robust=False)
        resid = y_est - y
        median_resid = np.nanmedian(np.abs(resid))
        # Calculate the bi-square function on the residuals for robustness
        # weights:
        robustness_weights = bi_square(resid / (6 * median_resid))
        robustness_weights[np.isnan(robustness_weights)] = 0
        # The robustness weights are the same for every x0, so build their
        # diagonal matrix once:
        robust_W = np.diag(robustness_weights)

    # For the case where x0 is provided as a scalar:
    if not np.iterable(x0):
        x0 = np.asarray([x0])
    else:
        x0 = np.asanyarray(x0)
    ans = np.zeros(x0.shape[-1])

    # We only need one design matrix for fitting. Each power 1..deg of x
    # contributes its own block of columns:
    B = [np.ones(x.shape[-1])]
    for d in range(1, deg + 1):
        B.append(x ** d)
    B = np.vstack(B).T

    for idx, this_x0 in enumerate(x0.T):
        # This is necessary in the 1d case (?):
        if not np.iterable(this_x0):
            this_x0 = np.asarray([this_x0])
        # Different weighting kernel for each x0:
        W = np.diag(do_kernel(this_x0, x, l=l, kernel=kernel))
        # XXX It should be possible to calculate W outside the loop, if x0 and
        # x are both sampled in some regular fashion (that is, if W is the same
        # matrix in each iteration). That should save time.

        if robust:
            # We apply the robustness weights to the weighted least-squares
            # procedure:
            W = np.dot(W, robust_W)

        # Equation 6.8 in HTF:
        BtW = np.dot(B.T, W)
        BtWB = np.dot(BtW, B)
        # Get the params (pinv, so a singular system does not raise):
        beta = np.dot(np.dot(la.pinv(BtWB), BtW), y.T)

        # Design vector for this coordinate, laid out like a row of B, for
        # back-predicting. Kept 1-d so that the prediction is a scalar:
        B0 = np.hstack([[1.0]] + [this_x0 ** d for d in range(1, deg + 1)])

        # Estimate the answer based on the parameters:
        ans[idx] = np.dot(B0, beta)

    return ans.T
def get_chip(hs, cx_input):
    """
    Read the chip(s) that `hs` associates with `cx_input`.

    The path(s) are obtained from ``hs.get_rchip_path(cx_input)`` and each
    one is loaded with ``hs._read_chip``.  An iterable `cx_input` yields a
    list of results; anything else yields a single result.
    """
    chip_paths = hs.get_rchip_path(cx_input)
    if not np.iterable(cx_input):
        # Single input: get_rchip_path returned one path.
        return hs._read_chip(chip_paths)
    return list(map(hs._read_chip, chip_paths))
def year2int(year, day=15):
    """Convert decimal year(s) to a list of integer dates -> YYYYMMDD.

    Each value is split into (year, month) with `year2ym`; the day of the
    month is taken from `day`.  A scalar input gives a one-element list.
    """
    years = year if np.iterable(year) else np.asarray([year])
    dates = []
    for decimal_year in years:
        yr, month = year2ym(decimal_year)
        dates.append(int(yr * 10000 + month * 100 + day))
    return dates
def rotate(self, alpha):
    """
    Rotate PT array. Change the rotation angles attribute respectively.

    Rotation angle must be given in degrees. All angles are referenced to
    geographic North, positive in clockwise direction.
    (Mathematically negative!)

    In non-rotated state, X refs to North and Y to East direction.

    `alpha` is either a single number (also accepted as an iterable of
    length 1), which is applied to every entry of the PT array, or an
    iterable holding one angle per entry.

    Invalid input (non-numeric angles, or a number of angles that does not
    match the PT array) is reported with a printed message; the method then
    returns without modifying the object.
    """
    if self._pt is None:
        print('pt-array is "None" - I cannot rotate that')
        return

    # check for iterable list/set of angles - if so, it must have length 1
    # or same as len(pt):
    if np.iterable(alpha) and len(alpha) != 1:
        try:
            lo_angles = [float(i % 360) for i in alpha]
        except (TypeError, ValueError):
            print('"Angles" must be valid numbers (in degrees)')
            return
    else:
        try:
            # A one-element container stands for its single angle.
            single = next(iter(alpha)) if np.iterable(alpha) else alpha
            degreeangle = float(single % 360)
        except (TypeError, ValueError):
            print('"Angle" must be a valid number (in degrees)')
            return
        # make an n long list of identical angles
        lo_angles = [degreeangle for i in self.pt]

    # Validate the count before touching any state, so that a bad call
    # neither fails while adding the angles nor loses the stored rotation.
    if len(lo_angles) != len(self._pt):
        print('Wrong number Number of "angles" - need %i ' % (len(self._pt)))
        return

    # A scalar stored rotation is expanded to one angle per entry.
    if np.iterable(self.rotation_angle) == 0:
        self.rotation_angle = np.array([self.rotation_angle
                                        for ii in self.pt])

    self.rotation_angle = list((np.array(lo_angles) +
                                np.array(self.rotation_angle)) % 360)

    pt_rot = copy.copy(self._pt)
    pt_err_rot = copy.copy(self._pt_err)

    for idx_freq, angle in enumerate(lo_angles):
        # NaN angles are treated as "no rotation" for that entry.
        if np.isnan(angle):
            angle = 0.

        if self.pt_err is not None:
            pt_rot[idx_freq], pt_err_rot[idx_freq] = \
                MTcc.rotatematrix_incl_errors(self.pt[idx_freq, :, :],
                                              angle,
                                              self.pt_err[idx_freq, :, :])
        else:
            pt_rot[idx_freq], pt_err_rot = \
                MTcc.rotatematrix_incl_errors(self.pt[idx_freq, :, :],
                                              angle)

    # --> set the rotated tensors as the current attributes
    self._pt = pt_rot
    self._pt_err = pt_err_rot