from numpy import all as npall, any as npany, array, ones


def pareto_min(*args):
    r"""Determine if observation is a Pareto point

    Find the Pareto-efficient points that minimize the provided features.

    Args:
        xi (iterable OR gr.Intention()): Feature to minimize; use -X to maximize

    Returns:
        np.array of boolean: Indicates if observation is Pareto-efficient

    """
    # Check invariants
    lengths = map(len, args)
    if len(set(lengths)) > 1:
        raise ValueError("All arguments to pareto_min must be of equal length")

    # Compute pareto points
    costs = array([*args]).T
    is_efficient = ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        is_efficient[i] = npall(npany(costs[:i] > c, axis=1)) and npall(
            npany(costs[i + 1:] > c, axis=1)
        )

    return is_efficient
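# Illustrative usage sketch for pareto_min (the values are made up, not from
# the original module): (1, 1) and (2, 0.5) are mutually non-dominated, while
# (2, 2) is dominated by (1, 1).
print(pareto_min([1.0, 2.0, 2.0], [1.0, 0.5, 2.0]))
# expected output: [ True  True False]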
from numpy import all as npall, any as npany, ones


def pareto_min_rel(X_test, X_base=None):
    r"""Determine if rows in X_test are optimal, compared to X_base

    Finds the Pareto-efficient test-points that minimize the column values,
    relative to a given set of base-points.

    Args:
        X_test (2d numpy array): Test point observations; rows are
            observations, columns are features
        X_base (2d numpy array): Base point observations; rows are
            observations, columns are features

    Returns:
        array of boolean values: Indicates if test observation is
        Pareto-efficient, relative to base points

    References:
        Owen *Monte Carlo theory, methods and examples* (2013)

    """
    # Compute Pareto points
    is_efficient = ones(X_test.shape[0], dtype=bool)
    if X_base is None:
        for i, x in enumerate(X_test):
            is_efficient[i] = npall(npany(X_test[:i] > x, axis=1)) and npall(
                npany(X_test[i + 1:] > x, axis=1)
            )
    else:
        for i, x in enumerate(X_test):
            is_efficient[i] = not (
                npany(npall(x >= X_base, axis=1))
                and npany(npany(x > X_base, axis=1))
            )

    return is_efficient
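# Illustrative sketch for the relative form: a test point is kept only if no
# base point weakly dominates it. The arrays are demonstration values only.
from numpy import array

X_test = array([[1.0, 1.0],
                [3.0, 3.0]])
X_base = array([[2.0, 2.0],
                [0.5, 4.0]])
print(pareto_min_rel(X_test, X_base))
# expected output: [ True False] -- (3, 3) is dominated by base point (2, 2)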
from numpy import all as npall, ascontiguousarray, isfinite


def check_outcome(y, lik):
    if not isinstance(lik, (list, tuple)):
        lik = (lik,)

    str_err = "The first item of ``lik`` has to be a string."
    if not isinstance(lik[0], str):
        raise ValueError(str_err)

    lik_name = lik[0].lower()

    y = ascontiguousarray(y, float)
    lik = lik[:1] + tuple(ascontiguousarray(i, float) for i in lik[1:])

    if not npall(isfinite(y)):
        raise ValueError("Outcome must be finite.")

    if lik_name == "poisson":
        return _check_poisson_outcome(y)

    if lik_name in ("binomial", "normal"):
        if len(lik) != 2:
            msg = "``lik`` must be a tuple of two elements for"
            msg += " {} likelihood.".format(lik_name[0].upper() + lik_name[1:])
            raise ValueError(msg)

    return y
from numpy import all as npall, isfinite


def check_economic_qs(QS):
    if not isinstance(QS, tuple):
        raise ValueError("QS must be a tuple.")

    if not isinstance(QS[0], tuple):
        raise ValueError("QS[0] must be a tuple.")

    fmsg = "QS has non-finite values."

    if not all(npall(isfinite(Q)) for Q in QS[0]):
        raise ValueError(fmsg)

    if not npall(isfinite(QS[1])):
        raise ValueError(fmsg)

    return QS
from numpy import all as npall, isfinite


def check_covariates(X):
    if not X.ndim == 2:
        raise ValueError("Covariates must be a bidimensional array.")

    if not npall(isfinite(X)):
        raise ValueError("Covariates must have finite values only.")

    return X
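# Quick demonstration of the validator above on toy inputs; the arrays are
# illustrative, not from any real dataset.
from numpy import array, nan

check_covariates(array([[1.0, 0.5], [1.0, 0.3]]))  # passes: 2-d and finite
try:
    check_covariates(array([[1.0, nan]]))
except ValueError as err:
    print(err)  # Covariates must have finite values only.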
def convert(self, verbose=False, maxrows=None):
    """Method to loop through the data and convert it"""
    # docstring is extended below
    dataslice = slice(0, maxrows)
    info = LoopInfo(total=len(self.data.ifiledata[0, dataslice]),
                    t0=datetime.now(),
                    verbose=verbose)
    wrongvalues = 0
    wrongarea = 0
    for datatuple in izip(*self.data.ifiledata[:, dataslice]):
        info.info()
        fields = npall(
            # normal gridcols
            [
                self.data.maskdata[self.gridcols.index(col)] ==
                datatuple[self.usecols.index(col)].astype(
                    self.data.maskdata[self.gridcols.index(col)].dtype)
                for col in self.gridcols if col not in self.aliasdict
            ] +
            # alias cols
            [
                self.data.maskdata[self.gridcols.index(col)] ==
                self.aliasdict[col][datatuple[self.usecols.index(col)]]
                for col in self.aliasdict
            ],
            axis=0)
        itime = self.timefunc([
            datatuple[self.usecols.index(col)]
            for col in sorted(self.time.keys())
        ])
        for catcol in self.catadddict:
            for adderinstance in self.catadddict[catcol][tuple(
                    datatuple[self.usecols.index(col)] for col in catcol)]:
                adderinstance.addfunc(
                    itime, fields,
                    datatuple[self.usecols.index(self.valcol)].astype(float))
        for col in self.defaultadddict:
            for adderinstance in self.defaultadddict[col][datatuple[
                    self.usecols.index(col)]]:
                adderinstance.addfunc(
                    itime, fields,
                    datatuple[self.usecols.index(self.valcol)].astype(float))
        if not npany(fields):
            wrongvalues += 1
            wrongarea += float(datatuple[self.usecols.index(self.valcol)])
    if verbose:
        print('\nNumber of wrong values: %i' % wrongvalues)
        print('Missed Area [ha]: %6.4f' % wrongarea)
        print('Missed Area: %1.3e %%' % (
            wrongarea / sum(self.data.ifiledata[self.usecols.index(
                self.valcol)].astype(float)) * 100.))
def y(self, y):
    """ Set the outcome array.

    Parameters
    ----------
    y : array_like
        Outcome array.
    """
    from numpy import all as npall, isfinite

    if not npall(isfinite(y)):
        raise ValueError("Phenotype values must be finite.")
    self._glmm = None
    self._y = normalise_extreme_values(y, "normal")
def _drop_missing(self) -> ndarray:
    data = (self.dependent, self.exog, self.endog, self.instruments,
            self.weights)
    missing = any(c_[[dh.isnull for dh in data]], 0)  # type: ndarray
    if any(missing):
        if npall(missing):
            raise ValueError('All observations contain missing data. '
                             'Model cannot be estimated.')
        self.dependent.drop(missing)
        self.exog.drop(missing)
        self.endog.drop(missing)
        self.instruments.drop(missing)
        self.weights.drop(missing)
    missing_warning(missing)
    return missing
from numpy import all as npall


def mask(array, value):
    """
    Creates a mask from an array against a value, depending on value's nature:

    * Numbers (any number type): mask(arr, 1) is the same as arr == 1
    * Lists or tuples (flat, containing only numbers):
      mask(arr, (1., 1., 1.)) compares each pixel against a color.
      If your image has an alpha channel, you must compare like:
      mask(arr, (1., 1., 1., 1.))
      Remember to respect the dimensions or numpy will complain.
    * IN (range) instances: mask(arr, IN(0.5, 1., False, True)) will make a
      mask for pixels greater than or equal to 0.5 and lower than 1.

    :param array:
    :param value:
    :return:
    """
    if _valid_real(value):
        return array == value
    elif isinstance(value, (list, tuple)):
        if not all(_valid_real(v) for v in value):
            raise TypeError("Cannot mask against list or tuples having values "
                            "other than valid numbers, or being "
                            "multi-dimensional or irregular sequences")
        return npall(array == value, axis=2)
    elif isinstance(value, IN):
        return value.contains(array)
    else:
        raise TypeError("Cannot take a mask from this argument. Only "
                        "numpy-accepted numeric types, tuples, lists, or "
                        "`IN` instances are accepted")
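# Sketch of mask() on a tiny 2x2 RGB image. This assumes _valid_real() (a
# helper defined elsewhere in the module) accepts plain Python floats, as its
# use above suggests; the pixel values are made up for illustration.
from numpy import array as nparray

img = nparray([[[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]],
               [[1.0, 1.0, 1.0], [0.5, 0.5, 0.5]]])
print(mask(img, (1.0, 1.0, 1.0)))  # True where the pixel is pure white
print(mask(img[..., 0], 0.5))      # scalar comparison on a single channel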
def append(self, m, name=None):
    from numpy import all as npall, asarray, atleast_2d, isfinite
    from glimix_core.mean import LinearMean

    m = asarray(m, float)
    if m.ndim > 2:
        raise ValueError("Fixed-effect has to have between one and two dimensions.")

    if not npall(isfinite(m)):
        raise ValueError("Fixed-effect values must be finite.")

    m = atleast_2d(m.T).T
    mean = LinearMean(m.shape[1])
    mean.set_data(m)

    n = len(self._fixed_effects["impl"])
    if name is None:
        name = "unnamed-fe-{}".format(n)

    self._fixed_effects["impl"].append(mean)
    self._fixed_effects["user"].append(user_mean.LinearMean(mean))
    self._fixed_effects["user"][-1].name = name

    self._mean = None
def append(self, K, name=None):
    from numpy import all as npall, isfinite, issubdtype, number
    from glimix_core.cov import GivenCov

    if not issubdtype(K.dtype, number):
        raise ValueError("covariance-matrix is not numeric.")

    if K.ndim != 2:
        raise ValueError("Covariance-matrix has to have two dimensions.")

    if not npall(isfinite(K)):
        raise ValueError("Covariance-matrix values must be finite.")

    cov = GivenCov(K)
    cov.set_data((self._sample_idx, self._sample_idx))

    n = len(self._covariance_matrices["impl"])
    if name is None:
        name = "unnamed-re-{}".format(n)

    self._covariance_matrices["impl"].append(cov)
    self._covariance_matrices["user"].append(user_cov.GivenCov(cov))
    self._covariance_matrices["user"][-1].name = name

    self._cov = None
def run_script(script):
    # NOTE: the original opening of this helper was truncated; the signature
    # and the Popen call below are assumed from the surviving tail and from
    # how run_script is invoked further down.
    proc = subprocess.Popen(script,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            stdin=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return stdout, stderr


parser = argparse.ArgumentParser()
parser.add_argument('args', nargs='+')
ao = parser.parse_args()
assert len(ao.args) == 1, 'one arg allowed: field_id'
field_id = ao.args[0]

arg0 = 'checkplotlist.py'
arg1 = 'pkl'
if field_id == 'weird':
    cpdir = '../data/weirdpkls'
elif field_id == 'deb':
    cpdir = '../data/debpkls'
else:
    cpdir = '../data/CPs_cut/G' + field_id + '_20'
arg2 = cpdir

cpnames = [f for f in os.listdir(cpdir) if 'checkplot' in f]

# Extract checkplots .pkl.gz to .pkl files:
if not npall(nparray([cpn.endswith('.pkl') for cpn in cpnames])):
    print('gunzip ' + cpdir + '/*pkl.gz')
    stdout, stderr = run_script('gunzip ' + cpdir + '/*pkl.gz')

cpl.main([arg0, arg1, arg2])
from numpy import all as npall, dot, greater_equal, mod


def myop(a, b):
    r = mod(dot(a, b), MOD)
    assert npall(greater_equal(r, 0))
    return r
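# Usage sketch: myop is a matrix product reduced modulo MOD, so chaining it
# (e.g. in repeated squaring) keeps entries bounded. MOD is a module-level
# constant elsewhere in the original; the value below is an assumed stand-in.
from numpy import array

MOD = 10**9 + 7  # assumed value for illustration only

F = array([[1, 1],
           [1, 0]])
print(myop(F, F))  # [[2 1] [1 1]], i.e. F @ F taken mod MOD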
def mask_signal(times, mags, errs,
                signalperiod,
                signalepoch,
                magsarefluxes=False,
                maskphases=[0, 0, 0.5, 1.0],
                maskphaselength=0.1,
                plotfit=None,
                plotfitphasedlconly=True,
                sigclip=30.0):
    '''This removes repeating signals in the magnitude time series.

    Useful for masking transit signals in light curves to search for other
    variability.
    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # now phase the light curve using the period and epoch provided
    phases = ((stimes - signalepoch) / signalperiod -
              npfloor((stimes - signalepoch) / signalperiod))

    # mask the requested phases using the mask length (in phase units)
    # this gets all the masks into one array
    masks = nparray([(npabs(phases - x) > maskphaselength)
                     for x in maskphases])

    # this flattens the masks to a single array for all combinations
    masks = npall(masks, axis=0)

    # apply the mask to the times, mags, and errs
    mphases = phases[masks]
    mtimes = stimes[masks]
    mmags = smags[masks]
    merrs = serrs[masks]

    returndict = {'mphases': mphases,
                  'mtimes': mtimes,
                  'mmags': mmags,
                  'merrs': merrs}

    # make the fit plot if required
    if plotfit and (isinstance(plotfit, str) or isinstance(plotfit, strio)):

        if plotfitphasedlconly:
            plt.figure(figsize=(10, 4.8))
        else:
            plt.figure(figsize=(16, 9.6))

        if plotfitphasedlconly:

            # phased series before whitening
            plt.subplot(121)
            plt.plot(phases, smags,
                     marker='.', color='k', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(122)
            plt.plot(mphases, mmags,
                     marker='.', color='g', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        else:

            # time series before whitening
            plt.subplot(221)
            plt.plot(stimes, smags,
                     marker='.', color='k', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('JD')
            plt.title('LC before signal masking')

            # time series after whitening
            plt.subplot(222)
            plt.plot(mtimes, mmags,
                     marker='.', color='g', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('JD')
            plt.title('LC after signal masking')

            # phased series before whitening
            plt.subplot(223)
            plt.plot(phases, smags,
                     marker='.', color='k', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(224)
            plt.plot(mphases, mmags,
                     marker='.', color='g', linestyle='None',
                     markersize=2.0, markeredgewidth=0)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')
            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        plt.tight_layout()
        plt.savefig(plotfit, format='png', pad_inches=0.0)
        plt.close()

        if isinstance(plotfit, str) or isinstance(plotfit, strio):
            returndict['fitplotfile'] = plotfit

    return returndict
from numpy import all as npall
from numpy.linalg import eigh


def is_positive_semi_definite(A, tol=1e-8):
    vals, _ = eigh(A)
    return npall(vals > -tol), vals
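# Usage sketch: a Gram matrix B^T B is positive semi-definite by construction,
# so the check passes; its negation fails. B is an arbitrary example matrix.
from numpy import array

B = array([[1.0, 2.0],
           [3.0, 4.0]])
A = B.T @ B
print(is_positive_semi_definite(A)[0])   # True
print(is_positive_semi_definite(-A)[0])  # False: -A has negative eigenvalues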
def run_emcee(hm_options, sampling_options, args):
    # load halo model setup
    function, params, param_types, prior_types, \
        val1, val2, val3, val4, params_join, hm_functions, \
        starting, meta_names, fits_format = hm_options
    # load MCMC sampler setup
    datafile, datacols, covfile, covcols, exclude_bins, output, \
        sampler, nwalkers, nsteps, nburn, \
        thin, k, threads, sampler_type, update_freq = sampling_options

    #function = cloud.serialization.cloudpickle.dumps(model)
    #del model
    #print function
    #pickle.dumps(function)
    #print 'pickled'

    if args.demo:
        print ' ** Running demo only **'
    elif isfile(output):
        msg = 'Warning: output file %s exists. Overwrite? [y/N] ' %output
        answer = raw_input(msg)
        if len(answer) == 0:
            exit()
        if answer.lower() not in ('y', 'yes'):
            exit()
    if not args.demo:
        print 'Started -', ctime()

    #load data files
    Ndatafiles = len(datafile)
    R, esd = sampling_utils.load_datapoints(datafile, datacols, exclude_bins)
    Nobsbins, Nrbins = esd.shape
    rng_obsbins = xrange(Nobsbins)
    rng_rbins = xrange(Nrbins)
    # load covariance
    cov = sampling_utils.load_covariance(covfile, covcols,
                                         Nobsbins, Nrbins, exclude_bins)
    cov, icov, likenorm, esd_err, cov2d = cov
    # needed for offset central profile
    R, Rrange = sampling_utils.setup_integrand(R, k)
    angles = numpy.linspace(0, 2*pi, 540)
    val1 = numpy.append(val1, [Rrange, angles])

    # identify fixed and free parameters
    jfixed = (prior_types == 'fixed') | (prior_types == 'read') | \
             (prior_types == 'function')
    jfree = ~jfixed
    ndim = len(val1[jfree])
    if len(starting) != ndim:
        msg = 'ERROR: Not all starting points defined for free parameters.'
        print msg
        exit()
    print 'starting =', starting

    # identify the function. Raises an AttributeError if not found
    #function = model.model()
    #sat_profile = params.sat_profile()
    #group_profile = params.group_profile()
    #function = model

    if not args.demo:
        hdrfile = '.'.join(output.split('.')[:-1]) + '.hdr'
        print 'Printing header information to', hdrfile
        hdr = open(hdrfile, 'w')
        print >>hdr, 'Started', ctime()
        print >>hdr, 'datafile', ','.join(datafile)
        print >>hdr, 'cols', ','.join([str(c) for c in datacols])
        print >>hdr, 'covfile', covfile
        print >>hdr, 'covcols', ','.join([str(c) for c in covcols])
        if exclude_bins is not None:
            print >>hdr, 'exclude_bins', ','.join([str(c) for c in exclude_bins])
        print >>hdr, 'model %s' %function
        for p, pt, v1, v2, v3, v4 in izip(params, prior_types,
                                          val1, val2, val3, val4):
            try:
                line = '%s %s ' %(p, pt)
                line += ','.join(numpy.array(v1, dtype=str))
            except TypeError:
                line = '%s %s %s %s %s %s' \
                       %(p, pt, str(v1), str(v2), str(v3), str(v4))
            print >>hdr, line
        print >>hdr, 'nwalkers {0:5d}'.format(nwalkers)
        print >>hdr, 'nsteps {0:5d}'.format(nsteps)
        print >>hdr, 'nburn {0:5d}'.format(nburn)
        print >>hdr, 'thin {0:5d}'.format(thin)
        hdr.close()

    # are we just running a demo?
    if args.demo:
        import pylab
        from matplotlib import cm

        def plot_demo(ax, Ri, gt, gt_err, f, fsat, fhost):
            Ri = Ri[1:]
            ax.errorbar(Ri, gt, yerr=gt_err, fmt='ko', ms=10)
            ax.plot(Ri, f, 'r-', lw=3)
            ax.plot(Ri, fsat, 'b--', lw=2)
            ax.plot(Ri, fhost, 'g-.', lw=2)
            ax.set_xscale('log')
            for x, fi, gti, gei in izip(Ri, f, gt, gt_err):
                ax.annotate('{0:.2f}'.format((fi-gti)/gei), xy=(x, gti+20),
                            ha='center', va='bottom', color='r')
            return

        val1[jfree] = starting
        if params_join is not None:
            v1 = list(val1)
            for p in params_join:
                # without this list comprehension numpy can't keep track of the
                # data type.
                # I believe this is because there are elements of
                # different types in val1 and therefore its type is not
                # well defined (so it gets "object")
                v1[p[0]] = array([val1[pj] for pj in p])
            # need to delete elements backwards to preserve indices
            aux = [[v1.pop(pj) for pj in p[1:][::-1]]
                   for p in params_join[::-1]]
            val1 = v1  #array(v1) ??
        model = function(val1, R)
        residuals = esd - model[0]
        dof = esd.size - starting.size - 1
        chi2 = array([dot(residuals[m], dot(icov[m][n], residuals[n]))
                      for m in rng_obsbins for n in rng_obsbins]).sum()
        print ' ** chi2 = %.2f/%d **' %(chi2, dof)
        fig, axes = pylab.subplots(figsize=(4*Ndatafiles, 4), ncols=Ndatafiles)
        if Ndatafiles == 1:
            plot_demo(axes, R, esd, esd_err, model[0], model[1], model[2])
        else:
            for i in izip(axes, R, esd, esd_err, model[0], model[1], model[2]):
                plot_demo(*i)
        if npall(esd - esd_err > 0):
            for ax in axes:
                ax.set_yscale('log')
        fig.tight_layout(w_pad=0.01)
        pylab.show()
        fig, axes = pylab.subplots(figsize=(8, 8), nrows=cov.shape[0],
                                   ncols=cov.shape[0])
        for m, axm in enumerate(axes):
            for n, axmn in enumerate(axm):
                axmn.imshow(cov[m][-n-1][::-1], interpolation='nearest',
                            cmap=cm.CMRmap_r)
        fig.tight_layout()
        pylab.show()
        exit()

    # set up starting point for all walkers
    po = starting * numpy.random.uniform(0.99, 1.01, size=(nwalkers, ndim))
    lnprior = zeros(ndim)
    mshape = meta_names.shape
    # this assumes that all parameters are floats -- can't imagine a
    # different scenario
    metadata = [[] for m in meta_names]
    for j in xrange(len(metadata)):
        for f in fits_format[j]:
            if len(f) == 1:
                metadata[j].append(zeros(nwalkers*nsteps/thin))
            else:
                size = [nwalkers*nsteps/thin, int(f[:-1])]
                # only for ESDs. Note that there will be trouble if outputs
                # other than the ESD have the same length, so avoid them at
                # all cost.
                if exclude_bins is not None \
                        and size[1] == esd.shape[-1]+len(exclude_bins):
                    size[1] -= len(exclude_bins)
                metadata[j].append(zeros(size))
    metadata = [array(m) for m in metadata]
    fail_value = []
    for m in metadata:
        shape = list(m.shape)
        shape.remove(max(shape))
        fail_value.append(zeros(shape))
    # the last numbers are data chi2, lnLdata, lnPderived
    for i in xrange(4):
        fail_value.append(9999)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=threads,
                                    args=(R, esd, icov, function,
                                          params, prior_types[jfree],
                                          val1, val2, val3, val4, params_join,
                                          jfree, lnprior, likenorm,
                                          rng_obsbins, fail_value,
                                          array, dot, inf, izip, outer, pi))
                                          #isfinite,log,log10
                                          #outer,sqrt,zeros))
    # burn-in
    if nburn > 0:
        pos, prob, state, blobs = sampler.run_mcmc(po, nburn)
        sampler.reset()
        print '{0} Burn-in steps finished ({1})'.format(nburn, ctime())
    else:
        pos = po

    # incrementally save output
    chi2 = [zeros(nwalkers*nsteps/thin) for i in xrange(4)]
    nwritten = 0
    for i, result in enumerate(sampler.sample(pos, iterations=nsteps,
                                              thin=thin)):
        # make sure that nwalkers is a factor of this number!
        if i*nwalkers % update_freq == nwalkers:
            out = write_to_fits(output, chi2, sampler, nwalkers, thin, params,
                                jfree, metadata, meta_names, i, nwritten,
                                Nobsbins, array, BinTableHDU, Column, ctime,
                                enumerate, isfile, izip, transpose, xrange)
            metadata, nwritten = out

    hdr = open(hdrfile, 'a')
    try:
        print 'acceptance_fraction =', sampler.acceptance_fraction
        print >>hdr, 'acceptance_fraction =',
        for af in sampler.acceptance_fraction:
            print >>hdr, af,
    except ImportError:
        pass
    try:
        print 'acor =', sampler.acor
        print >>hdr, '\nacor =',
        for ac in sampler.acor:
            print >>hdr, ac,
    except ImportError:
        pass
    try:
        print 'acor_time =', sampler.get_autocorr_time()
        print >>hdr, '\nacor_time =',
        for act in sampler.get_autocorr_time():
            print >>hdr, act,
    except AttributeError:
        pass
    print >>hdr, '\nFinished', ctime()
    hdr.close()
    print 'Saved to', hdrfile

    cmd = 'mv {0} {1}'.format(output, output.replace('.fits', '.temp.fits'))
    print cmd
    os.system(cmd)
    print 'Saving everything to {0}...'.format(output)
    print i, nwalkers, nwritten
    write_to_fits(output, chi2, sampler, nwalkers, thin, params, jfree,
                  metadata, meta_names, i+1, nwritten, Nobsbins, array,
                  BinTableHDU, Column, ctime, enumerate, isfile, izip,
                  transpose, xrange)
    os.remove(output.replace('.fits', '.temp.fits'))
    print 'Everything saved to {0}!'.format(output)
    return
from numpy import all as npall, int64 as npint64, zeros as npzeros
from scipy.signal import argrelmax, argrelmin


def _get_acf_peakheights(lags, acf, npeaks=20, searchinterval=1):
    '''This calculates the relative peak heights for the first npeaks in ACF.

    Usually, the first peak or the second peak (if its peak height > first
    peak) corresponds to the correct lag. When we know the correct lag, the
    period is then::

        bestperiod = time[lags == bestlag] - time[0]

    Parameters
    ----------

    lags : np.array
        An array of lags that the ACF is calculated at.

    acf : np.array
        The array containing the ACF values.

    npeaks : int
        The maximum number of peaks to consider when finding peak heights.

    searchinterval : int
        From `scipy.signal.argrelmax`: "How many points on each side to use
        for the comparison to consider comparator(n, n+x) to be True." This
        effectively sets how many points on each side of the current peak
        will be used to check if the current peak is the local maximum.

    Returns
    -------

    dict
        This returns a dict of the following form::

            {'maxinds':the indices of the lag array where maxes are,
             'maxacfs':the ACF values at each max,
             'maxlags':the lag values at each max,
             'mininds':the indices of the lag array where mins are,
             'minacfs':the ACF values at each min,
             'minlags':the lag values at each min,
             'relpeakheights':the relative peak heights of each rel. ACF peak,
             'relpeaklags':the lags at each rel. ACF peak found,
             'peakindices':the indices of arrays where each rel. ACF peak is,
             'bestlag':the lag value with the largest rel. ACF peak height,
             'bestpeakheight':the largest rel. ACF peak height,
             'bestpeakindex':the largest rel. ACF peak's number in all peaks}

    '''

    maxinds = argrelmax(acf, order=searchinterval)[0]
    maxacfs = acf[maxinds]
    maxlags = lags[maxinds]
    mininds = argrelmin(acf, order=searchinterval)[0]
    minacfs = acf[mininds]
    minlags = lags[mininds]

    relpeakheights = npzeros(npeaks)
    relpeaklags = npzeros(npeaks, dtype=npint64)
    peakindices = npzeros(npeaks, dtype=npint64)

    for peakind, mxi in enumerate(maxinds[:npeaks]):

        # check if there are no mins to the left
        # throw away this peak because it's probably spurious
        # (FIXME: is this OK?)
        if npall(mxi < mininds):
            continue

        leftminind = mininds[mininds < mxi][-1]   # the last index to the left
        rightminind = mininds[mininds > mxi][0]   # the first index to the right
        relpeakheights[peakind] = (acf[mxi] -
                                   (acf[leftminind] + acf[rightminind]) / 2.0)
        relpeaklags[peakind] = lags[mxi]
        peakindices[peakind] = peakind

    # figure out the bestperiod if possible
    if relpeakheights[0] > relpeakheights[1]:
        bestlag = relpeaklags[0]
        bestpeakheight = relpeakheights[0]
        bestpeakindex = peakindices[0]
    else:
        bestlag = relpeaklags[1]
        bestpeakheight = relpeakheights[1]
        bestpeakindex = peakindices[1]

    return {'maxinds': maxinds,
            'maxacfs': maxacfs,
            'maxlags': maxlags,
            'mininds': mininds,
            'minacfs': minacfs,
            'minlags': minlags,
            'relpeakheights': relpeakheights,
            'relpeaklags': relpeaklags,
            'peakindices': peakindices,
            'bestlag': bestlag,
            'bestpeakheight': bestpeakheight,
            'bestpeakindex': bestpeakindex}
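# Sketch: run the peak finder on a synthetic damped-cosine ACF with a true
# lag of 50 samples; the strongest relative peak should land on that lag.
# The signal parameters are made up for illustration.
from numpy import arange, cos, exp, pi

lags = arange(300)
acf = exp(-lags / 200.0) * cos(2 * pi * lags / 50.0)
peaks = _get_acf_peakheights(lags, acf, npeaks=10)
print(peaks['bestlag'])  # expected: 50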
def test_recvAndActvByOneInput(self):
    result = self.hiddenL.recvAndActvByOneInput(array((1, 1, 1)))
    self.assertAlmostEqual(
        result[0],
        1.0 / (1 + npe ** (-npsum(array((0.4, 0.5, 0.6, 0.7)) *
                                  array((1, 1, 1, 1))))))
    self.assertAlmostEqual(
        result[1],
        1.0 / (1 + npe ** (-npsum(array((0.8, 0.9, 1, 1.1)) *
                                  array((1, 1, 1, 1))))))
    self.assertAlmostEqual(
        result[2],
        1.0 / (1 + npe ** (-npsum(array((1.2, 1.3, 1.4, 1.5)) *
                                  array((1, 1, 1, 1))))))
    self.assertTrue(npall(self.hiddenL == array((1, 1, 1, 1))))
def create_from_file(cls, name, infile, prod, metadata, timeIndex,
                     transposeData=False, preprocessing=None):
    function = "(DataLayer.create_from_file)"

    #Open netCDF file
    try:
        dataset = Dataset(infile)
    except IOError as e:
        print("\n%s: %s inputfile %s does not exist" % (function, name, infile))
        print(e.args)

    #Check netCDF file: Prints some info when in DEBUG mode.
    check_input(infile, prod, DataLayer.DEBUG)

    #Open netCDF file and check variable exists.
    dataset = Dataset(infile)
    ncVariable = dataset.variables[prod]

    #Find the right time dimension index and slice/copy the data appropriately
    dims = ncVariable.dimensions
    #Two spatial dimensions and a time dimension
    if len(dims) == 3:
        if metadata.timeDimensionName in dims:
            if dims.index(metadata.timeDimensionName) == 0:
                data = ncVariable[timeIndex, :, :]
            elif dims.index(metadata.timeDimensionName) == 1:
                data = ncVariable[:, timeIndex, :]
            elif dims.index(metadata.timeDimensionName) == 2:
                data = ncVariable[:, :, timeIndex]
        else:
            raise RuntimeError(
                "Time dimension name ('%s') for Datalayer '%s' was not found. "
                "Try setting this manually in the configuration file using "
                "(for example) datalayername_timeDimensionName = time"
                % (metadata.timeDimensionName, name))
    #No time dimension anyway
    elif len(dims) == 2:
        data = ncVariable[:]
    else:
        raise RuntimeError(
            "Invalid number of dimensions (%d) when reading datalayer '%s' from '%s'"
            % (len(dims), name, infile))

    #TODO: APPLY PREPROCESSING HERE instead of later.
    #Extract just the dimensions we want.
    #requiredDims = [None if v in ['latitude', 'lat', 'longitude', 'lon'] else 0 for v in ncVariable.dimensions]
    #data = squeeze(ncVariable[slice(*requiredDims)]);

    if data.shape == (1, 1, 1):  #Remove temporal dimension
        data.shape = (1, 1)
    if data.shape != (1, 1):  #Don't squeeze if there is a single lon/lat point or we'll end up with an empty array
        data = squeeze(data)  #Remove any 1d dimensions.

    #check number of dimensions
    dataDims = data.shape
    if len(dataDims) != 2:
        raise ValueError(
            "\n%sError: Unexpected number of dimensions (%d) in %s when reading in %s variable from %s"
            % (function, len(dataDims), name, prod, infile))

    #Convert from a masked array (np.ma.array) to a plain np.array
    data = array(data)

    #Seems to be a bug in netCDF4 which sometimes causes unmasked variables to
    #be completely masked: https://github.com/Unidata/netcdf4-python/issues/707
    #Needs looking into, but for now this work-around fixes things:
    #TODO: now irrelevant as we convert to standard arrays?
    if ma.isMaskedArray(data) and npall(data.mask):
        data.mask = False  #Remove the mask...

    #If necessary flip the data
    #TODO: remove this as this should be handled in the pre-processing functions by the user
    data, flipped = flip_data(dataset, data, name)  #If different from takahashi orientation, flip data.

    #Extract fill value from netCDF if it exists. Note that this will
    #overwrite the default or config-specified fill value.
    if hasattr(ncVariable, "_FillValue"):
        fillValue = ncVariable._FillValue
    elif hasattr(ncVariable, "fill_value"):
        fillValue = ncVariable.fill_value
    else:
        fillValue = DataLayer.missing_value

    return cls(name, data, metadata, fillValue, preprocessing=preprocessing)
def periodicity_analysis(out, DSP_lim=None, field_id=None, DEBiL_write=False):
    '''
    Given a specified field (e.g., G199), previous steps have created
    data/HATpipe/blsanalsums/cuts for that field and neighbors (imposing cuts
    on DSP_lim, Ntra_min, and NTV). They've also downloaded the appropriate
    LCs.

    Now rerun the periodicity analysis for these LCs (Box-Least-Squares and
    Stellingwerf Phase Dispersion Minimization), and make eb_checkplots for
    subsequent looking-at ("visual inspection").

    Args:
        DEBiL_write (bool): whether to write a "name and best BLS period" file
            (in basically all use cases, not necessary).
    '''
    assert type(field_id) == str
    print('\nBeginning periodicity analysis...\n\n')

    # File name format: HAT-199-0025234-V0-DR0-hatlc.sqlite.gz
    field_name = 'G' + field_id  # e.g., 'G081'
    LC_read_path = '../data/LCs/' + field_name + '/'  # where sqlitecurves already exist
    tail_str = '-V0-DR0-hatlc.sqlite.gz'
    # paths for LCs and EB checkplots
    LC_write_path = '../data/LCs_cut/' + field_name + '_' + str(DSP_lim)
    CP_write_path = '../data/CPs_cut/' + field_name + '_' + str(DSP_lim)

    for outpath in [LC_write_path,
                    LC_write_path + '/periodcut',
                    LC_write_path + '/onedaycut',
                    LC_write_path + '/shortcoveragecut',
                    CP_write_path,
                    CP_write_path + '/periodcut',
                    CP_write_path + '/onedaycut',
                    CP_write_path + '/shortcoveragecut']:
        if not os.path.isdir(outpath):
            os.makedirs(outpath)

    for ix, hatid in enumerate(out.index):
        if np.all(out.ix[hatid]['has_sqlc']):
            LC_cut_path = LC_write_path + '/' + hatid + tail_str
            LC_periodcut_path = LC_write_path + '/periodcut/' + hatid + tail_str
            LC_onedaycut_path = LC_write_path + '/onedaycut/' + hatid + tail_str
            LC_shortcoveragecut_path = LC_write_path + '/shortcoveragecut/' + \
                hatid + tail_str
            CP_cut_path = CP_write_path + '/' + hatid + '.png'
            CP_periodcut_path = CP_write_path + '/periodcut/' + hatid + '.png'
            CP_onedaycut_path = CP_write_path + '/onedaycut/' + hatid + '.png'
            CP_shortcoveragecut_path = CP_write_path + '/shortcoveragecut/' + \
                hatid + '.png'

            if (not os.path.exists(CP_cut_path)) \
                    and (not os.path.exists(CP_periodcut_path)) \
                    and (not os.path.exists(CP_onedaycut_path)) \
                    and (not os.path.exists(CP_shortcoveragecut_path)):

                # Get sqlitecurve data.
                obj_path = LC_read_path + hatid + tail_str
                lcd, msg = hatlc.read_and_filter_sqlitecurve(obj_path)
                # Make sure all observations are at the same zero-point.
                normlcd = hatlc.normalize_lcdict(lcd)
                # Select recommended EPD aperture with 'G' flag. (Alternate
                # approach: take the smallest aperture to minimize crowding).
                ap = next(iter(lcd['lcbestaperture']['ap']))
                times = normlcd['rjd'][normlcd['aiq_' + ap] == 'G']
                mags = normlcd['aep_' + ap][normlcd['aiq_' + ap] == 'G']
                errs = normlcd['aie_' + ap][normlcd['aiq_' + ap] == 'G']

                # Period analysis: Stellingwerf phase dispersion minimization
                # and rerun Box-Least-Squares. Range of interesting periods:
                # 0.5days-100days. BLS can only search for periods < half the
                # light curve observing baseline. (N.b. 100d signals are
                # basically always going to be stellar rotation)
                smallest_p = 0.5
                biggest_p = min((times[-1] - times[0]) / 2.01, 100.)
                print('\nStellingwerf...\n')
                spdmp = periodbase.stellingwerf_pdm(
                    times, mags, errs,
                    autofreq=True,
                    startp=smallest_p,
                    endp=biggest_p,
                    normalize=False,
                    stepsize=1.0e-4,
                    phasebinsize=0.05,
                    mindetperbin=9,
                    nbestpeaks=5,
                    periodepsilon=0.1,  # 0.1days
                    sigclip=None,  # no sigma clipping
                    nworkers=None)

                print('\nBLS...\n')
                blsp = periodbase.bls_parallel_pfind(
                    times, mags, errs,
                    startp=smallest_p,
                    endp=biggest_p,  # don't search full timebase
                    stepsize=1.0e-5,
                    mintransitduration=0.01,  # minimum transit length in phase
                    maxtransitduration=0.7,   # maximum transit length in phase
                    nphasebins=200,
                    autofreq=False,  # figure out f0, nf, and df automatically
                    nbestpeaks=5,
                    periodepsilon=0.1,  # 0.1
                    nworkers=None,
                    sigclip=None)

                # Make and save checkplot to be looked at.
                cp = plotbase.make_eb_checkplot(
                    spdmp, blsp, times, mags, errs,
                    objectinfo=normlcd['objectinfo'],
                    findercmap='gray_r',
                    normto='globalmedian',
                    normmingap=4.0,
                    outfile=CP_cut_path,
                    sigclip=None,
                    varepoch='min',
                    phasewrap=True,
                    phasesort=True,
                    phasebin=0.002,
                    plotxlim=[-0.6, 0.6])

                # Copy LCs with DSP>DSP_lim to /data/LCs_cut/G???_??/
                if not os.path.exists(LC_cut_path):
                    copyfile(obj_path, LC_cut_path)
                    print('Copying {} -> {}\n'.format(obj_path, LC_cut_path))

                #### CUTS ####
                maxperiod = 30.  # days
                bestperiods = spdmp['nbestperiods'] + blsp['nbestperiods']
                best3periods = spdmp['nbestperiods'][:3] + \
                    blsp['nbestperiods'][:3]
                bparr, b3parr = np.array(bestperiods), np.array(best3periods)
                minperiod = 0.5002  # days; else this harmonic of 1d happens
                proxto1d_s, proxto1d_m, proxto1d_b = 0.01, 0.015, 0.02  # days
                bestbls, bestspdm = blsp['bestperiod'], spdmp['bestperiod']
                mindayscoverage = 3.
                cadence = 4.  # minutes
                minnumpoints = mindayscoverage * 24 * 60 / cadence

                # (If 5 of the 6 best peaks are above max period (30 days)), OR
                # (If all of the SPDM peaks are above max period and the BLS
                # peaks are not, and all the BLS peaks less than the max period
                # are within 0.1days separate from e/other) OR
                # (The same, with BLS/SPDM switched), OR
                # (The difference between all SPDM is <0.1day and the
                # difference between all BLS is <0.1day)
                #
                # [n.b. latter broad-peak behavior happens b/c of stellar rotn]
                sb3parr = np.sort(b3parr)
                spdmn = np.array(spdmp['nbestperiods'][:3])
                blsn = np.array(blsp['nbestperiods'][:3])
                ps = 0.2  # peak_separation, days
                b3pint = b3parr[(b3parr < maxperiod) &
                                (b3parr > 2 * minperiod)]  # interesting periods

                if (npall(sb3parr[1:] > maxperiod)
                        or ((npall(spdmn > maxperiod) and
                             not npall(blsn > maxperiod)) and
                            npall(abs(npdiff(blsn[blsn < maxperiod])) < ps))
                        or ((npall(blsn > maxperiod) and
                             not npall(spdmn > maxperiod)) and
                            npall(abs(npdiff(spdmn[spdmn < maxperiod])) < ps))
                        or (npall(abs(npdiff(spdmn)) < ps) and
                            npall(abs(npdiff(blsn)) < ps))):

                    os.rename(LC_cut_path, LC_periodcut_path)
                    os.rename(CP_cut_path, CP_periodcut_path)

                # All 6 best peaks below max period (and above 1d) are within
                # 0.02days of a multiple of 1, OR
                # Both the BLS and SPDM max peak are within 0.015d of a
                # multiple of 1, OR
                # At least one of the best BLS&SPDM peaks are within 0.015d of
                # one, and of the remaining peaks > 1day, the rest are within
                # 0.03days of multiples of one.
                elif ((npall(npisclose(
                        npminimum(b3pint % 1., abs((b3pint % 1.) - 1.)),
                        0., atol=proxto1d_b)))
                      or
                      ((npisclose(npminimum(bestbls % 1.,
                                            abs((bestbls % 1.) - 1.)),
                                  0., atol=proxto1d_m)) and
                       (npisclose(npminimum(bestspdm % 1.,
                                            abs((bestspdm % 1.) - 1.)),
                                  0., atol=proxto1d_m)))
                      or
                      ((npisclose(abs(bestbls - 1.), 0., atol=proxto1d_m) or
                        npisclose(abs(bestspdm - 1.), 0., atol=proxto1d_m)) and
                       (npall(npisclose(
                           npminimum(b3pint % 1., abs((b3pint % 1.) - 1.)),
                           0., atol=proxto1d_m * 2.))))):

                    os.rename(LC_cut_path, LC_onedaycut_path)
                    os.rename(CP_cut_path, CP_onedaycut_path)

                # If there is not enough coverage. "Enough" means 3 days of
                # observations (at 4 minute cadence).
                elif len(mags) < minnumpoints:
                    os.rename(LC_cut_path, LC_shortcoveragecut_path)
                    os.rename(CP_cut_path, CP_shortcoveragecut_path)

            else:
                print('{:d}: {:s} or LC counterpart exists; continue.'.format(
                    ix, CP_cut_path))
                continue

    print('\nDone with periodicity analysis for {:s}.\n\n'.format(field_name))

    if DEBiL_write:
        # Write DEBiL "input list" of HAT-IDs and periods.
        write_path = '../data/DEBiL_heads/' + field_name + '_DSP' + \
            str(DSP_lim) + '.txt'
        if not os.path.exists(write_path):
            f_id = open(write_path, 'wb+')
            data = np.array([out.index, out['PERIOD']])
            np.savetxt(f_id, data.T, fmt=['%15s', '%.6f'])
            f_id.close()