Example #1
def pareto_min(*args):
    r"""Determine if observation is a Pareto point

    Find the Pareto-efficient points that minimize the provided features.

    Args:
        xi (iterable OR gr.Intention()): Feature to minimize; use -X to maximize

    Returns:
        np.array of boolean: Indicates if observation is Pareto-efficient

    """
    # Check invariants
    lengths = map(len, args)
    if len(set(lengths)) > 1:
        raise ValueError("All arguments to pareto_min must be of equal length")

    # Compute pareto points
    costs = array([*args]).T
    is_efficient = ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        is_efficient[i] = npall(npany(costs[:i] > c, axis=1)) and npall(
            npany(costs[i + 1:] > c, axis=1))

    return is_efficient
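
A minimal usage sketch (hypothetical data; it assumes this sketch lives in the same module as pareto_min, so the numpy aliases array, ones, all as npall, any as npany used above are in scope):

from numpy import array, ones, all as npall, any as npany

# two features to minimize over four observations
cost = [1.0, 2.0, 3.0, 1.5]
weight = [4.0, 1.0, 2.0, 3.5]

print(pareto_min(cost, weight))
# [ True  True False  True] -- (3.0, 2.0) is dominated by (2.0, 1.0)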
Example #2
def pareto_min_rel(X_test, X_base=None):
    r"""Determine if rows in X_test are optimal, compared to X_base

    Finds the Pareto-efficient test-points that minimize the column values,
    relative to a given set of base-points.

    Args:
        X_test (2d numpy array): Test point observations; rows are observations, columns are features
        X_base (2d numpy array): Base point observations; rows are observations, columns are features

    Returns:
        array of boolean values: Indicates if test observation is Pareto-efficient, relative to base points

    References:
        Owen *Monte Carlo theory, methods and examples* (2013)
    """
    # Compute Pareto points
    is_efficient = ones(X_test.shape[0], dtype=bool)

    if X_base is None:
        for i, x in enumerate(X_test):
            is_efficient[i] = npall(npany(X_test[:i] > x, axis=1)) and npall(
                npany(X_test[i + 1:] > x, axis=1))
    else:
        for i, x in enumerate(X_test):
            is_efficient[i] = not (npany(npall(x >= X_base, axis=1))
                                   and npany(npany(x > X_base, axis=1)))

    return is_efficient
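
A companion sketch for the relative case (hypothetical points; same assumption that the numpy aliases used above are in scope):

from numpy import array, ones, all as npall, any as npany

X_base = array([[1.0, 4.0],
                [2.0, 1.0]])
X_test = array([[0.5, 0.5],    # below both base points in every column
                [3.0, 5.0]])   # above both base points in every column

print(pareto_min_rel(X_test, X_base))
# [ True False] -- only the first test point is efficient relative to the base set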
Example #3
File: check.py | Project: limix/glimix-core
def check_outcome(y, lik):
    if not isinstance(lik, (list, tuple)):
        lik = (lik,)

    str_err = "The first item of ``lik`` has to be a string."
    if not isinstance(lik[0], str):
        raise ValueError(str_err)

    lik_name = lik[0].lower()

    y = ascontiguousarray(y, float)
    lik = lik[:1] + tuple(ascontiguousarray(i, float) for i in lik[1:])

    if not npall(isfinite(y)):
        raise ValueError("Outcome must be finite.")

    if lik_name == "poisson":
        return _check_poisson_outcome(y)

    if lik_name in ("binomial", "normal"):
        if len(lik) != 2:
            msg = "``lik`` must be a tuple of two elements for"
            msg += " {} likelihood.".format(lik_name[0].upper() + lik_name[1:])
            raise ValueError(msg)

    return y
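
A hedged illustration of the ``lik`` convention this check enforces (hypothetical values; it assumes the module-level numpy imports the snippet relies on, brought in explicitly here):

from numpy import ascontiguousarray, isfinite, all as npall

y = check_outcome([0.1, -2.3, 1.7], ("normal", 1.0))   # (likelihood name, extra parameter) tuple
y = check_outcome([0.0, 1.0, 1.0], "bernoulli")        # a bare string is wrapped into a 1-tuple
# check_outcome([1.0, float("nan")], "bernoulli")      # would raise: "Outcome must be finite."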
Example #4
File: check.py | Project: limix/glimix-core
def check_economic_qs(QS):
    if not isinstance(QS, tuple):
        raise ValueError("QS must be a tuple.")

    if not isinstance(QS[0], tuple):
        raise ValueError("QS[0] must be a tuple.")

    fmsg = "QS has non-finite values."

    if not all(npall(isfinite(Q)) for Q in QS[0]):
        raise ValueError(fmsg)

    if not npall(isfinite(QS[1])):
        raise ValueError(fmsg)

    return QS
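
A sketch of the structure this check expects, built here from a plain eigendecomposition (hypothetical; in glimix-core the tuple normally comes from an economic QS decomposition, but the check only requires a tuple of Q matrices plus a value array, all finite):

import numpy as np
from numpy import isfinite, all as npall

K = np.array([[2.0, 1.0],
              [1.0, 2.0]])
S, Q = np.linalg.eigh(K)     # eigenvalues and eigenvectors of the covariance
QS = ((Q,), S)               # ((Q0, ...), S): tuple of matrices, then values
QS = check_economic_qs(QS)   # passes the tuple-shape and finiteness checks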
Example #5
File: check.py | Project: limix/glimix-core
def check_covariates(X):
    if not X.ndim == 2:
        raise ValueError("Covariates must be a bidimensional array.")

    if not npall(isfinite(X)):
        raise ValueError("Covariates must have finite values only.")

    return X
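
A small usage sketch (hypothetical data; assumes numpy's isfinite and all as npall are available at module level, imported explicitly here):

import numpy as np
from numpy import isfinite, all as npall

X = np.ones((5, 2))             # five samples, intercept plus one covariate
X = check_covariates(X)         # OK: two-dimensional and finite
# check_covariates(np.ones(5))  # would raise: covariates must be a bidimensional array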
Example #6
    def convert(self, verbose=False, maxrows=None):
        """Method to loop through the data and convert it"""
        # docstring is extended below
        dataslice = slice(0, maxrows)
        info = LoopInfo(total=len(self.data.ifiledata[0, dataslice]),
                        t0=datetime.now(),
                        verbose=verbose)
        wrongvalues = 0
        wrongarea = 0

        for datatuple in izip(*self.data.ifiledata[:, dataslice]):
            info.info()
            fields = npall(
                # normal gridcols
                [
                    self.data.maskdata[self.gridcols.index(col)]
                    == datatuple[self.usecols.index(col)].astype(
                        self.data.maskdata[self.gridcols.index(col)].dtype)
                    for col in self.gridcols if col not in self.aliasdict
                ] +
                # alias cols
                [
                    self.data.maskdata[self.gridcols.index(col)]
                    == self.aliasdict[col][datatuple[self.usecols.index(col)]]
                    for col in self.aliasdict
                ],
                axis=0)
            itime = self.timefunc([
                datatuple[self.usecols.index(col)]
                for col in sorted(self.time.keys())
            ])
            for catcol in self.catadddict:
                for adderinstance in self.catadddict[catcol][tuple(
                        datatuple[self.usecols.index(col)] for col in catcol)]:
                    adderinstance.addfunc(
                        itime, fields, datatuple[self.usecols.index(
                            self.valcol)].astype(float))
            for col in self.defaultadddict:
                for adderinstance in self.defaultadddict[col][datatuple[
                        self.usecols.index(col)]]:
                    adderinstance.addfunc(
                        itime, fields, datatuple[self.usecols.index(
                            self.valcol)].astype(float))
            if not npany(fields):
                wrongvalues += 1
                wrongarea += float(datatuple[self.usecols.index(self.valcol)])
        if verbose:
            print('\nNumber of wrong values: %i' % wrongvalues)
            print('Missed Area [ha]: %6.4f' % wrongarea)
            print('Missed Area: %1.3e %%' %
                  (wrongarea / sum(self.data.ifiledata[self.usecols.index(
                      self.valcol)].astype(float)) * 100.))
Example #7
    def y(self, y):
        """ Set the outcome array.

        Parameters
        ----------
        y : array_like
            Outcome array.
        """
        from numpy import all as npall, isfinite

        if not npall(isfinite(y)):
            raise ValueError("Phenotype values must be finite.")
        self._glmm = None
        self._y = normalise_extreme_values(y, "normal")
Example #8
    def _drop_missing(self) -> ndarray:
        data = (self.dependent, self.exog, self.endog, self.instruments, self.weights)
        missing = any(c_[[dh.isnull for dh in data]], 0)  # type: ndarray
        if any(missing):
            if npall(missing):
                raise ValueError('All observations contain missing data. '
                                 'Model cannot be estimated.')
            self.dependent.drop(missing)
            self.exog.drop(missing)
            self.endog.drop(missing)
            self.instruments.drop(missing)
            self.weights.drop(missing)

        missing_warning(missing)
        return missing
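
The method above depends on the surrounding model class; the underlying masking pattern can be sketched on its own with plain numpy (hypothetical arrays):

import numpy as np

dep = np.array([1.0, np.nan, 3.0])
exog = np.array([[1.0, 2.0], [3.0, 4.0], [np.nan, 6.0]])

# one missing-indicator row per data block, collapsed to a per-observation flag
missing = np.any(np.c_[[np.isnan(dep), np.isnan(exog).any(1)]], 0)
if np.all(missing):
    raise ValueError('All observations contain missing data.')
dep, exog = dep[~missing], exog[~missing]   # keep only complete rows
print(missing)                              # [False  True  True]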
Example #9
def mask(array, value):
    """
    Creates a mask from an array against a value, depending on value's nature:

    * Numbers (any number type):

      mask(arr, 1) is the same as arr == 1

    * List or tuples (flat, containing only numbers):

      mask(arr, (1., 1., 1.)) compares each pixel against a color

      If your image has an alpha channel, you must compare like:

      mask(arr, (1., 1., 1., 1.))

      Remember to respect the dimensions or numpy will complain.

    * IN (range) instances:

      mask(arr, IN(0.5, 1., False, True)) will make a mask for pixels
        greater than or equal to 0.5 and lower than 1.

    :param array: numpy array to build the mask from
    :param value: number, flat list/tuple of numbers, or IN instance to compare against
    :return: boolean numpy array marking where the comparison holds
    """

    if _valid_real(value):
        return array == value
    elif isinstance(value, (list, tuple)):
        if not all(_valid_real(v) for v in value):
            raise TypeError("Cannot mask against list or tuples having values other than valid numbers, or being "
                            "multi-dimensional or irregular sequences")
        return npall(array == value, axis=2)
    elif isinstance(value, IN):
        return value.contains(array)
    else:
        raise TypeError("Cannot take a mask from this argument. Only numpy-accepted numeric types, tuples, lists, or "
                        "`IN` instances are accepted")
Example #10
    def append(self, m, name=None):
        from numpy import all as npall, asarray, atleast_2d, isfinite
        from glimix_core.mean import LinearMean

        m = asarray(m, float)
        if m.ndim > 2:
            raise ValueError("Fixed-effect has to have between one and two dimensions.")

        if not npall(isfinite(m)):
            raise ValueError("Fixed-effect values must be finite.")

        m = atleast_2d(m.T).T
        mean = LinearMean(m.shape[1])
        mean.set_data(m)

        n = len(self._fixed_effects["impl"])
        if name is None:
            name = "unnamed-fe-{}".format(n)
        self._fixed_effects["impl"].append(mean)
        self._fixed_effects["user"].append(user_mean.LinearMean(mean))
        self._fixed_effects["user"][-1].name = name
        self._mean = None
Example #11
    def append(self, K, name=None):
        from numpy import all as npall, isfinite, issubdtype, number
        from glimix_core.cov import GivenCov

        if not issubdtype(K.dtype, number):
            raise ValueError("covariance-matrix is not numeric.")

        if K.ndim != 2:
            raise ValueError("Covariance-matrix has to have two dimensions.")

        if not npall(isfinite(K)):
            raise ValueError("Covariance-matrix values must be finite.")

        cov = GivenCov(K)
        cov.set_data((self._sample_idx, self._sample_idx))

        n = len(self._covariance_matrices["impl"])
        if name is None:
            name = "unnamed-re-{}".format(n)

        self._covariance_matrices["impl"].append(cov)
        self._covariance_matrices["user"].append(user_cov.GivenCov(cov))
        self._covariance_matrices["user"][-1].name = name
        self._cov = None
Example #12
                            stdin=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return stdout, stderr


parser = argparse.ArgumentParser()
parser.add_argument('args', nargs='+')
ao = parser.parse_args()
assert len(ao.args) == 1, 'one arg allowed: field_id'

field_id = ao.args[0]

arg0 = 'checkplotlist.py'
arg1 = 'pkl'
if field_id == 'weird':
    cpdir = '../data/weirdpkls'
elif field_id == 'deb':
    cpdir = '../data/debpkls'
else:
    cpdir = '../data/CPs_cut/G' + field_id + '_20'
arg2 = cpdir

cpnames = [f for f in os.listdir(cpdir) if 'checkplot' in f]

#Extract checkplots .pkl.gz to .pkl files:
if not npall(nparray([cpn.endswith('.pkl') for cpn in cpnames])):
    print('gunzip ' + cpdir + '/*pkl.gz')
    stdout, stderr = run_script('gunzip ' + cpdir + '/*pkl.gz')

cpl.main([arg0, arg1, arg2])
Example #13
def myop(a,b):
    r = mod(dot(a,b), MOD)
    assert npall(greater_equal(r, 0))
    return r
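
A minimal sketch of how this modular product might be called (the snippet implies numpy's mod, dot, greater_equal, all as npall and a module-level MOD constant; the modulus below is only an assumption):

from numpy import array, mod, dot, greater_equal, all as npall

MOD = 10**9 + 7   # hypothetical modulus

a = array([[1, 2], [3, 4]])
b = array([[5, 6], [7, 8]])
print(myop(a, b))   # [[19 22] [43 50]] -- the plain product, already below MOD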
Example #14
def mask_signal(times,
                mags,
                errs,
                signalperiod,
                signalepoch,
                magsarefluxes=False,
                maskphases=[0, 0, 0.5, 1.0],
                maskphaselength=0.1,
                plotfit=None,
                plotfitphasedlconly=True,
                sigclip=30.0):
    '''This removes repeating signals in the magnitude time series.

    Useful for masking transit signals in light curves to search for other
    variability.

    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # now phase the light curve using the period and epoch provided
    phases = ((stimes - signalepoch) / signalperiod - npfloor(
        (stimes - signalepoch) / signalperiod))

    # mask the requested phases using the mask length (in phase units)
    # this gets all the masks into one array
    masks = nparray([(npabs(phases - x) > maskphaselength)
                     for x in maskphases])
    # this flattens the masks to a single array for all combinations
    masks = npall(masks, axis=0)

    # apply the mask to the times, mags, and errs
    mphases = phases[masks]
    mtimes = stimes[masks]
    mmags = smags[masks]
    merrs = serrs[masks]

    returndict = {
        'mphases': mphases,
        'mtimes': mtimes,
        'mmags': mmags,
        'merrs': merrs
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str) or isinstance(plotfit, strio):

        if plotfitphasedlconly:
            plt.figure(figsize=(10, 4.8))
        else:
            plt.figure(figsize=(16, 9.6))

        if plotfitphasedlconly:

            # phased series before whitening
            plt.subplot(121)
            plt.plot(phases,
                     smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(122)
            plt.plot(mphases,
                     mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        else:

            # time series before whitening
            plt.subplot(221)
            plt.plot(stimes,
                     smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC before signal masking')

            # time series after whitening
            plt.subplot(222)
            plt.plot(mtimes,
                     mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC after signal masking')

            # phased series before whitening
            plt.subplot(223)
            plt.plot(phases,
                     smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(224)
            plt.plot(mphases,
                     mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        plt.tight_layout()
        plt.savefig(plotfit, format='png', pad_inches=0.0)
        plt.close()

        if isinstance(plotfit, str) or isinstance(plotfit, strio):
            returndict['fitplotfile'] = plotfit

    return returndict
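
The heart of the masking step above is the reduction npall(masks, axis=0): a point survives only if it is far from every masked phase. A toy sketch with plain numpy (hypothetical phases):

import numpy as np

phases = np.array([0.02, 0.30, 0.51, 0.97])
maskphases, maskphaselength = [0.0, 0.5, 1.0], 0.1

masks = np.array([np.abs(phases - x) > maskphaselength for x in maskphases])
keep = np.all(masks, axis=0)   # True only where every per-phase mask is True
print(keep)                    # [False  True False False]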
Example #15
def is_positive_semi_definite(A, tol=1e-8):
    vals, vecs = eigh(A)
    return npall(vals > -tol), vals
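
A small usage sketch (assuming eigh comes from numpy.linalg or scipy.linalg and all as npall from numpy, as the snippet implies):

from numpy import array, all as npall
from numpy.linalg import eigh

ok, vals = is_positive_semi_definite(array([[2., 1.], [1., 2.]]))
print(ok, vals)   # True [1. 3.]

ok, vals = is_positive_semi_definite(array([[0., 1.], [1., 0.]]))
print(ok)         # False: one eigenvalue is -1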
Example #16
def run_emcee(hm_options, sampling_options, args):
    # load halo model setup
    function, params, param_types, prior_types, \
        val1, val2, val3, val4, params_join, hm_functions, \
        starting, meta_names, fits_format = hm_options
    # load MCMC sampler setup
    datafile, datacols, covfile, covcols, exclude_bins, output, \
        sampler, nwalkers, nsteps, nburn, \
        thin, k, threads, sampler_type, update_freq = sampling_options

    #function = cloud.serialization.cloudpickle.dumps(model)
    #del model
    #print function

    #pickle.dumps(function)
    #print 'pickled'

    if args.demo:
        print ' ** Running demo only **'
    elif isfile(output):
        msg = 'Warning: output file %s exists. Overwrite? [y/N] ' %output
        answer = raw_input(msg)
        if len(answer) == 0:
            exit()
        if answer.lower() not in ('y', 'yes'):
            exit()
    if not args.demo:
        print 'Started -', ctime()

    #load data files
    Ndatafiles = len(datafile)
    R, esd = sampling_utils.load_datapoints(datafile, datacols, exclude_bins)
    Nobsbins, Nrbins = esd.shape
    rng_obsbins = xrange(Nobsbins)
    rng_rbins = xrange(Nrbins)
    # load covariance
    cov = sampling_utils.load_covariance(covfile, covcols,
                                         Nobsbins, Nrbins, exclude_bins)
    cov, icov, likenorm, esd_err, cov2d = cov

    # needed for offset central profile
    R, Rrange = sampling_utils.setup_integrand(R, k)
    angles = numpy.linspace(0, 2*pi, 540)
    val1 = numpy.append(val1, [Rrange, angles])

    # identify fixed and free parameters
    jfixed = (prior_types == 'fixed') | (prior_types == 'read') | \
             (prior_types == 'function')
    jfree = ~jfixed
    ndim = len(val1[(jfree)])
    if len(starting) != ndim:
        msg = 'ERROR: Not all starting points defined for free parameters.'
        print msg
        exit()
    print 'starting =', starting

    # identify the function. Raises an AttributeError if not found
    #function = model.model()
    #sat_profile = params.sat_profile()
    #group_profile = params.group_profile()
    #function = model

    if not args.demo:
        hdrfile = '.'.join(output.split('.')[:-1]) + '.hdr'
        print 'Printing header information to', hdrfile
        hdr = open(hdrfile, 'w')
        print >>hdr, 'Started', ctime()
        print >>hdr, 'datafile', ','.join(datafile)
        print >>hdr, 'cols', ','.join([str(c) for c in datacols])
        print >>hdr, 'covfile', covfile
        print >>hdr, 'covcols', ','.join([str(c) for c in covcols])
        if exclude_bins is not None:
            print >>hdr, 'exclude_bins', ','.join([str(c)
                                                   for c in exclude_bins])
        print >>hdr, 'model %s' %function
        for p, pt, v1, v2, v3, v4 in izip(params, prior_types,
                                        val1, val2, val3, val4):
            try:
                line = '%s  %s  ' %(p, pt)
                line += ','.join(numpy.array(v1, dtype=str))
            except TypeError:
                line = '%s  %s  %s  %s  %s  %s' \
                    %(p, pt, str(v1), str(v2), str(v3), str(v4))
            print >>hdr, line
        print >>hdr, 'nwalkers  {0:5d}'.format(nwalkers)
        print >>hdr, 'nsteps    {0:5d}'.format(nsteps)
        print >>hdr, 'nburn     {0:5d}'.format(nburn)
        print >>hdr, 'thin      {0:5d}'.format(thin)
        hdr.close()

    # are we just running a demo?
    if args.demo:
        import pylab
        from matplotlib import cm
        def plot_demo(ax, Ri, gt, gt_err, f, fsat, fhost):
            Ri = Ri[1:]
            ax.errorbar(Ri, gt, yerr=gt_err, fmt='ko', ms=10)
            ax.plot(Ri, f, 'r-', lw=3)
            ax.plot(Ri, fsat, 'b--', lw=2)
            ax.plot(Ri, fhost, 'g-.', lw=2)
            ax.set_xscale('log')
            for x, fi, gti, gei in izip(Ri, f, gt, gt_err):
                ax.annotate('{0:.2f}'.format((fi-gti)/gei),
                            xy=(x,gti+20), ha='center', va='bottom',
                            color='r')
            return
        val1[jfree] = starting
        if params_join is not None:
            v1 = list(val1)
            for p in params_join:
                # without this list comprehension numpy can't keep track of the
                # data type. I believe this is because there are elements of
                # different types in val1 and therefore its type is not 
                # well defined (so it gets "object")
                v1[p[0]] = array([val1[pj] for pj in p])
            # need to delete elements backwards to preserve indices
            aux = [[v1.pop(pj) for pj in p[1:][::-1]]
                   for p in params_join[::-1]]
            val1 = v1 #array(v1) ??
        model = function(val1, R)
        residuals = esd - model[0]
        dof = esd.size - starting.size - 1
        chi2 = array([dot(residuals[m], dot(icov[m][n], residuals[n]))
                      for m in rng_obsbins for n in rng_obsbins]).sum()
        print ' ** chi2 = %.2f/%d **' %(chi2, dof)
        fig, axes = pylab.subplots(figsize=(4*Ndatafiles,4), ncols=Ndatafiles)
        if Ndatafiles == 1:
            plot_demo(axes, R, esd, esd_err, model[0], model[1], model[2])
        else:
            for i in izip(axes, R, esd, esd_err, model[0], model[1], model[2]):
                plot_demo(*i)
        if npall(esd - esd_err > 0):
            for ax in axes:
                ax.set_yscale('log')
        fig.tight_layout(w_pad=0.01)
        pylab.show()
        fig, axes = pylab.subplots(figsize=(8,8), nrows=cov.shape[0],
                                   ncols=cov.shape[0])
        for m, axm in enumerate(axes):
            for n, axmn in enumerate(axm):
                axmn.imshow(cov[m][-n-1][::-1], interpolation='nearest',
                            cmap=cm.CMRmap_r)
        fig.tight_layout()
        pylab.show()
        exit()

    # set up starting point for all walkers
    po = starting * numpy.random.uniform(0.99, 1.01, size=(nwalkers,ndim))
    lnprior = zeros(ndim)
    mshape = meta_names.shape
    # this assumes that all parameters are floats -- can't imagine a
    # different scenario
    metadata = [[] for m in meta_names]
    for j in xrange(len(metadata)):
        for f in fits_format[j]:
            if len(f) == 1:
                metadata[j].append(zeros(nwalkers*nsteps/thin))
            else:
                size = [nwalkers*nsteps/thin, int(f[:-1])]
                # only for ESDs. Note that there will be trouble if outputs
                # other than the ESD have the same length, so avoid them at
                # all cost.
                if exclude_bins is not None \
                    and size[1] == esd.shape[-1]+len(exclude_bins):
                    size[1] -= len(exclude_bins)
                metadata[j].append(zeros(size))
    metadata = [array(m) for m in metadata]
    fail_value = []
    for m in metadata:
        shape = list(m.shape)
        shape.remove(max(shape))
        fail_value.append(zeros(shape))
    # the last numbers are data chi2, lnLdata, lnPderived
    for i in xrange(4):
        fail_value.append(9999)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                    threads=threads,
                                    args=(R,esd,icov,function,
                                          params,prior_types[jfree],
                                          val1,val2,val3,val4,params_join,
                                          jfree,lnprior,likenorm,
                                          rng_obsbins,fail_value,
                                          array,dot,inf,izip,outer,pi))
                                          #isfinite,log,log10
                                          #outer,sqrt,zeros))
    # burn-in
    if nburn > 0:
        pos, prob, state, blobs = sampler.run_mcmc(po, nburn)
        sampler.reset()
        print '{0} Burn-in steps finished ({1})'.format(nburn, ctime())
    else:
        pos = po
    # incrementally save output
    chi2 = [zeros(nwalkers*nsteps/thin) for i in xrange(4)]
    nwritten = 0
    for i, result in enumerate(sampler.sample(pos, iterations=nsteps,
                                              thin=thin)):
        # make sure that nwalkers is a factor of this number!
        if i*nwalkers % update_freq == nwalkers:
            out = write_to_fits(output, chi2, sampler, nwalkers, thin,
                                params, jfree, metadata, meta_names, i,
                                nwritten, Nobsbins,
                                array, BinTableHDU, Column, ctime, enumerate,
                                isfile, izip, transpose, xrange)
            metadata, nwritten = out

    hdr = open(hdrfile, 'a')
    try:
        print 'acceptance_fraction =', sampler.acceptance_fraction
        print >>hdr, 'acceptance_fraction =',
        for af in sampler.acceptance_fraction:
            print >>hdr, af,
    except ImportError:
        pass
    try:
        print 'acor =', sampler.acor
        print >>hdr, '\nacor =',
        for ac in sampler.acor:
            print >>hdr, ac,
    except ImportError:
        pass
    try:
        print 'acor_time =', sampler.get_autocorr_time()
        print >>hdr, '\nacor_time =',
        for act in sampler.get_autocorr_time():
            print >>hdr, act,
    except AttributeError:
        pass
    print >>hdr, '\nFinished', ctime()
    hdr.close()
    print 'Saved to', hdrfile

    cmd = 'mv {0} {1}'.format(output, output.replace('.fits', '.temp.fits'))
    print cmd
    os.system(cmd)
    print 'Saving everything to {0}...'.format(output)
    print i, nwalkers, nwritten
    write_to_fits(output, chi2, sampler, nwalkers, thin,
                  params, jfree, metadata, meta_names, i+1,
                  nwritten, Nobsbins,
                  array, BinTableHDU, Column, ctime, enumerate,
                  isfile, izip, transpose, xrange)
    os.remove(output.replace('.fits', '.temp.fits'))
    print 'Everything saved to {0}!'.format(output)
    return
Example #17
File: macf.py | Project: JinbiaoJi/astrobase
def _get_acf_peakheights(lags, acf, npeaks=20, searchinterval=1):
    '''This calculates the relative peak heights for first npeaks in ACF.

    Usually, the first peak or the second peak (if its peak height > first peak)
    corresponds to the correct lag. When we know the correct lag, the period is
    then::

        bestperiod = time[lags == bestlag] - time[0]

    Parameters
    ----------

    lags : np.array
        An array of lags that the ACF is calculated at.

    acf : np.array
        The array containing the ACF values.

    npeaks : int
        The maximum number of peaks to consider when finding peak heights.

    searchinterval : int
        From `scipy.signal.argrelmax`: "How many points on each side to use for
        the comparison to consider comparator(n, n+x) to be True." This
        effectively sets how many points on each side of the current peak will be
        used to check if the current peak is the local maximum.

    Returns
    -------

    dict
        This returns a dict of the following form::

            {'maxinds':the indices of the lag array where maxes are,
             'maxacfs':the ACF values at each max,
             'maxlags':the lag values at each max,
             'mininds':the indices of the lag array where mins are,
             'minacfs':the ACF values at each min,
             'minlags':the lag values at each min,
             'relpeakheights':the relative peak heights of each rel. ACF peak,
             'relpeaklags':the lags at each rel. ACF peak found,
             'peakindices':the indices of arrays where each rel. ACF peak is,
             'bestlag':the lag value with the largest rel. ACF peak height,
             'bestpeakheight':the largest rel. ACF peak height,
             'bestpeakindex':the largest rel. ACF peak's number in all peaks}

    '''

    maxinds = argrelmax(acf, order=searchinterval)[0]
    maxacfs = acf[maxinds]
    maxlags = lags[maxinds]
    mininds = argrelmin(acf, order=searchinterval)[0]
    minacfs = acf[mininds]
    minlags = lags[mininds]

    relpeakheights = npzeros(npeaks)
    relpeaklags = npzeros(npeaks, dtype=npint64)
    peakindices = npzeros(npeaks, dtype=npint64)

    for peakind, mxi in enumerate(maxinds[:npeaks]):

        # check if there are no mins to the left
        # throw away this peak because it's probably spurious
        # (FIXME: is this OK?)
        if npall(mxi < mininds):
            continue

        leftminind = mininds[mininds < mxi][-1]  # the last index to the left
        rightminind = mininds[mininds > mxi][0]  # the first index to the right
        relpeakheights[peakind] = (acf[mxi] -
                                   (acf[leftminind] + acf[rightminind]) / 2.0)
        relpeaklags[peakind] = lags[mxi]
        peakindices[peakind] = peakind

    # figure out the bestperiod if possible
    if relpeakheights[0] > relpeakheights[1]:
        bestlag = relpeaklags[0]
        bestpeakheight = relpeakheights[0]
        bestpeakindex = peakindices[0]
    else:
        bestlag = relpeaklags[1]
        bestpeakheight = relpeakheights[1]
        bestpeakindex = peakindices[1]

    return {
        'maxinds': maxinds,
        'maxacfs': maxacfs,
        'maxlags': maxlags,
        'mininds': mininds,
        'minacfs': minacfs,
        'minlags': minlags,
        'relpeakheights': relpeakheights,
        'relpeaklags': relpeaklags,
        'peakindices': peakindices,
        'bestlag': bestlag,
        'bestpeakheight': bestpeakheight,
        'bestpeakindex': bestpeakindex
    }
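
A hedged usage sketch on a synthetic, slowly damped cosine ACF (hypothetical data; it assumes the module-level names the function relies on, imported explicitly here):

import numpy as np
from numpy import zeros as npzeros, int64 as npint64, all as npall
from scipy.signal import argrelmax, argrelmin

lags = np.arange(500)
acf = np.exp(-lags / 300.0) * np.cos(2.0 * np.pi * lags / 100.0)   # repeats every ~100 lags

peaks = _get_acf_peakheights(lags, acf, npeaks=5)
print(peaks['bestlag'])   # ~100: the lag of the strongest relative ACF peak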
Example #18
    def test_recvAndActvByOneInput(self):
        result = self.hiddenL.recvAndActvByOneInput(array((1, 1, 1)))
        self.assertAlmostEqual(result[0], 1.0 / (1 + npe ** (-npsum(array((0.4, 0.5, 0.6, 0.7)) * array((1, 1, 1, 1))))))
        self.assertAlmostEqual(result[1], 1.0 / (1 + npe ** (-npsum(array((0.8, 0.9, 1, 1.1)) * array((1, 1, 1, 1))))))
        self.assertAlmostEqual(result[2], 1.0 / (1 + npe ** (-npsum(array((1.2, 1.3, 1.4, 1.5)) * array((1, 1, 1, 1))))))
        self.assertTrue(npall(self.hiddenL == array((1, 1, 1, 1))))
Example #19
    def create_from_file(cls,
                         name,
                         infile,
                         prod,
                         metadata,
                         timeIndex,
                         transposeData=False,
                         preprocessing=None):
        function = "(DataLayer.create_from_file)"

        #Open netCDF file
        try:
            dataset = Dataset(infile)
        except IOError as e:
            print("\n%s: %s inputfile %s does not exist" %
                  (function, name, infile))
            print(e.args)

        #Check netCDF file: Prints some info when in DEBUG mode.
        check_input(infile, prod, DataLayer.DEBUG)

        #Open netCDF file and check variable exists.
        dataset = Dataset(infile)
        ncVariable = dataset.variables[prod]

        #Find the right time dimension index and slice/copy the data appropriately
        dims = ncVariable.dimensions

        #Two spatial dimensions and a time dimensions
        if len(dims) == 3:
            if metadata.timeDimensionName in dims:
                if dims.index(metadata.timeDimensionName) == 0:
                    data = ncVariable[timeIndex, :, :]
                elif dims.index(metadata.timeDimensionName) == 1:
                    data = ncVariable[:, timeIndex, :]
                elif dims.index(metadata.timeDimensionName) == 2:
                    data = ncVariable[:, :, timeIndex]
            else:
                raise RuntimeError(
                    "Time dimension name ('%s') for Datalayer '%s' was not found. Try setting this manually in the configuration file using (for example) datalayername_timeDimensionName = time"
                    % (metadata.timeDimensionName, name))
        #No time dimension anyway
        elif len(dims) == 2:
            data = ncVariable[:]
        else:  #
            raise RuntimeError(
                "Invalid number of dimensions (%d) when reading datalayer '%s' from '%s'"
                % (len(dims), name, infile))

        #TODO: APPLY PREPROCESSING HERE instead of later.

        #Extract just the dimensions we want.
        #requiredDims = [None if v in ['latitude', 'lat', 'longitude', 'lon'] else 0 for v in ncVariable.dimensions]
        #data = squeeze(ncVariable[slice(*requiredDims)]);
        if data.shape == (1, 1, 1):  #Remove temporal dimension
            data.shape = (1, 1)

        if data.shape != (
                1, 1
        ):  #Don't squeeze if there is a single lon/lat point or we'll end up with an empty array
            data = squeeze(data)
            #Remove any 1d dimensions.

        #check number of dimensions
        dataDims = data.shape
        if len(dataDims) != 2:
            raise ValueError(
                "\n%sError: Unexpected number of dimensions (%d) in %s when reading in %s variable from %s"
                % (function, len(dataDims), name, prod, infile))

        #Convert from a masked array (np.ma.array) to a plain np.array
        data = array(data)

        #Seems to be a bug in netCDF4 which sometimes causes unmasked variables to be completely masked: https://github.com/Unidata/netcdf4-python/issues/707
        #    Needs looking into, but for now this work-around fixes things:
        #    TODO: now irrelevant as we convert to standard arrays?
        if ma.isMaskedArray(data) and (npall(data.mask) == True):
            data.mask = False
            #Remove the mask...

        #If necessary flip the data #TODO: remove this as this should be handled in the pre-processing functions by the user
        data, flipped = flip_data(dataset, data, name)
        #If different from takahashi orientation, flip data.

        #Extract fill value from netCDF if it exists. Note that this will overwrite fill default or config specified fill value.
        if hasattr(ncVariable, "_FillValue"):
            fillValue = ncVariable._FillValue
        elif hasattr(ncVariable, "fill_value"):
            fillValue = ncVariable.fill_value
        else:
            fillValue = DataLayer.missing_value

        return cls(name,
                   data,
                   metadata,
                   fillValue,
                   preprocessing=preprocessing)
Example #20
def periodicity_analysis(out, DSP_lim=None, field_id=None, DEBiL_write=False):
    '''
    Given a specified field (e.g., G199), previous steps have created
    data/HATpipe/blsanalsums/cuts for that field and neighbors (imposing cuts
    on DSP_lim, Ntra_min, and NTV). They've also downloaded the appropriate
    LCs.
    Now rerun the periodicity analysis for these LCs (Box-Least-Squares and
    Stellingwerf Phase Dispersion Minimization), and make eb_checkplots for
    subsequent looking-at ("visual inspection").

    Args:
       DEBiL_write (bool): whether to write a "name and best BLS period" file
           (in basically all use cases, not necessary).
    '''
    assert type(field_id) == str
    print('\nBeginning periodicity analysis...\n\n')

    # File name format: HAT-199-0025234-V0-DR0-hatlc.sqlite.gz
    field_name = 'G' + field_id  # e.g., 'G081'
    LC_read_path = '../data/LCs/' + field_name + '/'  # where sqlitecurves already exist
    tail_str = '-V0-DR0-hatlc.sqlite.gz'
    # paths for LCs and EB checkplots
    LC_write_path = '../data/LCs_cut/' + field_name + '_' + str(DSP_lim)
    CP_write_path = '../data/CPs_cut/' + field_name + '_' + str(DSP_lim)

    for outpath in [
            LC_write_path, LC_write_path + '/periodcut',
            LC_write_path + '/onedaycut', LC_write_path + '/shortcoveragecut',
            CP_write_path, CP_write_path + '/periodcut',
            CP_write_path + '/onedaycut', CP_write_path + '/shortcoveragecut'
    ]:

        if not os.path.isdir(outpath):
            os.makedirs(outpath)

    for ix, hatid in enumerate(out.index):
        if np.all(out.ix[hatid]['has_sqlc']):
            LC_cut_path = LC_write_path + '/' + hatid + tail_str
            LC_periodcut_path = LC_write_path + '/periodcut/' + hatid + tail_str
            LC_onedaycut_path = LC_write_path + '/onedaycut/' + hatid + tail_str
            LC_shortcoveragecut_path = LC_write_path + '/shortcoveragecut/' + hatid + tail_str
            CP_cut_path = CP_write_path + '/' + hatid + '.png'
            CP_periodcut_path = CP_write_path + '/periodcut/' + hatid + '.png'
            CP_onedaycut_path = CP_write_path + '/onedaycut/' + hatid + '.png'
            CP_shortcoveragecut_path = CP_write_path + '/shortcoveragecut/' + hatid + '.png'

            if (not os.path.exists(CP_cut_path)) \
            and (not os.path.exists(CP_periodcut_path)) \
            and (not os.path.exists(CP_onedaycut_path)) \
            and (not os.path.exists(CP_shortcoveragecut_path)):
                # Get sqlitecurve data.
                obj_path = LC_read_path + hatid + tail_str
                lcd, msg = hatlc.read_and_filter_sqlitecurve(obj_path)
                # Make sure all observations are at the same zero-point.
                normlcd = hatlc.normalize_lcdict(lcd)
                # Select recommended EPD aperture with 'G' flag. (Alternate
                # approach: take the smallest aperture to minimize crowding).
                ap = next(iter(lcd['lcbestaperture']['ap']))
                times = normlcd['rjd'][normlcd['aiq_' + ap] == 'G']
                mags = normlcd['aep_' + ap][normlcd['aiq_' + ap] == 'G']
                errs = normlcd['aie_' + ap][normlcd['aiq_' + ap] == 'G']

                # Period analysis: Stellingwerf phase dispersion minimization
                # and rerun Box-Least-Squares. Range of interesting periods:
                # 0.5days-100days. BLS can only search for periods < half the
                # light curve observing baseline. (N.b. 100d signals are
                # basically always going to be stellar rotation)
                smallest_p = 0.5
                biggest_p = min((times[-1] - times[0]) / 2.01, 100.)

                print('\nStellingwerf...\n')
                spdmp = periodbase.stellingwerf_pdm(
                    times,
                    mags,
                    errs,
                    autofreq=True,
                    startp=smallest_p,
                    endp=biggest_p,
                    normalize=False,
                    stepsize=1.0e-4,
                    phasebinsize=0.05,
                    mindetperbin=9,
                    nbestpeaks=5,
                    periodepsilon=0.1,  # 0.1days
                    sigclip=None,  # no sigma clipping
                    nworkers=None)

                print('\nBLS...\n')
                blsp = periodbase.bls_parallel_pfind(
                    times,
                    mags,
                    errs,
                    startp=smallest_p,
                    endp=biggest_p,  # don't search full timebase
                    stepsize=1.0e-5,
                    mintransitduration=0.01,  # minimum transit length in phase
                    maxtransitduration=0.7,  # maximum transit length in phase
                    nphasebins=200,
                    autofreq=False,  # don't figure out f0, nf, and df automatically
                    nbestpeaks=5,
                    periodepsilon=0.1,  # 0.1
                    nworkers=None,
                    sigclip=None)

                # Make and save checkplot to be looked at.
                cp = plotbase.make_eb_checkplot(
                    spdmp,
                    blsp,
                    times,
                    mags,
                    errs,
                    objectinfo=normlcd['objectinfo'],
                    findercmap='gray_r',
                    normto='globalmedian',
                    normmingap=4.0,
                    outfile=CP_cut_path,
                    sigclip=None,
                    varepoch='min',
                    phasewrap=True,
                    phasesort=True,
                    phasebin=0.002,
                    plotxlim=[-0.6, 0.6])

                # Copy LCs with DSP>DSP_lim to /data/LCs_cut/G???_??/
                if not os.path.exists(LC_cut_path):
                    copyfile(obj_path, LC_cut_path)
                    print('Copying {} -> {}\n'.format(obj_path, LC_cut_path))

                #### CUTS ####
                maxperiod = 30.  # days
                bestperiods = spdmp['nbestperiods'] + blsp['nbestperiods']
                best3periods = spdmp['nbestperiods'][:3]+\
                        blsp['nbestperiods'][:3]
                bparr, b3parr = np.array(bestperiods), np.array(best3periods)

                minperiod = 0.5002  # days; else this harmonic of 1d happens
                proxto1d_s, proxto1d_m, proxto1d_b = 0.01, 0.015, 0.02  # days
                bestbls, bestspdm = blsp['bestperiod'], spdmp['bestperiod']

                mindayscoverage = 3.
                cadence = 4.  # minutes
                minnumpoints = mindayscoverage * 24 * 60 / cadence

                # (If 5 of the 6 best peaks are above max period (30 days)), OR
                # (If all of the SPDM peaks are above max period and the BLS
                # peaks are not, and all the BLS peaks less than the max period
                # are within 0.1days separate from e/other) OR
                # (The same, with BLS/SPDM switched), OR
                # (The difference between all SPDM is <0.1day and the
                # difference between all BLS is <0.1day)
                #
                # [n.b. latter broad-peak behavior happens b/c of stellar rotn]
                sb3parr = np.sort(b3parr)
                spdmn = np.array(spdmp['nbestperiods'][:3])
                blsn = np.array(blsp['nbestperiods'][:3])
                ps = 0.2  # peak_separation, days
                b3pint = b3parr[(b3parr < maxperiod) & (
                    b3parr > 2 * minperiod)]  # interesting periods

                if npall(sb3parr[1:] > maxperiod) \
                   or \
                   ((npall(spdmn>maxperiod) and not npall(blsn>maxperiod)) and
                   npall(abs(npdiff(blsn[blsn<maxperiod]))<ps)) \
                   or \
                   ((npall(blsn>maxperiod) and not npall(spdmn>maxperiod)) and
                   npall(abs(npdiff(spdmn[spdmn<maxperiod]))<ps)) \
                   or \
                   (npall(abs(npdiff(spdmn))<ps) and
                   npall(abs(npdiff(blsn))<ps)):

                    os.rename(LC_cut_path, LC_periodcut_path)
                    os.rename(CP_cut_path, CP_periodcut_path)

                # All 6 best peaks below max period (and above 1d) are within
                # 0.02days of a multiple of 1, OR
                # Both the BLS and SPDM max peak are within 0.015d of a
                # multiple of 1, OR
                # At least one of the best BLS&SPDM peaks are within 0.015d of one,
                # and of the remaining peaks > 1day, the rest are within 0.03days of
                # multiples of one.
                elif (npall(npisclose(npminimum(\
                    b3pint%1., abs((b3pint%1.)-1.)), 0., atol=proxto1d_b)))\
                    or \
                    ((npisclose(npminimum(\
                    bestbls%1., abs((bestbls%1.)-1.)), 0., atol=proxto1d_m))
                    and (npisclose(npminimum(\
                    bestspdm%1., abs((bestspdm%1.)-1.)), 0., atol=proxto1d_m)))\
                    or \
                    (\
                    (npisclose(abs(bestbls-1.), 0., atol=proxto1d_m) or
                    npisclose(abs(bestspdm-1.), 0., atol=proxto1d_m)) and
                    (npall(npisclose(npminimum(\
                    b3pint%1., abs((b3pint%1.)-1.)), 0., atol=proxto1d_m*2.)))\
                    ):

                    os.rename(LC_cut_path, LC_onedaycut_path)
                    os.rename(CP_cut_path, CP_onedaycut_path)

                # If there is not enough coverage. "Enough" means 3 days of
                # observations (at 4 minute cadence).
                elif len(mags) < minnumpoints:
                    os.rename(LC_cut_path, LC_shortcoveragecut_path)
                    os.rename(CP_cut_path, CP_shortcoveragecut_path)

        else:
            print('{:d}: {:s} or LC counterpart exists; continue.'.\
                    format(ix, CP_cut_path))
            continue

    print('\nDone with periodicity analysis for {:s}.\n\n'.format(field_name))

    if DEBiL_write:
        # Write DEBiL "input list" of HAT-IDs and periods.
        write_path = '../data/DEBiL_heads/' + field_name + '_DSP' + str(
            DSP_lim) + '.txt'
        if not os.path.exists(write_path):
            f_id = open(write_path, 'wb+')
            data = np.array([out.index, out['PERIOD']])
            np.savetxt(f_id, data.T, fmt=['%15s', '%.6f'])
            f_id.close()
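
The period cuts above repeatedly use one compact idiom: the distance of a period from the nearest whole number of days is npminimum(p % 1., abs(p % 1. - 1.)), and npisclose(..., 0., atol=...) then flags periods sitting on a 1-day harmonic. A toy sketch of that check alone (plain numpy, hypothetical periods):

import numpy as np

periods = np.array([0.999, 2.004, 3.47])   # days
dist = np.minimum(periods % 1., np.abs(periods % 1. - 1.))
print(np.isclose(dist, 0., atol=0.02))     # [ True  True False]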