def _read_salt2(f, **kwargs): """Read a new-style SALT2 file. Such a file has metadata on lines starting with '@' and column names on lines starting with '#' and containing a ':' after the column name. There is optionally a line containing '#end' before the start of data. """ meta = odict() colnames = [] cols = [] readingdata = False for line in f: # strip leading & trailing whitespace & newline line = line.strip() if len(line) == 0: continue if not readingdata: # Read metadata if line[0] == '@': pos = line.find(' ') # Find first space. if pos in [-1, 1]: # Space must exist and key must exist. raise ValueError('Incorrectly formatted metadata line: ' + line) meta[line[1:pos]] = _cast_str(line[pos:]) continue # Read header line if line[0] == '#': pos = line.find(':') if pos in [-1, 1]: continue # comment line colname = line[1:pos].strip() if colname == 'end': continue colnames.append(colname) cols.append([]) continue # If the first non-whitespace character is not '@' or '#', # assume the line is the first data line. readingdata = True # strip comments pos = line.find('#') if pos > -1: line = line[:pos] if len(line) == 0: continue # Now we're reading data items = line.split() for col, item in zip(cols, items): col.append(_cast_str(item)) data = odict(zip(colnames, cols)) return meta, data
def _write_json(f, data, meta, **kwargs): # Build a dictionary of pure-python objects output = odict([('meta', meta), ('data', odict())]) for key in data.dtype.names: output['data'][key] = data[key].tolist() json.dump(output, f, encoding=sys.getdefaultencoding()) del output
def _write_json(f, data, meta, **kwargs): # Build a dictionary of pure-python objects output = odict([("meta", meta), ("data", odict())]) for key in data.dtype.names: output["data"][key] = data[key].tolist() json.dump(output, f) del output
def _parse_meta_from_line(line): """Return dictionary from key, value pairs on a line. Helper function for snana_read_simlib.""" meta = odict() # Find position of all the colons colon_pos = [] i = line.find(':') while i != -1: colon_pos.append(i) i = line.find(':', i + 1) # Find position of start of words before colons key_pos = [] for i in colon_pos: j = line.rfind(' ', 0, i) key_pos.append(j + 1) # append an extra key position so that we know when to end the last value. key_pos.append(len(line)) # get the keys, values based on positions above. for i in range(len(colon_pos)): key = line[key_pos[i]:colon_pos[i]] val = line[colon_pos[i] + 1:key_pos[i + 1]].strip() try: val = int(val) except ValueError: try: val = float(val) except ValueError: pass meta[key] = val return meta
def _parse_meta_from_line(line): """Return dictionary from key, value pairs on a line. Helper function for snana_read_simlib.""" meta = odict() # Find position of all the colons colon_pos = [] i = line.find(':') while i != -1: colon_pos.append(i) i = line.find(':', i+1) # Find position of start of words before colons key_pos = [] for i in colon_pos: j = line.rfind(' ', 0, i) key_pos.append(j+1) # append an extra key position so that we know when to end the last value. key_pos.append(len(line)) # get the keys, values based on positions above. for i in range(len(colon_pos)): key = line[key_pos[i]: colon_pos[i]] val = line[colon_pos[i]+1: key_pos[i+1]].strip() try: val = int(val) except ValueError: try: val = float(val) except ValueError: pass meta[key] = val return meta
def _salt2_rename_keys(d): newd = odict() for key, val in d.iteritems(): key = key.lower() if key in SALT2KEY_TO_KEY: key = SALT2KEY_TO_KEY[key] newd[key] = val return newd
def _read_ascii(f, **kwargs): delim = kwargs.get('delim', None) metachar = kwargs.get('metachar', '@') commentchar = kwargs.get('commentchar', '#') meta = odict() colnames = [] cols = [] readingdata = False for line in f: # strip leading & trailing whitespace, newline, and comments line = line.strip() pos = line.find(commentchar) if pos > -1: line = line[:pos] if len(line) == 0: continue if not readingdata: # Read metadata if line[0] == metachar: pos = line.find(' ') # Find first space. if pos in [-1, 1]: # Space must exist and key must exist. raise ValueError('Incorrectly formatted metadata line: ' + line) meta[line[1:pos]] = _cast_str(line[pos:]) continue # Read header line for item in line.split(delim): colnames.append(item.strip()) cols.append([]) readingdata = True continue # Now we're reading data items = line.split(delim) for col, item in zip(cols, items): col.append(_cast_str(item)) data = odict(zip(colnames, cols)) return meta, data
def normalize_data(data, zp=25.0, zpsys="ab"):
    """Return a copy of the data with all flux and fluxerr values normalized
    to the given zeropoint. Assumes data has already been standardized.

    For every row the flux and flux error are multiplied by
    ``10**(0.4 * (zp - row_zp))`` and by the ratio of the row's magnitude
    system ``zpbandflux`` to that of the target system, both evaluated for
    the row's bandpass.  Only the columns ``time``, ``band``, ``flux``,
    ``fluxerr``, ``zp`` and ``zpsys`` appear in the result.

    Parameters
    ----------
    data : `~numpy.ndarray`
        Structured array with fields ``time``, ``band``, ``flux``,
        ``fluxerr``, ``zp`` and ``zpsys``.
    zp : float
        Zeropoint to normalize to.
    zpsys : str
        Name of the magnitude system to normalize to.

    Returns
    -------
    normalized_data : `~numpy.ndarray`
    """
    normmagsys = get_magsystem(zpsys)

    # Builtin ``float`` instead of ``np.float``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    factor = np.empty(len(data), dtype=float)

    for b in set(data["band"].tolist()):
        idx = data["band"] == b
        b = get_bandpass(b)

        # Zeropoint rescaling for the rows in this band.
        bandfactor = 10.0 ** (0.4 * (zp - data["zp"][idx]))

        # Magnitude-system conversion, one system at a time.
        bandzpsys = data["zpsys"][idx]
        for ms in set(bandzpsys):
            idx2 = bandzpsys == ms
            ms = get_magsystem(ms)
            bandfactor[idx2] *= ms.zpbandflux(b) / normmagsys.zpbandflux(b)

        factor[idx] = bandfactor

    normalized_data = odict(
        [
            ("time", data["time"]),
            ("band", data["band"]),
            ("flux", data["flux"] * factor),
            ("fluxerr", data["fluxerr"] * factor),
            ("zp", zp),
            ("zpsys", zpsys),
        ]
    )
    return dict_to_array(normalized_data)
def write_lc(data, fname, format='ascii', **kwargs):
    """Write light curve data to a file.

    Parameters
    ----------
    data : `~astropy.table.Table`, `~numpy.ndarray` or dict
        Light curve data.  A Table contributes its ``meta``; anything
        that is neither a Table nor an ndarray is converted with
        ``dict_to_array``.
    fname : str
        Filename.
    format : {'ascii', 'salt2', 'snana', 'json'}, optional
        Format of file. Default is 'ascii'. 'salt2' is the new format
        available in snfit version >= 2.3.0.
    delim : str, optional
        **[ascii only]** Character used to separate entries on a line.
        Default is ' '.
    metachar : str, optional
        **[ascii only]** Metadata designator. Default is '@'.
    raw : bool, optional
        **[salt2, snana]** By default, the SALT2 and SNANA writers rename
        some metadata keys and column names in order to comply with what
        snfit and SNANA expect. Set to True to override this.
        Default is False.
    pedantic : bool, optional
        **[salt2, snana]** If True, check that output column names and
        header keys comply with expected formatting, and raise a ValueError
        if not. It is probably a good idea to set to False when raw is True.
        Default is True.

    Raises
    ------
    ValueError
        If no writer is registered for ``format``.
    """
    # Validate the format before touching the data or the filesystem.
    if format not in WRITERS:
        options = ", ".join(WRITERS.keys())
        raise ValueError(
            "Writer not defined for format {0!r}. Options: ".format(format)
            + options)

    from_table = isinstance(data, Table)
    meta = data.meta if from_table else odict()
    if from_table:
        data = np.asarray(data)
    elif not isinstance(data, np.ndarray):
        data = dict_to_array(data)

    with open(fname, 'w') as f:
        WRITERS[format](f, data, meta, **kwargs)
def dict_to_array(d):
    """Convert a dictionary of lists (or single values) to a structured
    numpy.ndarray.

    Parameters
    ----------
    d : dict
        Field name -> sequence or scalar.  Scalars (and length-1
        sequences) are broadcast to the length of the longest column.

    Returns
    -------
    result : `~numpy.ndarray`
        Structured array with one field per key, in the iteration order
        of ``d``.

    Raises
    ------
    ValueError
        If ``d`` is empty, or a column cannot be broadcast to the common
        length.
    """
    # Convert all lists/values to 1-d arrays, in order to let numpy
    # figure out the necessary size of the string arrays.
    new_d = odict((key, np.atleast_1d(d[key])) for key in d)

    # Determine dtype of output array.  Iterating the keys directly works
    # on both Python 2 and 3 without needing the ``six`` helper.
    dtype = [(key, new_d[key].dtype) for key in new_d]

    # Initialize ndarray and then fill it.
    col_len = max(len(v) for v in new_d.values())
    result = np.empty(col_len, dtype=dtype)
    for key in new_d:
        result[key] = new_d[key]

    return result
def dict_to_array(d):
    """Convert a dictionary of lists (or single values) to a structured
    numpy.ndarray.

    Parameters
    ----------
    d : dict
        Field name -> sequence or scalar.  Scalars (and length-1
        sequences) are broadcast to the length of the longest column.

    Returns
    -------
    result : `~numpy.ndarray`
        Structured array with one field per key, in the iteration order
        of ``d``.

    Raises
    ------
    ValueError
        If ``d`` is empty, or a column cannot be broadcast to the common
        length.
    """
    # Convert all lists/values to 1-d arrays, in order to let numpy
    # figure out the necessary size of the string arrays.
    new_d = odict()
    for key in d:
        new_d[key] = np.atleast_1d(d[key])

    # Determine dtype of output array.  ``items()`` is used because
    # ``iteritems()`` does not exist on Python 3 dictionaries.
    dtype = [(key, arr.dtype) for key, arr in new_d.items()]

    # Initialize ndarray and then fill it.
    col_len = max(len(v) for v in new_d.values())
    result = np.empty(col_len, dtype=dtype)
    for key, arr in new_d.items():
        result[key] = arr

    return result
def write_lc(data, fname, format='ascii', **kwargs):
    """Write light curve data.

    Parameters
    ----------
    data : `~astropy.table.Table`, `~numpy.ndarray` or dict
        Light curve data.  A Table contributes its ``meta``; anything
        that is neither a Table nor an ndarray is converted with
        ``dict_to_array``.
    fname : str
        Filename.
    format : {'ascii', 'salt2', 'snana', 'json'}, optional
        Format of file. Default is 'ascii'. 'salt2' is the new format
        available in snfit version >= 2.3.0.
    delim : str, optional
        **[ascii only]** Character used to separate entries on a line.
        Default is ' '.
    metachar : str, optional
        **[ascii only]** Metadata designator. Default is '@'.
    raw : bool, optional
        **[salt2, snana]** By default, the SALT2 and SNANA writers rename
        some metadata keys and column names in order to comply with what
        snfit and SNANA expect. Set to True to override this.
        Default is False.
    pedantic : bool, optional
        **[salt2, snana]** If True, check that output column names and
        header keys comply with expected formatting, and raise a ValueError
        if not. It is probably a good idea to set to False when raw is True.
        Default is True.

    Raises
    ------
    ValueError
        If no writer is registered for ``format``.
    """
    if format not in WRITERS:
        raise ValueError("Writer not defined for format {0!r}. Options: "
                         .format(format) + ", ".join(WRITERS.keys()))

    if isinstance(data, Table):
        meta = data.meta
        data = np.asarray(data)
    else:
        meta = odict()
        if not isinstance(data, np.ndarray):
            data = dict_to_array(data)

    # Text mode: the writers emit ``str`` (e.g. via ``json.dump``), which a
    # file opened with 'wb' rejects on Python 3.
    with open(fname, 'w') as f:
        WRITERS[format](f, data, meta, **kwargs)
def normalize_data(data, zp=25., zpsys='ab'):
    """Return a copy of the data with all flux and fluxerr values normalized
    to the given zeropoint. Assumes data has already been standardized.

    For every row the flux and flux error are multiplied by
    ``10**(0.4 * (zp - row_zp))`` and by the ratio of the row's magnitude
    system ``zpbandflux`` to that of the target system, both evaluated for
    the row's bandpass.  Only the columns ``time``, ``band``, ``flux``,
    ``fluxerr``, ``zp`` and ``zpsys`` appear in the result.

    Parameters
    ----------
    data : `~numpy.ndarray`
        Structured array with fields ``time``, ``band``, ``flux``,
        ``fluxerr``, ``zp`` and ``zpsys``.
    zp : float
        Zeropoint to normalize to.
    zpsys : str
        Name of the magnitude system to normalize to.

    Returns
    -------
    normalized_data : `~numpy.ndarray`
    """
    normmagsys = get_magsystem(zpsys)

    # ``np.float`` was only an alias of the builtin and was removed in
    # NumPy 1.24; the builtin gives the same float64 dtype.
    factor = np.empty(len(data), dtype=float)

    for b in set(data['band'].tolist()):
        idx = data['band'] == b
        b = get_bandpass(b)

        # Zeropoint rescaling for the rows in this band.
        bandfactor = 10.**(0.4 * (zp - data['zp'][idx]))

        # Magnitude-system conversion, one system at a time.
        bandzpsys = data['zpsys'][idx]
        for ms in set(bandzpsys):
            idx2 = bandzpsys == ms
            ms = get_magsystem(ms)
            bandfactor[idx2] *= (ms.zpbandflux(b) / normmagsys.zpbandflux(b))

        factor[idx] = bandfactor

    normalized_data = odict([('time', data['time']),
                             ('band', data['band']),
                             ('flux', data['flux'] * factor),
                             ('fluxerr', data['fluxerr'] * factor),
                             ('zp', zp),
                             ('zpsys', zpsys)])
    return dict_to_array(normalized_data)
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, npoints=100, method='single',
            maxiter=None, maxcall=None, modelcov=False, rstate=None,
            verbose=False, flux_cov=None, fixed_mcov=None, **kwargs):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines up
        with the earliest data point and the maximum bound is such that
        the earliest phase of the model lines up with the latest data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
        The dictionary is not modified.
    npoints : int, optional
        Number of active samples to use. Increasing this value increases
        the accuracy (due to denser sampling) and also the time
        to solution.
    method : {'classic', 'single', 'multi'}, optional
        Method used to select new points. Choices are 'classic',
        single-ellipsoidal ('single'), multi-ellipsoidal ('multi'). Default
        is 'single'.
    maxiter : int, optional
        Maximum number of iterations. Iteration may stop earlier if
        termination condition is reached. Default is no limit.
    maxcall : int, optional
        Passed through to ``nestle.sample`` as ``maxcall``.
        Default is None.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    rstate : `~numpy.random.RandomState`, optional
        RandomState instance. If not given, the global random state of the
        ``numpy.random`` module will be used.
    verbose : bool, optional
        Print running evidence sum on a single line.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overrides the model covariance (mcov) during the fit; can be used
        to keep mcov fixed while fitting.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logvol``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample. Also reachable through the
          deprecated name ``logprior``.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indices correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).
        * ``param_dict``: OrderedDict of all model parameter names and
          their estimated values.

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Raises
    ------
    ImportError
        If the nestle package is not installed.
    ValueError
        If amplitude-bound guessing conflicts with ``vparam_names`` or
        ``bounds``, or a varied parameter has no ppf, bounds or tied
        function.
    """

    try:
        import nestle
    except ImportError:
        raise ImportError("nest_lc() requires the nestle package.")

    if "nobj" in kwargs:
        warn("The nobj keyword is deprecated and will be removed in a future "
             "sncosmo release. Use `npoints` instead.")
        npoints = kwargs.pop("nobj")

    # experimental parameters
    tied = kwargs.get("tied", None)

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Work on a private copy of ppfs: entries derived from bounds are added
    # below, and writing them into the caller's dictionary would make a
    # later call with the same dictionary ignore its new bounds.
    ppfs = {} if ppfs is None else dict(ppfs)
    if tied is None:
        tied = {}

    # Convert bounds/priors combinations into ppfs
    if bounds is not None:
        for key, val in six.iteritems(bounds):
            if key in ppfs:
                continue  # ppfs take priority over bounds/priors
            a, b = val
            if priors is not None and key in priors:
                # solve ppf at discrete points and return interpolating
                # function
                x_samples = np.linspace(0., 1., 101)
                ppf_samples = ppf(priors[key], x_samples, a, b)
                f = Interp1D(0., 1., ppf_samples)
            else:
                f = Interp1D(0., 1., np.array([a, b]))
            ppfs[key] = f

    # NOTE: It is important that iparam_names is in the same order
    # every time, otherwise results will not be reproducible, even
    # with same random seed.  This is because iparam_names[i] is
    # matched to u[i] below and u will be in a reproducible order,
    # so iparam_names must also be.
    iparam_names = [key for key in vparam_names if key in ppfs]
    ppflist = [ppfs[key] for key in iparam_names]
    npdim = len(iparam_names)  # length of u
    ndim = len(vparam_names)  # length of v

    # Check that all param_names either have a direct prior or are tied.
    for name in vparam_names:
        if name in iparam_names:
            continue
        if name in tied:
            continue
        raise ValueError("Must supply ppf or bounds or tied for parameter '{}'"
                         .format(name))

    def prior_transform(u):
        # Map the unit-cube sample ``u`` to parameter values: independent
        # parameters through their ppf, the rest through ``tied``.
        d = {}
        for i in range(npdim):
            d[iparam_names[i]] = ppflist[i](u[i])
        # Builtin ``float``: the ``np.float`` alias was removed in
        # NumPy 1.24.
        v = np.empty(ndim, dtype=float)
        for i in range(ndim):
            key = vparam_names[i]
            if key in d:
                v[i] = d[key]
            else:
                v[i] = tied[key](d)
        return v

    # Indices of the model parameters in vparam_names
    idx = np.array([model.param_names.index(name) for name in vparam_names])

    def loglike(parameters):
        model.parameters[idx] = parameters
        return -0.5 * _chisq(data, model, modelcov=modelcov,
                             flux_cov=flux_cov, fixed_mcov=fixed_mcov)

    t0 = time.time()
    res = nestle.sample(loglike, prior_transform, ndim, npdim=npdim,
                        npoints=npoints, method=method, maxiter=maxiter,
                        maxcall=maxcall, rstate=rstate,
                        callback=(nestle.print_progress if verbose else None))
    elapsed = time.time() - t0

    # estimate parameters and covariance from samples
    vparameters, cov = nestle.mean_and_cov(res.samples, res.weights)

    # update model parameters to estimated ones.
    model.set(**dict(zip(vparam_names, vparameters)))

    # `res` is a nestle.Result object. Collect result into a sncosmo.Result
    # object for consistency, and add more fields.
    res = Result(niter=res.niter,
                 ncall=res.ncall,
                 logz=res.logz,
                 logzerr=res.logzerr,
                 h=res.h,
                 samples=res.samples,
                 weights=res.weights,
                 logvol=res.logvol,
                 logl=res.logl,
                 vparam_names=copy.copy(vparam_names),
                 ndof=len(data) - len(vparam_names),
                 bounds=bounds,
                 time=elapsed,
                 parameters=model.parameters.copy(),
                 covariance=cov,
                 errors=odict(zip(vparam_names,
                                  np.sqrt(np.diagonal(cov)))),
                 param_dict=odict(zip(model.param_names,
                                      model.parameters)))

    # Deprecated result fields.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be removed in a future release. "
              "Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    depmsg = ("The `logprior` attribute is deprecated in sncosmo v1.2 "
              "and will be changed in a future release. "
              "Use `logvol` instead.")
    res.__dict__['deprecated']['logprior'] = (res.logvol, depmsg)

    return res, model
def fit_lc(data, model, vparam_names, bounds=None, method='minuit',
           guess_amplitude=True, guess_t0=True, guess_z=True,
           minsnr=5., modelcov=False, verbose=False, maxcall=10000,
           flux_cov=None, fixed_mcov=None, **kwargs):
    """Fit model parameters to data by minimizing chi^2.

    This function defines a chi^2 to minimize, makes initial guesses for
    t0 and amplitude, then runs a minimizer.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point. The dictionary is not
        modified.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    method : {'minuit'}, optional
        Minimization method to use. Currently there is only one choice.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overrides the model covariance (mcov) during the fit; can be used
        to keep mcov fixed while fitting.
    verbose : bool, optional
        Print messages during fitting.
    maxcall : int, optional
        Passed to the minimizer as the limit on function calls.
        Default is 10000.
    flatten : bool, optional
        Deprecated. If given, a warning is issued; if true, the result is
        passed through ``flatten_result`` before being returned.

    Returns
    -------
    res : Result
        The optimization result represented as a ``Result`` object, which is
        a `dict` subclass with attribute access. Therefore, ``res.keys()``
        provides a list of the attributes. Attributes are:

        - ``success``: boolean describing whether fit succeeded.
        - ``message``: string with more information about exit status.
        - ``ncall``: number of function evaluations.
        - ``chisq``: minimum chi^2 value.
        - ``ndof``: number of degrees of freedom
          (len(data) - len(vparam_names)).
        - ``param_names``: same as ``model.param_names``.
        - ``parameters``: 1-d `~numpy.ndarray` of best-fit values
          (including fixed parameters) corresponding to ``param_names``.
        - ``vparam_names``: list of varied parameter names.
        - ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indices correspond to order of ``vparam_names``.
        - ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.

    fitmodel : `~sncosmo.Model`
        A copy of the model with parameters set to best-fit values.

    Raises
    ------
    ValueError
        If ``vparam_names`` is empty or names an unknown parameter, if
        ``z`` is varied without two-sided bounds or starts out of range,
        if a one-sided bound is given, if iminuit is not installed, or if
        ``method`` is unknown.

    Notes
    -----

    **t0 guess:** If ``t0`` is being fit and ``guess_t0=True``, the
    function will guess the initial starting point for ``t0`` based on
    the data. The guess is made as follows:

    * Evaluate the time and value of peak flux for the model in each band
      given the current model parameters.
    * Determine the data point with maximum flux in each band, for points
      with signal-to-noise ratio > ``minsnr`` (default is 5). If no points
      meet this criteria, the band is ignored (for the purpose of guessing
      only).
    * For each band, compare model's peak flux to the peak data point. Choose
      the band with the highest ratio of data / model.
    * Set ``t0`` so that the model's time of peak in the chosen band
      corresponds to the peak data point in this band.

    **amplitude guess:** If amplitude (assumed to be the third model
    parameter, i.e. the first parameter of the source) is being fit and
    ``guess_amplitude=True``, the function will guess the initial starting
    point for the amplitude based on the data.

    **redshift guess:** If redshift (``z``) is being fit and ``guess_z=True``,
    the function will set the initial value of ``z`` to the average of the
    bounds on ``z``.

    Examples
    --------

    The `~sncosmo.flatten_result` function can be used to make the result
    a dictionary suitable for appending as rows of a table:

    >>> from astropy.table import Table               # doctest: +SKIP
    >>> table_rows = []                               # doctest: +SKIP
    >>> for sn in sne:                                # doctest: +SKIP
    ...     res, fitmodel = sncosmo.fit_lc(           # doctest: +SKIP
    ...          sn, model, ['t0', 'x0', 'x1', 'c'])  # doctest: +SKIP
    ...     table_rows.append(flatten_result(res))    # doctest: +SKIP
    >>> t = Table(table_rows)                         # doctest: +SKIP

    """

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Check that vparam_names isn't empty and contains only parameters
    # known to the model.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for s in vparam_names:
        if s not in model.param_names:
            raise ValueError("Parameter not in model: " + repr(s))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Initialize bounds. Copy them because a 't0' entry may be added below
    # and the caller's dictionary must not pick it up (it would otherwise
    # be reused for the next light curve fit with the same dictionary).
    bounds = {} if bounds is None else copy.copy(bounds)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if fit.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (For now, we assume it is the 3rd parameter of the model.)
    if (guess_amplitude or guess_t0):
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # count degrees of freedom
    ndof = len(data) - len(vparam_names)

    if method == 'minuit':
        try:
            import iminuit
        except ImportError:
            raise ValueError("Minimization method 'minuit' requires the "
                             "iminuit package")

        # The iminuit minimizer expects the function signature to have an
        # argument for each parameter.
        def fitchisq(*parameters):
            model.parameters = parameters
            return _chisq(data, model, modelcov=modelcov,
                          flux_cov=flux_cov, fixed_mcov=fixed_mcov)

        # Set up keyword arguments to pass to Minuit initializer. This is a
        # separate dictionary so the caller's **kwargs (checked for the
        # deprecated `flatten` keyword below) is not overwritten.
        minuit_kwargs = {}
        for name in model.param_names:
            minuit_kwargs[name] = model.get(name)  # Starting point.

            # Fix parameters not being varied in the fit.
            if name not in vparam_names:
                minuit_kwargs['fix_' + name] = True
                minuit_kwargs['error_' + name] = 0.
                continue

            # Bounds
            if name in bounds:
                if None in bounds[name]:
                    raise ValueError('one-sided bounds not allowed for '
                                     'minuit minimizer')
                minuit_kwargs['limit_' + name] = bounds[name]

            # Initial step size
            if name in bounds:
                step = 0.02 * (bounds[name][1] - bounds[name][0])
            elif model.get(name) != 0.:
                step = 0.1 * model.get(name)
            else:
                step = 1.
            minuit_kwargs['error_' + name] = step

        if verbose:
            print("Initial parameters:")
            for name in vparam_names:
                print(name, minuit_kwargs[name], 'step=',
                      minuit_kwargs['error_' + name], end=" ")
                if 'limit_' + name in minuit_kwargs:
                    print('bounds=', minuit_kwargs['limit_' + name], end=" ")
                print()

        m = iminuit.Minuit(fitchisq, errordef=1.,
                           forced_parameters=model.param_names,
                           print_level=(1 if verbose else 0),
                           throw_nan=True, **minuit_kwargs)
        d, _ = m.migrad(ncall=maxcall)

        # Build a message.
        message = []
        if d.has_reached_call_limit:
            message.append('Reached call limit.')
        if d.hesse_failed:
            message.append('Hesse Failed.')
        if not d.has_covariance:
            message.append('No covariance.')
        elif not d.has_accurate_covar:  # iminuit docs wrong
            message.append('Covariance may not be accurate.')
        if not d.has_posdef_covar:  # iminuit docs wrong
            message.append('Covariance not positive definite.')
        if d.has_made_posdef_covar:
            message.append('Covariance forced positive definite.')
        if not d.has_valid_parameters:
            message.append('Parameter(s) value and/or error invalid.')
        if len(message) == 0:
            message.append('Minimization exited successfully.')
        # iminuit: m.np_matrix() doesn't work

        # numpy array of best-fit values (including fixed parameters).
        parameters = np.array([m.values[name] for name in model.param_names])
        model.parameters = parameters  # set model parameters to best fit.

        # Covariance matrix (only varied parameters) as numpy array.
        if m.covariance is None:
            covariance = None
        else:
            covariance = np.array([
                [m.covariance[(n1, n2)] for n1 in vparam_names]
                for n2 in vparam_names])

        # OrderedDict of errors
        if m.errors is None:
            errors = None
        else:
            errors = odict([(name, m.errors[name]) for name in vparam_names])

        # Compile results
        res = Result(success=d.is_valid,
                     message=' '.join(message),
                     ncall=d.nfcn,
                     chisq=d.fval,
                     ndof=ndof,
                     param_names=model.param_names,
                     parameters=parameters,
                     vparam_names=vparam_names,
                     covariance=covariance,
                     errors=errors)

        # TODO remove cov_names in a future release.
        depmsg = ("The `cov_names` attribute is deprecated in sncosmo v1.0 "
                  "and will be removed in v1.1. Use `vparam_names` instead.")
        res.__dict__['deprecated']['cov_names'] = (vparam_names, depmsg)

    else:
        # ``!r`` is the repr conversion; ``:r`` is not a valid format spec.
        raise ValueError("unknown method {0!r}".format(method))

    # TODO remove this in a future release.
    if "flatten" in kwargs:
        warn("The `flatten` keyword is deprecated in sncosmo v1.0 "
             "and will be removed in v1.1. Use the flatten_result() "
             "function instead.")
        if kwargs["flatten"]:
            res = flatten_result(res)

    return res, model
def read_snana_simlib(fname):
    """Read an SNANA 'simlib' (simulation library) ascii file.

    Parameters
    ----------
    fname : str
        Filename.

    Returns
    -------
    meta : `OrderedDict`
        Global meta data, not associated with any one LIBID.
    observation_sets : `OrderedDict` of `astropy.table.Table`
        keys are LIBIDs, values are observation sets.

    Notes
    -----
    * Anything following '#' on each line is ignored as a comment.
    * Keywords are space separated strings ending with a colon.
    * If a line starts with 'LIBID:', the following lines are associated
      with the value of LIBID, until 'END_LIBID:' is encountered (or the
      next 'LIBID:' line, or the end of the file).
    * While reading a given LIBID, lines starting with 'S' or 'T'
      keywords are assumed to contain 12 space-separated values after
      the keyword. These are (1) MJD, (2) IDEXPT, (3) FLT, (4) CCD GAIN,
      (5) CCD NOISE, (6) SKYSIG, (7) PSF1, (8) PSF2, (9) PSF 2/1 RATIO,
      (10) ZPTAVG, (11) ZPTSIG, (12) MAG.
    * Other lines inside a 'LIBID:'/'END_LIBID:' pair are treated as metadata
      for that LIBID.
    * Any other keywords outside a 'LIBID:'/'END_LIBID:' pair are treated
      as global header keywords and are returned in the `meta` dictionary.

    Examples
    --------
    >>> meta, obs_sets = read_snana_simlib('filename') # doctest: +SKIP

    The second object is a dictionary of astropy Tables indexed by LIBID:

    >>> obs_sets.keys() # doctest: +SKIP
    [0, 1, 2, 3, 4]

    Each table (libid) has metadata:

    >>> obs_sets[0].meta # doctest: +SKIP
    OrderedDict([('LIBID', 0), ('RA', 52.5), ('DECL', -27.5), ('NOBS', 161),
                 ('MWEBV', 0.0), ('PIXSIZE', 0.27)])

    Each table has the following columns:

    >>> obs_sets[0].colnames # doctest: +SKIP
    ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE', 'SKYSIG',
     'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG', 'MAG']

    """
    from astropy.table import Table

    COLNAMES = ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE',
                'SKYSIG', 'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG',
                'MAG']

    def is_search(word):
        # SEARCH column: True for 'S:' rows, False for 'T:' rows.
        return word == 'S:'

    def keep(word):
        return word

    # One converter per column, applied to the word at the same position.
    converters = [is_search, float, int, keep] + 9 * [float]

    def empty_columns():
        return odict([(key, []) for key in COLNAMES])

    meta = odict()  # global metadata
    observation_sets = odict()  # dictionary of tables indexed by LIBID

    reading_obsset = False
    with open(fname, 'r') as infile:
        for line in infile:

            # Discard the comment part, then skip lines with no content.
            hash_pos = line.find('#')
            if hash_pos != -1:
                line = line[0:hash_pos]
            if len(line.split()) == 0:
                continue

            opens_set = line.startswith('LIBID:')

            # Outside an observation set: either a new set begins here or
            # the line carries global metadata.
            if not reading_obsset:
                if opens_set:
                    reading_obsset = True
                    current_meta = _parse_meta_from_line(line)
                    current_data = empty_columns()
                else:
                    meta.update(_parse_meta_from_line(line))
                continue

            # Inside an observation set.
            if line.startswith('END_LIBID:'):
                # Explicit end of the set.
                reading_obsset = False
                observation_sets[current_meta['LIBID']] = \
                    Table(current_data, meta=current_meta)

            elif opens_set:
                # No explicit end, but the next set starts anyway.
                observation_sets[current_meta['LIBID']] = \
                    Table(current_data, meta=current_meta)
                current_meta = _parse_meta_from_line(line)
                current_data = empty_columns()

            elif line.startswith(('S:', 'T:')):
                # Convert the whole row before storing any of it.
                words = line.split()
                row = [convert(words[i])
                       for i, convert in enumerate(converters)]
                for colname, val in zip(COLNAMES, row):
                    current_data[colname].append(val)

            else:
                current_meta.update(_parse_meta_from_line(line))

    # The last set may lack an explicit end marker.
    if reading_obsset:
        observation_sets[current_meta['LIBID']] = \
            Table(current_data, meta=current_meta)

    return meta, observation_sets
def _read_salt2_old(dirname, **kwargs):
    """Read old-style SALT2 files from a directory.

    A file named 'lightfile' must exist in the directory. It holds one
    whitespace-separated key/value pair per line and supplies the global
    metadata. Every other file that is read is parsed with `_read_salt2`
    and must define the 'INSTRUMENT', 'BAND' and 'MAGSYS' metadata keys.

    Parameters
    ----------
    dirname : str
        Directory containing 'lightfile' and the data files.
    filenames : list of str, optional
        Keyword argument. Names (relative to `dirname`) of the data files
        to read. By default every entry in the directory other than
        'lightfile' is read. The list passed in is not modified.

    Returns
    -------
    meta : OrderedDict
        Key/value pairs from the lightfile. Any metadata left over in a
        data file (beyond INSTRUMENT, BAND and MAGSYS) is stored under the
        key ``'INSTRUMENT::BAND'`` for that file.
    data : OrderedDict or None
        Columns aggregated over all data files, with two extra columns,
        'Filter' and 'MagSys'. None if no data files were read.

    Raises
    ------
    IOError
        If `dirname` is not a directory or contains no 'lightfile'.
    ValueError
        If the lightfile is malformed, a data file lacks the required
        metadata or any columns, or column names differ between files.
    """
    filenames = kwargs.get('filenames', None)

    # Get list of files in directory.
    if not os.path.isdir(dirname):
        raise IOError("Not a directory: '{0}'".format(dirname))
    dirfilenames = os.listdir(dirname)

    # Read metadata from lightfile.
    if 'lightfile' not in dirfilenames:
        raise IOError("no lightfile in directory: '{0}'".format(dirname))
    lightfile_path = os.path.join(dirname, 'lightfile')
    meta = odict()
    with open(lightfile_path, 'r') as lightfile:
        for line in lightfile:
            line = line.strip()
            if not line:
                continue
            try:
                key, val = line.split()
            except ValueError:
                raise ValueError('expected space-separated key value pairs in '
                                 'lightfile: {0}'.format(lightfile_path))
            meta[key] = _cast_str(val)

    # Get list of filenames to read. We already read the lightfile, so it is
    # excluded. A new list is built (rather than calling ``remove``) so that
    # a list supplied by the caller is left untouched.
    if filenames is None:
        filenames = dirfilenames
    filenames = [name for name in filenames if name != 'lightfile']
    fullfilenames = [os.path.join(dirname, name) for name in filenames]

    # Read data from files.
    data = None
    for fname in fullfilenames:
        with open(fname, 'r') as f:
            filemeta, filedata = _read_salt2(f)

        # Check that all necessary file metadata was defined.
        if not ('INSTRUMENT' in filemeta and
                'BAND' in filemeta and
                'MAGSYS' in filemeta):
            raise ValueError(
                'not all necessary global keys (INSTRUMENT, '
                'BAND, MAGSYS) are defined in file {0}'.format(fname))

        # Add the instrument/band to the file data, in anticipation of
        # aggregating it with other files. The number of rows is taken from
        # the first column; ``list(...)`` keeps this working on both
        # Python 2 lists and Python 3 dictionary views.
        columns = list(filedata.values())
        if not columns:
            raise ValueError('no columns defined in file {0}'.format(fname))
        data_length = len(columns[0])
        filter_name = '{0}::{1}'.format(filemeta.pop('INSTRUMENT'),
                                        filemeta.pop('BAND'))
        filedata['Filter'] = data_length * [filter_name]
        filedata['MagSys'] = data_length * [filemeta.pop('MAGSYS')]

        # If this is the first file, initialize data lists, otherwise if
        # keys match, append this file's data to the main data.
        if data is None:
            data = filedata
        elif set(filedata.keys()) == set(data.keys()):
            for key in data:
                data[key].extend(filedata[key])
        else:
            raise ValueError('column names do not match between files')

        # Append any extra metadata in this file to the master metadata.
        if len(filemeta) > 0:
            meta[filter_name] = filemeta

    return meta, data
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, nobj=100, maxiter=10000,
            maxcall=1000000, modelcov=False, verbose=False):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain column names.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines up with
        the earliest data point and the maximum bound is such that the
        earliest phase of the model lines up with the latest data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points) to use. Increasing
        nobj increases the accuracy (due to denser sampling) and also the time
        to solution.
    maxiter : int, optional
        Maximum number of iterations. Default is 10000.
    maxcall : int, optional
        Maximum number of likelihood evaluations. Default is 1000000.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    verbose : bool, optional
        Passed through unchanged to the underlying sampler. Default is
        False.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indices correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: 1-d `~numpy.ndarray` of varied parameter
          uncertainties: the square root of the diagonal entries of the
          covariance matrix, in the same order as ``covariance``.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).
        * ``param_names``, ``param_dict``: kept for backwards compatibility.
          ``param_names`` is the same object as ``vparam_names``;
          ``param_dict`` maps every model parameter name to its value.

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Raises
    ------
    ValueError
        If ``guess_amplitude_bound`` is true and `bounds` already contains
        an entry for the model's amplitude parameter.

    Notes
    -----
    The algorithm uses the numpy random number generator to generate
    samples, and is therefore non-deterministic in default use. To get
    reproducible results, simply seed the random number generator before
    calling `nest_lc`:

    >>> import numpy as np
    >>> np.random.seed(0)
    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this dict b/c we modify it below

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        # The amplitude is taken to be the model's third parameter.
        amplitude_name = model.param_names[2]
        if amplitude_name in bounds:
            # The parameter name must be substituted into the message;
            # without ``format`` the user would see a literal '{0!r}'.
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(amplitude_name))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[amplitude_name] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    res = _nest_lc(data, model, vparam_names, modelcov=modelcov,
                   bounds=bounds, priors=priors, ppfs=ppfs, nobj=nobj,
                   maxiter=maxiter, maxcall=maxcall, verbose=verbose)
    res.bounds = bounds

    # calculate weighted average and weighted covariance matrix of samples
    vparameters, cov = weightedcov(res['samples'], res['weights'])
    model.set(**dict(zip(vparam_names, vparameters)))
    res.parameters = model.parameters.copy()
    res.covariance = cov
    res.errors = np.sqrt(np.diagonal(cov))

    # backwards compatibility. TODO remove these in a future release.
    res.param_names = res.vparam_names
    res.param_dict = odict(zip(model.param_names, model.parameters))

    return res, model
#!/usr/bin/env python
# Simulate a noisy SALT2 light curve in four SDSS bands and write it to
# 'example_photometric_data.dat'.
from collections import OrderedDict as odict

import numpy as np
import sncosmo
from astropy.table import Table

model = sncosmo.Model(source='salt2')
model.set(z=0.5, c=0.2, t0=55100., x1=0.5)
model.set_source_peakabsmag(-19.5, 'bessellb', 'ab')

# 40 observations, cycling through the four bands.
times = np.linspace(55070., 55150., 40)
bands = np.array(10 * ['sdssg', 'sdssr', 'sdssi', 'sdssz'])
zp = 25. * np.ones(40)
zpsys = np.array(40 * ['ab'])

flux = model.bandflux(bands, times, zp=zp, zpsys=zpsys)

# Constant uncertainty of 5% of the peak flux. The builtin ``float`` is used
# because the ``np.float`` alias no longer exists in current NumPy.
fluxerr = (0.05 * np.max(flux)) * np.ones(40, dtype=float)
flux += fluxerr * np.random.randn(40)

# The true model parameters are recorded as table metadata.
data = Table(odict([('time', times),
                    ('band', bands),
                    ('flux', flux),
                    ('fluxerr', fluxerr),
                    ('zp', zp),
                    ('zpsys', zpsys)]),
             meta=dict(zip(model.param_names, model.parameters)))

sncosmo.write_lc(data, 'example_photometric_data.dat')
def read_snana_fits(head_file, phot_file, snids=None, n=None):
    """Read the SNANA FITS format: two FITS files jointly representing
    metadata and photometry for a set of SNe.

    Parameters
    ----------
    head_file : str
        Filename of "HEAD" ("header") FITS file.
    phot_file : str
        Filename of "PHOT" ("photometry") FITS file.
    snids : list of str, optional
        If given, only return the entries with the matching SNIDs, in the
        order requested. Each SNID must match exactly one entry.
    n : int, optional
        If given, only return the first `n` entries. If the HEAD file holds
        fewer than `n` entries, all of them are returned.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        Each item in the list is an astropy Table instance.

    Raises
    ------
    RuntimeError
        If `snids` is given but the HEAD file has no 'SNID' column, or a
        requested SNID does not match exactly one entry.
    ValueError
        If both `snids` and `n` are given.

    Notes
    -----
    If `head_file` contains a column 'SNID' containing strings, leading and
    trailing whitespace is stripped from all the values in that column.

    If `phot_file` contains a column 'FLT', leading and trailing whitespace
    is stripped from all the values in that column.

    Examples
    --------
    >>> sne = read_snana_fits('HEAD.fits', 'PHOT.fits')  # doctest: +SKIP
    >>> for sn in sne:  # doctest: +SKIP
    ...     sn.meta  # Metadata in an OrderedDict.  # doctest: +SKIP
    ...     sn['MJD']  # MJD column  # doctest: +SKIP
    """

    # Should we memmap? Only if we're going to read only a part of the file
    memmap = (snids is not None or n is not None)

    # Get metadata for all the SNe
    head_data = fits.getdata(head_file, 1, view=np.ndarray)
    phot_data = fits.getdata(phot_file, 1, view=np.ndarray, memmap=memmap)

    # Strip trailing whitespace characters from SNID.
    if 'SNID' in head_data.dtype.names:
        try:
            head_data['SNID'][:] = np.char.strip(head_data['SNID'])
        except TypeError:
            # SNID column is not a string column; nothing to strip.
            pass

    # Check which indices to return.
    if snids is None and n is None:
        idx = range(len(head_data))
    elif n is None:
        if 'SNID' not in head_data.dtype.names:
            raise RuntimeError('Specific snids requested, but head file does'
                               ' not contain SNID column')
        idx = []
        for snid in snids:
            i = np.flatnonzero(head_data['SNID'] == snid)
            if len(i) != 1:
                raise RuntimeError('Unique snid requested, but there are '
                                   '{0:d} matching entries'.format(len(i)))
            idx.append(i[0])
    elif snids is None:
        # Clamp to the number of available entries so that asking for more
        # SNe than the file holds returns them all instead of raising
        # IndexError.
        idx = range(min(n, len(head_data)))
    else:
        raise ValueError("cannot specify both 'snids' and 'n' arguments")

    # Loop over SNe in HEAD file
    sne = []
    for i in idx:
        meta = odict(zip(head_data.dtype.names, head_data[i]))

        # PTROBS_MIN/PTROBS_MAX are used as 1-based inclusive row pointers
        # into the PHOT table; convert to a 0-based half-open slice.
        j0 = head_data['PTROBS_MIN'][i] - 1
        j1 = head_data['PTROBS_MAX'][i]
        data = phot_data[j0:j1]
        if 'FLT' in data.dtype.names:
            data['FLT'][:] = np.char.strip(data['FLT'])
        sne.append(Table(data, meta=meta, copy=False))

    return sne
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, nobj=100, maxiter=10000,
            maxcall=1000000, modelcov=False, verbose=False):
    """Estimate model parameters and the evidence by nested sampling.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Photometric data table. Certain columns are required; see the
        "Photometric Data" section of the documentation.
    model : `~sncosmo.Model`
        Model to fit. It is copied; the caller's instance is not modified.
    vparam_names : list
        Names of the model parameters that are varied. They are reordered
        internally to follow the order of ``model.param_names``.
    bounds : `dict`
        Allowed range of every varied parameter. The only parameter that
        may be left out is ``t0``: its default lower bound lines up the
        latest model phase with the earliest data point, and its default
        upper bound lines up the earliest model phase with the latest data
        point. The dictionary is copied before being modified.
    guess_amplitude_bound : bool, optional
        When true, the bounds of the model's amplitude parameter are
        derived from the data and must not appear in `bounds`: the lower
        limit is zero and the upper limit is 10 times the amplitude
        "guess" (based on the highest-flux data point in any band).
        Default is False.
    minsnr : float, optional
        Data points below this signal-to-noise ratio are ignored when
        guessing the amplitude bound. Default is 5.
    priors : `dict`, optional
        Maps parameter names to prior probability distribution functions
        (callables accepting a float). Parameters without an entry get a
        flat prior between their bounds.
    ppfs : `dict`, optional
        Maps parameter names to prior percent point functions (the inverse
        of the cumulative distribution function). An entry here takes
        precedence over a pdf given in ``priors``.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points). A larger value
        gives denser sampling, hence better accuracy, at the cost of a
        longer time to solution.
    maxiter : int, optional
        Upper limit on the number of iterations. Default is 10000.
    maxcall : int, optional
        Upper limit on the number of likelihood evaluations. Default is
        1000000.
    modelcov : bool, optional
        Whether model covariance enters the chisq calculation. Default is
        False.
    verbose : bool, optional
        Forwarded unchanged to the underlying sampler. Default is False.

    Returns
    -------
    res : Result
        Its attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: seconds spent in the iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimated uncertainty in logz (from finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of the varied parameter names.
        * ``samples``: 2-d `~numpy.ndarray` of shape
          (nsamples, nparameters); row ``samples[k, :]`` holds the
          parameter values of sample ``k``.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples) of
          log(prior volume) per sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples) of
          log(likelihood) per sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples) of sample
          weights, proportional to prior * likelihood.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values (fixed parameters included), ordered like
          ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` parameter covariance,
          computed from ``samples`` and ``weights``; indices follow
          ``vparam_names``.
        * ``errors``: OrderedDict of uncertainties of the varied
          parameters (square roots of the covariance diagonal).
        * ``ndof``: number of degrees of freedom
          (len(data) - len(vparam_names)).
        * ``bounds``: dictionary of the bounds used for the varied
          parameters, including automatically determined ones.

    estimated_model : `~sncosmo.Model`
        Copy of the model with its parameters set to ``res.parameters``.

    Notes
    -----
    Samples are drawn with the numpy random number generator, so results
    are non-deterministic by default. Seed the generator before calling
    `nest_lc` to make them reproducible:

    >>> import numpy as np
    >>> np.random.seed(0)
    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # modified below, so work on a copy

    # Keep only the requested names, in the model's own parameter order.
    vparam_names = [name for name in model.param_names
                    if name in vparam_names]

    # Discard data in bands the model doesn't cover.
    z_bounds = bounds.get('z', None)
    data = cut_bands(data, model, z_bounds=z_bounds)

    if guess_amplitude_bound:
        amp_name = model.param_names[2]
        if amp_name not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(amp_name))
        if amp_name in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(amp_name))

        # With a bounded redshift, do the guess at the midpoint of its
        # bounds.
        if 'z' in bounds:
            z_mid = sum(bounds['z']) / 2.
            model.set(z=z_mid)

        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[amp_name] = (0., 10. * amplitude)

    # Derive t0 bounds from the data unless the caller supplied them.
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    res = _nest_lc(data, model, vparam_names,
                   modelcov=modelcov,
                   bounds=bounds,
                   priors=priors,
                   ppfs=ppfs,
                   nobj=nobj,
                   maxiter=maxiter,
                   maxcall=maxcall,
                   verbose=verbose)
    res.bounds = bounds

    # Weighted mean and weighted covariance matrix of the samples.
    vparameters, cov = weightedcov(res['samples'], res['weights'])
    best_values = dict(zip(vparam_names, vparameters))
    model.set(**best_values)

    res.parameters = model.parameters.copy()
    res.covariance = cov
    sigmas = np.sqrt(np.diagonal(cov))
    res.errors = odict(zip(vparam_names, sigmas))
    res.param_dict = odict(zip(model.param_names, model.parameters))

    # TODO remove/change in a future release.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be changed in v1.1. Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    return res, model
def standardize_data(data):
    """Standardize photometric data by converting to a structured numpy array
    with standard column names (if necessary) and sorting entries in order of
    increasing time.

    Parameters
    ----------
    data : `~astropy.table.Table`, `~numpy.ndarray` or dict
        Photometric data. Column names are matched case-insensitively
        against the aliases in ``_photdata_aliases``.

    Returns
    -------
    standardized_data : `~numpy.ndarray`
        Structured array whose field names are the keys of
        ``_photdata_aliases``. If the input array already has exactly these
        columns and is time-ordered, it is returned as is (no copy).

    Raises
    ------
    ValueError
        If `data` is of an unsupported type, is an array without named
        fields, or does not contain exactly one column for each standard
        name.
    """
    if isinstance(data, Table):
        data = np.asarray(data)

    if isinstance(data, np.ndarray):
        colnames = data.dtype.names
        if colnames is None:
            # Plain (unstructured) arrays carry no column names to match.
            raise ValueError('Unrecognized data type: array must be a '
                             'structured array with named columns')

        # Check if the data already complies with what we want
        # (correct column names & ordered by date)
        if (set(colnames) == set(_photdata_aliases.keys()) and
                np.all(np.ediff1d(data['time']) >= 0.)):
            return data

    elif isinstance(data, dict):
        colnames = data.keys()

    else:
        raise ValueError('Unrecognized data type')

    # Create mapping from lowercased column names to originals
    lower_to_orig = dict([(colname.lower(), colname) for colname in colnames])

    # Set of lowercase column names
    lower_colnames = set(lower_to_orig.keys())

    # For each standard column, find the single input column that matches
    # one of its aliases.
    orig_colnames_to_use = []
    for aliases in _photdata_aliases.values():
        i = lower_colnames & aliases
        if len(i) != 1:
            raise ValueError('Data must include exactly one column from {0} '
                             '(case independent)'.format(', '.join(aliases)))
        orig_colnames_to_use.append(lower_to_orig[i.pop()])

    # A real sequence is needed below: a Python 3 dictionary view cannot be
    # assigned to ``dtype.names``.
    standard_names = tuple(_photdata_aliases.keys())

    if isinstance(data, np.ndarray):
        new_data = data[orig_colnames_to_use].copy()
        new_data.dtype.names = standard_names

    else:
        new_data = odict()
        for newkey, oldkey in zip(standard_names, orig_colnames_to_use):
            new_data[newkey] = data[oldkey]

        new_data = dict_to_array(new_data)

    # Sort by time, if necessary.
    if not np.all(np.ediff1d(new_data['time']) >= 0.):
        new_data.sort(order=['time'])

    return new_data
def read_snana_ascii(fname, default_tablename=None):
    """Read an SNANA-format ascii file.

    Such files may contain metadata lines and one or more tables. See Notes
    for a summary of the format.

    Parameters
    ----------
    fname : str or file-like
        Filename of object to read, or an object with a ``read`` method.
        A file-like object is closed once it has been read.
    default_tablename : str, optional
        Default tablename, or the string that indicates a table row, when
        a table starts with 'NVAR:' rather than 'NVAR_TABLENAME:'.

    Returns
    -------
    meta : OrderedDict
        Metadata from keywords.
    tables : dict of `~astropy.table.Table`
        Tables, indexed by table name.

    Raises
    ------
    ValueError
        If a table starts with a bare 'NVAR:' and no `default_tablename`
        is given, or if 'VARNAMES' does not directly follow 'NVAR'.

    Notes
    -----
    The file can contain one or more tables, as well as optional metadata.
    Here is an example of the expected format::

        META1: a
        META2: 6
        NVAR_SN: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y

    Behavior:

    * Any strings ending in a colon (:) are treated as keywords.
    * The start of a new table is indicated by a keyword starting with
      'NVAR'.
    * If the 'NVAR' is followed by an underscore (e.g., 'NVAR_TABLENAME'),
      then 'TABLENAME' is taken to be the name of the table. Otherwise the
      user *must specify* a ``default_tablename``.  This is because data
      rows are identified by the tablename.
    * After a keyword starting with 'NVAR', the next keyword must be
      'VARNAMES'. The strings following give the column names.
    * Any other keywords anywhere in the file are treated as metadata. The
      first string after the keyword is treated as the value for that keyword.
    * **Note:** Newlines are treated as equivalent to spaces; they do not
      indicate a new row.

    Examples
    --------

    >>> from io import StringIO  # StringIO behaves like a file
    >>> f = StringIO('META1: a\\n'
    ...              'META2: 6\\n'
    ...              'NVAR_SN: 3\\n'
    ...              'VARNAMES: A B C\\n'
    ...              'SN: 1 2.0 x\\n'
    ...              'SN: 4 5.0 y\\n')
    ...
    >>> meta, tables = read_snana_ascii(f)

    The first object is a dictionary of metadata:

    >>> meta
    OrderedDict([('META1', 'a'), ('META2', 6)])

    The second is a dictionary of all the tables in the file:

    >>> tables['SN']  # doctest: +SKIP
    <Table rows=2 names=('A','B','C')>
    array([(1, 2.0, 'x'), (4, 5.0, 'y')],
          dtype=[('A', '<i8'), ('B', '<f8'), ('C', 'S1')])

    If the file had an 'NVAR' keyword rather than 'NVAR_SN', for example::

        NVAR: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y
        SN: 5 8.2 z

    it can be read by supplying a default table name:

    >>> meta, tables = read_snana_ascii(f, default_tablename='SN')
    ... # doctest: +SKIP
    """

    meta = odict()  # initialize structure to hold metadata.
    tables = {}  # initialize structure to hold data.

    # Anything with a ``read`` method is used as an open file; everything
    # else is treated as a path. Plain text mode is sufficient because the
    # content is split on arbitrary whitespace, so the newline convention
    # does not matter.
    if hasattr(fname, 'read'):
        fh = fname
    else:
        fh = open(fname, 'r')
    try:
        words = fh.read().split()
    finally:
        # Close the handle even when reading fails.
        fh.close()

    i = 0
    nvar = None
    tablename = None
    while i < len(words):
        word = words[i]

        # If the word starts with 'NVAR', we are starting a new table.
        if word.startswith('NVAR'):
            nvar = int(words[i + 1])

            # Infer table name. The name will be used to designate a data row.
            if '_' in word:
                pos = word.find('_') + 1
                tablename = word[pos:].rstrip(':')
            elif default_tablename is not None:
                tablename = default_tablename
            else:
                raise ValueError(
                    'Table name must be given as part of NVAR keyword so '
                    'that rows belonging to this table can be identified. '
                    'Alternatively, supply the default_tablename keyword.')
            table = odict()
            tables[tablename] = table

            i += 2

        # If the word starts with 'VARNAMES', the following `nvar` words
        # define the column names of the table.
        elif word.startswith('VARNAMES'):

            # Check that nvar is defined and that no column names are defined
            # for the current table.
            if nvar is None or len(table) > 0:
                raise ValueError('NVAR must directly precede VARNAMES')

            # Read the column names
            for j in range(i + 1, i + 1 + nvar):
                table[words[j]] = []
            i += nvar + 1

        # If the word matches the current tablename, we are reading a data row.
        elif word.rstrip(':') == tablename:
            for j, colname in enumerate(table.keys()):
                table[colname].append(words[i + 1 + j])
            i += nvar + 1

        # Otherwise, we are reading metadata or some comment
        # If the word ends with ":", it is metadata.
        elif word[-1] == ':':
            name = word[:-1]  # strip off the ':'
            if len(words) >= i + 2:
                try:
                    val = int(words[i + 1])
                except ValueError:
                    try:
                        val = float(words[i + 1])
                    except ValueError:
                        val = words[i + 1]
                meta[name] = val
            else:
                # Keyword is the last word in the file: no value follows.
                meta[name] = None
            i += 2
        else:
            # It is some comment; continue onto next word.
            i += 1

    # All values in each column are currently strings. Convert to int or
    # float if possible. Column names are snapshotted with ``list`` because
    # the columns are reassigned inside the loop.
    for table in tables.values():
        for colname in list(table):
            values = table[colname]
            try:
                table[colname] = [int(val) for val in values]
            except ValueError:
                try:
                    table[colname] = [float(val) for val in values]
                except ValueError:
                    pass

    # All tables are dictionaries. Convert them to Tables
    for tablename in list(tables):
        tables[tablename] = Table(tables[tablename])

    return meta, tables
def read_snana_simlib(fname):
    """Read an SNANA 'simlib' (simulation library) ascii file.

    Parameters
    ----------
    fname : str
        Filename.

    Returns
    -------
    meta : `OrderedDict`
        Global meta data, not associated with any one LIBID.
    observation_sets : `OrderedDict` of `astropy.table.Table`
        keys are LIBIDs, values are observation sets.

    Notes
    -----
    * Anything following '#' on each line is ignored as a comment.
    * Keywords are space separated strings ending with a colon.
    * If a line starts with 'LIBID:', the following lines are associated
      with the value of LIBID, until 'END_LIBID:' is encountered. If the
      'END_LIBID:' line is missing, the observation set ends at the next
      'LIBID:' line or at the end of the file.
    * While reading a given LIBID, lines starting with 'S' or 'T'
      keywords are assumed to contain 12 space-separated values after
      the keyword. These are (1) MJD, (2) IDEXPT, (3) FLT, (4) CCD GAIN,
      (5) CCD NOISE, (6) SKYSIG, (7) PSF1, (8) PSF2, (9) PSF 2/1 RATIO,
      (10) ZPTAVG, (11) ZPTSIG, (12) MAG.
    * Other lines inside a 'LIBID:'/'END_LIBID:' pair are treated as metadata
      for that LIBID.
    * Any other keywords outside a 'LIBID:'/'END_LIBID:' pair are treated
      as global header keywords and are returned in the `meta` dictionary.

    Examples
    --------
    >>> meta, obs_sets = read_snana_simlib('filename')  # doctest: +SKIP

    The second object is a dictionary of astropy Tables indexed by LIBID:

    >>> obs_sets.keys()  # doctest: +SKIP
    [0, 1, 2, 3, 4]

    Each table (libid) has metadata:

    >>> obs_sets[0].meta  # doctest: +SKIP
    OrderedDict([('LIBID', 0), ('RA', 52.5), ('DECL', -27.5), ('NOBS', 161),
                 ('MWEBV', 0.0), ('PIXSIZE', 0.27)])

    Each table has the following columns:

    >>> obs_sets[0].colnames  # doctest: +SKIP
    ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE', 'SKYSIG',
     'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG', 'MAG']
    """

    from astropy.table import Table

    COLNAMES = ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE',
                'SKYSIG', 'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG',
                'MAG']

    # TODO: the 'FIELD', 'TELESCOPE' and 'PIXSIZE' header keywords are not
    # used yet; if present in the header they should be added to the table.

    meta = odict()  # global metadata
    observation_sets = odict()  # dictionary of tables indexed by LIBID

    reading_obsset = False
    with open(fname, 'r') as infile:
        for line in infile:

            # strip comments
            idx = line.find('#')
            if idx != -1:
                line = line[0:idx]

            # split on spaces.
            words = line.split()
            if len(words) == 0:
                continue

            # If we're not currently reading an obs set, check if this line
            # is the start of one. If it isn't, update the global metadata.
            if not reading_obsset:
                if line[0:6] == 'LIBID:':
                    reading_obsset = True
                    current_meta = _parse_meta_from_line(line)
                    current_data = odict([(key, []) for key in COLNAMES])
                else:
                    meta.update(_parse_meta_from_line(line))

            # If we are currently reading an obsset...
            else:
                # Check for the explicit end of the obs set.
                if line[0:10] == 'END_LIBID:':
                    reading_obsset = False
                    observation_sets[current_meta['LIBID']] = \
                        Table(current_data, meta=current_meta)

                # Sometimes there's not an explicit end, but the next one
                # starts anyway: store the finished set before starting the
                # new one so that its rows are not lost or merged.
                elif line[0:6] == 'LIBID:':
                    observation_sets[current_meta['LIBID']] = \
                        Table(current_data, meta=current_meta)
                    current_meta = _parse_meta_from_line(line)
                    current_data = odict([(key, []) for key in COLNAMES])

                # Otherwise, read the line into the current obs set.
                elif line[0:2] in ['S:', 'T:']:
                    # All values are converted before any is appended, so a
                    # malformed row leaves the columns untouched.
                    row = [('SEARCH', words[0] == 'S:'),
                           ('MJD', float(words[1])),
                           ('IDEXPT', int(words[2])),
                           ('FLT', words[3]),
                           ('CCD_GAIN', float(words[4])),
                           ('CCD_NOISE', float(words[5])),
                           ('SKYSIG', float(words[6])),
                           ('PSF1', float(words[7])),
                           ('PSF2', float(words[8])),
                           ('PSFRATIO', float(words[9])),
                           ('ZPTAVG', float(words[10])),
                           ('ZPTSIG', float(words[11])),
                           ('MAG', float(words[12]))]
                    for colname, val in row:
                        current_data[colname].append(val)
                else:
                    current_meta.update(_parse_meta_from_line(line))

    # At the end, check for the case where there's not an explicit end
    # to the last obs set:
    if reading_obsset:
        observation_sets[current_meta['LIBID']] = \
            Table(current_data, meta=current_meta)

    return meta, observation_sets
def mcmc_lc(data, model, vparam_names, bounds=None, priors=None,
            guess_amplitude=True, guess_t0=True, guess_z=True,
            minsnr=5., modelcov=False, nwalkers=10, nburn=200,
            nsamples=1000, thin=1, a=2.0, flux_cov=None, fixed_mcov=None):
    """Run an MCMC chain to get model parameter samples.

    This is a convenience function around `emcee.EnsembleSampler`.
    It defines the likelihood function and makes a heuristic guess at a
    good set of starting points for the walkers. It then runs the sampler,
    starting with a burn-in run.

    If you're not getting good results, you might want to try
    increasing the burn-in, increasing the walkers, or specifying a
    better starting position.  To get a better starting position, you
    could first run `~sncosmo.fit_lc`, then run this function with all
    ``guess_[name]`` keyword arguments set to False, so that the
    current model parameters are used as the starting point.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit. A copy is modified; the input is left untouched.
    vparam_names : iterable
        Model parameters to vary.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point. The dictionary passed in
        is not modified.
    priors : `dict`, optional
        Prior probability functions. Keys are parameter names, values are
        functions that return probability given the parameter value.
        The default prior is a flat distribution. A prior value of zero
        yields a log-probability of ``-inf`` for that position.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    nwalkers : int, optional
        Number of walkers in the EnsembleSampler
    nburn : int, optional
        Number of samples in burn-in phase.
    nsamples : int, optional
        Number of samples in production run.
    thin : int, optional
        Factor by which to thin samples in production run. Output samples
        array will have (nsamples/thin) samples.
    a : float, optional
        Proposal scale parameter passed to the EnsembleSampler.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overrides the model covariance (mcov) during the fit; can be used
        to keep mcov fixed while fitting.

    Returns
    -------
    res : Result
        Has the following attributes:

        * ``param_names``: All parameter names of model, including fixed.
        * ``parameters``: Model parameters, with varied parameters set to
          mean value in samples.
        * ``vparam_names``: Names of parameters varied. Order of parameters
          matches order of samples.
        * ``samples``: 2-d array with shape ``(N, len(vparam_names))``.
          Order of parameters in each row  matches order in
          ``res.vparam_names``.
        * ``covariance``: 2-d array giving covariance, measured from samples.
          Order corresponds to ``res.vparam_names``.
        * ``errors``: dictionary giving square root of diagonal of covariance
          matrix for varied parameters. Useful for ``plot_lc``.
        * ``mean_acceptance_fraction``: mean acceptance fraction for all
          walkers in the sampler.

    est_model : `~sncosmo.Model`
        Copy of input model with varied parameters set to mean value in
        samples.

    Raises
    ------
    ImportError
        If the emcee package is not installed.
    ValueError
        If no parameters are supplied, a name in ``vparam_names``,
        ``bounds`` or ``priors`` is not a model parameter, or ``z`` is
        varied without finite bounds (or starts outside them).
    """

    try:
        import emcee
    except ImportError:
        raise ImportError("mcmc_lc() requires the emcee package.")

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Copy `bounds` as well: a 't0' entry may be added below and the
    # caller's dictionary must not be changed as a side effect.
    bounds = {} if bounds is None else copy.copy(bounds)

    if priors is None:
        priors = {}

    # Check that vparam_names isn't empty, check for unknown parameters.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for names in (vparam_names, bounds, priors):
        for name in names:
            if name not in model.param_names:
                raise ValueError("Parameter not in model: " + repr(name))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]
    ndim = len(vparam_names)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if allowed to vary.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (we assume amplitude is the 3rd parameter of the model.)
    if guess_amplitude or guess_t0:
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # Indices used in probability function.
    # modelidx: Indices of model parameters corresponding to vparam_names.
    # idxbounds: tuples of (varied parameter index, low bound, high bound).
    # idxpriors: tuples of (varied parameter index, function).
    modelidx = np.array([model.param_names.index(k) for k in vparam_names])
    idxbounds = [(vparam_names.index(k), bounds[k][0], bounds[k][1])
                 for k in bounds]
    idxpriors = [(vparam_names.index(k), priors[k]) for k in priors]

    # Posterior function.
    def lnprob(parameters):
        for i, low, high in idxbounds:
            if not low < parameters[i] < high:
                return -np.inf

        model.parameters[modelidx] = parameters
        logp = -0.5 * _chisq(data, model, modelcov=modelcov,
                             flux_cov=flux_cov, fixed_mcov=fixed_mcov)

        for i, func in idxpriors:
            prior = func(parameters[i])
            if prior == 0.:
                # math.log(0) raises ValueError; a zero prior simply
                # means this position has zero posterior probability.
                return -np.inf
            logp += math.log(prior)
        return logp

    # Heuristic determination of walker initial positions:
    # distribute walkers in a symmetric gaussian ball, with heuristically
    # determined scale.
    ctr = model.parameters[modelidx]
    scale = np.ones(ndim)
    for i, name in enumerate(vparam_names):
        if name in bounds:
            scale[i] = 0.0001 * (bounds[name][1] - bounds[name][0])
        elif model.get(name) != 0.:
            scale[i] = 0.01 * model.get(name)
        else:
            scale[i] = 0.1
    pos = ctr + scale * np.random.normal(size=(nwalkers, ndim))

    # Run the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=a)
    pos, prob, state = sampler.run_mcmc(pos, nburn)  # burn-in
    sampler.reset()
    sampler.run_mcmc(pos, nsamples, thin=thin)  # production run
    samples = sampler.flatchain

    # Summary statistics.
    vparameters = np.mean(samples, axis=0)
    cov = np.cov(samples, rowvar=0)
    model.set(**dict(zip(vparam_names, vparameters)))
    errors = odict(zip(vparam_names, np.sqrt(np.diagonal(cov))))
    mean_acceptance_fraction = np.mean(sampler.acceptance_fraction)

    res = Result(param_names=copy.copy(model.param_names),
                 parameters=model.parameters.copy(),
                 vparam_names=vparam_names,
                 samples=samples,
                 covariance=cov,
                 errors=errors,
                 mean_acceptance_fraction=mean_acceptance_fraction)

    return res, model
#!/usr/bin/env python
# Generate a small synthetic SALT2 light curve and write it to
# 'example_photometric_data.dat'.
import numpy as np
import sncosmo
from astropy.utils import OrderedDict as odict
from astropy.table import Table

# Model whose parameters are stored as the table metadata below.
model = sncosmo.ObsModel(source='salt2')
model.set(z=0.5, c=0.2, t0=55100., x1=0.5)
model.set_source_peakabsmag(-19.5, 'bessellb', 'ab')

# 40 observations, cycling through the four bands.
times = np.linspace(55070., 55150., 40)
bands = np.array(10 * ['sdssg', 'sdssr', 'sdssi', 'sdssz'])
zp = 25. * np.ones(40)
zpsys = np.array(40 * ['ab'])

# Constant uncertainty (5% of the peak model flux), then scatter the
# model fluxes by it.
flux = model.bandflux(bands, times, zp=zp, zpsys=zpsys)
# Builtin `float` instead of the `np.float` alias, which was removed in
# NumPy 1.24; the resulting dtype (float64) is the same.
fluxerr = (0.05 * np.max(flux)) * np.ones(40, dtype=float)
flux += fluxerr * np.random.randn(40)

data = Table(odict([('time', times),
                    ('band', bands),
                    ('flux', flux),
                    ('fluxerr', fluxerr),
                    ('zp', zp),
                    ('zpsys', zpsys)]),
             meta=dict(zip(model.param_names, model.parameters)))

sncosmo.write_lc(data, 'example_photometric_data.dat')
def _read_salt2_old(dirname, **kwargs):
    """Read old-style SALT2 files from a directory.

    A file named 'lightfile' must exist in the directory.

    Parameters
    ----------
    dirname : str
        Directory containing 'lightfile' and the data files.
    filenames : list of str, optional
        Keyword argument. Names (relative to `dirname`) of the data files
        to read. By default every file in the directory other than
        'lightfile' is read. The list passed in is not modified.

    Returns
    -------
    meta : OrderedDict
        Key/value pairs (as strings) from 'lightfile'. Any metadata left
        over in a data file after removing INSTRUMENT, BAND and MAGSYS is
        stored under the key ``'INSTRUMENT::BAND'`` for that file.
    data : OrderedDict or None
        Columns (lists) aggregated over all data files, with 'Filter' and
        'MagSys' columns added. None if no data files were read.

    Raises
    ------
    IOError
        If `dirname` is not a directory or contains no 'lightfile'.
    ValueError
        If 'lightfile' has a line that is not a space-separated key/value
        pair, a data file lacks INSTRUMENT, BAND or MAGSYS, or column
        names differ between data files.
    """

    filenames = kwargs.get('filenames', None)

    # Get list of files in directory.
    if not os.path.isdir(dirname):
        raise IOError("Not a directory: '{0}'".format(dirname))
    dirfilenames = os.listdir(dirname)

    # Read metadata from lightfile.
    if 'lightfile' not in dirfilenames:
        raise IOError("no lightfile in directory: '{0}'".format(dirname))
    lightfilename = os.path.join(dirname, 'lightfile')
    meta = odict()
    with open(lightfilename, 'r') as lightfile:
        for line in lightfile:
            line = line.strip()
            if len(line) == 0:
                continue
            try:
                key, val = line.split()
            except ValueError:
                raise ValueError('expected space-separated key value pairs in '
                                 'lightfile: {0}'
                                 .format(lightfilename))
            meta[key] = val

    # Get list of filenames to read. The lightfile has already been read,
    # so leave it out. Build a new list rather than removing in place so
    # that a list supplied by the caller is left untouched.
    if filenames is None:
        filenames = dirfilenames
    filenames = [f for f in filenames if f != 'lightfile']
    fullfilenames = [os.path.join(dirname, f) for f in filenames]

    # Read data from files.
    data = None
    for fname in fullfilenames:
        with open(fname, 'r') as f:
            filemeta, filedata = _read_salt2(f)

        # Check that all necessary file metadata was defined.
        if not ('INSTRUMENT' in filemeta and 'BAND' in filemeta and
                'MAGSYS' in filemeta):
            raise ValueError('not all necessary global keys (INSTRUMENT, '
                             'BAND, MAGSYS) are defined in file {0}'
                             .format(fname))

        # Add the instrument/band to the file data, in anticipation of
        # aggregating it with other files. The number of rows is taken
        # from the first column; `keys()[0]` is avoided because dict views
        # are not indexable on Python 3.
        if filedata:
            data_length = len(next(iter(filedata.values())))
        else:
            data_length = 0
        filter_name = '{0}::{1}'.format(filemeta.pop('INSTRUMENT'),
                                        filemeta.pop('BAND'))
        filedata['Filter'] = data_length * [filter_name]
        filedata['MagSys'] = data_length * [filemeta.pop('MAGSYS')]

        # If this is the first file, initialize data lists, otherwise if keys
        # match, append this file's data to the main data.
        if data is None:
            data = filedata
        elif set(filedata.keys()) == set(data.keys()):
            for key in data:
                data[key].extend(filedata[key])
        else:
            raise ValueError('column names do not match between files')

        # Append any extra metadata in this file to the master metadata.
        if len(filemeta) > 0:
            meta[filter_name] = filemeta

    return meta, data
"""Convenience functions for photometric data.""" from __future__ import division import math import numpy as np from astropy.utils import OrderedDict as odict from astropy.table import Table from astropy.extern import six from .spectral import get_magsystem, get_bandpass _photdata_aliases = odict([ ('time', set(['time', 'date', 'jd', 'mjd', 'mjdobs', 'mjd_obs'])), ('band', set(['band', 'bandpass', 'filter', 'flt'])), ('flux', set(['flux', 'f'])), ('fluxerr', set(['fluxerr', 'fe', 'fluxerror', 'flux_error', 'flux_err'])), ('zp', set(['zp', 'zpt', 'zeropoint', 'zero_point'])), ('zpsys', set(['zpsys', 'zpmagsys', 'magsys'])) ]) # Descriptions for docstring only. _photdata_descriptions = { 'time': 'Time of observation in days', 'band': 'Bandpass of observation', 'flux': 'Flux of observation', 'fluxerr': 'Gaussian uncertainty on flux', 'zp': 'Zeropoint corresponding to flux', 'zpsys': 'Magnitude system for zeropoint' } _photdata_types = { 'time': 'float',
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, npoints=100, method='single',
            maxiter=None, maxcall=None, modelcov=False, rstate=None,
            verbose=False, **kwargs):
    """Run nested sampling algorithm to estimate model parameters and
    evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
        The dictionary passed in is not modified.
    npoints : int, optional
        Number of active samples to use. Increasing this value increases
        the accuracy (due to denser sampling) and also the time
        to solution.
    method : {'classic', 'single', 'multi'}, optional
        Method used to select new points. Choices are 'classic',
        single-ellipsoidal ('single'), multi-ellipsoidal ('multi'). Default
        is 'single'.
    maxiter : int, optional
        Maximum number of iterations. Iteration may stop earlier if
        termination condition is reached. Default is no limit.
    maxcall : int, optional
        Passed through unchanged as the ``maxcall`` argument of
        ``nestle.sample``.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    rstate : `~numpy.random.RandomState`, optional
        RandomState instance. If not given, the global random state of the
        ``numpy.random`` module will be used.
    verbose : bool, optional
        Print running evidence sum on a single line.
    **kwargs
        ``nobj`` is accepted as a deprecated alias of ``npoints`` (a
        warning is issued). ``tied`` (experimental) is a dictionary
        mapping a varied parameter name to a callable that computes its
        value from a dictionary of the directly-sampled parameters.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite
          sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples,
          nparameters). Each row is the parameter values for a single
          sample. For example, ``samples[0, :]`` is the parameter values
          for the first sample.
        * ``logvol``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample. Also available under the
          deprecated name ``logprior``.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional
          to the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order
          corresponds to ``model.param_names``.
        * ``param_dict``: OrderedDict mapping every model parameter name
          to its value in ``parameters``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indices correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance
          matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Raises
    ------
    ImportError
        If the nestle package is not installed.
    ValueError
        If amplitude-bound guessing is requested inconsistently, or a
        varied parameter has neither a ppf, bounds, nor a ``tied`` entry.
    """

    try:
        import nestle
    except ImportError:
        raise ImportError("nest_lc() requires the nestle package.")

    if "nobj" in kwargs:
        warn("The nobj keyword is deprecated and will be removed in a future "
             "sncosmo release. Use `npoints` instead.")
        npoints = kwargs.pop("nobj")

    # experimental parameters
    tied = kwargs.get("tied", None)

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Copy `ppfs` for the same reason as `bounds`: entries derived from
    # bounds/priors are added below and must not leak into the caller's
    # dictionary.
    ppfs = {} if ppfs is None else copy.copy(ppfs)
    if tied is None:
        tied = {}

    # Convert bounds/priors combinations into ppfs
    if bounds is not None:
        for key, val in six.iteritems(bounds):
            if key in ppfs:
                continue  # ppfs take priority over bounds/priors
            a, b = val
            if priors is not None and key in priors:
                # solve ppf at discrete points and return interpolating
                # function
                x_samples = np.linspace(0., 1., 101)
                ppf_samples = ppf(priors[key], x_samples, a, b)
                f = Interp1D(0., 1., ppf_samples)
            else:
                f = Interp1D(0., 1., np.array([a, b]))
            ppfs[key] = f

    # NOTE: It is important that iparam_names is in the same order
    # every time, otherwise results will not be reproducible, even
    # with same random seed.  This is because iparam_names[i] is
    # matched to u[i] below and u will be in a reproducible order,
    # so iparam_names must also be.
    iparam_names = [key for key in vparam_names if key in ppfs]
    ppflist = [ppfs[key] for key in iparam_names]
    npdim = len(iparam_names)  # length of u
    ndim = len(vparam_names)  # length of v

    # Check that all param_names either have a direct prior or are tied.
    for name in vparam_names:
        if name in iparam_names:
            continue
        if name in tied:
            continue
        raise ValueError("Must supply ppf or bounds or tied for parameter '{}'"
                         .format(name))

    def prior_transform(u):
        # Map the unit-cube sample `u` onto the directly-sampled
        # parameters, then fill in tied parameters from those values.
        d = {}
        for i in range(npdim):
            d[iparam_names[i]] = ppflist[i](u[i])

        # Builtin `float` rather than the `np.float` alias, which was
        # removed in NumPy 1.24.
        v = np.empty(ndim, dtype=float)
        for i in range(ndim):
            key = vparam_names[i]
            if key in d:
                v[i] = d[key]
            else:
                v[i] = tied[key](d)

        return v

    # Indices of the model parameters in vparam_names
    idx = np.array([model.param_names.index(name) for name in vparam_names])

    def loglike(parameters):
        model.parameters[idx] = parameters
        return -0.5 * _chisq(data, model, modelcov=modelcov)

    t0 = time.time()
    res = nestle.sample(loglike, prior_transform, ndim, npdim=npdim,
                        npoints=npoints, method=method, maxiter=maxiter,
                        maxcall=maxcall, rstate=rstate,
                        callback=(nestle.print_progress if verbose else None))
    elapsed = time.time() - t0

    # estimate parameters and covariance from samples
    vparameters, cov = nestle.mean_and_cov(res.samples, res.weights)

    # update model parameters to estimated ones.
    model.set(**dict(zip(vparam_names, vparameters)))

    # `res` is a nestle.Result object. Collect result into a sncosmo.Result
    # object for consistency, and add more fields.
    res = Result(niter=res.niter,
                 ncall=res.ncall,
                 logz=res.logz,
                 logzerr=res.logzerr,
                 h=res.h,
                 samples=res.samples,
                 weights=res.weights,
                 logvol=res.logvol,
                 logl=res.logl,
                 vparam_names=copy.copy(vparam_names),
                 ndof=len(data) - len(vparam_names),
                 bounds=bounds,
                 time=elapsed,
                 parameters=model.parameters.copy(),
                 covariance=cov,
                 errors=odict(zip(vparam_names,
                                  np.sqrt(np.diagonal(cov)))),
                 param_dict=odict(zip(model.param_names,
                                      model.parameters)))

    # Deprecated result fields.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be removed in a future release. "
              "Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    depmsg = ("The `logprior` attribute is deprecated in sncosmo v1.2 "
              "and will be changed in a future release. "
              "Use `logvol` instead.")
    res.__dict__['deprecated']['logprior'] = (res.logvol, depmsg)

    return res, model
def nest_lc(data, model, param_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, nobj=100, maxiter=10000, verbose=False):
    """Run nested sampling algorithm to estimate model parameters and
    evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain column names.
    model : `~sncosmo.Model`
        The model to fit.
    param_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : dict, optional
        Not currently used.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points) to use.
        Increasing nobj increases the accuracy (due to denser sampling)
        and also the time to solution.
    maxiter : int, optional
        Maximum number of iterations. Default is 10000.
    verbose : bool, optional
        Passed through to the underlying sampling routine.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite
          sampling)
        * ``h``: Bayesian information.
        * ``param_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples,
          nparameters). Each row is the parameter values for a single
          sample. For example, ``samples[0, :]`` is the parameter values
          for the first sample.
        * ``weights``: 1-d `~numpy.ndarray`, length=nsamples;
          Weight corresponding to each sample. The weight is proportional
          to the prior * likelihood for the sample.
        * ``logprior``: 1-d `~numpy.ndarray`, length=nsamples;
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray`, length=nsamples; log(likelihood)
          for each sample.
        * ``param_dict``: Dictionary of weighted average of sample
          parameter values (includes fixed parameters).
        * ``errors``: Dictionary of weighted standard deviation of sample
          parameter values (does not include fixed parameters).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).
        * ``ndof``: Number of degrees of freedom.

    est_model : `~sncosmo.Model`
        Copy of model with parameters set to the values in
        ``res.param_dict``.

    Raises
    ------
    ValueError
        If ``guess_amplitude_bound`` is true and `bounds` already has an
        entry for the amplitude parameter.
    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this dict b/c we modify it below

    # Find t0 bounds to use, if not explicitly given
    if 't0' in param_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    if guess_amplitude_bound:
        # The amplitude is assumed to be the 3rd parameter of the model.
        if model.param_names[2] in bounds:
            # The parameter name is formatted into the message; without
            # the `.format` call the text contained a literal '{0!r}'.
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))
        else:
            _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
            bounds[model.param_names[2]] = (0., 10. * amplitude)

    res = _nest_lc(data, model, param_names, bounds=bounds, priors=priors,
                   nobj=nobj, maxiter=maxiter, verbose=verbose)

    # Weighted average of samples
    parameters = np.average(res['samples'], weights=res['weights'], axis=0)
    model.set(**dict(zip(param_names, parameters)))
    res.param_dict = dict(zip(model.param_names, model.parameters))

    # Weighted st. dev. of samples
    std = np.sqrt(np.sum(res['weights'][:, np.newaxis] *
                         (res['samples']-parameters)**2, axis=0))
    res.errors = odict(zip(res.param_names, std))
    res.bounds = bounds

    return res, model
def realize_lcs(observations, model, params, thresh=None):
    """Realize data for a set of SNe given a set of observations.

    Parameters
    ----------
    observations : `~astropy.table.Table` or `~numpy.ndarray`
        Table of observations. Must contain the following column names:
        ``band``, ``time``, ``zp``, ``zpsys``, ``gain``, ``skynoise``.
    model : `sncosmo.Model`
        The model to use in the simulation. The parameters are applied to
        a copy, so the model passed in is not modified.
    params : list (or generator) of dict
        List of parameters to feed to the model for realizing each light
        curve.
    thresh : float, optional
        If given, light curves are skipped (not returned) if none of the data
        points have signal-to-noise greater than ``thresh``.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        Table of realized data for each item in ``params`` (minus any
        skipped because of ``thresh``). The corresponding ``params`` item
        is used as the table metadata.

    Notes
    -----
    ``skynoise`` is the image background contribution to the flux measurement
    error (in units corresponding to the specified zeropoint and zeropoint
    system). To get the error on a given measurement, ``skynoise`` is added
    in quadrature to the photon noise from the source.

    It is left up to the user to calculate ``skynoise`` as they see fit as the
    details depend on how photometry is done and possibly how the PSF is
    modeled. As a simple example, assuming a Gaussian PSF, and perfect
    PSF photometry, ``skynoise`` would be ``4 * pi * sigma_PSF * sigma_pixel``
    where ``sigma_PSF`` is the standard deviation of the PSF in pixels and
    ``sigma_pixel`` is the background noise in a single pixel in counts.
    """
    import copy

    observations = np.asarray(observations)

    # Copy the model so that setting parameters below does not change the
    # caller's model.
    model = copy.copy(model)

    lcs = []
    for p in params:
        model.set(**p)

        flux = model.bandflux(observations['band'],
                              observations['time'],
                              zp=observations['zp'],
                              zpsys=observations['zpsys'])

        # Sky noise and source photon noise added in quadrature.
        fluxerr = np.sqrt(observations['skynoise']**2 +
                          np.abs(flux) / observations['gain'])

        # Scatter fluxes by the fluxerr
        flux = np.random.normal(flux, fluxerr)

        # Check if any of the fluxes are significant
        if thresh is not None and not np.any(flux/fluxerr > thresh):
            continue

        data = odict([('time', observations['time']),
                      ('band', observations['band']),
                      ('flux', flux),
                      ('fluxerr', fluxerr),
                      ('zp', observations['zp']),
                      ('zpsys', observations['zpsys'])])
        lcs.append(Table(data, meta=p))

    return lcs
def simulate_vol(obs_sets, model, gen_params, vrate,
                 cosmo=FlatLambdaCDM(H0=70., Om0=0.3),
                 z_range=(0., 1.), default_area=1., nsim=None, nret=10,
                 thresh=5.):
    """Simulate transient photometric data according to observations
    (EXPERIMENTAL).

    .. warning::

       This function is experimental in v0.4

    Parameters
    ----------
    obs_sets : dict of `astropy.table.Table`
        A dictionary of "observation sets". Each observation set is a table
        of observations. See the notes section below for information on what
        the table must contain.
    model : `sncosmo.Model`
        The model to use in the simulation.
    gen_params : callable
        A callable that accepts a single float (redshift) and returns
        a dictionary on each call. Typically the callable would randomly
        select parameters from some underlying distribution on each call.
    vrate : callable
        A callable that returns the SN rate per comoving volume as a
        function of redshift, in units yr^-1 Mpc^-3.
    cosmo : astropy.cosmology.Cosmology, optional
        Cosmology used to determine volume. The default is a FlatLambdaCDM
        cosmology with ``Om0=0.3``, ``H0=70.``.
    z_range : (float, float), optional
        Redshift range in which to generate transients.
    default_area : float, optional
        Area in deg^2 for observation sets that do not have an 'AREA'
        keyword in their metadata.
    nsim : int, optional
        Number of transients to simulate. Cannot set both `nsim` and `nret`
        (pass ``nret=None`` when giving `nsim`, since `nret` defaults to
        10). Default is `None`.
    nret : int, optional
        Number of transients to return (number simulated that pass
        flux significance threshold). Cannot set both `nsim` and `nret`.
        Default is 10. Set both `nsim` and `nret` to `None` to let the function
        automatically determine the number of SNe based on the area of each
        observation set and the volumetric rate.
    thresh : float, optional
        Minimum flux significance threshold for a transient to be returned.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        List of tables where each table is the photometric
        data for a single simulated SN.

    Raises
    ------
    ValueError
        If both `nsim` and `nret` are given.

    Notes
    -----

    Each ``obs_set`` (values in ``obs_sets``) must have the following columns:

    * ``MJD``
    * ``FLT``
    * ``CCD_GAIN``
    * ``SKYSIG``
    * ``PSF1``
    * ``ZPTAVG``

    These are currently just what the SIMLIB files from SNANA have.
    In the future these can be more flexible.

    Examples
    --------

    Define a set of just three observations:

    >>> from astropy.table import Table
    >>> obs_set = Table({'MJD': [56176.19, 56188.254, 56207.172],
    ...                  'FLT': ['desg', 'desr', 'desi'],
    ...                  'CCD_GAIN': [1., 1., 1.],
    ...                  'SKYSIG': [91.27, 47.62, 60.40],
    ...                  'PSF1': [2.27, 2.5, 1.45],
    ...                  'ZPTAVG': [32.97, 33.05, 32.49]})
    >>> print(obs_set)
       MJD    ZPTAVG FLT  PSF1 SKYSIG CCD_GAIN
    --------- ------ ---- ---- ------ --------
     56176.19  32.97 desg 2.27  91.27      1.0
    56188.254  33.05 desr  2.5  47.62      1.0
    56207.172  32.49 desi 1.45   60.4      1.0
    >>> obs_sets = {0: obs_set}

    Get a model and a cosmology

    >>> import sncosmo  # doctest: +SKIP
    >>> from astropy import cosmology
    >>> model = sncosmo.Model(source='salt2-extended')  # doctest: +SKIP
    >>> cosmo = cosmology.FlatLambdaCDM(Om0=0.3, H0=70.)

    Get x0 corresponding to apparent mag = -19.1:

    >>> model.source.set_peakmag(-19.1, 'bessellb', 'vega')  # doctest: +SKIP
    >>> x0_0 = model.get('x0')  # doctest: +SKIP

    Define a function that generates parameters of the model given a redshift:

    >>> from numpy.random import normal
    >>> def gen_params(z):
    ...     x1 = normal(0., 1.)
    ...     c = normal(0., 0.1)
    ...     resid = normal(0., 0.15)
    ...     hubble_offset = -0.13*x1 + 2.5*c + resid
    ...     dm = cosmo.distmod(z).value
    ...     x0 = x0_0 * 10**(-(dm + hubble_offset) / 2.5)
    ...     return {'c': c, 'x1': x1, 'x0': x0}

    Define a volumetric SN rate in SN / yr / Mpc^3:

    >>> def snrate(z):
    ...     return 0.25e-4 * (1. + 2.5 * z)

    Generate simulated SNe:

    >>> sne = simulate_vol(obs_sets, model, gen_params, snrate, cosmo=cosmo,
    ...                    nret=10)  # doctest: +SKIP
    >>> print(len(sne))  # doctest: +SKIP
    10
    >>> print(sne[0])  # doctest: +SKIP
       date   band      flux        fluxerr      zp  zpsys
    --------- ---- ------------- ------------- ----- -----
     56176.19 desg 780.472570859 2603.54291491 32.97    ab
    56188.254 desr 17206.2994496 1501.37068134 33.05    ab
    56207.172 desi 10323.4485412 1105.34529777 32.49    ab
    >>> print(sne[0].meta)  # doctest: +SKIP
    {'z': 0.52007602908199813, 'c': -0.09298497453338518,
     'x1': 1.1684716363315284, 'x0': 1.4010952818384196e-05,
     't0': 56200.279703804845}

    """

    if nsim is not None and nret is not None:
        raise ValueError('cannot specify both nsim and nret')

    # Get comoving volume in each redshift shell.
    z_bins = 100  # Good enough for now.
    z_min, z_max = z_range
    z_binedges = np.linspace(z_min, z_max, z_bins + 1)
    z_binctrs = 0.5 * (z_binedges[1:] + z_binedges[:-1])
    sphere_vols = cosmo.comoving_volume(z_binedges)
    shell_vols = sphere_vols[1:] - sphere_vols[:-1]

    # SN / (observer year) in shell
    shell_snrate = shell_vols * vrate(z_binctrs) / (1. + z_binctrs)

    # SN / (observer year) within z_binedges
    vol_snrate = np.zeros_like(z_binedges)
    vol_snrate[1:] = np.add.accumulate(shell_snrate)

    # Create a ppf (inverse cdf). We'll use this later to get
    # a random SN redshift from the distribution.
    snrate_cdf = vol_snrate / vol_snrate[-1]
    snrate_ppf = Spline1d(snrate_cdf, z_binedges, k=1)

    # Get obs sets' data, time ranges, areas and weights.
    # We do this now so we can weight the location of sne
    # according to the area and time ranges of the observation sets.
    # (Materialize the values: they are iterated more than once.)
    obs_sets = list(obs_sets.values())
    obs_sets_data = [np.asarray(obs_set) for obs_set in obs_sets]
    time_ranges = [(obs['MJD'].min() - 10. * (1. + z_max),
                    obs['MJD'].max() + 10. * (1. + z_max))
                   for obs in obs_sets_data]
    areas = [obs_set.meta['AREA'] if 'AREA' in obs_set.meta else default_area
             for obs_set in obs_sets]
    area_time_products = [a * (t[1] - t[0])
                          for a, t in zip(areas, time_ranges)]
    total_area_time = sum(area_time_products)
    weights = [a_t / total_area_time for a_t in area_time_products]
    cumweights = np.add.accumulate(np.array(weights))

    # How many to simulate?
    if nsim is not None:
        nret = 0
    elif nret is not None:
        nsim = 0
    else:
        nsim = total_area_time / wholesky_sqdeg * vol_snrate[-1]
        # Only `nsim` limits the loop in this case. `nret` must be a
        # number: comparing an int with None raises on Python 3.
        nret = 0

    i = 0
    sne = []
    while i < nsim or len(sne) < nret:
        i += 1

        # which obs_set did this occur in? Pick the first set whose
        # cumulative weight is >= the random draw. The index is clipped
        # because round-off can leave cumweights[-1] marginally below 1.
        j = min(int(np.searchsorted(cumweights, np.random.rand())),
                len(cumweights) - 1)

        obsdata = obs_sets_data[j]
        time_range = time_ranges[j]

        # Get a redshift from the distribution
        z = snrate_ppf(np.random.rand())
        t0 = np.random.uniform(time_range[0], time_range[1])

        # Get rest of parameters from user-defined gen_params():
        params = gen_params(z)
        params.update(z=z, t0=t0)
        model.set(**params)

        # Get model fluxes
        flux = model.bandflux(obsdata['FLT'], obsdata['MJD'],
                              zp=obsdata['ZPTAVG'], zpsys='ab')

        # Get flux errors
        noise_area = 4. * math.pi * obsdata['PSF1']
        bkgpixnoise = obsdata['SKYSIG']
        fluxerr = np.sqrt((noise_area * bkgpixnoise) ** 2 +
                          np.abs(flux) / obsdata['CCD_GAIN'])

        # Scatter fluxes by the fluxerr
        flux = np.random.normal(flux, fluxerr)

        # Check if any of the fluxes are significant
        if not np.any((flux / fluxerr) > thresh):
            continue

        simulated_data = odict([('date', obsdata['MJD']),
                                ('band', obsdata['FLT']),
                                ('flux', flux),
                                ('fluxerr', fluxerr),
                                ('zp', obsdata['ZPTAVG']),
                                ('zpsys', ['ab'] * len(flux))])
        sne.append(Table(simulated_data, meta=params))

    return sne
def read_snana_ascii(fname, default_tablename=None):
    """Read an SNANA-format ascii file.

    Such files may contain metadata lines and one or more tables. See Notes
    for a summary of the format.

    Parameters
    ----------
    fname : str or file-like
        Filename of object to read, or an already-open file-like object
        providing ``read()`` and ``close()``. In both cases the file object
        is closed before this function returns.
    default_tablename : str, optional
        Default tablename, or the string that indicates a table row, when
        a table starts with 'NVAR:' rather than 'NVAR_TABLENAME:'.

    Returns
    -------
    meta : OrderedDict
        Metadata from keywords.
    tables : dict of `~astropy.table.Table`
        Tables, indexed by table name.

    Raises
    ------
    ValueError
        If a table is started with a bare 'NVAR:' keyword and no
        ``default_tablename`` was supplied, or if a 'VARNAMES' keyword
        does not directly follow an 'NVAR' keyword.

    Notes
    -----
    The file can contain one or more tables, as well as optional metadata.
    Here is an example of the expected format::

        META1: a
        META2: 6
        NVAR_SN: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y

    Behavior:

    * Any strings ending in a colon (:) are treated as keywords.
    * The start of a new table is indicated by a keyword starting with
      'NVAR'.
    * If the 'NVAR' is followed by an underscore (e.g., 'NVAR_TABLENAME'),
      then 'TABLENAME' is taken to be the name of the table. Otherwise the
      user *must specify* a ``default_tablename``.  This is because data
      rows are identified by the tablename.
    * After a keyword starting with 'NVAR', the next keyword must be
      'VARNAMES'. The strings following give the column names.
    * Any other keywords anywhere in the file are treated as metadata. The
      first string after the keyword is treated as the value for that
      keyword.
    * **Note:** Newlines are treated as equivalent to spaces; they do not
      indicate a new row. This is necessary because some SNANA-format
      files have multiple metadata on a single row or single table rows
      split over multiple lines, making newline characters meaningless.

    Examples
    --------

    >>> from astropy.extern.six import StringIO  # StringIO behaves like a file
    >>> f = StringIO('META1: a\\n'
    ...              'META2: 6\\n'
    ...              'NVAR_SN: 3\\n'
    ...              'VARNAMES: A B C\\n'
    ...              'SN: 1 2.0 x\\n'
    ...              'SN: 4 5.0 y\\n')
    ...
    >>> meta, tables = read_snana_ascii(f)

    The first object is a dictionary of metadata:

    >>> meta
    OrderedDict([('META1', 'a'), ('META2', 6)])

    The second is a dictionary of all the tables in the file:

    >>> tables['SN']                                   # doctest: +SKIP
    <Table rows=2 names=('A','B','C')>
    array([(1, 2.0, 'x'), (4, 5.0, 'y')],
          dtype=[('A', '<i8'), ('B', '<f8'), ('C', 'S1')])

    If the file had an 'NVAR' keyword rather than 'NVAR_SN', for example::

        NVAR: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y
        SN: 5 8.2 z

    it can be read by supplying a default table name:

    >>> meta, tables = read_snana_ascii(f, default_tablename='SN')
    ...                                                # doctest: +SKIP

    """

    meta = odict()  # initialize structure to hold metadata.
    tables = {}  # initialize structure to hold data.

    # Plain 'r' rather than 'U': the 'U' flag was removed in Python 3.11,
    # and the content is split on arbitrary whitespace anyway, so the
    # newline convention of the file is irrelevant here.
    if isinstance(fname, six.string_types):
        fh = open(fname, 'r')
    else:
        fh = fname
    try:
        words = fh.read().split()
    finally:
        # Close even when read() fails so the handle is never leaked.
        fh.close()

    i = 0
    nvar = None
    tablename = None
    while i < len(words):
        word = words[i]

        # If the word starts with 'NVAR', we are starting a new table.
        if word.startswith('NVAR'):
            nvar = int(words[i + 1])

            # Infer table name. The name will be used to designate a data
            # row.
            if '_' in word:
                pos = word.find('_') + 1
                tablename = word[pos:].rstrip(':')
            elif default_tablename is not None:
                tablename = default_tablename
            else:
                raise ValueError(
                    'Table name must be given as part of NVAR keyword so '
                    'that rows belonging to this table can be identified. '
                    'Alternatively, supply the default_tablename keyword.')
            table = odict()
            tables[tablename] = table
            i += 2

        # If the word starts with 'VARNAMES', the following `nvar` words
        # define the column names of the table.
        elif word.startswith('VARNAMES') or word.startswith('VARLIST'):

            # Check that nvar is defined and that no column names are
            # defined for the current table. (`table` is only evaluated
            # when nvar is set, i.e. after a table has been started.)
            if nvar is None or len(table) > 0:
                raise ValueError('NVAR must directly precede VARNAMES')

            # Read the column names
            for j in range(i + 1, i + 1 + nvar):
                table[words[j]] = []
            i += nvar + 1

        # If the word matches the current tablename, we are reading a data
        # row: the next `nvar` words are the values, in column order.
        elif word.rstrip(':') == tablename:
            for j, colname in enumerate(table):
                table[colname].append(words[i + 1 + j])
            i += nvar + 1

        # Otherwise, we are reading metadata or some comment
        # If the word ends with ":", it is metadata.
        elif word[-1] == ':':
            name = word[:-1]  # strip off the ':'
            if len(words) >= i + 2:
                # Store the value as int if possible, else float, else str.
                try:
                    val = int(words[i + 1])
                except ValueError:
                    try:
                        val = float(words[i + 1])
                    except ValueError:
                        val = words[i + 1]
                meta[name] = val
            else:
                # Keyword was the very last word in the file: no value.
                meta[name] = None
            i += 2
        else:
            # It is some comment; continue onto next word.
            i += 1

    # All values in each column are currently strings. Convert to int or
    # float if possible (a column is converted only if *every* value
    # parses; otherwise it is left as strings).
    for table in tables.values():
        for colname, values in six.iteritems(table):
            try:
                table[colname] = [int(val) for val in values]
            except ValueError:
                try:
                    table[colname] = [float(val) for val in values]
                except ValueError:
                    pass

    # All tables are dictionaries. Convert them to Tables
    for tablename in list(tables):
        tables[tablename] = Table(tables[tablename])

    return meta, tables
def fit_lc(data, model, vparam_names, bounds=None, method='minuit',
           guess_amplitude=True, guess_t0=True, guess_z=True,
           minsnr=5., modelcov=False, verbose=False, maxcall=10000,
           **kwargs):
    """Fit model parameters to data by minimizing chi^2.

    This function defines a chi^2 to minimize, makes initial guesses for
    t0 and amplitude, then runs a minimizer.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point. The supplied dictionary is
        not modified.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when
        fitting redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    method : {'minuit'}, optional
        Minimization method to use. Currently there is only one choice.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    verbose : bool, optional
        Print messages during fitting.
    maxcall : int, optional
        Passed to the minimizer as its ``ncall`` argument (function call
        limit). Default is 10000.
    **kwargs
        Only the deprecated ``flatten`` keyword is examined: if present, a
        warning is issued, and if true the result is passed through
        ``flatten_result`` before being returned.

    Returns
    -------
    res : Result
        The optimization result represented as a ``Result`` object, which is
        a `dict` subclass with attribute access. Therefore, ``res.keys()``
        provides a list of the attributes. Attributes are:

        - ``success``: boolean describing whether fit succeeded.
        - ``message``: string with more information about exit status.
        - ``ncall``: number of function evaluations.
        - ``chisq``: minimum chi^2 value.
        - ``ndof``: number of degrees of freedom
          (len(data) - len(vparam_names)).
        - ``param_names``: same as ``model.param_names``.
        - ``parameters``: 1-d `~numpy.ndarray` of best-fit values
          (including fixed parameters) corresponding to ``param_names``.
        - ``vparam_names``: list of varied parameter names.
        - ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indices correspond to order of ``vparam_names``.
        - ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance
          matrix.

    fitmodel : `~sncosmo.Model`
        A copy of the model with parameters set to best-fit values.

    Raises
    ------
    ValueError
        If ``vparam_names`` is empty or names an unknown parameter, if
        ``z`` is varied without two-sided bounds or lies outside them, if
        ``method`` is unknown, if one-sided bounds are given, or if the
        ``iminuit`` package cannot be imported.

    Notes
    -----

    **t0 guess:** If ``t0`` is being fit and ``guess_t0=True``, the
    function will guess the initial starting point for ``t0`` based on
    the data. The guess is made as follows:

    * Evaluate the time and value of peak flux for the model in each band
      given the current model parameters.
    * Determine the data point with maximum flux in each band, for points
      with signal-to-noise ratio > ``minsnr`` (default is 5). If no points
      meet this criteria, the band is ignored (for the purpose of guessing
      only).
    * For each band, compare model's peak flux to the peak data point.
      Choose the band with the highest ratio of data / model.
    * Set ``t0`` so that the model's time of peak in the chosen band
      corresponds to the peak data point in this band.

    **amplitude guess:** If amplitude (assumed to be the first model
    parameter) is being fit and ``guess_amplitude=True``, the function
    will guess the initial starting point for the amplitude based on the
    data.

    **redshift guess:** If redshift (``z``) is being fit and
    ``guess_z=True``, the function will set the initial value of ``z``
    to the average of the bounds on ``z``.

    Examples
    --------

    The `~sncosmo.flatten_result` function can be used to make the result
    a dictionary suitable for appending as rows of a table:

    >>> from astropy.table import Table               # doctest: +SKIP
    >>> table_rows = []                               # doctest: +SKIP
    >>> for sn in sne:                                # doctest: +SKIP
    ...     res, fitmodel = sncosmo.fit_lc(           # doctest: +SKIP
    ...          sn, model, ['t0', 'x0', 'x1', 'c'])  # doctest: +SKIP
    ...     table_rows.append(flatten_result(res))    # doctest: +SKIP
    >>> t = Table(table_rows)                         # doctest: +SKIP

    """

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Check that vparam_names isn't empty and contains only parameters
    # known to the model.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for s in vparam_names:
        if s not in model.param_names:
            raise ValueError("Parameter not in model: " + repr(s))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Initialize bounds. Work on a shallow copy: a default 't0' entry may
    # be added below and must not leak into the caller's dictionary.
    bounds = {} if bounds is None else dict(bounds)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if fit.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the
    # Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (For now, we assume it is the 3rd parameter of the model.)
    if (guess_amplitude or guess_t0):
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # count degrees of freedom
    ndof = len(data) - len(vparam_names)

    if method == 'minuit':
        try:
            import iminuit
        except ImportError:
            raise ValueError("Minimization method 'minuit' requires the "
                             "iminuit package")

        # The iminuit minimizer expects the function signature to have an
        # argument for each parameter.
        def fitchisq(*parameters):
            model.parameters = parameters
            return _chisq(data, model, modelcov=modelcov)

        # Set up keyword arguments to pass to Minuit initializer. These are
        # kept in their own dictionary so that the caller's **kwargs (and
        # in particular the deprecated `flatten` keyword checked at the end
        # of this function) are not overwritten.
        minuit_kwargs = {}
        for name in model.param_names:
            minuit_kwargs[name] = model.get(name)  # Starting point.

            # Fix parameters not being varied in the fit.
            if name not in vparam_names:
                minuit_kwargs['fix_' + name] = True
                minuit_kwargs['error_' + name] = 0.
                continue

            # Bounds
            if name in bounds:
                if None in bounds[name]:
                    raise ValueError('one-sided bounds not allowed for '
                                     'minuit minimizer')
                minuit_kwargs['limit_' + name] = bounds[name]

            # Initial step size
            if name in bounds:
                step = 0.02 * (bounds[name][1] - bounds[name][0])
            elif model.get(name) != 0.:
                step = 0.1 * model.get(name)
            else:
                step = 1.
            minuit_kwargs['error_' + name] = step

        if verbose:
            print("Initial parameters:")
            for name in vparam_names:
                print(name, minuit_kwargs[name], 'step=',
                      minuit_kwargs['error_' + name], end=" ")
                if 'limit_' + name in minuit_kwargs:
                    print('bounds=', minuit_kwargs['limit_' + name], end=" ")
                print()

        m = iminuit.Minuit(fitchisq, errordef=1.,
                           forced_parameters=model.param_names,
                           print_level=(1 if verbose else 0),
                           throw_nan=True, **minuit_kwargs)
        # Second return value of migrad() is not needed here.
        d, _ = m.migrad(ncall=maxcall)

        # Build a message.
        message = []
        if d.has_reached_call_limit:
            message.append('Reached call limit.')
        if d.hesse_failed:
            message.append('Hesse Failed.')
        if not d.has_covariance:
            message.append('No covariance.')
        elif not d.has_accurate_covar:  # iminuit docs wrong
            message.append('Covariance may not be accurate.')
        if not d.has_posdef_covar:  # iminuit docs wrong
            message.append('Covariance not positive definite.')
        if d.has_made_posdef_covar:
            message.append('Covariance forced positive definite.')
        if not d.has_valid_parameters:
            message.append('Parameter(s) value and/or error invalid.')
        if len(message) == 0:
            message.append('Minimization exited successfully.')
        # iminuit: m.np_matrix() doesn't work

        # numpy array of best-fit values (including fixed parameters).
        parameters = np.array([m.values[name] for name in model.param_names])
        model.parameters = parameters  # set model parameters to best fit.

        # Covariance matrix (only varied parameters) as numpy array.
        if m.covariance is None:
            covariance = None
        else:
            covariance = np.array([
                [m.covariance[(n1, n2)] for n1 in vparam_names]
                for n2 in vparam_names])

        # OrderedDict of errors
        if m.errors is None:
            errors = None
        else:
            errors = odict([(name, m.errors[name]) for name in vparam_names])

        # Compile results
        res = Result(success=d.is_valid,
                     message=' '.join(message),
                     ncall=d.nfcn,
                     chisq=d.fval,
                     ndof=ndof,
                     param_names=model.param_names,
                     parameters=parameters,
                     vparam_names=vparam_names,
                     covariance=covariance,
                     errors=errors)

        # TODO remove cov_names in a future release.
        depmsg = ("The `cov_names` attribute is deprecated in sncosmo v1.0 "
                  "and will be removed in v1.1. Use `vparam_names` instead.")
        res.__dict__['deprecated']['cov_names'] = (vparam_names, depmsg)

    else:
        # `!r` is the repr conversion; the previous `:r` was an invalid
        # format spec and masked this message with a formatting error.
        raise ValueError("unknown method {0!r}".format(method))

    # TODO remove this in a future release.
    if "flatten" in kwargs:
        warnings.warn("The `flatten` keyword is deprecated in sncosmo v1.0 "
                      "and will be removed in v1.1. Use the flatten_result() "
                      "function instead.")
        if kwargs["flatten"]:
            res = flatten_result(res)

    return res, model
def _read_salt2_old(dirname, **kwargs): """Read old-style SALT2 files from a directory. A file named 'lightfile' must exist in the directory. """ filenames = kwargs.get("filenames", None) # Get list of files in directory. if not (os.path.exists(dirname) and os.path.isdir(dirname)): raise IOError("Not a directory: '{0}'".format(dirname)) dirfilenames = os.listdir(dirname) # Read metadata from lightfile. if "lightfile" not in dirfilenames: raise IOError("no lightfile in directory: '{0}'".format(dirname)) with open(os.path.join(dirname, "lightfile"), "r") as lightfile: meta = odict() for line in lightfile.readlines(): line = line.strip() if len(line) == 0: continue try: key, val = line.split() except ValueError: raise ValueError( "expected space-separated key value pairs in " "lightfile: {0}".format(os.path.join(dirname, "lightfile")) ) meta[key] = _cast_str(val) # Get list of filenames to read. if filenames is None: filenames = dirfilenames if "lightfile" in filenames: filenames.remove("lightfile") # We already read the lightfile. fullfilenames = [os.path.join(dirname, f) for f in filenames] # Read data from files. data = None for fname in fullfilenames: with open(fname, "r") as f: filemeta, filedata = _read_salt2(f) # Check that all necessary file metadata was defined. if not ("INSTRUMENT" in filemeta and "BAND" in filemeta and "MAGSYS" in filemeta): raise ValueError( "not all necessary global keys (INSTRUMENT, " "BAND, MAGSYS) are defined in file {0}".format(fname) ) # Add the instrument/band to the file data, in anticipation of # aggregating it with other files. 
# PY3: next(iter(filedata.vlues())) firstcol = six.next(six.itervalues(filedata)) data_length = len(firstcol) filter_name = "{0}::{1}".format(filemeta.pop("INSTRUMENT"), filemeta.pop("BAND")) filedata["Filter"] = data_length * [filter_name] filedata["MagSys"] = data_length * [filemeta.pop("MAGSYS")] # If this if the first file, initialize data lists, otherwise if keys # match, append this file's data to the main data. if data is None: data = filedata elif set(filedata.keys()) == set(data.keys()): for key in data: data[key].extend(filedata[key]) else: raise ValueError("column names do not match between files") # Append any extra metadata in this file to the master metadata. if len(filemeta) > 0: meta[filter_name] = filemeta return meta, data
def mcmc_lc(data, model, vparam_names, bounds=None, priors=None,
            guess_amplitude=True, guess_t0=True, guess_z=True,
            minsnr=5., modelcov=False, nwalkers=10, nburn=200,
            nsamples=1000, thin=1, a=2.0):
    """Run an MCMC chain to get model parameter samples.

    This is a convenience function around `emcee.EnsembleSampler`.
    It defines the likelihood function and makes a heuristic guess at a
    good set of starting points for the walkers. It then runs the sampler,
    starting with a burn-in run.

    If you're not getting good results, you might want to try
    increasing the burn-in, increasing the walkers, or specifying a
    better starting position.  To get a better starting position, you
    could first run `~sncosmo.fit_lc`, then run this function with all
    ``guess_[name]`` keyword arguments set to False, so that the
    current model parameters are used as the starting point.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : iterable
        Model parameters to vary.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point. Bounds on parameters that
        are not varied have no effect on the sampling. The supplied
        dictionary is not modified.
    priors : `dict`, optional
        Prior probability functions. Keys are parameter names, values are
        functions that return probability given the parameter value.
        The default prior is a flat distribution. A prior that evaluates
        to zero (or less) rejects the sample. Priors on parameters that
        are not varied are ignored.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when
        fitting redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    nwalkers : int, optional
        Number of walkers in the EnsembleSampler
    nburn : int, optional
        Number of samples in burn-in phase.
    nsamples : int, optional
        Number of samples in production run.
    thin : int, optional
        Factor by which to thin samples in production run. Output samples
        array will have (nsamples/thin) samples.
    a : float, optional
        Proposal scale parameter passed to the EnsembleSampler.

    Returns
    -------
    res : Result
        Has the following attributes:

        * ``param_names``: All parameter names of model, including fixed.
        * ``parameters``: Model parameters, with varied parameters set to
          mean value in samples.
        * ``vparam_names``: Names of parameters varied. Order of parameters
          matches order of samples.
        * ``samples``: 2-d array with shape ``(N, len(vparam_names))``.
          Order of parameters in each row  matches order in
          ``res.vparam_names``.
        * ``covariance``: 2-d array giving covariance, measured from samples.
          Order corresponds to ``res.vparam_names``.
        * ``errors``: dictionary giving square root of diagonal of
          covariance matrix for varied parameters. Useful for ``plot_lc``.
        * ``mean_acceptance_fraction``: mean acceptance fraction for all
          walkers in the sampler.

    est_model : `~sncosmo.Model`
        Copy of input model with varied parameters set to mean value in
        samples.

    Raises
    ------
    ImportError
        If the ``emcee`` package cannot be imported.
    ValueError
        If ``vparam_names`` is empty, if ``vparam_names``, ``bounds`` or
        ``priors`` name a parameter unknown to the model, or if ``z`` is
        varied without two-sided bounds or lies outside them.
    """

    # Only a failed import should be translated; a bare `except` would
    # also swallow KeyboardInterrupt/SystemExit.
    try:
        import emcee
    except ImportError:
        raise ImportError("mcmc_lc() requires the emcee package.")

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Work on a shallow copy of bounds: a default 't0' entry may be added
    # below and must not leak into the caller's dictionary.
    bounds = {} if bounds is None else dict(bounds)
    if priors is None:
        priors = {}

    # Check that vparam_names isn't empty, check for unknown parameters.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for names in (vparam_names, bounds, priors):
        for name in names:
            if name not in model.param_names:
                raise ValueError("Parameter not in model: " + repr(name))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]
    ndim = len(vparam_names)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if allowed to vary.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the
    # Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (we assume amplitude is the 3rd parameter of the model.)
    if guess_amplitude or guess_t0:
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # Indices used in probability function.
    # modelidx: Indices of model parameters corresponding to vparam_names.
    # idxbounds: tuples of (varied parameter index, low bound, high bound).
    # idxpriors: tuples of (varied parameter index, function).
    # Entries for parameters that are not varied are skipped: they cannot
    # influence the sampling, and looking them up in vparam_names would
    # raise an unhelpful "is not in list" error.
    modelidx = np.array([model.param_names.index(k) for k in vparam_names])
    idxbounds = [(vparam_names.index(k), bounds[k][0], bounds[k][1])
                 for k in bounds if k in vparam_names]
    idxpriors = [(vparam_names.index(k), priors[k])
                 for k in priors if k in vparam_names]

    # Posterior function.
    def lnprob(parameters):
        for i, low, high in idxbounds:
            if not low < parameters[i] < high:
                return -np.inf

        model.parameters[modelidx] = parameters
        logp = -0.5 * _chisq(data, model, modelcov=modelcov)

        for i, func in idxpriors:
            prior = func(parameters[i])
            # A zero prior means zero posterior; math.log(0) would raise
            # a domain error instead of rejecting the sample.
            if not prior > 0.:
                return -np.inf
            logp += math.log(prior)
        return logp

    # Heuristic determination of walker initial positions:
    # distribute walkers in a symmetric gaussian ball, with heuristically
    # determined scale.
    ctr = model.parameters[modelidx]
    scale = np.ones(ndim)
    for i, name in enumerate(vparam_names):
        if name in bounds:
            scale[i] = 0.0001 * (bounds[name][1] - bounds[name][0])
        elif model.get(name) != 0.:
            scale[i] = 0.01 * model.get(name)
        else:
            scale[i] = 0.1
    pos = ctr + scale * np.random.normal(size=(nwalkers, ndim))

    # Run the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=a)
    pos, prob, state = sampler.run_mcmc(pos, nburn)  # burn-in
    sampler.reset()
    sampler.run_mcmc(pos, nsamples, thin=thin)  # production run
    samples = sampler.flatchain

    # Summary statistics.
    vparameters = np.mean(samples, axis=0)
    # np.cov squeezes its result, so a single varied parameter yields a
    # 0-d array; force 2-d so the diagonal can always be taken.
    cov = np.atleast_2d(np.cov(samples, rowvar=0))
    model.set(**dict(zip(vparam_names, vparameters)))
    errors = odict(zip(vparam_names, np.sqrt(np.diagonal(cov))))
    mean_acceptance_fraction = np.mean(sampler.acceptance_fraction)

    res = Result(param_names=copy.copy(model.param_names),
                 parameters=model.parameters.copy(),
                 vparam_names=vparam_names,
                 samples=samples,
                 covariance=cov,
                 errors=errors,
                 mean_acceptance_fraction=mean_acceptance_fraction)

    return res, model
# Licensed under a 3-clause BSD style license - see LICENSE.rst """Convenience functions for photometric data.""" from __future__ import division import math import numpy as np from astropy.utils import OrderedDict as odict from astropy.table import Table from .spectral import get_magsystem, get_bandpass _photdata_aliases = odict( [ ("time", set(["time", "date", "jd", "mjd", "mjdobs"])), ("band", set(["band", "bandpass", "filter", "flt"])), ("flux", set(["flux", "f"])), ("fluxerr", set(["fluxerr", "fe", "fluxerror", "flux_error", "flux_err"])), ("zp", set(["zp", "zpt", "zeropoint", "zero_point"])), ("zpsys", set(["zpsys", "zpmagsys", "magsys"])), ] ) # Descriptions for docstring only. _photdata_descriptions = { "time": "Time of observation in days", "band": "Bandpass of observation", "flux": "Flux of observation", "fluxerr": "Gaussian uncertainty on flux", "zp": "Zeropoint corresponding to flux", "zpsys": "Magnitude system for zeropoint", } _photdata_types = {"time": "float", "band": "str", "flux": "float", "fluxerr": "float", "zp": "float", "zpsys": "str"}