def __init__(self, data, model, SaltModel):
    """Store the inputs and derive starting guesses and sampling bounds.

    Besides keeping references to ``data``, ``model`` and ``SaltModel``,
    the constructor scans a grid of trial ``t0`` values (one per unit of
    time across the sncosmo-derived t0 bounds), fits only ``x0`` at each
    trial time and keeps the trial with the smallest finite chi-square.

    Parameters
    ----------
    data : photometric data
        Must support ``data['band']`` and be accepted by
        ``photdata.photometric_data`` and ``sncosmo.fit_lc``.
    model : model object
        Used for the initial t0/amplitude guess and the t0 bounds; its
        ``'z'`` parameter is read through ``model.get('z')``.
    SaltModel : model object
        Model that is re-fitted at every trial ``t0``. Its parameters are
        modified in place by this constructor.
    """
    self.data = data
    phot = photdata.photometric_data(deepcopy(data))
    self.model = model
    self.z = self.model.get('z')
    self.bands = np.unique(self.data['band'])

    # (location, scale)-style pairs; how they are used is not visible here.
    self.x1_prior = (0, np.sqrt(2))
    self.s_prior = (0, np.sqrt(2))
    self.c_prior = (0, 0.15)

    self.SaltModel = SaltModel
    self.ndim = 5
    self.nwalkers = 100

    guess = sncosmo.fitting.guess_t0_and_amplitude(phot, self.model,
                                                   minsnr=3)
    self.tmax_guess, self.x0_start = guess
    self.tmax_bounds = sncosmo.fitting.t0_bounds(phot, self.model)

    # Refine the t0 guess ourselves: brute-force scan in steps of 1.
    t_low, t_high = self.tmax_bounds[0], self.tmax_bounds[1]
    trial_times = np.arange(t_low, t_high + 1, 1)

    best_chisq, best_t0, best_x0 = 1e20, 0, 0
    fitted_x0 = []
    for trial_t0 in trial_times:
        self.SaltModel.set(t0=trial_t0, x0=self.x0_start, x1=0, c=0,
                           z=self.z)
        fit_result, _ = sncosmo.fit_lc(deepcopy(self.data),
                                       self.SaltModel, ['x0'],
                                       guess_amplitude=False,
                                       guess_t0=False,
                                       modelcov=False)
        # parameters[2] is read as the fitted x0 -- presumably index 2 is
        # x0 in SaltModel.param_names; confirm against the model.
        x0_fit = fit_result.parameters[2]
        # Every fitted x0 is recorded, even when the chi-square is rejected.
        fitted_x0.append(x0_fit)

        trial_chisq = fit_result.chisq
        if np.isfinite(trial_chisq) and trial_chisq < best_chisq:
            best_chisq, best_t0, best_x0 = trial_chisq, trial_t0, x0_fit

    self.tmax_guess = best_t0
    self.x0_start = best_x0
    self.nest_bounds = {
        't0': self.tmax_bounds,
        'x1': [-4, 4],
        'c': [-1, 1],
        'x0': [np.min(fitted_x0), np.max(fitted_x0)]
    }
def chisq(self, data):
    """Return the chi-square of this multi-image model against ``data``.

    The observations are grouped by ``data['imageid']``:

    * ``imageid == 0`` rows are compared with the sum of the fluxes of
      all models in ``self.models``;
    * ``imageid == k`` (``1 <= k <= self.nimg``) rows are compared with
      the flux of ``self.models[k - 1]`` alone.

    Image ids with no rows are skipped.

    Parameters
    ----------
    data : photometric data
        Must support ``data['imageid']`` and boolean-mask indexing, and
        each masked subset must be accepted by ``photometric_data``.

    Returns
    -------
    chi2 : float
        Sum over image ids of ``diff . inv(cov) . diff``.

    Notes
    -----
    Side effects: the accumulated ``cleanmax / physmax`` ratio is appended
    to ``self.tot_amp`` and the chi-square to ``self.chi`` on every call.
    """
    chi2, totmag = 0., 0.
    for imid in range(self.nimg + 1):
        mask = data['imageid'] == imid
        if not np.any(mask):
            continue
        d = photometric_data(data[mask])

        # if the image-id is zero it should be compared to the sum
        # of the flux from all images
        if imid > 0:
            m = self.models[imid - 1]
            model_flux = m.bandflux(d.band, d.time, zp=d.zp, zpsys=d.zpsys)
        else:
            model_flux = np.zeros(len(d.time))
            for i, m in enumerate(self.models):
                mf = m.bandflux(d.band, d.time, zp=d.zp, zpsys=d.zpsys)
                varmag = (self.cleanmax(d.band, i, d.time, d.zp, d.zpsys)
                          / self.physmax(d.band, d.time, d.zp, 'ab'))
                totmag += varmag
                model_flux += mf

        if not np.all(d.fluxerr > 0.):
            # Diagnostic only: non-positive errors are reported, not
            # rejected. print() with one argument behaves the same on
            # Python 2 and Python 3.
            print(d.fluxerr)
            print("OOPS")

        diff = d.flux - model_flux
        # is this going to work when we are masking?
        cov = np.diag(d.fluxerr**2) if d.fluxcov is None else d.fluxcov
        invcov = np.linalg.inv(cov)
        chi2 += np.dot(np.dot(diff, invcov), diff)

    self.tot_amp.append(totmag)
    self.chi.append(chi2)
    return chi2
def chisq(data, model, modelcov=False):
    """Compute the chi-square of ``model`` with respect to ``data``.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Photometric observations; converted with ``photometric_data`` and
        sorted by time before use.
    model : `~sncosmo.Model`
        Model whose band fluxes are compared with the data.
    modelcov : bool
        When true, ``model.bandfluxcov`` is used instead of
        ``model.bandflux`` and the returned model covariance is added to
        the data covariance. The model source must implement covariance.

    Returns
    -------
    chisq : float
    """
    phot = photometric_data(data)
    phot.sort_by_time()

    uses_matrix = modelcov or phot.fluxcov is not None
    if not uses_matrix:
        # Independent errors only: plain sum of squared pulls.
        predicted = model.bandflux(phot.band, phot.time,
                                   zp=phot.zp, zpsys=phot.zpsys)
        pulls = (phot.flux - predicted) / phot.fluxerr
        return np.sum(pulls**2)

    # A full covariance matrix is involved and has to be inverted.
    if phot.fluxcov is None:
        total_cov = np.diag(phot.fluxerr**2)
    else:
        total_cov = phot.fluxcov

    if modelcov:
        predicted, model_cov = model.bandfluxcov(phot.band, phot.time,
                                                 zp=phot.zp,
                                                 zpsys=phot.zpsys)
        total_cov = total_cov + model_cov
    else:
        predicted = model.bandflux(phot.band, phot.time,
                                   zp=phot.zp, zpsys=phot.zpsys)

    inverse = np.linalg.pinv(total_cov)
    residual = phot.flux - predicted
    return np.dot(np.dot(residual, inverse), residual)
def plot(self, data, zp=25., zpsys='ab', ncol=2):
    """Plot the photometry and this model, one panel per band.

    For every band a flux panel and a pull panel are drawn.  Rows with
    ``imageid == k`` (``k > 0``) are compared with ``self.models[k - 1]``;
    rows with ``imageid == 0`` are compared with the sum of all image
    models.

    Parameters
    ----------
    data : photometric data
        Must support ``data['band']``, ``data['time']``,
        ``data['imageid']`` and boolean-mask indexing.
    zp : float, optional
        Zero point the data and model fluxes are normalised to.
    zpsys : str, optional
        Magnitude system of ``zp``.
    ncol : int, optional
        Number of panel columns in the figure.

    Notes
    -----
    Side effects:

    * the figure is saved to ``16geu_nestfit.png`` and then closed;
    * model light curves are written to
      ``tables/16geufit_<band>_nimg<k>.dat`` (per image) and
      ``tables/16geufit_<band>.dat`` (sum of all images).  The ``tables``
      directory is not created here and must already exist.

    Only five marker/colour entries are defined, so image ids above 4
    raise ``IndexError``.
    """
    all_bands = list(set(data['band']))
    nbands = len(all_bands)
    nrow = int(np.ceil(nbands / float(ncol)))
    tmin, tmax = np.min(data['time']), np.max(data['time'])
    fig = plt.figure(figsize=(10, 12))

    tgrid = np.linspace(tmin, tmax, 300)
    marker = ['.', 'o', 's', 'd', '^']
    color = ['k', 'c', 'm', 'g', 'b']

    def write_table(path, flux):
        # Two-column text table (grid time, model flux); the context
        # manager guarantees the handle is closed even if a write fails.
        with open(path, 'w') as table:
            table.write('date flux\n')
            for t, f in zip(tgrid, flux):
                table.write(str(t) + ' ' + str(f) + '\n')

    for panel, band in enumerate(all_bands):
        mask = data['band'] == band
        ax = fig.add_subplot(nrow, ncol, panel + 1)
        ax.axhline(0., color='k', ls=':')

        sum_model_flux, plot_sum, diff, sigma = None, False, None, None
        for imid in range(self.nimg + 1):
            m = mask * (data['imageid'] == imid)

            if imid > 0:
                model_flux = self.models[imid - 1].bandflux(
                    band, tgrid, zp=zp, zpsys=zpsys)
                # Accumulate the total even for images without data in
                # this band: the imageid == 0 points need the full sum.
                if sum_model_flux is None:
                    sum_model_flux = model_flux
                else:
                    sum_model_flux = sum_model_flux + model_flux

            if not np.any(m):
                continue

            d = photometric_data(data[m])
            d = d.normalized(zp=zp, zpsys=zpsys)
            if not np.all(np.ediff1d(d.time) >= 0.0):
                d = d[np.argsort(d.time)]

            ax.errorbar(d.time, d.flux, yerr=d.fluxerr,
                        marker=marker[imid], capsize=0., ls='',
                        color=color[imid])

            if imid > 0:
                write_table(
                    'tables/16geufit_' + band + '_nimg' + str(imid) +
                    '.dat', model_flux)
                ax.plot(tgrid, model_flux, color=color[imid], ls='-')
                interp_flux = interp1d(tgrid, model_flux)
                pull = (d.flux - interp_flux(d.time)) / d.fluxerr
                if diff is None:
                    diff = pull
                    sigma = np.sum(pull**2)
                else:
                    # NOTE(review): adding pulls element-wise assumes
                    # every image has the same number of points in this
                    # band -- confirm.
                    diff += pull
                    sigma += np.sum(pull**2)
            else:
                plot_sum = True

        if (sigma is not None) and not (self.nimg * len(d.time) == 1):
            sigma = sigma / (self.nimg * len(d.time) - 1.)

        if plot_sum:
            # The table holds the summed model, i.e. the same curve that
            # is plotted and used for the pulls below (previously only
            # the last image's flux was written here).
            write_table('tables/16geufit_' + band + '.dat', sum_model_flux)
            interp_flux = interp1d(tgrid, sum_model_flux)
            diff = (d.flux - interp_flux(d.time)) / d.fluxerr
            if not (len(d.time) == 1):
                sigma = np.sum(diff**2) / (len(d.time) - 1.)
            else:
                sigma = np.sum(diff**2) / (len(d.time))
            ax.plot(tgrid, sum_model_flux, color=color[0], ls='-')

        ax.set_title(band)
        ax.set_xlim((tmin, tmax))
        ax.set_ylabel('flux')
        ca = fig.gca().get_xticks()
        ax.set_xticks(ca[1:-1])

        # Pull panel attached below the flux panel, sharing the time axis.
        divider = make_axes_locatable(ax)
        sigma = np.sqrt(sigma)
        axpulls = divider.append_axes('bottom', size='30%', pad=0.0,
                                      sharex=ax)
        axpulls.plot(d.time, diff, 'x')
        axpulls.set_ylim(-4. * sigma, 4. * sigma)
        axpulls.set_yticks([-3 * sigma, -1 * sigma, 1 * sigma, 3 * sigma])
        # Raw strings: '\s' is not a valid escape sequence in a normal
        # string literal.
        axpulls.set_yticklabels(
            [r'$-3\sigma$', r'$-1\sigma$', r'$+1\sigma$', r'$+3\sigma$'])
        axpulls.fill_between(tgrid, -1 * sigma, 1 * sigma,
                             facecolor='green', alpha=0.3)
        axpulls.set_xlabel('Julian Date')

    fig.tight_layout()
    fig.savefig('16geu_nestfit.png')
    plt.close()
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, npoints=100, method='single',
            maxiter=None, maxcall=None, modelcov=False, rstate=None,
            verbose=False, warn=True, **kwargs):
    """Run nested sampling algorithm to estimate model parameters and
    evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines up
        with the earliest data point and the maximum bound is such that
        the earliest phase of the model lines up with the latest data
        point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The
        keys should be parameter names and the values should be callables
        that accept a float. If a parameter is not in the dictionary, the
        prior defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative
        distribution function) for each parameter. If a parameter is in
        this dictionary, the ppf takes precedence over a prior pdf
        specified in ``priors``.
    npoints : int, optional
        Number of active samples to use. Increasing this value increases
        the accuracy (due to denser sampling) and also the time
        to solution.
    method : {'classic', 'single', 'multi'}, optional
        Method used to select new points. Choices are 'classic',
        single-ellipsoidal ('single'), multi-ellipsoidal ('multi').
        Default is 'single'.
    maxiter : int, optional
        Maximum number of iterations. Iteration may stop earlier if
        termination condition is reached. Default is no limit.
    maxcall : int, optional
        Maximum number of likelihood evaluations. Iteration may stop
        earlier if termination condition is reached. Default is no limit.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    rstate : `~numpy.random.RandomState`, optional
        RandomState instance. If not given, the global random state of the
        ``numpy.random`` module will be used.
    verbose : bool, optional
        Print running evidence sum on a single line.
    warn : bool, optional
        Issue warning when dropping bands outside the model range. Default
        is True.

        *New in version 1.5.0*
    **kwargs
        Only ``tied`` is read (experimental): a `dict` mapping a varied
        parameter name to a callable. The callable receives a dict of the
        directly sampled parameter values and returns the value of the
        tied parameter.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite
          sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples,
          nparameters). Each row is the parameter values for a single
          sample. For example, ``samples[0, :]`` is the parameter values
          for the first sample.
        * ``logvol``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional
          to the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order
          corresponds to ``model.param_names``.
        * ``param_dict``: OrderedDict mapping every name in
          ``model.param_names`` to its value in ``parameters``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance
          matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).
        * ``data_mask``: Boolean array the same length as data specifying
          whether each observation was used.
          *New in version 1.5.0.*

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Raises
    ------
    ImportError
        If the ``nestle`` package is not installed.
    ValueError
        If amplitude-bound guessing conflicts with ``vparam_names`` or
        ``bounds``, or if a varied parameter has neither a ppf, bounds,
        nor a ``tied`` entry.
    """

    try:
        import nestle
    except ImportError:
        raise ImportError("nest_lc() requires the nestle package.")

    # experimental parameters
    tied = kwargs.get("tied", None)

    data = photometric_data(data)

    # sort by time
    if not np.all(np.ediff1d(data.time) >= 0.0):
        sortidx = np.argsort(data.time)
        data = data[sortidx]
    else:
        sortidx = None

    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    fitdata, data_mask = cut_bands(data, model,
                                   z_bounds=bounds.get('z', None),
                                   warn=warn)

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied".format(
                                 model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True".format(
                                 model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(fitdata, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(fitdata, model)

    if ppfs is None:
        ppfs = {}
    if tied is None:
        tied = {}

    # Convert bounds/priors combinations into ppfs
    if bounds is not None:
        for key, val in bounds.items():
            if key in ppfs:
                continue  # ppfs take priority over bounds/priors
            a, b = val
            if priors is not None and key in priors:
                # solve ppf at discrete points and return interpolating
                # function
                x_samples = np.linspace(0., 1., 101)
                ppf_samples = ppf(priors[key], x_samples, a, b)
                f = Interp1D(0., 1., ppf_samples)
            else:
                f = Interp1D(0., 1., np.array([a, b]))
            ppfs[key] = f

    # NOTE: It is important that iparam_names is in the same order
    # every time, otherwise results will not be reproducible, even
    # with same random seed.  This is because iparam_names[i] is
    # matched to u[i] below and u will be in a reproducible order,
    # so iparam_names must also be.
    iparam_names = [key for key in vparam_names if key in ppfs]
    ppflist = [ppfs[key] for key in iparam_names]
    npdim = len(iparam_names)  # length of u
    ndim = len(vparam_names)  # length of v

    # Check that all param_names either have a direct prior or are tied.
    for name in vparam_names:
        if name in iparam_names:
            continue
        if name in tied:
            continue
        raise ValueError(
            "Must supply ppf or bounds or tied for parameter '{}'".format(
                name))

    def prior_transform(u):
        # Map the unit-cube sample ``u`` onto the varied parameters.
        d = {}
        for i in range(npdim):
            d[iparam_names[i]] = ppflist[i](u[i])
        # Builtin ``float``: the ``np.float`` alias was removed from NumPy.
        v = np.empty(ndim, dtype=float)
        for i in range(ndim):
            key = vparam_names[i]
            if key in d:
                v[i] = d[key]
            else:
                v[i] = tied[key](d)
        return v

    # Indicies of the model parameters in vparam_names
    idx = np.array([model.param_names.index(name) for name in vparam_names])

    fitdata = photometric_data(fitdata)
    fitdata.sort_by_time()
    fluxdata = np.array(fitdata.flux).astype(np.float32)
    fluxerrdata = np.array(fitdata.fluxerr).astype(np.float32)

    # Covariance of the observations only. It gets its own name so that
    # ``loglike`` never rebinds it and it cannot be confused with the
    # parameter covariance computed after sampling.
    data_cov = (np.diag(fitdata.fluxerr**2)
                if fitdata.fluxcov is None else fitdata.fluxcov)

    # Flattened per-band transmission tables handed to ``sntd_bandflux``;
    # all_bands[i] holds the [start, stop) slice of observation i's band.
    all_wave = []
    all_dwave = []
    all_trans = []
    all_bands = np.zeros((len(fitdata), 2), dtype=np.int32)
    all_zps = np.array(fitdata.zp).astype(np.float32)
    all_zpsys = np.array([
        get_magsystem(fitdata.zpsys[i]).zpbandflux(fitdata.band[i])
        for i in range(len(fitdata))
    ])
    band_names = np.array([x.name for x in fitdata.band])
    for b in set(fitdata.band):
        wave, dwave, trans = _bandflux_single_spacing(b)
        inds = np.where(band_names == b.name)[0]
        all_bands[inds, 0] = len(all_wave)
        all_bands[inds, 1] = len(all_wave) + len(wave)
        all_wave = np.append(all_wave, wave)
        all_dwave = np.append(all_dwave, [dwave] * len(wave))
        all_trans = np.append(all_trans, trans)

    bandDict = {
        'all_wave': all_wave.astype(np.float32),
        'all_dwave': all_dwave.astype(np.float32),
        'all_trans': all_trans.astype(np.float32),
        'all_zps': all_zps,
        'all_zpsys': all_zpsys.astype(np.float32),
        'all_bands': all_bands.astype(np.int32)
    }
    bandDict2 = {b: _bandflux_single_spacing(b) for b in set(fitdata.band)}

    def loglike(parameters):
        # Log-likelihood (-chisq / 2) of the fitted data for one sample.
        model.parameters[idx] = parameters
        if fitdata.fluxcov is None and not modelcov:
            # Fast path: independent errors, no matrix inversion.
            mflux = sntd_bandflux(model, fitdata.band, fitdata.time,
                                  bandDict, bandDict2,
                                  zp=fitdata.zp,
                                  zpsys=fitdata.zpsys).astype(np.float32)
            return -0.5 * fast_chisq(fluxdata, fluxerrdata, mflux)

        if modelcov:
            mflux, mcov = model.bandfluxcov(fitdata.band, fitdata.time,
                                            zp=fitdata.zp,
                                            zpsys=fitdata.zpsys)
            # Build a fresh matrix on every call: an augmented assignment
            # would make the name local to this closure (UnboundLocalError)
            # and would otherwise accumulate model covariance across calls.
            total_cov = data_cov + mcov
        else:
            mflux = model.bandflux(fitdata.band, fitdata.time,
                                   zp=fitdata.zp,
                                   zpsys=fitdata.zpsys).astype(np.float32)
            total_cov = data_cov

        # Use ``fitdata`` (after band cuts), not the uncut ``data``, so the
        # residual matches the covariance matrix in length.
        invcov = np.linalg.pinv(total_cov)
        diff = fitdata.flux - mflux
        return -0.5 * np.dot(np.dot(diff, invcov), diff)

    t0 = time.time()
    res = nestle.sample(loglike, prior_transform, ndim, npdim=npdim,
                        npoints=npoints, method=method, maxiter=maxiter,
                        maxcall=maxcall, rstate=rstate,
                        callback=(nestle.print_progress if verbose else None))
    elapsed = time.time() - t0

    # estimate parameters and covariance from samples
    vparameters, cov = nestle.mean_and_cov(res.samples, res.weights)

    # update model parameters to estimated ones.
    model.set(**dict(zip(vparam_names, vparameters)))

    # If we need to, unsort the mask so mask applies to input data
    if sortidx is not None:
        unsort_idx = np.argsort(sortidx)  # indicies that will unsort array
        data_mask = data_mask[unsort_idx]

    # `res` is a nestle.Result object. Collect result into a sncosmo.Result
    # object for consistency, and add more fields.
    res = Result(
        niter=res.niter,
        ncall=res.ncall,
        logz=res.logz,
        logzerr=res.logzerr,
        h=res.h,
        samples=res.samples,
        weights=res.weights,
        logvol=res.logvol,
        logl=res.logl,
        vparam_names=copy.copy(vparam_names),
        ndof=len(fitdata) - len(vparam_names),
        bounds=bounds,
        time=elapsed,
        parameters=model.parameters.copy(),
        covariance=cov,
        errors=OrderedDict(zip(vparam_names, np.sqrt(np.diagonal(cov)))),
        param_dict=OrderedDict(zip(model.param_names, model.parameters)),
        data_mask=data_mask)

    return res, model