def pulsar_check(self):
    """LAT pulsar check
    %(atable)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (self.atable is assigned below); its text is kept as found.
    #
    # Compares the LAT pulsar catalog (self.lcatdf) with the pulsar sources in
    # the model (self.df rows where psr is set) and builds two HTML tables of
    # catalog entries that are weak/unfitted or lack an association.
    # Side effects: adds columns to self.lcatdf in place; sets self.latsel and
    # self.atable.

    # compare with LAT pulsar catalog
    lat = self.lcatdf
    model_psr = self.df[self.df.psr]
    lat['ts'] = model_psr['ts']
    lat['aprob'] = model_psr['aprob']

    def catalog_dir(ra, dec):
        # catalog coordinates may be stored as strings, hence the float()
        return SkyDir(float(ra), float(dec))

    lat['ROI_index'] = [
        Band(12).index(catalog_dir(ra, dec))
        for ra, dec in zip(lat.RAJ2000, lat.DEJ2000)
    ]
    lat['skydir'] = [
        catalog_dir(ra, dec) for ra, dec in zip(lat.RAJ2000, lat.DEJ2000)
    ]
    lat['sourcedir'] = self.df.skydir[self.df.psr]

    # angular separation (deg) between catalog and fitted position; a float
    # entry in 'sourcedir' is the NaN left by index alignment (no model source)
    separations = []
    for cat_dir, fit_dir in zip(lat.skydir, lat.sourcedir):
        if type(fit_dir) == float:
            separations.append(np.nan)
        else:
            separations.append(np.degrees(cat_dir.difference(fit_dir)))
    lat['delta'] = separations
    far = lat.delta > 0.25  # computed but not used below

    catalog_names = set(self.lcatdf.index)
    model_names = set(self.df.name[self.df.psr])
    print('Catalog entries not found: {}'.format(
        list(catalog_names.difference(model_names))))

    # "missing": no/weak fit, or not associated unless very strong
    missing = np.array([np.isnan(x) or x < 10. for x in lat.ts])
    missing |= np.array((lat.aprob == 0) & (lat.ts < 1000))
    missing_names = lat.index[missing]

    cols = 'RAJ2000 DEJ2000 ts delta ROI_index'.split()
    selected = np.array([lat[col][missing] for col in cols])
    latsel = pd.DataFrame(selected, index=cols, columns=missing_names).T
    self.latsel = latsel

    self.atable = '<h4>Compare with LAT pulsar catalog: {}</h4>'.format(
        self.version)
    label_info = dict(
        ts='TS,Test Statistic',
        delta='delta,distance to fit position (deg)',
        ROI_index='ROI Index,Index of the ROI, a HEALPix ring index')
    weak_html = html_table(
        latsel.query('ts<10'),
        label_info,
        heading='<p>LAT catalog entries with weak or no fit (TS<10)',
        name=self.plotfolder + '/weak',
        maxlines=20,
        float_format=(FloatFormat(2)))
    self.atable += weak_html
    far_html = html_table(
        latsel.query('ts>10'),
        label_info,
        heading='<p>LAT catalog entries with nearby, but unassociated source ',
        name=self.plotfolder + '/far',
        maxlines=20,
        float_format=(FloatFormat(2)))
    self.atable += far_html
def selection(self, curvature_cut=0.1, ts_cut=10):
    """Select candidates.
    %(selection_info)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (self.selection_info is assigned below); text kept as found.
    #
    # curvature_cut : keep sources with curvature above this value
    # ts_cut        : keep sources with TS above this value
    # Side effects: sets self.keep, self.total, self.cvsname,
    # self.selection_info, self.html_list and writes the CSV file.
    self.curvature_cut = curvature_cut
    self.ts_cut = ts_cut
    df = self.df

    def first_prob(assoc):
        # association record -> probability of its first entry, 0 when null
        return assoc['prob'][0] if not pd.isnull(assoc) else 0

    aprob = np.array([first_prob(assoc) for assoc in df.associations])
    no3fgl = np.asarray([s is None for s in df.cat3fgl])

    # combine the cuts in the same left-to-right order as the original
    keep = no3fgl & (~self.psr)
    keep = keep & (df.curvature > curvature_cut)
    keep = keep & (df.ts > ts_cut)
    keep = keep & (df.locqual < 8)
    keep = keep & (aprob < 0.1)
    self.keep = keep
    self.total = sum(keep)

    self.cvsname = 'pulsar_candidates.csv'
    columns = 'ra dec glat ts pivot_energy pindex eflux_ratio curvature roiname'.split()
    t = df[keep][columns]
    t.to_csv(self.cvsname)
    print('wrote %d sources to %s' % (len(t), self.cvsname))

    # template text; the %(...)s keys refer to attributes set in this method
    self.selection_info = """\
        Cuts: non-3FGL, non-LAT PSR, association probability < 0.1, curvature>%(curvature_cut)s, TS>%(ts_cut)s<br>,
        <br>Total:%(total)s
        <br>%(html_list)s
        <br>
        Link to csv format table:
        <a href="../../%(cvsname)s?download=true">%(cvsname)s</a></li>
        """
    self.html_list = html_table(
        t,
        name=self.plotfolder + '/candidates',
        heading='<h4>%d Candidate pulsar sources</h4>' % len(t),
        float_format=FloatFormat(2))
def check_a_list(self, filename='../uw7002/7yr_LOFAR_uw7000.txt'):
    """Make a table with curvature and variability
    %(check_list)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (self.check_list is assigned below); text kept as found.
    #
    # filename : path handed to self.load_list
    # Side effect: sets self.check_list to the HTML returned by html_table.
    lofar = self.load_list(filename)
    # The heading previously referenced an undefined name (`lofarset`), which
    # raised NameError; the count is the length of the loaded list.
    self.check_list = html_table(
        lofar,
        name=self.plotfolder + '/checked_sources',
        heading='<h4>%d checked sources</h4>' % len(lofar),
        float_format=FloatFormat(2))
def no_curvature(self, prefix='S966', ts_high_cut=2):
    """Weak new sources with PW fits
    %(weak_list)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (self.weak_list is assigned below); text kept as found.
    #
    # prefix      : only sources whose index name starts with this string
    # ts_high_cut : upper limit on the ts_high column
    # Side effects: sets self.noc_df (selection sorted by roiname) and
    # self.weak_list (HTML table of the unsorted selection).
    df = self.df
    pcut = np.array([n.startswith(prefix) for n in df.index], bool)
    cut = ((df.ts > 10) & (df.locqual < 8) & (df.curvature < 0.01) & pcut
           & (df.ts_high < ts_high_cut) & (df.ts_low < 5))
    columns = ('ra dec glat ts pivot_energy pindex fitqual locqual '
               'ts_low ts_med ts_high roiname').split()
    t = df[cut][columns]
    # sort_values replaces the removed sort_index(by=...) spelling
    self.noc_df = t.sort_values(by='roiname')
    print('selected %d %s sources' % (len(t), prefix))
    self.weak_list = html_table(
        t,
        name=self.plotfolder + '/weak_pl_sources',
        heading='<h4>%d weak new power-law sources</h4>' % len(t),
        float_format=FloatFormat(2))
def bad_seed_plots(self):
    """Plots of the %(bad_seeds)d seeds that were not imported to the model
    Mostly this is because there was no sensible localization. The table below has the localization TS map plots for each one,
    linked to the source name.
    <br>Left: size of cluster, in 0.15 degree pixels
    <br>Center: maximum TS in the cluster
    <br>Right: distribution in sin(|b|), showing cut if any.
    %(bad_seed_list)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (self.bad_seeds / self.bad_seed_list are assigned below);
    # text kept as found.
    #
    # Side effects: writes <plotfolder>/bad_seeds.csv, sets self.bad_seeds and
    # self.bad_seed_list. Returns whatever self.seed_plots returns.
    self.bad_seeds = sum(self.seeds.bad)
    columns = 'ra dec ts size l b roi'.split()
    # sort_values replaces the removed sort_index(by=...) spelling
    df = self.seeds[self.seeds.bad][columns].sort_values(by='roi')

    csv_file = self.plotfolder + '/bad_seeds.csv'
    df.to_csv(csv_file)
    print('Wrote file {}'.format(csv_file))

    s = 'Bad seed list'
    s += html_table(
        df,
        name=self.plotfolder + '/bad_seeds',
        heading='<h4>Table of %d failed seeds</h4>' % self.bad_seeds,
        href_pattern='tsmap_fail/%s_tsmap.jpg',
        float_format=FloatFormat(2))
    self.bad_seed_list = s
    return self.seed_plots(subset=self.seeds.bad,
                           title='{} failed seeds'.format(self.bad_seeds))
def new_candidates(self):
    """Potential pulsar candidates
    Make a list of sources with the selections
    <ul>
    <li>not associated
    <li>not in 4FGL or withinn 0.5 deg of one
    <li>nearest 4FGL source is extended or has TS>1000
    <ii>
    </ul>
    The plots are of this list, showing effect of curvature selection.
    <h4>%(candidate_table)s</h4>
    <br>A csv file of the above is <a href="../../%(pulsar_candidate_filename)s?download=true">here</a>
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (candidate_table, pulsar_candidate_filename are assigned
    # below); text kept as found.
    #
    # Side effects: calls self.check4FGL, writes the candidate CSV, sets
    # self.candidate_table, self.pulsar_candidate_filename and
    # self.df_pulsar_candidates. Returns the matplotlib figure.

    # add info about 4FGL
    self.check4FGL(pattern=None)

    # base selection; the exact cuts are the ones spelled out in the query
    selection = ('fl8y==False & aprob<0.8 & locqual<8 & distance>0.5 '
                 '& other_extended==False & otherts<1000')
    dfx = self.df.query(selection)

    # values to display
    ts = dfx.ts.astype(float).clip(0, 1000)
    singlat = np.sin(np.radians(dfx.glat.astype(float)))
    curvature = dfx.curvature.astype(float).clip(0, 1)

    # curvature selection
    cut = np.logical_and(curvature < 0.75, curvature > 0.15)

    label_info = dict()
    table_columns = ('ra dec ts glat pindex curvature locqual '
                     'distance otherid otherts').split()
    dfcut = dfx[cut][table_columns].sort_values(by='ts', ascending=False)
    heading = ('<b>Table of {} pointlike sources not in 4FGL, not assocated '
               'and with curvature selection</b>').format(len(dfcut))
    self.candidate_table = html_table(
        dfcut,
        label_info,
        heading=heading,
        name=self.plotfolder + '/candidates',
        maxlines=20,
        float_format=(FloatFormat(2)))
    self.pulsar_candidate_filename = self.plotfolder + '/pulsar_candidates.csv'
    dfcut.to_csv(self.pulsar_candidate_filename)
    self.df_pulsar_candidates = dfcut  # for interactive

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 5))
    hkw = dict(histtype='step', lw=2)

    def hist_pair(ax, values, bins, xlabel, xlog=False):
        # full selection, then the curvature-selected subset on top
        ax.hist(values, bins, **hkw)
        ax.hist(values[cut], bins, label='curvature cut', **hkw)
        if xlog:
            ax.set(xlabel=xlabel, xscale='log')
        else:
            ax.set(xlabel=xlabel, xscale='linear')

    hist_pair(ax2, ts, np.logspace(1, 3, 51), 'TS', xlog=True)
    hist_pair(ax3, singlat, np.linspace(-1, 1, 41), 'sin(b)')
    hist_pair(ax1, curvature, np.linspace(0, 1, 21), 'curvature')
    return fig
def bigfile_associations(self, test=False):
    """BigFile Associations
    Construct a list of non LAT pulsar point sources associated with the BigFile pulsar list (Version %(bigfile_version)s).
    <br>Exclude sources with poor localization (quality>5) and BigFile pulsars in clusters.
    <ul>
    <li>%(bigfile_hi_table)s </li>
    <li>%(bigfile_lo_table)s </li>
    </ul>
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (bigfile_version, bigfile_hi_table, bigfile_lo_table are
    # assigned below); text kept as found.
    #
    # test : when true, return the table DataFrame; otherwise return None.
    # Side effects: sets self.bf, self.bigfile_version, self.bigfile_hi_table
    # and self.bigfile_lo_table.

    class BigFile(object):
        """manage look up in the BigFile"""

        def __init__(self):
            # use the last file in sorted order among the matching files
            ff = sorted(
                glob.glob(
                    os.path.expandvars(
                        '$FERMI/catalog/srcid/cat/Pulsars_BigFile_*.fits')))
            hdus = fits.open(ff[-1])
            print('Read file {}'.format(ff[-1]))
            self.version = ff[-1].split('_')[-1].split('.')[0]
            self.d = pd.DataFrame(hdus[1].data)
            self.names = [s.strip() for s in self.d.NAME.values]
            self.jnames = [s.strip() for s in self.d.PSRJ.values]

        def __call__(self, name):
            """Find the entry with given name"""
            if name in self.names:
                i = self.names.index(name)
            elif name in self.jnames:
                i = self.jnames.index(name)
            else:
                error = 'Data for source %s not found' % name
                print(error)
                raise ValueError(error)
            return self.d.iloc[i]

    not_psr = np.array([not n.startswith('PSR') for n in self.df.index], bool)
    psrx = np.array([x == 'pulsar_big' for x in self.df.acat],
                    bool) & not_psr & (self.df.locqual < 5)
    print('%d sources associated with BigFile pulsar list' % sum(psrx))
    # explicit copy: columns are added below
    pt = self.df[psrx][
        'aprob aname aang ts glat glon pivot_energy curvature locqual'.
        split()].copy()

    # look it up in BigFile, add other stuff
    self.bf = bf = BigFile()
    self.bigfile_version = bf.version
    anames = self.df[psrx].aname
    # one lookup per association instead of one per derived column
    entries = [bf(n) for n in anames]
    pt['jname'] = [e.PSRJ for e in entries]

    # test for the jname not ending in numeric character
    # (a boolean array also works as a mask when the selection is empty)
    not_incluster = np.array([n[-1] in '0123456789' for n in pt.jname], bool)
    print(' Selected {} out of {} associations not in clusters'.format(
        sum(not_incluster), len(pt)))

    pt['Hmax'] = [
        max(e.Hall32, e.Hall36, e.Hall40, e.Hval32, e.Hval36, e.Hval40)
        for e in entries
    ]
    pt['history'] = [e.History[1:-1].replace("'", "") for e in entries]
    pt['edot'] = ['%.2e' % e.EDOT for e in entries]
    pt['P0'] = ['{:.3f}'.format(e.P0) for e in entries]

    # make file table
    ptx = pt[not_incluster][
        'jname glat glon edot P0 history Hmax ts aprob aang curvature pivot_energy locqual'
        .split()]
    # Latitude mask is computed on the displayed rows (ptx): previously it was
    # computed on pt, so the counts in the headings included cluster pulsars
    # that are not in the tables.
    hilat = abs(ptx.glat) > 5
    if len(ptx) > 0:
        colinfo = dict(
            name='Source Name,click for link to SED',
            jname='Pulsar name,J version',
            edot='Edot, rate of energy change',
            Hmax='Hmax, max(Hall32, Hall36, Hall40, Hval32, Hval36, Hval40)',
            pivot_energy=
            'Pivot Energy,Energy of zero correlation between spectral index and normalization ',
            history='History,BigFile history entry',
            ts='TS,Test Statistic for the source',
            aprob='Probability,Bayesian association probability',
            aang='Angle,angular distance (deg)',
            locqual=
            'Localization quality,measure of the goodness of the localization fit\n greater than 5 is questionable',
        )
        self.bigfile_hi_table = html_table(
            ptx[hilat],
            colinfo,
            float_format=FloatFormat(2),
            heading="""<b>Table of %d high-latitude (|b|>5) associations.</b>"""
            % sum(hilat),
            name=self.plotfolder + '/hilat_table',
            maxlines=10)
        self.bigfile_lo_table = html_table(
            ptx[~hilat],
            colinfo,
            float_format=FloatFormat(2),
            heading="""<b>Table of %d low-latitude (|b|<5) associations.</b> """
            % sum(~hilat),
            name=self.plotfolder + '/lolat_table',
            maxlines=10)
    else:
        self.bigfile_hi_table = self.bigfile_lo_table = ''
    return ptx if test else None
def spectra(self, index_min=0.0, index_max=2.5, cutoff_max=1e4, taillist=True):
    """ Spectral distributions
    Spectral parameters for %(spectral_fits)d pulsars with significant fits (TS>16)
    %(pulsar_tail_check)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (spectral_fits, pulsar_tail_check are assigned below); text
    # kept as found.
    #
    # index_min, index_max : clip range / tail limits for the spectral index
    # cutoff_max           : clip value / tail limit for the cutoff
    # taillist             : when true, write an HTML table of tail sources
    # Returns the matplotlib figure (2x2 panels).
    psrmodel = (self.df.ts > 16) & (self.df.modelname
                                    == 'PLSuperExpCutoff') & self.df.psr
    self.spectral_fits = sum(psrmodel)
    # explicit copy: a column is added below
    t = self.df.loc[psrmodel][
        'ts flux pindex cutoff e0 index2 index2_unc roiname freebits fitqual msec'
        .split()].copy()
    t['eflux'] = t.flux * t.e0**2 * 1e6
    msec = np.array(t.msec.values, bool)

    def gev_ticks():
        # label the 100 / 1000 / 10000 ticks as 0.1 / 1 / 10; a new formatter
        # per axis since a formatter is attached to one axis
        return ticker.FuncFormatter(lambda val, pos: {
            100: '0.1',
            1000: '1',
            10000: '10'
        }.get(val, ''))

    def histit(ax, bins, vals):
        hkw = dict(histtype='stepfilled', alpha=0.5, lw=2)
        ax.hist(vals[msec], bins, label='msec', color='lightblue',
                edgecolor='blue', **hkw)
        ax.hist(vals[~msec], bins, label='young', color='pink',
                edgecolor='red', **hkw)

    def plot1(ax, efmin=1e-2, efmax=1e3):
        bins = np.logspace(np.log10(efmin), np.log10(efmax), 26)
        vals = np.array(t.eflux, float).clip(efmin, efmax)
        histit(ax, bins, vals)
        ax.set(xscale='log', xlabel='energy flux', xlim=(efmin, efmax))
        ax.grid(alpha=0.5)
        ax.legend(prop=dict(size=10))

    def plot3(ax):
        bins = np.linspace(index_min, index_max, 16)
        vals = np.array(t.pindex, float).clip(index_min, index_max)
        histit(ax, bins, vals)
        ax.set(xlabel='spectral index')
        ax.grid(alpha=0.5)
        ax.legend(prop=dict(size=10))

    def plot2(ax):
        bins = np.logspace(2, 4, 26)
        vals = np.array(t.cutoff, float).clip(None, cutoff_max)
        histit(ax, bins, vals)
        ax.set(xscale='log', xlabel='cutoff energy (GeV)')
        ax.grid(alpha=0.5)
        ax.legend(prop=dict(size=10))
        ax.xaxis.set_major_formatter(gev_ticks())

    def plot4(ax):
        xvals = np.array(t.cutoff, float).clip(None, cutoff_max)
        yvals = np.array(t.pindex, float).clip(index_min, index_max)
        ax.plot(xvals[msec], yvals[msec], 'o', color='blue', label='msec')
        ax.plot(xvals[~msec], yvals[~msec], 'D', color='orange',
                label='young')
        ax.set(
            xscale='log',
            xlabel='cutoff [GeV]',
            ylabel='spectral index',
            ylim=(index_min - 0.1, index_max + 0.1),
        )
        ax.grid(alpha=0.5)
        ax.legend(loc='lower right', prop=dict(size=10))
        ax.xaxis.set_major_formatter(gev_ticks())

    fig, axx = plt.subplots(2, 2, figsize=(12, 12))
    plt.subplots_adjust(wspace=0.3, left=0.05, bottom=0.15)
    # explicit loop: map() used only for side effects is lazy on Python 3 and
    # would draw nothing there
    for plot, ax in zip((plot1, plot2, plot3, plot4), axx.flatten()):
        plot(ax)

    tail_cut = (t.pindex <= index_min) | (t.pindex > index_max) | (
        t.cutoff > cutoff_max)
    n_tails = sum(tail_cut)
    print('%d pulsar sources found in tails of index or cutoff' % n_tails)
    if taillist and n_tails > 0:
        tails = t[tail_cut]['ts eflux pindex cutoff freebits roiname'.split()]
        filename = 'pulsar_tails.html'
        html_file = self.plotfolder + '/%s' % filename
        html = html_table(tails.sort_values(by='roiname'),
                          float_format=FloatFormat(2))
        # context manager so the file is flushed and closed deterministically
        with open(html_file, 'w') as out:
            out.write('<head>\n' + _html.style + '</head>\n<body>' + html +
                      '\n</body>')
        self.pulsar_tail_check = '<p><a href="%s?skipDecoration">Table of %d sources on tails</a>: ' % (
            filename, len(tails))
        # report the limits actually used (same text as before for defaults)
        self.pulsar_tail_check += 'Criteria: require index between {:g} and {:g}, cutoff < {:.1f} GeV'.format(
            index_min, index_max, cutoff_max * 1e-3)
    else:
        self.pulsar_tail_check = '<p>No sources on tails'
    return fig
def setup(self, **kwargs):
    # Load the per-ROI pickles and build the tables used by the count plots.
    # Sets: self.plotfolder, self.pkls, self.missing_info, self.rois,
    # self.energy, self.counts, and (when the pickles carry a 'history' key)
    # self.sinfo, self.history and self.iteration_info.
    # **kwargs is accepted but not used here.
    self.plotfolder = 'counts'
    gcorr = GalacticCorrection(self.config)

    def has_counts(roi):
        # single definition of "usable ROI", applied everywhere below
        return 'counts' in roi and roi['counts'] is not None

    def load_rois():
        # get the basic pickles with the model
        files, pkls = self.load_pickles()
        self.pkls = pkls  # for development
        assert len(pkls) == 1728, 'expect to find 1728 pickled roi files'

        def chisq10(counts):
            # chi squared summed over the first 8 energy bins
            total, observed = counts['total'], counts['observed']
            return ((observed - total)**2 / total)[:8].sum()

        def lat180(l):
            # map [0, 360) to (-180, 180]
            return l if l < 180 else l - 360

        rdict = dict()
        skipped = 0
        for index, r in enumerate(pkls):
            if not has_counts(r):
                print('***No counts in %s: skipping' % r['name'])
                skipped += 1
                continue
            cnts = r['counts']
            rdict[r['name']] = dict(
                glon=lat180(r['skydir'].l()),
                glat=r['skydir'].b(),
                chisq=cnts['chisq'],
                chisq10=chisq10(r['counts']),
                uchisq=cnts['uchisq'] if 'uchisq' in cnts else 0,
                gnorm=gcorr(index, r),
            )
        if skipped > 0:
            self.missing_info = '<p>%d missing ROIS' % skipped
            print('***Skipped %d ROIs' % skipped)
        else:
            self.missing_info = ''
        self.rois = pd.DataFrame(rdict).transpose()
        self.rois['singlat'] = np.sin(
            np.radians(np.asarray(self.rois.glat, float)))
        self.rois['glon'] = np.asarray(self.rois.glon, float)

        # add columns for total counts; ROIs without counts were skipped
        # above and must be skipped here too (indexing their missing/None
        # 'counts' entry used to raise)
        d = {}
        for x in pkls:
            if not has_counts(x):
                continue
            d[x['name']] = dict([(m[0], sum(m[1]).round())
                                 for m in x['counts']['models']])
        self.rois = pd.concat([self.rois, pd.DataFrame(d).T], axis=1)

    load_rois()

    # dict of dataframes with count info. columns are energies
    counts = [p['counts'] for p in self.pkls if has_counts(p)]
    # extract list from the first pickle that has counts
    self.energy = counts[0]['energies']
    self.counts = dict()
    for key in ['observed', 'total']:
        self.counts[key] = pd.DataFrame([x[key] for x in counts],
                                        index=self.rois.index)
    self.add_model_info()

    if 'history' in self.pkls[0].keys():
        print('Extracting history info from the ROI analyses')
        self.sinfo = t = self.history_info()
        if self.sinfo is None:
            self.iteration_info = 'No iterations yet'
            self.history = []
            return

        # check for previous creation, ignore them: look for last "monthly*" or "create"
        y = np.array([x.startswith(('monthly', 'create')) for x in t.stage],
                     bool)
        if y.sum() == 0:
            # no create: use update_full
            y = np.array([x.startswith('update_full') for x in t.stage], bool)
        if y.sum() > 0:
            starts = t.index[y]
            lc = starts[-1]
            self.history = t[t.index >= lc]
            skipped = starts[:-1]
            # .loc replaces the removed .ix indexer (label lookup)
            print('Skipped starts:\n{}'.format(t.loc[skipped]))
        else:
            self.history = t

        input_model = self.config['input_model']['path']
        # note use of 'plots/' below since not done with setup
        maxlines = 40
        toshow_html = analysis_base.html_table(
            self.history,
            maxlines=maxlines,
            name='plots/' + self.plotfolder + '/iteration_history',
            heading=
            '<h4>Iteration history: log likelihood change for each stage</h4>',
            float_format=FloatFormat(1),
            href=False,
        )
        if len(self.history) > maxlines:
            toshow_html += '<p>Last 10 out of {} stages'.format(len(self.history))\
                +self.history.tail(10).to_html(float_format=FloatFormat(1))
        self.iteration_info = """<p>Input model: <a href="../../%s/plots/index.html?skipDecoration">%s</a>
            <p>%s""" % (input_model, input_model, toshow_html)
    else:
        self.iteration_info = ''
def fit_quality(self,
                xlim=(0, 50),
                ndf=10,
                tsbandcut=20,
                grid_flag=True,
                make_table=True,
                legend_flag=True):
    """ Spectral fit quality
    This is the difference between the TS from the fits in the individual energy bands, and that for the spectral fit.
    It should be distributed approximately as chi squared of at most 14-2 =12 degrees of freedom.
    However, high energy bins usually do not contribute, so we compare with ndf=%(ndf)d.
    All sources with TS_bands>%(tsbandcut)d are shown.<br>
    <b>Left</b>: Power-law fits. Tails in this distribution perhaps could be improved by converting to log parabola.
    <br><b>Center</b>: Log parabola fits.
    <br><b>Right</b>: Fits for the pulsars, showing high latitude subset.
    <br><br> Averages: %(fit_quality_average)s
    %(badfit_check)s
    %(poorfit_table)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (ndf, tsbandcut, fit_quality_average, badfit_check,
    # poorfit_table are assigned below); text kept as found.
    #
    # xlim        : histogram range; fit-quality values are clipped to it
    # ndf         : degrees of freedom of the overlaid chi-squared curve
    # tsbandcut   : only sources with band_ts above this are histogrammed
    # grid_flag   : passed to ax.grid
    # make_table  : build the poor-fit table and set flag bit 2 on its sources
    # legend_flag : show legends (otherwise titles) on the first two panels
    # Returns the matplotlib figure (3 panels).
    from scipy import stats
    fig, axx = plt.subplots(1, 3, figsize=(12, 6))
    plt.subplots_adjust(left=0.1)
    s = self.df
    psr = np.asarray(s.psr, bool)
    fq = np.array(s.fitqual, float)
    beta = s.beta
    logparabola = (~psr) & (beta > 0.01)
    powerlaw = (~psr) & (beta.isnull() | (beta < 0.01))
    self.tsbandcut = tsbandcut
    cut = np.array((s.band_ts > tsbandcut) & (fq > 0), bool)

    dom = np.linspace(xlim[0], xlim[1], 26)  # histogram bin edges
    d = np.linspace(xlim[0], xlim[1], 51)  # points for the chi2 curve
    delta = dom[1] - dom[0]
    chi2 = lambda x: stats.chi2.pdf(x, ndf)
    fudge = 1.0  # to scale, not sure why
    hilat = np.abs(self.df.glat) > 5
    chi2_label = r'$\mathsf{\chi^2\ ndf=%d}$' % ndf
    # averages: [powerlaw, logparabola, expcutoff(hilat), expcutoff(lolat)]
    self.average = [0] * 4

    def tobool(a):
        return np.array(a, bool)

    # left and center panels; the selections are passed directly instead of
    # being looked up by name with eval()
    panels = zip(axx[:2], ('power law', 'log-normal'),
                 (powerlaw, logparabola))
    for i, (ax, label, model_cut) in enumerate(panels):
        mycut = tobool(cut & model_cut)
        count = sum(mycut)
        ax.hist(fq[mycut].clip(*xlim),
                dom,
                histtype='stepfilled',
                label=label + ' (%d)' % count)
        self.average[i] = fq[mycut].mean()
        ax.plot(d,
                chi2(d) * count * delta / fudge,
                'r',
                lw=2,
                label=chi2_label)
        ax.grid(grid_flag)
        ax.set_xlabel('fit quality')
        if legend_flag:
            ax.legend(prop=dict(size=10))
        else:
            ax.set_title(label)

    # right panel: pulsars, with the high-latitude subset overlaid
    ax = axx[2]
    label = 'exponential cutoff'
    mycut = cut & psr
    hilat_cut = tobool(mycut & hilat)
    lolat_cut = tobool(mycut & (~hilat))
    count = sum(mycut)
    if legend_flag:
        labels = [
            label + ' (%d)' % count,
            label + ' [|b|>5] (%d)' % hilat_cut.sum(), chi2_label
        ]
    else:
        labels = ['all', '|b|>5', '_nolegend_']
        ax.set_title(label)
    ax.hist(fq[tobool(mycut)].clip(*xlim),
            dom,
            histtype='stepfilled',
            label=labels[0])
    ax.hist(fq[hilat_cut].clip(*xlim),
            dom,
            histtype='stepfilled',
            color='orange',
            label=labels[1])
    self.average[2] = fq[hilat_cut].mean()
    self.average[3] = fq[lolat_cut].mean()
    ax.plot(d, chi2(d) * count * delta / fudge, 'r', lw=2, label=labels[2])
    ax.grid(grid_flag)
    ax.set_xlabel('fit quality')
    ax.legend(loc='upper right', prop=dict(size=10))

    # sources flagged as bad fits; .loc / sort_values replace the removed
    # .ix / sort_index(by=...) spellings
    self.df['badfit2'] = np.array(self.df.badfit.values, bool)
    t = self.df.loc[(self.df.badfit2)
                    & (self.df.ts > 10)].sort_values(by='roiname')
    print('%d sources with bad fits' % len(t))
    if len(t) > 0:
        print('%d sources with missing errors' % len(t))
        self.badfit = t[[
            'ts', 'freebits', 'badbits', 'pindex', 'beta', 'e0', 'roiname'
        ]]
        self.badfit_check = html_table(
            self.badfit,
            name=self.plotfolder + '/badfits',
            heading='<h4>%d Sources with missing errors</h4>' % len(t),
            float_format=FloatFormat(1))
    else:
        self.badfit_check = '<p>All sources fit ok.'

    names = 'powerlaw logparabola expcutoff(hilat) expcutoff(lolat)'.split()
    self.fit_quality_average = ', '.join(
        '%s: %.1f' % (n, x) for n, x in zip(names, self.average))
    self.ndf = ndf
    print('fit quality averages: %s' % self.fit_quality_average)

    if make_table:
        # Make table of the poor fits
        s['pull0'] = np.array(
            [x.pull[0] if x is not None else np.nan for x in s.sedrec])
        poor = ((s.fitqual > 30) | (np.abs(s.pull0) > 3)) & (s.ts > 10)
        t = s.loc[poor][
            'ra dec glat fitqual pull0 ts modelname freebits index2 roiname'
            .split()].sort_values(by='roiname')
        self.poorfit_table = html_table(
            t,
            name=self.plotfolder + '/poorfit',
            heading='<h4>Table of %d poor spectral fits</h4>' % len(t),
            float_format=FloatFormat(2),
            formatters=dict(ra=FloatFormat(3),
                            dec=FloatFormat(3),
                            ts=FloatFormat(0),
                            index2=FloatFormat(3)))
        # flag sources that made it into the list (bit value 2).
        # Written through .loc on the 'flags' column: attribute access
        # (df.flags) collides with the DataFrame.flags property of newer
        # pandas, and chained assignment is not guaranteed to write through.
        self.df.loc[t.index, 'flags'] = np.asarray(
            self.df.loc[t.index, 'flags'], int) | 2
        print('%d sources flagged (2) as poor fits' % len(t))
    elif not hasattr(self, 'poorfit_table'):
        # the docstring references %(poorfit_table)s; make sure the key exists
        # when no table is requested (an existing value is left untouched)
        self.poorfit_table = ''
    return fig
def pulsar_spectra(self, index_min=0.0, index_max=2.5, cutoff_max=8000):
    """ Distributions for sources fit with PLSuperExpCutoff spectral model, mostly LAT pulsars
    For each plot, the subset with a poor fit is shown.
    %(pulsar_tail_check)s
    %(pulsar_fixed)s
    %(pulsar_b)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (pulsar_tail_check, pulsar_fixed, pulsar_b are assigned
    # below); text kept as found.
    #
    # index_min, index_max : clip range / tail limits for the spectral index
    # cutoff_max           : clip value / tail limit for the cutoff; divided by
    #                        1e3 for display (axis label says GeV)
    # Side effects: sets flag bit 1 on tail sources in self.df, may write
    # <plotfolder>/pulsar_tails.html. Returns the figure (4 panels).
    fig, axx = plt.subplots(1, 4, figsize=(14, 4))
    plt.subplots_adjust(wspace=0.3, left=0.05, bottom=0.15)
    psrmodel = (self.df.ts > 10) & (self.df.modelname == 'PLSuperExpCutoff')
    # .loc replaces the removed .ix indexer; explicit copy since a column is
    # added below
    t = self.df.loc[psrmodel][
        'ts flux pindex cutoff e0 index2 index2_unc roiname freebits fitqual'
        .split()].copy()
    t['eflux'] = t.flux * t.e0**2 * 1e6
    badfit = t.fitqual > 30
    any_badfit = sum(badfit) > 0

    def hist_with_badfit(ax, vals, bins):
        # all sources, then the poor-fit subset in red
        ax.hist(vals, bins)
        if any_badfit:
            ax.hist(vals[badfit], bins, color='red', label='poor fit')

    def plot1(ax, efmin=1e-1, efmax=1e3):
        bins = np.logspace(np.log10(efmin), np.log10(efmax), 26)
        hist_with_badfit(ax, t.eflux.clip(efmin, efmax), bins)
        plt.setp(ax, xscale='log', xlabel='energy flux', xlim=(efmin, efmax))
        ax.grid()
        ax.legend(prop=dict(size=10))

    def plot2(ax):
        bins = np.linspace(index_min, index_max, 26)
        hist_with_badfit(ax, t.pindex.clip(index_min, index_max), bins)
        plt.setp(ax, xlabel='spectral index')
        ax.grid()
        ax.legend(prop=dict(size=10))

    def plot3(ax):
        bins = np.linspace(0, cutoff_max / 1e3, 26)
        hist_with_badfit(ax, t.cutoff.clip(0, cutoff_max) / 1e3, bins)
        plt.setp(ax, xlabel='cutoff energy (GeV)')
        ax.grid()
        ax.legend(prop=dict(size=10))

    def plot4(ax, xlim=(0, cutoff_max)):
        xvals = t.cutoff.clip(*xlim) / 1e3
        yvals = t.pindex.clip(index_min, index_max)
        ax.plot(xvals, yvals, 'o')
        if any_badfit:
            ax.plot(xvals[badfit], yvals[badfit], 'or', label='poor fit')
        plt.setp(
            ax,
            xlabel='cutoff energy',
            ylabel='spectral index',
            xlim=(xlim[0] - 0.1, 1.03 * xlim[1] / 1e3),
            ylim=(index_min - 0.1, index_max + 0.1),
        )
        ax.grid()
        ax.legend(loc='lower right', prop=dict(size=10))

    for f, ax in zip((plot1, plot2, plot3, plot4), axx.flatten()):
        f(ax)

    tail_cut = (t.pindex <= index_min) | (t.pindex > index_max) | (
        t.cutoff > cutoff_max)
    tail_names = t.loc[tail_cut].index
    # set bit 1 with OR rather than += 1: adding is not idempotent and would
    # spill into other flag bits if the bit is already set
    self.df.loc[tail_names, 'flags'] = np.asarray(
        self.df.loc[tail_names, 'flags'], int) | 1
    print('%d pulsar sources flagged (1) in tails of index or cutoff' %
          sum(tail_cut))
    if sum(tail_cut) > 0:
        tails = t[tail_cut]['ts eflux pindex cutoff freebits roiname'.split()]
        filename = 'pulsar_tails.html'
        html_file = self.plotfolder + '/%s' % filename
        # sort_values replaces the removed sort_index(by=...) spelling
        html = html_table(tails.sort_values(by='roiname'),
                          float_format=FloatFormat(2))
        # context manager so the file is flushed and closed deterministically
        with open(html_file, 'w') as out:
            out.write('<head>\n' + _html.style + '</head>\n<body>' + html +
                      '\n</body>')
        self.pulsar_tail_check = '<p><a href="%s?skipDecoration">Table of %d sources on tails</a>: ' % (
            filename, len(tails))
        # report the limits actually used (same text as before for defaults)
        self.pulsar_tail_check += 'Criteria: require index between {:g} and {:g}, cutoff<{:g} GeV'.format(
            index_min, index_max, cutoff_max / 1e3)
    else:
        self.pulsar_tail_check = '<p>No sources on tails'

    # table of pulsars with b<1
    tt = t[t.index2 < 1]['ts fitqual pindex cutoff index2 index2_unc'.split()].copy()
    tt['significance'] = (1 - tt.index2) / tt.index2_unc
    self.pulsar_b = html_table(
        tt,
        name=self.plotfolder + '/pulsar_b',
        heading='<h4>Table of %d sources with b<1</h4>' % len(tt),
        float_format=FloatFormat(2))
    print('%d pulsar sources with b<1' % len(tt))

    # table of fits with any fixed parameter other than b
    # (the three lowest bits of freebits must all be set)
    tt = t[((np.array(t.freebits, int) & 7) != 7)][
        'ts fitqual pindex cutoff freebits roiname'.split()].sort_values(
            by='roiname')
    if len(tt) > 0:
        print('%d pulsar-like sources with fixed parameters' % len(tt))
        self.pulsar_fixed = html_table(
            tt,
            name=self.plotfolder + '/pulsar_fixed',
            heading='<h4>%d pulsar-like sources with fixed parameters</h4>' %
            len(tt),
            float_format=FloatFormat(2))
    else:
        self.pulsar_fixed = ''
    return fig
def non_psr_spectral_plots(self, index_min=1.0, index_max=3.5, beta_max=2.0):
    """ Plots showing spectral parameters for PowerLaw and LogParabola spectra
    Left: energy flux in eV/cm**2/s. This is the differential flux at the pivot energy
    <br> Center: the spectral index.
    <br> Right: the curvature index for the subset with log parabola fits.
    %(tail_check)s
    %(beta_check)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (tail_check, beta_check are assigned below); text kept as
    # found.
    #
    # index_min, index_max : clip range / tail limits for the spectral index
    # beta_max             : clip value / tail limit for beta
    # Side effects: sets flag bit 1 on tail sources in self.df.
    # Returns the matplotlib figure (3 panels).
    fig, axx = plt.subplots(1, 3, figsize=(12, 4))
    plt.subplots_adjust(wspace=0.2, left=0.05, bottom=0.15)
    # .loc replaces the removed .ix indexer; explicit copy since a column is
    # added below
    t = self.df.loc[(self.df.ts > 10)
                    & (self.df.modelname == 'LogParabola')][
                        'ts flux pindex beta beta_unc freebits e0 roiname'
                        .split()].copy()
    t['eflux'] = t.flux * t.e0**2 * 1e6
    ts_cuts = [10, 25]

    ax = axx[0]
    for tscut in ts_cuts:
        ax.hist(t.eflux[t.ts > tscut].clip(4e-2, 1e2),
                np.logspace(-2, 2, 26),
                label='TS>%d' % tscut)
    plt.setp(ax, xscale='log', xlabel='energy flux', xlim=(4e-2, 1e2))
    ax.grid()
    ax.legend(prop=dict(size=10))

    ax = axx[1]
    for tscut in ts_cuts:
        ax.hist(t.pindex[t.ts > tscut].clip(index_min, index_max),
                np.linspace(index_min, index_max, 26),
                label='TS>%d' % tscut)
    plt.setp(ax, xlabel='spectral index')
    ax.grid()
    ax.legend(prop=dict(size=10))

    ax = axx[2]
    for tscut in ts_cuts:
        ax.hist(t.beta[(t.ts > tscut) & (t.beta > 0.01)].clip(0, beta_max),
                np.linspace(0, beta_max, 26),
                label='TS>%d' % tscut)
    plt.setp(ax, xlabel='beta')
    ax.grid()
    ax.legend(prop=dict(size=10))

    # get tails; parentheses make the original operator precedence explicit
    # (& binds tighter than |): the index limits apply only where beta == 0
    index_tail = ((t.pindex < index_min) |
                  (t.pindex > index_max)) & (t.beta == 0)
    tail_cut = ((t.eflux < 5e-2) | index_tail | (t.beta > beta_max) |
                (t.beta < 0))
    if sum(tail_cut) > 0:
        # sort_values replaces the removed sort_index(by=...) spelling
        tails = t[tail_cut]['ts eflux pindex beta freebits roiname'.split(
        )].sort_values(by='roiname')
        # NOTE(review): the table name says 'pulsar_tails' although these are
        # the non-pulsar tails — looks like a copy-paste; left unchanged so
        # the generated file name stays the same. Confirm and rename.
        self.tail_check = html_table(
            tails,
            name=self.plotfolder + '/pulsar_tails',
            heading='<h4>Table of %d sources on tails</h4>' % len(tails),
            float_format=FloatFormat(2))
        # report the limits actually used (same text as before for defaults)
        self.tail_check += 'Criteria: require index between {:g} and {:g} for powerlaw, beta<{} for log parabola'.format(
            index_min, index_max, float(beta_max))

        # flag sources: set bit 1 with OR rather than += 1, which is not
        # idempotent and would spill into other flag bits if already set
        tail_names = tails.index
        self.df.loc[tail_names, 'flags'] = np.asarray(
            self.df.loc[tail_names, 'flags'], int) | 1
        print('%d sources flagged (1) in tails of flux, index, or beta' %
              len(tail_names))
    else:
        self.tail_check = '<p>No sources on tails'

    # the beta 2-sigma error check is currently disabled; keep the key defined
    # because the docstring references %(beta_check)s
    self.beta_check = ''
    return fig
def new_candidates(self, in_4fgl=False):
    """Potential pulsar candidates
    Make a list of sources with the selections
    <ul>
    <li>not associated
    <li>not in 4FGL or withinn 0.5 deg of one
    <li>nearest 4FGL source is extended or has TS<1000
    </ul>
    The plots are for sources from this list, showing the effects of subsequent cuts:
    <ol>
    <li>0.15 < curvature < 0.75
    <li>pivot energy < 3 GeV
    <li>R95 < 15 arcmin
    </ol>
    <h4>%(candidate_table)s</h4>
    <br>A csv file of the above, including both cuts is
    <a href="../../%(pulsar_candidate_filename)s?download=true">here</a>
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (candidate_table, pulsar_candidate_filename); text kept as
    # found. Those two attributes are only assigned when in_4fgl is false.
    #
    # in_4fgl : when true, select sources that ARE in the catalog (fl8y==True)
    #           and store results under the *_in_4fgl attribute names.
    # Side effects: calls self.check4FGL, writes a CSV, sets the table /
    # DataFrame / filename attributes. Returns the figure (2x3 panels).

    # add info about 4FGL
    self.check4FGL(pattern=None)
    df = self.df
    if in_4fgl:
        # alternative: only in 4FGL (explicit copy: a column is added)
        dfx = df.query(
            'fl8y==True & aprob<0.8 & locqual<8 & psr==False').copy()
        dfx['sname'] = self.gdf.loc[dfx.index, 'sname']
    else:
        dfx = df.query(
            'fl8y==False & aprob<0.8 & locqual<8 & distance>0.5 & other_extended==False & otherts<1000'
        )

    # values to display
    ts = dfx.ts.astype(float).clip(0, 250)
    singlat = np.sin(np.radians(dfx.glat.astype(float)))
    curvature = dfx.curvature.astype(float).clip(0, 1)
    r95_arcmin = dfx.r95.astype(float) * 60
    pivot = dfx.pivot_energy.astype(float)
    eflux = dfx.eflux.astype(float)

    # cumulative selections: each entry adds one cut to the previous one
    curvature_cut = np.logical_and(curvature < 0.75, curvature > 0.15)
    r95_cut = r95_arcmin < 15
    pivot_cut = pivot < 3e3
    cut_labels = ['curvature', 'pivot', 'R95']
    cuts = [curvature_cut]
    cuts.append(cuts[-1] & pivot_cut)
    cuts.append(cuts[-1] & r95_cut)

    label_info = dict(
        name='Source Name,click for link to SED',
        pindex='Spectral index, at pivot energy',
        pivot_energy='pivot energy,',
        curvature='curvature,at the pivot energy',
        r95='localization,in degrees, 95%',
        distance='distance,in degrees to nearest 4FGL source',
        otherid='4FGL nick name,',
        otherts='4FGL TS,',
        locqual='locqual,localization quality',
    )
    dfcut = dfx[cuts[-1]][
        '''ra dec ts glat pindex pivot_energy curvature
        locqual r95 pars errs distance otherid otherts'''.split()].sort_values(
            by='ts', ascending=False)
    if in_4fgl:
        dfcut['sname'] = self.gdf.loc[dfcut.index, 'sname']

    # Heading: the count goes in the first slot and the catalog name in the
    # second (the arguments used to be swapped); the in-catalog variant no
    # longer claims the sources are "not in" the catalog.
    membership = 'in' if in_4fgl else 'not in'
    heading = ('<b>Table of {} pointlike sources ' + membership +
               ' {}, not assocated and with curvature selection</b>').format(
                   len(dfcut), self.fgl_name)
    suffix = '_in_4fgl' if in_4fgl else ''
    table = html_table(dfcut,
                       label_info,
                       heading=heading,
                       name=self.plotfolder + '/candidates' + suffix,
                       maxlines=20,
                       float_format=(FloatFormat(2)))
    csv_name = self.plotfolder + '/pulsar_candidates' + suffix + '.csv'
    if in_4fgl:
        self.df_pulsar_candidates_in_4fgl = dfcut
        self.candidate_table_in_4fgl = table
        self.pulsar_candidate_filename_in_4fgl = csv_name
    else:
        self.df_pulsar_candidates = dfcut
        self.candidate_table = table
        self.pulsar_candidate_filename = csv_name
    dfcut.to_csv(csv_name)

    fig, axx = plt.subplots(2, 3, figsize=(12, 10))
    ax1, ax2, ax3, ax4, ax5, ax6 = axx.flatten()
    hkw = dict(histtype='step', lw=2)

    def doit(ax, x, bins, xlabel, xlog=False):
        # full selection first, then one histogram per cumulative cut
        ax.hist(x, bins, label='', **hkw)
        for cut_label, cut in zip(cut_labels, cuts):
            ax.hist(x[cut], bins, label=cut_label, **hkw)
        ax.set(xlabel=xlabel, xscale='log' if xlog else 'linear')
        ax.legend()

    doit(ax1, curvature, np.linspace(0, 1, 21), 'curvature')
    doit(ax2, pivot, np.logspace(np.log10(200), np.log10(2e4), 21),
         'pivot energy', xlog=True)
    doit(ax3, r95_arcmin, np.linspace(0, 25, 26), 'R95 (arcmin)')
    doit(ax4, ts, np.logspace(1, np.log10(250), 25), 'TS', xlog=True)
    doit(ax5, singlat, np.linspace(-1, 1, 21), 'sin(b)')
    doit(ax6, eflux, np.logspace(np.log10(3e-2), np.log10(3), 21),
         'energy flux', xlog=True)
    fig.tight_layout()
    fig.set_facecolor('white')
    return fig
def pulsar_check(self):
    """LAT pulsar check
    %(atable)s
    %(missing_table)s
    """
    # NOTE(review): the docstring looks like a %-template filled from instance
    # attributes (atable, missing_table are assigned below); text kept as
    # found.
    #
    # Compares the LAT pulsar catalog (self.lcatdf) with the pulsar sources in
    # the model and builds HTML tables of catalog entries that are weak,
    # unassociated, or absent from the model.
    # Side effects: adds columns to self.lcatdf in place; sets self.latsel,
    # self.atable, self.missing_table, self.dfm and, when entries are missing,
    # self.missing_df.

    # compare with LAT pulsar catalog
    lat = self.lcatdf
    psrdf = self.df[self.df.psr].copy()
    diff = list(set(lat.index).difference(set(psrdf.index)))
    if len(diff) > 0:
        print('Missing from model: {}'.format(diff))
    lat['ts'] = psrdf['ts']
    lat['aprob'] = psrdf['aprob']
    lat['ROI_index'] = [
        Band(12).index(SkyDir(float(ra), float(dec)))
        for ra, dec in zip(lat.RAJ2000, lat.DEJ2000)
    ]
    lat['skydir'] = [
        SkyDir(float(ra), float(dec))
        for ra, dec in zip(lat.RAJ2000, lat.DEJ2000)
    ]
    lat['sourcedir'] = self.df.skydir[self.df.psr]
    # angular separation (deg); a float entry in 'sourcedir' is the NaN left
    # by index alignment, i.e. no model source for that catalog entry
    lat['delta'] = [
        np.degrees(s.difference(t)) if not type(t) == float else np.nan
        for s, t in zip(lat.skydir, lat.sourcedir)
    ]
    psrdf.loc[:, 'r95'] = 2.5 * np.sqrt(
        np.array(psrdf.a.values**2 + psrdf.b.values**2, float))
    lat.loc[:, 'r95'] = psrdf.r95

    dc2names = set(self.lcatdf.index)
    tt = set(self.df.name[self.df.psr])
    print('Catalog entries not found: {}'.format(
        list(dc2names.difference(tt))))

    # "missing": no/weak fit, or not associated unless very strong
    missing = np.array([np.isnan(x) or x < 10. for x in lat.ts])
    missing |= np.array((lat.aprob == 0) & (lat.ts < 1000))
    missing_names = lat.index[missing]
    cols = 'RAJ2000 DEJ2000 ts delta r95 ROI_index'.split()
    self.latsel = latsel = pd.DataFrame(
        np.array([lat[col][missing] for col in cols]),
        index=cols,
        columns=missing_names).T

    self.atable = '<h4>Compare with LAT pulsar catalog id: {}</h4>'.format(
        self.version)
    label_info = dict(
        ts='TS,Test Statistic from full fit',
        RAJ2000='RAJ2000, position of pulsar',
        delta='delta,distance to source position (deg)',
        r95='R95,95 radius (deg), NaN if not calculated',
        ROI_index='ROI Index,Index of the ROI, a HEALPix ring index')

    # Table of weak or not fit
    weak_df = latsel.query('ts<10')
    self.atable += html_table(
        weak_df,
        label_info,
        heading=
        '<p>{} LAT catalog entries with weak or no fit (TS<10); positioned at Pulsar'
        .format(len(weak_df)),
        name=self.plotfolder + '/weak',
        maxlines=20,
        float_format=(FloatFormat(2)))
    self.atable += html_table(
        latsel.query('ts>10'),
        label_info,
        heading=
        '<p>LAT catalog entries with nearby, but currently unassociated source',
        name=self.plotfolder + '/far',
        maxlines=20,
        float_format=(FloatFormat(2)))

    # catalog entries with no TS at all, i.e. not in the model
    self.missing_table = 'All LAT pulsasrs are in the current model!'
    self.dfm = dfm = self.latsel[pd.isna(self.latsel.ts)].copy()
    # was `len(dfm > 0)`: that compared every cell with 0 before taking the
    # length; the intended test is whether any rows are present
    if len(dfm) > 0:
        dfm.pop('ts')
        dfm.pop('delta')
        dfm.rename(columns=dict(RAJ2000='ra', DEJ2000='dec'), inplace=True)
        close = tools.find_close(dfm, self.df)
        dfm.loc[:, 'distance'] = close.distance
        dfm.loc[:, 'pointlike_name'] = close.otherid
        self.missing_df = dfm
        self.missing_table = html_table(
            dfm,
            label_info,
            heading='<p>LAT catalog entries not in this model',
            name=self.plotfolder + '/missing',
            float_format=(FloatFormat(2)))