def __init__(self, test=False):
    """Set up the DM lookup table, creating it when it is not yet on disk.

    Args:
        test (bool): If True, an existing table file is deleted first and
            the table is always rebuilt.
    """
    self.test = test
    self.set_file_name()

    # Setup database
    self.db = False
    self.step = 0.1
    self.rounding = 2

    # For parallel processes
    self.temp_path = None

    if self.test:
        self.step = 0.1
        if os.path.exists(self.file_name):
            os.remove(self.file_name)

    if os.path.exists(self.file_name) and self.test is False:
        self.db = True
    else:
        # Calculations take quite some time
        # Provide a way for people to quit
        try:
            self.create_table()
        except KeyboardInterrupt:
            pprint('Losing all progress in calculations')
            # Clean up whatever was left behind. Either file may be absent
            # depending on when the interrupt arrived, and temp_path stays
            # None if no temporary file was ever registered.
            for leftover in (self.file_name, self.temp_path):
                if leftover and os.path.exists(leftover):
                    os.remove(leftover)
            sys.exit()
def __init__(self, H_0=67.74, W_m=0.3089, W_v=0.6911, test=False):
    """Store the cosmological parameters and ensure a distance table exists.

    Args:
        H_0 (float): Stored as ``self.H_0``.
        W_m (float): Stored as ``self.W_m``.
        W_v (float): Stored as ``self.W_v``.
        test (bool): If True, use a coarser redshift step, delete any
            existing table file and rebuild it.
    """
    self.H_0, self.W_m, self.W_v = H_0, W_m, W_v
    self.test = test
    self.set_file_name()

    # Setup database
    self.db = False
    self.step = 0.001 if self.test else 0.00001
    self.z_max = 6.5

    # Test runs always start from a clean slate
    if self.test and os.path.exists(self.file_name):
        os.remove(self.file_name)

    # An existing table can be reused outside of test runs
    if self.test is False and os.path.exists(self.file_name):
        self.db = True
        return

    # Calculations take quite some time
    # Provide a way for people to quit
    try:
        self.create_table()
    except KeyboardInterrupt:
        pprint('Losing all progress in calculations')
        os.remove(self.file_name)
        sys.exit()
def match_surveys(self, interrupt=True):
    """Match up frbs with surveys.

    Adds a ``survey`` column to ``self.df``. Rules based on telescope,
    telescope mode, back end and date are applied in order and only fill
    rows that have no survey yet; afterwards a few FRBs are assigned
    explicitly by name.

    Args:
        interrupt (bool): Whether to print a notice when some FRBs could
            not be matched to a survey.
    """
    self.df['survey'] = None

    # Add single survey instruments
    # Bit rough, but will work in a pinch
    def cond(telescope=None, mode=None):
        """Mask of still-unassigned rows matching telescope and mode."""
        mask = self.df.survey.isnull()
        if telescope:
            mask &= (self.df.telescope.str.lower() == telescope.lower())
        if mode:
            mask &= (self.df.telescope_mode.str.lower() == mode.lower())
        return mask

    def assign(mask, survey):
        """Set the survey name on every row selected by a boolean mask."""
        # .loc is the indexer for boolean masks; .at addresses one cell only
        self.df.loc[mask, 'survey'] = survey

    # Should be accurate up till 2020
    # Order matters: each rule only claims rows that are still unassigned
    rules = [
        ('WSRT', 'Apertif', 'wsrt-apertif'),
        ('askap', 'Incoherent', 'askap-incoh'),
        ('askap', '900MHz', 'askap-incoh'),
        ('askap', 'Coherent', 'askap-coh'),
        ('askap', 'FlysEye', 'askap-fly'),
        ('CHIME', None, 'chime-frb'),
        ('MOST', None, 'utmost-1d'),
        ('VLA', None, 'vla-realfast'),
        ('GMRT', None, 'gmrt'),
        ('SRT', None, 'srt'),
        ('GBT', 'GUPPI', 'gbt-guppi'),
        ('EFFELSBERG', None, 'effelsberg'),
        ('OVRO', 'DSA-10', 'dsa10'),
        ('FAST', None, 'fast-crafts'),
        ('LPA', None, 'pushchino'),
        ('Arecibo', 'ALFA', 'arecibo-palfa'),
        ('Arecibo', 'L-Wide', 'arecibo-l-wide'),
    ]
    for telescope, mode, survey in rules:
        assign(cond(telescope, mode), survey)

    # Parkes is more tricky
    c = 'photometry_date'
    pmsurv = cond('Parkes') & (self.df.back_end == 'AFB-MB20')
    assign(pmsurv, 'parkes-pmsurv')
    htru = cond('Parkes') & (self.df[c].dt.year > 2008)
    htru &= (self.df[c].dt.year < 2015)
    assign(htru, 'parkes-htru')
    # This means the default survey for Parkes is superb!
    superb = cond('Parkes') & (self.df[c].dt.year >= 2015)
    assign(superb, 'parkes-superb')

    # Manually add some tricky ones (these override the rules above)
    assign(self.df['name'] == 'FRB 20010125A', 'parkes-swmb')
    assign(self.df['name'] == 'FRB 20150807A', 'parkes-party')
    ppta = (171209, 180309, 180311, 180714)
    for frb in ppta:
        assign(self.df['name'] == f'FRB 20{frb}A', 'parkes-ppta')

    # Check whether any FRBs have not yet been assigned
    no_surveys = self.df['survey'].isnull()

    if interrupt and no_surveys.any():
        pprint(f'There are {no_surveys.sum()} FRBs with undefined surveys')
        m = 'TNS().match_surveys() in frbpoppy/tns.py should be updated'
        pprint(m)
def check(self, path):
    """Return path with a trailing slash, creating the directory if absent."""
    # Just convenient to have directories ending in a slash
    if path[-1] != '/':
        path = f'{path}/'

    # Nothing more to do when the directory is already there
    if os.path.exists(path):
        return path

    pprint(f"Creating directory {path}")
    os.makedirs(path)
    return path
def gen_time(self):
    """Generate burst time stamps; does nothing unless repeaters are set."""
    # Only relevant for repeaters
    if self.repeaters:
        pprint('Adding burst times')
        self.frbs.time = self.time_func()
        # The time array sets the size for all other parameters
        self.shape = self.frbs.time.shape
        pprint('Finished adding burst times')
def generate(self):
    """Generate a full CosmicPopulation by running every gen_* step in order."""
    pprint(f'Generating {self.name} population')

    # The individual generators, in the order in which they must run
    steps = (
        'gen_index',
        'gen_dist',
        'gen_time',
        'gen_direction',
        'gen_gal_coords',
        'gen_dm',
        'gen_w',
        'gen_lum',
        'gen_si',
    )
    for step in steps:
        getattr(self, step)()

    pprint(f'Finished generating {self.name} population')
def get_data(self):
    """Read in populations.

    Every existing file in ``self.files`` is unpickled and converted to a
    DataFrame, which is appended to ``self.dfs`` together with a short
    label in ``self.labels``. If ``self.tns`` is set, the TNS catalogue is
    appended as well, optionally filtered on a survey or telescope name.

    Raises:
        ValueError: If ``self.tns`` is a string that matches neither a
            survey nor a telescope in the TNS catalogue.
    """
    # Read in files
    for f in self.files:

        # Skip paths that do not point to a file, so that nothing below can
        # run with a missing or left-over DataFrame
        if not os.path.isfile(f):
            continue

        try:
            df = unpickle(f).frbs.to_df()
        except ValueError:
            pprint(f'Unpacking {f} seemed to have failed.')
            continue

        # If things haven't worked
        if df is None or df.empty:
            m = 'Skipping population {} - contains no sources'.format(f)
            pprint(m)
            continue

        # Derive a label from the file name only, so that dots in directory
        # names can not influence it
        base = os.path.basename(f)
        if '.' in base:
            name = base.rsplit('.', 1)[0]
            if '_for_plotting' in name:
                name = name.split('_for_plotting')[0]
            if len(name) > 15:
                name = name.split('_')[-1]
        else:
            name = f

        # Downsample population size if it's too large
        if df.shape[0] > 10000:
            pprint(f'Downsampling population {f} (else too big to plot)')
            df = df.sample(n=10000)

        df['color'] = self.colours[self.n_df]
        df['lum_bol'] = df['lum_bol'] / 1e30  # Sidestepping Bokeh issue

        self.dfs.append(df)
        self.labels.append(name)
        self.n_df += 1

    # Add on tns
    if self.tns:
        df = TNS(frbpoppy=True).df

        # Filter by survey if wished
        if isinstance(self.tns, str):
            selected = df[df.survey == self.tns]
            if selected.empty:
                selected = df[df.telescope == self.tns]
            if selected.empty:
                m = 'Your chosen input for tns is not found.'
                raise ValueError(m)
            # Copy so the colour column is added to an independent frame
            # rather than to a slice of the catalogue
            df = selected.copy()

        df['color'] = self.colours[len(self.dfs)]
        self.dfs.append(df)
        self.labels.append(f'tns {self.tns}')
def __str__(self):
    """Format the rates as a table and return it via pprint."""
    row = '{:20.19} {:>10} {:>10} {:>10}\n'

    def r(value, d=4):
        """Round a value"""
        return round(value, d)

    def per(value):
        """Calculate the percentage."""
        return r(value / self.tot * 100)

    # Set up title, underlined over the width of the header row
    header = row.format(self.name, 'Days',
                        f'{self.object_type.title()}s', '%')
    line = '-' * len(header.split('\n')[-2].strip()) + '\n'

    # Format rates
    days = r(self.days)
    cells = [
        ('Cosmic Population', days, r(self.tot), 100),
        ('Too late', days, r(self.late), per(self.late)),
        ('Outside survey', days, r(self.out), per(self.out)),
        ('Outside pointings', days, r(self.pointing), per(self.pointing)),
        ('Too faint', days, r(self.faint), per(self.faint)),
        ('Detected', days, r(self.det), per(self.det)),
        ('/Gpc^3', 365.25, r(self.vol, 2), '-'),
        ('Expected', r(self.exp, 4), 1, '-'),
    ]
    body = ''.join(row.format(*entry) for entry in cells)

    t = header + line + body + line
    return pprint(t, output=False)
def run(self):
    """Generate and survey a large population in chunks of max_size."""
    pprint(f'Running a large {self.base_name} population')

    # Split the total number of sources into full chunks plus a remainder
    n_full, remainder = divmod(self.pop.n_srcs, self.max_size)
    sizes = [self.max_size] * n_full
    if remainder != 0:
        sizes.append(remainder)

    # One short unique identifier per chunk
    self.uids = [str(uuid.uuid4())[:8] for _ in sizes]

    pop = self.pop
    for n, uid in zip(tqdm(sizes, desc='Subpopulations'), self.uids):
        pop.n_srcs = n
        pop.uid = uid
        pop.generate()

        # Survey the chunk with every survey and save each result
        for surv in self.surveys:
            surv_pop = SurveyPopulation(pop, surv, scale_by_area=False)
            surv_pop.uid = pop.uid
            surv_pop.save()
def match_surveys(self, interrupt=True):
    """Match up frbs with surveys.

    Merges the survey names from ``paper_survey.csv`` (located in
    ``self.path``) into ``self.df`` and then fills the remaining gaps
    for telescopes that map onto a single survey.

    Args:
        interrupt (bool): Whether to print instructions when some FRBs
            are still without a survey afterwards.
    """
    # Merge survey names
    surf = os.path.join(self.path, 'paper_survey.csv')
    self._surveys = pd.read_csv(surf)
    cols = ['frb_name', 'pub_description']
    self.df = pd.merge(self.df, self._surveys, on=cols, how='left')

    # Clean up possible unnamed columns
    self.df = self.df.loc[:, ~self.df.columns.str.contains('unnamed')]

    # Add single survey instruments
    # Bit rough, but will work in a pinch
    def cond(t):
        """Mask of rows from telescope t that have no survey yet."""
        return (self.df.telescope == t) & (self.df.survey.isnull())

    single_survey = [
        ('wsrt-apertif', 'wsrt-apertif'),
        ('askap', 'askap-incoh'),
        ('chime', 'chime'),
        ('srt', 'srt'),
        ('effelsberg', 'effelsberg'),
        ('gbt', 'guppi'),
        ('fast', 'crafts'),
    ]
    for telescope, survey in single_survey:
        # .loc is the indexer for boolean masks; .at addresses one cell only
        self.df.loc[cond(telescope), 'survey'] = survey

    # Check whether any FRBs have not yet been assigned
    no_surveys = self.df['survey'].isnull()

    if interrupt and no_surveys.any():
        cols = ['pub_description', 'frb_name']
        ns_df = self.df[no_surveys].drop_duplicates(subset=cols,
                                                    keep='first')

        pprint('It seems there are new FRBs!')
        m = " - Frbcat doesn't know which *survey* was running when the "
        m += "FRB was seen"
        pprint(m)
        m = " - To use these recent detections, please link the FRB to a "
        m += "survey by:"
        pprint(m)
        pprint(' - Adding these frbs to {}'.format(surf))

        # Print ready-to-paste csv rows for the unmatched FRBs
        for _, r in ns_df[cols].iterrows():
            title, name = r
            if isinstance(title, str):
                title = title.replace('\n', '')
            print(f'"{title}","{name}",""')
def __init__(self, cosmic_pop, survey, scat=False, scin=False,
             mute=False, scale_by_area=True):
    """
    Run a survey to detect FRB sources.

    Args:
        cosmic_pop (Population): Population class of FRB sources to observe
        survey (Survey): Survey class with which to observe
        scat (bool, optional): Whether to include scattering in signal to
            noise calculations.
        scin (bool, optional): Whether to apply scintillation to
            observations.
        mute (bool): Whether to suppress printing to terminal
        scale_by_area (bool): Whether to scale detection rates to the sky
            area visible to a survey. Only relevant for one-offs.

    Raises:
        ValueError: If cosmic_pop has not been generated, if cosmic_pop
            has no usable ``frbs`` attribute (population and survey were
            probably swapped), or if scintillation is requested for a
            repeater population.
    """
    if not mute:
        pprint(f'Surveying {cosmic_pop.name} with {survey.name}')

    # Stops RuntimeWarnings about nan values.
    # np.warnings was merely the stdlib module re-exported by NumPy and is
    # absent from current NumPy releases, so use the stdlib module directly.
    import warnings
    warnings.filterwarnings('ignore')

    # Check whether CosmicPopulation has been generated
    try:
        if cosmic_pop.frbs.ra is None:
            m = 'You may have forgotten to generate your CosmicPopulation'
            raise ValueError(m)
    except AttributeError:
        m = 'You probably switched the population and survey in the '
        m += 'input of SurveyPopulation'
        raise ValueError(m)

    # Set up population
    Population.__init__(self)

    # Set attributes
    self.name = f'{cosmic_pop.name}_{survey.name}'
    self.vol_co_max = cosmic_pop.vol_co_max
    self.n_days = cosmic_pop.n_days
    self.repeaters = cosmic_pop.repeaters
    # Work on a copy so that cosmic_pop itself is left untouched
    self.frbs = deepcopy(cosmic_pop.frbs)
    self.source_rate = Rates('source')

    if self.repeaters:
        self.burst_rate = Rates('burst')

    self.scat = scat
    self.scin = scin
    self.survey = survey
    self.scale_by_area = scale_by_area

    # Set survey attributes if not available
    if survey.n_days is None:
        survey.n_days = self.n_days

    # Calculations differ for repeaters
    if self.repeaters is True and scin is True:
        m = 'Scintillation is currently not implemented for '
        m += 'RepeaterPopulations'
        raise ValueError(m)

    # For convenience
    frbs = self.frbs
    sr = self.source_rate
    sr.tot = cosmic_pop.n_srcs

    if self.repeaters:
        br = self.burst_rate
        br.tot = self.frbs.time.size

        # Bursts which are too late have already been removed
        # (non-nan entries per row of the time array are counted)
        self.n_brst_pr_src = np.count_nonzero(~np.isnan(self.frbs.time), 1)
        br.late += br.tot - np.sum(self.n_brst_pr_src)
        sr.late += sr.tot - len(self.n_brst_pr_src)

    # Check whether source is in region
    region_mask = survey.in_region(frbs.ra, frbs.dec, frbs.gl, frbs.gb)
    frbs.apply(region_mask)

    # Keep track of detection numbers
    sr.out = np.sum(~region_mask)

    if self.repeaters:
        br.out = np.sum(self.n_brst_pr_src[~region_mask])
        self.n_brst_pr_src = self.n_brst_pr_src[region_mask]

    # Calculate dispersion measure across single channel
    frbs.t_dm = survey.calc_dm_smear(frbs.dm)

    # Set scattering timescale
    if scat:
        frbs.t_scat = survey.calc_scat(frbs.dm)

    # Calculate total temperature
    frbs.T_sky, frbs.T_sys = survey.calc_Ts(frbs.gl, frbs.gb)

    # Calculate effective pulse width
    frbs.w_eff = survey.calc_w_eff(frbs.w_arr, frbs.t_dm, frbs.t_scat)

    # Calculate peak flux density
    frbs.s_peak = survey.calc_s_peak(frbs.si,
                                     frbs.lum_bol,
                                     frbs.z,
                                     frbs.dist_co,
                                     frbs.w_arr,
                                     frbs.w_eff,
                                     f_low=cosmic_pop.f_min,
                                     f_high=cosmic_pop.f_max)

    # Calculations differ whether dealing with repeaters or not
    if self.repeaters:
        self.det_repeaters()
    else:
        self.det_oneoffs()

    # Prevent additional memory usage
    self.survey = None
def plot(*pops, files=None, tns=False, show=True, mute=True, port=5006,
         print_command=False):
    """
    Plot populations with bokeh. Has to save populations before plotting.

    Args:
        *pops (Population or str, optional): The populations you would
            like to see plotted. A string is taken as the name of an
            already saved population.
        files (list, optional): List of population files to plot. The
            list passed in is not modified.
        tns (bool or str, optional): Whether to plot tns parameters. A
            non-empty string is passed through to the plotting script.
            Defaults to False.
        show (bool, optional): Whether to display the plot or not. Mainly
            used for debugging purposes. Defaults to True.
        mute (bool): Whether to suppress output from Bokeh
        port (int): The port on which to launch Bokeh
        print_command (bool): Whether to show the command do_plot is running
    """
    # A fresh list per call: a mutable default would keep collecting the
    # paths of earlier calls
    files = [] if files is None else list(files)

    # Save populations
    for pop in pops:

        if isinstance(pop, str):
            name = pop
        else:
            # Check whether empty population
            if pop.n_sources() < 1:
                pprint(f'Skipping {pop.name} population as no sources')
                continue
            pop.name = pop.name.lower()
            if '_for_plotting' not in pop.name:
                pop.name += '_for_plotting'
            name = pop.name
            pop.save()

        # Save location
        file_name = name + '.p'
        out = os.path.join(paths.populations(), file_name)
        files.append(out)

    # Check whether plotting is needed
    if len(files) == 0 and tns is False:
        pprint('No populations to plot')
        return

    # Command for starting up server
    command = 'nice -n 19'.split(' ')

    if show:
        command.extend('bokeh serve --show'.split(' '))
        # Command for changing port (an option of bokeh, not of python3)
        if port != 5006:
            command.append(f'--port={port}')
    else:
        command.append('python3')

    # Add script path
    script = 'plot.py'
    out = os.path.join(os.path.dirname(__file__), script)
    command.append(out)

    # For the arguments
    command.append('--args')

    # Add tns; the flag is always followed by exactly one value so that a
    # file path can never be mistaken for it
    command.append('-tns')
    if tns is True:
        command.append('True')
    elif isinstance(tns, str) and len(tns) > 0:
        command.append(f'{tns}')
    else:
        command.append('False')

    # Add in populations
    for f in files:
        command.append(f'"{f}"')

    # Let people know what's happening
    pprint('Plotting populations')

    if print_command:
        pprint(' '.join(command))

    pprint('Press Ctrl+C to quit')

    # Add method to gracefully quit plotting
    try:
        out = subprocess.DEVNULL if mute else None
        subprocess.run(command, stderr=out, stdout=out)
    except KeyboardInterrupt:
        print(' ')
        sys.exit()
curdoc().title = 'frbpoppy'
curdoc().add_root(L)

# Parse system arguments
# (I know ArgumentParser is nicer, but bokeh only works with argv)
args = sys.argv

# Whether to plot the tns population.
# Give tns a value up front so it is always bound, also when the flag is
# absent or is the very last argument.
# NOTE(review): True is taken from the fallback assignment this replaces,
# which set a misspelled name to True - confirm this is the wanted default.
tns = True
if '-tns' in args:
    value_pos = args.index('-tns') + 1
    if value_pos < len(args):
        tns = args[value_pos]
        if tns == 'True':
            tns = True
        elif tns == 'False':
            tns = False
        # Any other value is kept as a string

# Which files to plot (paths may arrive wrapped in double quotes)
unquoted = (a.strip('"') for a in args)
files = [a for a in unquoted if a.endswith('.p')]

# Check whether populations have been given as input
if len(files) == 0:
    pprint('Nothing to plot: plot arguments are empty')
else:
    Plot(files=files, tns=tns)
def create_table(self, parallel=True):
    """Create a lookup table for dispersion measure.

    Evaluates ``go.ne2001_dist_to_dm`` for every point on a grid of
    Galactic longitude and latitude with spacing ``self.step`` and stores
    the values in the table ``dm`` of the SQLite file ``self.file_name``.

    Args:
        parallel (bool): Whether to spread the calculation over several
            workers via ``Parallel``. The path of the temporary memmap
            used for this is kept in ``self.temp_path``.
    """
    # Connect to database
    conn = sqlite3.connect(self.file_name)

    # Make sure the connection is released, also when interrupted
    try:
        c = conn.cursor()

        # Set array of coordinates
        gls = np.arange(-180., 180. + self.step, self.step).round(1)
        gbs = np.arange(-90., 90. + self.step, self.step).round(1)
        dist = 0.1  # [Gpc]

        gls = gls.astype(np.float32)
        gbs = gbs.astype(np.float32)

        # Create database
        c.execute('create table dm ' +
                  '(gl real, gb real, dm_mw real)')

        # Give an update on the progress
        m = ['Creating a DM lookup table',
             '  - Only needs to happen once',
             '  - Unfortunately pretty slow',
             '  - Prepare to wait for ~1.5h (4 cores)',
             '  - Time given as [time_spent<time_left] in (hh:)mm:ss',
             'Starting to calculate DM values']
        for n in m:
            pprint(n)

        n_opt = len(gls) * len(gbs)
        # All (gl, gb) combinations, one per row
        options = np.array(np.meshgrid(gls, gbs)).T.reshape(-1, 2)
        dm_mw = np.zeros(len(options)).astype(np.float32)

        def dm_tot(i, dm_mw):
            """Store the DM of grid point i in dm_mw[i]."""
            gl, gb = options[i]
            dm_mw[i] = go.ne2001_dist_to_dm(dist, gl, gb)

        if parallel:

            temp_path = os.path.join(paths.models(), 'universe/')
            temp_path += 'temp.mmap'
            self.temp_path = temp_path

            # Make a temp memmap to have a shareable memory object
            temp = np.memmap(temp_path, dtype=dm_mw.dtype,
                             shape=len(dm_mw),
                             mode='w+')

            # Parallel process in order to populate array.
            # Leave one core free, use at most four, but always at least
            # one (cpu_count can be 1, or None when undetermined)
            n_cores = os.cpu_count() or 1
            j = max(1, min(4, n_cores - 1))
            r = range(n_opt)
            Parallel(n_jobs=j)(delayed(dm_tot)(i, temp) for i in tqdm(r))

            # Copy the results out and let go of the memmap before its
            # backing file is deleted
            dm_mw = np.array(temp)
            del temp

            # Delete the temporary file
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                print(f'Unable to remove {temp_path}')

        else:
            for i in tqdm(range(n_opt)):
                dm_tot(i, dm_mw)

        # Map results to (gl, gb, dm_mw) rows
        r = np.concatenate((options, dm_mw[:, np.newaxis]), axis=1)
        results = map(tuple, r.tolist())

        # Save results to database
        pprint('  - Saving results')
        c.executemany('insert into dm values (?,?,?)', results)

        # Make for easier searching
        c.execute('create index ix on dm (gl, gb)')

        # Save
        conn.commit()
    finally:
        conn.close()

    pprint('Finished DM table')
def create_table(self):
    """Create a lookup table for distances.

    For redshifts from 0 up to ``self.z_max`` in steps of ``self.step``
    the values returned by ``go.Redshift`` (``dist_co``, ``vol_co``), the
    volume increments and the normalised cumulative distributions built
    from ``sfr`` and ``smd`` are stored in the table ``distances`` of the
    SQLite file ``self.file_name``.
    """
    m = ['Creating a distance table',
         '  - Only needs to happen once',
         '  - May take up to 2m on a single core']
    for n in m:
        pprint(n)

    H_0 = self.H_0
    W_m = self.W_m
    W_v = self.W_v

    W_k = 1.0 - W_m - W_v  # Omega curvature

    # Compare with a tolerance: the subtraction of floats rarely yields
    # exactly zero, even when the parameters do sum to one
    if abs(W_k) > 1e-9:
        pprint('Careful - Your cosmological parameters do not sum to 1.0')

    zs = np.arange(0, self.z_max + self.step, self.step)

    # Connect to database
    conn = sqlite3.connect(self.file_name)

    # Make sure the connection is released, also when interrupted
    try:
        c = conn.cursor()

        # Create database
        t = 'real'
        par = f'(z {t}, dist {t}, vol {t}, dvol {t}, '
        par += f'cdf_sfr {t}, cdf_smd {t})'
        s = f'create table distances {par}'
        c.execute(s)

        pprint('  - Calculating parameters at various redshifts')
        conv = go.Redshift(zs, H_0=H_0, W_m=W_m, W_v=W_v)
        dists = conv.dist_co()
        vols = conv.vol_co()

        # Get dV (first element stays zero)
        dvols = np.zeros_like(vols)
        dvols[1:] = np.diff(vols)

        pprint('  - Calculating Star Formation Rate')
        # Get pdf sfr
        pdf_sfr = sfr(zs) * dvols
        cdf_sfr = np.cumsum(pdf_sfr)  # Unnormalized
        cdf_sfr /= cdf_sfr[-1]

        pprint('  - Calculating Stellar Mass Density')
        # Get pdf csmd
        pdf_smd = smd(zs, H_0=H_0, W_m=W_m, W_v=W_v) * dvols
        cdf_smd = np.cumsum(pdf_smd)  # Unnormalized
        cdf_smd /= cdf_smd[-1]

        # One row per redshift
        results = np.stack((zs, dists, vols, dvols, cdf_sfr, cdf_smd)).T

        pprint('  - Saving values to database')
        # Save results to database
        data = map(tuple, results.tolist())
        c.executemany('insert into distances values (?,?,?,?,?,?)', data)

        # Make for easier searching
        # Every index needs a name of its own within the database
        c.execute('create index ix on distances (z)')
        c.execute('create index ixx on distances (dist)')
        c.execute('create index ixxx on distances (vol)')
        c.execute('create index ixxxx on distances (dvol)')
        c.execute('create index ixxxxx on distances (cdf_sfr)')
        c.execute('create index ixxxxxx on distances (cdf_smd)')

        # Save
        conn.commit()
    finally:
        conn.close()

    pprint('Finished distance table')
def merge(self):
    """Merge the saved subpopulations of each survey into one population."""
    pprint('Merging populations')

    def pad_to_common_width(arrays):
        """Pad 2D arrays with nan along axis 1 up to the widest one."""
        width = max([a.shape[1] for a in arrays])
        padded = []
        for a in arrays:
            if a.shape[1] == width:
                padded.append(a)
                continue
            filled = np.full((a.shape[0], width), np.nan)
            filled[:, :a.shape[1]] = a
            padded.append(filled)
        return padded

    self.pops = []

    for s in self.surveys:
        files = [f'{self.base_name}_{s.name}_{uid}' for uid in self.uids]
        pops = [unpickle(f) for f in files]

        # Main population, into which all others are merged
        mp = pops[0]

        # Merge each parameter that is stored as an array
        for attr in mp.frbs.__dict__.keys():
            if type(getattr(mp.frbs, attr)) is not np.ndarray:
                continue

            parms = [getattr(pop.frbs, attr) for pop in pops]

            try:
                merged_parm = np.concatenate(parms, axis=0)
            except ValueError:
                # Arrays differ in width, so pad them to the same shape
                parms = pad_to_common_width(parms)
                merged_parm = np.concatenate(parms, axis=0)

            setattr(mp.frbs, attr, merged_parm)

        # Add up detections
        for pop in pops[1:]:
            main_sr, sr = mp.source_rate, pop.source_rate
            main_sr.faint += sr.faint
            main_sr.late += sr.late
            main_sr.out += sr.out
            main_sr.det += sr.det

            if mp.repeaters:
                main_br, br = mp.burst_rate, pop.burst_rate
                main_br.faint += br.faint
                main_br.late += br.late
                main_br.out += br.out
                main_br.det += br.det
                main_br.pointing += br.pointing

        # Recalculate detection rates
        mp.calc_rates(s)

        # Save the main population as one big population
        mp.uid = None
        mp.save()

        # Remove all of the smaller ones
        for f in files:
            os.remove(paths.populations() + f'{f}.p')

        self.pops.append(mp)
pop.set_emission_range(low=10e7, high=10e9) pop.set_lum(model='powerlaw', low=1e40, high=1e45, power=-0.8) pop.set_w(model='lognormal', mean=6.3e-3, std=.6) pop.set_si(model='constant', value=-0.4) if pop.repeaters: pop.set_time(model='poisson', rate=9) if generate: pop.generate() return pop if __name__ == '__main__': # Quick test whether everything seems to be working or not import os import matplotlib.pyplot as plt pop = CosmicPopulation(1e4) pop.generate() frbs = pop.frbs for arg in frbs.__dict__: pprint(f'Plotting {arg}') values = getattr(frbs, arg) if values is not None: plt.hist(values, bins=50) plt.xlabel(arg) p = f'../tests/plots/{arg}.png' p = os.path.join(os.path.abspath(os.path.dirname(__file__)), p) plt.savefig(p) plt.clf()