def riptide_search(bursts, pmin=1, pmax=50, ts_bin_width=0.05, nbins_profile=40, nopbar=False):
    """
    Periodicity search that folds a binned burst time series at a set of
    trial periods and scores every folded profile with
    ``get_continuous_frac`` (fraction of the folded profile without any
    detectable activity), as used in Rajwade et al (2020).

    :param bursts: List or array of burst MJDs
    :param pmin: Minimum period to search (in units of days)
    :param pmax: Maximum period to search (in units of days)
    :param ts_bin_width: Width of the bins used to histogram the burst MJDs
        (in units of days; it is converted to seconds to get the sampling time)
    :param nbins_profile: Number of bins in the folded profile
    :param nopbar: Disable progress bar
    :return: continuous_frac (one value per trial period), periods (in days)
    """
    seconds_per_day = 24 * 60 * 60

    # Work in seconds internally. The multiplication chain is kept in this
    # exact order so floating-point results are unchanged.
    pmin = pmin * 24 * 60 * 60
    pmax = pmax * 24 * 60 * 60
    tsamp = ts_bin_width * 24 * 60 * 60

    # Histogram the bursts on a regular grid spanning first to last burst.
    first_burst = np.min(bursts)
    last_burst = np.max(bursts)
    n_edges = int((last_burst - first_burst) / ts_bin_width)
    ts_arr = np.linspace(first_burst, last_burst, n_edges)
    hist, edges = np.histogram(bursts, bins=ts_arr)
    bin_mids = (edges[1:] + edges[:-1]) / 2

    # Only presence/absence of activity matters, not the number of bursts.
    hist[hist > 0] = 1
    ts = TimeSeries(hist * bin_mids, tsamp)

    # Trial periods are sampled uniformly in frequency.
    n_trials = int((pmax - pmin) / max(bin_mids))
    fs = np.linspace(1 / pmax, 1 / pmin, n_trials)
    periods = 1 / fs

    # Drop periods for which one profile bin would not exceed one sample.
    valid_period_mask = periods / nbins_profile > ts.tsamp
    if len(periods) > valid_period_mask.sum():
        periods = periods[valid_period_mask]
        logging.warning(
            f'Period/nbins should be greater than tsamp. Not all periods in the given range are valid. '
            f'Selecting the valid periods from {np.min(periods)/(24*60*60)} days for search.'
        )

    continuous_frac = [
        get_continuous_frac(ts.fold(p, bins=nbins_profile, subints=1))
        for p in tqdm.tqdm(periods, disable=nopbar)
    ]

    arg = np.argmax(continuous_frac)
    logging.info(
        f'Max continuous fraction without data is {continuous_frac[arg]} '
        f'at a period of {periods[arg]/(24*60*60)} days')

    return np.array(continuous_frac), periods / seconds_per_day
def generate_data_presto(outdir, basename, tobs=128.0, tsamp=256e-6, period=1.0, dm=0.0,
                         amplitude=20.0, ducy=0.05):
    """
    Generate a time series containing a fake signal and write it to the
    given output directory as a PRESTO .inf / .dat file pair.

    Parameters
    ----------
    outdir : str
        Path to the output directory
    basename : str
        Base file name (not path) under which the .inf and .dat files
        will be saved.
    tobs, tsamp, period, amplitude, ducy :
        Forwarded to TimeSeries.generate(); the noise standard deviation
        is fixed to 1.0.
    dm :
        Value substituted into the 'dm' field of INF_TEMPLATE.
    """
    # Target paths only depend on the arguments, so build them up front
    inf_path = os.path.join(outdir, f"{basename}.inf")
    dat_path = os.path.join(outdir, f"{basename}.dat")

    ### IMPORTANT: seed the RNG to get reproducible results ###
    np.random.seed(0)
    fake_ts = TimeSeries.generate(
        tobs, tsamp, period,
        amplitude=amplitude, ducy=ducy, stdnoise=1.0
    )

    # Header first, then the raw samples
    header = INF_TEMPLATE.format(
        basename=basename, nsamp=fake_ts.nsamp, tsamp=tsamp, dm=dm
    )
    with open(inf_path, 'w') as inf_file:
        inf_file.write(header)
    fake_ts.data.tofile(dat_path)
def test_presto():
    """Load the fake PRESTO .inf file and compare it with the reference data."""
    # What is supposed to be in the data
    expected = np.arange(16)

    inf_path = os.path.join(DATA_DIR, 'fake_presto.inf')
    loaded = TimeSeries.from_presto_inf(inf_path)

    assert loaded.nsamp == 16
    assert loaded.tsamp == 64e-6
    assert loaded.data.dtype == np.float32
    assert np.allclose(loaded.data, expected)
def test_sigproc():
    """Read the fake SIGPROC files and compare them with the reference data."""
    # What is supposed to be in the data
    expected = np.arange(16)

    readable = (
        'fake_sigproc_float32.tim',
        'fake_sigproc_uint8.tim',
        'fake_sigproc_int8.tim',
    )
    for path in (os.path.join(DATA_DIR, name) for name in readable):
        loaded = TimeSeries.from_sigproc(path)
        assert loaded.nsamp == 16
        assert loaded.tsamp == 64e-6
        assert loaded.data.dtype == np.float32
        assert np.allclose(loaded.data, expected)

    # Check that trying to read 8-bit SIGPROC data without a 'signed'
    # header key raises an error
    unsigned_path = os.path.join(DATA_DIR, 'fake_sigproc_uint8_nosignedkey.tim')
    with raises(ValueError):
        TimeSeries.from_sigproc(unsigned_path)
def test_generate():
    """Check length, sampling time, dtype and amplitude of generated data."""
    duration = 10.0  # s
    sampling_time = 0.01  # s
    signal_period = 1.0  # s
    signal_amplitude = 25.0

    # Generate noiseless data to check its amplitude
    noiseless = TimeSeries.generate(
        duration, sampling_time, signal_period,
        amplitude=signal_amplitude, stdnoise=0
    )

    assert noiseless.length == duration
    assert noiseless.tsamp == sampling_time
    assert noiseless.data.dtype == np.float32

    # Square root of the summed squared samples should match the amplitude
    l2_norm = sum(noiseless.data ** 2) ** 0.5
    assert np.allclose(l2_norm, signal_amplitude, atol=FLOAT_ATOL)
def test_numpy_binary():
    """Build a TimeSeries from an array, a .npy file and a raw float32 file."""
    expected = np.arange(16)
    sampling_time = 64e-6

    def assert_matches_reference(series):
        assert series.nsamp == expected.size
        assert series.tsamp == sampling_time
        assert series.data.dtype == np.float32
        assert np.allclose(series.data, expected)

    # Straight from an in-memory array
    assert_matches_reference(TimeSeries.from_numpy_array(expected, sampling_time))

    # Via a .npy file
    with tempfile.NamedTemporaryFile(suffix='.npy') as npy_file:
        # re-creates the file, still gets deleted on exiting context mgr
        np.save(npy_file.name, expected)
        from_npy = TimeSeries.from_npy_file(npy_file.name, sampling_time)
        assert_matches_reference(from_npy)

    # Via a raw float32 binary file
    with tempfile.NamedTemporaryFile(suffix='.bin') as bin_file:
        # re-creates the file, still gets deleted on exiting context mgr
        expected.astype(np.float32).tofile(bin_file.name)
        from_bin = TimeSeries.from_binary(bin_file.name, sampling_time)
        assert_matches_reference(from_bin)
def test_presto():
    """Load the fake PRESTO .inf files (radio and X-ray) and check their data."""
    # The actual data expected to be in all test .dat files
    expected = np.arange(16)

    def assert_matches(series, reference):
        assert series.nsamp == 16
        assert series.tsamp == 64e-6
        assert series.data.dtype == np.float32
        assert np.allclose(series.data, reference)

    # Radio data files load without further ceremony
    for inf_name in ('fake_presto_radio.inf', 'fake_presto_radio_breaks.inf'):
        inf_path = os.path.join(DATA_DIR, inf_name)
        assert_matches(TimeSeries.from_presto_inf(inf_path), expected)

    # Calling TimeSeries.from_presto_inf() on X-ray and Gamma data should raise a warning
    # about the noise stats being non-Gaussian
    with warns(UserWarning):
        xray_path = os.path.join(DATA_DIR, 'fake_presto_xray.inf')
        xray_series = TimeSeries.from_presto_inf(xray_path)
        assert_matches(xray_series, expected)
def test_serialization():
    """Save a generated TimeSeries to JSON, load it back and compare."""
    generate_kwargs = dict(amplitude=25.0, stdnoise=1.0)
    duration = 10.0  # s
    sampling_time = 1.0e-3  # s
    signal_period = 1.0  # s

    original = TimeSeries.generate(
        duration, sampling_time, signal_period, **generate_kwargs
    )

    with tempfile.NamedTemporaryFile(suffix='.json') as json_file:
        save_json(json_file.name, original)
        restored = load_json(json_file.name)

    # Scalar attributes must match exactly, samples to within tolerance
    assert original.tsamp == restored.tsamp
    assert original.nsamp == restored.nsamp
    assert original.length == restored.length
    assert np.allclose(original.data, restored.data, atol=FLOAT_ATOL)
def test_ffa_search_no_downsampling():
    """
    Having period_min = bins_min * tsamp used to raise an error in v0.2.1,
    where the code complained about a downsampling factor not being > 1.
    This only checks that the search now runs to completion.
    """
    tsamp = 1e-3
    bins_min, bins_max = 800, 1200

    ts = TimeSeries.generate(200.0, tsamp, 1.0, amplitude=20.0)

    # Period bounds land exactly on bins * tsamp
    search_kwargs = dict(
        period_min=bins_min * tsamp,
        period_max=bins_max * tsamp,
        bins_min=bins_min,
        bins_max=bins_max,
    )
    ffa_search(ts, **search_kwargs)
def test_methods():
    """
    NOTE: This tests that the code runs, but not the output data quality,
    i.e. if dereddening removes low-frequency noise well
    """
    length = 10.0  # s
    tsamp = 1.0e-3  # s
    period = 1.0  # s

    original = TimeSeries.generate(length, tsamp, period, amplitude=25.0, stdnoise=1.0)

    ### Normalisation inplace / out of place ###
    out_of_place = original.copy()
    in_place = out_of_place.copy()
    out_of_place = out_of_place.normalise()
    in_place.normalise(inplace=True)
    assert np.allclose(out_of_place.data.mean(), 0.0, atol=FLOAT_ATOL)
    assert np.allclose(out_of_place.data.std(), 1.0, atol=FLOAT_ATOL)
    assert np.allclose(out_of_place.data, in_place.data, atol=FLOAT_ATOL)

    ### Dereddening inplace / out of place ###
    # Applied on top of the normalised series from the previous step
    in_place = out_of_place.copy()
    out_of_place = out_of_place.deredden(width=0.5, minpts=51)
    in_place.deredden(width=0.5, minpts=51, inplace=True)
    assert np.allclose(out_of_place.data, in_place.data, atol=FLOAT_ATOL)

    # De-reddening should turn constant data into zeros
    constant = TimeSeries.generate(length, tsamp, period, amplitude=0, stdnoise=0)
    constant._data += 42.42
    flattened = constant.deredden(0.5, minpts=51)
    assert np.allclose(flattened.data, 0.0, atol=FLOAT_ATOL)

    ### Downsampling ###
    dsfactor = 10
    out_of_place = original.downsample(dsfactor)
    in_place = original.copy()
    in_place.downsample(dsfactor, inplace=True)
    for reduced in (out_of_place, in_place):
        assert reduced.tsamp == original.tsamp * dsfactor
        assert reduced.nsamp == original.nsamp // dsfactor
        assert reduced.length == original.length

    # Rejected factors: strictly < 1, then excessive
    for bad_factor in (0.55, original.nsamp * 10):
        with raises(ValueError):
            original.downsample(bad_factor)

    ### Folding ###
    bins = 100

    # Fold with nsubs = None (default to number of periods that fit in data)
    stack_default = original.fold(1.0, bins, subints=None)
    assert stack_default.shape == (10, bins)

    # Fold with nsubs < number of periods
    stack_two = original.fold(1.0, bins, subints=2)
    assert stack_two.shape == (2, bins)

    # Fold with nsubs = number of periods that fit in data
    # This is a special case where internally fold() has
    # to avoid downsampling along the time axis
    num_periods = int(length / period)
    stack_full = original.fold(1.0, bins, subints=num_periods)

    # Fold into a single 1D profile array
    profile = original.fold(1.0, bins, subints=1)

    # All methods should return the same folded profile
    for stack in (stack_two, stack_default, stack_full):
        assert np.allclose(profile, stack.sum(axis=0), atol=FLOAT_ATOL)

    # Invalid requests, as (period, bins, subints)
    invalid_folds = (
        (1.0, bins, 1000000),    # Too many requested subints
        (1.0, bins, 0),          # subints can't be < 1
        (1.0, 1000000, None),    # Too many requested bins
        (1.0e6, bins, None),     # Period too long
        (1.0e-6, bins, None),    # Period too short
    )
    for fold_period, fold_bins, fold_subints in invalid_folds:
        with raises(ValueError):
            original.fold(fold_period, fold_bins, subints=fold_subints)
os.makedirs(dict['FOLD_DIR']) ############################################################################ # Profile code execution. prog_start_time = time.time() print('Parsing .dat files using glob string: %s' % (dict['glob_basename'])) inf_list = sorted(glob.glob(dict['DAT_DIR'] + dict['glob_basename'] + '.inf')) Nfiles = len(inf_list) print('No. of .dat files to process = %d \n' % (Nfiles)) for i in range(Nfiles): basename = inf_list[i].split('.inf')[0].split(dict['DAT_DIR'])[-1] print('File: %s' % (basename)) # Read in dedispersed time-series as a riptide TimeSeries object. timeseries = TimeSeries.from_presto_inf(inf_list[i]) tsamp = timeseries.tsamp # Sampling time (s) nsamp = timeseries.nsamp # No. of samples (s) times = np.arange(nsamp) * tsamp # 1D array of times (s) print('Sampling time (s) = %.4f ms' % (tsamp * 1e3)) print('No. of samples = %d' % (nsamp)) # Read DM value from file name. if 'DM' in inf_list[i]: DM = float(inf_list[i].split('DM')[1].split('.inf')[0]) basename = basename.split('DM')[0] + 'DM%06.1f' % (DM) else: DM = None # Detrend the time-series. timeseries = timeseries.deredden(width=dict['rmed_width'])
def test_ffa_search():
    """Run ffa_search() and exercise Periodogram attributes, JSON I/O and plotting."""
    # NOTE: we chose a length long enough so that running the
    # 'periodogram pruning' function was actually necessary
    # (and thus the function gets properly covered by the tests)
    length = 200.0
    period = 1.0
    ts = TimeSeries.generate(length, 0.001, period, amplitude=20.0)

    search_kwargs = dict(
        period_min=0.8 * period,
        period_max=1.2 * period,
        bins_min=240,
        bins_max=260,
    )
    tsdr, pgram = ffa_search(ts, **search_kwargs)

    # check trial periods are increasing
    running_max = np.maximum.accumulate(pgram.periods)
    assert all(running_max == pgram.periods)
    assert pgram.snrs.shape == (len(pgram.periods), len(pgram.widths))
    assert pgram.metadata == ts.metadata == tsdr.metadata
    assert pgram.tobs == length
    assert all(pgram.freqs == 1.0 / pgram.periods)

    # Test that running with deredden = False and already_normalised = True
    # returns a reference to the input TimeSeries (data left untouched)
    # This is how ffa_search() is called by the pipeline
    tsdr, pgram = ffa_search(ts, already_normalised=True, deredden=False, **search_kwargs)
    assert tsdr is ts

    ### Periodogram serialization ###
    with tempfile.NamedTemporaryFile(suffix='.json') as json_file:
        save_json(json_file.name, pgram)
        json_file.flush()
        restored = load_json(json_file.name)
    assert np.allclose(pgram.snrs, restored.snrs)
    assert np.allclose(pgram.periods, restored.periods)
    assert np.allclose(pgram.widths, restored.widths)
    assert pgram.metadata == restored.metadata

    ### Periodogram plotting ###
    def plot_to_temporary_png(periodogram, **plot_kwargs):
        # Draw on a fresh figure, save it to a throwaway file, close the figure
        fig = plt.figure(figsize=(20, 5), dpi=100)
        periodogram.plot(**plot_kwargs)
        with tempfile.NamedTemporaryFile(suffix='.png') as png_file:
            plt.savefig(png_file.name)
        plt.close(fig)

    plt.switch_backend('Agg')
    plot_to_temporary_png(pgram)
    # Same with iwidth = 0
    plot_to_temporary_png(pgram, iwidth=0)