def write_ordered(fin, df, fout):
    """Copy VDIF frames from ``fin`` to ``fout`` in the row order of ``df``.

    Every row of the DataFrame ``df`` must expose an ``offset`` attribute;
    it is used as the seek position in ``fin`` before the frame is read.
    """
    with vdif.open(fin, 'rb') as source, vdif.open(fout, 'wb') as sink:
        for row in df.itertuples():
            source.seek(row.offset)
            sink.write_frame(source.read_frame(edv=2, verify=False))
def split(f, delta_t=1, nfiles=None):
    """Split the VDIF file ``f`` into several files by header seconds.

    Frames are read one at a time.  A new output file is started for the
    first valid frame, and again whenever a valid frame's ``seconds`` header
    value is at least ``delta_t`` larger than that of the frame which started
    the current output file.  Output files are written next to the input and
    are named ``<base>-<offset><ext>``, where ``<offset>`` is the frame's
    ``seconds`` minus that of the first valid frame.

    Frames flagged ``invalid_data`` are copied to the current output file
    without taking part in the splitting logic; if no output file exists yet
    they are dropped (with a message on stderr).

    Parameters
    ----------
    f : str
        Path of the input VDIF file.
    delta_t : int, optional
        Minimum difference in header seconds between the starts of two
        consecutive output files.
    nfiles : int or None, optional
        Maximum number of output files to create; ``None`` means no limit.

    Raises
    ------
    ValueError
        If an output file to be created already exists.
    """
    fw = None
    try:
        with vdif.open(f, 'rb') as fr:
            previous_second = None
            starting_second = None
            basename = None
            while True:
                try:
                    frame = fr.read_frame(edv=2, verify=False)
                except EOFError as e:
                    print('saw exception {} in {}, terminating'.format(
                        e, basename), file=sys.stderr)
                    return
                except Exception as e:
                    print('saw exception {}, skipping frame'.format(e),
                          file=sys.stderr)
                    continue

                seconds = frame.header['seconds']
                if frame.header['invalid_data']:
                    if fw is None:
                        # No output file has been opened yet, so there is
                        # nowhere to put this frame.
                        print('skipping invalid frame before first output '
                              'file', file=sys.stderr)
                    else:
                        fw.write_frame(frame)
                    continue

                if starting_second is None:
                    starting_second = seconds

                if (previous_second is None
                        or seconds >= previous_second + delta_t):
                    if nfiles is not None:
                        if nfiles == 0:
                            break
                        nfiles -= 1
                    previous_second = seconds
                    # Write output in the same directory as the input.
                    basename, ext = os.path.splitext(f)
                    new = basename + '-' + str(seconds - starting_second) + ext
                    if os.path.isfile(new):
                        raise ValueError(new + ' already exists')
                    # Finish the previous piece before starting the next one
                    # so that open handles do not pile up.
                    if fw is not None:
                        fw.close()
                    fw = vdif.open(new, 'wb')
                    print('t={} file {}'.format(seconds, new))

                if seconds < previous_second:
                    print('input was not strictly sorted by time: saw {} after {}'.
                          format(seconds, previous_second), file=sys.stderr)
                fw.write_frame(frame)
    finally:
        # Close the last output file on every exit path (EOF, nfiles
        # exhausted, or an exception).
        if fw is not None:
            fw.close()
def setup(self):
    """Open the sample VDIF file and wrap it with explicit metadata.

    The raw handle is stored on ``self._fh``; ``self.fh`` is a
    ``SetAttribute`` wrapper around it that carries frequency, sideband and
    polarization values.
    """
    frequency = 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz
    sideband = np.array(1)
    polarization = np.tile(['L', 'R'], 4)
    self._fh = vdif.open(SAMPLE_VDIF)
    self.fh = SetAttribute(self._fh,
                           frequency=frequency,
                           sideband=sideband,
                           polarization=polarization)
def test_missing_frequency_sideband(self):
    """Check that ``frequency`` and ``sideband`` raise AttributeError on a
    Channelize task built on the plain sample file."""
    source = vdif.open(SAMPLE_VDIF)
    with Channelize(source, self.n) as channelized:
        for attr in ('frequency', 'sideband'):
            with pytest.raises(AttributeError):
                getattr(channelized, attr)
def test_function_tasks(self, task, sample_factor):
    """Test a function-based Task against applying the function directly.

    ``task`` is applied to the whole sample file to obtain reference data,
    which is compared with what ``Task`` returns for a complete read and
    for a read of the last three samples.
    """
    # Load baseband file and get reference data.
    fh = vdif.open(SAMPLE_VDIF)
    ref_data = task(fh.read())
    ft = Task(fh, task, sample_rate=fh.sample_rate * sample_factor)
    assert ft.shape[0] == fh.shape[0] * sample_factor

    # Apply to everything.
    data1 = ft.read()
    assert ft.tell() == ft.shape[0]
    # Compare the absolute difference: without abs() any negative offset,
    # however large, would satisfy the check.
    assert abs(ft.time - ft.start_time
               - ft.shape[0] / ft.sample_rate) < 1*u.ns
    assert ft.dtype is ref_data.dtype is data1.dtype
    assert np.allclose(ref_data, data1)

    # Seeking and selective reading.
    ft.seek(-3, 2)
    assert ft.tell() == ft.shape[0] - 3
    data2 = ft.read()
    assert data2.shape[0] == 3
    assert np.allclose(ref_data[-3:], data2)

    ft.close()
def get_multifile_data(vbs, nif):
    """Extract a short piece of a recording to disk and read it back.

    The scan named ``<vbs>_<nif>`` is selected with ``scan_set``, written to
    ``<scriptdir>/checkdata.vdif`` with ``disk2file``, and then read with the
    VDIF stream reader.

    Returns
    -------
    list
        ``[nbbcs, ifdata, start_time]``, where ``nbbcs`` is half of the
        fifth colon-separated field of the ``scan_check?`` reply, ``ifdata``
        is everything read from the extracted file and ``start_time`` is
        taken from the reader's ``info()``.  If the ``scan_set`` reply
        contains " does not exist", ``[-1, 0, -1]`` is returned instead.

    Notes
    -----
    Calls ``sys.exit(1)`` if ``disk2file`` has not become inactive after the
    polling loop has waited more than five times.
    """
    vbsname = vbs + "_" + str(nif)
    disk2fileout = scriptdir + "/checkdata.vdif"
    ss = fbcmd("scan_set=" + vbsname + ":+2.0s:+" + extractiontime)
    if " does not exist" in ss:
        return [-1, 0, -1]
    sc = fbcmd("scan_check?")
    nbbcs = int(int(sc.split(":")[4]) / 2)
    fbcmd("disk2file=" + disk2fileout + ":::w")
    nwait = 0
    time.sleep(0.25)  # Wait for disk2file
    while True:
        stat = fbcmd("disk2file?")
        if "inactive" in stat:
            break
        if nwait > 5:
            print(
                "ERROR: Waited more than 5 sec for disk2file! Something is wrong, exiting..."
            )
            sys.exit(1)
        time.sleep(1)  # Wait for disk2file
        nwait += 1
    # Read file.  Need to specify sample rate, too short to autodetect.
    fh = vdif.open(disk2fileout, 'rs', sample_rate=sample_rate * u.MHz)
    try:
        start_time = fh.info()['start_time']
        # Ensure file pointer is at beginning of file
        fh.seek(0)
        # Read all data until end
        ifdata = fh.read()
    finally:
        # Close the reader even when inspecting or reading the file fails.
        fh.close()
    return [nbbcs, ifdata, start_time]
def test_channelizetask(self):
    """Test channelization task."""
    fh = vdif.open(SAMPLE_VDIF)
    ct = Channelize(fh, self.n)

    # Channelize everything.
    data1 = ct.read()
    assert ct.tell() == ct.shape[0]
    # Compare the absolute difference: without abs() any negative offset,
    # however large, would satisfy the check.
    assert abs(ct.time - ct.start_time
               - ct.shape[0] / ct.sample_rate) < 1*u.ns
    assert ct.dtype is self.ref_data.dtype is data1.dtype
    assert np.all(self.ref_data == data1)

    # Seeking and selective decode.
    ct.seek(-3, 2)
    assert ct.tell() == ct.shape[0] - 3
    data2 = ct.read()
    assert data2.shape[0] == 3
    assert np.all(self.ref_data[-3:] == data2)

    ct.seek(-2, 2)
    with pytest.raises(EOFError):
        ct.read(10)

    # Quick test of channel sanity check in __init__.  The result is not
    # bound to a name: the call is expected to raise, and ``ct`` must keep
    # referring to the task that is closed below.
    with pytest.raises(AssertionError):
        Channelize(fh, 400001)

    ct.close()
def test_wrong_polarization_vdif(self):
    """Check the errors ``Power`` raises for the sample VDIF file."""
    with vdif.open(SAMPLE_VDIF) as fh:
        # Before any polarization has been set on the handle.
        pytest.raises(AttributeError, Power, fh)
        # Too many.
        fh.polarization = np.array(['L', 'R'] * 4)
        pytest.raises(ValueError, Power, fh)
def test_taskbase(self, n, samples_per_frame):
    """Exercise the TaskBase interface through ``ReshapeTime``.

    Shape, time, seek/tell and read behaviour are checked for
    self-consistency for the given ``n`` and ``samples_per_frame``.
    """
    source = vdif.open(SAMPLE_VDIF)
    task = ReshapeTime(source, n, samples_per_frame=samples_per_frame)
    one_ns = 1*u.ns

    # Sample pointer and shape bookkeeping.
    assert task.sample_rate == source.sample_rate / n
    nsample = samples_per_frame * (source.shape[0] // n // samples_per_frame)
    assert task.shape == (nsample, n) + source.sample_shape
    assert task.size == np.prod(task.shape)
    assert task.ndim == source.ndim + 1
    assert task.tell() == 0
    assert task.tell(unit='time') == task.time == task.start_time
    assert abs(task.stop_time - task.start_time
               - (nsample * n) / source.sample_rate) < one_ns

    # Reference data, read straight from the source.
    expected = source.read(nsample * n).reshape(
        (-1, n) + source.sample_shape)

    # Sequential read of everything.
    everything = task.read()
    assert task.tell() == task.shape[0]
    assert abs(task.time - task.start_time
               - task.shape[0] / task.sample_rate) < one_ns
    assert task.dtype is everything.dtype
    assert np.all(expected == everything)

    # Seeking and selective decode.
    task.seek(-7, 2)
    assert task.tell() == task.shape[0] - 7
    tail = task.read()
    assert tail.shape[0] == 7
    assert np.all(tail == expected[-7:])

    sec_offset = -0.25 * u.ms
    task.seek(sec_offset, 'end')
    assert task.tell() == task.shape[0] + self.convert_time_offset(
        sec_offset, task.sample_rate)
    assert task.tell(unit=u.ms) == (task.tell() / task.sample_rate).to(u.ms)

    position = task.tell()
    task.seek(2, 'current')
    assert task.tell() == position + 2

    time_offset = task.start_time + 0.13 * u.ms
    task.seek(time_offset, 'start')
    assert task.tell() == self.convert_time_offset(
        time_offset - task.start_time, task.sample_rate)

    # Reading into a caller-supplied array.
    out = np.empty((11,) + task.sample_shape)
    task.seek(0)
    task.read(out=out)
    assert np.all(out == expected[:11])

    # After closing the task the source is closed and reads fail.
    task.close()
    assert source.closed
    with pytest.raises(ValueError):
        task.read(1)
def test_wrong_axes(self):
    """Check that unsuitable axes passed to ``get_reshape_and_transpose``
    raise ValueError."""
    with vdif.open(SAMPLE_VDIF) as fh:
        for bad_axes in ((1, 0), (2, 3), (1, 1)):
            with pytest.raises(ValueError):
                self.get_reshape_and_transpose(fh, (4, 2), bad_axes)
def get_fh():
    """Get sample VDIF file with correct frequency, sideband, polarization."""
    # The values are attached to the handle by hand, in this order.
    metadata = (
        ('frequency', 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz),
        ('sideband', 1),
        ('polarization', np.tile(['L', 'R'], 4)),
    )
    handle = vdif.open(SAMPLE_VDIF)
    for name, value in metadata:
        setattr(handle, name, value)
    return handle
def test_frequency_sideband_propagation(self):
    """Check that ReshapeTime exposes the frequency and sideband that were
    set on its source."""
    source = vdif.open(SAMPLE_VDIF)
    # Attach frequency and sideband information by hand.
    # (Note: sideband is incorrect; just for testing purposes)
    source.frequency = 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz
    source.sideband = np.tile([-1, +1], 4)
    reshaped = ReshapeTime(source, 256)
    for attr in ('sideband', 'frequency'):
        assert np.all(getattr(reshaped, attr) == getattr(source, attr))
def test_missing_frequency_sideband_polarization(self):
    """Check that ``frequency``, ``sideband`` and ``polarization`` raise
    AttributeError on a Square task built on the plain sample file."""
    squared = Square(vdif.open(SAMPLE_VDIF))
    for attr in ('frequency', 'sideband', 'polarization'):
        with pytest.raises(AttributeError):
            getattr(squared, attr)
def test_basic(self):
    """Compare reshape-and-transpose output with the numpy equivalent."""
    source = vdif.open(SAMPLE_VDIF)
    raw = source.read()
    expected = raw.reshape((-1, 4, 2)).transpose(0, 2, 1)

    transposed = self.get_reshape_and_transpose(source, (4, 2), (2, 1))
    assert transposed.start_time == source.start_time
    assert transposed.sample_rate == source.sample_rate
    assert_array_equal(transposed.read(), expected)
    transposed.close()
def test_frequency_sideband_setting(self):
    """Check that frequency and sideband passed to ReshapeTime are the ones
    it exposes."""
    source = vdif.open(SAMPLE_VDIF)
    # Frequency and sideband are supplied by hand as keyword arguments.
    # (Note: sideband is incorrect; just for testing purposes)
    given = {
        'frequency': 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz,
        'sideband': np.tile([-1, +1], 4),
    }
    reshaped = ReshapeTime(source, 256, **given)
    assert np.all(reshaped.sideband == given['sideband'])
    assert np.all(reshaped.frequency == given['frequency'])
def test_invalid(self):
    """Check SquareHat's argument validation and its inefficiency warning."""
    fh = vdif.open(SAMPLE_VDIF)
    with pytest.raises(ValueError):
        SquareHat(fh, -1)
    with pytest.raises(ValueError):
        SquareHat(fh, 10, offset=-1)
    with pytest.raises(ValueError):
        SquareHat(fh, 10, samples_per_frame=9)
    with warnings.catch_warnings(record=True) as w:
        # Record every warning, including one that was already emitted from
        # the same location earlier in the session; with the default
        # filters such a repeat would be suppressed and the check below
        # would depend on test order.
        warnings.simplefilter('always')
        SquareHat(fh, 10, samples_per_frame=11)
    assert any('inefficient' in str(_w) for _w in w)
def test_frequency_sideband_propagation(self):
    """Check which frequency, sideband and polarization a Square task
    exposes for a source that has all three set."""
    source = vdif.open(SAMPLE_VDIF)
    # Attach frequency, sideband and polarization by hand.
    # (Note: sideband is incorrect; just for testing purposes)
    source.frequency = 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz
    source.sideband = np.tile([-1, +1], 4)
    source.polarization = np.tile(['L', 'R'], 4)

    squared = Square(source)
    for attr in ('frequency', 'sideband'):
        assert np.all(getattr(squared, attr) == getattr(source, attr))
    expected_pol = np.tile(['LL', 'RR'], 4)
    assert np.all(squared.polarization == expected_pol)
def test_method_task(self, samples_per_frame):
    """Compare a Task using ``zero_every_8th_complex`` with the result of
    ``zero_every_8th_sample`` applied to the same samples."""
    source = vdif.open(SAMPLE_VDIF)
    count = source.shape[0]
    if samples_per_frame is not None:
        # Trim to a whole number of frames.
        count -= count % samples_per_frame
    expected = zero_every_8th_sample(source.read(count))

    with Task(source, zero_every_8th_complex,
              samples_per_frame=samples_per_frame) as task:
        produced = task.read()

    assert np.all(produced == expected)
def test_wrong_polarization(self):
    """Check that ``Power`` raises ValueError for unsuitable polarization."""
    dada_fh = dada.open(SAMPLE_DADA)
    rejected = (
        ['L'],
        [['L'], ['R']],
        [['L'], ['L']],
    )
    for polarization in rejected:
        with pytest.raises(ValueError):
            Power(dada_fh, polarization=polarization)

    vdif_fh = vdif.open(SAMPLE_VDIF)
    with pytest.raises(ValueError):
        Power(vdif_fh)
def test_basics(self):
    """Check shape, start time and first samples of ``SquareHat(fh, 3)``."""
    source = vdif.open(SAMPLE_VDIF)
    convolved = SquareHat(source, 3)

    expected_size = ((source.shape[0] - 2) // 4) * 4
    assert convolved.sample_rate == source.sample_rate
    assert convolved.shape == (expected_size,) + source.shape[1:]
    start_shift = convolved.start_time - source.start_time
    assert abs(start_shift - 2. / source.sample_rate) < 1. * u.ns

    # Each output sample is compared with the sum of three consecutive
    # input samples.
    raw = source.read(12)
    expected = raw[:-2] + raw[1:-1] + raw[2:]
    produced = convolved.read(10)
    assert np.all(produced == expected)
def open(self, fname):
    """Open ``fname`` with the baseband reader selected by ``self.dtype``.

    The stream reader is stored on ``self.fh``.  For a ``dtype`` other than
    'vdif', 'mark4' or 'mark5b' nothing is opened and ``self.fh`` is left
    untouched.
    """
    kind = self.dtype
    if kind == 'vdif':
        self.fh = vdif.open(fname, mode='rs', sample_rate=self.sample_rate)
    elif kind == 'mark4':
        self.fh = mark4.open(fname, mode='rs', decade=2010,
                             ntrack=self.ntrack,
                             sample_rate=self.sample_rate,
                             thread_ids=self.thread_ids)
    elif kind == 'mark5b':
        self.fh = mark5b.open(fname, mode='rs', nchan=self.nIF,
                              ref_mjd=57000,
                              sample_rate=self.sample_rate,
                              thread_ids=self.thread_ids)
def test_reshape(self, sample_shape):
    """Compare ``Reshape`` output with a plain numpy reshape of the data."""
    source = vdif.open(SAMPLE_VDIF)
    expected = source.read().reshape((-1, ) + sample_shape)

    reshaped = Reshape(source, sample_shape=sample_shape)
    assert source.sample_shape == (8, )
    assert reshaped.sample_shape == sample_shape
    assert reshaped.start_time == source.start_time
    assert reshaped.sample_rate == source.sample_rate
    assert_array_equal(reshaped.read(), expected)
    reshaped.close()
def read_vdif(vdif_files, sample_rate, max=2**15):
    """Read VDIF data and convert it to mean-subtracted power.

    Accepts a single file or a list of files.  All data are read at once;
    the binning that ``max`` was meant to control is currently disabled, so
    ``max`` has no effect and the returned rate equals ``sample_rate``.

    Args:
        vdif_files (list or str): The path to the vdif file to be read, or
            a list of paths to different vdif files to be read.  If a list
            is given the vdifs will be opened in the order they are given
            in the list.
        sample_rate (Quantity): The sampling rate of the vdifs.
        max (int): Intended upper limit on the number of samples kept.
            Currently unused.

    Returns:
        complete_data (ndarray): The power, ``abs(data)**2`` averaged over
            axis 1 of the raw data, with the mean along the remaining
            non-time axis subtracted, transposed.
        file_names (str): The base names of the vdif files with the final
            extension removed, joined by dashes.
        new_rate (Quantity): Equal to ``sample_rate``.
    """
    import os

    # A single path (anything string-like, i.e. with a ``split`` method) is
    # wrapped in a list; anything else is taken to be a list already.
    if hasattr(vdif_files, 'split'):
        vdif_files = [vdif_files]
    vdif_files = sf.open(vdif_files)
    with vdif.open(vdif_files, 'rs', sample_rate=sample_rate) as fh:
        complete_data = fh.read()

    # Get the power from data
    complete_data = (np.abs(complete_data)**2).mean(1)
    complete_data -= np.nanmean(complete_data, axis=1, keepdims=True)
    complete_data = complete_data.T
    new_rate = sample_rate

    # Get the input filenames without their path or suffix.  splitext keeps
    # the name of a file that has no extension (splitting on '.' by hand
    # yields an empty string for such a file).
    files = [os.path.splitext(os.path.basename(x))[0]
             for x in vdif_files.files]
    file_names = '-'.join(files)
    return complete_data, file_names, new_rate
def __init__(self, raw_files, blocksize, samplerate, fedge, fedge_at_top,
             time_offset=0.0 * u.s, dtype='cu4bit,cu4bit', comm=None):
    """ARO data acquired with a CHIME correlator, saved in VDIF format.

    Files are 2**16 time, 2 pol, 1024 freq, at 800MHz / (2*1024) samplerate

    Read with baseband VDIF package, need byte to sample conversions
    for folding

    The first file is opened briefly to obtain the start time, the number
    of threads (stored as ``npol``) and the number of channels.  ``fedge``
    is either a scalar band edge, from which the channel frequencies are
    computed, or an array of shape ``(nchan,)`` holding the frequencies
    themselves.  ``time_offset`` is not used in this constructor.

    Raises
    ------
    ValueError
        If the file size is not a whole number of frames.
    """
    fh = vdif.open(raw_files[0], 'rs', sample_rate=samplerate)
    try:
        self.time0 = fh.tell(unit='time')
        self.npol = fh.nthread
        nchan = fh.nchan
    finally:
        # The handle is only needed for the three values above; close it
        # rather than leaving it open for the lifetime of the process.
        fh.close()
    self.samplerate = samplerate
    self.fedge_at_top = fedge_at_top
    if fedge.isscalar:
        self.fedge = fedge
        f = fftshift(fftfreq(nchan, (2. / samplerate).to(u.s).value)) * u.Hz
        if fedge_at_top:
            self.frequencies = fedge - (f - f[0])
        else:
            self.frequencies = fedge + (f - f[0])
    else:
        assert fedge.shape == (nchan, )
        self.frequencies = fedge
        if fedge_at_top:
            self.fedge = self.frequencies.max()
        else:
            self.fedge = self.frequencies.min()

    self.dtsample = (nchan * 2 / samplerate).to(u.s)
    if comm is None or comm.rank == 0:
        print("In AROCHIMEVdifData, calling super")
        print("Start time: ", self.time0.iso)

    super(AROCHIMEVdifData, self).__init__(raw_files, blocksize, dtype,
                                           nchan, comm=comm)
    # Convert the size in bytes to the number of payload bytes, which
    # requires a whole number of frames.
    if self.filesize % self.fh_raw.header0.framesize != 0:
        raise ValueError("File size is not an integer number of packets")
    self.filesize = (self.filesize // self.fh_raw.header0.framesize
                     * self.fh_raw.header0.payloadsize)
def test_frequency_sideband_polarization_propagation2(self):
    """Check shapes and values of frequency, sideband and polarization
    after reshaping to (2, 2, 2) and transposing with axes (-1, -3, -2)."""
    source = vdif.open(SAMPLE_VDIF)
    # Add different frequency, sideband, and polarization information.
    # (Note: these are incorrect; just for testing purposes.)
    source.frequency = 311.25 * u.MHz + (np.arange(8.) // 4) * 16. * u.MHz
    source.sideband = np.tile([-1, 1], 4)
    source.polarization = np.tile(['L', 'L', 'R', 'R'], 2)

    with self.get_reshape_and_transpose(
            source, (2, 2, 2), (-1, -3, -2)) as transposed:
        expected_frequency = source.frequency[::4].reshape(2, 1)
        assert transposed.frequency.shape == (2, 1)
        assert np.all(transposed.frequency == expected_frequency)

        expected_sideband = source.sideband[:2].reshape(2, 1, 1)
        assert transposed.sideband.shape == (2, 1, 1)
        assert np.all(transposed.sideband == expected_sideband)

        expected_polarization = source.polarization[:4:2]
        assert transposed.polarization.shape == (2, )
        assert np.all(transposed.polarization == expected_polarization)
def open(self, number=0):
    """Open a new file in the sequence.

    Parameters
    ----------
    number : int
        The number of the file to open.  Default is 0, i.e., the first one.

    Returns
    -------
    fh_raw
        The handle for file ``number``; the existing one is returned
        unchanged if that file is already the current one.
    """
    if number == self.current_file_number:
        return self.fh_raw

    self.close()
    sample_rate = (1/self.dtsample).to(u.Hz)
    self.fh_raw = vdif.open(self.files[number], 'rs',
                            sample_rate=sample_rate)
    self.current_file_number = number
    return self.fh_raw
def test_channelize_frequency_real(self):
    """Test frequency calculation."""
    source = vdif.open(SAMPLE_VDIF)
    # Frequency information is attached by hand for now.
    source.frequency = 311.25 * u.MHz + (np.arange(8.) // 2) * 16. * u.MHz
    # Note: sideband is actually incorrect for this VDIF file;
    # this is for testing only.
    source.sideband = np.tile([-1, +1], 4)

    channelized = Channelize(source, self.n)
    expected = (('sideband', self.ref_sideband),
                ('frequency', self.ref_frequency))
    for attr, reference in expected:
        assert np.all(getattr(channelized, attr) == reference)
    channelized.close()
def open(self, number=0):
    """Open a new file in the sequence.

    Parameters
    ----------
    number : int
        The number of the file to open.  Default is 0, i.e., the first one.

    Returns
    -------
    fh_raw
        The handle for file ``number``; the existing one is returned
        unchanged if that file is already the current one.
    """
    if number == self.current_file_number:
        return self.fh_raw

    self.close()
    sample_rate = (1 / self.dtsample).to(u.Hz)
    self.fh_raw = vdif.open(self.files[number], 'rs',
                            sample_rate=sample_rate)
    self.current_file_number = number
    return self.fh_raw
def test_basetaskbase(self):
    """Check the basic task interface using ``Multiply(fh, 2.)``."""
    source = vdif.open(SAMPLE_VDIF)
    doubled = Multiply(source, 2.)

    # Sample pointer and shape bookkeeping.
    assert doubled.sample_rate == source.sample_rate
    assert doubled.shape == source.shape
    assert doubled.size == np.prod(doubled.shape)
    assert doubled.ndim == source.ndim
    assert doubled.tell() == 0
    assert doubled.tell(unit='time') == doubled.time == doubled.start_time
    assert doubled.stop_time == source.stop_time

    # Data are compared with the raw samples times two.
    expected = source.read() * 2.
    produced = doubled.read()
    assert np.all(produced == expected)
    assert doubled.time == source.stop_time
def setup(self):
    """Pre-calculate channelized data."""
    self.n = 1024

    with vdif.open(SAMPLE_VDIF) as fh:
        self.ref_start_time = fh.start_time
        self.ref_sample_rate = fh.sample_rate
        samples = fh.read()
    self.raw_data = samples

    # Keep a whole number of blocks of ``n`` samples and put the samples
    # of each block along axis 1.
    nblocks = samples.shape[0] // self.n
    usable = self.n * nblocks
    blocks = samples[:usable].reshape((-1, self.n) + samples.shape[1:])

    make_fft = get_fft_maker()
    rfft = make_fft(shape=blocks.shape, dtype=blocks.dtype, axis=1,
                    sample_rate=self.ref_sample_rate)
    self.ref_data = rfft(blocks)
    self.ref_sideband = np.tile([-1, 1], 4)
    band_frequency = (311.25 + 16 * (np.arange(8) // 2)) * u.MHz
    self.ref_frequency = (band_frequency
                          + self.ref_sideband * rfft.frequency)
def test_taskbase_exceptions(self):
    """Test exceptions in TaskBase."""
    with vdif.open(SAMPLE_VDIF) as source:
        task = ReshapeTime(source, 1024, samples_per_frame=3)

        # Reading beyond the bounds of the data must lead to an error.
        task.seek(0, 2)
        with pytest.raises(EOFError):
            task.read(1)
        task.seek(-2, 'end')
        with pytest.raises(EOFError):
            task.read(10)
        task.seek(-2, 'end')
        with pytest.raises(EOFError):
            task.read(out=np.empty((5,) + task.sample_shape))
        task.seek(-4, 'start')
        with pytest.raises(OSError):
            task.read(1)

        # Invalid whence.
        for whence in ('now', 3):
            with pytest.raises(ValueError):
                task.seek(1, whence)

        # External array shape mismatch.
        with pytest.raises(AssertionError):
            task.read(out=np.empty(3))

        # Missing frequency/sideband definitions.
        for attr in ('frequency', 'sideband'):
            with pytest.raises(AttributeError):
                getattr(task, attr)

        # Frequency/sideband arguments that ReshapeTime rejects.
        rejected = (
            {'frequency': np.arange(4.)*u.GHz},
            {'sideband': np.ones((2, 8), dtype=int)},
        )
        for kwargs in rejected:
            with pytest.raises(ValueError):
                ReshapeTime(source, 1024, samples_per_frame=3, **kwargs)
def __init__(self, source_strm, dm, reference_frequency=None,
             samples_per_frame=None, frequencies=None, sideband=None,
             fft=None):
    """Build a dedisperse-then-square pipeline on ``source_strm``.

    A list passed as ``source_strm`` is opened with ``vdif.open``; anything
    else is used as the stream itself.  The chain is kept on
    ``self.dedispersed`` and ``self.squared``, and ``self.outstream`` refers
    to the latter.  ``fft`` is accepted but not used in this constructor.
    """
    self.fh = (vdif.open(source_strm) if isinstance(source_strm, list)
               else source_strm)
    dedisperse_options = dict(reference_frequency=reference_frequency,
                              samples_per_frame=samples_per_frame,
                              frequency=frequencies,
                              sideband=sideband)
    self.dedispersed = Dedisperse(self.fh, dm, **dedisperse_options)
    self.outstream = self.squared = Square(self.dedispersed)
def __init__(self, raw_files, blocksize, samplerate, fedge, fedge_at_top,
             time_offset=0.0*u.s, dtype='cu4bit,cu4bit', comm=None):
    """ARO data acquired with a CHIME correlator, saved in VDIF format.

    Files are 2**16 time, 2 pol, 1024 freq, at 800MHz / (2*1024) samplerate

    Read with baseband VDIF package, need byte to sample conversions
    for folding

    The first file is opened briefly to obtain the start time, the number
    of threads (stored as ``npol``) and the number of channels.  ``fedge``
    is either a scalar band edge, from which the channel frequencies are
    computed, or an array of shape ``(nchan,)`` holding the frequencies
    themselves.  ``time_offset`` is not used in this constructor.

    Raises
    ------
    ValueError
        If the file size is not a whole number of frames.
    """
    fh = vdif.open(raw_files[0], 'rs', sample_rate=samplerate)
    try:
        self.time0 = fh.tell(unit='time')
        self.npol = fh.nthread
        nchan = fh.nchan
    finally:
        # The handle is only needed for the three values above; close it
        # rather than leaving it open for the lifetime of the process.
        fh.close()
    self.samplerate = samplerate
    self.fedge_at_top = fedge_at_top
    if fedge.isscalar:
        self.fedge = fedge
        f = fftshift(fftfreq(nchan, (2./samplerate).to(u.s).value)) * u.Hz
        if fedge_at_top:
            self.frequencies = fedge - (f-f[0])
        else:
            self.frequencies = fedge + (f-f[0])
    else:
        assert fedge.shape == (nchan,)
        self.frequencies = fedge
        if fedge_at_top:
            self.fedge = self.frequencies.max()
        else:
            self.fedge = self.frequencies.min()

    self.dtsample = (nchan * 2 / samplerate).to(u.s)
    if comm is None or comm.rank == 0:
        print("In AROCHIMEVdifData, calling super")
        print("Start time: ", self.time0.iso)

    super(AROCHIMEVdifData, self).__init__(raw_files, blocksize, dtype,
                                           nchan, comm=comm)
    # Convert the size in bytes to the number of payload bytes, which
    # requires a whole number of frames.
    if self.filesize % self.fh_raw.header0.framesize != 0:
        raise ValueError("File size is not an integer number of packets")
    self.filesize = (self.filesize // self.fh_raw.header0.framesize
                     * self.fh_raw.header0.payloadsize)
def open(self):
    """Open the file at index ``self.findex`` of ``self.flist`` as a VDIF
    stream reader and store it on ``self.fh``."""
    current_file = self.flist[self.findex]
    self.fh = vdif.open(current_file, 'rs', sample_rate=self.sample_rate)
# Prefix of the input files, taken from the third command-line argument;
# every file matching '<path>*vdif' is used, in sorted order.
path = sys.argv[3]
filelist = glob.glob('%s*vdif' % (path))
filelist = np.sort(filelist)

N = int(T//tfile)+1
Npcore = int(T // (tfile*Csize)) + 1

# Each rank handles its own contiguous run of Npcore files.
for i in range(rank*Npcore + dstart, (rank+1)*Npcore + dstart):
    if (i - dstart) > N:
        print("Rank %s exiting" % (rank))
        break
    print("Rank %s on %s of %s" % (
        rank, (i - rank*Npcore - dstart), Npcore))

    fn = filelist[i]
    # Close each file as soon as its samples are in memory, so that open
    # handles do not accumulate over the loop.
    with vdif.open(fn, mode='rs', sample_rate=sample_rate) as fh:
        print("reading")
        d = fh.read(size)

    # Turn into chunked dask array
    print("de-dispersing")
    d = da.from_array(d, chunks=(d.shape[0], d.shape[1], 32))
    d = da.fft.fft(d, axis=0)
    d *= dd_coh[:, np.newaxis]
    d = da.fft.ifft(d, axis=0)

    # De-chunk the array, to allow efficient reshaping
    print("reshaping and forming waterfall")
    d = d.rechunk(d.shape)
    d = abs(d).reshape(-1, binf, 2, 1024).mean(1)
    w[(i-dstart)*ntbin:(i-dstart+1)*ntbin] = d