Example #1
0
    def get_data_to_write(self, start_sample, nsamp):
        """

        Read data to self.data, selects channels
        Optionally perform RFI filtering and zero-DM subtraction

        Channels flagged by the RFI filter are replaced with the mean of the
        unflagged data. For integer output dtypes the replacement value is
        rounded, and zero-DM subtracted data is clipped to the representable
        range before the final cast so that negative residuals do not wrap
        around to large positive values.

        Args:

            start_sample (int): Start sample number to read from
            nsamp (int): Number of samples to read

        """
        out_dtype = self.your_object.your_header.dtype
        is_integer = np.issubdtype(out_dtype, np.integer)

        data = self.your_object.get_data(start_sample, nsamp)
        data = data[:, self.chan_min:self.chan_max]

        if self.flag_rfi:
            mask = sk_sg_filter(
                data,
                self.your_object,
                self.nchans,
                self.sk_sig,
                self.sg_fw,
                self.sg_sig,
            )

            fill_value = np.mean(data[:, ~mask])
            if is_integer:
                # Round instead of letting the assignment truncate the mean.
                fill_value = np.around(fill_value)
            data[:, mask] = fill_value

        if self.zero_dm_subt:
            logger.debug("Subtracting 0-DM time series from the data")
            # The subtraction promotes to float and produces negative values.
            data = data - data.mean(1)[:, None]
            if is_integer:
                limits = np.iinfo(out_dtype)
                data = np.clip(data, limits.min, limits.max)

        self.data = data.astype(out_dtype)
Example #2
0
    def get_data_to_write(self, start_sample, nsamp):
        """

        Read data to self.data, selects channels
        Optionally perform RFI filtering and zero-DM subtraction

        The stored array always has shape (nt, npoln, nf). Flagged channels
        are replaced according to ``self.replacement_policy`` ("mean",
        "median", anything else means 0). Zero-DM subtraction removes the
        per-sample mean over frequency, adds the per-channel mean over time
        back, and clips the result to the range of the output dtype.

        Args:

            start_sample (int): Start sample number to read from
            nsamp (int): Number of samples to read

        Raises:

            NotImplementedError: if zero-DM subtraction is requested with
                more than one output polarisation.

        """
        # Fail before reading or modifying anything.
        if self.zero_dm_subt and self.npoln > 1:
            raise NotImplementedError(
                "0-DM subtraction is implemented only for 1 output pol."
            )

        header = self.your_object.your_header
        out_dtype = header.dtype

        data = self.your_object.get_data(start_sample, nsamp, npoln=self.npoln)

        if self.npoln == 1:
            data = np.expand_dims(data, 1)

        # shape of data is (nt, npoln, nf)
        data = data[:, :, self.chan_min : self.chan_max]

        if self.flag_rfi:
            for ipol in range(data.shape[1]):
                data_to_flag = data[:, ipol, :]
                mask = sk_sg_filter(
                    data_to_flag,
                    self.your_object,
                    self.sk_sig,
                    self.sg_fw,
                    self.sg_sig,
                )

                if self.replacement_policy == "mean":
                    fill_value = np.mean(data_to_flag[:, ~mask])
                elif self.replacement_policy == "median":
                    fill_value = np.median(data_to_flag[:, ~mask])
                else:
                    fill_value = 0

                if header.nbits < 32:
                    fill_value = np.around(fill_value).astype(out_dtype)

                data[:, ipol, mask] = fill_value

        if self.zero_dm_subt:
            logger.debug("Subtracting 0-DM time series from the data.")
            # np.iinfo only accepts integer dtypes; float data needs np.finfo.
            if np.issubdtype(out_dtype, np.integer):
                limits = np.iinfo(out_dtype)
            else:
                limits = np.finfo(out_dtype)

            ts = data.mean(-1)
            # npoln is 1 here, so flattening gives one bandpass value per
            # channel (squeeze() would yield a 0-d array for a single channel).
            bp = data.mean(0).reshape(-1)

            # Channel-by-channel and in place, to avoid a full float copy.
            for channel in range(data.shape[-1]):
                data[:, :, channel] = np.clip(
                    data[:, :, channel].astype("float32") - ts + bp[channel],
                    limits.min,
                    limits.max,
                )

        data = data.astype(out_dtype)

        # shape of data is (nt, npoln, nf)
        if self.highest_frequency_first and header.foff > 0:
            data = data[:, :, ::-1]
        self.data = data
Example #3
0
    def get_chunk(self, tstart=None, tstop=None, for_preprocessing=True):
        """
        Get a chunk of data. The data is saved in `self.data`.

        If the requested window extends beyond either end of the file, the
        missing samples are filled with the per-channel median of the samples
        that could be read. Afterwards the channels selected by
        `self.kill_mask` and, when `self.flag_rfi` is set, the channels
        flagged by the RFI filter are set to zero.

        Args:
            tstart (float): start time of the chunk in seconds. Defaults to
                the candidate time minus the dispersion delay and the
                candidate width.
            tstop (float): stop time of the chunk in seconds. Defaults to the
                candidate time plus the dispersion delay and the candidate
                width.
            for_preprocessing (bool): if the data is to be preprocessed later. This will modify the number of samples
            read based on the width of the candidate

        Returns:
            The object itself, with `self.data` (and `self.rfi_mask` when RFI
            flagging is enabled) updated.

        """
        if tstart is None:
            tstart = (self.tcand - self.dispersion_delay() -
                      self.width * self.native_tsamp)
        if tstop is None:
            tstop = (self.tcand + self.dispersion_delay() +
                     self.width * self.native_tsamp)
        logger.debug(f"tstart is {tstart}")
        logger.debug(f"tstop is {tstop}")

        nstart = int(tstart / self.native_tsamp)
        nsamp = int((tstop - tstart) / self.native_tsamp)
        nstart_read = nstart
        nsamp_read = nsamp

        if for_preprocessing:
            # Widen the window so that enough samples remain after later
            # preprocessing; the two cases are mutually exclusive.
            if self.width > 2 and nsamp // (self.width // 2) < self.min_samp:
                nsamp_read = self.min_samp * self.width // 2
            elif nsamp < self.min_samp:
                nsamp_read = self.min_samp
            # Keep the widened window centred on the requested one (a no-op
            # when the window was not widened).
            nstart_read = nstart - (nsamp_read - nsamp) // 2
        logger.debug(
            f"nstart_read is {nstart_read}, nsamp_read is {nsamp_read},"
            f"nstart is {nstart}, nsamp is {nsamp}")

        nspectra = int(self.your_header.nspectra)
        nstop_read = nstart_read + nsamp_read
        if nstart_read >= 0 and nstop_read <= nspectra:
            logger.debug(
                f"All the data available in the file, no need to pad. \n"
                f"nstart_read({nstart_read})>=0 and \n"
                f"nstart_read({nstart_read})+nsamp_read({nsamp_read})<=nspectra({nspectra})"
            )
            data = self.get_data(nstart=nstart_read, nsamp=nsamp_read)
        else:
            logger.debug(
                f"Padding needed: nstart_read({nstart_read}), "
                f"nsamp_read({nsamp_read}), nspectra({nspectra})")
            if nstart_read < 0 and nstop_read > nspectra:
                logger.info(
                    "Padding with median in the beginning and the end")
            elif nstart_read < 0:
                logger.info("Padding with median in the beginning")
            else:
                logger.info("Padding with median in the end")

            # Read only the part of the window that lies inside the file.
            first = max(nstart_read, 0)
            last = min(nstop_read, nspectra)
            available = self.get_data(nstart=first, nsamp=last - first)

            channel_median = np.median(available, axis=0)
            data = (np.ones(
                (nsamp_read, self.your_header.nchans),
                dtype=self.your_header.dtype,
            ) * channel_median[None, :])
            # Place the real samples at their position inside the window.
            offset = first - nstart_read
            data[offset:offset + (last - first), :] = available

        # astype returns a new array, so the masking below never touches the
        # buffer returned by get_data.
        self.data = data.astype(self.your_header.dtype)

        if self.kill_mask.any():
            logger.info("Applying the kill mask")
            assert len(self.kill_mask) == self.data.shape[1]
            self.data[:, self.kill_mask] = 0

        if self.flag_rfi:
            # The filter gets its own copy so it cannot alias self.data.
            flagged = self.data.copy()
            mask = sk_sg_filter(
                data=flagged,
                your_object=self,
                spectral_kurtosis_sigma=self.spectral_kurtosis_sigma,
                savgol_frequency_window=self.savgol_frequency_window,
                savgol_sigma=self.savgol_sigma,
            )
            self.rfi_mask = mask
            flagged[:, self.rfi_mask] = 0
            self.data = flagged
        return self
Example #4
0
    def to_fil(self,
               nstart=None,
               nsamp=None,
               c=None,
               outdir=None,
               outname=None,
               flag_rfi=False,
               progress=None,
               sk_sig=4,
               sg_fw=15,
               sg_sig=4,
               zero_dm_subt=False):
        """
        Writes out a Filterbank File.

        The data is read and written in chunks of at most 4096 * 24 spectra,
        covering exactly the samples ``nstart`` to ``nstart + nsamp``.

        Args:

            nstart: Start sample to read from (defaults to 0)

            nsamp: Number of samples to write (defaults to all spectra from nstart to the end of the file)

            c: Required frequency channel range [min_chan, max_chan] (excludes the higher channel number)

            outdir: Output directory for Filterbank file (defaults to the directory of the input file)

            outname: Name of the Filterbank file to write to (defaults to a name derived from the input file with a `_converted.fil` suffix)

            progress: Passed to tqdm as `disable`; a truthy value hides the progress bar

            flag_rfi: To turn on RFI flagging

            sk_sig: Sigma for spectral kurtosis filter

            sg_fw: Filter window for savgol filter

            sg_sig: Sigma for savgol filter

            zero_dm_subt: Enable zero-DM RFI excision

        """
        header = self.your_obj.your_header
        out_dtype = header.dtype
        is_integer = np.issubdtype(out_dtype, np.integer)

        if c:
            min_c = int(np.min(c))
            max_c = int(np.max(c))
        else:
            min_c = 0
            max_c = len(self.your_obj.chan_freqs)

        chan_freq = self.your_obj.chan_freqs[min_c:max_c]
        nchans = len(chan_freq)

        if not nstart:
            nstart = 0

        if not nsamp:
            # Only the spectra after nstart are available for reading.
            nsamp = header.native_nspectra - nstart

        # Chunk boundaries: every chunk starts inside [nstart, stop) and the
        # last one is shortened so that exactly nsamp spectra are read.
        max_chunk = 4096 * 24
        stop = nstart + nsamp
        chunk_starts = range(nstart, stop, max_chunk)

        original_dir, orig_basename = os.path.split(header.filename)
        if not outname:
            name, ext = os.path.splitext(orig_basename)
            if ext == '.fits':
                # Drop the trailing "_xxxx" part of the PSRFITS file name.
                parts = name.split('_')
                if len(parts) > 1:
                    name = '_'.join(parts[:-1])
            outname = name + '_converted.fil'

        if not outdir:
            outdir = original_dir

        for st in tqdm.tqdm(chunk_starts,
                            total=len(chunk_starts),
                            disable=progress):
            samp = min(max_chunk, stop - st)
            logger.debug(
                f'Reading spectra {st}-{st + samp} in file {header.filename}'
            )
            data = self.your_obj.get_data(st, samp)
            data = data[:, min_c:max_c]
            if flag_rfi:
                # NOTE(review): positional order (sk_sig before nchans) is
                # kept from the original call; confirm it matches the
                # signature of sk_sg_filter.
                mask = sk_sg_filter(data, self.your_obj, sk_sig, nchans, sg_fw,
                                    sg_sig)

                fill_value = np.mean(data[:, ~mask])
                if is_integer:
                    # Round instead of letting the assignment truncate.
                    fill_value = np.around(fill_value)
                data[:, mask] = fill_value

            if zero_dm_subt:
                logger.debug('Subtracting 0-DM time series from the data')
                data = data - data.mean(1)[:, None]
                if is_integer:
                    # Negative residuals would wrap around in the cast below.
                    limits = np.iinfo(out_dtype)
                    data = np.clip(data, limits.min, limits.max)

            data = data.astype(out_dtype)
            logger.info(
                f'Writing data from spectra {st}-{st + samp} in the frequency channel range {min_c}-{max_c} '
                f'to filterbank')
            # nstart (not st) is passed on purpose, exactly as before: it is
            # the first sample of the whole output file.
            write_fil(data,
                      self.your_obj,
                      nchans=nchans,
                      chan_freq=chan_freq,
                      outdir=outdir,
                      filename=outname,
                      nstart=nstart)
            logger.debug(
                f'Successfully written data from spectra {st}-{st + samp} to filterbank'
            )

        logger.debug('Read all the necessary spectra')
Example #5
0
    def to_fits(self,
                nstart=None,
                c=None,
                nsamp=None,
                npsub=-1,
                outdir=None,
                outname=None,
                progress=None,
                flag_rfi=False,
                sk_sig=4,
                sg_fw=15,
                sg_sig=4,
                zero_dm_subt=False):
        """
        Writes out a PSRFITS file

        The file is first created by `initialize_psrfits` and then filled in
        place, ten subints at a time. The last chunk is padded with zeros if
        fewer spectra are read than the subints require.

        Args:

            nstart: Start sample number to read from (defaults to 0)

            nsamp: Number of samples to read/write

            c: Required frequency channel range [min_chan, max_chan] (excludes the higher channel number)

            npsub: Number of spectra per subint (-1 selects int(1 / tsamp), i.e. about one second of data)

            outdir: Output directory for the PSRFITS file (defaults to the current working directory)

            outname: Name of the PSRFITS file to write to (defaults to a name derived from the input file with a `_converted.fits` suffix)

            progress: Passed to tqdm as `disable`; a truthy value hides the progress bar

            flag_rfi: To turn on RFI flagging

            sk_sig: Sigma for spectral kurtosis filter

            sg_fw: Filter window for savgol filter

            sg_sig: Sigma for savgol filter

            zero_dm_subt: Enable zero-DM RFI excision

        """
        header = self.your_obj.your_header
        out_dtype = header.dtype
        is_integer = np.issubdtype(out_dtype, np.integer)

        if npsub == -1:
            npsub = int(1.0 / header.tsamp)

        # A subint cannot hold more spectra than are going to be written.
        if nsamp and nsamp < npsub:
            npsub = nsamp

        if not outname:
            original_dir, orig_basename = os.path.split(header.filename)
            name, ext = os.path.splitext(orig_basename)
            if ext == '.fits':
                # Drop the trailing "_xxxx" part of the PSRFITS file name.
                parts = name.split('_')
                if len(parts) > 1:
                    name = '_'.join(parts[:-1])
            outname = name + '_converted.fits'

        if not outdir:
            outdir = os.getcwd()

        outfile = outdir + '/' + outname

        if c:
            min_c = int(np.min(c))
            max_c = int(np.max(c))
        else:
            min_c = 0
            max_c = len(self.your_obj.chan_freqs)

        chan_freqs = self.your_obj.chan_freqs[min_c:max_c]
        nchans = len(chan_freqs)

        # nstart is forwarded as given (possibly None), as in the original.
        initialize_psrfits(outfile=outfile,
                           y=self.your_obj,
                           npsub=npsub,
                           nstart=nstart,
                           nsamp=nsamp,
                           chan_freqs=chan_freqs)

        nifs = header.npol

        logger.info("Filling PSRFITS file with data")

        # Loop through chunks of data to write to PSRFITS
        n_read_subints = 10
        if not nstart:
            nstart = 0

        # Open PSRFITS file; the finally clause guarantees it is closed even
        # if reading, flagging or writing a chunk fails.
        hdulist = fits.open(outfile, mode='update')
        try:
            hdu = hdulist[1]
            nsubints = len(hdu.data[:]['data'])
            logger.info(f'Number of subints to write {nsubints}')

            for istart in tqdm.tqdm(np.arange(0, nsubints, n_read_subints),
                                    disable=progress):
                istop = min(istart + n_read_subints, nsubints)
                isub = istop - istart

                logger.info(
                    f"Writing data to {outfile} from subint = {istart} to {istop}."
                )

                # Read in nread samples from the input file
                nread = isub * npsub
                data = self.your_obj.get_data(nstart=nstart, nsamp=nread)
                data = data[:, min_c:max_c]
                if flag_rfi:
                    # NOTE(review): positional order (sk_sig before nchans)
                    # is kept from the original call; confirm it matches the
                    # signature of sk_sg_filter.
                    mask = sk_sg_filter(data, self.your_obj, sk_sig, nchans,
                                        sg_fw, sg_sig)

                    fill_value = np.mean(data[:, ~mask])
                    if is_integer:
                        # Round instead of letting the assignment truncate.
                        fill_value = np.around(fill_value)
                    data[:, mask] = fill_value

                if zero_dm_subt:
                    logger.debug('Subtracting 0-DM time series from the data')
                    data = data - data.mean(1)[:, None]
                    if is_integer:
                        # Negative residuals would wrap around when cast to
                        # the output dtype below.
                        limits = np.iinfo(out_dtype)
                        data = np.clip(data, limits.min, limits.max)

                logger.debug(
                    f'Shape of data array after get_data is {data.shape}')
                nstart += nread

                nvals = isub * npsub * nifs
                if data.shape[0] < nvals:
                    logger.debug(
                        f'nspectra in this chunk ({data.shape[0]}) < nsubints * npsub * nifs ({nvals})'
                    )
                    logger.debug('Appending zeros at the end to fill the subint')
                    pad_back = np.zeros((nvals - data.shape[0], data.shape[1]))
                    data = np.vstack((data, pad_back))

                data = np.reshape(data, (isub, npsub, nifs, nchans))

                # Put data in hdu data array
                logger.debug(
                    f'Writing data of shape {data.shape} to {outfile}.')
                hdu.data[istart:istop]['data'][:, :, :, :] = data[:].astype(
                    out_dtype)

                # Write to file
                hdulist.flush()

            logger.info(f'All spectra written to {outfile}')
        finally:
            # Close open FITS file
            hdulist.close()