Example #1
 def clean_file(self, middle, modes):
     params = self.params
     file_name = (params['input_root'] + middle + params['input_end'])
     # Output parameters.
     Writer = core.fitsGBT.Writer(feedback=self.feedback)
     out_filename = (params['output_root'] + middle + params['output_end'])
     band_inds = params["IFs"]
     Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
     n_bands = len(Reader.IF_set)
     if not band_inds:
         band_inds = range(n_bands)
     # Number of bands we actually process.
     n_bands_proc = len(band_inds)
     # Get the key that will group this file with other files.
     key = get_key(middle)
     # Read one block to figure out how many polarizations and channels
     # there are.
     Data = Reader.read(0, 0)
     n_pol = Data.dims[1]
     n_cal = Data.dims[2]
     n_chan = Data.dims[3]
     for ii in range(n_bands_proc):
         Blocks = Reader.read((), ii)
         this_band_modes = modes[ii, ...]
         for Data in Blocks:
             clean_data(Data, this_band_modes)
             Writer.add_data(Data)
     # Write the data back out.
     utils.mkparents(out_filename)
     Writer.write(out_filename)
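
A minimal driver sketch (not from the source) of how clean_file might be invoked over many files; get_modes is a hypothetical helper, and the shape of the modes array (one slice per processed band) is an assumption based on the modes[ii, ...] indexing above:

    def clean_all(self):
        # Loop over the configured files, cleaning each one with its
        # per-band set of modes.
        params = self.params
        for middle in params['file_middles']:
            modes = self.get_modes(middle)  # hypothetical helper
            self.clean_file(middle, modes)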
Example #2
    def execute(self) :
        """Process all data."""
        
        # You have access to the input parameters through the dictionary
        # self.params.
        params = self.params
        # If you have output files, make parent directories if need be.
        utils.mkparents(params['output_root'])
        # Write the input parameters to file so you can go back and look at
        # them.
        parse_ini.write_params(params, params['output_root'] + 'params.ini',
                               prefix=self.prefix)

        # Loop over the files to process.
        for file_middle in params['file_middles'] :
            input_fname = (params['input_root'] + file_middle +
                           params['input_end'])
            
            # Read in the data.  The reader is an object that can read
            # DataBlock objects out of a fits file.
            Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
            
            # Some examples of how you would read DataBlock Objects:
            first_scan_and_IF_DataBlock = Reader.read(scans=0,IFs=0)
            second_scan_and_first_IF_DataBlock = Reader.read(scans=1,IFs=0)
            list_of_a_few_data_blocks = Reader.read(scans=(1,2,3),IFs=0)
            list_of_all_data_blocks = Reader.read(scans=(),IFs=())
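
Each read call above returns DataBlock objects (a single block for scalar indices, a list for tuples). As a minimal sketch, using only the API already shown in these examples, the usual round trip back to disk looks like:

    Writer = fitsGBT.Writer(feedback=self.feedback)
    for Data in Reader.read(scans=(), IFs=()):
        Writer.add_data(Data)
    output_fname = (params['output_root'] + file_middle +
                    params['output_end'])
    utils.mkparents(output_fname)
    Writer.write(output_fname)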
Example #3
    def process_file(self, file_ind):

        params = self.params

        scan_length = params["scan_length"]

        file_middle = params["file_middles"][file_ind]
        input_fname = params["input_root"] + file_middle + params["input_end"]
        output_fname = params["output_root"] + file_middle + params["output_end"]
        Writer = fitsGBT.Writer(feedback=self.feedback)
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        scan_inds = params["scans"]
        if scan_inds is None or len(scan_inds) == 0:
            scan_inds = range(len(Reader.scan_set))
        # Loop over scans.
        for thisscan in scan_inds:
            Blocks = Reader.read(thisscan, params["IFs"], force_tuple=True)
            for Data in Blocks:
                self.action(Data, Writer)
            del Blocks
            gc.collect()

        # Go to a new line if we are printing statistics.
        if hasattr(self, "feedback_title") and self.feedback > 1:
            print ""
        # Finally write the data back to file.
        utils.mkparents(output_fname)
        Writer.write(output_fname)
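
Note the guard order: the None test must come before len(), since len(None) raises a TypeError. A small stand-alone version of the corrected idiom:

    def normalize_indices(inds, n_total):
        # None or empty means "process everything"; test for None first
        # so len() is never called on it.
        if inds is None or len(inds) == 0:
            return range(n_total)
        return inds

    normalize_indices((), 4)   # -> [0, 1, 2, 3] (range returns a list in Python 2)
    normalize_indices([2], 4)  # -> [2]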
Example #4
    def mpiexecute(self, n_processes=1):
        """
        Process all data with MPI
        To start with MPI, you need to change manager.py 
        calling mpiexecute instead of execute. 
        and do,

        $ mpirun -np 9 --bynode  python  manager.py pipeline.pipe

        """

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        params = self.params
        if rank == 0:
            output_fname = params["output_root"] + params["file_middles"][0] + params["output_end"]
            utils.mkparents(output_fname)

        comm.barrier()

        n_files = len(params["file_middles"])
        for file_ind in range(n_files)[rank::size]:

            self.process_file(file_ind)

        comm.barrier()
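
The range(n_files)[rank::size] slice deals the files out round-robin, one stride per MPI rank, so no two ranks ever touch the same file. A tiny illustration that needs no MPI:

    n_files, size = 10, 3
    for rank in range(size):
        print rank, range(n_files)[rank::size]
    # 0 [0, 3, 6, 9]
    # 1 [1, 4, 7]
    # 2 [2, 5, 8]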
Example #5
    def execute(self):
        """Process all data."""

        # You have access to the input parameters through the dictionary
        # self.params.
        params = self.params
        # If you have output files, make parent directories if need be.
        utils.mkparents(params['output_root'])
        # Write the input parameters to file so you can go back and look at
        # them.
        parse_ini.write_params(params,
                               params['output_root'] + 'params.ini',
                               prefix=self.prefix)

        # Loop over the files to process.
        for file_middle in params['file_middles']:
            input_fname = (params['input_root'] + file_middle +
                           params['input_end'])

            # Read in the data.  The reader is an object that can read
            # DataBlock objects out of a fits file.
            Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)

            # Some examples of how you would read DataBlock Objects:
            first_scan_and_IF_DataBlock = Reader.read(scans=0, IFs=0)
            second_scan_and_first_IF_DataBlock = Reader.read(scans=1, IFs=0)
            list_of_a_few_data_blocks = Reader.read(scans=(1, 2, 3), IFs=0)
            list_of_all_data_blocks = Reader.read(scans=(), IFs=())
Example #6
 def process_file(self, file_ind) :
     params = self.params
     file_middle = params['file_middles'][file_ind]
     input_fname = (params['input_root'] + file_middle +
                    params['input_end'])
     sub_input_fname = (params['subtracted_input_root'] + file_middle
                        + params['input_end'])
     output_fname = (params['output_root']
                     + file_middle + params['output_end'])
     sub_output_fname = (params['subtracted_output_root']
                         + file_middle + params['output_end'])
     Writer = fitsGBT.Writer(feedback=self.feedback)
     SubWriter = fitsGBT.Writer(feedback=self.feedback)
     
     # Read in the data, and loop over data blocks.
     Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
     SubReader = fitsGBT.Reader(sub_input_fname, feedback=self.feedback)
     if (sp.any(Reader.scan_set != SubReader.scan_set)
         or sp.any(Reader.IF_set != SubReader.IF_set)) :
         raise ce.DataError("IFs and scans don't match signal subtracted"
                            " data.")
     # Get the number of scans if asked for all of them.
     scan_inds = params['scans']
     if scan_inds is None or len(scan_inds) == 0 :
         scan_inds = range(len(Reader.scan_set))
     if_inds = params['IFs']
     if if_inds is None or len(if_inds) == 0 :
         if_inds = range(len(Reader.IF_set))
     if self.feedback > 1 :
         print "New flags each block:",
     # Loop over scans and IFs
     for thisscan in scan_inds :
         for thisIF in if_inds :
             Data = Reader.read(thisscan, thisIF)
             SubData = SubReader.read(thisscan, thisIF)
             n_flags = ma.count_masked(Data.data)
             # Now do the flagging.
             flag(Data, SubData, params['thres'])
             Data.add_history("Reflaged for outliers.", ("Used file: "
                 + utils.abbreviate_file_path(sub_input_fname),))
             SubData.add_history("Reflaged for outliers.")
             Writer.add_data(Data)
             SubWriter.add_data(SubData)
             # Report the number of new flags.
             n_flags = ma.count_masked(Data.data) - n_flags
             if self.feedback > 1 :
                 print n_flags,
     if self.feedback > 1 :
         print ''
     # Finally write the data back to file.
     utils.mkparents(output_fname)
     utils.mkparents(sub_output_fname)
     Writer.write(output_fname)
     SubWriter.write(sub_output_fname)
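
This variant flags Data and SubData independently, so the before/after flag counts can drift if the two masks start out different. The fuller variant in Example #12 below first forces the masks to agree, roughly:

    # Union the two masks before counting (the extra step Example #12 adds):
    SubData.data[ma.getmaskarray(Data.data)] = ma.masked
    Data.data[ma.getmaskarray(SubData.data)] = ma.masked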
Example #7
    def execute(self, n_processes=1):
        """Process all data.
        
        If n_processes > 1 then this function spawns a bunch of subprocesses
        in parallel, each of which deals with a single data file.  This both
        speeds things up and avoids any memory leaks (like the bad one in
        pyfits).
        """

        params = self.params
        # Make parent directories if need be.
        utils.mkparents(params['output_root'])
        parse_ini.write_params(params,
                               params['output_root'] + 'params.ini',
                               prefix=self.prefix)
        n_new = n_processes - 1
        n_files = len(params['file_middles'])
        # Loop over the files to process.
        if n_new <= 0:
            # Single process mode.
            for file_ind in range(n_files):
                self.process_file(file_ind)
        elif n_new > 32:
            raise ValueError("Asked for a rediculouse number of processes: " +
                             str(n_new) + ".  Limit is 32.")
        else:
            # Spawn a bunch of new processes each with a single file to
            # analyse.
            # Can't use an mp.Pool here because we don't want to reuse processes
            # due to pyfits memory leak.
            process_list = range(n_new)
            for ii in xrange(n_files + n_new):
                if ii >= n_new:
                    process_list[ii % n_new].join()
                    if process_list[ii % n_new].exitcode != 0:
                        raise RuntimeError("A thread failed with exit code: " +
                                           str(process_list[ii %
                                                            n_new].exitcode))
                if ii < n_files:
                    process_list[ii % n_new] = mp.Process(
                        target=self.process_file, args=(ii, ))
                    process_list[ii % n_new].start()
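
The slot rotation keeps at most n_new workers alive: slot ii % n_new is joined, and its exit code checked, before being reused. A stripped-down, self-contained sketch of the same pattern, with work standing in for self.process_file:

    import multiprocessing as mp

    def work(ind):
        pass  # stand-in for self.process_file(ind)

    def run_all(n_files, n_new):
        slots = [None] * n_new
        for ii in xrange(n_files + n_new):
            # Join the previous occupant of this slot before reusing it.
            if ii >= n_new and slots[ii % n_new] is not None:
                slots[ii % n_new].join()
                if slots[ii % n_new].exitcode != 0:
                    raise RuntimeError("A process failed.")
            if ii < n_files:
                slots[ii % n_new] = mp.Process(target=work, args=(ii,))
                slots[ii % n_new].start()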
Example #8
    def execute(self, n_processes=1) :
        """Process all data.
        
        If n_processes > 1 then this function spawns a bunch of subprocesses
        in parallel, each of which deals with a single data file.  This both
        speeds things up and avoids any memory leaks (like the bad one in
        pyfits).
        """

        params = self.params
        # Make parent directories if need be.
        utils.mkparents(params['output_root'])
        parse_ini.write_params(params, params['output_root'] + 'params.ini',
                               prefix=self.prefix)
        n_new = n_processes - 1
        n_files = len(params['file_middles'])
        # Loop over the files to process.
        if n_new <= 0 :
            # Single process mode.
            for file_ind in range(n_files) :
                self.process_file(file_ind)
        elif n_new > 32 :
            raise ValueError("Asked for a rediculouse number of processes: " +
                             str(n_new) + ".  Limit is 32.")
        else :
            # Spawn a bunch of new processes each with a single file to
            # analyse.
            # Can't use an mp.Pool here because we don't want to reuse processes
            # due to pyfits memory leak.
            process_list = range(n_new)
            for ii in xrange(n_files + n_new) :
                if ii >= n_new :
                    process_list[ii%n_new].join()
                    if process_list[ii%n_new].exitcode != 0 : 
                        raise RuntimeError("A thread failed with exit code: "
                                        + str(process_list[ii%n_new].exitcode))
                if ii < n_files :
                    process_list[ii%n_new] = mp.Process(
                        target=self.process_file, args=(ii,))
                    process_list[ii%n_new].start()
Example #9
    def process_file(self, file_ind):
        """Process on file from the list to be processed based on the passed
        index.
        """

        self.file_ind = file_ind
        params = self.params
        file_middle = params['file_middles'][file_ind]
        input_fname = (params['input_root'] + file_middle +
                       params['input_end'])
        output_fname = (params['output_root'] + file_middle +
                        params['output_end'])
        Writer = fitsGBT.Writer(feedback=self.feedback)

        # Read in the data, and loop over data blocks.
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        if hasattr(self, 'feedback_title') and self.feedback > 1:
            print self.feedback_title,
        # Get the number of scans if asked for all of them.
        scan_inds = params['scans']
        if scan_inds is None or len(scan_inds) == 0:
            scan_inds = range(len(Reader.scan_set))
        # Loop over scans.
        for thisscan in scan_inds:
            Blocks = Reader.read(thisscan, params['IFs'], force_tuple=True)

            # Function that loops over DataBlocks within a scan.
            NewBlocks = self.scan_action(Blocks)
            del Blocks
            Writer.add_data(NewBlocks)

        # Go to a new line if we are printing statistics.
        if hasattr(self, 'feedback_title') and self.feedback > 1:
            print ''
        # Finally write the data back to file.
        utils.mkparents(output_fname)
        Writer.write(output_fname)
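
scan_action is the hook subclasses supply: it receives the tuple of DataBlocks for one scan and returns the blocks to be written. A minimal conforming sketch (the body is illustrative, not from the source):

    def scan_action(self, Blocks):
        for Data in Blocks:
            Data.add_history("Processed.")  # illustrative placeholder
        return Blocks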
Example #10
    def process_file(self, file_ind) :
        """Process on file from the list to be processed based on the passed
        index.
        """
	
        self.file_ind = file_ind
        params = self.params
        file_middle = params['file_middles'][file_ind]
        input_fname = (params['input_root'] + file_middle +
                       params['input_end'])
        output_fname = (params['output_root']
                        + file_middle + params['output_end'])
        Writer = fitsGBT.Writer(feedback=self.feedback)
        
        # Read in the data, and loop over data blocks.
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        if hasattr(self, 'feedback_title') and self.feedback > 1:
            print self.feedback_title,
        # Get the number of scans if asked for all of them.
        scan_inds = params['scans']
        if scan_inds is None or len(scan_inds) == 0 :
            scan_inds = range(len(Reader.scan_set))
        # Loop over scans.
        for thisscan in scan_inds :
            Blocks = Reader.read(thisscan, params['IFs'], force_tuple=True)
            
            # Function that loops over DataBlocks within a scan.
            NewBlocks = self.scan_action(Blocks)
            del Blocks
            Writer.add_data(NewBlocks)
        
        # Go to a new line if we are printing statistics.
        if hasattr(self, 'feedback_title') and self.feedback > 1:
            print ''
        # Finally write the data back to file.
        utils.mkparents(output_fname)
        Writer.write(output_fname)
Example #11
 def clean_file(self, middle, modes):
     params = self.params
     file_name = (params['input_root'] + middle
                  + params['input_end'])
     # Output parameters.
     Writer = core.fitsGBT.Writer(feedback=self.feedback)
     out_filename = (params['output_root'] + middle
                     + params['output_end'])
     band_inds = params["IFs"]
     Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
     n_bands = len(Reader.IF_set)
     if not band_inds:
         band_inds = range(n_bands)
     # Number of bands we actually process.
     n_bands_proc = len(band_inds)
     # Get the key that will group this file with other files.
     key = get_key(middle)
     # Read one block to figure out how many polarizations and channels
     # there are.
     Data = Reader.read(0, 0)
     n_pol = Data.dims[1]
     n_cal = Data.dims[2]
     n_chan = Data.dims[3]
     for ii in range(n_bands_proc):
         Blocks = Reader.read((), ii)
         this_band_modes = modes[ii, ...]
         for Data in Blocks:
             clean_data(Data, this_band_modes)
             Writer.add_data(Data)
     # Write the data back out.
     utils.mkparents(out_filename)
     Writer.write(out_filename)
Example #12
    def process_file(self, file_ind):
        params = self.params
        file_middle = params['file_middles'][file_ind]
        input_fname = (params['input_root'] + file_middle +
                       params['input_end'])
        sub_input_fname = (params['subtracted_input_root'] + file_middle +
                           params['input_end'])
        output_fname = (params['output_root'] + file_middle +
                        params['output_end'])
        sub_output_fname = (params['subtracted_output_root'] + file_middle +
                            params['output_end'])
        Writer = fitsGBT.Writer(feedback=self.feedback)
        SubWriter = fitsGBT.Writer(feedback=self.feedback)

        # Read in the data, and loop over data blocks.
        Reader = fitsGBT.Reader(input_fname, feedback=self.feedback)
        SubReader = fitsGBT.Reader(sub_input_fname, feedback=self.feedback)
        if (sp.any(Reader.scan_set != SubReader.scan_set)
                or sp.any(Reader.IF_set != SubReader.IF_set)):
            raise ce.DataError("IFs and scans don't match signal subtracted"
                               " data.")
        # Get the number of scans if asked for all of them.
        scan_inds = params['scans']
        if scan_inds is None or len(scan_inds) == 0:
            scan_inds = range(len(Reader.scan_set))
        if_inds = params['IFs']
        if if_inds is None or len(if_inds) == 0:
            if_inds = range(len(Reader.IF_set))
        if self.feedback > 1:
            print "New flags each block:",
        # Loop over scans and IFs
        for thisscan in scan_inds:
            for thisIF in if_inds:
                Data = Reader.read(thisscan, thisIF)
                SubData = SubReader.read(thisscan, thisIF)
                # Make sure they have agreeing masks to start.
                SubData.data[ma.getmaskarray(Data.data)] = ma.masked
                Data.data[ma.getmaskarray(SubData.data)] = ma.masked
                # Get initial number of flags.
                n_flags = ma.count_masked(Data.data)
                # Now do the flagging.
                flag(Data, SubData, params['thres'],
                     params['max_noise_factor'],
                     params['smooth_modes_subtract'], params['filter_type'])
                Data.add_history(
                    "Reflagged for outliers.",
                    ("Used file: " +
                     utils.abbreviate_file_path(sub_input_fname), ))
                SubData.add_history("Reflagged for outliers.")
                Writer.add_data(Data)
                SubWriter.add_data(SubData)
                # Report the number of new flags.
                n_flags = ma.count_masked(Data.data) - n_flags
                if self.feedback > 1:
                    print n_flags,
        if self.feedback > 1:
            print ''
        # Finally write the data back to file.
        utils.mkparents(output_fname)
        utils.mkparents(sub_output_fname)
        Writer.write(output_fname)
        SubWriter.write(sub_output_fname)
Example #13
 def execute(self, n_processes=1):
     utils.mkparents(self.params['subtracted_output_root'])
     base_single.BaseSingle.execute(self, n_processes)
Example #14
    def execute(self, nprocesses=1) :
        params = self.params
        scans = list(params["scans"])
        # Make sure that the output directory exists.
        utils.mkparents(params["output_root"])

        # Now we need to read in the Scan Log fits file.
        log_dir = params["fits_log_dir"]
        scan_log_list = pyfits.open(log_dir + "/ScanLog.fits", "readonly")
        # From the header we need the project session.
        session = scan_log_list[0].header["PROJID"].split('_')[-1]
        scan_log = scan_log_list[1].data
        self.scan_log = scan_log
        
        # Keep track of scans already processed because some scans are 
        # processed by being in the same map as another.
        finished_scans = []
        for initial_scan in scans :
            if initial_scan in finished_scans :
                continue
            self.initial_scan = initial_scan

            # Open the go fits file.
            scan_log_files = scan_log.field('FILEPATH')[
                scan_log.field('SCAN')==initial_scan]
            go_file = log_dir + get_filename_from_key(scan_log_files, "GO")
            go_hdu = pyfits.open(go_file)[0].header

            # From the go information get the source and the scan type.
            object = go_hdu["OBJECT"].strip()
            self.proceedure = go_hdu["PROCNAME"].strip().lower()

            if params["combine_map_scans"] :
                # Read the go file and figure out all the scans in the same
                # map.
                # Check the go files for all scans to make sure everything
                # is consistent.
                self.n_scans_proc = go_hdu["PROCSIZE"]
                # Which scan this is of the sequence (1 indexed).
                self.initial_scan_ind = go_hdu["PROCSEQN"]
                scans_this_file = (sp.arange(self.n_scans_proc, dtype=int) + 1 
                                   - self.initial_scan_ind + initial_scan)
                scans_this_file = list(scans_this_file)

            else :
                scans_this_file = [initial_scan]
            finished_scans += scans_this_file
            # Initialize a list to store all the data that will be saved to a
            # single fits file (generally 8 scans).
            Block_list = []
            # Loop over the scans to process for this output file.
            np = nprocesses
            n = len(scans_this_file)
            procs = [None]*np
            pipes = [None]*np
            for ii in range(n+np) :
                if ii >= np :
                    scan = scans_this_file[ii-np]
                    if scan not in params["blacklist"] :
                        Data = pipes[ii%np].recv()
                        procs[ii%np].join()
                        # Store our processed data.
                        if Data == -1 :
                            # Scan procedure aborted.
                            message = ("Scan procedures do not agree."
                                    " Perhaps a scan was aborted. Scans: "
                                    + str(scan) + ", " + str(initial_scan)
                                    + " in directory: " + log_dir)
                            raise ce.DataError(message)
                        elif Data is None :
                            message = ("Missing or corrupted psrfits file."
                                " Scan: " + str(scan) + " file roots: " 
                                + str(params["guppi_input_roots"]))
                            warnings.warn(message)
                        else :
                            Block_list.append(Data)
                if ii < n :
                    scan = scans_this_file[ii]
                    # The actual reading of the guppi fits file needs to
                    # be split off in a different process due to a memory
                    # leak in pyfits. This also allows parallelization.
                    # Make a pipe over which we will receive our data back.
                    if scan not in params["blacklist"] :
                        P_here, P_far = mp.Pipe()
                        pipes[ii%np] = P_here
                        # Start the forked process.
                        p = mp.Process(target=self.processfile,
                                       args=(scan, P_far))
                        p.start()
                        procs[ii%np] = p
            # End loop over scans (input files).
            # Now we can write our list of scans to disk.
            if len(scans_this_file) > 1 :
                str_scan_range = (str(scans_this_file[0]) + '-' +
                                  str(scans_this_file[-1]))
            else :
                str_scan_range = str(scans_this_file[0])
            out_file = (params["output_root"] + session + '_' + object + '_' +
                        self.proceedure + '_' + str_scan_range + '.fits')
            
            # Output data is pretty large so we'd better protect the 
            # pyfits part in a process lest memory leaks kill us.
            if len(Block_list) > 0 :
                p = mp.Process(target=out_write, args=(Block_list, out_file))
                p.start()
                del Block_list
                p.join()
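
The Pipe/Process pattern here isolates each leaky pyfits read in a child process and ships the result back to the parent. A self-contained sketch of just that mechanism, with child standing in for self.processfile (wrap the top-level part in a __main__ guard on platforms that spawn rather than fork):

    import multiprocessing as mp

    def child(scan, pipe):
        # Do the leak-prone read here; the memory is reclaimed when the
        # child process exits.
        result = "data for scan %d" % scan
        pipe.send(result)
        pipe.close()

    P_here, P_far = mp.Pipe()
    p = mp.Process(target=child, args=(7, P_far))
    p.start()
    print P_here.recv()  # -> 'data for scan 7'
    p.join()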
Example #15
def svd_spec_time(data, params, file_ind, freq=None, time=None):

    # data[...,:100] = np.inf
    # data[...,-100:] = np.inf
    # data[...,1640:1740] = np.inf
    # data[...,2066:2166] = np.inf

    time_mask = np.logical_not(np.all(np.logical_not(np.isfinite(data)), axis=(2, 3)))
    freq_mask = np.all(np.isfinite(data[time_mask[..., None, None]]), axis=(0, 2))

    data[freq_mask[None, :, None, :]] = np.ma.masked

    # freq_mask = np.any(np.isfinite(data), axis=(0, 2))
    weights = np.ones(data.shape)
    data_mask = np.logical_not(np.isfinite(data))
    weights[data_mask] = 0.0
    data[data_mask] = 0.0

    # if np.sum(weights) < np.prod(weights.shape) * 0.1:
    #    #print "Warning: too much data masked, no svd performed"
    #    msg = ("WARNING: too much data masked, no svd performed")
    #    warnings.warn(msg)
    #    data[data_mask] = np.inf
    #    return data

    sh = data.shape

    # for XX
    data_svd = data[:, 0, :, :].reshape([-1, sh[-1]])[:, freq_mask[0, :]]
    weight_svd = weights[:, 0, :, :].reshape([-1, sh[-1]])[:, freq_mask[0, :]]
    # check flag percent
    weight_svd = np.ma.array(weight_svd)
    weight_svd[weight_svd == 0] = np.ma.masked
    percent = float(np.ma.count_masked(weight_svd)) / weight_svd.size * 100
    print "Flag percent XX: %f%%" % percent
    if np.sum(weight_svd) < np.prod(weight_svd.shape) * 0.1 or data_svd.shape[-1] < 10:
        # print "Warning: too much data masked, no svd performed"
        msg = "WARNING: too much data masked for XX, no svd performed"
        warnings.warn(msg)
        data[data_mask] = np.inf
        return data
    vec_t, val, vec_f = linalg.svd(data_svd)
    vec_f = vec_f.T
    sorted_index = np.argsort(val)[::-1]

    vec_t = vec_t[:, sorted_index]
    vec_f = vec_f[:, sorted_index]
    val = val[sorted_index]

    modes = params["modes"]
    amps = sp.empty((modes, data_svd.shape[0]))
    for i in np.arange(modes):

        amp = sp.tensordot(vec_f[:, i], data_svd * weight_svd, axes=(0, 1))
        amp /= sp.tensordot(vec_f[:, i], vec_f[:, i][None, :] * weight_svd, axes=(0, 1))

        data_svd -= vec_f[:, i][None, :] * amp[:, None]

        amps[i, :] = amp
        del amp

    data[:, 0, :, :][..., freq_mask[0, :]] = data_svd.reshape([sh[0], 2, -1])

    if params["save_svd"]:

        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_XX.hdf5"
        utils.mkparents(f_name)
        f = h5py.File(f_name, "w")
        f["singular_values"] = val
        f["left_vectors"] = vec_t.T
        f["right_vectors"] = vec_f.T
        # f['outmap_left'] = outmap_left
        f["outmap_right"] = amps
        # f['map_left'] = map1
        f["map_right"] = data[:, 0, :, :]
        f["freq_mask"] = freq_mask[0, :]
        f["freq"] = freq
        f["time"] = time

        f.close()

    if params["save_plot"]:
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_XX.hdf5"
        utils.mkparents(f_name)
        check_svd(f_name, [val, vec_t.T, vec_f.T], freq_mask[0, :], freq)
        check_map(f_name, np.ma.array(data[:, 0, :, :]), time, freq)

    del data_svd, weight_svd, val, vec_t, vec_f, amps
    gc.collect()

    # for YY
    data_svd = data[:, 3, :, :].reshape([-1, sh[-1]])[:, freq_mask[3, :]]
    weight_svd = weights[:, 3, :, :].reshape([-1, sh[-1]])[:, freq_mask[3, :]]
    # check flag percent
    weight_svd = np.ma.array(weight_svd)
    weight_svd[weight_svd == 0] = np.ma.masked
    percent = float(np.ma.count_masked(weight_svd)) / weight_svd.size * 100
    print "Flag percent XX: %f%%" % percent
    if np.sum(weight_svd) < np.prod(weight_svd.shape) * 0.1 or data_svd.shape[-1] < 10:
        # print "Warning: too much data masked, no svd performed"
        msg = "WARNING: too much data masked for YY, no svd performed"
        warnings.warn(msg)
        data[data_mask] = np.inf
        return data
    vec_t, val, vec_f = linalg.svd(data_svd)
    vec_f = vec_f.T
    sorted_index = np.argsort(val)[::-1]

    vec_t = vec_t[:, sorted_index]
    vec_f = vec_f[:, sorted_index]
    val = val[sorted_index]

    modes = params["modes"]
    amps = sp.empty((modes, data_svd.shape[0]))
    for i in np.arange(modes):

        amp = sp.tensordot(vec_f[:, i], data_svd * weight_svd, axes=(0, 1))
        amp /= sp.tensordot(vec_f[:, i], vec_f[:, i][None, :] * weight_svd, axes=(0, 1))

        data_svd -= vec_f[:, i][None, :] * amp[:, None]

        amps[i, :] = amp
        del amp

    data[:, 3, :, :][..., freq_mask[3, :]] = data_svd.reshape([sh[0], 2, -1])

    if params["save_svd"]:

        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_YY.hdf5"
        utils.mkparents(f_name)
        f = h5py.File(f_name, "w")
        f["singular_values"] = val
        f["left_vectors"] = vec_t.T
        f["right_vectors"] = vec_f.T
        # f['outmap_left'] = outmap_left
        f["outmap_right"] = amps
        # f['map_left'] = map1
        f["map_right"] = data[:, 3, :, :]
        f["freq_mask"] = freq_mask[3, :]
        f["freq"] = freq
        f["time"] = time

        f.close()

    if params["save_plot"]:
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_YY.hdf5"
        utils.mkparents(f_name)
        check_svd(f_name, [val, vec_t.T, vec_f.T], freq_mask[3, :], freq)
        check_map(f_name, np.ma.array(data[:, 3, :, :]), time, freq)

    del data_svd, weight_svd, val, vec_t, vec_f, amps
    gc.collect()

    data[data_mask] = np.inf

    return data
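
Each mode amplitude above is a weighted projection, amp = <v, w*d> / <v, w*v>, fit independently per time sample. A quick numpy check of that identity on synthetic single-mode data (not from the source):

    import numpy as np

    rng = np.random.RandomState(0)
    n_time, n_freq = 5, 64
    vec = rng.randn(n_freq)                  # one spectral mode
    true_amp = rng.randn(n_time)
    data = true_amp[:, None] * vec[None, :]  # pure single-mode data
    weights = np.ones((n_time, n_freq))

    amp = np.tensordot(vec, data * weights, axes=(0, 1))
    amp /= np.tensordot(vec, vec[None, :] * weights, axes=(0, 1))
    print np.allclose(amp, true_amp)         # True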
Example #16
 def calibrate_file(self, middle, gain, freq):
     # This function is largely cut and pasted from process_file. I should
     # really combine the code into an iterator but that's a lot of work.
     # Alternatively, I could make a meta function and pass a function to it.
     params = self.params
     file_name = (params['input_root'] + middle
                  + params['input_end'])
     # Output parameters.
     Writer = core.fitsGBT.Writer(feedback=self.feedback)
     out_filename = (params['output_root'] + middle
                     + params['output_end'])
     band_inds = params["IFs"]
     Reader = core.fitsGBT.Reader(file_name, feedback=self.feedback)
     n_bands = len(Reader.IF_set)
     if not band_inds:
         band_inds = range(n_bands)
     # Number of bands we actually process.
     n_bands_proc = len(band_inds)
     # Get the key that will group this file with other files.
     key = get_key(middle)
     # Read one block to figure out how many polarizations and channels
     # there are.
     Data = Reader.read(0,0)
     n_pol = Data.dims[1]
     n_cal = Data.dims[2]
     n_chan = Data.dims[3]
     # Allocate memory for the outputs.
     corr = np.zeros((n_bands_proc, n_pol, n_cal, n_chan),
                      dtype=float)
     norm = np.zeros(corr.shape, dtype=int)
     freq = np.empty((n_bands_proc, n_chan))
     for ii in range(n_bands_proc):
         Blocks = Reader.read((), ii)
         Blocks[0].calc_freq()
         freq[ii,:] = Blocks[0].freq
         # We are going to look for an exact match for the map
         # frequencies. This could be made more general since the sub_map
         # function can handle partial overlap, but this will be fine for
         # now.
         for band_maps in self.maps:
             maps_freq = band_maps[0].get_axis('freq')
             if np.allclose(maps_freq, freq[ii,:]):
                 maps = band_maps
                 break
         else:
             raise NotImplementedError('No maps with frequency axis exactly'
                                       ' matching data.')
         # Check the polarization axis. If the same number of maps were
         # passed, check that the polarizations are in order.  If only one
         # map was passed, correlate all data polarizations against it.
         data_pols = Blocks[0].field['CRVAL4'].copy()
         if len(band_maps) == 1:
             maps_to_correlate = band_maps * len(data_pols)
         else:
             for jj in range(len(data_pols)):
                 if (misc.polint2str(data_pols[jj])
                     != self.params['map_polarizations'][jj]):
                     msg = ('Map polarizations not in same order'
                            ' as data polarizations.')
                     raise NotImplementedError(msg)
             maps_to_correlate = band_maps
         # Now process each block.
         for Data in Blocks:
             if params['diff_gain_cal_only']:
                 if tuple(Data.field['CRVAL4']) != (-5, -7, -8, -6):
                     msg = ("Expected polarizations to be ordered "
                            "(XX, XY, YX, YY).")
                     raise NotImplementedError(msg)
                 Data.data[:,0,:,:] /= gain[ii,0,:,:]
                 Data.data[:,3,:,:] /= gain[ii,3,:,:]
                 cross_gain = np.sqrt(gain[ii,0,:,:] * gain[ii,3,:,:])
                 Data.data[:,1,:,:] /= cross_gain
                 Data.data[:,2,:,:] /= cross_gain
             else:
                 Data.data /= gain[ii,...]
             Writer.add_data(Data)
     # Write the data back out.
     utils.mkparents(out_filename)
     Writer.write(out_filename)
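
The cross-polarization channels are divided by the geometric mean of the two auto-polarization gains. A one-line numeric sketch (values invented):

    import numpy as np
    cross_gain = np.sqrt(np.array([2.0, 8.0]) * np.array([8.0, 2.0]))
    print cross_gain  # [ 4.  4.]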
Example #17
 def execute(self, n_processes=1) :
     utils.mkparents(self.params['subtracted_output_root'])
     base_single.BaseSingle.execute(self, n_processes)
Example #18
def corr_svd(data, params, file_ind, freq=None, time=None):

    if not hasattr(data, "mask"):
        data = np.ma.array(data)
    data.mask = np.logical_or(data.mask, np.logical_not(np.isfinite(data)))

    if np.all(data.mask[:, ::3, ...]):
        msg = "WARNING: all data are masked, no svd performed"
        warnings.warn(msg)
        return data

    # Flag out bad points (values below the threshold).
    # sigma = np.ma.var(data)
    # mean = np.ma.mean(data)
    # data.mask = np.logical_or(data.mask, data < mean - 5*sigma)
    threshold = 4
    bad_mask = np.any(data[:, ::3, ...] < threshold, axis=(1, 2))
    data.mask = np.logical_or(data.mask, bad_mask[:, None, None, :])

    mask_perc = np.ma.count_masked(data[:, ::3, ...]) / float(data[:, ::3, ...].size)
    if mask_perc > 0.8:
        msg = ("WARNING: %f%% data are masked, no svd performed") % (mask_perc * 100)
        warnings.warn(msg)
        return data

    data[..., :100] = np.ma.masked
    data[..., -100:] = np.ma.masked
    data[..., 1640:1740] = np.ma.masked
    data[..., 2066:2166] = np.ma.masked

    # freq_mask = np.logical_not(np.any(np.all(data.mask, axis=0), axis=1))
    time_mask = np.logical_not(np.any(np.all(data.mask, axis=-1), axis=(1, 2)))
    freq_mask = np.logical_not(np.any(data.mask[time_mask, ...], axis=(0, 2)))
    # data[:,0,:,np.logical_not(freq_mask[0,:])] = np.ma.masked
    # data[:,1,:,np.logical_not(freq_mask[1,:])] = np.ma.masked
    # data[:,2,:,np.logical_not(freq_mask[2,:])] = np.ma.masked
    # data[:,3,:,np.logical_not(freq_mask[3,:])] = np.ma.masked
    data.mask = np.logical_or(data.mask, np.logical_not(freq_mask[None, :, None, :]))

    if not np.any(freq_mask[::3, :]):
        msg = "WARNING: all freq channels are masked, no svd performed"
        warnings.warn(msg)
        return data

    weights = np.ma.ones(data.shape)
    weights[data.mask] = np.ma.masked

    # subtract mean of each frequency
    # data_mean = np.sum(data, axis=0) / np.sum(weights, axis=0)
    # data_mean[data_mean==0] = np.inf
    # data /= data_mean[None, ...]

    # for XX
    if params["save_svd"]:
        map1_raw = copy.deepcopy(data[:, 0, 0, :].T)
        map2_raw = copy.deepcopy(data[:, 0, 1, :].T)

    if params["save_plot"]:
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_raw_XX.hdf5"
        utils.mkparents(f_name)
        check_map(f_name, data[:, 0, :, :], time, freq)
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_raw_YY.hdf5"
        utils.mkparents(f_name)
        check_map(f_name, data[:, 3, :, :], time, freq)

    corr, weight = find_modes.freq_covariance(
        data[:, 0, 0, :].T,
        data[:, 0, 1, :].T,
        weights[:, 0, 0, :].T,
        weights[:, 0, 1, :].T,
        freq_mask[0, :],
        freq_mask[0, :],
        no_weight=False,
    )

    svd_result = find_modes.get_freq_svd_modes(corr, corr.shape[0])

    map1, map2, outmap_left, outmap_right = subtract_foregrounds(
        svd_result,
        data[:, 0, 0, :].T,
        data[:, 0, 1, :].T,
        weights[:, 0, 0, :].T,
        weights[:, 0, 1, :].T,
        freq_mask[0, :],
        0,
        params["modes"],
    )

    data[:, 0, 0, :] = map1.T
    data[:, 0, 1, :] = map2.T

    if params["save_svd"]:

        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_XX.hdf5"
        utils.mkparents(f_name)
        f = h5py.File(f_name, "w")
        f["singular_values"] = svd_result[0]
        f["left_vectors"] = svd_result[1]
        f["right_vectors"] = svd_result[2]
        f["outmap_left"] = outmap_left
        f["outmap_right"] = outmap_right
        f["map_left"] = map1
        f["map_right"] = map2
        f["raw_left"] = map1_raw
        f["raw_right"] = map2_raw
        f["freq_mask"] = freq_mask[0, :]
        f["freq"] = freq
        f["time"] = time

        f.close()

    if params["save_plot"]:
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_XX.hdf5"
        utils.mkparents(f_name)
        check_svd(f_name, svd_result, freq_mask[0, :], freq)
        check_map(f_name, np.ma.array(data[:, 0, :, :]), time, freq)
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_spec_XX.hdf5"
        check_spec(f_name, np.ma.array(data[:, 0, :, :]), freq)
        # f_name = params['output_root'] + \
        #        params['file_middles'][file_ind] + '_corr_XX.hdf5'
        # check_corr(f_name, corr, weight)

    del corr, weight, svd_result, map1, map2, outmap_left, outmap_right
    gc.collect()

    # for YY
    if params["save_svd"]:
        map1_raw = copy.deepcopy(data[:, 3, 0, :].T)
        map2_raw = copy.deepcopy(data[:, 3, 1, :].T)

    corr, weight = find_modes.freq_covariance(
        data[:, 3, 0, :].T,
        data[:, 3, 1, :].T,
        weights[:, 3, 0, :].T,
        weights[:, 3, 1, :].T,
        freq_mask[3, :],
        freq_mask[3, :],
        no_weight=False,
    )

    svd_result = find_modes.get_freq_svd_modes(corr, corr.shape[0])

    map1, map2, outmap_left, outmap_right = subtract_foregrounds(
        svd_result,
        data[:, 3, 0, :].T,
        data[:, 3, 1, :].T,
        weights[:, 3, 0, :].T,
        weights[:, 3, 1, :].T,
        freq_mask[3, :],
        0,
        params["modes"],
    )

    data[:, 3, 0, :] = map1.T
    data[:, 3, 1, :] = map2.T

    if params["save_svd"]:

        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_YY.hdf5"
        utils.mkparents(f_name)
        f = h5py.File(f_name, "w")
        f["singular_values"] = svd_result[0]
        f["left_vectors"] = svd_result[1]
        f["right_vectors"] = svd_result[2]
        f["outmap_left"] = outmap_left
        f["outmap_right"] = outmap_right
        f["map_left"] = map1
        f["map_right"] = map2
        f["raw_left"] = map1_raw
        f["raw_right"] = map2_raw
        f["freq_mask"] = freq_mask[3, :]
        f["freq"] = freq
        f["time"] = time

        f.close()

    if params["save_plot"]:
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_svd_YY.hdf5"
        utils.mkparents(f_name)
        check_svd(f_name, svd_result, freq_mask[3, :], freq)
        check_map(f_name, np.ma.array(data[:, 3, :, :]), time, freq)
        f_name = params["output_root"] + params["file_middles"][file_ind] + "_spec_YY.hdf5"
        check_spec(f_name, np.ma.array(data[:, 3, :, :]), freq)
        # f_name = params['output_root'] + \
        #        params['file_middles'][file_ind] + '_corr_XX.hdf5'
        # check_corr(f_name, corr, weight)

    del corr, weight, svd_result, map1, map2, outmap_left, outmap_right
    gc.collect()

    # data[data_mask] = np.inf
    # data = np.ma.array(data)
    # data[data_mask] = np.ma.masked

    return data
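
The early-out above compares the masked fraction of the two auto-polarizations (pol indices 0 and 3, reached with the ::3 stride) against 80%. A stand-alone version of that check on a toy array:

    import numpy.ma as ma

    data = ma.ones((10, 4, 2, 32))      # (time, pol, cal, freq)
    data[:9, ::3, ...] = ma.masked      # heavily mask XX and YY
    auto = data[:, ::3, ...]
    mask_perc = ma.count_masked(auto) / float(auto.size)
    print mask_perc > 0.8               # True -> skip the SVD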