Beispiel #1
0
    def run(self, output_dir, dir_is_organized=True):
        """Open the station data files, select antennas and write ``header.h5``.

        Parameters
        ----------
        output_dir : str
            Directory that receives ``header.h5`` and the ``logs_and_plots``
            sub-folder. It is created when it does not exist yet.
        dir_is_organized : bool
            When true, ``output_dir`` is interpreted relative to the
            processed-data folder returned by ``processed_data_dir`` for
            ``self.timeID``.

        Side effects
        ------------
        Sets ``self.station_data_files`` (reference station moved to index 0),
        ``self.posix_timestamp``, ``self.referance_station``,
        ``self.station_antenna_indeces``, ``self.station_antenna_RMS`` and
        ``self.num_total_antennas``.

        Raises
        ------
        ValueError
            If no reference station could be determined, i.e. no station file
            was opened or ``self.referance_station`` is not among the opened
            stations.

        Notes
        -----
        If two station files report different POSIX timestamps an error is
        printed and the interpreter is terminated through ``quit()``.
        """
        processed_data_folder = processed_data_dir( self.timeID )

        if dir_is_organized:
            output_dir = processed_data_folder +'/' + output_dir

        polarization_flips = read_antenna_pol_flips( processed_data_folder + '/' + self.pol_flips_fname )
        bad_antennas = read_bad_antennas( processed_data_folder + '/' + self.bad_antennas_fname )
        additional_antenna_delays = read_antenna_delays(  processed_data_folder + '/' + self.additional_antenna_delays_fname )
        station_timing_offsets = read_station_delays( processed_data_folder+'/'+ self.station_delays_fname )

        raw_fpaths = filePaths_by_stationName( self.timeID )

        self.station_data_files = []
        ref_station_i = None
        referance_station_set = None
        station_i = 0
        self.posix_timestamp = None

        for station, fpaths  in raw_fpaths.items():
            ## only stations that have a timing offset and are not excluded are opened
            if (station in station_timing_offsets) and (station not in self.stations_to_exclude ):
                print('opening', station)

                raw_data_file = MultiFile_Dal1(fpaths, polarization_flips=polarization_flips, bad_antennas=bad_antennas, additional_ant_delays=additional_antenna_delays, pol_flips_are_bad=self.pol_flips_are_bad)
                self.station_data_files.append( raw_data_file )

                raw_data_file.set_station_delay( station_timing_offsets[station] )
                raw_data_file.find_and_set_polarization_delay()

                ## every station file must report the same timestamp
                if self.posix_timestamp is None:
                    self.posix_timestamp = raw_data_file.get_timestamp()
                elif self.posix_timestamp != raw_data_file.get_timestamp():
                    print("ERROR: POSIX timestamps are different")
                    quit()

                if self.referance_station is None:
                    ## if no reference station is requested, use the one with the smallest number
                    if (referance_station_set is None) or ( int(station[2:]) < int(referance_station_set[2:]) ):
                        ref_station_i = station_i
                        referance_station_set = station
                elif self.referance_station == station:
                    ref_station_i = station_i
                    referance_station_set = station

                station_i += 1

        ## fail with a clear message instead of indexing the list with None below
        if ref_station_i is None:
            if self.referance_station is None:
                raise ValueError("no station data files were opened; cannot choose a reference station")
            raise ValueError("reference station " + str(self.referance_station) + " is not among the opened stations")

        ## set reference station first
        self.station_data_files[0], self.station_data_files[ ref_station_i ] = self.station_data_files[ ref_station_i ], self.station_data_files[0]
        self.referance_station = referance_station_set

        ## organize antenna info
        self.station_antenna_indeces = [] ## 2D list. First index is station_i, second is a local antenna index, value is station antenna index
        self.station_antenna_RMS = [] ## same as above, but value is RMS
        self.num_total_antennas = 0
        for station_i,stationFile in enumerate(self.station_data_files):
            ant_names = stationFile.get_antenna_names()
            num_evenAntennas = len(ant_names)//2

            ## get RMS planewave fits
            if self.antenna_RMS_info is not None:
                antenna_RMS_dict = read_planewave_fits(processed_data_folder+'/'+self.antenna_RMS_info, stationFile.get_station_name() )
            else:
                antenna_RMS_dict = {}

            ## one RMS per even antenna; antennas without a fit get the default expected RMS
            antenna_RMS_array = np.array([ antenna_RMS_dict[ant_name] if ant_name in antenna_RMS_dict else self.default_expected_RMS  for ant_name in ant_names if antName_is_even(ant_name)  ])
            antenna_indeces = np.arange(num_evenAntennas)*2

            ## remove bad antennas (non-finite RMS) and sort by increasing RMS
            ant_is_good = np.isfinite( antenna_RMS_array )
            antenna_RMS_array = antenna_RMS_array[ ant_is_good ]
            antenna_indeces = antenna_indeces[ ant_is_good ]

            sorter = np.argsort( antenna_RMS_array )
            antenna_RMS_array = antenna_RMS_array[ sorter ]
            antenna_indeces = antenna_indeces[ sorter ]

            ## select only the best!
            antenna_indeces = antenna_indeces[:self.max_antennas_per_station]
            antenna_RMS_array = antenna_RMS_array[:self.max_antennas_per_station]

            ## fill info
            self.station_antenna_indeces.append( antenna_indeces )
            self.station_antenna_RMS.append( antenna_RMS_array )

            self.num_total_antennas += len(antenna_indeces)

        ### output to header!
        if not isdir(output_dir):
            mkdir(output_dir)

        logging_folder = output_dir + '/logs_and_plots'
        if not isdir(logging_folder):
            mkdir(logging_folder)

        ## the context manager closes the header file even if writing fails part way
        with h5py.File(output_dir + "/header.h5", "w") as header_outfile:
            header_outfile.attrs["timeID"] = self.timeID
            header_outfile.attrs["initial_datapoint"] = self.initial_datapoint
            header_outfile.attrs["max_antennas_per_station"] = self.max_antennas_per_station
            header_outfile.attrs["referance_station"] = self.station_data_files[0].get_station_name()
            header_outfile.attrs["station_delays_fname"] = self.station_delays_fname
            header_outfile.attrs["additional_antenna_delays_fname"] = self.additional_antenna_delays_fname
            header_outfile.attrs["bad_antennas_fname"] = self.bad_antennas_fname
            header_outfile.attrs["pol_flips_fname"] = self.pol_flips_fname
            header_outfile.attrs["blocksize"] = self.blocksize
            header_outfile.attrs["remove_saturation"] = self.remove_saturation
            header_outfile.attrs["remove_RFI"] = self.remove_RFI
            header_outfile.attrs["positive_saturation"] = self.positive_saturation
            header_outfile.attrs["negative_saturation"] = self.negative_saturation
            header_outfile.attrs["saturation_removal_length"] = self.saturation_removal_length
            header_outfile.attrs["saturation_half_hann_length"] = self.saturation_half_hann_length
            header_outfile.attrs["hann_window_fraction"] = self.hann_window_fraction
            header_outfile.attrs["num_zeros_dataLoss_Threshold"] = self.num_zeros_dataLoss_Threshold
            header_outfile.attrs["min_amplitude"] = self.min_amplitude
            header_outfile.attrs["upsample_factor"] = self.upsample_factor
            header_outfile.attrs["max_events_perBlock"] = self.max_events_perBlock
            header_outfile.attrs["min_pulse_length_samples"] = self.min_pulse_length_samples
            header_outfile.attrs["erasure_length"] = self.erasure_length
            header_outfile.attrs["guess_distance"] = self.guess_distance
            header_outfile.attrs["kalman_devations_toSearch"] = self.kalman_devations_toSearch
            header_outfile.attrs["pol_flips_are_bad"] = self.pol_flips_are_bad
            ## NOTE(review): antenna_RMS_info may be None (see the check above); confirm h5py accepts that as an attribute value
            header_outfile.attrs["antenna_RMS_info"] = self.antenna_RMS_info
            header_outfile.attrs["default_expected_RMS"] = self.default_expected_RMS
            header_outfile.attrs["max_planewave_RMS"] = self.max_planewave_RMS
            header_outfile.attrs["stop_chi_squared"] = self.stop_chi_squared
            header_outfile.attrs["max_minimize_itters"] = self.max_minimize_itters
            header_outfile.attrs["minimize_ftol"] = self.minimize_ftol
            header_outfile.attrs["minimize_xtol"] = self.minimize_xtol
            header_outfile.attrs["minimize_gtol"] = self.minimize_gtol

            ## index 0 is the reference station after the swap above
            header_outfile.attrs["refStat_delay"] = station_timing_offsets[  self.station_data_files[0].get_station_name()  ]
            header_outfile.attrs["refStat_timestamp"] = self.posix_timestamp
            header_outfile.attrs["refStat_sampleNumber"] = self.station_data_files[0].get_nominal_sample_number()

            header_outfile.attrs["stations_to_exclude"] = np.array(self.stations_to_exclude, dtype='S')

            header_outfile.attrs["polarization_flips"] = np.array(polarization_flips, dtype='S')

            header_outfile.attrs["num_stations"] = len(self.station_data_files)
            header_outfile.attrs["num_antennas"] = self.num_total_antennas

            ## one group per station, holding one sub-group per selected antenna
            for stat_i, (stat_file, antenna_indeces, antenna_RMS) in enumerate(zip(self.station_data_files, self.station_antenna_indeces, self.station_antenna_RMS)):

                station_group = header_outfile.create_group( str(stat_i) )
                station_group.attrs["sname"] = stat_file.get_station_name()
                station_group.attrs["num_antennas"] = len( antenna_indeces )

                locations = stat_file.get_LOFAR_centered_positions()
                total_delays = stat_file.get_total_delays()
                ant_names = stat_file.get_antenna_names()

                for ant_i, (stat_index, RMS) in enumerate(zip(antenna_indeces, antenna_RMS)):

                    antenna_group = station_group.create_group( str(ant_i) )
                    antenna_group.attrs['antenna_name'] = ant_names[stat_index]
                    antenna_group.attrs['location'] = locations[stat_index]
                    antenna_group.attrs['delay'] = total_delays[stat_index]
                    antenna_group.attrs['planewave_RMS'] = RMS
## override the default raw / processed data locations on the utilities module
utilities.default_raw_data_loc = "/home/student4/Marten/KAP_data_link/lightning_data"
utilities.default_processed_data_loc = "/home/student4/Marten/processed_files"
processed_data_folder = processed_data_dir(timeID)

station_delay_file = "station_delays.txt"

## load the calibration tables stored in the processed-data folder
polarization_flips = read_antenna_pol_flips(
    '/'.join((processed_data_folder, "polarization_flips.txt")))
bad_antennas = read_bad_antennas(
    '/'.join((processed_data_folder, "bad_antennas.txt")))
additional_antenna_delays = read_antenna_delays(
    '/'.join((processed_data_folder, "ant_delays.txt")))
station_timing_offsets = read_station_delays(
    '/'.join((processed_data_folder, station_delay_file)))

raw_fpaths = filePaths_by_stationName(timeID)

## open one data file per station that has a known timing offset, keyed by station name
TBB_data = dict(
    (
        station_name,
        MultiFile_Dal1(
            station_paths,
            force_metadata_ant_pos=True,
            polarization_flips=polarization_flips,
            bad_antennas=bad_antennas,
            additional_ant_delays=additional_antenna_delays,
            only_complete_pairs=True,
        ),
    )
    for station_name, station_paths in raw_fpaths.items()
    if station_name in station_timing_offsets
)


def model(p, r):
    # NOTE(review): the body of ``model`` consists solely of the nested
    # ``open_files`` definition below; nothing here calls or returns it and
    # ``p`` / ``r`` are unused, so ``model`` returns None. This looks
    # unintended -- confirm against the original source.
    def open_files(self, log_func=do_nothing, force_metadata_ant_pos=True):
        """Open calibration and raw data files and prepare the imaging state.

        Steps, in order:

        1. read station delays, antenna delays, bad antennas and polarization
           flips from the processed-data folder of ``self.timeID``;
        2. open one ``MultiFile_Dal1`` per usable station and build its
           ``window_and_filter`` RFI filter;
        3. select the antennas to use and collect their locations and delays;
        4. choose the prefered station (unless ``self.prefered_station`` is
           already set) and derive the block exclusion lengths;
        5. compute per-antenna data offsets and half window lengths;
        6. allocate work buffers and create the stage 1 / stage 2 imagers.

        Parameters
        ----------
        log_func : callable
            Called with print-like positional arguments for progress messages.
        force_metadata_ant_pos : bool
            Forwarded unchanged to ``MultiFile_Dal1``.

        Raises
        ------
        ValueError
            If no data file for station ``CS002`` was opened; its sample
            number is needed for the timing correction.
        """
        processed_data_loc = processed_data_dir(self.timeID)

        #### open calibration data files ####
        ### TODO: fix these so they are accounted for in a more standard manner
        self.station_timing_offsets = read_station_delays(
            processed_data_loc + '/' + self.station_delays_fname)
        self.additional_ant_delays = read_antenna_delays(
            processed_data_loc + '/' + self.additional_antenna_delays_fname)
        self.bad_antennas = read_bad_antennas(processed_data_loc + '/' +
                                              self.bad_antennas_fname)
        self.pol_flips = read_antenna_pol_flips(processed_data_loc + '/' +
                                                self.pol_flips_fname)

        #### open data files, and find RFI ####
        raw_fpaths = filePaths_by_stationName(self.timeID)
        self.station_names = []
        self.input_files = []
        self.RFI_filters = []
        CS002_index = None
        self.station_to_antenna_indeces_dict = {}
        for station, fpaths in raw_fpaths.items():
            ## a station is used when it has a timing offset, is not excluded,
            ## and is either not a core ('CS') station, is CS002, or core
            ## stations are enabled for at least one stage
            if (station in self.station_timing_offsets) and (
                    station not in self.stations_to_exclude) and (
                        self.use_core_stations_S1 or self.use_core_stations_S2
                        or (not station[:2] == 'CS') or station == "CS002"):
                log_func("opening", station)
                self.station_names.append(station)
                self.station_to_antenna_indeces_dict[station] = []

                if station == 'CS002':
                    CS002_index = len(self.station_names) - 1

                new_file = MultiFile_Dal1(
                    fpaths, force_metadata_ant_pos=force_metadata_ant_pos)
                new_file.set_polarization_flips(self.pol_flips)
                self.input_files.append(new_file)

                if self.do_RFI_filtering and self.use_saved_RFI_info:
                    self.RFI_filters.append(
                        window_and_filter(timeID=self.timeID, sname=station))

                elif self.do_RFI_filtering:
                    RFI_result = FindRFI(new_file,
                                         self.block_size,
                                         self.initial_RFI_block,
                                         self.RFI_num_blocks,
                                         self.RFI_max_blocks,
                                         verbose=False,
                                         figure_location=None)
                    self.RFI_filters.append(
                        window_and_filter(find_RFI=RFI_result))

                else:  ## only basic filtering
                    self.RFI_filters.append(
                        window_and_filter(blocksize=self.block_size))

        #### find antenna pairs ####
        boundingBox_center = np.average(self.bounding_box, axis=-1)
        self.antennas_to_use = pairData_NumAntPerStat(
            self.input_files, self.num_antennas_per_station, self.bad_antennas)
        self.num_antennas = len(self.antennas_to_use)

        #### get antenna locations and delays ####
        ## builtin bool is used because the np.bool alias was removed from NumPy
        self.antenna_locations = np.zeros((self.num_antennas, 3),
                                          dtype=np.double)
        self.antenna_delays = np.zeros(self.num_antennas, dtype=np.double)
        self.is_not_core = np.zeros(self.num_antennas, dtype=bool)

        #### TODO: Do we need this "CS002 correction"??? will removing it fix our time base?? ####
        if CS002_index is None:
            ## without this check the lookup below would index a list with None
            raise ValueError(
                "station CS002 was not opened; it is required for the timing correction"
            )
        ## weird sign, but it is just to correct for previous definitions
        CS002_correction = self.station_timing_offsets[
            'CS002'] - self.input_files[CS002_index].get_nominal_sample_number(
            ) * 5.0E-9

        for ant_i, (station_i,
                    station_ant_i) in enumerate(self.antennas_to_use):
            ### TODO: this should probably be a result of pairData_NumAntPerStat
            self.station_to_antenna_indeces_dict[
                self.station_names[station_i]].append(ant_i)

            data_file = self.input_files[station_i]
            station = self.station_names[station_i]
            self.is_not_core[ant_i] = (not station[:2]
                                       == 'CS') or station == "CS002"

            ant_name = data_file.get_antenna_names()[station_ant_i]

            self.antenna_locations[
                ant_i] = data_file.get_LOFAR_centered_positions(
                )[station_ant_i]
            self.antenna_delays[
                ant_i] = data_file.get_timing_callibration_delays(
                )[station_ant_i]

            ## account for station timing offsets
            self.antenna_delays[ant_i] += self.station_timing_offsets[
                station] + (-data_file.get_nominal_sample_number() *
                            5.0E-9) - CS002_correction

            ## add additional timing delays
            ## note this only works for even antennas!
            if ant_name in self.additional_ant_delays:
                self.antenna_delays[ant_i] += self.additional_ant_delays[
                    ant_name][0]

        #### find prefered station ####
        if self.prefered_station is None:
            ## pick the station whose longest window time to any used antenna
            ## is the shortest
            prefered_stat_shortestWindowTime = np.inf
            prefered_station_input_file = None
            for station, antenna_indeces in self.station_to_antenna_indeces_dict.items(
            ):
                ant_i = antenna_indeces[0]  ## just use first antenna for test

                if not (self.use_core_stations_S1 or self.is_not_core[ant_i]):
                    continue

                longest_window_time = 0
                for ant_j in range(self.num_antennas):
                    if not (self.use_core_stations_S1
                            or self.is_not_core[ant_j]):
                        continue

                    window_time, _, _ = find_max_duration(
                        self.antenna_locations[ant_i],
                        self.antenna_locations[ant_j],
                        self.bounding_box,
                        center=boundingBox_center)
                    if window_time > longest_window_time:
                        longest_window_time = window_time

                if longest_window_time < prefered_stat_shortestWindowTime:
                    prefered_stat_shortestWindowTime = longest_window_time
                    self.prefered_station = station
                    prefered_station_input_file = self.input_files[
                        self.antennas_to_use[ant_i][0]]
        else:  ### TODO: throw error if prefered station is in core, but core not included in stage 1
            ant_i = self.station_to_antenna_indeces_dict[
                self.prefered_station][0]

            prefered_stat_shortestWindowTime = 0
            for ant_j in range(self.num_antennas):
                if not (self.use_core_stations_S1 or self.is_not_core[ant_j]):
                    continue

                window_time, _, _ = find_max_duration(
                    self.antenna_locations[ant_i],
                    self.antenna_locations[ant_j],
                    self.bounding_box,
                    center=boundingBox_center)
                if window_time > prefered_stat_shortestWindowTime:
                    prefered_stat_shortestWindowTime = window_time

            prefered_station_input_file = self.input_files[
                self.antennas_to_use[ant_i][0]]

        log_func("prefered station:", self.prefered_station)
        log_func("    max. half window time:",
                 prefered_stat_shortestWindowTime)
        log_func()
        log_func()

        self.prefered_station_timing_offset = self.station_timing_offsets[
            self.prefered_station] - (
                prefered_station_input_file.get_nominal_sample_number() *
                5.0E-9)
        self.prefered_station_antenna_timing_offsets = (
            prefered_station_input_file.get_timing_callibration_delays())

        ### TODO: save these to header.
        self.startBlock_exclusion = int(0.1 * self.block_size)
        ## last bit accounts for hann-window
        self.endBlock_exclusion = int(
            prefered_stat_shortestWindowTime / 5.0E-9) + 1 + int(
                0.1 * self.block_size)
        ## the length of block used for looking at data
        self.active_block_length = (self.block_size -
                                    self.startBlock_exclusion -
                                    self.endBlock_exclusion)

        ##### find window offsets and lengths ####
        ## builtin int is what the removed np.long alias referred to
        self.antenna_data_offsets = np.zeros(self.num_antennas, dtype=int)
        self.half_antenna_data_length = np.zeros(self.num_antennas,
                                                 dtype=int)

        for ant_i, (station_i,
                    station_ant_i) in enumerate(self.antennas_to_use):

            ## now we account for distance to the source
            travel_time = np.linalg.norm(self.antenna_locations[ant_i] -
                                         boundingBox_center) / v_air
            self.antenna_data_offsets[ant_i] = int(travel_time / 5.0E-9)

            ### find max duration to any of the prefered antennas
            max_duration = 0.0
            for prefered_ant_i in self.station_to_antenna_indeces_dict[
                    self.prefered_station]:
                if prefered_ant_i == ant_i:
                    continue

                duration, _, _ = find_max_duration(
                    self.antenna_locations[prefered_ant_i],
                    self.antenna_locations[ant_i], self.bounding_box,
                    boundingBox_center)

                if duration > max_duration:
                    max_duration = duration

            ## use the maximum found above; the last loop value may be stale
            ## or undefined when the loop body never ran
            self.half_antenna_data_length[ant_i] = int(
                self.pulse_length / 2) + int(max_duration / 5.0E-9)

            self.antenna_data_offsets[ant_i] -= self.half_antenna_data_length[
                ant_i]

            #### now adjust the data offsets and antenna delays so they are consistent

            ## first find the amount to adjust the offset by: time delays mod the sampling time
            ## this needs to be added to offsets and subtracted from delays
            offset_adjust = int(self.antenna_delays[ant_i] / 5.0E-9)

            ## then we can adjust the delays accounting for the data offset
            self.antenna_delays[
                ant_i] -= self.antenna_data_offsets[ant_i] * 5.0E-9

            ## now we finally account for large time delays
            self.antenna_data_offsets[ant_i] += offset_adjust
            self.antenna_delays[ant_i] -= offset_adjust * 5.0E-9

        ## the core-filtered copies are only needed when a stage skips core stations
        if (not self.use_core_stations_S1) or (not self.use_core_stations_S2):
            core_filtered_ant_locs = np.array(
                self.antenna_locations[self.is_not_core])
            core_filtered_ant_delays = np.array(
                self.antenna_delays[self.is_not_core])

        #### allocate some memory ####
        ## builtin complex is used because the np.complex alias was removed from NumPy
        self.data_block = np.empty((self.num_antennas, self.block_size),
                                   dtype=complex)
        self.hilbert_envelope_tmp = np.empty(self.block_size, dtype=np.double)

        #### initialize stage 1 ####
        if self.use_core_stations_S1:
            self.trace_length_stage1 = 2 * np.max(
                self.half_antenna_data_length)
            S1_ant_locs = self.antenna_locations
            S1_ant_delays = self.antenna_delays
        else:
            self.trace_length_stage1 = 2 * np.max(
                self.half_antenna_data_length[self.is_not_core])
            S1_ant_locs = core_filtered_ant_locs
            S1_ant_delays = core_filtered_ant_delays
        ## round up to the next power of two strictly above the current length
        self.trace_length_stage1 = 2**(int(np.log2(self.trace_length_stage1)) +
                                       1)
        self.stage_1_imager = II_tools.image_data_stage1(
            S1_ant_locs, S1_ant_delays, self.trace_length_stage1,
            self.upsample_factor)

        #### initialize stage 2 ####
        if self.use_core_stations_S2:
            S2_ant_locs = self.antenna_locations
            S2_ant_delays = self.antenna_delays
        else:
            S2_ant_locs = core_filtered_ant_locs
            S2_ant_delays = core_filtered_ant_delays
        self.trace_length_stage2 = 2**(int(np.log2(self.pulse_length)) + 1)
        self.stage_2_window = half_hann_window(self.pulse_length,
                                               self.hann_window_fraction)
        self.stage_2_imager = II_tools.image_data_stage2_absBefore(
            S2_ant_locs, S2_ant_delays, self.trace_length_stage2,
            self.upsample_factor)

        self.erasure_window = 1.0 - self.stage_2_window