def calibrate_with_file(self, file_data: [ndarray], calibration_file_path: str,
                        console: Console,
                        session_controller: SessionController) -> [ndarray]:
    """
    Calibrate a list of images by subtracting a single fixed calibration image
    from each one, clipping at zero so no negative pixel values are produced.
    The calibration file must have the same dimensions as every image given.
    :param file_data: List of images' data. Each is 2d image matrix
    :param calibration_file_path: Full path to calibration file
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask
    :return: List of calibrated images
    """
    console.message(f"Calibrate with file: {calibration_file_path}", 0)
    calibration_image = RmFitsUtil.fits_data_from_path(calibration_file_path)
    calibration_shape = calibration_image.shape
    calibrated = file_data.copy()
    for index, image in enumerate(calibrated):
        # Give up promptly if the user cancelled the session
        if session_controller.thread_cancelled():
            raise MasterMakerExceptions.SessionCancelled
        if image.shape != calibration_shape:
            raise MasterMakerExceptions.IncompatibleSizes
        # Subtract and clip into the legal 16-bit pixel range
        calibrated[index] = (image - calibration_image).clip(0, 0xFFFF)
    return calibrated
def handle_input_files_disposition(self, disposition_type: int,
                                   sub_folder_name: str,
                                   descriptors: [FileDescriptor],
                                   console: Console):
    """
    Move the given files to a sub-folder if the disposition type requests it.
    For each file successfully moved, the registered callback is invoked so the
    user interface can be adjusted if necessary.
    :param disposition_type: Code for what to do with file after processing
    :param sub_folder_name: Where to put file if we're moving it
    :param descriptors: List of files for potential processing
    :param console: Redirectable console output option
    """
    if disposition_type == Constants.INPUT_DISPOSITION_NOTHING:
        # User doesn't want us to do anything with the input files
        return
    # The only other disposition we understand is "move to sub-folder"
    assert (disposition_type == Constants.INPUT_DISPOSITION_SUBFOLDER)
    console.message("Moving processed files to " + sub_folder_name, 0)
    for descriptor in descriptors:
        moved_ok = SharedUtils.dispose_one_file_to_sub_folder(descriptor,
                                                              sub_folder_name)
        if moved_ok:
            # Successfully moved the file; tell the user interface
            self.callback_method(descriptor.get_absolute_path())
def original_non_grouped_processing(self, selected_files: [FileDescriptor],
                                    data_model: DataModel,
                                    output_file: str,
                                    console: Console):
    """
    Process one set of dark frames to a single combined output file.
    :param selected_files: List of descriptions of files to be combined
    :param data_model: Data model that gives combination method and other options
    :param output_file: Path for the combined output file
    :param console: Re-directable console output object
    """
    console.push_level()
    console.message("Using single-file processing", +1)
    assert len(selected_files) > 0
    # Guard: all files must share the same binning and dimensions
    if not FileCombiner.all_compatible_sizes(selected_files):
        raise MasterMakerExceptions.IncompatibleSizes
    self.check_cancellation()
    # Guard: all files must be dark frames, unless type checking is suppressed
    if not (data_model.get_ignore_file_type()
            or FileCombiner.all_of_type(selected_files,
                                        FileDescriptor.FILE_TYPE_DARK)):
        raise MasterMakerExceptions.NotAllDarkFrames
    # Since these are darks the filter is meaningless, but the shared
    # "create file" routine still wants a (most common) filter name
    filter_name = SharedUtils.most_common_filter_name(selected_files)
    self.combine_files(selected_files, data_model, filter_name,
                       output_file, console)
    self.check_cancellation()
    # Files are combined; optionally move the inputs out of the way
    substituted_folder_name = SharedUtils.substitute_date_time_filter_in_string(
        data_model.get_disposition_subfolder_name())
    self.handle_input_files_disposition(
        data_model.get_input_file_disposition(),
        substituted_folder_name, selected_files, console)
    console.message("Combining complete", 0)
    console.pop_level()
def combine_median(cls, file_names: [str], calibrator: Calibrator,
                   console: Console,
                   session_controller: SessionController) -> ndarray:
    """
    Combine the named files with a simple per-pixel median.
    Each file is read and calibrated before the median is taken.
    :param file_names: Names of files to be combined
    :param calibrator: Calibration object, abstracting precalibration operations
    :param console: Redirectable console output handler
    :param session_controller: Controller for this subtask, checking for cancellation
    :return: ndarray giving the 2-dimensional matrix of resulting pixel values
    """
    # The combine button is disabled for an empty selection, so this can't fire
    assert len(file_names) > 0
    console.push_level()
    console.message("Combine by simple Median", +1)
    descriptors = RmFitsUtil.make_file_descriptions(file_names)
    file_data = RmFitsUtil.read_all_files_data(file_names)
    cls.check_cancellation(session_controller)
    calibrated = calibrator.calibrate_images(file_data, descriptors,
                                             console, session_controller)
    cls.check_cancellation(session_controller)
    combined = numpy.median(calibrated, axis=0)
    console.pop_level()
    return combined
def calibrate_with_auto_directory(self, file_data: [ndarray],
                                  auto_directory_path: str,
                                  descriptors: [FileDescriptor],
                                  console: Console,
                                  session_controller: SessionController) -> [ndarray]:
    """
    Calibrate each of the given images with the best-matching calibration file
    from a directory.  "Best" weighs closeness of both exposure time and
    temperature, with more weight on exposure time.  A separate file is chosen
    for each input image, since the exposure times of collected flats often vary
    during a session to keep the ADU level constant as the light changes.
    :param file_data: List of images' data (list of 2-d matrix of pixel values)
    :param auto_directory_path: Path to folder of calibration images
    :param descriptors: Descs of files corresponding to the given images
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask
    :return: List of calibrated images
    """
    assert len(file_data) > 0
    assert len(file_data) == len(descriptors)
    # Scan the calibration directory once, up front
    directory_files = self.all_descriptors_from_directory(
        auto_directory_path, self._data_model.get_auto_directory_recursive())
    if session_controller.thread_cancelled():
        raise MasterMakerExceptions.SessionCancelled
    if len(directory_files) == 0:
        # Nothing to calibrate with - report the empty directory
        raise MasterMakerExceptions.AutoCalibrationDirectoryEmpty(
            auto_directory_path)
    console.push_level()
    console.message(
        f"Calibrating from directory containing {len(directory_files)} files.",
        +1)
    result = file_data.copy()
    for input_index, this_file in enumerate(descriptors):
        if session_controller.thread_cancelled():
            raise MasterMakerExceptions.SessionCancelled
        # Pick the calibration frame that best matches this particular image
        calibration_file = self.get_best_calibration_file(
            directory_files, this_file, session_controller, console)
        if session_controller.thread_cancelled():
            raise MasterMakerExceptions.SessionCancelled
        calibration_image = RmFitsUtil.fits_data_from_path(calibration_file)
        if result[input_index].shape != calibration_image.shape:
            raise MasterMakerExceptions.IncompatibleSizes
        # Subtract and clip into the legal 16-bit pixel range
        result[input_index] = (result[input_index]
                               - calibration_image).clip(0, 0xFFFF)
    console.pop_level()
    return result
def calibrate_with_auto_directory(self, file_data: [ndarray], auto_directory_path: str,
                                  sample_file: FileDescriptor, console: Console,
                                  session_controller: SessionController) -> [ndarray]:
    """
    Calibrate the given images with the single calibration file from the given
    directory that best matches the sample file's attributes.
    Returns None if the session is no longer running.
    """
    console.message(f"Selecting best calibration file from {auto_directory_path}", 0)
    calibration_file = self.get_best_calibration_file(auto_directory_path,
                                                      sample_file,
                                                      session_controller)
    # A lookup failure raises an exception, so we must have a file here
    assert calibration_file is not None
    if not session_controller.thread_running():
        return None
    return self.calibrate_with_file(file_data, calibration_file,
                                    console, session_controller)
def calibrate_with_pedestal(self, file_data: [ndarray], pedestal: int,
                            console: Console,
                            session_controller: SessionController) -> [ndarray]:
    """
    Subtract a fixed pedestal value from every pixel of every image, clipping
    at zero.  Stops early - leaving remaining images untouched - if the
    session is cancelled.
    :param file_data: List of images' data. Each is 2d image matrix
    :param pedestal: Fixed amount to subtract from each pixel
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask
    :return: List of calibrated images
    """
    console.message(f"Calibrate with pedestal = {pedestal}", 0)
    calibrated = file_data.copy()
    for index, image in enumerate(calibrated):
        if session_controller.thread_cancelled():
            break
        lowered: ndarray = image - pedestal
        calibrated[index] = lowered.clip(0, 0xFFFF)
    return calibrated
def calibrate_with_file(self, file_data: [ndarray], calibration_file_path: str,
                        console: Console,
                        session_controller: SessionController) -> [ndarray]:
    """
    Calibrate each image by subtracting the given calibration file's image,
    clipping at zero.  Stops early - leaving remaining images untouched - if
    the session is cancelled.
    :param file_data: List of images' data. Each is 2d image matrix
    :param calibration_file_path: Full path to calibration file
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask
    :return: List of calibrated images
    """
    console.message(f"Calibrate with file: {calibration_file_path}", 0)
    calibration_image = RmFitsUtil.fits_data_from_path(calibration_file_path)
    calibration_shape = calibration_image.shape
    calibrated = file_data.copy()
    for index, image in enumerate(calibrated):
        if session_controller.thread_cancelled():
            break
        if image.shape != calibration_shape:
            raise MasterMakerExceptions.IncompatibleSizes
        # Subtract and clip into the legal 16-bit pixel range
        calibrated[index] = (image - calibration_image).clip(0, 0xFFFF)
    return calibrated
def handle_input_files_disposition(self, disposition_type: int,
                                   sub_folder_name: str,
                                   descriptors: [FileDescriptor],
                                   console: Console):
    """
    Move the given input files to a sub-folder if the disposition code asks
    for it; notify the UI (via the registered callback) of each file moved.
    :param disposition_type: Code for what to do with file after processing
    :param sub_folder_name: Where to put file if we're moving it
    :param descriptors: List of files for potential processing
    :param console: Redirectable console output option
    """
    if disposition_type == Constants.INPUT_DISPOSITION_NOTHING:
        # User doesn't want us to do anything with the input files
        return
    # The only other disposition we understand is "move to sub-folder"
    assert (disposition_type == Constants.INPUT_DISPOSITION_SUBFOLDER)
    console.message("Moving processed files to " + sub_folder_name, 0)
    for descriptor in descriptors:
        moved_ok = SharedUtils.dispose_one_file_to_sub_folder(descriptor,
                                                              sub_folder_name)
        if moved_ok:
            # Successfully moved the file; tell the user interface
            self.callback_method(descriptor.get_absolute_path())
def combine_median(cls, file_names: [str], calibrator: Calibrator,
                   console: Console,
                   session_controller: SessionController) -> ndarray:
    """
    Combine the named files with a simple per-pixel median, after calibrating
    them using the first file as a representative sample.
    :param file_names: Names of files to be combined
    :param calibrator: Calibration object, abstracting precalibration operations
    :param console: Redirectable console output handler
    :param session_controller: Controller for this subtask, checking for cancellation
    :return: ndarray giving the 2-dimensional matrix of resulting pixel values
    """
    # The combine button is disabled for an empty selection, so this can't fire
    assert len(file_names) > 0
    console.push_level()
    console.message("Combine by simple Median", +1)
    file_data = RmFitsUtil.read_all_files_data(file_names)
    cls.check_cancellation(session_controller)
    sample_file = RmFitsUtil.make_file_descriptor(file_names[0])
    calibrated = calibrator.calibrate_images(file_data, sample_file,
                                             console, session_controller)
    cls.check_cancellation(session_controller)
    combined = numpy.median(calibrated, axis=0)
    console.pop_level()
    return combined
def describe_group(data_model: DataModel, number_files: int,
                   sample_file: FileDescriptor, console: Console):
    """
    Display, on the console, a descriptive line for the group being processed,
    built from whichever grouping attributes (size, exposure, temperature) are
    in effect, using the given sample file for representative values.
    """
    binning = sample_file.get_binning()
    exposure = sample_file.get_exposure()
    temperature = sample_file.get_temperature()
    # Collect one phrase per active grouping option, then comma-join them
    phrases: [str] = []
    if data_model.get_group_by_size():
        phrases.append(f"binned {binning} x {binning}")
    if data_model.get_group_by_exposure():
        phrases.append(f" exposed {exposure} seconds")
    if data_model.get_group_by_temperature():
        phrases.append(f" at {temperature} degrees.")
    processing_message = ",".join(phrases)
    console.message(
        f"Processing {number_files} files {processing_message}", +1)
def original_non_grouped_processing(self, selected_files: [FileDescriptor],
                                    data_model: DataModel,
                                    output_file: str,
                                    console: Console):
    """
    Process one set of flat frames to a single combined output file.
    Output to the given path, if provided. If not provided, prompt the user for it.
    :param selected_files: List of descriptions of files to be combined
    :param data_model: Data model that gives combination method and other options
    :param output_file: Path for the combined output file
    :param console: Re-directable console output object
    """
    console.push_level()
    console.message("Using single-file processing", +1)
    assert len(selected_files) > 0
    # Guard: all files must share the same binning and dimensions
    if not FileCombiner.all_compatible_sizes(selected_files):
        raise MasterMakerExceptions.IncompatibleSizes
    self.check_cancellation()
    # Guard: all files must be flat frames, unless type checking is suppressed
    if not (data_model.get_ignore_file_type()
            or FileCombiner.all_of_type(selected_files,
                                        FileDescriptor.FILE_TYPE_FLAT)):
        raise MasterMakerExceptions.NotAllFlatFrames
    # Most common filter name in the set goes into the output file's metadata
    filter_name = SharedUtils.most_common_filter_name(selected_files)
    self.combine_files(selected_files, data_model, filter_name,
                       output_file, console)
    self.check_cancellation()
    # Files are combined; optionally move the inputs out of the way
    substituted_folder_name = SharedUtils.substitute_date_time_filter_in_string(
        data_model.get_disposition_subfolder_name())
    self.handle_input_files_disposition(
        data_model.get_input_file_disposition(),
        substituted_folder_name, selected_files, console)
    console.message("Combining complete", 0)
    console.pop_level()
def closest_match(self, descriptors: [FileDescriptor], target_exposure: float,
                  target_temperature: float,
                  console: Console) -> FileDescriptor:
    """
    Find the calibration file, from the given list of candidates, that is the
    best match for calibrating an image with the given exposure and temperature.
    We have already ensured that the candidate calibration files are all the
    correct size, so we now try to match both the exposure time and the
    temperature, giving more weight to the exposure time.
    :param descriptors: Descriptions of potential calibration files
    :param target_exposure: Exposure time of the image to be calibrated
    :param target_temperature: CCD temperature of the image to be calibrated
    :param console: Redirectable console output object
    :return: Description of the best-matching calibration file
    """
    # Need at least one candidate to score.  (The old code asserted
    # len(numpy.where(...)) > 0, but numpy.where returns a TUPLE of index
    # arrays, so that assert measured the number of dimensions - always 1 -
    # and could never fail.  This explicit check replaces it.)
    assert len(descriptors) > 0
    # Score each candidate by its weighted deviation from the target;
    # smaller score = better match
    f: FileDescriptor
    file_temperatures = numpy.array(
        [f.get_temperature() for f in descriptors])
    file_exposures = numpy.array([f.get_exposure() for f in descriptors])
    scores = numpy.abs(file_temperatures - target_temperature) \
        + numpy.abs(file_exposures - target_exposure) * Constants.AUTO_CALIBRATION_EXPOSURE_WEIGHT
    # argmin returns the index of the FIRST occurrence of the smallest score,
    # which matches the previous behaviour of taking the first minimal candidate
    match_index = int(numpy.argmin(scores))
    best_match = descriptors[match_index]
    if self._data_model.get_display_auto_select_results():
        console.message(
            f"Target {target_exposure:.1f}s at {target_temperature:.1f} C,"
            f" best match is {best_match.get_exposure():.1f}s at"
            f" {best_match.get_temperature():.1f} C: "
            f"{best_match.get_name()}", +1, temp=True)
    return best_match
def calibrate_with_pedestal(self, file_data: [ndarray], pedestal: int,
                            console: Console,
                            session_controller: SessionController) -> [ndarray]:
    """
    'Pedestal-calibrate' given set of images by subtracting a fixed amount
    from each pixel, clipping at zero so no negative pixel values are produced.
    :param file_data: List of images' data. Each is 2d image matrix
    :param pedestal: Fixed amount to subtract from each pixel
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask
    :return: List of calibrated images
    """
    console.message(f"Calibrate with pedestal = {pedestal}", 0)
    calibrated = file_data.copy()
    for index, image in enumerate(calibrated):
        # Abandon the whole operation if the user cancelled the session
        if session_controller.thread_cancelled():
            raise MasterMakerExceptions.SessionCancelled
        lowered: ndarray = image - pedestal
        calibrated[index] = lowered.clip(0, 0xFFFF)
    return calibrated
def describe_group(data_model: DataModel, number_files: int,
                   sample_file: FileDescriptor, console: Console):
    """
    Display, on the console, a descriptive text string for the group being
    processed, built from whichever grouping attributes (size, filter,
    temperature) are in effect, using the given sample file for values.
    :param data_model: Data model giving the processing options
    :param number_files: Number of files in the group being processed
    :param sample_file: Sample file, representative of the group's characteristics
    :param console: Redirectable output console
    """
    binning = sample_file.get_binning()
    temperature = sample_file.get_temperature()
    # Collect one phrase per active grouping option, then comma-join them
    phrases: [str] = []
    if data_model.get_group_by_size():
        phrases.append(f"binned {binning} x {binning}")
    if data_model.get_group_by_filter():
        phrases.append(f"with {sample_file.get_filter_name()} filter")
    if data_model.get_group_by_temperature():
        phrases.append(f"at {temperature} degrees")
    summary = ", ".join(phrases)
    console.message(
        f"Processing {number_files} files {summary}.", +1)
def combine_mean(cls, file_names: [str], calibrator: Calibrator,
                 console: Console,
                 session_controller: SessionController) -> ndarray:
    """Combine FITS files in given list using simple mean.
    Return an ndarray containing the combined data."""
    # The combine button is disabled for an empty selection, so this can't fire
    assert len(file_names) > 0
    console.push_level()
    console.message("Combining by simple mean", +1)
    # First file serves as the representative sample for calibration matching
    sample_file = RmFitsUtil.make_file_descriptor(file_names[0])
    file_data: [ndarray] = RmFitsUtil.read_all_files_data(file_names)
    cls.check_cancellation(session_controller)
    calibrated_data = calibrator.calibrate_images(file_data, sample_file,
                                                  console, session_controller)
    cls.check_cancellation(session_controller)
    combined = numpy.mean(calibrated_data, axis=0)
    console.pop_level()
    return combined
def min_max_clip_version_5(cls, file_data: ndarray,
                           number_dropped_values: int,
                           console: Console,
                           session_controller: SessionController):
    """
    Combine image layers by a min-max-clipped mean: for the requested number of
    iterations, mask out the minimum and maximum values down each pixel column,
    then take the mean of the values that remain.  Columns that lose ALL their
    values to clipping are repaired by re-clipping with one fewer drop.
    :param file_data: 3-d array - a stack of 2-d images to be combined
    :param number_dropped_values: How many min/max pairs to clip per column
    :param console: Redirectable console output object
    :param session_controller: Controller for this subtask, checked for cancellation
    :return: 2-d matrix of the rounded, clipped column means
    """
    console.push_level()
    console.message(
        f"Using min-max clip with {number_dropped_values} iterations", +1)
    masked_array = ma.MaskedArray(file_data)
    drop_counter = 1
    while drop_counter <= number_dropped_values:
        cls.check_cancellation(session_controller)
        console.push_level()
        console.message(
            f"Iteration {drop_counter} of {number_dropped_values}.", +1)
        drop_counter += 1
        # Find the minimums in all columns. This will give a 2d matrix the same size as the images
        # with the column-minimum in each position
        minimum_values = masked_array.min(axis=0)
        cls.check_cancellation(session_controller)
        # Now compare that matrix of minimums down the layers, so we get Trues where
        # each minimum exists in its column (minimums might exist more than once, and
        # we want to find all of them)
        masked_array = ma.masked_where(masked_array == minimum_values,
                                       masked_array)
        cls.check_cancellation(session_controller)
        console.message("Masked minimums.", +1, temp=True)
        # Now find and mask the maximums, same approach
        maximum_values = masked_array.max(axis=0)
        masked_array = ma.masked_where(masked_array == maximum_values,
                                       masked_array)
        cls.check_cancellation(session_controller)
        console.message("Masked maximums.", +1, temp=True)
        console.pop_level()
    console.message(f"Calculating mean of remaining data.", 0)
    # Mean ignores masked entries, so this is the mean of the surviving values
    masked_means = numpy.mean(masked_array, axis=0)
    cls.check_cancellation(session_controller)
    # If the means matrix contains any masked values, that means that in that column the clipping
    # eliminated *all* the data. We will find the offending columns and re-calculate those with
    # fewer dropped extremes.
    # This should exactly reproduce the results of the cell-by-cell methods
    if ma.is_masked(masked_means):
        console.message(
            "Some columns lost all their values; reducing drops for those columns.",
            0)
        # Get the mask, and get a 2D matrix showing which columns were entirely masked
        the_mask = masked_array.mask
        eliminated_columns_map = ndarray.all(the_mask, axis=0)
        masked_coordinates = numpy.where(eliminated_columns_map)
        cls.check_cancellation(session_controller)
        x_coordinates = masked_coordinates[0]
        y_coordinates = masked_coordinates[1]
        assert len(x_coordinates) == len(y_coordinates)
        repairs = len(x_coordinates)
        # Pluralization helpers for the repair message ("1 column needs" /
        # "n columns need").  NOTE(review): local name "np" shadows the common
        # numpy alias, but numpy is referenced as "numpy" here, so it's harmless.
        cp = "s" if repairs > 1 else ""
        np = "" if repairs > 1 else "s"
        console.message(f"{repairs} column{cp} need{np} repair.", +1)
        for index in range(repairs):
            cls.check_cancellation(session_controller)
            # print(".", end="\n" if (index > 0) and (index % 50 == 0) else "")
            column_x = x_coordinates[index]
            column_y = y_coordinates[index]
            # Re-clip this one column from the ORIGINAL data, dropping one
            # fewer extreme so some values survive
            column = file_data[:, column_x, column_y]
            min_max_clipped_mean: int = round(
                cls.calc_mm_clipped_mean(column, number_dropped_values - 1,
                                         console, session_controller))
            masked_means[column_x, column_y] = min_max_clipped_mean
        # We've replaced the problematic columns, now the mean should calculate cleanly
        assert not ma.is_masked(masked_means)
    console.pop_level()
    return masked_means.round()
def process_groups(self, data_model: DataModel, selected_files: [FileDescriptor],
                   output_directory: str,
                   console: Console):
    """
    Process the given selected files in nested groups by size, exposure, and
    temperature (any combination, as requested by the data model), producing
    one combined master file per group in the given output directory.
    Groups smaller than the configured minimum are skipped.
    Raises NoGroupOutputDirectory if the output directory does not exist and
    cannot be created.
    :param data_model: Data model specifying options for the current run
    :param selected_files: List of descriptions of files to be grouped then processed
    :param output_directory: Directory to contain output files from processed groups
    :param console: Re-directable console output object
    """
    console.push_level()
    exposure_bandwidth = data_model.get_exposure_group_bandwidth()
    temperature_bandwidth = data_model.get_temperature_group_bandwidth()
    disposition_folder = data_model.get_disposition_subfolder_name()
    substituted_folder_name = SharedUtils.substitute_date_time_filter_in_string(
        disposition_folder)
    console.message(
        "Process groups into output directory: " + output_directory, +1)
    if not SharedUtils.ensure_directory_exists(output_directory):
        raise MasterMakerExceptions.NoGroupOutputDirectory(
            output_directory)
    # Groups smaller than this are ignored (0 disables the check)
    minimum_group_size = data_model.get_minimum_group_size() \
        if data_model.get_ignore_groups_fewer_than() else 0
    # Process size groups, or all sizes if not grouping
    groups_by_size = self.get_groups_by_size(
        selected_files, data_model.get_group_by_size())
    group_by_size = data_model.get_group_by_size()
    group_by_exposure = data_model.get_group_by_exposure()
    group_by_temperature = data_model.get_group_by_temperature()
    for size_group in groups_by_size:
        self.check_cancellation()
        console.push_level()
        # Message about this group only if this grouping was requested
        if len(size_group) < minimum_group_size:
            if group_by_size:
                console.message(
                    f"Ignoring one size group: {len(size_group)} "
                    f"files {size_group[0].get_size_key()}", +1)
        else:
            if group_by_size:
                console.message(
                    f"Processing one size group: {len(size_group)} "
                    f"files {size_group[0].get_size_key()}", +1)
            # Within this size group, process exposure groups, or all exposures if not grouping
            groups_by_exposure = self.get_groups_by_exposure(
                size_group, data_model.get_group_by_exposure(),
                exposure_bandwidth)
            for exposure_group in groups_by_exposure:
                self.check_cancellation()
                (mean_exposure, _) = ImageMath.mean_exposure_and_temperature(
                    exposure_group)
                console.push_level()
                if len(exposure_group) < minimum_group_size:
                    if group_by_exposure:
                        console.message(
                            f"Ignoring one exposure group: {len(exposure_group)} "
                            f"files exposed at mean {mean_exposure:.2f} seconds",
                            +1)
                else:
                    if group_by_exposure:
                        console.message(
                            f"Processing one exposure group: {len(exposure_group)} "
                            f"files exposed at mean {mean_exposure:.2f} seconds",
                            +1)
                    # Within this exposure group, process temperature groups, or all temperatures if not grouping
                    groups_by_temperature = \
                        self.get_groups_by_temperature(exposure_group,
                                                       data_model.get_group_by_temperature(),
                                                       temperature_bandwidth)
                    for temperature_group in groups_by_temperature:
                        self.check_cancellation()
                        console.push_level()
                        (_, mean_temperature
                         ) = ImageMath.mean_exposure_and_temperature(
                            temperature_group)
                        if len(temperature_group) < minimum_group_size:
                            if group_by_temperature:
                                console.message(
                                    f"Ignoring one temperature group: {len(temperature_group)} "
                                    f"files with mean temperature {mean_temperature:.1f}",
                                    +1)
                        else:
                            if group_by_temperature:
                                console.message(
                                    f"Processing one temperature group: {len(temperature_group)} "
                                    f"files with mean temperature {mean_temperature:.1f}",
                                    +1)
                            # Now we have a list of descriptors, grouped as appropriate, to process
                            self.process_one_group(
                                data_model, temperature_group,
                                output_directory,
                                data_model.get_master_combine_method(),
                                substituted_folder_name, console)
                            self.check_cancellation()
                        console.pop_level()
                console.pop_level()
        console.pop_level()
    console.message("Group combining complete", 0)
    console.pop_level()
def process_groups(self, data_model: DataModel, selected_files: [FileDescriptor],
                   output_directory: str,
                   console: Console):
    """
    Process the given selected files in groups by size, temperature, or filter
    (or any combination), producing one combined master file per group.
    Groups smaller than the configured minimum are skipped.
    Exceptions thrown:
        NoGroupOutputDirectory      Output directory does not exist and unable to create it
    :param data_model: Data model specifying options for the current run
    :param selected_files: List of descriptions of files to be grouped then processed
    :param output_directory: Directory to contain output files from processed groups
    :param console: Re-directable console output object
    """
    console.push_level()
    temperature_bandwidth = data_model.get_temperature_group_bandwidth()
    disposition_folder = data_model.get_disposition_subfolder_name()
    substituted_folder_name = SharedUtils.substitute_date_time_filter_in_string(
        disposition_folder)
    console.message(
        "Process groups into output directory: " + output_directory, +1)
    if not SharedUtils.ensure_directory_exists(output_directory):
        raise MasterMakerExceptions.NoGroupOutputDirectory(
            output_directory)
    # Groups smaller than this are ignored (0 disables the check)
    minimum_group_size = data_model.get_minimum_group_size() \
        if data_model.get_ignore_groups_fewer_than() else 0
    # Process size groups, or all sizes if not grouping
    groups_by_size = self.get_groups_by_size(
        selected_files, data_model.get_group_by_size())
    group_by_size = data_model.get_group_by_size()
    group_by_temperature = data_model.get_group_by_temperature()
    group_by_filter = data_model.get_group_by_filter()
    for size_group in groups_by_size:
        self.check_cancellation()
        console.push_level()
        # Message about this group only if this grouping was requested
        if len(size_group) < minimum_group_size:
            if group_by_size:
                console.message(
                    f"Ignoring one size group: {len(size_group)} "
                    f"files {size_group[0].get_size_key()}", +1)
        else:
            if group_by_size:
                console.message(
                    f"Processing one size group: {len(size_group)} "
                    f"files {size_group[0].get_size_key()}", +1)
            # Within this size group, process temperature groups, or all temperatures if not grouping
            groups_by_temperature = \
                self.get_groups_by_temperature(size_group,
                                               data_model.get_group_by_temperature(),
                                               temperature_bandwidth)
            for temperature_group in groups_by_temperature:
                self.check_cancellation()
                console.push_level()
                (_, mean_temperature
                 ) = ImageMath.mean_exposure_and_temperature(
                    temperature_group)
                if len(temperature_group) < minimum_group_size:
                    if group_by_temperature:
                        console.message(
                            f"Ignoring one temperature group: {len(temperature_group)} "
                            f"files with mean temperature {mean_temperature:.1f}",
                            +1)
                else:
                    if group_by_temperature:
                        console.message(
                            f"Processing one temperature group: {len(temperature_group)} "
                            f"files with mean temperature {mean_temperature:.1f}",
                            +1)
                    # Within this temperature group, process filter groups, or all filters if not grouping
                    groups_by_filter = \
                        self.get_groups_by_filter(temperature_group,
                                                  data_model.get_group_by_filter())
                    for filter_group in groups_by_filter:
                        self.check_cancellation()
                        console.push_level()
                        filter_name = filter_group[0].get_filter_name()
                        if len(filter_group) < minimum_group_size:
                            if group_by_filter:
                                console.message(
                                    f"Ignoring one filter group: {len(filter_group)} "
                                    f"files with {filter_name} filter ", +1)
                        else:
                            if group_by_filter:
                                console.message(
                                    f"Processing one filter group: {len(filter_group)} "
                                    f"files with {filter_name} filter ", +1)
                            # Fully-grouped list of descriptors - process it
                            self.process_one_group(
                                data_model, filter_group, output_directory,
                                data_model.get_master_combine_method(),
                                substituted_folder_name, console)
                        console.pop_level()
                    self.check_cancellation()
                console.pop_level()
        console.pop_level()
    console.message("Group combining complete", 0)
    console.pop_level()
def combine_sigma_clip(cls, file_names: [str], sigma_threshold: float,
                       calibrator: Calibrator,
                       console: Console,
                       session_controller: SessionController) -> Optional[ndarray]:
    """
    Combine the named files with a sigma-clipped mean: pixels whose z-score
    (deviation from the column mean in units of the column's standard
    deviation) exceeds the given threshold are masked out, then the mean of
    the surviving values is taken.  Columns that lose all their values to
    clipping are repaired with a min-max-clipped mean instead.
    :param file_names: Names of files to be combined
    :param sigma_threshold: z-score above which a pixel is discarded
    :param calibrator: Calibration object, abstracting precalibration operations
    :param console: Redirectable console output handler
    :param session_controller: Controller for this subtask, checked for cancellation
    :return: 2-d matrix of the rounded, clipped column means
    """
    console.push_level()
    console.message(
        f"Combine by sigma-clipped mean, z-score threshold {sigma_threshold}",
        +1)
    # First file serves as the representative sample for calibration matching
    sample_file = RmFitsUtil.make_file_descriptor(file_names[0])
    file_data = numpy.asarray(RmFitsUtil.read_all_files_data(file_names))
    cls.check_cancellation(session_controller)
    file_data = calibrator.calibrate_images(file_data, sample_file,
                                            console, session_controller)
    cls.check_cancellation(session_controller)
    console.message("Calculating unclipped means", +1)
    column_means = numpy.mean(file_data, axis=0)
    cls.check_cancellation(session_controller)
    console.message("Calculating standard deviations", 0)
    column_stdevs = numpy.std(file_data, axis=0)
    cls.check_cancellation(session_controller)
    console.message("Calculating z-scores", 0)
    # Now what we'd like to do is just:
    #    z_scores = abs(file_data - column_means) / column_stdevs
    # Unfortunately, standard deviations can be zero, so that simplistic
    # statement would generate division-by-zero errors.
    # Std for a column would be zero if all the values in the column were identical.
    # In that case we wouldn't want to eliminate any anyway, so we'll set the
    # zero stdevs to a large number, which causes the z-scores to be small, which
    # causes no values to be eliminated.
    column_stdevs[column_stdevs == 0.0] = sys.float_info.max
    z_scores = abs(file_data - column_means) / column_stdevs
    cls.check_cancellation(session_controller)
    console.message("Eliminated data outside threshold", 0)
    exceeds_threshold = z_scores > sigma_threshold
    cls.check_cancellation(session_controller)
    # Calculate and display how much data we are ignoring
    dimensions = exceeds_threshold.shape
    total_pixels = dimensions[0] * dimensions[1] * dimensions[2]
    number_masked = numpy.count_nonzero(exceeds_threshold)
    percentage_masked = 100.0 * number_masked / total_pixels
    console.message(
        f"Discarded {number_masked:,} pixels of {total_pixels:,} "
        f"({percentage_masked:.3f}% of data)", +1)
    masked_array = ma.masked_array(file_data, exceeds_threshold)
    cls.check_cancellation(session_controller)
    console.message("Calculating adjusted means", -1)
    masked_means = ma.mean(masked_array, axis=0)
    cls.check_cancellation(session_controller)
    # If the means matrix contains any masked values, that means that in that column the clipping
    # eliminated *all* the data. We will find the offending columns and re-calculate those using
    # simple min-max clipping.
    if ma.is_masked(masked_means):
        console.message(
            "Some columns lost all their values; min-max clipping those columns.",
            0)
        # Get the mask, and get a 2D matrix showing which columns were entirely masked
        eliminated_columns_map = ndarray.all(exceeds_threshold, axis=0)
        masked_coordinates = numpy.where(eliminated_columns_map)
        x_coordinates = masked_coordinates[0]
        y_coordinates = masked_coordinates[1]
        assert len(x_coordinates) == len(y_coordinates)
        for index in range(len(x_coordinates)):
            cls.check_cancellation(session_controller)
            column_x = x_coordinates[index]
            column_y = y_coordinates[index]
            # Repair this one column from the original data with a
            # min-max-clipped mean (dropping 2 extremes)
            column = file_data[:, column_x, column_y]
            min_max_clipped_mean: int = round(
                cls.calc_mm_clipped_mean(column, 2, console,
                                         session_controller))
            masked_means[column_x, column_y] = min_max_clipped_mean
        # We've replaced the problematic columns, now the mean should calculate cleanly
        assert not ma.is_masked(masked_means)
    cls.check_cancellation(session_controller)
    console.pop_level()
    # filled() converts the (now fully unmasked) masked array to a plain ndarray
    result = masked_means.round().filled()
    return result