def check_cube_dimensions(n_axes, axis_size, cube_name="data cube", min_dim=2, max_dim=4):
    """
    Validate the dimensionality of a cube.

    Two conditions are reported through err.ensure():
      1. n_axes must lie in the inclusive range [min_dim, max_dim];
      2. if the cube has exactly 4 axes, the 4th axis (axis_size[3])
         must have a size of 1.

    Arguments:
      n_axes:     Number of axes of the cube.
      axis_size:  Sequence of axis sizes; element 3 is only read when
                  n_axes == 4.
      cube_name:  Name of the cube used in the error messages.
      min_dim:    Smallest accepted number of axes (integer).
      max_dim:    Largest accepted number of axes (integer).

    Returns:
      None.
    """
    label = str(cube_name)

    # Condition 1: number of axes within the accepted range
    range_ok = min_dim <= n_axes <= max_dim
    range_msg = label.capitalize() + " must have {0:d} to {1:d} dimensions.".format(min_dim, max_dim)
    err.ensure(range_ok, range_msg)

    # Condition 2: a 4th axis is only tolerated if it is degenerate.
    # axis_size[3] is deliberately not touched unless n_axes == 4.
    fourth_ok = True if n_axes != 4 else axis_size[3] == 1
    fourth_msg = ("Size of 4th axis of " + label + " is > 1. 4-D cubes can\n"
                  "only be processed if 4th axis has size 1.")
    err.ensure(fourth_ok, fourth_msg)

    return
def filter(mask, cube, header, clipMethod, threshold, rmsMode, fluxRange, verbose):
    """
    Threshold finder: flag all pixels with |cube| >= threshold in the mask.

    Arguments:
      mask:        Mask array, updated in place (bitwise OR with the
                   detection map).
      cube:        Data cube to be searched.
      header:      Not used by this function.
      clipMethod:  "absolute" (threshold in data units) or "relative"
                   (threshold in multiples of the noise from GetRMS()).
      threshold:   Detection threshold; must end up non-negative.
      rmsMode:     Noise algorithm passed to GetRMS(); "std", "mad",
                   "gauss" or "negative".
      fluxRange:   Flux range passed to GetRMS(); "positive", "negative"
                   or "all".
      verbose:     Verbosity flag passed to GetRMS().

    Returns:
      None; the result is written into mask.
    """
    err.message("Running threshold finder.")

    # Sanity checks of user input, one entry per option:
    # (value supplied, set of legal values, label used in the message)
    option_checks = (
        (clipMethod, {"absolute", "relative"}, "clip method"),
        (rmsMode, {"std", "mad", "gauss", "negative"}, "RMS mode"),
        (fluxRange, {"positive", "negative", "all"}, "flux range"),
    )
    for value, legal, label in option_checks:
        err.ensure(
            value in legal,
            "Threshold finder failed. Illegal " + label + ": '" + str(value) + "'.")

    # A relative threshold is given in units of the noise level
    if clipMethod == "relative":
        noise = GetRMS(cube, rmsMode=rmsMode, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=verbose)
        threshold *= noise

    # Report the threshold and make sure it is not negative
    err.message(" Using threshold of " + str(threshold) + ".")
    err.ensure(threshold >= 0.0, "Threshold finder failed. Threshold value is negative.")

    # Detection map of |cube| >= threshold, OR-ed into the mask in place
    magnitude = np.absolute(cube)
    detected = np.greater_equal(magnitude, threshold)
    np.bitwise_or(mask, detected, out=mask)

    return
def apply_weights_file(data, weightsFile, subcube):
    """
    Multiply a data cube in place by the weights stored in a FITS file.

    The primary HDU of the weights file may have 1 to 4 axes:
      1 axis:  treated as a spectrum and broadcast over the last two
               axes of data;
      2 axes:  treated as an image and broadcast over the first axis
               of data;
      3 axes:  multiplied element-wise;
      4 axes:  only plane 0 of the first array axis is used.

    Arguments:
      data:         Data array to be weighted (modified in place).
      weightsFile:  Path of the FITS file holding the weights.
      subcube:      Empty sequence to use the full weights cube, or a
                    sequence of slice boundaries. Entries 0-1 slice the
                    last array axis, 2-3 the second-to-last and 4-5 the
                    third-to-last array axis of the weights.

    Returns:
      The weighted data array (same object as the input).
    """
    err.message("Applying weights cube:\n " + str(weightsFile))

    f = None
    try:
        # Load weights cube
        try:
            f = fits.open(weightsFile, memmap=False)
            header_weights = f[0].header
        except Exception:
            err.error("Failed to read weights cube.")

        # Extract axis sizes and types
        n_axes_weights, axis_size_weights, axis_type_weights = extract_axis_size(header_weights)

        # Ensure correct dimensionality
        check_cube_dimensions(n_axes_weights, axis_size_weights, cube_name="weights cube", min_dim=1, max_dim=4)

        # Multiply data by weights
        # 1-D spectrum
        if n_axes_weights == 1:
            err.warning("Weights cube has 1 axis; interpreted as spectrum.\nAdding first and second axis.")
            if len(subcube):
                err.ensure(len(subcube) == 6, "Subcube list must have 6 entries ({0:d} given).".format(len(subcube)))
                data *= np.reshape(f[0].section[subcube[4]:subcube[5]], (-1, 1, 1))
            else:
                # The bare name 'reshape' used here previously is not
                # defined in this block; np.reshape matches the branch above.
                data *= np.reshape(f[0].data, (-1, 1, 1))
        # 2-D image
        elif n_axes_weights == 2:
            if len(subcube) == 6 or len(subcube) == 4:
                data *= np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
            else:
                data *= np.array([f[0].data])
        # 3-D cube
        elif n_axes_weights == 3:
            if len(subcube) == 6:
                data *= f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                data *= f[0].data
        # 4-D hypercube
        else:
            if len(subcube) == 6:
                data *= f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                data *= f[0].section[0]
    finally:
        # Release the file handle even if reading or multiplying failed
        if f is not None:
            f.close()

    err.message(" Weights cube applied.")
    return data
def apply_weights_function(data, weightsFunction):
    """
    Multiply a data cube in place by an analytic weights function.

    The function is given as a string expression in the pixel index
    variables x, y and z (indices along the last, middle and first array
    axis of data, respectively) and may call a fixed set of NumPy
    functions. It is evaluated once per plane along the first axis.

    Arguments:
      data:             3-D data array to be weighted (modified in place).
      weightsFunction:  Expression to evaluate, e.g. "sqrt(x) + 0.5 * z".

    Returns:
      The weighted data array (same object as the input).
    """
    err.message("Applying weights function:\n " + str(weightsFunction))

    expression = str(weightsFunction)

    # NumPy functions that may be called from within the expression
    numpy_functions = (
        "sin", "cos", "tan", "arcsin", "arccos", "arctan", "arctan2",
        "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh",
        "exp", "log", "sqrt", "square", "power", "absolute", "fabs", "sign")

    # Whitelist of allowed alphabetical character sequences: the index
    # variables, the exponent markers of float literals (e.g. 1e5), and
    # the NumPy functions above
    whitelist = ["x", "y", "z", "e", "E"] + list(numpy_functions)

    # Search for all keywords consisting of consecutive sequences of
    # alphabetical characters. A list comprehension is used rather than the
    # builtin filter(), so this neither yields a one-shot iterator nor
    # depends on the name 'filter' still referring to the builtin.
    keywordsFound = [keyword for keyword in re.split("[^a-zA-Z]+", expression) if keyword]

    # Check for non-whitelisted sequences
    for keyword in keywordsFound:
        err.ensure(
            keyword in whitelist,
            "Unknown keyword '" + str(keyword) + "' found in weights function:\n"
            " " + str(weightsFunction) + "\n"
            "Please check your input.")

    # Explicit evaluation namespace: only the whitelisted NumPy functions
    # and the index arrays are visible, and builtins are disabled.
    namespace = {name: getattr(np, name) for name in numpy_functions}
    namespace["__builtins__"] = {}

    # Index arrays over a single 2-D plane (i.e. of width dz = 1); these do
    # not depend on the channel and are therefore created only once.
    z0, y, x = np.indices((1, data.shape[1], data.shape[2]))
    namespace["x"] = x
    namespace["y"] = y

    # Loop over all channels
    for i in range(data.shape[0]):
        z = z0 + i
        namespace["z"] = z

        # Multiply each plane by weights function
        try:
            # SECURITY NOTE: eval() runs a user-supplied string. The keyword
            # whitelist and the restricted namespace limit what it can reach,
            # but it must still not be exposed to untrusted input.
            data[z, y, x] *= eval(expression, namespace)
        except Exception:
            err.error(
                "Failed to evaluate weights function:\n"
                " " + str(weightsFunction) + "\n"
                "Please check your input.")

    err.message(" Weights function applied.")
    return data
def apply_weights_function(data, weightsFunction):
    """
    Multiply a data cube in place by an analytic weights function.

    The function is given as a string expression in the pixel index
    variables x, y and z (indices along the last, middle and first array
    axis of data, respectively) and may call a fixed set of NumPy
    functions. It is evaluated once per plane along the first axis.

    Arguments:
      data:             3-D data array to be weighted (modified in place).
      weightsFunction:  Expression to evaluate, e.g. "sqrt(x) + 0.5 * z".

    Returns:
      The weighted data array (same object as the input).
    """
    err.message("Applying weights function:\n " + str(weightsFunction))

    expression = str(weightsFunction)

    # NumPy functions that may be called from within the expression
    numpy_functions = (
        "sin", "cos", "tan", "arcsin", "arccos", "arctan", "arctan2",
        "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh",
        "exp", "log", "sqrt", "square", "power", "absolute", "fabs", "sign")

    # Whitelist of allowed alphabetical character sequences: the index
    # variables, the exponent markers of float literals (e.g. 1e5), and
    # the NumPy functions above
    whitelist = ["x", "y", "z", "e", "E"] + list(numpy_functions)

    # Search for all keywords consisting of consecutive sequences of
    # alphabetical characters. A list comprehension is used rather than the
    # builtin filter(), so this neither yields a one-shot iterator nor
    # depends on the name 'filter' still referring to the builtin.
    keywordsFound = [keyword for keyword in re.split("[^a-zA-Z]+", expression) if keyword]

    # Check for non-whitelisted sequences
    for keyword in keywordsFound:
        err.ensure(
            keyword in whitelist,
            "Unknown keyword '" + str(keyword) + "' found in weights function:\n"
            " " + str(weightsFunction) + "\n"
            "Please check your input.")

    # Explicit evaluation namespace: only the whitelisted NumPy functions
    # and the index arrays are visible, and builtins are disabled.
    namespace = {name: getattr(np, name) for name in numpy_functions}
    namespace["__builtins__"] = {}

    # Index arrays over a single 2-D plane (i.e. of width dz = 1); these do
    # not depend on the channel and are therefore created only once.
    z0, y, x = np.indices((1, data.shape[1], data.shape[2]))
    namespace["x"] = x
    namespace["y"] = y

    # Loop over all channels
    for i in range(data.shape[0]):
        z = z0 + i
        namespace["z"] = z

        # Multiply each plane by weights function
        try:
            # SECURITY NOTE: eval() runs a user-supplied string. The keyword
            # whitelist and the restricted namespace limit what it can reach,
            # but it must still not be exposed to untrusted input.
            data[z, y, x] *= eval(expression, namespace)
        except Exception:
            err.error(
                "Failed to evaluate weights function:\n"
                " " + str(weightsFunction) + "\n"
                "Please check your input.")

    err.message(" Weights function applied.")
    return data
def nan_standard_deviation(x):
    """
    Standard deviation about zero of all non-NaN elements of an array.

    Computes sqrt(sum(y**2) / N) over the N elements y of x that are not
    NaN, i.e. the mean of the data is assumed to be zero rather than
    being measured and subtracted.

    Arguments:
      x:  NumPy array, possibly containing NaN values.

    Returns:
      The standard deviation about zero as a NumPy float.

    An empty selection (no non-NaN values) is reported via err.ensure().
    """
    y = x[~np.isnan(x)]
    err.ensure(y.size > 0, "Array size of 0 encountered in calculation of std. dev.")
    # Accumulate the sum in double precision; summing many values in the
    # native precision of e.g. a 32-bit cube loses accuracy.
    return np.sqrt(np.sum(y * y, dtype=np.float64) / y.size)
def moment2(x, y):
    """
    Second moment of a distribution y(x), assuming a first moment of zero.

    Evaluates sqrt(sum(x**2 * y) / sum(y)), with both sums accumulated in
    double precision.

    Arguments:
      x:  Array of abscissa values.
      y:  Array of weights, of the same size as x.

    Returns:
      The second moment as a NumPy float.

    A size mismatch between x and y is reported via err.ensure().
    """
    same_size = x.size == y.size
    err.ensure(same_size, "Incompatible array sizes encountered in moment calculation.")

    weighted_squares = np.sum(x * x * y, dtype=np.float64)
    total_weight = np.sum(y, dtype=np.float64)

    return np.sqrt(weighted_squares / total_weight)
def get_subcube_range(header, n_axes, axis_size, subcube, subcubeMode):
    """
    Convert a user-specified subcube into clipped pixel slice boundaries.

    Arguments:
      header:       FITS header of the cube; only read in "world" mode,
                    where the WCS is constructed from it.
      n_axes:       Number of axes of the cube.
      axis_size:    Sequence of axis sizes, indexed like the subcube axes.
      subcube:      In "pixel" mode: integer boundaries
                    [min0, max0, min1, max1(, min2, max2)].
                    In "world" mode: centre coordinates followed by
                    half-widths, [c0, c1(, c2), w0, w1(, w2)].
                    4 values are required for 2-D cubes and 6 values for
                    3-D/4-D cubes.
      subcubeMode:  "pixel" or "world".

    Returns:
      New list [min0, max0, min1, max1(, min2, max2)] of pixel boundaries
      constrained to the cube. The list passed in is not modified.

    Invalid input is reported through err.ensure() / err.error().
    """
    # Basic sanity checks
    err.ensure(subcubeMode in {"pixel", "world"}, "Subcube mode must be 'pixel' or 'world'.")
    err.ensure(
        (len(subcube) == 4 and n_axes == 2) or (len(subcube) == 6 and n_axes > 2),
        "Subcube range must contain 4 values for 2-D cubes\n"
        "or 6 values for 3-D/4-D cubes.")

    # -----------------
    # World coordinates
    # -----------------
    if subcubeMode == "world":
        # Read WCS information
        try:
            wcsin = wcs.WCS(header)
        except Exception:
            err.error("Failed to read WCS information from data cube header.")

        # Calculate cos(dec) correction for RA range. Without angular units
        # on both axes no correction is applied; previously corrfact was
        # left undefined in that case, causing a NameError further down.
        corrfact = 1.0
        if wcsin.wcs.cunit[0] == "deg" and wcsin.wcs.cunit[1] == "deg":
            corrfact = math.cos(math.radians(subcube[1]))

        if n_axes in (2, 3, 4):
            # Number of axes described by the subcube specification
            n_used = 2 if n_axes == 2 else 3
            centre = list(subcube[:n_used])
            half_width = list(subcube[n_used:2 * n_used])
            half_width[0] = half_width[0] / corrfact

            lower = [c - w for c, w in zip(centre, half_width)]
            upper = [c + w for c, w in zip(centre, half_width)]

            # The WCS of a 4-D cube expects a 4th coordinate; it is set to 0
            # and its pixel coordinate is discarded again by the slice below.
            if n_axes == 4:
                lower.append(0)
                upper.append(0)

            subcube = wcsin.wcs_world2pix(np.array([lower, upper]), 0)[:, :n_used]
        else:
            err.error("Unsupported number of axes.")

        # Flatten array; column-major order interleaves lower and upper
        # boundaries as [min0, max0, min1, max1, ...]
        subcube = np.ravel(subcube, order="F")

        # Ensure that min pix coord < max pix coord for all axes.
        # This operation is meaningful because wcs_world2pix returns negative pixel coordinates
        # only for pixels located before an axis' start (i.e., negative pixel coordinates should
        # not be interpreted as counting backward from an axis' end).
        for lo in range(0, len(subcube), 2):
            subcube[lo], subcube[lo + 1] = correct_order(subcube[lo], subcube[lo + 1])

        # Convert to integer
        subcube = list(subcube.astype(int))

        # Constrain subcube to be within cube boundaries
        for axis in range(min(3, n_axes)):
            lo, hi = 2 * axis, 2 * axis + 1
            err.ensure(
                subcube[hi] >= 0 and subcube[lo] < axis_size[axis],
                "Subcube outside input cube range for axis {0:d}.".format(axis))
            subcube[lo] = max(subcube[lo], 0)
            # +1 turns the inclusive upper pixel into an exclusive slice end
            subcube[hi] = min(subcube[hi] + 1, axis_size[axis])

    # -----------------
    # Pixel coordinates
    # -----------------
    else:
        # Work on a copy so that the caller's list is left untouched
        subcube = list(subcube)

        # Ensure that pixel coordinates are integers (Python or NumPy
        # integers; booleans are not accepted)
        for value in subcube:
            err.ensure(
                isinstance(value, (int, np.integer)) and not isinstance(value, bool),
                "Subcube boundaries must be integer values.")

        # Sanity checks on boundaries
        for axis in range(min(3, n_axes)):
            lo, hi = 2 * axis, 2 * axis + 1

            # Ensure correct order
            err.ensure(
                subcube[lo] < subcube[hi],
                "Lower subcube boundary greater than upper boundary.\nPlease check your input.")

            # Adjust lower boundary to the range [0, size - 1]
            subcube[lo] = min(max(subcube[lo], 0), axis_size[axis] - 1)

            # Adjust upper boundary to the range [1, size]
            subcube[hi] = min(max(subcube[hi], 1), axis_size[axis])

    # Report final subcube boundaries
    err.message(" Loading subcube of range " + str(subcube) + '.')

    return subcube
def smooth(indata, kernel, edgeMode, kernelX, kernelY, kernelZ):
    """
    Smooth a data cube with the specified kernel type and size.

    Arguments:
      indata:       The input data cube.
      kernel:       The smoothing kernel; "gaussian", "boxcar" or "median".
      edgeMode:     Determines how borders are handled; "reflect", "constant",
                    "nearest", "mirror" or "wrap".
      kernelX/Y/Z:  Size of the kernel (standard deviation in the case of
                    a Gaussian kernel).

    Returns:
      Smoothed copy of the data cube. If the input contains NaNs, the
      cube is passed through np.nan_to_num() before smoothing and the
      NaNs are restored at their original positions afterwards.
    """
    err.message("Smoothing data cube")

    # Sanity checks of user input
    err.ensure(
        kernel in {"gaussian", "boxcar", "median"},
        "Smoothing failed. Illegal smoothing type: '" + str(kernel) + "'.")
    err.ensure(
        edgeMode in {"reflect", "constant", "nearest", "mirror", "wrap"},
        "Smoothing failed. Illegal edge mode: '" + str(edgeMode) + "'.")
    err.ensure(
        kernelX or kernelY or kernelZ,
        "Smoothing failed. All smoothing kernels are zero.")
    err.ensure(
        kernel != "median" or (kernelX and kernelY and kernelZ),
        "Smoothing failed. Cannot determine median for kernel size of zero.")

    # Print some information
    err.message(" Kernel type: " + str(kernel).title())
    err.message(" Kernel size: [" + str(kernelX) + ", " + str(kernelY) + ", " + str(kernelZ) + "]")
    err.message(" Edge mode: " + str(edgeMode))

    # Create copy of input cube to be smoothed
    outdata = np.copy(indata)

    # Remove NaNs if necessary; the NaN mask is computed once and reused
    # when the NaNs are put back in below
    nan_mask = np.isnan(indata)
    found_nan = nan_mask.any()
    if found_nan:
        outdata = np.nan_to_num(outdata)

    # NOTE(review): the kernel sizes are passed in the order (Z, X, Y). If
    # indata is indexed as (z, y, x), kernelX and kernelY end up applied to
    # swapped axes. Order kept unchanged here; please confirm the intent.
    kernel_size = (kernelZ, kernelX, kernelY)

    # Smooth with the selected kernel. The filters are taken from the
    # scipy.ndimage namespace directly, as the scipy.ndimage.filters
    # sub-namespace is deprecated.
    if kernel == "gaussian":
        outdata = ndimage.gaussian_filter(outdata, sigma=kernel_size, mode=edgeMode)
    elif kernel == "boxcar":
        outdata = ndimage.uniform_filter(outdata, size=kernel_size, mode=edgeMode)
    else:  # kernel == "median"
        outdata = ndimage.median_filter(outdata, size=kernel_size, mode=edgeMode)

    # Put NaNs back in if necessary
    if found_nan:
        outdata[nan_mask] = np.nan

    return outdata
def import_data(doSubcube, inFile, weightsFile, maskFile, weightsFunction=None, subcube=[], subcubeMode="pixel", doFlag=False, flagRegions=False, flagFile="", cubeOnly=False):
    """
    Load a data cube (or a subregion of it) from a FITS file and optionally
    apply weights, flags and a mask.

    Arguments:
      doSubcube:        Not used by this function.
      inFile:           Path of the input FITS file.
      weightsFile:      Path of a weights cube to multiply the data with;
                        takes precedence over weightsFunction.
      maskFile:         Path of a mask cube to be imported.
      weightsFunction:  Analytic weights function (string), used only if
                        no weightsFile is given.
      subcube:          Subcube specification passed to get_subcube_range();
                        an empty sequence selects the full cube. The default
                        list is never modified by this function.
      subcubeMode:      "pixel" or "world"; see get_subcube_range().
      doFlag:           If true, apply_flagging() is called on the data.
      flagRegions:      Flagging regions passed to apply_flagging().
      flagFile:         Flagging file passed to apply_flagging().
      cubeOnly:         If true, return right after loading the cube.

    Returns:
      (data, header) if cubeOnly is true, otherwise
      (data, header, mask, subcube), where subcube is the list of final
      pixel boundaries ([] for the full cube). 2-D images are returned
      with a leading axis of size 1; for 4-D cubes only plane 0 of the
      first array axis is read. When a subcube is read, the CRPIXn and
      NAXISn keywords of the header are adjusted accordingly.
    """
    # Basic sanity checks on user input
    err.ensure(
        os.path.isfile(inFile),
        "Data file not found:\n " + str(inFile))

    # -------------------------------
    # Open input cube and read header
    # -------------------------------
    err.message("Loading input data cube.")

    f = None
    try:
        try:
            f = fits.open(inFile, mode="readonly", memmap=False, do_not_scale_image_data=False)
            header = f[0].header
        except Exception:
            err.error("Failed to load primary HDU of data file:\n " + str(inFile))

        # Extract axis sizes and types
        n_axes, axis_size, axis_type = extract_axis_size(header)

        # Check dimensionality of data cube
        check_cube_dimensions(n_axes, axis_size, cube_name="data cube")

        # Print some information
        err.message(" Data cube has {0:d} axes.".format(header["NAXIS"]))
        err.message(" Types: " + str(axis_type))
        err.message(" Sizes: " + str(axis_size))

        # Extract subcube boundaries if requested
        if len(subcube):
            subcube = get_subcube_range(header, n_axes, axis_size, subcube, subcubeMode)
        else:
            subcube = []

        # --------------------------------
        # Read requested subregion of data
        # --------------------------------
        if len(subcube):
            # 2-D image
            if n_axes == 2:
                data = np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
            # 3-D cube
            elif n_axes == 3:
                data = f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            # 4-D hypercube
            else:
                data = f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]

            # Shift reference pixels and update axis sizes in the header so
            # that it describes the subcube (2 axes for images, 3 otherwise)
            n_sub_axes = 2 if n_axes == 2 else 3
            for axis in range(n_sub_axes):
                header["CRPIX{0:d}".format(axis + 1)] -= subcube[2 * axis]
            for axis in range(n_sub_axes):
                header["NAXIS{0:d}".format(axis + 1)] = subcube[2 * axis + 1] - subcube[2 * axis]
        else:
            # 2-D image
            if n_axes == 2:
                data = np.array([f[0].data])
            # 3-D cube
            elif n_axes == 3:
                data = f[0].data
            # 4-D hypercube
            else:
                data = f[0].section[0]
    finally:
        # Close input cube, also if reading failed part-way
        if f is not None:
            f.close()

    err.message("Input data cube loaded.")

    # ---------------------------------------------------------
    # If no additional actions required, return data and header
    # ---------------------------------------------------------
    if cubeOnly:
        return data, header

    # ---------------------------------------------------
    # Otherwise carry out additional actions as requested
    # ---------------------------------------------------
    # Weighting
    if weightsFile:
        data = apply_weights_file(data, weightsFile, subcube)
    elif weightsFunction:
        data = apply_weights_function(data, weightsFunction)

    # Flagging
    if doFlag:
        data = apply_flagging(data, flagFile, flagRegions, subcube)

    # Masking
    if maskFile:
        mask = import_mask(maskFile, header, axis_size, subcube)
    else:
        # Create an empty mask if none is provided.
        mask = np.zeros(data.shape, dtype=bool)

    return data, header, mask, subcube
def get_subcube_range(header, n_axes, axis_size, subcube, subcubeMode):
    """
    Convert a user-specified subcube into clipped pixel slice boundaries.

    Arguments:
      header:       FITS header of the cube; only read in "world" mode,
                    where the WCS is constructed from it.
      n_axes:       Number of axes of the cube.
      axis_size:    Sequence of axis sizes, indexed like the subcube axes.
      subcube:      In "pixel" mode: integer boundaries
                    [min0, max0, min1, max1(, min2, max2)].
                    In "world" mode: centre coordinates followed by
                    half-widths, [c0, c1(, c2), w0, w1(, w2)].
                    4 values are required for 2-D cubes and 6 values for
                    3-D/4-D cubes.
      subcubeMode:  "pixel" or "world".

    Returns:
      New list [min0, max0, min1, max1(, min2, max2)] of pixel boundaries
      constrained to the cube. The list passed in is not modified.

    Invalid input is reported through err.ensure() / err.error().
    """
    # Basic sanity checks
    err.ensure(subcubeMode in {"pixel", "world"}, "Subcube mode must be 'pixel' or 'world'.")
    err.ensure(
        (len(subcube) == 4 and n_axes == 2) or (len(subcube) == 6 and n_axes > 2),
        "Subcube range must contain 4 values for 2-D cubes\n"
        "or 6 values for 3-D/4-D cubes.")

    # -----------------
    # World coordinates
    # -----------------
    if subcubeMode == "world":
        # Read WCS information
        try:
            wcsin = wcs.WCS(header)
        except Exception:
            err.error("Failed to read WCS information from data cube header.")

        # Calculate cos(dec) correction for RA range. Without angular units
        # on both axes no correction is applied; previously corrfact was
        # left undefined in that case, causing a NameError further down.
        corrfact = 1.0
        if wcsin.wcs.cunit[0] == "deg" and wcsin.wcs.cunit[1] == "deg":
            corrfact = math.cos(math.radians(subcube[1]))

        if n_axes in (2, 3, 4):
            # Number of axes described by the subcube specification
            n_used = 2 if n_axes == 2 else 3
            centre = list(subcube[:n_used])
            half_width = list(subcube[n_used:2 * n_used])
            half_width[0] = half_width[0] / corrfact

            lower = [c - w for c, w in zip(centre, half_width)]
            upper = [c + w for c, w in zip(centre, half_width)]

            # The WCS of a 4-D cube expects a 4th coordinate; it is set to 0
            # and its pixel coordinate is discarded again by the slice below.
            if n_axes == 4:
                lower.append(0)
                upper.append(0)

            subcube = wcsin.wcs_world2pix(np.array([lower, upper]), 0)[:, :n_used]
        else:
            err.error("Unsupported number of axes.")

        # Flatten array; column-major order interleaves lower and upper
        # boundaries as [min0, max0, min1, max1, ...]
        subcube = np.ravel(subcube, order="F")

        # Ensure that min pix coord < max pix coord for all axes.
        # This operation is meaningful because wcs_world2pix returns negative pixel coordinates
        # only for pixels located before an axis' start (i.e., negative pixel coordinates should
        # not be interpreted as counting backward from an axis' end).
        for lo in range(0, len(subcube), 2):
            subcube[lo], subcube[lo + 1] = correct_order(subcube[lo], subcube[lo + 1])

        # Convert to integer
        subcube = list(subcube.astype(int))

        # Constrain subcube to be within cube boundaries
        for axis in range(min(3, n_axes)):
            lo, hi = 2 * axis, 2 * axis + 1
            err.ensure(
                subcube[hi] >= 0 and subcube[lo] < axis_size[axis],
                "Subcube outside input cube range for axis {0:d}.".format(axis))
            subcube[lo] = max(subcube[lo], 0)
            # +1 turns the inclusive upper pixel into an exclusive slice end
            subcube[hi] = min(subcube[hi] + 1, axis_size[axis])

    # -----------------
    # Pixel coordinates
    # -----------------
    else:
        # Work on a copy so that the caller's list is left untouched
        subcube = list(subcube)

        # Ensure that pixel coordinates are integers (Python or NumPy
        # integers; booleans are not accepted)
        for value in subcube:
            err.ensure(
                isinstance(value, (int, np.integer)) and not isinstance(value, bool),
                "Subcube boundaries must be integer values.")

        # Sanity checks on boundaries
        for axis in range(min(3, n_axes)):
            lo, hi = 2 * axis, 2 * axis + 1

            # Ensure correct order
            err.ensure(
                subcube[lo] < subcube[hi],
                "Lower subcube boundary greater than upper boundary.\nPlease check your input.")

            # Adjust lower boundary to the range [0, size - 1]
            subcube[lo] = min(max(subcube[lo], 0), axis_size[axis] - 1)

            # Adjust upper boundary to the range [1, size]
            subcube[hi] = min(max(subcube[hi], 1), axis_size[axis])

    # Report final subcube boundaries
    err.message(" Loading subcube of range " + str(subcube) + '.')

    return subcube
def GetRMS(cube, rmsMode="negative", fluxRange="all", zoomx=1, zoomy=1, zoomz=1, verbose=0, min_hist_peak=0.05, sample=1, twoPass=False):
    """
    Measure the noise level (rms) of a data cube.

    Description of arguments
    ------------------------
    cube           2-D or 3-D data array; a 2-D array is given a leading
                   axis of size 1.

    rmsMode        Select which algorithm should be used for calculating
                   the noise. Allowed options:
                     'std'       Standard deviation about 0.
                     'mad'       Median absolute deviation about 0.
                     'moment'    2nd moment of flux histogram, assuming a
                                 1st moment of 0.
                     'gauss'     Width of Gaussian fitted to flux histogram,
                                 assuming a centroid of 0.
                     'negative'  Width of Gaussian fitted to negative side
                                 of the flux histogram, again assuming a
                                 centroid of 0. This is a legacy option and
                                 may be removed from SoFiA in the future.
                   An illegal value triggers a warning on stderr and a
                   fall-back to 'mad'.

    fluxRange      Define which part of the data are to be used in the
                   noise measurement. Allowed options:
                     'negative'  Use only pixels with negative flux.
                     'positive'  Use only pixels with positive flux.
                     'all'       Use both positive and negative (i.e. all)
                                 pixels.
                   An illegal value triggers a warning on stderr and a
                   fall-back to 'all'.

    zoomx/y/z      Zoom factors; the noise is measured in a central region
                   covering a fraction 1/zoom of each axis.

    verbose        Print additional progress messages if set to True.

    min_hist_peak  Only used by rmsMode 'negative': minimum fraction of
                   the histogram counts required in the last bin before
                   the bin size is increased.

    sample         Step size used when slicing the cube along each axis.

    twoPass        Run a second pass of MAD and STD, this time with a clip
                   level of 5 times the RMS from the first pass.

    Returns
    -------
    The measured noise level in data units.
    """
    # Check input for sanity
    if fluxRange != "all" and fluxRange != "positive" and fluxRange != "negative":
        sys.stderr.write("WARNING: Illegal value of fluxRange = '" + str(fluxRange) + "'.\n")
        sys.stderr.write(" Using default value of 'all' instead.\n")
        fluxRange = "all"
    if rmsMode != "std" and rmsMode != "mad" and rmsMode != "negative" and rmsMode != "gauss" and rmsMode != "moment":
        sys.stderr.write("WARNING: Illegal value of rmsMode = '" + str(rmsMode) + "'.\n")
        sys.stderr.write(" Using default value of 'mad' instead.\n")
        rmsMode = "mad"

    # Ensure that we have a 3D cube
    if len(cube.shape) == 2:
        cube = np.array([cube])

    # Boundaries of the central region selected by the zoom factors
    x0, x1 = int(math.ceil((1 - 1.0 / zoomx) * cube.shape[2] / 2)), int(math.floor((1 + 1.0 / zoomx) * cube.shape[2] / 2)) + 1
    y0, y1 = int(math.ceil((1 - 1.0 / zoomy) * cube.shape[1] / 2)), int(math.floor((1 + 1.0 / zoomy) * cube.shape[1] / 2)) + 1
    z0, z1 = int(math.ceil((1 - 1.0 / zoomz) * cube.shape[0] / 2)), int(math.floor((1 + 1.0 / zoomz) * cube.shape[0] / 2)) + 1

    # Region actually used for the measurement. Basic slicing yields a view,
    # so taking it once avoids repeating the slice expression everywhere
    # without copying the data.
    sub = cube[z0:z1:sample, y0:y1:sample, x0:x1:sample]

    err.message(" Estimating rms on subcube (x,y,z zoom = %.0f,%.0f,%.0f) ..." % (zoomx, zoomy, zoomz), verbose)
    err.message(" Estimating rms on subcube sampling every %i voxels ..." % (sample), verbose)
    err.message(" ... Subcube shape is " + str(sub.shape) + " ...", verbose)

    # Check if only negative or positive pixels are to be used:
    if fluxRange == "negative":
        with np.errstate(invalid="ignore"):
            halfCube = sub[sub < 0]
        err.ensure(halfCube.size, "Cannot measure noise from negative flux values.\nNo negative fluxes found in data cube.")
    elif fluxRange == "positive":
        with np.errstate(invalid="ignore"):
            halfCube = sub[sub > 0]
        err.ensure(halfCube.size, "Cannot measure noise from positive flux values.\nNo positive fluxes found in data cube.")
    # NOTE: The purpose of the with... statement is to temporarily disable certain warnings, as otherwise the
    #       Python interpreter would print a warning whenever a value of NaN is compared to 0. The comparison
    #       is defined to yield False, which conveniently removes NaNs by default without having to do that
    #       manually in a separate step, but the associated warning message is unfortunately a nuisance.

    # GAUSSIAN FIT TO NEGATIVE FLUXES
    if rmsMode == "negative":
        nrbins = max(100, int(math.ceil(float(cube.size) / 1e+5)))
        cubemin = np.nanmin(cube)
        err.ensure(cubemin < 0, "Cannot estimate noise from Gaussian fit to negative flux\nhistogram; no negative fluxes found in data cube.")

        valid = sub[~np.isnan(sub)]
        bins = np.arange(cubemin, abs(cubemin) / nrbins - 1e-12, abs(cubemin) / nrbins)
        fluxval = (bins[:-1] + bins[1:]) / 2
        rmshisto = np.histogram(valid, bins=bins)[0]

        # Count how many of the last bins must be merged for them to
        # hold at least a fraction min_hist_peak of all counts
        nrsummedbins = 0
        while rmshisto[-nrsummedbins - 1:].sum() < min_hist_peak * rmshisto.sum():
            nrsummedbins += 1

        if nrsummedbins:
            if verbose:
                sys.stdout.write(" ... adjusting bin size to get a fraction of voxels in central bin >= " + str(min_hist_peak) + "\n")
            nrbins /= (nrsummedbins + 1)
            bins = np.arange(cubemin, abs(cubemin) / nrbins - 1e-12, abs(cubemin) / nrbins)
            fluxval = (bins[:-1] + bins[1:]) / 2.0
            rmshisto = np.histogram(valid, bins=bins)[0]

        rms = abs(sp.optimize.curve_fit(Gaussian, fluxval, rmshisto, p0=[rmshisto.max(), -fluxval[rmshisto < rmshisto.max() / 2.0].max() * 2.0 / 2.355])[0][1])

    # GAUSSIAN FIT TO FLUX HISTOGRAM / SECOND MOMENT OF FLUX HISTOGRAM
    elif rmsMode == "gauss" or rmsMode == "moment":
        nBins = 100
        dataMin = float(np.nanmin(cube))
        dataMax = float(np.nanmax(cube))
        err.ensure(dataMin < dataMax, "Maximum not greater than minimum. Cannot determine noise level.")

        if fluxRange == "negative":
            # Set upper limit to 0
            err.ensure(dataMin < 0.0, "Minimum > 0. Cannot determine noise level for negative pixels.")
            dataMax = 0.0
        elif fluxRange == "positive":
            # Set lower limit to 0
            err.ensure(dataMax > 0.0, "Maximum < 0. Cannot determine noise level for positive pixels.")
            dataMin = 0.0
        else:
            # Select the smallest of the two for both limits
            err.ensure(dataMin < 0.0 and dataMax > 0.0, "Noise values not scattered around 0. Cannot measure noise level.")
            limit = min(abs(dataMin), abs(dataMax))
            dataMin = -limit
            dataMax = limit

        valid = sub[~np.isnan(sub)]
        binWidth = (dataMax - dataMin) / float(nBins)
        bins = np.arange(dataMin, dataMax, binWidth)
        binCtr = (bins[:-1] + bins[1:]) / 2.0
        hist = np.histogram(valid, bins=bins)[0]

        # Calculate 2nd moment
        mom2 = moment2(binCtr, hist)

        # Adjust bin size if necessary (at most two refinements)
        counter = 0
        while mom2 < 5.0 * binWidth and counter < 2:
            counter += 1
            err.message("Increasing number of bins by factor of " + str(int(20.0 * binWidth / mom2)) + " for Gaussian fit.")
            nBins = int(nBins * 20.0 * binWidth / mom2)
            binWidth = (dataMax - dataMin) / float(nBins)
            # The bin edges must be rebuilt for the new bin width; without
            # this the histogram would be recomputed on the old bins and
            # the refinement would have no effect.
            bins = np.arange(dataMin, dataMax, binWidth)
            binCtr = (bins[:-1] + bins[1:]) / 2.0
            hist = np.histogram(valid, bins=bins)[0]
            mom2 = moment2(binCtr, hist)

        # Carry out Gaussian fitting if requested
        if rmsMode == "gauss":
            rms = abs(sp.optimize.curve_fit(Gaussian, binCtr, hist, p0=[hist.max(), mom2])[0][1])
        else:
            rms = mom2

    # MEDIAN ABSOLUTE DEVIATION
    elif rmsMode == "mad":
        if fluxRange == "all":
            # NOTE: Here we assume that the median of the data is zero!
            rms = 1.4826 * nanmedian(abs(sub), axis=None)
            if twoPass:
                err.message("Repeating noise estimation with 5-sigma clip.", verbose)
                with np.errstate(invalid="ignore"):
                    rms = 1.4826 * nanmedian(abs(sub[abs(sub) < 5.0 * rms]), axis=None)
        else:
            # NOTE: Here we assume that the median of the data is zero! There are no more NaNs in halfCube.
            rms = 1.4826 * np.median(abs(halfCube), axis=None)
            if twoPass:
                err.message("Repeating noise estimation with 5-sigma clip.", verbose)
                rms = 1.4826 * np.median(abs(halfCube[abs(halfCube) < 5.0 * rms]), axis=None)

    # STANDARD DEVIATION
    elif rmsMode == "std":
        if fluxRange == "all":
            # NOTE: Here we assume that the mean of the data is zero!
            rms = nan_standard_deviation(sub)
            if twoPass:
                err.message("Repeating noise estimation with 5-sigma clip.", verbose)
                with np.errstate(invalid="ignore"):
                    rms = nan_standard_deviation(sub[abs(sub) < 5.0 * rms])
        else:
            # NOTE: Here we assume that the mean of the data is zero! There are no more NaNs in halfCube.
            rms = standard_deviation(halfCube)
            if twoPass:
                err.message("Repeating noise estimation with 5-sigma clip.", verbose)
                rms = standard_deviation(halfCube[abs(halfCube) < 5.0 * rms])

    err.message(" ... %s rms = %.2e (data units)" % (rmsMode, rms), verbose)

    return rms
def nan_standard_deviation(x):
    """
    Standard deviation about zero of all non-NaN elements of an array.

    Evaluates sqrt(sum(v**2) / N) over the N elements v of x that are not
    NaN; the mean is assumed to be zero and is not subtracted. The sum is
    accumulated in double precision.

    Arguments:
      x:  NumPy array, possibly containing NaN values.

    Returns:
      The standard deviation about zero as a NumPy float.

    An empty selection (no non-NaN values) is reported via err.ensure().
    """
    is_valid = ~np.isnan(x)
    values = x[is_valid]
    count = values.size

    err.ensure(count > 0, "Array size of 0 encountered in calculation of std. dev.")

    sum_of_squares = np.sum(values * values, dtype=np.float64)
    return np.sqrt(sum_of_squares / count)
def standard_deviation(x):
    """
    Standard deviation about zero of all elements of an array.

    Evaluates sqrt(sum(x**2) / N) over all N elements of x; the mean is
    assumed to be zero and is not subtracted. No NaN filtering is done
    here. The sum is accumulated in double precision.

    Arguments:
      x:  NumPy array.

    Returns:
      The standard deviation about zero as a NumPy float.

    An empty array is reported via err.ensure().
    """
    count = x.size
    err.ensure(count > 0, "Array size of 0 encountered in calculation of std. dev.")

    sum_of_squares = np.sum(x * x, dtype=np.float64)
    return np.sqrt(sum_of_squares / count)
def EstimateRel(data, pdfoutname, parNames, parSpace=["snr_sum", "snr_max", "n_pix"], logPars=[1, 1, 1], autoKernel=True, scaleKernel=1, negPerBin=1, skellamTol=-0.5, kernel=[0.15, 0.05, 0.1], usecov=False, doscatter=1, docontour=1, doskellam=1, dostats=0, saverel=1, threshold=0.99, fMin=0, verb=0, makePlot=False):
    """Estimate the reliability of positive sources from the negative ones.

    Sources are split into positive (``snr_sum > 0``) and negative
    (``snr_sum <= 0``) detections. Both sets are smoothed in the parameter
    space ``parSpace`` with a Gaussian kernel to obtain density fields P and N,
    and the reliability of each positive source is ``(P - N) / P`` evaluated at
    its position.

    Parameters
    ----------
    data : 2-D array
        One row per source, one column per entry of ``parNames``.
    pdfoutname : str
        Prefix of the diagnostic PDF files written when ``makePlot`` is set.
    parNames : list of str
        Column names of ``data``; must contain "id", "snr_sum", "snr_max",
        "snr_min", "n_pix" and every entry of ``parSpace``.
    parSpace : list of str
        Parameters spanning the space in which densities are evaluated.
    logPars : list
        Per-parameter flag requesting log10 values. If any entry is 0 all
        entries are forced to 1 (with a warning).
    autoKernel : bool
        If true, derive the kernel from the (co)variance of the negative
        sources; otherwise use ``kernel``.
    scaleKernel : number
        > 0: factor applied to sqrt(kernel); 0: grow the kernel iteratively
        until the Skellam criterion is met; < 0 is rejected via ``err.ensure``.
    negPerBin : number
        Starting number of negative sources per kernel width (must be >= 1
        when ``scaleKernel == 0``).
    skellamTol : number
        Target median/width of (P-N)/sqrt(P+N) that stops kernel growth
        (must be <= 0 when ``scaleKernel == 0``).
    kernel : sequence
        User-supplied sigma per parameter; squared internally. Only used when
        ``autoKernel`` is false.
    usecov : bool
        Keep the off-diagonal covariance terms of the automatic kernel.
    doscatter, docontour, doskellam : flags
        Select which diagnostic plots are produced (only if ``makePlot``).
    dostats : flag
        Only consulted when deciding whether to re-evaluate the densities
        before saving.
    saverel : flag
        Append the columns Np, Nn and R to the returned table.
    threshold : float
        Minimum reliability for a source to count as reliable.
    fMin : float
        Minimum ``snr_sum / sqrt(n_pix)`` for a source to count as reliable.
    verb : flag
        Print additional progress messages.
    makePlot : bool
        Import Matplotlib (Agg backend) and write diagnostic plots.

    Returns
    -------
    (list of lists, array of int)
        The source table (with Np, Nn, R appended if ``saverel``) and the IDs
        of the reliable positive sources.

    Notes
    -----
    The list defaults are only read or rebound, never mutated in place, so
    sharing them between calls is harmless.
    """
    # Always work on logarithmic parameter values; the reliability.logPars parameter should be removed
    if 0 in logPars:
        err.warning(" Setting all reliability.logPars entries to 1. This parameter is no longer editable by users.")
        logPars = [1] * len(parSpace)

    # Import Matplotlib if diagnostic plots requested
    if makePlot:
        import matplotlib
        # The following line is necessary to run SoFiA remotely
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

    # --------------------------------
    # Build array of source parameters
    # --------------------------------

    idCOL = parNames.index("id")
    ftotCOL = parNames.index("snr_sum")
    fmaxCOL = parNames.index("snr_max")
    fminCOL = parNames.index("snr_min")

    # Get columns of requested parameters
    parCol = [parNames.index(name) for name in parSpace]

    # Get position and number of positive and negative sources
    pos = data[:, ftotCOL] > 0
    neg = data[:, ftotCOL] <= 0
    Npos = pos.sum()
    Nneg = neg.sum()

    err.ensure(Npos, "No positive sources found; cannot proceed.")
    err.ensure(Nneg, "No negative sources found; cannot proceed.")

    # Get array of relevant source parameters (and take log of them if requested)
    ids = data[:, idCOL]
    pars = np.empty((data.shape[0], 0))

    for ii, name in enumerate(parSpace):
        if name == "snr_max":
            # Peak S/N: maximum for positive sources, minus the minimum for negative ones
            parsTmp = data[:, fmaxCOL] * pos - data[:, fminCOL] * neg
        elif name == "snr_sum" or name == "snr_mean":
            # Signed quantities: use the absolute value so that the log is defined
            parsTmp = abs(data[:, parCol[ii]])
        else:
            parsTmp = data[:, parCol[ii]]
        if logPars[ii]:
            parsTmp = np.log10(parsTmp)
        pars = np.concatenate((pars, parsTmp.reshape(-1, 1)), axis=1)

    err.message(" Working in parameter space {0:}".format(str(parSpace)))
    err.message(" Will convolve the distribution of positive and negative sources in this space to derive the P and N density fields")
    # From here on pars has one row per parameter and one column per source
    pars = np.transpose(pars)

    # ----------------------------------------------------------
    # Set parameters to work with and gridding/plotting for each
    # ----------------------------------------------------------

    # Axis labels when plotting
    labs = [("log " if logPars[ii] else "") + name for ii, name in enumerate(parSpace)]

    # Axis limits when plotting (data range padded by 10% on either side)
    pmin, pmax = pars.min(axis=1), pars.max(axis=1)
    pmin, pmax = pmin - 0.1 * (pmax - pmin), pmax + 0.1 * (pmax - pmin)
    lims = [[pmin[i], pmax[i]] for i in range(len(parSpace))]

    # Grid on which to evaluate Np and Nn in order to plot contours
    grid = [[pmin[i], pmax[i], 0.02 * (pmax[i] - pmin[i])] for i in range(len(parSpace))]

    # Calculate the number of rows and columns in figure
    projections = list(combinations(range(len(parSpace)), 2))
    nr = int(np.floor(np.sqrt(len(projections))))
    nc = int(np.ceil(float(len(projections)) / nr))

    def _plot_snr_threshold(p1, p2):
        # Draw the integrated-SNR threshold line on the current axes for the
        # (snr_sum, snr_mean) projection, in either axis order. Only called
        # when makePlot is set, i.e. when plt has been imported above.
        if not fMin > 0:
            return
        axes = (parSpace[p1], parSpace[p2])
        if axes == ("snr_sum", "snr_mean"):
            step = (lims[p1][1] - lims[p1][0]) / 100
            xArray = np.arange(lims[p1][0], lims[p1][1] + step, step)
            plt.plot(xArray, np.log10(fMin) * 2 - xArray, 'k:')
        elif axes == ("snr_mean", "snr_sum"):
            step = (lims[p2][1] - lims[p2][0]) / 100
            yArray = np.arange(lims[p2][0], lims[p2][1] + step, step)
            plt.plot(np.log10(fMin) * 2 - yArray, yArray, 'k:')

    # ---------------------------------------
    # Set smoothing kernel in parameter space
    # ---------------------------------------

    # If autoKernel is True, then the initial kernel is taken as a scaled version of the covariance matrix
    # of the negative sources. The kernel size along each axis is such that the number of sources per kernel
    # width (sigma**2) is equal to "negPerBin". Optionally, the user can decide to use only the diagonal
    # terms of the covariance matrix. The kernel is then grown until convergence is reached on the Skellam
    # plot. If autoKernel is False, then use the kernel given by "kernel" parameter (argument of EstimateRel);
    # this is sigma, and is squared to be consistent with the auto kernel above.

    if autoKernel:
        # Set the kernel shape to that of the variance or covariance matrix
        kernel = np.cov(pars[:, neg])
        kernelType = "covariance"

        # Check if kernel matrix can be inverted
        try:
            np.linalg.inv(kernel)
        except Exception:
            err.error(
                "The reliability cannot be calculated because the smoothing kernel\n"
                "derived from " + str(pars[:, neg].shape[1]) + " negative sources cannot be inverted.\n"
                "This is likely due to an insufficient number of negative sources.\n"
                "Try to increase the number of negative sources by changing the\n"
                "source finding and/or filtering settings.", fatal=True, frame=True)

        if np.isnan(kernel).sum():
            err.error(
                "The reliability cannot be calculated because the smoothing kernel\n"
                "derived from " + str(pars[:, neg].shape[1]) + " negative sources contains NaNs.\n"
                "A good kernel is required to calculate the density field of positive\n"
                "and negative sources in parameter space.\n"
                "Try to increase the number of negative sources by changing the\n"
                "source finding and/or filtering settings.", fatal=True, frame=True)

        if not usecov:
            # Keep only the variances (diagonal terms)
            kernel = np.diag(np.diag(kernel))
            kernelType = "variance"

        kernelIter = 0.0
        deltplot = []

        # Scale the kernel size as requested by the user (scaleKernel>0) or use the autoscale algorithm (scaleKernel=0)
        if scaleKernel > 0:
            # Scale kernel size as requested by the user
            # Note that the scale factor is squared because users are asked to give a factor to apply to sqrt(kernel)
            kernel *= scaleKernel**2
            err.message(" Using the {0:s} matrix scaled by a factor {1:.2f} as convolution kernel".format(kernelType, scaleKernel))
            err.message(" The sqrt(kernel) size is:")
            err.message(" " + str(np.sqrt(np.abs(kernel))))
        elif scaleKernel == 0:
            # Scale kernel size to get started the kernel-growing loop
            # The scale factor for sqrt(kernel) is elevated to the power of 1.0 / len(parCol)
            err.message(" Will search for the best convolution kernel by scaling the {0:s} matrix".format(kernelType))
            err.message(" The {0:s} matrix has sqrt:".format(kernelType))
            err.message(" " + str(np.sqrt(np.abs(kernel))))
            # negPerBin must be >=1
            err.ensure(negPerBin >= 1, "The parameter reliability.negPerBin used to start the convolution kernel search was set to {0:.1f} but must be >= 1. Please change your settings.".format(negPerBin))
            kernel *= ((negPerBin + kernelIter) / Nneg)**(2.0 / len(parCol))
            err.message(" Search starting from the kernel with sqrt:")
            err.message(" " + str(np.sqrt(np.abs(kernel))))
            err.message(" Iteratively growing kernel until the distribution of (P-N)/sqrt(P+N) reaches median/width = {0:.2f} ...".format(skellamTol))
            err.ensure(skellamTol <= 0, "The parameter reliability.skellamTol was set to {0:.2f} but must be <= 0. Please change your settings.".format(skellamTol))
        else:
            err.ensure(
                scaleKernel >= 0,
                "The reliability.scaleKernel parameter cannot be negative.\n"
                "It should be = 0 if you want SoFiA to find the optimal kernel scaling\n"
                "or > 0 if you want to set the scaling yourself.\n"
                "Please change your settings.")

        # Figure that collects the cumulative Skellam histograms drawn inside the loop below
        if doskellam and makePlot:
            fig0 = plt.figure()
    else:
        # Note that the user must give sigma, which then gets squared
        err.message(" Using user-defined variance kernel with sqrt(kernel) size: {0}".format(kernel))
        err.ensure(len(parSpace) == len(kernel), "The number of entries in the kernel above does not match the number of parameters you requested for the reliability calculation.")
        kernel = np.identity(len(kernel)) * np.array(kernel)**2

    # Set grow_kernel to 1 to start the kernel growing loop below.
    grow_kernel = 1

    # This loop will estimate the reliability, check whether the kernel is large enough,
    # and if not pick a larger kernel. Unless autoKernel is set and scaleKernel is 0,
    # just one pass is made (i.e., the kernel is not grown).
    while grow_kernel:
        # ------------------------
        # Evaluate N-d reliability
        # ------------------------

        if verb:
            err.message(" estimate normalised positive and negative density fields ...")

        Np = gaussian_kde_set_covariance(pars[:, pos], kernel)
        Nn = gaussian_kde_set_covariance(pars[:, neg], kernel)

        # Calculate the number of positive and negative sources at the location of positive sources
        Nps = Np(pars[:, pos]) * Npos
        Nns = Nn(pars[:, pos]) * Nneg

        # Calculate the number of positive and negative sources at the location of negative sources
        nNps = Np(pars[:, neg]) * Npos
        nNns = Nn(pars[:, neg]) * Nneg

        # Calculate the reliability at the location of positive sources
        Rs = (Nps - Nns) / Nps

        # The reliability must be <= 1. If not, something is wrong.
        err.ensure(Rs.max() <= 1, "Maximum reliability greater than 1; something is wrong.\nPlease ensure that enough negative sources are detected\nand decrease your source finding threshold if necessary.", frame=True)

        # Find pseudo-reliable sources (taking maximum(Rs, 0) in order to include objects with Rs < 0
        # if threshold == 0; Rs may be < 0 because of insufficient statistics)
        # These are called pseudo-reliable because some objects may be discarded later based on additional criteria below
        pseudoreliable = np.maximum(Rs, 0) >= threshold

        # Find reliable sources: pseudo-reliable AND snr_sum / sqrt(n_pix) above fMin
        reliable = (np.maximum(Rs, 0) >= threshold) * ((data[pos, ftotCOL] / np.sqrt(data[pos, parNames.index("n_pix")])).reshape(-1,) > fMin)

        if autoKernel:
            # Calculate quantities needed for comparison to Skellam distribution
            delt = (nNps - nNns) / np.sqrt(nNps + nNns)
            deltstd = delt.std()
            deltmed = np.median(delt)
            deltmin = delt.min()
            deltmax = delt.max()

            if deltmed / deltstd > -100 and doskellam and makePlot:
                # "density" replaces the "normed" keyword, which is no longer accepted by Matplotlib
                plt.hist(delt / deltstd, bins=np.arange(deltmin / deltstd, max(5.1, deltmax / deltstd), 0.01), cumulative=True, histtype="step", color=(min(1, float(max(1., negPerBin) + kernelIter) / Nneg), 0, 0), density=True)
                deltplot.append([((max(1., negPerBin) + kernelIter) / Nneg)**(1.0 / len(parCol)), deltmed / deltstd])

            if scaleKernel:
                grow_kernel = 0
            else:
                err.message(" iteration, median, width, median/width = %3i, %9.2e, %9.2e, %9.2e" % (kernelIter, deltmed, deltstd, deltmed / deltstd))

                if deltmed / deltstd > skellamTol or negPerBin + kernelIter >= Nneg:
                    grow_kernel = 0
                    err.message(" Found good kernel after %i kernel growth iterations. The sqrt(kernel) size is:" % kernelIter)
                    err.message(np.sqrt(np.abs(kernel)))
                else:
                    # The further away from the tolerance, the larger the growth step
                    if deltmed / deltstd < 5 * skellamTol:
                        growth = 20
                    elif deltmed / deltstd < 2 * skellamTol:
                        growth = 10
                    elif deltmed / deltstd < 1.5 * skellamTol:
                        growth = 3
                    else:
                        growth = 1
                    kernel *= (float(negPerBin + kernelIter + growth) / (negPerBin + kernelIter))**(2.0 / len(parCol))
                    kernelIter += growth
        else:
            grow_kernel = 0

    # ------------
    # Skellam plot
    # ------------

    if autoKernel and deltmed / deltstd > -100 and doskellam and makePlot:
        plt.plot(np.arange(-10, 10, 0.01), stats.norm().cdf(np.arange(-10, 10, 0.01)), "k-")
        plt.plot(np.arange(-10, 10, 0.01), stats.norm(scale=0.4).cdf(np.arange(-10, 10, 0.01)), "k:")
        plt.legend(("Gaussian (sigma=1)", "Gaussian (sigma=0.4)"), loc="lower right", prop={"size": 13})
        plt.hist(delt / deltstd, bins=np.arange(deltmin / deltstd, max(5.1, deltmax / deltstd), 0.01), cumulative=True, histtype="step", color="r", density=True)
        plt.xlim(-5, 5)
        plt.ylim(0, 1)
        plt.xlabel("(P-N)/sqrt(N+P)")
        plt.ylabel("cumulative distribution")
        plt.plot([0, 0], [0, 1], "k--")
        fig0.savefig("%s_rel_skellam.pdf" % pdfoutname, rasterized=True)

        if not scaleKernel:
            # Convergence history of the kernel-growing loop
            fig3 = plt.figure()
            deltplot = np.array(deltplot)
            plt.plot(deltplot[:, 0], deltplot[:, 1], "ko-")
            plt.xlabel("kernel size (1D-sigma, aribtrary units)")
            plt.ylabel("median/std of (P-N)/sqrt(P+N)")
            plt.axhline(y=skellamTol, linestyle="--", color="r")
            fig3.savefig("%s_rel_skellam-delta.pdf" % pdfoutname, rasterized=True)

    # -----------------------
    # Scatter plot of sources
    # -----------------------

    # IDs of sources to highlight in the plots (none by default)
    specialids = []

    if doscatter and makePlot:
        if verb:
            err.message(" plotting sources ...")
        fig1 = plt.figure(figsize=(18, 4.5 * nr))
        plt.subplots_adjust(left=0.06, bottom=0.15 / nr, right=0.97, top=1 - 0.08 / nr, wspace=0.35, hspace=0.25)

        for n_p, (p1, p2) in enumerate(projections, start=1):
            if verb:
                err.message(" projection %i/%i" % (n_p, len(projections)))
            plt.subplot(nr, nc, n_p)
            plt.scatter(pars[p1, pos], pars[p2, pos], marker="o", c="b", s=10, edgecolor="face", alpha=0.5)
            plt.scatter(pars[p1, neg], pars[p2, neg], marker="o", c="r", s=10, edgecolor="face", alpha=0.5)
            for si in specialids:
                plt.plot(pars[p1, ids == si], pars[p2, ids == si], "kd", zorder=10000, ms=7, mfc="none", mew=2)
            # Plot Integrated SNR threshold
            _plot_snr_threshold(p1, p2)
            plt.xlim(lims[p1][0], lims[p1][1])
            plt.ylim(lims[p2][0], lims[p2][1])
            plt.xlabel(labs[p1])
            plt.ylabel(labs[p2])
            plt.grid(color='k', linestyle='-', linewidth=0.2)
        fig1.savefig("%s_rel_scatter.pdf" % pdfoutname, rasterized=True)

    # -------------
    # Plot contours
    # -------------

    if docontour and makePlot:
        levs = 10**np.arange(-1.5, 2, 0.5)

        if verb:
            err.message(" plotting contours ...")
        fig2 = plt.figure(figsize=(18, 4.5 * nr))
        plt.subplots_adjust(left=0.06, bottom=0.15 / nr, right=0.97, top=1 - 0.08 / nr, wspace=0.35, hspace=0.25)

        # Sources above the reliability threshold that fail the fMin criterion
        rejected = pseudoreliable & ~reliable

        for n_p, (p1, p2) in enumerate(projections, start=1):
            if verb:
                err.message(" projection %i/%i" % (n_p, len(projections)))
            g1, g2 = grid[p1], grid[p2]
            x1 = np.arange(g1[0], g1[1], g1[2])
            x2 = np.arange(g2[0], g2[1], g2[2])
            pshape = (x2.shape[0], x1.shape[0])

            # Get array of source parameters on current projection
            parsp = np.concatenate((pars[p1:p1 + 1], pars[p2:p2 + 1]), axis=0)

            # Derive Np and Nn density fields on the current projection
            # (the strided slice picks rows/columns p1 and p2 of the kernel)
            setcov = kernel[p1:p2 + 1:p2 - p1, p1:p2 + 1:p2 - p1]

            try:
                Np = gaussian_kde_set_covariance(parsp[:, pos], setcov)
                Nn = gaussian_kde_set_covariance(parsp[:, neg], setcov)
            except Exception:
                err.error(
                    "Reliability determination failed because of issues with the\n"
                    "smoothing kernel. This is likely due to an insufficient number\n"
                    "of negative detections. Please review your filtering and source\n"
                    "finding settings to ensure that a sufficient number of negative\n"
                    "detections is found.", fatal=True, frame=True)

            # Evaluate density fields on grid on current projection
            # NOTE: from here on Np and Nn are plain arrays, no longer callable density estimators
            g = np.transpose(np.transpose(np.mgrid[slice(g1[0], g1[1], g1[2]), slice(g2[0], g2[1], g2[2])]).reshape(-1, 2))
            Np = Np(g)
            Nn = Nn(g)
            Np = Np / Np.sum() * Npos
            Nn = Nn / Nn.sum() * Nneg
            Np.resize(pshape)
            Nn.resize(pshape)
            plt.subplot(nr, nc, n_p)
            plt.contour(x1, x2, Np, origin="lower", colors="b", levels=levs, zorder=2)
            plt.contour(x1, x2, Nn, origin="lower", colors="r", levels=levs, zorder=1)

            # Plot Integrated SNR threshold
            _plot_snr_threshold(p1, p2)

            if reliable.sum():
                plt.scatter(pars[p1, pos][reliable], pars[p2, pos][reliable], marker="o", s=10, edgecolor="k", facecolor="k", zorder=4)
            if rejected.sum():
                plt.scatter(pars[p1, pos][rejected], pars[p2, pos][rejected], marker="x", s=40, edgecolor="0.5", facecolor="0.5", zorder=3)
            for si in specialids:
                plt.plot(pars[p1, ids == si], pars[p2, ids == si], "kd", zorder=10000, ms=7, mfc="none", mew=2)

            plt.xlim(lims[p1][0], lims[p1][1])
            plt.ylim(lims[p2][0], lims[p2][1])
            plt.xlabel(labs[p1])
            plt.ylabel(labs[p2])
            plt.grid(color='k', linestyle='-', linewidth=0.2)
        fig2.savefig("%s_rel_contour.pdf" % pdfoutname, rasterized=True)

    # -------------------------
    # Add Np, Nn and R to table
    # -------------------------

    # This allows me not to calculate R every time I want to do some plot analysis,
    # but just read it from the file
    if saverel:
        if not (docontour or dostats):
            # The contour block was skipped, so Np and Nn are still the callable N-d density estimators
            Nps = Np(pars[:, pos]) * Npos
            Nns = Nn(pars[:, pos]) * Nneg
        NpAll = np.zeros((data.shape[0],))
        NpAll[pos] = Nps
        NnAll = np.zeros((data.shape[0],))
        NnAll[pos] = Nns
        # R will be -1 for negative sources
        R = -np.ones((data.shape[0],))
        # Set R to zero for positive sources if R < 0 because of Nn > Np
        R[pos] = np.maximum(0, (NpAll[pos] - NnAll[pos]) / NpAll[pos])
        data = np.concatenate((data, NpAll.reshape(-1, 1), NnAll.reshape(-1, 1), R.reshape(-1, 1)), axis=1)

    data = [list(row) for row in data]
    return data, ids[pos][reliable].astype(int)
def import_mask(maskFile, header, axis_size, subcube):
    """Load a mask from a FITS file and return it as a Boolean array.

    The mask may have 1 to 4 axes (a 4th axis must have size 1, as enforced by
    ``check_cube_dimensions``). A 1-D mask is reshaped to ``(-1, 1, 1)`` and a
    2-D mask gets a leading axis of length 1. The reference values (CRVALn) of
    the mask header must equal those of ``header``.

    Parameters
    ----------
    maskFile : str
        Path of the FITS file holding the mask.
    header : mapping
        Header of the data cube; only its CRVAL1..3 entries are read here.
    axis_size : sequence
        Axis sizes of the (sub)cube the mask has to match, indexed like NAXISn
        (``axis_size[0]`` is compared with NAXIS1, etc.).
    subcube : sequence
        Empty for no subcube, otherwise the pixel ranges
        ``[x0, x1, y0, y1, z0, z1]`` (4 entries are accepted for 2-D masks).

    Returns
    -------
    Boolean array that is True wherever the mask value is > 0.

    Problems (unreadable file, WCS or size mismatch, malformed subcube list)
    are reported through ``err.error`` / ``err.ensure``.
    """
    matches_subcube_msg = " Input mask cube already matches size of data subcube.\n No subcube selection applied."
    selection_applied_msg = " Subcube selection applied to input mask cube."
    size_mismatch_msg = "Data subcube does not match size of mask subcube or full mask."
    wcs_mismatch_msg = "Input cube and mask are not on the same WCS grid."

    err.message("Loading mask cube:\n " + str(maskFile))

    try:
        f = fits.open(maskFile, memmap=False)
        header_mask = f[0].header
    except Exception:
        err.error("Failed to read mask cube.")

    # Extract axis sizes and types
    n_axes_mask, axis_size_mask, axis_type_mask = extract_axis_size(header_mask)

    # Ensure correct dimensionality
    check_cube_dimensions(n_axes_mask, axis_size_mask, cube_name="mask cube", min_dim=1, max_dim=4)

    # NOTE(review): `fullshape` is neither a parameter nor a local of this
    # function; the "full mask" branches below rely on it being available at
    # module level - confirm against the rest of the file.

    # 1-D spectrum
    if n_axes_mask == 1:
        err.warning("Mask cube has 1 axis; interpreted as spectrum.\nAdding first and second axis.")
        # Use err.ensure like every other check (was a bare, undefined `ensure`)
        err.ensure(header_mask['CRVAL1'] == header['CRVAL1'], wcs_mismatch_msg)

        if len(subcube) == 6:
            if header_mask["NAXIS1"] == axis_size[2]:
                err.message(matches_subcube_msg)
                mask = np.reshape(f[0].data, (-1, 1, 1))
            elif header_mask["NAXIS1"] == fullshape[0]:
                err.message(selection_applied_msg)
                mask = np.reshape(f[0].section[subcube[4]:subcube[5]], (-1, 1, 1))
            else:
                err.error(size_mismatch_msg)
        elif not len(subcube):
            mask = np.reshape(f[0].data, (-1, 1, 1))
        else:
            err.error("The subcube list must have 6 entries ({0:d} given).".format(len(subcube)))

    # 2-D image
    elif n_axes_mask == 2:
        err.ensure(header_mask["CRVAL1"] == header["CRVAL1"] and header_mask["CRVAL2"] == header["CRVAL2"], wcs_mismatch_msg)

        if len(subcube) == 6 or len(subcube) == 4:
            if header_mask["NAXIS1"] == axis_size[0] and header_mask["NAXIS2"] == axis_size[1]:
                err.message(matches_subcube_msg)
                mask = np.array([f[0].data])
            elif header_mask["NAXIS1"] == fullshape[2] and header_mask["NAXIS2"] == fullshape[1]:
                err.message(selection_applied_msg)
                mask = np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
            else:
                err.error(size_mismatch_msg)
        else:
            mask = np.array([f[0].data])

    # 3-D cube
    elif n_axes_mask == 3:
        err.ensure(header_mask["CRVAL1"] == header["CRVAL1"] and header_mask["CRVAL2"] == header["CRVAL2"] and header_mask["CRVAL3"] == header["CRVAL3"], wcs_mismatch_msg)

        if len(subcube) == 6:
            if header_mask["NAXIS1"] == axis_size[0] and header_mask["NAXIS2"] == axis_size[1] and header_mask["NAXIS3"] == axis_size[2]:
                err.message(matches_subcube_msg)
                mask = f[0].data
            elif header_mask["NAXIS1"] == fullshape[2] and header_mask["NAXIS2"] == fullshape[1] and header_mask["NAXIS3"] == fullshape[0]:
                err.message(selection_applied_msg)
                mask = f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                err.error(size_mismatch_msg)
        else:
            mask = f[0].data

    # 4-D hypercube (only the first plane along the 4th axis is used)
    else:
        err.ensure(header_mask["CRVAL1"] == header["CRVAL1"] and header_mask["CRVAL2"] == header["CRVAL2"] and header_mask["CRVAL3"] == header["CRVAL3"], wcs_mismatch_msg)

        if len(subcube) == 6:
            if header_mask["NAXIS1"] == axis_size[0] and header_mask["NAXIS2"] == axis_size[1] and header_mask["NAXIS3"] == axis_size[2]:
                err.message(matches_subcube_msg)
                mask = f[0].section[0]
            elif header_mask["NAXIS1"] == fullshape[2] and header_mask["NAXIS2"] == fullshape[1] and header_mask["NAXIS3"] == fullshape[0]:
                err.message(selection_applied_msg)
                mask = f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                err.error(size_mismatch_msg)
        else:
            mask = f[0].section[0]

    f.close()
    err.message("Mask cube loaded.")

    # In all cases, convert mask to Boolean with masked pixels set to 1.
    # (An extra in-place "mask[mask > 0] = 1" pass is not needed for that.)
    return (mask > 0).astype(bool)
def EstimateRel(data, pdfoutname, parNames, parSpace=["snr_sum", "snr_max", "n_pix"], logPars=[1, 1, 1], autoKernel=True, scaleKernel=1, negPerBin=1, skellamTol=-0.5, kernel=[0.15, 0.05, 0.1], usecov=False, doscatter=1, docontour=1, doskellam=1, dostats=0, saverel=1, threshold=0.99, fMin=0, verb=0, makePlot=False): # Always work on logarithmic parameter values; the reliability.logPars parameter should be removed if 0 in logPars: err.warning( " Setting all reliability.logPars entries to 1. This parameter is no longer editable by users." ) logPars = [1 for pp in parSpace] # Import Matplotlib if diagnostic plots requested if makePlot: import matplotlib # The following line is necessary to run SoFiA remotely matplotlib.use("Agg") import matplotlib.pyplot as plt # -------------------------------- # Build array of source parameters # -------------------------------- idCOL = parNames.index("id") ftotCOL = parNames.index("snr_sum") fmaxCOL = parNames.index("snr_max") fminCOL = parNames.index("snr_min") # Get columns of requested parameters parCol = [] for ii in range(len(parSpace)): parCol.append(parNames.index(parSpace[ii])) # Get position and number of positive and negative sources pos = data[:, ftotCOL] > 0 neg = data[:, ftotCOL] <= 0 Npos = pos.sum() Nneg = neg.sum() err.ensure(Npos, "No positive sources found; cannot proceed.") err.ensure(Nneg, "No negative sources found; cannot proceed.") # Get array of relevant source parameters (and take log of them if requested) ids = data[:, idCOL] pars = np.empty((data.shape[0], 0)) for ii in range(len(parSpace)): if parSpace[ii] == "snr_max": parsTmp = data[:, fmaxCOL] * pos - data[:, fminCOL] * neg if logPars[ii]: parsTmp = np.log10(parsTmp) pars = np.concatenate((pars, parsTmp.reshape(-1, 1)), axis=1) elif parSpace[ii] == "snr_sum" or parSpace[ii] == "snr_mean": parsTmp = abs(data[:, parCol[ii]].reshape(-1, 1)) if logPars[ii]: parsTmp = np.log10(parsTmp) pars = np.concatenate((pars, parsTmp), axis=1) else: parsTmp = data[:, 
parCol[ii]].reshape(-1, 1) if logPars[ii]: parsTmp = np.log10(parsTmp) pars = np.concatenate((pars, parsTmp), axis=1) err.message(" Working in parameter space {0:}".format(str(parSpace))) err.message( " Will convolve the distribution of positive and negative sources in this space to derive the P and N density fields" ) pars = np.transpose(pars) # ---------------------------------------------------------- # Set parameters to work with and gridding/plotting for each # ---------------------------------------------------------- # Axis labels when plotting labs = [] for ii in range(len(parSpace)): labs.append("") if logPars[ii]: labs[ii] += "log " labs[ii] += parSpace[ii] # Axis limits when plotting pmin, pmax = pars.min(axis=1), pars.max(axis=1) pmin, pmax = pmin - 0.1 * (pmax - pmin), pmax + 0.1 * (pmax - pmin) lims = [[pmin[i], pmax[i]] for i in range(len(parSpace))] # Grid on which to evaluate Np and Nn in order to plot contours grid = [[pmin[i], pmax[i], 0.02 * (pmax[i] - pmin[i])] for i in range(len(parSpace))] # Calculate the number of rows and columns in figure projections = [subset for subset in combinations(range(len(parSpace)), 2)] nr = int(np.floor(np.sqrt(len(projections)))) nc = int(np.ceil(float(len(projections)) / nr)) # --------------------------------------- # Set smoothing kernel in parameter space # --------------------------------------- # If autoKernel is True, then the initial kernel is taken as a scaled version of the covariance matrix # of the negative sources. The kernel size along each axis is such that the number of sources per kernel # width (sigma**2) is equal to "negPerBin". Optionally, the user can decide to use only the diagonal # terms of the covariance matrix. The kernel is then grown until convergence is reached on the Skellam # plot. If autoKernel is False, then use the kernel given by "kernel" parameter (argument of EstimateRel); # this is sigma, and is squared to be consistent with the auto kernel above. 
if autoKernel: # Set the kernel shape to that of the variance or covariance matrix kernel = np.cov(pars[:, neg]) kernelType = "covariance" # Check if kernel matrix can be inverted try: np.linalg.inv(kernel) except: err.error( "The reliability cannot be calculated because the smoothing kernel\n" "derived from " + str(pars[:, neg].shape[1]) + " negative sources cannot be inverted.\n" "This is likely due to an insufficient number of negative sources.\n" "Try to increase the number of negative sources by changing the\n" "source finding and/or filtering settings.", fatal=True, frame=True) if np.isnan(kernel).sum(): err.error( "The reliability cannot be calculated because the smoothing kernel\n" "derived from " + str(pars[:, neg].shape[1]) + " negative sources contains NaNs.\n" "A good kernel is required to calculate the density field of positive\n" "and negative sources in parameter space.\n" "Try to increase the number of negative sources by changing the\n" "source finding and/or filtering settings.", fatal=True, frame=True) if not usecov: kernel = np.diag(np.diag(kernel)) kernelType = "variance" kernelIter = 0.0 deltplot = [] # Scale the kernel size as requested by the user (scaleKernel>0) or use the autoscale algorithm (scaleKernel=0) if scaleKernel > 0: # Scale kernel size as requested by the user # Note that the scale factor is squared because users are asked to give a factor to apply to sqrt(kernel) kernel *= scaleKernel**2 err.message( " Using the {0:s} matrix scaled by a factor {1:.2f} as convolution kernel" .format(kernelType, scaleKernel)) err.message(" The sqrt(kernel) size is:") err.message(" " + str(np.sqrt(np.abs(kernel)))) elif scaleKernel == 0: # Scale kernel size to get started the kernel-growing loop # The scale factor for sqrt(kernel) is elevated to the power of 1.0 / len(parCol) err.message( " Will search for the best convolution kernel by scaling the {0:s} matrix" .format(kernelType)) err.message(" The {0:s} matrix has sqrt:".format(kernelType)) 
err.message(" " + str(np.sqrt(np.abs(kernel)))) # negPerBin must be >=1 err.ensure( negPerBin >= 1, "The parameter reliability.negPerBin used to start the convolution kernel search was set to {0:.1f} but must be >= 1. Please change your settings." .format(negPerBin)) kernel *= ((negPerBin + kernelIter) / Nneg)**(2.0 / len(parCol)) err.message(" Search starting from the kernel with sqrt:") err.message(" " + str(np.sqrt(np.abs(kernel)))) err.message( " Iteratively growing kernel until the distribution of (P-N)/sqrt(P+N) reaches median/width = {0:.2f} ..." .format(skellamTol)) err.ensure( skellamTol <= 0, "The parameter reliability.skellamTol was set to {0:.2f} but must be <= 0. Please change your settings." .format(skellamTol)) else: err.ensure(scaleKernel>=0,\ "The reliability.scaleKernel parameter cannot be negative.\n"\ "It should be = 0 if you want SoFiA to find the optimal kernel scaling\n"\ "or > 0 if you want to set the scaling yourself.\n"\ "Please change your settings.") #deltOLD=-1e+9 # Used to stop kernel growth if P-N stops moving closer to zero [NOT USED CURRENTLY] if doskellam and makePlot: fig0 = plt.figure() else: # Note that the user must give sigma, which then gets squared err.message( " Using user-defined variance kernel with sqrt(kernel) size: {0}". format(kernel)) err.ensure( len(parSpace) == len(kernel), "The number of entries in the kernel above does not match the number of parameters you requested for the reliability calculation." ) kernel = np.identity(len(kernel)) * np.array(kernel)**2 # Set grow_kernel to 1 to start the kernel growing loop below. grow_kernel = 1 # This loop will estimate the reliability, check whether the kernel is large enough, # and if not pick a larger kernel. If autoKernel = 0 or scaleKernel = 0, we will do # just one pass (i.e., we will not grow the kernel). 
while grow_kernel: # ------------------------ # Evaluate N-d reliability # ------------------------ if verb: err.message( " estimate normalised positive and negative density fields ..." ) Np = gaussian_kde_set_covariance(pars[:, pos], kernel) Nn = gaussian_kde_set_covariance(pars[:, neg], kernel) # Calculate the number of positive and negative sources at the location of positive sources Nps = Np(pars[:, pos]) * Npos Nns = Nn(pars[:, pos]) * Nneg # Calculate the number of positive and negative sources at the location of negative sources nNps = Np(pars[:, neg]) * Npos nNns = Nn(pars[:, neg]) * Nneg # Calculate the reliability at the location of positive sources Rs = (Nps - Nns) / Nps # The reliability must be <= 1. If not, something is wrong. err.ensure( Rs.max() <= 1, "Maximum reliability greater than 1; something is wrong.\nPlease ensure that enough negative sources are detected\nand decrease your source finding threshold if necessary.", frame=True) # Find pseudo-reliable sources (taking maximum(Rs, 0) in order to include objects with Rs < 0 # if threshold == 0; Rs may be < 0 because of insufficient statistics) # These are called pseudo-reliable because some objects may be discarded later based on additional criteria below pseudoreliable = np.maximum(Rs, 0) >= threshold # Find reliable sources (taking maximum(Rs, 0) in order to include objects with Rs < 0 if # threshold == 0; Rs may be < 0 because of insufficient statistics) #reliable=(np.maximum(Rs, 0)>=threshold) * (data[pos, ftotCOL].reshape(-1,) > fMin) * (data[pos, fmaxCOL].reshape(-1,) > 4) reliable = (np.maximum(Rs, 0) >= threshold) * ( (data[pos, ftotCOL] / np.sqrt(data[pos, parNames.index("n_pix")])).reshape(-1, ) > fMin) if autoKernel: # Calculate quantities needed for comparison to Skellam distribution delt = (nNps - nNns) / np.sqrt(nNps + nNns) deltstd = delt.std() deltmed = np.median(delt) deltmin = delt.min() deltmax = delt.max() if deltmed / deltstd > -100 and doskellam and makePlot: plt.hist(delt / 
deltstd, bins=np.arange(deltmin / deltstd, max(5.1, deltmax / deltstd), 0.01), cumulative=True, histtype="step", color=(min( 1, float(max(1., negPerBin) + kernelIter) / Nneg), 0, 0), density=True) deltplot.append([((max(1., negPerBin) + kernelIter) / Nneg)**(1.0 / len(parCol)), deltmed / deltstd]) if scaleKernel: grow_kernel = 0 else: err.message( " iteration, median, width, median/width = %3i, %9.2e, %9.2e, %9.2e" % (kernelIter, deltmed, deltstd, deltmed / deltstd)) if deltmed / deltstd > skellamTol or negPerBin + kernelIter >= Nneg: grow_kernel = 0 err.message( " Found good kernel after %i kernel growth iterations. The sqrt(kernel) size is:" % kernelIter) err.message(np.sqrt(np.abs(kernel))) elif deltmed / deltstd < 5 * skellamTol: kernel *= (float(negPerBin + kernelIter + 20) / (negPerBin + kernelIter))**(2.0 / len(parCol)) kernelIter += 20 elif deltmed / deltstd < 2 * skellamTol: kernel *= (float(negPerBin + kernelIter + 10) / (negPerBin + kernelIter))**(2.0 / len(parCol)) kernelIter += 10 elif deltmed / deltstd < 1.5 * skellamTol: kernel *= (float(negPerBin + kernelIter + 3) / (negPerBin + kernelIter))**(2.0 / len(parCol)) kernelIter += 3 else: kernel *= (float(negPerBin + kernelIter + 1) / (negPerBin + kernelIter))**(2.0 / len(parCol)) kernelIter += 1 else: grow_kernel = 0 # ------------ # Skellam plot # ------------ if autoKernel and deltmed / deltstd > -100 and doskellam and makePlot: plt.plot(np.arange(-10, 10, 0.01), stats.norm().cdf(np.arange(-10, 10, 0.01)), "k-") plt.plot(np.arange(-10, 10, 0.01), stats.norm(scale=0.4).cdf(np.arange(-10, 10, 0.01)), "k:") plt.legend(("Gaussian (sigma=1)", "Gaussian (sigma=0.4)"), loc="lower right", prop={"size": 13}) plt.hist(delt / deltstd, bins=np.arange(deltmin / deltstd, max(5.1, deltmax / deltstd), 0.01), cumulative=True, histtype="step", color="r", density=True) plt.xlim(-5, 5) plt.ylim(0, 1) plt.xlabel("(P-N)/sqrt(N+P)") plt.ylabel("cumulative distribution") plt.plot([0, 0], [0, 1], "k--") 
fig0.savefig("%s_rel_skellam.pdf" % pdfoutname, rasterized=True) if not scaleKernel: fig3 = plt.figure() deltplot = np.array(deltplot) plt.plot(deltplot[:, 0], deltplot[:, 1], "ko-") plt.xlabel("kernel size (1D-sigma, aribtrary units)") plt.ylabel("median/std of (P-N)/sqrt(P+N)") plt.axhline(y=skellamTol, linestyle="--", color="r") fig3.savefig("%s_rel_skellam-delta.pdf" % pdfoutname, rasterized=True) # ----------------------- # Scatter plot of sources # ----------------------- specialids = [] if doscatter and makePlot: if verb: err.message(" plotting sources ...") fig1 = plt.figure(figsize=(18, 4.5 * nr)) plt.subplots_adjust(left=0.06, bottom=0.15 / nr, right=0.97, top=1 - 0.08 / nr, wspace=0.35, hspace=0.25) n_p = 0 for jj in projections: if verb: err.message(" projection %i/%i" % (projections.index(jj) + 1, len(projections))) n_p, p1, p2 = n_p + 1, jj[0], jj[1] plt.subplot(nr, nc, n_p) plt.scatter(pars[p1, pos], pars[p2, pos], marker="o", c="b", s=10, edgecolor="face", alpha=0.5) plt.scatter(pars[p1, neg], pars[p2, neg], marker="o", c="r", s=10, edgecolor="face", alpha=0.5) for si in specialids: plt.plot(pars[p1, ids == si], pars[p2, ids == si], "kd", zorder=10000, ms=7, mfc="none", mew=2) # Plot Integrated SNR threshold if fMin > 0 and (parSpace[jj[0]], parSpace[jj[1]]) == ("snr_sum", "snr_mean"): xArray = np.arange( lims[p1][0], lims[p1][1] + (lims[p1][1] - lims[p1][0]) / 100, (lims[p1][1] - lims[p1][0]) / 100) plt.plot(xArray, np.log10(fMin) * 2 - xArray, 'k:') elif fMin > 0 and (parSpace[jj[0]], parSpace[jj[1]]) == ("snr_mean", "snr_sum"): yArray = np.arange( lims[p2][0], lims[p2][1] + (lims[p2][1] - lims[p2][0]) / 100, (lims[p2][1] - lims[p2][0]) / 100) plt.plot(np.log10(fMin) * 2 - yArray, yArray, 'k:') plt.xlim(lims[p1][0], lims[p1][1]) plt.ylim(lims[p2][0], lims[p2][1]) plt.xlabel(labs[p1]) plt.ylabel(labs[p2]) plt.grid(color='k', linestyle='-', linewidth=0.2) fig1.savefig("%s_rel_scatter.pdf" % pdfoutname, rasterized=True) # ------------- # Plot 
contours # ------------- if docontour and makePlot: levs = 10**np.arange(-1.5, 2, 0.5) if verb: err.message(" plotting contours ...") fig2 = plt.figure(figsize=(18, 4.5 * nr)) plt.subplots_adjust(left=0.06, bottom=0.15 / nr, right=0.97, top=1 - 0.08 / nr, wspace=0.35, hspace=0.25) n_p = 0 for jj in projections: if verb: err.message(" projection %i/%i" % (projections.index(jj) + 1, len(projections))) n_p, p1, p2 = n_p + 1, jj[0], jj[1] g1, g2 = grid[p1], grid[p2] x1 = np.arange(g1[0], g1[1], g1[2]) x2 = np.arange(g2[0], g2[1], g2[2]) pshape = (x2.shape[0], x1.shape[0]) # Get array of source parameters on current projection parsp = np.concatenate((pars[p1:p1 + 1], pars[p2:p2 + 1]), axis=0) # Derive Np and Nn density fields on the current projection setcov = kernel[p1:p2 + 1:p2 - p1, p1:p2 + 1:p2 - p1] try: Np = gaussian_kde_set_covariance(parsp[:, pos], setcov) Nn = gaussian_kde_set_covariance(parsp[:, neg], setcov) except: err.error( "Reliability determination failed because of issues with the\n" "smoothing kernel. This is likely due to an insufficient number\n" "of negative detections. 
Please review your filtering and source\n" "finding settings to ensure that a sufficient number of negative\n" "detections is found.", fatal=True, frame=True) # Evaluate density fields on grid on current projection g = np.transpose( np.transpose(np.mgrid[slice(g1[0], g1[1], g1[2]), slice(g2[0], g2[1], g2[2])]).reshape( -1, 2)) Np = Np(g) Nn = Nn(g) Np = Np / Np.sum() * Npos Nn = Nn / Nn.sum() * Nneg Np.resize(pshape) Nn.resize(pshape) plt.subplot(nr, nc, n_p) plt.contour(x1, x2, Np, origin="lower", colors="b", levels=levs, zorder=2) plt.contour(x1, x2, Nn, origin="lower", colors="r", levels=levs, zorder=1) # Plot Integrated SNR threshold if fMin > 0 and (parSpace[jj[0]], parSpace[jj[1]]) == ("snr_sum", "snr_mean"): xArray = np.arange( lims[p1][0], lims[p1][1] + (lims[p1][1] - lims[p1][0]) / 100, (lims[p1][1] - lims[p1][0]) / 100) plt.plot(xArray, np.log10(fMin) * 2 - xArray, 'k:') elif fMin > 0 and (parSpace[jj[0]], parSpace[jj[1]]) == ("snr_mean", "snr_sum"): yArray = np.arange( lims[p2][0], lims[p2][1] + (lims[p2][1] - lims[p2][0]) / 100, (lims[p2][1] - lims[p2][0]) / 100) plt.plot(np.log10(fMin) * 2 - yArray, yArray, 'k:') if reliable.sum(): plt.scatter(pars[p1, pos][reliable], pars[p2, pos][reliable], marker="o", s=10, edgecolor="k", facecolor="k", zorder=4) if (pseudoreliable * (reliable == False)).sum(): plt.scatter(pars[p1, pos][pseudoreliable * (reliable == False)], pars[p2, pos][pseudoreliable * (reliable == False)], marker="x", s=40, edgecolor="0.5", facecolor="0.5", zorder=3) for si in specialids: plt.plot(pars[p1, ids == si], pars[p2, ids == si], "kd", zorder=10000, ms=7, mfc="none", mew=2) plt.xlim(lims[p1][0], lims[p1][1]) plt.ylim(lims[p2][0], lims[p2][1]) plt.xlabel(labs[p1]) plt.ylabel(labs[p2]) plt.grid(color='k', linestyle='-', linewidth=0.2) fig2.savefig("%s_rel_contour.pdf" % pdfoutname, rasterized=True) # ------------------------- # Add Np, Nn and R to table # ------------------------- # This allows me not to calculate R every time I want to 
do some plot analysis, # but just read it from the file if saverel: if not (docontour or dostats): Nps = Np(pars[:, pos]) * Npos Nns = Nn(pars[:, pos]) * Nneg Np = np.zeros((data.shape[0], )) Np[pos] = Nps Nn = np.zeros((data.shape[0], )) Nn[pos] = Nns R = -np.ones((data.shape[0], )) # R will be -1 for negative sources # Set R to zero for positive sources if R < 0 because of Nn > Np R[pos] = np.maximum(0, (Np[pos] - Nn[pos]) / Np[pos]) data = np.concatenate( (data, Np.reshape(-1, 1), Nn.reshape(-1, 1), R.reshape(-1, 1)), axis=1) data = [list(jj) for jj in list(data)] return data, ids[pos][reliable].astype(int)
def _interpolate_rms_profile(grid_points, grid_values, length, interpolation):
    """Interpolate RMS values sampled at grid points onto every pixel of a line.

    Parameters:
        grid_points: 1-D integer array with the pixel positions of the samples.
        grid_values: 1-D array of RMS values at ``grid_points`` (may hold NaN).
        length: number of pixels along the line; output covers 0..length-1.
        interpolation: "cubic" selects a spline, anything else is linear.

    Returns:
        Array of ``length`` interpolated values, or None if all samples are NaN.
    """
    valid = np.logical_not(np.isnan(grid_values))
    n_valid = int(np.count_nonzero(valid))
    if n_valid == 0:
        return None

    coords = np.arange(0, length)
    if interpolation == "cubic" and n_valid > 1:
        # A spline of order k needs more than k points; lower the order when
        # fewer than 4 valid samples exist instead of letting SciPy raise.
        spline = InterpolatedUnivariateSpline(
            grid_points[valid], grid_values[valid], k=min(3, n_valid - 1))
        return spline(coords)

    # Linear interpolation (also used for a single valid sample, which
    # np.interp turns into a constant profile).
    return np.interp(coords, grid_points[valid], grid_values[valid])


def sigma_scale(cube, scaleX=False, scaleY=False, scaleZ=True, edgeX=0, edgeY=0, edgeZ=0, statistic="mad", fluxRange="all", method="global", windowSpatial=20, windowSpectral=20, gridSpatial=0, gridSpectral=0, interpolation="none"):
    """Divide a 3-D data cube by its noise level and return cube and noise cube.

    The cube is indexed as ``cube[z, y, x]`` and is modified in place.

    Parameters:
        cube: 3-D array to be normalised (divided in place).
        scaleX, scaleY, scaleZ: global mode only; measure and divide out the
            noise plane by plane along the respective axis. Scalings along
            several axes are applied one after the other.
        edgeX, edgeY, edgeZ: global mode only; number of pixels excluded at
            both ends of each axis when measuring the noise.
        statistic: passed to ``GetRMS`` as ``rmsMode``.
        fluxRange: passed to ``GetRMS`` as ``fluxRange``.
        method: "local" measures the noise in a running window on a grid;
            any other value selects the global plane-by-plane measurement.
        windowSpatial, windowSpectral: local mode; window sizes, forced to
            odd integers >= 1.
        gridSpatial, gridSpectral: local mode; grid spacings, forced to odd
            integers >= 1. A value of 0 selects half the window size.
        interpolation: local mode; "linear" or "cubic" interpolates between
            grid points, anything else fills each grid cell with a constant.

    Returns:
        Tuple ``(cube, rms_cube)`` where ``rms_cube`` has the shape and dtype
        of ``cube`` and holds the noise values the cube was divided by.
    """
    # Print some informational messages
    err.message("Generating noise-scaled data cube:")
    err.message(" Selecting " + str(method) + " noise measurement method.")

    if statistic == "mad":
        err.message(" Applying median absolute deviation to " + str(fluxRange) + " pixels.")
    if statistic == "std":
        err.message(" Applying standard deviation to " + str(fluxRange) + " pixels.")
    if statistic == "gauss":
        err.message(" Applying Gaussian fit to " + str(fluxRange) + " pixels.")
    if statistic == "negative":
        err.message(" Applying Gaussian fit to negative pixels.")

    # Check the dimensions of the cube (could be obtained from header information)
    dimensions = np.shape(cube)

    # LOCAL noise measurement within running window (slower and less memory-friendly)
    if method == "local":
        # Make window sizes integers >= 1
        windowSpatial = max(int(windowSpatial), 1)
        windowSpectral = max(int(windowSpectral), 1)

        # Ensure that window sizes are odd
        windowSpatial += (1 - windowSpatial % 2)
        windowSpectral += (1 - windowSpectral % 2)

        # Set grid sizes to half the window sizes if undefined
        if not gridSpatial:
            gridSpatial = windowSpatial // 2
        if not gridSpectral:
            gridSpectral = windowSpectral // 2

        # Make grid sizes integers >= 1
        gridSpatial = max(int(gridSpatial), 1)
        gridSpectral = max(int(gridSpectral), 1)

        # Ensure that grid sizes are odd
        gridSpatial += (1 - gridSpatial % 2)
        gridSpectral += (1 - gridSpectral % 2)

        # Print grid and window sizes adopted
        err.message(" Using grid size of [" + str(gridSpatial) + ", " + str(gridSpectral) + "]")
        err.message(" and window size of [" + str(windowSpatial) + ", " + str(windowSpectral) + "].")

        # Generate grid points to be used; the start offset is half of what
        # remains of the axis after the last full grid step.
        gridPointsZ = np.arange((dimensions[0] - gridSpectral * (int(math.ceil(float(dimensions[0]) / float(gridSpectral))) - 1)) // 2, dimensions[0], gridSpectral)
        gridPointsY = np.arange((dimensions[1] - gridSpatial * (int(math.ceil(float(dimensions[1]) / float(gridSpatial))) - 1)) // 2, dimensions[1], gridSpatial)
        gridPointsX = np.arange((dimensions[2] - gridSpatial * (int(math.ceil(float(dimensions[2]) / float(gridSpatial))) - 1)) // 2, dimensions[2], gridSpatial)

        # Divide grid and window sizes by 2 to get radii
        radiusGridSpatial = gridSpatial // 2
        radiusGridSpectral = gridSpectral // 2
        radiusWindowSpatial = windowSpatial // 2
        radiusWindowSpectral = windowSpectral // 2

        # Create empty cube (filled with NaN) to hold noise values
        rms_cube = np.full(cube.shape, np.nan, dtype=cube.dtype)

        interpolate = interpolation == "linear" or interpolation == "cubic"

        # Determine RMS across window centred on grid cell
        for z in gridPointsZ:
            for y in gridPointsY:
                for x in gridPointsX:
                    grid = (max(0, z - radiusGridSpectral), min(dimensions[0], z + radiusGridSpectral + 1),
                            max(0, y - radiusGridSpatial), min(dimensions[1], y + radiusGridSpatial + 1),
                            max(0, x - radiusGridSpatial), min(dimensions[2], x + radiusGridSpatial + 1))
                    window = (max(0, z - radiusWindowSpectral), min(dimensions[0], z + radiusWindowSpectral + 1),
                              max(0, y - radiusWindowSpatial), min(dimensions[1], y + radiusWindowSpatial + 1),
                              max(0, x - radiusWindowSpatial), min(dimensions[2], x + radiusWindowSpatial + 1))
                    sample = cube[window[0]:window[1], window[2]:window[3], window[4]:window[5]]

                    # Windows without any valid pixel keep their NaN noise value
                    if np.all(np.isnan(sample)):
                        continue

                    rms = GetRMS(sample, rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if interpolate:
                        # Write value into grid point for later interpolation
                        rms_cube[z, y, x] = rms
                    else:
                        # Fill entire grid cell
                        rms_cube[grid[0]:grid[1], grid[2]:grid[3], grid[4]:grid[5]] = rms

        # Carry out interpolation if requested, taking NaNs into account
        if interpolate:
            err.message(" Interpolating in between grid points (" + str(interpolation) + ").")

            # First across each spatial plane: along x on the grid rows,
            # then along y for every column of the plane.
            if gridSpatial > 1:
                for z in gridPointsZ:
                    for y in gridPointsY:
                        profile = _interpolate_rms_profile(gridPointsX, rms_cube[z, y, gridPointsX], dimensions[2], interpolation)
                        if profile is not None:
                            rms_cube[z, y, 0:dimensions[2]] = profile

                    for x in range(dimensions[2]):
                        profile = _interpolate_rms_profile(gridPointsY, rms_cube[z, gridPointsY, x], dimensions[1], interpolation)
                        if profile is not None:
                            rms_cube[z, 0:dimensions[1], x] = profile

            # Then along the spectral axis
            if gridSpectral > 1:
                for y in range(dimensions[1]):
                    for x in range(dimensions[2]):
                        profile = _interpolate_rms_profile(gridPointsZ, rms_cube[gridPointsZ, y, x], dimensions[0], interpolation)
                        if profile is not None:
                            rms_cube[0:dimensions[0], y, x] = profile

        # Replace any invalid RMS values with NaN
        with np.errstate(invalid="ignore"):
            rms_cube[rms_cube <= 0] = np.nan

        # Divide data cube by RMS cube
        cube /= rms_cube

    # GLOBAL noise measurement on entire 2D plane (faster and more memory-friendly)
    else:
        # Define the range over which statistics are calculated
        z1 = int(edgeZ)
        z2 = int(dimensions[0] - edgeZ)
        y1 = int(edgeY)
        y2 = int(dimensions[1] - edgeY)
        x1 = int(edgeX)
        x2 = int(dimensions[2] - edgeX)

        # Make sure edges don't exceed cube size
        err.ensure(z1 < z2 and y1 < y2 and x1 < x2, "Edge size exceeds cube size for at least one axis.")

        # Create empty cube (filled with 1) to hold noise values
        rms_cube = np.ones(cube.shape, dtype=cube.dtype)

        # Measure noise across 2D planes and scale cube accordingly.
        # Planes that are entirely NaN or yield a non-positive RMS are left untouched.
        if scaleZ:
            for i in range(dimensions[0]):
                if not np.all(np.isnan(cube[i, y1:y2, x1:x2])):
                    rms = GetRMS(cube[i, y1:y2, x1:x2], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[i, :, :] *= rms
                        cube[i, :, :] /= rms

        if scaleY:
            for i in range(dimensions[1]):
                if not np.all(np.isnan(cube[z1:z2, i, x1:x2])):
                    rms = GetRMS(cube[z1:z2, i, x1:x2], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[:, i, :] *= rms
                        cube[:, i, :] /= rms

        if scaleX:
            for i in range(dimensions[2]):
                if not np.all(np.isnan(cube[z1:z2, y1:y2, i])):
                    rms = GetRMS(cube[z1:z2, y1:y2, i], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[:, :, i] *= rms
                        cube[:, :, i] /= rms

    err.message("Noise-scaled data cube generated.\n")

    return cube, rms_cube
def import_data(doSubcube, inFile, weightsFile, maskFile, weightsFunction=None, subcube=[], subcubeMode="pixel", doFlag=False, flagRegions=False, flagFile="", cubeOnly=False):
    """Load a FITS data cube (or a subcube of it) plus optional weights, flags and mask.

    Parameters:
        doSubcube: accepted for interface compatibility; not referenced here.
        inFile: path of the FITS file whose primary HDU holds the data.
        weightsFile: if truthy, path of a weights cube passed to
            ``apply_weights_file``.
        maskFile: if truthy, path of a mask cube passed to ``import_mask``.
        weightsFunction: used via ``apply_weights_function`` only when no
            ``weightsFile`` is given.
        subcube: subcube specification; if non-empty it is converted by
            ``get_subcube_range`` into ``[x1, x2, y1, y2, z1, z2]``-style
            bounds used for slicing. The default list is never mutated.
        subcubeMode: forwarded to ``get_subcube_range``.
        doFlag, flagRegions, flagFile: flagging options forwarded to
            ``apply_flagging`` when ``doFlag`` is truthy.
        cubeOnly: if truthy, return right after loading the data.

    Returns:
        ``(data, header)`` if ``cubeOnly`` is truthy, otherwise
        ``(data, header, mask, subcube)``. 2-D images are returned with an
        added leading axis; for 4-D input only the first element of the 4th
        axis is read. When a subcube is selected, CRPIXn and NAXISn of the
        returned header are adjusted to the selected region.
    """
    # Basic sanity checks on user input
    err.ensure(os.path.isfile(inFile), "Data file not found:\n " + str(inFile))

    # -------------------------------
    # Open input cube and read header
    # -------------------------------
    err.message("Loading input data cube.")
    try:
        f = fits.open(inFile, mode="readonly", memmap=False, do_not_scale_image_data=False)
        header = f[0].header
    except Exception:
        err.error("Failed to load primary HDU of data file:\n " + str(inFile))

    # Make sure the file is closed even if one of the steps below fails
    try:
        # Extract axis sizes and types
        n_axes, axis_size, axis_type = extract_axis_size(header)

        # Check dimensionality of data cube
        check_cube_dimensions(n_axes, axis_size, cube_name="data cube")

        # Print some information
        err.message(" Data cube has {0:d} axes.".format(header["NAXIS"]))
        err.message(" Types: " + str(axis_type))
        err.message(" Sizes: " + str(axis_size))

        # Extract subcube boundaries if requested
        if len(subcube):
            subcube = get_subcube_range(header, n_axes, axis_size, subcube, subcubeMode)
        else:
            subcube = []

        # --------------------------------
        # Read requested subregion of data
        # --------------------------------
        if n_axes == 2:
            # 2-D image: wrap in a list to add a leading (spectral) axis
            if len(subcube):
                data = np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
                header["CRPIX1"] -= subcube[0]
                header["CRPIX2"] -= subcube[2]
                header["NAXIS1"] = subcube[1] - subcube[0]
                header["NAXIS2"] = subcube[3] - subcube[2]
            else:
                data = np.array([f[0].data])
        else:
            # 3-D cube, or 4-D hypercube of which only plane 0 of the 4th axis is read
            is_3d = n_axes == 3
            if len(subcube):
                if is_3d:
                    data = f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
                else:
                    data = f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
                header["CRPIX1"] -= subcube[0]
                header["CRPIX2"] -= subcube[2]
                header["CRPIX3"] -= subcube[4]
                header["NAXIS1"] = subcube[1] - subcube[0]
                header["NAXIS2"] = subcube[3] - subcube[2]
                header["NAXIS3"] = subcube[5] - subcube[4]
            elif is_3d:
                data = f[0].data
            else:
                data = f[0].section[0]
    finally:
        # Close input cube
        f.close()

    err.message("Input data cube loaded.")

    # ---------------------------------------------------------
    # If no additional actions required, return data and header
    # ---------------------------------------------------------
    if cubeOnly:
        return data, header

    # ---------------------------------------------------
    # Otherwise carry out additional actions as requested
    # ---------------------------------------------------
    # Weighting (a weights file takes precedence over a weights function)
    if weightsFile:
        data = apply_weights_file(data, weightsFile, subcube)
    elif weightsFunction:
        data = apply_weights_function(data, weightsFunction)

    # Flagging
    if doFlag:
        data = apply_flagging(data, flagFile, flagRegions, subcube)

    # Masking
    if maskFile:
        mask = import_mask(maskFile, header, axis_size, subcube)
    else:
        # Create an empty mask if none is provided.
        mask = np.zeros(data.shape, dtype=bool)

    return data, header, mask, subcube
def Gaussian(x, A, sigma):
    """Evaluate a Gaussian centred on zero.

    Parameters:
        x: position(s) at which to evaluate the function.
        A: amplitude (value at x = 0).
        sigma: width of the Gaussian; a value of 0 is rejected via err.ensure.

    Returns:
        A * exp(-x^2 / (2 sigma^2)).
    """
    width_is_valid = sigma != 0
    err.ensure(width_is_valid, "Invalid width of 0 encountered in Gaussian function.")

    exponent = -x**2 / (2.0 * sigma**2)
    return A * np.exp(exponent)
def apply_weights_file(data, weightsFile, subcube):
    """Multiply a data cube in place by weights read from a FITS file.

    Parameters:
        data: data cube indexed as [z, y, x]; multiplied in place.
        weightsFile: path of the FITS file whose primary HDU holds the weights.
            The weights may have 1 to 4 axes: a 1-D spectrum is applied along
            the first data axis, a 2-D image gets a leading axis added, and
            for 4-D weights only the first element of the 4th axis is used.
        subcube: empty sequence to use the full weights array, otherwise
            bounds ``[x1, x2, y1, y2, z1, z2]`` (4 entries are also accepted
            for 2-D weights) selecting the region of the weights to apply.

    Returns:
        The weighted ``data`` array (same object as the input).
    """
    # Load weights cube
    err.message("Applying weights cube:\n " + str(weightsFile))
    try:
        f = fits.open(weightsFile, memmap=False)
        header_weights = f[0].header
    except Exception:
        err.error("Failed to read weights cube.")

    # Make sure the file is closed even if one of the steps below fails
    try:
        # Extract axis sizes and types
        n_axes_weights, axis_size_weights, _ = extract_axis_size(header_weights)

        # Ensure correct dimensionality
        check_cube_dimensions(n_axes_weights, axis_size_weights, cube_name="weights cube", min_dim=1, max_dim=4)

        n_bounds = len(subcube)

        # Multiply data by weights
        if n_axes_weights == 1:
            # 1-D spectrum: reshape to (nz, 1, 1) so it broadcasts over each plane
            err.warning("Weights cube has 1 axis; interpreted as spectrum.\nAdding first and second axis.")
            if n_bounds:
                err.ensure(n_bounds == 6, "Subcube list must have 6 entries ({0:d} given).".format(n_bounds))
                data *= np.reshape(f[0].section[subcube[4]:subcube[5]], (-1, 1, 1))
            else:
                # Was a bare ``reshape`` (undefined name) in the original
                data *= np.reshape(f[0].data, (-1, 1, 1))
        elif n_axes_weights == 2:
            # 2-D image: wrap in a list to add a leading axis
            if n_bounds == 6 or n_bounds == 4:
                data *= np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
            else:
                data *= np.array([f[0].data])
        elif n_axes_weights == 3:
            # 3-D cube
            if n_bounds == 6:
                data *= f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                data *= f[0].data
        else:
            # 4-D hypercube: only plane 0 of the 4th axis is used
            if n_bounds == 6:
                data *= f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
            else:
                data *= f[0].section[0]
    finally:
        f.close()

    err.message(" Weights cube applied.")

    return data
def moment2(x, y):
    """Return the square root of the y-weighted mean of x squared.

    Parameters:
        x: array of coordinates.
        y: array of weights with the same number of elements as x
            (checked via err.ensure).

    Returns:
        sqrt(sum(x * x * y) / sum(y)).
    """
    sizes_agree = x.size == y.size
    err.ensure(sizes_agree, "Incompatible array sizes encountered in moment calculation.")

    weighted_square_sum = np.sum(x * x * y)
    weight_sum = np.sum(y)
    return np.sqrt(weighted_square_sum / weight_sum)
def import_mask(maskFile, header, axis_size, subcube):
    """Load a mask from a FITS file and return it as a Boolean array.

    The mask may have 1 to 4 axes. A 1-D mask is reshaped to (nz, 1, 1), a
    2-D mask gets a leading axis added, and for a 4-D mask only the first
    element of the 4th axis is read.

    If a subcube is given, the mask is used as is when its size equals the
    size of the selected subcube, and it is cut to the subcube when its size
    equals the full size of the data cube. Any other size is reported via
    ``err.error``.

    Parameters:
        maskFile: path of the FITS file whose primary HDU holds the mask.
        header: header of the data cube; its CRVALn must equal those of the
            mask for all mask axes (up to 3).
        axis_size: axis sizes of the full data cube in FITS order
            (NAXIS1, NAXIS2, ...), as passed in by ``import_data``.
        subcube: empty sequence for no subcube selection, otherwise bounds
            ``[x1, x2, y1, y2, z1, z2]`` (4 entries are also accepted for a
            2-D mask).

    Returns:
        Boolean array that is True wherever the mask value is > 0.
    """
    msg_wcs = "Input cube and mask are not on the same WCS grid."
    msg_as_is = " Input mask cube already matches size of data subcube.\n No subcube selection applied."
    msg_cut = " Subcube selection applied to input mask cube."
    msg_mismatch = "Data subcube does not match size of mask subcube or full mask."

    err.message("Loading mask cube:\n " + str(maskFile))
    try:
        f = fits.open(maskFile, memmap=False)
        header_mask = f[0].header
    except Exception:
        err.error("Failed to read mask cube.")

    # Make sure the file is closed even if one of the steps below fails
    try:
        # Extract axis sizes and types
        n_axes_mask, axis_size_mask, _ = extract_axis_size(header_mask)

        # Ensure correct dimensionality
        check_cube_dimensions(n_axes_mask, axis_size_mask, cube_name="mask cube", min_dim=1, max_dim=4)

        n_bounds = len(subcube)

        # 1-D spectrum
        if n_axes_mask == 1:
            err.warning("Mask cube has 1 axis; interpreted as spectrum.\nAdding first and second axis.")
            # Was a bare ``ensure`` (undefined name) in the original
            err.ensure(header_mask['CRVAL1'] == header['CRVAL1'], msg_wcs)

            if n_bounds == 6:
                if header_mask["NAXIS1"] == subcube[5] - subcube[4]:
                    err.message(msg_as_is)
                    mask = np.reshape(f[0].data, (-1, 1, 1))
                elif header_mask["NAXIS1"] == axis_size[2]:
                    err.message(msg_cut)
                    mask = np.reshape(f[0].section[subcube[4]:subcube[5]], (-1, 1, 1))
                else:
                    err.error(msg_mismatch)
            elif not n_bounds:
                mask = np.reshape(f[0].data, (-1, 1, 1))
            else:
                err.error("The subcube list must have 6 entries ({0:d} given).".format(n_bounds))

        # 2-D image
        elif n_axes_mask == 2:
            err.ensure(header_mask["CRVAL1"] == header["CRVAL1"]
                       and header_mask["CRVAL2"] == header["CRVAL2"], msg_wcs)

            if n_bounds == 6 or n_bounds == 4:
                if (header_mask["NAXIS1"] == subcube[1] - subcube[0]
                        and header_mask["NAXIS2"] == subcube[3] - subcube[2]):
                    err.message(msg_as_is)
                    mask = np.array([f[0].data])
                elif (header_mask["NAXIS1"] == axis_size[0]
                        and header_mask["NAXIS2"] == axis_size[1]):
                    err.message(msg_cut)
                    mask = np.array([f[0].section[subcube[2]:subcube[3], subcube[0]:subcube[1]]])
                else:
                    err.error(msg_mismatch)
            else:
                mask = np.array([f[0].data])

        # 3-D cube or 4-D hypercube (only plane 0 of the 4th axis is read)
        else:
            is_3d = n_axes_mask == 3
            err.ensure(header_mask["CRVAL1"] == header["CRVAL1"]
                       and header_mask["CRVAL2"] == header["CRVAL2"]
                       and header_mask["CRVAL3"] == header["CRVAL3"], msg_wcs)

            if n_bounds == 6:
                if (header_mask["NAXIS1"] == subcube[1] - subcube[0]
                        and header_mask["NAXIS2"] == subcube[3] - subcube[2]
                        and header_mask["NAXIS3"] == subcube[5] - subcube[4]):
                    err.message(msg_as_is)
                    mask = f[0].data if is_3d else f[0].section[0]
                elif (header_mask["NAXIS1"] == axis_size[0]
                        and header_mask["NAXIS2"] == axis_size[1]
                        and header_mask["NAXIS3"] == axis_size[2]):
                    err.message(msg_cut)
                    if is_3d:
                        mask = f[0].section[subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
                    else:
                        mask = f[0].section[0, subcube[4]:subcube[5], subcube[2]:subcube[3], subcube[0]:subcube[1]]
                else:
                    err.error(msg_mismatch)
            else:
                mask = f[0].data if is_3d else f[0].section[0]

        # In all cases, convert mask to Boolean with masked pixels set to True.
        mask = mask > 0
    finally:
        f.close()

    err.message("Mask cube loaded.")

    return mask
def standard_deviation(x):
    """Return the standard deviation of x about zero.

    The mean is not subtracted, i.e. the result is sqrt(sum(x * x) / N).

    Parameters:
        x: array of values; must contain at least one element
            (checked via err.ensure).
    """
    n_values = x.size
    err.ensure(n_values > 0, "Array size of 0 encountered in calculation of std. dev.")

    sum_of_squares = np.sum(x * x)
    return np.sqrt(sum_of_squares / x.size)
def _interpolate_rms_profile(grid_points, grid_values, length, interpolation):
    """Interpolate RMS values sampled at grid points onto every pixel of a line.

    Parameters:
        grid_points: 1-D integer array with the pixel positions of the samples.
        grid_values: 1-D array of RMS values at ``grid_points`` (may hold NaN).
        length: number of pixels along the line; output covers 0..length-1.
        interpolation: "cubic" selects a spline, anything else is linear.

    Returns:
        Array of ``length`` interpolated values, or None if all samples are NaN.
    """
    valid = np.logical_not(np.isnan(grid_values))
    n_valid = int(np.count_nonzero(valid))
    if n_valid == 0:
        return None

    coords = np.arange(0, length)
    if interpolation == "cubic" and n_valid > 1:
        # A spline of order k needs more than k points; lower the order when
        # fewer than 4 valid samples exist instead of letting SciPy raise.
        spline = InterpolatedUnivariateSpline(
            grid_points[valid], grid_values[valid], k=min(3, n_valid - 1))
        return spline(coords)

    # Linear interpolation (also used for a single valid sample, which
    # np.interp turns into a constant profile).
    return np.interp(coords, grid_points[valid], grid_values[valid])


def sigma_scale(cube, scaleX=False, scaleY=False, scaleZ=True, edgeX=0, edgeY=0, edgeZ=0, statistic="mad", fluxRange="all", method="global", windowSpatial=20, windowSpectral=20, gridSpatial=0, gridSpectral=0, interpolation="none"):
    """Divide a 3-D data cube by its noise level and return cube and noise cube.

    The cube is indexed as ``cube[z, y, x]`` and is modified in place.

    Parameters:
        cube: 3-D array to be normalised (divided in place).
        scaleX, scaleY, scaleZ: global mode only; measure and divide out the
            noise plane by plane along the respective axis. Scalings along
            several axes are applied one after the other.
        edgeX, edgeY, edgeZ: global mode only; number of pixels excluded at
            both ends of each axis when measuring the noise.
        statistic: passed to ``GetRMS`` as ``rmsMode``.
        fluxRange: passed to ``GetRMS`` as ``fluxRange``.
        method: "local" measures the noise in a running window on a grid;
            any other value selects the global plane-by-plane measurement.
        windowSpatial, windowSpectral: local mode; window sizes, forced to
            odd integers >= 1.
        gridSpatial, gridSpectral: local mode; grid spacings, forced to odd
            integers >= 1. A value of 0 selects half the window size.
        interpolation: local mode; "linear" or "cubic" interpolates between
            grid points, anything else fills each grid cell with a constant.

    Returns:
        Tuple ``(cube, rms_cube)`` where ``rms_cube`` has the shape and dtype
        of ``cube`` and holds the noise values the cube was divided by.
    """
    # Print some informational messages
    err.message("Generating noise-scaled data cube:")
    err.message(" Selecting " + str(method) + " noise measurement method.")

    if statistic == "mad":
        err.message(" Applying median absolute deviation to " + str(fluxRange) + " pixels.")
    if statistic == "std":
        err.message(" Applying standard deviation to " + str(fluxRange) + " pixels.")
    if statistic == "gauss":
        err.message(" Applying Gaussian fit to " + str(fluxRange) + " pixels.")
    if statistic == "negative":
        err.message(" Applying Gaussian fit to negative pixels.")

    # Check the dimensions of the cube (could be obtained from header information)
    dimensions = np.shape(cube)

    # LOCAL noise measurement within running window (slower and less memory-friendly)
    if method == "local":
        # Make window sizes integers >= 1
        windowSpatial = max(int(windowSpatial), 1)
        windowSpectral = max(int(windowSpectral), 1)

        # Ensure that window sizes are odd
        windowSpatial += (1 - windowSpatial % 2)
        windowSpectral += (1 - windowSpectral % 2)

        # Set grid sizes to half the window sizes if undefined
        if not gridSpatial:
            gridSpatial = windowSpatial // 2
        if not gridSpectral:
            gridSpectral = windowSpectral // 2

        # Make grid sizes integers >= 1
        gridSpatial = max(int(gridSpatial), 1)
        gridSpectral = max(int(gridSpectral), 1)

        # Ensure that grid sizes are odd
        gridSpatial += (1 - gridSpatial % 2)
        gridSpectral += (1 - gridSpectral % 2)

        # Print grid and window sizes adopted
        err.message(" Using grid size of [" + str(gridSpatial) + ", " + str(gridSpectral) + "]")
        err.message(" and window size of [" + str(windowSpatial) + ", " + str(windowSpectral) + "].")

        # Generate grid points to be used; the start offset is half of what
        # remains of the axis after the last full grid step.
        gridPointsZ = np.arange((dimensions[0] - gridSpectral * (int(math.ceil(float(dimensions[0]) / float(gridSpectral))) - 1)) // 2, dimensions[0], gridSpectral)
        gridPointsY = np.arange((dimensions[1] - gridSpatial * (int(math.ceil(float(dimensions[1]) / float(gridSpatial))) - 1)) // 2, dimensions[1], gridSpatial)
        gridPointsX = np.arange((dimensions[2] - gridSpatial * (int(math.ceil(float(dimensions[2]) / float(gridSpatial))) - 1)) // 2, dimensions[2], gridSpatial)

        # Divide grid and window sizes by 2 to get radii
        radiusGridSpatial = gridSpatial // 2
        radiusGridSpectral = gridSpectral // 2
        radiusWindowSpatial = windowSpatial // 2
        radiusWindowSpectral = windowSpectral // 2

        # Create empty cube (filled with NaN) to hold noise values
        rms_cube = np.full(cube.shape, np.nan, dtype=cube.dtype)

        interpolate = interpolation == "linear" or interpolation == "cubic"

        # Determine RMS across window centred on grid cell
        for z in gridPointsZ:
            for y in gridPointsY:
                for x in gridPointsX:
                    grid = (max(0, z - radiusGridSpectral), min(dimensions[0], z + radiusGridSpectral + 1),
                            max(0, y - radiusGridSpatial), min(dimensions[1], y + radiusGridSpatial + 1),
                            max(0, x - radiusGridSpatial), min(dimensions[2], x + radiusGridSpatial + 1))
                    window = (max(0, z - radiusWindowSpectral), min(dimensions[0], z + radiusWindowSpectral + 1),
                              max(0, y - radiusWindowSpatial), min(dimensions[1], y + radiusWindowSpatial + 1),
                              max(0, x - radiusWindowSpatial), min(dimensions[2], x + radiusWindowSpatial + 1))
                    sample = cube[window[0]:window[1], window[2]:window[3], window[4]:window[5]]

                    # Windows without any valid pixel keep their NaN noise value
                    if np.all(np.isnan(sample)):
                        continue

                    rms = GetRMS(sample, rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if interpolate:
                        # Write value into grid point for later interpolation
                        rms_cube[z, y, x] = rms
                    else:
                        # Fill entire grid cell
                        rms_cube[grid[0]:grid[1], grid[2]:grid[3], grid[4]:grid[5]] = rms

        # Carry out interpolation if requested, taking NaNs into account
        if interpolate:
            err.message(" Interpolating in between grid points (" + str(interpolation) + ").")

            # First across each spatial plane: along x on the grid rows,
            # then along y for every column of the plane.
            if gridSpatial > 1:
                for z in gridPointsZ:
                    for y in gridPointsY:
                        profile = _interpolate_rms_profile(gridPointsX, rms_cube[z, y, gridPointsX], dimensions[2], interpolation)
                        if profile is not None:
                            rms_cube[z, y, 0:dimensions[2]] = profile

                    for x in range(dimensions[2]):
                        profile = _interpolate_rms_profile(gridPointsY, rms_cube[z, gridPointsY, x], dimensions[1], interpolation)
                        if profile is not None:
                            rms_cube[z, 0:dimensions[1], x] = profile

            # Then along the spectral axis
            if gridSpectral > 1:
                for y in range(dimensions[1]):
                    for x in range(dimensions[2]):
                        profile = _interpolate_rms_profile(gridPointsZ, rms_cube[gridPointsZ, y, x], dimensions[0], interpolation)
                        if profile is not None:
                            rms_cube[0:dimensions[0], y, x] = profile

        # Replace any invalid RMS values with NaN
        with np.errstate(invalid="ignore"):
            rms_cube[rms_cube <= 0] = np.nan

        # Divide data cube by RMS cube
        cube /= rms_cube

    # GLOBAL noise measurement on entire 2D plane (faster and more memory-friendly)
    else:
        # Define the range over which statistics are calculated
        z1 = int(edgeZ)
        z2 = int(dimensions[0] - edgeZ)
        y1 = int(edgeY)
        y2 = int(dimensions[1] - edgeY)
        x1 = int(edgeX)
        x2 = int(dimensions[2] - edgeX)

        # Make sure edges don't exceed cube size
        err.ensure(z1 < z2 and y1 < y2 and x1 < x2, "Edge size exceeds cube size for at least one axis.")

        # Create empty cube (filled with 1) to hold noise values
        rms_cube = np.ones(cube.shape, dtype=cube.dtype)

        # Measure noise across 2D planes and scale cube accordingly.
        # Planes that are entirely NaN or yield a non-positive RMS are left untouched.
        if scaleZ:
            for i in range(dimensions[0]):
                if not np.all(np.isnan(cube[i, y1:y2, x1:x2])):
                    rms = GetRMS(cube[i, y1:y2, x1:x2], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[i, :, :] *= rms
                        cube[i, :, :] /= rms

        if scaleY:
            for i in range(dimensions[1]):
                if not np.all(np.isnan(cube[z1:z2, i, x1:x2])):
                    rms = GetRMS(cube[z1:z2, i, x1:x2], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[:, i, :] *= rms
                        cube[:, i, :] /= rms

        if scaleX:
            for i in range(dimensions[2]):
                if not np.all(np.isnan(cube[z1:z2, y1:y2, i])):
                    rms = GetRMS(cube[z1:z2, y1:y2, i], rmsMode=statistic, fluxRange=fluxRange, zoomx=1, zoomy=1, zoomz=1, verbose=0)
                    if rms > 0:
                        rms_cube[:, :, i] *= rms
                        cube[:, :, i] /= rms

    err.message("Noise-scaled data cube generated.\n")

    return cube, rms_cube