def _prepare_flux(self, flux):
    """
    Preparation of images for Enhanced Correlation Coefficient (ECC) Maximization
    estimation of movement - used for estimation of jitter.

    Parameters:
        flux (array): flux pixel image

    Returns:
        array: Gradient (using Scharr method) of image in logarithmic units.

    .. codeauthor:: Mikkel N. Lund
    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """
    # Convert to logarithmic units, avoiding taking log if zero:
    flux = np.asarray(flux)
    flux = np.log10(flux - np.nanmin(flux) + 1.0)

    # Convert image to flux in range -1 to 1 (for gradient determination)
    fmax = np.nanmax(flux)
    fmin = np.nanmin(flux)
    ran = np.abs(fmax - fmin)
    flux1 = -1 + 2*((flux - fmin)/ran)

    # Calculate Scharr gradient
    flux1 = scharr(flux1)

    # Remove potential NaNs in gradient image
    replace(flux1, np.NaN, 0)

    # Make sure image is in proper units for ECC routine
    return np.asarray(flux1, dtype='float32')
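
# A minimal, self-contained sketch of the same ECC preprocessing applied to a
# synthetic image (log-scale, rescale to [-1, 1], Scharr gradient, NaN -> 0).
# The function name `prepare_for_ecc` is made up for illustration; the method
# above is the authoritative version.
import numpy as np
from bottleneck import replace
from skimage.filters import scharr

def prepare_for_ecc(flux):
    flux = np.asarray(flux, dtype='float64')
    flux = np.log10(flux - np.nanmin(flux) + 1.0)        # logarithmic units, log10 of values >= 1
    fmin, fmax = np.nanmin(flux), np.nanmax(flux)
    flux = -1 + 2 * (flux - fmin) / np.abs(fmax - fmin)  # rescale to [-1, 1]
    grad = scharr(flux)                                  # Scharr edge magnitude
    replace(grad, np.nan, 0)                             # in-place NaN removal
    return np.asarray(grad, dtype='float32')             # float32 expected by the ECC routine

if __name__ == '__main__':
    img = np.random.default_rng(0).random((64, 64))
    img[10, 10] = np.nan
    out = prepare_for_ecc(img)
    print(out.dtype, out.shape)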
def smear(self, img):
    """CCD dark current and smear correction.

    TODO:
     - Should we weight everything with the number of rows used in masked vs virtual regions?
     - Should we take self.frametransfer_time into account?
     - Cosmic ray rejection requires images before and after in time?
    """
    self.logger.info("Doing smear correction...")

    # Remove cosmic rays in collateral data:
    # TODO: Can cosmic rays also show up in virtual pixels? If so, also include img.virtual_smear
    #index_collateral_cosmicrays = cosmic_rays(img.masked_smear)
    index_collateral_cosmicrays = np.zeros_like(img.masked_smear, dtype='bool')
    img.masked_smear[index_collateral_cosmicrays] = np.nan

    # Average the masked and virtual smear across their rows:
    masked_smear = nanmedian(img.masked_smear, axis=0)
    virtual_smear = nanmedian(img.virtual_smear, axis=0)

    # Estimate dark current:
    # TODO: Should this be self.frametransfer_time?
    fdark = nanmedian(masked_smear - virtual_smear * (self.exposure_time + self.readout_time) / self.exposure_time)
    img.dark = fdark  # Save for later use
    self.logger.info('Dark current: %f', img.dark)
    if np.isnan(fdark):
        fdark = 0

    # Correct the smear regions for the dark current:
    masked_smear -= fdark
    virtual_smear -= fdark * (self.exposure_time + self.readout_time) / self.exposure_time

    # Weights from number of pixels in different regions:
    Nms = np.sum(~np.isnan(img.masked_smear), axis=0)
    Nvs = np.sum(~np.isnan(img.virtual_smear), axis=0)
    c_ms = Nms / np.maximum(Nms + Nvs, 1)
    c_vs = Nvs / np.maximum(Nms + Nvs, 1)

    # Weights as in Kepler where you only have one row in each sector:
    #g_ms = ~np.isnan(masked_smear)
    #g_vs = ~np.isnan(virtual_smear)
    #c_ms = g_ms/np.maximum(g_ms + g_vs, 1)
    #c_vs = g_vs/np.maximum(g_ms + g_vs, 1)

    # Estimate the smear for all columns, taking into account
    # that some columns could be missing:
    replace(masked_smear, np.nan, 0)
    replace(virtual_smear, np.nan, 0)
    fsmear = c_ms * masked_smear + c_vs * virtual_smear

    # Correct the science pixels for dark current and smear:
    img.target_data -= fdark
    for k, col in enumerate(img.collateral_columns):
        img.target_data[img.columns == col] -= fsmear[k]

    return img
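
# A small sketch (with made-up numbers) of the weighting logic above: columns
# missing from one smear region get zero weight, and bn.replace turns their
# NaNs into zeros so the weighted sum stays finite.
import numpy as np
import bottleneck as bn

masked_smear = np.array([[1.0, np.nan, 3.0],
                         [1.2, np.nan, 2.8]])      # 2 masked-smear rows, 3 columns
virtual_smear = np.array([[0.9, 2.1, np.nan],
                          [1.1, 1.9, np.nan]])     # 2 virtual-smear rows, 3 columns

ms = bn.nanmedian(masked_smear, axis=0)            # per-column medians
vs = bn.nanmedian(virtual_smear, axis=0)

Nms = np.sum(~np.isnan(masked_smear), axis=0)      # pixels contributing per column
Nvs = np.sum(~np.isnan(virtual_smear), axis=0)
c_ms = Nms / np.maximum(Nms + Nvs, 1)
c_vs = Nvs / np.maximum(Nms + Nvs, 1)

bn.replace(ms, np.nan, 0)                          # zero out missing columns in-place
bn.replace(vs, np.nan, 0)
fsmear = c_ms * ms + c_vs * vs
print(fsmear)                                      # finite smear estimate for every column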
def init(self, mode='random', assign=False):
    # Predefined assignment vector
    if assign:
        self.assignment = np.array(assign)
        self.cells_per_cluster = {}
        cl, cl_size = np.unique(assign, return_counts=True)
        for i in range(cl.size):
            bn.replace(self.assignment, cl[i], i)
            self.cells_per_cluster[i] = cl_size[i]
        self.parameters = self._init_cl_params('assign')
    elif mode == 'separate':
        self.assignment = np.arange(self.cells_total, dtype=int)
        self.cells_per_cluster = {i: 1 for i in range(self.cells_total)}
        self.parameters = self._init_cl_params(mode)
    # All cells in one cluster
    elif mode == 'together':
        self.assignment = np.zeros(self.cells_total, dtype=int)
        self.cells_per_cluster = {0: self.cells_total}
        self.parameters = self._init_cl_params(mode)
    # Complete random
    elif mode == 'random':
        self.assignment = np.random.randint(0, high=self.cells_total, size=self.cells_total)
        self.cells_per_cluster = {}
        cl, cl_size = np.unique(self.assignment, return_counts=True)
        for i in range(cl.size):
            bn.replace(self.assignment, cl[i], i)
            self.cells_per_cluster[i] = cl_size[i]
        self.parameters = self._init_cl_params(mode)
    else:
        raise TypeError(f'Unsupported Initialization: {mode}')

    self.init_DP_prior()
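
# Sketch of the relabelling pattern used above: arbitrary integer labels are
# mapped in-place onto consecutive cluster ids 0..k-1 with bn.replace. Because
# np.unique returns the old labels in ascending order, an already assigned new
# id can never collide with an old label that is still waiting to be rewritten.
# Variable names are illustrative only.
import numpy as np
import bottleneck as bn

rng = np.random.default_rng(42)
assignment = rng.integers(0, 10, size=8)                 # random labels in [0, 10)
cells_per_cluster = {}
cl, cl_size = np.unique(assignment, return_counts=True)
for i in range(cl.size):
    bn.replace(assignment, cl[i], i)                     # old label -> consecutive id, in-place
    cells_per_cluster[i] = cl_size[i]
print(assignment, cells_per_cluster)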
def test_replace_view(dtype):
    array = np.arange(20, dtype=dtype)
    view = array[::2]
    bn.replace(view, 5, -1)
    assert view.min() == 0
    assert array.min() == 0
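
# Quick illustration of what the view test above checks: bn.replace works
# in-place on exactly the elements addressed by the (strided) view, leaving the
# rest of the base array untouched.
import numpy as np
import bottleneck as bn

base = np.arange(20)
view = base[::2]            # even elements only: 0, 2, 4, ..., 18
bn.replace(view, 4, -1)     # 4 is in the view, so it is replaced ...
bn.replace(view, 5, -1)     # ... 5 is not in the view, so base[5] stays 5
print(base[:8])             # -> [ 0  1  2  3 -1  5  6  7]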
def nanToZeros(matrix, value=0):
    '''Change all NaN values in a given nD-array to `value` (0 by default).'''
    #whereAreNaNs = np.isnan(matrix)
    matrix2 = deepcopy(matrix)
    #matrix2[whereAreNaNs] = 0
    bn.replace(matrix2, np.nan, value)
    return matrix2
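
# Example usage of the helper above (assuming the module-level imports it relies
# on, `from copy import deepcopy` and `import bottleneck as bn`): the deepcopy
# keeps the caller's array intact while bn.replace fills the copy in-place.
import numpy as np

a = np.array([[1.0, np.nan], [np.nan, 4.0]])
b = nanToZeros(a)            # NaNs become 0 in the copy
c = nanToZeros(a, value=-9)  # or any other fill value
print(a)                     # original still contains NaNs
print(b)
print(c)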
def reorder_values_array(self, arr, variables):
    for col, var in enumerate(variables):
        if var.fix_order and len(var.values) < 1000:
            new_order = var.ordered_values(var.values)
            if new_order == var.values:
                continue
            arr[:, col] += 1000
            for i, val in enumerate(var.values):
                bn.replace(arr[:, col], 1000 + i, new_order.index(val))
            var.values = new_order
        delattr(var, "fix_order")
def sanitize_variable(valuemap, values, orig_values, coltype, coltype_kwargs,
                      domain_vars, existing_var, new_var_name, data=None):
    if valuemap:
        # Map discrete data to ints
        def valuemap_index(val):
            try:
                return valuemap.index(val)
            except ValueError:
                return np.nan

        values = np.vectorize(valuemap_index, otypes=[float])(orig_values)
        coltype_kwargs.update(values=valuemap)

    if coltype is StringVariable:
        values = ['' if i is np.nan else i for i in orig_values]

    var = None
    if domain_vars is not None:
        if existing_var:
            # Use existing variable if available
            var = coltype.make(existing_var.strip(), **coltype_kwargs)
        else:
            # Never use existing for un-named variables
            var = coltype(new_var_name, **coltype_kwargs)

        # Reorder discrete values to match existing variable
        if var.is_discrete and not var.ordered:
            new_order, old_order = var.values, coltype_kwargs.get('values', var.values)
            if new_order != old_order:
                offset = len(new_order)
                column = values if data.ndim > 1 else data
                column += offset
                for i, val in enumerate(var.values):
                    try:
                        oldval = old_order.index(val)
                    except ValueError:
                        continue
                    bn.replace(column, offset + oldval, new_order.index(val))

    if isinstance(var, TimeVariable) or coltype is TimeVariable:
        # Re-parse the values because only now after coltype.make call
        # above, variable var is the correct one
        _var = var if isinstance(var, TimeVariable) else TimeVariable('_')
        values = [_var.parse(i) for i in orig_values]

    return values, var
def test_replace_nan_int():
    "Test replace, int array, old=nan, new=0"
    a = np.arange(2*3*4).reshape(2, 3, 4)
    actual = a.copy()
    bn.replace(actual, np.nan, 0)
    desired = a.copy()
    msg = 'replace failed on int input looking for nans'
    assert_array_equal(actual, desired, err_msg=msg)
    actual = a.copy()
    bn.slow.replace(actual, np.nan, 0)
    msg = 'slow.replace failed on int input looking for nans'
    assert_array_equal(actual, desired, err_msg=msg)
def test_replace_nan_int(dtype):
    """Test replace, int array, old=nan, new=0"""
    a = np.arange(2 * 3 * 4, dtype=dtype).reshape(2, 3, 4)
    actual = a.copy()
    bn.replace(actual, np.nan, 0)
    desired = a.copy()
    msg = "replace failed on int input looking for nans"
    assert_array_equal(actual, desired, err_msg=msg)
    actual = a.copy()
    bn.slow.replace(actual, np.nan, 0)
    msg = "slow.replace failed on int input looking for nans"
    assert_array_equal(actual, desired, err_msg=msg)
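
# What the two integer tests above assert: an integer array cannot hold NaN, so
# nothing ever matches old=np.nan and bn.replace leaves the array unchanged
# rather than raising.
import numpy as np
import bottleneck as bn

a = np.arange(6, dtype=np.int64)
bn.replace(a, np.nan, 0)     # nothing matches, nothing changes
assert (a == np.arange(6)).all()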
def reorder_values():
    new_order, old_order = \
        var.values, coltype_kwargs.get('values', var.values)
    if new_order != old_order:
        offset = len(new_order)
        column = values if data.ndim > 1 else data
        column += offset
        for _, val in enumerate(var.values):
            try:
                oldval = old_order.index(val)
            except ValueError:
                continue
            bn.replace(column, offset + oldval, new_order.index(val))
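
# The offset trick used above, shown in isolation: when remapping category
# codes in-place, adding `offset` (the number of values) first guarantees that
# codes already rewritten to 0..k-1 can never collide with codes still waiting
# to be rewritten (which all sit at offset or above).
import numpy as np
import bottleneck as bn

old_order = ["low", "high", "medium"]
new_order = ["low", "medium", "high"]

column = np.array([0.0, 1.0, 2.0, 1.0])   # codes under old_order
offset = len(new_order)
column += offset                           # 3.0, 4.0, 5.0, 4.0  (all >= offset)
for val in new_order:
    oldval = old_order.index(val)
    bn.replace(column, offset + oldval, new_order.index(val))
print(column)                              # -> [0. 2. 1. 2.]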
def spike_sep(self): """ Separate CBVs into a "slow" and a "spiky" component. This is done by filtering the deta and identifying outlier with a peak-finding algorithm. .. codeauthor:: Mikkel N. Lund <*****@*****.**> """ logger = logging.getLogger(__name__) logger.info('running CBV spike separation') logger.info('------------------------------------') if 'cbv-single-scale' in self.hdf and 'cbv-spike' in self.hdf: logger.info( 'Separated CBVs for SECTOR=%d, CADENCE=%d, AREA=%d already calculated.', self.sector, self.cadence, self.cbv_area) return logger.info( 'Computing CBV spike separation for SECTOR=%d, CADENCE=%d, AREA=%d...', self.sector, self.cadence, self.cbv_area) # Load initial CBV from "compute_CBV" cbv = self.hdf['cbv-ini'] # padding window, just needs to be bigger than savgol filtering window wmir = 50 # Initiate arrays for cleaned and spike CBVs cbv_new = np.zeros_like(cbv) cbv_spike = np.zeros_like(cbv) # Iterate over basis vectors xs = np.arange(0, cbv.shape[0] + 2 * wmir - 2) for j in range(cbv.shape[1]): # Pad ends for better peak detection at boundaries of data data0 = cbv[:, j] data0 = np.append(np.flip(data0[0:wmir])[:-1], data0) data0 = np.append(data0, np.flip(data0[-wmir::])[1:]) data = data0.copy() # Iterate peak detection, with different savgol filter widths: for w in (31, 29, 27, 25, 23): # For savgol filter data must be continuous data2 = pchip_interpolate(xs[np.isfinite(data)], data[np.isfinite(data)], xs) # Smooth, filtered version of data, to use to identify "outliers", i.e., spikes y = savgol_filter(data2, w, 2, mode='constant') y2 = data2 - y # Run peak detection sigma = mad_to_sigma * nanmedian(np.abs(y2)) peaks, properties = find_peaks(np.abs(y2), prominence=(3 * sigma, None), wlen=500) data[peaks] = np.nan # Interpolate CBVs where spike has been identified data = pchip_interpolate(xs[np.isfinite(data)], data[np.isfinite(data)], xs) # Remove padded ends and store in CBV matrices # Spike signal is difference between original data and data with masked spikes cbv_spike[:, j] = data0[wmir - 1:-wmir + 1] - data[wmir - 1:-wmir + 1] replace(cbv_spike[:, j], np.nan, 0) cbv_new[:, j] = data[wmir - 1:-wmir + 1] # Save files self.hdf.create_dataset('cbv-single-scale', data=cbv_new) self.hdf.create_dataset('cbv-spike', data=cbv_spike) # Signal-to-Noise test (here only for plotting) indx_lowsnr = cbv_snr_test(cbv_new, self.threshold_snrtest) # Plot all the CBVs: fig, axes = plt.subplots(int(np.ceil(self.ncomponents / 2)), 2, figsize=(12, 16)) fig2, axes2 = plt.subplots(int(np.ceil(self.ncomponents / 2)), 2, figsize=(12, 16)) fig.subplots_adjust(wspace=0.23, hspace=0.46, left=0.08, right=0.96, top=0.94, bottom=0.055) fig2.subplots_adjust(wspace=0.23, hspace=0.46, left=0.08, right=0.96, top=0.94, bottom=0.055) axes = axes.flatten() axes2 = axes2.flatten() for k in range(cbv_new.shape[1]): if indx_lowsnr is not None and indx_lowsnr[k]: col = 'c' else: col = 'k' axes[k].plot(cbv_new[:, k], ls='-', color=col) axes[k].set_title(f'Basis Vector {k+1:d}') axes2[k].plot(cbv_spike[:, k], ls='-', color=col) axes2[k].set_title(f'Spike Basis Vector {k+1:d}') fig.savefig( os.path.join( self.cbv_plot_folder, f'cbvs-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area:d}.png' )) fig2.savefig( os.path.join( self.cbv_plot_folder, f'spike-cbvs-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area:d}.png' )) plt.close(fig) plt.close(fig2)
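
# A condensed, single-pass sketch of the spike-separation idea in spike_sep
# above: smooth with a Savitzky-Golay filter, flag outliers with find_peaks
# using a MAD-based threshold, and bridge across them with PCHIP interpolation.
# The window, threshold and function name are illustrative; the method above
# (with mirror padding and several filter widths) is the authoritative version.
import numpy as np
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import pchip_interpolate

def split_spikes(signal, window=31, nsigma=3.0):
    mad_to_sigma = 1.4826                        # MAD -> sigma conversion factor
    x = np.arange(signal.size)
    data = signal.copy()

    smooth = savgol_filter(data, window, 2)      # slow component estimate
    resid = data - smooth
    sigma = mad_to_sigma * np.nanmedian(np.abs(resid))
    peaks, _ = find_peaks(np.abs(resid), prominence=(nsigma * sigma, None))

    data[peaks] = np.nan                         # mask the spikes ...
    ok = np.isfinite(data)
    data = pchip_interpolate(x[ok], data[ok], x)  # ... and bridge across them

    spike = signal - data                        # spiky component
    return data, spike

if __name__ == '__main__':
    t = np.linspace(0, 10, 500)
    y = np.sin(t)
    y[[50, 200, 350]] += 1.0                     # inject three spikes
    slow, spikes = split_spikes(y)
    print(np.count_nonzero(np.abs(spikes) > 0.5))  # roughly the 3 injected spikes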
def stats(self, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95): """Calculate some statistics among every realisation. Each statistic is calculated node-wise along the complete number of realisations. Parameters ---------- lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. Returns ------- retdict : dict of GridArr Dictionary containing one GridArr for each calculated statistic. See Also -------- stats_area : same but considering a circular (and horizontal) area of a specified radius around a given point. """ # check if the map files are already opened or not if isinstance(self.files[0], file): opened_files = True else: opened_files = False if lmean: meanmap = np.zeros(self.cells) if lmed: medmap = np.zeros(self.cells) if lskew: skewmap = np.zeros(self.cells) if lvar: varmap = np.zeros(self.cells) if lstd: stdmap = np.zeros(self.cells) if lcoefvar: coefvarmap = np.zeros(self.cells) if lperc: percmap = np.zeros((self.cells, 2)) arr = np.zeros(self.nfiles) skip = True offset = os.SEEK_SET for cell in xrange(self.cells - self.header): for i, gridfile in enumerate(self.files): # deal with map files not open yet if opened_files: grid = gridfile else: grid = open(gridfile, 'rb') grid.seek(offset) if skip: skip_lines(grid, self.header) arr[i] = grid.readline() if not opened_files: offset = grid.tell() grid.close() skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) if lmean: meanmap[cell] = bn.nanmean(arr) if lmed: medmap[cell] = bn.nanmedian(arr) if lskew: skewmap[cell] = pd.Series(arr).skew() if lvar: varmap[cell] = bn.nanvar(arr, ddof=1) if lstd: stdmap[cell] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarmap[cell] = std / mean * 100 if lperc: percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) retdict = dict() if lmean: meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=meanmap) retdict['meanmap'] = meangrid if lmed: medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=medmap) retdict['medianmap'] = medgrid if lskew: skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=skewmap) retdict['skewmap'] = skewgrid if lvar: vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=varmap) retdict['varmap'] = vargrid if lstd: stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=stdmap) retdict['stdmap'] = stdgrid if lcoefvar: coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=coefvarmap) retdict['coefvarmap'] = coefvargrid if lperc: percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=percmap) retdict['percmap'] = percgrid return retdict
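
# The core per-node pattern used in stats() above, shown on a toy vector: the
# grid's no-data marker is turned into NaN in-place, after which bottleneck's
# nan-aware reductions simply skip those realisations.
import numpy as np
import bottleneck as bn

nodata = -999.0
arr = np.array([1.2, -999.0, 1.5, 1.1, -999.0])   # one node across 5 realisations
bn.replace(arr, nodata, np.nan)
print(bn.nanmean(arr), bn.nanvar(arr, ddof=1), bn.nanstd(arr, ddof=1))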
def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95, save=False): """Calculate some statistics among every realisation, considering a circular (only horizontaly) area of radius `tol` around the point located at `loc`. Parameters ---------- loc : array_like Location of the vertical line [x, y]. tol : number, default 0 Tolerance radius used to search for neighbour nodes. lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. save : boolean, default False Write the points used to calculate the chosen statistics in PointSet format to a file named 'sim values at (x, y, line).prn'. Returns ------- statspset : PointSet PointSet instance containing the calculated statistics. .. TODO: checkar stats variance com geoms """ if lmean: meanline = np.zeros(self.dz) if lmed: medline = np.zeros(self.dz) if lskew: skewline = np.zeros(self.dz) if lvar: varline = np.zeros(self.dz) if lstd: stdline = np.zeros(self.dz) if lcoefvar: coefvarline = np.zeros(self.dz) if lperc: percline = np.zeros((self.dz, 2)) # convert the coordinates of the first point to grid nodes loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz], [self.xi, self.yi, self.zi])[:2] # find the nodes coordinates within a circle centred in the first point neighbours_nodes = circle(loc[0], loc[1], tol) # compute the lines numbers for each point in the neighbourhood, across # each grid layer. this yields a N*M matrix, with N equal to the number # of neighbour nodes, and M equal to the number of layers in the grid. 
neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz]) for node in neighbours_nodes] # sort the lines in ascending order neighbours_lines = np.sort(neighbours_lines, axis=0) # create an array to store the neighbour nodes in each grid file nnodes = neighbours_lines.shape[0] arr = np.zeros(self.nfiles * nnodes) skip = True curr_line = np.zeros(self.nfiles) for layer in xrange(neighbours_lines.shape[1]): for i, line in enumerate(neighbours_lines[:, layer]): for j, grid in enumerate(self.files): # skip header lines only once per grid file if skip and self.header: skip_lines(grid, self.header) # advance to the next line with a neighbour node skip_lines(grid, int(line - curr_line[j] - 1)) # read the line and store its value a = grid.readline() arr[i + j * nnodes] = float(a) curr_line[j] = line skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) # compute the required statistics if lmean: meanline[layer] = bn.nanmean(arr) if lmed: medline[layer] = bn.nanmedian(arr) if lskew: skewline[layer] = pd.Series(arr).skew() if lvar: varline[layer] = bn.nanvar(arr, ddof=1) if lstd: stdline[layer] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarline[layer] = stdline[layer] / meanline[layer] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarline[layer] = std / mean * 100 if lperc: percline[layer] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) if save and tol == 0: # FIXME: not working with the tolerance feature # need to adjust the arrpset or cherry-pick arr arrpset = PointSet('realisations at location ({0}, {1}, {2})'. format(loc[0], loc[1], layer * self.cellz + self.zi), self.nodata, 3, ['x', 'y', 'value'], values=np.zeros((self.nfiles, 3))) arrout = os.path.join(os.path.dirname(self.files[0].name), 'sim values at ({0}, {1}, {2}).prn'.format( loc[0], loc[1], layer * self.cellz + self.zi)) arrpset.values.iloc[:, 2] = arr arrpset.values.iloc[:, :2] = np.repeat(np.array(loc) [np.newaxis, :], self.nfiles, axis=0) arrpset.save(arrout, header=True) ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew)) if lperc: ncols += 2 statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'. format(loc[0], loc[1]), nodata=self.nodata, nvars=3 + ncols, varnames=['x', 'y', 'z'], values=np.zeros((self.dz, 3 + ncols))) statspset.values.iloc[:, :3] = (np.column_stack (((np.repeat(np.array(loc) [np.newaxis, :], self.dz, axis=0)), np.arange(self.zi, self.zi + self.cellz * self.dz)))) j = 3 if lmean: statspset.varnames.append('mean') statspset.values.iloc[:, j] = meanline j += 1 if lmed: statspset.varnames.append('median') statspset.values.iloc[:, j] = medline j += 1 if lskew: statspset.varnames.append('skewness') statspset.values.iloc[:, j] = skewline j += 1 if lvar: statspset.varnames.append('variance') statspset.values.iloc[:, j] = varline j += 1 if lstd: statspset.varnames.append('std') statspset.values.iloc[:, j] = stdline j += 1 if lcoefvar: statspset.varnames.append('coefvar') statspset.values.iloc[:, j] = coefvarline j += 1 if lperc: statspset.varnames.append('lperc') statspset.varnames.append('rperc') statspset.values.iloc[:, -2:] = percline # reset the reading pointer in each grid file self.reset_read() # update varnames statspset.flush_varnames() return statspset
def test_replace_newaxis(dtype):
    array = np.ones((2, 2), dtype=dtype)[..., np.newaxis]
    result = bn.replace(array, 1, 2)
    assert (result == 2).all().all()
def test_replace_bad_args():
    array = np.ones((10, 10))
    bad_vals = [None, "", [0], "0"]
    for bad_val in bad_vals:
        with pytest.raises(TypeError, match="`old` must be a number"):
            bn.replace(array, bad_val, 0)
        with pytest.raises(TypeError, match="`new` must be a number"):
            bn.replace(array, 0, bad_val)
    with pytest.raises(TypeError, match="Cannot find `a` keyword input"):
        bn.replace(foo=array)
    with pytest.raises(TypeError, match="Cannot find `old` keyword input"):
        bn.replace(a=array)
    with pytest.raises(TypeError, match="Cannot find `new` keyword input"):
        bn.replace(a=array, old=0)
    with pytest.raises(TypeError, match="wrong number of arguments 4"):
        bn.replace(array, 0)
    with pytest.raises(TypeError, match="wrong number of arguments 4"):
        bn.replace(array, 0, 0, 0)
def prepare_photometry(input_folder=None, sectors=None, cameras=None, ccds=None, calc_movement_kernel=False, backgrounds_pixels_threshold=0.5, output_file=None): """ Restructure individual FFI images (in FITS format) into a combined HDF5 file which is used in the photometry pipeline. In this process the background flux in each FFI is estimated using the `backgrounds.fit_background` function. Parameters: input_folder (string): Input folder to create TODO list for. If ``None``, the input directory in the environment variable ``TESSPHOT_INPUT`` is used. cameras (iterable of integers, optional): TESS camera number (1-4). If ``None``, all cameras will be processed. ccds (iterable of integers, optional): TESS CCD number (1-4). If ``None``, all cameras will be processed. calc_movement_kernel (boolean, optional): Should Image Movement Kernels be calculated for each image? If it is not calculated, only the default WCS movement kernel will be available when doing the folllowing photometry. Default=False. backgrounds_pixels_threshold (float): Percentage of times a pixel has to use used in background calculation in order to be included in the final list of contributing pixels. Default=0.5. output_file (string, optional): The file path where the output file should be saved. If not specified, the file will be saved into the input directory. Should only be used for testing, since the file would (proberly) otherwise end up with a wrong file name for running with the rest of the pipeline. Raises: NotADirectoryError: If the specified ``input_folder`` is not an existing directory or if settings table could not be loaded from the catalog SQLite file. .. codeauthor:: Rasmus Handberg <*****@*****.**> """ logger = logging.getLogger(__name__) tqdm_settings = { 'disable': not logger.isEnabledFor(logging.INFO), 'dynamic_ncols': True } # Check the input folder, and load the default if not provided: if input_folder is None: input_folder = os.environ.get( 'TESSPHOT_INPUT', os.path.join(os.path.dirname(__file__), 'tests', 'input')) # Check that the given input directory is indeed a directory: if not os.path.isdir(input_folder): raise NotADirectoryError( "The given path does not exist or is not a directory") # Make sure cameras and ccds are iterable: cameras = (1, 2, 3, 4) if cameras is None else (cameras, ) ccds = (1, 2, 3, 4) if ccds is None else (ccds, ) # Common settings for HDF5 datasets: args = {'compression': 'lzf', 'shuffle': True, 'fletcher32': True} imgchunks = (64, 64) # If no sectors are provided, find all the available FFI files and figure out # which sectors they are all from: if sectors is None: sectors = [] # TODO: Could we change this so we don't have to parse the filenames? for fname in find_ffi_files(input_folder): m = re.match(r'^tess.+-s(\d+)-.+\.fits', os.path.basename(fname)) if int(m.group(1)) not in sectors: sectors.append(int(m.group(1))) # Also collect sectors from TPFs. They are needed for ensuring that # catalogs are available. Can be added directly to the sectors list, # since the HDF5 creation below will simply skip any sectors with # no FFIs available for fname in find_tpf_files(input_folder): m = re.match(r'^.+-s(\d+)[-_].+_tp\.fits', os.path.basename(fname)) if int(m.group(1)) not in sectors: sectors.append(int(m.group(1))) logger.debug("Sectors found: %s", sectors) else: sectors = (sectors, ) # Check if any sectors were found/provided: if not sectors: logger.error("No sectors were found") return # Make sure that catalog files are available in the input directory. 
# If they are not already, they will be downloaded from the cache: for sector, camera, ccd in itertools.product(sectors, cameras, ccds): download_catalogs(input_folder, sector, camera=camera, ccd=ccd) # Get the number of processes we can spawn in case it is needed for calculations: threads = int( os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count())) logger.info("Using %d processes.", threads) # Start pool of workers: if threads > 1: pool = multiprocessing.Pool(threads) m = pool.imap else: m = map # Loop over each combination of camera and CCD: for sector, camera, ccd in itertools.product(sectors, cameras, ccds): logger.info("Running SECTOR=%s, CAMERA=%s, CCD=%s", sector, camera, ccd) tic_total = default_timer() # Find all the FFI files associated with this camera and CCD: files = find_ffi_files(input_folder, sector=sector, camera=camera, ccd=ccd) numfiles = len(files) logger.info("Number of files: %d", numfiles) if numfiles == 0: continue # Catalog file: catalog_file = find_catalog_files(input_folder, sector=sector, camera=camera, ccd=ccd) if len(catalog_file) != 1: logger.error( "Catalog file could not be found: SECTOR=%s, CAMERA=%s, CCD=%s", sector, camera, ccd) continue logger.debug("Catalog File: %s", catalog_file[0]) # Load catalog settings from the SQLite database: with contextlib.closing(sqlite3.connect(catalog_file[0])) as conn: conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute( "SELECT sector,reference_time FROM settings LIMIT 1;") row = cursor.fetchone() if row is None: raise OSError("Settings could not be loaded from catalog") #sector = row['sector'] sector_reference_time = row['reference_time'] cursor.close() # HDF5 file to be created/modified: if output_file is None: hdf_file = os.path.join( input_folder, 'sector{0:03d}_camera{1:d}_ccd{2:d}.hdf5'.format( sector, camera, ccd)) else: output_file = os.path.abspath(output_file) if not output_file.endswith('.hdf5'): output_file = output_file + '.hdf5' hdf_file = output_file logger.debug("HDF5 File: %s", hdf_file) # Get image shape from the first file: img = load_ffi_fits(files[0]) img_shape = img.shape # Open the HDF5 file for editing: with h5py.File(hdf_file, 'a', libver='latest') as hdf: images = hdf.require_group('images') images_err = hdf.require_group('images_err') backgrounds = hdf.require_group('backgrounds') pixel_flags = hdf.require_group('pixel_flags') if 'wcs' in hdf and isinstance(hdf['wcs'], h5py.Dataset): del hdf['wcs'] wcs = hdf.require_group('wcs') time_smooth = backgrounds.attrs.get('time_smooth', 3) flux_cutoff = backgrounds.attrs.get('flux_cutoff', 8e4) bkgiters = backgrounds.attrs.get('bkgiters', 3) radial_cutoff = backgrounds.attrs.get('radial_cutoff', 2400) radial_pixel_step = backgrounds.attrs.get('radial_pixel_step', 15) radial_smooth = backgrounds.attrs.get('radial_smooth', 3) if len(backgrounds) < numfiles: # Because HDF5 is stupid, and it cant figure out how to delete data from # the file once it is in, we are creating another temp hdf5 file that # will hold thing we dont need in the final HDF5 file. 
tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5') with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp: dset_bck_us = hdftmp.require_group( 'backgrounds_unsmoothed') if len(pixel_flags) < numfiles: logger.info('Calculating backgrounds...') # Create wrapper function freezing some of the # additional keyword inputs: fit_background_wrapper = functools.partial( fit_background, flux_cutoff=flux_cutoff, bkgiters=bkgiters, radial_cutoff=radial_cutoff, radial_pixel_step=radial_pixel_step, radial_smooth=radial_smooth) tic = default_timer() last_bck_fit = -1 if len(pixel_flags) == 0 else int( sorted(list(pixel_flags.keys()))[-1]) k = last_bck_fit + 1 for bck, mask in tqdm(m(fit_background_wrapper, files[k:]), initial=k, total=numfiles, **tqdm_settings): dset_name = '%04d' % k logger.debug("Background %d complete", k) logger.debug("Estimate: %f sec/image", (default_timer() - tic) / (k - last_bck_fit)) dset_bck_us.create_dataset(dset_name, data=bck) # If we ever defined pixel flags above 256, we have to change this to uint16 mask = np.asarray(np.where( mask, PixelQualityFlags.NotUsedForBackground, 0), dtype='uint8') pixel_flags.create_dataset(dset_name, data=mask, chunks=imgchunks, **args) k += 1 hdf.flush() hdftmp.flush() toc = default_timer() logger.info("Background estimation: %f sec/image", (toc - tic) / (numfiles - last_bck_fit)) # Smooth the backgrounds along the time axis: logger.info('Smoothing backgrounds in time...') backgrounds.attrs['time_smooth'] = time_smooth backgrounds.attrs['flux_cutoff'] = flux_cutoff backgrounds.attrs['bkgiters'] = bkgiters backgrounds.attrs['radial_cutoff'] = radial_cutoff backgrounds.attrs['radial_pixel_step'] = radial_pixel_step backgrounds.attrs['radial_smooth'] = radial_smooth w = time_smooth // 2 tic = default_timer() for k in trange(numfiles, **tqdm_settings): dset_name = '%04d' % k if dset_name in backgrounds: continue indx1 = max(k - w, 0) indx2 = min(k + w + 1, numfiles) logger.debug("Smoothing background %d: %d -> %d", k, indx1, indx2) block = np.empty( (img_shape[0], img_shape[1], indx2 - indx1), dtype='float32') logger.debug(block.shape) for i, k in enumerate(range(indx1, indx2)): block[:, :, i] = dset_bck_us['%04d' % k] bck = nanmean(block, axis=2) #bck_err = np.sqrt(nansum(block_err**2, axis=2)) / time_smooth backgrounds.create_dataset(dset_name, data=bck, chunks=imgchunks, **args) toc = default_timer() logger.info("Background smoothing: %f sec/image", (toc - tic) / numfiles) # Flush changes to the permanent HDF5 file: hdf.flush() # Delete the temporary HDF5 file again: if os.path.exists(tmp_hdf_file): os.remove(tmp_hdf_file) if len(images) < numfiles or len( wcs ) < numfiles or 'sumimage' not in hdf or 'backgrounds_pixels_used' not in hdf or 'time_start' not in hdf: SumImage = np.zeros((img_shape[0], img_shape[1]), dtype='float64') Nimg = np.zeros_like(SumImage, dtype='int32') time = np.empty(numfiles, dtype='float64') timecorr = np.empty(numfiles, dtype='float32') time_start = np.empty(numfiles, dtype='float64') time_stop = np.empty(numfiles, dtype='float64') cadenceno = np.empty(numfiles, dtype='int32') quality = np.empty(numfiles, dtype='int32') UsedInBackgrounds = np.zeros_like(SumImage, dtype='int32') # Save list of file paths to the HDF5 file: filenames = [ os.path.basename(fname).rstrip('.gz').encode( 'ascii', 'strict') for fname in files ] hdf.require_dataset('imagespaths', (numfiles, ), data=filenames, dtype=h5py.special_dtype(vlen=bytes), **args) is_tess = False attributes = { 'CAMERA': None, 'CCD': None, 'DATA_REL': None, 
'NUM_FRM': None, 'NREADOUT': None, 'CRMITEN': None, 'CRBLKSZ': None, 'CRSPOC': None } logger.info('Final processing of individual images...') tic = default_timer() for k, fname in enumerate(tqdm(files, **tqdm_settings)): dset_name = '%04d' % k # Load the FITS file data and the header: flux0, hdr, flux0_err = load_ffi_fits(fname, return_header=True, return_uncert=True) # Check if this is real TESS data: # Could proberly be done more elegant, but if it works, it works... if not is_tess and hdr.get( 'TELESCOP') == 'TESS' and hdr.get( 'NAXIS1') == 2136 and hdr.get( 'NAXIS2') == 2078: is_tess = True # Pick out the important bits from the header: # Keep time in BTJD. If we want BJD we could # simply add BJDREFI + BJDREFF: time_start[k] = hdr['TSTART'] time_stop[k] = hdr['TSTOP'] time[k] = 0.5 * (hdr['TSTART'] + hdr['TSTOP']) timecorr[k] = hdr.get('BARYCORR', 0) # Get cadence-numbers from headers, if they are available. # This header is not added before sector 6, so in that case # we are doing a simple scaling of the timestamps. if 'FFIINDEX' in hdr: cadenceno[k] = hdr['FFIINDEX'] elif is_tess: # The following numbers comes from unofficial communication # with Doug Caldwell and Roland Vanderspek: # The timestamp in TJD and the corresponding cadenceno: first_time = 0.5 * (1325.317007851970 + 1325.337841177751) - 3.9072474e-03 first_cadenceno = 4697 timedelt = 1800 / 86400 # Extracpolate the cadenceno as a simple linear relation: offset = first_cadenceno - first_time / timedelt cadenceno[k] = np.round((time[k] - timecorr[k]) / timedelt + offset) else: cadenceno[k] = k + 1 # Data quality flags: quality[k] = hdr.get('DQUALITY', 0) if k == 0: for key in attributes.keys(): attributes[key] = hdr.get(key) else: for key, value in attributes.items(): if hdr.get(key) != value: logger.error("%s is not constant!", key) # Find pixels marked for manual exclude: manexcl = pixel_manual_exclude(flux0, hdr) # Add manual excludes to pixel flags: if np.any(manexcl): pixel_flags[dset_name][ manexcl] |= PixelQualityFlags.ManualExclude if dset_name not in images: # Mask out manually excluded data before saving: flux0[manexcl] = np.nan flux0_err[manexcl] = np.nan # Load background from HDF file and subtract background from image, # if the background has not already been subtracted: if not hdr.get('BACKAPP', False): flux0 -= backgrounds[dset_name] # Save image subtracted the background in HDF5 file: images.create_dataset(dset_name, data=flux0, chunks=imgchunks, **args) images_err.create_dataset(dset_name, data=flux0_err, chunks=imgchunks, **args) else: flux0 = np.asarray(images[dset_name]) flux0[manexcl] = np.nan # Save the World Coordinate System of each image: if dset_name not in wcs: dset = wcs.create_dataset( dset_name, (1, ), dtype=h5py.special_dtype(vlen=bytes), **args) dset[0] = WCS(header=hdr).to_header_string( relax=True).strip().encode('ascii', 'strict') # Add together images for sum-image: if TESSQualityFlags.filter(quality[k]): Nimg += np.isfinite(flux0) replace(flux0, np.nan, 0) SumImage += flux0 # Add together the number of times each pixel was used in the background estimation: UsedInBackgrounds += ( np.asarray(pixel_flags[dset_name]) & PixelQualityFlags.NotUsedForBackground == 0) # Normalize sumimage SumImage /= Nimg # Single boolean image indicating if the pixel was (on average) used in the background estimation: if 'backgrounds_pixels_used' not in hdf: UsedInBackgrounds = (UsedInBackgrounds / numfiles > backgrounds_pixels_threshold) dset_uibkg = hdf.create_dataset('backgrounds_pixels_used', 
data=UsedInBackgrounds, dtype='bool', chunks=imgchunks, **args) dset_uibkg.attrs[ 'threshold'] = backgrounds_pixels_threshold # Save attributes images.attrs['SECTOR'] = sector for key, value in attributes.items(): logger.debug("Saving attribute %s = %s", key, value) images.attrs[key] = value # Set pixel offsets: if is_tess: images.attrs['PIXEL_OFFSET_ROW'] = 0 images.attrs['PIXEL_OFFSET_COLUMN'] = 44 else: images.attrs['PIXEL_OFFSET_ROW'] = 0 images.attrs['PIXEL_OFFSET_COLUMN'] = 0 # Add other arrays to HDF5 file: if 'time' in hdf: del hdf['time'] if 'timecorr' in hdf: del hdf['timecorr'] if 'time_start' in hdf: del hdf['time_start'] if 'time_stop' in hdf: del hdf['time_stop'] if 'sumimage' in hdf: del hdf['sumimage'] if 'cadenceno' in hdf: del hdf['cadenceno'] if 'quality' in hdf: del hdf['quality'] hdf.create_dataset('sumimage', data=SumImage, **args) hdf.create_dataset('time', data=time, **args) hdf.create_dataset('timecorr', data=timecorr, **args) hdf.create_dataset('time_start', data=time_start, **args) hdf.create_dataset('time_stop', data=time_stop, **args) hdf.create_dataset('cadenceno', data=cadenceno, **args) hdf.create_dataset('quality', data=quality, **args) hdf.flush() logger.info("Individual image processing: %f sec/image", (default_timer() - tic) / numfiles) else: # Extract things that are needed further down: SumImage = np.asarray(hdf['sumimage']) timecorr = np.asarray(hdf['timecorr']) time_start = np.asarray(hdf['time_start']) time_stop = np.asarray(hdf['time_stop']) quality = np.asarray(hdf['quality']) # Detections and flagging of Background Shenanigans: if pixel_flags.attrs.get('bkgshe_done', -1) < numfiles - 1: logger.info("Detecting background shenanigans...") tic_bkgshe = default_timer() # Load settings and create wrapper function with keywords set: bkgshe_threshold = pixel_flags.attrs.get( 'bkgshe_threshold', 40) pixel_flags.attrs['bkgshe_threshold'] = bkgshe_threshold pixel_background_shenanigans_wrapper = functools.partial( pixel_background_shenanigans, SumImage=SumImage) tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5') with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp: # Temporary dataset that will be used to store large array # of background shenanigans indicator images: pixel_flags_ind = hdftmp.require_dataset( 'pixel_flags_individual', shape=(SumImage.shape[0], SumImage.shape[1], numfiles), chunks=(SumImage.shape[0], SumImage.shape[1], 1), dtype='float32') # Run the background shenanigans extractor in parallel: last_bkgshe = pixel_flags_ind.attrs.get('bkgshe_done', -1) if last_bkgshe < numfiles - 1: tic = default_timer() k = last_bkgshe + 1 for bckshe in tqdm(m( pixel_background_shenanigans_wrapper, _iterate_hdf_group(images, start=k)), initial=k, total=numfiles, **tqdm_settings): pixel_flags_ind[:, :, k] = bckshe pixel_flags_ind.attrs['bkgshe_done'] = k k += 1 hdftmp.flush() logger.info("Background Shenanigans: %f sec/image", (default_timer() - tic) / (numfiles - last_bkgshe)) # Calculate the mean Background Shenanigans indicator: if 'mean_shenanigans' not in hdftmp: logger.info("Calculating mean shenanigans...") tic = default_timer() # Calculate robust mean by calculating the # median in chunks and then taking the mean of them. 
# This is to avoid loading the entire array into memory mean_shenanigans = np.zeros_like(SumImage, dtype='float64') block = 25 indicies = list(range(numfiles)) np.random.seed(0) np.random.shuffle(indicies) mean_shenanigans_block = np.empty( (SumImage.shape[0], SumImage.shape[1], block)) for k in trange(0, numfiles, block, **tqdm_settings): # Take median of a random block of images: for j, i in enumerate(indicies[k:k + block]): mean_shenanigans_block[:, :, j] = pixel_flags_ind[:, :, i] bckshe = nanmedian(mean_shenanigans_block, axis=2) # Add the median block to the mean image: replace(bckshe, np.NaN, 0) mean_shenanigans += bckshe mean_shenanigans /= np.ceil(numfiles / block) logger.info( "Mean Background Shenanigans: %f sec/image", (default_timer() - tic) / numfiles) # Save the mean shenanigans to the HDF5 file: hdftmp.create_dataset('mean_shenanigans', data=mean_shenanigans) else: mean_shenanigans = np.asarray( hdftmp['mean_shenanigans']) #msmax = max(np.abs(np.min(mean_shenanigans)), np.abs(np.max(mean_shenanigans))) #fig = plt.figure() #plot_image(mean_shenanigans, scale='linear', vmin=-msmax, vmax=msmax, cmap='coolwarm', make_cbar=True, xlabel=None, ylabel=None) #fig.savefig('test.png', bbox_inches='tight') logger.info("Setting background shenanigans...") tic = default_timer() for k in trange(numfiles, **tqdm_settings): dset_name = '%04d' % k bckshe = np.asarray(pixel_flags_ind[:, :, k]) #img = bckshe - mean_shenanigans #img[np.abs(img) <= bkgshe_threshold/2] = 0 #fig = plt.figure(figsize=(8,9)) #ax = fig.add_subplot(111) #plot_image(img, ax=ax, scale='linear', vmin=-bkgshe_threshold, vmax=bkgshe_threshold, xlabel=None, ylabel=None, cmap="RdBu_r", make_cbar=True) #ax.set_xticks([]) #ax.set_yticks([]) #fig.savefig(dset_name + '.png', bbox_inches='tight') #plt.close(fig) # Create the mask as anything that significantly pops out # (both positive and negative) in the image: bckshe = np.abs(bckshe - mean_shenanigans) > bkgshe_threshold # Clear any old flags: indx = (np.asarray(pixel_flags[dset_name]) & PixelQualityFlags.BackgroundShenanigans != 0) if np.any(indx): pixel_flags[dset_name][ indx] -= PixelQualityFlags.BackgroundShenanigans # Save the new flags to the permanent HDF5 file: if np.any(bckshe): pixel_flags[dset_name][ bckshe] |= PixelQualityFlags.BackgroundShenanigans pixel_flags.attrs['bkgshe_done'] = k hdf.flush() logger.info("Setting Background Shenanigans: %f sec/image", (default_timer() - tic) / numfiles) # Delete the temporary HDF5 file again: if os.path.exists(tmp_hdf_file): os.remove(tmp_hdf_file) logger.info("Total Background Shenanigans: %f sec/image", (default_timer() - tic_bkgshe) / numfiles) # Check that the time vector is sorted: if not np.all(hdf['time'][:-1] < hdf['time'][1:]): logger.error("Time vector is not sorted") return # Check that the sector reference time is within the timespan of the time vector: sector_reference_time_tjd = sector_reference_time - 2457000 if sector_reference_time_tjd < hdf['time'][ 0] or sector_reference_time_tjd > hdf['time'][-1]: logger.error("Sector reference time outside timespan of data") #return # Find the reference image: refindx = find_nearest(hdf['time'], sector_reference_time_tjd) logger.info("WCS reference frame: %d", refindx) # Save WCS to the file: wcs.attrs['ref_frame'] = refindx if calc_movement_kernel and 'movement_kernel' not in hdf: # Calculate image motion: logger.info("Calculation Image Movement Kernels...") imk = ImageMovementKernel(image_ref=images['%04d' % refindx], warpmode='translation') kernel = np.empty((numfiles, 
imk.n_params), dtype='float64') tic = default_timer() datasets = _iterate_hdf_group(images) for k, knl in enumerate( tqdm(m(imk.calc_kernel, datasets), **tqdm_settings)): kernel[k, :] = knl logger.debug("Kernel: %s", knl) logger.debug("Estimate: %f sec/image", (default_timer() - tic) / (k + 1)) toc = default_timer() logger.info("Movement Kernel: %f sec/image", (toc - tic) / numfiles) # Save Image Motion Kernel to HDF5 file: dset = hdf.create_dataset('movement_kernel', data=kernel, **args) dset.attrs['warpmode'] = imk.warpmode dset.attrs['ref_frame'] = refindx # Transfer quality flags from TPF files from the same CAMERA and CCD to the FFIs: if not hdf['quality'].attrs.get('TRANSFER_FROM_TPF', False): logger.info("Transfering QUALITY flags from TPFs to FFIs...") # Select (max) five random TPF targets from the given sector, camera and ccd: tpffiles = find_tpf_files(input_folder, sector=sector, camera=camera, ccd=ccd, findmax=5) if len(tpffiles) == 0: logger.warning( "No TPF files found for SECTOR=%d, CAMERA=%d, CCD=%d and quality flags could therefore not be propergated.", sector, camera, ccd) else: # Run through each of the found TPF files and build the quality column from them, # by simply setting the flag if it is found in any of the files: quality_tpf = np.zeros(numfiles, dtype='int32') for tpffile in tpffiles: quality_tpf |= quality_from_tpf( tpffile, time_start - timecorr, time_stop - timecorr) # Inspect the differences with the the qualities set in indx_diff = (quality | quality_tpf != quality) logger.info("%d qualities will be updated (%.1f%%).", np.sum(indx_diff), 100 * np.sum(indx_diff) / numfiles) # New quality: quality |= quality_tpf # Update the quality column in the HDF5 file: hdf['quality'][:] = quality hdf['quality'].attrs['TRANSFER_FROM_TPF'] = True hdf.flush() logger.info("Done.") logger.info("Total: %f sec/image", (default_timer() - tic_total) / numfiles) # Close workers again: if threads > 1: pool.close() pool.join()
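
# Isolated sketch of the sum-image accumulation inside prepare_photometry above:
# count finite pixels per position, zero the NaNs in-place with replace(),
# accumulate, and divide at the end so each pixel is the mean over the frames
# that actually contributed. The frames here are synthetic.
import numpy as np
from bottleneck import replace

frames = [np.array([[1.0, np.nan], [2.0, 4.0]]),
          np.array([[3.0, 6.0], [np.nan, 4.0]])]

SumImage = np.zeros((2, 2))
Nimg = np.zeros((2, 2), dtype='int32')
for flux in frames:
    Nimg += np.isfinite(flux)          # how many frames contribute to each pixel
    replace(flux, np.nan, 0)           # NaN -> 0 so the pixel adds nothing
    SumImage += flux
SumImage /= Nimg                       # per-pixel mean over the finite frames
print(SumImage)                        # -> [[2. 6.], [2. 4.]]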
def time_replace(self, dtype, shape, order):
    bn.replace(self.arr, self.old, self.new)
def __init__(self, protobuf): # Load the protobuf file: pd = PixelData() ph = PixelHeader() with gzip.open(protobuf, 'rb') as fid: d = fid.read() pd.ParseFromString(d) ph.ParseFromString(d) # self.PixelHeader = ph self.camera = int(self.PixelHeader.camera_number) self.ccd = int(self.PixelHeader.ccd_number) #print(ph) #print(pd.target_data) #print(pd.collateral_data) # Store pixel data as 1D arrays: self.target_data = np.array( pd.target_data[1:], dtype='float64') # FIXME: Why is there a leading zero?! self.collateral_data = np.array( pd.collateral_data[1:], dtype='float64') # FIXME: Why is there a leading one?! # Replace missing data with NaN: replace(self.target_data, 0xFFFFFFFF, np.nan) replace(self.collateral_data, 0xFFFFFFFF, np.nan) # Properties which will be filled out later: self.dark = None # TODO: All the following is actually common to all dataset with same target_pixel_table_id # Find rows and columns on the 2D CCD matching the 1D pixel data: target_pixel_table_id = int(self.PixelHeader.target_pixel_table_id) target_pixel_table = etree.parse( 'test_data/%04d-target-pixel-table.xml' % (target_pixel_table_id, )).getroot() Npixels = len(self.target_data) self.rows = np.zeros(Npixels, dtype='int32') self.columns = np.zeros(Npixels, dtype='int32') self.outputs = np.zeros(Npixels, dtype='str') for pixel in target_pixel_table.xpath( './ccd[@cameraNumber="%d"][@ccdNumber="%d"]/pixel' % (self.camera, self.ccd)): index = int(pixel.get('index')) - 1 column = int(pixel.get('column')) self.rows[index] = int(pixel.get('row')) self.columns[index] = column # Figure out what CCD outputs each column corresponds to: if column >= 1581: self.outputs[index] = 'D' elif column >= 1069: self.outputs[index] = 'C' elif column >= 557: self.outputs[index] = 'B' elif column >= 45: self.outputs[index] = 'A' # Convert the row and column addresses to indicies in the flatfield and 2d black images: self.index_columns = self.columns - 1 self.index_rows = 512 - self.rows # FIXME: 2078 instead of 512 #print(self.outputs) #print(self.rows, self.columns) #print(self.index_rows, self.index_columns) Ncollateral = len(self.collateral_data) collateral_rows = np.zeros(Ncollateral, dtype='int32') collateral_columns = np.zeros(Ncollateral, dtype='int32') collateral_pixel_table_id = int( self.PixelHeader.collateral_pixel_table_id) collateral_pixel_table = etree.parse( 'test_data/%04d-collateral-pixel-table.xml' % (collateral_pixel_table_id, )).getroot() for pixel in collateral_pixel_table.xpath( './ccd[@cameraNumber="%d"][@ccdNumber="%d"]/pixel' % (self.camera, self.ccd)): index = int(pixel.get('index')) - 1 collateral_rows[index] = int(pixel.get('row')) collateral_columns[index] = int(pixel.get('column')) unique_collateral_columns = np.unique(collateral_columns) Ncolcolumns = len(unique_collateral_columns) self.masked_smear = np.full((10, Ncolcolumns), np.nan, dtype='float64') self.virtual_smear = np.full((10, Ncolcolumns), np.nan, dtype='float64') for index, (row, column) in enumerate( zip(collateral_rows, collateral_columns)): index_column = np.where(column == unique_collateral_columns)[0] if column >= 2093 or column <= 44: # Virtual columns or Serial register columns pass elif row >= 2069: # Virtual rows index_row = (2078 - row) self.virtual_smear[index_row, index_column] = self.collateral_data[index] elif row >= 2059: # Smear rows index_row = (2068 - row) self.masked_smear[index_row, index_column] = self.collateral_data[index] elif row >= 2049: # Buffer rows pass else: print("Invalid collateral pixel: (%d,%d)" % (row, 
column)) self.collateral_columns = unique_collateral_columns print(self.collateral_columns) print(self.masked_smear) print(self.virtual_smear)
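
# Minimal illustration of the sentinel handling in __init__ above: raw pixel
# values arrive with 0xFFFFFFFF marking missing data, so they are stored as
# float64 and the sentinel is replaced by NaN in-place.
import numpy as np
from bottleneck import replace

raw = [120, 0xFFFFFFFF, 98, 0xFFFFFFFF, 131]
target_data = np.array(raw, dtype='float64')
replace(target_data, 0xFFFFFFFF, np.nan)
print(target_data)          # -> [120.  nan  98.  nan 131.]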
def data_table(self, data, headers=None): """ Return Orange.data.Table given rows of `headers` (iterable of iterable) and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might as well **have it sorted column-major**, e.g. ``order='F'``). Basically, the idea of subclasses is to produce those two iterables, however they might. If `headers` is not provided, the header rows are extracted from `data`, assuming they precede it. """ if not headers: headers, data = self.parse_headers(data) # Consider various header types (single-row, two-row, three-row, none) if 3 == len(headers): names, types, flags = map(list, headers) else: if 1 == len(headers): HEADER1_FLAG_SEP = '#' # First row format either: # 1) delimited column names # 2) -||- with type and flags prepended, separated by #, # e.g. d#sex,c#age,cC#IQ _flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i) for i in headers[0]]) names = list(names) elif 2 == len(headers): names, _flags = map(list, headers) else: # Use heuristics for everything names, _flags = [], [] types = [''.join(filter(str.isupper, flag)).lower() for flag in _flags] flags = [Flags.join(filter(str.islower, flag)) for flag in _flags] # Determine maximum row length rowlen = max(map(len, (names, types, flags))) def _equal_length(lst): lst.extend(['']*(rowlen - len(lst))) return lst # Ensure all data is of equal width in a column-contiguous array data = np.array([_equal_length(list(row)) for row in data if any(row)], copy=False, dtype=object, order='F') # Data may actually be longer than headers were try: rowlen = data.shape[1] except IndexError: pass else: for lst in (names, types, flags): _equal_length(lst) NAMEGEN = namegen('Feature ', 1) Xcols, attrs = [], [] Mcols, metas = [], [] Ycols, clses = [], [] Wcols = [] # Rename variables if necessary # Reusing across files still works if both files have same duplicates name_counts = Counter(names) del name_counts[""] if len(name_counts) != len(names) and name_counts: uses = {name: 0 for name, count in name_counts.items() if count > 1} for i, name in enumerate(names): if name in uses: uses[name] += 1 names[i] = "{}_{}".format(name, uses[name]) # Iterate through the columns for col in range(rowlen): flag = Flags(Flags.split(flags[col])) if flag.i: continue type_flag = types and types[col].strip() try: orig_values = [np.nan if i in MISSING_VALUES else i for i in (i.strip() for i in data[:, col])] except IndexError: # No data instances leads here orig_values = [] # In this case, coltype could be anything. 
It's set as-is # only to satisfy test_table.TableTestCase.test_append coltype = DiscreteVariable coltype_kwargs = {} valuemap = [] values = orig_values if type_flag in StringVariable.TYPE_HEADERS: coltype = StringVariable elif type_flag in ContinuousVariable.TYPE_HEADERS: coltype = ContinuousVariable try: values = [float(i) for i in orig_values] except ValueError: for row, num in enumerate(orig_values): try: float(num) except ValueError: break raise ValueError('Non-continuous value in (1-based) ' 'line {}, column {}'.format(row + len(headers) + 1, col + 1)) elif type_flag in TimeVariable.TYPE_HEADERS: coltype = TimeVariable elif (type_flag in DiscreteVariable.TYPE_HEADERS or _RE_DISCRETE_LIST.match(type_flag)): if _RE_DISCRETE_LIST.match(type_flag): valuemap = Flags.split(type_flag) coltype_kwargs.update(ordered=True) else: valuemap = sorted(set(orig_values) - {np.nan}) else: # No known type specified, use heuristics is_discrete = is_discrete_values(orig_values) if is_discrete: valuemap = sorted(is_discrete) else: try: values = [float(i) for i in orig_values] except ValueError: tvar = TimeVariable('_') try: values = [tvar.parse(i) for i in orig_values] except ValueError: coltype = StringVariable else: coltype = TimeVariable else: coltype = ContinuousVariable if valuemap: # Map discrete data to ints def valuemap_index(val): try: return valuemap.index(val) except ValueError: return np.nan values = np.vectorize(valuemap_index, otypes=[float])(orig_values) coltype = DiscreteVariable coltype_kwargs.update(values=valuemap) if coltype is StringVariable: values = ['' if i is np.nan else i for i in orig_values] if flag.m or coltype is StringVariable: append_to = (Mcols, metas) elif flag.w: append_to = (Wcols, None) elif flag.c: append_to = (Ycols, clses) else: append_to = (Xcols, attrs) cols, domain_vars = append_to cols.append(col) var = None if domain_vars is not None: if names and names[col]: # Use existing variable if available var = coltype.make(names[col].strip(), **coltype_kwargs) else: # Never use existing for un-named variables var = coltype(next(NAMEGEN), **coltype_kwargs) var.attributes.update(flag.attributes) domain_vars.append(var) # Reorder discrete values to match existing variable if var.is_discrete and not var.ordered: new_order, old_order = var.values, coltype_kwargs.get('values', var.values) if new_order != old_order: offset = len(new_order) column = values if data.ndim > 1 else data column += offset for i, val in enumerate(var.values): try: oldval = old_order.index(val) except ValueError: continue bn.replace(column, offset + oldval, new_order.index(val)) if isinstance(var, TimeVariable) or coltype is TimeVariable: # Re-parse the values because only now after coltype.make call # above, variable var is the correct one _var = var if isinstance(var, TimeVariable) else TimeVariable('_') values = [_var.parse(i) for i in orig_values] # Write back the changed data. This is needeed to pass the # correct, converted values into Table.from_numpy below try: data[:, col] = values except IndexError: pass domain = Domain(attrs, clses, metas) if not data.size: return Table.from_domain(domain, 0) table = Table.from_numpy(domain, data[:, Xcols].astype(float, order='C'), data[:, Ycols].astype(float, order='C'), data[:, Mcols].astype(object, order='C'), data[:, Wcols].astype(float, order='C')) return table
def data_table(self, data, headers=None): """ Return Orange.data.Table given rows of `headers` (iterable of iterable) and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might as well **have it sorted column-major**, e.g. ``order='F'``). Basically, the idea of subclasses is to produce those two iterables, however they might. If `headers` is not provided, the header rows are extracted from `data`, assuming they precede it. """ if not headers: headers, data = self.parse_headers(data) # Consider various header types (single-row, two-row, three-row, none) if 3 == len(headers): names, types, flags = map(list, headers) else: if 1 == len(headers): HEADER1_FLAG_SEP = '#' # First row format either: # 1) delimited column names # 2) -||- with type and flags prepended, separated by #, # e.g. d#sex,c#age,cC#IQ _flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i) for i in headers[0]]) names = list(names) elif 2 == len(headers): names, _flags = map(list, headers) else: # Use heuristics for everything names, _flags = [], [] types = [''.join(filter(str.isupper, flag)).lower() for flag in _flags] flags = [Flags.join(filter(str.islower, flag)) for flag in _flags] # Determine maximum row length rowlen = max(map(len, (names, types, flags))) def _equal_length(lst): lst.extend(['']*(rowlen - len(lst))) return lst # Ensure all data is of equal width in a column-contiguous array data = np.array([_equal_length(list(row)) for row in data if any(row)], copy=False, dtype=object, order='F') # Data may actually be longer than headers were try: rowlen = data.shape[1] except IndexError: pass else: for lst in (names, types, flags): _equal_length(lst) NAMEGEN = namegen('Feature ', 1) Xcols, attrs = [], [] Mcols, metas = [], [] Ycols, clses = [], [] Wcols = [] # Iterate through the columns for col in range(rowlen): flag = Flags(Flags.split(flags[col])) if flag.i: continue type_flag = types and types[col].strip() try: orig_values = [np.nan if i in MISSING_VALUES else i for i in (i.strip() for i in data[:, col])] except IndexError: # No data instances leads here orig_values = [] # In this case, coltype could be anything. 
It's set as-is # only to satisfy test_table.TableTestCase.test_append coltype = DiscreteVariable coltype_kwargs = {} valuemap = [] values = orig_values if type_flag in StringVariable.TYPE_HEADERS: coltype = StringVariable elif type_flag in ContinuousVariable.TYPE_HEADERS: coltype = ContinuousVariable try: values = [float(i) for i in orig_values] except ValueError: for row, num in enumerate(orig_values): try: float(num) except ValueError: break raise ValueError('Non-continuous value in (1-based) ' 'line {}, column {}'.format(row + len(headers) + 1, col + 1)) elif type_flag in TimeVariable.TYPE_HEADERS: coltype = TimeVariable elif (type_flag in DiscreteVariable.TYPE_HEADERS or _RE_DISCRETE_LIST.match(type_flag)): if _RE_DISCRETE_LIST.match(type_flag): valuemap = Flags.split(type_flag) coltype_kwargs.update(ordered=True) else: valuemap = sorted(set(orig_values) - {np.nan}) else: # No known type specified, use heuristics is_discrete = is_discrete_values(orig_values) if is_discrete: valuemap = sorted(is_discrete) else: try: values = [float(i) for i in orig_values] except ValueError: tvar = TimeVariable('_') try: values = [tvar.parse(i) for i in orig_values] except ValueError: coltype = StringVariable else: coltype = TimeVariable else: coltype = ContinuousVariable if valuemap: # Map discrete data to ints def valuemap_index(val): try: return valuemap.index(val) except ValueError: return np.nan values = np.vectorize(valuemap_index, otypes=[float])(orig_values) coltype = DiscreteVariable coltype_kwargs.update(values=valuemap) if coltype is StringVariable: values = ['' if i is np.nan else i for i in orig_values] if flag.m or coltype is StringVariable: append_to = (Mcols, metas) elif flag.w: append_to = (Wcols, None) elif flag.c: append_to = (Ycols, clses) else: append_to = (Xcols, attrs) cols, domain_vars = append_to cols.append(col) if domain_vars is not None: if names and names[col]: # Use existing variable if available var = coltype.make(names[col].strip(), **coltype_kwargs) else: # Never use existing for un-named variables var = coltype(next(NAMEGEN), **coltype_kwargs) var.attributes.update(flag.attributes) domain_vars.append(var) # Reorder discrete values to match existing variable if var.is_discrete and not var.ordered: new_order, old_order = var.values, coltype_kwargs.get('values', var.values) if new_order != old_order: offset = len(new_order) column = values if data.ndim > 1 else data column += offset for i, val in enumerate(var.values): try: oldval = old_order.index(val) except ValueError: continue bn.replace(column, offset + oldval, new_order.index(val)) if coltype is TimeVariable: # Re-parse the values because only now after coltype.make call # above, variable var is the correct one values = [var.parse(i) for i in orig_values] # Write back the changed data. This is needeed to pass the # correct, converted values into Table.from_numpy below try: data[:, col] = values except IndexError: pass domain = Domain(attrs, clses, metas) if not data.size: return Table.from_domain(domain, 0) table = Table.from_numpy(domain, data[:, Xcols].astype(float, order='C'), data[:, Ycols].astype(float, order='C'), data[:, Mcols].astype(object, order='C'), data[:, Wcols].astype(float, order='C')) return table
def create_hdf5(input_folder=None, cameras=None, ccds=None):
    """
    Restructure individual FFI images (in FITS format) into
    a combined HDF5 file which is used in the photometry pipeline.

    In this process the background flux in each FFI is
    estimated using the `backgrounds.fit_background` function.

    Parameters:
        input_folder (string): Input folder containing the FFI files to be processed.
            If ``None``, the input directory in the environment variable
            ``TESSPHOT_INPUT`` is used.
        cameras (iterable of integers, optional): TESS camera number (1-4).
            If ``None``, all cameras will be processed.
        ccds (iterable of integers, optional): TESS CCD number (1-4).
            If ``None``, all CCDs will be processed.

    Raises:
        IOError: If the specified ``input_folder`` is not an existing directory,
            or if the settings table could not be loaded from the catalog SQLite file.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """

    logger = logging.getLogger(__name__)

    # Check the input folder, and load the default if not provided:
    if input_folder is None:
        input_folder = os.environ.get('TESSPHOT_INPUT',
                                      os.path.join(os.path.dirname(__file__), 'tests', 'input'))

    # Check that the given input directory is indeed a directory:
    if not os.path.isdir(input_folder):
        raise IOError("The given path does not exist or is not a directory")

    # Make sure cameras and ccds are iterable:
    cameras = (1, 2, 3, 4) if cameras is None else (cameras, )
    ccds = (1, 2, 3, 4) if ccds is None else (ccds, )

    # Common settings for HDF5 datasets:
    args = {
        'compression': 'lzf',
        'shuffle': True,
        'fletcher32': True
    }
    imgchunks = (64, 64)

    # Get the number of processes we can spawn, in case it is needed for the calculations:
    threads = int(os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))
    logger.info("Using %d processes.", threads)

    # Loop over each combination of camera and CCD:
    for camera, ccd in itertools.product(cameras, ccds):
        logger.info("Running CAMERA=%s, CCD=%s", camera, ccd)
        tic_total = default_timer()

        # Find all the FFI files associated with this camera and CCD:
        files = find_ffi_files(input_folder, camera, ccd)
        numfiles = len(files)
        logger.info("Number of files: %d", numfiles)
        if numfiles == 0:
            continue

        # Catalog file:
        catalog_file = os.path.join(input_folder, 'catalog_camera{0:d}_ccd{1:d}.sqlite'.format(camera, ccd))
        logger.debug("Catalog File: %s", catalog_file)
        if not os.path.exists(catalog_file):
            logger.error("Catalog file could not be found: '%s'", catalog_file)
            continue

        # Load catalog settings from the SQLite database:
        conn = sqlite3.connect(catalog_file)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("SELECT sector,reference_time FROM settings LIMIT 1;")
        row = cursor.fetchone()
        if row is None:
            raise IOError("Settings could not be loaded from catalog")
        #sector = row['sector']
        sector_reference_time = row['reference_time']
        cursor.close()
        conn.close()

        # HDF5 file to be created/modified:
        hdf_file = os.path.join(input_folder, 'camera{0:d}_ccd{1:d}.hdf5'.format(camera, ccd))
        logger.debug("HDF5 File: %s", hdf_file)

        # Get the image shape from the first file:
        img = load_ffi_fits(files[0])
        img_shape = img.shape

        # Open the HDF5 file for editing:
        with h5py.File(hdf_file, 'a', libver='latest') as hdf:
            images = hdf.require_group('images')
            images_err = hdf.require_group('images_err')
            backgrounds = hdf.require_group('backgrounds')
            masks = hdf.require_group('backgrounds_masks')
            if 'wcs' in hdf and isinstance(hdf['wcs'], h5py.Dataset):
                del hdf['wcs']
            wcs = hdf.require_group('wcs')
            time_smooth = backgrounds.attrs.get('time_smooth', 3)

            if len(backgrounds) < numfiles:
                # Because HDF5 cannot reclaim space from data once it has been written
                # to the file, we create a separate temporary HDF5 file that holds the
                # things we do not need in the final HDF5 file.
                tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5')
                with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp:
                    dset_bck_us = hdftmp.require_group('backgrounds_unsmoothed')

                    if len(masks) < numfiles:
                        tic = default_timer()

                        if threads > 1:
                            pool = multiprocessing.Pool(threads)
                            m = pool.imap
                        else:
                            m = map

                        # Pick up the background fitting where it was left off:
                        last_bck_fit = -1 if len(masks) == 0 else int(sorted(list(masks.keys()))[-1])
                        k = last_bck_fit + 1
                        for bck, mask in m(fit_background, files[k:]):
                            dset_name = '%04d' % k
                            logger.debug("Background %d complete", k)
                            logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k-last_bck_fit))

                            dset_bck_us.create_dataset(dset_name, data=bck)

                            indices = np.asarray(np.nonzero(mask), dtype='uint16')
                            masks.create_dataset(dset_name, data=indices, **args)

                            k += 1

                        if threads > 1:
                            pool.close()
                            pool.join()

                        hdf.flush()
                        hdftmp.flush()
                        toc = default_timer()
                        logger.info("Background estimation: %f sec/image", (toc-tic)/(numfiles-last_bck_fit))

                    # Smooth the backgrounds along the time axis:
                    backgrounds.attrs['time_smooth'] = time_smooth
                    w = time_smooth//2
                    tic = default_timer()
                    for k in range(numfiles):
                        dset_name = '%04d' % k
                        if dset_name in backgrounds:
                            continue

                        indx1 = max(k-w, 0)
                        indx2 = min(k+w+1, numfiles)
                        logger.debug("Smoothing background %d: %d -> %d", k, indx1, indx2)

                        block = np.empty((img_shape[0], img_shape[1], indx2-indx1), dtype='float32')
                        logger.debug(block.shape)
                        # Use a separate index (kk) to avoid shadowing the outer loop variable k:
                        for i, kk in enumerate(range(indx1, indx2)):
                            block[:, :, i] = dset_bck_us['%04d' % kk]

                        bck = nanmean(block, axis=2)
                        #bck_err = np.sqrt(ss(block_err, axis=2)) / time_smooth

                        backgrounds.create_dataset(dset_name, data=bck, chunks=imgchunks, **args)

                    toc = default_timer()
                    logger.info("Background smoothing: %f sec/image", (toc-tic)/numfiles)

                    # Flush changes to the permanent HDF5 file:
                    hdf.flush()

                # Delete the temporary HDF5 file again:
                if os.path.exists(tmp_hdf_file):
                    os.remove(tmp_hdf_file)

            if len(images) < numfiles or len(wcs) < numfiles or 'sumimage' not in hdf:
                SumImage = np.zeros((img_shape[0], img_shape[1]), dtype='float64')
                time = np.empty(numfiles, dtype='float64')
                timecorr = np.empty(numfiles, dtype='float32')
                cadenceno = np.empty(numfiles, dtype='int32')
                quality = np.empty(numfiles, dtype='int32')

                # Save the list of file paths to the HDF5 file:
                filenames = [os.path.basename(fname).rstrip('.gz').encode('ascii', 'strict') for fname in files]
                hdf.require_dataset('imagespaths', (numfiles,), data=filenames, dtype=h5py.special_dtype(vlen=bytes), **args)

                is_tess = False
                attributes = {
                    'DATA_REL': None,
                    'NUM_FRM': None,
                    'CRMITEN': None,
                    'CRBLKSZ': None,
                    'CRSPOC': None
                }
                for k, fname in enumerate(files):
                    logger.debug("Processing image: %.2f%% - %s", 100*k/numfiles, fname)
                    dset_name = '%04d' % k

                    # Load the FITS file data and the header:
                    flux0, hdr, flux0_err = load_ffi_fits(fname, return_header=True, return_uncert=True)

                    # Check if this is real TESS data:
                    # Could probably be done more elegantly, but if it works, it works...
                    if not is_tess and hdr.get('TELESCOP') == 'TESS' and hdr.get('NAXIS1') == 2136 and hdr.get('NAXIS2') == 2078:
                        is_tess = True

                    # Pick out the important bits from the header:
                    # Keep time in BTJD. If we want BJD we could
                    # simply add BJDREFI + BJDREFF:
                    time[k] = 0.5*(hdr['TSTART'] + hdr['TSTOP'])
                    timecorr[k] = hdr.get('BARYCORR', 0)

                    # The cadence number is currently not in the FFIs.
                    # The following numbers come from unofficial communication
                    # with Doug Caldwell and Roland Vanderspek:
                    # The timestamp in TJD and the corresponding cadence number:
                    first_time = 0.5*(1325.317007851970 + 1325.337841177751) - 3.9072474E-03
                    first_cadenceno = 4697
                    timedelt = 1800/86400
                    # Extrapolate the cadence number as a simple linear relation:
                    offset = first_cadenceno - first_time/timedelt
                    cadenceno[k] = np.round((time[k] - timecorr[k])/timedelt + offset)

                    # Data quality flags:
                    quality[k] = hdr.get('DQUALITY', 0)

                    if k == 0:
                        for key in attributes.keys():
                            attributes[key] = hdr.get(key)
                    else:
                        for key, value in attributes.items():
                            if hdr.get(key) != value:
                                logger.error("%s is not constant!", key)

                    #if hdr.get('SECTOR') != sector:
                    #    logger.error("Incorrect SECTOR: Catalog=%s, FITS=%s", sector, hdr.get('SECTOR'))

                    if hdr.get('CAMERA') != camera or hdr.get('CCD') != ccd:
                        logger.error("Incorrect CAMERA/CCD: FITS=(%s, %s)", hdr.get('CAMERA'), hdr.get('CCD'))

                    if dset_name not in images:
                        # Load the background from the HDF5 file and subtract it from the image,
                        # if the background has not already been subtracted:
                        if not hdr.get('BACKAPP', False):
                            flux0 -= backgrounds[dset_name]

                        # Save the background-subtracted image in the HDF5 file:
                        images.create_dataset(dset_name, data=flux0, chunks=imgchunks, **args)
                        images_err.create_dataset(dset_name, data=flux0_err, chunks=imgchunks, **args)
                    else:
                        flux0 = np.asarray(images[dset_name])

                    # Save the World Coordinate System of each image:
                    if dset_name not in wcs:
                        dset = wcs.create_dataset(dset_name, (1,), dtype=h5py.special_dtype(vlen=bytes), **args)
                        dset[0] = WCS(header=hdr).to_header_string(relax=True).strip().encode('ascii', 'strict')

                    # Add together images for the sum-image:
                    if TESSQualityFlags.filter(quality[k]):
                        replace(flux0, np.nan, 0)
                        SumImage += flux0

                SumImage /= numfiles

                # Save attributes:
                for key, value in attributes.items():
                    logger.debug("Saving attribute %s = %s", key, value)
                    images.attrs[key] = value

                # Set pixel offsets:
                if is_tess:
                    images.attrs['PIXEL_OFFSET_ROW'] = 0
                    images.attrs['PIXEL_OFFSET_COLUMN'] = 44
                else:
                    images.attrs['PIXEL_OFFSET_ROW'] = 0
                    images.attrs['PIXEL_OFFSET_COLUMN'] = 0

                # Add other arrays to the HDF5 file:
                if 'time' in hdf: del hdf['time']
                if 'timecorr' in hdf: del hdf['timecorr']
                if 'sumimage' in hdf: del hdf['sumimage']
                if 'cadenceno' in hdf: del hdf['cadenceno']
                if 'quality' in hdf: del hdf['quality']
                hdf.create_dataset('sumimage', data=SumImage, **args)
                hdf.create_dataset('time', data=time, **args)
                hdf.create_dataset('timecorr', data=timecorr, **args)
                hdf.create_dataset('cadenceno', data=cadenceno, **args)
                hdf.create_dataset('quality', data=quality, **args)
                hdf.flush()

            # Check that the time vector is sorted:
            if not np.all(hdf['time'][:-1] < hdf['time'][1:]):
                logger.error("Time vector is not sorted")
                return

            # Check that the sector reference time is within the timespan of the time vector:
            sector_reference_time_tjd = sector_reference_time - 2457000
            if sector_reference_time_tjd < hdf['time'][0] or sector_reference_time_tjd > hdf['time'][-1]:
                logger.error("Sector reference time outside timespan of data")
                #return

            # Find the reference image closest in time to the sector reference time:
            refindx = np.searchsorted(hdf['time'], sector_reference_time_tjd, side='left')
            if refindx > 0 and (refindx == len(hdf['time'])
                    or abs(sector_reference_time_tjd - hdf['time'][refindx-1]) < abs(sector_reference_time_tjd - hdf['time'][refindx])):
                refindx -= 1
            logger.info("WCS reference frame: %d", refindx)

            # Save the WCS reference frame to the file:
            wcs.attrs['ref_frame'] = refindx

            if 'movement_kernel' not in hdf:
                # Calculate the image motion:
                logger.info("Calculating Image Movement Kernels...")
                imk = ImageMovementKernel(image_ref=images['%04d' % refindx], warpmode='translation')
                kernel = np.empty((numfiles, imk.n_params), dtype='float64')

                tic = default_timer()
                if threads > 1:
                    pool = multiprocessing.Pool(threads)

                    datasets = _iterate_hdf_group(images)
                    for k, knl in enumerate(pool.imap(imk.calc_kernel, datasets)):
                        kernel[k, :] = knl
                        logger.debug("Kernel: %s", knl)
                        logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k+1))

                    pool.close()
                    pool.join()
                else:
                    for k, dset in enumerate(images):
                        kernel[k, :] = imk.calc_kernel(images[dset])
                        logger.info("Kernel: %s", kernel[k, :])
                        logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k+1))

                toc = default_timer()
                logger.info("Movement Kernel: %f sec/image", (toc-tic)/numfiles)

                # Save the Image Movement Kernel to the HDF5 file:
                dset = hdf.create_dataset('movement_kernel', data=kernel, **args)
                dset.attrs['warpmode'] = imk.warpmode
                dset.attrs['ref_frame'] = refindx

            logger.info("Done.")
            logger.info("Total: %f sec/image", (default_timer()-tic_total)/numfiles)