Example #1
	def _prepare_flux(self, flux):
		"""
		Preparation of images for Enhanced Correlation Coefficient (ECC) Maximization
		estimation of movement - used for estimation of jitter.

		Parameters:
			flux (array): flux pixel image

		Returns:
			array: Gradient (using Scharr method) of image in logarithmic units.

		.. codeauthor:: Mikkel N. Lund
		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

		# Convert to logarithmic units, avoiding taking log if zero:
		flux = np.asarray(flux)
		flux = np.log10(flux - np.nanmin(flux) + 1.0)

		# Convert image to flux in range -1 to 1 (for gradient determination)
		fmax = np.nanmax(flux)
		fmin = np.nanmin(flux)
		ran = np.abs(fmax - fmin)
		flux1 = -1 + 2*((flux - fmin)/ran)

		# Calculate Scharr gradient
		flux1 = scharr(flux1)

		# Remove potential NaNs in gradient image
		replace(flux1, np.nan, 0)

		# Make sure image is in proper units for ECC routine
		return np.asarray(flux1, dtype='float32')
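
A standalone sketch of the same preparation steps, with random data standing in for a real flux image. It assumes `scharr` comes from `skimage.filters` and `replace` from `bottleneck`, since the snippet's imports are not shown:

import numpy as np
from bottleneck import replace
from skimage.filters import scharr

flux = np.random.rand(64, 64) * 1e4
flux[10, 10] = np.nan  # a bad pixel

# Logarithmic units, avoiding taking the log of zero:
flux = np.log10(flux - np.nanmin(flux) + 1.0)

# Rescale to the range -1..1 before taking the gradient:
fmin, fmax = np.nanmin(flux), np.nanmax(flux)
grad = scharr(-1 + 2 * (flux - fmin) / (fmax - fmin))

# Zero out NaNs in-place so the ECC routine receives finite float32 data:
replace(grad, np.nan, 0)
grad = np.asarray(grad, dtype='float32')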
Example #2
    def smear(self, img):
        """CCD dark current and smear correction.

        TODO:
            - Should we weight everything with the number of rows used in masked vs virtual regions?
            - Should we take self.frametransfer_time into account?
            - Cosmic ray rejection requires images before and after in time?
        """
        self.logger.info("Doing smear correction...")

        # Remove cosmic rays in collateral data:
        # TODO: Can cosmic rays also show up in virtual pixels? If so, also include img.virtual_smear
        #index_collateral_cosmicrays = cosmic_rays(img.masked_smear)
        index_collateral_cosmicrays = np.zeros_like(img.masked_smear,
                                                    dtype='bool')
        img.masked_smear[index_collateral_cosmicrays] = np.nan

        # Average the masked and virtual smear across their rows:
        masked_smear = nanmedian(img.masked_smear, axis=0)
        virtual_smear = nanmedian(img.virtual_smear, axis=0)

        # Estimate dark current:
        # TODO: Should this be self.frametransfer_time?
        fdark = nanmedian(masked_smear - virtual_smear *
                          (self.exposure_time + self.readout_time) /
                          self.exposure_time)
        img.dark = fdark  # Save for later use
        self.logger.info('Dark current: %f', img.dark)
        if np.isnan(fdark):
            fdark = 0

        # Correct the smear regions for the dark current:
        masked_smear -= fdark
        virtual_smear -= fdark * (self.exposure_time +
                                  self.readout_time) / self.exposure_time

        # Weights from number of pixels in different regions:
        Nms = np.sum(~np.isnan(img.masked_smear), axis=0)
        Nvs = np.sum(~np.isnan(img.virtual_smear), axis=0)
        c_ms = Nms / np.maximum(Nms + Nvs, 1)
        c_vs = Nvs / np.maximum(Nms + Nvs, 1)

        # Weights as in Kepler where you only have one row in each sector:
        #g_ms = ~np.isnan(masked_smear)
        #g_vs = ~np.isnan(virtual_smear)
        #c_ms = g_ms/np.maximum(g_ms + g_vs, 1)
        #c_vs = g_vs/np.maximum(g_ms + g_vs, 1)

        # Estimate the smear for all columns, taking into account
        # that some columns could be missing:
        replace(masked_smear, np.nan, 0)
        replace(virtual_smear, np.nan, 0)
        fsmear = c_ms * masked_smear + c_vs * virtual_smear

        # Correct the science pixels for dark current and smear:
        img.target_data -= fdark
        for k, col in enumerate(img.collateral_columns):
            img.target_data[img.columns == col] -= fsmear[k]

        return img
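
A toy illustration of the column weighting above: a column that is missing from one smear region falls back entirely on the other region, and bn.replace zeroes the NaN medians so the weighted sum stays finite. The numbers are made up for the example.

import numpy as np
import bottleneck as bn

masked_smear = np.array([1.0, np.nan, 3.0])   # per-column medians
virtual_smear = np.array([0.9, 2.1, np.nan])
Nms = np.array([10, 0, 10])                   # rows contributing per column
Nvs = np.array([10, 10, 0])

c_ms = Nms / np.maximum(Nms + Nvs, 1)
c_vs = Nvs / np.maximum(Nms + Nvs, 1)

bn.replace(masked_smear, np.nan, 0)
bn.replace(virtual_smear, np.nan, 0)
fsmear = c_ms * masked_smear + c_vs * virtual_smear
print(fsmear)   # -> [0.95, 2.1, 3.0]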
Example #3
    def init(self, mode='random', assign=False):
        # Predefined assignment vector
        if assign:
            self.assignment = np.array(assign)
            self.cells_per_cluster = {}
            cl, cl_size = np.unique(assign, return_counts=True)
            for i in range(cl.size):
                bn.replace(self.assignment, cl[i], i)
                self.cells_per_cluster[i] = cl_size[i]
            self.parameters = self._init_cl_params('assign')
        elif mode == 'separate':
            self.assignment = np.arange(self.cells_total, dtype=int)
            self.cells_per_cluster = {i: 1 for i in range(self.cells_total)}
            self.parameters = self._init_cl_params(mode)
        # All cells in one cluster
        elif mode == 'together':
            self.assignment = np.zeros(self.cells_total, dtype=int)
            self.cells_per_cluster = {0: self.cells_total}
            self.parameters = self._init_cl_params(mode)
        # Completely random
        elif mode == 'random':
            self.assignment = np.random.randint(0,
                                                high=self.cells_total,
                                                size=self.cells_total)
            self.cells_per_cluster = {}
            cl, cl_size = np.unique(self.assignment, return_counts=True)
            for i in range(cl.size):
                bn.replace(self.assignment, cl[i], i)
                self.cells_per_cluster[i] = cl_size[i]
            self.parameters = self._init_cl_params(mode)
        else:
            raise TypeError(f'Unsupported Initialization: {mode}')

        self.init_DP_prior()
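
The relabelling idiom used in the `assign` and `random` branches, in isolation: np.unique gives the distinct (sorted) cluster ids, and one in-place bn.replace per id maps them onto consecutive integers 0..K-1.

import numpy as np
import bottleneck as bn

assignment = np.array([7, 3, 7, 9, 3, 3])
cl, cl_size = np.unique(assignment, return_counts=True)   # [3 7 9], [3 2 1]
for i in range(cl.size):
    bn.replace(assignment, cl[i], i)

print(assignment)   # [1 0 1 2 0 0]
print(cl_size)      # [3 2 1] -- cells per (relabelled) cluster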
Example #4
def test_replace_view(dtype):
    array = np.arange(20, dtype=dtype)
    view = array[::2]

    bn.replace(view, 5, -1)
    assert view.min() == 0
    assert array.min() == 0
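
What this test guards against: bn.replace must honour the view's strides and touch only every other element of the parent array. A quick sketch with a value that actually sits inside the view:

import numpy as np
import bottleneck as bn

array = np.arange(20)
view = array[::2]          # elements 0, 2, 4, ...
bn.replace(view, 4, -1)    # 4 lives at array[4], which is in the view
print(array[4], array[5])  # -1 5  (the neighbouring element is untouched)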
Example #5
def nanToZeros(matrix, value=0):
	'''Change all NaN values in a given nD-array to `value` (default 0).'''
	#whereAreNaNs = np.isnan(matrix)
	matrix2 = deepcopy(matrix)
	#matrix2[whereAreNaNs] = 0
	bn.replace(matrix2, np.nan, value)
	return matrix2
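
A minimal usage sketch of the same idea: bn.replace mutates its argument in place, so the deepcopy above is what keeps the caller's array untouched. With a plain ndarray, ndarray.copy() is enough, and np.nan_to_num is the pure-NumPy alternative when a copy is acceptable.

import numpy as np
import bottleneck as bn

m = np.array([[1.0, np.nan], [np.nan, 4.0]])
m2 = m.copy()                 # bn.replace works in place, so copy first
bn.replace(m2, np.nan, 0)
print(m2)                     # [[1. 0.] [0. 4.]]
print(np.isnan(m).any())      # True -- the original is untouched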
Example #6
    def reorder_values_array(self, arr, variables):
        for col, var in enumerate(variables):
            if var.fix_order and len(var.values) < 1000:
                new_order = var.ordered_values(var.values)
                if new_order == var.values:
                    continue
                arr[:, col] += 1000
                for i, val in enumerate(var.values):
                    bn.replace(arr[:, col], 1000 + i, new_order.index(val))
                var.values = new_order
            delattr(var, "fix_order")
Example #7
    def reorder_values_array(self, arr, variables):
        for col, var in enumerate(variables):
            if var.fix_order and len(var.values) < 1000:
                new_order = var.ordered_values(var.values)
                if new_order == var.values:
                    continue
                arr[:, col] += 1000
                for i, val in enumerate(var.values):
                    bn.replace(arr[:, col], 1000 + i, new_order.index(val))
                var.values = new_order
            delattr(var, "fix_order")
Example #8
def sanitize_variable(valuemap,
                      values,
                      orig_values,
                      coltype,
                      coltype_kwargs,
                      domain_vars,
                      existing_var,
                      new_var_name,
                      data=None):
    if valuemap:
        # Map discrete data to ints
        def valuemap_index(val):
            try:
                return valuemap.index(val)
            except ValueError:
                return np.nan

        values = np.vectorize(valuemap_index, otypes=[float])(orig_values)
        coltype_kwargs.update(values=valuemap)

    if coltype is StringVariable:
        values = ['' if i is np.nan else i for i in orig_values]

    var = None
    if domain_vars is not None:
        if existing_var:
            # Use existing variable if available
            var = coltype.make(existing_var.strip(), **coltype_kwargs)
        else:
            # Never use existing for un-named variables
            var = coltype(new_var_name, **coltype_kwargs)

        # Reorder discrete values to match existing variable
        if var.is_discrete and not var.ordered:
            new_order, old_order = var.values, coltype_kwargs.get(
                'values', var.values)
            if new_order != old_order:
                offset = len(new_order)
                column = values if data.ndim > 1 else data
                column += offset
                for i, val in enumerate(var.values):
                    try:
                        oldval = old_order.index(val)
                    except ValueError:
                        continue
                    bn.replace(column, offset + oldval, new_order.index(val))

    if isinstance(var, TimeVariable) or coltype is TimeVariable:
        # Re-parse the values because only now after coltype.make call
        # above, variable var is the correct one
        _var = var if isinstance(var, TimeVariable) else TimeVariable('_')
        values = [_var.parse(i) for i in orig_values]

    return values, var
Example #9
def test_replace_nan_int():
    "Test replace, int array, old=nan, new=0"
    a = np.arange(2*3*4).reshape(2, 3, 4)
    actual = a.copy()
    bn.replace(actual, np.nan, 0)
    desired = a.copy()
    msg = 'replace failed on int input looking for nans'
    assert_array_equal(actual, desired, err_msg=msg)
    actual = a.copy()
    bn.slow.replace(actual, np.nan, 0)
    msg = 'slow.replace failed on int input looking for nans'
    assert_array_equal(actual, desired, err_msg=msg)
Example #10
def test_replace_nan_int(dtype):
    """Test replace, int array, old=nan, new=0"""
    a = np.arange(2 * 3 * 4, dtype=dtype).reshape(2, 3, 4)
    actual = a.copy()
    bn.replace(actual, np.nan, 0)
    desired = a.copy()
    msg = "replace failed on int input looking for nans"
    assert_array_equal(actual, desired, err_msg=msg)
    actual = a.copy()
    bn.slow.replace(actual, np.nan, 0)
    msg = "slow.replace failed on int input looking for nans"
    assert_array_equal(actual, desired, err_msg=msg)
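
A quick check of the behaviour the two tests above rely on: asking bn.replace to find NaN in an integer array is a no-op, because an integer array cannot hold NaN in the first place.

import numpy as np
import bottleneck as bn

a = np.arange(6, dtype=np.int64)
bn.replace(a, np.nan, 0)
print(a)    # [0 1 2 3 4 5] -- unchanged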
Example #11
    def reorder_values():
        new_order, old_order = \
            var.values, coltype_kwargs.get('values', var.values)
        if new_order != old_order:
            offset = len(new_order)
            column = values if data.ndim > 1 else data
            column += offset
            for _, val in enumerate(var.values):
                try:
                    oldval = old_order.index(val)
                except ValueError:
                    continue
                bn.replace(column, offset + oldval, new_order.index(val))
Example #12
    def reorder_values():
        new_order, old_order = \
            var.values, coltype_kwargs.get('values', var.values)
        if new_order != old_order:
            offset = len(new_order)
            column = values if data.ndim > 1 else data
            column += offset
            for _, val in enumerate(var.values):
                try:
                    oldval = old_order.index(val)
                except ValueError:
                    continue
                bn.replace(column, offset + oldval, new_order.index(val))
Example #13
def sanitize_variable(valuemap, values, orig_values, coltype, coltype_kwargs,
                      domain_vars, existing_var, new_var_name, data=None):
    if valuemap:
        # Map discrete data to ints
        def valuemap_index(val):
            try:
                return valuemap.index(val)
            except ValueError:
                return np.nan

        values = np.vectorize(valuemap_index, otypes=[float])(orig_values)
        coltype_kwargs.update(values=valuemap)

    if coltype is StringVariable:
        values = ['' if i is np.nan else i for i in orig_values]

    var = None
    if domain_vars is not None:
        if existing_var:
            # Use existing variable if available
            var = coltype.make(existing_var.strip(), **coltype_kwargs)
        else:
            # Never use existing for un-named variables
            var = coltype(new_var_name, **coltype_kwargs)

        # Reorder discrete values to match existing variable
        if var.is_discrete and not var.ordered:
            new_order, old_order = var.values, coltype_kwargs.get('values',
                                                                  var.values)
            if new_order != old_order:
                offset = len(new_order)
                column = values if data.ndim > 1 else data
                column += offset
                for i, val in enumerate(var.values):
                    try:
                        oldval = old_order.index(val)
                    except ValueError:
                        continue
                    bn.replace(column, offset + oldval, new_order.index(val))

    if isinstance(var, TimeVariable) or coltype is TimeVariable:
        # Re-parse the values because only now after coltype.make call
        # above, variable var is the correct one
        _var = var if isinstance(var, TimeVariable) else TimeVariable('_')
        values = [_var.parse(i) for i in orig_values]

    return values, var
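
The offset trick that Examples #6-#8 and #11-#13 all rely on, shown in isolation: shifting the column by len(new_order) first keeps old codes and new codes from colliding while bn.replace rewrites them one value at a time. The colour names are made up for the illustration.

import numpy as np
import bottleneck as bn

old_order = ['red', 'green', 'blue']
new_order = ['blue', 'red', 'green']
column = np.array([0.0, 2.0, 1.0, 0.0])   # codes under old_order

offset = len(new_order)
column += offset
for i, val in enumerate(new_order):
    bn.replace(column, offset + old_order.index(val), i)

print(column)   # [1. 0. 2. 1.] -- codes now index into new_order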
Example #14
    def spike_sep(self):
        """
		Separate CBVs into a "slow" and a "spiky" component.

		This is done by filtering the deta and identifying outlier
		with a peak-finding algorithm.

		.. codeauthor:: Mikkel N. Lund <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)
        logger.info('running CBV spike separation')
        logger.info('------------------------------------')

        if 'cbv-single-scale' in self.hdf and 'cbv-spike' in self.hdf:
            logger.info(
                'Separated CBVs for SECTOR=%d, CADENCE=%d, AREA=%d already calculated.',
                self.sector, self.cadence, self.cbv_area)
            return
        logger.info(
            'Computing CBV spike separation for SECTOR=%d, CADENCE=%d, AREA=%d...',
            self.sector, self.cadence, self.cbv_area)

        # Load initial CBV from "compute_CBV"
        cbv = self.hdf['cbv-ini']

        # padding window, just needs to be bigger than savgol filtering window
        wmir = 50

        # Initiate arrays for cleaned and spike CBVs
        cbv_new = np.zeros_like(cbv)
        cbv_spike = np.zeros_like(cbv)

        # Iterate over basis vectors
        xs = np.arange(0, cbv.shape[0] + 2 * wmir - 2)
        for j in range(cbv.shape[1]):

            # Pad ends for better peak detection at boundaries of data
            data0 = cbv[:, j]
            data0 = np.append(np.flip(data0[0:wmir])[:-1], data0)
            data0 = np.append(data0, np.flip(data0[-wmir::])[1:])
            data = data0.copy()

            # Iterate peak detection, with different savgol filter widths:
            for w in (31, 29, 27, 25, 23):
                # For savgol filter data must be continuous
                data2 = pchip_interpolate(xs[np.isfinite(data)],
                                          data[np.isfinite(data)], xs)

                # Smooth, filtered version of data, to use to identify "outliers", i.e., spikes
                y = savgol_filter(data2, w, 2, mode='constant')
                y2 = data2 - y

                # Run peak detection
                sigma = mad_to_sigma * nanmedian(np.abs(y2))
                peaks, properties = find_peaks(np.abs(y2),
                                               prominence=(3 * sigma, None),
                                               wlen=500)

                data[peaks] = np.nan

            # Interpolate CBVs where spike has been identified
            data = pchip_interpolate(xs[np.isfinite(data)],
                                     data[np.isfinite(data)], xs)

            # Remove padded ends and store in CBV matrices
            # Spike signal is difference between original data and data with masked spikes
            cbv_spike[:, j] = data0[wmir - 1:-wmir + 1] - data[wmir - 1:-wmir + 1]
            replace(cbv_spike[:, j], np.nan, 0)

            cbv_new[:, j] = data[wmir - 1:-wmir + 1]

        # Save files
        self.hdf.create_dataset('cbv-single-scale', data=cbv_new)
        self.hdf.create_dataset('cbv-spike', data=cbv_spike)

        # Signal-to-Noise test (here only for plotting)
        indx_lowsnr = cbv_snr_test(cbv_new, self.threshold_snrtest)

        # Plot all the CBVs:
        fig, axes = plt.subplots(int(np.ceil(self.ncomponents / 2)),
                                 2,
                                 figsize=(12, 16))
        fig2, axes2 = plt.subplots(int(np.ceil(self.ncomponents / 2)),
                                   2,
                                   figsize=(12, 16))
        fig.subplots_adjust(wspace=0.23,
                            hspace=0.46,
                            left=0.08,
                            right=0.96,
                            top=0.94,
                            bottom=0.055)
        fig2.subplots_adjust(wspace=0.23,
                             hspace=0.46,
                             left=0.08,
                             right=0.96,
                             top=0.94,
                             bottom=0.055)

        axes = axes.flatten()
        axes2 = axes2.flatten()
        for k in range(cbv_new.shape[1]):
            if indx_lowsnr is not None and indx_lowsnr[k]:
                col = 'c'
            else:
                col = 'k'

            axes[k].plot(cbv_new[:, k], ls='-', color=col)
            axes[k].set_title(f'Basis Vector {k+1:d}')

            axes2[k].plot(cbv_spike[:, k], ls='-', color=col)
            axes2[k].set_title(f'Spike Basis Vector {k+1:d}')

        fig.savefig(
            os.path.join(
                self.cbv_plot_folder,
                f'cbvs-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area:d}.png'
            ))
        fig2.savefig(
            os.path.join(
                self.cbv_plot_folder,
                f'spike-cbvs-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area:d}.png'
            ))
        plt.close(fig)
        plt.close(fig2)
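
A 1-D sketch of the spike/slow split performed above, on toy data rather than real CBVs. The imports, the mad_to_sigma factor (taken here as the usual 1.4826 MAD-to-sigma conversion) and the omission of the end-padding are assumptions for the sketch, since those pieces live elsewhere in the module:

import numpy as np
from scipy.signal import savgol_filter, find_peaks
from scipy.interpolate import pchip_interpolate
from bottleneck import replace, nanmedian

mad_to_sigma = 1.4826
xs = np.arange(500)
data = np.sin(xs / 40.0) + 0.01 * np.random.randn(500)
data[100] += 1.0          # inject a spike
data0 = data.copy()

# Iterate peak detection with different savgol filter widths:
for w in (31, 29, 27, 25, 23):
    data2 = pchip_interpolate(xs[np.isfinite(data)], data[np.isfinite(data)], xs)
    y2 = data2 - savgol_filter(data2, w, 2, mode='constant')
    sigma = mad_to_sigma * nanmedian(np.abs(y2))
    peaks, _ = find_peaks(np.abs(y2), prominence=(3 * sigma, None), wlen=500)
    data[peaks] = np.nan   # mask detected spikes

# Slow component from interpolation over the masked points;
# the spike component is whatever is left over:
slow = pchip_interpolate(xs[np.isfinite(data)], data[np.isfinite(data)], xs)
spike = data0 - slow
replace(spike, np.nan, 0)   # same NaN guard as in the original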
Example #15
    def stats(self, lmean=False, lmed=False, lskew=False, lvar=False,
              lstd=False, lcoefvar=False, lperc=False, p=0.95):
        """Calculate some statistics among every realisation.

        Each statistic is calculated node-wise along the complete number of
        realisations.

        Parameters
        ----------
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the percentile `100 * (1 - p)`.
        p : number, default 0.95
            Probability value.

        Returns
        -------
        retdict : dict of GridArr
            Dictionary containing one GridArr for each calculated statistic.

        See Also
        --------
        stats_area : same but considering a circular (and horizontal) area of
        a specified radius around a given point.

        """
        # check if the map files are already opened or not
        if isinstance(self.files[0], file):
            opened_files = True
        else:
            opened_files = False

        if lmean:
            meanmap = np.zeros(self.cells)
        if lmed:
            medmap = np.zeros(self.cells)
        if lskew:
            skewmap = np.zeros(self.cells)
        if lvar:
            varmap = np.zeros(self.cells)
        if lstd:
            stdmap = np.zeros(self.cells)
        if lcoefvar:
            coefvarmap = np.zeros(self.cells)
        if lperc:
            percmap = np.zeros((self.cells, 2))

        arr = np.zeros(self.nfiles)
        skip = True
        offset = os.SEEK_SET
        for cell in xrange(self.cells - self.header):
            for i, gridfile in enumerate(self.files):
                # deal with map files not open yet
                if opened_files:
                    grid = gridfile
                else:
                    grid = open(gridfile, 'rb')
                    grid.seek(offset)

                if skip:
                    skip_lines(grid, self.header)
                arr[i] = grid.readline()

            if not opened_files:
                offset = grid.tell()
                grid.close()

            skip = False
            # replace no-data values with NaN
            bn.replace(arr, self.nodata, np.nan)
            if lmean:
                meanmap[cell] = bn.nanmean(arr)
            if lmed:
                medmap[cell] = bn.nanmedian(arr)
            if lskew:
                skewmap[cell] = pd.Series(arr).skew()
            if lvar:
                varmap[cell] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdmap[cell] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                if lstd and lmean:
                    coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100
                else:
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarmap[cell] = std / mean * 100
            if lperc:
                percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2,
                                                         1 - (1 - p) / 2])

        retdict = dict()

        if lmean:
            meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=meanmap)
            retdict['meanmap'] = meangrid
        if lmed:
            medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=medmap)
            retdict['medianmap'] = medgrid
        if lskew:
            skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=skewmap)
            retdict['skewmap'] = skewgrid
        if lvar:
            vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=varmap)
            retdict['varmap'] = vargrid
        if lstd:
            stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=stdmap)
            retdict['stdmap'] = stdgrid
        if lcoefvar:
            coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy,
                                  dz=self.dz, nodata=self.nodata,
                                  val=coefvarmap)
            retdict['coefvarmap'] = coefvargrid
        if lperc:
            percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=percmap)
            retdict['percmap'] = percgrid

        return retdict
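
The core of the per-cell loop above, in isolation: swap the grid's no-data marker for NaN in place, then let bottleneck's NaN-aware reductions ignore the missing realisations. The values are made up.

import numpy as np
import bottleneck as bn

nodata = -999.0
arr = np.array([1.2, -999.0, 3.4, 2.8, -999.0])

bn.replace(arr, nodata, np.nan)
print(bn.nanmean(arr))          # 2.466...
print(bn.nanstd(arr, ddof=1))   # sample standard deviation, NaNs ignored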
Example #16
    def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False,
                   lvar=False, lstd=False, lcoefvar=False, lperc=False,
                   p=0.95, save=False):
        """Calculate some statistics among every realisation, considering a
        circular (only horizontally) area of radius `tol` around the point
        located at `loc`.

        Parameters
        ----------
        loc : array_like
            Location of the vertical line [x, y].
        tol : number, default 0
            Tolerance radius used to search for neighbour nodes.
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the percentile `100 * (1 - p)`.
        p : number, default 0.95
            Probability value.
        save : boolean, default False
            Write the points used to calculate the chosen statistics in
            PointSet format to a file named 'sim values at (x, y, line).prn'.

        Returns
        -------
        statspset : PointSet
            PointSet instance containing the calculated statistics.

        .. TODO: check stats variance with geoms

        """
        if lmean:
            meanline = np.zeros(self.dz)
        if lmed:
            medline = np.zeros(self.dz)
        if lskew:
            skewline = np.zeros(self.dz)
        if lvar:
            varline = np.zeros(self.dz)
        if lstd:
            stdline = np.zeros(self.dz)
        if lcoefvar:
            coefvarline = np.zeros(self.dz)
        if lperc:
            percline = np.zeros((self.dz, 2))

        # convert the coordinates of the first point to grid nodes
        loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz],
                            [self.xi, self.yi, self.zi])[:2]
        # find the nodes coordinates within a circle centred in the first point
        neighbours_nodes = circle(loc[0], loc[1], tol)
        # compute the lines numbers for each point in the neighbourhood, across
        # each grid layer. this yields a N*M matrix, with N equal to the number
        # of neighbour nodes, and M equal to the number of layers in the grid.
        neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz])
                            for node in neighbours_nodes]
        # sort the lines in ascending order
        neighbours_lines = np.sort(neighbours_lines, axis=0)
        # create an array to store the neighbour nodes in each grid file
        nnodes = neighbours_lines.shape[0]
        arr = np.zeros(self.nfiles * nnodes)

        skip = True
        curr_line = np.zeros(self.nfiles)

        for layer in xrange(neighbours_lines.shape[1]):
            for i, line in enumerate(neighbours_lines[:, layer]):
                for j, grid in enumerate(self.files):
                    # skip header lines only once per grid file
                    if skip and self.header:
                        skip_lines(grid, self.header)

                    # advance to the next line with a neighbour node
                    skip_lines(grid, int(line - curr_line[j] - 1))
                    # read the line and store its value
                    a = grid.readline()
                    arr[i + j * nnodes] = float(a)

                    curr_line[j] = line
                    skip = False

            # replace no-data values with NaN
            bn.replace(arr, self.nodata, np.nan)
            # compute the required statistics
            if lmean:
                meanline[layer] = bn.nanmean(arr)
            if lmed:
                medline[layer] = bn.nanmedian(arr)
            if lskew:
                skewline[layer] = pd.Series(arr).skew()
            if lvar:
                varline[layer] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdline[layer] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                if lstd and lmean:
                    coefvarline[layer] = stdline[layer] / meanline[layer] * 100
                else:
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarline[layer] = std / mean * 100
            if lperc:
                percline[layer] = pd.Series(arr).quantile([(1 - p) / 2,
                                                           1 - (1 - p) / 2])
            if save and tol == 0:
                # FIXME: not working with the tolerance feature
                # need to adjust the arrpset or cherry-pick arr
                arrpset = PointSet('realisations at location ({0}, {1}, {2})'.
                                   format(loc[0], loc[1], layer * self.cellz +
                                          self.zi), self.nodata, 3,
                                   ['x', 'y', 'value'],
                                   values=np.zeros((self.nfiles, 3)))
                arrout = os.path.join(os.path.dirname(self.files[0].name),
                                      'sim values at ({0}, {1}, {2}).prn'.format(
                                          loc[0], loc[1], layer * self.cellz
                                          + self.zi))
                arrpset.values.iloc[:, 2] = arr
                arrpset.values.iloc[:, :2] = np.repeat(np.array(loc)
                                                       [np.newaxis, :],
                                                       self.nfiles, axis=0)
                arrpset.save(arrout, header=True)

        ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew))
        if lperc:
            ncols += 2
        statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'.
                             format(loc[0], loc[1]), nodata=self.nodata,
                             nvars=3 + ncols, varnames=['x', 'y', 'z'],
                             values=np.zeros((self.dz, 3 + ncols)))

        statspset.values.iloc[:, :3] = (np.column_stack
                                        (((np.repeat(np.array(loc)
                                                     [np.newaxis, :], self.dz,
                                                     axis=0)),
                                          np.arange(self.zi, self.zi +
                                                    self.cellz * self.dz))))

        j = 3
        if lmean:
            statspset.varnames.append('mean')
            statspset.values.iloc[:, j] = meanline
            j += 1
        if lmed:
            statspset.varnames.append('median')
            statspset.values.iloc[:, j] = medline
            j += 1
        if lskew:
            statspset.varnames.append('skewness')
            statspset.values.iloc[:, j] = skewline
            j += 1
        if lvar:
            statspset.varnames.append('variance')
            statspset.values.iloc[:, j] = varline
            j += 1
        if lstd:
            statspset.varnames.append('std')
            statspset.values.iloc[:, j] = stdline
            j += 1
        if lcoefvar:
            statspset.varnames.append('coefvar')
            statspset.values.iloc[:, j] = coefvarline
            j += 1
        if lperc:
            statspset.varnames.append('lperc')
            statspset.varnames.append('rperc')
            statspset.values.iloc[:, -2:] = percline

        # reset the reading pointer in each grid file
        self.reset_read()
        # update varnames
        statspset.flush_varnames()
        return statspset
Example #17
def test_replace_newaxis(dtype):
    array = np.ones((2, 2), dtype=dtype)[..., np.newaxis]
    result = bn.replace(array, 1, 2)
    assert (result == 2).all().all()
Example #18
def test_replace_bad_args():
    array = np.ones((10, 10))
    bad_vals = [None, "", [0], "0"]
    for bad_val in bad_vals:
        with pytest.raises(TypeError, match="`old` must be a number"):
            bn.replace(array, bad_val, 0)

        with pytest.raises(TypeError, match="`new` must be a number"):
            bn.replace(array, 0, bad_val)

    with pytest.raises(TypeError, match="Cannot find `a` keyword input"):
        bn.replace(foo=array)

    with pytest.raises(TypeError, match="Cannot find `old` keyword input"):
        bn.replace(a=array)

    with pytest.raises(TypeError, match="Cannot find `new` keyword input"):
        bn.replace(a=array, old=0)

    with pytest.raises(TypeError, match="wrong number of arguments 4"):
        bn.replace(array, 0)

    with pytest.raises(TypeError, match="wrong number of arguments 4"):
        bn.replace(array, 0, 0, 0)
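
The error messages above come from bottleneck's own argument parsing. For completeness, a sketch of the happy path, assuming the keyword form works the way these error branches suggest:

import numpy as np
import bottleneck as bn

a = np.zeros(4)
bn.replace(a, 0, 7)                 # positional: a, old, new
bn.replace(a=a, old=7.0, new=1.0)   # keyword form referenced by the tests
print(a)                            # [1. 1. 1. 1.]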
Example #19
def prepare_photometry(input_folder=None,
                       sectors=None,
                       cameras=None,
                       ccds=None,
                       calc_movement_kernel=False,
                       backgrounds_pixels_threshold=0.5,
                       output_file=None):
    """
	Restructure individual FFI images (in FITS format) into
	a combined HDF5 file which is used in the photometry
	pipeline.

	In this process the background flux in each FFI is
	estimated using the `backgrounds.fit_background` function.

	Parameters:
		input_folder (string): Input folder to create TODO list for. If ``None``, the input directory in the environment variable ``TESSPHOT_INPUT`` is used.
		cameras (iterable of integers, optional): TESS camera number (1-4). If ``None``, all cameras will be processed.
		ccds (iterable of integers, optional): TESS CCD number (1-4). If ``None``, all cameras will be processed.
		calc_movement_kernel (boolean, optional): Should Image Movement Kernels be calculated for each image?
			If it is not calculated, only the default WCS movement kernel will be available when doing the folllowing photometry. Default=False.
		backgrounds_pixels_threshold (float): Percentage of times a pixel has to use used in background calculation in order to be included in the
			final list of contributing pixels. Default=0.5.
		output_file (string, optional): The file path where the output file should be saved.
			If not specified, the file will be saved into the input directory.
			Should only be used for testing, since the file would (proberly) otherwise end up with
			a wrong file name for running with the rest of the pipeline.

	Raises:
		NotADirectoryError: If the specified ``input_folder`` is not an existing directory or if settings table could not be loaded from the catalog SQLite file.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    logger = logging.getLogger(__name__)
    tqdm_settings = {
        'disable': not logger.isEnabledFor(logging.INFO),
        'dynamic_ncols': True
    }

    # Check the input folder, and load the default if not provided:
    if input_folder is None:
        input_folder = os.environ.get(
            'TESSPHOT_INPUT',
            os.path.join(os.path.dirname(__file__), 'tests', 'input'))

    # Check that the given input directory is indeed a directory:
    if not os.path.isdir(input_folder):
        raise NotADirectoryError(
            "The given path does not exist or is not a directory")

    # Make sure cameras and ccds are iterable:
    cameras = (1, 2, 3, 4) if cameras is None else (cameras, )
    ccds = (1, 2, 3, 4) if ccds is None else (ccds, )

    # Common settings for HDF5 datasets:
    args = {'compression': 'lzf', 'shuffle': True, 'fletcher32': True}
    imgchunks = (64, 64)

    # If no sectors are provided, find all the available FFI files and figure out
    # which sectors they are all from:
    if sectors is None:
        sectors = []

        # TODO: Could we change this so we don't have to parse the filenames?
        for fname in find_ffi_files(input_folder):
            m = re.match(r'^tess.+-s(\d+)-.+\.fits', os.path.basename(fname))
            if int(m.group(1)) not in sectors:
                sectors.append(int(m.group(1)))

        # Also collect sectors from TPFs. They are needed for ensuring that
        # catalogs are available. Can be added directly to the sectors list,
        # since the HDF5 creation below will simply skip any sectors with
        # no FFIs available
        for fname in find_tpf_files(input_folder):
            m = re.match(r'^.+-s(\d+)[-_].+_tp\.fits', os.path.basename(fname))
            if int(m.group(1)) not in sectors:
                sectors.append(int(m.group(1)))

        logger.debug("Sectors found: %s", sectors)
    else:
        sectors = (sectors, )

    # Check if any sectors were found/provided:
    if not sectors:
        logger.error("No sectors were found")
        return

    # Make sure that catalog files are available in the input directory.
    # If they are not already, they will be downloaded from the cache:
    for sector, camera, ccd in itertools.product(sectors, cameras, ccds):
        download_catalogs(input_folder, sector, camera=camera, ccd=ccd)

    # Get the number of processes we can spawn in case it is needed for calculations:
    threads = int(
        os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))
    logger.info("Using %d processes.", threads)

    # Start pool of workers:
    if threads > 1:
        pool = multiprocessing.Pool(threads)
        m = pool.imap
    else:
        m = map

    # Loop over each combination of camera and CCD:
    for sector, camera, ccd in itertools.product(sectors, cameras, ccds):
        logger.info("Running SECTOR=%s, CAMERA=%s, CCD=%s", sector, camera,
                    ccd)
        tic_total = default_timer()

        # Find all the FFI files associated with this camera and CCD:
        files = find_ffi_files(input_folder,
                               sector=sector,
                               camera=camera,
                               ccd=ccd)
        numfiles = len(files)
        logger.info("Number of files: %d", numfiles)
        if numfiles == 0:
            continue

        # Catalog file:
        catalog_file = find_catalog_files(input_folder,
                                          sector=sector,
                                          camera=camera,
                                          ccd=ccd)
        if len(catalog_file) != 1:
            logger.error(
                "Catalog file could not be found: SECTOR=%s, CAMERA=%s, CCD=%s",
                sector, camera, ccd)
            continue
        logger.debug("Catalog File: %s", catalog_file[0])

        # Load catalog settings from the SQLite database:
        with contextlib.closing(sqlite3.connect(catalog_file[0])) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute(
                "SELECT sector,reference_time FROM settings LIMIT 1;")
            row = cursor.fetchone()
            if row is None:
                raise OSError("Settings could not be loaded from catalog")
            #sector = row['sector']
            sector_reference_time = row['reference_time']
            cursor.close()

        # HDF5 file to be created/modified:
        if output_file is None:
            hdf_file = os.path.join(
                input_folder, 'sector{0:03d}_camera{1:d}_ccd{2:d}.hdf5'.format(
                    sector, camera, ccd))
        else:
            output_file = os.path.abspath(output_file)
            if not output_file.endswith('.hdf5'):
                output_file = output_file + '.hdf5'
            hdf_file = output_file
        logger.debug("HDF5 File: %s", hdf_file)

        # Get image shape from the first file:
        img = load_ffi_fits(files[0])
        img_shape = img.shape

        # Open the HDF5 file for editing:
        with h5py.File(hdf_file, 'a', libver='latest') as hdf:

            images = hdf.require_group('images')
            images_err = hdf.require_group('images_err')
            backgrounds = hdf.require_group('backgrounds')
            pixel_flags = hdf.require_group('pixel_flags')
            if 'wcs' in hdf and isinstance(hdf['wcs'], h5py.Dataset):
                del hdf['wcs']
            wcs = hdf.require_group('wcs')
            time_smooth = backgrounds.attrs.get('time_smooth', 3)
            flux_cutoff = backgrounds.attrs.get('flux_cutoff', 8e4)
            bkgiters = backgrounds.attrs.get('bkgiters', 3)
            radial_cutoff = backgrounds.attrs.get('radial_cutoff', 2400)
            radial_pixel_step = backgrounds.attrs.get('radial_pixel_step', 15)
            radial_smooth = backgrounds.attrs.get('radial_smooth', 3)

            if len(backgrounds) < numfiles:
                # Because HDF5 is stupid and can't figure out how to delete data from
                # the file once it is in, we are creating another temporary HDF5 file
                # that will hold things we don't need in the final HDF5 file.
                tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5')
                with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp:
                    dset_bck_us = hdftmp.require_group(
                        'backgrounds_unsmoothed')

                    if len(pixel_flags) < numfiles:
                        logger.info('Calculating backgrounds...')

                        # Create wrapper function freezing some of the
                        # additional keyword inputs:
                        fit_background_wrapper = functools.partial(
                            fit_background,
                            flux_cutoff=flux_cutoff,
                            bkgiters=bkgiters,
                            radial_cutoff=radial_cutoff,
                            radial_pixel_step=radial_pixel_step,
                            radial_smooth=radial_smooth)

                        tic = default_timer()

                        last_bck_fit = -1 if len(pixel_flags) == 0 else int(
                            sorted(list(pixel_flags.keys()))[-1])
                        k = last_bck_fit + 1
                        for bck, mask in tqdm(m(fit_background_wrapper,
                                                files[k:]),
                                              initial=k,
                                              total=numfiles,
                                              **tqdm_settings):
                            dset_name = '%04d' % k
                            logger.debug("Background %d complete", k)
                            logger.debug("Estimate: %f sec/image",
                                         (default_timer() - tic) /
                                         (k - last_bck_fit))

                            dset_bck_us.create_dataset(dset_name, data=bck)

                            # If we ever define pixel flags above 256, we will have to change this to uint16
                            mask = np.asarray(np.where(
                                mask, PixelQualityFlags.NotUsedForBackground,
                                0),
                                              dtype='uint8')
                            pixel_flags.create_dataset(dset_name,
                                                       data=mask,
                                                       chunks=imgchunks,
                                                       **args)

                            k += 1

                        hdf.flush()
                        hdftmp.flush()
                        toc = default_timer()
                        logger.info("Background estimation: %f sec/image",
                                    (toc - tic) / (numfiles - last_bck_fit))

                    # Smooth the backgrounds along the time axis:
                    logger.info('Smoothing backgrounds in time...')
                    backgrounds.attrs['time_smooth'] = time_smooth
                    backgrounds.attrs['flux_cutoff'] = flux_cutoff
                    backgrounds.attrs['bkgiters'] = bkgiters
                    backgrounds.attrs['radial_cutoff'] = radial_cutoff
                    backgrounds.attrs['radial_pixel_step'] = radial_pixel_step
                    backgrounds.attrs['radial_smooth'] = radial_smooth
                    w = time_smooth // 2
                    tic = default_timer()
                    for k in trange(numfiles, **tqdm_settings):
                        dset_name = '%04d' % k
                        if dset_name in backgrounds: continue

                        indx1 = max(k - w, 0)
                        indx2 = min(k + w + 1, numfiles)
                        logger.debug("Smoothing background %d: %d -> %d", k,
                                     indx1, indx2)

                        block = np.empty(
                            (img_shape[0], img_shape[1], indx2 - indx1),
                            dtype='float32')
                        logger.debug(block.shape)
                        for i, k in enumerate(range(indx1, indx2)):
                            block[:, :, i] = dset_bck_us['%04d' % k]

                        bck = nanmean(block, axis=2)
                        #bck_err = np.sqrt(nansum(block_err**2, axis=2)) / time_smooth

                        backgrounds.create_dataset(dset_name,
                                                   data=bck,
                                                   chunks=imgchunks,
                                                   **args)

                    toc = default_timer()
                    logger.info("Background smoothing: %f sec/image",
                                (toc - tic) / numfiles)

                # Flush changes to the permanent HDF5 file:
                hdf.flush()

                # Delete the temporary HDF5 file again:
                if os.path.exists(tmp_hdf_file):
                    os.remove(tmp_hdf_file)

            if (len(images) < numfiles or len(wcs) < numfiles
                    or 'sumimage' not in hdf
                    or 'backgrounds_pixels_used' not in hdf
                    or 'time_start' not in hdf):
                SumImage = np.zeros((img_shape[0], img_shape[1]),
                                    dtype='float64')
                Nimg = np.zeros_like(SumImage, dtype='int32')
                time = np.empty(numfiles, dtype='float64')
                timecorr = np.empty(numfiles, dtype='float32')
                time_start = np.empty(numfiles, dtype='float64')
                time_stop = np.empty(numfiles, dtype='float64')
                cadenceno = np.empty(numfiles, dtype='int32')
                quality = np.empty(numfiles, dtype='int32')
                UsedInBackgrounds = np.zeros_like(SumImage, dtype='int32')

                # Save list of file paths to the HDF5 file:
                filenames = [
                    os.path.basename(fname).rstrip('.gz').encode(
                        'ascii', 'strict') for fname in files
                ]
                hdf.require_dataset('imagespaths', (numfiles, ),
                                    data=filenames,
                                    dtype=h5py.special_dtype(vlen=bytes),
                                    **args)

                is_tess = False
                attributes = {
                    'CAMERA': None,
                    'CCD': None,
                    'DATA_REL': None,
                    'NUM_FRM': None,
                    'NREADOUT': None,
                    'CRMITEN': None,
                    'CRBLKSZ': None,
                    'CRSPOC': None
                }
                logger.info('Final processing of individual images...')
                tic = default_timer()
                for k, fname in enumerate(tqdm(files, **tqdm_settings)):
                    dset_name = '%04d' % k

                    # Load the FITS file data and the header:
                    flux0, hdr, flux0_err = load_ffi_fits(fname,
                                                          return_header=True,
                                                          return_uncert=True)

                    # Check if this is real TESS data:
                    # Could probably be done more elegantly, but if it works, it works...
                    if (not is_tess and hdr.get('TELESCOP') == 'TESS'
                            and hdr.get('NAXIS1') == 2136
                            and hdr.get('NAXIS2') == 2078):
                        is_tess = True

                    # Pick out the important bits from the header:
                    # Keep time in BTJD. If we want BJD we could
                    # simply add BJDREFI + BJDREFF:
                    time_start[k] = hdr['TSTART']
                    time_stop[k] = hdr['TSTOP']
                    time[k] = 0.5 * (hdr['TSTART'] + hdr['TSTOP'])
                    timecorr[k] = hdr.get('BARYCORR', 0)

                    # Get cadence-numbers from headers, if they are available.
                    # This header is not added before sector 6, so in that case
                    # we are doing a simple scaling of the timestamps.
                    if 'FFIINDEX' in hdr:
                        cadenceno[k] = hdr['FFIINDEX']
                    elif is_tess:
                        # The following numbers come from unofficial communication
                        # with Doug Caldwell and Roland Vanderspek:
                        # The timestamp in TJD and the corresponding cadenceno:
                        first_time = 0.5 * (1325.317007851970 +
                                            1325.337841177751) - 3.9072474e-03
                        first_cadenceno = 4697
                        timedelt = 1800 / 86400
                        # Extrapolate the cadenceno as a simple linear relation:
                        offset = first_cadenceno - first_time / timedelt
                        cadenceno[k] = np.round((time[k] - timecorr[k]) /
                                                timedelt + offset)
                    else:
                        cadenceno[k] = k + 1

                    # Data quality flags:
                    quality[k] = hdr.get('DQUALITY', 0)

                    if k == 0:
                        for key in attributes.keys():
                            attributes[key] = hdr.get(key)
                    else:
                        for key, value in attributes.items():
                            if hdr.get(key) != value:
                                logger.error("%s is not constant!", key)

                    # Find pixels marked for manual exclude:
                    manexcl = pixel_manual_exclude(flux0, hdr)

                    # Add manual excludes to pixel flags:
                    if np.any(manexcl):
                        pixel_flags[dset_name][manexcl] |= PixelQualityFlags.ManualExclude

                    if dset_name not in images:
                        # Mask out manually excluded data before saving:
                        flux0[manexcl] = np.nan
                        flux0_err[manexcl] = np.nan

                        # Load background from HDF file and subtract background from image,
                        # if the background has not already been subtracted:
                        if not hdr.get('BACKAPP', False):
                            flux0 -= backgrounds[dset_name]

                        # Save image subtracted the background in HDF5 file:
                        images.create_dataset(dset_name,
                                              data=flux0,
                                              chunks=imgchunks,
                                              **args)
                        images_err.create_dataset(dset_name,
                                                  data=flux0_err,
                                                  chunks=imgchunks,
                                                  **args)
                    else:
                        flux0 = np.asarray(images[dset_name])
                        flux0[manexcl] = np.nan

                    # Save the World Coordinate System of each image:
                    if dset_name not in wcs:
                        dset = wcs.create_dataset(
                            dset_name, (1, ),
                            dtype=h5py.special_dtype(vlen=bytes),
                            **args)
                        dset[0] = WCS(header=hdr).to_header_string(
                            relax=True).strip().encode('ascii', 'strict')

                    # Add together images for sum-image:
                    if TESSQualityFlags.filter(quality[k]):
                        Nimg += np.isfinite(flux0)
                        replace(flux0, np.nan, 0)
                        SumImage += flux0

                    # Add together the number of times each pixel was used in the background estimation:
                    UsedInBackgrounds += (
                        np.asarray(pixel_flags[dset_name])
                        & PixelQualityFlags.NotUsedForBackground == 0)

                # Normalize sumimage
                SumImage /= Nimg

                # Single boolean image indicating if the pixel was (on average) used in the background estimation:
                if 'backgrounds_pixels_used' not in hdf:
                    UsedInBackgrounds = (UsedInBackgrounds / numfiles >
                                         backgrounds_pixels_threshold)
                    dset_uibkg = hdf.create_dataset('backgrounds_pixels_used',
                                                    data=UsedInBackgrounds,
                                                    dtype='bool',
                                                    chunks=imgchunks,
                                                    **args)
                    dset_uibkg.attrs['threshold'] = backgrounds_pixels_threshold

                # Save attributes
                images.attrs['SECTOR'] = sector
                for key, value in attributes.items():
                    logger.debug("Saving attribute %s = %s", key, value)
                    images.attrs[key] = value

                # Set pixel offsets:
                if is_tess:
                    images.attrs['PIXEL_OFFSET_ROW'] = 0
                    images.attrs['PIXEL_OFFSET_COLUMN'] = 44
                else:
                    images.attrs['PIXEL_OFFSET_ROW'] = 0
                    images.attrs['PIXEL_OFFSET_COLUMN'] = 0

                # Add other arrays to HDF5 file:
                if 'time' in hdf: del hdf['time']
                if 'timecorr' in hdf: del hdf['timecorr']
                if 'time_start' in hdf: del hdf['time_start']
                if 'time_stop' in hdf: del hdf['time_stop']
                if 'sumimage' in hdf: del hdf['sumimage']
                if 'cadenceno' in hdf: del hdf['cadenceno']
                if 'quality' in hdf: del hdf['quality']
                hdf.create_dataset('sumimage', data=SumImage, **args)
                hdf.create_dataset('time', data=time, **args)
                hdf.create_dataset('timecorr', data=timecorr, **args)
                hdf.create_dataset('time_start', data=time_start, **args)
                hdf.create_dataset('time_stop', data=time_stop, **args)
                hdf.create_dataset('cadenceno', data=cadenceno, **args)
                hdf.create_dataset('quality', data=quality, **args)
                hdf.flush()

                logger.info("Individual image processing: %f sec/image",
                            (default_timer() - tic) / numfiles)
            else:
                # Extract things that are needed further down:
                SumImage = np.asarray(hdf['sumimage'])
                timecorr = np.asarray(hdf['timecorr'])
                time_start = np.asarray(hdf['time_start'])
                time_stop = np.asarray(hdf['time_stop'])
                quality = np.asarray(hdf['quality'])

            # Detections and flagging of Background Shenanigans:
            if pixel_flags.attrs.get('bkgshe_done', -1) < numfiles - 1:
                logger.info("Detecting background shenanigans...")
                tic_bkgshe = default_timer()

                # Load settings and create wrapper function with keywords set:
                bkgshe_threshold = pixel_flags.attrs.get(
                    'bkgshe_threshold', 40)
                pixel_flags.attrs['bkgshe_threshold'] = bkgshe_threshold
                pixel_background_shenanigans_wrapper = functools.partial(
                    pixel_background_shenanigans, SumImage=SumImage)

                tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5')
                with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp:
                    # Temporary dataset that will be used to store large array
                    # of background shenanigans indicator images:
                    pixel_flags_ind = hdftmp.require_dataset(
                        'pixel_flags_individual',
                        shape=(SumImage.shape[0], SumImage.shape[1], numfiles),
                        chunks=(SumImage.shape[0], SumImage.shape[1], 1),
                        dtype='float32')

                    # Run the background shenanigans extractor in parallel:
                    last_bkgshe = pixel_flags_ind.attrs.get('bkgshe_done', -1)
                    if last_bkgshe < numfiles - 1:
                        tic = default_timer()
                        k = last_bkgshe + 1
                        for bckshe in tqdm(m(
                                pixel_background_shenanigans_wrapper,
                                _iterate_hdf_group(images, start=k)),
                                           initial=k,
                                           total=numfiles,
                                           **tqdm_settings):
                            pixel_flags_ind[:, :, k] = bckshe
                            pixel_flags_ind.attrs['bkgshe_done'] = k
                            k += 1
                            hdftmp.flush()
                        logger.info("Background Shenanigans: %f sec/image",
                                    (default_timer() - tic) /
                                    (numfiles - last_bkgshe))

                    # Calculate the mean Background Shenanigans indicator:
                    if 'mean_shenanigans' not in hdftmp:
                        logger.info("Calculating mean shenanigans...")
                        tic = default_timer()

                        # Calculate robust mean by calculating the
                        # median in chunks and then taking the mean of them.
                        # This is to avoid loading the entire array into memory
                        mean_shenanigans = np.zeros_like(SumImage, dtype='float64')
                        block = 25
                        indices = list(range(numfiles))
                        np.random.seed(0)
                        np.random.shuffle(indices)
                        mean_shenanigans_block = np.empty((SumImage.shape[0], SumImage.shape[1], block))
                        for k in trange(0, numfiles, block, **tqdm_settings):
                            # Take the median of a random block of images:
                            for j, i in enumerate(indices[k:k + block]):
                                mean_shenanigans_block[:, :, j] = pixel_flags_ind[:, :, i]
                            bckshe = nanmedian(mean_shenanigans_block, axis=2)

                            # Add the median block to the mean image:
                            replace(bckshe, np.NaN, 0)
                            mean_shenanigans += bckshe
                        mean_shenanigans /= np.ceil(numfiles / block)
                        logger.info(
                            "Mean Background Shenanigans: %f sec/image",
                            (default_timer() - tic) / numfiles)

                        # Save the mean shenanigans to the HDF5 file:
                        hdftmp.create_dataset('mean_shenanigans',
                                              data=mean_shenanigans)
                    else:
                        mean_shenanigans = np.asarray(
                            hdftmp['mean_shenanigans'])

                    #msmax = max(np.abs(np.min(mean_shenanigans)), np.abs(np.max(mean_shenanigans)))
                    #fig = plt.figure()
                    #plot_image(mean_shenanigans, scale='linear', vmin=-msmax, vmax=msmax, cmap='coolwarm', make_cbar=True, xlabel=None, ylabel=None)
                    #fig.savefig('test.png', bbox_inches='tight')

                    logger.info("Setting background shenanigans...")
                    tic = default_timer()
                    for k in trange(numfiles, **tqdm_settings):
                        dset_name = '%04d' % k
                        bckshe = np.asarray(pixel_flags_ind[:, :, k])

                        #img = bckshe - mean_shenanigans
                        #img[np.abs(img) <= bkgshe_threshold/2] = 0
                        #fig = plt.figure(figsize=(8,9))
                        #ax = fig.add_subplot(111)
                        #plot_image(img, ax=ax, scale='linear', vmin=-bkgshe_threshold, vmax=bkgshe_threshold, xlabel=None, ylabel=None, cmap="RdBu_r", make_cbar=True)
                        #ax.set_xticks([])
                        #ax.set_yticks([])
                        #fig.savefig(dset_name + '.png', bbox_inches='tight')
                        #plt.close(fig)

                        # Create the mask as anything that significantly pops out
                        # (both positive and negative) in the image:
                        bckshe = np.abs(bckshe - mean_shenanigans) > bkgshe_threshold

                        # Clear any old flags:
                        indx = (np.asarray(pixel_flags[dset_name]) & PixelQualityFlags.BackgroundShenanigans != 0)
                        if np.any(indx):
                            pixel_flags[dset_name][indx] -= PixelQualityFlags.BackgroundShenanigans

                        # Save the new flags to the permanent HDF5 file:
                        if np.any(bckshe):
                            pixel_flags[dset_name][bckshe] |= PixelQualityFlags.BackgroundShenanigans

                        pixel_flags.attrs['bkgshe_done'] = k
                        hdf.flush()
                    logger.info("Setting Background Shenanigans: %f sec/image",
                                (default_timer() - tic) / numfiles)

                # Delete the temporary HDF5 file again:
                if os.path.exists(tmp_hdf_file):
                    os.remove(tmp_hdf_file)

                logger.info("Total Background Shenanigans: %f sec/image",
                            (default_timer() - tic_bkgshe) / numfiles)

            # Check that the time vector is sorted:
            if not np.all(hdf['time'][:-1] < hdf['time'][1:]):
                logger.error("Time vector is not sorted")
                return

            # Check that the sector reference time is within the timespan of the time vector:
            sector_reference_time_tjd = sector_reference_time - 2457000
            if sector_reference_time_tjd < hdf['time'][0] or sector_reference_time_tjd > hdf['time'][-1]:
                logger.error("Sector reference time outside timespan of data")
                #return

            # Find the reference image:
            refindx = find_nearest(hdf['time'], sector_reference_time_tjd)
            logger.info("WCS reference frame: %d", refindx)

            # Save WCS to the file:
            wcs.attrs['ref_frame'] = refindx

            if calc_movement_kernel and 'movement_kernel' not in hdf:
                # Calculate image motion:
                logger.info("Calculation Image Movement Kernels...")
                imk = ImageMovementKernel(image_ref=images['%04d' % refindx],
                                          warpmode='translation')
                kernel = np.empty((numfiles, imk.n_params), dtype='float64')

                tic = default_timer()

                datasets = _iterate_hdf_group(images)
                for k, knl in enumerate(
                        tqdm(m(imk.calc_kernel, datasets), **tqdm_settings)):
                    kernel[k, :] = knl
                    logger.debug("Kernel: %s", knl)
                    logger.debug("Estimate: %f sec/image",
                                 (default_timer() - tic) / (k + 1))

                toc = default_timer()
                logger.info("Movement Kernel: %f sec/image",
                            (toc - tic) / numfiles)

                # Save Image Motion Kernel to HDF5 file:
                dset = hdf.create_dataset('movement_kernel',
                                          data=kernel,
                                          **args)
                dset.attrs['warpmode'] = imk.warpmode
                dset.attrs['ref_frame'] = refindx

            # Transfer quality flags from TPF files from the same CAMERA and CCD to the FFIs:
            if not hdf['quality'].attrs.get('TRANSFER_FROM_TPF', False):
                logger.info("Transfering QUALITY flags from TPFs to FFIs...")

                # Select (max) five random TPF targets from the given sector, camera and ccd:
                tpffiles = find_tpf_files(input_folder,
                                          sector=sector,
                                          camera=camera,
                                          ccd=ccd,
                                          findmax=5)
                if len(tpffiles) == 0:
                    logger.warning(
                        "No TPF files found for SECTOR=%d, CAMERA=%d, CCD=%d and quality flags could therefore not be propagated.",
                        sector, camera, ccd)
                else:
                    # Run through each of the found TPF files and build the quality column from them,
                    # by simply setting the flag if it is found in any of the files:
                    quality_tpf = np.zeros(numfiles, dtype='int32')
                    for tpffile in tpffiles:
                        quality_tpf |= quality_from_tpf(
                            tpffile, time_start - timecorr,
                            time_stop - timecorr)

                    # Inspect the differences with the qualities already set in the HDF5 file:
                    indx_diff = (quality | quality_tpf != quality)
                    logger.info("%d qualities will be updated (%.1f%%).",
                                np.sum(indx_diff),
                                100 * np.sum(indx_diff) / numfiles)

                    # New quality:
                    quality |= quality_tpf

                    # Update the quality column in the HDF5 file:
                    hdf['quality'][:] = quality
                    hdf['quality'].attrs['TRANSFER_FROM_TPF'] = True
                    hdf.flush()

        logger.info("Done.")
        logger.info("Total: %f sec/image",
                    (default_timer() - tic_total) / numfiles)

    # Close workers again:
    if threads > 1:
        pool.close()
        pool.join()
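The mean-shenanigans image above is built as a robust mean: medians of shuffled blocks of indicator images are accumulated and the sum is divided by the number of blocks, so the full image cube never has to be loaded into memory at once. A minimal, self-contained sketch of that idea follows (the array sizes, the random cube and the variable names are made up; the real code reads its blocks from the temporary HDF5 dataset):

import numpy as np
from bottleneck import nanmedian, replace

rng = np.random.default_rng(0)
cube = rng.normal(size=(32, 32, 100))       # stand-in for the per-image indicator maps
block = 25
order = rng.permutation(cube.shape[2])      # shuffle so each block is a random subset

robust_mean = np.zeros(cube.shape[:2])
for k in range(0, cube.shape[2], block):
    med = nanmedian(cube[:, :, order[k:k + block]], axis=2)  # median of one block
    replace(med, np.nan, 0)                 # pixels that are NaN in the whole block count as zero
    robust_mean += med
robust_mean /= np.ceil(cube.shape[2] / block)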
Example #20
0
    def time_replace(self, dtype, shape, order):
        bn.replace(self.arr, self.old, self.new)
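The benchmark method above only times the call; self.arr, self.old and self.new are prepared elsewhere in the benchmark suite. For reference, a minimal sketch of what bn.replace does: the replacement happens in place and the function returns None.

import numpy as np
import bottleneck as bn

arr = np.array([1.0, np.nan, 3.0, np.nan])
out = bn.replace(arr, np.nan, 0.0)   # replace NaN with 0.0, in place
print(arr)   # [1. 0. 3. 0.]
print(out)   # None -- no new array is returned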
Example #21
0
    def __init__(self, protobuf):

        # Load the protobuf file:
        pd = PixelData()
        ph = PixelHeader()
        with gzip.open(protobuf, 'rb') as fid:
            d = fid.read()
            pd.ParseFromString(d)
            ph.ParseFromString(d)

        # Store the parsed header and basic identifiers:
        self.PixelHeader = ph
        self.camera = int(self.PixelHeader.camera_number)
        self.ccd = int(self.PixelHeader.ccd_number)

        #print(ph)
        #print(pd.target_data)
        #print(pd.collateral_data)

        # Store pixel data as 1D arrays:
        self.target_data = np.array(
            pd.target_data[1:],
            dtype='float64')  # FIXME: Why is there a leading zero?!
        self.collateral_data = np.array(
            pd.collateral_data[1:],
            dtype='float64')  # FIXME: Why is there a leading one?!

        # Replace missing data with NaN:
        replace(self.target_data, 0xFFFFFFFF, np.nan)
        replace(self.collateral_data, 0xFFFFFFFF, np.nan)

        # Properties which will be filled out later:
        self.dark = None

        # TODO: All the following is actually common to all dataset with same target_pixel_table_id
        # Find rows and columns on the 2D CCD matching the 1D pixel data:
        target_pixel_table_id = int(self.PixelHeader.target_pixel_table_id)
        target_pixel_table = etree.parse(
            'test_data/%04d-target-pixel-table.xml' %
            (target_pixel_table_id, )).getroot()

        Npixels = len(self.target_data)
        self.rows = np.zeros(Npixels, dtype='int32')
        self.columns = np.zeros(Npixels, dtype='int32')
        self.outputs = np.zeros(Npixels, dtype='str')
        for pixel in target_pixel_table.xpath(
                './ccd[@cameraNumber="%d"][@ccdNumber="%d"]/pixel' % (self.camera, self.ccd)):
            index = int(pixel.get('index')) - 1
            column = int(pixel.get('column'))
            self.rows[index] = int(pixel.get('row'))
            self.columns[index] = column
            # Figure out what CCD outputs each column corresponds to:
            if column >= 1581:
                self.outputs[index] = 'D'
            elif column >= 1069:
                self.outputs[index] = 'C'
            elif column >= 557:
                self.outputs[index] = 'B'
            elif column >= 45:
                self.outputs[index] = 'A'

        # Convert the row and column addresses to indices in the flatfield and 2D black images:
        self.index_columns = self.columns - 1
        self.index_rows = 512 - self.rows  # FIXME: 2078 instead of 512

        #print(self.outputs)
        #print(self.rows, self.columns)
        #print(self.index_rows, self.index_columns)

        Ncollateral = len(self.collateral_data)
        collateral_rows = np.zeros(Ncollateral, dtype='int32')
        collateral_columns = np.zeros(Ncollateral, dtype='int32')

        collateral_pixel_table_id = int(
            self.PixelHeader.collateral_pixel_table_id)
        collateral_pixel_table = etree.parse(
            'test_data/%04d-collateral-pixel-table.xml' %
            (collateral_pixel_table_id, )).getroot()
        for pixel in collateral_pixel_table.xpath(
                './ccd[@cameraNumber="%d"][@ccdNumber="%d"]/pixel' % (self.camera, self.ccd)):
            index = int(pixel.get('index')) - 1
            collateral_rows[index] = int(pixel.get('row'))
            collateral_columns[index] = int(pixel.get('column'))

        unique_collateral_columns = np.unique(collateral_columns)
        Ncolcolumns = len(unique_collateral_columns)
        self.masked_smear = np.full((10, Ncolcolumns), np.nan, dtype='float64')
        self.virtual_smear = np.full((10, Ncolcolumns), np.nan, dtype='float64')
        for index, (row, column) in enumerate(
                zip(collateral_rows, collateral_columns)):
            index_column = np.where(column == unique_collateral_columns)[0]
            if column >= 2093 or column <= 44:
                # Virtual columns or Serial register columns
                pass
            elif row >= 2069:
                # Virtual rows
                index_row = (2078 - row)
                self.virtual_smear[index_row, index_column] = self.collateral_data[index]
            elif row >= 2059:
                # Smear rows
                index_row = (2068 - row)
                self.masked_smear[index_row, index_column] = self.collateral_data[index]
            elif row >= 2049:
                # Buffer rows
                pass
            else:
                print("Invalid collateral pixel: (%d,%d)" % (row, column))

        self.collateral_columns = unique_collateral_columns

        print(self.collateral_columns)
        print(self.masked_smear)
        print(self.virtual_smear)
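The constructor above marks missing pixels by replacing the 0xFFFFFFFF sentinel with NaN before any further processing. A tiny stand-alone illustration of that pattern (the data values are made up):

import numpy as np
from bottleneck import replace

data = np.array([152.0, 0xFFFFFFFF, 148.5], dtype='float64')
replace(data, 0xFFFFFFFF, np.nan)     # in place: sentinel values become NaN
print(np.isnan(data))                 # [False  True False]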
Example #22
0
    def data_table(self, data, headers=None):
        """
        Return Orange.data.Table given rows of `headers` (iterable of iterable)
        and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might
        as well **have it sorted column-major**, e.g. ``order='F'``).

        Basically, the idea of subclasses is to produce those two iterables,
        however they might.

        If `headers` is not provided, the header rows are extracted from `data`,
        assuming they precede it.
        """
        if not headers:
            headers, data = self.parse_headers(data)

        # Consider various header types (single-row, two-row, three-row, none)
        if 3 == len(headers):
            names, types, flags = map(list, headers)
        else:
            if 1 == len(headers):
                HEADER1_FLAG_SEP = '#'
                # First row format either:
                #   1) delimited column names
                #   2) -||- with type and flags prepended, separated by #,
                #      e.g. d#sex,c#age,cC#IQ
                _flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i)
                                      for i in headers[0]])
                names = list(names)
            elif 2 == len(headers):
                names, _flags = map(list, headers)
            else:
                # Use heuristics for everything
                names, _flags = [], []
            types = [''.join(filter(str.isupper, flag)).lower() for flag in _flags]
            flags = [Flags.join(filter(str.islower, flag)) for flag in _flags]

        # Determine maximum row length
        rowlen = max(map(len, (names, types, flags)))

        def _equal_length(lst):
            lst.extend(['']*(rowlen - len(lst)))
            return lst

        # Ensure all data is of equal width in a column-contiguous array
        data = np.array([_equal_length(list(row)) for row in data if any(row)],
                        copy=False, dtype=object, order='F')

        # Data may actually be longer than headers were
        try:
            rowlen = data.shape[1]
        except IndexError:
            pass
        else:
            for lst in (names, types, flags):
                _equal_length(lst)

        NAMEGEN = namegen('Feature ', 1)
        Xcols, attrs = [], []
        Mcols, metas = [], []
        Ycols, clses = [], []
        Wcols = []

        # Rename variables if necessary
        # Reusing across files still works if both files have same duplicates
        name_counts = Counter(names)
        del name_counts[""]
        if len(name_counts) != len(names) and name_counts:
            uses = {name: 0 for name, count in name_counts.items() if count > 1}
            for i, name in enumerate(names):
                if name in uses:
                    uses[name] += 1
                    names[i] = "{}_{}".format(name, uses[name])

        # Iterate through the columns
        for col in range(rowlen):
            flag = Flags(Flags.split(flags[col]))
            if flag.i:
                continue

            type_flag = types and types[col].strip()
            try:
                orig_values = [np.nan if i in MISSING_VALUES else i
                               for i in (i.strip() for i in data[:, col])]
            except IndexError:
                # No data instances leads here
                orig_values = []
                # In this case, coltype could be anything. It's set as-is
                # only to satisfy test_table.TableTestCase.test_append
                coltype = DiscreteVariable

            coltype_kwargs = {}
            valuemap = []
            values = orig_values

            if type_flag in StringVariable.TYPE_HEADERS:
                coltype = StringVariable
            elif type_flag in ContinuousVariable.TYPE_HEADERS:
                coltype = ContinuousVariable
                try:
                    values = [float(i) for i in orig_values]
                except ValueError:
                    for row, num in enumerate(orig_values):
                        try:
                            float(num)
                        except ValueError:
                            break
                    raise ValueError('Non-continuous value in (1-based) '
                                     'line {}, column {}'.format(row + len(headers) + 1,
                                                                 col + 1))

            elif type_flag in TimeVariable.TYPE_HEADERS:
                coltype = TimeVariable

            elif (type_flag in DiscreteVariable.TYPE_HEADERS or
                  _RE_DISCRETE_LIST.match(type_flag)):
                if _RE_DISCRETE_LIST.match(type_flag):
                    valuemap = Flags.split(type_flag)
                    coltype_kwargs.update(ordered=True)
                else:
                    valuemap = sorted(set(orig_values) - {np.nan})

            else:
                # No known type specified, use heuristics
                is_discrete = is_discrete_values(orig_values)
                if is_discrete:
                    valuemap = sorted(is_discrete)
                else:
                    try:
                        values = [float(i) for i in orig_values]
                    except ValueError:
                        tvar = TimeVariable('_')
                        try:
                            values = [tvar.parse(i) for i in orig_values]
                        except ValueError:
                            coltype = StringVariable
                        else:
                            coltype = TimeVariable
                    else:
                        coltype = ContinuousVariable

            if valuemap:
                # Map discrete data to ints
                def valuemap_index(val):
                    try:
                        return valuemap.index(val)
                    except ValueError:
                        return np.nan

                values = np.vectorize(valuemap_index, otypes=[float])(orig_values)
                coltype = DiscreteVariable
                coltype_kwargs.update(values=valuemap)

            if coltype is StringVariable:
                values = ['' if i is np.nan else i
                          for i in orig_values]

            if flag.m or coltype is StringVariable:
                append_to = (Mcols, metas)
            elif flag.w:
                append_to = (Wcols, None)
            elif flag.c:
                append_to = (Ycols, clses)
            else:
                append_to = (Xcols, attrs)

            cols, domain_vars = append_to
            cols.append(col)
            var = None
            if domain_vars is not None:
                if names and names[col]:
                    # Use existing variable if available
                    var = coltype.make(names[col].strip(), **coltype_kwargs)
                else:
                    # Never use existing for un-named variables
                    var = coltype(next(NAMEGEN), **coltype_kwargs)
                var.attributes.update(flag.attributes)
                domain_vars.append(var)

                # Reorder discrete values to match existing variable
                if var.is_discrete and not var.ordered:
                    new_order, old_order = var.values, coltype_kwargs.get('values', var.values)
                    if new_order != old_order:
                        offset = len(new_order)
                        column = values if data.ndim > 1 else data
                        column += offset
                        for i, val in enumerate(var.values):
                            try:
                                oldval = old_order.index(val)
                            except ValueError:
                                continue
                            bn.replace(column, offset + oldval, new_order.index(val))

            if isinstance(var, TimeVariable) or coltype is TimeVariable:
                # Re-parse the values because only now after coltype.make call
                # above, variable var is the correct one
                _var = var if isinstance(var, TimeVariable) else TimeVariable('_')
                values = [_var.parse(i) for i in orig_values]

            # Write back the changed data. This is needed to pass the
            # correct, converted values into Table.from_numpy below.
            try:
                data[:, col] = values
            except IndexError:
                pass

        domain = Domain(attrs, clses, metas)

        if not data.size:
            return Table.from_domain(domain, 0)

        table = Table.from_numpy(domain,
                                 data[:, Xcols].astype(float, order='C'),
                                 data[:, Ycols].astype(float, order='C'),
                                 data[:, Mcols].astype(object, order='C'),
                                 data[:, Wcols].astype(float, order='C'))
        return table
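The single-row header format handled above packs type and flag letters in front of the column name, separated by '#' (e.g. d#sex,c#age,cC#IQ); uppercase letters are collected as the type and lowercase letters as the flags, exactly as the two list comprehensions do. A small, self-contained sketch of just that parsing step (plain strings are used instead of the Orange Flags helper):

header = ['d#sex', 'c#age', 'cC#IQ', 'height']

split = [cell.split('#', 1) if '#' in cell else ('', cell) for cell in header]
names = [name for _flag, name in split]
types = [''.join(ch for ch in flag if ch.isupper()).lower() for flag, _name in split]
flags = [''.join(ch for ch in flag if ch.islower()) for flag, _name in split]

print(names)   # ['sex', 'age', 'IQ', 'height']
print(types)   # ['', '', 'c', '']
print(flags)   # ['d', 'c', 'c', '']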
Example #23
0
    def data_table(self, data, headers=None):
        """
        Return Orange.data.Table given rows of `headers` (iterable of iterable)
        and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might
        as well **have it sorted column-major**, e.g. ``order='F'``).

        Basically, the idea of subclasses is to produce those two iterables,
        however they might.

        If `headers` is not provided, the header rows are extracted from `data`,
        assuming they precede it.
        """
        if not headers:
            headers, data = self.parse_headers(data)

        # Consider various header types (single-row, two-row, three-row, none)
        if 3 == len(headers):
            names, types, flags = map(list, headers)
        else:
            if 1 == len(headers):
                HEADER1_FLAG_SEP = '#'
                # First row format either:
                #   1) delimited column names
                #   2) -||- with type and flags prepended, separated by #,
                #      e.g. d#sex,c#age,cC#IQ
                _flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i)
                                      for i in headers[0]])
                names = list(names)
            elif 2 == len(headers):
                names, _flags = map(list, headers)
            else:
                # Use heuristics for everything
                names, _flags = [], []
            types = [''.join(filter(str.isupper, flag)).lower() for flag in _flags]
            flags = [Flags.join(filter(str.islower, flag)) for flag in _flags]

        # Determine maximum row length
        rowlen = max(map(len, (names, types, flags)))

        def _equal_length(lst):
            lst.extend(['']*(rowlen - len(lst)))
            return lst

        # Ensure all data is of equal width in a column-contiguous array
        data = np.array([_equal_length(list(row)) for row in data if any(row)],
                        copy=False, dtype=object, order='F')

        # Data may actually be longer than headers were
        try: rowlen = data.shape[1]
        except IndexError: pass
        else:
            for lst in (names, types, flags):
                _equal_length(lst)

        NAMEGEN = namegen('Feature ', 1)
        Xcols, attrs = [], []
        Mcols, metas = [], []
        Ycols, clses = [], []
        Wcols = []

        # Iterate through the columns
        for col in range(rowlen):
            flag = Flags(Flags.split(flags[col]))
            if flag.i: continue

            type_flag = types and types[col].strip()
            try:
                orig_values = [np.nan if i in MISSING_VALUES else i
                               for i in (i.strip() for i in data[:, col])]
            except IndexError:
                # No data instances leads here
                orig_values = []
                # In this case, coltype could be anything. It's set as-is
                # only to satisfy test_table.TableTestCase.test_append
                coltype = DiscreteVariable

            coltype_kwargs = {}
            valuemap = []
            values = orig_values

            if type_flag in StringVariable.TYPE_HEADERS:
                coltype = StringVariable
            elif type_flag in ContinuousVariable.TYPE_HEADERS:
                coltype = ContinuousVariable
                try:
                    values = [float(i) for i in orig_values]
                except ValueError:
                    for row, num in enumerate(orig_values):
                        try: float(num)
                        except ValueError: break
                    raise ValueError('Non-continuous value in (1-based) '
                                     'line {}, column {}'.format(row + len(headers) + 1,
                                                                 col + 1))

            elif type_flag in TimeVariable.TYPE_HEADERS:
                coltype = TimeVariable

            elif (type_flag in DiscreteVariable.TYPE_HEADERS or
                  _RE_DISCRETE_LIST.match(type_flag)):
                if _RE_DISCRETE_LIST.match(type_flag):
                    valuemap = Flags.split(type_flag)
                    coltype_kwargs.update(ordered=True)
                else:
                    valuemap = sorted(set(orig_values) - {np.nan})

            else:
                # No known type specified, use heuristics
                is_discrete = is_discrete_values(orig_values)
                if is_discrete:
                    valuemap = sorted(is_discrete)
                else:
                    try: values = [float(i) for i in orig_values]
                    except ValueError:
                        tvar = TimeVariable('_')
                        try: values = [tvar.parse(i) for i in orig_values]
                        except ValueError:
                            coltype = StringVariable
                        else:
                            coltype = TimeVariable
                    else:
                        coltype = ContinuousVariable

            if valuemap:
                # Map discrete data to ints
                def valuemap_index(val):
                    try: return valuemap.index(val)
                    except ValueError: return np.nan

                values = np.vectorize(valuemap_index, otypes=[float])(orig_values)
                coltype = DiscreteVariable
                coltype_kwargs.update(values=valuemap)

            if coltype is StringVariable:
                values = ['' if i is np.nan else i
                          for i in orig_values]

            if flag.m or coltype is StringVariable:
                append_to = (Mcols, metas)
            elif flag.w:
                append_to = (Wcols, None)
            elif flag.c:
                append_to = (Ycols, clses)
            else:
                append_to = (Xcols, attrs)

            cols, domain_vars = append_to
            cols.append(col)
            if domain_vars is not None:
                if names and names[col]:
                    # Use existing variable if available
                    var = coltype.make(names[col].strip(), **coltype_kwargs)
                else:
                    # Never use existing for un-named variables
                    var = coltype(next(NAMEGEN), **coltype_kwargs)
                var.attributes.update(flag.attributes)
                domain_vars.append(var)

                # Reorder discrete values to match existing variable
                if var.is_discrete and not var.ordered:
                    new_order, old_order = var.values, coltype_kwargs.get('values', var.values)
                    if new_order != old_order:
                        offset = len(new_order)
                        column = values if data.ndim > 1 else data
                        column += offset
                        for i, val in enumerate(var.values):
                            try: oldval = old_order.index(val)
                            except ValueError: continue
                            bn.replace(column, offset + oldval, new_order.index(val))

            if coltype is TimeVariable:
                # Re-parse the values because only now after coltype.make call
                # above, variable var is the correct one
                values = [var.parse(i) for i in orig_values]

            # Write back the changed data. This is needed to pass the
            # correct, converted values into Table.from_numpy below.
            try: data[:, col] = values
            except IndexError: pass

        domain = Domain(attrs, clses, metas)

        if not data.size:
            return Table.from_domain(domain, 0)

        table = Table.from_numpy(domain,
                                 data[:, Xcols].astype(float, order='C'),
                                 data[:, Ycols].astype(float, order='C'),
                                 data[:, Mcols].astype(object, order='C'),
                                 data[:, Wcols].astype(float, order='C'))
        return table
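The discrete-value reordering near the end of data_table uses a two-step trick with bn.replace: the stored indices are first pushed out of the valid range by an offset, and each offset old index is then replaced by the index of the same value in the new order, so no value is overwritten before it has been remapped. A self-contained sketch of that trick (the value lists are made up):

import numpy as np
import bottleneck as bn

old_order = ['no', 'yes', 'maybe']        # order the column was parsed with
new_order = ['maybe', 'no', 'yes']        # order of an already existing variable
column = np.array([0., 1., 2., 1., 0.])   # indices into old_order

offset = len(new_order)
column += offset                          # push everything out of the target range first
for new_index, val in enumerate(new_order):
    old_index = old_order.index(val)
    bn.replace(column, offset + old_index, new_index)

print(column)   # [1. 2. 0. 2. 1.]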
Example #24
0
def create_hdf5(input_folder=None, cameras=None, ccds=None):
	"""
	Restructure individual FFI images (in FITS format) into
	a combined HDF5 file which is used in the photometry
	pipeline.

	In this process the background flux in each FFI is
	estimated using the `backgrounds.fit_background` function.

	Parameters:
		input_folder (string): Input folder containing the FFI files. If ``None``, the input directory in the environment variable ``TESSPHOT_INPUT`` is used.
		cameras (integer, optional): TESS camera number (1-4). If ``None``, all cameras will be processed.
		ccds (integer, optional): TESS CCD number (1-4). If ``None``, all CCDs will be processed.

	Raises:
		IOError: If the specified ``input_folder`` is not an existing directory or if settings table could not be loaded from the catalog SQLite file.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

	logger = logging.getLogger(__name__)

	# Check the input folder, and load the default if not provided:
	if input_folder is None:
		input_folder = os.environ.get('TESSPHOT_INPUT', os.path.join(os.path.dirname(__file__), 'tests', 'input'))

	# Check that the given input directory is indeed a directory:
	if not os.path.isdir(input_folder):
		raise IOError("The given path does not exist or is not a directory")

	# Make sure cameras and ccds are iterable:
	cameras = (1, 2, 3, 4) if cameras is None else (cameras, )
	ccds = (1, 2, 3, 4) if ccds is None else (ccds, )

	# Common settings for HDF5 datasets:
	args = {
		'compression': 'lzf',
		'shuffle': True,
		'fletcher32': True
	}
	imgchunks = (64, 64)

	# Get the number of processes we can spawn in case it is needed for calculations:
	threads = int(os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))
	logger.info("Using %d processes.", threads)

	# Loop over each combination of camera and CCD:
	for camera, ccd in itertools.product(cameras, ccds):
		logger.info("Running CAMERA=%s, CCD=%s", camera, ccd)
		tic_total = default_timer()

		# Find all the FFI files associated with this camera and CCD:
		files = find_ffi_files(input_folder, camera, ccd)
		numfiles = len(files)
		logger.info("Number of files: %d", numfiles)
		if numfiles == 0:
			continue

		# Catalog file:
		catalog_file = os.path.join(input_folder, 'catalog_camera{0:d}_ccd{1:d}.sqlite'.format(camera, ccd))
		logger.debug("Catalog File: %s", catalog_file)
		if not os.path.exists(catalog_file):
			logger.error("Catalog file could not be found: '%s'", catalog_file)
			continue

		# Load catalog settings from the SQLite database:
		conn = sqlite3.connect(catalog_file)
		conn.row_factory = sqlite3.Row
		cursor = conn.cursor()
		cursor.execute("SELECT sector,reference_time FROM settings LIMIT 1;")
		row = cursor.fetchone()
		if row is None:
			raise IOError("Settings could not be loaded from catalog")
		#sector = row['sector']
		sector_reference_time = row['reference_time']
		cursor.close()
		conn.close()

		# HDF5 file to be created/modified:
		hdf_file = os.path.join(input_folder, 'camera{0:d}_ccd{1:d}.hdf5'.format(camera, ccd))
		logger.debug("HDF5 File: %s", hdf_file)

		# Get image shape from the first file:
		img = load_ffi_fits(files[0])
		img_shape = img.shape

		# Open the HDF5 file for editing:
		with h5py.File(hdf_file, 'a', libver='latest') as hdf:

			images = hdf.require_group('images')
			images_err = hdf.require_group('images_err')
			backgrounds = hdf.require_group('backgrounds')
			masks = hdf.require_group('backgrounds_masks')
			if 'wcs' in hdf and isinstance(hdf['wcs'], h5py.Dataset): del hdf['wcs']
			wcs = hdf.require_group('wcs')
			time_smooth = backgrounds.attrs.get('time_smooth', 3)

			if len(backgrounds) < numfiles:
				# Because HDF5 does not reclaim space when datasets are deleted from
				# the file, we create a temporary HDF5 file that will hold the things
				# we don't need in the final HDF5 file.
				tmp_hdf_file = hdf_file.replace('.hdf5', '.tmp.hdf5')
				with h5py.File(tmp_hdf_file, 'a', libver='latest') as hdftmp:
					dset_bck_us = hdftmp.require_group('backgrounds_unsmoothed')

					if len(masks) < numfiles:

						tic = default_timer()
						if threads > 1:
							pool = multiprocessing.Pool(threads)
							m = pool.imap
						else:
							m = map

						last_bck_fit = -1 if len(masks) == 0 else int(sorted(list(masks.keys()))[-1])
						k = last_bck_fit+1
						for bck, mask in m(fit_background, files[k:]):
							dset_name = '%04d' % k
							logger.debug("Background %d complete", k)
							logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k-last_bck_fit))

							dset_bck_us.create_dataset(dset_name, data=bck)

							indices = np.asarray(np.nonzero(mask), dtype='uint16')
							masks.create_dataset(dset_name, data=indices, **args)

							k += 1

						if threads > 1:
							pool.close()
							pool.join()

						hdf.flush()
						hdftmp.flush()
						toc = default_timer()
						logger.info("Background estimation: %f sec/image", (toc-tic)/(numfiles-last_bck_fit))

					# Smooth the backgrounds along the time axis:
					backgrounds.attrs['time_smooth'] = time_smooth
					w = time_smooth//2
					tic = default_timer()
					for k in range(numfiles):
						dset_name = '%04d' % k
						if dset_name in backgrounds: continue

						indx1 = max(k-w, 0)
						indx2 = min(k+w+1, numfiles)
						logger.debug("Smoothing background %d: %d -> %d", k, indx1, indx2)

						block = np.empty((img_shape[0], img_shape[1], indx2-indx1), dtype='float32')
						logger.debug(block.shape)
						for i, j in enumerate(range(indx1, indx2)):
							block[:, :, i] = dset_bck_us['%04d' % j]

						bck = nanmean(block, axis=2)
						#bck_err = np.sqrt(ss(block_err, axis=2)) / time_smooth

						backgrounds.create_dataset(dset_name, data=bck, chunks=imgchunks, **args)

					toc = default_timer()
					logger.info("Background smoothing: %f sec/image", (toc-tic)/numfiles)

				# Flush changes to the permanent HDF5 file:
				hdf.flush()

				# Delete the temporary HDF5 file again:
				if os.path.exists(tmp_hdf_file):
					os.remove(tmp_hdf_file)


			if len(images) < numfiles or len(wcs) < numfiles or 'sumimage' not in hdf:
				SumImage = np.zeros((img_shape[0], img_shape[1]), dtype='float64')
				time = np.empty(numfiles, dtype='float64')
				timecorr = np.empty(numfiles, dtype='float32')
				cadenceno = np.empty(numfiles, dtype='int32')
				quality = np.empty(numfiles, dtype='int32')

				# Save list of file paths to the HDF5 file:
				filenames = [os.path.basename(fname).rstrip('.gz').encode('ascii', 'strict') for fname in files]
				hdf.require_dataset('imagespaths', (numfiles,), data=filenames, dtype=h5py.special_dtype(vlen=bytes), **args)

				is_tess = False
				attributes = {
					'DATA_REL': None,
					'NUM_FRM': None,
					'CRMITEN': None,
					'CRBLKSZ': None,
					'CRSPOC': None
				}
				for k, fname in enumerate(files):
					logger.debug("Processing image: %.2f%% - %s", 100*k/numfiles, fname)
					dset_name = '%04d' % k

					# Load the FITS file data and the header:
					flux0, hdr, flux0_err = load_ffi_fits(fname, return_header=True, return_uncert=True)

					# Check if this is real TESS data:
					# Could probably be done more elegantly, but if it works, it works...
					if not is_tess and hdr.get('TELESCOP') == 'TESS' and hdr.get('NAXIS1') == 2136 and hdr.get('NAXIS2') == 2078:
						is_tess = True

					# Pick out the important bits from the header:
					# Keep time in BTJD. If we want BJD we could
					# simply add BJDREFI + BJDREFF:
					time[k] = 0.5*(hdr['TSTART'] + hdr['TSTOP'])
					timecorr[k] = hdr.get('BARYCORR', 0)

					# Cadence-number is currently not in the FFIs.
					# The following numbers come from unofficial communication
					# with Doug Caldwell and Roland Vanderspek:
					# The timestamp in TJD and the corresponding cadenceno:
					first_time = 0.5*(1325.317007851970 + 1325.337841177751) - 3.9072474E-03
					first_cadenceno = 4697
					timedelt = 1800/86400
					# Extrapolate the cadenceno as a simple linear relation:
					offset = first_cadenceno - first_time/timedelt
					cadenceno[k] = np.round((time[k] - timecorr[k])/timedelt + offset)
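					# By construction, a frame with (time - timecorr) == first_time
					# therefore gets cadenceno == first_cadenceno.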

					# Data quality flags:
					quality[k] = hdr.get('DQUALITY', 0)

					if k == 0:
						for key in attributes.keys():
							attributes[key] = hdr.get(key)
					else:
						for key, value in attributes.items():
							if hdr.get(key) != value:
								logger.error("%s is not constant!", key)

					#if hdr.get('SECTOR') != sector:
					#	logger.error("Incorrect SECTOR: Catalog=%s, FITS=%s", sector, hdr.get('SECTOR'))
					if hdr.get('CAMERA') != camera or hdr.get('CCD') != ccd:
						logger.error("Incorrect CAMERA/CCD: FITS=(%s, %s)", hdr.get('CAMERA'), hdr.get('CCD'))

					if dset_name not in images:
						# Load background from HDF file and subtract background from image,
						# if the background has not already been subtracted:
						if not hdr.get('BACKAPP', False):
							flux0 -= backgrounds[dset_name]

						# Save image subtracted the background in HDF5 file:
						images.create_dataset(dset_name, data=flux0, chunks=imgchunks, **args)
						images_err.create_dataset(dset_name, data=flux0_err, chunks=imgchunks, **args)
					else:
						flux0 = np.asarray(images[dset_name])

					# Save the World Coordinate System of each image:
					if dset_name not in wcs:
						dset = wcs.create_dataset(dset_name, (1,), dtype=h5py.special_dtype(vlen=bytes), **args)
						dset[0] = WCS(header=hdr).to_header_string(relax=True).strip().encode('ascii', 'strict')

					# Add together images for sum-image:
					if TESSQualityFlags.filter(quality[k]):
						replace(flux0, np.nan, 0)
						SumImage += flux0

				SumImage /= numfiles

				# Save attributes
				for key, value in attributes.items():
					logger.debug("Saving attribute %s = %s", key, value)
					images.attrs[key] = value

				# Set pixel offsets:
				if is_tess:
					images.attrs['PIXEL_OFFSET_ROW'] = 0
					images.attrs['PIXEL_OFFSET_COLUMN'] = 44
				else:
					images.attrs['PIXEL_OFFSET_ROW'] = 0
					images.attrs['PIXEL_OFFSET_COLUMN'] = 0

				# Add other arrays to HDF5 file:
				if 'time' in hdf: del hdf['time']
				if 'timecorr' in hdf: del hdf['timecorr']
				if 'sumimage' in hdf: del hdf['sumimage']
				if 'cadenceno' in hdf: del hdf['cadenceno']
				if 'quality' in hdf: del hdf['quality']
				hdf.create_dataset('sumimage', data=SumImage, **args)
				hdf.create_dataset('time', data=time, **args)
				hdf.create_dataset('timecorr', data=timecorr, **args)
				hdf.create_dataset('cadenceno', data=cadenceno, **args)
				hdf.create_dataset('quality', data=quality, **args)
				hdf.flush()

			# Check that the time vector is sorted:
			if not np.all(hdf['time'][:-1] < hdf['time'][1:]):
				logger.error("Time vector is not sorted")
				return

			# Check that the sector reference time is within the timespan of the time vector:
			sector_reference_time_tjd = sector_reference_time - 2457000
			if sector_reference_time_tjd < hdf['time'][0] or sector_reference_time_tjd > hdf['time'][-1]:
				logger.error("Sector reference time outside timespan of data")
				#return

			# Find the reference image:
			refindx = np.searchsorted(hdf['time'], sector_reference_time_tjd, side='left')
			if refindx > 0 and (refindx == len(hdf['time']) or abs(sector_reference_time_tjd - hdf['time'][refindx-1]) < abs(sector_reference_time_tjd - hdf['time'][refindx])):
				refindx -= 1
			logger.info("WCS reference frame: %d", refindx)

			# Save WCS to the file:
			wcs.attrs['ref_frame'] = refindx

			if 'movement_kernel' not in hdf:
				# Calculate image motion:
				logger.info("Calculation Image Movement Kernels...")
				imk = ImageMovementKernel(image_ref=images['%04d' % refindx], warpmode='translation')
				kernel = np.empty((numfiles, imk.n_params), dtype='float64')

				tic = default_timer()
				if threads > 1:
					pool = multiprocessing.Pool(threads)

					datasets = _iterate_hdf_group(images)
					for k, knl in enumerate(pool.imap(imk.calc_kernel, datasets)):
						kernel[k, :] = knl
						logger.debug("Kernel: %s", knl)
						logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k+1))

					pool.close()
					pool.join()
				else:
					for k, dset in enumerate(images):
						kernel[k, :] = imk.calc_kernel(images[dset])
						logger.info("Kernel: %s", kernel[k, :])
						logger.debug("Estimate: %f sec/image", (default_timer()-tic)/(k+1))

				toc = default_timer()
				logger.info("Movement Kernel: %f sec/image", (toc-tic)/numfiles)

				# Save Image Motion Kernel to HDF5 file:
				dset = hdf.create_dataset('movement_kernel', data=kernel, **args)
				dset.attrs['warpmode'] = imk.warpmode
				dset.attrs['ref_frame'] = refindx

		logger.info("Done.")
		logger.info("Total: %f sec/image", (default_timer()-tic_total)/numfiles)