Example #1
def test_memory_leak() -> None:
    """Check that repeated bottleneck reductions do not keep growing memory.

    The peak resident set size reported by ``resource.getrusage`` is sampled
    before and after a burst of reduction calls. Because that figure can move
    for unrelated reasons, the measurement is repeated up to ``n_attempts``
    times and the test only fails when every attempt showed growth.
    """
    import resource

    arr = np.arange(1).reshape((1, 1))

    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)

        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            # Growth observed: record it and try again.
            results.append(diff_bytes)
        else:
            # One clean attempt is enough for the assertion below to hold.
            break

    assert len(results) < n_attempts
Example #2
def mad(data, sigma=True, axis=None, force=False, medval=0.0):
    """
    Return the median absolute deviation (or the absolute deviation
    about a fixed value - default zero - if force is set to True). By
    default returns the equivalent sigma. Axis functionality adapted
    from https://github.com/keflavich/agpy/blob/master/agpy/mad.py

    NaN handling is automatic: when ``data`` contains NaNs the NaN-aware
    ``nanmedian`` is used, otherwise ``np.median``.

    Parameters
    ----------
    data : np.ndarray
        Data set.
    sigma : bool, optional
        Enables std estimation from MAD.
    axis : {int, None}, optional
        Axis to evaluate MAD along. ``None`` reduces over the whole array;
        negative values count from the last axis.
    force : bool, optional
        Force the median to be a given value.
    medval : float, optional
        Forced median value.

    Returns
    -------
    mad : float or np.ndarray
        MAD estimation. If sigma is True, MAD*1.4826 is returned.
    """
    # Only resolve the NaN-aware median when the data actually contains NaNs.
    has_nans = bool(np.isnan(data).any())
    median = nanmedian if has_nans else np.median

    # Normalise negative axes so they take the same path as positive ones.
    if axis is not None and axis < 0:
        axis += np.ndim(data)

    if axis is not None and axis > 0:
        # Move the reduction axis to the front so that the per-slice median
        # broadcasts against the data when it is subtracted.
        work, reduce_axis = data.swapaxes(0, axis), 0
    else:
        work, reduce_axis = data, axis

    med = medval if force else median(work, axis=reduce_axis)
    deviation = median(np.abs(work - med), axis=reduce_axis)

    if not sigma:
        return deviation
    return deviation * 1.4826
Example #3
def theil_sen(x, y, n_samples=100000):
    """
    Computes the Theil-Sen estimator for 2D data.

    The exact estimator is O(n**2), which can be poor for large n. Instead,
    pairs of points are picked at random (with replacement) and the slope of
    each pair is computed, up to n_samples times; the estimate is the median
    of those slopes.

    Args:
        x (ndarray): 1-d np array, the control variate.
        y (ndarray): 1-d np.array, the ind variate.
        n_samples (int): how many point pairs to sample.

    Returns:
        ndarray: Two-element array ``[slope, intercept]``.

    Raises:
        ValueError: If x and y differ in length along their first axis.
    """
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must be the same shape.")
    n = x.shape[0]

    i1 = np.random.randint(0, n, n_samples)
    i2 = np.random.randint(0, n, n_samples)
    slopes = _slope(x[i1], x[i2], y[i1], y[i2])

    # NaN-aware median: the two random indices of a pair are drawn
    # independently, so a pair can consist of the same point twice.
    slope_ = nanmedian(slopes)

    # Find the optimal b as the median of y_i - slope*x_i:
    intercepts = np.asarray(y - slope_ * x, dtype='float64')
    intercept_ = nanmedian(intercepts)

    return np.array([slope_, intercept_])
Example #4
File: mad.py Project: wiai/agpy
def bottleneck_MAD(arr, c=0.6745, axis=None):
    """
    Median Absolute Deviation along given axis of an array:

    median(abs(a - median(a))) / c

    c = 0.6745 is the constant to convert from MAD to std.

    Parameters
    ----------
    arr : ndarray
        Input data; NaNs are ignored by the medians.
    c : float, optional
        Divisor applied to the absolute deviations.
    axis : {int, None}, optional
        Axis to reduce along. ``None`` reduces over the flattened array;
        negative values count from the last axis.

    Returns
    -------
    m : float or ndarray
        The scaled median absolute deviation.
    """
    from bottleneck import nanmedian
    import numpy as np

    if not arr.dtype.isnative:
        # Re-encode with native byte order ('=' prefix) before handing the
        # data to bottleneck.
        kind = str(arr.dtype.kind)
        sz = str(arr.dtype.itemsize)
        dt = '=' + kind + sz
        data = arr.astype(dt)
    else:
        data = arr

    if axis is None:
        # Whole-array reduction: flatten so the 1-d path below applies
        # (comparing None with 0 raises TypeError on Python 3).
        data = data.ravel()
    elif axis < 0:
        axis += data.ndim

    # NOTE(review): `ma` is not imported inside this function; presumably
    # `numpy.ma` is imported at module level - confirm.
    if data.ndim == 1:
        d = nanmedian(data)
        m = nanmedian(ma.fabs(data - d) / c)
    else:
        d = nanmedian(data, axis=axis)
        if axis > 0:
            # Bring the reduction axis to the front so `d` broadcasts.
            aswp = np.swapaxes(data, 0, axis)
        else:
            aswp = data
        m = nanmedian(ma.fabs(aswp - d) / c, axis=0)

    return m
Example #5
    def smear(self, img):
        """CCD dark current and smear correction.

        Estimates the dark current from the row-medians of the masked and
        virtual smear regions, removes it from both, combines the two regions
        into a per-column smear estimate and subtracts dark current and smear
        from ``img.target_data`` in place.

        Args:
            img: Image object providing ``masked_smear``, ``virtual_smear``,
                ``target_data``, ``columns`` and ``collateral_columns``.
                ``img.dark`` is set to the estimated dark current.

        Returns:
            The same ``img`` object, modified in place.

        TODO:
            - Should we weight everything with the number of rows used in masked vs virtual regions?
            - Should we take self.frametransfer_time into account?
            - Cosmic ray rejection requires images before and after in time?
        """
        self.logger.info("Doing smear correction...")

        # Remove cosmic rays in collateral data:
        # TODO: Can cosmic rays also show up in virtual pixels? If so, also include img.virtual_smear
        #index_collateral_cosmicrays = cosmic_rays(img.masked_smear)
        # Placeholder mask (nothing flagged). It has to be boolean: an integer
        # array of zeros would be taken as indices, not as a mask.
        index_collateral_cosmicrays = np.zeros_like(img.masked_smear,
                                                    dtype='bool')
        img.masked_smear[index_collateral_cosmicrays] = np.nan

        # Average the masked and virtual smear across their rows:
        masked_smear = nanmedian(img.masked_smear, axis=0)
        virtual_smear = nanmedian(img.virtual_smear, axis=0)

        # Estimate dark current:
        # TODO: Should this be self.frametransfer_time?
        # NOTE(review): the divisor was cut off in the visible source; it is
        # restored from the matching ratio used for `virtual_smear` below.
        fdark = nanmedian(masked_smear - virtual_smear *
                          (self.exposure_time + self.readout_time) /
                          self.exposure_time)
        img.dark = fdark  # Save for later use
        self.logger.info('Dark current: %f', img.dark)
        if np.isnan(fdark):
            # No usable estimate: apply no dark-current correction.
            fdark = 0

        # Correct the smear regions for the dark current:
        masked_smear -= fdark
        virtual_smear -= fdark * (self.exposure_time +
                                  self.readout_time) / self.exposure_time

        # Weights from number of pixels in different regions:
        Nms = np.sum(~np.isnan(img.masked_smear), axis=0)
        Nvs = np.sum(~np.isnan(img.virtual_smear), axis=0)
        n_total = np.maximum(Nms + Nvs, 1)  # avoid division by zero
        c_ms = Nms / n_total
        c_vs = Nvs / n_total

        # Weights as in Kepler where you only have one row in each sector:
        #g_ms = ~np.isnan(masked_smear)
        #g_vs = ~np.isnan(virtual_smear)
        #c_ms = g_ms/np.maximum(g_ms + g_vs, 1)
        #c_vs = g_vs/np.maximum(g_ms + g_vs, 1)

        # Estimate the smear for all columns, taking into account
        # that some columns could be missing:
        replace(masked_smear, np.nan, 0)
        replace(virtual_smear, np.nan, 0)
        fsmear = c_ms * masked_smear + c_vs * virtual_smear

        # Correct the science pixels for dark current and smear:
        img.target_data -= fdark
        for k, col in enumerate(img.collateral_columns):
            img.target_data[img.columns == col] -= fsmear[k]

        return img
Example #6
File: mad.py Project: Fade89/agpy
def bottleneck_MAD(arr, c=0.6745, axis=None):
    """
    Median Absolute Deviation along given axis of an array:

    median(abs(a - median(a))) / c

    c = 0.6745 is the constant to convert from MAD to std.

    Parameters
    ----------
    arr : ndarray
        Input data; NaNs are ignored by the medians.
    c : float, optional
        Divisor applied to the absolute deviations.
    axis : {int, None}, optional
        Axis to reduce along. ``None`` reduces over the flattened array;
        negative values count from the last axis.

    Returns
    -------
    m : float or ndarray
        The scaled median absolute deviation.
    """
    from bottleneck import nanmedian
    import numpy as np

    if arr.dtype.isnative:
        data = arr
    else:
        # Convert to the same kind and item size in native byte order.
        native_dtype = '=' + str(arr.dtype.kind) + str(arr.dtype.itemsize)
        data = arr.astype(native_dtype)

    if axis is None:
        # `None > 0` is a TypeError on Python 3; reduce the flattened data.
        data = data.ravel()
    elif axis < 0:
        axis += data.ndim

    # NOTE(review): `ma` is not imported inside this function; presumably
    # `numpy.ma` is imported at module level - confirm.
    if data.ndim == 1:
        d = nanmedian(data)
        return nanmedian(ma.fabs(data - d) / c)

    d = nanmedian(data, axis=axis)
    # With the reduction axis in front, `d` broadcasts against the data.
    aswp = np.swapaxes(data, 0, axis) if axis > 0 else data
    return nanmedian(ma.fabs(aswp - d) / c, axis=0)
Example #7
def _median_central(x, width_points):
    """Moving median of ``x`` shifted by half a window, with patched ends.

    The ``move_median`` output is shifted left by ``width_points // 2``
    samples and padded with NaN to keep the length. The first and last
    ``width_points // 2`` values are then overwritten with NaN-aware medians
    of growing windows (1, 3, 5, ... samples) anchored at the array ends.
    """
    half = width_points // 2
    moved = move_median(x, width_points, min_count=1)
    centred = append(moved[half:], [NaN] * half)
    for k in range(half):
        span = 2 * k + 1
        centred[k] = nanmedian(x[:span])
        centred[-(k + 1)] = nanmedian(x[-span:])
    return centred
Example #8
def _move_median_central_1d(x, width_points):
    """Moving median of ``x`` rolled towards the window centre.

    The ``move_median`` output is rolled by ``-width_points // 2 + 1``
    samples. The first and last ``width_points // 2 + 1`` values are then
    replaced by NaN-aware medians of growing windows (2, 3, 4, ... samples)
    anchored at the array ends.
    """
    shift = -width_points // 2 + 1
    y = np.roll(move_median(x, width_points, min_count=1), shift)
    n_edge = width_points // 2 + 1
    for k in range(n_edge):
        span = k + 2
        y[k] = nanmedian(x[:span])
        y[-(k + 1)] = nanmedian(x[-span:])
    return y
def robust_median_filter(flux, size=375):
    """Running NaN-aware median along the spectral (x) direction.

    For each pixel ``i`` the median is taken over the window
    ``[i - (size-1)/2, i + (size-1)/2]``, clipped to the array bounds.

    Parameters
    ----------
    flux : array
        1D spectrum, or 2D spectrum of shape (ny, nx). For 2D input each
        window spans all rows, so one value per column is returned.
    size : int, optional
        Window width in pixels; even values are increased by one.

    Returns
    -------
    ndarray
        Array of length nx holding the smoothed median spectrum.
    """
    if size % 2 == 0:
        size = size + 1  # Make even results odd
    half = int((size - 1) / 2)

    is_2d = np.ndim(flux) == 2
    nx = np.shape(flux)[1] if is_2d else len(flux)  # Number of pixels along x

    median_result = np.zeros(nx)  # Stores the smoothed median spectrum
    for i in range(nx):  # Run the median down the spectrum, pixel by pixel
        x_left = max(i - half, 0)
        x_right = min(i + half + 1, nx)
        window = flux[:, x_left:x_right] if is_2d else flux[x_left:x_right]
        median_result[i] = bn.nanmedian(window)
    return median_result
Example #10
def rms_timescale(time, flux, timescale=3600 / 86400):
    """
    Compute robust RMS on specified timescale. Using MAD scaled to RMS.

    Parameters:
        time (ndarray): Timestamps in days.
        flux (ndarray): Flux to calculate RMS for.
        timescale (float, optional): Timescale to bin timeseries before calculating RMS. Default=1 hour.

    Returns:
        float: Robust RMS on specified timescale.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """
    # Construct the bin edges separated by the timescale:
    bins = np.arange(np.nanmin(time), np.nanmax(time), timescale)
    bins = np.append(bins, np.nanmax(time))

    # Bin the timeseries to the requested timescale, using only points where
    # both the timestamp and the flux are finite.
    # NOTE(review): the arguments after `time[indx]` were cut off in the
    # visible source; a per-bin mean of the flux is assumed - confirm.
    indx = np.isfinite(time) & np.isfinite(flux)
    flux_bin, _, _ = binned_statistic(time[indx], flux[indx],
                                      statistic='mean', bins=bins)

    # Compute robust RMS value (MAD scaled to RMS)
    return mad_to_sigma * nanmedian(np.abs(flux_bin - nanmedian(flux_bin)))
Example #11
def mad(a, c=0.6745, axis=None):
    """
    Compute the median absolute deviation along the specified axis.

    median(abs(a - median(a))) / c

    Returns the median absolute deviation of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array. It is not
        modified.
    c : float, optional
        The scaling factor applied to the raw median absolute deviation.
        The default is to scale to match the standard deviation.
    axis : int, optional
        Axis along which the medians are computed. The default (axis=None)
        is to compute the median along a flattened version of the array.

    Returns
    -------
    mad : ndarray
        A new array holding the result.
    """
    def _mad_1d(values):
        # NaN-aware MAD of a one-dimensional slice.
        return nanmedian(np.fabs(values - nanmedian(values))) / c

    if axis is None:
        # Flatten without reassigning `a.shape`, which would mutate the
        # caller's array in place.
        return _mad_1d(np.ravel(a))
    return np.apply_along_axis(_mad_1d, axis, a)
Example #12
def test_memory_leak():
    """Assert that a burst of bottleneck reductions leaves peak RSS unchanged."""
    import resource

    def peak_rss():
        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    arr = np.arange(1).reshape((1, 1))

    # Same calls, in the same order, as one pass of the inner loop body.
    reductions = (
        bn.nansum,
        bn.nanargmax,
        bn.nanargmin,
        bn.nanmedian,
        bn.nansum,
        bn.nanmean,
        bn.nanmin,
        bn.nanmax,
        bn.nanvar,
    )

    starting = peak_rss()

    for _ in range(1000):
        for axis in (None, 0, 1):
            for reduce in reductions:
                reduce(arr, axis=axis)

    ending = peak_rss()

    # For 1.3.0 release, this had value of ~100kB
    diff_bytes = (ending - starting) * resource.getpagesize()
    assert diff_bytes == 0
Example #13
def _crude_sky_level(pixels, nsimul, bootmedian):
    """Return the sky-level dictionary for ``pixels``."""
    if bootmedian:
        return bm.bootmedian(sample_input=pixels, nsimul=nsimul, errors=1)

    median_sky = bn.nanmedian(pixels)
    #sigma_sky = bn.nanstd(pixels)
    sigma_sky = 0
    return {"median": median_sky,
            "s1_up": median_sky+sigma_sky,
            "s1_down": median_sky-sigma_sky,
            "std1_down": sigma_sky,
            "std1_up": sigma_sky}


def crude_skycor(fitslist, ext, mask=None, nsimul=100, noisechisel_grad=False, bootmedian=True):
    """Subtract a median sky level from one extension of each FITS file.

    Parameters
    ----------
    fitslist : str or list of str
        FITS file name, or list of names. Other types are ignored.
    ext : int or str
        Extension whose data is sky-subtracted.
    mask : str, optional
        Name of a FITS file whose primary HDU is a mask; pixels where the
        mask is NaN are left out of the sky estimate. A (1014, 1014) mask
        applied to a (1024, 1024) image is centred in a zero-filled frame
        with a 5 pixel border, so the border pixels stay in the estimate.
    nsimul : int, optional
        Number of simulations passed to ``bm.bootmedian``.
    noisechisel_grad : bool, optional
        Not used by the visible code.
    bootmedian : bool, optional
        If True use ``bm.bootmedian``; otherwise a plain NaN-aware median
        with zero uncertainty.

    Notes
    -----
    The sky level and its uncertainty are stored in the ``SKYLVL`` and
    ``SKYSTD`` keywords of both the primary header and the header of ``ext``.
    """
    if isinstance(fitslist, str):
        fitslist = [fitslist]
    if isinstance(fitslist, list):
        for fits_name in fitslist:
            fits_image = fits.open(fits_name)

            if mask is not None:
                print("Input mask accepted: " + mask)
                mask_fits = fits.open(mask)
                shape_mask = mask_fits[0].data.shape
                shape_fits = fits_image[ext].data.shape
                mask_array = mask_fits[0].data
                if shape_mask == (1014, 1014) and shape_fits == (1024, 1024):
                    mask_array = np.zeros(shape_fits)
                    border = 5
                    # NOTE(review): the row slice was cut off in the visible
                    # source; it is assumed to mirror the column slice.
                    mask_array[0+border:1024-border,
                               0+border:1024-border] = mask_fits[0].data
                sky_pixels = fits_image[ext].data[~np.isnan(mask_array)]
            else:
                sky_pixels = fits_image[ext].data

            skylvl = _crude_sky_level(sky_pixels, nsimul, bootmedian)

            print(np.abs(skylvl["median"] - skylvl["s1_up"])/2.)
            print("Skylvl: " + str(skylvl["median"]) + " +/- " + str(np.abs(skylvl["s1_up"] - skylvl["s1_down"])/2.))
            fits_image[ext].data = fits_image[ext].data - skylvl["median"]
            fits_image[0].header['SKYSTD'] = skylvl["std1_down"]
            fits_image[0].header['SKYLVL'] = skylvl["median"]
            fits_image[ext].header['SKYSTD'] = skylvl["std1_down"]
            fits_image[ext].header['SKYLVL'] = skylvl["median"]
            # Remove the file without a shell, so that names containing
            # spaces or shell metacharacters are handled safely.
            # NOTE(review): the visible source ends here, after deleting the
            # input and before any write of the corrected image - confirm
            # the write-back step that presumably follows.
            os.remove(fits_name)
Example #14
def factor_outlierlimit(factor, n_extremum=5):
    """Clip each row of ``factor`` to median +/- ``n_extremum`` row-MADs.

    The median and the median absolute deviation are computed per row
    (axis=1), ignoring NaNs. The result is a new DataFrame with the same
    index and columns as ``factor``.
    """
    values = factor.values
    centre = bn.nanmedian(values, axis=1).reshape(-1, 1)
    spread = bn.nanmedian(abs(values - centre), axis=1).reshape(-1, 1)
    half_width = n_extremum * spread
    # Suppress "invalid value" warnings while clipping.
    with np.errstate(invalid='ignore'):
        clipped = np.clip(values, centre - half_width, centre + half_width)
    return pd.DataFrame(clipped, factor.index, factor.columns)
Example #15
def _get_fluctuations(image_array, correction='median'):

    # Calculate the mean PV for each image
    if correction == 'mean':
        mean_values = bn.nanmean(bn.nanmean(image_array, axis=1), axis=1)
    elif correction == 'median':
        mean_values = bn.nanmedian(bn.nanmedian(image_array, axis=1), axis=1)

    return mean_values
Example #16
def k2p2_saturated(SumImage, MASKS, idx):
    """Find pixels in saturated columns that should be added to each mask.

    For every column spanned by a mask, the ratio
    ``|median(diff(pixels))| / max(pixels)`` is computed for the mask pixels
    in that column. If the ratio is below 0.01 and the median pixel value is
    at least half the maximum of the mask, the column is treated as
    saturated, and the pixels of ``idx`` in that column that are connected
    to the brightest mask pixel of the column are flagged.

    Parameters
    ----------
    SumImage : ndarray
        2D summed image.
    MASKS : ndarray
        Stack of masks, indexed as ``MASKS[u, :, :]``.
    idx : ndarray of bool
        Image-shaped boolean array of candidate pixels; presumably pixels
        with significant flux - confirm with caller.

    Returns
    -------
    saturated_mask : ndarray of bool
        Same shape as ``MASKS``; True where a pixel was flagged.
    pixels_added : int
        Net number of pixels added, summed over all masks and columns.
    """
    # Get logger for printing messages:
    logger = logging.getLogger(__name__)

    no_masks = MASKS.shape[0]

    column_mask = np.zeros_like(SumImage, dtype='bool')
    saturated_mask = np.zeros_like(MASKS, dtype='bool')
    pixels_added = 0

    # Loop through the different masks:
    for u in range(no_masks):
        # Create binary version of mask and extract
        # the rows and columns which it spans and
        # the highest value in it:
        mask = np.asarray(MASKS[u, :, :], dtype='bool')
        mask_rows, mask_columns = np.where(mask)
        mask_max = np.nanmax(SumImage[mask])

        # Loop through the columns of the mask:
        for c in set(mask_columns):

            column_mask[:, c] = True

            # Extract the pixels that are in this column and in the mask:
            pixels = SumImage[mask & column_mask]

            # Calculate ratio as defined in Lund & Handberg (2014):
            ratio = np.abs(nanmedian(np.diff(pixels))) / np.nanmax(pixels)
            if ratio < 0.01 and nanmedian(pixels) >= mask_max / 2:
                logger.debug("Column %d - RATIO = %f - Saturated", c, ratio)

                # Has significant flux and is in saturated column:
                add_to_mask = (idx & column_mask)

                # Make sure the pixels we add are directly connected to the highest flux pixel:
                new_mask_labels, numfeatures = ndimage.label(add_to_mask)
                imax = np.unravel_index(
                    np.nanargmax(SumImage * mask * column_mask),
                    SumImage.shape)
                add_to_mask &= (new_mask_labels == new_mask_labels[imax])

                # Modify the mask:
                n_new = np.sum(add_to_mask) - np.sum(mask[column_mask])
                pixels_added += n_new
                logger.debug("  %d pixels should be added to column %d",
                             n_new, c)
                saturated_mask[u][add_to_mask] = True
            else:
                logger.debug("Column %d - RATIO = %f", c, ratio)

            # Reset the scratch column selector for the next column.
            column_mask[:, c] = False

    return saturated_mask, pixels_added
Example #17
def _nanmedian(array, axis=None):
    """Bottleneck nanmedian function that handles tuple axis.

    A tuple ``axis`` is handled by moving those axes to the front with
    ``_move_tuple_axes_first`` and reducing along axis 0. ``Quantity`` input
    is re-wrapped with ``array.__array_wrap__``; any other input gets the
    plain bottleneck result.
    """
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0

    result = bottleneck.nanmedian(array, axis=axis)
    if isinstance(array, Quantity):
        return array.__array_wrap__(result)
    return result
Example #18
def _nanmedian(array, axis=None):
    """Bottleneck nanmedian function that handles tuple axis.

    Tuple axes are first moved to the front (``_move_tuple_axes_first``) and
    then reduced as axis 0. The result is passed through
    ``array.__array_wrap__`` when ``array`` is a ``Quantity`` and returned
    unchanged otherwise.
    """
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0

    if isinstance(array, Quantity):
        return array.__array_wrap__(bottleneck.nanmedian(array, axis=axis))
    # Non-Quantity input must return the median too, not fall through to None.
    return bottleneck.nanmedian(array, axis=axis)
Example #19
def mad(x):
    """
    Median absolute deviation scaled to standard deviation.

    Parameters:
        x (ndarray): Array to calculate robust standard deviation for.

    Returns:
        float: Median absolute deviation scaled to standard deviation.
    """
    return mad_to_sigma * nanmedian(np.abs(x - nanmedian(x)))
Example #20
def bootfit(x, y, nsimul, errors=1):
    """Bootstrap a straight-line fit and summarise slope and intercept.

    ``boot_polyfit`` is run ``nsimul`` times in parallel (4 processes) on
    ``(x, y, r)``, where ``r`` is a random integer below ``100 * nsimul``
    (presumably a seed for the resampling - confirm). Column 0 of the
    results is taken as the slope ``m`` and column 1 as the intercept ``b``.

    Parameters
    ----------
    x, y : array_like
        Data passed unchanged to every ``boot_polyfit`` call.
    nsimul : int
        Number of bootstrap simulations.
    errors : int, optional
        If 1, the ``s1``/``s2``/``s3`` up/down limits are percentiles of the
        bootstrap distributions (module-level ``s*_q`` quantiles); otherwise
        they are reported as 0.

    Returns
    -------
    dict
        Keys ``m_median``, ``b_median`` and ``{m,b}_s{1,2,3}_{up,down}``.
    """
    seeds = np.random.randint(0, 100*nsimul, nsimul)
    boot_polyfit_results = miniutils.parallel_progbar(boot_polyfit,
                                                      zip([x]*nsimul, [y]*nsimul, seeds),
                                                      nprocs=4, starmap=True)
    fit_results = np.array(boot_polyfit_results)
    m_array = fit_results[:, 0]
    b_array = fit_results[:, 1]

    labels = ("s1_up", "s1_down", "s2_up", "s2_down", "s3_up", "s3_down")
    if errors == 1:
        quantiles = (s1_up_q, s1_down_q, s2_up_q, s2_down_q, s3_up_q, s3_down_q)
        m_limits = [np.percentile(m_array, q*100) for q in quantiles]
        b_limits = [np.percentile(b_array, q*100) for q in quantiles]
    else:
        # No error estimation requested: report zeros under the same keys,
        # so the output dictionary can always be built.
        m_limits = [0] * len(labels)
        b_limits = [0] * len(labels)

    output = {"m_median": bn.nanmedian(m_array),
              "b_median": bn.nanmedian(b_array)}
    for label, m_limit, b_limit in zip(labels, m_limits, b_limits):
        output["m_" + label] = m_limit
        output["b_" + label] = b_limit
    return output
Example #21
    def fit(self, flux, Ncbvs=2, sigma_clip=4.0, maxiter=3):
        """Fit models with an increasing number of CBVs and keep the best one.

        For every count from 0 to ``self.cbv.shape[1]`` a fit is run with
        iterative sigma clipping, and its Bayesian Information Criterion is
        stored. The model (``self.mdl``) evaluated at the solution with the
        lowest BIC is returned.

        Note that the ``Ncbvs`` argument is overwritten by the loop variable
        of the same name, so the value passed in is not used.

        NOTE(review): in the visible source the ``minimize(...)`` and
        ``np.greater(...)`` calls are cut off before their closing
        parenthesis, and nothing is ever appended to ``solutions`` although
        it is indexed at the end. Restore these from the original file before
        relying on this method.
        """

        # Find the median flux, as it is used for
        # initial guesses later on:
        median_flux = nanmedian(flux)

        # Start looping over the number of CBVs to include:
        bic = np.empty(self.cbv.shape[1] + 1, dtype='float64')
        solutions = []
        for Ncbvs in range(self.cbv.shape[1] + 1):

            # Initial guesses for coefficients:
            # (zeros for the first Ncbvs entries, median flux for the last)
            coeffs0 = np.zeros(Ncbvs + 1, dtype='float64')
            coeffs0[-1] = median_flux

            iters = 0
            # Work on a copy so clipped points can be set to NaN without
            # touching the caller's array.
            fluxi = np.copy(flux)
            while iters <= maxiter:
                iters += 1

                # Do the fit:
                # NOTE(review): call is truncated - remaining arguments missing.
                res = minimize(self._lhood,
                               args=(fluxi, ),
                flux_filter = self.mdl(res.x)

                # Do robust sigma clipping:
                absdev = np.abs(fluxi - flux_filter)
                mad = 1.4826 * nanmedian(absdev)
                # NOTE(review): call is truncated - remaining arguments missing.
                indx = np.greater(absdev,
                                  sigma_clip * mad,
                if np.any(indx):
                    fluxi[indx] = np.nan

            # Calculate the Bayesian Information Criterion (BIC) and store the solution:
            # log(number of finite points) * number of coefficients + res.fun
            bic[Ncbvs] = np.log(np.sum(
                np.isfinite(fluxi))) * len(coeffs0) + res.fun

        # Use the solution which minimizes the BIC:
        indx = np.argmin(bic)
        flux_filter = self.mdl(solutions[indx].x)

        #plt.plot(bic, '.-')

        return flux_filter
Example #22
def nanmedian(array, axis=None):
    """
    A nanmedian function that uses bottleneck if available.

    With bottleneck, a tuple ``axis`` is handled by moving those axes to the
    front and reducing along axis 0, and ``Quantity`` input is re-wrapped
    with ``array.__array_wrap__``. Without it, ``np.nanmedian`` is used.
    """
    # NOTE(review): the line guarding the bottleneck branch is missing from
    # the visible source. Availability is probed directly here; if the module
    # defines an explicit availability flag, use that instead.
    try:
        bn_nanmedian = bn.nanmedian
    except (NameError, AttributeError):
        return np.nanmedian(array, axis=axis)

    if isinstance(axis, tuple):
        array = move_tuple_axes_first(array, axis=axis)
        axis = 0

    if isinstance(array, u.Quantity):
        return array.__array_wrap__(bn_nanmedian(array, axis=axis))
    return bn_nanmedian(array, axis=axis)
Example #23
def nanmad(data, sigma=True, axis=None):
    """
    Return the median absolute deviation.  Axis functionality adapted
    from https://github.com/keflavich/agpy/blob/master/agpy/mad.py

    Parameters
    ----------
    data : np.ndarray
        Data set; NaNs are ignored by the medians.
    sigma : bool, optional
        If True (default) the MAD is multiplied by 1.4826.
    axis : {int, None}, optional
        Axis to evaluate the MAD along. ``None`` is passed on to
        ``nanmedian`` unchanged; negative values count from the last axis.

    Returns
    -------
    float or np.ndarray
        The MAD, scaled by 1.4826 when ``sigma`` is True.
    """
    if axis is not None and axis < 0:
        axis += data.ndim

    if axis is not None and axis > 0:
        # Put the reduction axis first so the median broadcasts on subtraction.
        work, reduce_axis = data.swapaxes(0, axis), 0
    else:
        work, reduce_axis = data, axis

    med = nanmedian(work, axis=reduce_axis)
    deviation = nanmedian(np.abs(work - med), axis=reduce_axis)

    if not sigma:
        return deviation
    return deviation*1.4826
Example #24
    def entropy_cleaning(self, matrix, targ_limit=150):
        """
        Entropy-cleaning of lightcurve matrix using the SVD U-matrix.

        While any component has an entropy below ``self.threshold_entropy``,
        the row that deviates most (in robust sigmas) in the first such
        component is removed from the matrix and the decomposition is redone.

        Parameters:
            matrix (:class:`numpy.ndarray`): Lightcurve matrix; rows are
                removed, so presumably one row per target - confirm.
            targ_limit (int, optional): Maximum number of targets to remove during cleaning.

        Returns:
            :class:`numpy.ndarray`: The matrix with the removed rows left out.

        .. codeauthor:: Mikkel N. Lund <*****@*****.**>
        """
        logger = logging.getLogger(__name__)

        # Calculate the principle components:
        pca = PCA(self.ncomponents, random_state=self.random_state)
        U, _, _ = pca._fit(matrix)

        ent = compute_entropy(U)
        logger.info('Entropy start: %s', ent)

        targets_removed = 0
        components = np.arange(self.ncomponents)

        with np.errstate(invalid='ignore'):
            while np.any(ent < self.threshold_entropy):
                com = components[ent < self.threshold_entropy][0]

                # Remove highest relative weight target
                m = nanmedian(U[:, com])
                s = mad_to_sigma * nanmedian(np.abs(U[:, com] - m))
                dev = np.abs(U[:, com] - m) / s

                idx0 = np.argmax(dev)

                # Remove the star from the lightcurve matrix:
                star_no = np.ones(U.shape[0], dtype=bool)
                star_no[idx0] = False
                matrix = matrix[star_no, :]

                targets_removed += 1
                if targets_removed >= targ_limit:
                    # Stop once the removal budget is used up.
                    break

                U, _, _ = pca._fit(matrix)
                ent = compute_entropy(U)

        logger.info('Entropy end: %s', ent)
        logger.info('Targets removed: %d', targets_removed)
        return matrix
Example #25
def singleVar(content):
    # NOTE(review): this definition is cut off in the visible source - the
    # dict(...) call below is never closed. Only the `min` entry and the start
    # of the `valid` entry (count of finite values times 100, divided by a
    # missing denominator) can be seen.
    return dict(min=nanmin(content),
                valid=numpy.sum(numpy.isfinite(content)) * 100.0 /
Example #26
def moving_nanmedian_cyclic(t, x, w, dt=None):
    """
    Calculate cyclic moving median of input with given window (in t-units)
    taking into account NaNs in the data.

    Parameters:
        t: Sample positions; only used for the length check and to derive dt.
        x: Values to filter, same length as t.
        w: Window width in the units of t.
        dt: Sample spacing. Defaults to the median difference between
            consecutive values of t.

    Returns:
        The filtered series. ``x`` itself is returned unchanged when the
        window spans at most one point; a constant array holding the overall
        NaN-aware median is returned when half the window covers the series.

    Raises:
        ValueError: If t and x differ in length.
    """
    if len(t) != len(x):
        raise ValueError("t and x must have the same length.")
    if dt is None:
        dt = median(np.diff(t))
    # Calculate width of filter:
    width_points = int(w / dt)
    if width_points <= 1:
        return x
    if width_points % 2 == 0:
        width_points += 1  # Filter is much faster when using an odd number of points!
    wh = width_points // 2
    N = len(x)
    if wh >= N:
        return np.zeros_like(x) + nanmedian(x)
    # Stitch ends onto the array, skipping the two end samples themselves:
    xny = np.concatenate((x[-wh - 1:N - 1], x, x[1:wh + 1]))
    # Run moving median on longer series:
    N = len(xny)
    y = _median_central(xny, width_points)
    # Cut out the central part again:
    y = y[wh:N - wh]
    return y
Example #27
def weighted_mean(_line):
    """Iteratively re-weighted mean of each row of a 2D array.

    Each pixel is weighted by ``|std / (value - centre)|``, first about the
    row median and then, for three iterations, about the weighted row mean
    with the weighted standard deviation. Infinite weights (value equal to
    the centre) are replaced by ``max_weight``; NaNs are ignored by the sums.

    Parameters
    ----------
    _line : ndarray
        2D array; the statistics are computed along axis 1.

    Returns
    -------
    ndarray
        One weighted mean per row.
    """
    max_weight = 50
    n_rows, n_cols = _line.shape[0], _line.shape[1]

    def as_2d(per_row):
        # Repeat a per-row value across all columns of that row.
        return per_row.reshape(n_rows, 1).repeat(n_cols, axis=1)

    def weights_about(centre_2d, std_2d):
        weight = numpy.fabs(std_2d / (_line - centre_2d))
        #    weight[weight > max_weight] = max_weight
        weight[numpy.isinf(weight)] = max_weight
        return weight

    weight_2d = weights_about(as_2d(bottleneck.nanmedian(_line, axis=1)),
                              as_2d(bottleneck.nanstd(_line, axis=1)))

    for _ in range(3):
        weight_sum = bottleneck.nansum(weight_2d, axis=1)
        avg = bottleneck.nansum(_line * weight_2d, axis=1) / weight_sum
        avg_2d = as_2d(avg)

        std = numpy.sqrt(
            bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1) /
            weight_sum)

        weight_2d = weights_about(avg_2d, as_2d(std))

    return bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(
        weight_2d, axis=1)
Example #28
def _nanmedian(array, axis=None):
    """Bottleneck nanmedian that also accepts a tuple of axes."""
    if not isinstance(axis, tuple):
        return bottleneck.nanmedian(array, axis=axis)

    # Move the requested axes to the front and reduce along axis 0.
    moved = _move_tuple_axes_first(array, axis=axis)
    return bottleneck.nanmedian(moved, axis=0)
Example #29
def _nanmedian(array, axis=None):
    """NaN-aware median via bottleneck, with support for a tuple ``axis``."""
    reduce_axis = axis
    if isinstance(axis, tuple):
        # Tuple axes are moved to the front and reduced as axis 0.
        array = _move_tuple_axes_first(array, axis=axis)
        reduce_axis = 0
    return bottleneck.nanmedian(array, axis=reduce_axis)
Example #30
def theil_sen(x, y, sample= "auto", n_samples = 1e7):
    """Theil-Sen line fit: median of pairwise slopes, median intercept.

    Parameters
    ----------
    x, y : ndarray
        1-d arrays of equal length.
    sample : optional
        If falsy, or if there are fewer than 100 points, every pair of
        points is used. Otherwise ``n_samples`` random pairs are drawn.
    n_samples : number, optional
        Number of random pairs to draw when sampling.

    Returns
    -------
    ndarray
        Two-element array ``[slope, intercept]``.
    """
    assert x.shape[0] == y.shape[0]
    n = x.shape[0]
    if n < 100 or not sample:
        # Exhaustive: one slope per unordered pair, visited in x-sorted order.
        ix = np.argsort( x )
        slopes = np.empty(n*(n-1)//2)
        for c, pair in enumerate(itertools.combinations(range(n), 2)):
            i,j = ix[pair[0]], ix[pair[1]]
            slopes[c] = slope(x[i], x[j], y[i], y[j])
    else:
        # Sampled: random pairs, drawn with replacement.
        i1 = np.random.randint(int(0), int(n), int(n_samples))
        i2 = np.random.randint(int(0), int(n), int(n_samples))
        slopes = slope(x[i1], x[i2], y[i1], y[i2])

    slope_ = bottleneck.nanmedian(slopes)
    #find the optimal b as the median of y_i - slope*x_i
    intercepts = np.asarray(y - slope_*x, dtype='float64')
    intercept_ = bottleneck.median(intercepts)

    return np.array([slope_, intercept_])
Example #31
def demedian(arr, axis=None):
    """
    Subtract the median along the specified axis.

    Parameters
    ----------
    arr : ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to remove the median. The default (None) is
        to subtract the median of the flattened array.

    Returns
    -------
    y : ndarray
        A copy with the median along the specified axis removed.

    Examples
    --------
    >>> arr = np.array([1, np.nan, 2, 10])
    >>> demedian(arr)
    array([-1., nan,  0.,  8.])
    """
    marr = bn.nanmedian(arr, axis)
    if axis is not None and axis != 0 and not np.isscalar(marr):
        # Re-insert the reduced axis with length one so that the medians
        # broadcast against `arr`. (Axis 0 already broadcasts as is.)
        marr = np.expand_dims(marr, axis)
    return arr - marr
Example #32
def weighted_mean(_line):
    """Row-wise mean with weights ``|std / (value - centre)|``, refined 3 times.

    The first centre is the row median with the plain row standard
    deviation; each refinement uses the weighted row mean and the weighted
    standard deviation. Infinite weights are set to ``max_weight``.
    """
    max_weight = 50
    rows, cols = _line.shape[0], _line.shape[1]

    def spread(per_row):
        # One value per row, repeated across every column of that row.
        return numpy.repeat(per_row.reshape(rows, 1), cols, axis=1)

    def reweight(centre_2d, sigma_2d):
        w = numpy.fabs(sigma_2d / (_line - centre_2d))
        w[numpy.isinf(w)] = max_weight
        return w

    def weighted_row_mean(w):
        return bottleneck.nansum(_line*w, axis=1)/bottleneck.nansum(w, axis=1)

    median_2d = spread(bottleneck.nanmedian(_line, axis=1))
    std_2d = spread(bottleneck.nanstd(_line, axis=1))
    weight_2d = reweight(median_2d, std_2d)

    for _ in range(3):
        avg_2d = spread(weighted_row_mean(weight_2d))
        variance = bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1)/bottleneck.nansum(weight_2d, axis=1)
        std_2d = spread(numpy.sqrt(variance))
        weight_2d = reweight(avg_2d, std_2d)

    return weighted_row_mean(weight_2d)
Example #33
def demedian(arr, axis=None):
    """
    Subtract the median along the specified axis.

    Parameters
    ----------
    arr : ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to remove the median. The default (None) is
        to subtract the median of the flattened array.

    Returns
    -------
    y : ndarray
        A copy with the median along the specified axis removed.

    Examples
    --------
    >>> arr = np.array([1, np.nan, 2, 10])
    >>> demedian(arr)
    array([-1., nan,  0.,  8.])
    """
    marr = bn.nanmedian(arr, axis)
    if (axis != 0) and (axis is not None) and (not np.isscalar(marr)):
        ind = [slice(None)] * arr.ndim
        ind[axis] = np.newaxis
        # A multi-dimensional index has to be a tuple; NumPy no longer
        # accepts a list of slices here.
        marr = marr[tuple(ind)]
    return arr - marr
Example #34
def is_guide_ota(primhdu, ext, w=20, debug=False):
    """
    Decide whether an OTA extension looks like a guide OTA.

    For each of the 8x8 cells, the mean of a ``w`` x ``w`` pixel box anchored
    at the cell's (x1, y1) corner is compared with the mean of a box of
    similar size centred horizontally on the same bottom rows of the cell.
    The extension is flagged as a guide OTA when the median of the
    corner-minus-center excess over all cells exceeds ten times the
    ``SKYNOISE`` header value.

    Parameters
    ----------
    primhdu : HDU
        Primary HDU; its header must provide ``BINNING``, ``SKYLEVEL`` and
        ``SKYNOISE``.
    ext : HDU
        Extension to test; ``ext.name`` and ``ext.data`` are read.
    w : int, optional
        Edge length in pixels of the sampling boxes.
    debug : bool, optional
        If true, print the per-cell values and return diagnostics as well.

    Returns
    -------
    bool or tuple
        ``False`` if ``ext`` is not an image extension.  Otherwise the
        guide-OTA flag, or, when ``debug`` is true, the tuple
        ``(flag, excesses, mean, median, skynoise, corner_hdu, center_hdu)``.
    """
    logger = logging.getLogger("IsGuideOTA")

    binning = primhdu.header['BINNING']
    skylevel = primhdu.header['SKYLEVEL']
    skynoise = primhdu.header['SKYNOISE']

    logger.debug("Checking OTA %s (bin=%d, sky=%.1f, skynoise=%.2f)",
                 ext.name, binning, skylevel, skynoise)

    if not is_image_extension(ext):
        logger.debug("extension is not a valid image extension")
        return False

    # One excess value per cell; cells never filled stay NaN and are ignored
    # by the nan-aware statistics below.  (numpy.NaN no longer exists in
    # NumPy 2.0, hence numpy.nan.)
    excesses = numpy.full((8, 8), numpy.nan)

    if debug:
        center_hdu = [pyfits.PrimaryHDU()]
        corner_hdu = [pyfits.PrimaryHDU()]

    for cx, cy in itertools.product(range(8), repeat=2):

        # Get pixel coord for this cell
        x1, x2, y1, y2 = cell2ota__get_target_region(
            cx, cy, binning=binning, trimcell=0)
        x1, x2, y1, y2 = int(x1), int(x2), int(y1), int(y2)
        x21 = (x2 - x1) // 2

        corner_box = ext.data[y1:y1+w, x1:x1+w]
        center_box = ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2]

        # extract the mean value in the bottom corner
        corner = bottleneck.nanmean(corner_box.astype(numpy.float32))

        # also get the value in the bottom center
        center = bottleneck.nanmean(center_box.astype(numpy.float32))

        if debug:
            print(cx, cy, corner, center)
            corner_hdu.append(pyfits.ImageHDU(data=corner_box))
            center_hdu.append(pyfits.ImageHDU(data=center_box))

        excesses[cx, cy] = corner - center

    _mean = bottleneck.nanmean(excesses)
    _median = bottleneck.nanmedian(excesses)

    is_guideota = (_median > 10*skynoise)
    logger.debug("Found corner excess mean=%.1f, median=%.1f --> guide-OTA: %s",
                 _mean, _median, "YES" if is_guideota else "NO")

    if debug:
        return is_guideota, excesses, _mean, _median, skynoise, corner_hdu, center_hdu

    return is_guideota
Example #35
def euclid_normalize_mask(fits_list):
    """
    euclid.normalize_mask - formerly normalize_eur_mask.

    Accepts one FITS file name or a list of names, processes HDUs 1..36 of
    each file and returns the processed names as a NumPy array.

    NOTE(review): this copy of the function is incomplete (see the inline
    notes); it does not parse as written.
    """
    # A single name is wrapped so the loop below always iterates a list.
    if not isinstance(fits_list, list):
        fits_list = [fits_list]

    corrected_files = np.array([])
    for fits_name in fits_list:
        fits_file = fits.open(fits_name)
        # np.linspace(1, 36, 36) cast to int yields the HDU indices 1..36.
        for i in np.linspace(1, 36, 36).astype("int"):
            # Translate to GNUASTRO
            # Normalize as a function of width and center
            print("Normalizing CCD " + str(i))
            # execute_cmd(cmd_text="astarithmetic -h " + str(j) + " " + outname + " -h" + str(j+2) + " " + outname + " 0 gt nan where")
            # NOTE(review): np.divide receives only one argument here; the
            # dividend (presumably fits_file[i].data) seems to have been
            # lost - confirm against the original source.
            fits_file[i].data = np.divide(
                bn.nanmedian(fits_file[i].data[1798:2298, 1818:2318]))

        # NOTE(review): this `if` has no body as written; the statement(s)
        # that belonged to it are missing.  Nothing visible here writes the
        # modified HDUs back to disk or closes fits_file - TODO confirm.
        if os.path.exists(fits_name):
        execute_cmd(cmd_text="astfits -h0 " + fits_name +
                    " --update=NORMAL,True")
        corrected_files = np.append(corrected_files, fits_name)
    return (corrected_files)
Example #36
    def _estimate(self, dataset):
        """Estimate and save the displacements for the time series.

        Parameters
        ----------
        num_states_retained : int
            Number of states to retain at each time step of the HMM.
        max_displacement : array of int
            The maximum allowed displacement magnitudes in [y,x].

        Returns
        -------
            The estimated displacements and partial results of motion
            (the rest of this sentence is missing from this copy).

        NOTE(review): the two names above are not arguments of this method;
        max_displacement is read from self._params below.  Several lines of
        the body are missing (see inline notes), so it does not parse as
        written.
        """
        params = self._params
        if params['verbose']:
            print('Estimating model parameters.')
        shifts = self._estimate_shifts(dataset)
        references, variances = _whole_frame_shifting(dataset, shifts)
        # NOTE(review): an `else:` appears to be missing between the next two
        # assignments; as written the second always overwrites the first.
        if params['max_displacement'] is None:
            max_displacement = np.array(dataset.frame_shape[:3]) // 2
            max_displacement = np.array(params['max_displacement'])
        # Gain estimate from the variance/reference ratio, flattened to
        # (-1, size of the last axis); anything non-finite or non-positive
        # is rejected.
        gains = nanmedian(
            (variances / references).reshape(-1, references.shape[-1]))
        if not (np.all(np.isfinite(gains)) and np.all(gains > 0)):
            raise Exception('Failed to estimate positive gains')
        pixel_means, pixel_variances = _pixel_distribution(dataset)
        movement_model = MovementModel.estimate(shifts)
        # Two-component shifts get a leading column of zeros so that every
        # shift has three components.
        # NOTE(review): the closing `]` of this list comprehension is missing.
        if shifts[0].shape[-1] == 2:
            shifts = [
                np.concatenate([np.zeros(s.shape[:-1] + (1, ), dtype=int), s],
                               axis=-1) for s in shifts

        # Component-wise extremes of the shifts, ignoring NaNs.
        min_shifts = np.nanmin(
            [np.nanmin(s.reshape(-1, s.shape[-1]), 0) for s in shifts], 0)
        max_shifts = np.nanmax(
            [np.nanmax(s.reshape(-1, s.shape[-1]), 0) for s in shifts], 0)

        # add a bit of extra room to move around
        if max_displacement.size == 2:
            max_displacement = np.hstack(([0], max_displacement))
        # NOTE(review): the next expression is cut off after `//`.
        extra_buffer = ((max_displacement - max_shifts + min_shifts) //
        min_displacements = min_shifts - extra_buffer
        max_displacements = max_shifts + extra_buffer

        # NOTE(review): this call is truncated - some argument lines and the
        # closing parenthesis are missing.
        displacements = self._neighbor_viterbi(dataset, references, gains,
                                               max_displacements, pixel_means,

        return self._post_process(displacements)
    def fit(self, X, y):
        """Greedily rank features by mutual information.

        The first feature is the one maximising the NaN-ignoring median of
        the vectors returned by ``self._get_first_mi_vector(k)`` for
        ``k = 3 .. 9``.  Each following feature maximises that score minus
        the mean of the values returned by ``self._get_mi_vector`` against
        the features selected so far.

        Selection stops when ``self.n_features`` features are chosen, when
        no finite score is left, or, with ``n_features == 'auto'``, once
        more than ten features are selected and the mean of the last five
        smoothed first-derivative values of ``S_mi`` is below 1e-3 in
        absolute value.

        Parameters
        ----------
        X, y
            Passed to ``self._check_params``; its first result is stored as
            ``self.X`` and its second, reshaped to a column, as ``self.y``.

        Returns
        -------
        self
            With ``n_features_``, ``ranking_`` and ``mi_`` set.
        """
        X_y = self._check_params(X, y)
        self.X = X_y[0]
        self.y = X_y[1].reshape((-1, 1))
        n, p = X.shape

        S = []    # list of selected features
        # A real list (not a lazy range object) so it can be mutated.
        F = list(range(p))    # list of unselected features

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # Find the first feature
        k_min = 3
        range_k = 7
        xy_MI = np.empty((range_k, p))
        for i in range(range_k):
            xy_MI[i, :] = self._get_first_mi_vector(i + k_min)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))

        if self.verbose > 0:
            self._info_print(S, S_mi)

        # Find the next features
        if self.n_features == 'auto':
            n_features = np.inf
        else:
            n_features = self.n_features

        while len(S) < n_features:
            s = len(S) - 1
            feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1])
            fmm = feature_mi_matrix[:len(S), F]
            mean_fmm = bn.nanmean(fmm, axis=0)
            # Stop when no candidate has a usable value left.
            if bn.allnan(mean_fmm):
                break
            MRMR = xy_MI[F] - mean_fmm
            if np.isnan(MRMR).all():
                break
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)
            if self.verbose > 0:
                self._info_print(S, S_mi)
            if self.n_features == 'auto' and len(S) > 10:
                # Smoothed first derivative of the recorded values; stop
                # once its recent mean is close to zero.
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break
        self.n_features_ = len(S)
        self.ranking_ = S
        self.mi_ = S_mi

        return self
Example #38
def med_over_images(masked_arr, axis=0):
    """
    Calculate median pixel value along specified axis

    Uses bottleneck.nanmedian for speed.  Masked entries are replaced by NaN
    in a copy of the data so the NaN-ignoring median skips them.

    Parameters
    ----------
    masked_arr : numpy.ma.MaskedArray
        Input data; ``.data`` and ``.mask`` are read, the input is not
        modified.
    axis : int, optional
        Axis along which to take the median (default 0).
    """
    dat = masked_arr.data.copy()
    if not np.issubdtype(dat.dtype, np.inexact):
        # Integer/boolean data cannot hold NaN; promote to float first.
        dat = dat.astype(float)
    dat[masked_arr.mask] = np.nan
    return bn.nanmedian(dat, axis=axis)
Example #39
def theil_sen(x,y, sample= "auto", n_samples = 1e7):
    """
    Computes the Theil-Sen estimator for 2d data.

    PARAMETERS:
        x: 1-d np array, the control variate
        y: 1-d np.array, the ind variate.
        sample: if n>100, the performance can be worse, so we sample n_samples.
                Set to False to not sample.
        n_samples: how many points to sample.

    This complexity is O(n**2), which can be poor for large n. We will perform a sampling
    of data points to get an unbiased, but larger variance estimator.
    The sampling will be done by picking two points at random, and computing the slope,
    up to n_samples times.

    RETURNS:
        np.array([slope, intercept])
    """
    assert x.shape[0] == y.shape[0], "x and y must be the same shape."
    n = x.shape[0]
    if n < 100 or not sample:
        # Exact estimator: one slope per unordered pair of points.
        ix = np.argsort( x )
        # Array sizes must be integers, so use floor division.
        slopes = np.empty( n*(n-1)//2 )
        for c, pair in enumerate(itertools.combinations( range(n),2 ) ):
            i,j = ix[pair[0]], ix[pair[1]]
            slopes[c] = slope( x[i], x[j], y[i],y[j] )
    else:
        # Sampled estimator: random pairs, drawn with replacement.
        n_samples = int(n_samples)  # the default 1e7 is a float
        i1 = np.random.randint(0, n, n_samples)
        i2 = np.random.randint(0, n, n_samples)
        print('...checking for unwanted zeros...')
        # Drop pairs with identical x (including a point paired with
        # itself); their slope is undefined.  The original computed this
        # check but never applied it.
        zero_check = np.abs(x[i1] - x[i2]) != 0
        i1, i2 = i1[zero_check], i2[zero_check]
        print('...calculating slopes...')
        slopes = slope( x[i1], x[i2], y[i1], y[i2] )
        if slopes.size:
            print('slope min and max are: %s %s'
                  % (np.amin(slopes), np.amax(slopes)))
        #print histogram
    slope_ = bottleneck.nanmedian( slopes )
    print('...done! Now finding intercepts...')
    #find the optimal b as the median of y_i - slope*x_i
    intercepts = y - slope_*x

    #print histogram_i
    #print cumul_i
    intercept_ = bottleneck.median( intercepts )

    return np.array( [slope_,intercept_]) #c95[0],c95[1],c95i[0],c95i[1]] )
Example #40
File: hmm.py Project: emb2162/sima
    def _estimate(self, dataset):
        """Estimate and save the displacements for the time series.

        Parameters
        ----------
        num_states_retained : int
            Number of states to retain at each time step of the HMM.
        max_displacement : array of int
            The maximum allowed displacement magnitudes in [y,x].

        Returns
        -------
            The estimated displacements and partial results of motion
            (the rest of this sentence is missing from this copy).

        NOTE(review): the two names above are not arguments of this method;
        max_displacement is read from self._params below.  Some lines of the
        body are missing (see inline notes), so it does not parse as written.
        """
        params = self._params
        if params['verbose']:
            print('Estimating model parameters.')
        shifts = self._estimate_shifts(dataset)
        references, variances = _whole_frame_shifting(dataset, shifts)
        # NOTE(review): an `else:` appears to be missing between the next two
        # assignments; as written the second always overwrites the first.
        if params['max_displacement'] is None:
            max_displacement = np.array(dataset.frame_shape[:3]) // 2
            max_displacement = np.array(params['max_displacement'])
        # Gain estimate from the variance/reference ratio, flattened to
        # (-1, size of the last axis); anything non-finite or non-positive
        # is rejected.
        gains = nanmedian(
            (variances / references).reshape(-1, references.shape[-1]))
        if not (np.all(np.isfinite(gains)) and np.all(gains > 0)):
            raise Exception('Failed to estimate positive gains')
        pixel_means, pixel_variances = _pixel_distribution(dataset)
        movement_model = MovementModel.estimate(shifts)
        # Two-component shifts get a leading column of zeros so that every
        # shift has three components.
        if shifts[0].shape[-1] == 2:
            shifts = [np.concatenate([np.zeros(s.shape[:-1] + (1,), dtype=int),
                                      s], axis=-1) for s in shifts]

        # Component-wise extremes of the shifts, ignoring NaNs.
        min_shifts = np.nanmin([np.nanmin(s.reshape(-1, s.shape[-1]), 0)
                                for s in shifts], 0)
        max_shifts = np.nanmax([np.nanmax(s.reshape(-1, s.shape[-1]), 0)
                                for s in shifts], 0)

        # add a bit of extra room to move around
        if max_displacement.size == 2:
            max_displacement = np.hstack(([0], max_displacement))
        # NOTE(review): the closing part of the next expression is missing
        # (unbalanced parenthesis).
        extra_buffer = ((max_displacement - max_shifts + min_shifts) // 2
        min_displacements = min_shifts - extra_buffer
        max_displacements = max_shifts + extra_buffer

        displacements = self._neighbor_viterbi(
            dataset, references, gains, movement_model, min_displacements,
            max_displacements, pixel_means, pixel_variances)

        return self._post_process(displacements)
Example #41
File: hmm3d.py Project: asaich/sima
    def _estimate(self, dataset):
        """Estimate and save the displacements for the time series.

        Parameters
        ----------
        num_states_retained : int
            Number of states to retain at each time step of the HMM.
        max_displacement : array of int
            The maximum allowed displacement magnitudes in [y,x].

        Returns
        -------
            The estimated displacements and partial results of motion
            (the rest of this sentence is missing from this copy).

        NOTE(review): the two names above are not arguments of this method;
        both are read from self._params below.  Some lines of the body are
        missing (see inline notes) and the `print` statement is Python 2
        syntax.
        """
        params = self._params
        if params.verbose:
            print 'Estimating model parameters.'
        # Converts the stored limit to an array in place on the params object.
        if params.max_displacement is not None:
            params.max_displacement = np.array(params.max_displacement)
        shifts = VolumeTranslation(params.max_displacement).estimate(dataset)
        references, variances, offset = _whole_frame_shifting(dataset, shifts)
        assert np.all(offset == 0)
        # Gain estimate from the variance/reference ratio, flattened to
        # (-1, size of the last axis); anything non-finite or non-positive
        # is rejected.
        gains = nanmedian(
            (variances / references).reshape(-1, references.shape[-1]))
        if not (np.all(np.isfinite(gains)) and np.all(gains > 0)):
            raise Exception('Failed to estimate positive gains')
        pixel_means, pixel_variances = _pixel_distribution(dataset)
        movement_model = MovementModel.estimate(shifts)

        # TODO: detect unreasonable shifts before doing this calculation
        # Two levels of nesting in `shifts` are flattened before taking the
        # component-wise extremes, ignoring NaNs.
        min_shifts = np.nanmin(list(it.chain(*it.chain(*shifts))), 0)
        max_shifts = np.nanmax(list(it.chain(*it.chain(*shifts))), 0)

        # add a bit of extra room to move around
        # NOTE(review): an `else:` appears to be missing after
        # `extra_buffer = 5`, and the second assignment is cut off before
        # its closing parenthesis.
        if params.max_displacement is None:
            extra_buffer = 5
            extra_buffer = (
                (params.max_displacement - max_shifts + min_shifts) / 2
        min_displacements = (min_shifts - extra_buffer)
        max_displacements = (max_shifts + extra_buffer)

        return self._neighbor_viterbi(
            dataset, references, gains, movement_model, min_displacements,
            max_displacements, pixel_means,
            pixel_variances, params.num_states_retained, params.verbose)
def sample_background_using_ds9_regions(hdu, sky_regions):
    """
    Measure the median pixel value inside a set of rectangular sky boxes.

    Parameters
    ----------
    hdu : HDU
        Image extension; ``hdu.header`` supplies the WCS and ``hdu.data``
        the pixels.
    sky_regions : ndarray
        One row per box.  Columns 0 and 1 are passed to ``wcs.wcs2pix`` as
        the box centre; columns 2 and 3 are the full box width and height,
        in the same unit as the pixel scale computed below (presumably
        arcsec - confirm against the caller).

    Returns
    -------
    ndarray or None
        Rows of ``[x_center, y_center, median]`` (pixel coordinates) for
        every box that overlaps the image and has a finite median, or
        ``None`` if there is no such box.
    """
    wcs = astWCS.WCS(hdu.header, mode='pyfits')
    pixelscale = wcs.getPixelSizeDeg() * 3600.

    data = hdu.data

    center_xy = numpy.array(wcs.wcs2pix(sky_regions[:,0], sky_regions[:,1]))
    cx = center_xy[:,0]
    cy = center_xy[:,1]
    # Half-sizes of each box in pixels.
    width = sky_regions[:,2]/2. / pixelscale
    height = sky_regions[:,3]/2. / pixelscale

    # Keep only boxes that overlap the image area.
    in_ota = ((cx + width) > 0) & ((cx - width) < data.shape[1]) & \
             ((cy + height) > 0) & ((cy - height) < data.shape[0])
    cx = cx[in_ota]
    cy = cy[in_ota]
    w = width[in_ota]
    h = height[in_ota]

    if cx.size == 0:
        # no boxes in this OTA
        return None

    # numpy.int was removed in NumPy 1.24; the builtin int is equivalent.
    left = numpy.floor(cx - w).astype(int)
    right = numpy.ceil(cx + w).astype(int)
    top = numpy.ceil(cy + h).astype(int)
    bottom = numpy.floor(cy - h).astype(int)

    # Negative start indices would wrap around to the far edge of the array;
    # upper bounds beyond the array are already clipped by slicing.
    left[left < 0] = 0
    bottom[bottom < 0] = 0

    results = []
    for box in range(cx.shape[0]):
        cutout = data[bottom[box]:top[box], left[box]:right[box]]
        median = bottleneck.nanmedian(cutout.astype(numpy.float32))
        if numpy.isfinite(median):
            results.append([cx[box], cy[box], median])

    if not results:
        return None

    return numpy.array(results)
Example #43
File: _hmm.py Project: asaich/sima
    def _estimate(self, dataset):
        """Estimate and save the displacements for the time series.

        Parameters
        ----------
        num_states_retained : int
            Number of states to retain at each time step of the HMM.
        max_displacement : array of int
            The maximum allowed displacement magnitudes in [y,x].

        Returns
        -------
            The estimated displacements and partial results of motion
            (the rest of this sentence is missing from this copy).

        NOTE(review): the two names above are not arguments of this method;
        both are read from self._params below.  Some lines of the body are
        missing (see inline notes) and the `print` statement is Python 2
        syntax.
        """
        params = self._params
        if params.verbose:
            print 'Estimating model parameters.'
        # NOTE(review): an `else:` appears to be missing between the next two
        # assignments; as written a supplied limit is always replaced by
        # [-1, -1].  Negative entries are treated further down as "use a
        # buffer of 5".
        if params.max_displacement is not None:
            params.max_displacement = np.array(params.max_displacement)
            params.max_displacement = np.array([-1, -1])  # TODO

        # NOTE(review): this call is truncated - its closing parenthesis
        # (and presumably a following method call) is missing.
        shifts = sima.motion.frame_align.PlaneTranslation2D(
            params.max_displacement, n_processes=params.n_processes
        references, variances, offset = _whole_frame_shifting(dataset, shifts)
        # Gain estimate from the variance/reference ratio, flattened to
        # (-1, size of the last axis); must be finite and positive.
        gains = nanmedian(
            (variances / references).reshape(-1, references.shape[-1]))
        assert np.all(np.isfinite(gains)) and np.all(gains > 0)
        pixel_means, pixel_variances = _pixel_distribution(dataset)
        cov_matrix_est, decay_matrix, log_transition_matrix, mean_shift = \
            _estimate_movement_model(shifts, dataset.frame_shape[1])

        # add a bit of extra room to move around
        min_shifts = np.nanmin(list(it.chain(*it.chain(*shifts))), 0)
        max_shifts = np.nanmax(list(it.chain(*it.chain(*shifts))), 0)
        # NOTE(review): the closing part of the next expression is missing
        # (unbalanced parenthesis).
        extra_buffer = ((params.max_displacement - max_shifts + min_shifts) / 2
        # Components without a limit (negative sentinel) get a fixed buffer.
        extra_buffer[params.max_displacement < 0] = 5
        min_displacements = (min_shifts - extra_buffer)
        max_displacements = (max_shifts + extra_buffer)

        return self._neighbor_viterbi(
            dataset, log_transition_matrix, references, gains, decay_matrix,
            cov_matrix_est, mean_shift, offset, min_displacements,
            max_displacements, pixel_means, pixel_variances,
            params.num_states_retained, params.verbose)
Example #44
def monte_lines(numtrys):
    """
    Monte-Carlo estimate of the scatter of ``ADE.ADE_moments``.

    For each of ``numtrys`` trials a profile from
    ``ADE.ADE_gauss(1000, 500, 50)`` is rescaled to a peak of 55, perturbed
    with Gaussian noise of standard deviation 3, and its moments are
    computed with ``err = sqrt(|I|)``.

    Returns
    -------
    tuple of ndarray
        NaN-ignoring median and standard deviation of the moments over all
        trials (one value per moment column).
    """
    # Collect the rows and stack once; stacking inside the loop copies the
    # whole array on every iteration.  The empty (0, 3) seed keeps the
    # three-column shape even when numtrys is 0.
    rows = [np.empty((0, 3))]
    for _ in range(numtrys):
        v, I = ADE.ADE_gauss(1000,500,50)
        I *= 55/I.max()
        I += 3. * np.random.randn(I.size)
    #    ADE.eplot(v,I)
        rows.append(
            ADE.ADE_moments(v, I, threshold=np.inf, err=np.abs(I)**0.5))

    bigarr = np.vstack(rows)

    return bn.nanmedian(bigarr,axis=0), bn.nanstd(bigarr,axis=0)
def bn_median(masked_array, axis=None):
    """
    Perform fast median on masked array

    Parameters
    ----------
    masked_array : `numpy.ma.masked_array`
        Array of which to find the median.

    axis : int, optional
        Axis along which to perform the median. Default is to find the median of
        the flattened array.

    Returns
    -------
    `numpy.ma.masked_array`
        The median, masked wherever the result is NaN.
    """
    import bottleneck as bn
    if not np.issubdtype(masked_array.dtype, np.inexact):
        # Integer/boolean arrays cannot be filled with NaN; promote first.
        masked_array = masked_array.astype(float)
    # np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
    data = masked_array.filled(fill_value=np.nan)
    med = bn.nanmedian(data, axis=axis)
    # construct a masked array result, setting the mask from any NaN entries
    return np.ma.array(med, mask=np.isnan(med))
Example #46
def theil_sen(x, y, sample="auto", n_samples=1e7):
    """
    Computes the Theil-Sen estimator for 2d data.

    PARAMETERS:
        x: 1-d np array, the control variate
        y: 1-d np.array, the ind variate.
        sample: if n>100, the performance can be worse, so we sample n_samples.
                Set to False to not sample.
        n_samples: how many points to sample.

    This complexity is O(n**2), which can be poor for large n. We will perform a sampling
    of data points to get an unbiased, but larger variance estimator.
    The sampling will be done by picking two points at random, and computing the slope,
    up to n_samples times.

    RETURNS:
        np.array([slope, intercept])
    """
    assert x.shape[0] == y.shape[0], "x and y must be the same shape."
    n = x.shape[0]

    if n < 100 or not sample:
        # Exact estimator: one slope per unordered pair of points.
        ix = np.argsort(x)
        # Array sizes must be integers, so use floor division.
        slopes = np.empty(n * (n - 1) // 2)
        for c, pair in enumerate(itertools.combinations(range(n), 2)):
            i, j = ix[pair[0]], ix[pair[1]]
            slopes[c] = slope(x[i], x[j], y[i], y[j])
    else:
        # Sampled estimator: random pairs, drawn with replacement.
        n_samples = int(n_samples)  # the default 1e7 is a float
        i1 = np.random.randint(0, n, n_samples)
        i2 = np.random.randint(0, n, n_samples)
        slopes = slope(x[i1], x[i2], y[i1], y[i2])

    slope_ = bottleneck.nanmedian(slopes)
    # find the optimal b as the median of y_i - slope*x_i
    intercepts = y - slope_ * x
    intercept_ = bottleneck.median(intercepts)

    return np.array([slope_, intercept_])
Example #47
File: group.py Project: fhal/la
def group_median(x, groups, axis=0):
    """
    Median with groups along an axis.

    Parameters
    ----------
    x : ndarray
        Input data.
    groups : list
        List of group membership of each element along the given axis.
    axis : int, {default: 0}
        axis along which the median is calculated.

    Returns
    -------
    xmedian : ndarray
        Array of the same shape as `x` in which every element is replaced by
        the NaN-ignoring median of its group along `axis`.  Positions that
        belong to no group returned by ``unique_group`` stay NaN.

    """

    # Find set of unique groups
    ugroups = unique_group(groups)
    # Convert groups to a numpy array
    groups = np.asarray(groups)
    # Loop through unique groups and fill in each group's median
    xmedian = np.full(x.shape, np.nan)
    for group in ugroups:
        idx = groups == group
        if not idx.any():
            continue
        idxall = [slice(None)] * x.ndim
        idxall[axis] = idx
        # A multidimensional index must be a tuple; indexing with a list of
        # slices is no longer accepted by NumPy.
        idxall = tuple(idxall)
        ns = bn.nanmedian(x[idxall], axis=axis)
        xmedian[idxall] = np.expand_dims(ns, axis)
    return xmedian
Example #48
File: grid.py Project: iled/gsimcli
    def stats(self, lmean=False, lmed=False, lskew=False, lvar=False,
              lstd=False, lcoefvar=False, lperc=False, p=0.95):
        """Calculate some statistics among every realisation.

        Each statistic is calculated node-wise across all the files in
        ``self.files``.

        Parameters
        ----------
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the percentile `100 * (1 - p)`.
        p : number, default 0.95
            Probability value.

        Returns
        -------
        retdict : dict of GridArr
            Dictionary containing one GridArr for each calculated statistic.

        See Also
        --------
        stats_area : same but considering a circular (and horizontal) area of
        a specified radius around a given point.

        NOTE(review): several lines of this method are missing in this copy
        (see inline notes), so it does not parse as written.
        """
        # check if the map files are already opened or not
        # NOTE(review): `file` is a Python 2 builtin.  An `else:` appears to
        # be missing before `opened_files = False`; as written the flag can
        # never stay True.
        if isinstance(self.files[0], file):
            opened_files = True
            opened_files = False

        # One output array per requested statistic, one entry per cell.
        if lmean:
            meanmap = np.zeros(self.cells)
        if lmed:
            medmap = np.zeros(self.cells)
        if lskew:
            skewmap = np.zeros(self.cells)
        if lvar:
            varmap = np.zeros(self.cells)
        if lstd:
            stdmap = np.zeros(self.cells)
        if lcoefvar:
            coefvarmap = np.zeros(self.cells)
        if lperc:
            # two columns: lower and upper quantile
            percmap = np.zeros((self.cells, 2))

        # arr holds the value of the current cell in every file.
        arr = np.zeros(self.nfiles)
        skip = True
        offset = os.SEEK_SET
        for cell in xrange(self.cells - self.header):
            for i, gridfile in enumerate(self.files):
                # deal with map files not open yet
                # NOTE(review): an `else:` appears to be missing before the
                # `open` call; as written every entry is re-opened.  No
                # visible code closes these handles or seeks to `offset`.
                if opened_files:
                    grid = gridfile
                    grid = open(gridfile, 'rb')

                # header lines are skipped only during the first cell pass
                if skip:
                    skip_lines(grid, self.header)
                arr[i] = grid.readline()

            if not opened_files:
                offset = grid.tell()

            skip = False
            # replace no data's with NaN
            bn.replace(arr, self.nodata, np.nan)
            if lmean:
                meanmap[cell] = bn.nanmean(arr)
            if lmed:
                medmap[cell] = bn.nanmedian(arr)
            if lskew:
                skewmap[cell] = pd.Series(arr).skew()
            if lvar:
                varmap[cell] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdmap[cell] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                # Reuse the std/mean computed above when both were requested.
                # NOTE(review): an `else:` appears to be missing before the
                # recomputation below.
                if lstd and lmean:
                    coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarmap[cell] = std / mean * 100
            if lperc:
                # symmetric interval covering a fraction p of the values
                percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2,
                                                         1 - (1 - p) / 2])

        retdict = dict()

        # Wrap every requested map in a GridArr keyed by its name.
        if lmean:
            meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=meanmap)
            retdict['meanmap'] = meangrid
        if lmed:
            medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=medmap)
            retdict['medianmap'] = medgrid
        if lskew:
            skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=skewmap)
            retdict['skewmap'] = skewgrid
        if lvar:
            vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=varmap)
            retdict['varmap'] = vargrid
        if lstd:
            stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=stdmap)
            retdict['stdmap'] = stdgrid
        if lcoefvar:
            # NOTE(review): this call is truncated - the final argument
            # (presumably val=coefvarmap) and closing parenthesis are missing.
            coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy,
                                  dz=self.dz, nodata=self.nodata,
            retdict['coefvarmap'] = coefvargrid
        if lperc:
            percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=percmap)
            retdict['percmap'] = percgrid

        return retdict
Example #49
File: grid.py Project: iled/gsimcli
    def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False,
                   lvar=False, lstd=False, lcoefvar=False, lperc=False,
                   p=0.95, save=False):
        """Calculate some statistics among every realisation, considering a
        circular (only horizontaly) area of radius `tol` around the point
        located at `loc`.

        Parameters
        ----------
        loc : array_like
            Location of the vertical line [x, y].
        tol : number, default 0
            Tolerance radius used to search for neighbour nodes.
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the percentile `100 * (1 - p)`.
        p : number, default 0.95
            Probability value.
        save : boolean, default False
            Write the points used to calculate the chosen statistics in
            PointSet format to a file named 'sim values at (x, y, line).prn'.

        Returns
        -------
        statspset : PointSet
            PointSet instance containing the calculated statistics.

        .. TODO: checkar stats variance com geoms

        NOTE(review): several lines of this method are missing in this copy
        (see inline notes), so it does not parse as written.
        """
        # One output array per requested statistic, one entry per layer.
        if lmean:
            meanline = np.zeros(self.dz)
        if lmed:
            medline = np.zeros(self.dz)
        if lskew:
            skewline = np.zeros(self.dz)
        if lvar:
            varline = np.zeros(self.dz)
        if lstd:
            stdline = np.zeros(self.dz)
        if lcoefvar:
            coefvarline = np.zeros(self.dz)
        if lperc:
            # two columns: lower and upper quantile
            percline = np.zeros((self.dz, 2))

        # convert the coordinates of the first point to grid nodes
        loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz],
                            [self.xi, self.yi, self.zi])[:2]
        # find the nodes coordinates within a circle centred in the first point
        neighbours_nodes = circle(loc[0], loc[1], tol)
        # compute the lines numbers for each point in the neighbourhood, across
        # each grid layer. this yields a N*M matrix, with N equal to the number
        # of neighbour nodes, and M equal to the number of layers in the grid.
        neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz])
                            for node in neighbours_nodes]
        # sort the lines in ascending order
        neighbours_lines = np.sort(neighbours_lines, axis=0)
        # create an array to store the neighbour nodes in each grid file
        nnodes = neighbours_lines.shape[0]
        arr = np.zeros(self.nfiles * nnodes)

        skip = True
        # last line number read from each file, so the reader can advance
        # relative to it
        curr_line = np.zeros(self.nfiles)

        for layer in xrange(neighbours_lines.shape[1]):
            for i, line in enumerate(neighbours_lines[:, layer]):
                for j, grid in enumerate(self.files):
                    # skip header lines only once per grid file
                    # NOTE(review): `skip` is cleared at the end of this
                    # innermost loop body, i.e. after the first file has been
                    # read, so with more than one file the header appears to
                    # be skipped for the first file only - confirm.
                    if skip and self.header:
                        skip_lines(grid, self.header)

                    # advance to the next line with a neighbour node
                    skip_lines(grid, int(line - curr_line[j] - 1))
                    # read the line and store its value
                    a = grid.readline()
                    # values are laid out file by file: node i of file j
                    arr[i + j * nnodes] = float(a)

                    curr_line[j] = line
                    skip = False

            # replace no data's with NaN
            bn.replace(arr, self.nodata, np.nan)
            # compute the required statistics
            if lmean:
                meanline[layer] = bn.nanmean(arr)
            if lmed:
                medline[layer] = bn.nanmedian(arr)
            if lskew:
                skewline[layer] = pd.Series(arr).skew()
            if lvar:
                varline[layer] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdline[layer] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                # Reuse the std/mean computed above when both were requested.
                # NOTE(review): an `else:` appears to be missing before the
                # recomputation below.
                if lstd and lmean:
                    coefvarline[layer] = stdline[layer] / meanline[layer] * 100
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarline[layer] = std / mean * 100
            if lperc:
                # symmetric interval covering a fraction p of the values
                percline[layer] = pd.Series(arr).quantile([(1 - p) / 2,
                                                           1 - (1 - p) / 2])
            if save and tol == 0:
                # FIXME: not working with the tolerance feature
                # need to adjust the arrpset or cherry-pick arr
                arrpset = PointSet('realisations at location ({0}, {1}, {2})'.
                                   format(loc[0], loc[1], layer * self.cellz +
                                          self.zi), self.nodata, 3,
                                   ['x', 'y', 'value'],
                                   values=np.zeros((self.nfiles, 3)))
                # the output file is placed next to the first grid file
                arrout = os.path.join(os.path.dirname(self.files[0].name),
                                      'sim values at ({0}, {1}, {2}).prn'.format(
                                          loc[0], loc[1], layer * self.cellz
                                          + self.zi))
                arrpset.values.iloc[:, 2] = arr
                arrpset.values.iloc[:, :2] = np.repeat(np.array(loc)
                                                       [np.newaxis, :],
                                                       self.nfiles, axis=0)
                arrpset.save(arrout, header=True)

        # lperc contributes two columns (lower and upper quantile)
        ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew))
        if lperc:
            ncols += 2
        statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'.
                             format(loc[0], loc[1]), nodata=self.nodata,
                             nvars=3 + ncols, varnames=['x', 'y', 'z'],
                             values=np.zeros((self.dz, 3 + ncols)))

        # NOTE(review): the next statement is garbled - parts of the
        # np.column_stack(...) expression are missing.  It evidently fills
        # the first three columns (x, y, z), with z taken from the np.arange
        # over the layers.
        statspset.values.iloc[:, :3] = (np.column_stack
                                                     [np.newaxis, :], self.dz,
                                          np.arange(self.zi, self.zi +
                                                    self.cellz * self.dz))))

        # Statistics occupy consecutive columns after x, y, z in this fixed
        # order; the two percentile columns always go last.
        j = 3
        if lmean:
            statspset.values.iloc[:, j] = meanline
            j += 1
        if lmed:
            statspset.values.iloc[:, j] = medline
            j += 1
        if lskew:
            statspset.values.iloc[:, j] = skewline
            j += 1
        if lvar:
            statspset.values.iloc[:, j] = varline
            j += 1
        if lstd:
            statspset.values.iloc[:, j] = stdline
            j += 1
        if lcoefvar:
            statspset.values.iloc[:, j] = coefvarline
            j += 1
        if lperc:
            statspset.values.iloc[:, -2:] = percline

        # reset the reading pointer in each grid file
        # update varnames
        # NOTE(review): no code follows the two comments above in this copy;
        # the corresponding statements appear to be missing.
        return statspset
Example #50
    def _build_epsf_step(self, stars, epsf=None):
        """
        A single iteration of improving an ePSF.

        Parameters
        ----------
        stars : `EPSFStars` object
            The stars used to build the ePSF.

        epsf : `EPSFModel` object, optional
            The initial ePSF model.  If not input, then the ePSF will be
            built from scratch.

        Returns
        -------
        epsf : `EPSFModel` object
            The updated ePSF.

        Raises
        ------
        ValueError
            If ``stars`` is empty.

        NOTE(review): several lines of this method are missing in this copy
        (see inline notes), so it does not parse as written.
        """
        if len(stars) < 1:
            raise ValueError('stars must contain at least one EPSFStar or '
                             'LinkedEPSFStar object.')

        # NOTE(review): an `else:` appears to be missing before the deepcopy;
        # as written the copy runs in the `epsf is None` branch only.
        if epsf is None:
            # create an initial ePSF (array of zeros)
            epsf = self._create_initial_epsf(stars)
            # improve the input ePSF
            epsf = copy.deepcopy(epsf)

        # compute a 3D stack of 2D residual images
        residuals = self._resample_residuals(stars, epsf)


        # compute the sigma-clipped median along the 3D stack
        # RuntimeWarning and AstropyUserWarning are silenced inside this
        # `with` block only.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            warnings.simplefilter('ignore', category=AstropyUserWarning)
            # NOTE(review): this call is truncated - remaining arguments and
            # the closing parenthesis are missing.
            residuals = self.sigclip(residuals, axis=0, masked=False,
            # NOTE(review): an `else:` appears to be missing before the
            # np.nanmedian fallback.
            if HAS_BOTTLENECK:
                residuals = bottleneck.nanmedian(residuals, axis=0)
                residuals = np.nanmedian(residuals, axis=0)


        # interpolate any missing data (np.nan)
        mask = ~np.isfinite(residuals)
        if np.any(mask):
            # NOTE(review): this call is truncated - remaining arguments and
            # the closing parenthesis are missing.
            residuals = _interpolate_missing_data(residuals, mask,

            # fill any remaining nans (outer points) with zeros
            residuals[~np.isfinite(residuals)] = 0.


        # add the residuals to the previous ePSF image
        new_epsf = epsf.normalized_data + residuals

        # smooth the ePSF
        new_epsf = self._smooth_epsf(new_epsf)

        # recenter the ePSF
        # NOTE(review): this call is truncated - remaining arguments and the
        # closing parenthesis are missing.
        new_epsf = self._recenter_epsf(new_epsf, epsf,

        # normalize the ePSF data
        new_epsf /= np.sum(new_epsf, dtype=np.float64)

        # return the new ePSF object
        # origin is placed at the geometric centre of the array
        xcenter = (new_epsf.shape[1] - 1) / 2.
        ycenter = (new_epsf.shape[0] - 1) / 2.

        epsf_new = EPSFModel(data=new_epsf, origin=(xcenter, ycenter),
                             normalize=False, oversampling=epsf.oversampling)

        return epsf_new
Example #51
def nanmedian(array, axis=None):
    """Return the median of ``array`` along ``axis`` via ``bt.nanmedian``.

    A tuple ``axis`` is handled by first passing the array through
    ``_move_tuple_axes_first`` and then reducing over axis 0.  Any other
    ``axis`` value (including ``None``) is forwarded unchanged.
    """
    if not isinstance(axis, tuple):
        return bt.nanmedian(array, axis=axis)

    reordered = _move_tuple_axes_first(array, axis=axis)
    return bt.nanmedian(reordered, axis=0)
Example #52
def deltaconvert(series, visualize=False, max_adj_outliers=10):
    # Overview of the visible steps:
    #   1. drop NaNs and sanity-check the length and the index order,
    #   2. compute day-over-day relative changes and their "change scores",
    #   3. scan the dates for non-weekend gaps and out-of-order entries,
    #   4. derive z-scores from the change scores and look for outlier pairs,
    #   5. build and return the three result series
    #      (res_daily, res_weekly, res_dailyscore).
    #
    # Raises DeltaConversionException when fewer than 50 non-NaN points remain
    # or when the first index value is greater than the last one.
    #
    # NOTE(review): this function appears to have lost lines (the docstring's
    # closing triple-quote, several `else:` branches, list appends, and the
    # end of one logging call).  Each suspicious spot is flagged below; the
    # code itself is left untouched and must be checked against the upstream
    # source before use.
    # NOTE(review): the docstring mentions "possibly error message", but the
    # only return statement returns exactly three series.

    """Perform delta-conversion to given pd.Series.

    Delta-conversion returns 3 series as a tuple and possibly error message.

    First series (D) contains daily returns where all the data has been removed
    that could make comparison difficult with other assets.

    Second series (W) contains weekly returns where all the data has been removed
    that could make comparison difficult with other assets.

    Third series (DS) contains daily returns where all the outliers and erroneus
    data points have been removed but holes in data are not taken into account.
    This is more suitable for calculating performance scores etc.

    series           -- series to use
    visualize        -- visualize results
    max_adj_outliers -- maximum number of adjancent outliers, if there are
                        actually more adjancent outliers than this then they will
                        not be considered outliers anymore. default value: 10
    # MEDIAN_LEN = 50

    series = series.dropna()

    if len(series) < 50:
        raise DeltaConversionException("Not enough data")

    # Never modified in the visible code; only used in the summary log line.
    lines_taken = 0

    if series.index[0] > series.index[-1]:
        raise DeltaConversionException("Wrong cronological order")

    # closes = []
    # dates = []
    # datesord = []

    # for line in lines:
        # splitted = line.split(",")
        # closes.append(float(splitted[column]))
        # dt = datetime.strptime(splitted[0], "%Y-%m-%d").date()
        # dates.append(dt)
        # datesord.append(dt.toordinal())

    # if datesord[-1] < datesord[0]:
    #     closes.reverse()
    #     dates.reverse()
    #     datesord.reverse()
    #     lines.reverse()

    closes = series
    dates = series.index

    # Position 0 has no predecessor, so both per-point lists start with NaN.
    num_invalid_prices = 0
    deltapct = [np.nan]
    changescores = [np.nan]
    invalid_price_indices = []
    for i in range(1, len(series)):
        if closes[i - 1] > 0 and closes[i] > 0:
            change = closes[i] / closes[i - 1]
            deltapct.append(change - 1)
            changescore = change_to_score(change)
            # NOTE(review): as written, the invalid-price counter and the
            # "Cannot determine" message run in the *valid* branch, the
            # computed `changescore` is never appended to `changescores`, and
            # `invalid_price_indices` is never filled.  An `else:` branch and
            # the list appends look like they are missing here - confirm.
            num_invalid_prices += 1
            logging.debug("Cannot determine changescore at {} ({} / {})".format(dates[i], closes[i], closes[i - 1]))

    # # remove zeroes (data may only end with price zero if stock goes bankrupt...)
    # first_nonzero_idx = [i for i, val in enumerate(closes[:-1]) if val == 0]
    # del closes[:first_nonzero_idx]
    # del dates[:first_nonzero_idx]
    # lines_taken += first_nonzero_idx
    # if first_nonzero_idx > 0:
    #     logging.debug("{}: removed {} zero-lines from the beginning.".format(filename, first_nonzero_idx))

    # Walk the dates backwards and classify the day difference to the
    # previous entry.
    # NOTE(review): `gap_indices` is never appended to in the visible code,
    # although it is used later to build `indices_to_rem`.
    num_gaps = 0
    num_invalid_chrono_orders = 0
    gap_indices = []
    for i in range(len(dates) - 1, 0, -1):
        d = (dates[i] - dates[i - 1]).days
        # standard weekends are only allowed
        if d == 3:
            if dates[i].weekday() != 0:  # not monday
                # deltapct[i] = np.nan
                # changescores[i] = np.nan
                num_gaps += 1
                logging.log(5, "Non-weekend gap of 2 day(s) at {}".format(dates[i]))
        elif d > 1:
            # deltapct[i] = np.nan
            # changescores[i] = np.nan
            num_gaps += 1
            logging.log(5, "Non-weekend gap of {} day(s) at {}".format(d, dates[i]))
        elif d <= 0:
            # NOTE(review): `dates` is `series.index`; presumably a pandas
            # Index, for which `del dates[i]` may not be supported - verify.
            del deltapct[i], dates[i], closes[closes.index[i]], changescores[i]
            # NOTE(review): logging.warning takes the message first, so `5`
            # becomes the message and the text a %-format argument.  The
            # other calls use logging.log(5, ...); this one probably should
            # too.  It also reads dates[i] after the deletion above.
            logging.warning(5, "Invalid chronological order ({} day(s)) at {}"
                            .format(d - 1, dates[i]))
            num_invalid_chrono_orders += 1

    # z-score of each change score relative to the NaN-ignoring standard
    # deviation of all change scores.
    deltapct = np.asarray(deltapct)
    changescores = np.asarray(changescores)
    std_score = bn.nanstd(changescores)
    zscores = np.abs(changescores) / std_score
    mean_z = bn.nanmean(zscores)
    # NOTE(review): a list built from a set has no guaranteed order, but the
    # loop below compares neighbouring entries as if they were sorted
    # ascending - a sort step may be missing.
    zscores_set = list(set(zscores[(~np.isnan(zscores)) & (zscores > 0)]))

    # Look in the top 5% of distinct z-scores for a jump of at least 2x
    # between neighbours.
    outlier_z = None
    maxpctdiff = 0
    for i in range(int(len(zscores_set) * .95), len(zscores_set)):
        pctdiff = zscores_set[i] / zscores_set[i - 1]
        # NOTE(review): overwritten on every iteration, so this holds the
        # last ratio, not the maximum the name suggests.
        maxpctdiff = pctdiff
        # logging.info("{}: {}".format(i / len(zscores_set), pctdiff))
        if pctdiff >= 2:
            outlier_z = zscores_set[i]
            second_highest_z = zscores_set[i - 1]

    possible_outliers = []
    confirmed_outliers = []
    localmean_factors = []

    if outlier_z:
        logging.log(5, "Outlier z-score: {:.2f}, earlier z-score: {:.2f}, mean z-score: {:.5f}"
                    .format(outlier_z, second_highest_z, mean_z))

        for i in range(len(zscores)):
            if zscores[i] >= outlier_z:
                # Scale the candidate's score by how the z-scores in a
                # +/- 50 point window compare with the global mean z-score.
                localmean = bn.nanmean(zscores[max(0, i - 50):min(len(zscores) + 1, i + 50)])
                localmean_factor = np.sqrt(mean_z / localmean)
                score = (zscores[i] / second_highest_z) * localmean_factor
                logging.log(5, "Possible outlier at {}: localmean_factor: {:.2f}, zscore: {:.2f}, score: {:.2f}"
                            .format(dates[i], localmean_factor, zscores[i], score))
                if score >= ZSCORE_CUT_RATIO:
                    # NOTE(review): `possible_outliers` and
                    # `localmean_factors` are never appended to in the
                    # visible code, yet both are indexed below - the appends
                    # appear to be missing here.
                    logging.debug("Possible outlier at {} (z-score={:.2f}, deltapct={:.2%})"
                                  .format(dates[i], zscores[i], deltapct[i]))
                    # deltapct[i] = np.nan

        if len(possible_outliers) == 1:
            confirmed_outliers = possible_outliers

        # Examine consecutive pairs of candidate outliers.
        for i in range(1, len(possible_outliers)):

            firstidx = possible_outliers[i - 1]
            secondidx = possible_outliers[i]
            # opposite signs and not too far from each other
            if deltapct[firstidx] * deltapct[secondidx] < 0 \
                    and secondidx - firstidx + 1 <= max_adj_outliers:
                # NOTE(review): without a `break`, this loop leaves
                # `firstnonan` at the smallest index with a non-NaN value
                # rather than the nearest one at or before `firstidx`.
                firstnonan = None
                for i2 in range(firstidx, -1, -1):
                    if not np.isnan(deltapct[i2]):
                        firstnonan = i2
                confirmed = False
                # NOTE(review): `not firstnonan` is also true for index 0,
                # not only for None.
                if not firstnonan:
                    confirmed = True
                if firstnonan:
                    if i == 1:
                        # NOTE(review): `left_mean` is assigned twice in this
                        # branch, and the second assignment reads
                        # possible_outliers[i - 2], i.e. index -1 when
                        # i == 1.  An `else:` between the two assignments
                        # appears to be missing.
                        left_mean = bn.nanmedian(closes[max(0, firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                        left_mean = bn.nanmedian(closes[max(0, possible_outliers[i - 2], \
                                                            firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                    # Compare the median price left of, between and right of
                    # the candidate pair; both jumps must be large.
                    right_mean = bn.nanmedian(closes[firstidx:secondidx])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_left_vs_mid = (zscore / second_highest_z) * localmean_factors[i - 1]
                    left_mean = right_mean
                    right_mean = bn.nanmedian(closes[secondidx:min(secondidx + max_adj_outliers, len(closes))])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_mid_vs_right = (zscore / second_highest_z) * localmean_factors[i]
                    if score_left_vs_mid > ZSCORE_CUT_RATIO * .75 and score_mid_vs_right > ZSCORE_CUT_RATIO * .75:
                        confirmed = True
                if confirmed:
                    # Blank out the whole span between the two candidates.
                    indices = [i2 for i2 in range(firstidx, secondidx + 1)]
                    deltapct[indices] = np.nan
                    confirmed_outliers += indices

        # NOTE(review): this call is never closed (no `.format(...)` and no
        # closing parenthesis), and its wording suggests it belongs to an
        # `else:` of `if outlier_z:` that is not present.
        logging.debug("No possible outliers found based on initial z-score analysis (maxpctdiff: {})"

    # NOTE(review): the body of this `if` consists only of comments, which is
    # not valid Python without a statement such as `pass`.
    if visualize:
        # TODO: make this work with DataFrame
        # closes_arr = np.asarray(closes.get_values())
        # datesord = np.asarray(datesord)
        # plt.subplot(2, 1, 1)
        # plt.plot(datesord - datesord[0], closes_arr, 'b*')
        # plt.plot(datesord[gap_indices] - datesord[0], closes_arr[gap_indices], 'ob')
        # plt.plot(datesord[confirmed_outliers] - datesord[0], closes_arr[confirmed_outliers], 'or')
        # plt.plot(datesord[invalid_price_indices] - datesord[0], closes_arr[invalid_price_indices], 'om')
        # plt.subplot(2, 1, 2)
        # plt.plot(datesord - datesord[0], zscores, 'o')
        # plt.show()

    logging.debug("Conversion result: lines = {}, invalid closes = {}, gaps = {}, invalid dates = {}, outliers = {}"
                  .format(len(series) - lines_taken, num_invalid_prices, num_gaps,
                          num_invalid_chrono_orders, len(confirmed_outliers)))

    # First result: drop gaps, confirmed outliers and invalid prices.
    indices_to_rem = list(set(gap_indices + confirmed_outliers + invalid_price_indices))
    # datesordmod = np.delete(datesord, indices_to_rem)
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    closesmod = closes.drop(closes.index[indices_to_rem])
    # Only element 0 may be NaN at this point.
    assert(not np.any(np.isnan(deltapctmod[1:])))

    weeklydeltapct = []
    weeklydatesmod = []
    lastidx = -1
    # resample to W-FRI (could be done with pandas)
    for i in range(len(closesmod)):
        if datesmod[i].weekday() == 4:
            dd = (datesmod[i] - datesmod[lastidx]).days
            # NOTE(review): `or` lets every Friday after the first through
            # regardless of `dd`; `and` may have been intended.
            if lastidx >= 0 or dd == 7:
                if closesmod[lastidx] >= 0:
                    weeklydeltapct.append(closesmod[i] / closesmod[lastidx] - 1)
                # NOTE(review): `weeklydatesmod` is never appended to, so
                # the weekly series below gets values without matching
                # dates.  This message has two placeholders but three
                # arguments (so `i` is printed as the delta), and it is
                # logged for accepted bars too - an `else:` appears to be
                # missing.
                logging.log(5, "Weekly bar at {} skipped (delta: {} days)".format(datesmod[i], i, dd))
            lastidx = i

    res_daily = pd.Series(deltapctmod, datesmod)
    res_weekly = pd.Series(weeklydeltapct, weeklydatesmod)

    # Third result: same as the daily series, but gaps are kept.
    indices_to_rem = list(set(confirmed_outliers + invalid_price_indices))
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    assert(not np.any(np.isnan(deltapctmod[1:])))

    res_dailyscore = pd.Series(deltapctmod, datesmod)

    return res_daily, res_weekly, res_dailyscore
Example #53
def parallel_compute(queue, return_queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist, operation):
    """Worker loop that combines image lines with the requested operation.

    Line indices are taken from ``queue`` until a ``None`` sentinel arrives.
    For every index, the samples ``buffer[line, :, :]`` are reduced along
    their last axis and the result is stored in ``result_buffer[line, :]``.

    Parameters
    ----------
    queue
        Work queue; ``get()`` must return line indices and finally ``None``.
    return_queue
        Not used by this code.
    shmem_buffer, shmem_results
        Objects whose ``to_ndarray()`` returns the input samples (indexed as
        ``[line, :, :]``) and the output array (indexed as ``[line, :]``).
    size_x, size_y
        Not used by this code (they only appear in the commented-out reshape
        calls).
    len_filelist
        Number of samples per pixel; only used in the debug log message.
    operation : str
        One of "median", "medsigclip", "sigclipx", "sigmaclipmean",
        "sigmaclipmedian", "weightedmean", "medclip", "min", "max",
        "nanmean", "nanmedian", "nanmedian.bn", "nanmean.bn".  Any other
        value falls back to a plain mean.

    Notes
    -----
    NOTE(review): this function was recovered from a copy with missing
    lines.  The ``break`` on the sentinel and the final ``else:`` were
    restored because the structure requires them.  Further per-item
    bookkeeping (e.g. acknowledging work on ``queue`` or reporting through
    ``return_queue``) may also have been lost - confirm against the callers.
    """
    #queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist = worker_args

    # buffer = shmem_as_ndarray(shmem_buffer).reshape((size_x, size_y, len_filelist))
    buffer = shmem_buffer.to_ndarray()
    # result_buffer = shmem_as_ndarray(shmem_results).reshape((size_x, size_y))
    result_buffer = shmem_results.to_ndarray()

    logger = logging.getLogger("ParallelImcombine")
    logger.debug("Operation: %s, #samples/pixel: %d" % (operation, len_filelist))

    while (True):
        line = queue.get()
        if (line is None):
            # Sentinel: no more work, leave the loop and release the buffers.
            break

        if (operation == "median"):
            result_buffer[line,:] = numpy.median(buffer[line,:,:], axis=1)

        elif (operation == "medsigclip"):
            # Do not use (yet), is slow as hell
            # (maskedarrays are pure python, not C as all the rest)
            # NOTE(review): left exactly as found.  `mask=valid` masks the
            # pixels that are *inside* the clipping range, and
            # scoreatpercentile is called on the whole 2-d block rather than
            # per row - verify before enabling this mode.

            #print buffer[line,:,:].shape
            _sigma_plus  = numpy.ones(shape=(buffer.shape[1],buffer.shape[2])) * 1e9
            _sigma_minus = numpy.ones(shape=(buffer.shape[1],buffer.shape[2])) * 1e9
            _median = numpy.median(buffer[line,:,:], axis=1)

            nrep = 3
            valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:])

            for rep in range(nrep):

                _median_2d = _median.reshape(_median.shape[0],1).repeat(buffer.shape[2], axis=1)
                _min = _median_2d - 3 * _sigma_minus
                _max = _median_2d + 3 * _sigma_plus

                #valid_pixels = numpy.ma.masked_inside(buffer[line,:,:], _min, _max)
                valid = (buffer[line,:,:] > _min) & (buffer[line,:,:] < _max)

                valid_pixels = numpy.ma.array(buffer[line,:,:], mask=valid)
                #valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:], valid)

                #print _min.shape, valid.shape, valid_pixels.shape

                #if (numpy.sum(valid, axis=1).any() <= 0):
                #    break

                #_median = numpy.median(buffer[line,:,:][valid], axis=1)
                _median = numpy.median(valid_pixels, axis=1)
                if (rep < nrep-1):
                    #_sigma_plus = scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 84) - _median
                    #_sigma_minus = _median - scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 16)
                    _sigma_plus = scipy.stats.scoreatpercentile(valid_pixels, 84) - _median
                    _sigma_minus = _median - scipy.stats.scoreatpercentile(valid_pixels, 16)

            result_buffer[line,:] = _median

        elif (operation == "sigclipx"):
            rep_count = 2

            _line = buffer[line,:,:].astype(numpy.float32)
            # print _line.shape
            #print "line.shape=",_line.shape
            # numpy.savetxt("line_block_%d.dat" % (line), _line)

            def sigclip_pixel(pixelvalue):
                # Iteratively clip one pixel's samples to
                # median -/+ 3 * (distance to the 16th/84th percentile),
                # then average whatever survives.
                mask = numpy.isfinite(pixelvalue)
                old_mask = mask
                rep = 0
                while (rep < rep_count and numpy.sum(mask) > 3):
                    old_mask = mask

                    mss = scipy.stats.scoreatpercentile(pixelvalue[mask], [16,50,84])
                    lower = mss[1] - 3 * (mss[1] - mss[0]) # median - 3*sigma
                    upper = mss[1] + 3 * (mss[2] - mss[1]) # median + 3*sigma

                    mask = (pixelvalue > lower) & (pixelvalue < upper)

                    rep += 1
                    # On the last repetition, or if too few samples survive,
                    # fall back to the mask from before this iteration.
                    if (rep == rep_count or numpy.sum(mask) < 3):
                        mask = old_mask

                return numpy.mean(pixelvalue[mask])

            result_buffer[line,:] = [sigclip_pixel(_line[x,:]) for x in range(_line.shape[0])]

        elif (operation == "sigmaclipmean"):
            _line = buffer[line,:,:].astype(numpy.float64)
            output = numpy.zeros(shape=(_line.shape[0]))
            podi_cython.sigma_clip_mean(_line, output)
            result_buffer[line,:] = output

        elif (operation == "sigmaclipmedian"):
            _line = buffer[line,:,:].astype(numpy.float64)
            output = numpy.zeros(shape=(_line.shape[0]))
            podi_cython.sigma_clip_median(_line, output)
            result_buffer[line,:] = output

        elif (operation == "weightedmean"):
            _line = buffer[line,:,:].astype(numpy.float32)
            result_buffer[line,:] = weighted_mean(_line)

        elif (operation == "medclip"):
            # Median after discarding the lowest one and the highest two
            # samples of every pixel.
            intermediate = numpy.sort(buffer[line,:,:], axis=1)
            result_buffer[line,:] = numpy.median(intermediate[:,1:-2], axis=1)

        elif (operation == "min"):
            result_buffer[line,:] = numpy.min(buffer[line,:,:], axis=1)

        elif (operation == "max"):
            result_buffer[line,:] = numpy.max(buffer[line,:,:], axis=1)

        elif (operation == "nanmean"):
            # scipy.stats.nanmean no longer exists in current SciPy; the
            # NumPy function is the documented replacement.
            result_buffer[line,:] = numpy.nanmean(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian"):
            # Same for scipy.stats.nanmedian -> numpy.nanmedian.
            result_buffer[line,:] = numpy.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmedian(x, axis=1)
            x = None
            del x
        elif (operation == "nanmean.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmean(x, axis=1)
            x = None
            del x
        else:
            # Unknown operation: default to a plain mean.
            result_buffer[line,:] = numpy.mean(buffer[line,:,:], axis=1)

    # Drop the local references to the shared arrays before returning.
    buffer = None
    shmem_buffer = None
    del shmem_buffer
    del buffer

def subtract_background(data, radius, angle, radius_range, binfac, logger=None):
    """
    This routine takes the input in polar coordinates, measures the median
    level in a series of concentric rings and fits a spline to the resulting
    radial profile. This is assumed to be the background level (in analogy
    to the algorithm used in the IRAF task mkpupil).

    Input data:
    - data (the actual intensity values for all pixels)
    - radius (the r in the polar coordinates)
    - angle (the phi in polar coordinates; not used by this routine)
    - radius range (r_inner, r_outer, d_radius)
    - binfac (the binning used for the data)
    - logger (optional; defaults to the "BGSub" logger)

    Returns a tuple (bg_sub, profiles, peak_flux):
    - bg_sub: (data - background) / background, with the background
      evaluated from the spline at every pixel's radius
    - profiles: array with one row per ring and the columns
      [radius, normalized profile, spline value, (level - spline) / spline]
    - peak_flux: largest finite value of the last profile column

    Side effect: writes the raw radial profile to the file "radial__x" in
    the current working directory.
    """

    if (logger is None):
        logger = logging.getLogger("BGSub")

    # Compute the radial bin size in binned pixels
    logger.debug("subtracting background - binfac=%d" % (binfac))
    r_inner, r_outer, dr_full = radius_range
    dr = dr_full/binfac
    r_inner /= binfac
    r_outer /= binfac

    # Compute the number of radial bins
    # Here: Add some correction if the center position is outside the covered area
    max_radius = 1.3 * r_outer #math.sqrt(data.shape[0] * data.shape[1])
    # Splitting up image into a number of rings
    n_radii = int(math.ceil(max_radius / dr))

    # Compute the background level as a linear interpolation of the levels
    # inside and outside of the pupil ghost
    logger.info("Computing background-level ...")
    # Define the background ring levels
    radii = numpy.arange(0, max_radius, dr)
    # numpy.NaN was removed in NumPy 2.0; numpy.nan works on every version.
    background_levels = numpy.full((n_radii,), numpy.nan)
    background_level_errors = numpy.ones(shape=(n_radii)) * 1e9
    for i in range(n_radii):

        ri = i * dr
        ro = ri + dr

        # Clip rings that straddle the inner/outer limit so they do not
        # extend across it.
        if (ri < r_inner):
            ro = numpy.min([ro, r_inner])
        elif (ro > r_outer):
            ri = numpy.max([ri, r_outer])
#        else:
#            # Skip the rings within the pupil ghost range for now
#            continue
        #print i, ri, ro
        median, count = get_median_level(data, radius, ri, ro)
        background_levels[i] = median
        background_level_errors[i] = 1. / math.sqrt(count) if count > 0 else 1e9

    # Now fit a straight line to the continuum, assuming it varies
    # only linearly (if at all) with radius
    # define our (line) fitting function
    #print "XXXXXXX", radii.shape, background_levels.shape
    # Dump (radius, level) pairs as two columns.  The first argument of
    # numpy.append was missing from this call and has been restored to match
    # the identical, commented-out "bgsub" dump further down.
    numpy.savetxt("radial__%s" % ("x"),
                  numpy.append(radii.reshape((-1,1)),
                               background_levels.reshape((-1,1)), axis=1))
    #print "saved"

    # Find average intensity at the largest radii
    avg_level = bottleneck.nanmedian(background_levels[radii>4000])
    #print "avg_level=",avg_level

    # Compute a profile without background interpolation to allow for easier
    # scaling of the pupilghost when subtracting the pupilghost from the data
    # frames
    # Normalize profile
    normalize_region = ((radii < 1100) & (radii > 600)) |  \
                       ((radii > 4000) & (radii < 4600))
    normalize_flux = numpy.mean(background_levels[normalize_region])
    logger.info("normalization flux = %f" % (normalize_flux))

    # Subtract background and normalize all measurements
    normalized_bgsub_profile = (background_levels - normalize_flux) / normalize_flux

    # fitfunc = lambda p, x: p[0] + p[1] * x
    # errfunc = lambda p, x, y, err: (y - fitfunc(p, x)) / err

    # bg_for_fit = background_levels
    # #bg_for_fit[numpy.isnan(background_levels)] = 0
    # bg_for_fit[((radii > ri) & (radii < ro))] = 0
    # pinit = [0.0, 0.0] # Assume no slope and constant level of 0
    # out = scipy.optimize.leastsq(errfunc, pinit,
    #                        args=(radii, background_levels, background_level_errors), full_output=1)

    # pfinal = out[0]
    # covar = out[1]
    # stdout_write(" best-fit: %.2e + %.3e * x\n" % (pfinal[0], pfinal[1]))
    #print pfinal
    #print covar

    # #
    # # Now we have the fit for the background, compute the 2d background
    # # image and subtract it out
    # #
    # x = numpy.linspace(0, max_radius, 100)
    # y_fit = radii * pfinal[1] + pfinal[0]
    # background = pfinal[0] + pfinal[1] * radius
    # bg_sub = ((data - normalize_flux) / normalize_flux) - background

    # bg_sub_profile = background_levels - (pfinal[0] + pfinal[1]*radii)
    # numpy.savetxt("radial__%s" % ("bgsub"),
    #               numpy.append(radii.reshape((-1,1)),
    #                            bg_sub_profile.reshape((-1,1)), axis=1))

    # Use the profile and fit a spline to the underlying shape
    spl = fit_spline_background(radii, background_levels, logger=logger)

    # Evaluate the spline at every pixel's radius to get a 2-d background
    background_1d = spl(radius.flatten())
    background_2d = background_1d.reshape(radius.shape)

    bg_sub = (data - background_2d) / background_2d
    #if (write_intermediate):
    #    bgsub_hdu = pyfits.PrimaryHDU(data=bg_sub)
    #    bgsub_hdu.writeto("bgsub.fits", clobber=True)

    # Combine all radial template profiles so we can store them in the output
    # file. This is required to allow for faster and more accurate scaling
    # of the pupilghost during subtraction from the data files
    profiles = numpy.empty((radii.shape[0], 4))
    profiles[:,0] = radii[:]
    profiles[:,1] = normalized_bgsub_profile[:]
    profiles[:,2] = spl(radii[:])
    profiles[:,3] = (background_levels - profiles[:,2]) / profiles[:,2]
    peak_flux = numpy.max(profiles[:,3][numpy.isfinite(profiles[:,3])])

    # numpy.savetxt("radial__%s" % ("norm+bgsub"),
    #               numpy.append(radii.reshape((-1,1)),
    #                            normalized_bgsub_profile.reshape((-1,1)), axis=1))

    return bg_sub, profiles, peak_flux
Example #55
    def _fit(self, X, y):
        """Run the greedy feature-selection loop and store the result on self.

        The first feature is the one with the highest median (over
        k = 3..10) of the score vectors returned by
        ``mi.get_first_mi_vector``.  Further features are then added one at
        a time according to ``self.method`` ('JMI', 'JMIM' or 'MRMR') until
        ``self.n_features`` features are selected.  When
        ``self.n_features == 'auto'`` the loop instead stops once the
        smoothed first derivative of the recorded scores has flattened out.

        Parameters
        ----------
        X, y
            Passed through ``self._check_params``.  ``X`` must expose a 2-d
            ``shape`` (n, p) and ``y`` must be reshapeable to (n, 1).

        Returns
        -------
        self
            With ``n_features_``, ``support_``, ``ranking_`` and ``mi_`` set.
        """
        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features; a real list so that it behaves the same on
        # Python 2 and Python 3 (where range() is not a list)
        F = list(range(p))

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ----------------------------------------------------------------------
        # FIRST FEATURE
        # ----------------------------------------------------------------------

        # check a range of ks (3-10), and choose the one with the max median MI
        k_min = 3
        k_max = 11
        xy_MI = np.zeros((k_max-k_min, p))
        xy_MI[:] = np.nan
        for i, k in enumerate(range(k_min, k_max)):
            xy_MI[i, :] = mi.get_first_mi_vector(self, k)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        # NOTE(review): nothing is appended to S_mi for this first feature,
        # so S_mi stays one entry shorter than S.  A line recording the
        # first feature's score may have been lost here - confirm upstream.

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ----------------------------------------------------------------------
        # SUBSEQUENT FEATURES
        # ----------------------------------------------------------------------

        # In 'auto' mode there is no fixed target; the loop is left through
        # the convergence check below.  Comparing len(S) with the string
        # 'auto' directly raises TypeError on Python 3.
        if self.n_features == 'auto':
            n_target = float('inf')
        else:
            n_target = self.n_features

        while len(S) < n_target:
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            elif self.method == 'JMIM':
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            elif self.method == 'MRMR':
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]

            # record the JMIM of the newly selected feature and add it to S
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of previously sel
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ----------------------------------------------------------------------
        # SAVE RESULTS
        # ----------------------------------------------------------------------
        self.n_features_ = len(S)
        # np.bool was only an alias of the builtin and was removed in
        # NumPy 1.24
        self.support_ = np.zeros(p, dtype=bool)
        self.support_[S] = 1
        self.ranking_ = S
        self.mi_ = S_mi

        return self
Example #56
def parallel_compute(queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist, operation):
    """Worker loop that combines image lines with the requested operation.

    ``queue.get()`` must return ``(cmd_quit, line)`` pairs.  A true
    ``cmd_quit`` ends the loop.  Otherwise the samples ``buffer[line, :, :]``
    are reduced along their last axis and written to
    ``result_buffer[line, :]``.

    Parameters
    ----------
    queue
        Work queue delivering ``(cmd_quit, line)`` tuples.
    shmem_buffer, shmem_results
        Shared-memory handles.  They are converted with ``shmem_as_ndarray``
        and reshaped to ``(size_x, size_y, len_filelist)`` and
        ``(size_x, size_y)`` respectively.
    size_x, size_y, len_filelist
        Dimensions used for the two reshapes above.
    operation : str
        One of "median", "medsigclip", "medclip", "min", "max", "nanmean",
        "nanmedian", "nanmean.bn", "nanmedian.bn", "nansum.bn".  Any other
        value falls back to a plain mean.

    Notes
    -----
    NOTE(review): this function was recovered from a copy with missing
    lines.  The ``break`` on ``cmd_quit`` and the final ``else:`` were
    restored because the structure requires them.  Further per-item queue
    bookkeeping may also have been lost - confirm against the callers.
    """
    #queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist = worker_args

    buffer = shmem_as_ndarray(shmem_buffer).reshape((size_x, size_y, len_filelist))
    result_buffer = shmem_as_ndarray(shmem_results).reshape((size_x, size_y))

    while (True):
        cmd_quit, line = queue.get()
        if (cmd_quit):
            # Quit command received: stop processing.
            break

        if (operation == "median"):
            result_buffer[line,:] = numpy.median(buffer[line,:,:], axis=1)

        elif (operation == "medsigclip"):
            # Do not use (yet), is slow as hell
            # (maskedarrays are pure python, not C as all the rest)
            # NOTE(review): left exactly as found.  `mask=valid` masks the
            # pixels that are *inside* the clipping range, and
            # scoreatpercentile is called on the whole 2-d block rather than
            # per row - verify before enabling this mode.

            #print buffer[line,:,:].shape
            _sigma_plus  = numpy.ones(shape=(buffer.shape[1],buffer.shape[2])) * 1e9
            _sigma_minus = numpy.ones(shape=(buffer.shape[1],buffer.shape[2])) * 1e9
            _median = numpy.median(buffer[line,:,:], axis=1)

            nrep = 3
            valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:])

            for rep in range(nrep):

                _median_2d = _median.reshape(_median.shape[0],1).repeat(buffer.shape[2], axis=1)
                _min = _median_2d - 3 * _sigma_minus
                _max = _median_2d + 3 * _sigma_plus

                #valid_pixels = numpy.ma.masked_inside(buffer[line,:,:], _min, _max)
                valid = (buffer[line,:,:] > _min) & (buffer[line,:,:] < _max)

                valid_pixels = numpy.ma.array(buffer[line,:,:], mask=valid)
                #valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:], valid)

                #print _min.shape, valid.shape, valid_pixels.shape

                #if (numpy.sum(valid, axis=1).any() <= 0):
                #    break

                #_median = numpy.median(buffer[line,:,:][valid], axis=1)
                _median = numpy.median(valid_pixels, axis=1)
                if (rep < nrep-1):
                    #_sigma_plus = scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 84) - _median
                    #_sigma_minus = _median - scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 16)
                    _sigma_plus = scipy.stats.scoreatpercentile(valid_pixels, 84) - _median
                    _sigma_minus = _median - scipy.stats.scoreatpercentile(valid_pixels, 16)

            result_buffer[line,:] = _median

        elif (operation == "medclip"):
            # Median after discarding the lowest one and the highest two
            # samples of every pixel.
            intermediate = numpy.sort(buffer[line,:,:], axis=1)
            result_buffer[line,:] = numpy.median(intermediate[:,1:-2], axis=1)

        elif (operation == "min"):
            result_buffer[line,:] = numpy.min(buffer[line,:,:], axis=1)

        elif (operation == "max"):
            result_buffer[line,:] = numpy.max(buffer[line,:,:], axis=1)

        elif (operation == "nanmean"):
            # scipy.stats.nanmean no longer exists in current SciPy; the
            # NumPy function is the documented replacement.
            result_buffer[line,:] = numpy.nanmean(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian"):
            # Same for scipy.stats.nanmedian -> numpy.nanmedian.
            result_buffer[line,:] = numpy.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nanmean.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmean(x, axis=1)

        elif (operation == "nanmedian.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmedian(x, axis=1)
            #result_buffer[line,:] = scipy.stats.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nansum.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nansum(x, axis=1)

        else:
            # Unknown operation: default to a plain mean.
            result_buffer[line,:] = numpy.mean(buffer[line,:,:], axis=1)

Example #57
        xvals = np.arange(newweight.shape[1])
        yvals = np.arange(newweight.shape[0])
        X,Y = np.meshgrid(xvals,yvals)
        badx = X[newweight == False]
        bady = Y[newweight == False]
        imagearr[bady,badx,:,i] = np.nan
        #med_image = np.zeros(imagearr[:,:,:,-1].shape)
        #med_image[:,:,0] = bn.nanmedian(imagearr[:,:,0,:i+1],axis=2)
        #med_image[:,:,1] = bn.nanmedian(imagearr[:,:,1,:i+1],axis=2)
        #med_image[:,:,2] = bn.nanmedian(imagearr[:,:,2,:i+1],axis=2)
        avg_image = jointimage*weightimage.reshape(weightimage.shape+(1,))+unwarped_newimage*newweight.reshape(newweight.shape+(1,))
        weightimage += newweight
        avg_image /= weightimage.reshape(weightimage.shape+(1,))
        #Replace the currframe with the avg_image:
        currframe = frame(cap,-1,image=ski_util.img_as_ubyte(avg_image))
        #currframe = frame(cap,-1,image=ski_util.img_as_ubyte(med_image))
    medianed_image = np.zeros(imagearr[:,:,:,0].shape)
    medianed_image[:,:,0] = bn.nanmedian(imagearr[:,:,0,:],axis=2)
    medianed_image[:,:,1] = bn.nanmedian(imagearr[:,:,1,:],axis=2)
    medianed_image[:,:,2] = bn.nanmedian(imagearr[:,:,2,:],axis=2)

    for i in range(len(chronological_order)):
        curridx = chronological_order[i]
        print frameorder[curridx],
        currimage = insert_image(medianed_image,imagearr[:,:,:,curridx],adjustmask=1,overlap=20)

Example #58
        # skyvalue[:,:] = numpy.NaN

        for cx, cy in itertools.product(range(8), repeat=2):

            # Get pixel coord for this cell
            #print binning

            x1,x2,y1,y2 = cell2ota__get_target_region(cx, cy, binning=binning, trimcell=0)
            x21 = (x2-x1)/2

            # extract the mean value in the bottom corner
            corner = bottleneck.nanmean(ext.data[y1:y1+w, x1:x1+w].astype(numpy.float32))

            # also get the value in the bottom center
            center = bottleneck.nanmean(ext.data[y1:y1+w, x1+x21-w/2:x1+x21+w//2].astype(numpy.float32))

            excess = corner - center
            #print ext.name, cx, cy, corner, center, excess
            excesses[cx,cy] = excess

        _mean = bottleneck.nanmean(excesses)
        _median = bottleneck.nanmedian(excesses)

        guideota = (_median > 10*skynoise)
        print(ext.name, _mean, _median, skylevel, guideota)
