Example No. 1
def experiment(modelname, datasetname, datasetpath, num_trials, alpha, kreg,
               lamda, randomized, n_data_conf, n_data_val, bsz, predictor):
    ### Experiment logic
    naive_bool = predictor == 'Naive'
    if predictor in ['Naive', 'APS']:
        lamda = 0  # No regularization.

    ### Data Loading
    logits = get_logits_dataset(modelname, datasetname, datasetpath)

    ### Instantiate and wrap model
    model = get_model(modelname)

    ### Perform experiment
    top1s = np.zeros((num_trials, ))
    top5s = np.zeros((num_trials, ))
    coverages = np.zeros((num_trials, ))
    sizes = np.zeros((num_trials, ))
    for i in tqdm(range(num_trials)):
        top1_avg, top5_avg, cvg_avg, sz_avg = trial(model, logits, alpha, kreg,
                                                    lamda, randomized,
                                                    n_data_conf, n_data_val,
                                                    bsz, naive_bool)
        top1s[i] = top1_avg
        top5s[i] = top5_avg
        coverages[i] = cvg_avg
        sizes[i] = sz_avg
        print(
            f'\n\tTop1: {np.median(top1s[0:i+1]):.3f}, Top5: {np.median(top5s[0:i+1]):.3f}, Coverage: {np.median(coverages[0:i+1]):.3f}, Size: {np.median(sizes[0:i+1]):.3f}\033[F',
            end='')
    print('')
    return np.median(top1s), np.median(top5s), np.median(coverages), np.median(
        sizes), mad(top1s), mad(top5s), mad(coverages), mad(sizes)
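
A minimal sketch of the summary step used above (and in Example No. 10), on hypothetical per-trial coverages; the import is an assumption, since the snippet does not show where `mad` comes from:

import numpy as np
from scipy.stats import median_abs_deviation as mad  # assumed import

coverages = np.array([0.89, 0.91, 0.90, 0.88, 0.92])  # hypothetical per-trial values
print(np.median(coverages), mad(coverages))           # robust center and spread, as returned by experiment()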
Example No. 2
def mad_outlier(obj, nmads=3, verbose=False):
    """Outlier detection based on median absolute deviation

    Notes
    -----
    Flags cells as outliers ('EXCLUDE') when they fall more than a given
    number of median absolute deviations below the median of either of two
    quality metrics: the log of the library size and the log of the number
    of detected genes. The principle is similar to Lun et al. Three MADs is
    the default.

    Parameters
    ----------
    obj : :class:`adobo.data.dataset`
        A data class object.
    nmads : `int`
        Number of median absolute deviations below the median for the
        cell to be considered an outlier. Default: 3
    verbose : `bool`
        Be verbose or not. Default: False

    References
    ----------
    .. [1] Lun et al. (2016) F1000Res, A step-by-step workflow for
           low-level analysis of single-cell RNA-seq data with Bioconductor,
           https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5112579/

    Returns
    -------
    Modifies the passed object.
    """
    # reset
    obj.meta_cells.status[obj.meta_cells.status != 'OK'] = 'OK'
    # lib size
    ls = obj.meta_cells.total_reads
    # detected genes
    dg = obj.meta_cells.detected_genes

    ls_log = np.log2(ls + 1)
    dg_log = np.log2(dg + 1)
    # only check below
    lower_ls = np.median(ls_log) - mad(ls_log) * nmads
    lower_dg = np.median(dg_log) - mad(dg_log) * nmads
    remove = np.logical_or(ls_log < lower_ls, dg_log < lower_dg)

    r = obj.meta_cells.index.isin(remove[remove].index)
    obj.meta_cells.status[r] = 'EXCLUDE'

    if verbose:
        print('Removed %s cells' % np.sum(r))
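
A toy sketch of the same cut on one metric (library size), assuming `mad` is `scipy.stats.median_abs_deviation` and `numpy` is available as in the snippet:

import numpy as np
from scipy.stats import median_abs_deviation as mad  # assumed import

lib_size = np.array([5200, 4800, 5100, 4950, 120, 5300])  # one low-count cell
ls_log = np.log2(lib_size + 1)
lower_ls = np.median(ls_log) - mad(ls_log) * 3            # nmads = 3
print(ls_log < lower_ls)                                  # only the low-count cell is flagged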
Example No. 3
def noise_floor(sat_thresh, noi_thresh, power):
    """Computes the noise floor of an RF power array

    Channels with signal above :samp:`sat_thresh` multiplied by the :samp:`standard deviation` of the power array are excluded.
    The Median Absolute Deviation :samp:`MAD` is used to quantify the noise level of the remaining
    channels. The noise floor is defined to be the :samp:`median` of the noisy data plus :samp:`noi_thresh` multiplied by the
    :samp:`MAD` of the noisy data.

    :param sat_thresh: An integer multiple of the standard deviation of the RF power array, used to exclude channels with potential satellites. :class:`~int`
    :param noi_thresh: An integer multiple of the noisy data MAD, used to compute a noise floor. :class:`~int`
    :param power: RF power array :class:`~numpy.ndarray`

    :returns:
        noise_threshold: The power level of the noise floor in dBm :class:`~float`

    """

    # compute the standard deviation of data, and use it to identify occupied channels
    σ = np.std(power)

    # Any channel with a max power >= σ has a satellite
    sat_cut = sat_thresh * σ
    chans_pow_max = np.amax(power, axis=0)

    # Exclude the channels with sats, to only have noise data
    noise_chans = np.where(chans_pow_max < sat_cut)[0]
    noise_data = power[:, noise_chans]

    # noise median, noise mad, noise threshold = μ + 3*σ
    μ_noise = np.median(noise_data)
    σ_noise = mad(noise_data, axis=None)
    noise_threshold = μ_noise + noi_thresh * σ_noise

    return noise_threshold
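
A hedged usage sketch with synthetic data; the power array is assumed to be time x channel (as `np.amax(power, axis=0)` implies), and `mad`/`numpy` are assumed to be imported in the same module as `noise_floor`:

import numpy as np

rng = np.random.default_rng(0)
power = rng.normal(0, 1, size=(200, 64))  # noise-only channels
power[:, 10] += 20                        # one channel with a strong satellite pass

print(noise_floor(sat_thresh=3, noi_thresh=3, power=power))  # roughly median + 3 * MAD of the noise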
Example No. 4
def get_gene_stats(xvals, col_idxs, tissues):
    """
    Compute summary stats across all samples for a given gene & tissue
    """

    xmin, xq1, xmed, xmean, xq3, xmax, xsd, xmad = [], [], [], [], [], [], [], []

    for tissue in tissues.keys():
        tidx = [col_idxs[s] for s in tissues[tissue] if s in col_idxs.keys()]
        if len(tidx) > 0:
            tvals = xvals[tidx]
            xmin.append(np.nanmin(tvals))
            xq1.append(np.nanquantile(tvals, q=0.25))
            xmed.append(np.nanmedian(tvals))
            xmean.append(np.nanmean(tvals))
            xq3.append(np.nanquantile(tvals, q=0.75))
            xmax.append(np.nanmax(tvals))
            xsd.append(np.nanstd(tvals))
            xmad.append(mad(tvals))
        else:
            xmin.append(np.nan)
            xq1.append(np.nan)
            xmed.append(np.nan)
            xmean.append(np.nan)
            xq3.append(np.nan)
            xmax.append(np.nan)
            xsd.append(np.nan)
            xmad.append(np.nan)

    return xmin, xq1, xmed, xmean, xq3, xmax, xsd, xmad
Example No. 5
def mod_zscore(arr):
    """Modified z-score, as defined by Iglewicz and Hoaglin

    :param arr: Array
    :type arr: array-like

    :return: Modified z-scores of the elements of the input array
    :rtype: ndarray
    """
    return 0.6745 * (np.asarray(arr) - np.median(arr)) / mad(arr)
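
A short sketch applying `mod_zscore` with the |M| > 3.5 cutoff that Iglewicz and Hoaglin recommend for outliers; `numpy` and a scipy-style `mad` are assumed to be in scope:

import numpy as np

x = [10.1, 9.8, 10.3, 10.0, 25.0, 9.9]
print(np.abs(mod_zscore(x)) > 3.5)   # flags only the 25.0 value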
Example No. 6
def _estimate_local_noise(x: np.ndarray, robust: bool = True) -> float:
    r"""
    Estimates noise in a 1D signal. Assumes that the noise is gaussian iid.

    aux function of estimate_noise

    Parameters
    ----------
    x : 1D array
        The size of x must be at least 4. If the size is smaller, the function
        will return 0.
    robust : bool
        If True, estimates the noise using the median absolute deviation. Else
        uses the standard deviation.

    Returns
    -------
    noise : non-negative number.

    """
    d2x = np.diff(x, n=2)
    sorted_index = np.argsort(np.abs(d2x))
    d2x = d2x[sorted_index]
    # if d2x follows a normal distribution ~ N(0, 2*sigma), its sample mean
    # has a normal distribution ~ N(0,  2 * sigma / sqrt(n - 2)) where n is the
    # size of d2x.
    # d2x with high absolute values are removed until the mean of d2x is
    # lower than its standard deviation.
    # start at the 90th percentile and decrease it in each iteration.
    # The loop stops at the 20th percentile even if this condition is not met.
    n_deviations = 3    # dummy values to initialize the loop
    percentile_counter = 9  # start at 90th percentile
    noise_std = 0
    while (n_deviations > 1.0) and (percentile_counter > 2):
        percentile_index = percentile_counter * d2x.size // 10
        # the minimum number of elements required to compute the MAD
        if percentile_index <= 2:
            break
        # dev_threshold = 2 / np.sqrt(percentile - 2)

        if robust:
            noise_std = mad(d2x[:percentile_index], scale="normal")
            noise_mean = np.median(d2x[:percentile_index])
        else:
            noise_std = d2x[:percentile_index].std()
            noise_mean = d2x[:percentile_index].mean()

        # if all the values in d2x are equal, noise_std is equal to zero
        if noise_std > 0:
            n_deviations = abs(noise_mean / noise_std)
        else:
            break
        percentile_counter -= 1
    noise = noise_std / 2
    return noise
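
A hedged usage sketch on a synthetic signal, assuming the function above plus `numpy` and a `mad` that accepts `scale="normal"` (e.g. `scipy.stats.median_abs_deviation`) are in scope:

import numpy as np

rng = np.random.default_rng(0)
x = np.sin(np.linspace(0, 4 * np.pi, 500)) + rng.normal(0, 0.1, 500)
print(_estimate_local_noise(x))   # a rough estimate of the 0.1 noise level

Example No. 7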
def _match_cats(xr, yr, er, xt, yt, et, errors=False):
    if len(xr) > len(xt):
        xs = np.array(xt)
        ys = np.array(yt)
        es = np.array(et)
        xl = np.array(xr)
        yl = np.array(yr)
        el = np.array(er)
    else:
        xs = np.array(xr)
        ys = np.array(yr)
        es = np.array(er)
        xl = np.array(xt)
        yl = np.array(yt)
        el = np.array(et)

    # simple closest match iterative
    dx = 0.0
    dy = 0.0
    for _i in range(3):
        i_sl = []
        i_ls = []
        r_sl = []
        for i in range(len(xs)):
            r = np.sqrt((xl - xs[i] - dx)**2 + (yl - ys[i] - dy)**2)
            r_sl.append(r.min())
            i_sl.append(i)
            i_ls.append(r.argmin())
        r_sl = np.array(r_sl)
        # combined error, minimum error should be 2
        e_c = np.sqrt(el[i_ls]**2 + es[i_sl]**2).clip(2)

        # only 3sigma matches
        ii = np.where(
            r_sl - np.median(r_sl) < 3.0 * 1.48 * mad(r_sl, axis=None))[0]

        dx = np.sum(
            (xl[i_ls] - xs[i_sl])[ii] / e_c[ii]**2) / np.sum(1.0 / e_c[ii]**2)
        dy = np.sum(
            (yl[i_ls] - ys[i_sl])[ii] / e_c[ii]**2) / np.sum(1.0 / e_c[ii]**2)

    if errors:
        ex = np.sqrt(1.0 / np.sum(1.0 / e_c**2))
        if len(xr) > len(xt):
            return dx, dy, ex, i_ls, i_sl
        else:
            return -dx, -dy, ex, i_sl, i_ls

    if len(xr) > len(xt):
        return dx, dy, i_ls, i_sl
    else:
        return -dx, -dy, i_sl, i_ls
Example No. 8
def describe_data(data_set):
    """
    Basic descriptive statistics: the pandas describe() output with an added
    median absolute deviation ('mad') row.
    :param data_set: pandas data frame
    :return: pandas data frame with descriptive stats
    """
    describe_stats = data_set.describe().reset_index()
    describe_stats.loc[8] = ['mad'] + [mad(data_set[f'hist_{i}']) for i in range(27)] + [0]
    describe_stats.set_index('index', inplace=True, drop=True)
    assert isinstance(describe_stats, object)
    return describe_stats
Example No. 9
 def __getWeight(self, d, weight_type):
     if(weight_type=='uniform'):
         return 1.0
     elif(weight_type=='PCC'):
         return abs(np.corrcoef(self.X_[:,d], self.Y_)[0,1])
     elif(weight_type=='MAD'):
         if(self.types_[d][0]=='B'):
             return (self.X_[:,d]*1.4826).std()
         else:
             weight =  mad(self.X_[:,d])
             if(weight!=0.0):
                 return weight**-1
             else:
                 return (self.X_[:,d]*1.4826).std()
Example No. 10
def experiment(modelname, datasetname, datasetpath, num_trials, alpha,
               n_data_conf, n_data_val, bsz):
    ### Experiment logic
    ### Data Loading
    logits = get_logits_dataset(modelname, datasetname, datasetpath)

    ### Perform experiment
    top1s = np.zeros((num_trials, ))
    top5s = np.zeros((num_trials, ))
    coverages = np.zeros((num_trials, ))
    sizes = np.zeros((num_trials, ))
    for i in tqdm(range(num_trials)):
        top1_avg, top5_avg, cvg_avg, sz_avg = trial(logits, alpha, n_data_conf,
                                                    n_data_val, bsz)
        top1s[i] = top1_avg
        top5s[i] = top5_avg
        coverages[i] = cvg_avg
        sizes[i] = sz_avg
        print(
            f'\n\tTop1: {np.median(top1s[0:i+1]):.3f}, Top5: {np.median(top5s[0:i+1]):.3f}, Coverage: {np.median(coverages[0:i+1]):.3f}, Size: {np.median(sizes[0:i+1]):.3f}\033[F',
            end='')
    print('')
    return np.median(top1s), np.median(top5s), np.median(coverages), np.median(
        sizes), mad(top1s), mad(top5s), mad(coverages), mad(sizes)
Example No. 11
def mad_wavelet(image):
    """ image: Median absolute deviation of the first wavelet scale.
    (WARNING: sorry to disappoint, this is not a wavelet for mad scientists)

    Parameters
    ----------
    image: array
        An image or cube of images
    Returns
    -------
    mad: array
        median absolute deviation for each image in the cube
    """
    sigma = mad(Starlet(image, lvl=2).coefficients[:, 0, ...], axis=(-2, -1))
    return sigma
Example No. 12
def nan_mad(ref_map):
    """Compute mad while ignoring nans"""
    ref_map_mad = []
    for j in ref_map:
        if j != []:
            j = np.asarray(j)
            j = j[~np.isnan(j)]
            ref_map_mad.append(mad(j))
        else:
            ref_map_mad.append(np.nan)

    ref_map_mad = np.asarray(ref_map_mad)
    ref_map_mad[np.isnan(ref_map_mad)] = np.nanmean(ref_map_mad)

    return ref_map_mad
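
A toy sketch with a hypothetical three-pixel map, assuming `numpy` and a scipy-style `mad` are imported alongside `nan_mad`:

import numpy as np

ref_map = [[1.0, 2.0, np.nan], [3.0, 3.5, 4.0], []]  # per-pixel value lists
print(nan_mad(ref_map))   # the empty pixel is filled with the mean of the other MADs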
Example No. 13
 def getFeatureWeight(self, cost_type='uniform'):
     weights = np.ones(self.D_)
     if (cost_type == 'MAD'):
         from scipy.stats import median_abs_deviation as mad
         for d in range(self.D_):
             weight = mad(self.X_[:, d], scale='normal')
             if (self.feature_types_[d] == 'B' or abs(weight) < self.tol_):
                 weights[d] = (self.X_[:, d] * 1.4826).std()
             else:
                 weights[d] = weight**-1
     elif (cost_type == 'standard'):
         weights = np.std(self.X_, axis=0)**-1
     elif (cost_type == 'normalize'):
         weights = (self.X_.max(axis=0) - self.X_.min(axis=0))**-1
     elif (cost_type == 'robust'):
         weights = (np.quantile(self.X_, 0.75, axis=0) -
                    np.quantile(self.X_, 0.25, axis=0))**-1
     return weights
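
A standalone sketch of the MAD-based inverse weighting from the 'MAD' branch above, on a hypothetical two-feature matrix; `median_abs_deviation` is imported the same way the snippet does:

import numpy as np
from scipy.stats import median_abs_deviation as mad

rng = np.random.default_rng(0)
X = np.column_stack([rng.normal(0, 1, 100),     # narrow feature
                     rng.normal(0, 10, 100)])   # wide feature
print(mad(X, axis=0, scale='normal') ** -1)     # larger spread -> smaller weight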
Example No. 14
def noise_floor(sat_thresh, noi_thresh, data=None):
    """Computes the noise floor of the data """

    # compute the standard deviation of data, and use it to identify occupied channels
    σ = np.std(data)

    # Any channel with a max power >= σ has a satellite
    sat_cut = sat_thresh * σ
    chans_pow_max = np.amax(data, axis=0)

    # Exclude the channels with sats, to only have noise data
    noise_chans = np.where(chans_pow_max < sat_cut)[0]
    noise_data = data[:, noise_chans]

    # noise median, noise mad, noise threshold = μ + 3*σ
    μ_noise = np.median(noise_data)
    σ_noise = mad(noise_data, axis=None)
    noise_threshold = μ_noise + noi_thresh * σ_noise

    return (data, noise_threshold)
Example No. 15
 def getFeatureWeight(self, cost_type='uniform'):
     weights = np.ones(self.D_)
     if (cost_type == 'MAD'):
         for d in range(self.D_):
             weight = mad(self.X_[:, d], scale='normal')
             if (self.feature_types_[d] == 'B' or abs(weight) < self.tol_):
                 weights[d] = (self.X_[:, d] * 1.4826).std()
             else:
                 weights[d] = weight**-1
     elif (cost_type == 'standard'):
         weights = np.std(self.X_, axis=0)**-1
     elif (cost_type == 'normalize'):
         weights = (self.X_.max(axis=0) - self.X_.min(axis=0))**-1
     elif (cost_type == 'robust'):
         q25, q75 = np.percentile(self.X_, [25, 75], axis=0)
         for d in range(self.D_):
             if (q75[d] - q25[d] == 0):
                 weights[d] = self.tol_**-1
             else:
                 weights[d] = (q75[d] - q25[d])**-1
     return weights
Example No. 16
def nan_mad(good_ref_map):
    """Compute MAD of values in pixel of healpix map while ignoring nans.

    :param good_ref_map: Reference healpix map, output from :func:`~embers.tile_maps.beam_utils.good_ref_maps`

    :returns:
        - ref_map_mad - Median Absolute Deviation of the input healpix map pixels

    """

    ref_map_mad = []
    for j in good_ref_map:
        if j != []:
            j = np.asarray(j)
            j = j[~np.isnan(j)]
            ref_map_mad.append(mad(j))
        else:
            ref_map_mad.append(np.nan)

    ref_map_mad = np.asarray(ref_map_mad)
    ref_map_mad[np.isnan(ref_map_mad)] = np.nanmean(ref_map_mad)

    return ref_map_mad
Example No. 17
def test_jacobian():

    # Compile the Jacobian
    _a = tt.dscalar()
    _b = tt.dscalar()
    log_jac = theano.function([_a, _b], StarryProcess(a=_a, b=_b).log_jac())

    # Log probability
    def log_prob(p):
        if np.any(p < 0):
            return -np.inf
        elif np.any(p > 1):
            return -np.inf
        else:
            return log_jac(*p)

    # Run the sampler
    ndim, nwalkers, nsteps = 2, 50, 10000
    p0 = np.random.random(size=(nwalkers, ndim))
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
    sampler.run_mcmc(p0, nsteps)

    # Transform to latitude params
    a, b = sampler.chain.T.reshape(2, -1)
    mu, sigma = beta2gauss(a, b)

    # Compute the 2d histogram
    m1, m2 = 0, 80
    s1, s2 = 0, 45
    hist, _, _ = np.histogram2d(mu, sigma, range=((m1, m2), (s1, s2)))
    hist /= np.max(hist)

    # Check that the variation is less than 10% across the domain
    std = 1.4826 * mad(hist.flatten())
    mean = np.mean(hist.flatten())
    assert std / mean < 0.1
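
The `1.4826 * mad(...)` above is the usual rescaling that makes the MAD consistent with a Gaussian standard deviation; a quick check of that convention, assuming `mad` is the unscaled `scipy.stats.median_abs_deviation`:

import numpy as np
from scipy.stats import median_abs_deviation as mad  # assumed import

x = np.random.default_rng(0).normal(0, 2.0, 100_000)
print(np.std(x), 1.4826 * mad(x))   # both are close to 2.0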
Example No. 18
def get_desc_stats(l):
    return max(l), min(l), mean(l), median(l), stdev(l), mad(l)
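
The one-liner above relies on names imported elsewhere; a self-contained sketch, assuming `mean`, `median`, `stdev` come from the standard `statistics` module and `mad` from scipy:

from statistics import mean, median, stdev
from scipy.stats import median_abs_deviation as mad  # assumed imports

def get_desc_stats(l):
    return max(l), min(l), mean(l), median(l), stdev(l), mad(l)

print(get_desc_stats([2.0, 3.5, 3.7, 4.1, 9.0]))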
Example No. 19
def find_bad_channels(inst, picks='eeg',
                      method='correlation',
                      mad_threshold=1,
                      std_threshold=1,
                      r_threshold=0.4,
                      percent_threshold=0.1,
                      time_step=1.0,
                      sfreq=None,
                      return_z_scores=False,
                      channels=None):

    # arguments to be passed to pick_types
    kwargs = {pick: True for pick in [picks]}

    # check that the input data can be handled by the function
    if isinstance(inst, BaseRaw):
        # only keep data from desired channels
        inst = inst.copy().pick_types(**kwargs)
        dat = inst.get_data() * 1e6  # to microvolt
        channels = inst.ch_names
        sfreq = inst.info['sfreq']
    elif isinstance(inst, np.ndarray):
        dat = inst
        if not channels:
            raise ValueError('If "inst" is not an instance of BaseRaw a list '
                             'of channel names must be provided')
    else:
        raise ValueError('inst must be an instance of BaseRaw or a numpy array')

    # save shape of data
    n_channels, n_samples = dat.shape
    if n_channels != len(channels):
        raise ValueError("Number of channels and data dimensions don't match")

    # make sure method arguments are in a list
    if not isinstance(method, list):
        method = [method]

    # place holder for results
    bad_channels = dict()

    # 1) find channels with zero or near zero activity
    if 'flat' in method:
        # compute estimates of channel activity
        mad_flats = mad(dat, scale=1, axis=1) < mad_threshold
        std_flats = np.std(dat, axis=1) < std_threshold

        # flat channels identified
        flats = np.argwhere(np.logical_or(mad_flats, std_flats))
        flats = np.asarray([channels[int(flat)] for flat in flats])

        # warn user if too many channels were identified as flat
        if flats.shape[0] > (n_channels / 2):
            warnings.warn('Too many channels have been identified as "flat"! '
                          'Make sure the input values in "inst" are provided '
                          'on a volt scale. '
                          'Otherwise try choosing another (meaningful) '
                          'threshold for identification.')

        bad_channels.update(flat=flats)

    # 2) find bad channels by deviation (high variability in amplitude)
    if 'deviation' in method:

        # median absolute deviation (MAD) scores for each channel
        mad_scores = \
            [mad(dat[i, :], scale=1) for i in range(n_channels)]

        # compute robust z-scores for each channel
        rz_scores = \
            0.6745 * (mad_scores - np.nanmedian(mad_scores)) / mad(mad_scores,
                                                                   scale=1)

        # channels identified by deviation criterion
        bad_deviation = \
            [channels[i] for i in np.where(np.abs(rz_scores) >= 5.0)[0]]

        bad_channels.update(deviation=np.asarray(bad_deviation))

        if return_z_scores:
            bad_channels.update(deviation_z_scores=rz_scores)

    # 3) find channels with low correlation to other channels
    if 'correlation' in method:

        # check that sampling frequency argument was provided
        if not sfreq:
            raise ValueError('If "inst" is not an instance of BaseRaw a '
                             'sampling frequency must be provided. Usually '
                             'the sampling frequency of the EEG recording in'
                             'question.')

        # based on the length of the provided data,
        # determine size and amount of time windows for analyses
        corr_frames = time_step * sfreq
        corr_window = np.arange(corr_frames)

        # sample indices (i.e., time offsets) for each time window
        # to use for the correlation analysis
        corr_offsets = np.arange(1, (n_samples - corr_frames), corr_frames)
        n_corr_steps = corr_offsets.shape[0]
        # place holders for correlation coefficients
        max_r = np.ones((n_channels, n_corr_steps))
        channel_r = np.ones((n_corr_steps, n_channels))

        # create time windows for analysis
        dat_t = np.transpose(dat)
        dat_windowed = np.reshape(
            np.transpose(dat_t[0: corr_window.shape[0] * n_corr_steps, :]),
            (n_channels, corr_window.shape[0], n_corr_steps),
            order="F",)

        # compute (pearson) correlation coefficient across channels
        # (for each channel and analysis time window)
        # take the absolute of the 98th percentile of the correlations with
        # the other channels as a measure of how well that channel is correlated
        # to other channels
        for k in range(0, n_corr_steps):
            eeg_portion = np.transpose(np.squeeze(dat_windowed[:, :, k]))
            window_correlation = np.corrcoef(np.transpose(eeg_portion))
            abs_corr = \
                np.abs(
                    np.subtract(
                        window_correlation, np.diag(np.diag(window_correlation))
                    )
                )
            channel_r[k, :] = np.quantile(abs_corr, 0.98, axis=0)

        # fill in the actual correlations
        max_r[np.arange(0, n_channels), :] = np.transpose(channel_r)

        # check which channels correlate badly with the other channels (i.e.,
        # are below correlation threshold) in a certain fraction of windows
        # (bad_time_threshold)
        thresholded_correlations = max_r < r_threshold
        thresholded_correlations = thresholded_correlations.astype(int)
        frac_bad_corr_windows = np.mean(thresholded_correlations, axis=1)

        # find the corresponding channel names and return
        bad_idxs = np.argwhere(frac_bad_corr_windows > percent_threshold)
        uncorrelated_channels = [channels[int(bad)] for bad in bad_idxs]

        bad_channels.update(correlation=np.asarray(uncorrelated_channels))  # noqa: E501

    return bad_channels
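
A hedged usage sketch on a synthetic (channels x samples) array; the channel names are made up for illustration, and the 'correlation' method is skipped so no sampling frequency is needed:

import numpy as np

rng = np.random.default_rng(0)
dat = rng.normal(0, 10, size=(4, 5000))   # 4 channels, values on a microvolt scale
dat[3] = 0.0                              # one dead (flat) channel

bads = find_bad_channels(dat, method=['flat', 'deviation'],
                         channels=['Fz', 'Cz', 'Pz', 'Oz'])
print(bads)                               # 'Oz' is expected under both criteria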
Example No. 20
def baseline_noise_estimation(y: np.ndarray) -> Tuple[np.ndarray, float]:
    # Noise estimation
    # ----------------
    # if y = s + b + e
    # where s is the peak signal, b is a baseline and e is an error term.
    # some assumptions:
    # 1. s is symmetric. This ensures that the cumulative sum of the
    #     difference of the peak is ~ 0.
    # 2. e ~ N(0, sigma) iid.
    # 3. The derivative of b, |db/dx|, is small. In particular, for two
    #    consecutive points |b[n + 1] - b[n]| << sigma
    #
    # From this we can say that for two consecutive points the following
    # approximation is valid:
    #
    #  dy[n] = y[n + 1] - y[n] ~ s[n + 1] - s[n] + e
    #
    # If there's no peak signal, then:
    #
    # dy[n] ~= e ~ N(0, sqrt(2) * sigma)
    #
    # (The sqrt(2) term comes from adding two independent normal random
    # variables with std = sigma.)
    # To remove zones with peaks we use an iterative approach: first the signal
    # values above the 90th percentile are removed. The noise is computed as the
    # std of the remaining values of dy, using the MAD as a robust estimator of
    # the std. Using this noise value, we find baseline points, and from these
    # points we compute a new noise value from the dy values. If the difference
    # is greater than 50 percent, the procedure is repeated, now removing values
    # above the 80th percentile of the signal, and so on.
    #
    # Baseline estimation
    # -------------------
    # The points where dy is smaller than three times the noise are considered
    # as baseline. The baseline is then interpolated in the peak zones.

    quantiles = np.linspace(0.1, 0.9, 9)[::-1]
    dy = np.diff(y)
    dy_abs = np.abs(dy)
    noise_found = False
    noise = 0

    for q in quantiles:

        # initial noise estimation
        threshold = np.quantile(y, q)
        noise = mad(dy[y[1:] < threshold]) / np.sqrt(2)

        # prevent noise equal to zero or nan
        if np.isnan(noise) or np.isclose(noise, 0):
            noise = np.finfo(np.float64).eps

        # detect baseline points
        baseline_mask = (dy_abs <= (3 * noise)) & (y[1:] < threshold)
        baseline_index = np.where(baseline_mask)[0] + 1

        # compare the noise value obtained with the index selected as baseline
        new_noise = mad(dy[baseline_index - 1]) / np.sqrt(2)
        dnoise = np.abs(new_noise - noise) / noise

        # checks the difference with the new noise value
        if dnoise <= 0.5:
            noise = new_noise
            baseline_mask = (dy_abs <= (3 * noise)) & (y[1:] < threshold)
            baseline_index = np.where(baseline_mask)[0] + 1
            if baseline_index.size:
                baseline_index = _remove_non_consecutive_index(baseline_index)
                noise_found = True
                break

    # fallback to the noise value using q = 0.5 if there was no convergence
    if (not noise_found) or (baseline_index.size == 0):
        threshold = np.quantile(y, 0.5)
        noise = mad(dy[y[1:] < threshold]) / np.sqrt(2)
        baseline_index = np.where(dy_abs <= (3 * noise))[0] + 1
        # if baseline is still empty, return a constant baseline
        if baseline_index.size == 0:
            noise = max(np.finfo(np.float64).eps, mad(y[y < threshold]))
            baseline = np.ones_like(y) * y.min()
            return baseline, noise

    # append first and last elements if they are not part of the baseline
    # this is a necessary step before interpolation.
    baseline_x, baseline_y = _get_baseline_points(y, baseline_index)

    # interpolate baseline to have the same size as y
    interpolator = interp1d(baseline_x, baseline_y)
    baseline = interpolator(np.arange(y.size))
    return baseline, noise
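
The routine above leans on module-private helpers (`_remove_non_consecutive_index`, `_get_baseline_points`), so a direct call is not reproduced here; instead, a minimal sketch of the core estimate it iterates on, the MAD of the first difference divided by sqrt(2), assuming a scipy-style `mad`:

import numpy as np
from scipy.stats import median_abs_deviation as mad  # assumed import

rng = np.random.default_rng(0)
y = rng.normal(0, 0.3, 1000)                   # baseline-only signal, sigma = 0.3
dy = np.diff(y)                                # ~ N(0, sqrt(2) * sigma)
print(mad(dy, scale='normal') / np.sqrt(2))    # recovers roughly 0.3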
Example No. 21
plt.plot(wavd, flux, alpha=0.4, label='raw')
# edge removal
ts = 10
te = -100
wavd = wavd[ts:te]
flux = flux[ts:te]

# outlier
md = medfilt(flux, kernel_size=17)  # IRD/MMF
# md=medfilt(flux,kernel_size=7) #REACH

medf = flux - md
plt.plot(wavd, medf, color='gray', alpha=0.4, label='flux-median_filt')
sn = 5.0

plt.axhline(sn * mad(medf), color='gray', ls='dashed', alpha=0.4)
mask = np.abs(medf - np.median(medf)) < sn * mad(medf)
plt.plot(wavd[mask],
         medf[mask],
         '+',
         color='C5',
         alpha=0.4,
         label='flux-median_filt (masked)')

# Wavelength mask
mask = mask * (wavd < 15690.)
###

flux = flux[mask]
wavd = wavd[mask]
plt.plot(wavd, flux, alpha=0.7, color='C2', label='cleaned')
Example No. 22
File: rfp.py Project: joeduris/rfp
def plot_phase_and_roc(fld, ncar, dgrid, xlamds, kernel_size=11, printQ=False):
    # ipeak = np.argmax(np.sum(np.sum(np.abs(fld)**2,axis=1),axis=1))
    imid = int(ncar / 2)
    dimid = int(ncar * 0.1)
    # ipeak = np.argmax(np.abs(fld[:,imid,imid])**2)
    ipeak = np.argmax(
        np.sum(np.sum(np.abs(fld[:, imid - dimid:imid + dimid,
                                 imid - dimid:imid + dimid])**2,
                      axis=1),
               axis=1))
    xs = dgrid * np.linspace(-1, 1, ncar)
    dx_m = xs[1] - xs[0]
    xs *= 1e6

    powx = rolling_average(np.abs(fld[ipeak, :, imid])**2,
                           kernel_size=kernel_size)
    powy = rolling_average(np.abs(fld[ipeak, imid])**2,
                           kernel_size=kernel_size)
    wx = fwhm(powx)[0] * dx_m
    wy = fwhm(powy)[0] * dx_m
    plt.plot(xs, powx, label='x')
    plt.plot(xs, powy, label='y')
    plt.xlabel('Transverse position (um)')
    plt.ylabel('Slice power (arb.)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    xphase = np.unwrap(np.angle(fld[ipeak, :, imid]))
    yphase = np.unwrap(np.angle(fld[ipeak, imid]))
    # plt.plot(xs,xphase,label='x');
    # plt.plot(xs,yphase);
    yphase = rolling_average(yphase, kernel_size=kernel_size)
    xphase = rolling_average(xphase, kernel_size=kernel_size)
    plt.plot(xs, xphase, label='x')
    plt.plot(xs, yphase, label='y')
    plt.xlabel('Transverse position (um)')
    plt.ylabel('phase (rad)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    phasefactor = -2. * np.pi / xlamds * dx_m**2
    rocy = np.diff(np.diff(yphase))
    rocx = np.diff(np.diff(xphase))
    rocy = rolling_average(rocy, kernel_size=kernel_size)
    rocx = rolling_average(rocx, kernel_size=kernel_size)
    #     plt.plot(xs[1:-1],rocx); plt.plot(xs[1:-1],rocy); plt.show()
    rocy = 1. / phasefactor / rocy
    rocx = 1. / phasefactor / rocx
    plt.plot(xs[1:-1], rocx, label='x')
    plt.plot(xs[1:-1], rocy, label='y')
    rocs = np.reshape([rocy, rocx], -1)
    rocmean = np.median(rocs)
    rocstd = mad(rocs)
    try:
        ylim = np.array([-1, 1]) * rocstd + rocmean
        plt.ylim(ylim)
    except:
        pass
    plt.xlabel('Transverse position (um)')
    plt.ylabel('Radius of curvature (m)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    Rx = rocx[imid]
    Ry = rocy[imid]
    pi = np.pi
    z0x = pi**2 * Rx * wx**4 / (pi**2 * wx**4 + Rx**2 * xlamds**2)
    z0y = pi**2 * Ry * wy**4 / (pi**2 * wy**4 + Ry**2 * xlamds**2)
    w0x = Rx * wx * xlamds / np.sqrt(pi**2 * wx**4 + Rx**2 * xlamds**2)
    w0y = Ry * wy * xlamds / np.sqrt(pi**2 * wy**4 + Ry**2 * xlamds**2)
    zrx = pi * w0x**2 / xlamds
    zry = pi * w0y**2 / xlamds
    #Solve[{R == z (1 + (zr/z)^2), w^2 == w0^2 (1 + (z/zr)^2)} /. zr -> \[Pi] w0^2/\[Lambda]0, {z, w0}]

    zwdic = {
        'Rx': Rx,
        'Ry': Ry,
        'wx': wx,
        'wy': wy,
        'z0x': z0x,
        'z0y': z0y,
        'w0x': w0x,
        'w0y': w0y,
        'zrx': zrx,
        'zry': zry
    }
    if printQ: print(zwdic)

    return zwdic
Example No. 23
def fitIntegratedIntensity(stack,
                           line,
                           outDir,
                           fwhm=None,
                           maxAbsVel=250 * u.km / u.s,
                           snThreshold=3.0):
    '''
    
    calculate integrated intensity via a gaussian fit

    input: 

        stack: single stack
    
        outDir: output directory for plots and diagnostics
    
        fwhm: fwhm to use for upper limit estimate

        maxAbsVel: maximum velocity at which we expect emission.
    
        snThreshold: S/N threshold for peak finding


    Date        Programmer      Description of Changes
    ----------------------------------------------------------------------
    5/13/2021   A.A. Kepley     Original Code

    '''

    # default is to scale to normal distribution
    from scipy.stats import median_absolute_deviation as mad

    from astropy.modeling import models, fitting
    from scipy import integrate
    from scipy.stats import f

    spectral_axis = stack['spectral_axis']
    stack_profile = stack['stack_profile_' + line]

    chanwidth = spectral_axis[1] - spectral_axis[0]

    lineFreeChans = (spectral_axis > maxAbsVel) | (spectral_axis < -maxAbsVel)

    # mad is already scaled to gaussian distribution
    noisePerChan = mad(stack_profile[lineFreeChans]) * stack_profile.unit

    lineChans = (spectral_axis < maxAbsVel) & (spectral_axis > -maxAbsVel)

    plt.clf()
    fig, myax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))

    plt.plot(spectral_axis, stack_profile, label='data')
    plt.xlabel('Velocity - ' + spectral_axis.unit.to_string())
    plt.ylabel('Average Intensity - ' + stack_profile.unit.to_string())
    plt.title(stack['galaxy'] + ' ' + line + ' ' + stack['bin_type'] + ' ' +
              stack['bin_mean'].to_string())
    plt.text(0.07,
             0.95,
             "Noise=" + noisePerChan.to_string(),
             transform=myax.transAxes)
    plt.axhspan(-3.0 * noisePerChan.value,
                3.0 * noisePerChan.value,
                color='gray',
                alpha=0.2)

    if np.any(stack_profile[lineChans] > snThreshold * noisePerChan):
        # fit line using Gaussian

        # construct weight vector
        weights = np.ones(len(stack_profile)) / noisePerChan.value

        # start off by fitting one Gaussian
        amp_est = max(stack_profile)
        peak_cut = spectral_axis[stack_profile > amp_est * 0.5]
        fwhm_est = max(peak_cut) - min(peak_cut)
        sigma_est = fwhm_est / 2.355
        init_g = models.Gaussian1D(amplitude=amp_est.value,
                                   stddev=sigma_est.value)

        init_g.amplitude.min = 0.0
        init_g.stddev.min = 6.0
        init_g.stddev.max = 200.0

        fit_g = fitting.LevMarLSQFitter()

        result_g = fit_g(init_g,
                         spectral_axis.value,
                         stack_profile.value,
                         weights=weights)

        myax.plot(spectral_axis,
                  result_g(spectral_axis.value),
                  label='1 Gauss')

        # Now fit two Gaussians
        init_g1 = models.Gaussian1D(amplitude=result_g.amplitude / 2.0,
                                    stddev=result_g.stddev / 2.0,
                                    mean=result_g.mean + result_g.stddev)

        init_g1.amplitude.min = 0.0
        init_g1.stddev.min = 6.0
        init_g1.stddev.max = 200.0

        init_g2 = models.Gaussian1D(amplitude=result_g.amplitude / 2.0,
                                    stddev=result_g.stddev / 2.0,
                                    mean=result_g.mean - result_g.stddev)

        init_g2.amplitude.min = 0.0
        init_g2.stddev.min = 6.0
        init_g2.stddev.max = 200.0

        init_g1_g2 = init_g1 + init_g2

        fit_g1_g2 = fitting.LevMarLSQFitter()

        result_g1_g2 = fit_g1_g2(init_g1_g2,
                                 spectral_axis.value,
                                 stack_profile.value,
                                 weights=weights)

        plt.plot(spectral_axis,
                 result_g1_g2(spectral_axis.value),
                 label='2 Gauss')

        # calculate reduced chi-square for each fit.

        chisquare_g, chisquare_r_g = chiSquare(stack_profile.value,
                                               result_g(spectral_axis.value),
                                               1.0 / weights,
                                               nparams=3)
        chisquare_g1_g2, chisquare_r_g1_g2 = chiSquare(
            stack_profile.value,
            result_g1_g2(spectral_axis.value),
            1.0 / weights,
            nparams=6)

        # calculate f-values by taking ratio of chisquare values
        ## TODO: I think this is right. It's the definition, but the
        ## distribution to compare to is below.
        fval = chisquare_r_g / chisquare_r_g1_g2

        # sf = survival function = 1 - cdf
        # first parameter in f is the difference in the number of degrees
        # of freedom between the two fits (here 6-3). The second is the
        # number of degrees of freedom in the 2nd (2 gaussian
        # fit). Citation wikipedia article on F statistics and
        # regression. It's consistent with the description in Bevington
        # and Robinson.
        pvalue = f.sf(fval, 6 - 3, (len(stack_profile) - 6.0))

        myax.text(0.07,
                  0.9,
                  "Chi_1=" + str(chisquare_r_g),
                  transform=myax.transAxes)
        myax.text(0.07,
                  0.85,
                  "Chi_2=" + str(chisquare_r_g1_g2),
                  transform=myax.transAxes)
        myax.text(0.07, 0.8, "F=" + str(fval), transform=myax.transAxes)
        myax.text(0.07, 0.75, "p=" + str(pvalue), transform=myax.transAxes)

        # For diagnostics purposes.
        # print(chisquare_r_g, chisquare_r_g1_g2,fval,pvalue)

        # calculate integrated intensity from fits. If the pvalue is
        # small, we reject the null hypothesis that the double-gaussian
        # fits as well as a single gaussian.
        if pvalue < 0.05:
            stack_int = integrate.quad(
                result_g1_g2, -maxAbsVel.value,
                maxAbsVel.value)[0] * stack_profile.unit * spectral_axis.unit
            stack_int_err = np.sqrt(
                fwhm_est / chanwidth) * chanwidth * noisePerChan
            fwhm = fwhm_est
            myax.text(0.07, 0.7, 'Best: 2 Gauss', transform=myax.transAxes)
        else:
            stack_int = integrate.quad(
                result_g, -maxAbsVel.value,
                maxAbsVel.value)[0] * stack_profile.unit * spectral_axis.unit
            stack_int_err = np.sqrt(
                fwhm_est / chanwidth) * chanwidth * noisePerChan
            fwhm = fwhm_est

            myax.text(0.07, 0.7, 'Best: 1 Gauss', transform=myax.transAxes)

        uplim = False

    elif fwhm:

        stack_int_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan

        stack_int = snThreshold * stack_int_err

        uplim = True

    else:
        stack_int = np.nan * spectral_axis.unit * stack_profile.unit
        stack_int_err = np.nan * spectral_axis.unit * stack_profile.unit
        fwhm = np.nan * spectral_axis.unit
        uplim = True

    plt.legend(loc='upper right')
    plotname = stack['galaxy'] + '_' + line + '_' + stack[
        'bin_type'] + '_' + str(stack['bin_mean'].value) + '_fit.png'
    plt.savefig(os.path.join(outDir, plotname))
    plt.close()

    return stack_int, stack_int_err, fwhm, uplim
Example No. 24
def dho_fit(
    t,
    y,
    yerr,
    init_func=None,
    neg_lp_func=None,
    optimizer_func=None,
    n_opt=20,
    user_bounds=None,
    scipy_opt_kwargs={},
    scipy_opt_options={},
    debug=False,
):
    """
    Fit DHO to time series

    The default settings are optimized for normalized LCs.

    Args:
        t (array(float)): Time stamps of the input time series (the default unit is day).
        y (array(float)): y values of the input time series.
        yerr (array(float)): Measurement errors for y values.
        init_func (object, optional): A user-provided function to generate initial
            guesses for the optimizer. Defaults to None.
        neg_lp_func (object, optional): A user-provided function to compute the negative
            log probability given an array of parameters, an array of time series values and
            a celerite GP instance. Defaults to None.
        optimizer_func (object, optional): A user-provided optimizer function.
            Defaults to None.
        n_opt (int, optional): Number of optimizers to run. Defaults to 20.
        user_bounds (list, optional): Parameter boundaries for the default optimizer and
            the default flat prior. Defaults to None.
        scipy_opt_kwargs (dict, optional): Keyword arguments for scipy.optimize.minimize.
            Defaults to {}.
        scipy_opt_options (dict, optional): "options" argument for scipy.optimize.minimize.
            Defaults to {}.
        debug (bool, optional): Turn on/off debug mode. Defaults to False.

    Raises:
        celerite.solver.LinAlgError: For non-positive definite autocovariance matrices.

    Returns:
        array(float): Best-fit DHO parameters
    """

    # determine user defined boundaries if any
    if user_bounds is not None and (len(user_bounds) == 4):
        bounds = user_bounds
    else:
        bounds = [(-15, 15)] * 4
        bounds[2:] = [(a[0] - 8, a[1] - 8) for a in bounds[2:]]

    # re-position/normalize lc
    t = t - t[0]
    y = y - np.median(y)
    y_std = mad(y) * 1.4826
    y = y / y_std
    yerr = yerr / y_std

    # determine negative log probability function
    if neg_lp_func is None:
        neg_lp = partial(neg_lp_flat, bounds=np.array(bounds), mode="param")
    else:
        neg_lp = neg_lp_func

    # initialize parameter, kernel and GP
    kernel = DHO_term(*dho_log_param_init())
    gp = GP(kernel, mean=0)
    gp.compute(t, yerr)

    # determine initialize function
    if init_func is None:
        init = partial(dho_log_param_init)
    else:
        init = init_func

    # determine the optimizer function
    if optimizer_func is None:
        scipy_opt_kwargs.update({"method": "L-BFGS-B", "bounds": bounds})
        opt = partial(
            scipy_opt,
            mode="param",
            opt_kwargs=scipy_opt_kwargs,
            opt_options=scipy_opt_options,
            debug=debug,
        )
    else:
        opt = optimizer_func

    # get best-fit solution & adjust MA params (multiply by y_std)
    best_fit_return = opt(y, gp, init, neg_lp, n_opt)
    best_fit_return[2:] = best_fit_return[2:] * y_std

    return best_fit_return
Example No. 25
        resultArr[n, :-1] *= 60.0  # gps to gpm

        if args.print:
            printResult(result, model, full=False)
        if args.plot:
            plotResids(result, model, measFlows, testDateString, pp)
            plotScheds(result, model, measFlows, testDateString, pp)

    if args.plot:
        pp.close()

    if args.print:
        print(resultArr)

    meds = np.median(resultArr, axis=0)
    mads = mad(resultArr, axis=0)

    for n in range(len(meds)):
        if flowData.get(activeFlowLabels[n]):
            flowData[activeFlowLabels[n]] = (meds[n], mads[n])

    if args.updateSheet:
        updateSheet(testDate.timestamp(), flowData)

    if args.dataOut:
        # check for existence.  if exists just append line.  if not, put out header line first
        if os.path.exists(args.dataOut):
            df = open(args.dataOut, 'a')
        else:
            df = open(args.dataOut, 'w')
            if args.csv:
Example No. 26
def good_maps(ref_map):
    """Creates a ref map with only good satellites"""

    pointings = ["0", "2", "4"]

    # load data from map .npz file
    f = Path(f"{map_dir}/{ref_map}")
    tile_data = np.load(f, allow_pickle=True)
    tile_data = {key: tile_data[key].item() for key in tile_data}
    ref_map = tile_data["ref_map"]

    # Good sats from which to make plots
    good_sats = [
        25338,
        25982,
        25984,
        25985,
        28654,
        40086,
        40087,
        40091,
        41179,
        41180,
        41182,
        41183,
        41184,
        41185,
        41187,
        41188,
        41189,
        44387,
    ]

    orbcomm = [
        25982,
        25984,
        25985,
        40086,
        40087,
        40091,
        41179,
        41180,
        41182,
        41183,
        41184,
        41185,
        41187,
        41188,
        41189,
    ]
    noaa = [25338, 28654]

    meteor = [44387]

    sat_types = [orbcomm, noaa, meteor]
    sat_types_names = ["orbcomm", "noaa", "meteor"]

    map_dict = {}

    for index, s_type in enumerate(sat_types):

        # Empty good map
        good_map = [[] for pixel in range(hp.nside2npix(nside))]

        for p in pointings:

            # append to good map from all good sat data
            for sat in s_type:
                for pix in range(hp.nside2npix(nside)):
                    good_map[pix].extend(ref_map[p][sat][pix])

        mad_map = []
        for j in good_map:
            if j != []:
                j = np.asarray(j)
                j = j[~np.isnan(j)]
                mad_map.append(mad(j))
            else:
                mad_map.append(np.nan)

        good_map = [np.nanmedian(pixel) for pixel in good_map]

        map_type = [good_map, mad_map]

        map_dict[sat_types_names[index]] = map_type

    return map_dict
Example No. 27
def carma_fit(
    t,
    y,
    yerr,
    p,
    q,
    init_func=None,
    neg_lp_func=None,
    optimizer_func=None,
    n_opt=20,
    user_bounds=None,
    scipy_opt_kwargs={},
    scipy_opt_options={},
    debug=False,
):
    """
    Fit an arbitrary CARMA model

    The default settings are optimized for normalized LCs.

    Args:
        t (array(float)): Time stamps of the input time series (the default unit is day).
        y (array(float)): y values of the input time series.
        yerr (array(float)): Measurement errors for y values.
        p (int): The p order of a CARMA(p, q) model.
        q (int): The q order of a CARMA(p, q) model.
        init_func (object, optional): A user-provided function to generate initial
            guesses for the optimizer. Defaults to None.
        neg_lp_func (object, optional): A user-provided function to compute the negative
            log probability given an array of parameters, an array of time series values and
            a celerite GP instance. Defaults to None.
        optimizer_func (object, optional): A user-provided optimizer function.
            Defaults to None.
        n_opt (int, optional): Number of optimizers to run.
            Defaults to 20.
        user_bounds (array(float), optional): Parameter boundaries for the default
            optimizer. If p > 2, these are boundaries for the coefficients of the
            factored polynomial. Defaults to None.
        scipy_opt_kwargs (dict, optional): Keyword arguments for scipy.optimize.minimize.
            Defaults to {}.
        scipy_opt_options (dict, optional): "options" argument for scipy.optimize.minimize.
            Defaults to {}.
        debug (bool, optional): Turn on/off debug mode. Defaults to False.

    Raises:
        celerite.solver.LinAlgError: For non-positive definite autocovariance matrices.

    Returns:
        array(float): Best-fit parameters
    """
    # set core config
    dim = int(p + q + 1)
    mode = "fcoeff" if p > 2 else "param"

    # init bounds for fitting
    if user_bounds is not None and (len(user_bounds) == dim):
        bounds = user_bounds
    else:
        bounds = [(-15, 15)] * dim
        bounds[p:-1] = [(a[0] - 5, a[1] - 5) for a in bounds[p:-1]]
        bounds[-1] = (-15, 5)

    # re-position lc
    t = t - t[0]
    y = y - np.median(y)
    y_std = mad(y) * 1.4826
    y = y / y_std
    yerr = yerr / y_std

    # initialize parameter and kernel
    ARpars, MApars = sample_carma(p, q)
    kernel = CARMA_term(np.log(ARpars), np.log(MApars))
    gp = GP(kernel, mean=0)
    gp.compute(t, yerr)

    # determine/set init func
    if init_func is not None:
        init = init_func
    else:
        init = partial(carma_log_fcoeff_init, p, q)

    # determine/set negative log probability function
    if neg_lp_func is None:
        neg_lp = partial(neg_lp_flat, bounds=np.array(bounds), mode=mode)
    else:
        neg_lp = neg_lp_func

    # determine/set optimizer function
    if optimizer_func is None:
        scipy_opt_kwargs.update({"method": "L-BFGS-B", "bounds": bounds})
        opt = partial(
            scipy_opt,
            mode=mode,
            opt_kwargs=scipy_opt_kwargs,
            opt_options=scipy_opt_options,
            debug=debug,
        )
    else:
        opt = optimizer_func

    # get best-fit solution & adjust MA params (multiply by y_std)
    best_fit_return = opt(y, gp, init, neg_lp, n_opt)
    best_fit_return[p:] = best_fit_return[p:] * y_std

    return best_fit_return
Example No. 28
def sumIntegratedIntensity(spectral_axis,
                           stack_profile,
                           fwhm=None,
                           maxAbsVel=250.0 * u.km / u.s,
                           snThreshold=3.0):
    '''
    calculate the straight sum of the integrated intensity.
    
    Date        Programmer      Description of Changes
    ----------------------------------------------------------------------
    5/13/2021   A.A. Kepley     Original Code

    '''

    from astropy.modeling import models, fitting
    from scipy import integrate

    # default is to scale to normal distribution
    from scipy.stats import median_absolute_deviation as mad

    chanwidth = spectral_axis[1] - spectral_axis[0]

    lineFreeChans = (spectral_axis > maxAbsVel) | (spectral_axis < -maxAbsVel)

    # mad is already scaled to gaussian distribution
    noisePerChan = mad(stack_profile[lineFreeChans]) * stack_profile.unit

    lineChans = (spectral_axis < maxAbsVel) & (spectral_axis > -maxAbsVel)

    if np.any(stack_profile[lineChans] > snThreshold * noisePerChan):
        # sum line

        # start off by fitting one Gassian
        amp_est = max(stack_profile)
        peak_cut = spectral_axis[stack_profile > amp_est * 0.5]
        fwhm = max(peak_cut) - min(peak_cut)
        sigma_est = fwhm / 2.355
        init_g = models.Gaussian1D(amplitude=amp_est, stddev=sigma_est)
        fit_g = fitting.LevMarLSQFitter()
        result_g = fit_g(init_g, spectral_axis, stack_profile)

        newLineChans = ((spectral_axis <
                         (result_g.mean + 3.0 * result_g.stddev)) &
                        (spectral_axis >
                         (result_g.mean - 3.0 * result_g.stddev)))

        stack_sum = np.sum(stack_profile[newLineChans] * chanwidth)
        stack_sum_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan

        uplim = False

    elif fwhm:
        stack_sum_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan
        stack_sum = snThreshold * stack_sum_err
        uplim = True

    else:
        stack_sum_err = np.nan * stack_profile.unit * spectral_axis.unit
        stack_sum = np.nan * stack_profile.unit * spectral_axis.unit
        fwhm = np.nan * spectral_axis.unit
        uplim = True

    return stack_sum, stack_sum_err, fwhm, uplim
Example No. 29
        timer = timeit.Timer(stmt=stmt, globals=globals())

        N = timer.autorange()[0]
        if N < 10:
            N *= 10
        vals = timer.repeat(N, 1)
        meas_times[i] = vals
        repeats[i] = N
        size[i] = np.log2(np.prod(grid))

        torch.cuda.empty_cache()
    except RuntimeError as ex:
        print(ex)
        break

# %%

median = np.array([np.median(times) for times in meas_times])
med_ab_dev = np.array([mad(times, scale='normal') for times in meas_times])

tag = COMPUTER + '_' + DEVICE + '_step'
np.savez('data\\' + tag,
         computer=COMPUTER,
         device=DEVICE,
         size=size,
         n_repeats=repeats,
         med=median,
         mad=med_ab_dev)

np.save(ps.paths['data'] + '..\\' + tag, np.array(meas_times, dtype='object'))
ax.set_yticks(np.arange(len(lbls))+.5)
ax.set_yticklabels(np.flipud(np.asarray(lbls)), fontsize="x-small")
# plt.savefig(os.path.join(dst, "thal_glm.pdf"), bbox_inches = "tight")
ax.set_ylabel("Neocortical 'trisynaptic' timepoint", fontsize="x-small")
ax.yaxis.set_label_coords(-0.22,0.5)

plt.savefig(os.path.join(dst,"nc_density_at_nc_timepoint.pdf"), bbox_inches = "tight")

#%%
ratio_mean_density = np.array(mean_thal_density_per_brain/mean_nc_density_per_brain)
ratio_std_density = np.array(std_thal_density_per_brain/std_nc_density_per_brain)
#calculate median also
median_thal_density_per_brain = np.median(thal_density_per_brain, axis = 0)
median_nc_density_per_brain = np.median(nc_density_per_brain, axis = 0)
ratio_median_density = np.array(median_thal_density_per_brain/median_nc_density_per_brain)
from scipy.stats import median_absolute_deviation as mad
mad_thal_density_per_brain = mad(thal_density_per_brain, axis = 0)
mad_nc_density_per_brain = mad(nc_density_per_brain, axis = 0)
ratio_mad_density = np.array(mad_thal_density_per_brain/mad_nc_density_per_brain)

import pandas as pd
df = pd.DataFrame()
d = 4 #decimals to round to
df["mean_thal_density"] = np.round(mean_thal_density_per_brain, d)
df["mean_nc_density"] = np.round(mean_nc_density_per_brain, d)
df["std_thal_density"] = np.round(std_thal_density_per_brain, d)
df["std_nc_density"] = np.round(std_nc_density_per_brain, d)
df["median_thal_density"] = np.round(median_thal_density_per_brain, d)
df["median_nc_density"] = np.round(median_nc_density_per_brain, d)
df["mad_thal_density"] = np.round(mad_thal_density_per_brain, d)
df["mad_nc_density"] = np.round(mad_nc_density_per_brain, d)