예제 #1
0
def _within_mad_window(values, n_mad=3):
    """Return a boolean mask of entries strictly inside median +/- n_mad * MAD.

    The MAD is scaled by 1.4826, matching the default of the removed
    ``scipy.stats.median_absolute_deviation``.
    """
    # Local import: the old function was deprecated in SciPy 1.5 and removed
    # in 1.9; its replacement takes the scale as a divisor.
    from scipy.stats import median_abs_deviation

    center = np.median(values)
    half_width = n_mad * median_abs_deviation(values, scale=1 / 1.4826)
    return np.logical_and(values > center - half_width,
                          values < center + half_width)


def filter_data(file_names):  # pragma: no cover
    """Keep the spectrum files whose header values are not 3-MAD outliers.

    Each file is loaded with ``Spectrum(name, fast_load=True)`` and the header
    keys ``D2CCD``, ``PIXSHIFT``, ``PSF_REG`` (log10 is taken) and, when
    present, ``CHI2_FIT`` are collected. A file is kept when every collected
    quantity lies strictly within median +/- 3 * MAD of that quantity.
    Diagnostic plots are displayed with ``plt.show()`` along the way.

    Parameters
    ----------
    file_names: sequence of str
        Paths understood by ``Spectrum``.

    Returns
    -------
    numpy.ndarray
        The entries of ``file_names`` that passed every cut.
    """
    from scipy.stats import median_abs_deviation

    D = []
    chi2 = []
    dx = []
    amplitude = []
    regs = []
    for name in file_names:
        spectrum = Spectrum(name, fast_load=True)
        D.append(spectrum.header["D2CCD"])
        dx.append(spectrum.header["PIXSHIFT"])
        regs.append(np.log10(spectrum.header["PSF_REG"]))
        amplitude.append(np.sum(spectrum.data[300:]))
        # NOTE(review): chi2 ends up shorter than the other arrays when only
        # some headers carry CHI2_FIT; the chi2 cut and plot below then fail
        # on a shape mismatch -- confirm whether that can happen in practice.
        if "CHI2_FIT" in spectrum.header:
            chi2.append(spectrum.header["CHI2_FIT"])
    D = np.array(D)
    dx = np.array(dx)
    regs = np.array(regs)
    chi2 = np.array(chi2)
    k = np.arange(len(D))

    plt.plot(k, amplitude)
    plt.show()

    # D2CCD with its median and the +/- 3 MAD acceptance band.
    D_median = np.median(D)
    D_band = 3 * median_abs_deviation(D, scale=1 / 1.4826)
    plt.plot(k, D)
    plt.axhline(D_median)
    plt.axhline(D_median + D_band)
    plt.axhline(D_median - D_band)
    plt.grid()
    plt.title("D2CCD")
    plt.show()

    # A file survives only if it passes every individual cut.
    filter_indices = _within_mad_window(D)
    if len(chi2) > 0:
        filter_indices &= _within_mad_window(chi2)
    filter_indices &= _within_mad_window(dx)
    filter_indices &= _within_mad_window(regs)

    # Full series as a line, surviving files over-plotted as black dots.
    for title, values in (("D2CCD", D), ("dx", dx), ("regs", regs)):
        plt.plot(k, values)
        plt.title(title)
        plt.plot(k[filter_indices], values[filter_indices], "ko")
        plt.show()
    if len(chi2) > 0:
        plt.title("chi2")
        plt.plot(k, chi2)
        plt.plot(k[filter_indices], chi2[filter_indices], "ko")
        plt.show()
    return np.array(file_names)[filter_indices]
 def get_statistics(self, vec):
     """Return descriptive statistics of ``vec``, each rounded to 2 decimals.

     Keys: ``N``, ``mean``, ``median``, ``q1``, ``q3`` (position) and
     ``range``, ``variance``, ``std``, ``iqr``, ``mad``, ``cv``,
     ``mad/median``, ``iqr/median`` (spread). ``mad`` is the median absolute
     deviation scaled by 1.4826.

     :param vec: one-dimensional sequence of numbers.
     :return: dict mapping statistic name to its rounded value.
     """
     # Computed once; each was previously evaluated two or three times.
     median = np.median(vec)
     iqr = stats.iqr(vec)
     # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
     # 1/1.4826 on its replacement reproduces the old default scaling.
     mad = stats.median_abs_deviation(vec, scale=1 / 1.4826)

     expStats = {
         # position
         "N": len(vec),
         "mean": np.mean(vec),
         "median": median,
         "q1": np.percentile(vec, 25),
         "q3": np.percentile(vec, 75),
         # spread
         "range": np.ptp(vec),
         "variance": np.var(vec),
         "std": np.std(vec),
         "iqr": iqr,
         "mad": mad,
         "cv": stats.variation(vec),
         "mad/median": mad / median,
         "iqr/median": iqr / median,
     }
     return {name: round(value, 2) for name, value in expStats.items()}
예제 #3
0
def getROIs(obj, cobj, filename='brain.mat', atlas='language'):
    """Summarise per-ROI values of ``obj`` and save a normalised brain volume.

    For every ROI whose neuroids belong to ``atlas``, the values of ``obj``
    are divided by the median of the matching ROI in ``cobj``; summary
    statistics are collected and the normalised values are written into a
    79 x 95 x 69 array that is saved to ``filename`` under the key
    ``'brain_matrix'``.

    :param obj: labelled array with ``roi``, ``atlas`` and
        ``col_to_coord_1/2/3`` coordinates along ``neuroid``
        (presumably an xarray DataArray -- TODO confirm).
    :param cobj: array with the same set of ``roi`` labels whose per-ROI
        median is used as the divisor.
    :param filename: path of the .mat file written with ``sio.savemat``.
    :param atlas: atlas name used to select the ROIs to process.
    :return: dict mapping ``roiID + '_save'`` to the list
        ``[mean, median, std, sem, mad, mad/sqrt(n), normalised median,
        normalised mad, normalised mad/sqrt(n), cobj median, n, normalised
        values]``.
    """
    # NOTE(review): roi_names_all / roi_names are never used below.
    roi_names_all = obj.roi.values
    roi_names = np.unique(roi_names_all)
    
    # ROI labels of the neuroids that belong to the requested atlas.
    roi_atlas = obj.roi[{'neuroid': [l == atlas for l in obj['atlas'].values]}].values
    roi_atlas_u = np.unique(roi_atlas)

    SPM_dim = (79,95,69)

    # Create empty brain matrix (NaN everywhere until a ROI value is written)
    brain = np.empty(SPM_dim) # The original data dimensions from that particular subject
    brain[:] = np.nan

    # Both inputs must carry exactly the same set of ROI labels.
    assert set(obj.roi.values)==set(cobj.roi.values)

    d = {}
    for roiID in roi_atlas_u:
        roi = obj[{'neuroid': [roi == roiID for roi in obj['roi'].values]}]
        cobj_roi = cobj[{'neuroid': [roi == roiID for roi in cobj['roi'].values]}]

        unique_atlas = np.unique(roi.atlas.values)

        # A ROI is expected to belong to a single atlas.
        assert len(unique_atlas) == 1

        # Normalise ("ceil") by the median value of the same ROI in cobj
        cobj_roi_med = cobj_roi.median().values
        c_vals = roi/cobj_roi_med

        # Statistics of the raw (not normalised) values
        roi_mean = roi.mean().values
        roi_med = roi.median().values

        roi_std = roi.std().values
        roi_sem = roi_std/np.sqrt(len(roi.values))

        # NOTE(review): stats.median_absolute_deviation was removed in
        # SciPy 1.9 (replacement: median_abs_deviation(..., scale=1/1.4826));
        # confirm how the replacement behaves on this array type before
        # migrating.
        roi_mad = stats.median_absolute_deviation(roi)
        roi_mad_m = roi_mad/np.sqrt(len(roi.values))

        # Statistics of the normalised values
        croi_med = c_vals.median().values
        croi_mad = stats.median_absolute_deviation(c_vals)
        croi_mad_m = croi_mad/np.sqrt(len(c_vals.values))


        # obj = obj[{'neuroid': [roi == roiID for roi in obj['roi'].values]}]/cobj_roi_med

        # Write each normalised value at its voxel position; the stored
        # coordinates are shifted by -1 (presumably 1-based -- TODO confirm).
        # Assumes c_vals is one-dimensional along neuroid -- TODO confirm.
        for idx, element in enumerate(c_vals.values):
            brain[(c_vals.col_to_coord_1.values[idx])-1, (c_vals.col_to_coord_2.values[idx])-1, \
                  (c_vals.col_to_coord_3.values[idx])-1] = element


        d[roiID + '_save'] = [roi_mean, roi_med, roi_std, roi_sem, roi_mad, roi_mad_m, \
                              croi_med, croi_mad, croi_mad_m, cobj_roi_med, 
                              len(c_vals.values), c_vals]

    # Save brain matrix
    sio.savemat(filename, {'brain_matrix':brain})
    
    return d
예제 #4
0
    def sample_trend(self, t, hyper=False):
        """Evaluate the changepoint trend at ``t`` from the estimates in ``self.pe``.

        :param t: time values; they are passed through ``self.scalers['t']``
            before use.
        :param hyper: if True, draw the offset ``m``, slope ``k`` and
            changepoint deltas from distributions instead of using the stored
            per-sample estimates.
        :return: tuple ``(t, g_t)`` with the scaled time and the trend values.
        """

        t = self.scalers['t'].transform(t)

        # s: changepoint locations, A: changepoint indicator matrix
        # (as returned by the external ``changepoints`` helper -- TODO confirm).
        s, A = changepoints(t, self.n_changepoints, self.changepoint_range)

        if hyper:

            if self.trend_hierarchical:
            
                # Draw from the stored hyper-parameter estimates.
                # NOTE(review): delta already has A.shape[1] rows here; if
                # future changepoints are appended below the shapes may no
                # longer match ``A`` -- confirm.
                m = np.random.normal(self.pe['m_mu'], self.pe['m_sigma'])
                k = np.random.normal(self.pe['k_mu'], self.pe['k_sigma'])
                delta = np.random.laplace(0, self.pe['delta_b'], (A.shape[1], 1))

            else:

                warnings.warn('hyper=True but trend not hierarchical, using MLEs')

                # MLEs for normal distribution
                m_mu = np.mean(self.pe['m'])
                m_sd = np.std(self.pe['m'])
                m = np.random.normal(m_mu, m_sd)

                # MLEs for normal distribution
                k_mu = np.mean(self.pe['k'])
                k_sd = np.std(self.pe['k'])
                k = np.random.normal(k_mu, k_sd)

                # Median and MAD are used as estimates of the mu and beta parameters
                # of the laplace distribution.
                # NOTE(review): st.median_absolute_deviation was removed in
                # SciPy 1.9 and scaled its result by 1.4826 by default.
                delta_mu = np.median(self.pe['delta'], axis=0)
                delta_b = st.median_absolute_deviation(self.pe['delta'], axis=0)
                delta = np.random.laplace(delta_mu, delta_b, (self.n_changepoints, 1))

        else:

            delta = self.pe['delta'].T

            # Fix dimensions: repeat the estimates once per time point
            m = np.repeat(self.pe['m'][None, :], t.shape[0], axis=0)
            k = np.repeat(self.pe['k'][None, :], t.shape[0], axis=0)

        # Scaled times above 1 trigger sampling of additional changepoint deltas
        # (presumably these are times beyond the fitted range -- TODO confirm).
        if any(t > 1):

            # Median and MAD are used as estimates of the mu and beta parameters
            # of the laplace distribution.
            delta_mu = np.median(self.pe['delta'])
            delta_b = st.median_absolute_deviation(np.ravel(self.pe['delta']))

            n_future_changepoints = A.shape[1] - self.n_changepoints
            future_delta = np.random.laplace(delta_mu, delta_b, (n_future_changepoints, delta.shape[1]))
            delta = np.r_[delta, future_delta]

        # g(t) = m + (k + A @ delta) * t + A @ (-s * delta)
        # NOTE(review): ``g_t`` aliases ``m``, so the in-place additions also
        # modify ``m`` when it is an array.
        g_t = m
        g_t += (k + A @ delta) * t[:, None]
        g_t += A @ (-s[:, None] * delta)

        return t, g_t
예제 #5
0
 def tensor_function(self, tensor):
     """Standardise ``tensor`` as ``(x - median) / (MAD + eps)`` and clip it.

     When ``self.ignore_value`` is set, the median and MAD are computed from
     the entries different from that value, only those entries are rewritten
     (in place) and the ignored entries keep their value. Otherwise a new
     array is computed from all entries. If ``self.min`` or ``self.max`` is
     set, the result is clipped to that range.

     :param tensor: numpy array to normalise.
     :return: the normalised (and possibly clipped) array.
     """
     # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
     # 1/1.4826 on its replacement reproduces the old default scaling.
     from scipy.stats import median_abs_deviation

     if self.ignore_value is not None:
         mask = (tensor != self.ignore_value)
         selected = tensor[mask]
         median = np.median(selected)
         mad = median_abs_deviation(selected, axis=None, scale=1 / 1.4826)
         tensor[mask] = (selected - median) / (mad + self.eps)
     else:
         median = np.median(tensor)
         mad = median_abs_deviation(tensor, axis=None, scale=1 / 1.4826)
         tensor = (tensor - median) / (mad + self.eps)
     if self.min is not None or self.max is not None:
         tensor = np.clip(tensor, a_min=self.min, a_max=self.max)
     return tensor
예제 #6
0
def remove_outliers_based_on_mad(x_data, y_data):
    """Drop the (x, y) pairs in which either coordinate is a MAD outlier.

    A pair is kept when both ``|x - median_x| / mad_x`` and
    ``|y - median_y| / mad_y`` are below ``OUTLIER_THRESHOLD_MAD``. The MAD
    is scaled by 1.4826.

    :param x_data: sequence of x values.
    :param y_data: sequence of y values, paired with ``x_data`` by position.
    :return: tuple ``(filtered_x, filtered_y)`` of lists.
    """
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    from scipy.stats import median_abs_deviation

    median_x = np.median(x_data)
    mad_x = median_abs_deviation(x_data, scale=1 / 1.4826)
    median_y = np.median(y_data)
    mad_y = median_abs_deviation(y_data, scale=1 / 1.4826)

    filtered_x = []
    filtered_y = []
    for x, y in zip(x_data, y_data):
        if (np.fabs((x - median_x) / mad_x) < OUTLIER_THRESHOLD_MAD
                and np.fabs((y - median_y) / mad_y) < OUTLIER_THRESHOLD_MAD):
            filtered_x.append(x)
            filtered_y.append(y)
    return filtered_x, filtered_y
    def get_statistics(raw_data):
        """
        Return statistics from all the fitness values found after running a metaheuristic several times. The
        statistics are ``nob`` (number of observations), ``Min`` (minimum), ``Max`` (maximum), ``Avg`` (average),
        ``Std`` (standard deviation), ``Skw`` (skewness), ``Kur`` (kurtosis), ``IQR`` (interquartile range),
        ``Med`` (median), and ``MAD`` (median absolute deviation, scaled by 1.4826).

        :param list raw_data:
            List of the fitness values.
        :return: dict
        """
        # Get descriptive statistics
        dst = st.describe(raw_data)

        # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
        # 1/1.4826 on its replacement reproduces the old default scaling.
        mad = st.median_abs_deviation(raw_data, scale=1 / 1.4826)

        # Store statistics
        return dict(nob=dst.nobs,
                    Min=dst.minmax[0],
                    Max=dst.minmax[1],
                    Avg=dst.mean,
                    Std=np.std(raw_data),
                    Skw=dst.skewness,
                    Kur=dst.kurtosis,
                    IQR=st.iqr(raw_data),
                    Med=np.median(raw_data),
                    MAD=mad)
예제 #8
0
def extract_features(train_sequences):
	"""Build a feature matrix from time- and frequency-domain statistics.

	A magnitude channel (``cal_sq_root`` applied along axis 2) is inserted
	before the last channel, then statistics are taken along axis 1 of the
	signal and of the absolute value of its FFT, and concatenated along
	axis 1.

	Assumes ``train_sequences`` is (n_sequences, n_samples, n_channels)
	-- TODO confirm against the caller.

	Feature order: var, mean, min, max, min+max, IQR, MAD, max-min (time
	domain), then kurtosis, trapezoidal integral, skewness, min, max,
	min+max, var, mean, max-min (frequency domain).
	"""
	# ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
	# 1/1.4826 on its replacement reproduces the old default scaling.
	from scipy.stats import median_abs_deviation

	root = np.apply_along_axis(cal_sq_root, 2, train_sequences)
	# index -1 inserts the magnitude channel *before* the last channel
	train_sequences = np.insert(train_sequences, -1, root, axis=2)
	# magnitude spectrum along the sample axis
	frequency_domain = np.absolute(np.fft.fft(train_sequences, axis=1))

	# features from frequency_domain
	kur = kurtosis(frequency_domain, axis=1)
	integral = np.trapz(frequency_domain, axis=1)
	skewness = skew(frequency_domain, axis=1)
	min_fd = np.min(frequency_domain, axis=1)
	max_fd = np.max(frequency_domain, axis=1)
	min_max_sum_fd = np.sum([min_fd, max_fd], axis=0)
	var_fd = np.var(frequency_domain, axis=1)
	mean_fd = np.mean(frequency_domain, axis=1)
	min_max_sub_fd = np.subtract(max_fd, min_fd)

	# features from time_domain (names avoid shadowing the min/max builtins)
	var_td = np.var(train_sequences, axis=1)
	mean_td = np.mean(train_sequences, axis=1)
	min_td = np.min(train_sequences, axis=1)
	max_td = np.max(train_sequences, axis=1)
	min_max_sum_td = np.sum([min_td, max_td], axis=0)
	iqr_td = iqr(train_sequences, axis=1)
	# median (not mean) absolute deviation
	mad_td = median_abs_deviation(train_sequences, axis=1, scale=1 / 1.4826)
	min_max_sub_td = np.subtract(max_td, min_td)

	feature = np.concatenate(
		(var_td, mean_td, min_td, max_td, min_max_sum_td, iqr_td, mad_td,
		 min_max_sub_td, kur, integral, skewness, min_fd, max_fd,
		 min_max_sum_fd, var_fd, mean_fd, min_max_sub_fd),
		axis=1)
	return feature
예제 #9
0
def stat_summarizer(figure):
    """Summarise ``figure`` with NaN-ignoring location, spread and ratio statistics.

    :param figure: array-like of performance values; NaNs are ignored.
    :return: dict with ``avg_perf``, ``med_perf``, ``stdev``,
        ``medianabdev`` (MAD scaled by 1.4826), ``min_perf``, ``max_perf``,
        ``q1_perf``, ``q3_perf``, ``sharpe`` (mean / std) and ``sharpemad``
        (median / MAD).
    """
    avg_perf = np.nanmean(figure)
    med_perf = np.nanmedian(figure)
    stdev = np.nanstd(figure)
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    medianabdev = stats.median_abs_deviation(figure, scale=1 / 1.4826,
                                             nan_policy='omit')

    return {
        'avg_perf': avg_perf,
        'med_perf': med_perf,
        'stdev': stdev,
        'medianabdev': medianabdev,
        'min_perf': np.nanmin(figure),
        'max_perf': np.nanmax(figure),
        'q1_perf': np.nanquantile(figure, 0.25),
        'q3_perf': np.nanquantile(figure, 0.75),
        'sharpe': avg_perf / stdev,
        'sharpemad': med_perf / medianabdev,
    }
예제 #10
0
def stat_summarizer_old(figure):
    """Summarise ``figure`` with NaN-ignoring quartile and spread statistics.

    :param figure: array-like of performance values; NaNs are ignored.
    :return: dict with ``avg_perf``, ``min_perf``, ``q1_perf``, ``med_perf``,
        ``q3_perf``, ``max_perf``, ``iqr_perf`` (q3 - q1), ``max_min``
        (max - min), ``maxq3`` (max - q3), ``q1min`` (q1 - min), ``stdev``
        and ``medianabdev`` (MAD scaled by 1.4826).
    """
    min_perf = np.nanmin(figure)
    q1_perf = np.nanquantile(figure, 0.25)
    q3_perf = np.nanquantile(figure, 0.75)
    max_perf = np.nanmax(figure)
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    medianabdev = stats.median_abs_deviation(figure, scale=1 / 1.4826,
                                             nan_policy='omit')

    return {
        'avg_perf': np.nanmean(figure),
        'min_perf': min_perf,
        'q1_perf': q1_perf,
        'med_perf': np.nanmedian(figure),
        'q3_perf': q3_perf,
        'max_perf': max_perf,
        'iqr_perf': q3_perf - q1_perf,
        'max_min': max_perf - min_perf,
        'maxq3': max_perf - q3_perf,
        'q1min': q1_perf - min_perf,
        'stdev': np.nanstd(figure),
        'medianabdev': medianabdev,
    }
예제 #11
0
def t4_rule(dataframe, df_quote=None):
    """Keep the rows whose price lies within 5 MAD of a rolling median.

    A 51-observation window is built around every price with
    ``rolling_window``; the first and last windows are repeated 25 times so
    that every row gets a window. Each window is passed through
    ``roll_delete`` (external helper -- presumably drops the observation
    under test, TODO confirm) before its median and MAD (scaled by 1.4826)
    are taken.

    Side effect: the columns ``'median'`` and ``'mad'`` are added to
    ``dataframe``.

    :param dataframe: DataFrame with a ``'price'`` column.
    :param df_quote: quote data; not supported yet, must be None.
    :return: the rows of ``dataframe`` with
        ``median - 5*mad <= price <= median + 5*mad``.
    :raises NotImplementedError: if ``df_quote`` is given.
    """
    if df_quote is not None:
        # This branch used to print the message and then fail on the
        # undefined ``dat_out``; fail explicitly instead.
        raise NotImplementedError('Part not yet done. Please remove the quote data')

    np_price = dataframe['price'].to_numpy()
    roll = rolling_window(np_price, 51)
    # Pad both ends with copies of the first / last window.
    roll = np.insert(roll, [0] * 25, roll[0], axis=0)
    roll = np.insert(roll, [-1] * 25, roll[-1], axis=0)
    # Prepend the row number as first column for roll_delete.
    roll = np.insert(roll, 0, np.arange(len(roll)), axis=1)

    dat = np.apply_along_axis(roll_delete, 1, roll, len(roll))

    # Calc median #
    med_list = np.median(dat, -1)
    # Calc mad #
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    mad_list = stats.median_abs_deviation(dat, axis=-1, scale=1 / 1.4826)

    # Add to dataframe #
    dataframe['median'] = pd.Series(med_list, index=dataframe.index)
    dataframe['mad'] = pd.Series(mad_list, index=dataframe.index)

    # Output data #
    condition = (dataframe['price'] <= (dataframe['median'] + 5*dataframe['mad'])) & \
                (dataframe['price'] >= (dataframe['median'] - 5*dataframe['mad']))
    dat_out = dataframe[condition]

    return dat_out
예제 #12
0
 def get_all_feature(self, inputs):
     """Compute twelve summary features of a one-dimensional signal.

     :param inputs: sequence of numbers.
     :return: list ``[min, max, mean, median, mad, std, skew, kurtosis, iqr,
         energy, wskew, wkurtosis]`` where ``mad`` is scaled by 1.4826,
         ``std`` uses ``ddof=1``, ``energy`` comes from ``self.energy`` and
         the last two are the skewness and kurtosis of the scaled FFT
         magnitude.
     """
     inputs = np.array(inputs)
     # Minimum / maximum (names avoid shadowing the builtins)
     minimum = np.min(inputs)
     maximum = np.max(inputs)
     # Mean
     mean = np.mean(inputs)
     # Median
     median = np.median(inputs)
     # Median absolute deviation. ``median_absolute_deviation`` was removed
     # in SciPy 1.9; scale=1/1.4826 reproduces its default scaling.
     mad = stats.median_abs_deviation(inputs, scale=1 / 1.4826)
     # Sample standard deviation
     std = np.std(inputs, ddof=1)
     # Skewness
     skew = stats.skew(inputs)
     # Kurtosis
     kurtosis = stats.kurtosis(inputs)
     # Interquartile range
     iqr = stats.iqr(inputs)
     # Energy measure
     energy = self.energy(inputs)
     # FFT magnitude, divided by the length and by 2
     process = np.abs(fft(inputs)) / len(inputs) / 2
     # Frequency-domain skewness
     wskew = stats.skew(process)
     # Frequency-domain kurtosis
     wkurtosis = stats.kurtosis(process)
     # Collect all features in a fixed order
     return [
         minimum, maximum, mean, median, mad, std, skew, kurtosis, iqr,
         energy, wskew, wkurtosis
     ]
예제 #13
0
 def div_signal(self, data_in):
     """Return the MAD of each of ``self.num_divs`` consecutive segments.

     ``data_in`` is cut into ``self.num_divs`` segments of equal length
     ``floor(size / num_divs)``; trailing samples that do not fill a segment
     are ignored. The MAD is scaled by 1.4826.

     :param data_in: one-dimensional numpy array.
     :return: numpy array of length ``self.num_divs``.
     """
     div_mads = np.zeros(self.num_divs)
     div_len = int(np.floor(data_in.size / self.num_divs))
     for i in range(self.num_divs):
         segment = data_in[i * div_len:(i + 1) * div_len]
         # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale
         # of 1/1.4826 on its replacement reproduces the old default.
         div_mads[i] = stats.median_abs_deviation(segment, scale=1 / 1.4826)
     return div_mads
예제 #14
0
def remove_outliers_based_on_mad(feature):
    """Return the values of ``feature`` that are not MAD outliers.

    A value is kept when ``|value - median| / mad`` is below
    ``OUTLIER_THRESHOLD_MAD``; the MAD is scaled by 1.4826.

    :param feature: sequence of numbers.
    :return: list of the retained values, in their original order.
    """
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    from scipy.stats import median_abs_deviation

    mad = median_abs_deviation(feature, scale=1 / 1.4826)
    median = np.median(feature)
    return [
        value for value in feature
        if np.fabs((value - median) / mad) < OUTLIER_THRESHOLD_MAD
    ]
def calculate_statistics(list_values):
    """Return five spread statistics of ``list_values``.

    :param list_values: sequence of numbers.
    :return: tuple ``(coefficient of variation, interquartile range,
        second k-statistic, standard error of the mean, median absolute
        deviation scaled by 1.4826)``.
    """
    coefficient_of_variation = stats.variation(list_values)
    inter_quartile_range = stats.iqr(list_values)
    kstat = stats.kstat(list_values)
    standard_error_of_mean = stats.sem(list_values)
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    mad = stats.median_abs_deviation(list_values, scale=1 / 1.4826)
    return (coefficient_of_variation, inter_quartile_range, kstat,
            standard_error_of_mean, mad)
예제 #16
0
    def advance(self, delt):
        """Save the current row, move ``delt`` rows and refit it.

        The row index wraps around within ``1..self.nRows``. After fitting,
        lines whose residual ``dW`` with respect to the polynomial
        ``self.poly`` exceeds 5 robust sigmas (MAD scaled by 1.4826) get
        their fit fields zeroed, and the residuals are fitted again before
        redrawing.

        :param delt: number of rows to move (may be negative).
        """
        self.save()
        self.row = self.row + delt
        print(self.row)
        if self.row < 1:
            self.row = self.nRows
        if self.row > self.nRows:
            self.row = 1
        self.setSpec()

        self.fitAll()
        self.fitResiduals()

        dW = self.lines_fit['dW'] - getpolyfit(self.lines_fit['wave'],
                                               self.poly)
        # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
        # 1/1.4826 on its replacement reproduces the old default scaling.
        sigma_robust = stats.median_abs_deviation(dW, scale=1 / 1.4826)
        print('robust_sigma = {}'.format(sigma_robust))

        # Zero out the fit of every line that is a > 5 sigma outlier.
        index = np.where(np.abs(dW) > 5.0 * sigma_robust)
        for field in ('a', 'b', 'wave_fit', 'amplitude', 'sigma'):
            self.lines_fit[field][index] = 0.0
        self.fitResiduals()

        self.draw()
예제 #17
0
def robust_std(im, method='biweight'):
    """Return a robust estimate of the standard deviation of ``im``.

    :param im: numpy array; NaNs are ignored by every method.
    :param method: ``'biweight'`` for the square root of astropy's biweight
        midvariance, ``'mad'`` for the median absolute deviation scaled by
        1.4826, anything else for ``np.nanstd``.
    :return: the estimate over all elements of ``im``.
    """
    # Imports are done per branch: the former unconditional import of
    # ``scipy.stats.median_absolute_deviation`` (removed in SciPy 1.9) made
    # every method fail, and astropy is only needed for 'biweight'.
    if method == 'biweight':
        from astropy.stats import biweight_midvariance
        var = biweight_midvariance(im, axis=None, ignore_nan=True)
        std = np.sqrt(var)
    elif method == 'mad':
        from scipy.stats import median_abs_deviation
        # scale=1/1.4826 reproduces the scaling of the removed function.
        std = median_abs_deviation(im, axis=None, scale=1 / 1.4826,
                                   nan_policy='omit')
    else:
        std = np.nanstd(im, axis=None)

    # An earlier hand-written biweight implementation ("Mike's code") used to
    # be kept here inside an unused string literal; it had no effect.
    return std
예제 #18
0
def cal_numerical(target_df_1, numeric_feature, numerical_df):
    '''
    Calculate metrics for one numerical feature and store them in
    ``numerical_df``: non-NA count, missing-value count, median, MAD
    (scaled by 1.4826) and range.

    :param target_df_1: DataFrame holding the feature column.
    :param numeric_feature: name of the column to summarise; also the row
        label written in ``numerical_df``.
    :param numerical_df: DataFrame receiving the columns ``count``,
        ``missing_count``, ``median``, ``mad`` and ``range``; modified in
        place.
    :return: ``numerical_df``.
    '''
    column = target_df_1[numeric_feature]

    # get counts of non NA values
    numerical_df.loc[numeric_feature, 'count'] = column.count()

    # get missing value counts
    missing_count_log = column.isna().sum()
    numerical_df.loc[numeric_feature, 'missing_count'] = missing_count_log

    # distribution: Median and MAD
    numerical_df.loc[numeric_feature, 'median'] = column.median()
    if missing_count_log == 0:
        # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
        # 1/1.4826 on its replacement reproduces the old default scaling.
        mad_log = stats.median_abs_deviation(column.to_numpy(),
                                             scale=1 / 1.4826)
        numerical_df.loc[numeric_feature, 'mad'] = mad_log
    else:
        # NOTE(review): columns with missing values get a MAD of 0 instead
        # of a NaN-ignoring MAD -- kept as is, confirm this is intended.
        numerical_df.loc[numeric_feature, 'mad'] = 0

    # range/ scaling
    numerical_df.loc[numeric_feature, 'range'] = column.max() - column.min()

    return numerical_df
예제 #19
0
def testing():
    """Print ceiled, unceiled, ceiling and reproduced scores from a cached result.

    Ad-hoc check: loads one pickled score from a fixed cache path, selects
    the layer with the highest center value and prints the stored values next
    to values recomputed from the raw per-subject scores (standard error of
    the mean and median absolute deviation).
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # acceptable because the path is a fixed, locally produced cache.
    with open(
            '/braintree/home/msch/.result_caching/neural_nlp.score/'
            'benchmark=Pereira2018-encoding,model=gpt2-xl,subsample=None.pkl',
            'rb') as f:
        ceiled_score = pickle.load(f)['data']
        # keep only the best-scoring layer
        best_layer = ceiled_score.sel(aggregation='center').argmax('layer')
        ceiled_score = ceiled_score.isel(layer=best_layer.values)
    # overview
    ceiled_center, ceiled_error = ceiled_score.sel(
        aggregation='center'), ceiled_score.sel(aggregation='error')
    print(f"ceiled: {ceiled_center.values:.2f}-+{ceiled_error.values:.2f}")
    unceiled_score = ceiled_score.raw
    unceiled_center, unceiled_error = unceiled_score.sel(
        aggregation='center'), unceiled_score.sel(aggregation='error')
    print(
        f"unceiled: {unceiled_center.values:.2f}-+{unceiled_error.values:.2f}")
    ceiling_score = ceiled_score.ceiling
    ceiling_center = ceiling_score.sel(aggregation='center').values
    print(f"ceiling: {ceiling_center:.2f}-+["
          f"{ceiling_score.sel(aggregation='error_low').values:.2f},"
          f"{ceiling_score.sel(aggregation='error_high').values:.2f}]")
    # reproduce: median over neuroids per subject, then median over subjects,
    # both divided by the ceiling center
    raw = unceiled_score.raw
    subject_scores = raw.groupby('subject').median('neuroid')
    repr_center, repr_error = subject_scores.median(
    ), standard_error_of_the_mean(subject_scores, dim='subject')
    repr_center, repr_error = repr_center / ceiling_center, repr_error / ceiling_center
    print(f"reproduce: {repr_center.values:.2f}-+{repr_error.values:.2f}")
    # MAD of the per-subject scores as an alternative error estimate
    # NOTE(review): median_absolute_deviation was removed in SciPy 1.9;
    # median_abs_deviation(..., scale=1/1.4826) is the replacement.
    mad_error = median_absolute_deviation(subject_scores.values)
    mad_error /= ceiling_center
    print(f"MAD: {repr_center.values:.2f}-+{mad_error:.2f}")
예제 #20
0
파일: math.py 프로젝트: wcfiore/your
def smad_plotter(freq_time, sigma=5.0, clip=True):
    """
    Spectral Median Absolute Deviation clipper.

    For every column of ``freq_time`` the threshold is
    ``sigma * 1.4826 * MAD`` around the column median, with medians and MADs
    taken along axis 0.

    Args:

        freq_time: the frequency time data (2-D array)

        sigma (float): sigma at which to clip data

        clip (bool): if True, values outside the threshold are clipped to
            it; otherwise they are replaced with zeroes (in place)

    Returns:

        np.ndarray: clipped/flagged data
    """
    medians = np.median(freq_time, axis=0)
    # ``median_abs_deviation`` returns the unscaled MAD, so 1.4826 is applied
    # exactly once here. The removed ``median_absolute_deviation`` already
    # scaled by 1.4826, which made the old threshold 1.4826 times too wide.
    sigs = 1.4826 * sigma * stats.median_abs_deviation(freq_time, axis=0)
    if clip:
        return np.clip(freq_time, a_min=medians - sigs, a_max=medians + sigs)

    # Flag mode: zero every sample at or beyond its column's threshold.
    freq_time[np.absolute(freq_time - medians) >= sigs] = 0.0
    return freq_time
예제 #21
0
    def calibrate(self, win_len = 0.5, win_overlap = 0.5, k=5):
        """Compute the mixing matrix and rejection threshold from calibration data.

        Sets ``self.mixing`` and ``self.threshold`` from ``self.x_c``. Prints
        a message and returns without doing anything if ``self.x_c`` is None.

        :param win_len: window length in seconds (multiplied by ``self.sf``).
        :param win_overlap: fraction of overlap between consecutive windows.
        :param k: number of MADs added to the median to form the threshold.
        """
        
        if self.x_c is None:
            print("First use clean_windows() or set_clean_windows() to set calibration data")
            return
        
        # Calculation of covariance matrix.
        # presumably x_c is (n_channels, n_samples) -- TODO confirm
        cov_x = np.cov(self.x_c)
        l1_mean = geometric_median(cov_x.reshape((-1, self.n_channels * self.n_channels)))
        C = l1_mean.reshape((self.n_channels,self.n_channels))
        self.mixing = sqrtm(np.real(C))
        evals, evecs = np.linalg.eig(self.mixing)  # compute PCA
        indx = np.argsort(evals)  # sort in ascending
        evecs = evecs[:, indx]
        
        # Projection of the data into component space.
        y_c = np.dot(evecs.T, self.x_c)

        # Per-component scores over win_len second windows, later summarised by median and MAD.
        n_samples = y_c.shape[1]
        win_samples= int(win_len * self.sf)
        offsets = np.int_(np.arange(0, n_samples - win_samples, np.round(win_samples * (1 - win_overlap))))

        rms_scores=[]
        for o in offsets:
            # NOTE(review): sqrt(x ** 2).mean() is the mean absolute value,
            # not the root mean square -- confirm which one is intended.
            rms = np.sqrt(y_c[:,o:o+win_samples] ** 2).mean(axis=1)
            rms_scores.append(rms)
        
        # Determine threshold per component
        # Median and MAD are used because they are more robust
        # NOTE(review): median_absolute_deviation was removed in SciPy 1.9;
        # median_abs_deviation(..., scale=1/1.4826) is the replacement.
        sig= median_absolute_deviation(rms_scores,axis = 0)
        mu = np.median(rms_scores,axis = 0)
        self.threshold = mu + k * sig
        self.threshold = np.diag(self.threshold.dot(np.transpose(evecs)))
예제 #22
0
    def get_noise(self, method="default", rmbkgd=True):
        """ get an estimation of the image's noise

        Parameters
        ----------
        method: [string/None] -optional-
            - None/default: become sep if a sourcebackground has been loaded, nmad otherwise.
            - nmad: median absolute deviation (scaled by 1.4826) of the non-NaN values of self.data
            - sep: (float) global scatter estimated by sep (python Sextractor), i.e. rms for background subs image
            - std: (float) estimated as half of the counts difference between the 16 and 84 percentiles

        rmbkgd: [bool]
            // ignored if method != std //
            shall the std method be measured on background subtracted image ?

        Return
        ------
        float (see method)

        Raises
        ------
        NotImplementedError
            if ``method`` is not one of the supported names.
        """
        if method is None or method in ["default"]:
            method = "sep" if hasattr(self,"_sourcebackground") else "nmad"

        if method in ["nmad"]:
            from scipy import stats
            # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale
            # of 1/1.4826 on its replacement reproduces the old default.
            return stats.median_abs_deviation(self.data[~np.isnan(self.data)],
                                              scale=1 / 1.4826)

        if method in ["std","16-84","84-16"]:
            data_ = self.get_data(rmbkgd=rmbkgd, applymask=True, alltrue=True)
            lowersigma,upsigma = np.percentile(data_[data_==data_], [16,84]) # clean nans out
            return 0.5*(upsigma-lowersigma)

        if method in ["sep","sextractor", "globalrms"]:
            return self.sourcebackground.globalrms

        raise NotImplementedError(f"method {method} has not been implemented. Use: 'std'")
예제 #23
0
    def fold(self, pos, accept_lim=0.2, spread=0.1):
        """ Fold low acceptance walkers into main distribution

        At the end of the burn-in, some walkers appear stuck with low
        acceptance fraction. These can be selected using a threshold, and
        folded back into the main distribution, estimated based on the median
        of the walkers with an acceptance fraction above the threshold.

        The stuck walkers are relocated with independent Gaussian draws per
        dimension, with mean equal to the median of the high acceptance
        walkers, and a standard deviation equal to ``spread`` times the
        median absolute deviation (scaled by 1.4826) of these.

        Parameters
        ----------
        pos : array
            The final position of the walkers after the burn-in phase.
            Modified in place.
        accept_lim: float
            The value below which walkers will be labelled as bad and/or hence
            stuck.
        spread: float
            Factor applied to the MAD of the good walkers to obtain the
            standard deviation of the new positions.

        Returns
        -------
        array
            ``pos`` with the rows of the stuck walkers replaced.
        """
        idx = self.sampler.acceptance_fraction < accept_lim
        nbad = np.shape(pos[idx, :])[0]
        if nbad > 0:
            flatchains = self.sampler.chain[~idx, :, :].reshape(
                (-1, self.ndim))
            good_med = np.median(flatchains, axis=0)
            # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale
            # of 1/1.4826 on its replacement reproduces the old default.
            good_mad = st.median_abs_deviation(
                flatchains, axis=0, scale=1 / 1.4826) * spread
            pos[idx, :] = np.array([
                np.random.randn(self.ndim) * good_mad + good_med
                for _ in range(nbad)
            ])
        return pos
예제 #24
0
def q4_rule(dataframe):
    """Keep the rows whose midquote lies within 5 MAD of a rolling median.

    The midquote ``(ofr + bid) / 2`` and the spread ``ofr - bid`` are added
    to ``dataframe``. A 51-observation window is built around every midquote
    with ``rolling_window``; the first and last windows are repeated 25 times
    so that every row gets a window. Each window is passed through
    ``roll_delete`` (external helper -- presumably drops the observation
    under test, TODO confirm) before its median and MAD (scaled by 1.4826)
    are taken.

    Side effect: the columns ``'midquote'``, ``'spread'``, ``'median'`` and
    ``'mad'`` are added to ``dataframe``.

    :param dataframe: DataFrame with ``'ofr'`` and ``'bid'`` columns.
    :return: the rows of ``dataframe`` with
        ``median - 5*mad <= midquote <= median + 5*mad``.
    """
    # Add Midquote and spread to the dataframe #
    dataframe['midquote'] = (dataframe['ofr'] + dataframe['bid'])/2
    dataframe['spread'] = (dataframe['ofr'] - dataframe['bid'])

    np_price = dataframe['midquote'].to_numpy()
    roll = rolling_window(np_price, 51)
    # Pad both ends with copies of the first / last window.
    roll = np.insert(roll, [0]*25, roll[0], axis=0)
    roll = np.insert(roll, [-1]*25, roll[-1], axis=0)
    # Prepend the row number as first column for roll_delete.
    roll = np.insert(roll, 0, np.arange(len(roll)), axis=1)

    dat = np.apply_along_axis(roll_delete, 1, roll, len(roll))

    # Calc median #
    med_list = np.median(dat, -1)
    # Calc mad #
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    mad_list = stats.median_abs_deviation(dat, axis=-1, scale=1 / 1.4826)

    # Add to dataframe #
    dataframe['median'] = pd.Series(med_list, index=dataframe.index)
    dataframe['mad'] = pd.Series(mad_list, index=dataframe.index)

    # Output data #
    condition = (dataframe['midquote'] <= (dataframe['median'] + 5*dataframe['mad'])) & \
                (dataframe['midquote'] >= (dataframe['median'] - 5*dataframe['mad']))
    dat_out = dataframe[condition]

    return dat_out
예제 #25
0
    def calc_rowwise_medmaxmad(self, column=''):
        """
        Compute median, maximum, and median absolute deviation from an array of values
        specified by the string of the input column name and add columns to hold the results.
        Input values might be from a filtered raster of iceberg pixel drafts or a series of measurements.

        The results are stored in ``<column>_med``, ``<column>_max`` and ``<column>_mad`` of
        ``self._gdf`` as float64. NaNs are ignored and all values of a row's array are used.
        The MAD is scaled by 1.4826.

        Parameters
        ---------
        column: str, default ''
            Column name on which to compute median, maximum, and median absolute deviation
        """

        req_cols = [
            column
        ]  # e.g. 'draft' for iceberg water depths, 'depth' for measured depths
        self._validate(self._gdf, req_cols)

        result_cols = [column + suffix for suffix in ('_med', '_max', '_mad')]

        # Create missing result columns as NaN placeholders (existing ones are
        # kept and overwritten row by row below).
        for key in result_cols:
            if key not in self._gdf.columns:
                self._gdf[key] = np.nan

        for row_label, indata in self._gdf[column].items():
            self._gdf.at[row_label, column + '_med'] = np.nanmedian(indata)
            self._gdf.at[row_label, column + '_max'] = np.nanmax(indata)
            # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale
            # of 1/1.4826 on its replacement reproduces the old default.
            # axis=None reduces over all values, like nanmedian/nanmax above.
            self._gdf.at[row_label,
                         column + '_mad'] = stats.median_abs_deviation(
                             indata, axis=None, scale=1 / 1.4826,
                             nan_policy='omit')

        # make sure the result columns end up as float64
        for key in result_cols:
            self._gdf[key] = self._gdf[key].astype('float64')
예제 #26
0
파일: gmug.py 프로젝트: cjhopp/scripts
def cassm_clock_correct(gmug_tr,
                        vbox_tr,
                        trig_tr,
                        which=0,
                        debug=0,
                        name=None):
    """
    Find first CASSM shot in common and use cross correlation to estimate
    the clock offset between the two systems.

    Will be done for each GMuG file, but not each Vibbox file

    :param gmug_tr: Trace of B81 on gmug
    :param vbox_tr: Trace of B81 on vbox
    :param trig_tr: Trace of the CASSM trigger
    :param which: 0 for first or -1 for last trigger
    :param debug: Debug flag for correlation plot
    :param name: Name of output h5 file for plot naming if debug > 0

    :return: tuple (offset in seconds of the correlation maximum,
        correlation array, trigger time); ``(0., np.array([0.0]),
        UTCDateTime())`` when resampling the gmug trace raises AttributeError
    """
    # Use derivative of PPS signal to find pulse start
    dt = np.diff(trig_tr.data)
    # Threshold: mean + 70 * MAD of the derivative
    # NOTE(review): median_absolute_deviation was removed in SciPy 1.9;
    # median_abs_deviation(..., scale=1/1.4826) is the replacement.
    # NOTE(review): raises IndexError when no sample exceeds the threshold.
    samp_to_trig = np.where(
        dt > np.mean(dt) + 70 * median_absolute_deviation(dt))[0][which]
    trig1_time = vbox_tr.stats.starttime + (float(samp_to_trig) /
                                            vbox_tr.stats.sampling_rate)
    print('    Trigger: {}'.format(trig1_time))
    # Short vbox template (0.01 s) searched for in a longer gmug window (0.2 s)
    cc_vbox = vbox_tr.copy().trim(trig1_time,
                                  endtime=trig1_time + 0.01).detrend('demean')
    cc_gmug = gmug_tr.copy().trim(trig1_time,
                                  endtime=trig1_time + 0.2).detrend('demean')
    print('        Vbox {}--{}'.format(vbox_tr.stats.starttime,
                                       vbox_tr.stats.endtime))
    print('        GMuG {}--{}'.format(gmug_tr.stats.starttime,
                                       gmug_tr.stats.endtime))
    try:
        # Bring the gmug window to the vbox sampling rate before correlating
        cc_gmug.resample(cc_vbox.stats.sampling_rate)
    except AttributeError as e:  # Outside range of gmug waveform
        return 0., np.array([0.0]), UTCDateTime()
    ccc = normxcorr2(cc_vbox.data, cc_gmug.data)
    # Lag (in samples, then seconds) of the correlation maximum
    max_cc = np.argmax(ccc[0])
    max_cc_sec = float(max_cc) / cc_vbox.stats.sampling_rate
    if debug > 0:
        # Top: normalised gmug window with the vbox template drawn at the
        # best lag; bottom: the correlation function.
        # NOTE(review): ``name`` must be a string here; the default None
        # fails in ``name.replace`` below.
        fig, axes = plt.subplots(nrows=2)
        vbox_x = np.arange(start=max_cc, stop=max_cc + cc_vbox.data.shape[0])
        axes[0].plot(cc_gmug.data / np.max(cc_gmug.data),
                     color='k',
                     linewidth=0.7)
        axes[1].axvline(x=max_cc, linestyle=':', color='gray')
        axes[0].axvline(x=max_cc, linestyle=':', color='gray')
        axes[0].plot(vbox_x,
                     cc_vbox.data / np.max(cc_vbox.data),
                     color='r',
                     linewidth=0.7)
        axes[1].plot(ccc[0], color='b', linewidth=0.7)
        plt.savefig(name.replace('.h5', 'time_cc.png'))
        plt.close('all')
    return max_cc_sec, ccc, trig1_time
예제 #27
0
def calculate_IQR(data, column_name):
    """Return the interquartile range of a column, falling back to its MAD.

    :param data: DataFrame holding the column.
    :param column_name: name of the column to measure.
    :return: ``Q3 - Q1`` of the column, or the unscaled median absolute
        deviation of its values when the interquartile range is 0.
    """
    Q1 = data[column_name].quantile(0.25)
    Q3 = data[column_name].quantile(0.75)
    IQR = Q3 - Q1
    if IQR == 0:
        # ``median_absolute_deviation(..., scale=1)`` was removed in
        # SciPy 1.9; its replacement is unscaled by default.
        IQR = stats.median_abs_deviation(data[column_name].values)
    return IQR
예제 #28
0
def robust_scale(x):
    """Centre ``x`` on its median and divide by its MAD.

    The MAD is scaled by 1.4826 and the machine epsilon is added to the
    denominator to avoid a division by zero.

    The former rpy2 check and the ``importr`` calls were removed: they were
    left over from ``bw_SJr`` (which the old error message named) and nothing
    in this function uses R.

    :param x: numpy array of numbers.
    :return: array of the same shape as ``x``.
    """
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    mad = scipystats.median_abs_deviation(x, scale=1 / 1.4826)
    return (x - np.median(x)) / (mad + np.finfo(float).eps)
def detect_peaks_one_median(x, n=3):
    """Replace large-amplitude samples of ``x`` by uniform random values.

    Samples whose absolute value exceeds ``|median + n * MAD|`` are replaced
    by draws from a uniform distribution on
    ``[median - n * MAD, median + n * MAD)``. The MAD is scaled by 1.4826.

    :param x: one-dimensional array-like of numbers; it is not modified.
    :param n: number of MADs defining the window.
    :return: tuple ``(x_clean, (lower, upper))`` with the cleaned copy and
        the bounds of the window.
    """
    # ``median_absolute_deviation`` was removed in SciPy 1.9; a scale of
    # 1/1.4826 on its replacement reproduces the old default scaling.
    from scipy.stats import median_abs_deviation

    median = np.median(x)
    mad = median_abs_deviation(x, scale=1 / 1.4826)
    lower, upper = median - n * mad, median + n * mad

    x_clean = np.copy(x)
    # NOTE(review): the test compares |x| with |median + n*mad| and is thus
    # not symmetric around the median -- kept as is, confirm the intent.
    peaks = abs(x_clean) > abs(upper)
    x_clean[peaks] = np.random.uniform(lower, upper,
                                       int(np.count_nonzero(peaks)))
    return x_clean, (lower, upper)
예제 #30
0
def ZRscore_outlier(df):
    """Print and return the values whose modified z-score exceeds 3.

    The modified z-score is ``0.6745 * (value - median) / MAD`` with the
    unscaled median absolute deviation.

    :param df: one-dimensional sequence of numbers (iterated value by value
        -- a pandas Series or numpy array, TODO confirm against callers).
    :return: list of the outlying values (previously nothing was returned
        and the undefined name ``out`` was appended to).
    """
    med = np.median(df)
    # Unscaled MAD: the 0.6745 factor below already is the normal-consistency
    # constant, and the removed ``median_absolute_deviation`` applied it a
    # second time through its default scale of 1.4826.
    ma = stats.median_abs_deviation(df)
    out = []
    for i in df:
        z = (0.6745*(i-med))/ (np.median(ma))
        if np.abs(z) > 3:
            out.append(i)
    print("Outliers:",out)
    return out