Example #1
0
 def orderByFoldChange(self, bidirection=True):
     Raw = self.canopus.Quant
     Quant = self.canopus.QuantNormalized
     pseudoCount = np.percentile(Quant.where(Quant > 0).stack().values, 0.1)
     A = self.conditionLeft.samples
     B = self.conditionRight.samples
     entries = np.array(
         list(self.__enoughEntries(Raw, A) | self.__enoughEntries(Raw, B)))
     entries = np.array(list(set(Raw.index[entries]) & set(Quant.index)))
     Quant = Quant.loc[entries, :]
     fold_changes = (trim_mean(Quant.loc[:, A], 0.1, axis=1) +
                     pseudoCount) / (trim_mean(Quant.loc[:, B], 0.1, axis=1)
                                     + pseudoCount)
     W = np.log10(fold_changes)
     if self.binning:
         binsize = 0.5 if self.binning == True else self.binning
         scale = 1.0 / binsize
         W = np.round(W * scale) / scale
     if bidirection:
         W = np.abs(W)
     table = pd.DataFrame(
         dict(compound=Quant.index,
              weight=W,
              fold_change=fold_changes,
              category=self.__assign_specific_class__(Quant.index)))
     table.sort_values(by="weight", ascending=False, inplace=True)
     table.set_index("compound", drop=True, inplace=True)
     #table[table.weight < 1] = 0.0 # we do not trust the lower values anyways
     self.ordering = table
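A minimal, self-contained sketch of the same trimmed-mean fold-change weighting on a made-up quantification table (the column names, sizes and 0.1 trim fraction below are illustrative, not taken from the class above):

import numpy as np
import pandas as pd
from scipy.stats import trim_mean

# toy intensity matrix: rows are compounds, columns are samples from two conditions
Quant = pd.DataFrame(np.random.lognormal(mean=2, sigma=1, size=(5, 6)),
                     index=[f"cpd{i}" for i in range(5)],
                     columns=["A1", "A2", "A3", "B1", "B2", "B3"])
A, B = ["A1", "A2", "A3"], ["B1", "B2", "B3"]
pseudo = np.percentile(Quant.values[Quant.values > 0], 0.1)   # small pseudo count against division by ~0
fold = (trim_mean(Quant[A], 0.1, axis=1) + pseudo) / (trim_mean(Quant[B], 0.1, axis=1) + pseudo)
table = pd.DataFrame({"fold_change": fold, "weight": np.abs(np.log10(fold))}, index=Quant.index)
print(table.sort_values("weight", ascending=False))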
Example #2
0
def extraMyfeatures(epochs,channelList,epoch_length,lower_threshold=0.4,higher_threshold=3.4,l_freq=11,h_freq=16):
    """
    Types of features described in the paper at https://osf.io/aqgxe/:
    1. root-mean-square of a segment
    2. peak frequency power
    3. peak frequency
    """
    full_prop=[] 
    data = epochs.get_data()       
    for d in data:    
        temp_p=[]
        #fig,ax = plt.subplots(nrows=2,ncols=3,figsize=(8,8))
        for ii, name in enumerate(channelList):  # iterate channels (axes pairing removed with the plotting code above)
            rms = window_rms(d[ii,:],epochs.info['sfreq'])
            l = stats.trim_mean(rms,0.05) + lower_threshold * trimmed_std(rms,0.05)
            h = stats.trim_mean(rms,0.05) + higher_threshold * trimmed_std(rms,0.05)
            prop = (sum(rms>l)+sum(rms<h))/(sum(rms<h) - sum(rms<l))
            if np.isinf(prop):# if the denominator is zero, don't divide it
                prop = (sum(rms>l)+sum(rms<h))
            temp_p.append(prop)
        full_prop.append(temp_p)
    full_prop = np.array(full_prop)
    psds,freq = mne.time_frequency.psd_multitaper(epochs,fmin=l_freq,fmax=h_freq,tmin=0,tmax=epoch_length,low_bias=True,n_jobs=3)
    psds = 10* np.log10(psds)
    features = np.concatenate((full_prop,psds.max(2),freq[np.argmax(psds,2)]),1)
    return features
Example #3
0
def trimci(x, tr=.2, alpha=.05, null_value=0):

    """
    Compute a 1-alpha confidence interval for the trimmed mean
    The default amount of trimming is tr=.2

    :param x: 1-D array
    :param tr: proportion to trim from each tail (default .2)
    :param alpha: significance level (default .05)
    :param null_value: The p-value returned by this function is based on the value
        specified by the argument null_value, which defaults to 0
    :return: dict with the confidence interval, trimmed-mean estimate, test statistic,
        standard error, p-value, and sample size
    """

    x=x[~np.isnan(x)]
    se = np.sqrt(winvar(x, tr)) / ((1 - 2 * tr) * np.sqrt(len(x)))
    trimci_res = np.zeros(2)
    df = len(x) - 2 * np.floor(tr * len(x)) - 1
    trimci_res[0] = trim_mean(x, tr) - t.ppf(1 - alpha / 2, df) * se
    trimci_res[1] = trim_mean(x, tr) + t.ppf(1 - alpha / 2, df) * se
    test = (trim_mean(x, tr) - null_value) / se
    sig = 2 * (1 - t.cdf(abs(test), df))

    results={"ci": trimci_res, "estimate": trim_mean(x,tr),
             "test_stat": test, "se": se, "p_value": sig,
             "n": len(x)}

    return results
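A hedged usage sketch for trimci above (assumes trimci and its helpers winvar and trim_mean are importable as in this module; the sample data is synthetic):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(loc=1.0, scale=2.0, size=40)
x[:3] = 25.0                       # a few gross outliers that the trimming should absorb
res = trimci(x, tr=.2, alpha=.05)
print(res["estimate"], res["ci"], res["p_value"])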
Example #4
0
def compute_motion_compensation(motion_matrix, stab_mode,
                                trimmed_mean_percentage):
    if stab_mode == 'mean':
        u = motion_matrix[:, :, 0].mean()
        v = motion_matrix[:, :, 1].mean()
    elif stab_mode == 'trimmed_mean':
        u = trim_mean(motion_matrix[:, :, 0],
                      trimmed_mean_percentage,
                      axis=None)
        v = trim_mean(motion_matrix[:, :, 1],
                      trimmed_mean_percentage,
                      axis=None)
    elif stab_mode == 'median':
        u, v = np.median(motion_matrix[:, :, 0]), np.median(motion_matrix[:, :,
                                                                          1])
    elif stab_mode == 'mode':
        us, vs = cv2.cartToPolar(motion_matrix[:, :, 0], motion_matrix[:, :,
                                                                       1])
        mu, mv = mode(us.ravel())[0], mode(vs.ravel())[0]
        u, v = cv2.polarToCart(mu, mv)
        u, v = u[0][0], v[0][0]
    else:
        raise NotImplementedError(
            "Choose one of the implemented modes: mean, trimmed_mean, median, mode")
    return u, v
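A small usage sketch for compute_motion_compensation above (the synthetic motion field and the 0.2 trim fraction are illustrative; trim_mean is assumed to be imported as in the original module):

import numpy as np
from scipy.stats import trim_mean

# synthetic HxWx2 motion field: mostly (1, -2) with a few outlier vectors
motion = np.tile(np.array([1.0, -2.0]), (32, 32, 1))
motion[0, :5] = [40.0, 40.0]
u, v = compute_motion_compensation(motion, 'trimmed_mean', 0.2)
print(u, v)   # close to 1.0 and -2.0 despite the outliers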
Example #5
0
    def by_samples(cls, wrenches, proportion_to_cut=0.1):
        """
        Construct the wrench from sampled data, trimming outliers from both tails.

        Parameters
        ----------
        wrenches : list of :class:`Wrench`
            List of wrenches.
        proportion_to_cut : :obj:`float`
            Fraction to cut off of both tails of the distribution

        Returns
        -------
        Wrench
            The mean wrench after trimming distribution from both tails.

        Examples
        --------
        >>> w1 = Wrench([1, 1, 1], [.1,.1,.1])
        >>> w2 = Wrench([2, 2, 2], [.2,.2,.2])
        >>> w3 = Wrench([3, 3, 3], [.3,.3,.3])
        >>> w = Wrench.by_samples([w1, w2, w3])
        >>> print(w)
        Wrench(Vector(2.000, 2.000, 2.000), Vector(0.200, 0.200, 0.200))
        """
        if not stats:
            raise NotImplementedError("Not supported on this platform")

        forces = [w.force for w in wrenches]
        torques = [w.torque for w in wrenches]
        force = stats.trim_mean(forces, proportion_to_cut, axis=0).tolist()
        torque = stats.trim_mean(torques, proportion_to_cut, axis=0).tolist()
        return cls(force, torque)
Example #6
0
def main():
   #a. Read in the data set
   df = pd.read_csv(data_file)

   #b. Replacing all occurrences of missing data with NaN
   df.replace('.', np.NaN, inplace = True) 

   #Compute the mean and the median for column "lowbwt"
   #drop rows if there are any NaN
   arr = pd.to_numeric(df.dropna(axis=0)['lowbwt']).values
   mean = np.mean(arr) 
   median = np.median(arr) 
   tmean = stats.trim_mean(arr, 0.05)
   print(mean, median, tmean)

   #drop rows if there are any NaN
   arr = pd.to_numeric(df.dropna(axis=0)['life60']).values
   mean = np.mean(arr) 
   median = np.median(arr) 
   tmean = stats.trim_mean(arr, 0.05)
   print(mean, median, tmean)

   #drop rows if there are any NaN
   arr = pd.to_numeric(df.dropna(axis=0)['life92']).values
   mean = np.mean(arr) 
   median = np.median(arr) 
   tmean = stats.trim_mean(arr, 0.05)
   print(mean, median, tmean)
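For context, a tiny self-contained comparison of the three central-tendency estimates printed above (the numbers are made up):

import numpy as np
from scipy import stats

arr = np.array([4.0, 5.0, 5.5, 6.0, 6.5, 7.0, 95.0])   # one extreme value
print(np.mean(arr))                # pulled up to ~18.4 by the outlier
print(np.median(arr))              # 6.0
print(stats.trim_mean(arr, 0.05))  # int(0.05*7) = 0 values cut, so identical to the plain mean here
print(stats.trim_mean(arr, 0.2))   # cuts 1 value from each tail -> mean of [5, 5.5, 6, 6.5, 7] = 6.0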
def find_peaks(raw,channelList,windowSize,threshold,hh,result):         
    ms = np.zeros([6,2])
    RMS = np.zeros((len(channelList),raw._data[0,:].shape[0]))
    idx_left = [0,2,4]
    idx_right = [1,3,5]
    for ii,name in enumerate(channelList):
        segment,_ = raw[ii,:]
        RMS[ii,:] = eegPinelineDesign.window_rms(segment[0,:],windowSize) 
        mph = trim_mean(RMS[ii,100000:-30000],0.05) + threshold * eegPinelineDesign.trimmed_std(RMS[ii,100000:-30000],0.05) 
        mpl = trim_mean(RMS[ii,100000:-30000],0.05) + hh * eegPinelineDesign.trimmed_std(RMS[ii,100000:-30000],0.05)
        ms[ii,:] = [mph, mpl]
    peaks = []
    for time_stamp,duration in zip(result.Onset,result.Duration):
        start, stop = time_stamp - duration, time_stamp + duration
        start_, stop_ = raw.time_as_index([start,stop])
        segment,times = raw[:,start_:stop_]
        temp = []
        for ii, name in enumerate(channelList):
            info = mne.create_info([name],raw.info['sfreq'])
            E = mne.EvokedArray(segment[ii,:].reshape(1,-1),info)
            _,peak = E.get_peak(mode='pos')
            
            temporal_mark = np.argmin(abs(times - (peak + start)))  # sample closest to the detected peak
            temp.append(segment[ii,temporal_mark])
        peaks.append(temp)
    peaks = np.array(peaks)
    #peaks = peaks / peaks.std(0)
    return peaks[:,idx_left], peaks[:,idx_right]
def calculate_central_tendency(dataframe, neutralise_categories, category1, metadata, mode, zeros, with_authors, print_ = True):
    """
    Calculates central tendencies of a table
    """
    if print_ == True:
        print("central tendency:",mode,zeros,with_authors)

    if with_authors == "with authors":
        dataframe_values = dataframe.values
        dataframe_values = np.reshape(dataframe_values,-1)

        if zeros == "without zeros":
            dataframe_values = np.trim_zeros(np.sort(dataframe_values))

        if mode == "median":
            tendency = dataframe.stack().median()
        elif mode == "mean":
            tendency = dataframe.stack().mean()
        elif mode == "trimming mean":
            tendency = stats.trim_mean(dataframe.stack(), proportiontocut=0.1)
            

    elif with_authors == "without authors":
        dataframe_without_category = np.empty([0, ])

        # TODO: a better way to do this without rotating it?
        dataframe = dataframe.rename(lambda x: x +"_"+ metadata.loc[x,category1])
        dataframe = dataframe.T         
        dataframe = dataframe.rename(lambda x: x +"_"+ metadata.loc[x,category1])
        dataframe = dataframe.T
        
        # For each author
        for neutralise_category1 in neutralise_categories:
            # For every other author
            for neutralise_category2 in neutralise_categories:
                # Only compare pairs of different authors
                if neutralise_category1 == neutralise_category2:
                    pass
                else:
                    # Build indices to pull out each author's sub-dataframes
                    category_columns = dataframe.columns.to_series().str.endswith("_"+neutralise_category1)
                    category_rows = dataframe.index.to_series().str.endswith("_"+neutralise_category2)

                    # We take the values
                    values_without_category = dataframe.loc[category_rows,category_columns].values
                    # Flatten the values to a 1-D array
                    values_without_category = np.reshape(values_without_category,-1)
                    dataframe_without_category = np.concatenate((dataframe_without_category,values_without_category),axis=0)

            if mode == "median":
                tendency = (np.median(dataframe_without_category))
            elif mode == "mean":
                tendency = (np.mean(dataframe_without_category))
            elif mode == "trimming mean":
                tendency = (stats.trim_mean(dataframe_without_category, proportiontocut = 0.1))
                

    #print("\n\n",tendency)    
    return tendency
Example #9
0
def lineplot_radio(f):
	#TIEMPOS_EN_C.txt TIEMPOS_EN_ASM.txt RADIO
	tests_c = []
	tests_asm = []
	radio_sizes = []

	fobj = open(f, 'r')
	for line in fobj:
		words = line.split(' ')
		tests_c.append(words[0])
		tests_asm.append(words[1])
		radio_sizes.append(words[2].rstrip('\n'))
	fobj.close()

	buffer_c = []
	for file in tests_c:
		times_list = fileTolist(file)
		buffer_c.append(times_list)
	
	# Normalize to ticks per pixel
	for i in xrange(len(radio_sizes)):
		buffer_c[i] = map((lambda x: x/256**2), buffer_c[i])

	cMeans = []
	cStd = []
	for xs in buffer_c:
		cMeans.append(trim_mean(xs, 0.25))
		cStd.append(np.std(xs))

	buffer_asm = []
	for file in tests_asm:
		times_list = fileTolist(file)
		buffer_asm.append(times_list)
	
	# Normalize to ticks per pixel
	for i in xrange(len(radio_sizes)):
		buffer_asm[i] = map((lambda x: x/256**2), buffer_asm[i])

	asmMeans = []
	asmStd = []
	for xs in buffer_asm:
		asmMeans.append(trim_mean(xs, 0.25))
		asmStd.append(np.std(xs))


	fig, ax = plt.subplots()

	plt.plot(radio_sizes, cMeans, 'ro')
	rects1 = ax.errorbar(radio_sizes, cMeans, yerr=cStd)

	plt.plot(radio_sizes, asmMeans, 'ro')
	rects2 = ax.errorbar(radio_sizes, asmMeans, yerr=asmStd)

	ax.set_ylabel('#ticks/pixel')
	ax.set_title(u'Blur C vs Blur ASM en función del tamaño del radio')
	ax.set_xlabel(u'Radio')
	ax.legend( (rects1[0], rects2[0]), ('C', 'ASM'), loc=2 )

	plt.savefig('lineplot_radio.pdf')
Example #10
0
 def trimmed_mean(self, alpha):
     if all(is_scalar(x) for x in self):
         return stats.trim_mean(self, alpha)
     elif get_dimension(self) > 0:
         return tuple(stats.trim_mean(self, alpha, axis=0))
     else:
         raise Exception(
             "I don't know how to take the trimmed_mean of these values.")
Example #11
0
 def fix_linesearch(i, x):
     _r = dv.region(i)
     x[dv.diffusivity_indices(i)] = min(
         x[dv.diffusivity_indices(i)],
         trim_mean(x[dv.diffusivity_indices(_r)], .25))
     x[dv.potential_indices(i)] = min(
         x[dv.potential_indices(i)],
         trim_mean(x[dv.potential_indices(_r)], .25))
Example #12
0
def lineplot_blur(f):
	tests_c = []
	tests_asm = []
	img_sizes = []

	fobj = open(f, 'r')
	for line in fobj:
		words = line.split(' ')
		tests_c.append(words[0])
		tests_asm.append(words[1])
		img_sizes.append(words[2].rstrip('\n'))
	fobj.close()

	buffer_c = []
	for file in tests_c:
		times_list = fileTolist(file)
		buffer_c.append(times_list)
	
	# Normalize to ticks per pixel
	for i in xrange(len(img_sizes)):
		buffer_c[i] = map((lambda x: x/(float(img_sizes[i])**2)), buffer_c[i])

	cMeans = []
	cStd = []
	for xs in buffer_c:
		cMeans.append(trim_mean(xs, 0.25))
		cStd.append(np.std(xs))

	buffer_asm = []
	for file in tests_asm:
		times_list = fileTolist(file)
		buffer_asm.append(times_list)
	
	# Normalize to ticks per pixel
	for i in xrange(len(img_sizes)):
		buffer_asm[i] = map((lambda x: x/(float(img_sizes[i])**2)), buffer_asm[i])

	asmMeans = []
	asmStd = []
	for xs in buffer_asm:
		asmMeans.append(trim_mean(xs, 0.25))
		asmStd.append(np.std(xs))


	fig, ax = plt.subplots()

	plt.plot(img_sizes, cMeans, 'ro')
	rects1 = ax.errorbar(img_sizes, cMeans, yerr=cStd)

	plt.plot(img_sizes, asmMeans, 'ro')
	rects2 = ax.errorbar(img_sizes, asmMeans, yerr=asmStd)

	ax.set_ylabel('#ticks/pixel')
	ax.set_title(u'Blur C vs Blur ASM')
	ax.set_xlabel(u'Ancho de imagen')
	ax.legend( (rects1[0], rects2[0]), ('C', 'ASM'), loc=2 )

	plt.savefig('blur_lineplot.pdf')
Example #13
0
 def liquidity(df_one):
     stom, stoq, stoa = [None] * 3
     if df_one[-21:][df_one>0].size>=(21*0.5):
         stom = np.log(trim_mean(df_one[-21:][df_one>0],0.1))
     if df_one[-63:][df_one>0].size>=(63*0.5):
         stoq = np.log(trim_mean(df_one[-63:][df_one>0],0.1))
     if df_one[df_one>0].size>=(252*0.5):
         stoa = np.log(trim_mean(df_one[df_one>0],0.1))
     return {'code': df_one.name, 'stom': stom, 'stoq': stoq, 'stoa': stoa}
Example #14
0
def trimmed_mean(values, axis=1):
    '''Returns the trimmed mean of each row of a matrix'''
    if isinstance(values, _sparse.csr_matrix):
        ret = _st.trim_mean(values, proportiontocut=.25, axis=axis)
        return ret
    else:
        return _st.trim_mean(_np.asmatrix(values),
                             proportiontocut=.25,
                             axis=axis)
def psuedo_rms(lower_threshold, higher_threshold,signal,sample_size=500):
    from scipy.stats import trim_mean
    rms = window_rms(signal,sample_size)
    l = trim_mean(rms,0.05) + lower_threshold * trimmed_std(rms,0.05)
    h = trim_mean(rms,0.05) + higher_threshold* trimmed_std(rms,0.05)
    prop = (sum(rms>l)+sum(rms<h))/(sum(rms<h) - sum(rms<l))
    if np.isinf(prop):
        prop = (sum(rms>l)+sum(rms<h))
    return prop
Example #16
0
def make_meta_epochs(epochs, y, n_bin=100):
    from scipy.stats import trim_mean
    from mne.epochs import EpochsArray
    meta_data = list()  # EEG data
    meta_y = list()  # regressors
    n = len(epochs)

    # make continuous y into bins to become categorical
    if len(np.unique(y)) < n_bin:
        hist, bin_edge = np.histogram(y, n_bin)
        y_ = y
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            y_[sel] = .5 * (high + low)
        y = y_

    # if discrete and few categories
    if len(np.unique(y)) < n_bin:
        already_used = list()
        for this_y in np.unique(y):
            for ii in range(n // len(np.unique(y)) // n_bin):
                sel = np.where(y == this_y)[0]
                sel = [ii for ii in sel if ii not in already_used][:n_bin]
                if not len(sel):
                    continue
                meta_data.append(
                    trim_mean(epochs._data[sel, :, :], .05, axis=0))
                meta_y.append(this_y)
                already_used += sel
    else:
        hist, bin_edge = np.histogram(y, n_bin)
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            this_y = .5 * (high + low)
            if not len(sel):
                continue
            meta_data.append(trim_mean(epochs._data[sel, :, :], .05, axis=0))
            meta_y.append(this_y)

    events = np.vstack(
        (np.zeros(len(meta_y)), np.zeros(len(meta_y)), meta_y)).T
    events = np.array(np.round(events), int)

    # transform into epochs
    new_epochs = EpochsArray(meta_data,
                             epochs.info,
                             events=events,
                             verbose=False)
    new_epochs.events = np.array(new_epochs.events, float)
    new_epochs.events[:, 2] = meta_y

    # XXX why change time and sfreq?
    new_epochs.times = epochs.times
    new_epochs.info['sfreq'] = epochs.info['sfreq']
    return new_epochs
Example #17
0
def yuen(x, y, tr=.2, alpha=.05):

    """
   Perform Yuen's test for trimmed means on the data in x and y.
   The default amount of trimming is 20%
   Missing values are automatically removed.

   A confidence interval for the trimmed mean of x minus the
   trimmed mean of y is computed and returned in yuen['ci'].
   The p-value is returned in yuen['p_value']

   x, y: The data for the two groups are stored in x and y
   tr=.2: indicates that the default amount of trimming is .2
          tr=0 results in using the sample mean

   For an omnibus test with more than two independent groups,
   use t1way (may not be implemented yet).

    :param x: data for group one
    :param y: data for group two
    :param tr: proportion to trim from each tail (default .2)
    :param alpha: significance level (default .05)
    :return: dict with sample sizes, trimmed means, confidence interval,
        p-value, standard error, test statistic, critical value and df
    """

    if tr ==.5:
        raise Exception("Using tr=.5 is not allowed; use a method designed for medians "
                        "(they may not be implemented yet")
    if tr>.25:
        raise Warning("with tr>.25 type I error control might be poor")

    x=x[~np.isnan(x)]
    y=y[~np.isnan(y)]

    h1 = len(x) - 2 * np.floor(tr * len(x))
    h2 = len(y) - 2 * np.floor(tr * len(y))
    q1 = (len(x) - 1) * winvar(x, tr) / (h1 * (h1 - 1))
    q2 = (len(y) - 1) * winvar(y, tr) / (h2 * (h2 - 1))
    df = (q1 + q2) ** 2 / ((q1 ** 2 / (h1 - 1)) + (q2 ** 2 / (h2 - 1)))
    crit = t.ppf(1 - alpha / 2, df)
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    low = dif - crit * np.sqrt(q1 + q2)
    up = dif + crit * np.sqrt(q1 + q2)
    test = abs(dif / np.sqrt(q1 + q2))
    yuen_results = 2 * (1 - t.cdf(test, df))


    results={'n1': len(x), 'n2': len(y),
             'est_1': trim_mean(x, tr), 'est_2': trim_mean(y, tr),
             'ci': [low, up], 'p_value': yuen_results,
             'dif': dif, 'se': np.sqrt(q1 + q2),
             'test_stat': test, 'crit': crit,
             'df': df}

    return results
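A hedged usage sketch for yuen above (assumes yuen, winvar and trim_mean are available as in this module; the two samples are synthetic):

import numpy as np

rng = np.random.default_rng(1)
g1 = rng.normal(0.0, 1.0, 30)
g2 = rng.normal(0.5, 1.0, 35)
res = yuen(g1, g2, tr=.2, alpha=.05)
print(res['dif'], res['ci'], res['p_value'])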
def impdivsborrows(M, S, K, T, C, P, r, D, Td):
    cout.info("Deriving borrows")
    bor = -1 / (T / base) * log(
        (C - P + K * exp(-r * (T / base)) + D * exp(-r * (Td / base))) / S)
    yld = -1 / (T / base) * log((C - P + K * exp(-r * (T / base))) / S)
    idx = np.where(M > 1)[0]
    bor = np.take(bor, idx, axis=0)
    bor = stats.trim_mean(bor, 0.01)
    yld = np.take(yld, idx, axis=0)
    yld = stats.trim_mean(yld, 0.01)
    return np.nan_to_num(bor), np.nan_to_num(yld)
Example #19
0
def yuend(x, y, tr=.2, alpha=.05):

    """
     Compare the trimmed means of two dependent random variables
     using the data in x and y.
     The default amount of trimming is 20%

     Any pair with a missing value is eliminated

     A confidence interval for the trimmed mean of x minus the
     trimmed mean of y is computed and returned in yuend['ci'].
     The significance level is returned in yuend['p_value']

     For inferences based on difference scores, use trimci

    :param x: data for group one
    :param y: data for group two (paired with x)
    :param tr: proportion to trim from each tail (default .2)
    :param alpha: significance level (default .05)
    :return: dict with the confidence interval, p-value, trimmed-mean estimates,
        difference, standard error, test statistic, sample size and df
    """

    from hypothesize.measuring_associations import wincor

    if type(x) is not np.ndarray:
        x, y=pandas_to_arrays([x, y])

    m = np.c_[x, y] # cbind
    m = m[~np.isnan(m).any(axis=1)]
    x = m[:,0]
    y = m[:, 1]

    h1 = len(x) - 2 * np.floor(tr * len(x))
    q1 = (len(x) - 1) * winvar(x, tr)
    q2 = (len(y) - 1) * winvar(y, tr)
    q3 = (len(x) - 1) * wincor(x, y, tr)['wcov']

    df = h1 - 1
    se = np.sqrt((q1 + q2 - 2 * q3) / (h1 * (h1 - 1)))
    crit = t.ppf(1 - alpha / 2, df)
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    low = dif - crit * se
    up = dif + crit * se
    test = dif / se
    yuend_res = 2 * (1 - t.cdf(abs(test), df))

    keys=['ci', 'p_value', 'est1', 'est2',
          'dif', 'se', 'teststat', 'n', 'df']

    vals=[[low, up], yuend_res, trim_mean(x,tr), trim_mean(y,tr),
          dif, se, test, len(x), df]

    return dict(zip(keys,vals))
Example #20
0
def make_meta_epochs(epochs, y, n_bin=100):
    from scipy.stats import trim_mean
    from mne.epochs import EpochsArray
    meta_data = list()  # EEG data
    meta_y = list()  # regressors
    n = len(epochs)

    # make continuous y into bins to become categorical
    if len(np.unique(y)) < n_bin:
        hist, bin_edge = np.histogram(y, n_bin)
        y_ = y
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            y_[sel] = .5 * (high + low)
        y = y_

    # if discrete and few categories
    if len(np.unique(y)) < n_bin:
        already_used = list()
        for this_y in np.unique(y):
            for ii in range(n // len(np.unique(y)) // n_bin):
                sel = np.where(y == this_y)[0]
                sel = [ii for ii in sel if ii not in already_used][:n_bin]
                if not len(sel):
                    continue
                meta_data.append(trim_mean(epochs._data[sel, :, :], .05,
                                           axis=0))
                meta_y.append(this_y)
                already_used += sel
    else:
        hist, bin_edge = np.histogram(y, n_bin)
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            this_y = .5 * (high + low)
            if not len(sel):
                continue
            meta_data.append(trim_mean(epochs._data[sel, :, :], .05, axis=0))
            meta_y.append(this_y)

    events = np.vstack((np.zeros(len(meta_y)),
                        np.zeros(len(meta_y)), meta_y)).T
    events = np.array(np.round(events), int)

    # transform into epochs
    new_epochs = EpochsArray(meta_data, epochs.info, events=events,
                             verbose=False)
    new_epochs.events = np.array(new_epochs.events, float)
    new_epochs.events[:, 2] = meta_y

    # XXX why change time and sfreq?
    new_epochs.times = epochs.times
    new_epochs.info['sfreq'] = epochs.info['sfreq']
    return new_epochs
Example #21
0
def orderByAbsoluteDifference(Quant, group1, group2):
    group1 = re.compile(group1)
    group2 = re.compile(group2)
    A = [m for m in Quant.columns if group1.match(m)]
    B = [m for m in Quant.columns if group2.match(m)]
    fold_changes = trim_mean(Quant.loc[:, A], 0.1, axis=1) - trim_mean(
        Quant.loc[:, B], 0.1, axis=1)
    table = pd.DataFrame(
        dict(compound=Quant.index,
             weight=np.abs(fold_changes),
             difference=fold_changes))
    table.sort_values(by="weight", ascending=False, inplace=True)
    return table
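A hedged usage sketch for orderByAbsoluteDifference above (the toy table and regex group prefixes are illustrative):

import re
import numpy as np
import pandas as pd
from scipy.stats import trim_mean

Quant = pd.DataFrame(np.random.rand(4, 6),
                     index=["c1", "c2", "c3", "c4"],
                     columns=["ctrl_1", "ctrl_2", "ctrl_3", "treat_1", "treat_2", "treat_3"])
print(orderByAbsoluteDifference(Quant, "ctrl", "treat"))   # compounds ranked by absolute trimmed-mean difference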
Example #22
0
def trimvar(x, trimming):
    """
    Computes the trimmed variance of array x.
    Input :
        x : input data as numpy array
        trimming, float : trimming percentage to be used
     
    Output:
        The trimmed variance of x.
    
    """
    # division by n
    return (sps.trim_mean(np.square(x - sps.trim_mean(x, trimming)), trimming))
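A quick hedged check of trimvar against the ordinary variance in the presence of an outlier (the values are made up; sps is scipy.stats, as in the module above):

import numpy as np
from scipy import stats as sps

x = np.array([1.0, 1.1, 0.9, 1.2, 0.8, 50.0])
print(np.var(x))          # dominated by the single outlier (~330)
print(trimvar(x, 0.2))    # trimmed variance, far less sensitive to it (~0.03)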
Example #23
0
def barplot_diff(f):

	tests_c = []
	tests_asm = []
	img_sizes = []

	fobj = open(f, 'r')
	for line in fobj:
		words = line.split(' ')
		tests_c.append(fileTolist(words[0]))
		tests_asm.append(fileTolist(words[1]))
		img_sizes.append(words[2].rstrip('\n'))
	fobj.close()

	cMeans = [trim_mean(x, 0.25) for x in tests_c]
	cStd = [np.std(x) for x in tests_c]

	asmMeans = [trim_mean(x, 0.25) for x in tests_asm]
	asmStd = [np.std(x) for x in tests_asm]

	N = len(img_sizes)

	ind = np.arange(N)  # the x locations for the groups
	width = 0.35       # the width of the bars

	fig, ax = plt.subplots()
	rects1 = ax.bar(ind, cMeans, width, color='r', yerr=cStd)

	rects2 = ax.bar(ind+width, asmMeans, width, color='y', yerr=asmStd)

	# add some text for labels, title and axes ticks
	ax.set_ylabel('#ticks')
	ax.set_title(u'Diff C vs Diff ASM')
	ax.set_xticks(ind+width)
	ax.set_xlabel(u'Ancho de imagen')
	ax.set_xticklabels( img_sizes )

	ax.legend( (rects1[0], rects2[0]), ('C', 'ASM'), loc=2 )

	def autolabel(rects):
	  # attach some text labels
	  for rect in rects:
	      height = rect.get_height()
	      ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%.1f'%(round(height,2)),
	              ha='center', va='bottom')

	# autolabel(rects1)
	# autolabel(rects2)

	plt.savefig('barplot.diff.c.vs.asm.pdf')
Example #24
0
def std(X, trimming=0):
    """
    Column-wise standard deviation or trimmed std.
    Trimming to be entered as fraction. 
    """

    if trimming == 0:
        s = np.power(np.var(X, axis=0), .5)
        s = np.array(s).reshape(-1)
    else:
        var = sps.trim_mean(np.square(X - sps.trim_mean(X, trimming, 0)),
                            trimming, 0)
        s = np.sqrt(var)
    return s
Example #25
0
def orderByFoldChange(Quant, group1, group2):
    pseudoCount = np.percentile(Quant.where(Quant > 0).stack().values, 1)
    group1 = re.compile(group1)
    group2 = re.compile(group2)
    A = [m for m in Quant.columns if group1.match(m)]
    B = [m for m in Quant.columns if group2.match(m)]
    fold_changes = (trim_mean(Quant.loc[:, A], 0.1, axis=1) + pseudoCount) / (
        trim_mean(Quant.loc[:, B], 0.1, axis=1) + pseudoCount)
    table = pd.DataFrame(
        dict(compound=Quant.index,
             weight=np.abs(np.log10(fold_changes)),
             fold_change=fold_changes))
    table.sort_values(by="weight", ascending=False, inplace=True)
    return table
Example #26
0
def compute_tmean(data, mode="function", frac=0.1):
    if mode == "worker":
        ret = []
        for vs in data.values():
            vs = list(vs.values())
            ret.append(scstats.trim_mean(vs, frac))
        return np.mean(ret)
    else:
        data = invert_dict(data)
        ret = []
        for vs in data.values():
            vs = list(vs.values())
            ret.extend(vs)
        return scstats.trim_mean(ret, frac)
Example #27
0
    def trimmed_mean(self, alpha):
        self._set_array()
        if self.dim == 1:
            return Scalar(stats.trim_mean(self.array, alpha))
        elif self.dim is not None:
            return Vector(stats.trim_mean(self.array, alpha, axis=0))
        elif self.index_set is not None:

            def fn(t):
                return self[t].trimmed_mean(alpha)

            return TimeFunction.from_index_set(self.index_set, fn)
        else:
            raise Exception(
                "I don't know how to take the trimmed_mean of these values.")
def plot_company_vs_comparables(company,
                                comparables,
                                metric_obtainer,
                                metric_name,
                                axs,
                                subplot,
                                trim_mean_by=0.1):
    metric_comparables = [
        x for x in list(map(metric_obtainer, comparables)) if x
    ]
    plot_val_vs_industry(company.ticker, round(metric_obtainer(company), 2),
                         round(trim_mean(metric_comparables, trim_mean_by), 2),
                         metric_name, metric_name, axs[subplot[0], subplot[1]])
    return metric_obtainer(company), trim_mean(metric_comparables,
                                               trim_mean_by)
Example #29
0
def trimmed_mean(data, cap):
    '''
    data : the input data
    cap : the proportion to cut from each tail (given as a decimal)
    Calculates the trimmed mean with the given percentage cap
    '''
    return stats.trim_mean(data, cap)
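A hedged usage sketch for the wrapper above (the data and the 0.15 cap are illustrative; stats is scipy.stats):

from scipy import stats

data = [2, 3, 3, 4, 4, 5, 250]             # one extreme value
print(stats.trim_mean(data, 0.15))         # int(0.15*7) = 1 value cut from each tail -> 3.8
print(trimmed_mean(data, 0.15))            # same result through the wrapper above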
Example #30
0
def _tk(bss_data):
    """
    Temporal Kurtosis.

    Parameters
    ----------
    bss_data : array
        Array with dimensions CxTxE, where C is the number of components, T the
        number of time instants and E the number of events

    Returns
    -------
    res : array
        Vector of length C with the computed values for each component
    """
    # As _t_dim < _ev_dim, the events dimension will be shifted down one
    # position after computing the kurtosis (as time dimension will have
    # disappeared)
    ev_dim = _ev_dim - 1

    try:
        # Time kurtosis
        kurt_data = sp_stats.kurtosis(bss_data, axis=_t_dim)
        # Trimmed mean of the time kurtosis
        res = sp_stats.trim_mean(kurt_data, 0.01, ev_dim)
    except IndexError:
        raise _chk_parameters(bss_data=bss_data)
    # bss_data dimensionality has to be checked explicitly, as a ND array with
    # N > 3 does not raise an exception
    if bss_data.ndim > 3:
        raise _chk_parameters(bss_data=bss_data)
    return res
Example #31
0
def simulate_pb_type_I_error(data, samp_size, g, h):  #param, dist, samp_size

    nboot = 1000
    nsims = 2000
    l = round(.05 * nboot / 2) - 1
    u = nboot - l - 2
    mu = ghtrim(g, h)

    sig_ups = []
    sig_lows = []
    for s in range(nsims):
        experiment_data = np.random.choice(data, size=samp_size)
        bdat = np.random.choice(experiment_data, size=(nboot, samp_size))
        effects = trim_mean(bdat, .2, axis=1) - mu
        up = np.sort(effects)[u]
        low = np.sort(effects)[l]

        if low >= 0:
            sig_lows.append(1)

        elif up <= 0:
            sig_ups.append(1)

        # if (low>0 and up>0) or (low<0 and up<0):
        #     print('found sig')

    prob_low = (np.sum(sig_lows) / nsims)
    prob_up = (np.sum(sig_ups) / nsims)

    return prob_low, prob_up
Example #32
0
 def BG_algo(nbrhd, d, y, obs):
     nbrhd.remove(obs)
     tmd_mean = stats.trim_mean(nbrhd, d)
     std = np.std(nbrhd)
     obs_dif = abs(obs - tmd_mean)
     acc = 3 * std + y
     return obs_dif <= acc
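A hedged usage sketch for BG_algo above (the neighbourhood values and thresholds are made up; note that the function removes obs from the list passed in):

import numpy as np
from scipy import stats

obs = 9.7
nbrhd = [10.1, 9.9, 10.0, 10.2, 9.8, obs]
print(BG_algo(list(nbrhd), d=0.1, y=0.05, obs=obs))   # True: |obs - trimmed mean| is within 3*std + y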
Example #33
0
    def std(self, X, trimming=0):
        """
        Column-wise standard deviation or trimmed std.
        Trimming to be entered as fraction. 
        """

        if trimming == 0:
            s = np.std(X, axis=0)
            s = np.array(s).reshape(-1)
        else:
            var = sps.trim_mean(np.square(X - sps.trim_mean(X, trimming, 0)),
                                trimming, 0)
            s = np.sqrt(var)

        setattr(self, "col_std_", s)
        return s
Example #34
0
 def BG_algo(nbrhd,d, y, obs):
     nbrhd.remove(obs)
     tmd_mean = stats.trim_mean(nbrhd, d)
     std = np.std(nbrhd)
     obs_dif = abs(obs-tmd_mean)
     acc = 3*std+y
     return obs_dif <= acc
Example #35
0
def get_aggregate_function(config, section, option):
    to_func = {'mean': np.mean,
               'median': np.median,
               'tmean_20pc': lambda x : trim_mean(x, 0.2),
               'tmean_25pc': lambda x : trim_mean(x, 0.25),
               'tmean_33pc': lambda x : trim_mean(x, 1.0/3),
               }
    
    if config.has_option(section, option):
        s = config.get(section, option)
        if s in to_func:
            return to_func[s]
        else:
            raise Exception('Unknown aggregate function: ' + s)
    else:
        return to_func['mean']
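A hedged usage sketch for get_aggregate_function above (the section and option names are made up; assumes Python 3's configparser and the same numpy/trim_mean imports as the module):

import numpy as np
from scipy.stats import trim_mean
from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string("[analysis]\naggregate = tmean_20pc\n")
agg = get_aggregate_function(cfg, 'analysis', 'aggregate')
print(agg([1, 2, 3, 4, 100]))   # 20% trimmed mean: 1 value cut from each tail -> 3.0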
Example #36
0
def articulos_libras_promedio(df_bd_preprocesada):
    df_pivot = df_bd_preprocesada[['InvoiceNo','Quantity', 'Monto']].groupby(['InvoiceNo']).sum()
    df_pivot=df_pivot.reset_index()
    media_cortada_articulos = stats.trim_mean(df_pivot['Quantity'], 0.3)
    print ( "\n-----------------------Articulos/Libras promedio-------------------------------\n ")
    print('El promedio de articulos por compra es  {} ' .format(media_cortada_articulos))
    print ( "\n------------------------------------------------------\n ")

    #print('la desviación estandar en cantidad de articulos por compra es  {} ' .format(df_pivot['Quantity'].std()))
    #print('el maximo de articulos por compra es  {} ' .format(df_pivot['Quantity'].max()))

    #----- The result is too high and unrealistic; drop extreme values to improve the mean -------
    #----- Solved with trim_mean -----------------------------------
    #--------------------- Average pounds spent ----------------------------
    media_cortada_monto = stats.trim_mean(df_pivot['Monto'], 0.3)
    print('El promedio de libras gastadas por compra es  {} ' .format(media_cortada_monto))
Example #37
0
def average_predictions(predictions: List[pd.DataFrame],
                        column: str,
                        method='mean',
                        min=None,
                        max=None) -> pd.DataFrame:
    preds = []
    for p in predictions:
        pred = to_numpy(p[column].values.tolist())
        preds.append(pred)

    preds = np.row_stack(preds)
    if min is not None or max is not None:
        preds = np.clip(preds, min, max)

    if method == 'mean':
        y_pred = np.mean(preds, axis=0)
    elif method == 'trim_mean':
        y_pred = trim_mean(preds, proportiontocut=0.1, axis=0)
    elif method == 'median':
        y_pred = np.median(preds, axis=0)
    else:
        raise KeyError(method)

    result = pd.DataFrame.from_dict({
        'id_code': predictions[0]['image_id'].values,
        'diagnosis': y_pred.tolist()
    })
    return result
Example #38
0
    def calc_diff_conf_int(self, pair, reps=5000, ci=0.95):
        """
        Bootstrap a confidence interval for the central tendency of differences

        :param TestPair pair: pairs to calculate the confidence interval
        :param int reps: how many bootstrapping repetitions to perform
        :param float ci: confidence interval for the low and high estimate.
            0.95, i.e. "2 sigma", by default
        :return: tuple with low estimate, median, and high estimate of
            truncated mean of differences of observations
        """
        # because the samples are not independent, we calculate mean of
        # differences not a difference of means
        diffs = self.data.iloc[:, pair.index1] - self.data.iloc[:, pair.index2]

        cent_tend = []
        observ_count = len(diffs)

        for _ in range(reps):
            boot = np.random.choice(diffs, replace=True, size=observ_count)
            # use trimmed mean as the pairing of samples is not perfect:
            # the noise source could get activated in the middle of testing
            # of the test set, causing some results to be unusable
            # discard 50% of samples total (cut 25% from the median) to exclude
            # non central modes
            cent_tend.append(stats.trim_mean(boot, 0.25))

        return np.quantile(cent_tend, [(1 - ci) / 2, 0.5, 1 - (1 - ci) / 2])
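The same bootstrap-of-the-trimmed-mean idea as a standalone hedged sketch (the sample data, repetition count and trim fraction are illustrative):

import numpy as np
from scipy import stats

rng = np.random.default_rng(42)
diffs = rng.normal(0.002, 0.01, size=200)               # stand-in for paired differences
boots = [stats.trim_mean(rng.choice(diffs, size=diffs.size, replace=True), 0.25)
         for _ in range(5000)]
low, med, high = np.quantile(boots, [0.025, 0.5, 0.975])
print(low, med, high)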
def tobc(curr_arr, block_size, central_tendency, cutting_ratio, percentile):
    answer = []
    for curr_col in range(np.shape(curr_arr)[1]):
        curr_row = 0
        curr_col_moving = curr_col
        sum_ = 0
        median_list = []
        while curr_row < np.shape(curr_arr)[0]:
            sum_ = sum_ + curr_arr[curr_row][curr_col_moving]
            median_list.append(curr_arr[curr_row][curr_col_moving])
            curr_col_moving = curr_col_moving + 1
            if curr_col_moving == np.shape(curr_arr)[1]:
                curr_col_moving = 0
            curr_row = curr_row + 1
        if central_tendency == "sum":
            temp_var = sum_
        if central_tendency == "mean":
            temp_var = sum_ / block_size[0]
        if central_tendency == "median":
            temp_var = median(median_list)
        if central_tendency == "trim_mean":
            temp_var = stats.trim_mean(np.array(median_list), cutting_ratio)
        if central_tendency == "percentile":
            temp_var = np.percentile(np.array(median_list), percentile)
        if central_tendency == "win_mean":
            temp_var = mean(winsorize(np.array(median_list), cutting_ratio))
        answer.append(temp_var)
    mid_answer = np.copy(circulant(answer).transpose())
    return mid_answer
def trimean(samples, trim=0.10):
    """
    Trim the top and bottom `trim` fraction (e.g. 0.10 = 10%) of the samples and then calculate the mean.
    This is a good way to remove outliers.

    In [15]: trimean([0,1,2,3,4,5,6,7,8,9,10])
    Out[15]: 5.0

    In [16]: trimean([0,1,2,3,4,5,6,7,8,9,10,11])
    Out[16]: 5.5

    In [17]: trimean([0,1,2,3,4,5,6,7,8,9,10,50])
    Out[17]: 5.5

    In [18]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50])
    Out[18]: 9.545454545454545

    In [19]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50,50])
    Out[19]: 12.916666666666666

    In [20]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50,50,100])
    Out[20]: 15.76923076923077

    :param samples: All the samples
    :param trim: fraction to trim from the top and bottom (default 0.10)
    :return: One number representing the whole sample series
    """
    return stats.trim_mean(samples, trim)
Example #41
0
def scan_correction(dn, tidx):
    'Scales the amplitude of each scan to be most like the first scan'
    for j in [0, 1]:
        null_spek = trim_mean(dn[tidx:, :, j, 0], 0.2, 0)
        null_std = dn[tidx:, :, j, 0].std(0)
        for i in range(0, dn.shape[-1], 2):
            spec = trim_mean(dn[tidx:, :, j, i], 0.2, 0)
            c = np.linalg.lstsq(spec[:, None], null_spek[:, None])
            dn[:, :, j, i] *= c[0][0]

        null_spek = trim_mean(dn[tidx:, :, j, 1], 0.2, 0)
        for i in range(1, dn.shape[-1], 2):
            spec = trim_mean(dn[tidx:, :, j, i], 0.2, 0)
            c = np.linalg.lstsq(spec[:, None], null_spek[:, None])
            # print c[0][0]
            dn[:, :, j, i] *= c[0][0]
    return dn
def trimmed_mean(full_list_X, full_list_Y):
    rowx = list()
    rowy = list()
    tmeanX = list()
    tmeanY = list()
    for i in range(len(full_list_X)):
        x = full_list_X[i]
        for j in range(len(full_list_X[i][0])):
            y = zip(*x)[j]
            rowx.append(stats.trim_mean(y, 0.25))
        temp = zip(*(full_list_Y[i]))
        y1 = list(temp[0])
        tmeanY.append(y1[0])
        tmeanX.append(rowx)
        rowx = list()
    return tmeanX, tmeanY
def avaragePlacementDistance(accountBet):
    placementDates = [i['DayMarker'] for i in accountBet]
    placementDates.sort()
    placementDatesTmp = shift(placementDates,1)

    difference = [(datetime.strptime(b, '%Y-%m-%d %H:%M:%S') - datetime.strptime(a, '%Y-%m-%d %H:%M:%S')).total_seconds()/3600.0 for a,b in zip (placementDates, placementDatesTmp)]

    diffTrimmed = difference[:-1]
    if any(t < 0 for t in diffTrimmed):
        print 'hernya'
        print diffTrimmed

    ret = stats.trim_mean(difference, 0.01)

    if len(diffTrimmed) == 0:
        print  'hernya'
        return 0

    if (ret < 0.001):
        ret = float(sum(diffTrimmed)) / len(diffTrimmed)

    return ret
def extract_features(record,i):
    log = dict()
    log['subject_id'] = i+1
    mean_features = mean(record,axis=0)
    median_features = median(record,axis=0)
    trim_mean_10_features = stats.trim_mean(record,0.1,axis = 0)
    trim_mean_25_features = stats.trim_mean(record,0.25,axis = 0)
    std_dev_features = std(record,axis =0)
    iqr_features = np.subtract(*np.percentile(record, [75, 25],axis=0))
    mad_features = mean(absolute(record - mean(record, axis = 0)), axis = 0)
    print mad_features
    log['mean_jitter(local)'] = mean_features[0]
    log['mean_jitter(local,absolute)']= mean_features[1]
    log['mean_jitter(ppq5)'] = mean_features[2]
    log['mean_jitter(rap)'] = mean_features[3]
    log['mean_jitter(ddp)'] = mean_features[4]
    log['mean_shimmer(local)'] = mean_features[5]
    log['mean_shimmer(local,dB)'] = mean_features[6]
    log['mean_shimmer(apq3)'] = mean_features[7]
    log['mean_shimmer(apq5)'] = mean_features[8]
    log['mean_shimmer(apq11)'] = mean_features[9]
    log['mean_shimmer(dda)'] = mean_features[10]
    log['mean_AC'] = mean_features[11]
    log['mean_NTH'] = mean_features[12]
    log['mean_HTN'] = mean_features[13]
    log['mean_median_pitch'] = mean_features[14]
    log['mean_mean_pitch'] = mean_features[15]
    log['mean_std_dev'] = mean_features[16]
    log['mean_min_pitch'] = mean_features[17]
    log['mean_max_pitch'] = mean_features[18]
    log['mean_num_pulses'] = mean_features[19]
    log['mean_num_periods'] = mean_features[20]
    log['mean_mean_period'] = mean_features[21]
    log['mean_std_dev_period'] = mean_features[22]
    log['mean_frac_locallyunvoiced_frames'] = mean_features[23]
    log['mean_num_voice_breaks'] = mean_features[24]
    log['mean_degree_voicebreaks']= mean_features[25]
    log['median_jitter(local)'] = median_features[0]
    log['median_jitter(local,absolute)'] = median_features[1]
    log['median_jitter(rap)'] = median_features[2]
    log['median_jitter(ppq5)'] = median_features[3]
    log['median_jitter(ddp)'] = median_features[4]
    log['median_shimmer(local)'] = median_features[5]
    log['median_shimmer(local,dB)'] = median_features[6]
    log['median_shimmer(apq3)'] = median_features[7]
    log['median_shimmer(apq5)'] = median_features[8]
    log['median_shimmer(apq11)'] = median_features[9]
    log['median_shimmer(dda)'] = median_features[10]
    log['median_AC'] = median_features[11]
    log['median_NTH'] = median_features[12]
    log['median_HTN'] = median_features[13]
    log['median_median_pitch'] = median_features[14]
    log['median_mean_pitch'] = median_features[15]
    log['median_std_dev'] = median_features[16]
    log['median_min_pitch'] = median_features[17]
    log['median_max_pitch'] = median_features[18]
    log['median_num_pulses'] = median_features[19]
    log['median_num_periods'] = median_features[20]
    log['median_mean_period'] = median_features[21]
    log['median_std_dev_period'] = median_features[22]
    log['median_frac_locallyunvoiced_frames'] = median_features[23]
    log['median_num_voice_breaks'] = median_features[24]
    log['median_degree_voicebreaks'] = median_features[25]
    log['trim10mean_jitter(local)'] = trim_mean_10_features[0]
    log['trim10mean_jitter(local,absolute)']= trim_mean_10_features[1]
    log['trim10mean_jitter(ppq5)'] = trim_mean_10_features[2]
    log['trim10mean_jitter(rap)'] = trim_mean_10_features[3]
    log['trim10mean_jitter(ddp)'] = trim_mean_10_features[4]
    log['trim10mean_shimmer(local)'] = trim_mean_10_features[5]
    log['trim10mean_shimmer(local,dB)'] = trim_mean_10_features[6]
    log['trim10mean_shimmer(apq3)'] = trim_mean_10_features[7]
    log['trim10mean_shimmer(apq5)'] = trim_mean_10_features[8]
    log['trim10mean_shimmer(apq11)'] = trim_mean_10_features[9]
    log['trim10mean_shimmer(dda)'] = trim_mean_10_features[10]
    log['trim10mean_AC'] = trim_mean_10_features[11]
    log['trim10mean_NTH'] = trim_mean_10_features[12]
    log['trim10mean_HTN'] = trim_mean_10_features[13]
    log['trim10mean_median_pitch'] = trim_mean_10_features[14]
    log['trim10mean_mean_pitch'] = trim_mean_10_features[15]
    log['trim10mean_std_dev'] = trim_mean_10_features[16]
    log['trim10mean_min_pitch'] = trim_mean_10_features[17]
    log['trim10mean_max_pitch'] = trim_mean_10_features[18]
    log['trim10mean_num_pulses'] = trim_mean_10_features[19]
    log['trim10mean_num_periods'] = trim_mean_10_features[20]
    log['trim10mean_mean_period'] = trim_mean_10_features[21]
    log['trim10mean_std_dev_period'] = trim_mean_10_features[22]
    log['trim10mean_frac_locallyunvoiced_frames'] = trim_mean_10_features[23]
    log['trim10mean_num_voice_breaks'] = trim_mean_10_features[24]
    log['trim10mean_degree_voicebreaks']= trim_mean_10_features[25]
    log['trim25mean_jitter(local)'] = trim_mean_25_features[0]
    log['trim25mean_jitter(local,absolute)']= trim_mean_25_features[1]
    log['trim25mean_jitter(ppq5)'] = trim_mean_25_features[2]
    log['trim25mean_jitter(rap)'] = trim_mean_25_features[3]
    log['trim25mean_jitter(ddp)'] = trim_mean_25_features[4]
    log['trim25mean_shimmer(local)'] = trim_mean_25_features[5]
    log['trim25mean_shimmer(local,dB)'] = trim_mean_25_features[6]
    log['trim25mean_shimmer(apq3)'] = trim_mean_25_features[7]
    log['trim25mean_shimmer(apq5)'] = trim_mean_25_features[8]
    log['trim25mean_shimmer(apq11)'] = trim_mean_25_features[9]
    log['trim25mean_shimmer(dda)'] = trim_mean_25_features[10]
    log['trim25mean_AC'] = trim_mean_25_features[11]
    log['trim25mean_NTH'] = trim_mean_25_features[12]
    log['trim25mean_HTN'] = trim_mean_25_features[13]
    log['trim25mean_median_pitch'] = trim_mean_25_features[14]
    log['trim25mean_mean_pitch'] = trim_mean_25_features[15]
    log['trim25mean_std_dev'] = trim_mean_25_features[16]
    log['trim25mean_min_pitch'] = trim_mean_25_features[17]
    log['trim25mean_max_pitch'] = trim_mean_25_features[18]
    log['trim25mean_num_pulses'] = trim_mean_25_features[19]
    log['trim25mean_num_periods'] = trim_mean_25_features[20]
    log['trim25mean_mean_period'] = trim_mean_25_features[21]
    log['trim25mean_std_dev_period'] = trim_mean_25_features[22]
    log['trim25mean_frac_locallyunvoiced_frames'] = trim_mean_25_features[23]
    log['trim25mean_num_voice_breaks'] = trim_mean_25_features[24]
    log['trim25mean_degree_voicebreaks']= trim_mean_25_features[25]
    log['std_jitter(local)'] = std_dev_features[0]
    log['std_jitter(local,absolute)']= std_dev_features[1]
    log['std_jitter(ppq5)'] = std_dev_features[2]
    log['std_jitter(rap)'] = std_dev_features[3]
    log['std_jitter(ddp)'] = std_dev_features[4]
    log['std_shimmer(local)'] = std_dev_features[5]
    log['std_shimmer(local,dB)'] = std_dev_features[6]
    log['std_shimmer(apq3)'] = std_dev_features[7]
    log['std_shimmer(apq5)'] = std_dev_features[8]
    log['std_shimmer(apq11)'] = std_dev_features[9]
    log['std_shimmer(dda)'] = std_dev_features[10]
    log['std_AC'] = std_dev_features[11]
    log['std_NTH'] = std_dev_features[12]
    log['std_HTN'] = std_dev_features[13]
    log['std_median_pitch'] = std_dev_features[14]
    log['std_mean_pitch'] = std_dev_features[15]
    log['std_std_dev'] = std_dev_features[16]
    log['std_min_pitch'] = std_dev_features[17]
    log['std_max_pitch'] = std_dev_features[18]
    log['std_num_pulses'] = std_dev_features[19]
    log['std_num_periods'] = std_dev_features[20]
    log['std_mean_period'] = std_dev_features[21]
    log['std_std_dev_period'] = std_dev_features[22]
    log['std_frac_locallyunvoiced_frames'] = std_dev_features[23]
    log['std_num_voice_breaks'] = std_dev_features[24]
    log['std_degree_voicebreaks']= std_dev_features[25]
    log['iqr_jitter(local)'] = iqr_features[0]
    log['iqr_jitter(local,absolute)']= iqr_features[1]
    log['iqr_jitter(ppq5)'] = iqr_features[2]
    log['iqr_jitter(rap)'] = iqr_features[3]
    log['iqr_jitter(ddp)'] = iqr_features[4]
    log['iqr_shimmer(local)'] = iqr_features[5]
    log['iqr_shimmer(local,dB)'] = iqr_features[6]
    log['iqr_shimmer(apq3)'] = iqr_features[7]
    log['iqr_shimmer(apq5)'] = iqr_features[8]
    log['iqr_shimmer(apq11)'] = iqr_features[9]
    log['iqr_shimmer(dda)'] = iqr_features[10]
    log['iqr_AC'] = iqr_features[11]
    log['iqr_NTH'] = iqr_features[12]
    log['iqr_HTN'] = iqr_features[13]
    log['iqr_median_pitch'] = iqr_features[14]
    log['iqr_mean_pitch'] = iqr_features[15]
    log['iqr_std_dev'] = iqr_features[16]
    log['iqr_min_pitch'] = iqr_features[17]
    log['iqr_max_pitch'] = iqr_features[18]
    log['iqr_num_pulses'] = iqr_features[19]
    log['iqr_num_periods'] = iqr_features[20]
    log['iqr_mean_period'] = iqr_features[21]
    log['iqr_std_dev_period'] = iqr_features[22]
    log['iqr_frac_locallyunvoiced_frames'] = iqr_features[23]
    log['iqr_num_voice_breaks'] = iqr_features[24]
    log['iqr_degree_voicebreaks']= iqr_features[25]
    log['mad_jitter(local)'] = mad_features[0]
    log['mad_jitter(local,absolute)']= mad_features[1]
    log['mad_jitter(ppq5)'] = mad_features[2]
    log['mad_jitter(rap)'] = mad_features[3]
    log['mad_jitter(ddp)'] = mad_features[4]
    log['mad_shimmer(local)'] = mad_features[5]
    log['mad_shimmer(local,dB)'] = mad_features[6]
    log['mad_shimmer(apq3)'] = mad_features[7]
    log['mad_shimmer(apq5)'] = mad_features[8]
    log['mad_shimmer(apq11)'] = mad_features[9]
    log['mad_shimmer(dda)'] = mad_features[10]
    log['mad_AC'] = mad_features[11]
    log['mad_NTH'] = mad_features[12]
    log['mad_HTN'] = mad_features[13]
    log['mad_median_pitch'] = mad_features[14]
    log['mad_mean_pitch'] = mad_features[15]
    log['mad_std_dev'] = mad_features[16]
    log['mad_min_pitch'] = mad_features[17]
    log['mad_max_pitch'] = mad_features[18]
    log['mad_num_pulses'] = mad_features[19]
    log['mad_num_periods'] = mad_features[20]
    log['mad_mean_period'] = mad_features[21]
    log['mad_std_dev_period'] = mad_features[22]
    log['mad_frac_locallyunvoiced_frames'] = mad_features[23]
    log['mad_num_voice_breaks'] = mad_features[24]
    log['mad_degree_voicebreaks']= mad_features[25]




    return log
Example #45
0
	def _filt_run(self,dat,filt,do_sim=False,vplot=True,nrange=1):
		
		if self.doplot and vplot:
			errorbar(dat[0],dat[1],dat[2],fmt="o")
		
		new = True
		if new:
			mymodel = Model(self.fitfunc_small_te,extra_args=[dat[1],dat[2],False])
		else:
			mymodel = Model(self.fitfunc_te) #,extra_args=[dat[1],dat[2],False])
			
		# get some good guesses
		try:
			scale = trim_mean(dat[1],0.3)
		except:
			scale = mean(dat[1])
		offset = 1.0 #trim_mean(dat[1],0.3)
		t0    = median(dat[0])
		umin  = 1.0
		b     = 0.0  ## trending slope
		mydata  = RealData(dat[0],dat[1],sx=1.0/(60*24),sy=dat[2])
		
		trange = list(linspace(min(dat[0]),max(dat[0]),nrange))
		maxi = (dat[1] == max(dat[1])).nonzero()[0]		
		trange.extend(list(dat[0][maxi]))
		trange.extend([t0, max(dat[0]) + 10, max(dat[0]) + 100])
		
		final_output = None
		for t0i in trange:
			for te in 10**linspace(log10(2),log10(200),nrange):
				if new:
					pinit = [te,umin,t0i] # ,scale,offset,b]
				else:
					pinit = [te,umin,t0i ,scale,offset,b]
				
				myodr = ODR(mydata,mymodel,beta0=pinit)
				myoutput = myodr.run()
				if final_output is None:
					final_output = myoutput
					old_sd_beta = final_output.sd_beta
					continue

				if trim_mean(log10(myoutput.sd_beta / final_output.sd_beta),0.0) < 0.0 and \
					myoutput.res_var <= final_output.res_var and (myoutput.sd_beta == 0.0).sum() <= (final_output.sd_beta == 0.0).sum():
					final_output = myoutput
					
		if 1:
			t = linspace(min(dat[0]),max([max(dat[0]),final_output.beta[2] + 6*final_output.beta[0]]),1500)
			if new:
				tmp = self.fitfunc_small_te(final_output.beta,dat[0],dat[1],dat[2],True)
				#print tmp, "***"
				p = list(final_output.beta)
				p.extend([tmp[0],tmp[1],tmp[2]])
				y = array(self.modelfunc_small_te(p,t))
			else:
				p = final_output.beta
				y = self.fitfunc_te(final_output.beta,t)
				#print final_output.beta 
			if self.doplot:
				plot(t,y)
				xlabel('Time [days]')
				ylabel('Relative Flux Density')
			
			if do_sim:
				for i in range(10):
					tmp = r.multivariate_normal(myoutput.beta, myoutput.cov_beta)
					if self.doplot:
						plot(t, self.a_te(tmp[0],tmp[1],tmp[2],tmp[3],tmp[4],tmp[5],t),"-")
			
		return (final_output, p, new)
Example #46
0
def main( method, method_value, additional, additional_value , sideLen):
    distanze = dict()
    statistica = dict()

    # read raw values
    for csv in [f for f in os.listdir('./') if f.endswith("csv") and f == data]:
        df = pd.read_csv(csv, header=0, decimal='.')
        header = df.columns.values
        for row in df.itertuples():
            nomeCella=row[1]
            for i in range(2, len(row)):
                if i % 2 != 0 and row[i] != defaultValue : # statistics only on distance columns
                    beaconID = (header[i-1][9:])
                    if (nomeCella,beaconID) in distanze:
                        distanze[nomeCella, beaconID].append(row[i])
                    else:
                        distanze[nomeCella,beaconID] = [ row[i] ]

    print distanze
    # compute statistics

    if method == "percentile":
        percentili = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentili[key] = stats.scoreatpercentile(distanze[key], method_value)
        for row in df.itertuples():  # iterate over the csv rows as tuples
                for i in range(1, len(row)):  # scan each tuple for default values to replace (e.g. -1000)
                    if i % 2 != 0 and row[i] == defaultValue and header[i-1][9:]!="kpOU":   # statistics only on distance
                        df.ix[row[0], i - 1] = percentili[row[1],(header[i-1][9:])]
        #print percentili
        statistica = percentili


    elif method == "truncated_mean":
        truncatedMeans = dict.fromkeys(distanze.keys())
        for key in distanze:
            truncatedMeans[key] = stats.trim_mean(distanze[key], method_value)
        for row in df.itertuples():  # iterate over the csv rows as tuples
            for i in range(2, len(row)):  # scan each tuple for default values to replace (e.g. -1000)
                if i % 2 != 0 and row[i] == defaultValue and header[i-1][9:]!="kpOU":
                    df.ix[row[0], i - 1] = truncatedMeans[row[1],(header[i-1][9:])]
        #print truncatedMeans
        statistica = truncatedMeans


    elif method == "simple_mean":
        simpleMeans = dict.fromkeys(distanze.keys())
        for key in distanze:
            simpleMeans[key] = numpy.mean(distanze[key])
        for row in df.itertuples():  # iterate over the csv rows as tuples
            for i in range(1, len(row)):   # scan each tuple for default values to replace (e.g. -1000)
                if i % 2 != 0 and row[i] == defaultValue:
                    df.ix[row[0], i - 1] = simpleMeans[row[1], (header[i-1][9:])]
        #print simpleMeans
        statistica = simpleMeans

    else:
        print "not supported yet!"
        return


                    # for index, riga in avs.iterrows():
                    #     if riga["Cella"] == row[1] and i>2: #salta colonna "cella" poiche letterale
                    #         if row[i] == 0.0 or row[i]==["nan"] or float(row[i]) - 50 > float(riga[i+1]) or float(row[i])+50 < float(riga[i]):
                    #         #print "cella:", row[1],"riga:", row[0],"colonna:",i,"valPre:" ,row[i], "valDop:", riga[i], row[i]
                    #             df.ix[row[0],i-1] = riga[i+1]
    if additional == "replace":
        count=0
        percentiliUP = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentiliUP[key] = stats.scoreatpercentile(distanze[key], (100-additional_value/2) ) # upper percentile for each ID
        percentiliDOWN = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentiliDOWN[key] = stats.scoreatpercentile(distanze[key], additional_value/2 ) # lower percentile for each ID
        for row in df.itertuples():  # iterate over the csv rows as tuples
            for i in range(2, len(row)):  # scan each tuple looking for outliers, i = column index
                if i % 2 != 0 and header[i-1][9:]!="kpOU":
                    if row[i] < percentiliDOWN[row[1],(header[i-1][9:])] or row[i] > percentiliUP[row[1],(header[i-1][9:])] :
                        #print row[i], percentiliDOWN[i], row[i], percentiliUP[i], additional_value
                        df.ix[row[0], i - 1] = statistica[row[1],(header[i-1][9:])]
                        count += 1
    else:
        print "additional not supported yet, skipped"


    if(additional_value != ""):
        print str(count) + " values replaced with " + method + " statistics"


    #adding Row/Column coordinates for each cell
    df["riga"] = 0
    df["colonna"] = 0
    header = df.columns.values   #reload header
    indexRowColumn = len(header)-1
    lastCell=""
    lastRow = 1
    lastCol = 0
    for row in df.itertuples():  # itera per tuple il csv
        if  row[indexRowColumn] == 0 and row[1] != lastCell:
            lastCell = row[1]
            lastCol += 1
            if lastCol > sideLen:
                lastCol = 1
                lastRow += 1
        df.ix[row[0], "riga"] = lastRow  # row
        df.ix[row[0], "colonna"] = lastCol  # column

    # saving to file
    df.to_csv(outputFile, encoding='utf-8')

    #drawing
    for col in df.columns.tolist():
        if col != "cella" and col != "colonna" and col != "riga" and not "rssi" in col:

            # heatmap for each sensor
            dataheat = [go.Heatmap( x=df['riga'],
                                    y=df['colonna'],
                                    z=df[col].tolist(),
                                        reversescale=True)]
            layout = go.Layout(title=method + "_" + col, width=800, height=640)
            fig = go.Figure(data=dataheat, layout=layout)
            py.image.save_as(fig, filename = "new_images/" + method + str(method_value) + "-" + additional + str(additional_value) + "_" + col + "-heatmap.png")

            #contour
            # dataheat = [go.Contour(x=df['riga'],
            #                        y=df['colonna'],
            #                        z=df[col].tolist(),
            #                        reversescale=False)]
            # layout = go.Layout(title=method + "_" + col, width=800, height=640)
            # fig = go.Figure(data=dataheat, layout=layout)
            # py.image.save_as(fig, filename="new_images/" + method + "_" + col + "-contour.png")


    #py.plot(dataheat, filename='labelled-heatmap.png')   online!
    print("   Done!")
Example #47
0
def meaner(dat, t, llim, ulim, proportiontocut=0.0):
    return trim_mean(dat[fi(t, llim):fi(t, ulim)],  axis=0, proportiontocut=proportiontocut)
def get_Onest_Amplitude_Duration_of_spindles(raw,channelList,
                                        annotations=None,
                                        moving_window_size=200,
                                        lower_threshold=.9,
                                        syn_channels=3,
                                        l_bound=0.5,h_bound=2,
                                        tol=1,higher_threshold=3.5,
                                        front=300,back=100,
                                        sleep_stage=True,
                                        proba=True,
                                        validation_windowsize=3,
                                        l_freq=11,h_freq=16):
    """
    raw: data after preprocessing
    channelList: channels of interest; in this study we use 'F3','F4','C3','C4','O1','O2'
    annotations: pandas DataFrame containing manual annotations such as sleep stages and spindle locations.
    moving_window_size: size of the moving window for the convolved root-mean-square computation. It works best when set to the sampling frequency, which in this case is 500 (subjects recorded at 1000 Hz are downsampled).
    lower_threshold: lower threshold for spindle detection: decision making = trimmed_mean + lower_T * trimmed_std
    higher_threshold: upper threshold for spindle detection: decision making = trimmed_mean + higher_T * trimmed_std
    syn_channels: criterion for selecting spindles: at least this many channels must show a spindle instance that also appears in the mean channel
    l_bound: lower boundary for the duration of a spindle instance
    h_bound: upper boundary for the duration of a spindle instance
    tol: tolerance for determining spindles (criterion in time)
    front: first few seconds of the recording that are ignored because they may contain artifacts and subjects are known not to fall asleep within such a short period
    back: last few seconds of the recording that are ignored because of the recording procedure
    (a minimal sketch of the window_rms and trimmed_std helpers used below follows this function)
    """
    # process the data without any other information
    time=np.linspace(0,raw.last_samp/raw.info['sfreq'],raw._data[0,:].shape[0])
    RMS = np.zeros((len(channelList),raw._data[0,:].shape[0]))
    peak_time={} #preallocate
    sfreq=raw.info['sfreq']
    mph,mpl = {},{}

    for ii, names in enumerate(channelList):

        peak_time[names]=[]
        segment,_ = raw[ii,:]
        RMS[ii,:] = window_rms(segment[0,:],moving_window_size) 
        mph[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS[ii,:],0.05) 
        mpl[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS[ii,:],0.05)
        pass_ = RMS[ii,:] > mph[names]  # should be greater than the mean, not the threshold, to compute duration

        up = np.where(np.diff(pass_.astype(int))>0)
        down = np.where(np.diff(pass_.astype(int))<0)
        up = up[0]
        down = down[0]
        ####### key to identify segments that go beyond the lower threshold ########
        #print(down[0],up[0])
        if down[0] < up[0]:
            down = down[1:]
        #print(down[0],up[0])
        #############################
        if (up.shape > down.shape) or (up.shape < down.shape):
            size = np.min([up.shape,down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up,down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS[ii,pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl[names]:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
    peak_time['mean']=[];peak_at=[];duration=[]
    RMS_mean=hmean(RMS)
    # apply the same algorithm to the mean of the RMSs
    mph['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS_mean,0.05)
    mpl['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS_mean,0.05)
    pass_ =RMS_mean > mph['mean']
    up = np.where(np.diff(pass_.astype(int))>0)
    down= np.where(np.diff(pass_.astype(int))<0)
    up = up[0]
    down = down[0]
    ###############################
    #print(down[0],up[0])
    if down[0] < up[0]:
        down = down[1:]
    #print(down[0],up[0])
    #############################
    if (up.shape > down.shape) or (up.shape < down.shape):
        size = np.min([up.shape,down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up,down))
    for pairs in C.T:
        
        if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1],]
            if np.max(SegmentForPeakSearching)< mpl['mean']:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                duration_temp = time[pairs[1]] - time[pairs[0]]
                duration.append(duration_temp) 
    time_find=[];mean_peak_power=[];Duration=[];
    for item,PEAK,duration_time in zip(peak_time['mean'],peak_at,duration):
        temp_timePoint=[]
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]), key=lambda x: abs(x[1]-item))[1])
            except:
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item)<tol).astype(int))>=syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except:
            pass
    ############ end of the processing stage that needs no other inputs ##
    #### update the spindles we found if we want to add sleep-stage information ######
    if sleep_stage:
        
        temp_time_find=[];temp_mean_peak_power=[];temp_duration=[];
        # separate out stage 2
        stages = annotations[annotations.Annotation.apply(stage_check)]
        On = stages[::2];Off = stages[1::2]
        stage_on_off = list(zip(On.Onset.values, Off.Onset.values))
        if abs(np.diff(stage_on_off[0]) - 30) < 2:
            pass
        else:
            On = stages[1::2];Off = stages[::2]
            stage_on_off = list(zip(On.Onset.values[1:], Off.Onset.values[2:]))
        for single_time_find, single_mean_peak_power, single_duration in zip(time_find,mean_peak_power,Duration):
            for on_time,off_time in stage_on_off:
                if intervalCheck([on_time,off_time],single_time_find,tol=tol):
                    temp_time_find.append(single_time_find)
                    temp_mean_peak_power.append(single_mean_peak_power)
                    temp_duration.append(single_duration)
        time_find=temp_time_find;mean_peak_power=temp_mean_peak_power;Duration=temp_duration
    
    ####### decision function based on spindles we have just found ####
    """
    A single floating-point representation is computed for each validation window (say 3 seconds), and information such as peak power densities and peak frequencies is added to the feature space.
    We fit the standardized features to the labels (spindles found by the automated pipeline).
    A prediction probability is computed using scikit-learn's LogisticRegression.
    """
    decision_features=None;auto_proba=None;auto_label=None
    if proba:
        result = pd.DataFrame({'Onset':time_find,'Duration':Duration,'Annotation':['spindle']*len(Duration)})     
        auto_label,_ = discritized_onset_label_auto(raw,result,validation_windowsize)
        events = mne.make_fixed_length_events(raw,id=1,start=front,stop=raw.times[-1]-back,duration=validation_windowsize)
        epochs = mne.Epochs(raw,events,event_id=1,tmin=0,tmax=validation_windowsize,preload=True)
        data = epochs.get_data()[:,:,:-1]
        full_prop=[]        
        for d in data:    
            temp_p=[]
            #fig,ax = plt.subplots(nrows=2,ncols=3,figsize=(8,8))
            for ii,(name) in enumerate(zip(channelList)):#,ax.flatten())):
                rms = window_rms(d[ii,:],500)
                l = trim_mean(rms,0.05) + lower_threshold * trimmed_std(rms,0.05)
                h = trim_mean(rms,0.05) + higher_threshold * trimmed_std(rms,0.05)
                prop = (sum(rms>l)+sum(rms<h))/(sum(rms<h) - sum(rms<l))
                if np.isinf(prop):
                    prop = (sum(rms>l)+sum(rms<h))
                temp_p.append(prop)
                
            
            full_prop.append(temp_p)
        psds,freq = mne.time_frequency.psd_multitaper(epochs,fmin=l_freq,fmax=h_freq,tmin=0,tmax=3,low_bias=True,)
        psds = 10* np.log10(psds)
        features = pd.DataFrame(np.concatenate((np.array(full_prop),psds.max(2),freq[np.argmax(psds,2)]),1))
        decision_features = StandardScaler().fit_transform(features.values,auto_label)
        clf = LogisticRegressionCV(Cs=np.logspace(-4,6,11),cv=5,tol=1e-7,max_iter=int(1e7))
        clf.fit(decision_features,auto_label)
        auto_proba=clf.predict_proba(decision_features)[:,-1]
    return time_find,mean_peak_power,Duration,mph,mpl,auto_proba,auto_label
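The window_rms and trimmed_std helpers called above are not included in this excerpt. A minimal sketch, assuming window_rms is a moving root-mean-square with a boxcar window of the given size and trimmed_std is the standard deviation of the symmetrically trimmed samples (both definitions are assumptions, not the original implementations):

import numpy as np

def window_rms(signal, window_size):
    # moving root-mean-square: convolve the squared signal with a
    # normalized boxcar window, then take the square root
    window = np.ones(int(window_size)) / float(window_size)
    return np.sqrt(np.convolve(signal ** 2, window, mode='same'))

def trimmed_std(data, proportiontocut):
    # standard deviation of the data after cutting the given proportion
    # from each tail (companion to scipy.stats.trim_mean)
    data = np.sort(np.asarray(data))
    cut = int(proportiontocut * len(data))
    return np.std(data[cut:len(data) - cut])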
Example #49
0
data = load_iris()
x = data['data']
y = data['target']
col_names = data['feature_names']
# Let’s now demonstrate how to calculate the mean, trimmed mean, and range values:
# 1.    Calculate and print the mean value of each column in the Iris dataset
print "col name,mean value"
for i,col_name in enumerate(col_names):
    print "%s,%0.2f"%(col_name,np.mean(x[:,i]))
print
# 2.    Trimmed mean calculation.
p = 0.1 # 10% trimmed mean
print
print "col name,trimmed mean value"
for i,col_name in enumerate(col_names):
    print "%s,%0.2f"%(col_name,trim_mean(x[:,i],p))
print
# 3.    Data dispersion: calculating and displaying the range values.
print "col_names,max,min,range"
for i,col_name in enumerate(col_names):
    print "%s,%0.2f,%0.2f,%0.2f"% (col_name,max(x[:,i]),min(x[:,i]),max(x[:,i])-min(x[:,i]))
print
# Finally, we will show the variance, standard deviation, mean absolute deviation, and median absolute deviation calculations:
# 4.    Data dispersion, variance and standard deviation
print "col_names,variance,std-dev"
for i,col_name in enumerate(col_names):
    print "%s,%0.2f,%0.2f"%(col_name,np.var(x[:,i]),np.std(x[:,i]))
print
# 5.    Mean absolute deviation calculation
def mad(x,axis=None):
    mean = np.mean(x,axis=axis)
    return np.mean(np.abs(x - mean),axis=axis)  # mean absolute deviation around the mean
import numpy as np
import featurespace_fun as fsf
import matplotlib.pyplot as plt
from nilearn.masking import apply_mask
from nilearn.image import smooth_img
from scipy.stats import norm
from statsmodels.sandbox.stats.multicomp import fdrcorrection0
from scipy.stats import ttest_1samp
import sys
import glob
from scipy.stats.mstats import trimmed_mean_ci
from scipy.stats import ttest_1samp, trim_mean

models = sys.argv[1:]

#models = ['logBSC_H200_ds_conv', 'logMFS_ds']

mask = 'brainmask_group_template.nii.gz'

scores_bsc = np.arctanh(apply_mask(smooth_img(glob.glob('MaThe/avg_maps/model_{}_*whole*'.format(models[0])), fwhm=3.0), mask_img=mask))
scores_mfs = np.arctanh(apply_mask(smooth_img(glob.glob('MaThe/avg_maps/model_{}_*whole*'.format(models[1])), fwhm=3.0), mask_img=mask))
diff_scores = scores_bsc - scores_mfs
mean_diff = trim_mean(diff_scores, 0.08, axis=0)
trim_mean_ci = trimmed_mean_ci(diff_scores, (0.08, 0.08), axis=0)
which_ones = np.logical_not(np.logical_or(trim_mean_ci[0,:] > 0, trim_mean_ci[1,:] < 0))
mean_diff[which_ones] = 0

display = fsf.plot_diff_avg_whole(mean_diff, 0.001)
display.savefig('mean_diff_smoothed_trim_model_{}.svg'.format('_'.join(models)))
display.savefig('mean_diff_smoothed_trim_model_{}.png'.format('_'.join(models)))
fsf.save_map_avg_whole(mean_diff, threshold=None, model='diff_smooth_trim_'+'_'.join(models))
    def find_onset_duration(self,lower_threshold,higher_threshold):
        from scipy.stats import trim_mean,hmean
        self.lower_threshold = lower_threshold
        self.higher_threshold = higher_threshold
        front = self.front
        back = self.back
        raw = self.raw
        channelList = self.channelList
        moving_window_size = self.moving_window_size
        l_bound = self.l_bound
        h_bound = self.h_bound
        tol = self.tol
        syn_channels = self.syn_channels
        

        sfreq=raw.info['sfreq']
        time=np.linspace(0,raw.last_samp/sfreq,raw.last_samp)
        RMS = np.zeros((len(channelList),raw._data[0,:].shape[0]))
        peak_time={} 
        mph,mpl = {},{}
        
        for ii,names in tqdm(enumerate(channelList)):
            peak_time[names]=[]
            segment,_ = raw[ii,:]
            RMS[ii,:] = window_rms(segment[0,:],moving_window_size) 
            mph[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) 
            mpl[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05)
            pass_ = RMS[ii,:] > mph[names]  # should be greater than the mean, not the threshold, to compute duration
            #pass_ = (RMS[ii,:] > mph[names]) & (RMS[ii,:] < mpl[names])
            up = np.where(np.diff(pass_.astype(int))>0)
            down = np.where(np.diff(pass_.astype(int))<0)
            up = up[0]
            down = down[0]
            if down[0] < up[0]:
                down = down[1:]
            if (up.shape > down.shape) or (up.shape < down.shape):
                size = np.min([up.shape,down.shape])
                up = up[:size]
                down = down[:size]
            C = np.vstack((up,down))
            for pairs in C.T:
                if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                    SegmentForPeakSearching = RMS[ii,pairs[0]:pairs[1]]
                    if np.max(SegmentForPeakSearching) < mpl[names]:
                        temp_temp_time = time[pairs[0]:pairs[1]]
                        ints_temp = np.argmax(SegmentForPeakSearching)
                        peak_time[names].append(temp_temp_time[ints_temp])
        peak_time['mean'],peak_at,duration=[],[],[]
        RMS_mean = hmean(RMS)
        mph['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS_mean,0.05)
        mpl['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS_mean,0.05)
        pass_ = RMS_mean > mph['mean']
        #pass_ = (RMS_mean > mph['mean']) & (RMS_mean < mpl['mean'])
        up = np.where(np.diff(pass_.astype(int))>0)
        down= np.where(np.diff(pass_.astype(int))<0)
        up = up[0]
        down = down[0]
        if down[0] < up[0]:
            down = down[1:]
        if (up.shape > down.shape) or (up.shape < down.shape):
            size = np.min([up.shape,down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up,down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1],]
                if np.max(SegmentForPeakSearching)< mpl['mean']:
                    temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time['mean'].append(temp_time[ints_temp])
                    peak_at.append(SegmentForPeakSearching[ints_temp])
                    duration_temp = time[pairs[1]] - time[pairs[0]]
                    duration.append(duration_temp)
        time_find=[];mean_peak_power=[];Duration=[];
        for item,PEAK,duration_time in zip(peak_time['mean'],peak_at,duration):
            temp_timePoint=[]
            for ii, names in enumerate(channelList):
                try:
                    temp_timePoint.append(min(enumerate(peak_time[names]), key=lambda x: abs(x[1]-item))[1])
                except:
                    temp_timePoint.append(item + 2)
            try:
                if np.sum((abs(np.array(temp_timePoint) - item)<tol).astype(int))>=syn_channels:
                    time_find.append(float(item))
                    mean_peak_power.append(PEAK)
                    Duration.append(duration_time)
                    #print(float(item),PEAK,duration_time)
            except:
                pass
        self.time_find = time_find
        self.mean_peak_power = mean_peak_power
        self.Duration = Duration
def get_Onest_Amplitude_Duration_of_spindles(raw,channelList,file_to_read,moving_window_size=200,threshold=.9,syn_channels=3,l_freq=0,h_freq=200,l_bound=0.5,h_bound=2,tol=1):
    """
    raw: data after preprocessing
    channelList: channel list of interest
    file_to_read: raw data file names
    moving_window_size: size of the moving window for convolved root mean square computation
    threshold: threshold for spindle detection: threshold = mean + threshold * std
    syn_channels: criteria for selecting spindles: at least # of channels have spindle instance and also in the mean channel
    l_freq: high pass frequency for spindle range
    h_freq: low pass frequency for spindle range
    l_bound: low boundary for duration of a spindle instance
    h_bound: high boundary for duration of a spindle instance
    tol : tolerance for determining spindles (criteria in time)
    """
    mul=threshold;nn=4.5
    
    time=np.linspace(0,raw.last_samp/raw.info['sfreq'],raw._data[0,:].shape[0])
    RMS = np.zeros((len(channelList),raw._data[0,:].shape[0]))
    peak_time={} #preallocate
    fig=plt.figure(figsize=(40,40))
    ax=plt.subplot(311)
    ax1=plt.subplot(312,sharex=ax)
    ax2=plt.subplot(313,sharex=ax)
    for ii, names in enumerate(channelList):

        peak_time[names]=[]
        segment,_ = raw[ii,:]
        RMS[ii,:] = window_rms(segment[0,:],moving_window_size) # window of 200ms
        mph = trim_mean(RMS[ii,100000:-30000],0.05) + mul * trimmed_std(RMS[ii,:],0.05) # higher sd = more strict criteria
        mpl = trim_mean(RMS[ii,100000:-30000],0.05) + nn * trimmed_std(RMS[ii,:],0.05)
        pass_= RMS[ii,:] > mph

        up = np.where(np.diff(pass_.astype(int))>0)
        down = np.where(np.diff(pass_.astype(int))<0)
        up = up[0]
        down = down[0]
        ###############################
        #print(down[0],up[0])
        if down[0] < up[0]:
            down = down[1:]
        #print(down[0],up[0])
        #############################
        if (up.shape > down.shape) or (up.shape < down.shape):
            size = np.min([up.shape,down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up,down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                TimePoint = np.mean([time[pairs[1]],time[pairs[0]]])
                SegmentForPeakSearching = RMS[ii,pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
                    ax.scatter(temp_temp_time[ints_temp],mph+0.1*mph,marker='s',
                               color='blue')
        ax.plot(time,RMS[ii,:],alpha=0.2,label=names)
        ax2.plot(time,segment[0,:],label=names,alpha=0.3)
        ax2.set(xlabel="time",ylabel="$\mu$V",xlim=(time[0],time[-1]),title=file_to_read[:-5]+' band pass %.1f - %.1f Hz' %(l_freq,h_freq))
        ax.set(xlabel="time",ylabel='RMS Amplitude',xlim=(time[0],time[-1]),title='auto detection on each channels')
        ax1.set(xlabel='time',ylabel='Amplitude')
        ax.axhline(mph,color='r',alpha=0.03)
        ax2.legend();ax.legend()

    peak_time['mean']=[];peak_at=[];duration=[]
    RMS_mean=hmean(RMS)
    ax1.plot(time,RMS_mean,color='k',alpha=0.3)
    mph = trim_mean(RMS_mean[100000:-30000],0.05) + mul * RMS_mean.std()
    mpl = trim_mean(RMS_mean[100000:-30000],0.05) + nn * RMS_mean.std()
    pass_ = RMS_mean > mph
    up = np.where(np.diff(pass_.astype(int))>0)
    down= np.where(np.diff(pass_.astype(int))<0)
    up = up[0]
    down = down[0]
    ###############################
    #print(down[0],up[0])
    if down[0] < up[0]:
        down = down[1:]
    #print(down[0],up[0])
    #############################
    if (up.shape > down.shape) or (up.shape < down.shape):
        size = np.min([up.shape,down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up,down))
    for pairs in C.T:
        
        if 0.5 < (time[pairs[1]] - time[pairs[0]]) < 2:
            TimePoint = np.mean([time[pairs[1]] , time[pairs[0]]])
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1],]
            if np.max(SegmentForPeakSearching)< mpl:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                ax1.scatter(temp_time[ints_temp],mph+0.1*mph,marker='s',color='blue')
                duration_temp = time[pairs[1]] - time[pairs[0]]
                duration.append(duration_temp)
    ax1.axhline(mph,color='r',alpha=1.)
    ax1.set_xlim([time[0],time[-1]])


    time_find=[];mean_peak_power=[];Duration=[]
    for item,PEAK,duration_time in zip(peak_time['mean'],peak_at,duration):
        temp_timePoint=[]
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]), key=lambda x: abs(x[1]-item))[1])
            except:
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item)<tol).astype(int))>syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except:
            pass
    return time_find,mean_peak_power,Duration,fig,ax,ax1,ax2,peak_time,peak_at
Example #53
0
from matplotlib import pyplot as plt
from scipy.stats import trim_mean
from numpy import random
from numpy import array, dot, nan, isnan, nanmean, nanmedian
import numpy.random as rd
from tmc import trimmeancomplex

a     = 0.02#rd.rand()
N     = 100;
Nnan  = 15;
sqrtR = array([[1, 0.2],[0.04, 2]])
R     = dot(sqrtR,sqrtR.transpose())
xtab  = dot(R,rd.randn(2,N))
x     = xtab[0,:]+1j*xtab[1,:];
indperm = random.randint(1,N,N);
x[indperm[range(Nnan)]] = nan+1j*nan
mytt  = nanmean(trimmeancomplex(x,a,visu=1));
tt    = trim_mean(x,(1.0-a)/2.0)

from scipy import optimize as opt

def n1(u,x):
    indnotnan = ~isnan(x)  # boolean mask of the non-NaN samples
    r=sum(abs(u[0]-x.real[indnotnan])+abs(u[1]-x.imag[indnotnan]))
    return r
    
init0 = [x[0].real, x[0].imag];
res = opt.fmin(n1,init0,args=(x,))
print res
print [nanmedian(x).real,nanmedian(x).imag]
print [nanmean(mytt).real, nanmean(mytt).imag]
#plt.plot(x.real,x.imag,'x')
#plt.hold(True)
Example #54
0
def animate(i):
    global opts, framegrabber, frames, params, flowStartFrame
    global roi, flowMask, lmask, rmask, tmask, bmask
    global flowVals, times, history
    global logfile
    global codes

    update = [imdisp, foedisp, q_foe, b_latdiv, b_verdiv, b_ttc]    
    t1 = time.time()
    # ------------------------------------------------------------
    # Compute optical flow
    # ------------------------------------------------------------    
    # grab the current frame, update indices
    clrframe = framegrabber.next()
    currFrame = cv2.cvtColor(clrframe, cv2.COLOR_BGR2GRAY)
    framenum = i + flowStartFrame
    times[i] = framenum

    prvs = sum(frames) / float(opts.frameavg)
    nxt = (sum(frames[1:]) + currFrame) / float(opts.frameavg)
    flow = cv2.calcOpticalFlowFarneback(prvs[startY:stopY,startX:stopX]
                                        , nxt[startY:stopY,startX:stopX]
                                        , **params)
    mag, angle = cv2.cartToPolar(flow[...,0], flow[...,1])

    # ------------------------------------------------------------
    # Remove outlier flow
    # ------------------------------------------------------------    
    if opts.nofilt:
        thresh_mask = flowMask
    else:
        # clean up flow estimates, remove outliers
        # thresh_mask = threshold_local(mag, shape=(20,20), llim=0, ulim=0.96)
        # global_mask = threshold_global(mag, llim=0.00, ulim=0.99)[0]
        global_mask = np.ones_like(flowMask)
        lthresh = 1e-3
        thresh_mask = (mag > lthresh) & global_mask
        flow[~thresh_mask] = 0
        mag[~thresh_mask] = 0

    # ------------------------------------------------------------
    # estimate the location of the FoE
    # ------------------------------------------------------------    
    # S = generic2dFilter(angle, (foeW, foeW), matchWin, step=dt, padded=True)
    # participants = generic2dFilter(thresh_mask, (foeW, foeW), np.sum
    #                                , padded=True, step=dt)
    # S /= participants
    # foe_y_subsearch, foe_x_subsearch = np.unravel_index(np.argmin(S), S.shape)
    # foe_y, foe_x = startY + foeW//2 + foe_y_subsearch*dt , startX + foeW//2 + foe_x_subsearch*dt
    # foe_x, foe_y = FindFoE(flow[...,0][foeW//2:-foeW//2],flow[...,1][foeW//2:-foeW//2])
    # print
    # print foe_x, foe_y
    # foe_x, foe_y = startX + maskW//2, startY + maskW//2
    foe_x, foe_y = 183, 80
    p0, p1 = (foe_x-foeW//2, foe_y-foeW//2), (foe_x+foeW//2, foe_y+foeW//2)
    # confidence= participants[foe_y_subsearch, foe_x_subsearch] / (foeW**2)
    confidence=0
    divTemplates = generate2dTemplates(p0, p1, thresh_mask.shape, thresh_mask)
    foe_tmask, foe_bmask, foe_lmask, foe_rmask = divTemplates
    foeSlice_y, foeSlice_x = slice(p0[1],p1[1]+1), slice(p0[0],p1[0]+1)

    # ------------------------------------------------------------
    # estimate divergence parameters and ttc for this frame
    # ------------------------------------------------------------
    xDiv = (np.sum(flow[rmask,0]) - np.sum(flow[lmask,0])) \
           /(np.sum((lmask|rmask) & thresh_mask) + EPS)
    yDiv = (np.sum(flow[tmask,1]) - np.sum(flow[bmask,1])) \
           /(np.sum((tmask|bmask) & thresh_mask) + EPS)
    xDiv_foe = (np.sum(flow[foe_rmask,0]) - np.sum(flow[foe_lmask,0])) \
               /(np.sum(foe_lmask|foe_rmask) + EPS)
    yDiv_foe = (np.sum(flow[foe_tmask, 1]) - np.sum(flow[foe_bmask, 1])) \
               /(np.sum(foe_tmask|foe_bmask) + EPS)
    ttc = 2/(xDiv + yDiv + EPS)
    history[:, :-1] = history[:, 1:]; history[:, -1] = (xDiv,yDiv,ttc)

    # ------------------------------------------------------------    
    # use estimation history to estimate new values
    # ------------------------------------------------------------
    if i > history.shape[1]:
        flowVals[:-1, i] = np.sum(history[:-1]*w_forget, axis=1)/sum(w_forget)
        # flowVals[-1, i] = np.median(history[-1,-3:])

        # m, y0, _, _, std = stats.linregress(np.arange(5)
        #                                     , flowVals[-1,i-4:i+1]*w_forget[:5]/sum(w_forget[:5]))
        # flowVals[2, i] =  m*times[i] + y0
        # flowVals[2, i] = np.median(history[-1, -3:])
        flowVals[2, i] = stats.trim_mean(history[-1, -5:],0.4)
    else:
        flowVals[:, i] = (xDiv,yDiv,ttc)

    # ------------------------------------------------------------            
    # write out results
    # ------------------------------------------------------------
    t2 = time.time()
    out = (framenum, xDiv, yDiv, ttc, 100.*confidence, t2-t1)
    if opts.log:
        print >> logfile, ','.join(map(str,out))
    if not opts.quiet:
        sys.stdout.write("\r%4d %+6.2f %+6.2f %6.2f %6.2f %6.2f" % out)
        sys.stdout.flush()

    # ------------------------------------------------------------
    # update figure
    # ------------------------------------------------------------    
    b_latdiv.set_height(flowVals[0,i])
    b_verdiv.set_height(flowVals[1,i])
    b_ttc.set_height(flowVals[2,i])
    foedisp.set_data(clrframe[foeSlice_y, foeSlice_x, ::-1].copy())
    # clrframe[mag <= lthresh, :] = codes.colors[0][::-1]
    # clrframe[~global_mask, :] = codes.colors[-1][::-1]
    cv2.rectangle(clrframe, p0, p1, color=(0,255,0))
    cv2.rectangle(clrframe, p0, (foe_x, foe_y+foeW//2), color=(255,0,0))
    if opts.vis == "color_overlay":
        cf.colorFlow(flow, clrframe[...,::-1]
                     , slice(startX,stopX), slice(startY,stopY), thresh_mask)
        dispim = clrframe[..., ::-1]
    elif opts.vis == "color":
        dispim = cf.flowToColor(flow)
    elif opts.vis == "quiver":
        update.append(q_img) # add this object to those that are to be updated
        q_img.set_UVC(flow[flow_strides, flow_strides, 0]
                      , flow[flow_strides, flow_strides, 1]
                      , (mag[flow_strides, flow_strides] \
                         *255/(np.max(mag)-np.min(mag)+EPS)))
        dispim = clrframe[..., ::-1]
    imdisp.set_data(dispim)

    unitmag = 2*np.ones(foedisp.get_size())
    foeKern = generateFoEkernel(foeW)
    foeKern[foeW//2, foeW//2] = angle[foe_y,foe_x]
    sim = (foeKern-angle[foeSlice_y,foeSlice_x])**2
    X, Y = cv2.polarToCart(unitmag, angle[foeSlice_y,foeSlice_x].astype(np.float))
    q_foe.set_UVC(X[1:-1:2, 1:-1:2], Y[1:-1:2, 1:-1:2]
                  , (sim[1:-1:2, 1:-1:2] \
                     *255/(np.max(sim)-np.min(sim)+EPS)))

    # shift the frame buffer
    frames[:-1] = frames[1:]; frames[-1] = currFrame

    return update
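Note on the smoothing step above: with a five-sample history and proportiontocut=0.4, stats.trim_mean drops int(0.4 * 5) = 2 samples from each end, so the call reduces to the median of the last five TTC estimates, consistent with the commented-out np.median alternative. A quick check on made-up values:

import numpy as np
from scipy import stats

window = np.array([2.1, 9.7, 2.4, 2.2, -5.0])  # made-up ttc history
assert stats.trim_mean(window, 0.4) == np.median(window)  # both give 2.2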
def trimmed_mean(x, percent=0.2):
    return stats.trim_mean(x, percent)
def thresholding_filterbased_spindle_searching(raw,channelList,annotations,moving_window_size=200,lower_threshold=.9,
                                        syn_channels=3,l_bound=0.5,h_bound=2,tol=1,higher_threshold=3.5,
                                        front=300,back=100,sleep_stage=True,proba=False,validation_windowsize=3):
    
    
    time=np.linspace(0,raw.last_samp/raw.info['sfreq'],raw._data[0,:].shape[0])
    RMS = np.zeros((len(channelList),raw._data[0,:].shape[0]))
    peak_time={} #preallocate
    sfreq=raw.info['sfreq']
    mph,mpl = {},{}

    for ii, names in enumerate(channelList):

        peak_time[names]=[]
        segment,_ = raw[ii,:]
        RMS[ii,:] = window_rms(segment[0,:],moving_window_size) 
        mph[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS[ii,:],0.05) 
        mpl[names] = trim_mean(RMS[ii,int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS[ii,:],0.05)
        pass_ = RMS[ii,:] > mph[names]  # should be greater than the mean, not the threshold, to compute duration

        up = np.where(np.diff(pass_.astype(int))>0)
        down = np.where(np.diff(pass_.astype(int))<0)
        up = up[0]
        down = down[0]
        ###############################
        #print(down[0],up[0])
        if down[0] < up[0]:
            down = down[1:]
        #print(down[0],up[0])
        #############################
        if (up.shape > down.shape) or (up.shape < down.shape):
            size = np.min([up.shape,down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up,down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS[ii,pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl[names]:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
                    
        

    peak_time['mean']=[];peak_at=[];duration=[]
    RMS_mean=hmean(RMS)
    
    mph['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + lower_threshold * trimmed_std(RMS_mean,0.05)
    mpl['mean'] = trim_mean(RMS_mean[int(front*sfreq):-int(back*sfreq)],0.05) + higher_threshold * trimmed_std(RMS_mean,0.05)
    pass_ =RMS_mean > mph['mean']
    up = np.where(np.diff(pass_.astype(int))>0)
    down= np.where(np.diff(pass_.astype(int))<0)
    up = up[0]
    down = down[0]
    ###############################
    #print(down[0],up[0])
    if down[0] < up[0]:
        down = down[1:]
    #print(down[0],up[0])
    #############################
    if (up.shape > down.shape) or (up.shape < down.shape):
        size = np.min([up.shape,down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up,down))
    for pairs in C.T:
        
        if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1],]
            if np.max(SegmentForPeakSearching)< mpl['mean']:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                duration_temp = time[pairs[1]] - time[pairs[0]]
                duration.append(duration_temp) 
            
        
    time_find=[];mean_peak_power=[];Duration=[];
    for item,PEAK,duration_time in zip(peak_time['mean'],peak_at,duration):
        temp_timePoint=[]
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]), key=lambda x: abs(x[1]-item))[1])
            except:
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item)<tol).astype(int))>=syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except:
            pass
    if sleep_stage:
        
        temp_time_find=[];temp_mean_peak_power=[];temp_duration=[];
        # separate out stage 2
        stages = annotations[annotations.Annotation.apply(stage_check)]
        On = stages[::2];Off = stages[1::2]
        stage_on_off = list(zip(On.Onset.values, Off.Onset.values))
        if abs(np.diff(stage_on_off[0]) - 30) < 2:
            pass
        else:
            On = stages[1::2];Off = stages[::2]
            stage_on_off = list(zip(On.Onset.values[1:], Off.Onset.values[2:]))
        for single_time_find, single_mean_peak_power, single_duration in zip(time_find,mean_peak_power,Duration):
            for on_time,off_time in stage_on_off:
                if intervalCheck([on_time,off_time],single_time_find,tol=tol):
                    temp_time_find.append(single_time_find)
                    temp_mean_peak_power.append(single_mean_peak_power)
                    temp_duration.append(single_duration)
        time_find=temp_time_find;mean_peak_power=temp_mean_peak_power;Duration=temp_duration
    
    result = pd.DataFrame({'Onset':time_find,'Duration':Duration,'Annotation':['spindle']*len(Duration)})     
    auto_label,_ = discritized_onset_label_auto(raw,result,validation_windowsize)
    decision_features=None;auto_proba=None  # auto_proba must exist even when proba=False since it is returned below
    if proba:
        events = mne.make_fixed_length_events(raw,id=1,start=0,duration=validation_windowsize)
        epochs = mne.Epochs(raw,events,event_id=1,tmin=0,tmax=validation_windowsize,preload=True)
        data = epochs.get_data()[:,:,:-1]
        full_prop=[]        
        for d in data:    
            temp_p=[]
            #fig,ax = plt.subplots(nrows=2,ncols=3,figsize=(8,8))
            for ii,(name) in enumerate(zip(channelList)):#,ax.flatten())):
                rms = window_rms(d[ii,:],500)
                l = trim_mean(rms,0.05) + lower_threshold * trimmed_std(rms,0.05)
                h = trim_mean(rms,0.05) + higher_threshold * trimmed_std(rms,0.05)
                prop = (sum(rms>l)+sum(rms<h))/(sum(rms<h) - sum(rms<l))
                temp_p.append(prop)
                
            
            full_prop.append(temp_p)
        psds,freq = mne.time_frequency.psd_multitaper(epochs,fmin=11,fmax=16,tmin=0,tmax=3,low_bias=True,)
        psds = 10* np.log10(psds)
        features = pd.DataFrame(np.concatenate((np.array(full_prop),psds.max(2),freq[np.argmax(psds,2)]),1))
        decision_features = StandardScaler().fit_transform(features.values,auto_label)
        clf = LogisticRegressionCV(Cs=np.logspace(-4,6,11),cv=5,tol=1e-7,max_iter=int(1e7))
        clf.fit(decision_features,auto_label)
        auto_proba=clf.predict_proba(decision_features)[:,-1]
            
    return time_find,mean_peak_power,Duration,mph,mpl,auto_proba,auto_label