def orderByFoldChange(self, bidirection=True):
    Raw = self.canopus.Quant
    Quant = self.canopus.QuantNormalized
    pseudoCount = np.percentile(Quant.where(Quant > 0).stack().values, 0.1)
    A = self.conditionLeft.samples
    B = self.conditionRight.samples
    entries = np.array(
        list(self.__enoughEntries(Raw, A) | self.__enoughEntries(Raw, B)))
    entries = np.array(list(set(Raw.index[entries]) & set(Quant.index)))
    Quant = Quant.loc[entries, :]
    fold_changes = (trim_mean(Quant.loc[:, A], 0.1, axis=1) + pseudoCount) / \
        (trim_mean(Quant.loc[:, B], 0.1, axis=1) + pseudoCount)
    W = np.log10(fold_changes)
    if self.binning:
        binsize = 0.5 if self.binning is True else self.binning
        scale = 1.0 / binsize
        W = np.round(W * scale) / scale
    if bidirection:
        W = np.abs(W)
    table = pd.DataFrame(
        dict(compound=Quant.index,
             weight=W,
             fold_change=fold_changes,
             category=self.__assign_specific_class__(Quant.index)))
    table.sort_values(by="weight", ascending=False, inplace=True)
    table.set_index("compound", drop=True, inplace=True)
    # table[table.weight < 1] = 0.0  # we do not trust the lower values anyway
    self.ordering = table
def extraMyfeatures(epochs, channelList, epoch_length,
                    lower_threshold=0.4, higher_threshold=3.4,
                    l_freq=11, h_freq=16):
    """
    Types of features described in the https://osf.io/aqgxe/ paper:
    1. root-mean-square of a segment
    2. peak frequency power
    3. peak frequency
    """
    full_prop = []
    data = epochs.get_data()
    for d in data:
        temp_p = []
        for ii, name in enumerate(channelList):
            rms = window_rms(d[ii, :], epochs.info['sfreq'])
            l = stats.trim_mean(rms, 0.05) + lower_threshold * trimmed_std(rms, 0.05)
            h = stats.trim_mean(rms, 0.05) + higher_threshold * trimmed_std(rms, 0.05)
            prop = (sum(rms > l) + sum(rms < h)) / (sum(rms < h) - sum(rms < l))
            if np.isinf(prop):  # if the denominator is zero, don't divide
                prop = (sum(rms > l) + sum(rms < h))
            temp_p.append(prop)
        full_prop.append(temp_p)
    full_prop = np.array(full_prop)
    psds, freq = mne.time_frequency.psd_multitaper(
        epochs, fmin=l_freq, fmax=h_freq, tmin=0, tmax=epoch_length,
        low_bias=True, n_jobs=3)
    psds = 10 * np.log10(psds)
    features = np.concatenate(
        (full_prop, psds.max(2), freq[np.argmax(psds, 2)]), 1)
    return features
def trimci(x, tr=.2, alpha=.05, null_value=0):
    """
    Compute a 1-alpha confidence interval for the trimmed mean.

    The default amount of trimming is tr=.2.

    :param x: 1-D array
    :param tr: proportion to trim from each tail
    :param alpha: significance level
    :param null_value: the p-value returned by this function is based on the
        value specified by this argument, which defaults to 0
    :return: dict with the confidence interval, estimate, test statistic,
        standard error, p-value, and sample size
    """
    x = x[~np.isnan(x)]
    se = np.sqrt(winvar(x, tr)) / ((1 - 2 * tr) * np.sqrt(len(x)))
    trimci_res = np.zeros(2)
    df = len(x) - 2 * np.floor(tr * len(x)) - 1
    trimci_res[0] = trim_mean(x, tr) - t.ppf(1 - alpha / 2, df) * se
    trimci_res[1] = trim_mean(x, tr) + t.ppf(1 - alpha / 2, df) * se
    test = (trim_mean(x, tr) - null_value) / se
    sig = 2 * (1 - t.cdf(abs(test), df))

    results = {"ci": trimci_res, "estimate": trim_mean(x, tr),
               "test_stat": test, "se": se, "p_value": sig, "n": len(x)}

    return results
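# A minimal usage sketch for trimci, with toy data; it assumes numpy, scipy's
# t and trim_mean, and the module's winvar helper are already in scope.
import numpy as np

scores = np.array([2.1, 2.4, 2.2, 9.7, 2.3, 2.5, 2.0, 2.6, 2.2, 2.4])
res = trimci(scores, tr=.2, alpha=.05)
print(res["ci"], res["p_value"])  # CI for the 20% trimmed mean and its p-value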
def compute_motion_compensation(motion_matrix, stab_mode, trimmed_mean_percentage):
    if stab_mode == 'mean':
        u = motion_matrix[:, :, 0].mean()
        v = motion_matrix[:, :, 1].mean()
    elif stab_mode == 'trimmed_mean':
        u = trim_mean(motion_matrix[:, :, 0], trimmed_mean_percentage, axis=None)
        v = trim_mean(motion_matrix[:, :, 1], trimmed_mean_percentage, axis=None)
    elif stab_mode == 'median':
        u, v = np.median(motion_matrix[:, :, 0]), np.median(motion_matrix[:, :, 1])
    elif stab_mode == 'mode':
        us, vs = cv2.cartToPolar(motion_matrix[:, :, 0], motion_matrix[:, :, 1])
        mu, mv = mode(us.ravel())[0], mode(vs.ravel())[0]
        u, v = cv2.polarToCart(mu, mv)
        u, v = u[0][0], v[0][0]
    else:
        raise NotImplementedError(
            "Choose one of the implemented modes: mean, trimmed_mean, median, mode")
    return u, v
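# Hypothetical usage sketch: estimate one global translation from a dense
# optical-flow field (toy data); 'trimmed_mean' sidesteps the cv2-based
# 'mode' branch, assuming scipy's trim_mean is imported as in the function.
import numpy as np

flow = np.random.randn(48, 64, 2)  # HxWx2 motion vectors
u, v = compute_motion_compensation(flow, 'trimmed_mean', 0.1)
print(u, v)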
@classmethod
def by_samples(cls, wrenches, proportion_to_cut=0.1):
    """
    Construct the wrench from sampled data, trimming outliers from both tails.

    Parameters
    ----------
    wrenches : list of :class:`Wrench`
        List of wrenches.
    proportion_to_cut : :obj:`float`
        Fraction to cut off of both tails of the distribution.

    Returns
    -------
    Wrench
        The mean wrench after trimming the distribution from both tails.

    Examples
    --------
    >>> w1 = Wrench([1, 1, 1], [.1, .1, .1])
    >>> w2 = Wrench([2, 2, 2], [.2, .2, .2])
    >>> w3 = Wrench([3, 3, 3], [.3, .3, .3])
    >>> w = Wrench.by_samples([w1, w2, w3])
    >>> print(w)
    Wrench(Vector(2.000, 2.000, 2.000), Vector(0.200, 0.200, 0.200))
    """
    if not stats:
        raise NotImplementedError("Not supported on this platform")

    forces = [w.force for w in wrenches]
    torques = [w.torque for w in wrenches]
    force = stats.trim_mean(forces, proportion_to_cut, axis=0).tolist()
    torque = stats.trim_mean(torques, proportion_to_cut, axis=0).tolist()
    return cls(force, torque)
def main():
    # a. Read in the data set
    df = pd.read_csv(data_file)

    # b. Replace all occurrences of missing data with NaN
    df.replace('.', np.nan, inplace=True)

    # Compute the mean, median, and trimmed mean for column "lowbwt";
    # drop rows if there are any NaN
    arr = pd.to_numeric(df.dropna(axis=0)['lowbwt']).values
    mean = np.mean(arr)
    median = np.median(arr)
    tmean = stats.trim_mean(arr, 0.05)
    print(mean, median, tmean)

    # same statistics for "life60"
    arr = pd.to_numeric(df.dropna(axis=0)['life60']).values
    mean = np.mean(arr)
    median = np.median(arr)
    tmean = stats.trim_mean(arr, 0.05)
    print(mean, median, tmean)

    # same statistics for "life92"
    arr = pd.to_numeric(df.dropna(axis=0)['life92']).values
    mean = np.mean(arr)
    median = np.median(arr)
    tmean = stats.trim_mean(arr, 0.05)
    print(mean, median, tmean)
def find_peaks(raw, channelList, windowSize, threshold, hh, result):
    ms = np.zeros([6, 2])
    RMS = np.zeros((len(channelList), raw._data[0, :].shape[0]))
    idx_left = [0, 2, 4]
    idx_right = [1, 3, 5]
    for ii, name in enumerate(channelList):
        segment, _ = raw[ii, :]
        RMS[ii, :] = eegPinelineDesign.window_rms(segment[0, :], windowSize)
        mph = trim_mean(RMS[ii, 100000:-30000], 0.05) + threshold * eegPinelineDesign.trimmed_std(RMS[ii, 100000:-30000], 0.05)
        mpl = trim_mean(RMS[ii, 100000:-30000], 0.05) + hh * eegPinelineDesign.trimmed_std(RMS[ii, 100000:-30000], 0.05)
        ms[ii, :] = [mph, mpl]
    peaks = []
    for time_stamp, duration in zip(result.Onset, result.Duration):
        start, stop = time_stamp - duration, time_stamp + duration
        start_, stop_ = raw.time_as_index([start, stop])
        segment, times = raw[:, start_:stop_]
        temp = []
        for ii, name in enumerate(channelList):
            info = mne.create_info([name], raw.info['sfreq'])
            E = mne.EvokedArray(segment[ii, :].reshape(1, -1), info)
            _, peak = E.get_peak(mode='pos')
            # index of the sample closest to the detected peak
            temporal_mark = np.argmin(abs(times - (peak + start)))
            temp.append(segment[ii, temporal_mark])
        peaks.append(temp)
    peaks = np.array(peaks)
    # peaks = peaks / peaks.std(0)
    return peaks[:, idx_left], peaks[:, idx_right]
def calculate_central_tendency(dataframe, neutralise_categories, category1,
                               metadata, mode, zeros, with_authors, print_=True):
    """
    Calculates central tendencies of a table.
    """
    if print_ == True:
        print("central tendency:", mode, zeros, with_authors)
    if with_authors == "with authors":
        dataframe_values = dataframe.values
        dataframe_values = np.reshape(dataframe_values, -1)
        if zeros == "without zeros":
            dataframe_values = np.trim_zeros(np.sort(dataframe_values))
        if mode == "median":
            tendency = dataframe.stack().median()
        elif mode == "mean":
            tendency = dataframe.stack().mean()
        elif mode == "trimming mean":
            tendency = stats.trim_mean(dataframe, proportiontocut=0.1)
    elif with_authors == "without authors":
        dataframe_without_category = np.empty([0, ])
        # TODO: a better way to do this without rotating the table?
        dataframe = dataframe.rename(lambda x: x + "_" + metadata.loc[x, category1])
        dataframe = dataframe.T
        dataframe = dataframe.rename(lambda x: x + "_" + metadata.loc[x, category1])
        dataframe = dataframe.T
        # For each pair of distinct authors
        for neutralise_category1 in neutralise_categories:
            for neutralise_category2 in neutralise_categories:
                if neutralise_category1 == neutralise_category2:
                    pass
                else:
                    # Build indices to extract the sub-dataframes of each author
                    category_columns = dataframe.columns.to_series().str.endswith("_" + neutralise_category1)
                    category_rows = dataframe.index.to_series().str.endswith("_" + neutralise_category2)
                    # Take the values and flatten them
                    values_without_category = dataframe.loc[category_rows, category_columns].values
                    values_without_category = np.reshape(values_without_category, -1)
                    dataframe_without_category = np.concatenate(
                        (dataframe_without_category, values_without_category), axis=0)
        if mode == "median":
            tendency = np.median(dataframe_without_category)
        elif mode == "mean":
            tendency = np.mean(dataframe_without_category)
        elif mode == "trimming mean":
            tendency = stats.trim_mean(dataframe_without_category, proportiontocut=0.1)
    return tendency
def lineplot_radio(f):
    # each line of f: TIEMPOS_EN_C.txt TIEMPOS_EN_ASM.txt RADIO
    tests_c = []
    tests_asm = []
    radio_sizes = []
    fobj = open(f, 'r')
    for line in fobj:
        words = line.split(' ')
        tests_c.append(words[0])
        tests_asm.append(words[1])
        radio_sizes.append(words[2].rstrip('\n'))
    fobj.close()

    buffer_c = []
    for file in tests_c:
        times_list = fileTolist(file)
        buffer_c.append(times_list)
    # Normalize
    for i in xrange(len(radio_sizes)):
        buffer_c[i] = map(lambda x: x / 256**2, buffer_c[i])
    cMeans = []
    cStd = []
    for xs in buffer_c:
        cMeans.append(trim_mean(xs, 0.25))
        cStd.append(np.std(xs))

    buffer_asm = []
    for file in tests_asm:
        times_list = fileTolist(file)
        buffer_asm.append(times_list)
    # Normalize
    for i in xrange(len(radio_sizes)):
        buffer_asm[i] = map(lambda x: x / 256**2, buffer_asm[i])
    asmMeans = []
    asmStd = []
    for xs in buffer_asm:
        asmMeans.append(trim_mean(xs, 0.25))
        asmStd.append(np.std(xs))

    fig, ax = plt.subplots()
    plt.plot(radio_sizes, cMeans, 'ro')
    rects1 = ax.errorbar(radio_sizes, cMeans, yerr=cStd)
    plt.plot(radio_sizes, asmMeans, 'ro')
    rects2 = ax.errorbar(radio_sizes, asmMeans, yerr=asmStd)
    ax.set_ylabel('#ticks/pixel')
    ax.set_title(u'Blur C vs Blur ASM en función del tamaño del radio')
    ax.set_xlabel(u'Radio')
    ax.legend((rects1[0], rects2[0]), ('C', 'ASM'), loc=2)
    plt.savefig('lineplot_radio.pdf')
def trimmed_mean(self, alpha):
    if all(is_scalar(x) for x in self):
        return stats.trim_mean(self, alpha)
    elif get_dimension(self) > 0:
        return tuple(stats.trim_mean(self, alpha, axis=0))
    else:
        raise Exception(
            "I don't know how to take the trimmed_mean of these values.")
def fix_linesearch(i, x):
    _r = dv.region(i)
    x[dv.diffusivity_indices(i)] = min(
        x[dv.diffusivity_indices(i)],
        trim_mean(x[dv.diffusivity_indices(_r)], .25))
    x[dv.potential_indices(i)] = min(
        x[dv.potential_indices(i)],
        trim_mean(x[dv.potential_indices(_r)], .25))
def lineplot_blur(f):
    tests_c = []
    tests_asm = []
    img_sizes = []
    fobj = open(f, 'r')
    for line in fobj:
        words = line.split(' ')
        tests_c.append(words[0])
        tests_asm.append(words[1])
        img_sizes.append(words[2].rstrip('\n'))
    fobj.close()

    buffer_c = []
    for file in tests_c:
        times_list = fileTolist(file)
        buffer_c.append(times_list)
    # Normalize by image area
    for i in xrange(len(img_sizes)):
        buffer_c[i] = map(lambda x: x / (float(img_sizes[i])**2), buffer_c[i])
    cMeans = []
    cStd = []
    for xs in buffer_c:
        cMeans.append(trim_mean(xs, 0.25))
        cStd.append(np.std(xs))

    buffer_asm = []
    for file in tests_asm:
        times_list = fileTolist(file)
        buffer_asm.append(times_list)
    # Normalize by image area
    for i in xrange(len(img_sizes)):
        buffer_asm[i] = map(lambda x: x / (float(img_sizes[i])**2), buffer_asm[i])
    asmMeans = []
    asmStd = []
    for xs in buffer_asm:
        asmMeans.append(trim_mean(xs, 0.25))
        asmStd.append(np.std(xs))

    fig, ax = plt.subplots()
    plt.plot(img_sizes, cMeans, 'ro')
    rects1 = ax.errorbar(img_sizes, cMeans, yerr=cStd)
    plt.plot(img_sizes, asmMeans, 'ro')
    rects2 = ax.errorbar(img_sizes, asmMeans, yerr=asmStd)
    ax.set_ylabel('#ticks/pixel')
    ax.set_title(u'Blur C vs Blur ASM')
    ax.set_xlabel(u'Ancho de imagen')
    ax.legend((rects1[0], rects2[0]), ('C', 'ASM'), loc=2)
    plt.savefig('blur_lineplot.pdf')
def liquidity(df_one):
    stom, stoq, stoa = [None] * 3
    # share turnover over ~1 month (21), 1 quarter (63) and 1 year (252)
    # of trading days; each requires at least 50% non-zero observations
    if df_one[-21:][df_one > 0].size >= (21 * 0.5):
        stom = np.log(trim_mean(df_one[-21:][df_one > 0], 0.1))
    if df_one[-63:][df_one > 0].size >= (63 * 0.5):
        stoq = np.log(trim_mean(df_one[-63:][df_one > 0], 0.1))
    if df_one[df_one > 0].size >= (252 * 0.5):
        stoa = np.log(trim_mean(df_one[df_one > 0], 0.1))
    return {'code': df_one.name, 'stom': stom, 'stoq': stoq, 'stoa': stoa}
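# Hypothetical usage sketch for liquidity(): df_one would be one stock's
# daily turnover series; the series name and values used here are
# illustrative assumptions, not from the original source.
import numpy as np
import pandas as pd

turnover = pd.Series(np.random.lognormal(size=300), name='000001')
print(liquidity(turnover))  # {'code': '000001', 'stom': ..., 'stoq': ..., 'stoa': ...}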
def trimmed_mean(values, axis=1):
    '''Returns the trimmed mean of each row of a matrix.'''
    if isinstance(values, _sparse.csr_matrix):
        ret = _st.trim_mean(values, proportiontocut=.25, axis=axis)
        return ret
    else:
        return _st.trim_mean(_np.asmatrix(values), proportiontocut=.25, axis=axis)
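# Minimal usage sketch, assuming the module's _np/_st aliases are numpy and
# scipy.stats; each row's top and bottom 25% are discarded before averaging.
rows = [[1, 2, 3, 100], [4, 5, 6, 7]]
print(trimmed_mean(rows))  # [2.5, 5.5]: the outlier 100 is cut away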
def psuedo_rms(lower_threshold, higher_threshold, signal, sample_size=500):
    from scipy.stats import trim_mean
    rms = window_rms(signal, sample_size)
    l = trim_mean(rms, 0.05) + lower_threshold * trimmed_std(rms, 0.05)
    h = trim_mean(rms, 0.05) + higher_threshold * trimmed_std(rms, 0.05)
    prop = (sum(rms > l) + sum(rms < h)) / (sum(rms < h) - sum(rms < l))
    if np.isinf(prop):  # if the denominator is zero, don't divide
        prop = (sum(rms > l) + sum(rms < h))
    return prop
def make_meta_epochs(epochs, y, n_bin=100):
    from scipy.stats import trim_mean
    from mne.epochs import EpochsArray
    meta_data = list()  # EEG data
    meta_y = list()  # regressors
    n = len(epochs)

    # make continuous y into bins to become categorical
    if len(np.unique(y)) < n_bin:
        hist, bin_edge = np.histogram(y, n_bin)
        y_ = y
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            y_[sel] = .5 * (high + low)
        y = y_

    # if discrete and few categories
    if len(np.unique(y)) < n_bin:
        already_used = list()
        for this_y in np.unique(y):
            for ii in range(n // len(np.unique(y)) // n_bin):
                sel = np.where(y == this_y)[0]
                sel = [ii for ii in sel if ii not in already_used][:n_bin]
                if not len(sel):
                    continue
                meta_data.append(
                    trim_mean(epochs._data[sel, :, :], .05, axis=0))
                meta_y.append(this_y)
                already_used += sel
    else:
        hist, bin_edge = np.histogram(y, n_bin)
        for low, high in zip(bin_edge[:-1], bin_edge[1:]):
            sel = np.where((y >= low) & (y < high))[0]
            this_y = .5 * (high + low)
            if not len(sel):
                continue
            meta_data.append(trim_mean(epochs._data[sel, :, :], .05, axis=0))
            meta_y.append(this_y)

    events = np.vstack(
        (np.zeros(len(meta_y)), np.zeros(len(meta_y)), meta_y)).T
    events = np.array(np.round(events), int)

    # transform into epochs
    new_epochs = EpochsArray(meta_data, epochs.info, events=events,
                             verbose=False)
    new_epochs.events = np.array(new_epochs.events, float)
    new_epochs.events[:, 2] = meta_y
    # XXX why change time and sfreq?
    new_epochs.times = epochs.times
    new_epochs.info['sfreq'] = epochs.info['sfreq']
    return new_epochs
def yuen(x, y, tr=.2, alpha=.05):
    """
    Perform Yuen's test for trimmed means on the data in x and y.
    The default amount of trimming is 20%. Missing values are
    automatically removed.

    A confidence interval for the trimmed mean of x minus the
    trimmed mean of y is computed and returned in yuen['ci'].
    The p-value is returned in yuen['p_value'].

    For an omnibus test with more than two independent groups,
    use t1way (may not be implemented yet).

    :param x: the data for the first group
    :param y: the data for the second group
    :param tr: amount of trimming (tr=0 results in using the sample mean)
    :param alpha: significance level
    :return: dict of results
    """
    if tr == .5:
        raise Exception("Using tr=.5 is not allowed; use a method designed "
                        "for medians (it may not be implemented yet)")
    if tr > .25:
        raise Warning("with tr>.25 type I error control might be poor")

    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    h1 = len(x) - 2 * np.floor(tr * len(x))
    h2 = len(y) - 2 * np.floor(tr * len(y))
    q1 = (len(x) - 1) * winvar(x, tr) / (h1 * (h1 - 1))
    q2 = (len(y) - 1) * winvar(y, tr) / (h2 * (h2 - 1))

    df = (q1 + q2) ** 2 / ((q1 ** 2 / (h1 - 1)) + (q2 ** 2 / (h2 - 1)))
    crit = t.ppf(1 - alpha / 2, df)
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    low = dif - crit * np.sqrt(q1 + q2)
    up = dif + crit * np.sqrt(q1 + q2)
    test = abs(dif / np.sqrt(q1 + q2))
    yuen_results = 2 * (1 - t.cdf(test, df))

    results = {'n1': len(x), 'n2': len(y),
               'est_1': trim_mean(x, tr), 'est_2': trim_mean(y, tr),
               'ci': [low, up], 'p_value': yuen_results, 'dif': dif,
               'se': np.sqrt(q1 + q2), 'test_stat': test,
               'crit': crit, 'df': df}

    return results
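# Minimal usage sketch for yuen() on two independent toy samples; numpy and
# the module's winvar helper are assumed to be importable.
import numpy as np

g1 = np.array([1.2, 1.9, 2.1, 2.4, 2.6, 3.0, 3.1, 9.0])
g2 = np.array([2.0, 2.2, 2.8, 3.0, 3.3, 3.6, 3.9, 4.1])
res = yuen(g1, g2, tr=.2)
print(res['dif'], res['ci'], res['p_value'])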
def impdivsborrows(M, S, K, T, C, P, r, D, Td):
    cout.info("Deriving borrows")
    bor = -1 / (T / base) * log(
        (C - P + K * exp(-r * (T / base)) + D * exp(-r * (Td / base))) / S)
    yld = -1 / (T / base) * log((C - P + K * exp(-r * (T / base))) / S)
    idx = np.where(M > 1)[0]
    bor = np.take(bor, idx, axis=0)
    bor = stats.trim_mean(bor, 0.01)
    yld = np.take(yld, idx, axis=0)
    yld = stats.trim_mean(yld, 0.01)
    return np.nan_to_num(bor), np.nan_to_num(yld)
def yuend(x, y, tr=.2, alpha=.05):
    """
    Compare the trimmed means of two dependent random variables
    using the data in x and y. The default amount of trimming is 20%.
    Any pair with a missing value is eliminated.

    A confidence interval for the trimmed mean of x minus the
    trimmed mean of y is computed and returned in yuend['ci'].
    The significance level is returned in yuend['p_value'].

    For inferences based on difference scores, use trimci.

    :param x: the data for the first measurement
    :param y: the data for the second measurement
    :param tr: amount of trimming
    :param alpha: significance level
    :return: dict of results
    """
    from hypothesize.measuring_associations import wincor

    if type(x) is not np.ndarray:
        x, y = pandas_to_arrays([x, y])

    m = np.c_[x, y]  # cbind
    m = m[~np.isnan(m).any(axis=1)]
    x = m[:, 0]
    y = m[:, 1]

    h1 = len(x) - 2 * np.floor(tr * len(x))
    q1 = (len(x) - 1) * winvar(x, tr)
    q2 = (len(y) - 1) * winvar(y, tr)
    q3 = (len(x) - 1) * wincor(x, y, tr)['wcov']

    df = h1 - 1
    se = np.sqrt((q1 + q2 - 2 * q3) / (h1 * (h1 - 1)))
    crit = t.ppf(1 - alpha / 2, df)
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    low = dif - crit * se
    up = dif + crit * se
    test = dif / se
    yuend_res = 2 * (1 - t.cdf(abs(test), df))

    keys = ['ci', 'p_value', 'est1', 'est2', 'dif', 'se', 'teststat', 'n', 'df']
    vals = [[low, up], yuend_res, trim_mean(x, tr), trim_mean(y, tr),
            dif, se, test, len(x), df]

    return dict(zip(keys, vals))
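# Minimal usage sketch for yuend() on paired toy measurements; it assumes the
# hypothesize package (for wincor) and the module's winvar are available.
import numpy as np

before = np.array([10.1, 11.4, 9.8, 12.0, 10.7, 11.1, 9.5, 10.9])
after = before + np.array([0.5, 0.2, 0.9, -0.1, 0.4, 0.6, 0.3, 0.8])
res = yuend(before, after, tr=.2)
print(res['dif'], res['ci'], res['p_value'])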
def orderByAbsoluteDifference(Quant, group1, group2):
    group1 = re.compile(group1)
    group2 = re.compile(group2)
    A = [m for m in Quant.columns if group1.match(m)]
    B = [m for m in Quant.columns if group2.match(m)]
    fold_changes = trim_mean(Quant.loc[:, A], 0.1, axis=1) - trim_mean(
        Quant.loc[:, B], 0.1, axis=1)
    table = pd.DataFrame(
        dict(compound=Quant.index,
             weight=np.abs(fold_changes),
             difference=fold_changes))
    table.sort_values(by="weight", ascending=False, inplace=True)
    return table
def trimvar(x, trimming):
    """
    Computes the trimmed variance of array x.

    Input:
        x: input data as numpy array
        trimming, float: trimming percentage to be used
    Output:
        The trimmed variance of x.
    """
    # division by n
    return sps.trim_mean(np.square(x - sps.trim_mean(x, trimming)), trimming)
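# Minimal usage sketch for trimvar(), assuming sps is scipy.stats; with
# trimming=0.25 both the center and the squared deviations are trimmed,
# so the outlier barely moves the estimate.
import numpy as np
import scipy.stats as sps

x = np.array([1.0, 1.1, 0.9, 1.2, 1.0, 1.1, 0.95, 10.0])
print(trimvar(x, 0.25), np.var(x))  # trimmed variance vs. plain variance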
def barplot_diff(f):
    tests_c = []
    tests_asm = []
    img_sizes = []
    fobj = open(f, 'r')
    for line in fobj:
        words = line.split(' ')
        tests_c.append(fileTolist(words[0]))
        tests_asm.append(fileTolist(words[1]))
        img_sizes.append(words[2].rstrip('\n'))
    fobj.close()

    cMeans = [trim_mean(x, 0.25) for x in tests_c]
    cStd = [np.std(x) for x in tests_c]
    asmMeans = [trim_mean(x, 0.25) for x in tests_asm]
    asmStd = [np.std(x) for x in tests_asm]

    N = len(img_sizes)
    ind = np.arange(N)  # the x locations for the groups
    width = 0.35  # the width of the bars

    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, cMeans, width, color='r', yerr=cStd)
    rects2 = ax.bar(ind + width, asmMeans, width, color='y', yerr=asmStd)

    # add some text for labels, title and axes ticks
    ax.set_ylabel('#ticks')
    ax.set_title(u'Diff C vs Diff ASM')
    ax.set_xticks(ind + width)
    ax.set_xlabel(u'Ancho de imagen')
    ax.set_xticklabels(img_sizes)
    ax.legend((rects1[0], rects2[0]), ('C', 'ASM'), loc=2)

    def autolabel(rects):
        # attach some text labels
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                    '%.1f' % (round(height, 2)), ha='center', va='bottom')

    # autolabel(rects1)
    # autolabel(rects2)
    plt.savefig('barplot.diff.c.vs.asm.pdf')
def std(X, trimming=0):
    """
    Column-wise standard deviation or trimmed std.
    Trimming to be entered as fraction.
    """
    if trimming == 0:
        s = np.power(np.var(X, axis=0), .5)
        s = np.array(s).reshape(-1)
    else:
        var = sps.trim_mean(np.square(X - sps.trim_mean(X, trimming, 0)),
                            trimming, 0)
        s = np.sqrt(var)
    return s
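# Minimal usage sketch: column-wise trimmed std on toy data with one
# contaminated row (sps assumed to be scipy.stats, np to be numpy).
import numpy as np

X = np.vstack([np.random.randn(50, 3), [[50., 50., 50.]]])
print(std(X))                # plain std, inflated by the outlier row
print(std(X, trimming=0.1))  # trimmed std, much closer to 1 per column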
def orderByFoldChange(Quant, group1, group2):
    pseudoCount = np.percentile(Quant.where(Quant > 0).stack().values, 1)
    group1 = re.compile(group1)
    group2 = re.compile(group2)
    A = [m for m in Quant.columns if group1.match(m)]
    B = [m for m in Quant.columns if group2.match(m)]
    fold_changes = (trim_mean(Quant.loc[:, A], 0.1, axis=1) + pseudoCount) / (
        trim_mean(Quant.loc[:, B], 0.1, axis=1) + pseudoCount)
    table = pd.DataFrame(
        dict(compound=Quant.index,
             weight=np.abs(np.log10(fold_changes)),
             fold_change=fold_changes))
    table.sort_values(by="weight", ascending=False, inplace=True)
    return table
def compute_tmean(data, mode="function", frac=0.1):
    if mode == "worker":
        # one trimmed mean per worker, then a plain mean across workers
        ret = []
        for vs in data.values():
            vs = list(vs.values())
            ret.append(scstats.trim_mean(vs, frac))
        return np.mean(ret)
    else:
        # pool all values and take a single trimmed mean
        data = invert_dict(data)
        ret = []
        for vs in data.values():
            vs = list(vs.values())
            ret.extend(vs)
        return scstats.trim_mean(ret, frac)
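# Minimal usage sketch for compute_tmean(), assuming data maps worker ->
# {function: timing} and invert_dict (used by the default mode) swaps the
# two key levels; values here are toy timings.
timings = {
    'w1': {'f': 1.0, 'g': 2.0},
    'w2': {'f': 1.2, 'g': 2.4},
}
print(compute_tmean(timings, mode="worker", frac=0.1))  # mean of per-worker trimmed means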
def trimmed_mean(self, alpha):
    self._set_array()
    if self.dim == 1:
        return Scalar(stats.trim_mean(self.array, alpha))
    elif self.dim is not None:
        return Vector(stats.trim_mean(self.array, alpha, axis=0))
    elif self.index_set is not None:
        def fn(t):
            return self[t].trimmed_mean(alpha)
        return TimeFunction.from_index_set(self.index_set, fn)
    else:
        raise Exception(
            "I don't know how to take the trimmed_mean of these values.")
def plot_company_vs_comparables(company, comparables, metric_obtainer,
                                metric_name, axs, subplot, trim_mean_by=0.1):
    metric_comparables = [
        x for x in list(map(metric_obtainer, comparables)) if x
    ]
    plot_val_vs_industry(company.ticker,
                         round(metric_obtainer(company), 2),
                         round(trim_mean(metric_comparables, trim_mean_by), 2),
                         metric_name, metric_name,
                         axs[subplot[0], subplot[1]])
    return metric_obtainer(company), trim_mean(metric_comparables, trim_mean_by)
def trimmed_mean(data, cap):
    '''
    data == the data
    cap == the percentage cap (input in decimal values)
    calculates the trimmed mean with a percentage cap
    '''
    return stats.trim_mean(data, cap)
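# Minimal usage sketch: a 0.2 cap drops the lowest and highest 20% of the
# sorted values, so 1 and 100 are discarded here.
print(trimmed_mean([1, 2, 3, 4, 100], 0.2))  # mean(2, 3, 4) = 3.0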
def _tk(bss_data):
    """
    Temporal Kurtosis.

    Parameters
    ----------
    bss_data : array
        Array with dimensions CxTxE, where C is the number of components,
        T the number of time instants and E the number of events

    Returns
    -------
    res : array
        Vector of length C with the computed values for each component
    """
    # As _t_dim < _ev_dim, the events dimension will be shifted down one
    # position after computing the kurtosis (as the time dimension will
    # have disappeared)
    ev_dim = _ev_dim - 1
    try:
        # Time kurtosis
        kurt_data = sp_stats.kurtosis(bss_data, axis=_t_dim)
        # Trimmed mean of the time kurtosis
        res = sp_stats.trim_mean(kurt_data, 0.01, ev_dim)
    except IndexError:
        raise _chk_parameters(bss_data=bss_data)
    # bss_data dimensionality has to be checked explicitly, as an ND array
    # with N > 3 does not raise an exception
    if bss_data.ndim > 3:
        raise _chk_parameters(bss_data=bss_data)
    return res
def simulate_pb_type_I_error(data, samp_size, g, h):
    nboot = 1000
    nsims = 2000
    l = round(.05 * nboot / 2) - 1
    u = nboot - l - 2
    mu = ghtrim(g, h)
    sig_ups = []
    sig_lows = []
    for s in range(nsims):
        experiment_data = np.random.choice(data, size=samp_size)
        bdat = np.random.choice(experiment_data, size=(nboot, samp_size))
        effects = trim_mean(bdat, .2, axis=1) - mu
        up = np.sort(effects)[u]
        low = np.sort(effects)[l]
        if low >= 0:
            sig_lows.append(1)
        elif up <= 0:
            sig_ups.append(1)
        # if (low > 0 and up > 0) or (low < 0 and up < 0):
        #     print('found sig')
    prob_low = np.sum(sig_lows) / nsims
    prob_up = np.sum(sig_ups) / nsims
    return prob_low, prob_up
def BG_algo(nbrhd, d, y, obs):
    # accept obs if it lies within 3*std + y of the trimmed neighbourhood mean
    nbrhd.remove(obs)
    tmd_mean = stats.trim_mean(nbrhd, d)
    std = np.std(nbrhd)
    obs_dif = abs(obs - tmd_mean)
    acc = 3 * std + y
    return obs_dif <= acc
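# Minimal usage sketch for BG_algo(): test whether 9.0 is an outlier among
# its neighbours; note the function mutates nbrhd by removing obs first.
neighbours = [1.0, 1.1, 0.9, 1.2, 1.0, 9.0]
print(BG_algo(neighbours, 0.1, 0.5, 9.0))  # False: 9.0 exceeds 3*std + y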
def std(self, X, trimming=0):
    """
    Column-wise standard deviation or trimmed std.
    Trimming to be entered as fraction.
    """
    if trimming == 0:
        s = np.std(X, axis=0)
        s = np.array(s).reshape(-1)
    else:
        var = sps.trim_mean(np.square(X - sps.trim_mean(X, trimming, 0)),
                            trimming, 0)
        s = np.sqrt(var)
    setattr(self, "col_std_", s)
    return s
def get_aggregate_function(config, section, option):
    to_func = {'mean': np.mean,
               'median': np.median,
               'tmean_20pc': lambda x: trim_mean(x, 0.2),
               'tmean_25pc': lambda x: trim_mean(x, 0.25),
               'tmean_33pc': lambda x: trim_mean(x, 1.0 / 3),
               }
    if config.has_option(section, option):
        s = config.get(section, option)
        if s in to_func:
            return to_func[s]
        else:
            raise Exception('Unknown aggregate function: ' + s)
    else:
        return to_func['mean']
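# Minimal usage sketch with Python's configparser; the [stats] section and
# 'aggregate' option names are made-up examples.
import configparser
import numpy as np
from scipy.stats import trim_mean

config = configparser.ConfigParser()
config.read_string("[stats]\naggregate = tmean_20pc\n")
agg = get_aggregate_function(config, 'stats', 'aggregate')
print(agg([1, 2, 3, 4, 100]))  # 20% trimmed mean = 3.0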
def articulos_libras_promedio(df_bd_preprocesada):
    df_pivot = df_bd_preprocesada[['InvoiceNo', 'Quantity', 'Monto']].groupby(['InvoiceNo']).sum()
    df_pivot = df_pivot.reset_index()

    # ----- Average items per purchase -----
    # The plain mean was far too high to be realistic; overly large values
    # had to be removed to improve it. Solved with trim_mean.
    media_cortada_articulos = stats.trim_mean(df_pivot['Quantity'], 0.3)
    print("\n-----------------------Articulos/Libras promedio-------------------------------\n")
    print('El promedio de articulos por compra es {} '.format(media_cortada_articulos))
    print("\n------------------------------------------------------\n")
    # print('la desviación estandar en cantidad de articulos por compra es {} '.format(df_pivot['Quantity'].std()))
    # print('el maximo de articulos por compra es {} '.format(df_pivot['Quantity'].max()))

    # ----- Average pounds spent per purchase -----
    media_cortada_monto = stats.trim_mean(df_pivot['Monto'], 0.3)
    print('El promedio de libras gastadas por compra es {} '.format(media_cortada_monto))
def average_predictions(predictions: List[pd.DataFrame], column: str,
                        method='mean', min=None, max=None) -> pd.DataFrame:
    preds = []
    for p in predictions:
        pred = to_numpy(p[column].values.tolist())
        preds.append(pred)

    preds = np.row_stack(preds)
    if min is not None or max is not None:
        preds = np.clip(preds, min, max)

    if method == 'mean':
        y_pred = np.mean(preds, axis=0)
    elif method == 'trim_mean':
        y_pred = trim_mean(preds, proportiontocut=0.1, axis=0)
    elif method == 'median':
        y_pred = np.median(preds, axis=0)
    else:
        raise KeyError(method)

    result = pd.DataFrame.from_dict({
        'id_code': predictions[0]['image_id'].values,
        'diagnosis': y_pred.tolist()
    })
    return result
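# Minimal usage sketch for average_predictions(), assuming to_numpy simply
# converts a list to an np.ndarray; two toy prediction frames are averaged.
import numpy as np
import pandas as pd

p1 = pd.DataFrame({'image_id': ['a', 'b'], 'diagnosis': [1.0, 2.0]})
p2 = pd.DataFrame({'image_id': ['a', 'b'], 'diagnosis': [3.0, 2.0]})
print(average_predictions([p1, p2], 'diagnosis', method='mean'))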
def calc_diff_conf_int(self, pair, reps=5000, ci=0.95):
    """
    Bootstrap a confidence interval for the central tendency of differences.

    :param TestPair pair: pair to calculate the confidence interval for
    :param int reps: how many bootstrapping repetitions to perform
    :param float ci: confidence interval for the low and high estimate.
        0.95, i.e. "2 sigma", by default
    :return: tuple with low estimate, median, and high estimate of the
        truncated mean of differences of observations
    """
    # because the samples are not independent, we calculate the mean of
    # differences, not a difference of means
    diffs = self.data.iloc[:, pair.index1] - self.data.iloc[:, pair.index2]

    cent_tend = []
    observ_count = len(diffs)
    for _ in range(reps):
        boot = np.random.choice(diffs, replace=True, size=observ_count)
        # use the trimmed mean as the pairing of samples is not perfect:
        # the noise source could get activated in the middle of testing
        # of the test set, causing some results to be unusable;
        # discard 50% of samples total (cut 25% from the median) to exclude
        # non-central modes
        cent_tend.append(stats.trim_mean(boot, 0.25))

    return np.quantile(cent_tend, [(1 - ci) / 2, 0.5, 1 - (1 - ci) / 2])
def tobc(curr_arr, block_size, central_tendency, cutting_ratio, percentile):
    answer = []
    for curr_col in range(np.shape(curr_arr)[1]):
        curr_row = 0
        curr_col_moving = curr_col
        sum_ = 0
        median_list = []
        # walk down the array diagonally (wrapping around the columns),
        # collecting one value per row
        while curr_row < np.shape(curr_arr)[0]:
            sum_ = sum_ + curr_arr[curr_row][curr_col_moving]
            median_list.append(curr_arr[curr_row][curr_col_moving])
            curr_col_moving = curr_col_moving + 1
            if curr_col_moving == np.shape(curr_arr)[1]:
                curr_col_moving = 0
            curr_row = curr_row + 1
        if central_tendency == "sum":
            temp_var = sum_
        if central_tendency == "mean":
            temp_var = sum_ / block_size[0]
        if central_tendency == "median":
            temp_var = median(median_list)
        if central_tendency == "trim_mean":
            temp_var = stats.trim_mean(np.array(median_list), cutting_ratio)
        if central_tendency == "percentile":
            temp_var = np.percentile(np.array(median_list), percentile)
        if central_tendency == "win_mean":
            temp_var = mean(winsorize(np.array(median_list), cutting_ratio))
        answer.append(temp_var)
    mid_answer = np.copy(circulant(answer).transpose())
    return mid_answer
def trimean(samples, trim=0.10):
    """
    Trim the top and bottom `trim` fraction of the samples and then
    calculate the mean. This is a good way to remove outliers.

    In [15]: trimean([0,1,2,3,4,5,6,7,8,9,10])
    Out[15]: 5.0

    In [16]: trimean([0,1,2,3,4,5,6,7,8,9,10,11])
    Out[16]: 5.5

    In [17]: trimean([0,1,2,3,4,5,6,7,8,9,10,50])
    Out[17]: 5.5

    In [18]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50])
    Out[18]: 9.545454545454545

    In [19]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50,50])
    Out[19]: 12.916666666666666

    In [20]: trimean([0,1,2,3,4,5,6,7,8,9,10,50,50,50,100])
    Out[20]: 15.76923076923077

    :param samples: All the samples
    :param trim: fraction to trim from top and bottom
    :return: One number representing the whole sample series
    """
    return stats.trim_mean(samples, trim)
def scan_correction(dn, tidx):
    'Scales the amplitude of each scan to be most like the first scan'
    for j in [0, 1]:
        null_spek = trim_mean(dn[tidx:, :, j, 0], 0.2, 0)
        null_std = dn[tidx:, :, j, 0].std(0)
        for i in range(0, dn.shape[-1], 2):
            spec = trim_mean(dn[tidx:, :, j, i], 0.2, 0)
            c = np.linalg.lstsq(spec[:, None], null_spek[:, None])
            dn[:, :, j, i] *= c[0][0]
        null_spek = trim_mean(dn[tidx:, :, j, 1], 0.2, 0)
        for i in range(1, dn.shape[-1], 2):
            spec = trim_mean(dn[tidx:, :, j, i], 0.2, 0)
            c = np.linalg.lstsq(spec[:, None], null_spek[:, None])
            dn[:, :, j, i] *= c[0][0]
    return dn
def trimmed_mean(full_list_X, full_list_Y):
    rowx = list()
    tmeanX = list()
    tmeanY = list()
    for i in range(len(full_list_X)):
        x = full_list_X[i]
        columns = list(zip(*x))  # transpose: one tuple per column
        for j in range(len(full_list_X[i][0])):
            rowx.append(stats.trim_mean(columns[j], 0.25))
        temp = list(zip(*(full_list_Y[i])))
        y1 = list(temp[0])
        tmeanY.append(y1[0])
        tmeanX.append(rowx)
        rowx = list()
    return tmeanX, tmeanY
def avaragePlacementDistance(accountBet):
    placementDates = [i['DayMarker'] for i in accountBet]
    placementDates.sort()
    placementDatesTmp = shift(placementDates, 1)
    difference = [(datetime.strptime(b, '%Y-%m-%d %H:%M:%S') -
                   datetime.strptime(a, '%Y-%m-%d %H:%M:%S')).total_seconds() / 3600.0
                  for a, b in zip(placementDates, placementDatesTmp)]
    diffTrimmed = difference[:-1]
    if any(t < 0 for t in diffTrimmed):
        print 'unexpected negative time gap'
        print diffTrimmed
    ret = stats.trim_mean(difference, 0.01)
    if len(diffTrimmed) == 0:
        print 'no placement gaps found'
        return 0
    if ret < 0.001:
        ret = float(sum(diffTrimmed)) / len(diffTrimmed)
    return ret
def extract_features(record, i):
    log = dict()
    log['subject_id'] = i + 1

    mean_features = mean(record, axis=0)
    median_features = median(record, axis=0)
    trim_mean_10_features = stats.trim_mean(record, 0.1, axis=0)
    trim_mean_25_features = stats.trim_mean(record, 0.25, axis=0)
    std_dev_features = std(record, axis=0)
    iqr_features = np.subtract(*np.percentile(record, [75, 25], axis=0))
    mad_features = mean(absolute(record - mean(record, axis=0)), axis=0)
    print mad_features

    # the 26 raw voice measurements, in column order
    feature_names = ['jitter(local)', 'jitter(local,absolute)',
                     'jitter(ppq5)', 'jitter(rap)', 'jitter(ddp)',
                     'shimmer(local)', 'shimmer(local,dB)', 'shimmer(apq3)',
                     'shimmer(apq5)', 'shimmer(apq11)', 'shimmer(dda)',
                     'AC', 'NTH', 'HTN', 'median_pitch', 'mean_pitch',
                     'std_dev', 'min_pitch', 'max_pitch', 'num_pulses',
                     'num_periods', 'mean_period', 'std_dev_period',
                     'frac_locallyunvoiced_frames', 'num_voice_breaks',
                     'degree_voicebreaks']
    # distinct prefixes keep the 10% and 25% trimmed-mean features from
    # overwriting each other in the log dict
    summary_stats = [('mean', mean_features),
                     ('median', median_features),
                     ('trim10mean', trim_mean_10_features),
                     ('trim25mean', trim_mean_25_features),
                     ('std', std_dev_features),
                     ('iqr', iqr_features),
                     ('mad', mad_features)]
    for prefix, values in summary_stats:
        for idx, fname in enumerate(feature_names):
            log['%s_%s' % (prefix, fname)] = values[idx]
    return log
def _filt_run(self, dat, filt, do_sim=False, vplot=True, nrange=1):
    if self.doplot and vplot:
        errorbar(dat[0], dat[1], dat[2], fmt="o")

    new = True
    if new:
        mymodel = Model(self.fitfunc_small_te, extra_args=[dat[1], dat[2], False])
    else:
        mymodel = Model(self.fitfunc_te)

    # get some good guesses
    try:
        scale = trim_mean(dat[1], 0.3)
    except:
        scale = mean(dat[1])
    offset = 1.0  # trim_mean(dat[1], 0.3)
    t0 = median(dat[0])
    umin = 1.0
    b = 0.0  # trending slope

    mydata = RealData(dat[0], dat[1], sx=1.0 / (60 * 24), sy=dat[2])
    trange = list(linspace(min(dat[0]), max(dat[0]), nrange))
    maxi = (dat[1] == max(dat[1])).nonzero()[0]
    trange.extend(list(dat[0][maxi]))
    trange.extend([t0, max(dat[0]) + 10, max(dat[0]) + 100])

    final_output = None
    for t0i in trange:
        for te in 10**linspace(log10(2), log10(200), nrange):
            if new:
                pinit = [te, umin, t0i]
            else:
                pinit = [te, umin, t0i, scale, offset, b]
            myodr = ODR(mydata, mymodel, beta0=pinit)
            myoutput = myodr.run()
            if final_output is None:
                final_output = myoutput
                old_sd_beta = final_output.sd_beta
                continue
            if trim_mean(log10(myoutput.sd_beta / final_output.sd_beta), 0.0) < 0.0 and \
               myoutput.res_var <= final_output.res_var and \
               (myoutput.sd_beta == 0.0).sum() <= (final_output.sd_beta == 0.0).sum():
                final_output = myoutput

    t = linspace(min(dat[0]),
                 max([max(dat[0]), final_output.beta[2] + 6 * final_output.beta[0]]),
                 1500)
    if new:
        tmp = self.fitfunc_small_te(final_output.beta, dat[0], dat[1], dat[2], True)
        p = list(final_output.beta)
        p.extend([tmp[0], tmp[1], tmp[2]])
        y = array(self.modelfunc_small_te(p, t))
    else:
        p = final_output.beta
        y = self.fitfunc_te(final_output.beta, t)

    if self.doplot:
        plot(t, y)
        xlabel('Time [days]')
        ylabel('Relative Flux Density')

    if do_sim:
        for i in range(10):
            tmp = r.multivariate_normal(myoutput.beta, myoutput.cov_beta)
            if self.doplot:
                plot(t, self.a_te(tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], t), "-")

    return (final_output, p, new)
def main(method, method_value, additional, additional_value, sideLen):
    distanze = dict()
    statistica = dict()

    # read the raw values
    for csv in [f for f in os.listdir('./') if f.endswith("csv") and f == data]:
        df = pd.read_csv(csv, header=0, decimal='.')
        header = df.columns.values
        for row in df.itertuples():
            nomeCella = row[1]
            for i in range(2, len(row)):
                if i % 2 != 0 and row[i] != defaultValue:  # statistics only on distance columns
                    beaconID = header[i - 1][9:]
                    if (nomeCella, beaconID) in distanze:
                        distanze[nomeCella, beaconID].append(row[i])
                    else:
                        distanze[nomeCella, beaconID] = [row[i]]
    print distanze

    # compute statistics
    if method == "percentile":
        percentili = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentili[key] = stats.scoreatpercentile(distanze[key], method_value)
        for row in df.itertuples():  # iterate over the csv rows
            for i in range(1, len(row)):  # look for default values to replace (e.g. -1000)
                if i % 2 != 0 and row[i] == defaultValue and header[i - 1][9:] != "kpOU":
                    df.ix[row[0], i - 1] = percentili[row[1], (header[i - 1][9:])]
        statistica = percentili
    elif method == "truncated_mean":
        truncatedMeans = dict.fromkeys(distanze.keys())
        for key in distanze:
            truncatedMeans[key] = stats.trim_mean(distanze[key], method_value)
        for row in df.itertuples():
            for i in range(2, len(row)):
                if i % 2 != 0 and row[i] == defaultValue and header[i - 1][9:] != "kpOU":
                    df.ix[row[0], i - 1] = truncatedMeans[row[1], (header[i - 1][9:])]
        statistica = truncatedMeans
    elif method == "simple_mean":
        simpleMeans = dict.fromkeys(distanze.keys())
        for key in distanze:
            simpleMeans[key] = numpy.mean(distanze[key])
        for row in df.itertuples():
            for i in range(1, len(row)):
                if i % 2 != 0 and row[i] == defaultValue:
                    df.ix[row[0], i - 1] = simpleMeans[row[1], (header[i - 1][9:])]
        statistica = simpleMeans
    else:
        print "not supported yet!"
        return

    count = 0
    if additional == "replace":
        # upper/lower percentile bounds for each (cell, beacon) pair
        percentiliUP = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentiliUP[key] = stats.scoreatpercentile(distanze[key], 100 - additional_value / 2)
        percentiliDOWN = dict.fromkeys(distanze.keys())
        for key in distanze:
            percentiliDOWN[key] = stats.scoreatpercentile(distanze[key], additional_value / 2)
        for row in df.itertuples():
            for i in range(2, len(row)):  # i = column index
                if i % 2 != 0 and header[i - 1][9:] != "kpOU":
                    if row[i] < percentiliDOWN[row[1], (header[i - 1][9:])] or \
                       row[i] > percentiliUP[row[1], (header[i - 1][9:])]:
                        df.ix[row[0], i - 1] = statistica[row[1], (header[i - 1][9:])]
                        count += 1
    else:
        print "additional not supported yet, skipped"
    if additional_value != "":
        print str(count) + " values replaced with " + method + " statistics"

    # add row/column coordinates for each cell
    df["riga"] = 0
    df["colonna"] = 0
    header = df.columns.values  # reload header
    indexRowColumn = len(header) - 1
    lastCell = ""
    lastRow = 1
    lastCol = 0
    for row in df.itertuples():
        if row[indexRowColumn] == 0 and row[1] != lastCell:
            lastCell = row[1]
            lastCol += 1
            if lastCol > sideLen:
                lastCol = 1
                lastRow += 1
        df.ix[row[0], "riga"] = lastRow  # row
        df.ix[row[0], "colonna"] = lastCol  # column

    # save to file
    df.to_csv(outputFile, encoding='utf-8')

    # draw a heatmap for each sensor
    for col in df.columns.tolist():
        if col != "cella" and col != "colonna" and col != "riga" and "rssi" not in col:
            dataheat = [go.Heatmap(x=df['riga'], y=df['colonna'],
                                   z=df[col].tolist(), reversescale=True)]
            layout = go.Layout(title=method + "_" + col, width=800, height=640)
            fig = go.Figure(data=dataheat, layout=layout)
            py.image.save_as(fig, filename="new_images/" + method + str(method_value) +
                             "-" + additional + str(additional_value) + "_" + col + "-heatmap.png")
    print(" Done!")
def meaner(dat, t, llim, ulim, proportiontocut=0.0):
    # trimmed mean of dat over the time window [llim, ulim]
    # (fi maps a time value to the corresponding index in t)
    return trim_mean(dat[fi(t, llim):fi(t, ulim)], axis=0,
                     proportiontocut=proportiontocut)
def get_Onest_Amplitude_Duration_of_spindles(raw, channelList,
                                             annotations=None,
                                             moving_window_size=200,
                                             lower_threshold=.9,
                                             syn_channels=3,
                                             l_bound=0.5, h_bound=2,
                                             tol=1, higher_threshold=3.5,
                                             front=300, back=100,
                                             sleep_stage=True,
                                             proba=True,
                                             validation_windowsize=3,
                                             l_freq=11, h_freq=16):
    """
    raw: data after preprocessing
    channelList: channel list of interest; in this study we use
        'F3','F4','C3','C4','O1','O2'
    annotations: pandas DataFrame object containing manual annotations,
        such as sleep stages and spindle locations
    moving_window_size: size of the moving window for convolved root mean
        square computation. It should work better when it equals the
        sampling frequency, which in this case is 500 (we downsample
        subjects with a 1000 Hz sampling rate)
    lower_threshold: highpass threshold for spindle detection:
        decision making = trimmed_mean + lower_T * trimmed_std
    higher_threshold: lowpass threshold for spindle detection:
        decision making = trimmed_mean + higher_T * trimmed_std
    syn_channels: criteria for selecting spindles: at least this many
        channels, as well as the mean channel, must show a spindle instance
    l_bound: lower boundary for the duration of a spindle instance
    h_bound: upper boundary for the duration of a spindle instance
    tol: tolerance for determining spindles (criteria in time)
    front: first seconds of the recording that we ignore, because there
        might be artifacts, or it is confirmed subjects could not fall
        asleep within such a short period
    back: last seconds of the recording that we ignore due to the
        recording procedures
    """
    # process the data without any other information
    time = np.linspace(0, raw.last_samp / raw.info['sfreq'],
                       raw._data[0, :].shape[0])
    RMS = np.zeros((len(channelList), raw._data[0, :].shape[0]))
    peak_time = {}  # preallocate
    sfreq = raw.info['sfreq']
    mph, mpl = {}, {}

    for ii, names in enumerate(channelList):
        peak_time[names] = []
        segment, _ = raw[ii, :]
        RMS[ii, :] = window_rms(segment[0, :], moving_window_size)
        mph[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + lower_threshold * trimmed_std(RMS[ii, :], 0.05)
        mpl[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + higher_threshold * trimmed_std(RMS[ii, :], 0.05)
        pass_ = RMS[ii, :] > mph[names]  # should be greater than the mean, not the threshold, to compute duration
        up = np.where(np.diff(pass_.astype(int)) > 0)
        down = np.where(np.diff(pass_.astype(int)) < 0)
        up = up[0]
        down = down[0]
        # key to identify segments that go beyond the lower threshold
        if down[0] < up[0]:
            down = down[1:]
        if (up.shape > down.shape) or (up.shape < down.shape):
            size = np.min([up.shape, down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up, down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS[ii, pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl[names]:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])

    # apply the same algorithm to the mean of the RMSs
    peak_time['mean'] = []
    peak_at = []
    duration = []
    RMS_mean = hmean(RMS)
    mph['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + lower_threshold * trimmed_std(RMS_mean, 0.05)
    mpl['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + higher_threshold * trimmed_std(RMS_mean, 0.05)
    pass_ = RMS_mean > mph['mean']
    up = np.where(np.diff(pass_.astype(int)) > 0)
    down = np.where(np.diff(pass_.astype(int)) < 0)
    up = up[0]
    down = down[0]
    if down[0] < up[0]:
        down = down[1:]
    if (up.shape > down.shape) or (up.shape < down.shape):
        size = np.min([up.shape, down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up, down))
    for pairs in C.T:
        if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1]]
            if np.max(SegmentForPeakSearching) < mpl['mean']:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                duration_temp = time[pairs[1]] - time[pairs[0]]
                duration.append(duration_temp)

    # keep spindles that enough channels agree on
    time_find = []
    mean_peak_power = []
    Duration = []
    for item, PEAK, duration_time in zip(peak_time['mean'], peak_at, duration):
        temp_timePoint = []
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]),
                                          key=lambda x: abs(x[1] - item))[1])
            except:
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item) < tol).astype(int)) >= syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except:
            pass
    # the end of the processing in which there are no other inputs

    # update the spindles we found if we want to add sleep stage information
    if sleep_stage:
        temp_time_find = []
        temp_mean_peak_power = []
        temp_duration = []
        # separate out stage 2
        stages = annotations[annotations.Annotation.apply(stage_check)]
        On = stages[::2]
        Off = stages[1::2]
        stage_on_off = list(zip(On.Onset.values, Off.Onset.values))
        if abs(np.diff(stage_on_off[0]) - 30) < 2:
            pass
        else:
            On = stages[1::2]
            Off = stages[::2]
            stage_on_off = list(zip(On.Onset.values[1:], Off.Onset.values[2:]))
        for single_time_find, single_mean_peak_power, single_duration in zip(time_find, mean_peak_power, Duration):
            for on_time, off_time in stage_on_off:
                if intervalCheck([on_time, off_time], single_time_find, tol=tol):
                    temp_time_find.append(single_time_find)
                    temp_mean_peak_power.append(single_mean_peak_power)
                    temp_duration.append(single_duration)
        time_find = temp_time_find
        mean_peak_power = temp_mean_peak_power
        Duration = temp_duration

    # decision function based on the spindles we have just found
    """
    A single floating representation is computed based on the validation
    window size (say 3 seconds), and information like peak power densities
    and peak frequencies are added to the feature space.
    We fit the standardized features with the labels (spindles found by the
    automated pipeline). A prediction probability is computed using
    scikit-learn's logistic regression.
    """
    decision_features = None
    auto_proba = None
    auto_label = None
    if proba:
        result = pd.DataFrame({'Onset': time_find, 'Duration': Duration,
                               'Annotation': ['spindle'] * len(Duration)})
        auto_label, _ = discritized_onset_label_auto(raw, result, validation_windowsize)
        events = mne.make_fixed_length_events(raw, id=1, start=front,
                                              stop=raw.times[-1] - back,
                                              duration=validation_windowsize)
        epochs = mne.Epochs(raw, events, event_id=1, tmin=0,
                            tmax=validation_windowsize, preload=True)
        data = epochs.get_data()[:, :, :-1]
        full_prop = []
        for d in data:
            temp_p = []
            for ii, name in enumerate(channelList):
                rms = window_rms(d[ii, :], 500)
                l = trim_mean(rms, 0.05) + lower_threshold * trimmed_std(rms, 0.05)
                h = trim_mean(rms, 0.05) + higher_threshold * trimmed_std(rms, 0.05)
                prop = (sum(rms > l) + sum(rms < h)) / (sum(rms < h) - sum(rms < l))
                if np.isinf(prop):
                    prop = (sum(rms > l) + sum(rms < h))
                temp_p.append(prop)
            full_prop.append(temp_p)
        psds, freq = mne.time_frequency.psd_multitaper(epochs, fmin=l_freq,
                                                       fmax=h_freq, tmin=0,
                                                       tmax=3, low_bias=True)
        psds = 10 * np.log10(psds)
        features = pd.DataFrame(np.concatenate((np.array(full_prop),
                                                psds.max(2),
                                                freq[np.argmax(psds, 2)]), 1))
        decision_features = StandardScaler().fit_transform(features.values, auto_label)
        clf = LogisticRegressionCV(Cs=np.logspace(-4, 6, 11), cv=5,
                                   tol=1e-7, max_iter=int(1e7))
        clf.fit(decision_features, auto_label)
        auto_proba = clf.predict_proba(decision_features)[:, -1]

    return time_find, mean_peak_power, Duration, mph, mpl, auto_proba, auto_label
from sklearn.datasets import load_iris
from scipy.stats import trim_mean
import numpy as np

data = load_iris()
x = data['data']
y = data['target']
col_names = data['feature_names']

# Let's now demonstrate how to calculate the mean, trimmed mean, and range values:

# 1. Calculate and print the mean value of each column in the Iris dataset
print "col name,mean value"
for i, col_name in enumerate(col_names):
    print "%s,%0.2f" % (col_name, np.mean(x[:, i]))
print

# 2. Trimmed mean calculation.
p = 0.1  # 10% trimmed mean
print
print "col name,trimmed mean value"
for i, col_name in enumerate(col_names):
    print "%s,%0.2f" % (col_name, trim_mean(x[:, i], p))
print

# 3. Data dispersion: calculate and display the range values.
print "col_names,max,min,range"
for i, col_name in enumerate(col_names):
    print "%s,%0.2f,%0.2f,%0.2f" % (col_name, max(x[:, i]), min(x[:, i]),
                                    max(x[:, i]) - min(x[:, i]))
print

# Finally, we will show the variance, standard deviation, mean absolute
# deviation, and median absolute deviation calculations:

# 4. Data dispersion: variance and standard deviation
print "col_names,variance,std-dev"
for i, col_name in enumerate(col_names):
    print "%s,%0.2f,%0.2f" % (col_name, np.var(x[:, i]), np.std(x[:, i]))
print

# 5. Mean absolute deviation calculation
def mad(x, axis=None):
    mean = np.mean(x, axis=axis)
    return np.mean(np.abs(x - mean), axis=axis)
import glob
import sys
import numpy as np
import featurespace_fun as fsf
import matplotlib.pyplot as plt
from nilearn.masking import apply_mask
from nilearn.image import smooth_img
from scipy.stats import norm, ttest_1samp, trim_mean
from scipy.stats.mstats import trimmed_mean_ci
from statsmodels.sandbox.stats.multicomp import fdrcorrection0

models = sys.argv[1:]
# models = ['logBSC_H200_ds_conv', 'logMFS_ds']
mask = 'brainmask_group_template.nii.gz'
scores_bsc = np.arctanh(apply_mask(
    smooth_img(glob.glob('MaThe/avg_maps/model_{}_*whole*'.format(models[0])), fwhm=3.0),
    mask_img=mask))
scores_mfs = np.arctanh(apply_mask(
    smooth_img(glob.glob('MaThe/avg_maps/model_{}_*whole*'.format(models[1])), fwhm=3.0),
    mask_img=mask))
diff_scores = scores_bsc - scores_mfs
mean_diff = trim_mean(diff_scores, 0.08, axis=0)
trim_mean_ci = trimmed_mean_ci(diff_scores, (0.08, 0.08), axis=0)
# zero out voxels whose trimmed-mean confidence interval straddles zero
which_ones = np.logical_not(np.logical_or(trim_mean_ci[0, :] > 0, trim_mean_ci[1, :] < 0))
mean_diff[which_ones] = 0
display = fsf.plot_diff_avg_whole(mean_diff, 0.001)
display.savefig('mean_diff_smoothed_trim_model_{}.svg'.format('_'.join(models)))
display.savefig('mean_diff_smoothed_trim_model_{}.png'.format('_'.join(models)))
fsf.save_map_avg_whole(mean_diff, threshold=None, model='diff_smooth_trim_' + '_'.join(models))
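# The masking idiom above (zero out entries whose trimmed-mean confidence
# interval contains zero) is easy to check on synthetic data, without the
# imaging files the script assumes. A minimal sketch:
import numpy as np
from scipy.stats import trim_mean
from scipy.stats.mstats import trimmed_mean_ci

rng = np.random.default_rng(0)
# column 0 has a real effect (mean 0.5); column 1 does not
diffs = rng.normal(loc=[0.5, 0.0], scale=1.0, size=(50, 2))
est = trim_mean(diffs, 0.08, axis=0)
ci = np.asarray(trimmed_mean_ci(diffs, (0.08, 0.08), axis=0))
not_significant = ~((ci[0, :] > 0) | (ci[1, :] < 0))  # CI contains zero
est[not_significant] = 0
print(est)  # the second entry is typically zeroed out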
def find_onset_duration(self, lower_threshold, higher_threshold):
    from scipy.stats import trim_mean, hmean
    self.lower_threshold = lower_threshold
    self.higher_threshold = higher_threshold
    front = self.front
    back = self.back
    raw = self.raw
    channelList = self.channelList
    moving_window_size = self.moving_window_size
    l_bound = self.l_bound
    h_bound = self.h_bound
    tol = self.tol
    syn_channels = self.syn_channels
    sfreq = raw.info['sfreq']
    time = np.linspace(0, raw.last_samp / sfreq, raw.last_samp)
    RMS = np.zeros((len(channelList), raw._data[0, :].shape[0]))
    peak_time = {}
    mph, mpl = {}, {}
    for ii, names in tqdm(enumerate(channelList)):
        peak_time[names] = []
        segment, _ = raw[ii, :]
        RMS[ii, :] = window_rms(segment[0, :], moving_window_size)
        mph[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + \
            lower_threshold * trimmed_std(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05)
        mpl[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + \
            higher_threshold * trimmed_std(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05)
        # should be greater than the mean, not the threshold, to compute duration
        pass_ = RMS[ii, :] > mph[names]
        # alternative: pass_ = (RMS[ii, :] > mph[names]) & (RMS[ii, :] < mpl[names])
        up = np.where(np.diff(pass_.astype(int)) > 0)[0]
        down = np.where(np.diff(pass_.astype(int)) < 0)[0]
        if down[0] < up[0]:  # drop an offset that precedes the first onset
            down = down[1:]
        if up.shape != down.shape:  # truncate to matched onset/offset pairs
            size = np.min([up.shape, down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up, down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS[ii, pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl[names]:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
    peak_time['mean'], peak_at, duration = [], [], []
    RMS_mean = hmean(RMS)
    mph['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + \
        lower_threshold * trimmed_std(RMS_mean, 0.05)
    mpl['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + \
        higher_threshold * trimmed_std(RMS_mean, 0.05)
    pass_ = RMS_mean > mph['mean']
    # alternative: pass_ = (RMS_mean > mph['mean']) & (RMS_mean < mpl['mean'])
    up = np.where(np.diff(pass_.astype(int)) > 0)[0]
    down = np.where(np.diff(pass_.astype(int)) < 0)[0]
    if down[0] < up[0]:
        down = down[1:]
    if up.shape != down.shape:
        size = np.min([up.shape, down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up, down))
    for pairs in C.T:
        if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1]]
            if np.max(SegmentForPeakSearching) < mpl['mean']:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                duration.append(time[pairs[1]] - time[pairs[0]])
    time_find, mean_peak_power, Duration = [], [], []
    for item, PEAK, duration_time in zip(peak_time['mean'], peak_at, duration):
        temp_timePoint = []
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]),
                                          key=lambda x: abs(x[1] - item))[1])
            except ValueError:  # no peak detected on this channel
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item) < tol).astype(int)) >= syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except Exception:
            pass
    self.time_find = time_find
    self.mean_peak_power = mean_peak_power
    self.Duration = Duration
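# The onset/offset pairing used repeatedly above (diff of a boolean mask,
# drop a leading offset, truncate to equal length) can be isolated into a
# small helper. A minimal sketch of the same idea (hypothetical helper name):
import numpy as np

def threshold_crossings(mask):
    """Return (onsets, offsets) sample indices of runs where mask is True."""
    d = np.diff(mask.astype(int))
    up = np.where(d > 0)[0]
    down = np.where(d < 0)[0]
    if down.size and up.size and down[0] < up[0]:
        down = down[1:]            # signal started above threshold
    n = min(up.size, down.size)    # keep only complete onset/offset pairs
    return up[:n], down[:n]

# Example: a mask with two runs above threshold
mask = np.array([0, 1, 1, 0, 0, 1, 1, 1, 0], dtype=bool)
print(threshold_crossings(mask))  # (array([0, 4]), array([2, 7]))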
def get_Onest_Amplitude_Duration_of_spindles(raw, channelList, file_to_read,
                                             moving_window_size=200, threshold=.9,
                                             syn_channels=3, l_freq=0, h_freq=200,
                                             l_bound=0.5, h_bound=2, tol=1):
    """
    raw: data after preprocessing
    channelList: channel list of interest
    file_to_read: raw data file names
    moving_window_size: size of the moving window for convolved root mean square computation
    threshold: threshold for spindle detection: threshold = mean + threshold * std
    syn_channels: criteria for selecting spindles: at least # of channels have a spindle instance, and also in the mean channel
    l_freq: high pass frequency for spindle range
    h_freq: low pass frequency for spindle range
    l_bound: low boundary for duration of a spindle instance
    h_bound: high boundary for duration of a spindle instance
    tol: tolerance for determining spindles (criteria in time)
    """
    mul = threshold
    nn = 4.5
    time = np.linspace(0, raw.last_samp / raw.info['sfreq'], raw._data[0, :].shape[0])
    RMS = np.zeros((len(channelList), raw._data[0, :].shape[0]))
    peak_time = {}
    # preallocate the figure
    fig = plt.figure(figsize=(40, 40))
    ax = plt.subplot(311)
    ax1 = plt.subplot(312, sharex=ax)
    ax2 = plt.subplot(313, sharex=ax)
    for ii, names in enumerate(channelList):
        peak_time[names] = []
        segment, _ = raw[ii, :]
        RMS[ii, :] = window_rms(segment[0, :], moving_window_size)  # window of 200 ms
        mph = trim_mean(RMS[ii, 100000:-30000], 0.05) + mul * trimmed_std(RMS[ii, :], 0.05)  # higher sd = stricter criteria
        mpl = trim_mean(RMS[ii, 100000:-30000], 0.05) + nn * trimmed_std(RMS[ii, :], 0.05)
        pass_ = RMS[ii, :] > mph
        up = np.where(np.diff(pass_.astype(int)) > 0)[0]
        down = np.where(np.diff(pass_.astype(int)) < 0)[0]
        if down[0] < up[0]:  # drop an offset that precedes the first onset
            down = down[1:]
        if up.shape != down.shape:  # truncate to matched onset/offset pairs
            size = np.min([up.shape, down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up, down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                TimePoint = np.mean([time[pairs[1]], time[pairs[0]]])
                SegmentForPeakSearching = RMS[ii, pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
                    ax.scatter(temp_temp_time[ints_temp], mph + 0.1 * mph,
                               marker='s', color='blue')
        ax.plot(time, RMS[ii, :], alpha=0.2, label=names)
        ax2.plot(time, segment[0, :], label=names, alpha=0.3)
    ax2.set(xlabel="time", ylabel=r"$\mu$V", xlim=(time[0], time[-1]),
            title=file_to_read[:-5] + ' band pass %.1f - %.1f Hz' % (l_freq, h_freq))
    ax.set(xlabel="time", ylabel='RMS Amplitude', xlim=(time[0], time[-1]),
           title='auto detection on each channel')
    ax1.set(xlabel='time', ylabel='Amplitude')
    ax.axhline(mph, color='r', alpha=0.03)
    ax2.legend()
    ax.legend()
    peak_time['mean'], peak_at, duration = [], [], []
    RMS_mean = hmean(RMS)
    ax1.plot(time, RMS_mean, color='k', alpha=0.3)
    mph = trim_mean(RMS_mean[100000:-30000], 0.05) + mul * RMS_mean.std()
    mpl = trim_mean(RMS_mean[100000:-30000], 0.05) + nn * RMS_mean.std()
    pass_ = RMS_mean > mph
    up = np.where(np.diff(pass_.astype(int)) > 0)[0]
    down = np.where(np.diff(pass_.astype(int)) < 0)[0]
    if down[0] < up[0]:
        down = down[1:]
    if up.shape != down.shape:
        size = np.min([up.shape, down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up, down))
    for pairs in C.T:
        if 0.5 < (time[pairs[1]] - time[pairs[0]]) < 2:
            TimePoint = np.mean([time[pairs[1]], time[pairs[0]]])
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1]]
            if np.max(SegmentForPeakSearching) < mpl:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                ax1.scatter(temp_time[ints_temp], mph + 0.1 * mph, marker='s', color='blue')
                duration.append(time[pairs[1]] - time[pairs[0]])
    ax1.axhline(mph, color='r', alpha=1.)
    ax1.set_xlim([time[0], time[-1]])
    time_find, mean_peak_power, Duration = [], [], []
    for item, PEAK, duration_time in zip(peak_time['mean'], peak_at, duration):
        temp_timePoint = []
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]),
                                          key=lambda x: abs(x[1] - item))[1])
            except ValueError:  # no peak detected on this channel
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item) < tol).astype(int)) > syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except Exception:
            pass
    return time_find, mean_peak_power, Duration, fig, ax, ax1, ax2, peak_time, peak_at
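# The cross-channel synchronization test above asks whether enough channels
# have a detected peak within `tol` seconds of a peak on the mean channel.
# A standalone sketch of that criterion (hypothetical helper, simplified
# from the loop above):
import numpy as np

def is_synchronized(mean_peak, per_channel_peaks, tol=1.0, syn_channels=3):
    """True if at least syn_channels channels peak within tol s of mean_peak."""
    nearest = []
    for peaks in per_channel_peaks:
        # fall back to an out-of-tolerance value when a channel has no peaks
        nearest.append(min(peaks, key=lambda t: abs(t - mean_peak)) if peaks
                       else mean_peak + 2 * tol)
    return np.sum(np.abs(np.array(nearest) - mean_peak) < tol) >= syn_channels

# Example: three of four channels agree within one second
channels = [[10.2, 55.0], [9.8], [10.5, 30.1], []]
print(is_synchronized(10.0, channels, tol=1.0, syn_channels=3))  # True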
import numpy as np
import numpy.random as rd
from numpy import array, dot, nan, isnan, nanmean, nanmedian, random
from matplotlib import pyplot as plt
from scipy.stats import trim_mean
from scipy import optimize as opt
from tmc import trimmeancomplex

a = 0.02  # trimming proportion (use rd.rand() for a random one)
N = 100
Nnan = 15
sqrtR = array([[1, 0.2], [0.04, 2]])
R = dot(sqrtR, sqrtR.transpose())
xtab = dot(R, rd.randn(2, N))
x = xtab[0, :] + 1j * xtab[1, :]
indperm = random.randint(1, N, N)
x[indperm[range(Nnan)]] = nan + 1j * nan
mytt = nanmean(trimmeancomplex(x, a, visu=1))
tt = trim_mean(x, (1.0 - a) / 2.0)

def n1(u, x):
    # L1 objective: sum of componentwise absolute deviations, ignoring NaNs
    indnotnan = ~isnan(x)
    return sum(abs(u[0] - x.real[indnotnan]) + abs(u[1] - x.imag[indnotnan]))

init0 = [x[0].real, x[0].imag]
res = opt.fmin(n1, init0, args=(x,))
print(res)
print([nanmedian(x).real, nanmedian(x).imag])
print([nanmean(mytt).real, nanmean(mytt).imag])
# plt.plot(x.real, x.imag, 'x')
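# For complex samples, scipy's trim_mean relies on an ordering of the values,
# which is not meaningful in the complex plane; the snippet above instead
# minimizes a componentwise L1 objective. A related, rotation-invariant
# alternative is the geometric median, sketched here under the same
# NaN-handling convention (illustrative only, not part of the tmc package):
import numpy as np
from scipy import optimize as opt

def geometric_median(z):
    """Point minimizing the summed Euclidean distance to the finite samples."""
    z = z[~np.isnan(z)]
    cost = lambda u: np.sum(np.abs((u[0] + 1j * u[1]) - z))
    u0 = [np.median(z.real), np.median(z.imag)]
    res = opt.minimize(cost, u0, method='Nelder-Mead')
    return res.x[0] + 1j * res.x[1]

z = np.array([1 + 1j, 1.2 + 0.9j, 0.8 + 1.1j, 10 + 10j])  # one outlier
print(geometric_median(z))  # stays near 1 + 1j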
def animate(i):
    global opts, framegrabber, frames, params, flowStartFrame
    global roi, flowMask, lmask, rmask, tmask, bmask
    global flowVals, times, history
    global logfile
    global codes
    update = [imdisp, foedisp, q_foe, b_latdiv, b_verdiv, b_ttc]
    t1 = time.time()

    # ------------------------------------------------------------
    # Compute optical flow
    # ------------------------------------------------------------
    # grab the current frame, update indices
    clrframe = next(framegrabber)
    currFrame = cv2.cvtColor(clrframe, cv2.COLOR_BGR2GRAY)
    framenum = i + flowStartFrame
    times[i] = framenum
    prvs = sum(frames) / float(opts.frameavg)
    nxt = (sum(frames[1:]) + currFrame) / float(opts.frameavg)
    flow = cv2.calcOpticalFlowFarneback(prvs[startY:stopY, startX:stopX],
                                        nxt[startY:stopY, startX:stopX],
                                        **params)
    mag, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # ------------------------------------------------------------
    # Remove outlier flow
    # ------------------------------------------------------------
    if opts.nofilt:
        thresh_mask = flowMask
    else:
        # clean up flow estimates, remove outliers
        # thresh_mask = threshold_local(mag, shape=(20, 20), llim=0, ulim=0.96)
        # global_mask = threshold_global(mag, llim=0.00, ulim=0.99)[0]
        global_mask = np.ones_like(flowMask)
        lthresh = 1e-3
        thresh_mask = (mag > lthresh) & global_mask
        flow[~thresh_mask] = 0
        mag[~thresh_mask] = 0

    # ------------------------------------------------------------
    # Estimate the location of the FoE
    # ------------------------------------------------------------
    # S = generic2dFilter(angle, (foeW, foeW), matchWin, step=dt, padded=True)
    # participants = generic2dFilter(thresh_mask, (foeW, foeW), np.sum,
    #                                padded=True, step=dt)
    # S /= participants
    # foe_y_subsearch, foe_x_subsearch = np.unravel_index(np.argmin(S), S.shape)
    # foe_y = startY + foeW//2 + foe_y_subsearch*dt
    # foe_x = startX + foeW//2 + foe_x_subsearch*dt
    # foe_x, foe_y = FindFoE(flow[..., 0][foeW//2:-foeW//2], flow[..., 1][foeW//2:-foeW//2])
    # foe_x, foe_y = startX + maskW//2, startY + maskW//2
    foe_x, foe_y = 183, 80  # fixed FoE location
    p0, p1 = (foe_x - foeW//2, foe_y - foeW//2), (foe_x + foeW//2, foe_y + foeW//2)
    # confidence = participants[foe_y_subsearch, foe_x_subsearch] / (foeW**2)
    confidence = 0
    divTemplates = generate2dTemplates(p0, p1, thresh_mask.shape, thresh_mask)
    foe_tmask, foe_bmask, foe_lmask, foe_rmask = divTemplates
    foeSlice_y, foeSlice_x = slice(p0[1], p1[1] + 1), slice(p0[0], p1[0] + 1)

    # ------------------------------------------------------------
    # Estimate divergence parameters and TTC for this frame
    # ------------------------------------------------------------
    xDiv = (np.sum(flow[rmask, 0]) - np.sum(flow[lmask, 0])) \
        / (np.sum((lmask | rmask) & thresh_mask) + EPS)
    yDiv = (np.sum(flow[tmask, 1]) - np.sum(flow[bmask, 1])) \
        / (np.sum((tmask | bmask) & thresh_mask) + EPS)
    xDiv_foe = (np.sum(flow[foe_rmask, 0]) - np.sum(flow[foe_lmask, 0])) \
        / (np.sum(foe_lmask | foe_rmask) + EPS)
    yDiv_foe = (np.sum(flow[foe_tmask, 1]) - np.sum(flow[foe_bmask, 1])) \
        / (np.sum(foe_tmask | foe_bmask) + EPS)
    ttc = 2 / (xDiv + yDiv + EPS)
    history[:, :-1] = history[:, 1:]
    history[:, -1] = (xDiv, yDiv, ttc)

    # ------------------------------------------------------------
    # Use estimation history to smooth the new values
    # ------------------------------------------------------------
    if i > history.shape[1]:
        flowVals[:-1, i] = np.sum(history[:-1] * w_forget, axis=1) / sum(w_forget)
        # flowVals[-1, i] = np.median(history[-1, -3:])
        # m, y0, _, _, std = stats.linregress(np.arange(5),
        #     flowVals[-1, i-4:i+1] * w_forget[:5] / sum(w_forget[:5]))
        # flowVals[2, i] = m * times[i] + y0
        # a 40% trimmed mean of the last five TTC estimates rejects outliers
        flowVals[2, i] = stats.trim_mean(history[-1, -5:], 0.4)
    else:
        flowVals[:, i] = (xDiv, yDiv, ttc)

    # ------------------------------------------------------------
    # Write out results
    # ------------------------------------------------------------
    t2 = time.time()
    out = (framenum, xDiv, yDiv, ttc, 100. * confidence, t2 - t1)
    if opts.log:
        print(','.join(map(str, out)), file=logfile)
    if not opts.quiet:
        sys.stdout.write("\r%4d %+6.2f %+6.2f %6.2f %6.2f %6.2f" % out)
        sys.stdout.flush()

    # ------------------------------------------------------------
    # Update figure
    # ------------------------------------------------------------
    b_latdiv.set_height(flowVals[0, i])
    b_verdiv.set_height(flowVals[1, i])
    b_ttc.set_height(flowVals[2, i])
    foedisp.set_data(clrframe[foeSlice_y, foeSlice_x, ::-1].copy())
    # clrframe[mag <= lthresh, :] = codes.colors[0][::-1]
    # clrframe[~global_mask, :] = codes.colors[-1][::-1]
    cv2.rectangle(clrframe, p0, p1, color=(0, 255, 0))
    cv2.rectangle(clrframe, p0, (foe_x, foe_y + foeW//2), color=(255, 0, 0))
    if opts.vis == "color_overlay":
        cf.colorFlow(flow, clrframe[..., ::-1],
                     slice(startX, stopX), slice(startY, stopY), thresh_mask)
        dispim = clrframe[..., ::-1]
    elif opts.vis == "color":
        dispim = cf.flowToColor(flow)
    elif opts.vis == "quiver":
        update.append(q_img)  # add this object to those that are to be updated
        q_img.set_UVC(flow[flow_strides, flow_strides, 0],
                      flow[flow_strides, flow_strides, 1],
                      (mag[flow_strides, flow_strides]
                       * 255 / (np.max(mag) - np.min(mag) + EPS)))
        dispim = clrframe[..., ::-1]
    imdisp.set_data(dispim)
    unitmag = 2 * np.ones(foedisp.get_size())
    foeKern = generateFoEkernel(foeW)
    foeKern[foeW//2, foeW//2] = angle[foe_y, foe_x]
    sim = (foeKern - angle[foeSlice_y, foeSlice_x]) ** 2
    X, Y = cv2.polarToCart(unitmag, angle[foeSlice_y, foeSlice_x].astype(float))
    q_foe.set_UVC(X[1:-1:2, 1:-1:2], Y[1:-1:2, 1:-1:2],
                  (sim[1:-1:2, 1:-1:2]
                   * 255 / (np.max(sim) - np.min(sim) + EPS)))
    # shift the frame buffer
    frames[:-1] = frames[1:]
    frames[-1] = currFrame
    return update
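# The TTC smoothing above relies on a trimmed mean over a short sliding
# history to reject the occasional wild estimate. A standalone sketch of the
# same idea on a synthetic noisy series (illustrative, not the pipeline):
import numpy as np
from scipy import stats

def smooth_trimmed(series, window=5, proportiontocut=0.4):
    """Sliding trimmed mean; early samples are passed through unchanged."""
    out = np.array(series, dtype=float)
    for i in range(window - 1, len(series)):
        out[i] = stats.trim_mean(series[i - window + 1:i + 1], proportiontocut)
    return out

ttc = np.array([2.0, 2.1, 1.9, 2.0, 50.0, 2.1, 2.0, 1.9])  # one spurious spike
print(smooth_trimmed(ttc))  # the 50.0 spike is suppressed in the output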
from scipy import stats

def trimmed_mean(x, percent=0.2):
    """Mean of x after trimming `percent` of values from each tail."""
    return stats.trim_mean(x, percent)
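# Example usage of the wrapper (made-up numbers): with percent=0.2 on five
# values, the lowest and highest are dropped before averaging.
import numpy as np
print(trimmed_mean(np.array([1.0, 2.0, 3.0, 4.0, 100.0])))  # 3.0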
def thresholding_filterbased_spindle_searching(raw, channelList, annotations,
                                               moving_window_size=200,
                                               lower_threshold=.9, syn_channels=3,
                                               l_bound=0.5, h_bound=2, tol=1,
                                               higher_threshold=3.5,
                                               front=300, back=100,
                                               sleep_stage=True, proba=False,
                                               validation_windowsize=3):
    time = np.linspace(0, raw.last_samp / raw.info['sfreq'], raw._data[0, :].shape[0])
    RMS = np.zeros((len(channelList), raw._data[0, :].shape[0]))
    peak_time = {}  # preallocate
    sfreq = raw.info['sfreq']
    mph, mpl = {}, {}
    for ii, names in enumerate(channelList):
        peak_time[names] = []
        segment, _ = raw[ii, :]
        RMS[ii, :] = window_rms(segment[0, :], moving_window_size)
        mph[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + \
            lower_threshold * trimmed_std(RMS[ii, :], 0.05)
        mpl[names] = trim_mean(RMS[ii, int(front * sfreq):-int(back * sfreq)], 0.05) + \
            higher_threshold * trimmed_std(RMS[ii, :], 0.05)
        # should be greater than the mean, not the threshold, to compute duration
        pass_ = RMS[ii, :] > mph[names]
        up = np.where(np.diff(pass_.astype(int)) > 0)[0]
        down = np.where(np.diff(pass_.astype(int)) < 0)[0]
        if down[0] < up[0]:  # drop an offset that precedes the first onset
            down = down[1:]
        if up.shape != down.shape:  # truncate to matched onset/offset pairs
            size = np.min([up.shape, down.shape])
            up = up[:size]
            down = down[:size]
        C = np.vstack((up, down))
        for pairs in C.T:
            if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
                SegmentForPeakSearching = RMS[ii, pairs[0]:pairs[1]]
                if np.max(SegmentForPeakSearching) < mpl[names]:
                    temp_temp_time = time[pairs[0]:pairs[1]]
                    ints_temp = np.argmax(SegmentForPeakSearching)
                    peak_time[names].append(temp_temp_time[ints_temp])
    peak_time['mean'], peak_at, duration = [], [], []
    RMS_mean = hmean(RMS)
    mph['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + \
        lower_threshold * trimmed_std(RMS_mean, 0.05)
    mpl['mean'] = trim_mean(RMS_mean[int(front * sfreq):-int(back * sfreq)], 0.05) + \
        higher_threshold * trimmed_std(RMS_mean, 0.05)
    pass_ = RMS_mean > mph['mean']
    up = np.where(np.diff(pass_.astype(int)) > 0)[0]
    down = np.where(np.diff(pass_.astype(int)) < 0)[0]
    if down[0] < up[0]:
        down = down[1:]
    if up.shape != down.shape:
        size = np.min([up.shape, down.shape])
        up = up[:size]
        down = down[:size]
    C = np.vstack((up, down))
    for pairs in C.T:
        if l_bound < (time[pairs[1]] - time[pairs[0]]) < h_bound:
            SegmentForPeakSearching = RMS_mean[pairs[0]:pairs[1]]
            if np.max(SegmentForPeakSearching) < mpl['mean']:
                temp_time = time[pairs[0]:pairs[1]]
                ints_temp = np.argmax(SegmentForPeakSearching)
                peak_time['mean'].append(temp_time[ints_temp])
                peak_at.append(SegmentForPeakSearching[ints_temp])
                duration.append(time[pairs[1]] - time[pairs[0]])
    time_find, mean_peak_power, Duration = [], [], []
    for item, PEAK, duration_time in zip(peak_time['mean'], peak_at, duration):
        temp_timePoint = []
        for ii, names in enumerate(channelList):
            try:
                temp_timePoint.append(min(enumerate(peak_time[names]),
                                          key=lambda x: abs(x[1] - item))[1])
            except ValueError:  # no peak detected on this channel
                temp_timePoint.append(item + 2)
        try:
            if np.sum((abs(np.array(temp_timePoint) - item) < tol).astype(int)) >= syn_channels:
                time_find.append(float(item))
                mean_peak_power.append(PEAK)
                Duration.append(duration_time)
        except Exception:
            pass
    if sleep_stage:
        temp_time_find, temp_mean_peak_power, temp_duration = [], [], []
        # separate out stage 2
        stages = annotations[annotations.Annotation.apply(stage_check)]
        On = stages[::2]
        Off = stages[1::2]
        stage_on_off = list(zip(On.Onset.values, Off.Onset.values))
        if abs(np.diff(stage_on_off[0]) - 30) >= 2:  # on/off markers were misaligned
            On = stages[1::2]
            Off = stages[::2]
            stage_on_off = list(zip(On.Onset.values[1:], Off.Onset.values[2:]))
        for single_time_find, single_mean_peak_power, single_duration in zip(time_find, mean_peak_power, Duration):
            for on_time, off_time in stage_on_off:
                if intervalCheck([on_time, off_time], single_time_find, tol=tol):
                    temp_time_find.append(single_time_find)
                    temp_mean_peak_power.append(single_mean_peak_power)
                    temp_duration.append(single_duration)
        time_find = temp_time_find
        mean_peak_power = temp_mean_peak_power
        Duration = temp_duration
    result = pd.DataFrame({'Onset': time_find,
                           'Duration': Duration,
                           'Annotation': ['spindle'] * len(Duration)})
    auto_label, _ = discritized_onset_label_auto(raw, result, validation_windowsize)
    decision_features = None
    auto_proba = None  # defined up front so the return works when proba=False
    if proba:
        events = mne.make_fixed_length_events(raw, id=1, start=0,
                                              duration=validation_windowsize)
        epochs = mne.Epochs(raw, events, event_id=1, tmin=0,
                            tmax=validation_windowsize, preload=True)
        data = epochs.get_data()[:, :, :-1]
        full_prop = []
        for d in data:
            temp_p = []
            for ii, name in enumerate(channelList):
                rms = window_rms(d[ii, :], 500)
                l = trim_mean(rms, 0.05) + lower_threshold * trimmed_std(rms, 0.05)
                h = trim_mean(rms, 0.05) + higher_threshold * trimmed_std(rms, 0.05)
                prop = (sum(rms > l) + sum(rms < h)) / (sum(rms < h) - sum(rms < l))
                if np.isinf(prop):  # guard against a zero denominator, as in the variant above
                    prop = (sum(rms > l) + sum(rms < h))
                temp_p.append(prop)
            full_prop.append(temp_p)
        psds, freq = mne.time_frequency.psd_multitaper(epochs, fmin=11, fmax=16,
                                                       tmin=0, tmax=3, low_bias=True)
        psds = 10 * np.log10(psds)
        features = pd.DataFrame(np.concatenate((np.array(full_prop),
                                                psds.max(2),
                                                freq[np.argmax(psds, 2)]), 1))
        decision_features = StandardScaler().fit_transform(features.values, auto_label)
        clf = LogisticRegressionCV(Cs=np.logspace(-4, 6, 11), cv=5, tol=1e-7,
                                   max_iter=int(1e7))
        clf.fit(decision_features, auto_label)
        auto_proba = clf.predict_proba(decision_features)[:, -1]
    return time_find, mean_peak_power, Duration, mph, mpl, auto_proba, auto_label
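# A minimal usage sketch for the detector above. The file names, channel list,
# and annotation table are placeholders (hypothetical data): adapt them to
# your own recordings before running.
import mne
import pandas as pd

raw = mne.io.read_raw_fif('subject1_filtered-raw.fif', preload=True)  # hypothetical file
raw.filter(11, 16)  # restrict to the spindle band before RMS thresholding
channelList = ['F3', 'F4', 'C3', 'C4', 'O1', 'O2']
annotations = pd.read_csv('subject1_annotations.csv')  # expects Onset / Annotation columns

time_find, mean_peak_power, Duration, mph, mpl, auto_proba, auto_label = \
    thresholding_filterbased_spindle_searching(raw, channelList, annotations,
                                               sleep_stage=True, proba=False)
print('%d candidate spindles' % len(time_find))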